diff --git "a/checkpoint-1746/trainer_state.json" "b/checkpoint-1746/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-1746/trainer_state.json" @@ -0,0 +1,10492 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "global_step": 1746, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.0303030303030305e-06, + "loss": 1.946, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 6.060606060606061e-06, + "loss": 1.908, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 9.090909090909091e-06, + "loss": 2.1083, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 1.2121212121212122e-05, + "loss": 2.3218, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 1.5151515151515153e-05, + "loss": 1.8338, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 1.8181818181818182e-05, + "loss": 2.0202, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 2.1212121212121215e-05, + "loss": 2.1332, + "step": 7 + }, + { + "epoch": 0.02, + "learning_rate": 2.4242424242424244e-05, + "loss": 1.8593, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 2.7272727272727273e-05, + "loss": 1.5359, + "step": 9 + }, + { + "epoch": 0.02, + "learning_rate": 3.0303030303030306e-05, + "loss": 1.327, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 3.3333333333333335e-05, + "loss": 1.7252, + "step": 11 + }, + { + "epoch": 0.03, + "learning_rate": 3.6363636363636364e-05, + "loss": 1.4351, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 3.939393939393939e-05, + "loss": 1.2774, + "step": 13 + }, + { + "epoch": 0.03, + "learning_rate": 4.242424242424243e-05, + "loss": 1.5145, + "step": 14 + }, + { + "epoch": 0.03, + "learning_rate": 4.545454545454546e-05, + "loss": 1.1529, + "step": 15 + }, + { + "epoch": 0.04, + "learning_rate": 4.848484848484849e-05, + "loss": 1.0047, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 5.151515151515152e-05, + "loss": 1.3872, + "step": 17 + }, + { + "epoch": 0.04, + "learning_rate": 5.4545454545454546e-05, + "loss": 1.1229, + "step": 18 + }, + { + "epoch": 0.04, + "learning_rate": 5.757575757575758e-05, + "loss": 1.3386, + "step": 19 + }, + { + "epoch": 0.05, + "learning_rate": 6.060606060606061e-05, + "loss": 1.2493, + "step": 20 + }, + { + "epoch": 0.05, + "learning_rate": 6.363636363636364e-05, + "loss": 1.1427, + "step": 21 + }, + { + "epoch": 0.05, + "learning_rate": 6.666666666666667e-05, + "loss": 1.0895, + "step": 22 + }, + { + "epoch": 0.05, + "learning_rate": 6.96969696969697e-05, + "loss": 1.1989, + "step": 23 + }, + { + "epoch": 0.05, + "learning_rate": 7.272727272727273e-05, + "loss": 1.0438, + "step": 24 + }, + { + "epoch": 0.06, + "learning_rate": 7.575757575757576e-05, + "loss": 1.176, + "step": 25 + }, + { + "epoch": 0.06, + "learning_rate": 7.878787878787879e-05, + "loss": 1.1372, + "step": 26 + }, + { + "epoch": 0.06, + "learning_rate": 8.181818181818183e-05, + "loss": 1.2983, + "step": 27 + }, + { + "epoch": 0.06, + "learning_rate": 8.484848484848486e-05, + "loss": 0.9371, + "step": 28 + }, + { + "epoch": 0.07, + "learning_rate": 8.787878787878789e-05, + "loss": 1.2299, + "step": 29 + }, + { + "epoch": 0.07, + "learning_rate": 9.090909090909092e-05, + "loss": 0.9441, + "step": 30 + }, + { + "epoch": 0.07, + "learning_rate": 9.393939393939395e-05, + "loss": 1.0011, + "step": 31 + }, + { + "epoch": 0.07, + "learning_rate": 9.696969696969698e-05, + "loss": 1.1704, + "step": 32 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001, + "loss": 1.1193, + "step": 33 + }, + { + "epoch": 0.08, + "learning_rate": 0.00010303030303030303, + "loss": 1.1559, + "step": 34 + }, + { + "epoch": 0.08, + "learning_rate": 0.00010606060606060606, + "loss": 0.8677, + "step": 35 + }, + { + "epoch": 0.08, + "learning_rate": 0.00010909090909090909, + "loss": 1.0865, + "step": 36 + }, + { + "epoch": 0.08, + "learning_rate": 0.00011212121212121212, + "loss": 1.0922, + "step": 37 + }, + { + "epoch": 0.09, + "learning_rate": 0.00011515151515151516, + "loss": 0.9434, + "step": 38 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001181818181818182, + "loss": 0.9144, + "step": 39 + }, + { + "epoch": 0.09, + "learning_rate": 0.00012121212121212122, + "loss": 0.9546, + "step": 40 + }, + { + "epoch": 0.09, + "learning_rate": 0.00012424242424242425, + "loss": 1.0654, + "step": 41 + }, + { + "epoch": 0.1, + "learning_rate": 0.00012727272727272728, + "loss": 0.8077, + "step": 42 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001303030303030303, + "loss": 1.0758, + "step": 43 + }, + { + "epoch": 0.1, + "learning_rate": 0.00013333333333333334, + "loss": 1.1512, + "step": 44 + }, + { + "epoch": 0.1, + "learning_rate": 0.00013636363636363637, + "loss": 0.84, + "step": 45 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001393939393939394, + "loss": 1.0567, + "step": 46 + }, + { + "epoch": 0.11, + "learning_rate": 0.00014242424242424243, + "loss": 1.0165, + "step": 47 + }, + { + "epoch": 0.11, + "learning_rate": 0.00014545454545454546, + "loss": 0.8678, + "step": 48 + }, + { + "epoch": 0.11, + "learning_rate": 0.00014848484848484849, + "loss": 1.055, + "step": 49 + }, + { + "epoch": 0.11, + "learning_rate": 0.00015151515151515152, + "loss": 1.0669, + "step": 50 + }, + { + "epoch": 0.12, + "learning_rate": 0.00015454545454545454, + "loss": 0.9915, + "step": 51 + }, + { + "epoch": 0.12, + "learning_rate": 0.00015757575757575757, + "loss": 0.993, + "step": 52 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001606060606060606, + "loss": 1.1085, + "step": 53 + }, + { + "epoch": 0.12, + "learning_rate": 0.00016363636363636366, + "loss": 0.9391, + "step": 54 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001666666666666667, + "loss": 0.975, + "step": 55 + }, + { + "epoch": 0.13, + "learning_rate": 0.00016969696969696972, + "loss": 1.0697, + "step": 56 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017272727272727275, + "loss": 0.9462, + "step": 57 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017575757575757578, + "loss": 1.1209, + "step": 58 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001787878787878788, + "loss": 1.0648, + "step": 59 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018181818181818183, + "loss": 0.9964, + "step": 60 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018484848484848484, + "loss": 0.8451, + "step": 61 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001878787878787879, + "loss": 0.8437, + "step": 62 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019090909090909092, + "loss": 1.1271, + "step": 63 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019393939393939395, + "loss": 1.161, + "step": 64 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019696969696969698, + "loss": 1.0032, + "step": 65 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002, + "loss": 1.1258, + "step": 66 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019999988957695886, + "loss": 0.9543, + "step": 67 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019999955830807923, + "loss": 1.0274, + "step": 68 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019999900619409279, + "loss": 0.9334, + "step": 69 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001999982332362188, + "loss": 1.0398, + "step": 70 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019999723943616433, + "loss": 0.9049, + "step": 71 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019999602479612417, + "loss": 0.7452, + "step": 72 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019999458931878073, + "loss": 0.8762, + "step": 73 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019999293300730427, + "loss": 1.0941, + "step": 74 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019999105586535268, + "loss": 0.7713, + "step": 75 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019998895789707154, + "loss": 0.9233, + "step": 76 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019998663910709416, + "loss": 0.8634, + "step": 77 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019998409950054146, + "loss": 0.9697, + "step": 78 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019998133908302209, + "loss": 1.0816, + "step": 79 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001999783578606323, + "loss": 0.9659, + "step": 80 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019997515583995603, + "loss": 0.9644, + "step": 81 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019997173302806478, + "loss": 0.8561, + "step": 82 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019996808943251773, + "loss": 1.0016, + "step": 83 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001999642250613616, + "loss": 0.8951, + "step": 84 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019996013992313073, + "loss": 1.0157, + "step": 85 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019995583402684694, + "loss": 0.9414, + "step": 86 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019995130738201966, + "loss": 0.8097, + "step": 87 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019994655999864582, + "loss": 0.8606, + "step": 88 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001999415918872098, + "loss": 1.0427, + "step": 89 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019993640305868352, + "loss": 0.9578, + "step": 90 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019993099352452623, + "loss": 1.1097, + "step": 91 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019992536329668478, + "loss": 0.8119, + "step": 92 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019991951238759325, + "loss": 0.9915, + "step": 93 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001999134408101731, + "loss": 0.838, + "step": 94 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019990714857783326, + "loss": 0.8935, + "step": 95 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019990063570446984, + "loss": 0.7914, + "step": 96 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019989390220446622, + "loss": 0.8724, + "step": 97 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019988694809269314, + "loss": 1.0374, + "step": 98 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019987977338450845, + "loss": 0.9028, + "step": 99 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019987237809575723, + "loss": 0.9986, + "step": 100 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019986476224277165, + "loss": 1.113, + "step": 101 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019985692584237108, + "loss": 0.8395, + "step": 102 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019984886891186184, + "loss": 1.0134, + "step": 103 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001998405914690374, + "loss": 0.8845, + "step": 104 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019983209353217812, + "loss": 0.7507, + "step": 105 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019982337512005138, + "loss": 0.9073, + "step": 106 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019981443625191148, + "loss": 0.9973, + "step": 107 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019980527694749952, + "loss": 1.0733, + "step": 108 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019979589722704346, + "loss": 0.9148, + "step": 109 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019978629711125812, + "loss": 0.8385, + "step": 110 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019977647662134488, + "loss": 0.75, + "step": 111 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019976643577899195, + "loss": 0.9002, + "step": 112 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019975617460637416, + "loss": 0.8754, + "step": 113 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001997456931261529, + "loss": 0.8886, + "step": 114 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019973499136147606, + "loss": 1.0058, + "step": 115 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019972406933597812, + "loss": 0.9276, + "step": 116 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019971292707377991, + "loss": 0.9922, + "step": 117 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019970156459948873, + "loss": 0.9507, + "step": 118 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001996899819381981, + "loss": 0.9619, + "step": 119 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967817911548794, + "loss": 0.8163, + "step": 120 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019966615615742424, + "loss": 1.0647, + "step": 121 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996539130905593, + "loss": 0.9348, + "step": 122 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019964144994193142, + "loss": 1.0523, + "step": 123 + }, + { + "epoch": 0.28, + "learning_rate": 0.000199628766739065, + "loss": 0.9063, + "step": 124 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019961586350997033, + "loss": 1.0227, + "step": 125 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996027402831438, + "loss": 1.006, + "step": 126 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019958939708756746, + "loss": 0.9082, + "step": 127 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019957583395270923, + "loss": 0.8756, + "step": 128 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995620509085228, + "loss": 0.8311, + "step": 129 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019954804798544745, + "loss": 1.0332, + "step": 130 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019953382521440815, + "loss": 0.9427, + "step": 131 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019951938262681527, + "loss": 0.838, + "step": 132 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995047202545647, + "loss": 0.8509, + "step": 133 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019948983813003774, + "loss": 0.8944, + "step": 134 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019947473628610099, + "loss": 0.9569, + "step": 135 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019945941475610623, + "loss": 0.7805, + "step": 136 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019944387357389052, + "loss": 0.9337, + "step": 137 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001994281127737759, + "loss": 0.8712, + "step": 138 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001994121323905695, + "loss": 0.9264, + "step": 139 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001993959324595634, + "loss": 0.9323, + "step": 140 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019937951301653444, + "loss": 0.8331, + "step": 141 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001993628740977444, + "loss": 0.902, + "step": 142 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001993460157399396, + "loss": 0.8676, + "step": 143 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019932893798035116, + "loss": 0.8525, + "step": 144 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019931164085669456, + "loss": 0.8571, + "step": 145 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019929412440716985, + "loss": 1.0006, + "step": 146 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019927638867046142, + "loss": 0.9849, + "step": 147 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019925843368573794, + "loss": 0.9064, + "step": 148 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001992402594926523, + "loss": 0.9716, + "step": 149 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001992218661313415, + "loss": 0.7553, + "step": 150 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019920325364242654, + "loss": 0.7921, + "step": 151 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019918442206701245, + "loss": 0.7994, + "step": 152 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001991653714466879, + "loss": 0.8296, + "step": 153 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019914610182352548, + "loss": 0.8116, + "step": 154 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019912661324008148, + "loss": 0.9844, + "step": 155 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019910690573939557, + "loss": 0.865, + "step": 156 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019908697936499103, + "loss": 0.959, + "step": 157 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019906683416087448, + "loss": 0.7727, + "step": 158 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019904647017153582, + "loss": 0.707, + "step": 159 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019902588744194813, + "loss": 0.8597, + "step": 160 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019900508601756756, + "loss": 0.9146, + "step": 161 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001989840659443332, + "loss": 0.9571, + "step": 162 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001989628272686671, + "loss": 0.8537, + "step": 163 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019894137003747403, + "loss": 0.828, + "step": 164 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019891969429814145, + "loss": 0.8055, + "step": 165 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001988978000985394, + "loss": 0.8432, + "step": 166 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001988756874870203, + "loss": 0.8101, + "step": 167 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019885335651241903, + "loss": 0.9072, + "step": 168 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001988308072240527, + "loss": 0.7862, + "step": 169 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019880803967172047, + "loss": 0.8303, + "step": 170 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019878505390570362, + "loss": 0.9489, + "step": 171 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001987618499767653, + "loss": 1.0125, + "step": 172 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001987384279361505, + "loss": 0.809, + "step": 173 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019871478783558587, + "loss": 0.9488, + "step": 174 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001986909297272796, + "loss": 0.9664, + "step": 175 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001986668536639215, + "loss": 0.9657, + "step": 176 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001986425596986825, + "loss": 0.8123, + "step": 177 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019861804788521493, + "loss": 0.9482, + "step": 178 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019859331827765212, + "loss": 0.879, + "step": 179 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019856837093060848, + "loss": 0.896, + "step": 180 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019854320589917927, + "loss": 1.0729, + "step": 181 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019851782323894042, + "loss": 0.9844, + "step": 182 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001984922230059486, + "loss": 0.9131, + "step": 183 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019846640525674082, + "loss": 0.9417, + "step": 184 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019844037004833473, + "loss": 0.9633, + "step": 185 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001984141174382279, + "loss": 0.968, + "step": 186 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019838764748439827, + "loss": 0.8447, + "step": 187 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019836096024530373, + "loss": 0.8638, + "step": 188 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019833405577988195, + "loss": 0.9346, + "step": 189 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001983069341475504, + "loss": 0.8969, + "step": 190 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019827959540820613, + "loss": 0.8499, + "step": 191 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019825203962222572, + "loss": 0.8041, + "step": 192 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019822426685046497, + "loss": 0.9216, + "step": 193 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019819627715425903, + "loss": 0.906, + "step": 194 + }, + { + "epoch": 0.45, + "learning_rate": 0.000198168070595422, + "loss": 0.8969, + "step": 195 + }, + { + "epoch": 0.45, + "learning_rate": 0.000198139647236247, + "loss": 0.7949, + "step": 196 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019811100713950587, + "loss": 0.8996, + "step": 197 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019808215036844917, + "loss": 0.9118, + "step": 198 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001980530769868059, + "loss": 0.7355, + "step": 199 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019802378705878354, + "loss": 0.8344, + "step": 200 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019799428064906774, + "loss": 0.9639, + "step": 201 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001979645578228222, + "loss": 0.852, + "step": 202 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001979346186456887, + "loss": 0.8493, + "step": 203 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019790446318378665, + "loss": 0.851, + "step": 204 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019787409150371328, + "loss": 0.7161, + "step": 205 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019784350367254322, + "loss": 0.9846, + "step": 206 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001978126997578285, + "loss": 0.7883, + "step": 207 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019778167982759833, + "loss": 0.8691, + "step": 208 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019775044395035907, + "loss": 0.928, + "step": 209 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001977189921950939, + "loss": 0.8244, + "step": 210 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001976873246312628, + "loss": 1.0413, + "step": 211 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001976554413288023, + "loss": 0.8261, + "step": 212 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001976233423581255, + "loss": 0.823, + "step": 213 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019759102779012166, + "loss": 0.9386, + "step": 214 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019755849769615628, + "loss": 0.8156, + "step": 215 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019752575214807076, + "loss": 0.8556, + "step": 216 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019749279121818235, + "loss": 0.7769, + "step": 217 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019745961497928406, + "loss": 1.0772, + "step": 218 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019742622350464418, + "loss": 0.8147, + "step": 219 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001973926168680066, + "loss": 0.9529, + "step": 220 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019735879514359018, + "loss": 0.8688, + "step": 221 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019732475840608888, + "loss": 0.9647, + "step": 222 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019729050673067156, + "loss": 0.837, + "step": 223 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019725604019298163, + "loss": 0.9211, + "step": 224 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019722135886913715, + "loss": 0.9434, + "step": 225 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001971864628357304, + "loss": 0.6506, + "step": 226 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019715135216982798, + "loss": 0.8052, + "step": 227 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019711602694897037, + "loss": 0.7852, + "step": 228 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019708048725117192, + "loss": 0.9283, + "step": 229 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001970447331549207, + "loss": 0.9081, + "step": 230 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019700876473917824, + "loss": 0.9036, + "step": 231 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019697258208337934, + "loss": 0.716, + "step": 232 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019693618526743197, + "loss": 0.8192, + "step": 233 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001968995743717171, + "loss": 0.9773, + "step": 234 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019686274947708848, + "loss": 0.8698, + "step": 235 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001968257106648724, + "loss": 0.9062, + "step": 236 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019678845801686764, + "loss": 0.8984, + "step": 237 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019675099161534521, + "loss": 0.8087, + "step": 238 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019671331154304822, + "loss": 0.8272, + "step": 239 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019667541788319162, + "loss": 0.784, + "step": 240 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019663731071946206, + "loss": 0.8777, + "step": 241 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019659899013601772, + "loss": 0.8534, + "step": 242 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019656045621748808, + "loss": 0.9645, + "step": 243 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019652170904897387, + "loss": 0.9692, + "step": 244 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019648274871604662, + "loss": 0.838, + "step": 245 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019644357530474872, + "loss": 0.7445, + "step": 246 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001964041889015931, + "loss": 0.9065, + "step": 247 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019636458959356316, + "loss": 0.7806, + "step": 248 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019632477746811232, + "loss": 0.7971, + "step": 249 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019628475261316417, + "loss": 0.8409, + "step": 250 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019624451511711198, + "loss": 0.7432, + "step": 251 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019620406506881875, + "loss": 0.9096, + "step": 252 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019616340255761676, + "loss": 0.8004, + "step": 253 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019612252767330763, + "loss": 0.7978, + "step": 254 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001960814405061619, + "loss": 0.9535, + "step": 255 + }, + { + "epoch": 0.59, + "learning_rate": 0.000196040141146919, + "loss": 0.9945, + "step": 256 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001959986296867869, + "loss": 0.9703, + "step": 257 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019595690621744208, + "loss": 0.9639, + "step": 258 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019591497083102914, + "loss": 0.9312, + "step": 259 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019587282362016083, + "loss": 0.7709, + "step": 260 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001958304646779175, + "loss": 0.8547, + "step": 261 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019578789409784727, + "loss": 0.8081, + "step": 262 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019574511197396563, + "loss": 0.8476, + "step": 263 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019570211840075517, + "loss": 0.9658, + "step": 264 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019565891347316552, + "loss": 0.7778, + "step": 265 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001956154972866131, + "loss": 0.9926, + "step": 266 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001955718699369808, + "loss": 0.957, + "step": 267 + }, + { + "epoch": 0.61, + "learning_rate": 0.000195528031520618, + "loss": 0.9396, + "step": 268 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019548398213434007, + "loss": 0.9049, + "step": 269 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019543972187542833, + "loss": 0.9683, + "step": 270 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019539525084162992, + "loss": 0.8555, + "step": 271 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019535056913115725, + "loss": 0.8489, + "step": 272 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001953056768426882, + "loss": 0.8728, + "step": 273 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019526057407536564, + "loss": 0.9443, + "step": 274 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019521526092879725, + "loss": 0.8161, + "step": 275 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019516973750305532, + "loss": 0.8936, + "step": 276 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019512400389867657, + "loss": 0.8315, + "step": 277 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019507806021666188, + "loss": 0.9298, + "step": 278 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019503190655847604, + "loss": 0.8235, + "step": 279 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019498554302604766, + "loss": 0.9245, + "step": 280 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001949389697217687, + "loss": 0.8302, + "step": 281 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019489218674849455, + "loss": 0.8488, + "step": 282 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019484519420954354, + "loss": 0.8177, + "step": 283 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019479799220869682, + "loss": 1.0039, + "step": 284 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019475058085019825, + "loss": 0.7685, + "step": 285 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019470296023875387, + "loss": 0.9174, + "step": 286 + }, + { + "epoch": 0.66, + "learning_rate": 0.000194655130479532, + "loss": 1.0997, + "step": 287 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019460709167816274, + "loss": 0.9759, + "step": 288 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001945588439407379, + "loss": 0.9397, + "step": 289 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019451038737381077, + "loss": 1.0367, + "step": 290 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019446172208439574, + "loss": 0.8298, + "step": 291 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001944128481799682, + "loss": 0.9094, + "step": 292 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019436376576846423, + "loss": 1.1234, + "step": 293 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019431447495828045, + "loss": 0.9103, + "step": 294 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001942649758582737, + "loss": 0.7841, + "step": 295 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019421526857776072, + "loss": 0.8817, + "step": 296 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019416535322651818, + "loss": 1.0682, + "step": 297 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019411522991478214, + "loss": 0.9201, + "step": 298 + }, + { + "epoch": 0.68, + "learning_rate": 0.000194064898753248, + "loss": 4.1834, + "step": 299 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019401435985307012, + "loss": 1.0391, + "step": 300 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019396361332586166, + "loss": 2.5015, + "step": 301 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001939126592836944, + "loss": 0.7927, + "step": 302 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001938614978390983, + "loss": 2.2345, + "step": 303 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019381012910506146, + "loss": 0.9311, + "step": 304 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019375855319502962, + "loss": 0.9713, + "step": 305 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019370677022290624, + "loss": 0.8967, + "step": 306 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019365478030305196, + "loss": 3.095, + "step": 307 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001936025835502845, + "loss": 1.1008, + "step": 308 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001935501800798783, + "loss": 1.5409, + "step": 309 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019349757000756444, + "loss": 1.02, + "step": 310 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019344475344953012, + "loss": 1.0101, + "step": 311 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001933917305224187, + "loss": 0.7686, + "step": 312 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001933385013433292, + "loss": 1.1061, + "step": 313 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001932850660298162, + "loss": 0.8083, + "step": 314 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001932314246998895, + "loss": 1.1942, + "step": 315 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019317757747201384, + "loss": 0.8551, + "step": 316 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019312352446510878, + "loss": 0.9049, + "step": 317 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019306926579854821, + "loss": 0.7072, + "step": 318 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019301480159216028, + "loss": 0.8552, + "step": 319 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019296013196622706, + "loss": 0.8414, + "step": 320 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001929052570414843, + "loss": 0.9198, + "step": 321 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019285017693912107, + "loss": 2.1953, + "step": 322 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019279489178077969, + "loss": 0.851, + "step": 323 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019273940168855518, + "loss": 1.0239, + "step": 324 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019268370678499533, + "loss": 1.5125, + "step": 325 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019262780719310008, + "loss": 0.9171, + "step": 326 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019257170303632148, + "loss": 0.9794, + "step": 327 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019251539443856344, + "loss": 0.9023, + "step": 328 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019245888152418124, + "loss": 1.058, + "step": 329 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019240216441798142, + "loss": 0.9411, + "step": 330 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001923452432452215, + "loss": 1.197, + "step": 331 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001922881181316097, + "loss": 0.9253, + "step": 332 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001922307892033046, + "loss": 1.156, + "step": 333 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019217325658691482, + "loss": 0.9424, + "step": 334 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019211552040949891, + "loss": 1.1147, + "step": 335 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019205758079856498, + "loss": 0.8528, + "step": 336 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001919994378820704, + "loss": 0.8105, + "step": 337 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019194109178842153, + "loss": 0.9279, + "step": 338 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019188254264647337, + "loss": 0.9231, + "step": 339 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019182379058552948, + "loss": 1.0425, + "step": 340 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019176483573534142, + "loss": 0.8794, + "step": 341 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019170567822610873, + "loss": 0.9873, + "step": 342 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001916463181884784, + "loss": 0.8146, + "step": 343 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019158675575354478, + "loss": 1.027, + "step": 344 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019152699105284913, + "loss": 0.8093, + "step": 345 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001914670242183795, + "loss": 0.951, + "step": 346 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019140685538257028, + "loss": 0.9268, + "step": 347 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019134648467830198, + "loss": 1.0205, + "step": 348 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019128591223890092, + "loss": 0.9043, + "step": 349 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019122513819813902, + "loss": 0.7387, + "step": 350 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001911641626902333, + "loss": 0.9422, + "step": 351 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019110298584984578, + "loss": 0.9015, + "step": 352 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001910416078120832, + "loss": 0.7522, + "step": 353 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019098002871249646, + "loss": 0.9722, + "step": 354 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001909182486870806, + "loss": 0.8358, + "step": 355 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019085626787227443, + "loss": 0.9859, + "step": 356 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019079408640496013, + "loss": 0.7796, + "step": 357 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019073170442246302, + "loss": 0.8617, + "step": 358 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001906691220625513, + "loss": 0.7727, + "step": 359 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001906063394634356, + "loss": 0.8786, + "step": 360 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001905433567637689, + "loss": 0.9117, + "step": 361 + }, + { + "epoch": 0.83, + "learning_rate": 0.000190480174102646, + "loss": 0.9182, + "step": 362 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001904167916196033, + "loss": 0.9706, + "step": 363 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001903532094546186, + "loss": 0.8036, + "step": 364 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001902894277481105, + "loss": 0.902, + "step": 365 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019022544664093854, + "loss": 0.9231, + "step": 366 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019016126627440237, + "loss": 0.9751, + "step": 367 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001900968867902419, + "loss": 0.8373, + "step": 368 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001900323083306367, + "loss": 0.8695, + "step": 369 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001899675310382057, + "loss": 0.8654, + "step": 370 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018990255505600706, + "loss": 0.98, + "step": 371 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018983738052753767, + "loss": 0.7454, + "step": 372 + }, + { + "epoch": 0.85, + "learning_rate": 0.00018977200759673295, + "loss": 0.829, + "step": 373 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018970643640796642, + "loss": 0.8262, + "step": 374 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001896406671060495, + "loss": 1.0659, + "step": 375 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018957469983623112, + "loss": 0.8551, + "step": 376 + }, + { + "epoch": 0.86, + "learning_rate": 0.00018950853474419742, + "loss": 0.7991, + "step": 377 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001894421719760714, + "loss": 0.8662, + "step": 378 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018937561167841263, + "loss": 0.8817, + "step": 379 + }, + { + "epoch": 0.87, + "learning_rate": 0.00018930885399821693, + "loss": 1.0894, + "step": 380 + }, + { + "epoch": 0.87, + "learning_rate": 0.000189241899082916, + "loss": 0.8225, + "step": 381 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018917474708037718, + "loss": 0.9065, + "step": 382 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018910739813890302, + "loss": 0.8779, + "step": 383 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018903985240723104, + "loss": 0.7909, + "step": 384 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018897211003453328, + "loss": 0.7649, + "step": 385 + }, + { + "epoch": 0.88, + "learning_rate": 0.00018890417117041619, + "loss": 0.9788, + "step": 386 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018883603596492004, + "loss": 0.938, + "step": 387 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018876770456851877, + "loss": 0.9032, + "step": 388 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018869917713211964, + "loss": 0.9059, + "step": 389 + }, + { + "epoch": 0.89, + "learning_rate": 0.00018863045380706274, + "loss": 0.8896, + "step": 390 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001885615347451209, + "loss": 0.7614, + "step": 391 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001884924200984991, + "loss": 0.978, + "step": 392 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001884231100198344, + "loss": 0.9406, + "step": 393 + }, + { + "epoch": 0.9, + "learning_rate": 0.00018835360466219533, + "loss": 0.7555, + "step": 394 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001882839041790818, + "loss": 0.9049, + "step": 395 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018821400872442458, + "loss": 0.7041, + "step": 396 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018814391845258505, + "loss": 0.8995, + "step": 397 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001880736335183548, + "loss": 0.7461, + "step": 398 + }, + { + "epoch": 0.91, + "learning_rate": 0.00018800315407695539, + "loss": 0.9954, + "step": 399 + }, + { + "epoch": 0.92, + "learning_rate": 0.00018793248028403788, + "loss": 0.9035, + "step": 400 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001878616122956826, + "loss": 0.9083, + "step": 401 + }, + { + "epoch": 0.92, + "learning_rate": 0.00018779055026839868, + "loss": 0.7286, + "step": 402 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001877192943591239, + "loss": 0.8001, + "step": 403 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018764784472522403, + "loss": 0.8795, + "step": 404 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001875762015244929, + "loss": 0.8912, + "step": 405 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018750436491515163, + "loss": 0.8848, + "step": 406 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018743233505584862, + "loss": 0.8512, + "step": 407 + }, + { + "epoch": 0.93, + "learning_rate": 0.00018736011210565898, + "loss": 0.8537, + "step": 408 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018728769622408423, + "loss": 0.8777, + "step": 409 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018721508757105202, + "loss": 0.7849, + "step": 410 + }, + { + "epoch": 0.94, + "learning_rate": 0.00018714228630691576, + "loss": 0.9669, + "step": 411 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001870692925924541, + "loss": 0.9299, + "step": 412 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018699610658887088, + "loss": 1.0188, + "step": 413 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018692272845779448, + "loss": 0.8388, + "step": 414 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018684915836127765, + "loss": 0.7904, + "step": 415 + }, + { + "epoch": 0.95, + "learning_rate": 0.00018677539646179707, + "loss": 0.9689, + "step": 416 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018670144292225297, + "loss": 0.7339, + "step": 417 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018662729790596888, + "loss": 0.7894, + "step": 418 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018655296157669117, + "loss": 0.7163, + "step": 419 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018647843409858869, + "loss": 0.8642, + "step": 420 + }, + { + "epoch": 0.96, + "learning_rate": 0.00018640371563625246, + "loss": 0.9281, + "step": 421 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018632880635469526, + "loss": 0.834, + "step": 422 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018625370641935129, + "loss": 0.7316, + "step": 423 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018617841599607586, + "loss": 0.8504, + "step": 424 + }, + { + "epoch": 0.97, + "learning_rate": 0.00018610293525114492, + "loss": 0.8731, + "step": 425 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018602726435125474, + "loss": 0.8803, + "step": 426 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001859514034635215, + "loss": 0.8417, + "step": 427 + }, + { + "epoch": 0.98, + "learning_rate": 0.000185875352755481, + "loss": 0.8947, + "step": 428 + }, + { + "epoch": 0.98, + "learning_rate": 0.00018579911239508827, + "loss": 0.8368, + "step": 429 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018572268255071718, + "loss": 0.8231, + "step": 430 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018564606339116, + "loss": 0.8576, + "step": 431 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001855692550856272, + "loss": 0.8753, + "step": 432 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018549225780374685, + "loss": 0.7778, + "step": 433 + }, + { + "epoch": 0.99, + "learning_rate": 0.00018541507171556445, + "loss": 0.7516, + "step": 434 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001853376969915425, + "loss": 0.7466, + "step": 435 + }, + { + "epoch": 1.0, + "learning_rate": 0.00018526013380255999, + "loss": 0.917, + "step": 436 + }, + { + "epoch": 1.0, + "learning_rate": 0.00018518238231991218, + "loss": 0.9042, + "step": 437 + }, + { + "epoch": 1.0, + "learning_rate": 0.00018510444271531022, + "loss": 0.8587, + "step": 438 + }, + { + "epoch": 1.01, + "learning_rate": 0.00018502631516088066, + "loss": 0.9001, + "step": 439 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001849479998291651, + "loss": 0.7977, + "step": 440 + }, + { + "epoch": 1.01, + "learning_rate": 0.00018486949689311993, + "loss": 0.8711, + "step": 441 + }, + { + "epoch": 1.01, + "learning_rate": 0.00018479080652611583, + "loss": 0.7192, + "step": 442 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001847119289019373, + "loss": 0.9608, + "step": 443 + }, + { + "epoch": 1.02, + "learning_rate": 0.00018463286419478255, + "loss": 0.7097, + "step": 444 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001845536125792629, + "loss": 0.7354, + "step": 445 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001844741742304024, + "loss": 0.8711, + "step": 446 + }, + { + "epoch": 1.02, + "learning_rate": 0.00018439454932363755, + "loss": 0.8832, + "step": 447 + }, + { + "epoch": 1.03, + "learning_rate": 0.00018431473803481684, + "loss": 0.932, + "step": 448 + }, + { + "epoch": 1.03, + "learning_rate": 0.00018423474054020034, + "loss": 0.8394, + "step": 449 + }, + { + "epoch": 1.03, + "learning_rate": 0.00018415455701645942, + "loss": 0.7698, + "step": 450 + }, + { + "epoch": 1.03, + "learning_rate": 0.00018407418764067627, + "loss": 0.8856, + "step": 451 + }, + { + "epoch": 1.04, + "learning_rate": 0.00018399363259034347, + "loss": 0.8529, + "step": 452 + }, + { + "epoch": 1.04, + "learning_rate": 0.00018391289204336368, + "loss": 0.9898, + "step": 453 + }, + { + "epoch": 1.04, + "learning_rate": 0.00018383196617804926, + "loss": 0.8312, + "step": 454 + }, + { + "epoch": 1.04, + "learning_rate": 0.00018375085517312182, + "loss": 0.8234, + "step": 455 + }, + { + "epoch": 1.04, + "learning_rate": 0.00018366955920771184, + "loss": 0.7871, + "step": 456 + }, + { + "epoch": 1.05, + "learning_rate": 0.00018358807846135825, + "loss": 0.9814, + "step": 457 + }, + { + "epoch": 1.05, + "learning_rate": 0.00018350641311400812, + "loss": 0.8183, + "step": 458 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001834245633460161, + "loss": 0.8961, + "step": 459 + }, + { + "epoch": 1.05, + "learning_rate": 0.00018334252933814427, + "loss": 0.9166, + "step": 460 + }, + { + "epoch": 1.06, + "learning_rate": 0.00018326031127156148, + "loss": 1.0031, + "step": 461 + }, + { + "epoch": 1.06, + "learning_rate": 0.00018317790932784317, + "loss": 0.8171, + "step": 462 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001830953236889707, + "loss": 0.83, + "step": 463 + }, + { + "epoch": 1.06, + "learning_rate": 0.00018301255453733134, + "loss": 0.8134, + "step": 464 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001829296020557174, + "loss": 0.8561, + "step": 465 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001828464664273263, + "loss": 0.8669, + "step": 466 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001827631478357597, + "loss": 1.003, + "step": 467 + }, + { + "epoch": 1.07, + "learning_rate": 0.00018267964646502357, + "loss": 0.8715, + "step": 468 + }, + { + "epoch": 1.07, + "learning_rate": 0.00018259596249952731, + "loss": 0.7434, + "step": 469 + }, + { + "epoch": 1.08, + "learning_rate": 0.00018251209612408373, + "loss": 0.9163, + "step": 470 + }, + { + "epoch": 1.08, + "learning_rate": 0.00018242804752390844, + "loss": 1.0639, + "step": 471 + }, + { + "epoch": 1.08, + "learning_rate": 0.00018234381688461942, + "loss": 0.8266, + "step": 472 + }, + { + "epoch": 1.08, + "learning_rate": 0.00018225940439223684, + "loss": 0.7582, + "step": 473 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001821748102331823, + "loss": 0.8547, + "step": 474 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001820900345942787, + "loss": 0.7908, + "step": 475 + }, + { + "epoch": 1.09, + "learning_rate": 0.00018200507766274977, + "loss": 0.6203, + "step": 476 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001819199396262195, + "loss": 0.806, + "step": 477 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001818346206727119, + "loss": 0.8016, + "step": 478 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001817491209906506, + "loss": 0.8548, + "step": 479 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018166344076885827, + "loss": 0.9194, + "step": 480 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018157758019655634, + "loss": 0.8704, + "step": 481 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018149153946336446, + "loss": 0.8373, + "step": 482 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001814053187593003, + "loss": 0.8229, + "step": 483 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018131891827477884, + "loss": 0.8289, + "step": 484 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018123233820061218, + "loss": 0.7753, + "step": 485 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018114557872800905, + "loss": 1.029, + "step": 486 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001810586400485743, + "loss": 0.6198, + "step": 487 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001809715223543087, + "loss": 0.8418, + "step": 488 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018088422583760813, + "loss": 0.7421, + "step": 489 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001807967506912636, + "loss": 0.8032, + "step": 490 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018070909710846052, + "loss": 0.7956, + "step": 491 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018062126528277844, + "loss": 0.9013, + "step": 492 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018053325540819045, + "loss": 0.9582, + "step": 493 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018044506767906295, + "loss": 0.6845, + "step": 494 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018035670229015507, + "loss": 0.8731, + "step": 495 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001802681594366183, + "loss": 0.8369, + "step": 496 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018017943931399603, + "loss": 0.6557, + "step": 497 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018009054211822324, + "loss": 0.7997, + "step": 498 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001800014680456259, + "loss": 0.8348, + "step": 499 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001799122172929206, + "loss": 0.9043, + "step": 500 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017982279005721407, + "loss": 0.8499, + "step": 501 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017973318653600293, + "loss": 0.8595, + "step": 502 + }, + { + "epoch": 1.15, + "learning_rate": 0.00017964340692717303, + "loss": 0.9468, + "step": 503 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001795534514289991, + "loss": 0.9848, + "step": 504 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017946332024014434, + "loss": 0.7326, + "step": 505 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017937301355965996, + "loss": 0.8479, + "step": 506 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017928253158698473, + "loss": 0.8669, + "step": 507 + }, + { + "epoch": 1.16, + "learning_rate": 0.00017919187452194454, + "loss": 0.8163, + "step": 508 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017910104256475194, + "loss": 0.926, + "step": 509 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017901003591600575, + "loss": 0.7956, + "step": 510 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017891885477669064, + "loss": 0.9002, + "step": 511 + }, + { + "epoch": 1.17, + "learning_rate": 0.00017882749934817652, + "loss": 0.787, + "step": 512 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017873596983221832, + "loss": 0.7519, + "step": 513 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001786442664309554, + "loss": 0.8067, + "step": 514 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017855238934691108, + "loss": 0.8824, + "step": 515 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001784603387829923, + "loss": 0.8014, + "step": 516 + }, + { + "epoch": 1.18, + "learning_rate": 0.00017836811494248919, + "loss": 0.6672, + "step": 517 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017827571802907444, + "loss": 0.8516, + "step": 518 + }, + { + "epoch": 1.19, + "learning_rate": 0.000178183148246803, + "loss": 0.8476, + "step": 519 + }, + { + "epoch": 1.19, + "learning_rate": 0.00017809040580011164, + "loss": 0.8493, + "step": 520 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001779974908938184, + "loss": 0.7288, + "step": 521 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017790440373312223, + "loss": 0.7443, + "step": 522 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017781114452360245, + "loss": 0.8767, + "step": 523 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017771771347121842, + "loss": 0.8025, + "step": 524 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001776241107823089, + "loss": 0.8842, + "step": 525 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017753033666359177, + "loss": 0.9648, + "step": 526 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017743639132216353, + "loss": 0.7872, + "step": 527 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001773422749654988, + "loss": 0.9122, + "step": 528 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017724798780144983, + "loss": 0.7688, + "step": 529 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001771535300382461, + "loss": 0.8938, + "step": 530 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017705890188449394, + "loss": 0.7152, + "step": 531 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001769641035491759, + "loss": 0.7077, + "step": 532 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017686913524165036, + "loss": 0.8872, + "step": 533 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017677399717165116, + "loss": 0.8775, + "step": 534 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017667868954928694, + "loss": 0.8508, + "step": 535 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017658321258504092, + "loss": 0.8589, + "step": 536 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017648756648977018, + "loss": 0.6499, + "step": 537 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017639175147470538, + "loss": 0.8927, + "step": 538 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017629576775145026, + "loss": 0.8702, + "step": 539 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017619961553198108, + "loss": 0.7958, + "step": 540 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017610329502864625, + "loss": 0.8582, + "step": 541 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017600680645416583, + "loss": 0.7905, + "step": 542 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001759101500216311, + "loss": 0.7574, + "step": 543 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017581332594450392, + "loss": 0.861, + "step": 544 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017571633443661658, + "loss": 0.7682, + "step": 545 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017561917571217093, + "loss": 0.7547, + "step": 546 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017552184998573825, + "loss": 0.7852, + "step": 547 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001754243574722586, + "loss": 0.7635, + "step": 548 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017532669838704035, + "loss": 0.8714, + "step": 549 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017522887294575977, + "loss": 0.7839, + "step": 550 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017513088136446054, + "loss": 0.8551, + "step": 551 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017503272385955318, + "loss": 0.7367, + "step": 552 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017493440064781475, + "loss": 0.9257, + "step": 553 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017483591194638817, + "loss": 0.8246, + "step": 554 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017473725797278192, + "loss": 0.8319, + "step": 555 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017463843894486937, + "loss": 0.8304, + "step": 556 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017453945508088853, + "loss": 0.6536, + "step": 557 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017444030659944138, + "loss": 0.7606, + "step": 558 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017434099371949345, + "loss": 0.7084, + "step": 559 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017424151666037329, + "loss": 0.8891, + "step": 560 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017414187564177217, + "loss": 0.6199, + "step": 561 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017404207088374333, + "loss": 0.8676, + "step": 562 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001739421026067017, + "loss": 0.8477, + "step": 563 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017384197103142328, + "loss": 0.9234, + "step": 564 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001737416763790447, + "loss": 0.9103, + "step": 565 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017364121887106286, + "loss": 0.7859, + "step": 566 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017354059872933415, + "loss": 0.8623, + "step": 567 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017343981617607424, + "loss": 0.6266, + "step": 568 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017333887143385743, + "loss": 0.8105, + "step": 569 + }, + { + "epoch": 1.31, + "learning_rate": 0.00017323776472561627, + "loss": 0.7752, + "step": 570 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001731364962746409, + "loss": 0.7873, + "step": 571 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001730350663045788, + "loss": 0.8425, + "step": 572 + }, + { + "epoch": 1.31, + "learning_rate": 0.00017293347503943406, + "loss": 0.777, + "step": 573 + }, + { + "epoch": 1.32, + "learning_rate": 0.000172831722703567, + "loss": 0.7348, + "step": 574 + }, + { + "epoch": 1.32, + "learning_rate": 0.00017272980952169365, + "loss": 0.7797, + "step": 575 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001726277357188853, + "loss": 0.8328, + "step": 576 + }, + { + "epoch": 1.32, + "learning_rate": 0.00017252550152056795, + "loss": 0.7109, + "step": 577 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001724231071525218, + "loss": 0.7905, + "step": 578 + }, + { + "epoch": 1.33, + "learning_rate": 0.00017232055284088085, + "loss": 0.7541, + "step": 579 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001722178388121322, + "loss": 0.8954, + "step": 580 + }, + { + "epoch": 1.33, + "learning_rate": 0.00017211496529311582, + "loss": 0.8362, + "step": 581 + }, + { + "epoch": 1.33, + "learning_rate": 0.00017201193251102382, + "loss": 0.8436, + "step": 582 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017190874069340014, + "loss": 0.7594, + "step": 583 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001718053900681397, + "loss": 0.9342, + "step": 584 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017170188086348848, + "loss": 0.8934, + "step": 585 + }, + { + "epoch": 1.34, + "learning_rate": 0.00017159821330804236, + "loss": 0.831, + "step": 586 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001714943876307472, + "loss": 0.8053, + "step": 587 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017139040406089786, + "loss": 0.81, + "step": 588 + }, + { + "epoch": 1.35, + "learning_rate": 0.000171286262828138, + "loss": 0.8245, + "step": 589 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017118196416245947, + "loss": 0.8232, + "step": 590 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017107750829420176, + "loss": 0.8244, + "step": 591 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001709728954540516, + "loss": 0.7863, + "step": 592 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017086812587304234, + "loss": 0.8274, + "step": 593 + }, + { + "epoch": 1.36, + "learning_rate": 0.00017076319978255345, + "loss": 0.6595, + "step": 594 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001706581174143101, + "loss": 0.8582, + "step": 595 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017055287900038263, + "loss": 0.6873, + "step": 596 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017044748477318593, + "loss": 0.8673, + "step": 597 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017034193496547902, + "loss": 0.8055, + "step": 598 + }, + { + "epoch": 1.37, + "learning_rate": 0.00017023622981036455, + "loss": 0.8232, + "step": 599 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001701303695412881, + "loss": 0.8745, + "step": 600 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017002435439203808, + "loss": 0.8034, + "step": 601 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016991818459674468, + "loss": 0.9006, + "step": 602 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001698118603898798, + "loss": 0.7828, + "step": 603 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016970538200625622, + "loss": 0.8413, + "step": 604 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016959874968102735, + "loss": 0.8669, + "step": 605 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016949196364968646, + "loss": 0.9277, + "step": 606 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016938502414806634, + "loss": 0.9256, + "step": 607 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016927793141233868, + "loss": 0.8613, + "step": 608 + }, + { + "epoch": 1.4, + "learning_rate": 0.00016917068567901358, + "loss": 0.9439, + "step": 609 + }, + { + "epoch": 1.4, + "learning_rate": 0.00016906328718493906, + "loss": 0.8606, + "step": 610 + }, + { + "epoch": 1.4, + "learning_rate": 0.00016895573616730044, + "loss": 0.7483, + "step": 611 + }, + { + "epoch": 1.4, + "learning_rate": 0.00016884803286362, + "loss": 0.8359, + "step": 612 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001687401775117562, + "loss": 0.7764, + "step": 613 + }, + { + "epoch": 1.41, + "learning_rate": 0.00016863217034990342, + "loss": 0.9857, + "step": 614 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001685240116165912, + "loss": 0.8706, + "step": 615 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001684157015506839, + "loss": 0.867, + "step": 616 + }, + { + "epoch": 1.41, + "learning_rate": 0.00016830724039138003, + "loss": 0.7974, + "step": 617 + }, + { + "epoch": 1.42, + "learning_rate": 0.00016819862837821181, + "loss": 0.7835, + "step": 618 + }, + { + "epoch": 1.42, + "learning_rate": 0.00016808986575104465, + "loss": 0.7987, + "step": 619 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001679809527500765, + "loss": 0.7383, + "step": 620 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001678718896158375, + "loss": 0.9224, + "step": 621 + }, + { + "epoch": 1.42, + "learning_rate": 0.00016776267658918928, + "loss": 0.8959, + "step": 622 + }, + { + "epoch": 1.43, + "learning_rate": 0.00016765331391132456, + "loss": 0.6702, + "step": 623 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001675438018237665, + "loss": 0.6911, + "step": 624 + }, + { + "epoch": 1.43, + "learning_rate": 0.00016743414056836825, + "loss": 0.9364, + "step": 625 + }, + { + "epoch": 1.43, + "learning_rate": 0.00016732433038731242, + "loss": 0.7902, + "step": 626 + }, + { + "epoch": 1.44, + "learning_rate": 0.00016721437152311054, + "loss": 0.8473, + "step": 627 + }, + { + "epoch": 1.44, + "learning_rate": 0.00016710426421860235, + "loss": 0.8765, + "step": 628 + }, + { + "epoch": 1.44, + "learning_rate": 0.00016699400871695555, + "loss": 0.7705, + "step": 629 + }, + { + "epoch": 1.44, + "learning_rate": 0.00016688360526166514, + "loss": 0.8653, + "step": 630 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001667730540965528, + "loss": 0.9137, + "step": 631 + }, + { + "epoch": 1.45, + "learning_rate": 0.00016666235546576648, + "loss": 0.9772, + "step": 632 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001665515096137797, + "loss": 0.6433, + "step": 633 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001664405167853912, + "loss": 0.8096, + "step": 634 + }, + { + "epoch": 1.45, + "learning_rate": 0.00016632937722572434, + "loss": 0.7298, + "step": 635 + }, + { + "epoch": 1.46, + "learning_rate": 0.00016621809118022647, + "loss": 0.6841, + "step": 636 + }, + { + "epoch": 1.46, + "learning_rate": 0.00016610665889466838, + "loss": 0.9471, + "step": 637 + }, + { + "epoch": 1.46, + "learning_rate": 0.00016599508061514404, + "loss": 0.8396, + "step": 638 + }, + { + "epoch": 1.46, + "learning_rate": 0.00016588335658806962, + "loss": 0.8769, + "step": 639 + }, + { + "epoch": 1.47, + "learning_rate": 0.00016577148706018328, + "loss": 0.8328, + "step": 640 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001656594722785445, + "loss": 0.8932, + "step": 641 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001655473124905335, + "loss": 0.8203, + "step": 642 + }, + { + "epoch": 1.47, + "learning_rate": 0.00016543500794385084, + "loss": 0.8514, + "step": 643 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016532255888651666, + "loss": 0.7396, + "step": 644 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016520996556687028, + "loss": 0.9178, + "step": 645 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001650972282335697, + "loss": 0.6308, + "step": 646 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016498434713559088, + "loss": 0.9018, + "step": 647 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016487132252222727, + "loss": 0.8658, + "step": 648 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016475815464308933, + "loss": 0.8228, + "step": 649 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001646448437481039, + "loss": 0.8944, + "step": 650 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001645313900875136, + "loss": 0.8617, + "step": 651 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016441779391187646, + "loss": 0.9726, + "step": 652 + }, + { + "epoch": 1.5, + "learning_rate": 0.00016430405547206516, + "loss": 0.693, + "step": 653 + }, + { + "epoch": 1.5, + "learning_rate": 0.00016419017501926656, + "loss": 0.8272, + "step": 654 + }, + { + "epoch": 1.5, + "learning_rate": 0.00016407615280498124, + "loss": 0.8523, + "step": 655 + }, + { + "epoch": 1.5, + "learning_rate": 0.00016396198908102272, + "loss": 0.7444, + "step": 656 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016384768409951714, + "loss": 0.8366, + "step": 657 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001637332381129026, + "loss": 0.7441, + "step": 658 + }, + { + "epoch": 1.51, + "learning_rate": 0.00016361865137392854, + "loss": 0.6694, + "step": 659 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001635039241356553, + "loss": 0.8103, + "step": 660 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001633890566514535, + "loss": 0.9135, + "step": 661 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016327404917500346, + "loss": 0.7327, + "step": 662 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016315890196029467, + "loss": 0.8425, + "step": 663 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016304361526162534, + "loss": 0.8812, + "step": 664 + }, + { + "epoch": 1.52, + "learning_rate": 0.00016292818933360151, + "loss": 0.777, + "step": 665 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001628126244311369, + "loss": 0.8864, + "step": 666 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016269692080945198, + "loss": 0.9333, + "step": 667 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016258107872407375, + "loss": 0.906, + "step": 668 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016246509843083492, + "loss": 0.7346, + "step": 669 + }, + { + "epoch": 1.53, + "learning_rate": 0.00016234898018587337, + "loss": 0.8555, + "step": 670 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016223272424563173, + "loss": 0.8449, + "step": 671 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016211633086685664, + "loss": 0.8559, + "step": 672 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016199980030659838, + "loss": 0.7468, + "step": 673 + }, + { + "epoch": 1.54, + "learning_rate": 0.00016188313282221008, + "loss": 0.7986, + "step": 674 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001617663286713474, + "loss": 0.7757, + "step": 675 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016164938811196757, + "loss": 0.8789, + "step": 676 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016153231140232936, + "loss": 0.5499, + "step": 677 + }, + { + "epoch": 1.55, + "learning_rate": 0.00016141509880099206, + "loss": 0.9319, + "step": 678 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016129775056681513, + "loss": 0.6904, + "step": 679 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001611802669589575, + "loss": 0.8506, + "step": 680 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016106264823687716, + "loss": 0.7242, + "step": 681 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016094489466033043, + "loss": 0.6808, + "step": 682 + }, + { + "epoch": 1.56, + "learning_rate": 0.00016082700648937146, + "loss": 0.8017, + "step": 683 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016070898398435167, + "loss": 0.9109, + "step": 684 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016059082740591915, + "loss": 0.7277, + "step": 685 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016047253701501808, + "loss": 0.8601, + "step": 686 + }, + { + "epoch": 1.57, + "learning_rate": 0.00016035411307288813, + "loss": 0.9118, + "step": 687 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001602355558410639, + "loss": 0.8049, + "step": 688 + }, + { + "epoch": 1.58, + "learning_rate": 0.00016011686558137448, + "loss": 0.8174, + "step": 689 + }, + { + "epoch": 1.58, + "learning_rate": 0.00015999804255594258, + "loss": 0.8481, + "step": 690 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001598790870271843, + "loss": 0.7052, + "step": 691 + }, + { + "epoch": 1.59, + "learning_rate": 0.00015975999925780813, + "loss": 0.8208, + "step": 692 + }, + { + "epoch": 1.59, + "learning_rate": 0.00015964077951081485, + "loss": 0.7257, + "step": 693 + }, + { + "epoch": 1.59, + "learning_rate": 0.00015952142804949652, + "loss": 0.858, + "step": 694 + }, + { + "epoch": 1.59, + "learning_rate": 0.00015940194513743624, + "loss": 0.9242, + "step": 695 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001592823310385073, + "loss": 0.7924, + "step": 696 + }, + { + "epoch": 1.6, + "learning_rate": 0.00015916258601687274, + "loss": 0.8788, + "step": 697 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001590427103369848, + "loss": 0.7946, + "step": 698 + }, + { + "epoch": 1.6, + "learning_rate": 0.00015892270426358414, + "loss": 0.8318, + "step": 699 + }, + { + "epoch": 1.6, + "learning_rate": 0.00015880256806169953, + "loss": 0.8983, + "step": 700 + }, + { + "epoch": 1.61, + "learning_rate": 0.00015868230199664711, + "loss": 0.8889, + "step": 701 + }, + { + "epoch": 1.61, + "learning_rate": 0.00015856190633402968, + "loss": 0.9692, + "step": 702 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001584413813397364, + "loss": 0.7787, + "step": 703 + }, + { + "epoch": 1.61, + "learning_rate": 0.00015832072727994193, + "loss": 0.6455, + "step": 704 + }, + { + "epoch": 1.62, + "learning_rate": 0.00015819994442110616, + "loss": 1.0006, + "step": 705 + }, + { + "epoch": 1.62, + "learning_rate": 0.00015807903302997317, + "loss": 0.7384, + "step": 706 + }, + { + "epoch": 1.62, + "learning_rate": 0.00015795799337357114, + "loss": 0.8517, + "step": 707 + }, + { + "epoch": 1.62, + "learning_rate": 0.00015783682571921133, + "loss": 0.8446, + "step": 708 + }, + { + "epoch": 1.62, + "learning_rate": 0.00015771553033448775, + "loss": 0.8227, + "step": 709 + }, + { + "epoch": 1.63, + "learning_rate": 0.00015759410748727662, + "loss": 0.8374, + "step": 710 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001574725574457354, + "loss": 0.7274, + "step": 711 + }, + { + "epoch": 1.63, + "learning_rate": 0.00015735088047830268, + "loss": 0.8728, + "step": 712 + }, + { + "epoch": 1.63, + "learning_rate": 0.00015722907685369723, + "loss": 1.0569, + "step": 713 + }, + { + "epoch": 1.64, + "learning_rate": 0.00015710714684091762, + "loss": 0.9775, + "step": 714 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001569850907092415, + "loss": 0.6832, + "step": 715 + }, + { + "epoch": 1.64, + "learning_rate": 0.00015686290872822504, + "loss": 0.7358, + "step": 716 + }, + { + "epoch": 1.64, + "learning_rate": 0.00015674060116770236, + "loss": 0.9015, + "step": 717 + }, + { + "epoch": 1.64, + "learning_rate": 0.00015661816829778494, + "loss": 0.8516, + "step": 718 + }, + { + "epoch": 1.65, + "learning_rate": 0.00015649561038886094, + "loss": 0.8911, + "step": 719 + }, + { + "epoch": 1.65, + "learning_rate": 0.00015637292771159472, + "loss": 0.7098, + "step": 720 + }, + { + "epoch": 1.65, + "learning_rate": 0.00015625012053692615, + "loss": 0.955, + "step": 721 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001561271891360701, + "loss": 0.6421, + "step": 722 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001560041337805157, + "loss": 0.8807, + "step": 723 + }, + { + "epoch": 1.66, + "learning_rate": 0.00015588095474202595, + "loss": 0.722, + "step": 724 + }, + { + "epoch": 1.66, + "learning_rate": 0.00015575765229263686, + "loss": 0.8055, + "step": 725 + }, + { + "epoch": 1.66, + "learning_rate": 0.00015563422670465712, + "loss": 0.7822, + "step": 726 + }, + { + "epoch": 1.67, + "learning_rate": 0.00015551067825066728, + "loss": 0.8311, + "step": 727 + }, + { + "epoch": 1.67, + "learning_rate": 0.00015538700720351924, + "loss": 0.8519, + "step": 728 + }, + { + "epoch": 1.67, + "learning_rate": 0.00015526321383633568, + "loss": 0.7506, + "step": 729 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001551392984225094, + "loss": 0.8056, + "step": 730 + }, + { + "epoch": 1.67, + "learning_rate": 0.00015501526123570277, + "loss": 0.6968, + "step": 731 + }, + { + "epoch": 1.68, + "learning_rate": 0.000154891102549847, + "loss": 0.829, + "step": 732 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001547668226391417, + "loss": 0.6682, + "step": 733 + }, + { + "epoch": 1.68, + "learning_rate": 0.00015464242177805422, + "loss": 0.8295, + "step": 734 + }, + { + "epoch": 1.68, + "learning_rate": 0.00015451790024131895, + "loss": 0.6911, + "step": 735 + }, + { + "epoch": 1.69, + "learning_rate": 0.00015439325830393687, + "loss": 0.6785, + "step": 736 + }, + { + "epoch": 1.69, + "learning_rate": 0.00015426849624117472, + "loss": 0.81, + "step": 737 + }, + { + "epoch": 1.69, + "learning_rate": 0.00015414361432856475, + "loss": 0.9955, + "step": 738 + }, + { + "epoch": 1.69, + "learning_rate": 0.00015401861284190368, + "loss": 0.8433, + "step": 739 + }, + { + "epoch": 1.7, + "learning_rate": 0.00015389349205725242, + "loss": 0.618, + "step": 740 + }, + { + "epoch": 1.7, + "learning_rate": 0.00015376825225093537, + "loss": 0.7747, + "step": 741 + }, + { + "epoch": 1.7, + "learning_rate": 0.00015364289369953967, + "loss": 0.7673, + "step": 742 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001535174166799148, + "loss": 0.8066, + "step": 743 + }, + { + "epoch": 1.7, + "learning_rate": 0.00015339182146917183, + "loss": 0.8392, + "step": 744 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001532661083446829, + "loss": 0.7949, + "step": 745 + }, + { + "epoch": 1.71, + "learning_rate": 0.00015314027758408044, + "loss": 0.8698, + "step": 746 + }, + { + "epoch": 1.71, + "learning_rate": 0.00015301432946525684, + "loss": 0.7715, + "step": 747 + }, + { + "epoch": 1.71, + "learning_rate": 0.00015288826426636354, + "loss": 0.7583, + "step": 748 + }, + { + "epoch": 1.72, + "learning_rate": 0.00015276208226581064, + "loss": 0.8544, + "step": 749 + }, + { + "epoch": 1.72, + "learning_rate": 0.00015263578374226605, + "loss": 0.8272, + "step": 750 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001525093689746552, + "loss": 0.857, + "step": 751 + }, + { + "epoch": 1.72, + "learning_rate": 0.00015238283824216015, + "loss": 0.9208, + "step": 752 + }, + { + "epoch": 1.73, + "learning_rate": 0.000152256191824219, + "loss": 0.8626, + "step": 753 + }, + { + "epoch": 1.73, + "learning_rate": 0.00015212943000052545, + "loss": 0.9418, + "step": 754 + }, + { + "epoch": 1.73, + "learning_rate": 0.00015200255305102803, + "loss": 0.8087, + "step": 755 + }, + { + "epoch": 1.73, + "learning_rate": 0.00015187556125592945, + "loss": 0.7913, + "step": 756 + }, + { + "epoch": 1.73, + "learning_rate": 0.00015174845489568622, + "loss": 0.8973, + "step": 757 + }, + { + "epoch": 1.74, + "learning_rate": 0.00015162123425100762, + "loss": 0.701, + "step": 758 + }, + { + "epoch": 1.74, + "learning_rate": 0.00015149389960285558, + "loss": 0.898, + "step": 759 + }, + { + "epoch": 1.74, + "learning_rate": 0.00015136645123244366, + "loss": 0.8809, + "step": 760 + }, + { + "epoch": 1.74, + "learning_rate": 0.00015123888942123652, + "loss": 0.7334, + "step": 761 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001511112144509495, + "loss": 0.8506, + "step": 762 + }, + { + "epoch": 1.75, + "learning_rate": 0.00015098342660354775, + "loss": 0.8469, + "step": 763 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001508555261612457, + "loss": 1.0353, + "step": 764 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001507275134065065, + "loss": 0.6269, + "step": 765 + }, + { + "epoch": 1.75, + "learning_rate": 0.00015059938862204127, + "loss": 0.7825, + "step": 766 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001504711520908086, + "loss": 0.8388, + "step": 767 + }, + { + "epoch": 1.76, + "learning_rate": 0.00015034280409601385, + "loss": 0.7383, + "step": 768 + }, + { + "epoch": 1.76, + "learning_rate": 0.00015021434492110852, + "loss": 0.8029, + "step": 769 + }, + { + "epoch": 1.76, + "learning_rate": 0.00015008577484978966, + "loss": 0.6527, + "step": 770 + }, + { + "epoch": 1.77, + "learning_rate": 0.00014995709416599926, + "loss": 0.9434, + "step": 771 + }, + { + "epoch": 1.77, + "learning_rate": 0.00014982830315392358, + "loss": 0.753, + "step": 772 + }, + { + "epoch": 1.77, + "learning_rate": 0.00014969940209799248, + "loss": 0.8143, + "step": 773 + }, + { + "epoch": 1.77, + "learning_rate": 0.00014957039128287892, + "loss": 0.8939, + "step": 774 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001494412709934982, + "loss": 0.9265, + "step": 775 + }, + { + "epoch": 1.78, + "learning_rate": 0.00014931204151500747, + "loss": 0.8261, + "step": 776 + }, + { + "epoch": 1.78, + "learning_rate": 0.00014918270313280495, + "loss": 0.8555, + "step": 777 + }, + { + "epoch": 1.78, + "learning_rate": 0.00014905325613252937, + "loss": 0.8191, + "step": 778 + }, + { + "epoch": 1.78, + "learning_rate": 0.00014892370080005936, + "loss": 0.9159, + "step": 779 + }, + { + "epoch": 1.79, + "learning_rate": 0.00014879403742151283, + "loss": 0.7936, + "step": 780 + }, + { + "epoch": 1.79, + "learning_rate": 0.00014866426628324625, + "loss": 0.8782, + "step": 781 + }, + { + "epoch": 1.79, + "learning_rate": 0.00014853438767185412, + "loss": 0.6078, + "step": 782 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001484044018741682, + "loss": 0.7182, + "step": 783 + }, + { + "epoch": 1.8, + "learning_rate": 0.00014827430917725712, + "loss": 0.7528, + "step": 784 + }, + { + "epoch": 1.8, + "learning_rate": 0.00014814410986842543, + "loss": 0.902, + "step": 785 + }, + { + "epoch": 1.8, + "learning_rate": 0.00014801380423521324, + "loss": 0.8765, + "step": 786 + }, + { + "epoch": 1.8, + "learning_rate": 0.00014788339256539544, + "loss": 0.6332, + "step": 787 + }, + { + "epoch": 1.81, + "learning_rate": 0.00014775287514698105, + "loss": 0.7258, + "step": 788 + }, + { + "epoch": 1.81, + "learning_rate": 0.00014762225226821273, + "loss": 0.7754, + "step": 789 + }, + { + "epoch": 1.81, + "learning_rate": 0.00014749152421756595, + "loss": 0.7039, + "step": 790 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001473606912837485, + "loss": 0.8563, + "step": 791 + }, + { + "epoch": 1.81, + "learning_rate": 0.00014722975375569978, + "loss": 0.8956, + "step": 792 + }, + { + "epoch": 1.82, + "learning_rate": 0.00014709871192259026, + "loss": 0.8724, + "step": 793 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001469675660738206, + "loss": 0.8885, + "step": 794 + }, + { + "epoch": 1.82, + "learning_rate": 0.00014683631649902132, + "loss": 0.7637, + "step": 795 + }, + { + "epoch": 1.82, + "learning_rate": 0.00014670496348805195, + "loss": 0.7596, + "step": 796 + }, + { + "epoch": 1.83, + "learning_rate": 0.00014657350733100047, + "loss": 0.8221, + "step": 797 + }, + { + "epoch": 1.83, + "learning_rate": 0.00014644194831818266, + "loss": 0.8475, + "step": 798 + }, + { + "epoch": 1.83, + "learning_rate": 0.00014631028674014142, + "loss": 0.7966, + "step": 799 + }, + { + "epoch": 1.83, + "learning_rate": 0.00014617852288764625, + "loss": 0.9186, + "step": 800 + }, + { + "epoch": 1.84, + "learning_rate": 0.00014604665705169237, + "loss": 0.9027, + "step": 801 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001459146895235004, + "loss": 0.9357, + "step": 802 + }, + { + "epoch": 1.84, + "learning_rate": 0.00014578262059451537, + "loss": 0.9202, + "step": 803 + }, + { + "epoch": 1.84, + "learning_rate": 0.00014565045055640638, + "loss": 0.9226, + "step": 804 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001455181797010658, + "loss": 0.8416, + "step": 805 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001453858083206086, + "loss": 0.8192, + "step": 806 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001452533367073718, + "loss": 0.8309, + "step": 807 + }, + { + "epoch": 1.85, + "learning_rate": 0.00014512076515391375, + "loss": 0.7646, + "step": 808 + }, + { + "epoch": 1.85, + "learning_rate": 0.00014498809395301356, + "loss": 0.9335, + "step": 809 + }, + { + "epoch": 1.86, + "learning_rate": 0.00014485532339767037, + "loss": 0.9696, + "step": 810 + }, + { + "epoch": 1.86, + "learning_rate": 0.00014472245378110277, + "loss": 0.7, + "step": 811 + }, + { + "epoch": 1.86, + "learning_rate": 0.000144589485396748, + "loss": 0.8206, + "step": 812 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001444564185382617, + "loss": 0.7417, + "step": 813 + }, + { + "epoch": 1.86, + "learning_rate": 0.00014432325349951667, + "loss": 0.6384, + "step": 814 + }, + { + "epoch": 1.87, + "learning_rate": 0.00014418999057460276, + "loss": 0.7801, + "step": 815 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001440566300578259, + "loss": 0.8459, + "step": 816 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001439231722437075, + "loss": 0.8863, + "step": 817 + }, + { + "epoch": 1.87, + "learning_rate": 0.000143789617426984, + "loss": 0.8502, + "step": 818 + }, + { + "epoch": 1.88, + "learning_rate": 0.000143655965902606, + "loss": 0.8522, + "step": 819 + }, + { + "epoch": 1.88, + "learning_rate": 0.00014352221796573757, + "loss": 0.8612, + "step": 820 + }, + { + "epoch": 1.88, + "learning_rate": 0.00014338837391175582, + "loss": 0.8065, + "step": 821 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001432544340362501, + "loss": 0.8777, + "step": 822 + }, + { + "epoch": 1.89, + "learning_rate": 0.00014312039863502145, + "loss": 0.7731, + "step": 823 + }, + { + "epoch": 1.89, + "learning_rate": 0.00014298626800408166, + "loss": 0.8791, + "step": 824 + }, + { + "epoch": 1.89, + "learning_rate": 0.00014285204243965306, + "loss": 0.9095, + "step": 825 + }, + { + "epoch": 1.89, + "learning_rate": 0.00014271772223816757, + "loss": 0.8846, + "step": 826 + }, + { + "epoch": 1.89, + "learning_rate": 0.00014258330769626606, + "loss": 0.701, + "step": 827 + }, + { + "epoch": 1.9, + "learning_rate": 0.00014244879911079779, + "loss": 0.7598, + "step": 828 + }, + { + "epoch": 1.9, + "learning_rate": 0.00014231419677881966, + "loss": 1.0411, + "step": 829 + }, + { + "epoch": 1.9, + "learning_rate": 0.00014217950099759569, + "loss": 0.6915, + "step": 830 + }, + { + "epoch": 1.9, + "learning_rate": 0.00014204471206459628, + "loss": 0.8048, + "step": 831 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001419098302774974, + "loss": 0.7688, + "step": 832 + }, + { + "epoch": 1.91, + "learning_rate": 0.00014177485593418028, + "loss": 0.7863, + "step": 833 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001416397893327304, + "loss": 0.7627, + "step": 834 + }, + { + "epoch": 1.91, + "learning_rate": 0.00014150463077143712, + "loss": 0.7423, + "step": 835 + }, + { + "epoch": 1.92, + "learning_rate": 0.00014136938054879283, + "loss": 0.7236, + "step": 836 + }, + { + "epoch": 1.92, + "learning_rate": 0.00014123403896349227, + "loss": 0.8978, + "step": 837 + }, + { + "epoch": 1.92, + "learning_rate": 0.00014109860631443213, + "loss": 0.9403, + "step": 838 + }, + { + "epoch": 1.92, + "learning_rate": 0.00014096308290071003, + "loss": 0.7267, + "step": 839 + }, + { + "epoch": 1.92, + "learning_rate": 0.00014082746902162414, + "loss": 0.7905, + "step": 840 + }, + { + "epoch": 1.93, + "learning_rate": 0.00014069176497667242, + "loss": 0.8848, + "step": 841 + }, + { + "epoch": 1.93, + "learning_rate": 0.00014055597106555192, + "loss": 0.9057, + "step": 842 + }, + { + "epoch": 1.93, + "learning_rate": 0.00014042008758815818, + "loss": 0.7363, + "step": 843 + }, + { + "epoch": 1.93, + "learning_rate": 0.00014028411484458454, + "loss": 0.8193, + "step": 844 + }, + { + "epoch": 1.94, + "learning_rate": 0.00014014805313512145, + "loss": 0.7387, + "step": 845 + }, + { + "epoch": 1.94, + "learning_rate": 0.00014001190276025593, + "loss": 0.8871, + "step": 846 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001398756640206707, + "loss": 0.7342, + "step": 847 + }, + { + "epoch": 1.94, + "learning_rate": 0.00013973933721724363, + "loss": 0.8557, + "step": 848 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001396029226510472, + "loss": 0.8778, + "step": 849 + }, + { + "epoch": 1.95, + "learning_rate": 0.00013946642062334766, + "loss": 0.7844, + "step": 850 + }, + { + "epoch": 1.95, + "learning_rate": 0.00013932983143560433, + "loss": 0.7941, + "step": 851 + }, + { + "epoch": 1.95, + "learning_rate": 0.00013919315538946905, + "loss": 0.7505, + "step": 852 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001390563927867856, + "loss": 0.8371, + "step": 853 + }, + { + "epoch": 1.96, + "learning_rate": 0.00013891954392958878, + "loss": 0.8128, + "step": 854 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001387826091201039, + "loss": 0.7127, + "step": 855 + }, + { + "epoch": 1.96, + "learning_rate": 0.00013864558866074622, + "loss": 0.8165, + "step": 856 + }, + { + "epoch": 1.96, + "learning_rate": 0.00013850848285411994, + "loss": 0.7103, + "step": 857 + }, + { + "epoch": 1.97, + "learning_rate": 0.00013837129200301794, + "loss": 0.8373, + "step": 858 + }, + { + "epoch": 1.97, + "learning_rate": 0.00013823401641042084, + "loss": 0.6908, + "step": 859 + }, + { + "epoch": 1.97, + "learning_rate": 0.00013809665637949637, + "loss": 0.7358, + "step": 860 + }, + { + "epoch": 1.97, + "learning_rate": 0.00013795921221359877, + "loss": 0.7545, + "step": 861 + }, + { + "epoch": 1.97, + "learning_rate": 0.00013782168421626816, + "loss": 0.7681, + "step": 862 + }, + { + "epoch": 1.98, + "learning_rate": 0.00013768407269122967, + "loss": 1.026, + "step": 863 + }, + { + "epoch": 1.98, + "learning_rate": 0.000137546377942393, + "loss": 0.761, + "step": 864 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001374086002738516, + "loss": 0.8442, + "step": 865 + }, + { + "epoch": 1.98, + "learning_rate": 0.00013727073998988202, + "loss": 0.7959, + "step": 866 + }, + { + "epoch": 1.99, + "learning_rate": 0.00013713279739494333, + "loss": 0.8061, + "step": 867 + }, + { + "epoch": 1.99, + "learning_rate": 0.00013699477279367636, + "loss": 0.7434, + "step": 868 + }, + { + "epoch": 1.99, + "learning_rate": 0.000136856666490903, + "loss": 0.7159, + "step": 869 + }, + { + "epoch": 1.99, + "learning_rate": 0.00013671847879162562, + "loss": 0.867, + "step": 870 + }, + { + "epoch": 2.0, + "learning_rate": 0.00013658021000102636, + "loss": 0.9237, + "step": 871 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001364418604244664, + "loss": 0.8545, + "step": 872 + }, + { + "epoch": 2.0, + "learning_rate": 0.00013630343036748535, + "loss": 0.893, + "step": 873 + }, + { + "epoch": 2.0, + "learning_rate": 0.00013616492013580062, + "loss": 0.9858, + "step": 874 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001360263300353066, + "loss": 0.6643, + "step": 875 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001358876603720741, + "loss": 0.8081, + "step": 876 + }, + { + "epoch": 2.01, + "learning_rate": 0.00013574891145234962, + "loss": 0.7287, + "step": 877 + }, + { + "epoch": 2.01, + "learning_rate": 0.00013561008358255468, + "loss": 0.8078, + "step": 878 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001354711770692853, + "loss": 0.6738, + "step": 879 + }, + { + "epoch": 2.02, + "learning_rate": 0.00013533219221931102, + "loss": 0.7508, + "step": 880 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001351931293395744, + "loss": 0.8724, + "step": 881 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001350539887371904, + "loss": 0.9317, + "step": 882 + }, + { + "epoch": 2.02, + "learning_rate": 0.00013491477071944557, + "loss": 0.7664, + "step": 883 + }, + { + "epoch": 2.03, + "learning_rate": 0.00013477547559379748, + "loss": 0.8065, + "step": 884 + }, + { + "epoch": 2.03, + "learning_rate": 0.00013463610366787392, + "loss": 0.738, + "step": 885 + }, + { + "epoch": 2.03, + "learning_rate": 0.00013449665524947234, + "loss": 0.7554, + "step": 886 + }, + { + "epoch": 2.03, + "learning_rate": 0.00013435713064655912, + "loss": 0.7769, + "step": 887 + }, + { + "epoch": 2.03, + "learning_rate": 0.00013421753016726887, + "loss": 0.6507, + "step": 888 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001340778541199038, + "loss": 0.7293, + "step": 889 + }, + { + "epoch": 2.04, + "learning_rate": 0.00013393810281293292, + "loss": 0.8305, + "step": 890 + }, + { + "epoch": 2.04, + "learning_rate": 0.00013379827655499163, + "loss": 0.7553, + "step": 891 + }, + { + "epoch": 2.04, + "learning_rate": 0.00013365837565488064, + "loss": 0.7724, + "step": 892 + }, + { + "epoch": 2.05, + "learning_rate": 0.00013351840042156565, + "loss": 0.7061, + "step": 893 + }, + { + "epoch": 2.05, + "learning_rate": 0.00013337835116417648, + "loss": 0.7078, + "step": 894 + }, + { + "epoch": 2.05, + "learning_rate": 0.00013323822819200643, + "loss": 0.8201, + "step": 895 + }, + { + "epoch": 2.05, + "learning_rate": 0.00013309803181451156, + "loss": 0.746, + "step": 896 + }, + { + "epoch": 2.05, + "learning_rate": 0.00013295776234131015, + "loss": 0.8276, + "step": 897 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001328174200821817, + "loss": 0.7922, + "step": 898 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001326770053470668, + "loss": 0.7577, + "step": 899 + }, + { + "epoch": 2.06, + "learning_rate": 0.00013253651844606572, + "loss": 0.8217, + "step": 900 + }, + { + "epoch": 2.06, + "learning_rate": 0.00013239595968943832, + "loss": 0.7883, + "step": 901 + }, + { + "epoch": 2.07, + "learning_rate": 0.00013225532938760317, + "loss": 0.9568, + "step": 902 + }, + { + "epoch": 2.07, + "learning_rate": 0.00013211462785113666, + "loss": 0.7348, + "step": 903 + }, + { + "epoch": 2.07, + "learning_rate": 0.00013197385539077275, + "loss": 0.7558, + "step": 904 + }, + { + "epoch": 2.07, + "learning_rate": 0.00013183301231740183, + "loss": 0.7066, + "step": 905 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001316920989420703, + "loss": 0.7663, + "step": 906 + }, + { + "epoch": 2.08, + "learning_rate": 0.00013155111557597985, + "loss": 0.79, + "step": 907 + }, + { + "epoch": 2.08, + "learning_rate": 0.00013141006253048672, + "loss": 0.8237, + "step": 908 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001312689401171011, + "loss": 0.687, + "step": 909 + }, + { + "epoch": 2.08, + "learning_rate": 0.00013112774864748621, + "loss": 0.8254, + "step": 910 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001309864884334579, + "loss": 0.7641, + "step": 911 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001308451597869839, + "loss": 0.7845, + "step": 912 + }, + { + "epoch": 2.09, + "learning_rate": 0.00013070376302018287, + "loss": 0.8661, + "step": 913 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001305622984453241, + "loss": 0.9001, + "step": 914 + }, + { + "epoch": 2.1, + "learning_rate": 0.00013042076637482654, + "loss": 0.7261, + "step": 915 + }, + { + "epoch": 2.1, + "learning_rate": 0.00013027916712125826, + "loss": 0.7954, + "step": 916 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001301375009973356, + "loss": 0.792, + "step": 917 + }, + { + "epoch": 2.1, + "learning_rate": 0.00012999576831592273, + "loss": 0.8423, + "step": 918 + }, + { + "epoch": 2.11, + "learning_rate": 0.00012985396939003065, + "loss": 0.8529, + "step": 919 + }, + { + "epoch": 2.11, + "learning_rate": 0.00012971210453281674, + "loss": 0.9086, + "step": 920 + }, + { + "epoch": 2.11, + "learning_rate": 0.00012957017405758401, + "loss": 0.7099, + "step": 921 + }, + { + "epoch": 2.11, + "learning_rate": 0.00012942817827778038, + "loss": 0.7515, + "step": 922 + }, + { + "epoch": 2.11, + "learning_rate": 0.00012928611750699783, + "loss": 0.7972, + "step": 923 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001291439920589722, + "loss": 0.6615, + "step": 924 + }, + { + "epoch": 2.12, + "learning_rate": 0.00012900180224758185, + "loss": 0.8229, + "step": 925 + }, + { + "epoch": 2.12, + "learning_rate": 0.00012885954838684743, + "loss": 0.8146, + "step": 926 + }, + { + "epoch": 2.12, + "learning_rate": 0.000128717230790931, + "loss": 0.8941, + "step": 927 + }, + { + "epoch": 2.13, + "learning_rate": 0.00012857484977413545, + "loss": 0.7661, + "step": 928 + }, + { + "epoch": 2.13, + "learning_rate": 0.00012843240565090365, + "loss": 0.7404, + "step": 929 + }, + { + "epoch": 2.13, + "learning_rate": 0.00012828989873581785, + "loss": 0.7971, + "step": 930 + }, + { + "epoch": 2.13, + "learning_rate": 0.000128147329343599, + "loss": 0.6813, + "step": 931 + }, + { + "epoch": 2.14, + "learning_rate": 0.00012800469778910601, + "loss": 0.7704, + "step": 932 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001278620043873351, + "loss": 0.7751, + "step": 933 + }, + { + "epoch": 2.14, + "learning_rate": 0.00012771924945341906, + "loss": 0.841, + "step": 934 + }, + { + "epoch": 2.14, + "learning_rate": 0.00012757643330262657, + "loss": 0.858, + "step": 935 + }, + { + "epoch": 2.14, + "learning_rate": 0.00012743355625036143, + "loss": 0.6657, + "step": 936 + }, + { + "epoch": 2.15, + "learning_rate": 0.00012729061861216213, + "loss": 0.7735, + "step": 937 + }, + { + "epoch": 2.15, + "learning_rate": 0.00012714762070370077, + "loss": 0.8935, + "step": 938 + }, + { + "epoch": 2.15, + "learning_rate": 0.00012700456284078264, + "loss": 0.9684, + "step": 939 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001268614453393454, + "loss": 0.9117, + "step": 940 + }, + { + "epoch": 2.16, + "learning_rate": 0.00012671826851545851, + "loss": 0.7613, + "step": 941 + }, + { + "epoch": 2.16, + "learning_rate": 0.00012657503268532236, + "loss": 0.9567, + "step": 942 + }, + { + "epoch": 2.16, + "learning_rate": 0.00012643173816526764, + "loss": 0.8725, + "step": 943 + }, + { + "epoch": 2.16, + "learning_rate": 0.00012628838527175464, + "loss": 0.8088, + "step": 944 + }, + { + "epoch": 2.16, + "learning_rate": 0.00012614497432137273, + "loss": 0.7655, + "step": 945 + }, + { + "epoch": 2.17, + "learning_rate": 0.00012600150563083927, + "loss": 0.7585, + "step": 946 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001258579795169993, + "loss": 0.6351, + "step": 947 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001257143962968246, + "loss": 0.8408, + "step": 948 + }, + { + "epoch": 2.17, + "learning_rate": 0.00012557075628741307, + "loss": 0.7144, + "step": 949 + }, + { + "epoch": 2.18, + "learning_rate": 0.00012542705980598813, + "loss": 0.7022, + "step": 950 + }, + { + "epoch": 2.18, + "learning_rate": 0.00012528330716989769, + "loss": 0.8635, + "step": 951 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001251394986966139, + "loss": 0.8489, + "step": 952 + }, + { + "epoch": 2.18, + "learning_rate": 0.00012499563470373212, + "loss": 0.7563, + "step": 953 + }, + { + "epoch": 2.19, + "learning_rate": 0.00012485171550897037, + "loss": 0.9245, + "step": 954 + }, + { + "epoch": 2.19, + "learning_rate": 0.00012470774143016853, + "loss": 0.9168, + "step": 955 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001245637127852877, + "loss": 0.803, + "step": 956 + }, + { + "epoch": 2.19, + "learning_rate": 0.00012441962989240952, + "loss": 0.722, + "step": 957 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001242754930697354, + "loss": 0.7944, + "step": 958 + }, + { + "epoch": 2.2, + "learning_rate": 0.00012413130263558587, + "loss": 0.7759, + "step": 959 + }, + { + "epoch": 2.2, + "learning_rate": 0.00012398705890839988, + "loss": 0.9407, + "step": 960 + }, + { + "epoch": 2.2, + "learning_rate": 0.00012384276220673402, + "loss": 0.726, + "step": 961 + }, + { + "epoch": 2.2, + "learning_rate": 0.00012369841284926188, + "loss": 0.7817, + "step": 962 + }, + { + "epoch": 2.21, + "learning_rate": 0.00012355401115477345, + "loss": 0.6845, + "step": 963 + }, + { + "epoch": 2.21, + "learning_rate": 0.00012340955744217412, + "loss": 0.7638, + "step": 964 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001232650520304843, + "loss": 0.8104, + "step": 965 + }, + { + "epoch": 2.21, + "learning_rate": 0.00012312049523883852, + "loss": 0.8676, + "step": 966 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001229758873864848, + "loss": 0.7944, + "step": 967 + }, + { + "epoch": 2.22, + "learning_rate": 0.00012283122879278393, + "loss": 0.8001, + "step": 968 + }, + { + "epoch": 2.22, + "learning_rate": 0.00012268651977720866, + "loss": 0.7943, + "step": 969 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001225417606593433, + "loss": 0.9679, + "step": 970 + }, + { + "epoch": 2.22, + "learning_rate": 0.00012239695175888263, + "loss": 0.773, + "step": 971 + }, + { + "epoch": 2.23, + "learning_rate": 0.00012225209339563145, + "loss": 0.7707, + "step": 972 + }, + { + "epoch": 2.23, + "learning_rate": 0.00012210718588950376, + "loss": 0.6727, + "step": 973 + }, + { + "epoch": 2.23, + "learning_rate": 0.00012196222956052214, + "loss": 0.7641, + "step": 974 + }, + { + "epoch": 2.23, + "learning_rate": 0.00012181722472881697, + "loss": 0.8506, + "step": 975 + }, + { + "epoch": 2.24, + "learning_rate": 0.00012167217171462566, + "loss": 0.8442, + "step": 976 + }, + { + "epoch": 2.24, + "learning_rate": 0.00012152707083829217, + "loss": 0.7853, + "step": 977 + }, + { + "epoch": 2.24, + "learning_rate": 0.00012138192242026614, + "loss": 0.7495, + "step": 978 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001212367267811021, + "loss": 0.739, + "step": 979 + }, + { + "epoch": 2.25, + "learning_rate": 0.00012109148424145898, + "loss": 0.6531, + "step": 980 + }, + { + "epoch": 2.25, + "learning_rate": 0.00012094619512209915, + "loss": 0.7721, + "step": 981 + }, + { + "epoch": 2.25, + "learning_rate": 0.00012080085974388802, + "loss": 0.7346, + "step": 982 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001206554784277931, + "loss": 0.8709, + "step": 983 + }, + { + "epoch": 2.25, + "learning_rate": 0.00012051005149488326, + "loss": 0.8111, + "step": 984 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001203645792663282, + "loss": 0.8296, + "step": 985 + }, + { + "epoch": 2.26, + "learning_rate": 0.00012021906206339766, + "loss": 0.7569, + "step": 986 + }, + { + "epoch": 2.26, + "learning_rate": 0.00012007350020746068, + "loss": 0.7945, + "step": 987 + }, + { + "epoch": 2.26, + "learning_rate": 0.00011992789401998492, + "loss": 0.7818, + "step": 988 + }, + { + "epoch": 2.27, + "learning_rate": 0.00011978224382253589, + "loss": 0.59, + "step": 989 + }, + { + "epoch": 2.27, + "learning_rate": 0.00011963654993677645, + "loss": 0.828, + "step": 990 + }, + { + "epoch": 2.27, + "learning_rate": 0.00011949081268446571, + "loss": 0.7583, + "step": 991 + }, + { + "epoch": 2.27, + "learning_rate": 0.00011934503238745878, + "loss": 0.7453, + "step": 992 + }, + { + "epoch": 2.27, + "learning_rate": 0.00011919920936770568, + "loss": 0.826, + "step": 993 + }, + { + "epoch": 2.28, + "learning_rate": 0.00011905334394725085, + "loss": 0.7673, + "step": 994 + }, + { + "epoch": 2.28, + "learning_rate": 0.00011890743644823242, + "loss": 0.9637, + "step": 995 + }, + { + "epoch": 2.28, + "learning_rate": 0.00011876148719288128, + "loss": 0.702, + "step": 996 + }, + { + "epoch": 2.28, + "learning_rate": 0.00011861549650352069, + "loss": 0.856, + "step": 997 + }, + { + "epoch": 2.29, + "learning_rate": 0.00011846946470256538, + "loss": 0.725, + "step": 998 + }, + { + "epoch": 2.29, + "learning_rate": 0.00011832339211252084, + "loss": 0.7615, + "step": 999 + }, + { + "epoch": 2.29, + "learning_rate": 0.00011817727905598268, + "loss": 0.7691, + "step": 1000 + }, + { + "epoch": 2.29, + "learning_rate": 0.00011803112585563587, + "loss": 0.8347, + "step": 1001 + }, + { + "epoch": 2.3, + "learning_rate": 0.00011788493283425397, + "loss": 0.908, + "step": 1002 + }, + { + "epoch": 2.3, + "learning_rate": 0.00011773870031469862, + "loss": 0.8724, + "step": 1003 + }, + { + "epoch": 2.3, + "learning_rate": 0.00011759242861991855, + "loss": 0.8801, + "step": 1004 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001174461180729491, + "loss": 0.861, + "step": 1005 + }, + { + "epoch": 2.3, + "learning_rate": 0.00011729976899691137, + "loss": 0.8878, + "step": 1006 + }, + { + "epoch": 2.31, + "learning_rate": 0.00011715338171501156, + "loss": 0.7662, + "step": 1007 + }, + { + "epoch": 2.31, + "learning_rate": 0.00011700695655054026, + "loss": 0.7814, + "step": 1008 + }, + { + "epoch": 2.31, + "learning_rate": 0.00011686049382687168, + "loss": 0.8727, + "step": 1009 + }, + { + "epoch": 2.31, + "learning_rate": 0.000116713993867463, + "loss": 0.8036, + "step": 1010 + }, + { + "epoch": 2.32, + "learning_rate": 0.00011656745699585371, + "loss": 0.957, + "step": 1011 + }, + { + "epoch": 2.32, + "learning_rate": 0.00011642088353566469, + "loss": 0.9257, + "step": 1012 + }, + { + "epoch": 2.32, + "learning_rate": 0.00011627427381059772, + "loss": 0.7994, + "step": 1013 + }, + { + "epoch": 2.32, + "learning_rate": 0.00011612762814443459, + "loss": 0.6582, + "step": 1014 + }, + { + "epoch": 2.33, + "learning_rate": 0.00011598094686103653, + "loss": 0.7195, + "step": 1015 + }, + { + "epoch": 2.33, + "learning_rate": 0.00011583423028434344, + "loss": 0.6673, + "step": 1016 + }, + { + "epoch": 2.33, + "learning_rate": 0.00011568747873837307, + "loss": 0.8075, + "step": 1017 + }, + { + "epoch": 2.33, + "learning_rate": 0.00011554069254722051, + "loss": 0.8945, + "step": 1018 + }, + { + "epoch": 2.33, + "learning_rate": 0.00011539387203505727, + "loss": 0.6828, + "step": 1019 + }, + { + "epoch": 2.34, + "learning_rate": 0.00011524701752613074, + "loss": 0.7014, + "step": 1020 + }, + { + "epoch": 2.34, + "learning_rate": 0.00011510012934476338, + "loss": 0.8388, + "step": 1021 + }, + { + "epoch": 2.34, + "learning_rate": 0.00011495320781535186, + "loss": 0.685, + "step": 1022 + }, + { + "epoch": 2.34, + "learning_rate": 0.00011480625326236677, + "loss": 0.7141, + "step": 1023 + }, + { + "epoch": 2.35, + "learning_rate": 0.00011465926601035137, + "loss": 0.8078, + "step": 1024 + }, + { + "epoch": 2.35, + "learning_rate": 0.00011451224638392129, + "loss": 0.7924, + "step": 1025 + }, + { + "epoch": 2.35, + "learning_rate": 0.00011436519470776362, + "loss": 0.9223, + "step": 1026 + }, + { + "epoch": 2.35, + "learning_rate": 0.00011421811130663623, + "loss": 0.8251, + "step": 1027 + }, + { + "epoch": 2.36, + "learning_rate": 0.00011407099650536706, + "loss": 0.9127, + "step": 1028 + }, + { + "epoch": 2.36, + "learning_rate": 0.00011392385062885334, + "loss": 0.7634, + "step": 1029 + }, + { + "epoch": 2.36, + "learning_rate": 0.00011377667400206101, + "loss": 0.7472, + "step": 1030 + }, + { + "epoch": 2.36, + "learning_rate": 0.00011362946695002383, + "loss": 0.7838, + "step": 1031 + }, + { + "epoch": 2.36, + "learning_rate": 0.00011348222979784289, + "loss": 0.9502, + "step": 1032 + }, + { + "epoch": 2.37, + "learning_rate": 0.00011333496287068563, + "loss": 0.7066, + "step": 1033 + }, + { + "epoch": 2.37, + "learning_rate": 0.00011318766649378532, + "loss": 0.9988, + "step": 1034 + }, + { + "epoch": 2.37, + "learning_rate": 0.00011304034099244014, + "loss": 0.9448, + "step": 1035 + }, + { + "epoch": 2.37, + "learning_rate": 0.00011289298669201282, + "loss": 0.7764, + "step": 1036 + }, + { + "epoch": 2.38, + "learning_rate": 0.00011274560391792948, + "loss": 0.7351, + "step": 1037 + }, + { + "epoch": 2.38, + "learning_rate": 0.00011259819299567922, + "loss": 0.895, + "step": 1038 + }, + { + "epoch": 2.38, + "learning_rate": 0.00011245075425081328, + "loss": 0.718, + "step": 1039 + }, + { + "epoch": 2.38, + "learning_rate": 0.00011230328800894437, + "loss": 0.7811, + "step": 1040 + }, + { + "epoch": 2.38, + "learning_rate": 0.0001121557945957459, + "loss": 0.7859, + "step": 1041 + }, + { + "epoch": 2.39, + "learning_rate": 0.00011200827433695127, + "loss": 0.7916, + "step": 1042 + }, + { + "epoch": 2.39, + "learning_rate": 0.00011186072755835322, + "loss": 0.8321, + "step": 1043 + }, + { + "epoch": 2.39, + "learning_rate": 0.00011171315458580303, + "loss": 0.7648, + "step": 1044 + }, + { + "epoch": 2.39, + "learning_rate": 0.00011156555574520981, + "loss": 0.7691, + "step": 1045 + }, + { + "epoch": 2.4, + "learning_rate": 0.00011141793136253986, + "loss": 0.6978, + "step": 1046 + }, + { + "epoch": 2.4, + "learning_rate": 0.00011127028176381578, + "loss": 0.6725, + "step": 1047 + }, + { + "epoch": 2.4, + "learning_rate": 0.00011112260727511596, + "loss": 0.8165, + "step": 1048 + }, + { + "epoch": 2.4, + "learning_rate": 0.00011097490822257377, + "loss": 0.8662, + "step": 1049 + }, + { + "epoch": 2.41, + "learning_rate": 0.00011082718493237669, + "loss": 0.8784, + "step": 1050 + }, + { + "epoch": 2.41, + "learning_rate": 0.00011067943773076586, + "loss": 0.8533, + "step": 1051 + }, + { + "epoch": 2.41, + "learning_rate": 0.00011053166694403521, + "loss": 0.6602, + "step": 1052 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001103838728985307, + "loss": 0.8363, + "step": 1053 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001102360559206497, + "loss": 0.8044, + "step": 1054 + }, + { + "epoch": 2.42, + "learning_rate": 0.00011008821633684019, + "loss": 0.8684, + "step": 1055 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010994035447360018, + "loss": 0.7158, + "step": 1056 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001097924706574767, + "loss": 0.7729, + "step": 1057 + }, + { + "epoch": 2.42, + "learning_rate": 0.00010964456521506545, + "loss": 0.685, + "step": 1058 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010949663847300976, + "loss": 0.8647, + "step": 1059 + }, + { + "epoch": 2.43, + "learning_rate": 0.000109348690758, + "loss": 0.836, + "step": 1060 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010920072239677301, + "loss": 0.8494, + "step": 1061 + }, + { + "epoch": 2.43, + "learning_rate": 0.00010905273371611105, + "loss": 0.9494, + "step": 1062 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010890472504284133, + "loss": 0.7832, + "step": 1063 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010875669670383521, + "loss": 0.7709, + "step": 1064 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010860864902600747, + "loss": 0.8175, + "step": 1065 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010846058233631565, + "loss": 0.8179, + "step": 1066 + }, + { + "epoch": 2.44, + "learning_rate": 0.00010831249696175918, + "loss": 0.7686, + "step": 1067 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010816439322937879, + "loss": 0.8491, + "step": 1068 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010801627146625588, + "loss": 0.7961, + "step": 1069 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010786813199951145, + "loss": 0.8408, + "step": 1070 + }, + { + "epoch": 2.45, + "learning_rate": 0.00010771997515630574, + "loss": 0.8916, + "step": 1071 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010757180126383735, + "loss": 0.8035, + "step": 1072 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001074236106493425, + "loss": 0.9132, + "step": 1073 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001072754036400944, + "loss": 0.8029, + "step": 1074 + }, + { + "epoch": 2.46, + "learning_rate": 0.00010712718056340236, + "loss": 0.6981, + "step": 1075 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010697894174661127, + "loss": 0.7829, + "step": 1076 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010683068751710075, + "loss": 0.7699, + "step": 1077 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010668241820228444, + "loss": 0.7342, + "step": 1078 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010653413412960935, + "loss": 0.7729, + "step": 1079 + }, + { + "epoch": 2.47, + "learning_rate": 0.00010638583562655498, + "loss": 0.9097, + "step": 1080 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010623752302063283, + "loss": 0.8692, + "step": 1081 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010608919663938549, + "loss": 0.8861, + "step": 1082 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010594085681038588, + "loss": 0.7454, + "step": 1083 + }, + { + "epoch": 2.48, + "learning_rate": 0.00010579250386123676, + "loss": 0.8291, + "step": 1084 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001056441381195698, + "loss": 0.7643, + "step": 1085 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010549575991304492, + "loss": 0.8242, + "step": 1086 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001053473695693496, + "loss": 0.9521, + "step": 1087 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010519896741619803, + "loss": 0.8142, + "step": 1088 + }, + { + "epoch": 2.49, + "learning_rate": 0.00010505055378133067, + "loss": 0.7955, + "step": 1089 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010490212899251309, + "loss": 0.7363, + "step": 1090 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010475369337753569, + "loss": 0.8173, + "step": 1091 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010460524726421275, + "loss": 0.7659, + "step": 1092 + }, + { + "epoch": 2.5, + "learning_rate": 0.00010445679098038157, + "loss": 0.8618, + "step": 1093 + }, + { + "epoch": 2.51, + "learning_rate": 0.00010430832485390217, + "loss": 0.7606, + "step": 1094 + }, + { + "epoch": 2.51, + "learning_rate": 0.00010415984921265609, + "loss": 0.8721, + "step": 1095 + }, + { + "epoch": 2.51, + "learning_rate": 0.00010401136438454599, + "loss": 0.8152, + "step": 1096 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001038628706974948, + "loss": 0.8934, + "step": 1097 + }, + { + "epoch": 2.52, + "learning_rate": 0.00010371436847944503, + "loss": 0.8385, + "step": 1098 + }, + { + "epoch": 2.52, + "learning_rate": 0.00010356585805835797, + "loss": 0.8581, + "step": 1099 + }, + { + "epoch": 2.52, + "learning_rate": 0.00010341733976221313, + "loss": 0.788, + "step": 1100 + }, + { + "epoch": 2.52, + "learning_rate": 0.00010326881391900724, + "loss": 0.7872, + "step": 1101 + }, + { + "epoch": 2.52, + "learning_rate": 0.00010312028085675391, + "loss": 0.819, + "step": 1102 + }, + { + "epoch": 2.53, + "learning_rate": 0.00010297174090348255, + "loss": 0.854, + "step": 1103 + }, + { + "epoch": 2.53, + "learning_rate": 0.00010282319438723782, + "loss": 0.7121, + "step": 1104 + }, + { + "epoch": 2.53, + "learning_rate": 0.00010267464163607889, + "loss": 0.8977, + "step": 1105 + }, + { + "epoch": 2.53, + "learning_rate": 0.00010252608297807871, + "loss": 0.8411, + "step": 1106 + }, + { + "epoch": 2.54, + "learning_rate": 0.00010237751874132322, + "loss": 0.834, + "step": 1107 + }, + { + "epoch": 2.54, + "learning_rate": 0.00010222894925391073, + "loss": 0.7582, + "step": 1108 + }, + { + "epoch": 2.54, + "learning_rate": 0.00010208037484395114, + "loss": 0.7773, + "step": 1109 + }, + { + "epoch": 2.54, + "learning_rate": 0.00010193179583956523, + "loss": 0.7294, + "step": 1110 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010178321256888385, + "loss": 0.89, + "step": 1111 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010163462536004742, + "loss": 0.7675, + "step": 1112 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010148603454120487, + "loss": 0.7291, + "step": 1113 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010133744044051328, + "loss": 0.8403, + "step": 1114 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010118884338613688, + "loss": 0.8955, + "step": 1115 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010104024370624644, + "loss": 0.7537, + "step": 1116 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010089164172901851, + "loss": 0.8734, + "step": 1117 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010074303778263474, + "loss": 0.7312, + "step": 1118 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010059443219528117, + "loss": 0.7906, + "step": 1119 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010044582529514739, + "loss": 0.7756, + "step": 1120 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010029721741042586, + "loss": 0.9158, + "step": 1121 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010014860886931139, + "loss": 0.8481, + "step": 1122 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001, + "loss": 0.8187, + "step": 1123 + }, + { + "epoch": 2.58, + "learning_rate": 9.985139113068865e-05, + "loss": 0.8507, + "step": 1124 + }, + { + "epoch": 2.58, + "learning_rate": 9.970278258957415e-05, + "loss": 0.7585, + "step": 1125 + }, + { + "epoch": 2.58, + "learning_rate": 9.955417470485265e-05, + "loss": 0.7163, + "step": 1126 + }, + { + "epoch": 2.58, + "learning_rate": 9.940556780471885e-05, + "loss": 0.8124, + "step": 1127 + }, + { + "epoch": 2.58, + "learning_rate": 9.925696221736525e-05, + "loss": 0.924, + "step": 1128 + }, + { + "epoch": 2.59, + "learning_rate": 9.91083582709815e-05, + "loss": 0.843, + "step": 1129 + }, + { + "epoch": 2.59, + "learning_rate": 9.895975629375359e-05, + "loss": 0.8461, + "step": 1130 + }, + { + "epoch": 2.59, + "learning_rate": 9.881115661386314e-05, + "loss": 0.757, + "step": 1131 + }, + { + "epoch": 2.59, + "learning_rate": 9.866255955948676e-05, + "loss": 0.7779, + "step": 1132 + }, + { + "epoch": 2.6, + "learning_rate": 9.851396545879516e-05, + "loss": 0.8325, + "step": 1133 + }, + { + "epoch": 2.6, + "learning_rate": 9.836537463995262e-05, + "loss": 0.7117, + "step": 1134 + }, + { + "epoch": 2.6, + "learning_rate": 9.821678743111618e-05, + "loss": 0.7209, + "step": 1135 + }, + { + "epoch": 2.6, + "learning_rate": 9.806820416043478e-05, + "loss": 0.6621, + "step": 1136 + }, + { + "epoch": 2.6, + "learning_rate": 9.791962515604887e-05, + "loss": 0.7836, + "step": 1137 + }, + { + "epoch": 2.61, + "learning_rate": 9.777105074608928e-05, + "loss": 0.8576, + "step": 1138 + }, + { + "epoch": 2.61, + "learning_rate": 9.762248125867678e-05, + "loss": 0.6352, + "step": 1139 + }, + { + "epoch": 2.61, + "learning_rate": 9.747391702192132e-05, + "loss": 0.7828, + "step": 1140 + }, + { + "epoch": 2.61, + "learning_rate": 9.732535836392113e-05, + "loss": 0.6583, + "step": 1141 + }, + { + "epoch": 2.62, + "learning_rate": 9.717680561276219e-05, + "loss": 0.9171, + "step": 1142 + }, + { + "epoch": 2.62, + "learning_rate": 9.702825909651748e-05, + "loss": 0.8694, + "step": 1143 + }, + { + "epoch": 2.62, + "learning_rate": 9.687971914324607e-05, + "loss": 0.9293, + "step": 1144 + }, + { + "epoch": 2.62, + "learning_rate": 9.673118608099276e-05, + "loss": 0.7273, + "step": 1145 + }, + { + "epoch": 2.63, + "learning_rate": 9.658266023778689e-05, + "loss": 0.8386, + "step": 1146 + }, + { + "epoch": 2.63, + "learning_rate": 9.643414194164204e-05, + "loss": 0.727, + "step": 1147 + }, + { + "epoch": 2.63, + "learning_rate": 9.628563152055498e-05, + "loss": 0.9991, + "step": 1148 + }, + { + "epoch": 2.63, + "learning_rate": 9.61371293025052e-05, + "loss": 0.7304, + "step": 1149 + }, + { + "epoch": 2.63, + "learning_rate": 9.598863561545404e-05, + "loss": 0.8146, + "step": 1150 + }, + { + "epoch": 2.64, + "learning_rate": 9.584015078734395e-05, + "loss": 0.8178, + "step": 1151 + }, + { + "epoch": 2.64, + "learning_rate": 9.569167514609786e-05, + "loss": 0.7202, + "step": 1152 + }, + { + "epoch": 2.64, + "learning_rate": 9.554320901961843e-05, + "loss": 0.728, + "step": 1153 + }, + { + "epoch": 2.64, + "learning_rate": 9.539475273578729e-05, + "loss": 0.7842, + "step": 1154 + }, + { + "epoch": 2.65, + "learning_rate": 9.524630662246432e-05, + "loss": 0.7706, + "step": 1155 + }, + { + "epoch": 2.65, + "learning_rate": 9.509787100748692e-05, + "loss": 0.802, + "step": 1156 + }, + { + "epoch": 2.65, + "learning_rate": 9.494944621866937e-05, + "loss": 0.9293, + "step": 1157 + }, + { + "epoch": 2.65, + "learning_rate": 9.480103258380198e-05, + "loss": 0.8051, + "step": 1158 + }, + { + "epoch": 2.66, + "learning_rate": 9.465263043065045e-05, + "loss": 0.7449, + "step": 1159 + }, + { + "epoch": 2.66, + "learning_rate": 9.450424008695509e-05, + "loss": 0.7289, + "step": 1160 + }, + { + "epoch": 2.66, + "learning_rate": 9.43558618804302e-05, + "loss": 0.6778, + "step": 1161 + }, + { + "epoch": 2.66, + "learning_rate": 9.420749613876325e-05, + "loss": 0.7731, + "step": 1162 + }, + { + "epoch": 2.66, + "learning_rate": 9.405914318961414e-05, + "loss": 0.6934, + "step": 1163 + }, + { + "epoch": 2.67, + "learning_rate": 9.391080336061454e-05, + "loss": 0.9045, + "step": 1164 + }, + { + "epoch": 2.67, + "learning_rate": 9.376247697936719e-05, + "loss": 0.8016, + "step": 1165 + }, + { + "epoch": 2.67, + "learning_rate": 9.361416437344503e-05, + "loss": 0.6214, + "step": 1166 + }, + { + "epoch": 2.67, + "learning_rate": 9.34658658703907e-05, + "loss": 0.6771, + "step": 1167 + }, + { + "epoch": 2.68, + "learning_rate": 9.331758179771561e-05, + "loss": 0.748, + "step": 1168 + }, + { + "epoch": 2.68, + "learning_rate": 9.316931248289926e-05, + "loss": 0.665, + "step": 1169 + }, + { + "epoch": 2.68, + "learning_rate": 9.302105825338876e-05, + "loss": 0.901, + "step": 1170 + }, + { + "epoch": 2.68, + "learning_rate": 9.287281943659767e-05, + "loss": 0.8342, + "step": 1171 + }, + { + "epoch": 2.68, + "learning_rate": 9.272459635990562e-05, + "loss": 0.853, + "step": 1172 + }, + { + "epoch": 2.69, + "learning_rate": 9.257638935065753e-05, + "loss": 0.8093, + "step": 1173 + }, + { + "epoch": 2.69, + "learning_rate": 9.242819873616268e-05, + "loss": 0.8451, + "step": 1174 + }, + { + "epoch": 2.69, + "learning_rate": 9.228002484369429e-05, + "loss": 0.8628, + "step": 1175 + }, + { + "epoch": 2.69, + "learning_rate": 9.213186800048861e-05, + "loss": 0.7858, + "step": 1176 + }, + { + "epoch": 2.7, + "learning_rate": 9.198372853374415e-05, + "loss": 0.9236, + "step": 1177 + }, + { + "epoch": 2.7, + "learning_rate": 9.183560677062119e-05, + "loss": 0.7925, + "step": 1178 + }, + { + "epoch": 2.7, + "learning_rate": 9.168750303824084e-05, + "loss": 0.7105, + "step": 1179 + }, + { + "epoch": 2.7, + "learning_rate": 9.153941766368439e-05, + "loss": 0.7521, + "step": 1180 + }, + { + "epoch": 2.71, + "learning_rate": 9.139135097399254e-05, + "loss": 0.8648, + "step": 1181 + }, + { + "epoch": 2.71, + "learning_rate": 9.124330329616482e-05, + "loss": 0.8409, + "step": 1182 + }, + { + "epoch": 2.71, + "learning_rate": 9.109527495715872e-05, + "loss": 0.7198, + "step": 1183 + }, + { + "epoch": 2.71, + "learning_rate": 9.094726628388899e-05, + "loss": 0.7365, + "step": 1184 + }, + { + "epoch": 2.71, + "learning_rate": 9.0799277603227e-05, + "loss": 0.7699, + "step": 1185 + }, + { + "epoch": 2.72, + "learning_rate": 9.065130924199998e-05, + "loss": 0.8041, + "step": 1186 + }, + { + "epoch": 2.72, + "learning_rate": 9.050336152699025e-05, + "loss": 0.8308, + "step": 1187 + }, + { + "epoch": 2.72, + "learning_rate": 9.035543478493458e-05, + "loss": 0.8139, + "step": 1188 + }, + { + "epoch": 2.72, + "learning_rate": 9.02075293425233e-05, + "loss": 0.7394, + "step": 1189 + }, + { + "epoch": 2.73, + "learning_rate": 9.005964552639984e-05, + "loss": 0.6738, + "step": 1190 + }, + { + "epoch": 2.73, + "learning_rate": 8.991178366315982e-05, + "loss": 0.9421, + "step": 1191 + }, + { + "epoch": 2.73, + "learning_rate": 8.976394407935034e-05, + "loss": 0.8747, + "step": 1192 + }, + { + "epoch": 2.73, + "learning_rate": 8.961612710146934e-05, + "loss": 0.8282, + "step": 1193 + }, + { + "epoch": 2.74, + "learning_rate": 8.94683330559648e-05, + "loss": 0.765, + "step": 1194 + }, + { + "epoch": 2.74, + "learning_rate": 8.932056226923416e-05, + "loss": 0.8515, + "step": 1195 + }, + { + "epoch": 2.74, + "learning_rate": 8.917281506762335e-05, + "loss": 0.6194, + "step": 1196 + }, + { + "epoch": 2.74, + "learning_rate": 8.902509177742626e-05, + "loss": 0.8852, + "step": 1197 + }, + { + "epoch": 2.74, + "learning_rate": 8.887739272488406e-05, + "loss": 0.7481, + "step": 1198 + }, + { + "epoch": 2.75, + "learning_rate": 8.872971823618424e-05, + "loss": 0.7979, + "step": 1199 + }, + { + "epoch": 2.75, + "learning_rate": 8.858206863746018e-05, + "loss": 0.8332, + "step": 1200 + }, + { + "epoch": 2.75, + "learning_rate": 8.843444425479022e-05, + "loss": 0.6716, + "step": 1201 + }, + { + "epoch": 2.75, + "learning_rate": 8.828684541419696e-05, + "loss": 0.9192, + "step": 1202 + }, + { + "epoch": 2.76, + "learning_rate": 8.813927244164679e-05, + "loss": 0.8463, + "step": 1203 + }, + { + "epoch": 2.76, + "learning_rate": 8.799172566304874e-05, + "loss": 0.6598, + "step": 1204 + }, + { + "epoch": 2.76, + "learning_rate": 8.784420540425412e-05, + "loss": 0.7823, + "step": 1205 + }, + { + "epoch": 2.76, + "learning_rate": 8.769671199105565e-05, + "loss": 0.8728, + "step": 1206 + }, + { + "epoch": 2.77, + "learning_rate": 8.754924574918675e-05, + "loss": 0.7665, + "step": 1207 + }, + { + "epoch": 2.77, + "learning_rate": 8.74018070043208e-05, + "loss": 0.8008, + "step": 1208 + }, + { + "epoch": 2.77, + "learning_rate": 8.725439608207056e-05, + "loss": 0.6833, + "step": 1209 + }, + { + "epoch": 2.77, + "learning_rate": 8.710701330798719e-05, + "loss": 0.7801, + "step": 1210 + }, + { + "epoch": 2.77, + "learning_rate": 8.695965900755985e-05, + "loss": 0.6308, + "step": 1211 + }, + { + "epoch": 2.78, + "learning_rate": 8.68123335062147e-05, + "loss": 0.7851, + "step": 1212 + }, + { + "epoch": 2.78, + "learning_rate": 8.666503712931439e-05, + "loss": 0.7592, + "step": 1213 + }, + { + "epoch": 2.78, + "learning_rate": 8.651777020215712e-05, + "loss": 0.8727, + "step": 1214 + }, + { + "epoch": 2.78, + "learning_rate": 8.637053304997618e-05, + "loss": 0.903, + "step": 1215 + }, + { + "epoch": 2.79, + "learning_rate": 8.622332599793906e-05, + "loss": 0.8076, + "step": 1216 + }, + { + "epoch": 2.79, + "learning_rate": 8.607614937114671e-05, + "loss": 0.8975, + "step": 1217 + }, + { + "epoch": 2.79, + "learning_rate": 8.592900349463297e-05, + "loss": 0.8249, + "step": 1218 + }, + { + "epoch": 2.79, + "learning_rate": 8.578188869336377e-05, + "loss": 0.8529, + "step": 1219 + }, + { + "epoch": 2.79, + "learning_rate": 8.563480529223638e-05, + "loss": 0.8351, + "step": 1220 + }, + { + "epoch": 2.8, + "learning_rate": 8.548775361607872e-05, + "loss": 0.8934, + "step": 1221 + }, + { + "epoch": 2.8, + "learning_rate": 8.534073398964866e-05, + "loss": 0.8067, + "step": 1222 + }, + { + "epoch": 2.8, + "learning_rate": 8.519374673763326e-05, + "loss": 0.8508, + "step": 1223 + }, + { + "epoch": 2.8, + "learning_rate": 8.504679218464816e-05, + "loss": 0.7419, + "step": 1224 + }, + { + "epoch": 2.81, + "learning_rate": 8.489987065523668e-05, + "loss": 0.7808, + "step": 1225 + }, + { + "epoch": 2.81, + "learning_rate": 8.475298247386927e-05, + "loss": 0.8603, + "step": 1226 + }, + { + "epoch": 2.81, + "learning_rate": 8.460612796494272e-05, + "loss": 0.8818, + "step": 1227 + }, + { + "epoch": 2.81, + "learning_rate": 8.445930745277953e-05, + "loss": 0.779, + "step": 1228 + }, + { + "epoch": 2.82, + "learning_rate": 8.431252126162695e-05, + "loss": 0.766, + "step": 1229 + }, + { + "epoch": 2.82, + "learning_rate": 8.41657697156566e-05, + "loss": 0.8743, + "step": 1230 + }, + { + "epoch": 2.82, + "learning_rate": 8.40190531389635e-05, + "loss": 0.882, + "step": 1231 + }, + { + "epoch": 2.82, + "learning_rate": 8.387237185556545e-05, + "loss": 0.7422, + "step": 1232 + }, + { + "epoch": 2.82, + "learning_rate": 8.372572618940231e-05, + "loss": 0.9271, + "step": 1233 + }, + { + "epoch": 2.83, + "learning_rate": 8.357911646433535e-05, + "loss": 0.8051, + "step": 1234 + }, + { + "epoch": 2.83, + "learning_rate": 8.343254300414628e-05, + "loss": 0.782, + "step": 1235 + }, + { + "epoch": 2.83, + "learning_rate": 8.3286006132537e-05, + "loss": 0.8754, + "step": 1236 + }, + { + "epoch": 2.83, + "learning_rate": 8.313950617312835e-05, + "loss": 0.8249, + "step": 1237 + }, + { + "epoch": 2.84, + "learning_rate": 8.299304344945977e-05, + "loss": 0.8342, + "step": 1238 + }, + { + "epoch": 2.84, + "learning_rate": 8.284661828498847e-05, + "loss": 0.8593, + "step": 1239 + }, + { + "epoch": 2.84, + "learning_rate": 8.270023100308865e-05, + "loss": 0.7507, + "step": 1240 + }, + { + "epoch": 2.84, + "learning_rate": 8.255388192705093e-05, + "loss": 0.8462, + "step": 1241 + }, + { + "epoch": 2.85, + "learning_rate": 8.240757138008149e-05, + "loss": 0.8322, + "step": 1242 + }, + { + "epoch": 2.85, + "learning_rate": 8.22612996853014e-05, + "loss": 0.8963, + "step": 1243 + }, + { + "epoch": 2.85, + "learning_rate": 8.211506716574602e-05, + "loss": 0.7419, + "step": 1244 + }, + { + "epoch": 2.85, + "learning_rate": 8.196887414436416e-05, + "loss": 0.8225, + "step": 1245 + }, + { + "epoch": 2.85, + "learning_rate": 8.182272094401735e-05, + "loss": 0.8539, + "step": 1246 + }, + { + "epoch": 2.86, + "learning_rate": 8.167660788747919e-05, + "loss": 0.7852, + "step": 1247 + }, + { + "epoch": 2.86, + "learning_rate": 8.153053529743465e-05, + "loss": 0.9128, + "step": 1248 + }, + { + "epoch": 2.86, + "learning_rate": 8.138450349647936e-05, + "loss": 0.7328, + "step": 1249 + }, + { + "epoch": 2.86, + "learning_rate": 8.123851280711877e-05, + "loss": 0.8816, + "step": 1250 + }, + { + "epoch": 2.87, + "learning_rate": 8.10925635517676e-05, + "loss": 0.7267, + "step": 1251 + }, + { + "epoch": 2.87, + "learning_rate": 8.094665605274913e-05, + "loss": 0.7362, + "step": 1252 + }, + { + "epoch": 2.87, + "learning_rate": 8.080079063229432e-05, + "loss": 0.7475, + "step": 1253 + }, + { + "epoch": 2.87, + "learning_rate": 8.065496761254126e-05, + "loss": 0.7727, + "step": 1254 + }, + { + "epoch": 2.88, + "learning_rate": 8.050918731553431e-05, + "loss": 0.746, + "step": 1255 + }, + { + "epoch": 2.88, + "learning_rate": 8.036345006322359e-05, + "loss": 0.8132, + "step": 1256 + }, + { + "epoch": 2.88, + "learning_rate": 8.021775617746412e-05, + "loss": 0.6752, + "step": 1257 + }, + { + "epoch": 2.88, + "learning_rate": 8.007210598001512e-05, + "loss": 0.7468, + "step": 1258 + }, + { + "epoch": 2.88, + "learning_rate": 7.992649979253934e-05, + "loss": 0.9141, + "step": 1259 + }, + { + "epoch": 2.89, + "learning_rate": 7.978093793660233e-05, + "loss": 0.7706, + "step": 1260 + }, + { + "epoch": 2.89, + "learning_rate": 7.963542073367181e-05, + "loss": 0.8399, + "step": 1261 + }, + { + "epoch": 2.89, + "learning_rate": 7.948994850511677e-05, + "loss": 0.834, + "step": 1262 + }, + { + "epoch": 2.89, + "learning_rate": 7.934452157220694e-05, + "loss": 0.767, + "step": 1263 + }, + { + "epoch": 2.9, + "learning_rate": 7.9199140256112e-05, + "loss": 0.75, + "step": 1264 + }, + { + "epoch": 2.9, + "learning_rate": 7.905380487790088e-05, + "loss": 0.81, + "step": 1265 + }, + { + "epoch": 2.9, + "learning_rate": 7.890851575854108e-05, + "loss": 0.8931, + "step": 1266 + }, + { + "epoch": 2.9, + "learning_rate": 7.876327321889795e-05, + "loss": 0.8929, + "step": 1267 + }, + { + "epoch": 2.9, + "learning_rate": 7.861807757973387e-05, + "loss": 0.787, + "step": 1268 + }, + { + "epoch": 2.91, + "learning_rate": 7.847292916170784e-05, + "loss": 0.8072, + "step": 1269 + }, + { + "epoch": 2.91, + "learning_rate": 7.832782828537437e-05, + "loss": 0.8121, + "step": 1270 + }, + { + "epoch": 2.91, + "learning_rate": 7.818277527118307e-05, + "loss": 0.7951, + "step": 1271 + }, + { + "epoch": 2.91, + "learning_rate": 7.803777043947789e-05, + "loss": 0.7093, + "step": 1272 + }, + { + "epoch": 2.92, + "learning_rate": 7.789281411049625e-05, + "loss": 0.7827, + "step": 1273 + }, + { + "epoch": 2.92, + "learning_rate": 7.774790660436858e-05, + "loss": 0.7433, + "step": 1274 + }, + { + "epoch": 2.92, + "learning_rate": 7.760304824111741e-05, + "loss": 0.7359, + "step": 1275 + }, + { + "epoch": 2.92, + "learning_rate": 7.745823934065671e-05, + "loss": 0.7157, + "step": 1276 + }, + { + "epoch": 2.93, + "learning_rate": 7.731348022279134e-05, + "loss": 0.961, + "step": 1277 + }, + { + "epoch": 2.93, + "learning_rate": 7.716877120721611e-05, + "loss": 0.7718, + "step": 1278 + }, + { + "epoch": 2.93, + "learning_rate": 7.702411261351523e-05, + "loss": 0.835, + "step": 1279 + }, + { + "epoch": 2.93, + "learning_rate": 7.68795047611615e-05, + "loss": 0.9129, + "step": 1280 + }, + { + "epoch": 2.93, + "learning_rate": 7.673494796951573e-05, + "loss": 0.7635, + "step": 1281 + }, + { + "epoch": 2.94, + "learning_rate": 7.659044255782593e-05, + "loss": 0.6873, + "step": 1282 + }, + { + "epoch": 2.94, + "learning_rate": 7.644598884522659e-05, + "loss": 0.6434, + "step": 1283 + }, + { + "epoch": 2.94, + "learning_rate": 7.630158715073813e-05, + "loss": 0.8408, + "step": 1284 + }, + { + "epoch": 2.94, + "learning_rate": 7.615723779326599e-05, + "loss": 0.9042, + "step": 1285 + }, + { + "epoch": 2.95, + "learning_rate": 7.601294109160012e-05, + "loss": 0.7996, + "step": 1286 + }, + { + "epoch": 2.95, + "learning_rate": 7.586869736441413e-05, + "loss": 0.923, + "step": 1287 + }, + { + "epoch": 2.95, + "learning_rate": 7.572450693026462e-05, + "loss": 0.7661, + "step": 1288 + }, + { + "epoch": 2.95, + "learning_rate": 7.55803701075905e-05, + "loss": 0.9105, + "step": 1289 + }, + { + "epoch": 2.96, + "learning_rate": 7.543628721471233e-05, + "loss": 0.8071, + "step": 1290 + }, + { + "epoch": 2.96, + "learning_rate": 7.52922585698315e-05, + "loss": 0.8234, + "step": 1291 + }, + { + "epoch": 2.96, + "learning_rate": 7.514828449102966e-05, + "loss": 0.8131, + "step": 1292 + }, + { + "epoch": 2.96, + "learning_rate": 7.500436529626786e-05, + "loss": 0.8149, + "step": 1293 + }, + { + "epoch": 2.96, + "learning_rate": 7.486050130338612e-05, + "loss": 0.8441, + "step": 1294 + }, + { + "epoch": 2.97, + "learning_rate": 7.471669283010232e-05, + "loss": 0.8269, + "step": 1295 + }, + { + "epoch": 2.97, + "learning_rate": 7.457294019401191e-05, + "loss": 0.632, + "step": 1296 + }, + { + "epoch": 2.97, + "learning_rate": 7.442924371258694e-05, + "loss": 0.8522, + "step": 1297 + }, + { + "epoch": 2.97, + "learning_rate": 7.428560370317542e-05, + "loss": 0.8387, + "step": 1298 + }, + { + "epoch": 2.98, + "learning_rate": 7.414202048300072e-05, + "loss": 0.887, + "step": 1299 + }, + { + "epoch": 2.98, + "learning_rate": 7.399849436916077e-05, + "loss": 0.8273, + "step": 1300 + }, + { + "epoch": 2.98, + "learning_rate": 7.385502567862728e-05, + "loss": 0.7807, + "step": 1301 + }, + { + "epoch": 2.98, + "learning_rate": 7.371161472824536e-05, + "loss": 0.9077, + "step": 1302 + }, + { + "epoch": 2.99, + "learning_rate": 7.35682618347324e-05, + "loss": 0.8779, + "step": 1303 + }, + { + "epoch": 2.99, + "learning_rate": 7.342496731467767e-05, + "loss": 0.8595, + "step": 1304 + }, + { + "epoch": 2.99, + "learning_rate": 7.328173148454151e-05, + "loss": 0.8391, + "step": 1305 + }, + { + "epoch": 2.99, + "learning_rate": 7.31385546606546e-05, + "loss": 0.7559, + "step": 1306 + }, + { + "epoch": 2.99, + "learning_rate": 7.29954371592174e-05, + "loss": 0.8926, + "step": 1307 + }, + { + "epoch": 3.0, + "learning_rate": 7.285237929629928e-05, + "loss": 0.8443, + "step": 1308 + }, + { + "epoch": 3.0, + "learning_rate": 7.27093813878379e-05, + "loss": 0.7854, + "step": 1309 + }, + { + "epoch": 3.0, + "learning_rate": 7.256644374963857e-05, + "loss": 0.9361, + "step": 1310 + }, + { + "epoch": 3.0, + "learning_rate": 7.242356669737344e-05, + "loss": 0.7515, + "step": 1311 + }, + { + "epoch": 3.01, + "learning_rate": 7.228075054658096e-05, + "loss": 0.5228, + "step": 1312 + }, + { + "epoch": 3.01, + "learning_rate": 7.213799561266489e-05, + "loss": 0.8614, + "step": 1313 + }, + { + "epoch": 3.01, + "learning_rate": 7.199530221089398e-05, + "loss": 0.6461, + "step": 1314 + }, + { + "epoch": 3.01, + "learning_rate": 7.185267065640104e-05, + "loss": 0.6926, + "step": 1315 + }, + { + "epoch": 3.01, + "learning_rate": 7.171010126418218e-05, + "loss": 0.8601, + "step": 1316 + }, + { + "epoch": 3.02, + "learning_rate": 7.156759434909639e-05, + "loss": 0.784, + "step": 1317 + }, + { + "epoch": 3.02, + "learning_rate": 7.142515022586456e-05, + "loss": 1.0793, + "step": 1318 + }, + { + "epoch": 3.02, + "learning_rate": 7.1282769209069e-05, + "loss": 0.71, + "step": 1319 + }, + { + "epoch": 3.02, + "learning_rate": 7.114045161315261e-05, + "loss": 0.7129, + "step": 1320 + }, + { + "epoch": 3.03, + "learning_rate": 7.099819775241819e-05, + "loss": 0.6223, + "step": 1321 + }, + { + "epoch": 3.03, + "learning_rate": 7.085600794102783e-05, + "loss": 0.643, + "step": 1322 + }, + { + "epoch": 3.03, + "learning_rate": 7.071388249300218e-05, + "loss": 0.7678, + "step": 1323 + }, + { + "epoch": 3.03, + "learning_rate": 7.057182172221967e-05, + "loss": 0.6995, + "step": 1324 + }, + { + "epoch": 3.04, + "learning_rate": 7.042982594241601e-05, + "loss": 0.6812, + "step": 1325 + }, + { + "epoch": 3.04, + "learning_rate": 7.028789546718326e-05, + "loss": 0.7234, + "step": 1326 + }, + { + "epoch": 3.04, + "learning_rate": 7.014603060996938e-05, + "loss": 0.8338, + "step": 1327 + }, + { + "epoch": 3.04, + "learning_rate": 7.00042316840773e-05, + "loss": 0.9738, + "step": 1328 + }, + { + "epoch": 3.04, + "learning_rate": 6.98624990026644e-05, + "loss": 0.6211, + "step": 1329 + }, + { + "epoch": 3.05, + "learning_rate": 6.972083287874177e-05, + "loss": 0.7343, + "step": 1330 + }, + { + "epoch": 3.05, + "learning_rate": 6.957923362517348e-05, + "loss": 0.7291, + "step": 1331 + }, + { + "epoch": 3.05, + "learning_rate": 6.943770155467593e-05, + "loss": 0.7687, + "step": 1332 + }, + { + "epoch": 3.05, + "learning_rate": 6.929623697981718e-05, + "loss": 0.7509, + "step": 1333 + }, + { + "epoch": 3.06, + "learning_rate": 6.915484021301613e-05, + "loss": 0.769, + "step": 1334 + }, + { + "epoch": 3.06, + "learning_rate": 6.90135115665421e-05, + "loss": 0.7605, + "step": 1335 + }, + { + "epoch": 3.06, + "learning_rate": 6.887225135251381e-05, + "loss": 0.7519, + "step": 1336 + }, + { + "epoch": 3.06, + "learning_rate": 6.873105988289892e-05, + "loss": 0.7648, + "step": 1337 + }, + { + "epoch": 3.07, + "learning_rate": 6.858993746951328e-05, + "loss": 0.8969, + "step": 1338 + }, + { + "epoch": 3.07, + "learning_rate": 6.844888442402018e-05, + "loss": 0.7229, + "step": 1339 + }, + { + "epoch": 3.07, + "learning_rate": 6.830790105792973e-05, + "loss": 0.6294, + "step": 1340 + }, + { + "epoch": 3.07, + "learning_rate": 6.816698768259824e-05, + "loss": 0.7872, + "step": 1341 + }, + { + "epoch": 3.07, + "learning_rate": 6.802614460922728e-05, + "loss": 0.7555, + "step": 1342 + }, + { + "epoch": 3.08, + "learning_rate": 6.788537214886335e-05, + "loss": 0.7431, + "step": 1343 + }, + { + "epoch": 3.08, + "learning_rate": 6.774467061239687e-05, + "loss": 0.7502, + "step": 1344 + }, + { + "epoch": 3.08, + "learning_rate": 6.760404031056169e-05, + "loss": 0.9202, + "step": 1345 + }, + { + "epoch": 3.08, + "learning_rate": 6.74634815539343e-05, + "loss": 0.8221, + "step": 1346 + }, + { + "epoch": 3.09, + "learning_rate": 6.732299465293322e-05, + "loss": 0.8935, + "step": 1347 + }, + { + "epoch": 3.09, + "learning_rate": 6.718257991781828e-05, + "loss": 0.6869, + "step": 1348 + }, + { + "epoch": 3.09, + "learning_rate": 6.704223765868991e-05, + "loss": 0.6931, + "step": 1349 + }, + { + "epoch": 3.09, + "learning_rate": 6.690196818548846e-05, + "loss": 0.7308, + "step": 1350 + }, + { + "epoch": 3.1, + "learning_rate": 6.67617718079936e-05, + "loss": 0.779, + "step": 1351 + }, + { + "epoch": 3.1, + "learning_rate": 6.662164883582354e-05, + "loss": 0.7807, + "step": 1352 + }, + { + "epoch": 3.1, + "learning_rate": 6.648159957843438e-05, + "loss": 0.7942, + "step": 1353 + }, + { + "epoch": 3.1, + "learning_rate": 6.63416243451194e-05, + "loss": 0.842, + "step": 1354 + }, + { + "epoch": 3.1, + "learning_rate": 6.62017234450084e-05, + "loss": 0.9713, + "step": 1355 + }, + { + "epoch": 3.11, + "learning_rate": 6.60618971870671e-05, + "loss": 0.5946, + "step": 1356 + }, + { + "epoch": 3.11, + "learning_rate": 6.592214588009625e-05, + "loss": 0.656, + "step": 1357 + }, + { + "epoch": 3.11, + "learning_rate": 6.578246983273118e-05, + "loss": 0.7192, + "step": 1358 + }, + { + "epoch": 3.11, + "learning_rate": 6.564286935344089e-05, + "loss": 0.7485, + "step": 1359 + }, + { + "epoch": 3.12, + "learning_rate": 6.550334475052767e-05, + "loss": 0.8379, + "step": 1360 + }, + { + "epoch": 3.12, + "learning_rate": 6.536389633212609e-05, + "loss": 0.9204, + "step": 1361 + }, + { + "epoch": 3.12, + "learning_rate": 6.522452440620254e-05, + "loss": 0.7924, + "step": 1362 + }, + { + "epoch": 3.12, + "learning_rate": 6.508522928055445e-05, + "loss": 0.7988, + "step": 1363 + }, + { + "epoch": 3.12, + "learning_rate": 6.494601126280963e-05, + "loss": 0.7678, + "step": 1364 + }, + { + "epoch": 3.13, + "learning_rate": 6.480687066042561e-05, + "loss": 0.7079, + "step": 1365 + }, + { + "epoch": 3.13, + "learning_rate": 6.466780778068903e-05, + "loss": 0.7104, + "step": 1366 + }, + { + "epoch": 3.13, + "learning_rate": 6.452882293071468e-05, + "loss": 0.7226, + "step": 1367 + }, + { + "epoch": 3.13, + "learning_rate": 6.43899164174453e-05, + "loss": 0.8358, + "step": 1368 + }, + { + "epoch": 3.14, + "learning_rate": 6.42510885476504e-05, + "loss": 0.6752, + "step": 1369 + }, + { + "epoch": 3.14, + "learning_rate": 6.411233962792593e-05, + "loss": 0.7962, + "step": 1370 + }, + { + "epoch": 3.14, + "learning_rate": 6.397366996469343e-05, + "loss": 0.8052, + "step": 1371 + }, + { + "epoch": 3.14, + "learning_rate": 6.383507986419939e-05, + "loss": 0.9013, + "step": 1372 + }, + { + "epoch": 3.15, + "learning_rate": 6.369656963251467e-05, + "loss": 0.798, + "step": 1373 + }, + { + "epoch": 3.15, + "learning_rate": 6.355813957553364e-05, + "loss": 0.7121, + "step": 1374 + }, + { + "epoch": 3.15, + "learning_rate": 6.341978999897365e-05, + "loss": 0.7275, + "step": 1375 + }, + { + "epoch": 3.15, + "learning_rate": 6.328152120837439e-05, + "loss": 0.7393, + "step": 1376 + }, + { + "epoch": 3.15, + "learning_rate": 6.314333350909701e-05, + "loss": 0.9145, + "step": 1377 + }, + { + "epoch": 3.16, + "learning_rate": 6.300522720632367e-05, + "loss": 0.8225, + "step": 1378 + }, + { + "epoch": 3.16, + "learning_rate": 6.286720260505668e-05, + "loss": 0.842, + "step": 1379 + }, + { + "epoch": 3.16, + "learning_rate": 6.2729260010118e-05, + "loss": 0.9227, + "step": 1380 + }, + { + "epoch": 3.16, + "learning_rate": 6.259139972614845e-05, + "loss": 0.8438, + "step": 1381 + }, + { + "epoch": 3.17, + "learning_rate": 6.245362205760704e-05, + "loss": 0.9213, + "step": 1382 + }, + { + "epoch": 3.17, + "learning_rate": 6.231592730877035e-05, + "loss": 0.7469, + "step": 1383 + }, + { + "epoch": 3.17, + "learning_rate": 6.217831578373185e-05, + "loss": 0.7289, + "step": 1384 + }, + { + "epoch": 3.17, + "learning_rate": 6.204078778640121e-05, + "loss": 0.8306, + "step": 1385 + }, + { + "epoch": 3.18, + "learning_rate": 6.190334362050365e-05, + "loss": 0.7807, + "step": 1386 + }, + { + "epoch": 3.18, + "learning_rate": 6.176598358957919e-05, + "loss": 0.7564, + "step": 1387 + }, + { + "epoch": 3.18, + "learning_rate": 6.162870799698209e-05, + "loss": 0.8306, + "step": 1388 + }, + { + "epoch": 3.18, + "learning_rate": 6.149151714588009e-05, + "loss": 0.7317, + "step": 1389 + }, + { + "epoch": 3.18, + "learning_rate": 6.135441133925382e-05, + "loss": 0.8923, + "step": 1390 + }, + { + "epoch": 3.19, + "learning_rate": 6.121739087989613e-05, + "loss": 0.7723, + "step": 1391 + }, + { + "epoch": 3.19, + "learning_rate": 6.108045607041125e-05, + "loss": 0.796, + "step": 1392 + }, + { + "epoch": 3.19, + "learning_rate": 6.0943607213214425e-05, + "loss": 0.7907, + "step": 1393 + }, + { + "epoch": 3.19, + "learning_rate": 6.0806844610530956e-05, + "loss": 0.7709, + "step": 1394 + }, + { + "epoch": 3.2, + "learning_rate": 6.0670168564395705e-05, + "loss": 0.8841, + "step": 1395 + }, + { + "epoch": 3.2, + "learning_rate": 6.053357937665237e-05, + "loss": 0.6325, + "step": 1396 + }, + { + "epoch": 3.2, + "learning_rate": 6.039707734895279e-05, + "loss": 0.8047, + "step": 1397 + }, + { + "epoch": 3.2, + "learning_rate": 6.0260662782756374e-05, + "loss": 0.7933, + "step": 1398 + }, + { + "epoch": 3.21, + "learning_rate": 6.012433597932936e-05, + "loss": 0.8016, + "step": 1399 + }, + { + "epoch": 3.21, + "learning_rate": 5.998809723974407e-05, + "loss": 0.8992, + "step": 1400 + }, + { + "epoch": 3.21, + "learning_rate": 5.985194686487854e-05, + "loss": 0.7384, + "step": 1401 + }, + { + "epoch": 3.21, + "learning_rate": 5.971588515541546e-05, + "loss": 0.7214, + "step": 1402 + }, + { + "epoch": 3.21, + "learning_rate": 5.957991241184184e-05, + "loss": 0.7394, + "step": 1403 + }, + { + "epoch": 3.22, + "learning_rate": 5.94440289344481e-05, + "loss": 0.6268, + "step": 1404 + }, + { + "epoch": 3.22, + "learning_rate": 5.9308235023327604e-05, + "loss": 0.8049, + "step": 1405 + }, + { + "epoch": 3.22, + "learning_rate": 5.9172530978375894e-05, + "loss": 0.8396, + "step": 1406 + }, + { + "epoch": 3.22, + "learning_rate": 5.9036917099290026e-05, + "loss": 0.7694, + "step": 1407 + }, + { + "epoch": 3.23, + "learning_rate": 5.890139368556791e-05, + "loss": 0.7289, + "step": 1408 + }, + { + "epoch": 3.23, + "learning_rate": 5.8765961036507736e-05, + "loss": 0.7949, + "step": 1409 + }, + { + "epoch": 3.23, + "learning_rate": 5.863061945120719e-05, + "loss": 0.9371, + "step": 1410 + }, + { + "epoch": 3.23, + "learning_rate": 5.8495369228562894e-05, + "loss": 0.7323, + "step": 1411 + }, + { + "epoch": 3.23, + "learning_rate": 5.836021066726962e-05, + "loss": 0.8331, + "step": 1412 + }, + { + "epoch": 3.24, + "learning_rate": 5.8225144065819745e-05, + "loss": 0.768, + "step": 1413 + }, + { + "epoch": 3.24, + "learning_rate": 5.809016972250263e-05, + "loss": 0.7804, + "step": 1414 + }, + { + "epoch": 3.24, + "learning_rate": 5.795528793540379e-05, + "loss": 0.771, + "step": 1415 + }, + { + "epoch": 3.24, + "learning_rate": 5.782049900240432e-05, + "loss": 0.7431, + "step": 1416 + }, + { + "epoch": 3.25, + "learning_rate": 5.768580322118034e-05, + "loss": 0.8618, + "step": 1417 + }, + { + "epoch": 3.25, + "learning_rate": 5.755120088920225e-05, + "loss": 0.7639, + "step": 1418 + }, + { + "epoch": 3.25, + "learning_rate": 5.7416692303733946e-05, + "loss": 0.8375, + "step": 1419 + }, + { + "epoch": 3.25, + "learning_rate": 5.728227776183244e-05, + "loss": 0.7409, + "step": 1420 + }, + { + "epoch": 3.26, + "learning_rate": 5.714795756034695e-05, + "loss": 0.7529, + "step": 1421 + }, + { + "epoch": 3.26, + "learning_rate": 5.701373199591835e-05, + "loss": 0.8878, + "step": 1422 + }, + { + "epoch": 3.26, + "learning_rate": 5.687960136497861e-05, + "loss": 0.6923, + "step": 1423 + }, + { + "epoch": 3.26, + "learning_rate": 5.6745565963749925e-05, + "loss": 0.8628, + "step": 1424 + }, + { + "epoch": 3.26, + "learning_rate": 5.6611626088244194e-05, + "loss": 0.6949, + "step": 1425 + }, + { + "epoch": 3.27, + "learning_rate": 5.6477782034262436e-05, + "loss": 0.7278, + "step": 1426 + }, + { + "epoch": 3.27, + "learning_rate": 5.634403409739402e-05, + "loss": 0.8781, + "step": 1427 + }, + { + "epoch": 3.27, + "learning_rate": 5.621038257301601e-05, + "loss": 0.7329, + "step": 1428 + }, + { + "epoch": 3.27, + "learning_rate": 5.6076827756292495e-05, + "loss": 0.7195, + "step": 1429 + }, + { + "epoch": 3.28, + "learning_rate": 5.594336994217415e-05, + "loss": 0.7283, + "step": 1430 + }, + { + "epoch": 3.28, + "learning_rate": 5.5810009425397294e-05, + "loss": 0.8064, + "step": 1431 + }, + { + "epoch": 3.28, + "learning_rate": 5.5676746500483336e-05, + "loss": 0.8488, + "step": 1432 + }, + { + "epoch": 3.28, + "learning_rate": 5.55435814617383e-05, + "loss": 0.8925, + "step": 1433 + }, + { + "epoch": 3.29, + "learning_rate": 5.5410514603251985e-05, + "loss": 0.7677, + "step": 1434 + }, + { + "epoch": 3.29, + "learning_rate": 5.5277546218897294e-05, + "loss": 0.8037, + "step": 1435 + }, + { + "epoch": 3.29, + "learning_rate": 5.514467660232965e-05, + "loss": 0.8046, + "step": 1436 + }, + { + "epoch": 3.29, + "learning_rate": 5.5011906046986473e-05, + "loss": 0.7885, + "step": 1437 + }, + { + "epoch": 3.29, + "learning_rate": 5.487923484608629e-05, + "loss": 0.8264, + "step": 1438 + }, + { + "epoch": 3.3, + "learning_rate": 5.4746663292628234e-05, + "loss": 0.7551, + "step": 1439 + }, + { + "epoch": 3.3, + "learning_rate": 5.4614191679391444e-05, + "loss": 0.8766, + "step": 1440 + }, + { + "epoch": 3.3, + "learning_rate": 5.448182029893423e-05, + "loss": 0.8992, + "step": 1441 + }, + { + "epoch": 3.3, + "learning_rate": 5.434954944359365e-05, + "loss": 0.6505, + "step": 1442 + }, + { + "epoch": 3.31, + "learning_rate": 5.4217379405484636e-05, + "loss": 0.8743, + "step": 1443 + }, + { + "epoch": 3.31, + "learning_rate": 5.408531047649964e-05, + "loss": 0.6965, + "step": 1444 + }, + { + "epoch": 3.31, + "learning_rate": 5.395334294830765e-05, + "loss": 0.7663, + "step": 1445 + }, + { + "epoch": 3.31, + "learning_rate": 5.382147711235377e-05, + "loss": 0.86, + "step": 1446 + }, + { + "epoch": 3.32, + "learning_rate": 5.3689713259858586e-05, + "loss": 0.7524, + "step": 1447 + }, + { + "epoch": 3.32, + "learning_rate": 5.355805168181738e-05, + "loss": 0.9115, + "step": 1448 + }, + { + "epoch": 3.32, + "learning_rate": 5.342649266899955e-05, + "loss": 0.8342, + "step": 1449 + }, + { + "epoch": 3.32, + "learning_rate": 5.329503651194805e-05, + "loss": 0.8447, + "step": 1450 + }, + { + "epoch": 3.32, + "learning_rate": 5.316368350097869e-05, + "loss": 0.7877, + "step": 1451 + }, + { + "epoch": 3.33, + "learning_rate": 5.3032433926179395e-05, + "loss": 0.7965, + "step": 1452 + }, + { + "epoch": 3.33, + "learning_rate": 5.290128807740976e-05, + "loss": 0.7844, + "step": 1453 + }, + { + "epoch": 3.33, + "learning_rate": 5.2770246244300224e-05, + "loss": 0.7405, + "step": 1454 + }, + { + "epoch": 3.33, + "learning_rate": 5.263930871625151e-05, + "loss": 0.7782, + "step": 1455 + }, + { + "epoch": 3.34, + "learning_rate": 5.2508475782434093e-05, + "loss": 0.7789, + "step": 1456 + }, + { + "epoch": 3.34, + "learning_rate": 5.237774773178734e-05, + "loss": 0.8943, + "step": 1457 + }, + { + "epoch": 3.34, + "learning_rate": 5.224712485301898e-05, + "loss": 0.7712, + "step": 1458 + }, + { + "epoch": 3.34, + "learning_rate": 5.211660743460458e-05, + "loss": 0.8608, + "step": 1459 + }, + { + "epoch": 3.34, + "learning_rate": 5.198619576478678e-05, + "loss": 0.7212, + "step": 1460 + }, + { + "epoch": 3.35, + "learning_rate": 5.1855890131574614e-05, + "loss": 0.7588, + "step": 1461 + }, + { + "epoch": 3.35, + "learning_rate": 5.17256908227429e-05, + "loss": 0.8001, + "step": 1462 + }, + { + "epoch": 3.35, + "learning_rate": 5.159559812583181e-05, + "loss": 0.8327, + "step": 1463 + }, + { + "epoch": 3.35, + "learning_rate": 5.146561232814593e-05, + "loss": 0.8874, + "step": 1464 + }, + { + "epoch": 3.36, + "learning_rate": 5.133573371675375e-05, + "loss": 0.6802, + "step": 1465 + }, + { + "epoch": 3.36, + "learning_rate": 5.1205962578487155e-05, + "loss": 0.7581, + "step": 1466 + }, + { + "epoch": 3.36, + "learning_rate": 5.1076299199940645e-05, + "loss": 0.8714, + "step": 1467 + }, + { + "epoch": 3.36, + "learning_rate": 5.094674386747067e-05, + "loss": 0.6667, + "step": 1468 + }, + { + "epoch": 3.37, + "learning_rate": 5.081729686719508e-05, + "loss": 0.8107, + "step": 1469 + }, + { + "epoch": 3.37, + "learning_rate": 5.068795848499257e-05, + "loss": 0.8891, + "step": 1470 + }, + { + "epoch": 3.37, + "learning_rate": 5.0558729006501846e-05, + "loss": 0.7259, + "step": 1471 + }, + { + "epoch": 3.37, + "learning_rate": 5.042960871712112e-05, + "loss": 0.8035, + "step": 1472 + }, + { + "epoch": 3.37, + "learning_rate": 5.030059790200756e-05, + "loss": 0.7042, + "step": 1473 + }, + { + "epoch": 3.38, + "learning_rate": 5.0171696846076446e-05, + "loss": 0.7852, + "step": 1474 + }, + { + "epoch": 3.38, + "learning_rate": 5.004290583400075e-05, + "loss": 0.8489, + "step": 1475 + }, + { + "epoch": 3.38, + "learning_rate": 4.9914225150210335e-05, + "loss": 0.7696, + "step": 1476 + }, + { + "epoch": 3.38, + "learning_rate": 4.97856550788915e-05, + "loss": 0.7, + "step": 1477 + }, + { + "epoch": 3.39, + "learning_rate": 4.9657195903986185e-05, + "loss": 0.8373, + "step": 1478 + }, + { + "epoch": 3.39, + "learning_rate": 4.952884790919141e-05, + "loss": 0.8822, + "step": 1479 + }, + { + "epoch": 3.39, + "learning_rate": 4.940061137795876e-05, + "loss": 0.7292, + "step": 1480 + }, + { + "epoch": 3.39, + "learning_rate": 4.927248659349355e-05, + "loss": 0.8165, + "step": 1481 + }, + { + "epoch": 3.4, + "learning_rate": 4.914447383875432e-05, + "loss": 0.7782, + "step": 1482 + }, + { + "epoch": 3.4, + "learning_rate": 4.901657339645226e-05, + "loss": 0.8172, + "step": 1483 + }, + { + "epoch": 3.4, + "learning_rate": 4.888878554905051e-05, + "loss": 0.8072, + "step": 1484 + }, + { + "epoch": 3.4, + "learning_rate": 4.876111057876347e-05, + "loss": 0.7715, + "step": 1485 + }, + { + "epoch": 3.4, + "learning_rate": 4.863354876755637e-05, + "loss": 0.7384, + "step": 1486 + }, + { + "epoch": 3.41, + "learning_rate": 4.850610039714444e-05, + "loss": 0.7881, + "step": 1487 + }, + { + "epoch": 3.41, + "learning_rate": 4.837876574899237e-05, + "loss": 0.7962, + "step": 1488 + }, + { + "epoch": 3.41, + "learning_rate": 4.8251545104313836e-05, + "loss": 0.5635, + "step": 1489 + }, + { + "epoch": 3.41, + "learning_rate": 4.812443874407059e-05, + "loss": 0.7454, + "step": 1490 + }, + { + "epoch": 3.42, + "learning_rate": 4.7997446948972015e-05, + "loss": 0.8505, + "step": 1491 + }, + { + "epoch": 3.42, + "learning_rate": 4.787056999947455e-05, + "loss": 0.6157, + "step": 1492 + }, + { + "epoch": 3.42, + "learning_rate": 4.774380817578101e-05, + "loss": 0.7731, + "step": 1493 + }, + { + "epoch": 3.42, + "learning_rate": 4.761716175783989e-05, + "loss": 0.8062, + "step": 1494 + }, + { + "epoch": 3.42, + "learning_rate": 4.74906310253448e-05, + "loss": 0.7027, + "step": 1495 + }, + { + "epoch": 3.43, + "learning_rate": 4.736421625773396e-05, + "loss": 0.7, + "step": 1496 + }, + { + "epoch": 3.43, + "learning_rate": 4.723791773418942e-05, + "loss": 0.7822, + "step": 1497 + }, + { + "epoch": 3.43, + "learning_rate": 4.7111735733636466e-05, + "loss": 0.6308, + "step": 1498 + }, + { + "epoch": 3.43, + "learning_rate": 4.698567053474315e-05, + "loss": 0.6722, + "step": 1499 + }, + { + "epoch": 3.44, + "learning_rate": 4.685972241591956e-05, + "loss": 0.749, + "step": 1500 + }, + { + "epoch": 3.44, + "learning_rate": 4.673389165531714e-05, + "loss": 0.7784, + "step": 1501 + }, + { + "epoch": 3.44, + "learning_rate": 4.6608178530828174e-05, + "loss": 0.7971, + "step": 1502 + }, + { + "epoch": 3.44, + "learning_rate": 4.648258332008523e-05, + "loss": 0.8398, + "step": 1503 + }, + { + "epoch": 3.45, + "learning_rate": 4.6357106300460374e-05, + "loss": 0.6559, + "step": 1504 + }, + { + "epoch": 3.45, + "learning_rate": 4.6231747749064644e-05, + "loss": 0.7837, + "step": 1505 + }, + { + "epoch": 3.45, + "learning_rate": 4.610650794274759e-05, + "loss": 0.8072, + "step": 1506 + }, + { + "epoch": 3.45, + "learning_rate": 4.598138715809633e-05, + "loss": 0.7441, + "step": 1507 + }, + { + "epoch": 3.45, + "learning_rate": 4.585638567143529e-05, + "loss": 0.8233, + "step": 1508 + }, + { + "epoch": 3.46, + "learning_rate": 4.573150375882527e-05, + "loss": 0.8868, + "step": 1509 + }, + { + "epoch": 3.46, + "learning_rate": 4.560674169606317e-05, + "loss": 0.7059, + "step": 1510 + }, + { + "epoch": 3.46, + "learning_rate": 4.548209975868108e-05, + "loss": 0.8349, + "step": 1511 + }, + { + "epoch": 3.46, + "learning_rate": 4.5357578221945794e-05, + "loss": 0.817, + "step": 1512 + }, + { + "epoch": 3.47, + "learning_rate": 4.523317736085831e-05, + "loss": 0.7375, + "step": 1513 + }, + { + "epoch": 3.47, + "learning_rate": 4.5108897450153054e-05, + "loss": 0.8338, + "step": 1514 + }, + { + "epoch": 3.47, + "learning_rate": 4.498473876429726e-05, + "loss": 0.9212, + "step": 1515 + }, + { + "epoch": 3.47, + "learning_rate": 4.4860701577490595e-05, + "loss": 0.7182, + "step": 1516 + }, + { + "epoch": 3.48, + "learning_rate": 4.473678616366433e-05, + "loss": 0.8677, + "step": 1517 + }, + { + "epoch": 3.48, + "learning_rate": 4.461299279648077e-05, + "loss": 0.7868, + "step": 1518 + }, + { + "epoch": 3.48, + "learning_rate": 4.4489321749332744e-05, + "loss": 0.7078, + "step": 1519 + }, + { + "epoch": 3.48, + "learning_rate": 4.436577329534291e-05, + "loss": 0.6872, + "step": 1520 + }, + { + "epoch": 3.48, + "learning_rate": 4.424234770736314e-05, + "loss": 0.7523, + "step": 1521 + }, + { + "epoch": 3.49, + "learning_rate": 4.411904525797408e-05, + "loss": 0.7107, + "step": 1522 + }, + { + "epoch": 3.49, + "learning_rate": 4.3995866219484326e-05, + "loss": 0.8932, + "step": 1523 + }, + { + "epoch": 3.49, + "learning_rate": 4.387281086392994e-05, + "loss": 0.7811, + "step": 1524 + }, + { + "epoch": 3.49, + "learning_rate": 4.374987946307385e-05, + "loss": 0.8946, + "step": 1525 + }, + { + "epoch": 3.5, + "learning_rate": 4.362707228840531e-05, + "loss": 0.8496, + "step": 1526 + }, + { + "epoch": 3.5, + "learning_rate": 4.350438961113911e-05, + "loss": 0.6998, + "step": 1527 + }, + { + "epoch": 3.5, + "learning_rate": 4.3381831702215084e-05, + "loss": 0.6792, + "step": 1528 + }, + { + "epoch": 3.5, + "learning_rate": 4.325939883229766e-05, + "loss": 0.7644, + "step": 1529 + }, + { + "epoch": 3.51, + "learning_rate": 4.3137091271775e-05, + "loss": 0.6055, + "step": 1530 + }, + { + "epoch": 3.51, + "learning_rate": 4.301490929075852e-05, + "loss": 0.7126, + "step": 1531 + }, + { + "epoch": 3.51, + "learning_rate": 4.289285315908237e-05, + "loss": 0.7635, + "step": 1532 + }, + { + "epoch": 3.51, + "learning_rate": 4.277092314630278e-05, + "loss": 0.9089, + "step": 1533 + }, + { + "epoch": 3.51, + "learning_rate": 4.264911952169735e-05, + "loss": 0.7267, + "step": 1534 + }, + { + "epoch": 3.52, + "learning_rate": 4.2527442554264605e-05, + "loss": 0.6774, + "step": 1535 + }, + { + "epoch": 3.52, + "learning_rate": 4.240589251272342e-05, + "loss": 0.8402, + "step": 1536 + }, + { + "epoch": 3.52, + "learning_rate": 4.228446966551226e-05, + "loss": 0.8603, + "step": 1537 + }, + { + "epoch": 3.52, + "learning_rate": 4.2163174280788697e-05, + "loss": 0.6459, + "step": 1538 + }, + { + "epoch": 3.53, + "learning_rate": 4.2042006626428906e-05, + "loss": 0.7192, + "step": 1539 + }, + { + "epoch": 3.53, + "learning_rate": 4.192096697002686e-05, + "loss": 0.8621, + "step": 1540 + }, + { + "epoch": 3.53, + "learning_rate": 4.1800055578893883e-05, + "loss": 0.8194, + "step": 1541 + }, + { + "epoch": 3.53, + "learning_rate": 4.167927272005805e-05, + "loss": 0.8702, + "step": 1542 + }, + { + "epoch": 3.53, + "learning_rate": 4.155861866026364e-05, + "loss": 0.8677, + "step": 1543 + }, + { + "epoch": 3.54, + "learning_rate": 4.143809366597037e-05, + "loss": 0.7971, + "step": 1544 + }, + { + "epoch": 3.54, + "learning_rate": 4.131769800335292e-05, + "loss": 0.7896, + "step": 1545 + }, + { + "epoch": 3.54, + "learning_rate": 4.119743193830048e-05, + "loss": 0.889, + "step": 1546 + }, + { + "epoch": 3.54, + "learning_rate": 4.10772957364159e-05, + "loss": 0.7497, + "step": 1547 + }, + { + "epoch": 3.55, + "learning_rate": 4.0957289663015255e-05, + "loss": 0.9096, + "step": 1548 + }, + { + "epoch": 3.55, + "learning_rate": 4.083741398312727e-05, + "loss": 0.8658, + "step": 1549 + }, + { + "epoch": 3.55, + "learning_rate": 4.071766896149273e-05, + "loss": 0.5634, + "step": 1550 + }, + { + "epoch": 3.55, + "learning_rate": 4.059805486256376e-05, + "loss": 0.6693, + "step": 1551 + }, + { + "epoch": 3.56, + "learning_rate": 4.0478571950503486e-05, + "loss": 0.7128, + "step": 1552 + }, + { + "epoch": 3.56, + "learning_rate": 4.035922048918519e-05, + "loss": 0.7838, + "step": 1553 + }, + { + "epoch": 3.56, + "learning_rate": 4.024000074219187e-05, + "loss": 0.9549, + "step": 1554 + }, + { + "epoch": 3.56, + "learning_rate": 4.012091297281574e-05, + "loss": 0.6245, + "step": 1555 + }, + { + "epoch": 3.56, + "learning_rate": 4.0001957444057426e-05, + "loss": 0.7671, + "step": 1556 + }, + { + "epoch": 3.57, + "learning_rate": 3.988313441862553e-05, + "loss": 0.6645, + "step": 1557 + }, + { + "epoch": 3.57, + "learning_rate": 3.976444415893608e-05, + "loss": 0.8291, + "step": 1558 + }, + { + "epoch": 3.57, + "learning_rate": 3.96458869271119e-05, + "loss": 0.8715, + "step": 1559 + }, + { + "epoch": 3.57, + "learning_rate": 3.952746298498195e-05, + "loss": 0.8423, + "step": 1560 + }, + { + "epoch": 3.58, + "learning_rate": 3.940917259408085e-05, + "loss": 0.8303, + "step": 1561 + }, + { + "epoch": 3.58, + "learning_rate": 3.929101601564834e-05, + "loss": 0.7876, + "step": 1562 + }, + { + "epoch": 3.58, + "learning_rate": 3.9172993510628574e-05, + "loss": 0.7409, + "step": 1563 + }, + { + "epoch": 3.58, + "learning_rate": 3.9055105339669595e-05, + "loss": 0.8988, + "step": 1564 + }, + { + "epoch": 3.59, + "learning_rate": 3.8937351763122845e-05, + "loss": 1.0367, + "step": 1565 + }, + { + "epoch": 3.59, + "learning_rate": 3.8819733041042515e-05, + "loss": 0.682, + "step": 1566 + }, + { + "epoch": 3.59, + "learning_rate": 3.870224943318491e-05, + "loss": 0.815, + "step": 1567 + }, + { + "epoch": 3.59, + "learning_rate": 3.858490119900794e-05, + "loss": 0.6516, + "step": 1568 + }, + { + "epoch": 3.59, + "learning_rate": 3.846768859767066e-05, + "loss": 0.7371, + "step": 1569 + }, + { + "epoch": 3.6, + "learning_rate": 3.8350611888032474e-05, + "loss": 0.7401, + "step": 1570 + }, + { + "epoch": 3.6, + "learning_rate": 3.823367132865265e-05, + "loss": 0.7305, + "step": 1571 + }, + { + "epoch": 3.6, + "learning_rate": 3.8116867177789936e-05, + "loss": 0.7422, + "step": 1572 + }, + { + "epoch": 3.6, + "learning_rate": 3.8000199693401675e-05, + "loss": 0.7621, + "step": 1573 + }, + { + "epoch": 3.61, + "learning_rate": 3.788366913314339e-05, + "loss": 0.935, + "step": 1574 + }, + { + "epoch": 3.61, + "learning_rate": 3.776727575436829e-05, + "loss": 0.7587, + "step": 1575 + }, + { + "epoch": 3.61, + "learning_rate": 3.7651019814126654e-05, + "loss": 0.9029, + "step": 1576 + }, + { + "epoch": 3.61, + "learning_rate": 3.753490156916511e-05, + "loss": 0.8324, + "step": 1577 + }, + { + "epoch": 3.62, + "learning_rate": 3.741892127592625e-05, + "loss": 0.7316, + "step": 1578 + }, + { + "epoch": 3.62, + "learning_rate": 3.730307919054803e-05, + "loss": 0.684, + "step": 1579 + }, + { + "epoch": 3.62, + "learning_rate": 3.718737556886316e-05, + "loss": 0.7547, + "step": 1580 + }, + { + "epoch": 3.62, + "learning_rate": 3.7071810666398496e-05, + "loss": 0.8581, + "step": 1581 + }, + { + "epoch": 3.62, + "learning_rate": 3.695638473837466e-05, + "loss": 0.7707, + "step": 1582 + }, + { + "epoch": 3.63, + "learning_rate": 3.684109803970531e-05, + "loss": 0.755, + "step": 1583 + }, + { + "epoch": 3.63, + "learning_rate": 3.6725950824996535e-05, + "loss": 0.8436, + "step": 1584 + }, + { + "epoch": 3.63, + "learning_rate": 3.6610943348546526e-05, + "loss": 0.7491, + "step": 1585 + }, + { + "epoch": 3.63, + "learning_rate": 3.649607586434474e-05, + "loss": 0.6946, + "step": 1586 + }, + { + "epoch": 3.64, + "learning_rate": 3.6381348626071475e-05, + "loss": 0.7697, + "step": 1587 + }, + { + "epoch": 3.64, + "learning_rate": 3.626676188709743e-05, + "loss": 0.8108, + "step": 1588 + }, + { + "epoch": 3.64, + "learning_rate": 3.6152315900482905e-05, + "loss": 0.7676, + "step": 1589 + }, + { + "epoch": 3.64, + "learning_rate": 3.603801091897731e-05, + "loss": 0.8506, + "step": 1590 + }, + { + "epoch": 3.64, + "learning_rate": 3.592384719501878e-05, + "loss": 0.7521, + "step": 1591 + }, + { + "epoch": 3.65, + "learning_rate": 3.580982498073344e-05, + "loss": 0.8371, + "step": 1592 + }, + { + "epoch": 3.65, + "learning_rate": 3.5695944527934865e-05, + "loss": 0.816, + "step": 1593 + }, + { + "epoch": 3.65, + "learning_rate": 3.5582206088123535e-05, + "loss": 0.7097, + "step": 1594 + }, + { + "epoch": 3.65, + "learning_rate": 3.546860991248641e-05, + "loss": 0.7147, + "step": 1595 + }, + { + "epoch": 3.66, + "learning_rate": 3.5355156251896136e-05, + "loss": 0.7807, + "step": 1596 + }, + { + "epoch": 3.66, + "learning_rate": 3.524184535691068e-05, + "loss": 0.8517, + "step": 1597 + }, + { + "epoch": 3.66, + "learning_rate": 3.5128677477772734e-05, + "loss": 0.8549, + "step": 1598 + }, + { + "epoch": 3.66, + "learning_rate": 3.501565286440914e-05, + "loss": 0.7514, + "step": 1599 + }, + { + "epoch": 3.67, + "learning_rate": 3.490277176643033e-05, + "loss": 0.8055, + "step": 1600 + }, + { + "epoch": 3.67, + "learning_rate": 3.4790034433129725e-05, + "loss": 0.5494, + "step": 1601 + }, + { + "epoch": 3.67, + "learning_rate": 3.467744111348338e-05, + "loss": 0.9018, + "step": 1602 + }, + { + "epoch": 3.67, + "learning_rate": 3.4564992056149214e-05, + "loss": 0.7319, + "step": 1603 + }, + { + "epoch": 3.67, + "learning_rate": 3.445268750946651e-05, + "loss": 0.8997, + "step": 1604 + }, + { + "epoch": 3.68, + "learning_rate": 3.434052772145554e-05, + "loss": 0.7977, + "step": 1605 + }, + { + "epoch": 3.68, + "learning_rate": 3.422851293981676e-05, + "loss": 0.7205, + "step": 1606 + }, + { + "epoch": 3.68, + "learning_rate": 3.411664341193041e-05, + "loss": 0.848, + "step": 1607 + }, + { + "epoch": 3.68, + "learning_rate": 3.400491938485596e-05, + "loss": 0.7864, + "step": 1608 + }, + { + "epoch": 3.69, + "learning_rate": 3.389334110533161e-05, + "loss": 0.7184, + "step": 1609 + }, + { + "epoch": 3.69, + "learning_rate": 3.378190881977359e-05, + "loss": 0.8362, + "step": 1610 + }, + { + "epoch": 3.69, + "learning_rate": 3.367062277427567e-05, + "loss": 0.6743, + "step": 1611 + }, + { + "epoch": 3.69, + "learning_rate": 3.3559483214608824e-05, + "loss": 0.7561, + "step": 1612 + }, + { + "epoch": 3.7, + "learning_rate": 3.3448490386220355e-05, + "loss": 0.7342, + "step": 1613 + }, + { + "epoch": 3.7, + "learning_rate": 3.333764453423357e-05, + "loss": 0.7918, + "step": 1614 + }, + { + "epoch": 3.7, + "learning_rate": 3.322694590344719e-05, + "loss": 0.75, + "step": 1615 + }, + { + "epoch": 3.7, + "learning_rate": 3.3116394738334866e-05, + "loss": 0.7874, + "step": 1616 + }, + { + "epoch": 3.7, + "learning_rate": 3.300599128304443e-05, + "loss": 0.7555, + "step": 1617 + }, + { + "epoch": 3.71, + "learning_rate": 3.2895735781397685e-05, + "loss": 0.8434, + "step": 1618 + }, + { + "epoch": 3.71, + "learning_rate": 3.278562847688951e-05, + "loss": 0.8756, + "step": 1619 + }, + { + "epoch": 3.71, + "learning_rate": 3.2675669612687565e-05, + "loss": 0.8765, + "step": 1620 + }, + { + "epoch": 3.71, + "learning_rate": 3.256585943163176e-05, + "loss": 0.8501, + "step": 1621 + }, + { + "epoch": 3.72, + "learning_rate": 3.2456198176233543e-05, + "loss": 1.0232, + "step": 1622 + }, + { + "epoch": 3.72, + "learning_rate": 3.234668608867547e-05, + "loss": 0.7117, + "step": 1623 + }, + { + "epoch": 3.72, + "learning_rate": 3.2237323410810715e-05, + "loss": 0.9795, + "step": 1624 + }, + { + "epoch": 3.72, + "learning_rate": 3.212811038416251e-05, + "loss": 0.887, + "step": 1625 + }, + { + "epoch": 3.73, + "learning_rate": 3.201904724992352e-05, + "loss": 0.7008, + "step": 1626 + }, + { + "epoch": 3.73, + "learning_rate": 3.191013424895536e-05, + "loss": 0.7542, + "step": 1627 + }, + { + "epoch": 3.73, + "learning_rate": 3.18013716217882e-05, + "loss": 0.871, + "step": 1628 + }, + { + "epoch": 3.73, + "learning_rate": 3.1692759608620004e-05, + "loss": 0.7761, + "step": 1629 + }, + { + "epoch": 3.73, + "learning_rate": 3.158429844931611e-05, + "loss": 0.842, + "step": 1630 + }, + { + "epoch": 3.74, + "learning_rate": 3.1475988383408774e-05, + "loss": 0.8322, + "step": 1631 + }, + { + "epoch": 3.74, + "learning_rate": 3.136782965009658e-05, + "loss": 0.7911, + "step": 1632 + }, + { + "epoch": 3.74, + "learning_rate": 3.1259822488243806e-05, + "loss": 0.8911, + "step": 1633 + }, + { + "epoch": 3.74, + "learning_rate": 3.115196713638e-05, + "loss": 0.9232, + "step": 1634 + }, + { + "epoch": 3.75, + "learning_rate": 3.104426383269957e-05, + "loss": 0.8265, + "step": 1635 + }, + { + "epoch": 3.75, + "learning_rate": 3.093671281506099e-05, + "loss": 0.7861, + "step": 1636 + }, + { + "epoch": 3.75, + "learning_rate": 3.0829314320986433e-05, + "loss": 0.6548, + "step": 1637 + }, + { + "epoch": 3.75, + "learning_rate": 3.072206858766134e-05, + "loss": 0.7974, + "step": 1638 + }, + { + "epoch": 3.75, + "learning_rate": 3.061497585193369e-05, + "loss": 0.849, + "step": 1639 + }, + { + "epoch": 3.76, + "learning_rate": 3.050803635031355e-05, + "loss": 0.7438, + "step": 1640 + }, + { + "epoch": 3.76, + "learning_rate": 3.040125031897264e-05, + "loss": 0.838, + "step": 1641 + }, + { + "epoch": 3.76, + "learning_rate": 3.029461799374378e-05, + "loss": 0.8879, + "step": 1642 + }, + { + "epoch": 3.76, + "learning_rate": 3.0188139610120248e-05, + "loss": 0.7747, + "step": 1643 + }, + { + "epoch": 3.77, + "learning_rate": 3.0081815403255332e-05, + "loss": 0.7179, + "step": 1644 + }, + { + "epoch": 3.77, + "learning_rate": 2.9975645607961955e-05, + "loss": 0.7618, + "step": 1645 + }, + { + "epoch": 3.77, + "learning_rate": 2.9869630458711927e-05, + "loss": 0.6977, + "step": 1646 + }, + { + "epoch": 3.77, + "learning_rate": 2.9763770189635497e-05, + "loss": 0.8052, + "step": 1647 + }, + { + "epoch": 3.78, + "learning_rate": 2.9658065034520978e-05, + "loss": 0.728, + "step": 1648 + }, + { + "epoch": 3.78, + "learning_rate": 2.955251522681408e-05, + "loss": 0.8593, + "step": 1649 + }, + { + "epoch": 3.78, + "learning_rate": 2.944712099961736e-05, + "loss": 0.8347, + "step": 1650 + }, + { + "epoch": 3.78, + "learning_rate": 2.9341882585689905e-05, + "loss": 0.733, + "step": 1651 + }, + { + "epoch": 3.78, + "learning_rate": 2.9236800217446593e-05, + "loss": 0.6998, + "step": 1652 + }, + { + "epoch": 3.79, + "learning_rate": 2.9131874126957727e-05, + "loss": 0.8923, + "step": 1653 + }, + { + "epoch": 3.79, + "learning_rate": 2.9027104545948414e-05, + "loss": 0.5376, + "step": 1654 + }, + { + "epoch": 3.79, + "learning_rate": 2.892249170579826e-05, + "loss": 0.7465, + "step": 1655 + }, + { + "epoch": 3.79, + "learning_rate": 2.8818035837540537e-05, + "loss": 0.7833, + "step": 1656 + }, + { + "epoch": 3.8, + "learning_rate": 2.8713737171861986e-05, + "loss": 0.7611, + "step": 1657 + }, + { + "epoch": 3.8, + "learning_rate": 2.8609595939102153e-05, + "loss": 0.7226, + "step": 1658 + }, + { + "epoch": 3.8, + "learning_rate": 2.8505612369252832e-05, + "loss": 0.8847, + "step": 1659 + }, + { + "epoch": 3.8, + "learning_rate": 2.840178669195763e-05, + "loss": 0.7511, + "step": 1660 + }, + { + "epoch": 3.81, + "learning_rate": 2.8298119136511558e-05, + "loss": 0.6833, + "step": 1661 + }, + { + "epoch": 3.81, + "learning_rate": 2.8194609931860316e-05, + "loss": 0.7595, + "step": 1662 + }, + { + "epoch": 3.81, + "learning_rate": 2.8091259306599904e-05, + "loss": 0.7486, + "step": 1663 + }, + { + "epoch": 3.81, + "learning_rate": 2.7988067488976156e-05, + "loss": 0.8106, + "step": 1664 + }, + { + "epoch": 3.81, + "learning_rate": 2.7885034706884185e-05, + "loss": 0.8012, + "step": 1665 + }, + { + "epoch": 3.82, + "learning_rate": 2.7782161187867818e-05, + "loss": 0.7598, + "step": 1666 + }, + { + "epoch": 3.82, + "learning_rate": 2.7679447159119164e-05, + "loss": 0.6638, + "step": 1667 + }, + { + "epoch": 3.82, + "learning_rate": 2.7576892847478207e-05, + "loss": 0.6576, + "step": 1668 + }, + { + "epoch": 3.82, + "learning_rate": 2.7474498479432087e-05, + "loss": 0.8174, + "step": 1669 + }, + { + "epoch": 3.83, + "learning_rate": 2.737226428111471e-05, + "loss": 0.868, + "step": 1670 + }, + { + "epoch": 3.83, + "learning_rate": 2.7270190478306378e-05, + "loss": 0.6411, + "step": 1671 + }, + { + "epoch": 3.83, + "learning_rate": 2.7168277296433053e-05, + "loss": 0.7872, + "step": 1672 + }, + { + "epoch": 3.83, + "learning_rate": 2.7066524960565965e-05, + "loss": 0.7556, + "step": 1673 + }, + { + "epoch": 3.84, + "learning_rate": 2.6964933695421192e-05, + "loss": 0.8606, + "step": 1674 + }, + { + "epoch": 3.84, + "learning_rate": 2.6863503725359107e-05, + "loss": 0.7776, + "step": 1675 + }, + { + "epoch": 3.84, + "learning_rate": 2.6762235274383772e-05, + "loss": 0.7095, + "step": 1676 + }, + { + "epoch": 3.84, + "learning_rate": 2.666112856614259e-05, + "loss": 0.8587, + "step": 1677 + }, + { + "epoch": 3.84, + "learning_rate": 2.65601838239258e-05, + "loss": 0.8568, + "step": 1678 + }, + { + "epoch": 3.85, + "learning_rate": 2.6459401270665894e-05, + "loss": 0.7725, + "step": 1679 + }, + { + "epoch": 3.85, + "learning_rate": 2.6358781128937172e-05, + "loss": 0.8665, + "step": 1680 + }, + { + "epoch": 3.85, + "learning_rate": 2.625832362095528e-05, + "loss": 0.8286, + "step": 1681 + }, + { + "epoch": 3.85, + "learning_rate": 2.6158028968576743e-05, + "loss": 0.9445, + "step": 1682 + }, + { + "epoch": 3.86, + "learning_rate": 2.6057897393298324e-05, + "loss": 0.7562, + "step": 1683 + }, + { + "epoch": 3.86, + "learning_rate": 2.5957929116256675e-05, + "loss": 0.8086, + "step": 1684 + }, + { + "epoch": 3.86, + "learning_rate": 2.5858124358227853e-05, + "loss": 0.8513, + "step": 1685 + }, + { + "epoch": 3.86, + "learning_rate": 2.5758483339626738e-05, + "loss": 0.7107, + "step": 1686 + }, + { + "epoch": 3.86, + "learning_rate": 2.565900628050659e-05, + "loss": 0.7926, + "step": 1687 + }, + { + "epoch": 3.87, + "learning_rate": 2.5559693400558658e-05, + "loss": 0.7839, + "step": 1688 + }, + { + "epoch": 3.87, + "learning_rate": 2.546054491911147e-05, + "loss": 0.8132, + "step": 1689 + }, + { + "epoch": 3.87, + "learning_rate": 2.536156105513062e-05, + "loss": 0.6755, + "step": 1690 + }, + { + "epoch": 3.87, + "learning_rate": 2.52627420272181e-05, + "loss": 0.7823, + "step": 1691 + }, + { + "epoch": 3.88, + "learning_rate": 2.5164088053611845e-05, + "loss": 0.8078, + "step": 1692 + }, + { + "epoch": 3.88, + "learning_rate": 2.5065599352185254e-05, + "loss": 0.7328, + "step": 1693 + }, + { + "epoch": 3.88, + "learning_rate": 2.4967276140446826e-05, + "loss": 0.9089, + "step": 1694 + }, + { + "epoch": 3.88, + "learning_rate": 2.48691186355395e-05, + "loss": 0.7683, + "step": 1695 + }, + { + "epoch": 3.89, + "learning_rate": 2.477112705424024e-05, + "loss": 0.7681, + "step": 1696 + }, + { + "epoch": 3.89, + "learning_rate": 2.4673301612959654e-05, + "loss": 0.8331, + "step": 1697 + }, + { + "epoch": 3.89, + "learning_rate": 2.4575642527741415e-05, + "loss": 0.7678, + "step": 1698 + }, + { + "epoch": 3.89, + "learning_rate": 2.447815001426177e-05, + "loss": 0.7815, + "step": 1699 + }, + { + "epoch": 3.89, + "learning_rate": 2.4380824287829074e-05, + "loss": 0.9155, + "step": 1700 + }, + { + "epoch": 3.9, + "learning_rate": 2.428366556338344e-05, + "loss": 0.7475, + "step": 1701 + }, + { + "epoch": 3.9, + "learning_rate": 2.4186674055496083e-05, + "loss": 0.6909, + "step": 1702 + }, + { + "epoch": 3.9, + "learning_rate": 2.4089849978368918e-05, + "loss": 0.7278, + "step": 1703 + }, + { + "epoch": 3.9, + "learning_rate": 2.399319354583418e-05, + "loss": 0.8053, + "step": 1704 + }, + { + "epoch": 3.91, + "learning_rate": 2.389670497135379e-05, + "loss": 0.6703, + "step": 1705 + }, + { + "epoch": 3.91, + "learning_rate": 2.3800384468018954e-05, + "loss": 0.7334, + "step": 1706 + }, + { + "epoch": 3.91, + "learning_rate": 2.370423224854975e-05, + "loss": 0.7021, + "step": 1707 + }, + { + "epoch": 3.91, + "learning_rate": 2.3608248525294628e-05, + "loss": 0.7711, + "step": 1708 + }, + { + "epoch": 3.92, + "learning_rate": 2.3512433510229858e-05, + "loss": 0.8555, + "step": 1709 + }, + { + "epoch": 3.92, + "learning_rate": 2.3416787414959097e-05, + "loss": 0.7019, + "step": 1710 + }, + { + "epoch": 3.92, + "learning_rate": 2.3321310450713062e-05, + "loss": 0.9331, + "step": 1711 + }, + { + "epoch": 3.92, + "learning_rate": 2.322600282834888e-05, + "loss": 0.7915, + "step": 1712 + }, + { + "epoch": 3.92, + "learning_rate": 2.3130864758349645e-05, + "loss": 0.8168, + "step": 1713 + }, + { + "epoch": 3.93, + "learning_rate": 2.303589645082411e-05, + "loss": 0.7711, + "step": 1714 + }, + { + "epoch": 3.93, + "learning_rate": 2.2941098115506065e-05, + "loss": 0.7319, + "step": 1715 + }, + { + "epoch": 3.93, + "learning_rate": 2.2846469961753915e-05, + "loss": 0.7473, + "step": 1716 + }, + { + "epoch": 3.93, + "learning_rate": 2.27520121985502e-05, + "loss": 0.7365, + "step": 1717 + }, + { + "epoch": 3.94, + "learning_rate": 2.265772503450122e-05, + "loss": 0.9078, + "step": 1718 + }, + { + "epoch": 3.94, + "learning_rate": 2.256360867783648e-05, + "loss": 0.6878, + "step": 1719 + }, + { + "epoch": 3.94, + "learning_rate": 2.246966333640823e-05, + "loss": 0.7913, + "step": 1720 + }, + { + "epoch": 3.94, + "learning_rate": 2.2375889217691137e-05, + "loss": 0.8684, + "step": 1721 + }, + { + "epoch": 3.95, + "learning_rate": 2.2282286528781605e-05, + "loss": 0.7516, + "step": 1722 + }, + { + "epoch": 3.95, + "learning_rate": 2.218885547639754e-05, + "loss": 0.787, + "step": 1723 + }, + { + "epoch": 3.95, + "learning_rate": 2.2095596266877782e-05, + "loss": 0.801, + "step": 1724 + }, + { + "epoch": 3.95, + "learning_rate": 2.2002509106181624e-05, + "loss": 0.8423, + "step": 1725 + }, + { + "epoch": 3.95, + "learning_rate": 2.1909594199888372e-05, + "loss": 0.6984, + "step": 1726 + }, + { + "epoch": 3.96, + "learning_rate": 2.181685175319702e-05, + "loss": 0.7593, + "step": 1727 + }, + { + "epoch": 3.96, + "learning_rate": 2.172428197092561e-05, + "loss": 0.7661, + "step": 1728 + }, + { + "epoch": 3.96, + "learning_rate": 2.1631885057510838e-05, + "loss": 0.8231, + "step": 1729 + }, + { + "epoch": 3.96, + "learning_rate": 2.153966121700769e-05, + "loss": 0.7426, + "step": 1730 + }, + { + "epoch": 3.97, + "learning_rate": 2.1447610653088947e-05, + "loss": 0.7836, + "step": 1731 + }, + { + "epoch": 3.97, + "learning_rate": 2.1355733569044635e-05, + "loss": 0.9467, + "step": 1732 + }, + { + "epoch": 3.97, + "learning_rate": 2.126403016778168e-05, + "loss": 0.8632, + "step": 1733 + }, + { + "epoch": 3.97, + "learning_rate": 2.117250065182349e-05, + "loss": 0.8532, + "step": 1734 + }, + { + "epoch": 3.97, + "learning_rate": 2.1081145223309395e-05, + "loss": 0.769, + "step": 1735 + }, + { + "epoch": 3.98, + "learning_rate": 2.0989964083994252e-05, + "loss": 0.6967, + "step": 1736 + }, + { + "epoch": 3.98, + "learning_rate": 2.08989574352481e-05, + "loss": 0.7737, + "step": 1737 + }, + { + "epoch": 3.98, + "learning_rate": 2.0808125478055505e-05, + "loss": 0.5646, + "step": 1738 + }, + { + "epoch": 3.98, + "learning_rate": 2.0717468413015283e-05, + "loss": 0.7515, + "step": 1739 + }, + { + "epoch": 3.99, + "learning_rate": 2.0626986440340035e-05, + "loss": 0.718, + "step": 1740 + }, + { + "epoch": 3.99, + "learning_rate": 2.053667975985567e-05, + "loss": 0.8102, + "step": 1741 + }, + { + "epoch": 3.99, + "learning_rate": 2.0446548571000935e-05, + "loss": 0.8485, + "step": 1742 + }, + { + "epoch": 3.99, + "learning_rate": 2.035659307282699e-05, + "loss": 0.7086, + "step": 1743 + }, + { + "epoch": 4.0, + "learning_rate": 2.0266813463997092e-05, + "loss": 0.7731, + "step": 1744 + }, + { + "epoch": 4.0, + "learning_rate": 2.0177209942785958e-05, + "loss": 0.5973, + "step": 1745 + }, + { + "epoch": 4.0, + "learning_rate": 2.008778270707944e-05, + "loss": 0.8096, + "step": 1746 + } + ], + "max_steps": 2180, + "num_train_epochs": 5, + "total_flos": 472941381419008.0, + "trial_name": null, + "trial_params": null +}