{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.969227041434456, "eval_steps": 100, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.875e-05, "loss": 2.5575, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.75e-05, "loss": 2.5088, "step": 2 }, { "epoch": 0.01, "learning_rate": 5.625e-05, "loss": 2.5653, "step": 3 }, { "epoch": 0.01, "learning_rate": 7.5e-05, "loss": 2.5625, "step": 4 }, { "epoch": 0.01, "learning_rate": 9.374999999999999e-05, "loss": 2.4798, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.0001125, "loss": 2.5331, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.00013125, "loss": 2.6329, "step": 7 }, { "epoch": 0.02, "learning_rate": 0.00015, "loss": 2.5976, "step": 8 }, { "epoch": 0.02, "learning_rate": 0.00016874999999999998, "loss": 2.6081, "step": 9 }, { "epoch": 0.02, "learning_rate": 0.00018749999999999998, "loss": 2.6396, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.00020624999999999997, "loss": 2.5221, "step": 11 }, { "epoch": 0.02, "learning_rate": 0.000225, "loss": 2.6252, "step": 12 }, { "epoch": 0.03, "learning_rate": 0.00024375, "loss": 2.5929, "step": 13 }, { "epoch": 0.03, "learning_rate": 0.0002625, "loss": 2.5922, "step": 14 }, { "epoch": 0.03, "learning_rate": 0.00028125, "loss": 2.4996, "step": 15 }, { "epoch": 0.03, "learning_rate": 0.0003, "loss": 2.5948, "step": 16 }, { "epoch": 0.03, "learning_rate": 0.000299997027249348, "loss": 2.5224, "step": 17 }, { "epoch": 0.03, "learning_rate": 0.00029998810911522207, "loss": 2.5954, "step": 18 }, { "epoch": 0.04, "learning_rate": 0.0002999732459511074, "loss": 2.5453, "step": 19 }, { "epoch": 0.04, "learning_rate": 0.00029995243834613037, "loss": 2.6684, "step": 20 }, { "epoch": 0.04, "learning_rate": 0.0002999256871250353, "loss": 2.5159, "step": 21 }, { "epoch": 0.04, "learning_rate": 0.0002998929933481515, "loss": 2.6213, "step": 22 }, { "epoch": 0.04, "learning_rate": 0.0002998543583113518, "loss": 2.6888, "step": 23 }, { "epoch": 0.05, "learning_rate": 0.00029980978354600055, "loss": 2.4422, "step": 24 }, { "epoch": 0.05, "learning_rate": 0.00029975927081889316, "loss": 2.5714, "step": 25 }, { "epoch": 0.05, "learning_rate": 0.0002997028221321863, "loss": 2.6195, "step": 26 }, { "epoch": 0.05, "learning_rate": 0.0002996404397233182, "loss": 2.6447, "step": 27 }, { "epoch": 0.05, "learning_rate": 0.00029957212606492007, "loss": 2.5583, "step": 28 }, { "epoch": 0.06, "learning_rate": 0.00029949788386471836, "loss": 2.5892, "step": 29 }, { "epoch": 0.06, "learning_rate": 0.00029941771606542696, "loss": 2.7688, "step": 30 }, { "epoch": 0.06, "learning_rate": 0.00029933162584463096, "loss": 2.6723, "step": 31 }, { "epoch": 0.06, "learning_rate": 0.00029923961661466045, "loss": 2.7775, "step": 32 }, { "epoch": 0.06, "learning_rate": 0.0002991416920224554, "loss": 2.6753, "step": 33 }, { "epoch": 0.07, "learning_rate": 0.0002990378559494212, "loss": 2.7772, "step": 34 }, { "epoch": 0.07, "learning_rate": 0.0002989281125112744, "loss": 2.8511, "step": 35 }, { "epoch": 0.07, "learning_rate": 0.0002988124660578801, "loss": 2.8939, "step": 36 }, { "epoch": 0.07, "learning_rate": 0.0002986909211730792, "loss": 2.8698, "step": 37 }, { "epoch": 0.07, "learning_rate": 0.0002985634826745069, "loss": 3.0635, "step": 38 }, { "epoch": 0.08, "learning_rate": 0.0002984301556134016, "loss": 3.0185, "step": 39 }, { "epoch": 0.08, "learning_rate": 0.0002982909452744047, "loss": 3.0233, "step": 40 }, { "epoch": 0.08, "learning_rate": 0.0002981458571753512, "loss": 3.1451, "step": 41 }, { "epoch": 0.08, "learning_rate": 0.000297994897067051, "loss": 3.2266, "step": 42 }, { "epoch": 0.08, "learning_rate": 0.0002978380709330609, "loss": 3.3143, "step": 43 }, { "epoch": 0.09, "learning_rate": 0.0002976753849894475, "loss": 3.3626, "step": 44 }, { "epoch": 0.09, "learning_rate": 0.00029750684568454063, "loss": 3.3557, "step": 45 }, { "epoch": 0.09, "learning_rate": 0.00029733245969867814, "loss": 3.486, "step": 46 }, { "epoch": 0.09, "learning_rate": 0.0002971522339439407, "loss": 3.4997, "step": 47 }, { "epoch": 0.09, "learning_rate": 0.0002969661755638779, "loss": 3.6059, "step": 48 }, { "epoch": 0.09, "learning_rate": 0.00029677429193322556, "loss": 3.7884, "step": 49 }, { "epoch": 0.1, "learning_rate": 0.00029657659065761267, "loss": 3.9539, "step": 50 }, { "epoch": 0.1, "learning_rate": 0.0002963730795732607, "loss": 4.0443, "step": 51 }, { "epoch": 0.1, "learning_rate": 0.00029616376674667223, "loss": 4.2825, "step": 52 }, { "epoch": 0.1, "learning_rate": 0.000295948660474312, "loss": 4.476, "step": 53 }, { "epoch": 0.1, "learning_rate": 0.0002957277692822774, "loss": 4.6866, "step": 54 }, { "epoch": 0.11, "learning_rate": 0.0002955011019259608, "loss": 5.0977, "step": 55 }, { "epoch": 0.11, "learning_rate": 0.00029526866738970286, "loss": 5.2591, "step": 56 }, { "epoch": 0.11, "learning_rate": 0.0002950304748864356, "loss": 5.6691, "step": 57 }, { "epoch": 0.11, "learning_rate": 0.00029478653385731817, "loss": 5.87, "step": 58 }, { "epoch": 0.11, "learning_rate": 0.0002945368539713617, "loss": 6.4257, "step": 59 }, { "epoch": 0.12, "learning_rate": 0.000294281445125047, "loss": 6.4306, "step": 60 }, { "epoch": 0.12, "learning_rate": 0.0002940203174419314, "loss": 6.9147, "step": 61 }, { "epoch": 0.12, "learning_rate": 0.0002937534812722483, "loss": 7.2439, "step": 62 }, { "epoch": 0.12, "learning_rate": 0.00029348094719249614, "loss": 7.2032, "step": 63 }, { "epoch": 0.12, "learning_rate": 0.00029320272600501983, "loss": 7.6343, "step": 64 }, { "epoch": 0.13, "learning_rate": 0.0002929188287375824, "loss": 7.7117, "step": 65 }, { "epoch": 0.13, "learning_rate": 0.00029262926664292744, "loss": 7.7917, "step": 66 }, { "epoch": 0.13, "learning_rate": 0.00029233405119833387, "loss": 8.1056, "step": 67 }, { "epoch": 0.13, "learning_rate": 0.0002920331941051603, "loss": 8.2865, "step": 68 }, { "epoch": 0.13, "learning_rate": 0.0002917267072883818, "loss": 8.6984, "step": 69 }, { "epoch": 0.14, "learning_rate": 0.0002914146028961167, "loss": 8.9353, "step": 70 }, { "epoch": 0.14, "learning_rate": 0.0002910968932991455, "loss": 8.8638, "step": 71 }, { "epoch": 0.14, "learning_rate": 0.0002907735910904205, "loss": 9.0658, "step": 72 }, { "epoch": 0.14, "learning_rate": 0.0002904447090845662, "loss": 9.4505, "step": 73 }, { "epoch": 0.14, "learning_rate": 0.00029011026031737193, "loss": 9.4891, "step": 74 }, { "epoch": 0.15, "learning_rate": 0.000289770258045275, "loss": 9.4665, "step": 75 }, { "epoch": 0.15, "learning_rate": 0.00028942471574483497, "loss": 10.0014, "step": 76 }, { "epoch": 0.15, "learning_rate": 0.00028907364711219997, "loss": 9.5228, "step": 77 }, { "epoch": 0.15, "learning_rate": 0.00028871706606256326, "loss": 9.6988, "step": 78 }, { "epoch": 0.15, "learning_rate": 0.00028835498672961224, "loss": 9.8295, "step": 79 }, { "epoch": 0.16, "learning_rate": 0.0002879874234649679, "loss": 9.302, "step": 80 }, { "epoch": 0.16, "learning_rate": 0.00028761439083761596, "loss": 8.9286, "step": 81 }, { "epoch": 0.16, "learning_rate": 0.0002872359036333296, "loss": 8.762, "step": 82 }, { "epoch": 0.16, "learning_rate": 0.0002868519768540833, "loss": 8.4371, "step": 83 }, { "epoch": 0.16, "learning_rate": 0.0002864626257174581, "loss": 8.2755, "step": 84 }, { "epoch": 0.16, "learning_rate": 0.00028606786565603875, "loss": 8.1114, "step": 85 }, { "epoch": 0.17, "learning_rate": 0.0002856677123168015, "loss": 7.8714, "step": 86 }, { "epoch": 0.17, "learning_rate": 0.00028526218156049433, "loss": 7.7255, "step": 87 }, { "epoch": 0.17, "learning_rate": 0.0002848512894610083, "loss": 7.6783, "step": 88 }, { "epoch": 0.17, "learning_rate": 0.00028443505230474006, "loss": 7.342, "step": 89 }, { "epoch": 0.17, "learning_rate": 0.0002840134865899468, "loss": 7.2997, "step": 90 }, { "epoch": 0.18, "learning_rate": 0.00028358660902609166, "loss": 7.2757, "step": 91 }, { "epoch": 0.18, "learning_rate": 0.00028315443653318225, "loss": 7.2005, "step": 92 }, { "epoch": 0.18, "learning_rate": 0.00028271698624109933, "loss": 7.0789, "step": 93 }, { "epoch": 0.18, "learning_rate": 0.00028227427548891803, "loss": 7.043, "step": 94 }, { "epoch": 0.18, "learning_rate": 0.0002818263218242208, "loss": 7.1089, "step": 95 }, { "epoch": 0.19, "learning_rate": 0.00028137314300240166, "loss": 6.9611, "step": 96 }, { "epoch": 0.19, "learning_rate": 0.00028091475698596236, "loss": 6.8566, "step": 97 }, { "epoch": 0.19, "learning_rate": 0.0002804511819438006, "loss": 7.0051, "step": 98 }, { "epoch": 0.19, "learning_rate": 0.0002799824362504899, "loss": 6.8284, "step": 99 }, { "epoch": 0.19, "learning_rate": 0.00027950853848555116, "loss": 6.7849, "step": 100 }, { "epoch": 0.19, "eval_loss": 6.93548059463501, "eval_runtime": 0.4634, "eval_samples_per_second": 144.584, "eval_steps_per_second": 10.79, "step": 100 }, { "epoch": 0.2, "learning_rate": 0.0002790295074327162, "loss": 6.9271, "step": 101 }, { "epoch": 0.2, "learning_rate": 0.00027854536207918336, "loss": 6.7996, "step": 102 }, { "epoch": 0.2, "learning_rate": 0.00027805612161486477, "loss": 6.7169, "step": 103 }, { "epoch": 0.2, "learning_rate": 0.00027756180543162597, "loss": 6.7617, "step": 104 }, { "epoch": 0.2, "learning_rate": 0.000277062433122517, "loss": 6.6488, "step": 105 }, { "epoch": 0.21, "learning_rate": 0.000276558024480996, "loss": 6.7194, "step": 106 }, { "epoch": 0.21, "learning_rate": 0.00027604859950014455, "loss": 6.5303, "step": 107 }, { "epoch": 0.21, "learning_rate": 0.0002755341783718752, "loss": 6.6088, "step": 108 }, { "epoch": 0.21, "learning_rate": 0.00027501478148613114, "loss": 6.5223, "step": 109 }, { "epoch": 0.21, "learning_rate": 0.0002744904294300782, "loss": 6.5884, "step": 110 }, { "epoch": 0.22, "learning_rate": 0.00027396114298728865, "loss": 6.4799, "step": 111 }, { "epoch": 0.22, "learning_rate": 0.0002734269431369173, "loss": 6.5031, "step": 112 }, { "epoch": 0.22, "learning_rate": 0.00027288785105287024, "loss": 6.483, "step": 113 }, { "epoch": 0.22, "learning_rate": 0.0002723438881029654, "loss": 6.517, "step": 114 }, { "epoch": 0.22, "learning_rate": 0.00027179507584808554, "loss": 6.4037, "step": 115 }, { "epoch": 0.22, "learning_rate": 0.000271241436041324, "loss": 6.3645, "step": 116 }, { "epoch": 0.23, "learning_rate": 0.00027068299062712195, "loss": 6.386, "step": 117 }, { "epoch": 0.23, "learning_rate": 0.00027011976174039904, "loss": 6.408, "step": 118 }, { "epoch": 0.23, "learning_rate": 0.0002695517717056757, "loss": 6.5254, "step": 119 }, { "epoch": 0.23, "learning_rate": 0.0002689790430361887, "loss": 6.2899, "step": 120 }, { "epoch": 0.23, "learning_rate": 0.0002684015984329983, "loss": 6.3701, "step": 121 }, { "epoch": 0.24, "learning_rate": 0.00026781946078408876, "loss": 6.3711, "step": 122 }, { "epoch": 0.24, "learning_rate": 0.00026723265316346104, "loss": 6.4739, "step": 123 }, { "epoch": 0.24, "learning_rate": 0.00026664119883021843, "loss": 6.4259, "step": 124 }, { "epoch": 0.24, "learning_rate": 0.00026604512122764426, "loss": 6.3035, "step": 125 }, { "epoch": 0.24, "learning_rate": 0.0002654444439822729, "loss": 6.26, "step": 126 }, { "epoch": 0.25, "learning_rate": 0.0002648391909029534, "loss": 6.3777, "step": 127 }, { "epoch": 0.25, "learning_rate": 0.00026422938597990553, "loss": 6.3888, "step": 128 }, { "epoch": 0.25, "learning_rate": 0.0002636150533837691, "loss": 6.1202, "step": 129 }, { "epoch": 0.25, "learning_rate": 0.0002629962174646457, "loss": 6.2316, "step": 130 }, { "epoch": 0.25, "learning_rate": 0.00026237290275113386, "loss": 6.1776, "step": 131 }, { "epoch": 0.26, "learning_rate": 0.00026174513394935646, "loss": 6.1823, "step": 132 }, { "epoch": 0.26, "learning_rate": 0.0002611129359419817, "loss": 6.1094, "step": 133 }, { "epoch": 0.26, "learning_rate": 0.00026047633378723683, "loss": 6.076, "step": 134 }, { "epoch": 0.26, "learning_rate": 0.0002598353527179147, "loss": 6.0366, "step": 135 }, { "epoch": 0.26, "learning_rate": 0.00025919001814037393, "loss": 6.0301, "step": 136 }, { "epoch": 0.27, "learning_rate": 0.00025854035563353166, "loss": 6.1283, "step": 137 }, { "epoch": 0.27, "learning_rate": 0.0002578863909478497, "loss": 5.8362, "step": 138 }, { "epoch": 0.27, "learning_rate": 0.00025722815000431406, "loss": 5.8631, "step": 139 }, { "epoch": 0.27, "learning_rate": 0.0002565656588934073, "loss": 5.8031, "step": 140 }, { "epoch": 0.27, "learning_rate": 0.0002558989438740745, "loss": 5.7674, "step": 141 }, { "epoch": 0.28, "learning_rate": 0.00025522803137268253, "loss": 5.7825, "step": 142 }, { "epoch": 0.28, "learning_rate": 0.0002545529479819723, "loss": 5.8328, "step": 143 }, { "epoch": 0.28, "learning_rate": 0.000253873720460005, "loss": 5.8554, "step": 144 }, { "epoch": 0.28, "learning_rate": 0.0002531903757291015, "loss": 5.8429, "step": 145 }, { "epoch": 0.28, "learning_rate": 0.000252502940874775, "loss": 5.7095, "step": 146 }, { "epoch": 0.28, "learning_rate": 0.00025181144314465764, "loss": 5.5604, "step": 147 }, { "epoch": 0.29, "learning_rate": 0.0002511159099474205, "loss": 5.5383, "step": 148 }, { "epoch": 0.29, "learning_rate": 0.00025041636885168715, "loss": 5.4359, "step": 149 }, { "epoch": 0.29, "learning_rate": 0.0002497128475849408, "loss": 5.2092, "step": 150 }, { "epoch": 0.29, "learning_rate": 0.0002490053740324256, "loss": 5.2694, "step": 151 }, { "epoch": 0.29, "learning_rate": 0.000248293976236041, "loss": 5.1733, "step": 152 }, { "epoch": 0.3, "learning_rate": 0.0002475786823932306, "loss": 4.9904, "step": 153 }, { "epoch": 0.3, "learning_rate": 0.0002468595208558641, "loss": 5.0725, "step": 154 }, { "epoch": 0.3, "learning_rate": 0.00024613652012911403, "loss": 5.1754, "step": 155 }, { "epoch": 0.3, "learning_rate": 0.00024540970887032543, "loss": 4.8482, "step": 156 }, { "epoch": 0.3, "learning_rate": 0.00024467911588788016, "loss": 5.0397, "step": 157 }, { "epoch": 0.31, "learning_rate": 0.00024394477014005514, "loss": 5.0625, "step": 158 }, { "epoch": 0.31, "learning_rate": 0.0002432067007338744, "loss": 5.1172, "step": 159 }, { "epoch": 0.31, "learning_rate": 0.0002424649369239553, "loss": 5.135, "step": 160 }, { "epoch": 0.31, "learning_rate": 0.00024171950811134927, "loss": 4.9951, "step": 161 }, { "epoch": 0.31, "learning_rate": 0.00024097044384237607, "loss": 4.9701, "step": 162 }, { "epoch": 0.32, "learning_rate": 0.000240217773807453, "loss": 4.9604, "step": 163 }, { "epoch": 0.32, "learning_rate": 0.00023946152783991786, "loss": 5.0689, "step": 164 }, { "epoch": 0.32, "learning_rate": 0.0002387017359148466, "loss": 4.9858, "step": 165 }, { "epoch": 0.32, "learning_rate": 0.00023793842814786505, "loss": 5.0683, "step": 166 }, { "epoch": 0.32, "learning_rate": 0.00023717163479395538, "loss": 5.0672, "step": 167 }, { "epoch": 0.33, "learning_rate": 0.00023640138624625684, "loss": 4.9967, "step": 168 }, { "epoch": 0.33, "learning_rate": 0.00023562771303486108, "loss": 5.1249, "step": 169 }, { "epoch": 0.33, "learning_rate": 0.00023485064582560197, "loss": 5.1674, "step": 170 }, { "epoch": 0.33, "learning_rate": 0.00023407021541884025, "loss": 4.9008, "step": 171 }, { "epoch": 0.33, "learning_rate": 0.00023328645274824254, "loss": 5.1389, "step": 172 }, { "epoch": 0.34, "learning_rate": 0.00023249938887955543, "loss": 4.9922, "step": 173 }, { "epoch": 0.34, "learning_rate": 0.00023170905500937396, "loss": 5.0586, "step": 174 }, { "epoch": 0.34, "learning_rate": 0.0002309154824639052, "loss": 5.1404, "step": 175 }, { "epoch": 0.34, "learning_rate": 0.00023011870269772642, "loss": 5.1627, "step": 176 }, { "epoch": 0.34, "learning_rate": 0.00022931874729253856, "loss": 5.0564, "step": 177 }, { "epoch": 0.35, "learning_rate": 0.00022851564795591442, "loss": 5.22, "step": 178 }, { "epoch": 0.35, "learning_rate": 0.0002277094365200416, "loss": 5.0221, "step": 179 }, { "epoch": 0.35, "learning_rate": 0.00022690014494046104, "loss": 5.0044, "step": 180 }, { "epoch": 0.35, "learning_rate": 0.0002260878052948004, "loss": 5.1381, "step": 181 }, { "epoch": 0.35, "learning_rate": 0.00022527244978150248, "loss": 4.931, "step": 182 }, { "epoch": 0.35, "learning_rate": 0.0002244541107185491, "loss": 5.0278, "step": 183 }, { "epoch": 0.36, "learning_rate": 0.00022363282054217994, "loss": 5.0771, "step": 184 }, { "epoch": 0.36, "learning_rate": 0.0002228086118056072, "loss": 5.1335, "step": 185 }, { "epoch": 0.36, "learning_rate": 0.00022198151717772494, "loss": 5.0262, "step": 186 }, { "epoch": 0.36, "learning_rate": 0.00022115156944181442, "loss": 5.1367, "step": 187 }, { "epoch": 0.36, "learning_rate": 0.00022031880149424462, "loss": 5.0092, "step": 188 }, { "epoch": 0.37, "learning_rate": 0.00021948324634316833, "loss": 4.9525, "step": 189 }, { "epoch": 0.37, "learning_rate": 0.00021864493710721384, "loss": 5.1732, "step": 190 }, { "epoch": 0.37, "learning_rate": 0.00021780390701417216, "loss": 4.944, "step": 191 }, { "epoch": 0.37, "learning_rate": 0.00021696018939968, "loss": 4.8821, "step": 192 }, { "epoch": 0.37, "learning_rate": 0.00021611381770589866, "loss": 4.9632, "step": 193 }, { "epoch": 0.38, "learning_rate": 0.00021526482548018814, "loss": 4.8977, "step": 194 }, { "epoch": 0.38, "learning_rate": 0.00021441324637377768, "loss": 4.9198, "step": 195 }, { "epoch": 0.38, "learning_rate": 0.00021355911414043185, "loss": 4.8276, "step": 196 }, { "epoch": 0.38, "learning_rate": 0.00021270246263511273, "loss": 4.8213, "step": 197 }, { "epoch": 0.38, "learning_rate": 0.00021184332581263785, "loss": 4.8753, "step": 198 }, { "epoch": 0.39, "learning_rate": 0.00021098173772633462, "loss": 4.9393, "step": 199 }, { "epoch": 0.39, "learning_rate": 0.00021011773252669027, "loss": 5.0001, "step": 200 }, { "epoch": 0.39, "eval_loss": 5.125741481781006, "eval_runtime": 0.4595, "eval_samples_per_second": 145.823, "eval_steps_per_second": 10.882, "step": 200 }, { "epoch": 0.39, "learning_rate": 0.00020925134445999843, "loss": 5.0326, "step": 201 }, { "epoch": 0.39, "learning_rate": 0.0002083826078670016, "loss": 4.9906, "step": 202 }, { "epoch": 0.39, "learning_rate": 0.00020751155718153012, "loss": 4.9179, "step": 203 }, { "epoch": 0.4, "learning_rate": 0.00020663822692913722, "loss": 5.0525, "step": 204 }, { "epoch": 0.4, "learning_rate": 0.0002057626517257306, "loss": 5.191, "step": 205 }, { "epoch": 0.4, "learning_rate": 0.00020488486627620036, "loss": 5.0969, "step": 206 }, { "epoch": 0.4, "learning_rate": 0.00020400490537304336, "loss": 5.1378, "step": 207 }, { "epoch": 0.4, "learning_rate": 0.0002031228038949843, "loss": 5.0901, "step": 208 }, { "epoch": 0.41, "learning_rate": 0.00020223859680559305, "loss": 5.2614, "step": 209 }, { "epoch": 0.41, "learning_rate": 0.00020135231915189897, "loss": 5.3052, "step": 210 }, { "epoch": 0.41, "learning_rate": 0.00020046400606300177, "loss": 5.2046, "step": 211 }, { "epoch": 0.41, "learning_rate": 0.0001995736927486789, "loss": 5.1249, "step": 212 }, { "epoch": 0.41, "learning_rate": 0.00019868141449799016, "loss": 5.3504, "step": 213 }, { "epoch": 0.41, "learning_rate": 0.00019778720667787894, "loss": 5.296, "step": 214 }, { "epoch": 0.42, "learning_rate": 0.0001968911047317703, "loss": 5.3431, "step": 215 }, { "epoch": 0.42, "learning_rate": 0.00019599314417816617, "loss": 5.3026, "step": 216 }, { "epoch": 0.42, "learning_rate": 0.00019509336060923748, "loss": 5.1208, "step": 217 }, { "epoch": 0.42, "learning_rate": 0.00019419178968941344, "loss": 5.2991, "step": 218 }, { "epoch": 0.42, "learning_rate": 0.00019328846715396797, "loss": 5.4355, "step": 219 }, { "epoch": 0.43, "learning_rate": 0.00019238342880760305, "loss": 5.3463, "step": 220 }, { "epoch": 0.43, "learning_rate": 0.00019147671052302992, "loss": 5.381, "step": 221 }, { "epoch": 0.43, "learning_rate": 0.00019056834823954683, "loss": 5.2602, "step": 222 }, { "epoch": 0.43, "learning_rate": 0.00018965837796161464, "loss": 5.6047, "step": 223 }, { "epoch": 0.43, "learning_rate": 0.00018874683575742995, "loss": 5.4004, "step": 224 }, { "epoch": 0.44, "learning_rate": 0.0001878337577574951, "loss": 5.3746, "step": 225 }, { "epoch": 0.44, "learning_rate": 0.00018691918015318644, "loss": 5.5042, "step": 226 }, { "epoch": 0.44, "learning_rate": 0.0001860031391953195, "loss": 5.6208, "step": 227 }, { "epoch": 0.44, "learning_rate": 0.00018508567119271237, "loss": 5.6673, "step": 228 }, { "epoch": 0.44, "learning_rate": 0.00018416681251074633, "loss": 5.6802, "step": 229 }, { "epoch": 0.45, "learning_rate": 0.0001832465995699248, "loss": 5.6571, "step": 230 }, { "epoch": 0.45, "learning_rate": 0.00018232506884442932, "loss": 5.5854, "step": 231 }, { "epoch": 0.45, "learning_rate": 0.00018140225686067403, "loss": 5.6822, "step": 232 }, { "epoch": 0.45, "learning_rate": 0.00018047820019585805, "loss": 5.6988, "step": 233 }, { "epoch": 0.45, "learning_rate": 0.00017955293547651535, "loss": 5.6732, "step": 234 }, { "epoch": 0.46, "learning_rate": 0.00017862649937706323, "loss": 5.628, "step": 235 }, { "epoch": 0.46, "learning_rate": 0.00017769892861834867, "loss": 5.6503, "step": 236 }, { "epoch": 0.46, "learning_rate": 0.00017677025996619265, "loss": 5.7701, "step": 237 }, { "epoch": 0.46, "learning_rate": 0.000175840530229933, "loss": 5.9086, "step": 238 }, { "epoch": 0.46, "learning_rate": 0.00017490977626096558, "loss": 5.8063, "step": 239 }, { "epoch": 0.47, "learning_rate": 0.00017397803495128322, "loss": 5.7099, "step": 240 }, { "epoch": 0.47, "learning_rate": 0.0001730453432320137, "loss": 5.8545, "step": 241 }, { "epoch": 0.47, "learning_rate": 0.000172111738071956, "loss": 5.7577, "step": 242 }, { "epoch": 0.47, "learning_rate": 0.00017117725647611468, "loss": 5.7155, "step": 243 }, { "epoch": 0.47, "learning_rate": 0.0001702419354842334, "loss": 5.614, "step": 244 }, { "epoch": 0.47, "learning_rate": 0.0001693058121693267, "loss": 5.8233, "step": 245 }, { "epoch": 0.48, "learning_rate": 0.00016836892363621052, "loss": 5.8977, "step": 246 }, { "epoch": 0.48, "learning_rate": 0.00016743130702003147, "loss": 5.7225, "step": 247 }, { "epoch": 0.48, "learning_rate": 0.00016649299948479494, "loss": 5.6069, "step": 248 }, { "epoch": 0.48, "learning_rate": 0.00016555403822189214, "loss": 5.7829, "step": 249 }, { "epoch": 0.48, "learning_rate": 0.00016461446044862584, "loss": 5.765, "step": 250 }, { "epoch": 0.49, "learning_rate": 0.00016367430340673514, "loss": 5.7193, "step": 251 }, { "epoch": 0.49, "learning_rate": 0.0001627336043609196, "loss": 5.5909, "step": 252 }, { "epoch": 0.49, "learning_rate": 0.00016179240059736183, "loss": 5.7002, "step": 253 }, { "epoch": 0.49, "learning_rate": 0.00016085072942224985, "loss": 5.7926, "step": 254 }, { "epoch": 0.49, "learning_rate": 0.00015990862816029836, "loss": 5.763, "step": 255 }, { "epoch": 0.5, "learning_rate": 0.0001589661341532692, "loss": 5.678, "step": 256 }, { "epoch": 0.5, "learning_rate": 0.00015802328475849142, "loss": 5.6742, "step": 257 }, { "epoch": 0.5, "learning_rate": 0.00015708011734738033, "loss": 5.7914, "step": 258 }, { "epoch": 0.5, "learning_rate": 0.00015613666930395644, "loss": 5.6897, "step": 259 }, { "epoch": 0.5, "learning_rate": 0.00015519297802336354, "loss": 5.7609, "step": 260 }, { "epoch": 0.51, "learning_rate": 0.0001542490809103866, "loss": 5.5896, "step": 261 }, { "epoch": 0.51, "learning_rate": 0.00015330501537796906, "loss": 5.7305, "step": 262 }, { "epoch": 0.51, "learning_rate": 0.00015236081884572984, "loss": 5.6916, "step": 263 }, { "epoch": 0.51, "learning_rate": 0.00015141652873848054, "loss": 5.8479, "step": 264 }, { "epoch": 0.51, "learning_rate": 0.00015047218248474148, "loss": 5.6922, "step": 265 }, { "epoch": 0.52, "learning_rate": 0.00014952781751525855, "loss": 5.7687, "step": 266 }, { "epoch": 0.52, "learning_rate": 0.00014858347126151948, "loss": 5.939, "step": 267 }, { "epoch": 0.52, "learning_rate": 0.00014763918115427013, "loss": 5.7656, "step": 268 }, { "epoch": 0.52, "learning_rate": 0.000146694984622031, "loss": 5.6402, "step": 269 }, { "epoch": 0.52, "learning_rate": 0.0001457509190896134, "loss": 5.745, "step": 270 }, { "epoch": 0.53, "learning_rate": 0.0001448070219766365, "loss": 5.7993, "step": 271 }, { "epoch": 0.53, "learning_rate": 0.0001438633306960436, "loss": 5.9042, "step": 272 }, { "epoch": 0.53, "learning_rate": 0.0001429198826526197, "loss": 5.9073, "step": 273 }, { "epoch": 0.53, "learning_rate": 0.0001419767152415086, "loss": 5.9313, "step": 274 }, { "epoch": 0.53, "learning_rate": 0.00014103386584673078, "loss": 5.9989, "step": 275 }, { "epoch": 0.54, "learning_rate": 0.00014009137183970167, "loss": 6.0108, "step": 276 }, { "epoch": 0.54, "learning_rate": 0.00013914927057775018, "loss": 5.9416, "step": 277 }, { "epoch": 0.54, "learning_rate": 0.0001382075994026382, "loss": 5.8839, "step": 278 }, { "epoch": 0.54, "learning_rate": 0.0001372663956390804, "loss": 6.0596, "step": 279 }, { "epoch": 0.54, "learning_rate": 0.00013632569659326486, "loss": 6.1034, "step": 280 }, { "epoch": 0.54, "learning_rate": 0.00013538553955137414, "loss": 6.1142, "step": 281 }, { "epoch": 0.55, "learning_rate": 0.00013444596177810783, "loss": 6.178, "step": 282 }, { "epoch": 0.55, "learning_rate": 0.00013350700051520506, "loss": 6.0406, "step": 283 }, { "epoch": 0.55, "learning_rate": 0.00013256869297996853, "loss": 6.4111, "step": 284 }, { "epoch": 0.55, "learning_rate": 0.00013163107636378945, "loss": 6.1454, "step": 285 }, { "epoch": 0.55, "learning_rate": 0.00013069418783067326, "loss": 6.4537, "step": 286 }, { "epoch": 0.56, "learning_rate": 0.0001297580645157666, "loss": 6.4003, "step": 287 }, { "epoch": 0.56, "learning_rate": 0.0001288227435238853, "loss": 6.3845, "step": 288 }, { "epoch": 0.56, "learning_rate": 0.000127888261928044, "loss": 6.4016, "step": 289 }, { "epoch": 0.56, "learning_rate": 0.00012695465676798627, "loss": 6.5177, "step": 290 }, { "epoch": 0.56, "learning_rate": 0.00012602196504871678, "loss": 6.4294, "step": 291 }, { "epoch": 0.57, "learning_rate": 0.00012509022373903442, "loss": 6.6357, "step": 292 }, { "epoch": 0.57, "learning_rate": 0.00012415946977006696, "loss": 6.7642, "step": 293 }, { "epoch": 0.57, "learning_rate": 0.00012322974003380735, "loss": 6.9096, "step": 294 }, { "epoch": 0.57, "learning_rate": 0.00012230107138165133, "loss": 6.7728, "step": 295 }, { "epoch": 0.57, "learning_rate": 0.00012137350062293677, "loss": 6.9269, "step": 296 }, { "epoch": 0.58, "learning_rate": 0.00012044706452348465, "loss": 6.9235, "step": 297 }, { "epoch": 0.58, "learning_rate": 0.00011952179980414195, "loss": 6.9825, "step": 298 }, { "epoch": 0.58, "learning_rate": 0.00011859774313932597, "loss": 7.2581, "step": 299 }, { "epoch": 0.58, "learning_rate": 0.0001176749311555707, "loss": 7.0916, "step": 300 }, { "epoch": 0.58, "eval_loss": 7.345704078674316, "eval_runtime": 0.4561, "eval_samples_per_second": 146.905, "eval_steps_per_second": 10.963, "step": 300 }, { "epoch": 0.58, "learning_rate": 0.00011675340043007519, "loss": 7.2218, "step": 301 }, { "epoch": 0.59, "learning_rate": 0.00011583318748925367, "loss": 7.1207, "step": 302 }, { "epoch": 0.59, "learning_rate": 0.00011491432880728765, "loss": 7.3706, "step": 303 }, { "epoch": 0.59, "learning_rate": 0.0001139968608046805, "loss": 7.3684, "step": 304 }, { "epoch": 0.59, "learning_rate": 0.00011308081984681356, "loss": 7.2527, "step": 305 }, { "epoch": 0.59, "learning_rate": 0.00011216624224250487, "loss": 7.281, "step": 306 }, { "epoch": 0.6, "learning_rate": 0.00011125316424257002, "loss": 7.3978, "step": 307 }, { "epoch": 0.6, "learning_rate": 0.00011034162203838534, "loss": 7.3763, "step": 308 }, { "epoch": 0.6, "learning_rate": 0.00010943165176045317, "loss": 7.3376, "step": 309 }, { "epoch": 0.6, "learning_rate": 0.00010852328947697004, "loss": 7.2407, "step": 310 }, { "epoch": 0.6, "learning_rate": 0.0001076165711923969, "loss": 7.2541, "step": 311 }, { "epoch": 0.6, "learning_rate": 0.00010671153284603203, "loss": 7.4064, "step": 312 }, { "epoch": 0.61, "learning_rate": 0.0001058082103105865, "loss": 7.2814, "step": 313 }, { "epoch": 0.61, "learning_rate": 0.0001049066393907625, "loss": 7.2034, "step": 314 }, { "epoch": 0.61, "learning_rate": 0.00010400685582183382, "loss": 7.2216, "step": 315 }, { "epoch": 0.61, "learning_rate": 0.00010310889526822966, "loss": 7.2839, "step": 316 }, { "epoch": 0.61, "learning_rate": 0.00010221279332212101, "loss": 7.151, "step": 317 }, { "epoch": 0.62, "learning_rate": 0.00010131858550200983, "loss": 7.0071, "step": 318 }, { "epoch": 0.62, "learning_rate": 0.00010042630725132104, "loss": 7.2225, "step": 319 }, { "epoch": 0.62, "learning_rate": 9.953599393699819e-05, "loss": 7.0026, "step": 320 }, { "epoch": 0.62, "learning_rate": 9.8647680848101e-05, "loss": 6.9343, "step": 321 }, { "epoch": 0.62, "learning_rate": 9.776140319440695e-05, "loss": 6.8403, "step": 322 }, { "epoch": 0.63, "learning_rate": 9.687719610501572e-05, "loss": 6.8452, "step": 323 }, { "epoch": 0.63, "learning_rate": 9.599509462695665e-05, "loss": 6.7219, "step": 324 }, { "epoch": 0.63, "learning_rate": 9.511513372379965e-05, "loss": 6.6855, "step": 325 }, { "epoch": 0.63, "learning_rate": 9.423734827426941e-05, "loss": 6.6364, "step": 326 }, { "epoch": 0.63, "learning_rate": 9.336177307086277e-05, "loss": 6.4916, "step": 327 }, { "epoch": 0.64, "learning_rate": 9.24884428184699e-05, "loss": 6.5035, "step": 328 }, { "epoch": 0.64, "learning_rate": 9.161739213299841e-05, "loss": 6.4218, "step": 329 }, { "epoch": 0.64, "learning_rate": 9.074865554000161e-05, "loss": 6.3434, "step": 330 }, { "epoch": 0.64, "learning_rate": 8.988226747330973e-05, "loss": 6.2743, "step": 331 }, { "epoch": 0.64, "learning_rate": 8.90182622736654e-05, "loss": 6.4393, "step": 332 }, { "epoch": 0.65, "learning_rate": 8.815667418736217e-05, "loss": 6.3851, "step": 333 }, { "epoch": 0.65, "learning_rate": 8.729753736488734e-05, "loss": 6.3961, "step": 334 }, { "epoch": 0.65, "learning_rate": 8.644088585956816e-05, "loss": 6.4392, "step": 335 }, { "epoch": 0.65, "learning_rate": 8.558675362622229e-05, "loss": 6.3019, "step": 336 }, { "epoch": 0.65, "learning_rate": 8.473517451981186e-05, "loss": 6.4448, "step": 337 }, { "epoch": 0.66, "learning_rate": 8.38861822941013e-05, "loss": 6.3658, "step": 338 }, { "epoch": 0.66, "learning_rate": 8.303981060031993e-05, "loss": 6.3605, "step": 339 }, { "epoch": 0.66, "learning_rate": 8.219609298582788e-05, "loss": 6.374, "step": 340 }, { "epoch": 0.66, "learning_rate": 8.135506289278618e-05, "loss": 6.4577, "step": 341 }, { "epoch": 0.66, "learning_rate": 8.051675365683163e-05, "loss": 6.3441, "step": 342 }, { "epoch": 0.66, "learning_rate": 7.968119850575538e-05, "loss": 6.2837, "step": 343 }, { "epoch": 0.67, "learning_rate": 7.884843055818558e-05, "loss": 6.4202, "step": 344 }, { "epoch": 0.67, "learning_rate": 7.801848282227504e-05, "loss": 6.3048, "step": 345 }, { "epoch": 0.67, "learning_rate": 7.719138819439281e-05, "loss": 6.2596, "step": 346 }, { "epoch": 0.67, "learning_rate": 7.636717945782003e-05, "loss": 6.396, "step": 347 }, { "epoch": 0.67, "learning_rate": 7.554588928145088e-05, "loss": 6.4539, "step": 348 }, { "epoch": 0.68, "learning_rate": 7.47275502184975e-05, "loss": 6.391, "step": 349 }, { "epoch": 0.68, "learning_rate": 7.391219470519957e-05, "loss": 6.3495, "step": 350 }, { "epoch": 0.68, "learning_rate": 7.309985505953892e-05, "loss": 6.5338, "step": 351 }, { "epoch": 0.68, "learning_rate": 7.229056347995841e-05, "loss": 6.5981, "step": 352 }, { "epoch": 0.68, "learning_rate": 7.148435204408557e-05, "loss": 6.4835, "step": 353 }, { "epoch": 0.69, "learning_rate": 7.068125270746138e-05, "loss": 6.5864, "step": 354 }, { "epoch": 0.69, "learning_rate": 6.98812973022736e-05, "loss": 6.6236, "step": 355 }, { "epoch": 0.69, "learning_rate": 6.908451753609481e-05, "loss": 6.6355, "step": 356 }, { "epoch": 0.69, "learning_rate": 6.829094499062603e-05, "loss": 6.7108, "step": 357 }, { "epoch": 0.69, "learning_rate": 6.750061112044455e-05, "loss": 6.6534, "step": 358 }, { "epoch": 0.7, "learning_rate": 6.671354725175742e-05, "loss": 6.7027, "step": 359 }, { "epoch": 0.7, "learning_rate": 6.592978458115978e-05, "loss": 6.7136, "step": 360 }, { "epoch": 0.7, "learning_rate": 6.514935417439802e-05, "loss": 6.6459, "step": 361 }, { "epoch": 0.7, "learning_rate": 6.43722869651389e-05, "loss": 6.7461, "step": 362 }, { "epoch": 0.7, "learning_rate": 6.359861375374315e-05, "loss": 6.697, "step": 363 }, { "epoch": 0.71, "learning_rate": 6.28283652060446e-05, "loss": 6.8183, "step": 364 }, { "epoch": 0.71, "learning_rate": 6.206157185213493e-05, "loss": 6.8525, "step": 365 }, { "epoch": 0.71, "learning_rate": 6.12982640851534e-05, "loss": 6.9362, "step": 366 }, { "epoch": 0.71, "learning_rate": 6.0538472160082105e-05, "loss": 6.7913, "step": 367 }, { "epoch": 0.71, "learning_rate": 5.978222619254696e-05, "loss": 6.6714, "step": 368 }, { "epoch": 0.72, "learning_rate": 5.902955615762392e-05, "loss": 6.8751, "step": 369 }, { "epoch": 0.72, "learning_rate": 5.828049188865071e-05, "loss": 6.718, "step": 370 }, { "epoch": 0.72, "learning_rate": 5.753506307604464e-05, "loss": 6.8452, "step": 371 }, { "epoch": 0.72, "learning_rate": 5.67932992661256e-05, "loss": 6.8406, "step": 372 }, { "epoch": 0.72, "learning_rate": 5.605522985994481e-05, "loss": 6.8587, "step": 373 }, { "epoch": 0.72, "learning_rate": 5.5320884112119776e-05, "loss": 6.9518, "step": 374 }, { "epoch": 0.73, "learning_rate": 5.4590291129674564e-05, "loss": 7.0429, "step": 375 }, { "epoch": 0.73, "learning_rate": 5.386347987088592e-05, "loss": 6.7883, "step": 376 }, { "epoch": 0.73, "learning_rate": 5.314047914413587e-05, "loss": 6.7102, "step": 377 }, { "epoch": 0.73, "learning_rate": 5.2421317606769455e-05, "loss": 6.678, "step": 378 }, { "epoch": 0.73, "learning_rate": 5.1706023763959004e-05, "loss": 6.9334, "step": 379 }, { "epoch": 0.74, "learning_rate": 5.099462596757441e-05, "loss": 6.817, "step": 380 }, { "epoch": 0.74, "learning_rate": 5.0287152415059226e-05, "loss": 6.8474, "step": 381 }, { "epoch": 0.74, "learning_rate": 4.958363114831286e-05, "loss": 6.7632, "step": 382 }, { "epoch": 0.74, "learning_rate": 4.888409005257946e-05, "loss": 6.6268, "step": 383 }, { "epoch": 0.74, "learning_rate": 4.8188556855342355e-05, "loss": 6.5044, "step": 384 }, { "epoch": 0.75, "learning_rate": 4.749705912522501e-05, "loss": 6.7882, "step": 385 }, { "epoch": 0.75, "learning_rate": 4.680962427089849e-05, "loss": 6.6334, "step": 386 }, { "epoch": 0.75, "learning_rate": 4.6126279539995005e-05, "loss": 6.8479, "step": 387 }, { "epoch": 0.75, "learning_rate": 4.544705201802772e-05, "loss": 6.6996, "step": 388 }, { "epoch": 0.75, "learning_rate": 4.477196862731747e-05, "loss": 6.6673, "step": 389 }, { "epoch": 0.76, "learning_rate": 4.41010561259255e-05, "loss": 6.6098, "step": 390 }, { "epoch": 0.76, "learning_rate": 4.343434110659271e-05, "loss": 6.8179, "step": 391 }, { "epoch": 0.76, "learning_rate": 4.277184999568594e-05, "loss": 6.7396, "step": 392 }, { "epoch": 0.76, "learning_rate": 4.2113609052150335e-05, "loss": 6.5957, "step": 393 }, { "epoch": 0.76, "learning_rate": 4.145964436646837e-05, "loss": 6.5761, "step": 394 }, { "epoch": 0.77, "learning_rate": 4.080998185962606e-05, "loss": 6.6821, "step": 395 }, { "epoch": 0.77, "learning_rate": 4.0164647282085296e-05, "loss": 6.6868, "step": 396 }, { "epoch": 0.77, "learning_rate": 3.9523666212763166e-05, "loss": 6.7086, "step": 397 }, { "epoch": 0.77, "learning_rate": 3.8887064058018244e-05, "loss": 6.6119, "step": 398 }, { "epoch": 0.77, "learning_rate": 3.825486605064354e-05, "loss": 6.5869, "step": 399 }, { "epoch": 0.78, "learning_rate": 3.7627097248866136e-05, "loss": 6.8201, "step": 400 }, { "epoch": 0.78, "eval_loss": 6.829553127288818, "eval_runtime": 0.4568, "eval_samples_per_second": 146.658, "eval_steps_per_second": 10.945, "step": 400 }, { "epoch": 0.78, "learning_rate": 3.700378253535427e-05, "loss": 6.584, "step": 401 }, { "epoch": 0.78, "learning_rate": 3.6384946616230933e-05, "loss": 6.6932, "step": 402 }, { "epoch": 0.78, "learning_rate": 3.577061402009446e-05, "loss": 6.6921, "step": 403 }, { "epoch": 0.78, "learning_rate": 3.5160809097046586e-05, "loss": 6.6652, "step": 404 }, { "epoch": 0.79, "learning_rate": 3.4555556017727096e-05, "loss": 6.5146, "step": 405 }, { "epoch": 0.79, "learning_rate": 3.395487877235575e-05, "loss": 6.5751, "step": 406 }, { "epoch": 0.79, "learning_rate": 3.335880116978154e-05, "loss": 6.4599, "step": 407 }, { "epoch": 0.79, "learning_rate": 3.276734683653894e-05, "loss": 6.5829, "step": 408 }, { "epoch": 0.79, "learning_rate": 3.2180539215911254e-05, "loss": 6.7763, "step": 409 }, { "epoch": 0.79, "learning_rate": 3.15984015670017e-05, "loss": 6.6207, "step": 410 }, { "epoch": 0.8, "learning_rate": 3.1020956963811285e-05, "loss": 6.6804, "step": 411 }, { "epoch": 0.8, "learning_rate": 3.0448228294324255e-05, "loss": 6.754, "step": 412 }, { "epoch": 0.8, "learning_rate": 2.988023825960095e-05, "loss": 6.6705, "step": 413 }, { "epoch": 0.8, "learning_rate": 2.9317009372878037e-05, "loss": 6.6933, "step": 414 }, { "epoch": 0.8, "learning_rate": 2.8758563958675974e-05, "loss": 6.7342, "step": 415 }, { "epoch": 0.81, "learning_rate": 2.8204924151914428e-05, "loss": 6.6217, "step": 416 }, { "epoch": 0.81, "learning_rate": 2.765611189703461e-05, "loss": 6.4585, "step": 417 }, { "epoch": 0.81, "learning_rate": 2.7112148947129736e-05, "loss": 6.5491, "step": 418 }, { "epoch": 0.81, "learning_rate": 2.6573056863082698e-05, "loss": 6.5655, "step": 419 }, { "epoch": 0.81, "learning_rate": 2.603885701271133e-05, "loss": 6.7607, "step": 420 }, { "epoch": 0.82, "learning_rate": 2.550957056992174e-05, "loss": 6.6543, "step": 421 }, { "epoch": 0.82, "learning_rate": 2.498521851386886e-05, "loss": 6.5007, "step": 422 }, { "epoch": 0.82, "learning_rate": 2.4465821628124837e-05, "loss": 6.6124, "step": 423 }, { "epoch": 0.82, "learning_rate": 2.3951400499855446e-05, "loss": 6.5528, "step": 424 }, { "epoch": 0.82, "learning_rate": 2.344197551900398e-05, "loss": 6.7489, "step": 425 }, { "epoch": 0.83, "learning_rate": 2.293756687748297e-05, "loss": 6.6461, "step": 426 }, { "epoch": 0.83, "learning_rate": 2.2438194568374007e-05, "loss": 6.6335, "step": 427 }, { "epoch": 0.83, "learning_rate": 2.1943878385135227e-05, "loss": 6.5654, "step": 428 }, { "epoch": 0.83, "learning_rate": 2.1454637920816646e-05, "loss": 6.5755, "step": 429 }, { "epoch": 0.83, "learning_rate": 2.0970492567283765e-05, "loss": 6.6072, "step": 430 }, { "epoch": 0.84, "learning_rate": 2.0491461514448803e-05, "loss": 6.578, "step": 431 }, { "epoch": 0.84, "learning_rate": 2.001756374951006e-05, "loss": 6.6676, "step": 432 }, { "epoch": 0.84, "learning_rate": 1.9548818056199377e-05, "loss": 6.4739, "step": 433 }, { "epoch": 0.84, "learning_rate": 1.908524301403764e-05, "loss": 6.6598, "step": 434 }, { "epoch": 0.84, "learning_rate": 1.8626856997598355e-05, "loss": 6.5202, "step": 435 }, { "epoch": 0.85, "learning_rate": 1.817367817577915e-05, "loss": 6.5515, "step": 436 }, { "epoch": 0.85, "learning_rate": 1.7725724511081924e-05, "loss": 6.4875, "step": 437 }, { "epoch": 0.85, "learning_rate": 1.72830137589007e-05, "loss": 6.6304, "step": 438 }, { "epoch": 0.85, "learning_rate": 1.6845563466817745e-05, "loss": 6.6853, "step": 439 }, { "epoch": 0.85, "learning_rate": 1.6413390973908342e-05, "loss": 6.6227, "step": 440 }, { "epoch": 0.85, "learning_rate": 1.5986513410053247e-05, "loss": 6.8159, "step": 441 }, { "epoch": 0.86, "learning_rate": 1.556494769525991e-05, "loss": 6.578, "step": 442 }, { "epoch": 0.86, "learning_rate": 1.5148710538991727e-05, "loss": 6.5347, "step": 443 }, { "epoch": 0.86, "learning_rate": 1.4737818439505656e-05, "loss": 6.8032, "step": 444 }, { "epoch": 0.86, "learning_rate": 1.433228768319853e-05, "loss": 6.6454, "step": 445 }, { "epoch": 0.86, "learning_rate": 1.3932134343961265e-05, "loss": 6.5615, "step": 446 }, { "epoch": 0.87, "learning_rate": 1.3537374282541847e-05, "loss": 6.5985, "step": 447 }, { "epoch": 0.87, "learning_rate": 1.314802314591667e-05, "loss": 6.5956, "step": 448 }, { "epoch": 0.87, "learning_rate": 1.276409636667038e-05, "loss": 6.6768, "step": 449 }, { "epoch": 0.87, "learning_rate": 1.2385609162384019e-05, "loss": 6.5845, "step": 450 }, { "epoch": 0.87, "learning_rate": 1.2012576535032087e-05, "loss": 6.513, "step": 451 }, { "epoch": 0.88, "learning_rate": 1.1645013270387738e-05, "loss": 6.6015, "step": 452 }, { "epoch": 0.88, "learning_rate": 1.1282933937436721e-05, "loss": 6.5111, "step": 453 }, { "epoch": 0.88, "learning_rate": 1.0926352887800033e-05, "loss": 6.5472, "step": 454 }, { "epoch": 0.88, "learning_rate": 1.0575284255164989e-05, "loss": 6.7356, "step": 455 }, { "epoch": 0.88, "learning_rate": 1.022974195472499e-05, "loss": 6.7244, "step": 456 }, { "epoch": 0.89, "learning_rate": 9.889739682628034e-06, "loss": 6.7113, "step": 457 }, { "epoch": 0.89, "learning_rate": 9.555290915433821e-06, "loss": 6.6869, "step": 458 }, { "epoch": 0.89, "learning_rate": 9.226408909579519e-06, "loss": 6.7583, "step": 459 }, { "epoch": 0.89, "learning_rate": 8.903106700854423e-06, "loss": 6.687, "step": 460 }, { "epoch": 0.89, "learning_rate": 8.585397103883296e-06, "loss": 6.6874, "step": 461 }, { "epoch": 0.9, "learning_rate": 8.2732927116182e-06, "loss": 6.5792, "step": 462 }, { "epoch": 0.9, "learning_rate": 7.966805894839656e-06, "loss": 6.7618, "step": 463 }, { "epoch": 0.9, "learning_rate": 7.665948801666139e-06, "loss": 6.9631, "step": 464 }, { "epoch": 0.9, "learning_rate": 7.370733357072539e-06, "loss": 6.6466, "step": 465 }, { "epoch": 0.9, "learning_rate": 7.081171262417606e-06, "loss": 6.5379, "step": 466 }, { "epoch": 0.91, "learning_rate": 6.797273994980118e-06, "loss": 6.6883, "step": 467 }, { "epoch": 0.91, "learning_rate": 6.5190528075038436e-06, "loss": 6.6152, "step": 468 }, { "epoch": 0.91, "learning_rate": 6.246518727751704e-06, "loss": 6.5607, "step": 469 }, { "epoch": 0.91, "learning_rate": 5.979682558068566e-06, "loss": 6.9042, "step": 470 }, { "epoch": 0.91, "learning_rate": 5.718554874952991e-06, "loss": 6.5923, "step": 471 }, { "epoch": 0.91, "learning_rate": 5.463146028638249e-06, "loss": 6.5547, "step": 472 }, { "epoch": 0.92, "learning_rate": 5.213466142681832e-06, "loss": 6.5457, "step": 473 }, { "epoch": 0.92, "learning_rate": 4.969525113564327e-06, "loss": 6.8205, "step": 474 }, { "epoch": 0.92, "learning_rate": 4.7313326102971225e-06, "loss": 6.5781, "step": 475 }, { "epoch": 0.92, "learning_rate": 4.498898074039126e-06, "loss": 6.6173, "step": 476 }, { "epoch": 0.92, "learning_rate": 4.272230717722602e-06, "loss": 6.6039, "step": 477 }, { "epoch": 0.93, "learning_rate": 4.051339525687991e-06, "loss": 6.7091, "step": 478 }, { "epoch": 0.93, "learning_rate": 3.83623325332772e-06, "loss": 6.7178, "step": 479 }, { "epoch": 0.93, "learning_rate": 3.6269204267392825e-06, "loss": 6.6408, "step": 480 }, { "epoch": 0.93, "learning_rate": 3.4234093423872786e-06, "loss": 6.548, "step": 481 }, { "epoch": 0.93, "learning_rate": 3.2257080667744407e-06, "loss": 6.8339, "step": 482 }, { "epoch": 0.94, "learning_rate": 3.0338244361220564e-06, "loss": 6.6542, "step": 483 }, { "epoch": 0.94, "learning_rate": 2.8477660560593196e-06, "loss": 6.4732, "step": 484 }, { "epoch": 0.94, "learning_rate": 2.6675403013218355e-06, "loss": 6.7991, "step": 485 }, { "epoch": 0.94, "learning_rate": 2.4931543154593223e-06, "loss": 6.7538, "step": 486 }, { "epoch": 0.94, "learning_rate": 2.3246150105525054e-06, "loss": 6.5082, "step": 487 }, { "epoch": 0.95, "learning_rate": 2.161929066939083e-06, "loss": 6.8893, "step": 488 }, { "epoch": 0.95, "learning_rate": 2.005102932948993e-06, "loss": 6.639, "step": 489 }, { "epoch": 0.95, "learning_rate": 1.8541428246487966e-06, "loss": 6.6773, "step": 490 }, { "epoch": 0.95, "learning_rate": 1.7090547255952935e-06, "loss": 6.6149, "step": 491 }, { "epoch": 0.95, "learning_rate": 1.5698443865983789e-06, "loss": 6.6495, "step": 492 }, { "epoch": 0.96, "learning_rate": 1.4365173254930585e-06, "loss": 6.7779, "step": 493 }, { "epoch": 0.96, "learning_rate": 1.309078826920773e-06, "loss": 6.7067, "step": 494 }, { "epoch": 0.96, "learning_rate": 1.1875339421199004e-06, "loss": 6.6096, "step": 495 }, { "epoch": 0.96, "learning_rate": 1.0718874887256146e-06, "loss": 6.4771, "step": 496 }, { "epoch": 0.96, "learning_rate": 9.621440505788225e-07, "loss": 6.7159, "step": 497 }, { "epoch": 0.97, "learning_rate": 8.583079775445423e-07, "loss": 6.5673, "step": 498 }, { "epoch": 0.97, "learning_rate": 7.603833853395247e-07, "loss": 6.6136, "step": 499 }, { "epoch": 0.97, "learning_rate": 6.68374155369017e-07, "loss": 6.6013, "step": 500 }, { "epoch": 0.97, "eval_loss": 6.8933210372924805, "eval_runtime": 0.4586, "eval_samples_per_second": 146.102, "eval_steps_per_second": 10.903, "step": 500 } ], "logging_steps": 1, "max_steps": 515, "num_train_epochs": 1, "save_steps": 500, "total_flos": 4108329021603840.0, "trial_name": null, "trial_params": null }