{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8017, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012473493825620557, "grad_norm": 0.0, "learning_rate": 8.298755186721993e-08, "loss": 6.5433, "step": 1 }, { "epoch": 0.00024946987651241115, "grad_norm": 0.0, "learning_rate": 1.6597510373443985e-07, "loss": 6.3101, "step": 2 }, { "epoch": 0.00037420481476861667, "grad_norm": 0.0, "learning_rate": 2.4896265560165975e-07, "loss": 6.4525, "step": 3 }, { "epoch": 0.0004989397530248223, "grad_norm": 0.0, "learning_rate": 3.319502074688797e-07, "loss": 6.3739, "step": 4 }, { "epoch": 0.0006236746912810278, "grad_norm": 0.0, "learning_rate": 4.149377593360996e-07, "loss": 6.3881, "step": 5 }, { "epoch": 0.0007484096295372333, "grad_norm": 0.0, "learning_rate": 4.979253112033195e-07, "loss": 6.5473, "step": 6 }, { "epoch": 0.000873144567793439, "grad_norm": 0.0, "learning_rate": 5.809128630705395e-07, "loss": 6.6018, "step": 7 }, { "epoch": 0.0009978795060496446, "grad_norm": 0.0, "learning_rate": 6.639004149377594e-07, "loss": 6.4326, "step": 8 }, { "epoch": 0.00112261444430585, "grad_norm": 0.0, "learning_rate": 7.468879668049793e-07, "loss": 6.1861, "step": 9 }, { "epoch": 0.0012473493825620556, "grad_norm": 0.0, "learning_rate": 8.298755186721992e-07, "loss": 6.1714, "step": 10 }, { "epoch": 0.0013720843208182613, "grad_norm": 0.0, "learning_rate": 9.128630705394191e-07, "loss": 6.1402, "step": 11 }, { "epoch": 0.0014968192590744667, "grad_norm": 0.0, "learning_rate": 9.95850622406639e-07, "loss": 5.568, "step": 12 }, { "epoch": 0.0016215541973306723, "grad_norm": 0.0, "learning_rate": 1.078838174273859e-06, "loss": 5.355, "step": 13 }, { "epoch": 0.001746289135586878, "grad_norm": 0.0, "learning_rate": 1.161825726141079e-06, "loss": 5.2926, "step": 14 }, { "epoch": 0.0018710240738430836, "grad_norm": 0.0, "learning_rate": 1.2448132780082988e-06, "loss": 5.1146, "step": 15 }, { "epoch": 0.001995759012099289, "grad_norm": 0.0, "learning_rate": 1.3278008298755188e-06, "loss": 3.7858, "step": 16 }, { "epoch": 0.0021204939503554946, "grad_norm": 0.0, "learning_rate": 1.4107883817427386e-06, "loss": 3.9101, "step": 17 }, { "epoch": 0.0022452288886117, "grad_norm": 0.0, "learning_rate": 1.4937759336099586e-06, "loss": 3.8263, "step": 18 }, { "epoch": 0.002369963826867906, "grad_norm": 0.0, "learning_rate": 1.5767634854771784e-06, "loss": 3.708, "step": 19 }, { "epoch": 0.0024946987651241113, "grad_norm": 0.0, "learning_rate": 1.6597510373443984e-06, "loss": 3.7611, "step": 20 }, { "epoch": 0.0026194337033803167, "grad_norm": 0.0, "learning_rate": 1.7427385892116182e-06, "loss": 3.6356, "step": 21 }, { "epoch": 0.0027441686416365225, "grad_norm": 0.0, "learning_rate": 1.8257261410788382e-06, "loss": 3.4969, "step": 22 }, { "epoch": 0.002868903579892728, "grad_norm": 0.0, "learning_rate": 1.908713692946058e-06, "loss": 3.4411, "step": 23 }, { "epoch": 0.0029936385181489333, "grad_norm": 0.0, "learning_rate": 1.991701244813278e-06, "loss": 3.2423, "step": 24 }, { "epoch": 0.003118373456405139, "grad_norm": 0.0, "learning_rate": 2.074688796680498e-06, "loss": 3.2376, "step": 25 }, { "epoch": 0.0032431083946613446, "grad_norm": 0.0, "learning_rate": 2.157676348547718e-06, "loss": 3.2192, "step": 26 }, { "epoch": 0.00336784333291755, "grad_norm": 0.0, "learning_rate": 2.240663900414938e-06, "loss": 3.1639, "step": 27 }, { "epoch": 0.003492578271173756, "grad_norm": 0.0, "learning_rate": 2.323651452282158e-06, "loss": 3.0787, "step": 28 }, { "epoch": 0.0036173132094299613, "grad_norm": 0.0, "learning_rate": 2.4066390041493776e-06, "loss": 3.0248, "step": 29 }, { "epoch": 0.003742048147686167, "grad_norm": 0.0, "learning_rate": 2.4896265560165977e-06, "loss": 2.8591, "step": 30 }, { "epoch": 0.0038667830859423725, "grad_norm": 0.0, "learning_rate": 2.5726141078838172e-06, "loss": 2.7848, "step": 31 }, { "epoch": 0.003991518024198578, "grad_norm": 0.0, "learning_rate": 2.6556016597510377e-06, "loss": 2.8507, "step": 32 }, { "epoch": 0.004116252962454783, "grad_norm": 0.0, "learning_rate": 2.7385892116182577e-06, "loss": 2.6824, "step": 33 }, { "epoch": 0.004240987900710989, "grad_norm": 0.0, "learning_rate": 2.8215767634854773e-06, "loss": 2.5775, "step": 34 }, { "epoch": 0.004365722838967195, "grad_norm": 0.0, "learning_rate": 2.9045643153526977e-06, "loss": 2.5103, "step": 35 }, { "epoch": 0.0044904577772234, "grad_norm": 0.0, "learning_rate": 2.9875518672199173e-06, "loss": 2.4596, "step": 36 }, { "epoch": 0.004615192715479606, "grad_norm": 0.0, "learning_rate": 3.0705394190871373e-06, "loss": 2.415, "step": 37 }, { "epoch": 0.004739927653735812, "grad_norm": 0.0, "learning_rate": 3.153526970954357e-06, "loss": 2.4246, "step": 38 }, { "epoch": 0.004864662591992017, "grad_norm": 0.0, "learning_rate": 3.2365145228215773e-06, "loss": 2.4014, "step": 39 }, { "epoch": 0.0049893975302482225, "grad_norm": 0.0, "learning_rate": 3.319502074688797e-06, "loss": 2.3319, "step": 40 }, { "epoch": 0.005114132468504428, "grad_norm": 0.0, "learning_rate": 3.402489626556017e-06, "loss": 2.2356, "step": 41 }, { "epoch": 0.005238867406760633, "grad_norm": 0.0, "learning_rate": 3.4854771784232365e-06, "loss": 2.2585, "step": 42 }, { "epoch": 0.005363602345016839, "grad_norm": 0.0, "learning_rate": 3.568464730290457e-06, "loss": 2.1735, "step": 43 }, { "epoch": 0.005488337283273045, "grad_norm": 0.0, "learning_rate": 3.6514522821576765e-06, "loss": 2.1454, "step": 44 }, { "epoch": 0.00561307222152925, "grad_norm": 0.0, "learning_rate": 3.7344398340248965e-06, "loss": 2.0499, "step": 45 }, { "epoch": 0.005737807159785456, "grad_norm": 0.0, "learning_rate": 3.817427385892116e-06, "loss": 2.0182, "step": 46 }, { "epoch": 0.005862542098041662, "grad_norm": 0.0, "learning_rate": 3.9004149377593365e-06, "loss": 2.0878, "step": 47 }, { "epoch": 0.005987277036297867, "grad_norm": 0.0, "learning_rate": 3.983402489626556e-06, "loss": 2.0311, "step": 48 }, { "epoch": 0.0061120119745540725, "grad_norm": 0.0, "learning_rate": 4.0663900414937765e-06, "loss": 1.9852, "step": 49 }, { "epoch": 0.006236746912810278, "grad_norm": 0.0, "learning_rate": 4.149377593360996e-06, "loss": 2.0084, "step": 50 }, { "epoch": 0.006361481851066483, "grad_norm": 0.0, "learning_rate": 4.232365145228216e-06, "loss": 1.9556, "step": 51 }, { "epoch": 0.006486216789322689, "grad_norm": 0.0, "learning_rate": 4.315352697095436e-06, "loss": 1.9822, "step": 52 }, { "epoch": 0.006610951727578895, "grad_norm": 0.0, "learning_rate": 4.398340248962656e-06, "loss": 2.0077, "step": 53 }, { "epoch": 0.0067356866658351, "grad_norm": 0.0, "learning_rate": 4.481327800829876e-06, "loss": 1.9894, "step": 54 }, { "epoch": 0.006860421604091306, "grad_norm": 0.0, "learning_rate": 4.564315352697096e-06, "loss": 1.9154, "step": 55 }, { "epoch": 0.006985156542347512, "grad_norm": 0.0, "learning_rate": 4.647302904564316e-06, "loss": 1.9527, "step": 56 }, { "epoch": 0.007109891480603717, "grad_norm": 0.0, "learning_rate": 4.730290456431536e-06, "loss": 1.8799, "step": 57 }, { "epoch": 0.0072346264188599225, "grad_norm": 0.0, "learning_rate": 4.813278008298755e-06, "loss": 1.8721, "step": 58 }, { "epoch": 0.007359361357116128, "grad_norm": 0.0, "learning_rate": 4.896265560165976e-06, "loss": 1.8714, "step": 59 }, { "epoch": 0.007484096295372334, "grad_norm": 0.0, "learning_rate": 4.979253112033195e-06, "loss": 1.8995, "step": 60 }, { "epoch": 0.007608831233628539, "grad_norm": 0.0, "learning_rate": 5.062240663900415e-06, "loss": 1.8452, "step": 61 }, { "epoch": 0.007733566171884745, "grad_norm": 0.0, "learning_rate": 5.1452282157676345e-06, "loss": 1.8734, "step": 62 }, { "epoch": 0.00785830111014095, "grad_norm": 0.0, "learning_rate": 5.228215767634856e-06, "loss": 1.8644, "step": 63 }, { "epoch": 0.007983036048397157, "grad_norm": 0.0, "learning_rate": 5.311203319502075e-06, "loss": 1.7735, "step": 64 }, { "epoch": 0.008107770986653362, "grad_norm": 0.0, "learning_rate": 5.394190871369295e-06, "loss": 1.8558, "step": 65 }, { "epoch": 0.008232505924909567, "grad_norm": 0.0, "learning_rate": 5.477178423236515e-06, "loss": 1.869, "step": 66 }, { "epoch": 0.008357240863165773, "grad_norm": 0.0, "learning_rate": 5.560165975103735e-06, "loss": 1.8813, "step": 67 }, { "epoch": 0.008481975801421978, "grad_norm": 0.0, "learning_rate": 5.6431535269709545e-06, "loss": 1.8026, "step": 68 }, { "epoch": 0.008606710739678183, "grad_norm": 0.0, "learning_rate": 5.726141078838174e-06, "loss": 1.802, "step": 69 }, { "epoch": 0.00873144567793439, "grad_norm": 0.0, "learning_rate": 5.809128630705395e-06, "loss": 1.8185, "step": 70 }, { "epoch": 0.008856180616190595, "grad_norm": 0.0, "learning_rate": 5.892116182572615e-06, "loss": 1.7889, "step": 71 }, { "epoch": 0.0089809155544468, "grad_norm": 0.0, "learning_rate": 5.9751037344398345e-06, "loss": 1.8216, "step": 72 }, { "epoch": 0.009105650492703007, "grad_norm": 0.0, "learning_rate": 6.058091286307054e-06, "loss": 1.7133, "step": 73 }, { "epoch": 0.009230385430959212, "grad_norm": 0.0, "learning_rate": 6.1410788381742745e-06, "loss": 1.7458, "step": 74 }, { "epoch": 0.009355120369215417, "grad_norm": 0.0, "learning_rate": 6.224066390041494e-06, "loss": 1.7643, "step": 75 }, { "epoch": 0.009479855307471623, "grad_norm": 0.0, "learning_rate": 6.307053941908714e-06, "loss": 1.7807, "step": 76 }, { "epoch": 0.009604590245727828, "grad_norm": 0.0, "learning_rate": 6.390041493775933e-06, "loss": 1.7185, "step": 77 }, { "epoch": 0.009729325183984033, "grad_norm": 0.0, "learning_rate": 6.4730290456431546e-06, "loss": 1.7574, "step": 78 }, { "epoch": 0.00985406012224024, "grad_norm": 0.0, "learning_rate": 6.556016597510374e-06, "loss": 1.7463, "step": 79 }, { "epoch": 0.009978795060496445, "grad_norm": 0.0, "learning_rate": 6.639004149377594e-06, "loss": 1.8126, "step": 80 }, { "epoch": 0.01010352999875265, "grad_norm": 0.0, "learning_rate": 6.721991701244814e-06, "loss": 1.7082, "step": 81 }, { "epoch": 0.010228264937008857, "grad_norm": 0.0, "learning_rate": 6.804979253112034e-06, "loss": 1.7296, "step": 82 }, { "epoch": 0.010352999875265062, "grad_norm": 0.0, "learning_rate": 6.887966804979253e-06, "loss": 1.7787, "step": 83 }, { "epoch": 0.010477734813521267, "grad_norm": 0.0, "learning_rate": 6.970954356846473e-06, "loss": 1.6765, "step": 84 }, { "epoch": 0.010602469751777473, "grad_norm": 0.0, "learning_rate": 7.053941908713694e-06, "loss": 1.7586, "step": 85 }, { "epoch": 0.010727204690033678, "grad_norm": 0.0, "learning_rate": 7.136929460580914e-06, "loss": 1.6971, "step": 86 }, { "epoch": 0.010851939628289883, "grad_norm": 0.0, "learning_rate": 7.219917012448133e-06, "loss": 1.6469, "step": 87 }, { "epoch": 0.01097667456654609, "grad_norm": 0.0, "learning_rate": 7.302904564315353e-06, "loss": 1.7174, "step": 88 }, { "epoch": 0.011101409504802295, "grad_norm": 0.0, "learning_rate": 7.385892116182573e-06, "loss": 1.6723, "step": 89 }, { "epoch": 0.0112261444430585, "grad_norm": 0.0, "learning_rate": 7.468879668049793e-06, "loss": 1.6942, "step": 90 }, { "epoch": 0.011350879381314707, "grad_norm": 0.0, "learning_rate": 7.5518672199170125e-06, "loss": 1.6744, "step": 91 }, { "epoch": 0.011475614319570912, "grad_norm": 0.0, "learning_rate": 7.634854771784232e-06, "loss": 1.6999, "step": 92 }, { "epoch": 0.011600349257827117, "grad_norm": 0.0, "learning_rate": 7.717842323651453e-06, "loss": 1.6785, "step": 93 }, { "epoch": 0.011725084196083323, "grad_norm": 0.0, "learning_rate": 7.800829875518673e-06, "loss": 1.6731, "step": 94 }, { "epoch": 0.011849819134339528, "grad_norm": 0.0, "learning_rate": 7.883817427385892e-06, "loss": 1.6394, "step": 95 }, { "epoch": 0.011974554072595733, "grad_norm": 0.0, "learning_rate": 7.966804979253112e-06, "loss": 1.682, "step": 96 }, { "epoch": 0.01209928901085194, "grad_norm": 0.0, "learning_rate": 8.049792531120333e-06, "loss": 1.6529, "step": 97 }, { "epoch": 0.012224023949108145, "grad_norm": 0.0, "learning_rate": 8.132780082987553e-06, "loss": 1.705, "step": 98 }, { "epoch": 0.01234875888736435, "grad_norm": 0.0, "learning_rate": 8.215767634854772e-06, "loss": 1.6763, "step": 99 }, { "epoch": 0.012473493825620557, "grad_norm": 0.0, "learning_rate": 8.298755186721992e-06, "loss": 1.6498, "step": 100 }, { "epoch": 0.012598228763876762, "grad_norm": 0.0, "learning_rate": 8.381742738589213e-06, "loss": 1.6118, "step": 101 }, { "epoch": 0.012722963702132967, "grad_norm": 0.0, "learning_rate": 8.464730290456431e-06, "loss": 1.6597, "step": 102 }, { "epoch": 0.012847698640389173, "grad_norm": 0.0, "learning_rate": 8.547717842323652e-06, "loss": 1.6173, "step": 103 }, { "epoch": 0.012972433578645378, "grad_norm": 0.0, "learning_rate": 8.630705394190872e-06, "loss": 1.624, "step": 104 }, { "epoch": 0.013097168516901583, "grad_norm": 0.0, "learning_rate": 8.713692946058093e-06, "loss": 1.6705, "step": 105 }, { "epoch": 0.01322190345515779, "grad_norm": 0.0, "learning_rate": 8.796680497925311e-06, "loss": 1.6311, "step": 106 }, { "epoch": 0.013346638393413995, "grad_norm": 0.0, "learning_rate": 8.879668049792532e-06, "loss": 1.6563, "step": 107 }, { "epoch": 0.0134713733316702, "grad_norm": 0.0, "learning_rate": 8.962655601659752e-06, "loss": 1.5695, "step": 108 }, { "epoch": 0.013596108269926407, "grad_norm": 0.0, "learning_rate": 9.045643153526971e-06, "loss": 1.6551, "step": 109 }, { "epoch": 0.013720843208182612, "grad_norm": 0.0, "learning_rate": 9.128630705394191e-06, "loss": 1.5642, "step": 110 }, { "epoch": 0.013845578146438817, "grad_norm": 0.0, "learning_rate": 9.211618257261412e-06, "loss": 1.642, "step": 111 }, { "epoch": 0.013970313084695023, "grad_norm": 0.0, "learning_rate": 9.294605809128632e-06, "loss": 1.6324, "step": 112 }, { "epoch": 0.014095048022951228, "grad_norm": 0.0, "learning_rate": 9.377593360995851e-06, "loss": 1.6351, "step": 113 }, { "epoch": 0.014219782961207433, "grad_norm": 0.0, "learning_rate": 9.460580912863071e-06, "loss": 1.6178, "step": 114 }, { "epoch": 0.01434451789946364, "grad_norm": 0.0, "learning_rate": 9.543568464730292e-06, "loss": 1.5934, "step": 115 }, { "epoch": 0.014469252837719845, "grad_norm": 0.0, "learning_rate": 9.62655601659751e-06, "loss": 1.5934, "step": 116 }, { "epoch": 0.014593987775976052, "grad_norm": 0.0, "learning_rate": 9.709543568464731e-06, "loss": 1.5518, "step": 117 }, { "epoch": 0.014718722714232257, "grad_norm": 0.0, "learning_rate": 9.792531120331951e-06, "loss": 1.5626, "step": 118 }, { "epoch": 0.014843457652488462, "grad_norm": 0.0, "learning_rate": 9.875518672199172e-06, "loss": 1.6084, "step": 119 }, { "epoch": 0.014968192590744668, "grad_norm": 0.0, "learning_rate": 9.95850622406639e-06, "loss": 1.556, "step": 120 }, { "epoch": 0.015092927529000873, "grad_norm": 0.0, "learning_rate": 1.0041493775933611e-05, "loss": 1.5625, "step": 121 }, { "epoch": 0.015217662467257078, "grad_norm": 0.0, "learning_rate": 1.012448132780083e-05, "loss": 1.6091, "step": 122 }, { "epoch": 0.015342397405513285, "grad_norm": 0.0, "learning_rate": 1.020746887966805e-05, "loss": 1.6155, "step": 123 }, { "epoch": 0.01546713234376949, "grad_norm": 0.0, "learning_rate": 1.0290456431535269e-05, "loss": 1.5679, "step": 124 }, { "epoch": 0.015591867282025695, "grad_norm": 0.0, "learning_rate": 1.0373443983402491e-05, "loss": 1.6249, "step": 125 }, { "epoch": 0.0157166022202819, "grad_norm": 0.0, "learning_rate": 1.0456431535269711e-05, "loss": 1.6231, "step": 126 }, { "epoch": 0.015841337158538107, "grad_norm": 0.0, "learning_rate": 1.053941908713693e-05, "loss": 1.6146, "step": 127 }, { "epoch": 0.015966072096794313, "grad_norm": 0.0, "learning_rate": 1.062240663900415e-05, "loss": 1.6322, "step": 128 }, { "epoch": 0.016090807035050517, "grad_norm": 0.0, "learning_rate": 1.070539419087137e-05, "loss": 1.5817, "step": 129 }, { "epoch": 0.016215541973306723, "grad_norm": 0.0, "learning_rate": 1.078838174273859e-05, "loss": 1.5249, "step": 130 }, { "epoch": 0.01634027691156293, "grad_norm": 0.0, "learning_rate": 1.0871369294605809e-05, "loss": 1.558, "step": 131 }, { "epoch": 0.016465011849819133, "grad_norm": 0.0, "learning_rate": 1.095435684647303e-05, "loss": 1.6392, "step": 132 }, { "epoch": 0.01658974678807534, "grad_norm": 0.0, "learning_rate": 1.1037344398340251e-05, "loss": 1.5706, "step": 133 }, { "epoch": 0.016714481726331547, "grad_norm": 0.0, "learning_rate": 1.112033195020747e-05, "loss": 1.5438, "step": 134 }, { "epoch": 0.01683921666458775, "grad_norm": 0.0, "learning_rate": 1.120331950207469e-05, "loss": 1.5917, "step": 135 }, { "epoch": 0.016963951602843957, "grad_norm": 0.0, "learning_rate": 1.1286307053941909e-05, "loss": 1.5581, "step": 136 }, { "epoch": 0.017088686541100163, "grad_norm": 0.0, "learning_rate": 1.136929460580913e-05, "loss": 1.5807, "step": 137 }, { "epoch": 0.017213421479356367, "grad_norm": 0.0, "learning_rate": 1.1452282157676348e-05, "loss": 1.5166, "step": 138 }, { "epoch": 0.017338156417612573, "grad_norm": 0.0, "learning_rate": 1.1535269709543569e-05, "loss": 1.5847, "step": 139 }, { "epoch": 0.01746289135586878, "grad_norm": 0.0, "learning_rate": 1.161825726141079e-05, "loss": 1.5228, "step": 140 }, { "epoch": 0.017587626294124983, "grad_norm": 0.0, "learning_rate": 1.170124481327801e-05, "loss": 1.521, "step": 141 }, { "epoch": 0.01771236123238119, "grad_norm": 0.0, "learning_rate": 1.178423236514523e-05, "loss": 1.5261, "step": 142 }, { "epoch": 0.017837096170637397, "grad_norm": 0.0, "learning_rate": 1.1867219917012449e-05, "loss": 1.5991, "step": 143 }, { "epoch": 0.0179618311088936, "grad_norm": 0.0, "learning_rate": 1.1950207468879669e-05, "loss": 1.5273, "step": 144 }, { "epoch": 0.018086566047149807, "grad_norm": 0.0, "learning_rate": 1.2033195020746888e-05, "loss": 1.4863, "step": 145 }, { "epoch": 0.018211300985406013, "grad_norm": 0.0, "learning_rate": 1.2116182572614108e-05, "loss": 1.5747, "step": 146 }, { "epoch": 0.018336035923662217, "grad_norm": 0.0, "learning_rate": 1.219917012448133e-05, "loss": 1.599, "step": 147 }, { "epoch": 0.018460770861918423, "grad_norm": 0.0, "learning_rate": 1.2282157676348549e-05, "loss": 1.4974, "step": 148 }, { "epoch": 0.01858550580017463, "grad_norm": 0.0, "learning_rate": 1.236514522821577e-05, "loss": 1.4944, "step": 149 }, { "epoch": 0.018710240738430833, "grad_norm": 0.0, "learning_rate": 1.2448132780082988e-05, "loss": 1.5278, "step": 150 }, { "epoch": 0.01883497567668704, "grad_norm": 0.0, "learning_rate": 1.2531120331950209e-05, "loss": 1.5735, "step": 151 }, { "epoch": 0.018959710614943247, "grad_norm": 0.0, "learning_rate": 1.2614107883817427e-05, "loss": 1.5169, "step": 152 }, { "epoch": 0.01908444555319945, "grad_norm": 0.0, "learning_rate": 1.2697095435684648e-05, "loss": 1.4454, "step": 153 }, { "epoch": 0.019209180491455657, "grad_norm": 0.0, "learning_rate": 1.2780082987551867e-05, "loss": 1.5051, "step": 154 }, { "epoch": 0.019333915429711863, "grad_norm": 0.0, "learning_rate": 1.2863070539419089e-05, "loss": 1.5026, "step": 155 }, { "epoch": 0.019458650367968067, "grad_norm": 0.0, "learning_rate": 1.2946058091286309e-05, "loss": 1.5415, "step": 156 }, { "epoch": 0.019583385306224273, "grad_norm": 0.0, "learning_rate": 1.3029045643153528e-05, "loss": 1.5599, "step": 157 }, { "epoch": 0.01970812024448048, "grad_norm": 0.0, "learning_rate": 1.3112033195020748e-05, "loss": 1.5213, "step": 158 }, { "epoch": 0.019832855182736683, "grad_norm": 0.0, "learning_rate": 1.3195020746887967e-05, "loss": 1.4641, "step": 159 }, { "epoch": 0.01995759012099289, "grad_norm": 0.0, "learning_rate": 1.3278008298755187e-05, "loss": 1.5197, "step": 160 }, { "epoch": 0.020082325059249097, "grad_norm": 0.0, "learning_rate": 1.3360995850622406e-05, "loss": 1.536, "step": 161 }, { "epoch": 0.0202070599975053, "grad_norm": 0.0, "learning_rate": 1.3443983402489628e-05, "loss": 1.44, "step": 162 }, { "epoch": 0.020331794935761507, "grad_norm": 0.0, "learning_rate": 1.3526970954356849e-05, "loss": 1.5401, "step": 163 }, { "epoch": 0.020456529874017713, "grad_norm": 0.0, "learning_rate": 1.3609958506224067e-05, "loss": 1.5658, "step": 164 }, { "epoch": 0.020581264812273917, "grad_norm": 0.0, "learning_rate": 1.3692946058091288e-05, "loss": 1.474, "step": 165 }, { "epoch": 0.020705999750530123, "grad_norm": 0.0, "learning_rate": 1.3775933609958507e-05, "loss": 1.5051, "step": 166 }, { "epoch": 0.02083073468878633, "grad_norm": 0.0, "learning_rate": 1.3858921161825727e-05, "loss": 1.3882, "step": 167 }, { "epoch": 0.020955469627042533, "grad_norm": 0.0, "learning_rate": 1.3941908713692946e-05, "loss": 1.5312, "step": 168 }, { "epoch": 0.02108020456529874, "grad_norm": 0.0, "learning_rate": 1.4024896265560166e-05, "loss": 1.5081, "step": 169 }, { "epoch": 0.021204939503554947, "grad_norm": 0.0, "learning_rate": 1.4107883817427388e-05, "loss": 1.4842, "step": 170 }, { "epoch": 0.02132967444181115, "grad_norm": 0.0, "learning_rate": 1.4190871369294607e-05, "loss": 1.4899, "step": 171 }, { "epoch": 0.021454409380067357, "grad_norm": 0.0, "learning_rate": 1.4273858921161828e-05, "loss": 1.5141, "step": 172 }, { "epoch": 0.021579144318323563, "grad_norm": 0.0, "learning_rate": 1.4356846473029046e-05, "loss": 1.4546, "step": 173 }, { "epoch": 0.021703879256579767, "grad_norm": 0.0, "learning_rate": 1.4439834024896267e-05, "loss": 1.5677, "step": 174 }, { "epoch": 0.021828614194835973, "grad_norm": 0.0, "learning_rate": 1.4522821576763485e-05, "loss": 1.4691, "step": 175 }, { "epoch": 0.02195334913309218, "grad_norm": 0.0, "learning_rate": 1.4605809128630706e-05, "loss": 1.4057, "step": 176 }, { "epoch": 0.022078084071348383, "grad_norm": 0.0, "learning_rate": 1.4688796680497928e-05, "loss": 1.5069, "step": 177 }, { "epoch": 0.02220281900960459, "grad_norm": 0.0, "learning_rate": 1.4771784232365147e-05, "loss": 1.4609, "step": 178 }, { "epoch": 0.022327553947860797, "grad_norm": 0.0, "learning_rate": 1.4854771784232367e-05, "loss": 1.4708, "step": 179 }, { "epoch": 0.022452288886117, "grad_norm": 0.0, "learning_rate": 1.4937759336099586e-05, "loss": 1.5024, "step": 180 }, { "epoch": 0.022577023824373207, "grad_norm": 0.0, "learning_rate": 1.5020746887966806e-05, "loss": 1.4709, "step": 181 }, { "epoch": 0.022701758762629413, "grad_norm": 0.0, "learning_rate": 1.5103734439834025e-05, "loss": 1.4758, "step": 182 }, { "epoch": 0.022826493700885617, "grad_norm": 0.0, "learning_rate": 1.5186721991701246e-05, "loss": 1.4903, "step": 183 }, { "epoch": 0.022951228639141823, "grad_norm": 0.0, "learning_rate": 1.5269709543568464e-05, "loss": 1.4932, "step": 184 }, { "epoch": 0.02307596357739803, "grad_norm": 0.0, "learning_rate": 1.5352697095435688e-05, "loss": 1.4515, "step": 185 }, { "epoch": 0.023200698515654233, "grad_norm": 0.0, "learning_rate": 1.5435684647302905e-05, "loss": 1.453, "step": 186 }, { "epoch": 0.02332543345391044, "grad_norm": 0.0, "learning_rate": 1.5518672199170126e-05, "loss": 1.4594, "step": 187 }, { "epoch": 0.023450168392166647, "grad_norm": 0.0, "learning_rate": 1.5601659751037346e-05, "loss": 1.3783, "step": 188 }, { "epoch": 0.02357490333042285, "grad_norm": 0.0, "learning_rate": 1.5684647302904566e-05, "loss": 1.4978, "step": 189 }, { "epoch": 0.023699638268679057, "grad_norm": 0.0, "learning_rate": 1.5767634854771783e-05, "loss": 1.4607, "step": 190 }, { "epoch": 0.023824373206935263, "grad_norm": 0.0, "learning_rate": 1.5850622406639004e-05, "loss": 1.4478, "step": 191 }, { "epoch": 0.023949108145191467, "grad_norm": 0.0, "learning_rate": 1.5933609958506224e-05, "loss": 1.5088, "step": 192 }, { "epoch": 0.024073843083447673, "grad_norm": 0.0, "learning_rate": 1.6016597510373445e-05, "loss": 1.4428, "step": 193 }, { "epoch": 0.02419857802170388, "grad_norm": 0.0, "learning_rate": 1.6099585062240665e-05, "loss": 1.4286, "step": 194 }, { "epoch": 0.024323312959960083, "grad_norm": 0.0, "learning_rate": 1.6182572614107886e-05, "loss": 1.4267, "step": 195 }, { "epoch": 0.02444804789821629, "grad_norm": 0.0, "learning_rate": 1.6265560165975106e-05, "loss": 1.4085, "step": 196 }, { "epoch": 0.024572782836472497, "grad_norm": 0.0, "learning_rate": 1.6348547717842323e-05, "loss": 1.4354, "step": 197 }, { "epoch": 0.0246975177747287, "grad_norm": 0.0, "learning_rate": 1.6431535269709543e-05, "loss": 1.4773, "step": 198 }, { "epoch": 0.024822252712984907, "grad_norm": 0.0, "learning_rate": 1.6514522821576764e-05, "loss": 1.458, "step": 199 }, { "epoch": 0.024946987651241113, "grad_norm": 0.0, "learning_rate": 1.6597510373443984e-05, "loss": 1.4134, "step": 200 }, { "epoch": 0.025071722589497317, "grad_norm": 0.0, "learning_rate": 1.6680497925311205e-05, "loss": 1.4628, "step": 201 }, { "epoch": 0.025196457527753523, "grad_norm": 0.0, "learning_rate": 1.6763485477178425e-05, "loss": 1.4108, "step": 202 }, { "epoch": 0.02532119246600973, "grad_norm": 0.0, "learning_rate": 1.6846473029045646e-05, "loss": 1.3974, "step": 203 }, { "epoch": 0.025445927404265933, "grad_norm": 0.0, "learning_rate": 1.6929460580912863e-05, "loss": 1.4143, "step": 204 }, { "epoch": 0.02557066234252214, "grad_norm": 0.0, "learning_rate": 1.7012448132780083e-05, "loss": 1.432, "step": 205 }, { "epoch": 0.025695397280778347, "grad_norm": 0.0, "learning_rate": 1.7095435684647304e-05, "loss": 1.3939, "step": 206 }, { "epoch": 0.02582013221903455, "grad_norm": 0.0, "learning_rate": 1.7178423236514524e-05, "loss": 1.4116, "step": 207 }, { "epoch": 0.025944867157290757, "grad_norm": 0.0, "learning_rate": 1.7261410788381744e-05, "loss": 1.4785, "step": 208 }, { "epoch": 0.026069602095546963, "grad_norm": 0.0, "learning_rate": 1.7344398340248965e-05, "loss": 1.4581, "step": 209 }, { "epoch": 0.026194337033803167, "grad_norm": 0.0, "learning_rate": 1.7427385892116185e-05, "loss": 1.4322, "step": 210 }, { "epoch": 0.026319071972059373, "grad_norm": 0.0, "learning_rate": 1.7510373443983402e-05, "loss": 1.3992, "step": 211 }, { "epoch": 0.02644380691031558, "grad_norm": 0.0, "learning_rate": 1.7593360995850623e-05, "loss": 1.4504, "step": 212 }, { "epoch": 0.026568541848571783, "grad_norm": 0.0, "learning_rate": 1.7676348547717843e-05, "loss": 1.4158, "step": 213 }, { "epoch": 0.02669327678682799, "grad_norm": 0.0, "learning_rate": 1.7759336099585064e-05, "loss": 1.4429, "step": 214 }, { "epoch": 0.026818011725084197, "grad_norm": 0.0, "learning_rate": 1.7842323651452284e-05, "loss": 1.3657, "step": 215 }, { "epoch": 0.0269427466633404, "grad_norm": 0.0, "learning_rate": 1.7925311203319504e-05, "loss": 1.4196, "step": 216 }, { "epoch": 0.027067481601596607, "grad_norm": 0.0, "learning_rate": 1.8008298755186725e-05, "loss": 1.4069, "step": 217 }, { "epoch": 0.027192216539852813, "grad_norm": 0.0, "learning_rate": 1.8091286307053942e-05, "loss": 1.4005, "step": 218 }, { "epoch": 0.027316951478109017, "grad_norm": 0.0, "learning_rate": 1.8174273858921162e-05, "loss": 1.4409, "step": 219 }, { "epoch": 0.027441686416365223, "grad_norm": 0.0, "learning_rate": 1.8257261410788383e-05, "loss": 1.4544, "step": 220 }, { "epoch": 0.02756642135462143, "grad_norm": 0.0, "learning_rate": 1.8340248962655603e-05, "loss": 1.3764, "step": 221 }, { "epoch": 0.027691156292877633, "grad_norm": 0.0, "learning_rate": 1.8423236514522824e-05, "loss": 1.3881, "step": 222 }, { "epoch": 0.02781589123113384, "grad_norm": 0.0, "learning_rate": 1.8506224066390044e-05, "loss": 1.4147, "step": 223 }, { "epoch": 0.027940626169390047, "grad_norm": 0.0, "learning_rate": 1.8589211618257264e-05, "loss": 1.3798, "step": 224 }, { "epoch": 0.02806536110764625, "grad_norm": 0.0, "learning_rate": 1.867219917012448e-05, "loss": 1.3735, "step": 225 }, { "epoch": 0.028190096045902457, "grad_norm": 0.0, "learning_rate": 1.8755186721991702e-05, "loss": 1.4403, "step": 226 }, { "epoch": 0.028314830984158663, "grad_norm": 0.0, "learning_rate": 1.8838174273858922e-05, "loss": 1.4008, "step": 227 }, { "epoch": 0.028439565922414867, "grad_norm": 0.0, "learning_rate": 1.8921161825726143e-05, "loss": 1.4804, "step": 228 }, { "epoch": 0.028564300860671073, "grad_norm": 0.0, "learning_rate": 1.9004149377593363e-05, "loss": 1.4216, "step": 229 }, { "epoch": 0.02868903579892728, "grad_norm": 0.0, "learning_rate": 1.9087136929460584e-05, "loss": 1.4521, "step": 230 }, { "epoch": 0.028813770737183483, "grad_norm": 0.0, "learning_rate": 1.9170124481327804e-05, "loss": 1.482, "step": 231 }, { "epoch": 0.02893850567543969, "grad_norm": 0.0, "learning_rate": 1.925311203319502e-05, "loss": 1.423, "step": 232 }, { "epoch": 0.029063240613695897, "grad_norm": 0.0, "learning_rate": 1.933609958506224e-05, "loss": 1.3702, "step": 233 }, { "epoch": 0.029187975551952103, "grad_norm": 0.0, "learning_rate": 1.9419087136929462e-05, "loss": 1.4495, "step": 234 }, { "epoch": 0.029312710490208307, "grad_norm": 0.0, "learning_rate": 1.9502074688796682e-05, "loss": 1.4165, "step": 235 }, { "epoch": 0.029437445428464513, "grad_norm": 0.0, "learning_rate": 1.9585062240663903e-05, "loss": 1.402, "step": 236 }, { "epoch": 0.02956218036672072, "grad_norm": 0.0, "learning_rate": 1.966804979253112e-05, "loss": 1.4022, "step": 237 }, { "epoch": 0.029686915304976923, "grad_norm": 0.0, "learning_rate": 1.9751037344398344e-05, "loss": 1.415, "step": 238 }, { "epoch": 0.02981165024323313, "grad_norm": 0.0, "learning_rate": 1.983402489626556e-05, "loss": 1.393, "step": 239 }, { "epoch": 0.029936385181489337, "grad_norm": 0.0, "learning_rate": 1.991701244813278e-05, "loss": 1.4454, "step": 240 }, { "epoch": 0.03006112011974554, "grad_norm": 0.0, "learning_rate": 2e-05, "loss": 1.444, "step": 241 }, { "epoch": 0.030185855058001747, "grad_norm": 0.0, "learning_rate": 1.9999999183873952e-05, "loss": 1.3646, "step": 242 }, { "epoch": 0.030310589996257953, "grad_norm": 0.0, "learning_rate": 1.9999996735495937e-05, "loss": 1.4145, "step": 243 }, { "epoch": 0.030435324934514157, "grad_norm": 0.0, "learning_rate": 1.999999265486636e-05, "loss": 1.4282, "step": 244 }, { "epoch": 0.030560059872770363, "grad_norm": 0.0, "learning_rate": 1.999998694198588e-05, "loss": 1.3492, "step": 245 }, { "epoch": 0.03068479481102657, "grad_norm": 0.0, "learning_rate": 1.999997959685543e-05, "loss": 1.3837, "step": 246 }, { "epoch": 0.030809529749282773, "grad_norm": 0.0, "learning_rate": 1.9999970619476214e-05, "loss": 1.3545, "step": 247 }, { "epoch": 0.03093426468753898, "grad_norm": 0.0, "learning_rate": 1.9999960009849694e-05, "loss": 1.345, "step": 248 }, { "epoch": 0.031058999625795187, "grad_norm": 0.0, "learning_rate": 1.9999947767977607e-05, "loss": 1.3299, "step": 249 }, { "epoch": 0.03118373456405139, "grad_norm": 0.0, "learning_rate": 1.9999933893861945e-05, "loss": 1.3181, "step": 250 }, { "epoch": 0.0313084695023076, "grad_norm": 0.0, "learning_rate": 1.9999918387504974e-05, "loss": 1.3889, "step": 251 }, { "epoch": 0.0314332044405638, "grad_norm": 0.0, "learning_rate": 1.9999901248909225e-05, "loss": 1.4311, "step": 252 }, { "epoch": 0.03155793937882001, "grad_norm": 0.0, "learning_rate": 1.9999882478077497e-05, "loss": 1.349, "step": 253 }, { "epoch": 0.03168267431707621, "grad_norm": 0.0, "learning_rate": 1.9999862075012852e-05, "loss": 1.4062, "step": 254 }, { "epoch": 0.03180740925533242, "grad_norm": 0.0, "learning_rate": 1.9999840039718622e-05, "loss": 1.3496, "step": 255 }, { "epoch": 0.03193214419358863, "grad_norm": 0.0, "learning_rate": 1.9999816372198405e-05, "loss": 1.364, "step": 256 }, { "epoch": 0.03205687913184483, "grad_norm": 0.0, "learning_rate": 1.9999791072456062e-05, "loss": 1.3767, "step": 257 }, { "epoch": 0.03218161407010103, "grad_norm": 0.0, "learning_rate": 1.9999764140495718e-05, "loss": 1.3467, "step": 258 }, { "epoch": 0.032306349008357244, "grad_norm": 0.0, "learning_rate": 1.9999735576321776e-05, "loss": 1.3541, "step": 259 }, { "epoch": 0.03243108394661345, "grad_norm": 0.0, "learning_rate": 1.99997053799389e-05, "loss": 1.3436, "step": 260 }, { "epoch": 0.03255581888486965, "grad_norm": 0.0, "learning_rate": 1.9999673551352008e-05, "loss": 1.4165, "step": 261 }, { "epoch": 0.03268055382312586, "grad_norm": 0.0, "learning_rate": 1.9999640090566304e-05, "loss": 1.3678, "step": 262 }, { "epoch": 0.03280528876138206, "grad_norm": 0.0, "learning_rate": 1.9999604997587248e-05, "loss": 1.3755, "step": 263 }, { "epoch": 0.03293002369963827, "grad_norm": 0.0, "learning_rate": 1.9999568272420575e-05, "loss": 1.3862, "step": 264 }, { "epoch": 0.03305475863789448, "grad_norm": 0.0, "learning_rate": 1.999952991507226e-05, "loss": 1.3936, "step": 265 }, { "epoch": 0.03317949357615068, "grad_norm": 0.0, "learning_rate": 1.9999489925548585e-05, "loss": 1.3232, "step": 266 }, { "epoch": 0.03330422851440688, "grad_norm": 0.0, "learning_rate": 1.9999448303856066e-05, "loss": 1.3867, "step": 267 }, { "epoch": 0.033428963452663094, "grad_norm": 0.0, "learning_rate": 1.99994050500015e-05, "loss": 1.3374, "step": 268 }, { "epoch": 0.0335536983909193, "grad_norm": 0.0, "learning_rate": 1.9999360163991946e-05, "loss": 1.4554, "step": 269 }, { "epoch": 0.0336784333291755, "grad_norm": 0.0, "learning_rate": 1.9999313645834727e-05, "loss": 1.3641, "step": 270 }, { "epoch": 0.03380316826743171, "grad_norm": 0.0, "learning_rate": 1.9999265495537443e-05, "loss": 1.3695, "step": 271 }, { "epoch": 0.03392790320568791, "grad_norm": 0.0, "learning_rate": 1.9999215713107952e-05, "loss": 1.3617, "step": 272 }, { "epoch": 0.03405263814394412, "grad_norm": 0.0, "learning_rate": 1.9999164298554375e-05, "loss": 1.3378, "step": 273 }, { "epoch": 0.03417737308220033, "grad_norm": 0.0, "learning_rate": 1.9999111251885108e-05, "loss": 1.377, "step": 274 }, { "epoch": 0.03430210802045653, "grad_norm": 0.0, "learning_rate": 1.9999056573108807e-05, "loss": 1.3122, "step": 275 }, { "epoch": 0.03442684295871273, "grad_norm": 0.0, "learning_rate": 1.99990002622344e-05, "loss": 1.3748, "step": 276 }, { "epoch": 0.034551577896968944, "grad_norm": 0.0, "learning_rate": 1.9998942319271076e-05, "loss": 1.3574, "step": 277 }, { "epoch": 0.03467631283522515, "grad_norm": 0.0, "learning_rate": 1.9998882744228294e-05, "loss": 1.3534, "step": 278 }, { "epoch": 0.03480104777348135, "grad_norm": 0.0, "learning_rate": 1.999882153711578e-05, "loss": 1.3472, "step": 279 }, { "epoch": 0.03492578271173756, "grad_norm": 0.0, "learning_rate": 1.9998758697943518e-05, "loss": 1.3322, "step": 280 }, { "epoch": 0.03505051764999376, "grad_norm": 0.0, "learning_rate": 1.9998694226721774e-05, "loss": 1.3991, "step": 281 }, { "epoch": 0.03517525258824997, "grad_norm": 0.0, "learning_rate": 1.9998628123461064e-05, "loss": 1.3986, "step": 282 }, { "epoch": 0.03529998752650618, "grad_norm": 0.0, "learning_rate": 1.999856038817218e-05, "loss": 1.3606, "step": 283 }, { "epoch": 0.03542472246476238, "grad_norm": 0.0, "learning_rate": 1.9998491020866176e-05, "loss": 1.3033, "step": 284 }, { "epoch": 0.03554945740301858, "grad_norm": 0.0, "learning_rate": 1.999842002155438e-05, "loss": 1.3275, "step": 285 }, { "epoch": 0.035674192341274794, "grad_norm": 0.0, "learning_rate": 1.9998347390248377e-05, "loss": 1.3367, "step": 286 }, { "epoch": 0.035798927279531, "grad_norm": 0.0, "learning_rate": 1.9998273126960024e-05, "loss": 1.3499, "step": 287 }, { "epoch": 0.0359236622177872, "grad_norm": 0.0, "learning_rate": 1.999819723170144e-05, "loss": 1.3171, "step": 288 }, { "epoch": 0.03604839715604341, "grad_norm": 0.0, "learning_rate": 1.9998119704485016e-05, "loss": 1.3367, "step": 289 }, { "epoch": 0.03617313209429961, "grad_norm": 0.0, "learning_rate": 1.9998040545323403e-05, "loss": 1.3589, "step": 290 }, { "epoch": 0.03629786703255582, "grad_norm": 0.0, "learning_rate": 1.9997959754229526e-05, "loss": 1.3679, "step": 291 }, { "epoch": 0.03642260197081203, "grad_norm": 0.0, "learning_rate": 1.9997877331216568e-05, "loss": 1.3084, "step": 292 }, { "epoch": 0.03654733690906823, "grad_norm": 0.0, "learning_rate": 1.9997793276297988e-05, "loss": 1.3152, "step": 293 }, { "epoch": 0.03667207184732443, "grad_norm": 0.0, "learning_rate": 1.99977075894875e-05, "loss": 1.4142, "step": 294 }, { "epoch": 0.036796806785580644, "grad_norm": 0.0, "learning_rate": 1.999762027079909e-05, "loss": 1.3081, "step": 295 }, { "epoch": 0.03692154172383685, "grad_norm": 0.0, "learning_rate": 1.9997531320247017e-05, "loss": 1.3402, "step": 296 }, { "epoch": 0.03704627666209305, "grad_norm": 0.0, "learning_rate": 1.9997440737845798e-05, "loss": 1.3714, "step": 297 }, { "epoch": 0.03717101160034926, "grad_norm": 0.0, "learning_rate": 1.9997348523610212e-05, "loss": 1.3283, "step": 298 }, { "epoch": 0.03729574653860546, "grad_norm": 0.0, "learning_rate": 1.999725467755532e-05, "loss": 1.3152, "step": 299 }, { "epoch": 0.03742048147686167, "grad_norm": 0.0, "learning_rate": 1.999715919969643e-05, "loss": 1.2391, "step": 300 }, { "epoch": 0.03754521641511788, "grad_norm": 0.0, "learning_rate": 1.9997062090049135e-05, "loss": 1.3388, "step": 301 }, { "epoch": 0.03766995135337408, "grad_norm": 0.0, "learning_rate": 1.999696334862928e-05, "loss": 1.3442, "step": 302 }, { "epoch": 0.03779468629163028, "grad_norm": 0.0, "learning_rate": 1.9996862975452986e-05, "loss": 1.3282, "step": 303 }, { "epoch": 0.037919421229886494, "grad_norm": 0.0, "learning_rate": 1.9996760970536635e-05, "loss": 1.3693, "step": 304 }, { "epoch": 0.0380441561681427, "grad_norm": 0.0, "learning_rate": 1.9996657333896875e-05, "loss": 1.315, "step": 305 }, { "epoch": 0.0381688911063989, "grad_norm": 0.0, "learning_rate": 1.9996552065550626e-05, "loss": 1.2988, "step": 306 }, { "epoch": 0.03829362604465511, "grad_norm": 0.0, "learning_rate": 1.9996445165515066e-05, "loss": 1.3309, "step": 307 }, { "epoch": 0.03841836098291131, "grad_norm": 0.0, "learning_rate": 1.9996336633807648e-05, "loss": 1.3785, "step": 308 }, { "epoch": 0.03854309592116752, "grad_norm": 0.0, "learning_rate": 1.9996226470446088e-05, "loss": 1.2817, "step": 309 }, { "epoch": 0.03866783085942373, "grad_norm": 0.0, "learning_rate": 1.999611467544836e-05, "loss": 1.3019, "step": 310 }, { "epoch": 0.03879256579767993, "grad_norm": 0.0, "learning_rate": 1.9996001248832715e-05, "loss": 1.3315, "step": 311 }, { "epoch": 0.03891730073593613, "grad_norm": 0.0, "learning_rate": 1.999588619061767e-05, "loss": 1.3281, "step": 312 }, { "epoch": 0.039042035674192344, "grad_norm": 0.0, "learning_rate": 1.9995769500822007e-05, "loss": 1.2834, "step": 313 }, { "epoch": 0.03916677061244855, "grad_norm": 0.0, "learning_rate": 1.999565117946477e-05, "loss": 1.3226, "step": 314 }, { "epoch": 0.03929150555070475, "grad_norm": 0.0, "learning_rate": 1.9995531226565267e-05, "loss": 1.3378, "step": 315 }, { "epoch": 0.03941624048896096, "grad_norm": 0.0, "learning_rate": 1.9995409642143083e-05, "loss": 1.2994, "step": 316 }, { "epoch": 0.03954097542721716, "grad_norm": 0.0, "learning_rate": 1.9995286426218065e-05, "loss": 1.2816, "step": 317 }, { "epoch": 0.03966571036547337, "grad_norm": 0.0, "learning_rate": 1.999516157881032e-05, "loss": 1.2961, "step": 318 }, { "epoch": 0.03979044530372958, "grad_norm": 0.0, "learning_rate": 1.999503509994023e-05, "loss": 1.304, "step": 319 }, { "epoch": 0.03991518024198578, "grad_norm": 0.0, "learning_rate": 1.9994906989628437e-05, "loss": 1.3051, "step": 320 }, { "epoch": 0.04003991518024198, "grad_norm": 0.0, "learning_rate": 1.9994777247895855e-05, "loss": 1.3011, "step": 321 }, { "epoch": 0.040164650118498194, "grad_norm": 0.0, "learning_rate": 1.9994645874763657e-05, "loss": 1.3433, "step": 322 }, { "epoch": 0.0402893850567544, "grad_norm": 0.0, "learning_rate": 1.999451287025329e-05, "loss": 1.2542, "step": 323 }, { "epoch": 0.0404141199950106, "grad_norm": 0.0, "learning_rate": 1.9994378234386464e-05, "loss": 1.2776, "step": 324 }, { "epoch": 0.04053885493326681, "grad_norm": 0.0, "learning_rate": 1.999424196718515e-05, "loss": 1.3314, "step": 325 }, { "epoch": 0.04066358987152301, "grad_norm": 0.0, "learning_rate": 1.9994104068671597e-05, "loss": 1.2867, "step": 326 }, { "epoch": 0.04078832480977922, "grad_norm": 0.0, "learning_rate": 1.9993964538868307e-05, "loss": 1.3129, "step": 327 }, { "epoch": 0.04091305974803543, "grad_norm": 0.0, "learning_rate": 1.9993823377798058e-05, "loss": 1.3098, "step": 328 }, { "epoch": 0.04103779468629163, "grad_norm": 0.0, "learning_rate": 1.9993680585483895e-05, "loss": 1.3344, "step": 329 }, { "epoch": 0.04116252962454783, "grad_norm": 0.0, "learning_rate": 1.9993536161949118e-05, "loss": 1.2674, "step": 330 }, { "epoch": 0.041287264562804044, "grad_norm": 0.0, "learning_rate": 1.9993390107217302e-05, "loss": 1.3734, "step": 331 }, { "epoch": 0.04141199950106025, "grad_norm": 0.0, "learning_rate": 1.9993242421312292e-05, "loss": 1.2497, "step": 332 }, { "epoch": 0.04153673443931645, "grad_norm": 0.0, "learning_rate": 1.9993093104258186e-05, "loss": 1.2866, "step": 333 }, { "epoch": 0.04166146937757266, "grad_norm": 0.0, "learning_rate": 1.9992942156079364e-05, "loss": 1.3242, "step": 334 }, { "epoch": 0.04178620431582886, "grad_norm": 0.0, "learning_rate": 1.9992789576800464e-05, "loss": 1.3275, "step": 335 }, { "epoch": 0.04191093925408507, "grad_norm": 0.0, "learning_rate": 1.9992635366446387e-05, "loss": 1.3239, "step": 336 }, { "epoch": 0.04203567419234128, "grad_norm": 0.0, "learning_rate": 1.9992479525042305e-05, "loss": 1.2924, "step": 337 }, { "epoch": 0.04216040913059748, "grad_norm": 0.0, "learning_rate": 1.999232205261365e-05, "loss": 1.2626, "step": 338 }, { "epoch": 0.04228514406885368, "grad_norm": 0.0, "learning_rate": 1.9992162949186138e-05, "loss": 1.3005, "step": 339 }, { "epoch": 0.042409879007109894, "grad_norm": 0.0, "learning_rate": 1.999200221478573e-05, "loss": 1.324, "step": 340 }, { "epoch": 0.0425346139453661, "grad_norm": 0.0, "learning_rate": 1.999183984943866e-05, "loss": 1.357, "step": 341 }, { "epoch": 0.0426593488836223, "grad_norm": 0.0, "learning_rate": 1.9991675853171438e-05, "loss": 1.2928, "step": 342 }, { "epoch": 0.04278408382187851, "grad_norm": 0.0, "learning_rate": 1.9991510226010827e-05, "loss": 1.2863, "step": 343 }, { "epoch": 0.04290881876013471, "grad_norm": 0.0, "learning_rate": 1.9991342967983863e-05, "loss": 1.3611, "step": 344 }, { "epoch": 0.04303355369839092, "grad_norm": 0.0, "learning_rate": 1.9991174079117846e-05, "loss": 1.2972, "step": 345 }, { "epoch": 0.04315828863664713, "grad_norm": 0.0, "learning_rate": 1.9991003559440342e-05, "loss": 1.3023, "step": 346 }, { "epoch": 0.04328302357490333, "grad_norm": 0.0, "learning_rate": 1.9990831408979186e-05, "loss": 1.3423, "step": 347 }, { "epoch": 0.04340775851315953, "grad_norm": 0.0, "learning_rate": 1.9990657627762474e-05, "loss": 1.2875, "step": 348 }, { "epoch": 0.043532493451415744, "grad_norm": 0.0, "learning_rate": 1.999048221581858e-05, "loss": 1.2739, "step": 349 }, { "epoch": 0.04365722838967195, "grad_norm": 0.0, "learning_rate": 1.9990305173176124e-05, "loss": 1.2695, "step": 350 }, { "epoch": 0.04378196332792815, "grad_norm": 0.0, "learning_rate": 1.9990126499864013e-05, "loss": 1.2989, "step": 351 }, { "epoch": 0.04390669826618436, "grad_norm": 0.0, "learning_rate": 1.9989946195911408e-05, "loss": 1.2307, "step": 352 }, { "epoch": 0.04403143320444056, "grad_norm": 0.0, "learning_rate": 1.9989764261347738e-05, "loss": 1.3085, "step": 353 }, { "epoch": 0.04415616814269677, "grad_norm": 0.0, "learning_rate": 1.9989580696202696e-05, "loss": 1.3397, "step": 354 }, { "epoch": 0.04428090308095298, "grad_norm": 0.0, "learning_rate": 1.9989395500506254e-05, "loss": 1.2844, "step": 355 }, { "epoch": 0.04440563801920918, "grad_norm": 0.0, "learning_rate": 1.9989208674288635e-05, "loss": 1.2952, "step": 356 }, { "epoch": 0.04453037295746538, "grad_norm": 0.0, "learning_rate": 1.998902021758033e-05, "loss": 1.259, "step": 357 }, { "epoch": 0.044655107895721594, "grad_norm": 0.0, "learning_rate": 1.9988830130412106e-05, "loss": 1.3077, "step": 358 }, { "epoch": 0.0447798428339778, "grad_norm": 0.0, "learning_rate": 1.9988638412814986e-05, "loss": 1.3417, "step": 359 }, { "epoch": 0.044904577772234, "grad_norm": 0.0, "learning_rate": 1.9988445064820267e-05, "loss": 1.2841, "step": 360 }, { "epoch": 0.04502931271049021, "grad_norm": 0.0, "learning_rate": 1.9988250086459505e-05, "loss": 1.3176, "step": 361 }, { "epoch": 0.04515404764874641, "grad_norm": 0.0, "learning_rate": 1.9988053477764527e-05, "loss": 1.3395, "step": 362 }, { "epoch": 0.04527878258700262, "grad_norm": 0.0, "learning_rate": 1.9987855238767423e-05, "loss": 1.3195, "step": 363 }, { "epoch": 0.04540351752525883, "grad_norm": 0.0, "learning_rate": 1.9987655369500554e-05, "loss": 1.2623, "step": 364 }, { "epoch": 0.04552825246351503, "grad_norm": 0.0, "learning_rate": 1.9987453869996542e-05, "loss": 1.3117, "step": 365 }, { "epoch": 0.04565298740177123, "grad_norm": 0.0, "learning_rate": 1.998725074028827e-05, "loss": 1.3337, "step": 366 }, { "epoch": 0.045777722340027444, "grad_norm": 0.0, "learning_rate": 1.9987045980408907e-05, "loss": 1.272, "step": 367 }, { "epoch": 0.04590245727828365, "grad_norm": 0.0, "learning_rate": 1.9986839590391864e-05, "loss": 1.2695, "step": 368 }, { "epoch": 0.04602719221653985, "grad_norm": 0.0, "learning_rate": 1.9986631570270835e-05, "loss": 1.3513, "step": 369 }, { "epoch": 0.04615192715479606, "grad_norm": 0.0, "learning_rate": 1.998642192007977e-05, "loss": 1.2675, "step": 370 }, { "epoch": 0.04627666209305226, "grad_norm": 0.0, "learning_rate": 1.9986210639852892e-05, "loss": 1.285, "step": 371 }, { "epoch": 0.04640139703130847, "grad_norm": 0.0, "learning_rate": 1.9985997729624687e-05, "loss": 1.2039, "step": 372 }, { "epoch": 0.04652613196956468, "grad_norm": 0.0, "learning_rate": 1.9985783189429907e-05, "loss": 1.259, "step": 373 }, { "epoch": 0.04665086690782088, "grad_norm": 0.0, "learning_rate": 1.9985567019303573e-05, "loss": 1.3074, "step": 374 }, { "epoch": 0.04677560184607708, "grad_norm": 0.0, "learning_rate": 1.9985349219280964e-05, "loss": 1.2448, "step": 375 }, { "epoch": 0.046900336784333294, "grad_norm": 0.0, "learning_rate": 1.9985129789397633e-05, "loss": 1.2575, "step": 376 }, { "epoch": 0.0470250717225895, "grad_norm": 0.0, "learning_rate": 1.9984908729689398e-05, "loss": 1.2684, "step": 377 }, { "epoch": 0.0471498066608457, "grad_norm": 0.0, "learning_rate": 1.9984686040192337e-05, "loss": 1.3248, "step": 378 }, { "epoch": 0.04727454159910191, "grad_norm": 0.0, "learning_rate": 1.9984461720942804e-05, "loss": 1.2474, "step": 379 }, { "epoch": 0.047399276537358113, "grad_norm": 0.0, "learning_rate": 1.9984235771977412e-05, "loss": 1.3517, "step": 380 }, { "epoch": 0.04752401147561432, "grad_norm": 0.0, "learning_rate": 1.9984008193333043e-05, "loss": 1.2417, "step": 381 }, { "epoch": 0.04764874641387053, "grad_norm": 0.0, "learning_rate": 1.998377898504684e-05, "loss": 1.2614, "step": 382 }, { "epoch": 0.04777348135212673, "grad_norm": 0.0, "learning_rate": 1.9983548147156214e-05, "loss": 1.3036, "step": 383 }, { "epoch": 0.04789821629038293, "grad_norm": 0.0, "learning_rate": 1.9983315679698844e-05, "loss": 1.2699, "step": 384 }, { "epoch": 0.048022951228639144, "grad_norm": 0.0, "learning_rate": 1.9983081582712684e-05, "loss": 1.2444, "step": 385 }, { "epoch": 0.04814768616689535, "grad_norm": 0.0, "learning_rate": 1.9982845856235934e-05, "loss": 1.2605, "step": 386 }, { "epoch": 0.04827242110515155, "grad_norm": 0.0, "learning_rate": 1.9982608500307074e-05, "loss": 1.1995, "step": 387 }, { "epoch": 0.04839715604340776, "grad_norm": 0.0, "learning_rate": 1.998236951496485e-05, "loss": 1.2644, "step": 388 }, { "epoch": 0.048521890981663963, "grad_norm": 0.0, "learning_rate": 1.9982128900248266e-05, "loss": 1.277, "step": 389 }, { "epoch": 0.04864662591992017, "grad_norm": 0.0, "learning_rate": 1.9981886656196595e-05, "loss": 1.2546, "step": 390 }, { "epoch": 0.04877136085817638, "grad_norm": 0.0, "learning_rate": 1.9981642782849378e-05, "loss": 1.2752, "step": 391 }, { "epoch": 0.04889609579643258, "grad_norm": 0.0, "learning_rate": 1.9981397280246427e-05, "loss": 1.2784, "step": 392 }, { "epoch": 0.04902083073468878, "grad_norm": 0.0, "learning_rate": 1.9981150148427814e-05, "loss": 1.2501, "step": 393 }, { "epoch": 0.049145565672944994, "grad_norm": 0.0, "learning_rate": 1.9980901387433866e-05, "loss": 1.2794, "step": 394 }, { "epoch": 0.0492703006112012, "grad_norm": 0.0, "learning_rate": 1.9980650997305194e-05, "loss": 1.2524, "step": 395 }, { "epoch": 0.0493950355494574, "grad_norm": 0.0, "learning_rate": 1.9980398978082673e-05, "loss": 1.2914, "step": 396 }, { "epoch": 0.04951977048771361, "grad_norm": 0.0, "learning_rate": 1.9980145329807435e-05, "loss": 1.2453, "step": 397 }, { "epoch": 0.049644505425969813, "grad_norm": 0.0, "learning_rate": 1.997989005252088e-05, "loss": 1.2656, "step": 398 }, { "epoch": 0.04976924036422602, "grad_norm": 0.0, "learning_rate": 1.9979633146264675e-05, "loss": 1.2875, "step": 399 }, { "epoch": 0.04989397530248223, "grad_norm": 0.0, "learning_rate": 1.997937461108076e-05, "loss": 1.2415, "step": 400 }, { "epoch": 0.05001871024073843, "grad_norm": 0.0, "learning_rate": 1.9979114447011323e-05, "loss": 1.269, "step": 401 }, { "epoch": 0.05014344517899463, "grad_norm": 0.0, "learning_rate": 1.997885265409884e-05, "loss": 1.229, "step": 402 }, { "epoch": 0.050268180117250844, "grad_norm": 0.0, "learning_rate": 1.9978589232386036e-05, "loss": 1.2219, "step": 403 }, { "epoch": 0.05039291505550705, "grad_norm": 0.0, "learning_rate": 1.9978324181915915e-05, "loss": 1.2593, "step": 404 }, { "epoch": 0.05051764999376325, "grad_norm": 0.0, "learning_rate": 1.997805750273173e-05, "loss": 1.2693, "step": 405 }, { "epoch": 0.05064238493201946, "grad_norm": 0.0, "learning_rate": 1.997778919487702e-05, "loss": 1.3044, "step": 406 }, { "epoch": 0.050767119870275663, "grad_norm": 0.0, "learning_rate": 1.997751925839557e-05, "loss": 1.292, "step": 407 }, { "epoch": 0.05089185480853187, "grad_norm": 0.0, "learning_rate": 1.9977247693331447e-05, "loss": 1.2645, "step": 408 }, { "epoch": 0.05101658974678808, "grad_norm": 0.0, "learning_rate": 1.997697449972898e-05, "loss": 1.2001, "step": 409 }, { "epoch": 0.05114132468504428, "grad_norm": 0.0, "learning_rate": 1.9976699677632753e-05, "loss": 1.3092, "step": 410 }, { "epoch": 0.05126605962330048, "grad_norm": 0.0, "learning_rate": 1.9976423227087626e-05, "loss": 1.2694, "step": 411 }, { "epoch": 0.051390794561556694, "grad_norm": 0.0, "learning_rate": 1.9976145148138727e-05, "loss": 1.2837, "step": 412 }, { "epoch": 0.0515155294998129, "grad_norm": 0.0, "learning_rate": 1.997586544083144e-05, "loss": 1.2151, "step": 413 }, { "epoch": 0.0516402644380691, "grad_norm": 0.0, "learning_rate": 1.9975584105211425e-05, "loss": 1.2963, "step": 414 }, { "epoch": 0.05176499937632531, "grad_norm": 0.0, "learning_rate": 1.99753011413246e-05, "loss": 1.3239, "step": 415 }, { "epoch": 0.051889734314581513, "grad_norm": 0.0, "learning_rate": 1.997501654921715e-05, "loss": 1.2647, "step": 416 }, { "epoch": 0.05201446925283772, "grad_norm": 0.0, "learning_rate": 1.9974730328935534e-05, "loss": 1.2288, "step": 417 }, { "epoch": 0.05213920419109393, "grad_norm": 0.0, "learning_rate": 1.9974442480526465e-05, "loss": 1.2907, "step": 418 }, { "epoch": 0.05226393912935013, "grad_norm": 0.0, "learning_rate": 1.997415300403693e-05, "loss": 1.2991, "step": 419 }, { "epoch": 0.05238867406760633, "grad_norm": 0.0, "learning_rate": 1.997386189951418e-05, "loss": 1.2482, "step": 420 }, { "epoch": 0.052513409005862544, "grad_norm": 0.0, "learning_rate": 1.9973569167005725e-05, "loss": 1.2801, "step": 421 }, { "epoch": 0.05263814394411875, "grad_norm": 0.0, "learning_rate": 1.997327480655935e-05, "loss": 1.2961, "step": 422 }, { "epoch": 0.05276287888237495, "grad_norm": 0.0, "learning_rate": 1.99729788182231e-05, "loss": 1.2311, "step": 423 }, { "epoch": 0.05288761382063116, "grad_norm": 0.0, "learning_rate": 1.9972681202045294e-05, "loss": 1.2179, "step": 424 }, { "epoch": 0.053012348758887363, "grad_norm": 0.0, "learning_rate": 1.9972381958074505e-05, "loss": 1.2805, "step": 425 }, { "epoch": 0.05313708369714357, "grad_norm": 0.0, "learning_rate": 1.9972081086359576e-05, "loss": 1.2744, "step": 426 }, { "epoch": 0.05326181863539978, "grad_norm": 0.0, "learning_rate": 1.997177858694962e-05, "loss": 1.2847, "step": 427 }, { "epoch": 0.05338655357365598, "grad_norm": 0.0, "learning_rate": 1.997147445989401e-05, "loss": 1.2783, "step": 428 }, { "epoch": 0.05351128851191218, "grad_norm": 0.0, "learning_rate": 1.9971168705242392e-05, "loss": 1.2476, "step": 429 }, { "epoch": 0.053636023450168394, "grad_norm": 0.0, "learning_rate": 1.9970861323044667e-05, "loss": 1.2692, "step": 430 }, { "epoch": 0.0537607583884246, "grad_norm": 0.0, "learning_rate": 1.997055231335101e-05, "loss": 1.2622, "step": 431 }, { "epoch": 0.0538854933266808, "grad_norm": 0.0, "learning_rate": 1.997024167621186e-05, "loss": 1.3137, "step": 432 }, { "epoch": 0.05401022826493701, "grad_norm": 0.0, "learning_rate": 1.996992941167792e-05, "loss": 1.2643, "step": 433 }, { "epoch": 0.054134963203193213, "grad_norm": 0.0, "learning_rate": 1.9969615519800163e-05, "loss": 1.2616, "step": 434 }, { "epoch": 0.05425969814144942, "grad_norm": 0.0, "learning_rate": 1.9969300000629815e-05, "loss": 1.2589, "step": 435 }, { "epoch": 0.05438443307970563, "grad_norm": 0.0, "learning_rate": 1.9968982854218388e-05, "loss": 1.3031, "step": 436 }, { "epoch": 0.05450916801796183, "grad_norm": 0.0, "learning_rate": 1.9968664080617637e-05, "loss": 1.2956, "step": 437 }, { "epoch": 0.05463390295621803, "grad_norm": 0.0, "learning_rate": 1.9968343679879602e-05, "loss": 1.2476, "step": 438 }, { "epoch": 0.054758637894474244, "grad_norm": 0.0, "learning_rate": 1.9968021652056578e-05, "loss": 1.2601, "step": 439 }, { "epoch": 0.05488337283273045, "grad_norm": 0.0, "learning_rate": 1.9967697997201125e-05, "loss": 1.2869, "step": 440 }, { "epoch": 0.05500810777098665, "grad_norm": 0.0, "learning_rate": 1.996737271536608e-05, "loss": 1.2989, "step": 441 }, { "epoch": 0.05513284270924286, "grad_norm": 0.0, "learning_rate": 1.996704580660453e-05, "loss": 1.2529, "step": 442 }, { "epoch": 0.055257577647499063, "grad_norm": 0.0, "learning_rate": 1.9966717270969834e-05, "loss": 1.2828, "step": 443 }, { "epoch": 0.05538231258575527, "grad_norm": 0.0, "learning_rate": 1.996638710851562e-05, "loss": 1.2416, "step": 444 }, { "epoch": 0.05550704752401148, "grad_norm": 0.0, "learning_rate": 1.996605531929578e-05, "loss": 1.2961, "step": 445 }, { "epoch": 0.05563178246226768, "grad_norm": 0.0, "learning_rate": 1.996572190336447e-05, "loss": 1.255, "step": 446 }, { "epoch": 0.05575651740052388, "grad_norm": 0.0, "learning_rate": 1.996538686077611e-05, "loss": 1.2572, "step": 447 }, { "epoch": 0.055881252338780094, "grad_norm": 0.0, "learning_rate": 1.9965050191585388e-05, "loss": 1.2395, "step": 448 }, { "epoch": 0.0560059872770363, "grad_norm": 0.0, "learning_rate": 1.996471189584726e-05, "loss": 1.2694, "step": 449 }, { "epoch": 0.0561307222152925, "grad_norm": 0.0, "learning_rate": 1.9964371973616938e-05, "loss": 1.2789, "step": 450 }, { "epoch": 0.05625545715354871, "grad_norm": 0.0, "learning_rate": 1.996403042494991e-05, "loss": 1.2894, "step": 451 }, { "epoch": 0.056380192091804913, "grad_norm": 0.0, "learning_rate": 1.9963687249901927e-05, "loss": 1.2087, "step": 452 }, { "epoch": 0.05650492703006112, "grad_norm": 0.0, "learning_rate": 1.9963342448528998e-05, "loss": 1.2417, "step": 453 }, { "epoch": 0.05662966196831733, "grad_norm": 0.0, "learning_rate": 1.996299602088741e-05, "loss": 1.2704, "step": 454 }, { "epoch": 0.05675439690657353, "grad_norm": 0.0, "learning_rate": 1.9962647967033707e-05, "loss": 1.2597, "step": 455 }, { "epoch": 0.05687913184482973, "grad_norm": 0.0, "learning_rate": 1.9962298287024695e-05, "loss": 1.2427, "step": 456 }, { "epoch": 0.057003866783085944, "grad_norm": 0.0, "learning_rate": 1.9961946980917457e-05, "loss": 1.276, "step": 457 }, { "epoch": 0.05712860172134215, "grad_norm": 0.0, "learning_rate": 1.996159404876933e-05, "loss": 1.237, "step": 458 }, { "epoch": 0.05725333665959835, "grad_norm": 0.0, "learning_rate": 1.9961239490637927e-05, "loss": 1.2184, "step": 459 }, { "epoch": 0.05737807159785456, "grad_norm": 0.0, "learning_rate": 1.9960883306581116e-05, "loss": 1.3028, "step": 460 }, { "epoch": 0.057502806536110763, "grad_norm": 0.0, "learning_rate": 1.9960525496657038e-05, "loss": 1.2594, "step": 461 }, { "epoch": 0.05762754147436697, "grad_norm": 0.0, "learning_rate": 1.9960166060924094e-05, "loss": 1.2484, "step": 462 }, { "epoch": 0.05775227641262318, "grad_norm": 0.0, "learning_rate": 1.9959804999440953e-05, "loss": 1.293, "step": 463 }, { "epoch": 0.05787701135087938, "grad_norm": 0.0, "learning_rate": 1.9959442312266555e-05, "loss": 1.261, "step": 464 }, { "epoch": 0.05800174628913559, "grad_norm": 0.0, "learning_rate": 1.9959077999460094e-05, "loss": 1.2582, "step": 465 }, { "epoch": 0.058126481227391794, "grad_norm": 0.0, "learning_rate": 1.995871206108104e-05, "loss": 1.269, "step": 466 }, { "epoch": 0.058251216165648, "grad_norm": 0.0, "learning_rate": 1.9958344497189114e-05, "loss": 1.2559, "step": 467 }, { "epoch": 0.05837595110390421, "grad_norm": 0.0, "learning_rate": 1.995797530784432e-05, "loss": 1.2642, "step": 468 }, { "epoch": 0.05850068604216041, "grad_norm": 0.0, "learning_rate": 1.9957604493106916e-05, "loss": 1.3278, "step": 469 }, { "epoch": 0.058625420980416613, "grad_norm": 0.0, "learning_rate": 1.995723205303743e-05, "loss": 1.2647, "step": 470 }, { "epoch": 0.058750155918672824, "grad_norm": 0.0, "learning_rate": 1.995685798769665e-05, "loss": 1.2348, "step": 471 }, { "epoch": 0.05887489085692903, "grad_norm": 0.0, "learning_rate": 1.9956482297145638e-05, "loss": 1.2587, "step": 472 }, { "epoch": 0.05899962579518523, "grad_norm": 0.0, "learning_rate": 1.9956104981445713e-05, "loss": 1.2258, "step": 473 }, { "epoch": 0.05912436073344144, "grad_norm": 0.0, "learning_rate": 1.9955726040658463e-05, "loss": 1.2298, "step": 474 }, { "epoch": 0.059249095671697644, "grad_norm": 0.0, "learning_rate": 1.995534547484574e-05, "loss": 1.2215, "step": 475 }, { "epoch": 0.05937383060995385, "grad_norm": 0.0, "learning_rate": 1.9954963284069663e-05, "loss": 1.2773, "step": 476 }, { "epoch": 0.05949856554821006, "grad_norm": 0.0, "learning_rate": 1.9954579468392618e-05, "loss": 1.3185, "step": 477 }, { "epoch": 0.05962330048646626, "grad_norm": 0.0, "learning_rate": 1.9954194027877247e-05, "loss": 1.2587, "step": 478 }, { "epoch": 0.059748035424722463, "grad_norm": 0.0, "learning_rate": 1.9953806962586468e-05, "loss": 1.2447, "step": 479 }, { "epoch": 0.059872770362978674, "grad_norm": 0.0, "learning_rate": 1.9953418272583456e-05, "loss": 1.2195, "step": 480 }, { "epoch": 0.05999750530123488, "grad_norm": 0.0, "learning_rate": 1.9953027957931658e-05, "loss": 1.2139, "step": 481 }, { "epoch": 0.06012224023949108, "grad_norm": 0.0, "learning_rate": 1.9952636018694787e-05, "loss": 1.247, "step": 482 }, { "epoch": 0.06024697517774729, "grad_norm": 0.0, "learning_rate": 1.995224245493681e-05, "loss": 1.2773, "step": 483 }, { "epoch": 0.060371710116003494, "grad_norm": 0.0, "learning_rate": 1.995184726672197e-05, "loss": 1.2763, "step": 484 }, { "epoch": 0.0604964450542597, "grad_norm": 0.0, "learning_rate": 1.9951450454114773e-05, "loss": 1.2572, "step": 485 }, { "epoch": 0.06062117999251591, "grad_norm": 0.0, "learning_rate": 1.9951052017179985e-05, "loss": 1.2889, "step": 486 }, { "epoch": 0.06074591493077211, "grad_norm": 0.0, "learning_rate": 1.9950651955982646e-05, "loss": 1.2281, "step": 487 }, { "epoch": 0.060870649869028313, "grad_norm": 0.0, "learning_rate": 1.995025027058805e-05, "loss": 1.2619, "step": 488 }, { "epoch": 0.060995384807284524, "grad_norm": 0.0, "learning_rate": 1.994984696106177e-05, "loss": 1.2244, "step": 489 }, { "epoch": 0.06112011974554073, "grad_norm": 0.0, "learning_rate": 1.9949442027469628e-05, "loss": 1.2619, "step": 490 }, { "epoch": 0.06124485468379693, "grad_norm": 0.0, "learning_rate": 1.994903546987772e-05, "loss": 1.2383, "step": 491 }, { "epoch": 0.06136958962205314, "grad_norm": 0.0, "learning_rate": 1.9948627288352415e-05, "loss": 1.2615, "step": 492 }, { "epoch": 0.061494324560309344, "grad_norm": 0.0, "learning_rate": 1.9948217482960334e-05, "loss": 1.1972, "step": 493 }, { "epoch": 0.06161905949856555, "grad_norm": 0.0, "learning_rate": 1.9947806053768363e-05, "loss": 1.2298, "step": 494 }, { "epoch": 0.06174379443682176, "grad_norm": 0.0, "learning_rate": 1.994739300084366e-05, "loss": 1.2018, "step": 495 }, { "epoch": 0.06186852937507796, "grad_norm": 0.0, "learning_rate": 1.9946978324253652e-05, "loss": 1.2576, "step": 496 }, { "epoch": 0.061993264313334163, "grad_norm": 0.0, "learning_rate": 1.9946562024066018e-05, "loss": 1.2521, "step": 497 }, { "epoch": 0.062117999251590374, "grad_norm": 0.0, "learning_rate": 1.9946144100348707e-05, "loss": 1.2777, "step": 498 }, { "epoch": 0.06224273418984658, "grad_norm": 0.0, "learning_rate": 1.994572455316994e-05, "loss": 1.2727, "step": 499 }, { "epoch": 0.06236746912810278, "grad_norm": 0.0, "learning_rate": 1.9945303382598194e-05, "loss": 1.1839, "step": 500 }, { "epoch": 0.06249220406635899, "grad_norm": 0.0, "learning_rate": 1.994488058870222e-05, "loss": 1.2328, "step": 501 }, { "epoch": 0.0626169390046152, "grad_norm": 0.0, "learning_rate": 1.9944456171551024e-05, "loss": 1.2331, "step": 502 }, { "epoch": 0.0627416739428714, "grad_norm": 0.0, "learning_rate": 1.994403013121388e-05, "loss": 1.2455, "step": 503 }, { "epoch": 0.0628664088811276, "grad_norm": 0.0, "learning_rate": 1.9943602467760334e-05, "loss": 1.2375, "step": 504 }, { "epoch": 0.06299114381938381, "grad_norm": 0.0, "learning_rate": 1.9943173181260187e-05, "loss": 1.2629, "step": 505 }, { "epoch": 0.06311587875764002, "grad_norm": 0.0, "learning_rate": 1.9942742271783507e-05, "loss": 1.2928, "step": 506 }, { "epoch": 0.06324061369589622, "grad_norm": 0.0, "learning_rate": 1.994230973940064e-05, "loss": 1.2674, "step": 507 }, { "epoch": 0.06336534863415243, "grad_norm": 0.0, "learning_rate": 1.994187558418218e-05, "loss": 1.2476, "step": 508 }, { "epoch": 0.06349008357240864, "grad_norm": 0.0, "learning_rate": 1.9941439806198985e-05, "loss": 1.2244, "step": 509 }, { "epoch": 0.06361481851066483, "grad_norm": 0.0, "learning_rate": 1.994100240552219e-05, "loss": 1.2773, "step": 510 }, { "epoch": 0.06373955344892104, "grad_norm": 0.0, "learning_rate": 1.9940563382223196e-05, "loss": 1.2494, "step": 511 }, { "epoch": 0.06386428838717725, "grad_norm": 0.0, "learning_rate": 1.994012273637366e-05, "loss": 1.2431, "step": 512 }, { "epoch": 0.06398902332543345, "grad_norm": 0.0, "learning_rate": 1.99396804680455e-05, "loss": 1.2261, "step": 513 }, { "epoch": 0.06411375826368966, "grad_norm": 0.0, "learning_rate": 1.9939236577310913e-05, "loss": 1.2366, "step": 514 }, { "epoch": 0.06423849320194587, "grad_norm": 0.0, "learning_rate": 1.9938791064242348e-05, "loss": 1.2431, "step": 515 }, { "epoch": 0.06436322814020207, "grad_norm": 0.0, "learning_rate": 1.993834392891252e-05, "loss": 1.2, "step": 516 }, { "epoch": 0.06448796307845828, "grad_norm": 0.0, "learning_rate": 1.9937895171394427e-05, "loss": 1.2552, "step": 517 }, { "epoch": 0.06461269801671449, "grad_norm": 0.0, "learning_rate": 1.993744479176131e-05, "loss": 1.2407, "step": 518 }, { "epoch": 0.06473743295497068, "grad_norm": 0.0, "learning_rate": 1.9936992790086675e-05, "loss": 1.1943, "step": 519 }, { "epoch": 0.0648621678932269, "grad_norm": 0.0, "learning_rate": 1.993653916644431e-05, "loss": 1.2048, "step": 520 }, { "epoch": 0.0649869028314831, "grad_norm": 0.0, "learning_rate": 1.9936083920908254e-05, "loss": 1.2027, "step": 521 }, { "epoch": 0.0651116377697393, "grad_norm": 0.0, "learning_rate": 1.9935627053552814e-05, "loss": 1.239, "step": 522 }, { "epoch": 0.06523637270799551, "grad_norm": 0.0, "learning_rate": 1.9935168564452562e-05, "loss": 1.1902, "step": 523 }, { "epoch": 0.06536110764625172, "grad_norm": 0.0, "learning_rate": 1.993470845368234e-05, "loss": 1.2988, "step": 524 }, { "epoch": 0.06548584258450792, "grad_norm": 0.0, "learning_rate": 1.9934246721317244e-05, "loss": 1.274, "step": 525 }, { "epoch": 0.06561057752276413, "grad_norm": 0.0, "learning_rate": 1.993378336743264e-05, "loss": 1.2507, "step": 526 }, { "epoch": 0.06573531246102034, "grad_norm": 0.0, "learning_rate": 1.9933318392104166e-05, "loss": 1.1744, "step": 527 }, { "epoch": 0.06586004739927653, "grad_norm": 0.0, "learning_rate": 1.9932851795407705e-05, "loss": 1.2147, "step": 528 }, { "epoch": 0.06598478233753274, "grad_norm": 0.0, "learning_rate": 1.9932383577419432e-05, "loss": 1.1765, "step": 529 }, { "epoch": 0.06610951727578895, "grad_norm": 0.0, "learning_rate": 1.9931913738215763e-05, "loss": 1.2337, "step": 530 }, { "epoch": 0.06623425221404515, "grad_norm": 0.0, "learning_rate": 1.993144227787339e-05, "loss": 1.2313, "step": 531 }, { "epoch": 0.06635898715230136, "grad_norm": 0.0, "learning_rate": 1.9930969196469266e-05, "loss": 1.2144, "step": 532 }, { "epoch": 0.06648372209055757, "grad_norm": 0.0, "learning_rate": 1.9930494494080612e-05, "loss": 1.2574, "step": 533 }, { "epoch": 0.06660845702881377, "grad_norm": 0.0, "learning_rate": 1.9930018170784912e-05, "loss": 1.2599, "step": 534 }, { "epoch": 0.06673319196706998, "grad_norm": 0.0, "learning_rate": 1.9929540226659907e-05, "loss": 1.26, "step": 535 }, { "epoch": 0.06685792690532619, "grad_norm": 0.0, "learning_rate": 1.992906066178362e-05, "loss": 1.2598, "step": 536 }, { "epoch": 0.06698266184358238, "grad_norm": 0.0, "learning_rate": 1.992857947623432e-05, "loss": 1.2049, "step": 537 }, { "epoch": 0.0671073967818386, "grad_norm": 0.0, "learning_rate": 1.9928096670090552e-05, "loss": 1.2517, "step": 538 }, { "epoch": 0.0672321317200948, "grad_norm": 0.0, "learning_rate": 1.992761224343112e-05, "loss": 1.2235, "step": 539 }, { "epoch": 0.067356866658351, "grad_norm": 0.0, "learning_rate": 1.99271261963351e-05, "loss": 1.263, "step": 540 }, { "epoch": 0.06748160159660721, "grad_norm": 0.0, "learning_rate": 1.992663852888182e-05, "loss": 1.1687, "step": 541 }, { "epoch": 0.06760633653486342, "grad_norm": 0.0, "learning_rate": 1.9926149241150886e-05, "loss": 1.1635, "step": 542 }, { "epoch": 0.06773107147311962, "grad_norm": 0.0, "learning_rate": 1.9925658333222155e-05, "loss": 1.2097, "step": 543 }, { "epoch": 0.06785580641137583, "grad_norm": 0.0, "learning_rate": 1.992516580517576e-05, "loss": 1.1664, "step": 544 }, { "epoch": 0.06798054134963204, "grad_norm": 0.0, "learning_rate": 1.9924671657092097e-05, "loss": 1.222, "step": 545 }, { "epoch": 0.06810527628788823, "grad_norm": 0.0, "learning_rate": 1.992417588905182e-05, "loss": 1.1945, "step": 546 }, { "epoch": 0.06823001122614444, "grad_norm": 0.0, "learning_rate": 1.992367850113585e-05, "loss": 1.2369, "step": 547 }, { "epoch": 0.06835474616440065, "grad_norm": 0.0, "learning_rate": 1.992317949342537e-05, "loss": 1.2869, "step": 548 }, { "epoch": 0.06847948110265685, "grad_norm": 0.0, "learning_rate": 1.992267886600184e-05, "loss": 1.2296, "step": 549 }, { "epoch": 0.06860421604091306, "grad_norm": 0.0, "learning_rate": 1.9922176618946966e-05, "loss": 1.2031, "step": 550 }, { "epoch": 0.06872895097916927, "grad_norm": 0.0, "learning_rate": 1.9921672752342737e-05, "loss": 1.1726, "step": 551 }, { "epoch": 0.06885368591742547, "grad_norm": 0.0, "learning_rate": 1.9921167266271385e-05, "loss": 1.2414, "step": 552 }, { "epoch": 0.06897842085568168, "grad_norm": 0.0, "learning_rate": 1.9920660160815423e-05, "loss": 1.2369, "step": 553 }, { "epoch": 0.06910315579393789, "grad_norm": 0.0, "learning_rate": 1.992015143605763e-05, "loss": 1.2369, "step": 554 }, { "epoch": 0.06922789073219408, "grad_norm": 0.0, "learning_rate": 1.9919641092081033e-05, "loss": 1.1799, "step": 555 }, { "epoch": 0.0693526256704503, "grad_norm": 0.0, "learning_rate": 1.9919129128968938e-05, "loss": 1.203, "step": 556 }, { "epoch": 0.0694773606087065, "grad_norm": 0.0, "learning_rate": 1.9918615546804908e-05, "loss": 1.1923, "step": 557 }, { "epoch": 0.0696020955469627, "grad_norm": 0.0, "learning_rate": 1.991810034567278e-05, "loss": 1.2296, "step": 558 }, { "epoch": 0.06972683048521891, "grad_norm": 0.0, "learning_rate": 1.9917583525656633e-05, "loss": 1.2398, "step": 559 }, { "epoch": 0.06985156542347512, "grad_norm": 0.0, "learning_rate": 1.991706508684084e-05, "loss": 1.2308, "step": 560 }, { "epoch": 0.06997630036173132, "grad_norm": 0.0, "learning_rate": 1.991654502931001e-05, "loss": 1.2459, "step": 561 }, { "epoch": 0.07010103529998753, "grad_norm": 0.0, "learning_rate": 1.9916023353149042e-05, "loss": 1.2299, "step": 562 }, { "epoch": 0.07022577023824374, "grad_norm": 0.0, "learning_rate": 1.9915500058443084e-05, "loss": 1.2391, "step": 563 }, { "epoch": 0.07035050517649993, "grad_norm": 0.0, "learning_rate": 1.9914975145277545e-05, "loss": 1.2303, "step": 564 }, { "epoch": 0.07047524011475614, "grad_norm": 0.0, "learning_rate": 1.9914448613738107e-05, "loss": 1.1888, "step": 565 }, { "epoch": 0.07059997505301235, "grad_norm": 0.0, "learning_rate": 1.9913920463910713e-05, "loss": 1.2513, "step": 566 }, { "epoch": 0.07072470999126855, "grad_norm": 0.0, "learning_rate": 1.9913390695881572e-05, "loss": 1.2361, "step": 567 }, { "epoch": 0.07084944492952476, "grad_norm": 0.0, "learning_rate": 1.991285930973715e-05, "loss": 1.231, "step": 568 }, { "epoch": 0.07097417986778097, "grad_norm": 0.0, "learning_rate": 1.991232630556419e-05, "loss": 1.2686, "step": 569 }, { "epoch": 0.07109891480603717, "grad_norm": 0.0, "learning_rate": 1.991179168344969e-05, "loss": 1.176, "step": 570 }, { "epoch": 0.07122364974429338, "grad_norm": 0.0, "learning_rate": 1.991125544348091e-05, "loss": 1.2385, "step": 571 }, { "epoch": 0.07134838468254959, "grad_norm": 0.0, "learning_rate": 1.9910717585745383e-05, "loss": 1.2343, "step": 572 }, { "epoch": 0.07147311962080578, "grad_norm": 0.0, "learning_rate": 1.9910178110330894e-05, "loss": 1.2128, "step": 573 }, { "epoch": 0.071597854559062, "grad_norm": 0.0, "learning_rate": 1.9909637017325508e-05, "loss": 1.2366, "step": 574 }, { "epoch": 0.0717225894973182, "grad_norm": 0.0, "learning_rate": 1.990909430681754e-05, "loss": 1.23, "step": 575 }, { "epoch": 0.0718473244355744, "grad_norm": 0.0, "learning_rate": 1.9908549978895572e-05, "loss": 1.1457, "step": 576 }, { "epoch": 0.07197205937383061, "grad_norm": 0.0, "learning_rate": 1.9908004033648452e-05, "loss": 1.1935, "step": 577 }, { "epoch": 0.07209679431208682, "grad_norm": 0.0, "learning_rate": 1.9907456471165297e-05, "loss": 1.1779, "step": 578 }, { "epoch": 0.07222152925034302, "grad_norm": 0.0, "learning_rate": 1.9906907291535483e-05, "loss": 1.2126, "step": 579 }, { "epoch": 0.07234626418859923, "grad_norm": 0.0, "learning_rate": 1.9906356494848644e-05, "loss": 1.1645, "step": 580 }, { "epoch": 0.07247099912685544, "grad_norm": 0.0, "learning_rate": 1.990580408119469e-05, "loss": 1.1953, "step": 581 }, { "epoch": 0.07259573406511163, "grad_norm": 0.0, "learning_rate": 1.9905250050663786e-05, "loss": 1.1985, "step": 582 }, { "epoch": 0.07272046900336784, "grad_norm": 0.0, "learning_rate": 1.9904694403346362e-05, "loss": 1.1872, "step": 583 }, { "epoch": 0.07284520394162405, "grad_norm": 0.0, "learning_rate": 1.9904137139333114e-05, "loss": 1.207, "step": 584 }, { "epoch": 0.07296993887988025, "grad_norm": 0.0, "learning_rate": 1.9903578258715003e-05, "loss": 1.1853, "step": 585 }, { "epoch": 0.07309467381813646, "grad_norm": 0.0, "learning_rate": 1.9903017761583253e-05, "loss": 1.2363, "step": 586 }, { "epoch": 0.07321940875639267, "grad_norm": 0.0, "learning_rate": 1.990245564802935e-05, "loss": 1.1988, "step": 587 }, { "epoch": 0.07334414369464887, "grad_norm": 0.0, "learning_rate": 1.9901891918145048e-05, "loss": 1.3107, "step": 588 }, { "epoch": 0.07346887863290508, "grad_norm": 0.0, "learning_rate": 1.990132657202236e-05, "loss": 1.2249, "step": 589 }, { "epoch": 0.07359361357116129, "grad_norm": 0.0, "learning_rate": 1.9900759609753562e-05, "loss": 1.2184, "step": 590 }, { "epoch": 0.07371834850941748, "grad_norm": 0.0, "learning_rate": 1.9900191031431202e-05, "loss": 1.2606, "step": 591 }, { "epoch": 0.0738430834476737, "grad_norm": 0.0, "learning_rate": 1.989962083714808e-05, "loss": 1.2549, "step": 592 }, { "epoch": 0.0739678183859299, "grad_norm": 0.0, "learning_rate": 1.9899049026997272e-05, "loss": 1.249, "step": 593 }, { "epoch": 0.0740925533241861, "grad_norm": 0.0, "learning_rate": 1.9898475601072107e-05, "loss": 1.2637, "step": 594 }, { "epoch": 0.07421728826244231, "grad_norm": 0.0, "learning_rate": 1.9897900559466188e-05, "loss": 1.2268, "step": 595 }, { "epoch": 0.07434202320069852, "grad_norm": 0.0, "learning_rate": 1.989732390227337e-05, "loss": 1.2541, "step": 596 }, { "epoch": 0.07446675813895472, "grad_norm": 0.0, "learning_rate": 1.989674562958778e-05, "loss": 1.1697, "step": 597 }, { "epoch": 0.07459149307721093, "grad_norm": 0.0, "learning_rate": 1.989616574150381e-05, "loss": 1.2124, "step": 598 }, { "epoch": 0.07471622801546714, "grad_norm": 0.0, "learning_rate": 1.989558423811611e-05, "loss": 1.2001, "step": 599 }, { "epoch": 0.07484096295372333, "grad_norm": 0.0, "learning_rate": 1.9895001119519596e-05, "loss": 1.2385, "step": 600 }, { "epoch": 0.07496569789197954, "grad_norm": 0.0, "learning_rate": 1.9894416385809444e-05, "loss": 1.1965, "step": 601 }, { "epoch": 0.07509043283023575, "grad_norm": 0.0, "learning_rate": 1.9893830037081108e-05, "loss": 1.2083, "step": 602 }, { "epoch": 0.07521516776849195, "grad_norm": 0.0, "learning_rate": 1.9893242073430282e-05, "loss": 1.214, "step": 603 }, { "epoch": 0.07533990270674816, "grad_norm": 0.0, "learning_rate": 1.9892652494952948e-05, "loss": 1.2242, "step": 604 }, { "epoch": 0.07546463764500437, "grad_norm": 0.0, "learning_rate": 1.989206130174533e-05, "loss": 1.1852, "step": 605 }, { "epoch": 0.07558937258326057, "grad_norm": 0.0, "learning_rate": 1.9891468493903932e-05, "loss": 1.2327, "step": 606 }, { "epoch": 0.07571410752151678, "grad_norm": 0.0, "learning_rate": 1.989087407152551e-05, "loss": 1.2151, "step": 607 }, { "epoch": 0.07583884245977299, "grad_norm": 0.0, "learning_rate": 1.9890278034707095e-05, "loss": 1.2453, "step": 608 }, { "epoch": 0.07596357739802918, "grad_norm": 0.0, "learning_rate": 1.9889680383545974e-05, "loss": 1.2363, "step": 609 }, { "epoch": 0.0760883123362854, "grad_norm": 0.0, "learning_rate": 1.9889081118139694e-05, "loss": 1.2146, "step": 610 }, { "epoch": 0.0762130472745416, "grad_norm": 0.0, "learning_rate": 1.988848023858607e-05, "loss": 1.1613, "step": 611 }, { "epoch": 0.0763377822127978, "grad_norm": 0.0, "learning_rate": 1.9887877744983194e-05, "loss": 1.2213, "step": 612 }, { "epoch": 0.07646251715105401, "grad_norm": 0.0, "learning_rate": 1.988727363742939e-05, "loss": 1.1848, "step": 613 }, { "epoch": 0.07658725208931022, "grad_norm": 0.0, "learning_rate": 1.9886667916023273e-05, "loss": 1.19, "step": 614 }, { "epoch": 0.07671198702756642, "grad_norm": 0.0, "learning_rate": 1.9886060580863713e-05, "loss": 1.2294, "step": 615 }, { "epoch": 0.07683672196582263, "grad_norm": 0.0, "learning_rate": 1.9885451632049836e-05, "loss": 1.207, "step": 616 }, { "epoch": 0.07696145690407884, "grad_norm": 0.0, "learning_rate": 1.9884841069681045e-05, "loss": 1.2822, "step": 617 }, { "epoch": 0.07708619184233503, "grad_norm": 0.0, "learning_rate": 1.9884228893856995e-05, "loss": 1.1977, "step": 618 }, { "epoch": 0.07721092678059124, "grad_norm": 0.0, "learning_rate": 1.988361510467761e-05, "loss": 1.2163, "step": 619 }, { "epoch": 0.07733566171884745, "grad_norm": 0.0, "learning_rate": 1.9882999702243073e-05, "loss": 1.2084, "step": 620 }, { "epoch": 0.07746039665710365, "grad_norm": 0.0, "learning_rate": 1.9882382686653837e-05, "loss": 1.2591, "step": 621 }, { "epoch": 0.07758513159535986, "grad_norm": 0.0, "learning_rate": 1.9881764058010615e-05, "loss": 1.1577, "step": 622 }, { "epoch": 0.07770986653361607, "grad_norm": 0.0, "learning_rate": 1.988114381641438e-05, "loss": 1.1819, "step": 623 }, { "epoch": 0.07783460147187227, "grad_norm": 0.0, "learning_rate": 1.988052196196637e-05, "loss": 1.2627, "step": 624 }, { "epoch": 0.07795933641012848, "grad_norm": 0.0, "learning_rate": 1.9879898494768093e-05, "loss": 1.2303, "step": 625 }, { "epoch": 0.07808407134838469, "grad_norm": 0.0, "learning_rate": 1.9879273414921306e-05, "loss": 1.1792, "step": 626 }, { "epoch": 0.07820880628664088, "grad_norm": 0.0, "learning_rate": 1.9878646722528044e-05, "loss": 1.2153, "step": 627 }, { "epoch": 0.0783335412248971, "grad_norm": 0.0, "learning_rate": 1.9878018417690602e-05, "loss": 1.1952, "step": 628 }, { "epoch": 0.0784582761631533, "grad_norm": 0.0, "learning_rate": 1.9877388500511526e-05, "loss": 1.2, "step": 629 }, { "epoch": 0.0785830111014095, "grad_norm": 0.0, "learning_rate": 1.987675697109364e-05, "loss": 1.1473, "step": 630 }, { "epoch": 0.07870774603966571, "grad_norm": 0.0, "learning_rate": 1.9876123829540027e-05, "loss": 1.2223, "step": 631 }, { "epoch": 0.07883248097792192, "grad_norm": 0.0, "learning_rate": 1.987548907595403e-05, "loss": 1.1847, "step": 632 }, { "epoch": 0.07895721591617812, "grad_norm": 0.0, "learning_rate": 1.9874852710439252e-05, "loss": 1.1965, "step": 633 }, { "epoch": 0.07908195085443433, "grad_norm": 0.0, "learning_rate": 1.987421473309957e-05, "loss": 1.1344, "step": 634 }, { "epoch": 0.07920668579269054, "grad_norm": 0.0, "learning_rate": 1.9873575144039115e-05, "loss": 1.2201, "step": 635 }, { "epoch": 0.07933142073094673, "grad_norm": 0.0, "learning_rate": 1.9872933943362285e-05, "loss": 1.1869, "step": 636 }, { "epoch": 0.07945615566920294, "grad_norm": 0.0, "learning_rate": 1.9872291131173743e-05, "loss": 1.1909, "step": 637 }, { "epoch": 0.07958089060745915, "grad_norm": 0.0, "learning_rate": 1.9871646707578406e-05, "loss": 1.2021, "step": 638 }, { "epoch": 0.07970562554571535, "grad_norm": 0.0, "learning_rate": 1.9871000672681468e-05, "loss": 1.1743, "step": 639 }, { "epoch": 0.07983036048397156, "grad_norm": 0.0, "learning_rate": 1.9870353026588368e-05, "loss": 1.2263, "step": 640 }, { "epoch": 0.07995509542222777, "grad_norm": 0.0, "learning_rate": 1.986970376940483e-05, "loss": 1.2064, "step": 641 }, { "epoch": 0.08007983036048397, "grad_norm": 0.0, "learning_rate": 1.986905290123682e-05, "loss": 1.1981, "step": 642 }, { "epoch": 0.08020456529874018, "grad_norm": 0.0, "learning_rate": 1.9868400422190577e-05, "loss": 1.1763, "step": 643 }, { "epoch": 0.08032930023699639, "grad_norm": 0.0, "learning_rate": 1.9867746332372607e-05, "loss": 1.1863, "step": 644 }, { "epoch": 0.08045403517525258, "grad_norm": 0.0, "learning_rate": 1.9867090631889666e-05, "loss": 1.1505, "step": 645 }, { "epoch": 0.0805787701135088, "grad_norm": 0.0, "learning_rate": 1.9866433320848793e-05, "loss": 1.1888, "step": 646 }, { "epoch": 0.080703505051765, "grad_norm": 0.0, "learning_rate": 1.986577439935727e-05, "loss": 1.2263, "step": 647 }, { "epoch": 0.0808282399900212, "grad_norm": 0.0, "learning_rate": 1.9865113867522643e-05, "loss": 1.1926, "step": 648 }, { "epoch": 0.08095297492827741, "grad_norm": 0.0, "learning_rate": 1.986445172545274e-05, "loss": 1.2014, "step": 649 }, { "epoch": 0.08107770986653362, "grad_norm": 0.0, "learning_rate": 1.9863787973255634e-05, "loss": 1.1607, "step": 650 }, { "epoch": 0.08120244480478982, "grad_norm": 0.0, "learning_rate": 1.9863122611039664e-05, "loss": 1.1933, "step": 651 }, { "epoch": 0.08132717974304603, "grad_norm": 0.0, "learning_rate": 1.9862455638913437e-05, "loss": 1.2343, "step": 652 }, { "epoch": 0.08145191468130224, "grad_norm": 0.0, "learning_rate": 1.9861787056985823e-05, "loss": 1.2011, "step": 653 }, { "epoch": 0.08157664961955843, "grad_norm": 0.0, "learning_rate": 1.986111686536594e-05, "loss": 1.1979, "step": 654 }, { "epoch": 0.08170138455781464, "grad_norm": 0.0, "learning_rate": 1.9860445064163193e-05, "loss": 1.2124, "step": 655 }, { "epoch": 0.08182611949607085, "grad_norm": 0.0, "learning_rate": 1.985977165348723e-05, "loss": 1.1718, "step": 656 }, { "epoch": 0.08195085443432705, "grad_norm": 0.0, "learning_rate": 1.9859096633447965e-05, "loss": 1.1924, "step": 657 }, { "epoch": 0.08207558937258326, "grad_norm": 0.0, "learning_rate": 1.985842000415559e-05, "loss": 1.1591, "step": 658 }, { "epoch": 0.08220032431083947, "grad_norm": 0.0, "learning_rate": 1.9857741765720536e-05, "loss": 1.1977, "step": 659 }, { "epoch": 0.08232505924909567, "grad_norm": 0.0, "learning_rate": 1.9857061918253514e-05, "loss": 1.2072, "step": 660 }, { "epoch": 0.08244979418735188, "grad_norm": 0.0, "learning_rate": 1.985638046186549e-05, "loss": 1.2318, "step": 661 }, { "epoch": 0.08257452912560809, "grad_norm": 0.0, "learning_rate": 1.98556973966677e-05, "loss": 1.1452, "step": 662 }, { "epoch": 0.08269926406386428, "grad_norm": 0.0, "learning_rate": 1.9855012722771637e-05, "loss": 1.1579, "step": 663 }, { "epoch": 0.0828239990021205, "grad_norm": 0.0, "learning_rate": 1.985432644028905e-05, "loss": 1.2145, "step": 664 }, { "epoch": 0.0829487339403767, "grad_norm": 0.0, "learning_rate": 1.9853638549331962e-05, "loss": 1.1775, "step": 665 }, { "epoch": 0.0830734688786329, "grad_norm": 0.0, "learning_rate": 1.9852949050012655e-05, "loss": 1.1882, "step": 666 }, { "epoch": 0.08319820381688911, "grad_norm": 0.0, "learning_rate": 1.9852257942443672e-05, "loss": 1.1849, "step": 667 }, { "epoch": 0.08332293875514532, "grad_norm": 0.0, "learning_rate": 1.9851565226737817e-05, "loss": 1.2077, "step": 668 }, { "epoch": 0.08344767369340152, "grad_norm": 0.0, "learning_rate": 1.9850870903008162e-05, "loss": 1.192, "step": 669 }, { "epoch": 0.08357240863165773, "grad_norm": 0.0, "learning_rate": 1.9850174971368033e-05, "loss": 1.2045, "step": 670 }, { "epoch": 0.08369714356991394, "grad_norm": 0.0, "learning_rate": 1.9849477431931032e-05, "loss": 1.2484, "step": 671 }, { "epoch": 0.08382187850817013, "grad_norm": 0.0, "learning_rate": 1.984877828481101e-05, "loss": 1.2075, "step": 672 }, { "epoch": 0.08394661344642634, "grad_norm": 0.0, "learning_rate": 1.9848077530122083e-05, "loss": 1.1991, "step": 673 }, { "epoch": 0.08407134838468255, "grad_norm": 0.0, "learning_rate": 1.984737516797863e-05, "loss": 1.1903, "step": 674 }, { "epoch": 0.08419608332293875, "grad_norm": 0.0, "learning_rate": 1.984667119849531e-05, "loss": 1.1947, "step": 675 }, { "epoch": 0.08432081826119496, "grad_norm": 0.0, "learning_rate": 1.984596562178701e-05, "loss": 1.1864, "step": 676 }, { "epoch": 0.08444555319945117, "grad_norm": 0.0, "learning_rate": 1.9845258437968905e-05, "loss": 1.1992, "step": 677 }, { "epoch": 0.08457028813770737, "grad_norm": 0.0, "learning_rate": 1.9844549647156428e-05, "loss": 1.131, "step": 678 }, { "epoch": 0.08469502307596358, "grad_norm": 0.0, "learning_rate": 1.984383924946527e-05, "loss": 1.198, "step": 679 }, { "epoch": 0.08481975801421979, "grad_norm": 0.0, "learning_rate": 1.984312724501138e-05, "loss": 1.2231, "step": 680 }, { "epoch": 0.08494449295247598, "grad_norm": 0.0, "learning_rate": 1.984241363391098e-05, "loss": 1.1506, "step": 681 }, { "epoch": 0.0850692278907322, "grad_norm": 0.0, "learning_rate": 1.9841698416280554e-05, "loss": 1.1846, "step": 682 }, { "epoch": 0.0851939628289884, "grad_norm": 0.0, "learning_rate": 1.9840981592236838e-05, "loss": 1.2164, "step": 683 }, { "epoch": 0.0853186977672446, "grad_norm": 0.0, "learning_rate": 1.9840263161896832e-05, "loss": 1.2046, "step": 684 }, { "epoch": 0.08544343270550081, "grad_norm": 0.0, "learning_rate": 1.983954312537781e-05, "loss": 1.1409, "step": 685 }, { "epoch": 0.08556816764375702, "grad_norm": 0.0, "learning_rate": 1.9838821482797295e-05, "loss": 1.2094, "step": 686 }, { "epoch": 0.08569290258201322, "grad_norm": 0.0, "learning_rate": 1.9838098234273078e-05, "loss": 1.1367, "step": 687 }, { "epoch": 0.08581763752026943, "grad_norm": 0.0, "learning_rate": 1.9837373379923212e-05, "loss": 1.1882, "step": 688 }, { "epoch": 0.08594237245852564, "grad_norm": 0.0, "learning_rate": 1.9836646919866012e-05, "loss": 1.1634, "step": 689 }, { "epoch": 0.08606710739678183, "grad_norm": 0.0, "learning_rate": 1.9835918854220056e-05, "loss": 1.169, "step": 690 }, { "epoch": 0.08619184233503804, "grad_norm": 0.0, "learning_rate": 1.983518918310418e-05, "loss": 1.1628, "step": 691 }, { "epoch": 0.08631657727329425, "grad_norm": 0.0, "learning_rate": 1.9834457906637484e-05, "loss": 1.1784, "step": 692 }, { "epoch": 0.08644131221155045, "grad_norm": 0.0, "learning_rate": 1.9833725024939335e-05, "loss": 1.1805, "step": 693 }, { "epoch": 0.08656604714980666, "grad_norm": 0.0, "learning_rate": 1.9832990538129353e-05, "loss": 1.2038, "step": 694 }, { "epoch": 0.08669078208806287, "grad_norm": 0.0, "learning_rate": 1.9832254446327427e-05, "loss": 1.1686, "step": 695 }, { "epoch": 0.08681551702631907, "grad_norm": 0.0, "learning_rate": 1.9831516749653708e-05, "loss": 1.1612, "step": 696 }, { "epoch": 0.08694025196457528, "grad_norm": 0.0, "learning_rate": 1.9830777448228603e-05, "loss": 1.1744, "step": 697 }, { "epoch": 0.08706498690283149, "grad_norm": 0.0, "learning_rate": 1.9830036542172783e-05, "loss": 1.1868, "step": 698 }, { "epoch": 0.08718972184108768, "grad_norm": 0.0, "learning_rate": 1.982929403160719e-05, "loss": 1.207, "step": 699 }, { "epoch": 0.0873144567793439, "grad_norm": 0.0, "learning_rate": 1.9828549916653013e-05, "loss": 1.189, "step": 700 }, { "epoch": 0.0874391917176001, "grad_norm": 0.0, "learning_rate": 1.9827804197431716e-05, "loss": 1.2008, "step": 701 }, { "epoch": 0.0875639266558563, "grad_norm": 0.0, "learning_rate": 1.9827056874065013e-05, "loss": 1.2109, "step": 702 }, { "epoch": 0.08768866159411251, "grad_norm": 0.0, "learning_rate": 1.982630794667489e-05, "loss": 1.1947, "step": 703 }, { "epoch": 0.08781339653236872, "grad_norm": 0.0, "learning_rate": 1.9825557415383593e-05, "loss": 1.1593, "step": 704 }, { "epoch": 0.08793813147062492, "grad_norm": 0.0, "learning_rate": 1.9824805280313625e-05, "loss": 1.2243, "step": 705 }, { "epoch": 0.08806286640888113, "grad_norm": 0.0, "learning_rate": 1.982405154158775e-05, "loss": 1.177, "step": 706 }, { "epoch": 0.08818760134713734, "grad_norm": 0.0, "learning_rate": 1.9823296199329004e-05, "loss": 1.1767, "step": 707 }, { "epoch": 0.08831233628539353, "grad_norm": 0.0, "learning_rate": 1.982253925366067e-05, "loss": 1.2191, "step": 708 }, { "epoch": 0.08843707122364974, "grad_norm": 0.0, "learning_rate": 1.982178070470631e-05, "loss": 1.178, "step": 709 }, { "epoch": 0.08856180616190595, "grad_norm": 0.0, "learning_rate": 1.982102055258973e-05, "loss": 1.1719, "step": 710 }, { "epoch": 0.08868654110016215, "grad_norm": 0.0, "learning_rate": 1.982025879743501e-05, "loss": 1.2379, "step": 711 }, { "epoch": 0.08881127603841836, "grad_norm": 0.0, "learning_rate": 1.9819495439366488e-05, "loss": 1.165, "step": 712 }, { "epoch": 0.08893601097667457, "grad_norm": 0.0, "learning_rate": 1.9818730478508762e-05, "loss": 1.2231, "step": 713 }, { "epoch": 0.08906074591493077, "grad_norm": 0.0, "learning_rate": 1.9817963914986692e-05, "loss": 1.2211, "step": 714 }, { "epoch": 0.08918548085318698, "grad_norm": 0.0, "learning_rate": 1.98171957489254e-05, "loss": 1.2188, "step": 715 }, { "epoch": 0.08931021579144319, "grad_norm": 0.0, "learning_rate": 1.9816425980450273e-05, "loss": 1.1769, "step": 716 }, { "epoch": 0.08943495072969938, "grad_norm": 0.0, "learning_rate": 1.9815654609686955e-05, "loss": 1.2183, "step": 717 }, { "epoch": 0.0895596856679556, "grad_norm": 0.0, "learning_rate": 1.9814881636761355e-05, "loss": 1.1766, "step": 718 }, { "epoch": 0.0896844206062118, "grad_norm": 0.0, "learning_rate": 1.9814107061799642e-05, "loss": 1.2082, "step": 719 }, { "epoch": 0.089809155544468, "grad_norm": 0.0, "learning_rate": 1.981333088492824e-05, "loss": 1.1694, "step": 720 }, { "epoch": 0.08993389048272421, "grad_norm": 0.0, "learning_rate": 1.9812553106273848e-05, "loss": 1.1217, "step": 721 }, { "epoch": 0.09005862542098042, "grad_norm": 0.0, "learning_rate": 1.9811773725963416e-05, "loss": 1.2319, "step": 722 }, { "epoch": 0.09018336035923662, "grad_norm": 0.0, "learning_rate": 1.9810992744124155e-05, "loss": 1.2042, "step": 723 }, { "epoch": 0.09030809529749283, "grad_norm": 0.0, "learning_rate": 1.981021016088355e-05, "loss": 1.1952, "step": 724 }, { "epoch": 0.09043283023574904, "grad_norm": 0.0, "learning_rate": 1.9809425976369327e-05, "loss": 1.1862, "step": 725 }, { "epoch": 0.09055756517400523, "grad_norm": 0.0, "learning_rate": 1.9808640190709493e-05, "loss": 1.2151, "step": 726 }, { "epoch": 0.09068230011226144, "grad_norm": 0.0, "learning_rate": 1.9807852804032306e-05, "loss": 1.1803, "step": 727 }, { "epoch": 0.09080703505051765, "grad_norm": 0.0, "learning_rate": 1.9807063816466287e-05, "loss": 1.1787, "step": 728 }, { "epoch": 0.09093176998877385, "grad_norm": 0.0, "learning_rate": 1.9806273228140216e-05, "loss": 1.1207, "step": 729 }, { "epoch": 0.09105650492703006, "grad_norm": 0.0, "learning_rate": 1.9805481039183144e-05, "loss": 1.1974, "step": 730 }, { "epoch": 0.09118123986528627, "grad_norm": 0.0, "learning_rate": 1.9804687249724366e-05, "loss": 1.1583, "step": 731 }, { "epoch": 0.09130597480354247, "grad_norm": 0.0, "learning_rate": 1.980389185989346e-05, "loss": 1.1604, "step": 732 }, { "epoch": 0.09143070974179868, "grad_norm": 0.0, "learning_rate": 1.980309486982024e-05, "loss": 1.1466, "step": 733 }, { "epoch": 0.09155544468005489, "grad_norm": 0.0, "learning_rate": 1.980229627963481e-05, "loss": 1.1974, "step": 734 }, { "epoch": 0.09168017961831108, "grad_norm": 0.0, "learning_rate": 1.980149608946751e-05, "loss": 1.1596, "step": 735 }, { "epoch": 0.0918049145565673, "grad_norm": 0.0, "learning_rate": 1.980069429944895e-05, "loss": 1.1762, "step": 736 }, { "epoch": 0.0919296494948235, "grad_norm": 0.0, "learning_rate": 1.9799890909710013e-05, "loss": 1.1832, "step": 737 }, { "epoch": 0.0920543844330797, "grad_norm": 0.0, "learning_rate": 1.9799085920381823e-05, "loss": 1.1881, "step": 738 }, { "epoch": 0.09217911937133591, "grad_norm": 0.0, "learning_rate": 1.9798279331595776e-05, "loss": 1.2147, "step": 739 }, { "epoch": 0.09230385430959212, "grad_norm": 0.0, "learning_rate": 1.979747114348353e-05, "loss": 1.1695, "step": 740 }, { "epoch": 0.09242858924784832, "grad_norm": 0.0, "learning_rate": 1.9796661356177e-05, "loss": 1.1387, "step": 741 }, { "epoch": 0.09255332418610453, "grad_norm": 0.0, "learning_rate": 1.9795849969808366e-05, "loss": 1.1421, "step": 742 }, { "epoch": 0.09267805912436074, "grad_norm": 0.0, "learning_rate": 1.9795036984510062e-05, "loss": 1.2062, "step": 743 }, { "epoch": 0.09280279406261693, "grad_norm": 0.0, "learning_rate": 1.9794222400414793e-05, "loss": 1.1871, "step": 744 }, { "epoch": 0.09292752900087314, "grad_norm": 0.0, "learning_rate": 1.9793406217655516e-05, "loss": 1.2336, "step": 745 }, { "epoch": 0.09305226393912935, "grad_norm": 0.0, "learning_rate": 1.9792588436365453e-05, "loss": 1.1676, "step": 746 }, { "epoch": 0.09317699887738555, "grad_norm": 0.0, "learning_rate": 1.9791769056678093e-05, "loss": 1.1769, "step": 747 }, { "epoch": 0.09330173381564176, "grad_norm": 0.0, "learning_rate": 1.9790948078727167e-05, "loss": 1.2313, "step": 748 }, { "epoch": 0.09342646875389797, "grad_norm": 0.0, "learning_rate": 1.9790125502646692e-05, "loss": 1.1524, "step": 749 }, { "epoch": 0.09355120369215417, "grad_norm": 0.0, "learning_rate": 1.978930132857092e-05, "loss": 1.1907, "step": 750 }, { "epoch": 0.09367593863041038, "grad_norm": 0.0, "learning_rate": 1.978847555663439e-05, "loss": 1.15, "step": 751 }, { "epoch": 0.09380067356866659, "grad_norm": 0.0, "learning_rate": 1.9787648186971882e-05, "loss": 1.2393, "step": 752 }, { "epoch": 0.09392540850692278, "grad_norm": 0.0, "learning_rate": 1.9786819219718443e-05, "loss": 1.1828, "step": 753 }, { "epoch": 0.094050143445179, "grad_norm": 0.0, "learning_rate": 1.9785988655009386e-05, "loss": 1.1817, "step": 754 }, { "epoch": 0.0941748783834352, "grad_norm": 0.0, "learning_rate": 1.9785156492980275e-05, "loss": 1.1761, "step": 755 }, { "epoch": 0.0942996133216914, "grad_norm": 0.0, "learning_rate": 1.9784322733766942e-05, "loss": 1.1775, "step": 756 }, { "epoch": 0.09442434825994761, "grad_norm": 0.0, "learning_rate": 1.9783487377505477e-05, "loss": 1.1375, "step": 757 }, { "epoch": 0.09454908319820382, "grad_norm": 0.0, "learning_rate": 1.9782650424332234e-05, "loss": 1.1846, "step": 758 }, { "epoch": 0.09467381813646002, "grad_norm": 0.0, "learning_rate": 1.9781811874383822e-05, "loss": 1.1158, "step": 759 }, { "epoch": 0.09479855307471623, "grad_norm": 0.0, "learning_rate": 1.9780971727797112e-05, "loss": 1.1422, "step": 760 }, { "epoch": 0.09492328801297244, "grad_norm": 0.0, "learning_rate": 1.978012998470924e-05, "loss": 1.1235, "step": 761 }, { "epoch": 0.09504802295122863, "grad_norm": 0.0, "learning_rate": 1.9779286645257597e-05, "loss": 1.1921, "step": 762 }, { "epoch": 0.09517275788948484, "grad_norm": 0.0, "learning_rate": 1.977844170957984e-05, "loss": 1.156, "step": 763 }, { "epoch": 0.09529749282774105, "grad_norm": 0.0, "learning_rate": 1.9777595177813884e-05, "loss": 1.2195, "step": 764 }, { "epoch": 0.09542222776599725, "grad_norm": 0.0, "learning_rate": 1.9776747050097908e-05, "loss": 1.1604, "step": 765 }, { "epoch": 0.09554696270425346, "grad_norm": 0.0, "learning_rate": 1.9775897326570334e-05, "loss": 1.1834, "step": 766 }, { "epoch": 0.09567169764250967, "grad_norm": 0.0, "learning_rate": 1.977504600736987e-05, "loss": 1.1632, "step": 767 }, { "epoch": 0.09579643258076587, "grad_norm": 0.0, "learning_rate": 1.9774193092635476e-05, "loss": 1.1789, "step": 768 }, { "epoch": 0.09592116751902208, "grad_norm": 0.0, "learning_rate": 1.9773338582506357e-05, "loss": 1.1508, "step": 769 }, { "epoch": 0.09604590245727829, "grad_norm": 0.0, "learning_rate": 1.9772482477121995e-05, "loss": 1.1376, "step": 770 }, { "epoch": 0.09617063739553448, "grad_norm": 0.0, "learning_rate": 1.977162477662214e-05, "loss": 1.1842, "step": 771 }, { "epoch": 0.0962953723337907, "grad_norm": 0.0, "learning_rate": 1.977076548114677e-05, "loss": 1.1817, "step": 772 }, { "epoch": 0.0964201072720469, "grad_norm": 0.0, "learning_rate": 1.976990459083616e-05, "loss": 1.1416, "step": 773 }, { "epoch": 0.0965448422103031, "grad_norm": 0.0, "learning_rate": 1.976904210583082e-05, "loss": 1.1573, "step": 774 }, { "epoch": 0.09666957714855931, "grad_norm": 0.0, "learning_rate": 1.976817802627153e-05, "loss": 1.1388, "step": 775 }, { "epoch": 0.09679431208681552, "grad_norm": 0.0, "learning_rate": 1.9767312352299335e-05, "loss": 1.0713, "step": 776 }, { "epoch": 0.09691904702507172, "grad_norm": 0.0, "learning_rate": 1.9766445084055532e-05, "loss": 1.1738, "step": 777 }, { "epoch": 0.09704378196332793, "grad_norm": 0.0, "learning_rate": 1.976557622168168e-05, "loss": 1.1918, "step": 778 }, { "epoch": 0.09716851690158414, "grad_norm": 0.0, "learning_rate": 1.9764705765319602e-05, "loss": 1.1734, "step": 779 }, { "epoch": 0.09729325183984033, "grad_norm": 0.0, "learning_rate": 1.9763833715111373e-05, "loss": 1.2237, "step": 780 }, { "epoch": 0.09741798677809654, "grad_norm": 0.0, "learning_rate": 1.9762960071199334e-05, "loss": 1.1747, "step": 781 }, { "epoch": 0.09754272171635275, "grad_norm": 0.0, "learning_rate": 1.9762084833726095e-05, "loss": 1.1552, "step": 782 }, { "epoch": 0.09766745665460895, "grad_norm": 0.0, "learning_rate": 1.9761208002834506e-05, "loss": 1.1713, "step": 783 }, { "epoch": 0.09779219159286516, "grad_norm": 0.0, "learning_rate": 1.9760329578667696e-05, "loss": 1.223, "step": 784 }, { "epoch": 0.09791692653112137, "grad_norm": 0.0, "learning_rate": 1.9759449561369036e-05, "loss": 1.2293, "step": 785 }, { "epoch": 0.09804166146937757, "grad_norm": 0.0, "learning_rate": 1.975856795108218e-05, "loss": 1.1682, "step": 786 }, { "epoch": 0.09816639640763378, "grad_norm": 0.0, "learning_rate": 1.975768474795102e-05, "loss": 1.1513, "step": 787 }, { "epoch": 0.09829113134588999, "grad_norm": 0.0, "learning_rate": 1.9756799952119715e-05, "loss": 1.1646, "step": 788 }, { "epoch": 0.09841586628414618, "grad_norm": 0.0, "learning_rate": 1.9755913563732696e-05, "loss": 1.1568, "step": 789 }, { "epoch": 0.0985406012224024, "grad_norm": 0.0, "learning_rate": 1.975502558293464e-05, "loss": 1.1302, "step": 790 }, { "epoch": 0.0986653361606586, "grad_norm": 0.0, "learning_rate": 1.9754136009870478e-05, "loss": 1.1457, "step": 791 }, { "epoch": 0.0987900710989148, "grad_norm": 0.0, "learning_rate": 1.9753244844685425e-05, "loss": 1.1809, "step": 792 }, { "epoch": 0.09891480603717101, "grad_norm": 0.0, "learning_rate": 1.9752352087524934e-05, "loss": 1.1301, "step": 793 }, { "epoch": 0.09903954097542722, "grad_norm": 0.0, "learning_rate": 1.9751457738534726e-05, "loss": 1.2039, "step": 794 }, { "epoch": 0.09916427591368342, "grad_norm": 0.0, "learning_rate": 1.975056179786078e-05, "loss": 1.1862, "step": 795 }, { "epoch": 0.09928901085193963, "grad_norm": 0.0, "learning_rate": 1.9749664265649346e-05, "loss": 1.1899, "step": 796 }, { "epoch": 0.09941374579019584, "grad_norm": 0.0, "learning_rate": 1.9748765142046908e-05, "loss": 1.232, "step": 797 }, { "epoch": 0.09953848072845203, "grad_norm": 0.0, "learning_rate": 1.974786442720024e-05, "loss": 1.169, "step": 798 }, { "epoch": 0.09966321566670824, "grad_norm": 0.0, "learning_rate": 1.974696212125635e-05, "loss": 1.1332, "step": 799 }, { "epoch": 0.09978795060496445, "grad_norm": 0.0, "learning_rate": 1.9746058224362527e-05, "loss": 1.2009, "step": 800 }, { "epoch": 0.09991268554322065, "grad_norm": 0.0, "learning_rate": 1.9745152736666303e-05, "loss": 1.1968, "step": 801 }, { "epoch": 0.10003742048147686, "grad_norm": 0.0, "learning_rate": 1.9744245658315477e-05, "loss": 1.1263, "step": 802 }, { "epoch": 0.10016215541973307, "grad_norm": 0.0, "learning_rate": 1.9743336989458108e-05, "loss": 1.1061, "step": 803 }, { "epoch": 0.10028689035798927, "grad_norm": 0.0, "learning_rate": 1.9742426730242517e-05, "loss": 1.1357, "step": 804 }, { "epoch": 0.10041162529624548, "grad_norm": 0.0, "learning_rate": 1.9741514880817275e-05, "loss": 1.2657, "step": 805 }, { "epoch": 0.10053636023450169, "grad_norm": 0.0, "learning_rate": 1.9740601441331228e-05, "loss": 1.1085, "step": 806 }, { "epoch": 0.10066109517275788, "grad_norm": 0.0, "learning_rate": 1.9739686411933463e-05, "loss": 1.1648, "step": 807 }, { "epoch": 0.1007858301110141, "grad_norm": 0.0, "learning_rate": 1.9738769792773338e-05, "loss": 1.124, "step": 808 }, { "epoch": 0.1009105650492703, "grad_norm": 0.0, "learning_rate": 1.973785158400047e-05, "loss": 1.1553, "step": 809 }, { "epoch": 0.1010352999875265, "grad_norm": 0.0, "learning_rate": 1.9736931785764736e-05, "loss": 1.2189, "step": 810 }, { "epoch": 0.10116003492578271, "grad_norm": 0.0, "learning_rate": 1.9736010398216266e-05, "loss": 1.1865, "step": 811 }, { "epoch": 0.10128476986403892, "grad_norm": 0.0, "learning_rate": 1.973508742150546e-05, "loss": 1.2295, "step": 812 }, { "epoch": 0.10140950480229512, "grad_norm": 0.0, "learning_rate": 1.973416285578296e-05, "loss": 1.1547, "step": 813 }, { "epoch": 0.10153423974055133, "grad_norm": 0.0, "learning_rate": 1.9733236701199693e-05, "loss": 1.2123, "step": 814 }, { "epoch": 0.10165897467880754, "grad_norm": 0.0, "learning_rate": 1.9732308957906815e-05, "loss": 1.1651, "step": 815 }, { "epoch": 0.10178370961706373, "grad_norm": 0.0, "learning_rate": 1.9731379626055768e-05, "loss": 1.1578, "step": 816 }, { "epoch": 0.10190844455531994, "grad_norm": 0.0, "learning_rate": 1.973044870579824e-05, "loss": 1.2133, "step": 817 }, { "epoch": 0.10203317949357615, "grad_norm": 0.0, "learning_rate": 1.972951619728618e-05, "loss": 1.19, "step": 818 }, { "epoch": 0.10215791443183235, "grad_norm": 0.0, "learning_rate": 1.9728582100671797e-05, "loss": 1.2229, "step": 819 }, { "epoch": 0.10228264937008856, "grad_norm": 0.0, "learning_rate": 1.9727646416107558e-05, "loss": 1.1694, "step": 820 }, { "epoch": 0.10240738430834477, "grad_norm": 0.0, "learning_rate": 1.9726709143746194e-05, "loss": 1.1113, "step": 821 }, { "epoch": 0.10253211924660097, "grad_norm": 0.0, "learning_rate": 1.9725770283740684e-05, "loss": 1.1811, "step": 822 }, { "epoch": 0.10265685418485718, "grad_norm": 0.0, "learning_rate": 1.9724829836244282e-05, "loss": 1.1442, "step": 823 }, { "epoch": 0.10278158912311339, "grad_norm": 0.0, "learning_rate": 1.9723887801410486e-05, "loss": 1.1212, "step": 824 }, { "epoch": 0.10290632406136958, "grad_norm": 0.0, "learning_rate": 1.9722944179393068e-05, "loss": 1.141, "step": 825 }, { "epoch": 0.1030310589996258, "grad_norm": 0.0, "learning_rate": 1.972199897034604e-05, "loss": 1.1598, "step": 826 }, { "epoch": 0.103155793937882, "grad_norm": 0.0, "learning_rate": 1.9721052174423693e-05, "loss": 1.1316, "step": 827 }, { "epoch": 0.1032805288761382, "grad_norm": 0.0, "learning_rate": 1.9720103791780564e-05, "loss": 1.1224, "step": 828 }, { "epoch": 0.10340526381439441, "grad_norm": 0.0, "learning_rate": 1.9719153822571456e-05, "loss": 1.1039, "step": 829 }, { "epoch": 0.10352999875265062, "grad_norm": 0.0, "learning_rate": 1.9718202266951425e-05, "loss": 1.1315, "step": 830 }, { "epoch": 0.10365473369090682, "grad_norm": 0.0, "learning_rate": 1.9717249125075784e-05, "loss": 1.1592, "step": 831 }, { "epoch": 0.10377946862916303, "grad_norm": 0.0, "learning_rate": 1.971629439710012e-05, "loss": 1.1478, "step": 832 }, { "epoch": 0.10390420356741924, "grad_norm": 0.0, "learning_rate": 1.971533808318027e-05, "loss": 1.2011, "step": 833 }, { "epoch": 0.10402893850567543, "grad_norm": 0.0, "learning_rate": 1.9714380183472314e-05, "loss": 1.1466, "step": 834 }, { "epoch": 0.10415367344393164, "grad_norm": 0.0, "learning_rate": 1.9713420698132614e-05, "loss": 1.1855, "step": 835 }, { "epoch": 0.10427840838218785, "grad_norm": 0.0, "learning_rate": 1.9712459627317787e-05, "loss": 1.0987, "step": 836 }, { "epoch": 0.10440314332044405, "grad_norm": 0.0, "learning_rate": 1.9711496971184696e-05, "loss": 1.1396, "step": 837 }, { "epoch": 0.10452787825870026, "grad_norm": 0.0, "learning_rate": 1.9710532729890474e-05, "loss": 1.1682, "step": 838 }, { "epoch": 0.10465261319695647, "grad_norm": 0.0, "learning_rate": 1.970956690359251e-05, "loss": 1.1401, "step": 839 }, { "epoch": 0.10477734813521267, "grad_norm": 0.0, "learning_rate": 1.970859949244845e-05, "loss": 1.1691, "step": 840 }, { "epoch": 0.10490208307346888, "grad_norm": 0.0, "learning_rate": 1.9707630496616203e-05, "loss": 1.1299, "step": 841 }, { "epoch": 0.10502681801172509, "grad_norm": 0.0, "learning_rate": 1.9706659916253927e-05, "loss": 1.1951, "step": 842 }, { "epoch": 0.10515155294998128, "grad_norm": 0.0, "learning_rate": 1.970568775152005e-05, "loss": 1.1178, "step": 843 }, { "epoch": 0.1052762878882375, "grad_norm": 0.0, "learning_rate": 1.9704714002573255e-05, "loss": 1.18, "step": 844 }, { "epoch": 0.1054010228264937, "grad_norm": 0.0, "learning_rate": 1.970373866957248e-05, "loss": 1.1635, "step": 845 }, { "epoch": 0.1055257577647499, "grad_norm": 0.0, "learning_rate": 1.9702761752676923e-05, "loss": 1.1217, "step": 846 }, { "epoch": 0.10565049270300611, "grad_norm": 0.0, "learning_rate": 1.9701783252046043e-05, "loss": 1.1174, "step": 847 }, { "epoch": 0.10577522764126232, "grad_norm": 0.0, "learning_rate": 1.9700803167839556e-05, "loss": 1.1561, "step": 848 }, { "epoch": 0.10589996257951852, "grad_norm": 0.0, "learning_rate": 1.9699821500217436e-05, "loss": 1.1074, "step": 849 }, { "epoch": 0.10602469751777473, "grad_norm": 0.0, "learning_rate": 1.969883824933992e-05, "loss": 1.1841, "step": 850 }, { "epoch": 0.10614943245603094, "grad_norm": 0.0, "learning_rate": 1.9697853415367486e-05, "loss": 1.2502, "step": 851 }, { "epoch": 0.10627416739428713, "grad_norm": 0.0, "learning_rate": 1.9696866998460898e-05, "loss": 1.1191, "step": 852 }, { "epoch": 0.10639890233254334, "grad_norm": 0.0, "learning_rate": 1.9695878998781162e-05, "loss": 1.1336, "step": 853 }, { "epoch": 0.10652363727079955, "grad_norm": 0.0, "learning_rate": 1.969488941648954e-05, "loss": 1.1342, "step": 854 }, { "epoch": 0.10664837220905575, "grad_norm": 0.0, "learning_rate": 1.969389825174756e-05, "loss": 1.156, "step": 855 }, { "epoch": 0.10677310714731196, "grad_norm": 0.0, "learning_rate": 1.9692905504717e-05, "loss": 1.2362, "step": 856 }, { "epoch": 0.10689784208556817, "grad_norm": 0.0, "learning_rate": 1.9691911175559905e-05, "loss": 1.1404, "step": 857 }, { "epoch": 0.10702257702382437, "grad_norm": 0.0, "learning_rate": 1.9690915264438576e-05, "loss": 1.1473, "step": 858 }, { "epoch": 0.10714731196208058, "grad_norm": 0.0, "learning_rate": 1.968991777151557e-05, "loss": 1.1728, "step": 859 }, { "epoch": 0.10727204690033679, "grad_norm": 0.0, "learning_rate": 1.96889186969537e-05, "loss": 1.2058, "step": 860 }, { "epoch": 0.10739678183859298, "grad_norm": 0.0, "learning_rate": 1.9687918040916046e-05, "loss": 1.1978, "step": 861 }, { "epoch": 0.1075215167768492, "grad_norm": 0.0, "learning_rate": 1.9686915803565934e-05, "loss": 1.1532, "step": 862 }, { "epoch": 0.1076462517151054, "grad_norm": 0.0, "learning_rate": 1.9685911985066955e-05, "loss": 1.1667, "step": 863 }, { "epoch": 0.1077709866533616, "grad_norm": 0.0, "learning_rate": 1.9684906585582965e-05, "loss": 1.1577, "step": 864 }, { "epoch": 0.10789572159161781, "grad_norm": 0.0, "learning_rate": 1.9683899605278062e-05, "loss": 1.1359, "step": 865 }, { "epoch": 0.10802045652987402, "grad_norm": 0.0, "learning_rate": 1.968289104431661e-05, "loss": 1.1571, "step": 866 }, { "epoch": 0.10814519146813022, "grad_norm": 0.0, "learning_rate": 1.968188090286324e-05, "loss": 1.1749, "step": 867 }, { "epoch": 0.10826992640638643, "grad_norm": 0.0, "learning_rate": 1.968086918108283e-05, "loss": 1.1034, "step": 868 }, { "epoch": 0.10839466134464264, "grad_norm": 0.0, "learning_rate": 1.9679855879140512e-05, "loss": 1.1777, "step": 869 }, { "epoch": 0.10851939628289883, "grad_norm": 0.0, "learning_rate": 1.967884099720169e-05, "loss": 1.1433, "step": 870 }, { "epoch": 0.10864413122115504, "grad_norm": 0.0, "learning_rate": 1.9677824535432012e-05, "loss": 1.1396, "step": 871 }, { "epoch": 0.10876886615941125, "grad_norm": 0.0, "learning_rate": 1.9676806493997395e-05, "loss": 1.1601, "step": 872 }, { "epoch": 0.10889360109766745, "grad_norm": 0.0, "learning_rate": 1.9675786873064005e-05, "loss": 1.1752, "step": 873 }, { "epoch": 0.10901833603592366, "grad_norm": 0.0, "learning_rate": 1.9674765672798274e-05, "loss": 1.194, "step": 874 }, { "epoch": 0.10914307097417987, "grad_norm": 0.0, "learning_rate": 1.9673742893366885e-05, "loss": 1.1336, "step": 875 }, { "epoch": 0.10926780591243607, "grad_norm": 0.0, "learning_rate": 1.9672718534936785e-05, "loss": 1.0703, "step": 876 }, { "epoch": 0.10939254085069228, "grad_norm": 0.0, "learning_rate": 1.9671692597675167e-05, "loss": 1.1487, "step": 877 }, { "epoch": 0.10951727578894849, "grad_norm": 0.0, "learning_rate": 1.9670665081749497e-05, "loss": 1.1387, "step": 878 }, { "epoch": 0.10964201072720468, "grad_norm": 0.0, "learning_rate": 1.966963598732749e-05, "loss": 1.1354, "step": 879 }, { "epoch": 0.1097667456654609, "grad_norm": 0.0, "learning_rate": 1.9668605314577124e-05, "loss": 1.1581, "step": 880 }, { "epoch": 0.1098914806037171, "grad_norm": 0.0, "learning_rate": 1.9667573063666622e-05, "loss": 1.1854, "step": 881 }, { "epoch": 0.1100162155419733, "grad_norm": 0.0, "learning_rate": 1.966653923476448e-05, "loss": 1.1091, "step": 882 }, { "epoch": 0.11014095048022951, "grad_norm": 0.0, "learning_rate": 1.966550382803944e-05, "loss": 1.1012, "step": 883 }, { "epoch": 0.11026568541848572, "grad_norm": 0.0, "learning_rate": 1.966446684366051e-05, "loss": 1.1457, "step": 884 }, { "epoch": 0.11039042035674192, "grad_norm": 0.0, "learning_rate": 1.9663428281796954e-05, "loss": 1.0981, "step": 885 }, { "epoch": 0.11051515529499813, "grad_norm": 0.0, "learning_rate": 1.9662388142618285e-05, "loss": 1.1329, "step": 886 }, { "epoch": 0.11063989023325434, "grad_norm": 0.0, "learning_rate": 1.9661346426294284e-05, "loss": 1.1448, "step": 887 }, { "epoch": 0.11076462517151053, "grad_norm": 0.0, "learning_rate": 1.9660303132994988e-05, "loss": 1.166, "step": 888 }, { "epoch": 0.11088936010976674, "grad_norm": 0.0, "learning_rate": 1.9659258262890683e-05, "loss": 1.1692, "step": 889 }, { "epoch": 0.11101409504802295, "grad_norm": 0.0, "learning_rate": 1.9658211816151923e-05, "loss": 1.1055, "step": 890 }, { "epoch": 0.11113882998627915, "grad_norm": 0.0, "learning_rate": 1.965716379294951e-05, "loss": 1.1489, "step": 891 }, { "epoch": 0.11126356492453536, "grad_norm": 0.0, "learning_rate": 1.9656114193454514e-05, "loss": 1.1968, "step": 892 }, { "epoch": 0.11138829986279157, "grad_norm": 0.0, "learning_rate": 1.9655063017838253e-05, "loss": 1.1433, "step": 893 }, { "epoch": 0.11151303480104777, "grad_norm": 0.0, "learning_rate": 1.96540102662723e-05, "loss": 1.1445, "step": 894 }, { "epoch": 0.11163776973930398, "grad_norm": 0.0, "learning_rate": 1.96529559389285e-05, "loss": 1.1156, "step": 895 }, { "epoch": 0.11176250467756019, "grad_norm": 0.0, "learning_rate": 1.9651900035978938e-05, "loss": 1.1377, "step": 896 }, { "epoch": 0.11188723961581638, "grad_norm": 0.0, "learning_rate": 1.9650842557595968e-05, "loss": 1.1505, "step": 897 }, { "epoch": 0.1120119745540726, "grad_norm": 0.0, "learning_rate": 1.9649783503952196e-05, "loss": 1.1513, "step": 898 }, { "epoch": 0.1121367094923288, "grad_norm": 0.0, "learning_rate": 1.9648722875220487e-05, "loss": 1.1648, "step": 899 }, { "epoch": 0.112261444430585, "grad_norm": 0.0, "learning_rate": 1.9647660671573957e-05, "loss": 1.2138, "step": 900 }, { "epoch": 0.11238617936884121, "grad_norm": 0.0, "learning_rate": 1.9646596893185995e-05, "loss": 1.124, "step": 901 }, { "epoch": 0.11251091430709742, "grad_norm": 0.0, "learning_rate": 1.964553154023023e-05, "loss": 1.1405, "step": 902 }, { "epoch": 0.11263564924535362, "grad_norm": 0.0, "learning_rate": 1.9644464612880555e-05, "loss": 1.132, "step": 903 }, { "epoch": 0.11276038418360983, "grad_norm": 0.0, "learning_rate": 1.964339611131112e-05, "loss": 1.2382, "step": 904 }, { "epoch": 0.11288511912186604, "grad_norm": 0.0, "learning_rate": 1.964232603569633e-05, "loss": 1.146, "step": 905 }, { "epoch": 0.11300985406012223, "grad_norm": 0.0, "learning_rate": 1.9641254386210847e-05, "loss": 1.1538, "step": 906 }, { "epoch": 0.11313458899837844, "grad_norm": 0.0, "learning_rate": 1.9640181163029597e-05, "loss": 1.1144, "step": 907 }, { "epoch": 0.11325932393663465, "grad_norm": 0.0, "learning_rate": 1.9639106366327753e-05, "loss": 1.1154, "step": 908 }, { "epoch": 0.11338405887489085, "grad_norm": 0.0, "learning_rate": 1.963802999628075e-05, "loss": 1.1006, "step": 909 }, { "epoch": 0.11350879381314706, "grad_norm": 0.0, "learning_rate": 1.9636952053064277e-05, "loss": 1.1512, "step": 910 }, { "epoch": 0.11363352875140327, "grad_norm": 0.0, "learning_rate": 1.9635872536854283e-05, "loss": 1.129, "step": 911 }, { "epoch": 0.11375826368965947, "grad_norm": 0.0, "learning_rate": 1.9634791447826972e-05, "loss": 1.1462, "step": 912 }, { "epoch": 0.11388299862791568, "grad_norm": 0.0, "learning_rate": 1.9633708786158803e-05, "loss": 1.1263, "step": 913 }, { "epoch": 0.11400773356617189, "grad_norm": 0.0, "learning_rate": 1.96326245520265e-05, "loss": 1.181, "step": 914 }, { "epoch": 0.11413246850442808, "grad_norm": 0.0, "learning_rate": 1.963153874560703e-05, "loss": 1.1162, "step": 915 }, { "epoch": 0.1142572034426843, "grad_norm": 0.0, "learning_rate": 1.963045136707763e-05, "loss": 1.1803, "step": 916 }, { "epoch": 0.1143819383809405, "grad_norm": 0.0, "learning_rate": 1.962936241661578e-05, "loss": 1.1123, "step": 917 }, { "epoch": 0.1145066733191967, "grad_norm": 0.0, "learning_rate": 1.9628271894399233e-05, "loss": 1.1047, "step": 918 }, { "epoch": 0.11463140825745291, "grad_norm": 0.0, "learning_rate": 1.9627179800605984e-05, "loss": 1.1256, "step": 919 }, { "epoch": 0.11475614319570912, "grad_norm": 0.0, "learning_rate": 1.962608613541429e-05, "loss": 1.1491, "step": 920 }, { "epoch": 0.11488087813396532, "grad_norm": 0.0, "learning_rate": 1.9624990899002666e-05, "loss": 1.1099, "step": 921 }, { "epoch": 0.11500561307222153, "grad_norm": 0.0, "learning_rate": 1.9623894091549886e-05, "loss": 1.1712, "step": 922 }, { "epoch": 0.11513034801047774, "grad_norm": 0.0, "learning_rate": 1.9622795713234973e-05, "loss": 1.0951, "step": 923 }, { "epoch": 0.11525508294873393, "grad_norm": 0.0, "learning_rate": 1.9621695764237206e-05, "loss": 1.1622, "step": 924 }, { "epoch": 0.11537981788699014, "grad_norm": 0.0, "learning_rate": 1.9620594244736133e-05, "loss": 1.1812, "step": 925 }, { "epoch": 0.11550455282524635, "grad_norm": 0.0, "learning_rate": 1.9619491154911545e-05, "loss": 1.153, "step": 926 }, { "epoch": 0.11562928776350256, "grad_norm": 0.0, "learning_rate": 1.9618386494943493e-05, "loss": 1.1707, "step": 927 }, { "epoch": 0.11575402270175876, "grad_norm": 0.0, "learning_rate": 1.9617280265012287e-05, "loss": 1.1652, "step": 928 }, { "epoch": 0.11587875764001497, "grad_norm": 0.0, "learning_rate": 1.9616172465298492e-05, "loss": 1.1451, "step": 929 }, { "epoch": 0.11600349257827118, "grad_norm": 0.0, "learning_rate": 1.9615063095982928e-05, "loss": 1.1559, "step": 930 }, { "epoch": 0.11612822751652738, "grad_norm": 0.0, "learning_rate": 1.9613952157246674e-05, "loss": 1.1521, "step": 931 }, { "epoch": 0.11625296245478359, "grad_norm": 0.0, "learning_rate": 1.961283964927106e-05, "loss": 1.1152, "step": 932 }, { "epoch": 0.1163776973930398, "grad_norm": 0.0, "learning_rate": 1.9611725572237677e-05, "loss": 1.1444, "step": 933 }, { "epoch": 0.116502432331296, "grad_norm": 0.0, "learning_rate": 1.9610609926328373e-05, "loss": 1.1536, "step": 934 }, { "epoch": 0.1166271672695522, "grad_norm": 0.0, "learning_rate": 1.9609492711725245e-05, "loss": 1.165, "step": 935 }, { "epoch": 0.11675190220780841, "grad_norm": 0.0, "learning_rate": 1.960837392861065e-05, "loss": 1.0875, "step": 936 }, { "epoch": 0.11687663714606461, "grad_norm": 0.0, "learning_rate": 1.9607253577167206e-05, "loss": 1.1947, "step": 937 }, { "epoch": 0.11700137208432082, "grad_norm": 0.0, "learning_rate": 1.9606131657577783e-05, "loss": 1.1163, "step": 938 }, { "epoch": 0.11712610702257703, "grad_norm": 0.0, "learning_rate": 1.9605008170025498e-05, "loss": 1.1686, "step": 939 }, { "epoch": 0.11725084196083323, "grad_norm": 0.0, "learning_rate": 1.9603883114693744e-05, "loss": 1.1438, "step": 940 }, { "epoch": 0.11737557689908944, "grad_norm": 0.0, "learning_rate": 1.9602756491766154e-05, "loss": 1.1252, "step": 941 }, { "epoch": 0.11750031183734565, "grad_norm": 0.0, "learning_rate": 1.9601628301426617e-05, "loss": 1.1245, "step": 942 }, { "epoch": 0.11762504677560184, "grad_norm": 0.0, "learning_rate": 1.960049854385929e-05, "loss": 1.1531, "step": 943 }, { "epoch": 0.11774978171385805, "grad_norm": 0.0, "learning_rate": 1.959936721924857e-05, "loss": 1.1717, "step": 944 }, { "epoch": 0.11787451665211426, "grad_norm": 0.0, "learning_rate": 1.959823432777912e-05, "loss": 1.1388, "step": 945 }, { "epoch": 0.11799925159037046, "grad_norm": 0.0, "learning_rate": 1.959709986963586e-05, "loss": 1.1534, "step": 946 }, { "epoch": 0.11812398652862667, "grad_norm": 0.0, "learning_rate": 1.959596384500396e-05, "loss": 1.1587, "step": 947 }, { "epoch": 0.11824872146688288, "grad_norm": 0.0, "learning_rate": 1.9594826254068846e-05, "loss": 1.152, "step": 948 }, { "epoch": 0.11837345640513908, "grad_norm": 0.0, "learning_rate": 1.9593687097016202e-05, "loss": 1.1289, "step": 949 }, { "epoch": 0.11849819134339529, "grad_norm": 0.0, "learning_rate": 1.959254637403197e-05, "loss": 1.169, "step": 950 }, { "epoch": 0.1186229262816515, "grad_norm": 0.0, "learning_rate": 1.9591404085302345e-05, "loss": 1.1627, "step": 951 }, { "epoch": 0.1187476612199077, "grad_norm": 0.0, "learning_rate": 1.9590260231013774e-05, "loss": 1.1162, "step": 952 }, { "epoch": 0.1188723961581639, "grad_norm": 0.0, "learning_rate": 1.9589114811352962e-05, "loss": 1.1175, "step": 953 }, { "epoch": 0.11899713109642011, "grad_norm": 0.0, "learning_rate": 1.9587967826506875e-05, "loss": 1.1419, "step": 954 }, { "epoch": 0.11912186603467631, "grad_norm": 0.0, "learning_rate": 1.9586819276662728e-05, "loss": 1.1202, "step": 955 }, { "epoch": 0.11924660097293252, "grad_norm": 0.0, "learning_rate": 1.958566916200799e-05, "loss": 1.1266, "step": 956 }, { "epoch": 0.11937133591118873, "grad_norm": 0.0, "learning_rate": 1.9584517482730394e-05, "loss": 1.1601, "step": 957 }, { "epoch": 0.11949607084944493, "grad_norm": 0.0, "learning_rate": 1.9583364239017916e-05, "loss": 1.1688, "step": 958 }, { "epoch": 0.11962080578770114, "grad_norm": 0.0, "learning_rate": 1.9582209431058806e-05, "loss": 1.1367, "step": 959 }, { "epoch": 0.11974554072595735, "grad_norm": 0.0, "learning_rate": 1.9581053059041547e-05, "loss": 1.1303, "step": 960 }, { "epoch": 0.11987027566421354, "grad_norm": 0.0, "learning_rate": 1.957989512315489e-05, "loss": 1.0973, "step": 961 }, { "epoch": 0.11999501060246975, "grad_norm": 0.0, "learning_rate": 1.9578735623587843e-05, "loss": 1.1673, "step": 962 }, { "epoch": 0.12011974554072596, "grad_norm": 0.0, "learning_rate": 1.9577574560529663e-05, "loss": 1.2002, "step": 963 }, { "epoch": 0.12024448047898216, "grad_norm": 0.0, "learning_rate": 1.9576411934169867e-05, "loss": 1.1511, "step": 964 }, { "epoch": 0.12036921541723837, "grad_norm": 0.0, "learning_rate": 1.9575247744698222e-05, "loss": 1.1388, "step": 965 }, { "epoch": 0.12049395035549458, "grad_norm": 0.0, "learning_rate": 1.9574081992304753e-05, "loss": 1.09, "step": 966 }, { "epoch": 0.12061868529375078, "grad_norm": 0.0, "learning_rate": 1.9572914677179743e-05, "loss": 1.1336, "step": 967 }, { "epoch": 0.12074342023200699, "grad_norm": 0.0, "learning_rate": 1.957174579951373e-05, "loss": 1.093, "step": 968 }, { "epoch": 0.1208681551702632, "grad_norm": 0.0, "learning_rate": 1.9570575359497493e-05, "loss": 1.1816, "step": 969 }, { "epoch": 0.1209928901085194, "grad_norm": 0.0, "learning_rate": 1.956940335732209e-05, "loss": 1.1122, "step": 970 }, { "epoch": 0.1211176250467756, "grad_norm": 0.0, "learning_rate": 1.9568229793178816e-05, "loss": 1.1463, "step": 971 }, { "epoch": 0.12124235998503181, "grad_norm": 0.0, "learning_rate": 1.9567054667259222e-05, "loss": 1.142, "step": 972 }, { "epoch": 0.12136709492328801, "grad_norm": 0.0, "learning_rate": 1.9565877979755125e-05, "loss": 1.1704, "step": 973 }, { "epoch": 0.12149182986154422, "grad_norm": 0.0, "learning_rate": 1.9564699730858584e-05, "loss": 1.1838, "step": 974 }, { "epoch": 0.12161656479980043, "grad_norm": 0.0, "learning_rate": 1.9563519920761926e-05, "loss": 1.1336, "step": 975 }, { "epoch": 0.12174129973805663, "grad_norm": 0.0, "learning_rate": 1.956233854965772e-05, "loss": 1.1126, "step": 976 }, { "epoch": 0.12186603467631284, "grad_norm": 0.0, "learning_rate": 1.95611556177388e-05, "loss": 1.1157, "step": 977 }, { "epoch": 0.12199076961456905, "grad_norm": 0.0, "learning_rate": 1.9559971125198246e-05, "loss": 1.1095, "step": 978 }, { "epoch": 0.12211550455282524, "grad_norm": 0.0, "learning_rate": 1.9558785072229395e-05, "loss": 1.1709, "step": 979 }, { "epoch": 0.12224023949108145, "grad_norm": 0.0, "learning_rate": 1.955759745902585e-05, "loss": 1.1411, "step": 980 }, { "epoch": 0.12236497442933766, "grad_norm": 0.0, "learning_rate": 1.9556408285781452e-05, "loss": 1.1383, "step": 981 }, { "epoch": 0.12248970936759386, "grad_norm": 0.0, "learning_rate": 1.9555217552690308e-05, "loss": 1.1391, "step": 982 }, { "epoch": 0.12261444430585007, "grad_norm": 0.0, "learning_rate": 1.9554025259946772e-05, "loss": 1.1447, "step": 983 }, { "epoch": 0.12273917924410628, "grad_norm": 0.0, "learning_rate": 1.9552831407745457e-05, "loss": 1.1413, "step": 984 }, { "epoch": 0.12286391418236248, "grad_norm": 0.0, "learning_rate": 1.955163599628123e-05, "loss": 1.1329, "step": 985 }, { "epoch": 0.12298864912061869, "grad_norm": 0.0, "learning_rate": 1.9550439025749217e-05, "loss": 1.1285, "step": 986 }, { "epoch": 0.1231133840588749, "grad_norm": 0.0, "learning_rate": 1.954924049634479e-05, "loss": 1.1336, "step": 987 }, { "epoch": 0.1232381189971311, "grad_norm": 0.0, "learning_rate": 1.9548040408263575e-05, "loss": 1.1578, "step": 988 }, { "epoch": 0.1233628539353873, "grad_norm": 0.0, "learning_rate": 1.954683876170146e-05, "loss": 1.1119, "step": 989 }, { "epoch": 0.12348758887364351, "grad_norm": 0.0, "learning_rate": 1.954563555685459e-05, "loss": 1.0908, "step": 990 }, { "epoch": 0.12361232381189971, "grad_norm": 0.0, "learning_rate": 1.9544430793919347e-05, "loss": 1.1221, "step": 991 }, { "epoch": 0.12373705875015592, "grad_norm": 0.0, "learning_rate": 1.954322447309239e-05, "loss": 1.153, "step": 992 }, { "epoch": 0.12386179368841213, "grad_norm": 0.0, "learning_rate": 1.9542016594570614e-05, "loss": 1.0948, "step": 993 }, { "epoch": 0.12398652862666833, "grad_norm": 0.0, "learning_rate": 1.9540807158551175e-05, "loss": 1.1051, "step": 994 }, { "epoch": 0.12411126356492454, "grad_norm": 0.0, "learning_rate": 1.9539596165231487e-05, "loss": 1.1083, "step": 995 }, { "epoch": 0.12423599850318075, "grad_norm": 0.0, "learning_rate": 1.953838361480921e-05, "loss": 1.194, "step": 996 }, { "epoch": 0.12436073344143694, "grad_norm": 0.0, "learning_rate": 1.953716950748227e-05, "loss": 1.0966, "step": 997 }, { "epoch": 0.12448546837969315, "grad_norm": 0.0, "learning_rate": 1.9535953843448834e-05, "loss": 1.0894, "step": 998 }, { "epoch": 0.12461020331794936, "grad_norm": 0.0, "learning_rate": 1.9534736622907334e-05, "loss": 1.1391, "step": 999 }, { "epoch": 0.12473493825620556, "grad_norm": 0.0, "learning_rate": 1.9533517846056442e-05, "loss": 1.1064, "step": 1000 }, { "epoch": 0.12485967319446177, "grad_norm": 0.0, "learning_rate": 1.95322975130951e-05, "loss": 1.1433, "step": 1001 }, { "epoch": 0.12498440813271798, "grad_norm": 0.0, "learning_rate": 1.95310756242225e-05, "loss": 1.1501, "step": 1002 }, { "epoch": 0.12510914307097418, "grad_norm": 0.0, "learning_rate": 1.952985217963808e-05, "loss": 1.1732, "step": 1003 }, { "epoch": 0.1252338780092304, "grad_norm": 0.0, "learning_rate": 1.9528627179541534e-05, "loss": 1.1597, "step": 1004 }, { "epoch": 0.1253586129474866, "grad_norm": 0.0, "learning_rate": 1.952740062413282e-05, "loss": 1.1587, "step": 1005 }, { "epoch": 0.1254833478857428, "grad_norm": 0.0, "learning_rate": 1.952617251361214e-05, "loss": 1.1582, "step": 1006 }, { "epoch": 0.125608082823999, "grad_norm": 0.0, "learning_rate": 1.952494284817995e-05, "loss": 1.1231, "step": 1007 }, { "epoch": 0.1257328177622552, "grad_norm": 0.0, "learning_rate": 1.9523711628036967e-05, "loss": 1.1287, "step": 1008 }, { "epoch": 0.1258575527005114, "grad_norm": 0.0, "learning_rate": 1.9522478853384154e-05, "loss": 1.1177, "step": 1009 }, { "epoch": 0.12598228763876762, "grad_norm": 0.0, "learning_rate": 1.9521244524422732e-05, "loss": 1.0999, "step": 1010 }, { "epoch": 0.12610702257702383, "grad_norm": 0.0, "learning_rate": 1.9520008641354176e-05, "loss": 1.1496, "step": 1011 }, { "epoch": 0.12623175751528004, "grad_norm": 0.0, "learning_rate": 1.951877120438021e-05, "loss": 1.1302, "step": 1012 }, { "epoch": 0.12635649245353622, "grad_norm": 0.0, "learning_rate": 1.9517532213702816e-05, "loss": 1.1575, "step": 1013 }, { "epoch": 0.12648122739179243, "grad_norm": 0.0, "learning_rate": 1.951629166952423e-05, "loss": 1.1348, "step": 1014 }, { "epoch": 0.12660596233004864, "grad_norm": 0.0, "learning_rate": 1.951504957204694e-05, "loss": 1.1499, "step": 1015 }, { "epoch": 0.12673069726830485, "grad_norm": 0.0, "learning_rate": 1.9513805921473685e-05, "loss": 1.1314, "step": 1016 }, { "epoch": 0.12685543220656106, "grad_norm": 0.0, "learning_rate": 1.9512560718007462e-05, "loss": 1.1384, "step": 1017 }, { "epoch": 0.12698016714481727, "grad_norm": 0.0, "learning_rate": 1.9511313961851522e-05, "loss": 1.1023, "step": 1018 }, { "epoch": 0.12710490208307346, "grad_norm": 0.0, "learning_rate": 1.951006565320936e-05, "loss": 1.0817, "step": 1019 }, { "epoch": 0.12722963702132967, "grad_norm": 0.0, "learning_rate": 1.9508815792284742e-05, "loss": 1.1211, "step": 1020 }, { "epoch": 0.12735437195958588, "grad_norm": 0.0, "learning_rate": 1.9507564379281666e-05, "loss": 1.1528, "step": 1021 }, { "epoch": 0.1274791068978421, "grad_norm": 0.0, "learning_rate": 1.95063114144044e-05, "loss": 1.1334, "step": 1022 }, { "epoch": 0.1276038418360983, "grad_norm": 0.0, "learning_rate": 1.9505056897857462e-05, "loss": 1.1118, "step": 1023 }, { "epoch": 0.1277285767743545, "grad_norm": 0.0, "learning_rate": 1.9503800829845613e-05, "loss": 1.1127, "step": 1024 }, { "epoch": 0.1278533117126107, "grad_norm": 0.0, "learning_rate": 1.950254321057388e-05, "loss": 1.1389, "step": 1025 }, { "epoch": 0.1279780466508669, "grad_norm": 0.0, "learning_rate": 1.9501284040247538e-05, "loss": 1.1227, "step": 1026 }, { "epoch": 0.1281027815891231, "grad_norm": 0.0, "learning_rate": 1.9500023319072113e-05, "loss": 1.1492, "step": 1027 }, { "epoch": 0.12822751652737932, "grad_norm": 0.0, "learning_rate": 1.949876104725339e-05, "loss": 1.0503, "step": 1028 }, { "epoch": 0.12835225146563553, "grad_norm": 0.0, "learning_rate": 1.9497497224997402e-05, "loss": 1.1046, "step": 1029 }, { "epoch": 0.12847698640389174, "grad_norm": 0.0, "learning_rate": 1.9496231852510434e-05, "loss": 1.1636, "step": 1030 }, { "epoch": 0.12860172134214792, "grad_norm": 0.0, "learning_rate": 1.9494964929999032e-05, "loss": 1.1369, "step": 1031 }, { "epoch": 0.12872645628040413, "grad_norm": 0.0, "learning_rate": 1.9493696457669982e-05, "loss": 1.096, "step": 1032 }, { "epoch": 0.12885119121866034, "grad_norm": 0.0, "learning_rate": 1.949242643573034e-05, "loss": 1.1121, "step": 1033 }, { "epoch": 0.12897592615691655, "grad_norm": 0.0, "learning_rate": 1.94911548643874e-05, "loss": 1.125, "step": 1034 }, { "epoch": 0.12910066109517276, "grad_norm": 0.0, "learning_rate": 1.9489881743848714e-05, "loss": 1.1212, "step": 1035 }, { "epoch": 0.12922539603342897, "grad_norm": 0.0, "learning_rate": 1.9488607074322094e-05, "loss": 1.1142, "step": 1036 }, { "epoch": 0.12935013097168516, "grad_norm": 0.0, "learning_rate": 1.9487330856015587e-05, "loss": 1.1235, "step": 1037 }, { "epoch": 0.12947486590994137, "grad_norm": 0.0, "learning_rate": 1.948605308913751e-05, "loss": 1.1715, "step": 1038 }, { "epoch": 0.12959960084819758, "grad_norm": 0.0, "learning_rate": 1.948477377389643e-05, "loss": 1.1627, "step": 1039 }, { "epoch": 0.1297243357864538, "grad_norm": 0.0, "learning_rate": 1.948349291050116e-05, "loss": 1.1719, "step": 1040 }, { "epoch": 0.12984907072471, "grad_norm": 0.0, "learning_rate": 1.9482210499160767e-05, "loss": 1.1182, "step": 1041 }, { "epoch": 0.1299738056629662, "grad_norm": 0.0, "learning_rate": 1.9480926540084578e-05, "loss": 1.0823, "step": 1042 }, { "epoch": 0.1300985406012224, "grad_norm": 0.0, "learning_rate": 1.9479641033482163e-05, "loss": 1.1308, "step": 1043 }, { "epoch": 0.1302232755394786, "grad_norm": 0.0, "learning_rate": 1.9478353979563352e-05, "loss": 1.1437, "step": 1044 }, { "epoch": 0.1303480104777348, "grad_norm": 0.0, "learning_rate": 1.9477065378538222e-05, "loss": 1.0979, "step": 1045 }, { "epoch": 0.13047274541599102, "grad_norm": 0.0, "learning_rate": 1.947577523061711e-05, "loss": 1.141, "step": 1046 }, { "epoch": 0.13059748035424723, "grad_norm": 0.0, "learning_rate": 1.9474483536010592e-05, "loss": 1.17, "step": 1047 }, { "epoch": 0.13072221529250344, "grad_norm": 0.0, "learning_rate": 1.9473190294929514e-05, "loss": 1.0991, "step": 1048 }, { "epoch": 0.13084695023075962, "grad_norm": 0.0, "learning_rate": 1.9471895507584963e-05, "loss": 1.1973, "step": 1049 }, { "epoch": 0.13097168516901583, "grad_norm": 0.0, "learning_rate": 1.947059917418828e-05, "loss": 1.1357, "step": 1050 }, { "epoch": 0.13109642010727204, "grad_norm": 0.0, "learning_rate": 1.946930129495106e-05, "loss": 1.1282, "step": 1051 }, { "epoch": 0.13122115504552825, "grad_norm": 0.0, "learning_rate": 1.9468001870085145e-05, "loss": 1.1503, "step": 1052 }, { "epoch": 0.13134588998378446, "grad_norm": 0.0, "learning_rate": 1.946670089980264e-05, "loss": 1.1082, "step": 1053 }, { "epoch": 0.13147062492204067, "grad_norm": 0.0, "learning_rate": 1.9465398384315893e-05, "loss": 1.161, "step": 1054 }, { "epoch": 0.13159535986029686, "grad_norm": 0.0, "learning_rate": 1.9464094323837514e-05, "loss": 1.1192, "step": 1055 }, { "epoch": 0.13172009479855307, "grad_norm": 0.0, "learning_rate": 1.9462788718580347e-05, "loss": 1.1282, "step": 1056 }, { "epoch": 0.13184482973680928, "grad_norm": 0.0, "learning_rate": 1.946148156875751e-05, "loss": 1.11, "step": 1057 }, { "epoch": 0.1319695646750655, "grad_norm": 0.0, "learning_rate": 1.946017287458235e-05, "loss": 1.132, "step": 1058 }, { "epoch": 0.1320942996133217, "grad_norm": 0.0, "learning_rate": 1.9458862636268494e-05, "loss": 1.1124, "step": 1059 }, { "epoch": 0.1322190345515779, "grad_norm": 0.0, "learning_rate": 1.9457550854029797e-05, "loss": 1.1679, "step": 1060 }, { "epoch": 0.1323437694898341, "grad_norm": 0.0, "learning_rate": 1.9456237528080377e-05, "loss": 1.1478, "step": 1061 }, { "epoch": 0.1324685044280903, "grad_norm": 0.0, "learning_rate": 1.9454922658634604e-05, "loss": 1.0773, "step": 1062 }, { "epoch": 0.1325932393663465, "grad_norm": 0.0, "learning_rate": 1.9453606245907094e-05, "loss": 1.1056, "step": 1063 }, { "epoch": 0.13271797430460272, "grad_norm": 0.0, "learning_rate": 1.9452288290112725e-05, "loss": 1.1262, "step": 1064 }, { "epoch": 0.13284270924285893, "grad_norm": 0.0, "learning_rate": 1.945096879146661e-05, "loss": 1.1281, "step": 1065 }, { "epoch": 0.13296744418111514, "grad_norm": 0.0, "learning_rate": 1.944964775018413e-05, "loss": 1.1214, "step": 1066 }, { "epoch": 0.13309217911937132, "grad_norm": 0.0, "learning_rate": 1.944832516648092e-05, "loss": 1.1413, "step": 1067 }, { "epoch": 0.13321691405762753, "grad_norm": 0.0, "learning_rate": 1.944700104057285e-05, "loss": 1.1123, "step": 1068 }, { "epoch": 0.13334164899588374, "grad_norm": 0.0, "learning_rate": 1.944567537267605e-05, "loss": 1.0934, "step": 1069 }, { "epoch": 0.13346638393413995, "grad_norm": 0.0, "learning_rate": 1.9444348163006904e-05, "loss": 1.1306, "step": 1070 }, { "epoch": 0.13359111887239616, "grad_norm": 0.0, "learning_rate": 1.944301941178205e-05, "loss": 1.1304, "step": 1071 }, { "epoch": 0.13371585381065237, "grad_norm": 0.0, "learning_rate": 1.944168911921837e-05, "loss": 1.1074, "step": 1072 }, { "epoch": 0.13384058874890856, "grad_norm": 0.0, "learning_rate": 1.9440357285533e-05, "loss": 1.0921, "step": 1073 }, { "epoch": 0.13396532368716477, "grad_norm": 0.0, "learning_rate": 1.9439023910943336e-05, "loss": 1.1569, "step": 1074 }, { "epoch": 0.13409005862542098, "grad_norm": 0.0, "learning_rate": 1.9437688995667008e-05, "loss": 1.0784, "step": 1075 }, { "epoch": 0.1342147935636772, "grad_norm": 0.0, "learning_rate": 1.9436352539921916e-05, "loss": 1.1296, "step": 1076 }, { "epoch": 0.1343395285019334, "grad_norm": 0.0, "learning_rate": 1.94350145439262e-05, "loss": 1.0693, "step": 1077 }, { "epoch": 0.1344642634401896, "grad_norm": 0.0, "learning_rate": 1.9433675007898255e-05, "loss": 1.1578, "step": 1078 }, { "epoch": 0.1345889983784458, "grad_norm": 0.0, "learning_rate": 1.9432333932056726e-05, "loss": 1.1414, "step": 1079 }, { "epoch": 0.134713733316702, "grad_norm": 0.0, "learning_rate": 1.9430991316620512e-05, "loss": 1.2287, "step": 1080 }, { "epoch": 0.1348384682549582, "grad_norm": 0.0, "learning_rate": 1.942964716180876e-05, "loss": 1.0994, "step": 1081 }, { "epoch": 0.13496320319321442, "grad_norm": 0.0, "learning_rate": 1.9428301467840875e-05, "loss": 1.1297, "step": 1082 }, { "epoch": 0.13508793813147063, "grad_norm": 0.0, "learning_rate": 1.94269542349365e-05, "loss": 1.1508, "step": 1083 }, { "epoch": 0.13521267306972684, "grad_norm": 0.0, "learning_rate": 1.9425605463315546e-05, "loss": 1.0755, "step": 1084 }, { "epoch": 0.13533740800798302, "grad_norm": 0.0, "learning_rate": 1.942425515319816e-05, "loss": 1.1158, "step": 1085 }, { "epoch": 0.13546214294623923, "grad_norm": 0.0, "learning_rate": 1.9422903304804747e-05, "loss": 1.1375, "step": 1086 }, { "epoch": 0.13558687788449544, "grad_norm": 0.0, "learning_rate": 1.9421549918355965e-05, "loss": 1.0972, "step": 1087 }, { "epoch": 0.13571161282275165, "grad_norm": 0.0, "learning_rate": 1.942019499407272e-05, "loss": 1.1236, "step": 1088 }, { "epoch": 0.13583634776100786, "grad_norm": 0.0, "learning_rate": 1.9418838532176173e-05, "loss": 1.1385, "step": 1089 }, { "epoch": 0.13596108269926407, "grad_norm": 0.0, "learning_rate": 1.9417480532887733e-05, "loss": 1.0836, "step": 1090 }, { "epoch": 0.13608581763752026, "grad_norm": 0.0, "learning_rate": 1.941612099642905e-05, "loss": 1.0772, "step": 1091 }, { "epoch": 0.13621055257577647, "grad_norm": 0.0, "learning_rate": 1.9414759923022045e-05, "loss": 1.1735, "step": 1092 }, { "epoch": 0.13633528751403268, "grad_norm": 0.0, "learning_rate": 1.9413397312888876e-05, "loss": 1.0814, "step": 1093 }, { "epoch": 0.1364600224522889, "grad_norm": 0.0, "learning_rate": 1.9412033166251954e-05, "loss": 1.1389, "step": 1094 }, { "epoch": 0.1365847573905451, "grad_norm": 0.0, "learning_rate": 1.9410667483333944e-05, "loss": 1.0884, "step": 1095 }, { "epoch": 0.1367094923288013, "grad_norm": 0.0, "learning_rate": 1.9409300264357757e-05, "loss": 1.0577, "step": 1096 }, { "epoch": 0.1368342272670575, "grad_norm": 0.0, "learning_rate": 1.940793150954656e-05, "loss": 1.1443, "step": 1097 }, { "epoch": 0.1369589622053137, "grad_norm": 0.0, "learning_rate": 1.940656121912377e-05, "loss": 1.0989, "step": 1098 }, { "epoch": 0.1370836971435699, "grad_norm": 0.0, "learning_rate": 1.940518939331305e-05, "loss": 1.063, "step": 1099 }, { "epoch": 0.13720843208182612, "grad_norm": 0.0, "learning_rate": 1.9403816032338318e-05, "loss": 1.1621, "step": 1100 }, { "epoch": 0.13733316702008233, "grad_norm": 0.0, "learning_rate": 1.9402441136423738e-05, "loss": 1.1752, "step": 1101 }, { "epoch": 0.13745790195833854, "grad_norm": 0.0, "learning_rate": 1.9401064705793735e-05, "loss": 1.1501, "step": 1102 }, { "epoch": 0.13758263689659472, "grad_norm": 0.0, "learning_rate": 1.9399686740672967e-05, "loss": 1.1507, "step": 1103 }, { "epoch": 0.13770737183485093, "grad_norm": 0.0, "learning_rate": 1.9398307241286364e-05, "loss": 1.1157, "step": 1104 }, { "epoch": 0.13783210677310714, "grad_norm": 0.0, "learning_rate": 1.9396926207859085e-05, "loss": 1.0677, "step": 1105 }, { "epoch": 0.13795684171136335, "grad_norm": 0.0, "learning_rate": 1.9395543640616553e-05, "loss": 1.1637, "step": 1106 }, { "epoch": 0.13808157664961956, "grad_norm": 0.0, "learning_rate": 1.9394159539784444e-05, "loss": 1.0835, "step": 1107 }, { "epoch": 0.13820631158787577, "grad_norm": 0.0, "learning_rate": 1.9392773905588668e-05, "loss": 1.081, "step": 1108 }, { "epoch": 0.13833104652613196, "grad_norm": 0.0, "learning_rate": 1.93913867382554e-05, "loss": 1.1475, "step": 1109 }, { "epoch": 0.13845578146438817, "grad_norm": 0.0, "learning_rate": 1.9389998038011068e-05, "loss": 1.1231, "step": 1110 }, { "epoch": 0.13858051640264438, "grad_norm": 0.0, "learning_rate": 1.9388607805082328e-05, "loss": 1.0812, "step": 1111 }, { "epoch": 0.1387052513409006, "grad_norm": 0.0, "learning_rate": 1.938721603969611e-05, "loss": 1.146, "step": 1112 }, { "epoch": 0.1388299862791568, "grad_norm": 0.0, "learning_rate": 1.9385822742079585e-05, "loss": 1.0993, "step": 1113 }, { "epoch": 0.138954721217413, "grad_norm": 0.0, "learning_rate": 1.9384427912460172e-05, "loss": 1.1314, "step": 1114 }, { "epoch": 0.1390794561556692, "grad_norm": 0.0, "learning_rate": 1.9383031551065546e-05, "loss": 1.1138, "step": 1115 }, { "epoch": 0.1392041910939254, "grad_norm": 0.0, "learning_rate": 1.9381633658123625e-05, "loss": 1.1517, "step": 1116 }, { "epoch": 0.1393289260321816, "grad_norm": 0.0, "learning_rate": 1.938023423386258e-05, "loss": 1.1195, "step": 1117 }, { "epoch": 0.13945366097043782, "grad_norm": 0.0, "learning_rate": 1.9378833278510835e-05, "loss": 1.1632, "step": 1118 }, { "epoch": 0.13957839590869403, "grad_norm": 0.0, "learning_rate": 1.937743079229706e-05, "loss": 1.1023, "step": 1119 }, { "epoch": 0.13970313084695024, "grad_norm": 0.0, "learning_rate": 1.9376026775450172e-05, "loss": 1.188, "step": 1120 }, { "epoch": 0.13982786578520642, "grad_norm": 0.0, "learning_rate": 1.9374621228199353e-05, "loss": 1.1246, "step": 1121 }, { "epoch": 0.13995260072346263, "grad_norm": 0.0, "learning_rate": 1.9373214150774014e-05, "loss": 1.0856, "step": 1122 }, { "epoch": 0.14007733566171884, "grad_norm": 0.0, "learning_rate": 1.9371805543403826e-05, "loss": 1.1137, "step": 1123 }, { "epoch": 0.14020207059997505, "grad_norm": 0.0, "learning_rate": 1.9370395406318713e-05, "loss": 1.0517, "step": 1124 }, { "epoch": 0.14032680553823126, "grad_norm": 0.0, "learning_rate": 1.9368983739748842e-05, "loss": 1.1367, "step": 1125 }, { "epoch": 0.14045154047648747, "grad_norm": 0.0, "learning_rate": 1.9367570543924635e-05, "loss": 1.1535, "step": 1126 }, { "epoch": 0.14057627541474366, "grad_norm": 0.0, "learning_rate": 1.936615581907676e-05, "loss": 1.1021, "step": 1127 }, { "epoch": 0.14070101035299987, "grad_norm": 0.0, "learning_rate": 1.9364739565436137e-05, "loss": 1.0551, "step": 1128 }, { "epoch": 0.14082574529125608, "grad_norm": 0.0, "learning_rate": 1.9363321783233933e-05, "loss": 1.0974, "step": 1129 }, { "epoch": 0.1409504802295123, "grad_norm": 0.0, "learning_rate": 1.9361902472701563e-05, "loss": 1.1214, "step": 1130 }, { "epoch": 0.1410752151677685, "grad_norm": 0.0, "learning_rate": 1.9360481634070704e-05, "loss": 1.1726, "step": 1131 }, { "epoch": 0.1411999501060247, "grad_norm": 0.0, "learning_rate": 1.935905926757326e-05, "loss": 1.1374, "step": 1132 }, { "epoch": 0.1413246850442809, "grad_norm": 0.0, "learning_rate": 1.9357635373441402e-05, "loss": 1.1175, "step": 1133 }, { "epoch": 0.1414494199825371, "grad_norm": 0.0, "learning_rate": 1.935620995190755e-05, "loss": 1.1196, "step": 1134 }, { "epoch": 0.1415741549207933, "grad_norm": 0.0, "learning_rate": 1.9354783003204366e-05, "loss": 1.1663, "step": 1135 }, { "epoch": 0.14169888985904952, "grad_norm": 0.0, "learning_rate": 1.935335452756476e-05, "loss": 1.1566, "step": 1136 }, { "epoch": 0.14182362479730573, "grad_norm": 0.0, "learning_rate": 1.93519245252219e-05, "loss": 1.0581, "step": 1137 }, { "epoch": 0.14194835973556194, "grad_norm": 0.0, "learning_rate": 1.9350492996409195e-05, "loss": 1.1406, "step": 1138 }, { "epoch": 0.14207309467381812, "grad_norm": 0.0, "learning_rate": 1.934905994136031e-05, "loss": 1.148, "step": 1139 }, { "epoch": 0.14219782961207433, "grad_norm": 0.0, "learning_rate": 1.9347625360309153e-05, "loss": 1.0833, "step": 1140 }, { "epoch": 0.14232256455033054, "grad_norm": 0.0, "learning_rate": 1.9346189253489888e-05, "loss": 1.1359, "step": 1141 }, { "epoch": 0.14244729948858675, "grad_norm": 0.0, "learning_rate": 1.9344751621136915e-05, "loss": 1.1354, "step": 1142 }, { "epoch": 0.14257203442684296, "grad_norm": 0.0, "learning_rate": 1.9343312463484903e-05, "loss": 1.1611, "step": 1143 }, { "epoch": 0.14269676936509917, "grad_norm": 0.0, "learning_rate": 1.934187178076875e-05, "loss": 1.1539, "step": 1144 }, { "epoch": 0.14282150430335536, "grad_norm": 0.0, "learning_rate": 1.934042957322362e-05, "loss": 1.127, "step": 1145 }, { "epoch": 0.14294623924161157, "grad_norm": 0.0, "learning_rate": 1.9338985841084907e-05, "loss": 1.1159, "step": 1146 }, { "epoch": 0.14307097417986778, "grad_norm": 0.0, "learning_rate": 1.933754058458827e-05, "loss": 1.0938, "step": 1147 }, { "epoch": 0.143195709118124, "grad_norm": 0.0, "learning_rate": 1.9336093803969616e-05, "loss": 1.12, "step": 1148 }, { "epoch": 0.1433204440563802, "grad_norm": 0.0, "learning_rate": 1.933464549946509e-05, "loss": 1.1476, "step": 1149 }, { "epoch": 0.1434451789946364, "grad_norm": 0.0, "learning_rate": 1.9333195671311093e-05, "loss": 1.0917, "step": 1150 }, { "epoch": 0.1435699139328926, "grad_norm": 0.0, "learning_rate": 1.9331744319744272e-05, "loss": 1.1102, "step": 1151 }, { "epoch": 0.1436946488711488, "grad_norm": 0.0, "learning_rate": 1.933029144500153e-05, "loss": 1.1028, "step": 1152 }, { "epoch": 0.143819383809405, "grad_norm": 0.0, "learning_rate": 1.932883704732001e-05, "loss": 1.1347, "step": 1153 }, { "epoch": 0.14394411874766122, "grad_norm": 0.0, "learning_rate": 1.9327381126937098e-05, "loss": 1.0719, "step": 1154 }, { "epoch": 0.14406885368591743, "grad_norm": 0.0, "learning_rate": 1.932592368409045e-05, "loss": 1.0831, "step": 1155 }, { "epoch": 0.14419358862417364, "grad_norm": 0.0, "learning_rate": 1.9324464719017947e-05, "loss": 1.0972, "step": 1156 }, { "epoch": 0.14431832356242982, "grad_norm": 0.0, "learning_rate": 1.932300423195774e-05, "loss": 1.0684, "step": 1157 }, { "epoch": 0.14444305850068603, "grad_norm": 0.0, "learning_rate": 1.9321542223148207e-05, "loss": 1.0786, "step": 1158 }, { "epoch": 0.14456779343894224, "grad_norm": 0.0, "learning_rate": 1.932007869282799e-05, "loss": 1.1027, "step": 1159 }, { "epoch": 0.14469252837719845, "grad_norm": 0.0, "learning_rate": 1.931861364123597e-05, "loss": 1.1092, "step": 1160 }, { "epoch": 0.14481726331545466, "grad_norm": 0.0, "learning_rate": 1.9317147068611282e-05, "loss": 1.0734, "step": 1161 }, { "epoch": 0.14494199825371087, "grad_norm": 0.0, "learning_rate": 1.931567897519331e-05, "loss": 1.1152, "step": 1162 }, { "epoch": 0.14506673319196706, "grad_norm": 0.0, "learning_rate": 1.931420936122168e-05, "loss": 1.0743, "step": 1163 }, { "epoch": 0.14519146813022327, "grad_norm": 0.0, "learning_rate": 1.9312738226936277e-05, "loss": 1.1153, "step": 1164 }, { "epoch": 0.14531620306847948, "grad_norm": 0.0, "learning_rate": 1.931126557257722e-05, "loss": 1.131, "step": 1165 }, { "epoch": 0.1454409380067357, "grad_norm": 0.0, "learning_rate": 1.930979139838489e-05, "loss": 1.1659, "step": 1166 }, { "epoch": 0.1455656729449919, "grad_norm": 0.0, "learning_rate": 1.93083157045999e-05, "loss": 1.1132, "step": 1167 }, { "epoch": 0.1456904078832481, "grad_norm": 0.0, "learning_rate": 1.9306838491463126e-05, "loss": 1.1274, "step": 1168 }, { "epoch": 0.1458151428215043, "grad_norm": 0.0, "learning_rate": 1.9305359759215686e-05, "loss": 1.1355, "step": 1169 }, { "epoch": 0.1459398777597605, "grad_norm": 0.0, "learning_rate": 1.930387950809895e-05, "loss": 1.1355, "step": 1170 }, { "epoch": 0.1460646126980167, "grad_norm": 0.0, "learning_rate": 1.9302397738354526e-05, "loss": 1.1082, "step": 1171 }, { "epoch": 0.14618934763627292, "grad_norm": 0.0, "learning_rate": 1.9300914450224278e-05, "loss": 1.1711, "step": 1172 }, { "epoch": 0.14631408257452913, "grad_norm": 0.0, "learning_rate": 1.929942964395032e-05, "loss": 1.1451, "step": 1173 }, { "epoch": 0.14643881751278534, "grad_norm": 0.0, "learning_rate": 1.9297943319775007e-05, "loss": 1.0723, "step": 1174 }, { "epoch": 0.14656355245104152, "grad_norm": 0.0, "learning_rate": 1.929645547794094e-05, "loss": 1.0819, "step": 1175 }, { "epoch": 0.14668828738929773, "grad_norm": 0.0, "learning_rate": 1.929496611869098e-05, "loss": 1.0957, "step": 1176 }, { "epoch": 0.14681302232755394, "grad_norm": 0.0, "learning_rate": 1.9293475242268224e-05, "loss": 1.1206, "step": 1177 }, { "epoch": 0.14693775726581015, "grad_norm": 0.0, "learning_rate": 1.9291982848916025e-05, "loss": 1.1448, "step": 1178 }, { "epoch": 0.14706249220406636, "grad_norm": 0.0, "learning_rate": 1.929048893887797e-05, "loss": 1.0689, "step": 1179 }, { "epoch": 0.14718722714232257, "grad_norm": 0.0, "learning_rate": 1.928899351239791e-05, "loss": 1.0981, "step": 1180 }, { "epoch": 0.14731196208057876, "grad_norm": 0.0, "learning_rate": 1.9287496569719937e-05, "loss": 1.1598, "step": 1181 }, { "epoch": 0.14743669701883497, "grad_norm": 0.0, "learning_rate": 1.9285998111088384e-05, "loss": 1.0743, "step": 1182 }, { "epoch": 0.14756143195709118, "grad_norm": 0.0, "learning_rate": 1.928449813674784e-05, "loss": 1.1013, "step": 1183 }, { "epoch": 0.1476861668953474, "grad_norm": 0.0, "learning_rate": 1.9282996646943146e-05, "loss": 1.1148, "step": 1184 }, { "epoch": 0.1478109018336036, "grad_norm": 0.0, "learning_rate": 1.9281493641919368e-05, "loss": 1.069, "step": 1185 }, { "epoch": 0.1479356367718598, "grad_norm": 0.0, "learning_rate": 1.9279989121921846e-05, "loss": 1.1031, "step": 1186 }, { "epoch": 0.148060371710116, "grad_norm": 0.0, "learning_rate": 1.9278483087196155e-05, "loss": 1.1352, "step": 1187 }, { "epoch": 0.1481851066483722, "grad_norm": 0.0, "learning_rate": 1.927697553798811e-05, "loss": 1.0962, "step": 1188 }, { "epoch": 0.1483098415866284, "grad_norm": 0.0, "learning_rate": 1.9275466474543787e-05, "loss": 1.1633, "step": 1189 }, { "epoch": 0.14843457652488462, "grad_norm": 0.0, "learning_rate": 1.9273955897109504e-05, "loss": 1.1169, "step": 1190 }, { "epoch": 0.14855931146314083, "grad_norm": 0.0, "learning_rate": 1.927244380593182e-05, "loss": 1.1146, "step": 1191 }, { "epoch": 0.14868404640139704, "grad_norm": 0.0, "learning_rate": 1.9270930201257557e-05, "loss": 1.1356, "step": 1192 }, { "epoch": 0.14880878133965322, "grad_norm": 0.0, "learning_rate": 1.9269415083333758e-05, "loss": 1.113, "step": 1193 }, { "epoch": 0.14893351627790943, "grad_norm": 0.0, "learning_rate": 1.9267898452407744e-05, "loss": 1.1076, "step": 1194 }, { "epoch": 0.14905825121616564, "grad_norm": 0.0, "learning_rate": 1.9266380308727054e-05, "loss": 1.0924, "step": 1195 }, { "epoch": 0.14918298615442185, "grad_norm": 0.0, "learning_rate": 1.9264860652539498e-05, "loss": 1.1425, "step": 1196 }, { "epoch": 0.14930772109267806, "grad_norm": 0.0, "learning_rate": 1.9263339484093115e-05, "loss": 1.0823, "step": 1197 }, { "epoch": 0.14943245603093427, "grad_norm": 0.0, "learning_rate": 1.92618168036362e-05, "loss": 1.1124, "step": 1198 }, { "epoch": 0.14955719096919046, "grad_norm": 0.0, "learning_rate": 1.9260292611417296e-05, "loss": 1.1527, "step": 1199 }, { "epoch": 0.14968192590744667, "grad_norm": 0.0, "learning_rate": 1.925876690768519e-05, "loss": 1.0595, "step": 1200 }, { "epoch": 0.14980666084570288, "grad_norm": 0.0, "learning_rate": 1.9257239692688907e-05, "loss": 1.1128, "step": 1201 }, { "epoch": 0.1499313957839591, "grad_norm": 0.0, "learning_rate": 1.9255710966677733e-05, "loss": 1.1177, "step": 1202 }, { "epoch": 0.1500561307222153, "grad_norm": 0.0, "learning_rate": 1.9254180729901194e-05, "loss": 1.0519, "step": 1203 }, { "epoch": 0.1501808656604715, "grad_norm": 0.0, "learning_rate": 1.9252648982609065e-05, "loss": 1.0798, "step": 1204 }, { "epoch": 0.1503056005987277, "grad_norm": 0.0, "learning_rate": 1.9251115725051367e-05, "loss": 1.1395, "step": 1205 }, { "epoch": 0.1504303355369839, "grad_norm": 0.0, "learning_rate": 1.9249580957478357e-05, "loss": 1.1436, "step": 1206 }, { "epoch": 0.1505550704752401, "grad_norm": 0.0, "learning_rate": 1.9248044680140558e-05, "loss": 1.1339, "step": 1207 }, { "epoch": 0.15067980541349632, "grad_norm": 0.0, "learning_rate": 1.9246506893288725e-05, "loss": 1.1102, "step": 1208 }, { "epoch": 0.15080454035175253, "grad_norm": 0.0, "learning_rate": 1.9244967597173863e-05, "loss": 1.1331, "step": 1209 }, { "epoch": 0.15092927529000874, "grad_norm": 0.0, "learning_rate": 1.9243426792047225e-05, "loss": 1.0789, "step": 1210 }, { "epoch": 0.15105401022826492, "grad_norm": 0.0, "learning_rate": 1.924188447816031e-05, "loss": 1.0248, "step": 1211 }, { "epoch": 0.15117874516652113, "grad_norm": 0.0, "learning_rate": 1.924034065576486e-05, "loss": 1.1033, "step": 1212 }, { "epoch": 0.15130348010477734, "grad_norm": 0.0, "learning_rate": 1.9238795325112867e-05, "loss": 1.071, "step": 1213 }, { "epoch": 0.15142821504303355, "grad_norm": 0.0, "learning_rate": 1.923724848645657e-05, "loss": 1.0978, "step": 1214 }, { "epoch": 0.15155294998128976, "grad_norm": 0.0, "learning_rate": 1.923570014004845e-05, "loss": 1.0932, "step": 1215 }, { "epoch": 0.15167768491954597, "grad_norm": 0.0, "learning_rate": 1.9234150286141238e-05, "loss": 1.1018, "step": 1216 }, { "epoch": 0.15180241985780216, "grad_norm": 0.0, "learning_rate": 1.9232598924987904e-05, "loss": 1.1327, "step": 1217 }, { "epoch": 0.15192715479605837, "grad_norm": 0.0, "learning_rate": 1.923104605684168e-05, "loss": 1.1135, "step": 1218 }, { "epoch": 0.15205188973431458, "grad_norm": 0.0, "learning_rate": 1.9229491681956016e-05, "loss": 1.0512, "step": 1219 }, { "epoch": 0.1521766246725708, "grad_norm": 0.0, "learning_rate": 1.922793580058464e-05, "loss": 1.0743, "step": 1220 }, { "epoch": 0.152301359610827, "grad_norm": 0.0, "learning_rate": 1.9226378412981507e-05, "loss": 1.1266, "step": 1221 }, { "epoch": 0.1524260945490832, "grad_norm": 0.0, "learning_rate": 1.9224819519400825e-05, "loss": 1.0974, "step": 1222 }, { "epoch": 0.1525508294873394, "grad_norm": 0.0, "learning_rate": 1.9223259120097035e-05, "loss": 1.1084, "step": 1223 }, { "epoch": 0.1526755644255956, "grad_norm": 0.0, "learning_rate": 1.9221697215324843e-05, "loss": 1.1021, "step": 1224 }, { "epoch": 0.1528002993638518, "grad_norm": 0.0, "learning_rate": 1.9220133805339186e-05, "loss": 1.0809, "step": 1225 }, { "epoch": 0.15292503430210802, "grad_norm": 0.0, "learning_rate": 1.9218568890395252e-05, "loss": 1.1212, "step": 1226 }, { "epoch": 0.15304976924036423, "grad_norm": 0.0, "learning_rate": 1.921700247074848e-05, "loss": 1.1368, "step": 1227 }, { "epoch": 0.15317450417862044, "grad_norm": 0.0, "learning_rate": 1.9215434546654544e-05, "loss": 1.1316, "step": 1228 }, { "epoch": 0.15329923911687662, "grad_norm": 0.0, "learning_rate": 1.921386511836937e-05, "loss": 1.1229, "step": 1229 }, { "epoch": 0.15342397405513283, "grad_norm": 0.0, "learning_rate": 1.921229418614913e-05, "loss": 1.1356, "step": 1230 }, { "epoch": 0.15354870899338904, "grad_norm": 0.0, "learning_rate": 1.9210721750250237e-05, "loss": 1.0967, "step": 1231 }, { "epoch": 0.15367344393164525, "grad_norm": 0.0, "learning_rate": 1.920914781092935e-05, "loss": 1.1164, "step": 1232 }, { "epoch": 0.15379817886990146, "grad_norm": 0.0, "learning_rate": 1.9207572368443386e-05, "loss": 1.1003, "step": 1233 }, { "epoch": 0.15392291380815767, "grad_norm": 0.0, "learning_rate": 1.9205995423049482e-05, "loss": 1.1532, "step": 1234 }, { "epoch": 0.15404764874641388, "grad_norm": 0.0, "learning_rate": 1.920441697500505e-05, "loss": 1.0597, "step": 1235 }, { "epoch": 0.15417238368467007, "grad_norm": 0.0, "learning_rate": 1.9202837024567723e-05, "loss": 1.114, "step": 1236 }, { "epoch": 0.15429711862292628, "grad_norm": 0.0, "learning_rate": 1.9201255571995392e-05, "loss": 1.0953, "step": 1237 }, { "epoch": 0.1544218535611825, "grad_norm": 0.0, "learning_rate": 1.9199672617546187e-05, "loss": 1.0832, "step": 1238 }, { "epoch": 0.1545465884994387, "grad_norm": 0.0, "learning_rate": 1.9198088161478492e-05, "loss": 1.0857, "step": 1239 }, { "epoch": 0.1546713234376949, "grad_norm": 0.0, "learning_rate": 1.9196502204050925e-05, "loss": 1.1247, "step": 1240 }, { "epoch": 0.15479605837595112, "grad_norm": 0.0, "learning_rate": 1.9194914745522354e-05, "loss": 1.1256, "step": 1241 }, { "epoch": 0.1549207933142073, "grad_norm": 0.0, "learning_rate": 1.9193325786151895e-05, "loss": 1.1067, "step": 1242 }, { "epoch": 0.1550455282524635, "grad_norm": 0.0, "learning_rate": 1.9191735326198907e-05, "loss": 1.0957, "step": 1243 }, { "epoch": 0.15517026319071972, "grad_norm": 0.0, "learning_rate": 1.919014336592299e-05, "loss": 1.1548, "step": 1244 }, { "epoch": 0.15529499812897593, "grad_norm": 0.0, "learning_rate": 1.9188549905583993e-05, "loss": 1.0478, "step": 1245 }, { "epoch": 0.15541973306723214, "grad_norm": 0.0, "learning_rate": 1.918695494544201e-05, "loss": 1.085, "step": 1246 }, { "epoch": 0.15554446800548835, "grad_norm": 0.0, "learning_rate": 1.9185358485757376e-05, "loss": 1.0943, "step": 1247 }, { "epoch": 0.15566920294374453, "grad_norm": 0.0, "learning_rate": 1.918376052679068e-05, "loss": 1.1112, "step": 1248 }, { "epoch": 0.15579393788200074, "grad_norm": 0.0, "learning_rate": 1.9182161068802742e-05, "loss": 1.0272, "step": 1249 }, { "epoch": 0.15591867282025695, "grad_norm": 0.0, "learning_rate": 1.9180560112054637e-05, "loss": 1.0465, "step": 1250 }, { "epoch": 0.15604340775851316, "grad_norm": 0.0, "learning_rate": 1.917895765680768e-05, "loss": 1.0991, "step": 1251 }, { "epoch": 0.15616814269676937, "grad_norm": 0.0, "learning_rate": 1.9177353703323435e-05, "loss": 1.0752, "step": 1252 }, { "epoch": 0.15629287763502558, "grad_norm": 0.0, "learning_rate": 1.9175748251863707e-05, "loss": 1.0523, "step": 1253 }, { "epoch": 0.15641761257328177, "grad_norm": 0.0, "learning_rate": 1.917414130269054e-05, "loss": 1.0725, "step": 1254 }, { "epoch": 0.15654234751153798, "grad_norm": 0.0, "learning_rate": 1.917253285606624e-05, "loss": 1.1162, "step": 1255 }, { "epoch": 0.1566670824497942, "grad_norm": 0.0, "learning_rate": 1.9170922912253335e-05, "loss": 1.0798, "step": 1256 }, { "epoch": 0.1567918173880504, "grad_norm": 0.0, "learning_rate": 1.916931147151461e-05, "loss": 1.0971, "step": 1257 }, { "epoch": 0.1569165523263066, "grad_norm": 0.0, "learning_rate": 1.9167698534113105e-05, "loss": 1.1087, "step": 1258 }, { "epoch": 0.15704128726456282, "grad_norm": 0.0, "learning_rate": 1.9166084100312078e-05, "loss": 1.1137, "step": 1259 }, { "epoch": 0.157166022202819, "grad_norm": 0.0, "learning_rate": 1.916446817037505e-05, "loss": 1.0835, "step": 1260 }, { "epoch": 0.1572907571410752, "grad_norm": 0.0, "learning_rate": 1.916285074456578e-05, "loss": 1.1114, "step": 1261 }, { "epoch": 0.15741549207933142, "grad_norm": 0.0, "learning_rate": 1.9161231823148275e-05, "loss": 1.1233, "step": 1262 }, { "epoch": 0.15754022701758763, "grad_norm": 0.0, "learning_rate": 1.9159611406386786e-05, "loss": 1.0485, "step": 1263 }, { "epoch": 0.15766496195584384, "grad_norm": 0.0, "learning_rate": 1.9157989494545802e-05, "loss": 1.1349, "step": 1264 }, { "epoch": 0.15778969689410005, "grad_norm": 0.0, "learning_rate": 1.9156366087890062e-05, "loss": 1.1579, "step": 1265 }, { "epoch": 0.15791443183235623, "grad_norm": 0.0, "learning_rate": 1.9154741186684544e-05, "loss": 1.0762, "step": 1266 }, { "epoch": 0.15803916677061244, "grad_norm": 0.0, "learning_rate": 1.9153114791194475e-05, "loss": 1.1057, "step": 1267 }, { "epoch": 0.15816390170886865, "grad_norm": 0.0, "learning_rate": 1.9151486901685322e-05, "loss": 1.0982, "step": 1268 }, { "epoch": 0.15828863664712486, "grad_norm": 0.0, "learning_rate": 1.91498575184228e-05, "loss": 1.1293, "step": 1269 }, { "epoch": 0.15841337158538107, "grad_norm": 0.0, "learning_rate": 1.9148226641672864e-05, "loss": 1.1032, "step": 1270 }, { "epoch": 0.15853810652363728, "grad_norm": 0.0, "learning_rate": 1.9146594271701716e-05, "loss": 1.1407, "step": 1271 }, { "epoch": 0.15866284146189347, "grad_norm": 0.0, "learning_rate": 1.9144960408775794e-05, "loss": 1.133, "step": 1272 }, { "epoch": 0.15878757640014968, "grad_norm": 0.0, "learning_rate": 1.9143325053161795e-05, "loss": 1.1591, "step": 1273 }, { "epoch": 0.1589123113384059, "grad_norm": 0.0, "learning_rate": 1.9141688205126644e-05, "loss": 1.1544, "step": 1274 }, { "epoch": 0.1590370462766621, "grad_norm": 0.0, "learning_rate": 1.9140049864937517e-05, "loss": 1.0991, "step": 1275 }, { "epoch": 0.1591617812149183, "grad_norm": 0.0, "learning_rate": 1.9138410032861833e-05, "loss": 1.1302, "step": 1276 }, { "epoch": 0.15928651615317452, "grad_norm": 0.0, "learning_rate": 1.9136768709167254e-05, "loss": 1.1472, "step": 1277 }, { "epoch": 0.1594112510914307, "grad_norm": 0.0, "learning_rate": 1.9135125894121685e-05, "loss": 1.123, "step": 1278 }, { "epoch": 0.1595359860296869, "grad_norm": 0.0, "learning_rate": 1.9133481587993274e-05, "loss": 1.1298, "step": 1279 }, { "epoch": 0.15966072096794312, "grad_norm": 0.0, "learning_rate": 1.9131835791050417e-05, "loss": 1.0552, "step": 1280 }, { "epoch": 0.15978545590619933, "grad_norm": 0.0, "learning_rate": 1.9130188503561744e-05, "loss": 1.1166, "step": 1281 }, { "epoch": 0.15991019084445554, "grad_norm": 0.0, "learning_rate": 1.9128539725796137e-05, "loss": 1.0523, "step": 1282 }, { "epoch": 0.16003492578271175, "grad_norm": 0.0, "learning_rate": 1.9126889458022713e-05, "loss": 1.097, "step": 1283 }, { "epoch": 0.16015966072096793, "grad_norm": 0.0, "learning_rate": 1.912523770051085e-05, "loss": 1.1073, "step": 1284 }, { "epoch": 0.16028439565922414, "grad_norm": 0.0, "learning_rate": 1.9123584453530145e-05, "loss": 1.0787, "step": 1285 }, { "epoch": 0.16040913059748035, "grad_norm": 0.0, "learning_rate": 1.912192971735045e-05, "loss": 1.0543, "step": 1286 }, { "epoch": 0.16053386553573656, "grad_norm": 0.0, "learning_rate": 1.9120273492241864e-05, "loss": 1.0903, "step": 1287 }, { "epoch": 0.16065860047399277, "grad_norm": 0.0, "learning_rate": 1.911861577847473e-05, "loss": 1.1365, "step": 1288 }, { "epoch": 0.16078333541224898, "grad_norm": 0.0, "learning_rate": 1.911695657631961e-05, "loss": 1.0463, "step": 1289 }, { "epoch": 0.16090807035050517, "grad_norm": 0.0, "learning_rate": 1.911529588604735e-05, "loss": 1.1128, "step": 1290 }, { "epoch": 0.16103280528876138, "grad_norm": 0.0, "learning_rate": 1.9113633707929002e-05, "loss": 1.0598, "step": 1291 }, { "epoch": 0.1611575402270176, "grad_norm": 0.0, "learning_rate": 1.911197004223588e-05, "loss": 1.0971, "step": 1292 }, { "epoch": 0.1612822751652738, "grad_norm": 0.0, "learning_rate": 1.911030488923953e-05, "loss": 1.0961, "step": 1293 }, { "epoch": 0.16140701010353, "grad_norm": 0.0, "learning_rate": 1.910863824921176e-05, "loss": 1.1146, "step": 1294 }, { "epoch": 0.16153174504178622, "grad_norm": 0.0, "learning_rate": 1.91069701224246e-05, "loss": 1.09, "step": 1295 }, { "epoch": 0.1616564799800424, "grad_norm": 0.0, "learning_rate": 1.9105300509150327e-05, "loss": 1.1108, "step": 1296 }, { "epoch": 0.1617812149182986, "grad_norm": 0.0, "learning_rate": 1.9103629409661468e-05, "loss": 1.0477, "step": 1297 }, { "epoch": 0.16190594985655482, "grad_norm": 0.0, "learning_rate": 1.910195682423079e-05, "loss": 1.0647, "step": 1298 }, { "epoch": 0.16203068479481103, "grad_norm": 0.0, "learning_rate": 1.91002827531313e-05, "loss": 1.1565, "step": 1299 }, { "epoch": 0.16215541973306724, "grad_norm": 0.0, "learning_rate": 1.9098607196636245e-05, "loss": 1.0322, "step": 1300 }, { "epoch": 0.16228015467132345, "grad_norm": 0.0, "learning_rate": 1.9096930155019124e-05, "loss": 1.0819, "step": 1301 }, { "epoch": 0.16240488960957963, "grad_norm": 0.0, "learning_rate": 1.9095251628553666e-05, "loss": 1.1268, "step": 1302 }, { "epoch": 0.16252962454783584, "grad_norm": 0.0, "learning_rate": 1.9093571617513853e-05, "loss": 1.0998, "step": 1303 }, { "epoch": 0.16265435948609205, "grad_norm": 0.0, "learning_rate": 1.9091890122173903e-05, "loss": 1.1774, "step": 1304 }, { "epoch": 0.16277909442434826, "grad_norm": 0.0, "learning_rate": 1.9090207142808283e-05, "loss": 1.0237, "step": 1305 }, { "epoch": 0.16290382936260447, "grad_norm": 0.0, "learning_rate": 1.9088522679691694e-05, "loss": 1.0875, "step": 1306 }, { "epoch": 0.16302856430086068, "grad_norm": 0.0, "learning_rate": 1.9086836733099084e-05, "loss": 1.1166, "step": 1307 }, { "epoch": 0.16315329923911687, "grad_norm": 0.0, "learning_rate": 1.908514930330564e-05, "loss": 1.1014, "step": 1308 }, { "epoch": 0.16327803417737308, "grad_norm": 0.0, "learning_rate": 1.9083460390586796e-05, "loss": 1.1115, "step": 1309 }, { "epoch": 0.1634027691156293, "grad_norm": 0.0, "learning_rate": 1.908176999521822e-05, "loss": 1.0933, "step": 1310 }, { "epoch": 0.1635275040538855, "grad_norm": 0.0, "learning_rate": 1.9080078117475833e-05, "loss": 1.1218, "step": 1311 }, { "epoch": 0.1636522389921417, "grad_norm": 0.0, "learning_rate": 1.907838475763579e-05, "loss": 1.0502, "step": 1312 }, { "epoch": 0.16377697393039792, "grad_norm": 0.0, "learning_rate": 1.907668991597449e-05, "loss": 1.1126, "step": 1313 }, { "epoch": 0.1639017088686541, "grad_norm": 0.0, "learning_rate": 1.9074993592768575e-05, "loss": 1.1447, "step": 1314 }, { "epoch": 0.1640264438069103, "grad_norm": 0.0, "learning_rate": 1.907329578829492e-05, "loss": 1.0973, "step": 1315 }, { "epoch": 0.16415117874516652, "grad_norm": 0.0, "learning_rate": 1.907159650283066e-05, "loss": 1.0943, "step": 1316 }, { "epoch": 0.16427591368342273, "grad_norm": 0.0, "learning_rate": 1.9069895736653158e-05, "loss": 1.1549, "step": 1317 }, { "epoch": 0.16440064862167894, "grad_norm": 0.0, "learning_rate": 1.906819349004002e-05, "loss": 1.1193, "step": 1318 }, { "epoch": 0.16452538355993515, "grad_norm": 0.0, "learning_rate": 1.9066489763269098e-05, "loss": 1.1334, "step": 1319 }, { "epoch": 0.16465011849819133, "grad_norm": 0.0, "learning_rate": 1.906478455661848e-05, "loss": 1.0882, "step": 1320 }, { "epoch": 0.16477485343644754, "grad_norm": 0.0, "learning_rate": 1.9063077870366504e-05, "loss": 1.1157, "step": 1321 }, { "epoch": 0.16489958837470375, "grad_norm": 0.0, "learning_rate": 1.9061369704791736e-05, "loss": 1.1333, "step": 1322 }, { "epoch": 0.16502432331295996, "grad_norm": 0.0, "learning_rate": 1.9059660060172994e-05, "loss": 1.0904, "step": 1323 }, { "epoch": 0.16514905825121617, "grad_norm": 0.0, "learning_rate": 1.9057948936789343e-05, "loss": 1.0921, "step": 1324 }, { "epoch": 0.16527379318947238, "grad_norm": 0.0, "learning_rate": 1.9056236334920074e-05, "loss": 1.1128, "step": 1325 }, { "epoch": 0.16539852812772857, "grad_norm": 0.0, "learning_rate": 1.9054522254844728e-05, "loss": 1.1062, "step": 1326 }, { "epoch": 0.16552326306598478, "grad_norm": 0.0, "learning_rate": 1.905280669684309e-05, "loss": 1.1171, "step": 1327 }, { "epoch": 0.165647998004241, "grad_norm": 0.0, "learning_rate": 1.9051089661195177e-05, "loss": 1.1261, "step": 1328 }, { "epoch": 0.1657727329424972, "grad_norm": 0.0, "learning_rate": 1.9049371148181253e-05, "loss": 1.1324, "step": 1329 }, { "epoch": 0.1658974678807534, "grad_norm": 0.0, "learning_rate": 1.9047651158081827e-05, "loss": 1.0923, "step": 1330 }, { "epoch": 0.16602220281900962, "grad_norm": 0.0, "learning_rate": 1.904592969117764e-05, "loss": 1.0813, "step": 1331 }, { "epoch": 0.1661469377572658, "grad_norm": 0.0, "learning_rate": 1.9044206747749684e-05, "loss": 1.1255, "step": 1332 }, { "epoch": 0.166271672695522, "grad_norm": 0.0, "learning_rate": 1.9042482328079178e-05, "loss": 1.0924, "step": 1333 }, { "epoch": 0.16639640763377822, "grad_norm": 0.0, "learning_rate": 1.90407564324476e-05, "loss": 1.1175, "step": 1334 }, { "epoch": 0.16652114257203443, "grad_norm": 0.0, "learning_rate": 1.9039029061136657e-05, "loss": 1.1462, "step": 1335 }, { "epoch": 0.16664587751029064, "grad_norm": 0.0, "learning_rate": 1.90373002144283e-05, "loss": 1.0465, "step": 1336 }, { "epoch": 0.16677061244854685, "grad_norm": 0.0, "learning_rate": 1.9035569892604715e-05, "loss": 1.1395, "step": 1337 }, { "epoch": 0.16689534738680303, "grad_norm": 0.0, "learning_rate": 1.903383809594834e-05, "loss": 1.1051, "step": 1338 }, { "epoch": 0.16702008232505924, "grad_norm": 0.0, "learning_rate": 1.9032104824741843e-05, "loss": 1.082, "step": 1339 }, { "epoch": 0.16714481726331545, "grad_norm": 0.0, "learning_rate": 1.9030370079268143e-05, "loss": 1.1196, "step": 1340 }, { "epoch": 0.16726955220157166, "grad_norm": 0.0, "learning_rate": 1.902863385981039e-05, "loss": 1.1096, "step": 1341 }, { "epoch": 0.16739428713982787, "grad_norm": 0.0, "learning_rate": 1.9026896166651986e-05, "loss": 1.1545, "step": 1342 }, { "epoch": 0.16751902207808408, "grad_norm": 0.0, "learning_rate": 1.9025157000076556e-05, "loss": 1.0746, "step": 1343 }, { "epoch": 0.16764375701634027, "grad_norm": 0.0, "learning_rate": 1.9023416360367984e-05, "loss": 1.1173, "step": 1344 }, { "epoch": 0.16776849195459648, "grad_norm": 0.0, "learning_rate": 1.902167424781038e-05, "loss": 1.036, "step": 1345 }, { "epoch": 0.1678932268928527, "grad_norm": 0.0, "learning_rate": 1.90199306626881e-05, "loss": 1.0863, "step": 1346 }, { "epoch": 0.1680179618311089, "grad_norm": 0.0, "learning_rate": 1.9018185605285752e-05, "loss": 1.0074, "step": 1347 }, { "epoch": 0.1681426967693651, "grad_norm": 0.0, "learning_rate": 1.901643907588816e-05, "loss": 1.107, "step": 1348 }, { "epoch": 0.16826743170762132, "grad_norm": 0.0, "learning_rate": 1.9014691074780414e-05, "loss": 1.1003, "step": 1349 }, { "epoch": 0.1683921666458775, "grad_norm": 0.0, "learning_rate": 1.901294160224782e-05, "loss": 1.0199, "step": 1350 }, { "epoch": 0.1685169015841337, "grad_norm": 0.0, "learning_rate": 1.9011190658575948e-05, "loss": 1.1606, "step": 1351 }, { "epoch": 0.16864163652238992, "grad_norm": 0.0, "learning_rate": 1.900943824405058e-05, "loss": 1.0434, "step": 1352 }, { "epoch": 0.16876637146064613, "grad_norm": 0.0, "learning_rate": 1.900768435895777e-05, "loss": 1.1013, "step": 1353 }, { "epoch": 0.16889110639890234, "grad_norm": 0.0, "learning_rate": 1.9005929003583793e-05, "loss": 1.1176, "step": 1354 }, { "epoch": 0.16901584133715855, "grad_norm": 0.0, "learning_rate": 1.900417217821516e-05, "loss": 1.0814, "step": 1355 }, { "epoch": 0.16914057627541473, "grad_norm": 0.0, "learning_rate": 1.9002413883138633e-05, "loss": 1.0319, "step": 1356 }, { "epoch": 0.16926531121367094, "grad_norm": 0.0, "learning_rate": 1.900065411864121e-05, "loss": 1.1037, "step": 1357 }, { "epoch": 0.16939004615192715, "grad_norm": 0.0, "learning_rate": 1.8998892885010134e-05, "loss": 1.0065, "step": 1358 }, { "epoch": 0.16951478109018336, "grad_norm": 0.0, "learning_rate": 1.8997130182532876e-05, "loss": 1.0375, "step": 1359 }, { "epoch": 0.16963951602843957, "grad_norm": 0.0, "learning_rate": 1.899536601149716e-05, "loss": 1.047, "step": 1360 }, { "epoch": 0.16976425096669578, "grad_norm": 0.0, "learning_rate": 1.8993600372190933e-05, "loss": 1.0829, "step": 1361 }, { "epoch": 0.16988898590495197, "grad_norm": 0.0, "learning_rate": 1.89918332649024e-05, "loss": 1.1296, "step": 1362 }, { "epoch": 0.17001372084320818, "grad_norm": 0.0, "learning_rate": 1.8990064689919995e-05, "loss": 1.1029, "step": 1363 }, { "epoch": 0.1701384557814644, "grad_norm": 0.0, "learning_rate": 1.8988294647532395e-05, "loss": 1.1035, "step": 1364 }, { "epoch": 0.1702631907197206, "grad_norm": 0.0, "learning_rate": 1.8986523138028513e-05, "loss": 1.0854, "step": 1365 }, { "epoch": 0.1703879256579768, "grad_norm": 0.0, "learning_rate": 1.898475016169751e-05, "loss": 1.0822, "step": 1366 }, { "epoch": 0.17051266059623302, "grad_norm": 0.0, "learning_rate": 1.8982975718828774e-05, "loss": 1.0766, "step": 1367 }, { "epoch": 0.1706373955344892, "grad_norm": 0.0, "learning_rate": 1.898119980971194e-05, "loss": 1.0591, "step": 1368 }, { "epoch": 0.1707621304727454, "grad_norm": 0.0, "learning_rate": 1.897942243463688e-05, "loss": 1.0745, "step": 1369 }, { "epoch": 0.17088686541100162, "grad_norm": 0.0, "learning_rate": 1.8977643593893716e-05, "loss": 1.0724, "step": 1370 }, { "epoch": 0.17101160034925783, "grad_norm": 0.0, "learning_rate": 1.8975863287772787e-05, "loss": 1.0937, "step": 1371 }, { "epoch": 0.17113633528751404, "grad_norm": 0.0, "learning_rate": 1.897408151656469e-05, "loss": 1.0926, "step": 1372 }, { "epoch": 0.17126107022577025, "grad_norm": 0.0, "learning_rate": 1.8972298280560256e-05, "loss": 1.0537, "step": 1373 }, { "epoch": 0.17138580516402643, "grad_norm": 0.0, "learning_rate": 1.897051358005055e-05, "loss": 1.047, "step": 1374 }, { "epoch": 0.17151054010228264, "grad_norm": 0.0, "learning_rate": 1.8968727415326885e-05, "loss": 1.0871, "step": 1375 }, { "epoch": 0.17163527504053885, "grad_norm": 0.0, "learning_rate": 1.8966939786680806e-05, "loss": 1.0905, "step": 1376 }, { "epoch": 0.17176000997879506, "grad_norm": 0.0, "learning_rate": 1.8965150694404094e-05, "loss": 1.0739, "step": 1377 }, { "epoch": 0.17188474491705127, "grad_norm": 0.0, "learning_rate": 1.896336013878878e-05, "loss": 1.0827, "step": 1378 }, { "epoch": 0.17200947985530748, "grad_norm": 0.0, "learning_rate": 1.8961568120127128e-05, "loss": 1.076, "step": 1379 }, { "epoch": 0.17213421479356367, "grad_norm": 0.0, "learning_rate": 1.8959774638711638e-05, "loss": 1.0509, "step": 1380 }, { "epoch": 0.17225894973181988, "grad_norm": 0.0, "learning_rate": 1.895797969483505e-05, "loss": 1.0731, "step": 1381 }, { "epoch": 0.1723836846700761, "grad_norm": 0.0, "learning_rate": 1.895618328879035e-05, "loss": 1.143, "step": 1382 }, { "epoch": 0.1725084196083323, "grad_norm": 0.0, "learning_rate": 1.8954385420870754e-05, "loss": 1.0736, "step": 1383 }, { "epoch": 0.1726331545465885, "grad_norm": 0.0, "learning_rate": 1.895258609136972e-05, "loss": 1.0663, "step": 1384 }, { "epoch": 0.17275788948484472, "grad_norm": 0.0, "learning_rate": 1.8950785300580935e-05, "loss": 1.0704, "step": 1385 }, { "epoch": 0.1728826244231009, "grad_norm": 0.0, "learning_rate": 1.8948983048798345e-05, "loss": 1.031, "step": 1386 }, { "epoch": 0.1730073593613571, "grad_norm": 0.0, "learning_rate": 1.8947179336316122e-05, "loss": 1.1143, "step": 1387 }, { "epoch": 0.17313209429961332, "grad_norm": 0.0, "learning_rate": 1.894537416342867e-05, "loss": 1.0447, "step": 1388 }, { "epoch": 0.17325682923786953, "grad_norm": 0.0, "learning_rate": 1.894356753043064e-05, "loss": 1.1046, "step": 1389 }, { "epoch": 0.17338156417612574, "grad_norm": 0.0, "learning_rate": 1.8941759437616933e-05, "loss": 1.0935, "step": 1390 }, { "epoch": 0.17350629911438195, "grad_norm": 0.0, "learning_rate": 1.8939949885282657e-05, "loss": 1.0546, "step": 1391 }, { "epoch": 0.17363103405263813, "grad_norm": 0.0, "learning_rate": 1.8938138873723187e-05, "loss": 1.1051, "step": 1392 }, { "epoch": 0.17375576899089434, "grad_norm": 0.0, "learning_rate": 1.8936326403234125e-05, "loss": 1.1052, "step": 1393 }, { "epoch": 0.17388050392915055, "grad_norm": 0.0, "learning_rate": 1.893451247411131e-05, "loss": 1.0769, "step": 1394 }, { "epoch": 0.17400523886740676, "grad_norm": 0.0, "learning_rate": 1.893269708665082e-05, "loss": 1.0574, "step": 1395 }, { "epoch": 0.17412997380566297, "grad_norm": 0.0, "learning_rate": 1.8930880241148973e-05, "loss": 1.1541, "step": 1396 }, { "epoch": 0.17425470874391918, "grad_norm": 0.0, "learning_rate": 1.8929061937902328e-05, "loss": 1.0252, "step": 1397 }, { "epoch": 0.17437944368217537, "grad_norm": 0.0, "learning_rate": 1.892724217720767e-05, "loss": 1.0589, "step": 1398 }, { "epoch": 0.17450417862043158, "grad_norm": 0.0, "learning_rate": 1.8925420959362037e-05, "loss": 1.0907, "step": 1399 }, { "epoch": 0.1746289135586878, "grad_norm": 0.0, "learning_rate": 1.8923598284662695e-05, "loss": 1.1174, "step": 1400 }, { "epoch": 0.174753648496944, "grad_norm": 0.0, "learning_rate": 1.8921774153407148e-05, "loss": 1.04, "step": 1401 }, { "epoch": 0.1748783834352002, "grad_norm": 0.0, "learning_rate": 1.8919948565893144e-05, "loss": 1.0381, "step": 1402 }, { "epoch": 0.17500311837345642, "grad_norm": 0.0, "learning_rate": 1.891812152241866e-05, "loss": 1.0702, "step": 1403 }, { "epoch": 0.1751278533117126, "grad_norm": 0.0, "learning_rate": 1.8916293023281925e-05, "loss": 1.0489, "step": 1404 }, { "epoch": 0.1752525882499688, "grad_norm": 0.0, "learning_rate": 1.8914463068781388e-05, "loss": 1.0726, "step": 1405 }, { "epoch": 0.17537732318822502, "grad_norm": 0.0, "learning_rate": 1.8912631659215745e-05, "loss": 1.0724, "step": 1406 }, { "epoch": 0.17550205812648123, "grad_norm": 0.0, "learning_rate": 1.891079879488393e-05, "loss": 1.0695, "step": 1407 }, { "epoch": 0.17562679306473744, "grad_norm": 0.0, "learning_rate": 1.890896447608511e-05, "loss": 1.0921, "step": 1408 }, { "epoch": 0.17575152800299365, "grad_norm": 0.0, "learning_rate": 1.8907128703118696e-05, "loss": 1.1122, "step": 1409 }, { "epoch": 0.17587626294124983, "grad_norm": 0.0, "learning_rate": 1.8905291476284328e-05, "loss": 1.0702, "step": 1410 }, { "epoch": 0.17600099787950604, "grad_norm": 0.0, "learning_rate": 1.8903452795881893e-05, "loss": 1.1124, "step": 1411 }, { "epoch": 0.17612573281776225, "grad_norm": 0.0, "learning_rate": 1.89016126622115e-05, "loss": 1.0876, "step": 1412 }, { "epoch": 0.17625046775601846, "grad_norm": 0.0, "learning_rate": 1.8899771075573522e-05, "loss": 1.1089, "step": 1413 }, { "epoch": 0.17637520269427467, "grad_norm": 0.0, "learning_rate": 1.8897928036268535e-05, "loss": 1.0761, "step": 1414 }, { "epoch": 0.17649993763253088, "grad_norm": 0.0, "learning_rate": 1.8896083544597377e-05, "loss": 1.1004, "step": 1415 }, { "epoch": 0.17662467257078707, "grad_norm": 0.0, "learning_rate": 1.889423760086112e-05, "loss": 1.1368, "step": 1416 }, { "epoch": 0.17674940750904328, "grad_norm": 0.0, "learning_rate": 1.8892390205361063e-05, "loss": 1.0333, "step": 1417 }, { "epoch": 0.1768741424472995, "grad_norm": 0.0, "learning_rate": 1.889054135839875e-05, "loss": 1.0995, "step": 1418 }, { "epoch": 0.1769988773855557, "grad_norm": 0.0, "learning_rate": 1.8888691060275955e-05, "loss": 1.0582, "step": 1419 }, { "epoch": 0.1771236123238119, "grad_norm": 0.0, "learning_rate": 1.8886839311294695e-05, "loss": 1.059, "step": 1420 }, { "epoch": 0.17724834726206812, "grad_norm": 0.0, "learning_rate": 1.8884986111757223e-05, "loss": 1.0849, "step": 1421 }, { "epoch": 0.1773730822003243, "grad_norm": 0.0, "learning_rate": 1.8883131461966034e-05, "loss": 1.0658, "step": 1422 }, { "epoch": 0.1774978171385805, "grad_norm": 0.0, "learning_rate": 1.8881275362223845e-05, "loss": 1.047, "step": 1423 }, { "epoch": 0.17762255207683672, "grad_norm": 0.0, "learning_rate": 1.887941781283362e-05, "loss": 1.1441, "step": 1424 }, { "epoch": 0.17774728701509293, "grad_norm": 0.0, "learning_rate": 1.8877558814098564e-05, "loss": 1.0356, "step": 1425 }, { "epoch": 0.17787202195334914, "grad_norm": 0.0, "learning_rate": 1.88756983663221e-05, "loss": 1.0798, "step": 1426 }, { "epoch": 0.17799675689160535, "grad_norm": 0.0, "learning_rate": 1.8873836469807912e-05, "loss": 1.1231, "step": 1427 }, { "epoch": 0.17812149182986153, "grad_norm": 0.0, "learning_rate": 1.88719731248599e-05, "loss": 1.0354, "step": 1428 }, { "epoch": 0.17824622676811774, "grad_norm": 0.0, "learning_rate": 1.887010833178222e-05, "loss": 1.0972, "step": 1429 }, { "epoch": 0.17837096170637395, "grad_norm": 0.0, "learning_rate": 1.886824209087924e-05, "loss": 1.0746, "step": 1430 }, { "epoch": 0.17849569664463016, "grad_norm": 0.0, "learning_rate": 1.8866374402455587e-05, "loss": 1.1135, "step": 1431 }, { "epoch": 0.17862043158288637, "grad_norm": 0.0, "learning_rate": 1.886450526681611e-05, "loss": 1.0591, "step": 1432 }, { "epoch": 0.17874516652114258, "grad_norm": 0.0, "learning_rate": 1.88626346842659e-05, "loss": 1.1603, "step": 1433 }, { "epoch": 0.17886990145939877, "grad_norm": 0.0, "learning_rate": 1.8860762655110284e-05, "loss": 1.1093, "step": 1434 }, { "epoch": 0.17899463639765498, "grad_norm": 0.0, "learning_rate": 1.8858889179654824e-05, "loss": 1.0933, "step": 1435 }, { "epoch": 0.1791193713359112, "grad_norm": 0.0, "learning_rate": 1.8857014258205323e-05, "loss": 1.0944, "step": 1436 }, { "epoch": 0.1792441062741674, "grad_norm": 0.0, "learning_rate": 1.8855137891067805e-05, "loss": 1.1029, "step": 1437 }, { "epoch": 0.1793688412124236, "grad_norm": 0.0, "learning_rate": 1.885326007854855e-05, "loss": 1.0897, "step": 1438 }, { "epoch": 0.17949357615067982, "grad_norm": 0.0, "learning_rate": 1.885138082095406e-05, "loss": 1.0703, "step": 1439 }, { "epoch": 0.179618311088936, "grad_norm": 0.0, "learning_rate": 1.8849500118591077e-05, "loss": 1.1392, "step": 1440 }, { "epoch": 0.1797430460271922, "grad_norm": 0.0, "learning_rate": 1.8847617971766577e-05, "loss": 1.0533, "step": 1441 }, { "epoch": 0.17986778096544842, "grad_norm": 0.0, "learning_rate": 1.8845734380787783e-05, "loss": 1.0478, "step": 1442 }, { "epoch": 0.17999251590370463, "grad_norm": 0.0, "learning_rate": 1.8843849345962135e-05, "loss": 1.0838, "step": 1443 }, { "epoch": 0.18011725084196084, "grad_norm": 0.0, "learning_rate": 1.8841962867597325e-05, "loss": 1.1238, "step": 1444 }, { "epoch": 0.18024198578021705, "grad_norm": 0.0, "learning_rate": 1.8840074946001264e-05, "loss": 1.0972, "step": 1445 }, { "epoch": 0.18036672071847323, "grad_norm": 0.0, "learning_rate": 1.883818558148212e-05, "loss": 1.0812, "step": 1446 }, { "epoch": 0.18049145565672944, "grad_norm": 0.0, "learning_rate": 1.883629477434828e-05, "loss": 1.0488, "step": 1447 }, { "epoch": 0.18061619059498565, "grad_norm": 0.0, "learning_rate": 1.8834402524908366e-05, "loss": 1.0634, "step": 1448 }, { "epoch": 0.18074092553324186, "grad_norm": 0.0, "learning_rate": 1.883250883347125e-05, "loss": 1.0744, "step": 1449 }, { "epoch": 0.18086566047149807, "grad_norm": 0.0, "learning_rate": 1.8830613700346026e-05, "loss": 1.088, "step": 1450 }, { "epoch": 0.18099039540975428, "grad_norm": 0.0, "learning_rate": 1.8828717125842026e-05, "loss": 1.0777, "step": 1451 }, { "epoch": 0.18111513034801047, "grad_norm": 0.0, "learning_rate": 1.8826819110268822e-05, "loss": 1.0946, "step": 1452 }, { "epoch": 0.18123986528626668, "grad_norm": 0.0, "learning_rate": 1.8824919653936217e-05, "loss": 1.1098, "step": 1453 }, { "epoch": 0.1813646002245229, "grad_norm": 0.0, "learning_rate": 1.8823018757154246e-05, "loss": 1.1247, "step": 1454 }, { "epoch": 0.1814893351627791, "grad_norm": 0.0, "learning_rate": 1.8821116420233186e-05, "loss": 1.1171, "step": 1455 }, { "epoch": 0.1816140701010353, "grad_norm": 0.0, "learning_rate": 1.881921264348355e-05, "loss": 1.1489, "step": 1456 }, { "epoch": 0.18173880503929152, "grad_norm": 0.0, "learning_rate": 1.881730742721608e-05, "loss": 1.0879, "step": 1457 }, { "epoch": 0.1818635399775477, "grad_norm": 0.0, "learning_rate": 1.8815400771741753e-05, "loss": 1.0917, "step": 1458 }, { "epoch": 0.1819882749158039, "grad_norm": 0.0, "learning_rate": 1.881349267737179e-05, "loss": 1.1207, "step": 1459 }, { "epoch": 0.18211300985406012, "grad_norm": 0.0, "learning_rate": 1.881158314441763e-05, "loss": 1.0839, "step": 1460 }, { "epoch": 0.18223774479231633, "grad_norm": 0.0, "learning_rate": 1.8809672173190964e-05, "loss": 1.0378, "step": 1461 }, { "epoch": 0.18236247973057254, "grad_norm": 0.0, "learning_rate": 1.880775976400371e-05, "loss": 1.0733, "step": 1462 }, { "epoch": 0.18248721466882875, "grad_norm": 0.0, "learning_rate": 1.8805845917168016e-05, "loss": 1.0969, "step": 1463 }, { "epoch": 0.18261194960708493, "grad_norm": 0.0, "learning_rate": 1.8803930632996277e-05, "loss": 1.1077, "step": 1464 }, { "epoch": 0.18273668454534114, "grad_norm": 0.0, "learning_rate": 1.880201391180111e-05, "loss": 1.1169, "step": 1465 }, { "epoch": 0.18286141948359735, "grad_norm": 0.0, "learning_rate": 1.8800095753895382e-05, "loss": 1.04, "step": 1466 }, { "epoch": 0.18298615442185356, "grad_norm": 0.0, "learning_rate": 1.879817615959217e-05, "loss": 1.0999, "step": 1467 }, { "epoch": 0.18311088936010977, "grad_norm": 0.0, "learning_rate": 1.8796255129204812e-05, "loss": 1.0215, "step": 1468 }, { "epoch": 0.18323562429836598, "grad_norm": 0.0, "learning_rate": 1.8794332663046866e-05, "loss": 1.0378, "step": 1469 }, { "epoch": 0.18336035923662217, "grad_norm": 0.0, "learning_rate": 1.879240876143212e-05, "loss": 1.0362, "step": 1470 }, { "epoch": 0.18348509417487838, "grad_norm": 0.0, "learning_rate": 1.8790483424674614e-05, "loss": 1.0607, "step": 1471 }, { "epoch": 0.1836098291131346, "grad_norm": 0.0, "learning_rate": 1.8788556653088604e-05, "loss": 1.0526, "step": 1472 }, { "epoch": 0.1837345640513908, "grad_norm": 0.0, "learning_rate": 1.8786628446988594e-05, "loss": 1.0973, "step": 1473 }, { "epoch": 0.183859298989647, "grad_norm": 0.0, "learning_rate": 1.8784698806689308e-05, "loss": 1.0463, "step": 1474 }, { "epoch": 0.18398403392790322, "grad_norm": 0.0, "learning_rate": 1.8782767732505713e-05, "loss": 1.047, "step": 1475 }, { "epoch": 0.1841087688661594, "grad_norm": 0.0, "learning_rate": 1.8780835224753017e-05, "loss": 1.0589, "step": 1476 }, { "epoch": 0.1842335038044156, "grad_norm": 0.0, "learning_rate": 1.8778901283746646e-05, "loss": 1.0659, "step": 1477 }, { "epoch": 0.18435823874267182, "grad_norm": 0.0, "learning_rate": 1.877696590980227e-05, "loss": 1.0853, "step": 1478 }, { "epoch": 0.18448297368092803, "grad_norm": 0.0, "learning_rate": 1.8775029103235795e-05, "loss": 1.0867, "step": 1479 }, { "epoch": 0.18460770861918424, "grad_norm": 0.0, "learning_rate": 1.877309086436335e-05, "loss": 1.1204, "step": 1480 }, { "epoch": 0.18473244355744045, "grad_norm": 0.0, "learning_rate": 1.8771151193501306e-05, "loss": 1.0491, "step": 1481 }, { "epoch": 0.18485717849569663, "grad_norm": 0.0, "learning_rate": 1.8769210090966274e-05, "loss": 1.0539, "step": 1482 }, { "epoch": 0.18498191343395284, "grad_norm": 0.0, "learning_rate": 1.876726755707508e-05, "loss": 1.0667, "step": 1483 }, { "epoch": 0.18510664837220905, "grad_norm": 0.0, "learning_rate": 1.8765323592144797e-05, "loss": 1.0991, "step": 1484 }, { "epoch": 0.18523138331046526, "grad_norm": 0.0, "learning_rate": 1.8763378196492735e-05, "loss": 1.0963, "step": 1485 }, { "epoch": 0.18535611824872147, "grad_norm": 0.0, "learning_rate": 1.8761431370436427e-05, "loss": 1.0121, "step": 1486 }, { "epoch": 0.18548085318697768, "grad_norm": 0.0, "learning_rate": 1.8759483114293644e-05, "loss": 1.0708, "step": 1487 }, { "epoch": 0.18560558812523387, "grad_norm": 0.0, "learning_rate": 1.875753342838239e-05, "loss": 1.118, "step": 1488 }, { "epoch": 0.18573032306349008, "grad_norm": 0.0, "learning_rate": 1.8755582313020912e-05, "loss": 1.1093, "step": 1489 }, { "epoch": 0.1858550580017463, "grad_norm": 0.0, "learning_rate": 1.875362976852767e-05, "loss": 1.0793, "step": 1490 }, { "epoch": 0.1859797929400025, "grad_norm": 0.0, "learning_rate": 1.8751675795221365e-05, "loss": 1.1116, "step": 1491 }, { "epoch": 0.1861045278782587, "grad_norm": 0.0, "learning_rate": 1.8749720393420948e-05, "loss": 1.1434, "step": 1492 }, { "epoch": 0.18622926281651492, "grad_norm": 0.0, "learning_rate": 1.8747763563445584e-05, "loss": 1.0105, "step": 1493 }, { "epoch": 0.1863539977547711, "grad_norm": 0.0, "learning_rate": 1.8745805305614674e-05, "loss": 1.028, "step": 1494 }, { "epoch": 0.1864787326930273, "grad_norm": 0.0, "learning_rate": 1.874384562024786e-05, "loss": 1.0831, "step": 1495 }, { "epoch": 0.18660346763128352, "grad_norm": 0.0, "learning_rate": 1.8741884507665006e-05, "loss": 1.1368, "step": 1496 }, { "epoch": 0.18672820256953973, "grad_norm": 0.0, "learning_rate": 1.8739921968186222e-05, "loss": 1.0619, "step": 1497 }, { "epoch": 0.18685293750779594, "grad_norm": 0.0, "learning_rate": 1.8737958002131837e-05, "loss": 1.0663, "step": 1498 }, { "epoch": 0.18697767244605215, "grad_norm": 0.0, "learning_rate": 1.8735992609822427e-05, "loss": 1.1168, "step": 1499 }, { "epoch": 0.18710240738430833, "grad_norm": 0.0, "learning_rate": 1.873402579157879e-05, "loss": 1.1177, "step": 1500 }, { "epoch": 0.18722714232256454, "grad_norm": 0.0, "learning_rate": 1.8732057547721962e-05, "loss": 1.1316, "step": 1501 }, { "epoch": 0.18735187726082075, "grad_norm": 0.0, "learning_rate": 1.8730087878573205e-05, "loss": 1.1219, "step": 1502 }, { "epoch": 0.18747661219907696, "grad_norm": 0.0, "learning_rate": 1.872811678445402e-05, "loss": 1.0596, "step": 1503 }, { "epoch": 0.18760134713733317, "grad_norm": 0.0, "learning_rate": 1.8726144265686145e-05, "loss": 1.0369, "step": 1504 }, { "epoch": 0.18772608207558938, "grad_norm": 0.0, "learning_rate": 1.8724170322591536e-05, "loss": 1.0898, "step": 1505 }, { "epoch": 0.18785081701384557, "grad_norm": 0.0, "learning_rate": 1.8722194955492404e-05, "loss": 1.1114, "step": 1506 }, { "epoch": 0.18797555195210178, "grad_norm": 0.0, "learning_rate": 1.8720218164711164e-05, "loss": 1.0838, "step": 1507 }, { "epoch": 0.188100286890358, "grad_norm": 0.0, "learning_rate": 1.8718239950570484e-05, "loss": 1.1091, "step": 1508 }, { "epoch": 0.1882250218286142, "grad_norm": 0.0, "learning_rate": 1.871626031339326e-05, "loss": 1.0798, "step": 1509 }, { "epoch": 0.1883497567668704, "grad_norm": 0.0, "learning_rate": 1.8714279253502616e-05, "loss": 1.1107, "step": 1510 }, { "epoch": 0.18847449170512662, "grad_norm": 0.0, "learning_rate": 1.8712296771221912e-05, "loss": 1.1, "step": 1511 }, { "epoch": 0.1885992266433828, "grad_norm": 0.0, "learning_rate": 1.871031286687474e-05, "loss": 1.0681, "step": 1512 }, { "epoch": 0.188723961581639, "grad_norm": 0.0, "learning_rate": 1.870832754078492e-05, "loss": 1.0743, "step": 1513 }, { "epoch": 0.18884869651989522, "grad_norm": 0.0, "learning_rate": 1.8706340793276516e-05, "loss": 1.091, "step": 1514 }, { "epoch": 0.18897343145815143, "grad_norm": 0.0, "learning_rate": 1.8704352624673804e-05, "loss": 1.0858, "step": 1515 }, { "epoch": 0.18909816639640764, "grad_norm": 0.0, "learning_rate": 1.870236303530131e-05, "loss": 1.0533, "step": 1516 }, { "epoch": 0.18922290133466385, "grad_norm": 0.0, "learning_rate": 1.870037202548378e-05, "loss": 1.0962, "step": 1517 }, { "epoch": 0.18934763627292003, "grad_norm": 0.0, "learning_rate": 1.8698379595546202e-05, "loss": 1.0539, "step": 1518 }, { "epoch": 0.18947237121117624, "grad_norm": 0.0, "learning_rate": 1.8696385745813793e-05, "loss": 1.0449, "step": 1519 }, { "epoch": 0.18959710614943245, "grad_norm": 0.0, "learning_rate": 1.869439047661199e-05, "loss": 1.0658, "step": 1520 }, { "epoch": 0.18972184108768866, "grad_norm": 0.0, "learning_rate": 1.8692393788266477e-05, "loss": 1.0408, "step": 1521 }, { "epoch": 0.18984657602594487, "grad_norm": 0.0, "learning_rate": 1.869039568110317e-05, "loss": 1.0401, "step": 1522 }, { "epoch": 0.18997131096420108, "grad_norm": 0.0, "learning_rate": 1.86883961554482e-05, "loss": 1.1052, "step": 1523 }, { "epoch": 0.19009604590245727, "grad_norm": 0.0, "learning_rate": 1.8686395211627944e-05, "loss": 1.0633, "step": 1524 }, { "epoch": 0.19022078084071348, "grad_norm": 0.0, "learning_rate": 1.8684392849969006e-05, "loss": 1.1292, "step": 1525 }, { "epoch": 0.1903455157789697, "grad_norm": 0.0, "learning_rate": 1.8682389070798225e-05, "loss": 1.1032, "step": 1526 }, { "epoch": 0.1904702507172259, "grad_norm": 0.0, "learning_rate": 1.8680383874442664e-05, "loss": 1.0854, "step": 1527 }, { "epoch": 0.1905949856554821, "grad_norm": 0.0, "learning_rate": 1.8678377261229624e-05, "loss": 1.0839, "step": 1528 }, { "epoch": 0.19071972059373832, "grad_norm": 0.0, "learning_rate": 1.8676369231486635e-05, "loss": 1.1194, "step": 1529 }, { "epoch": 0.1908444555319945, "grad_norm": 0.0, "learning_rate": 1.8674359785541455e-05, "loss": 1.0814, "step": 1530 }, { "epoch": 0.1909691904702507, "grad_norm": 0.0, "learning_rate": 1.8672348923722083e-05, "loss": 1.0432, "step": 1531 }, { "epoch": 0.19109392540850692, "grad_norm": 0.0, "learning_rate": 1.8670336646356735e-05, "loss": 1.1014, "step": 1532 }, { "epoch": 0.19121866034676313, "grad_norm": 0.0, "learning_rate": 1.8668322953773866e-05, "loss": 1.0537, "step": 1533 }, { "epoch": 0.19134339528501934, "grad_norm": 0.0, "learning_rate": 1.8666307846302168e-05, "loss": 1.0986, "step": 1534 }, { "epoch": 0.19146813022327555, "grad_norm": 0.0, "learning_rate": 1.866429132427055e-05, "loss": 1.0339, "step": 1535 }, { "epoch": 0.19159286516153173, "grad_norm": 0.0, "learning_rate": 1.8662273388008167e-05, "loss": 1.0679, "step": 1536 }, { "epoch": 0.19171760009978794, "grad_norm": 0.0, "learning_rate": 1.866025403784439e-05, "loss": 1.1282, "step": 1537 }, { "epoch": 0.19184233503804415, "grad_norm": 0.0, "learning_rate": 1.8658233274108827e-05, "loss": 1.1358, "step": 1538 }, { "epoch": 0.19196706997630036, "grad_norm": 0.0, "learning_rate": 1.8656211097131324e-05, "loss": 1.0809, "step": 1539 }, { "epoch": 0.19209180491455657, "grad_norm": 0.0, "learning_rate": 1.8654187507241946e-05, "loss": 1.0386, "step": 1540 }, { "epoch": 0.19221653985281278, "grad_norm": 0.0, "learning_rate": 1.8652162504771e-05, "loss": 1.076, "step": 1541 }, { "epoch": 0.19234127479106897, "grad_norm": 0.0, "learning_rate": 1.8650136090049013e-05, "loss": 1.0389, "step": 1542 }, { "epoch": 0.19246600972932518, "grad_norm": 0.0, "learning_rate": 1.8648108263406742e-05, "loss": 1.0712, "step": 1543 }, { "epoch": 0.1925907446675814, "grad_norm": 0.0, "learning_rate": 1.864607902517519e-05, "loss": 1.0837, "step": 1544 }, { "epoch": 0.1927154796058376, "grad_norm": 0.0, "learning_rate": 1.8644048375685574e-05, "loss": 1.0863, "step": 1545 }, { "epoch": 0.1928402145440938, "grad_norm": 0.0, "learning_rate": 1.8642016315269345e-05, "loss": 1.0874, "step": 1546 }, { "epoch": 0.19296494948235002, "grad_norm": 0.0, "learning_rate": 1.863998284425819e-05, "loss": 1.0649, "step": 1547 }, { "epoch": 0.1930896844206062, "grad_norm": 0.0, "learning_rate": 1.8637947962984024e-05, "loss": 1.1398, "step": 1548 }, { "epoch": 0.1932144193588624, "grad_norm": 0.0, "learning_rate": 1.8635911671778987e-05, "loss": 1.112, "step": 1549 }, { "epoch": 0.19333915429711862, "grad_norm": 0.0, "learning_rate": 1.8633873970975457e-05, "loss": 1.0519, "step": 1550 }, { "epoch": 0.19346388923537483, "grad_norm": 0.0, "learning_rate": 1.863183486090604e-05, "loss": 1.0951, "step": 1551 }, { "epoch": 0.19358862417363104, "grad_norm": 0.0, "learning_rate": 1.8629794341903558e-05, "loss": 1.1238, "step": 1552 }, { "epoch": 0.19371335911188725, "grad_norm": 0.0, "learning_rate": 1.8627752414301087e-05, "loss": 1.0462, "step": 1553 }, { "epoch": 0.19383809405014343, "grad_norm": 0.0, "learning_rate": 1.862570907843192e-05, "loss": 1.0547, "step": 1554 }, { "epoch": 0.19396282898839964, "grad_norm": 0.0, "learning_rate": 1.8623664334629576e-05, "loss": 1.1024, "step": 1555 }, { "epoch": 0.19408756392665585, "grad_norm": 0.0, "learning_rate": 1.8621618183227813e-05, "loss": 1.0678, "step": 1556 }, { "epoch": 0.19421229886491206, "grad_norm": 0.0, "learning_rate": 1.861957062456061e-05, "loss": 1.0571, "step": 1557 }, { "epoch": 0.19433703380316827, "grad_norm": 0.0, "learning_rate": 1.8617521658962183e-05, "loss": 1.0641, "step": 1558 }, { "epoch": 0.19446176874142448, "grad_norm": 0.0, "learning_rate": 1.8615471286766977e-05, "loss": 1.0466, "step": 1559 }, { "epoch": 0.19458650367968067, "grad_norm": 0.0, "learning_rate": 1.861341950830966e-05, "loss": 1.1001, "step": 1560 }, { "epoch": 0.19471123861793688, "grad_norm": 0.0, "learning_rate": 1.8611366323925138e-05, "loss": 1.0632, "step": 1561 }, { "epoch": 0.1948359735561931, "grad_norm": 0.0, "learning_rate": 1.860931173394854e-05, "loss": 1.0538, "step": 1562 }, { "epoch": 0.1949607084944493, "grad_norm": 0.0, "learning_rate": 1.860725573871523e-05, "loss": 1.1649, "step": 1563 }, { "epoch": 0.1950854434327055, "grad_norm": 0.0, "learning_rate": 1.860519833856079e-05, "loss": 1.0408, "step": 1564 }, { "epoch": 0.19521017837096172, "grad_norm": 0.0, "learning_rate": 1.860313953382105e-05, "loss": 1.0565, "step": 1565 }, { "epoch": 0.1953349133092179, "grad_norm": 0.0, "learning_rate": 1.8601079324832053e-05, "loss": 1.0662, "step": 1566 }, { "epoch": 0.1954596482474741, "grad_norm": 0.0, "learning_rate": 1.859901771193008e-05, "loss": 1.0413, "step": 1567 }, { "epoch": 0.19558438318573032, "grad_norm": 0.0, "learning_rate": 1.8596954695451633e-05, "loss": 1.0889, "step": 1568 }, { "epoch": 0.19570911812398653, "grad_norm": 0.0, "learning_rate": 1.8594890275733453e-05, "loss": 1.1284, "step": 1569 }, { "epoch": 0.19583385306224274, "grad_norm": 0.0, "learning_rate": 1.8592824453112504e-05, "loss": 1.0399, "step": 1570 }, { "epoch": 0.19595858800049895, "grad_norm": 0.0, "learning_rate": 1.8590757227925982e-05, "loss": 1.0642, "step": 1571 }, { "epoch": 0.19608332293875513, "grad_norm": 0.0, "learning_rate": 1.8588688600511307e-05, "loss": 1.049, "step": 1572 }, { "epoch": 0.19620805787701134, "grad_norm": 0.0, "learning_rate": 1.8586618571206133e-05, "loss": 1.0874, "step": 1573 }, { "epoch": 0.19633279281526755, "grad_norm": 0.0, "learning_rate": 1.858454714034834e-05, "loss": 1.1126, "step": 1574 }, { "epoch": 0.19645752775352376, "grad_norm": 0.0, "learning_rate": 1.8582474308276038e-05, "loss": 1.0148, "step": 1575 }, { "epoch": 0.19658226269177997, "grad_norm": 0.0, "learning_rate": 1.858040007532757e-05, "loss": 1.0712, "step": 1576 }, { "epoch": 0.19670699763003618, "grad_norm": 0.0, "learning_rate": 1.8578324441841496e-05, "loss": 1.1087, "step": 1577 }, { "epoch": 0.19683173256829237, "grad_norm": 0.0, "learning_rate": 1.8576247408156614e-05, "loss": 1.1197, "step": 1578 }, { "epoch": 0.19695646750654858, "grad_norm": 0.0, "learning_rate": 1.857416897461195e-05, "loss": 1.0804, "step": 1579 }, { "epoch": 0.1970812024448048, "grad_norm": 0.0, "learning_rate": 1.8572089141546755e-05, "loss": 1.0625, "step": 1580 }, { "epoch": 0.197205937383061, "grad_norm": 0.0, "learning_rate": 1.8570007909300514e-05, "loss": 1.0641, "step": 1581 }, { "epoch": 0.1973306723213172, "grad_norm": 0.0, "learning_rate": 1.856792527821293e-05, "loss": 1.1301, "step": 1582 }, { "epoch": 0.19745540725957342, "grad_norm": 0.0, "learning_rate": 1.8565841248623946e-05, "loss": 1.0518, "step": 1583 }, { "epoch": 0.1975801421978296, "grad_norm": 0.0, "learning_rate": 1.8563755820873724e-05, "loss": 1.0531, "step": 1584 }, { "epoch": 0.1977048771360858, "grad_norm": 0.0, "learning_rate": 1.8561668995302668e-05, "loss": 1.0209, "step": 1585 }, { "epoch": 0.19782961207434202, "grad_norm": 0.0, "learning_rate": 1.8559580772251386e-05, "loss": 1.0415, "step": 1586 }, { "epoch": 0.19795434701259823, "grad_norm": 0.0, "learning_rate": 1.855749115206074e-05, "loss": 1.0919, "step": 1587 }, { "epoch": 0.19807908195085444, "grad_norm": 0.0, "learning_rate": 1.8555400135071803e-05, "loss": 1.0613, "step": 1588 }, { "epoch": 0.19820381688911065, "grad_norm": 0.0, "learning_rate": 1.8553307721625887e-05, "loss": 1.1086, "step": 1589 }, { "epoch": 0.19832855182736683, "grad_norm": 0.0, "learning_rate": 1.8551213912064517e-05, "loss": 1.086, "step": 1590 }, { "epoch": 0.19845328676562304, "grad_norm": 0.0, "learning_rate": 1.854911870672947e-05, "loss": 1.0793, "step": 1591 }, { "epoch": 0.19857802170387925, "grad_norm": 0.0, "learning_rate": 1.854702210596272e-05, "loss": 1.0174, "step": 1592 }, { "epoch": 0.19870275664213546, "grad_norm": 0.0, "learning_rate": 1.8544924110106495e-05, "loss": 1.0386, "step": 1593 }, { "epoch": 0.19882749158039167, "grad_norm": 0.0, "learning_rate": 1.8542824719503244e-05, "loss": 1.0414, "step": 1594 }, { "epoch": 0.19895222651864788, "grad_norm": 0.0, "learning_rate": 1.854072393449563e-05, "loss": 1.035, "step": 1595 }, { "epoch": 0.19907696145690407, "grad_norm": 0.0, "learning_rate": 1.853862175542656e-05, "loss": 1.0575, "step": 1596 }, { "epoch": 0.19920169639516028, "grad_norm": 0.0, "learning_rate": 1.8536518182639166e-05, "loss": 1.1028, "step": 1597 }, { "epoch": 0.1993264313334165, "grad_norm": 0.0, "learning_rate": 1.8534413216476795e-05, "loss": 1.0816, "step": 1598 }, { "epoch": 0.1994511662716727, "grad_norm": 0.0, "learning_rate": 1.853230685728304e-05, "loss": 1.0747, "step": 1599 }, { "epoch": 0.1995759012099289, "grad_norm": 0.0, "learning_rate": 1.8530199105401705e-05, "loss": 1.06, "step": 1600 }, { "epoch": 0.19970063614818512, "grad_norm": 0.0, "learning_rate": 1.852808996117683e-05, "loss": 1.0965, "step": 1601 }, { "epoch": 0.1998253710864413, "grad_norm": 0.0, "learning_rate": 1.8525979424952683e-05, "loss": 1.1099, "step": 1602 }, { "epoch": 0.1999501060246975, "grad_norm": 0.0, "learning_rate": 1.8523867497073757e-05, "loss": 1.0651, "step": 1603 }, { "epoch": 0.20007484096295372, "grad_norm": 0.0, "learning_rate": 1.8521754177884766e-05, "loss": 1.0689, "step": 1604 }, { "epoch": 0.20019957590120993, "grad_norm": 0.0, "learning_rate": 1.8519639467730667e-05, "loss": 1.0924, "step": 1605 }, { "epoch": 0.20032431083946614, "grad_norm": 0.0, "learning_rate": 1.8517523366956624e-05, "loss": 1.036, "step": 1606 }, { "epoch": 0.20044904577772235, "grad_norm": 0.0, "learning_rate": 1.8515405875908042e-05, "loss": 1.0772, "step": 1607 }, { "epoch": 0.20057378071597853, "grad_norm": 0.0, "learning_rate": 1.8513286994930547e-05, "loss": 1.0556, "step": 1608 }, { "epoch": 0.20069851565423474, "grad_norm": 0.0, "learning_rate": 1.8511166724369997e-05, "loss": 1.0713, "step": 1609 }, { "epoch": 0.20082325059249095, "grad_norm": 0.0, "learning_rate": 1.8509045064572474e-05, "loss": 1.0811, "step": 1610 }, { "epoch": 0.20094798553074716, "grad_norm": 0.0, "learning_rate": 1.8506922015884284e-05, "loss": 1.0316, "step": 1611 }, { "epoch": 0.20107272046900337, "grad_norm": 0.0, "learning_rate": 1.8504797578651962e-05, "loss": 1.0536, "step": 1612 }, { "epoch": 0.20119745540725958, "grad_norm": 0.0, "learning_rate": 1.8502671753222272e-05, "loss": 1.0129, "step": 1613 }, { "epoch": 0.20132219034551577, "grad_norm": 0.0, "learning_rate": 1.85005445399422e-05, "loss": 1.0415, "step": 1614 }, { "epoch": 0.20144692528377198, "grad_norm": 0.0, "learning_rate": 1.849841593915896e-05, "loss": 1.1267, "step": 1615 }, { "epoch": 0.2015716602220282, "grad_norm": 0.0, "learning_rate": 1.8496285951219995e-05, "loss": 1.0178, "step": 1616 }, { "epoch": 0.2016963951602844, "grad_norm": 0.0, "learning_rate": 1.8494154576472976e-05, "loss": 1.0657, "step": 1617 }, { "epoch": 0.2018211300985406, "grad_norm": 0.0, "learning_rate": 1.849202181526579e-05, "loss": 1.078, "step": 1618 }, { "epoch": 0.20194586503679682, "grad_norm": 0.0, "learning_rate": 1.8489887667946562e-05, "loss": 1.0604, "step": 1619 }, { "epoch": 0.202070599975053, "grad_norm": 0.0, "learning_rate": 1.848775213486364e-05, "loss": 1.0413, "step": 1620 }, { "epoch": 0.2021953349133092, "grad_norm": 0.0, "learning_rate": 1.848561521636559e-05, "loss": 1.0245, "step": 1621 }, { "epoch": 0.20232006985156542, "grad_norm": 0.0, "learning_rate": 1.848347691280122e-05, "loss": 1.0816, "step": 1622 }, { "epoch": 0.20244480478982163, "grad_norm": 0.0, "learning_rate": 1.848133722451955e-05, "loss": 1.0846, "step": 1623 }, { "epoch": 0.20256953972807784, "grad_norm": 0.0, "learning_rate": 1.847919615186983e-05, "loss": 1.0825, "step": 1624 }, { "epoch": 0.20269427466633405, "grad_norm": 0.0, "learning_rate": 1.847705369520154e-05, "loss": 1.0566, "step": 1625 }, { "epoch": 0.20281900960459023, "grad_norm": 0.0, "learning_rate": 1.847490985486438e-05, "loss": 1.0039, "step": 1626 }, { "epoch": 0.20294374454284644, "grad_norm": 0.0, "learning_rate": 1.847276463120828e-05, "loss": 1.0716, "step": 1627 }, { "epoch": 0.20306847948110265, "grad_norm": 0.0, "learning_rate": 1.8470618024583398e-05, "loss": 1.0642, "step": 1628 }, { "epoch": 0.20319321441935886, "grad_norm": 0.0, "learning_rate": 1.8468470035340108e-05, "loss": 1.0486, "step": 1629 }, { "epoch": 0.20331794935761507, "grad_norm": 0.0, "learning_rate": 1.846632066382902e-05, "loss": 1.0711, "step": 1630 }, { "epoch": 0.20344268429587128, "grad_norm": 0.0, "learning_rate": 1.846416991040097e-05, "loss": 1.0886, "step": 1631 }, { "epoch": 0.20356741923412747, "grad_norm": 0.0, "learning_rate": 1.8462017775407005e-05, "loss": 1.0622, "step": 1632 }, { "epoch": 0.20369215417238368, "grad_norm": 0.0, "learning_rate": 1.845986425919841e-05, "loss": 1.0455, "step": 1633 }, { "epoch": 0.2038168891106399, "grad_norm": 0.0, "learning_rate": 1.8457709362126703e-05, "loss": 1.0657, "step": 1634 }, { "epoch": 0.2039416240488961, "grad_norm": 0.0, "learning_rate": 1.8455553084543603e-05, "loss": 1.0691, "step": 1635 }, { "epoch": 0.2040663589871523, "grad_norm": 0.0, "learning_rate": 1.8453395426801083e-05, "loss": 1.0806, "step": 1636 }, { "epoch": 0.20419109392540852, "grad_norm": 0.0, "learning_rate": 1.845123638925132e-05, "loss": 0.9941, "step": 1637 }, { "epoch": 0.2043158288636647, "grad_norm": 0.0, "learning_rate": 1.844907597224672e-05, "loss": 1.0901, "step": 1638 }, { "epoch": 0.2044405638019209, "grad_norm": 0.0, "learning_rate": 1.8446914176139925e-05, "loss": 1.0677, "step": 1639 }, { "epoch": 0.20456529874017712, "grad_norm": 0.0, "learning_rate": 1.8444751001283786e-05, "loss": 1.0966, "step": 1640 }, { "epoch": 0.20469003367843333, "grad_norm": 0.0, "learning_rate": 1.8442586448031396e-05, "loss": 1.0779, "step": 1641 }, { "epoch": 0.20481476861668954, "grad_norm": 0.0, "learning_rate": 1.8440420516736063e-05, "loss": 1.109, "step": 1642 }, { "epoch": 0.20493950355494575, "grad_norm": 0.0, "learning_rate": 1.8438253207751318e-05, "loss": 1.0352, "step": 1643 }, { "epoch": 0.20506423849320193, "grad_norm": 0.0, "learning_rate": 1.8436084521430922e-05, "loss": 1.0361, "step": 1644 }, { "epoch": 0.20518897343145814, "grad_norm": 0.0, "learning_rate": 1.843391445812886e-05, "loss": 1.0868, "step": 1645 }, { "epoch": 0.20531370836971435, "grad_norm": 0.0, "learning_rate": 1.843174301819934e-05, "loss": 1.0713, "step": 1646 }, { "epoch": 0.20543844330797056, "grad_norm": 0.0, "learning_rate": 1.8429570201996797e-05, "loss": 1.084, "step": 1647 }, { "epoch": 0.20556317824622677, "grad_norm": 0.0, "learning_rate": 1.8427396009875887e-05, "loss": 1.004, "step": 1648 }, { "epoch": 0.20568791318448298, "grad_norm": 0.0, "learning_rate": 1.8425220442191496e-05, "loss": 1.0465, "step": 1649 }, { "epoch": 0.20581264812273917, "grad_norm": 0.0, "learning_rate": 1.8423043499298733e-05, "loss": 1.0842, "step": 1650 }, { "epoch": 0.20593738306099538, "grad_norm": 0.0, "learning_rate": 1.8420865181552923e-05, "loss": 1.1353, "step": 1651 }, { "epoch": 0.2060621179992516, "grad_norm": 0.0, "learning_rate": 1.841868548930963e-05, "loss": 1.1208, "step": 1652 }, { "epoch": 0.2061868529375078, "grad_norm": 0.0, "learning_rate": 1.8416504422924628e-05, "loss": 1.0475, "step": 1653 }, { "epoch": 0.206311587875764, "grad_norm": 0.0, "learning_rate": 1.841432198275393e-05, "loss": 1.1025, "step": 1654 }, { "epoch": 0.20643632281402022, "grad_norm": 0.0, "learning_rate": 1.8412138169153754e-05, "loss": 1.0612, "step": 1655 }, { "epoch": 0.2065610577522764, "grad_norm": 0.0, "learning_rate": 1.8409952982480567e-05, "loss": 1.0598, "step": 1656 }, { "epoch": 0.2066857926905326, "grad_norm": 0.0, "learning_rate": 1.8407766423091033e-05, "loss": 1.0782, "step": 1657 }, { "epoch": 0.20681052762878882, "grad_norm": 0.0, "learning_rate": 1.8405578491342063e-05, "loss": 1.042, "step": 1658 }, { "epoch": 0.20693526256704503, "grad_norm": 0.0, "learning_rate": 1.8403389187590778e-05, "loss": 1.1315, "step": 1659 }, { "epoch": 0.20705999750530124, "grad_norm": 0.0, "learning_rate": 1.8401198512194532e-05, "loss": 1.0449, "step": 1660 }, { "epoch": 0.20718473244355745, "grad_norm": 0.0, "learning_rate": 1.8399006465510898e-05, "loss": 1.0621, "step": 1661 }, { "epoch": 0.20730946738181363, "grad_norm": 0.0, "learning_rate": 1.8396813047897665e-05, "loss": 1.0567, "step": 1662 }, { "epoch": 0.20743420232006984, "grad_norm": 0.0, "learning_rate": 1.8394618259712866e-05, "loss": 1.0788, "step": 1663 }, { "epoch": 0.20755893725832605, "grad_norm": 0.0, "learning_rate": 1.8392422101314736e-05, "loss": 1.0184, "step": 1664 }, { "epoch": 0.20768367219658226, "grad_norm": 0.0, "learning_rate": 1.8390224573061748e-05, "loss": 1.0219, "step": 1665 }, { "epoch": 0.20780840713483847, "grad_norm": 0.0, "learning_rate": 1.8388025675312595e-05, "loss": 1.0719, "step": 1666 }, { "epoch": 0.20793314207309468, "grad_norm": 0.0, "learning_rate": 1.838582540842619e-05, "loss": 1.0353, "step": 1667 }, { "epoch": 0.20805787701135087, "grad_norm": 0.0, "learning_rate": 1.838362377276167e-05, "loss": 1.064, "step": 1668 }, { "epoch": 0.20818261194960708, "grad_norm": 0.0, "learning_rate": 1.8381420768678406e-05, "loss": 1.0695, "step": 1669 }, { "epoch": 0.2083073468878633, "grad_norm": 0.0, "learning_rate": 1.8379216396535976e-05, "loss": 1.0538, "step": 1670 }, { "epoch": 0.2084320818261195, "grad_norm": 0.0, "learning_rate": 1.837701065669419e-05, "loss": 1.0476, "step": 1671 }, { "epoch": 0.2085568167643757, "grad_norm": 0.0, "learning_rate": 1.837480354951308e-05, "loss": 1.0655, "step": 1672 }, { "epoch": 0.20868155170263192, "grad_norm": 0.0, "learning_rate": 1.8372595075352902e-05, "loss": 1.0234, "step": 1673 }, { "epoch": 0.2088062866408881, "grad_norm": 0.0, "learning_rate": 1.8370385234574142e-05, "loss": 1.03, "step": 1674 }, { "epoch": 0.2089310215791443, "grad_norm": 0.0, "learning_rate": 1.8368174027537492e-05, "loss": 1.065, "step": 1675 }, { "epoch": 0.20905575651740052, "grad_norm": 0.0, "learning_rate": 1.836596145460388e-05, "loss": 1.0373, "step": 1676 }, { "epoch": 0.20918049145565673, "grad_norm": 0.0, "learning_rate": 1.8363747516134455e-05, "loss": 1.0232, "step": 1677 }, { "epoch": 0.20930522639391294, "grad_norm": 0.0, "learning_rate": 1.8361532212490586e-05, "loss": 1.0644, "step": 1678 }, { "epoch": 0.20942996133216915, "grad_norm": 0.0, "learning_rate": 1.835931554403387e-05, "loss": 1.0871, "step": 1679 }, { "epoch": 0.20955469627042533, "grad_norm": 0.0, "learning_rate": 1.8357097511126114e-05, "loss": 1.134, "step": 1680 }, { "epoch": 0.20967943120868154, "grad_norm": 0.0, "learning_rate": 1.8354878114129368e-05, "loss": 1.0775, "step": 1681 }, { "epoch": 0.20980416614693775, "grad_norm": 0.0, "learning_rate": 1.8352657353405883e-05, "loss": 1.1148, "step": 1682 }, { "epoch": 0.20992890108519396, "grad_norm": 0.0, "learning_rate": 1.8350435229318154e-05, "loss": 1.0582, "step": 1683 }, { "epoch": 0.21005363602345017, "grad_norm": 0.0, "learning_rate": 1.834821174222888e-05, "loss": 1.0549, "step": 1684 }, { "epoch": 0.21017837096170638, "grad_norm": 0.0, "learning_rate": 1.834598689250099e-05, "loss": 1.0771, "step": 1685 }, { "epoch": 0.21030310589996257, "grad_norm": 0.0, "learning_rate": 1.8343760680497643e-05, "loss": 1.0136, "step": 1686 }, { "epoch": 0.21042784083821878, "grad_norm": 0.0, "learning_rate": 1.8341533106582204e-05, "loss": 1.0212, "step": 1687 }, { "epoch": 0.210552575776475, "grad_norm": 0.0, "learning_rate": 1.8339304171118274e-05, "loss": 1.0731, "step": 1688 }, { "epoch": 0.2106773107147312, "grad_norm": 0.0, "learning_rate": 1.8337073874469673e-05, "loss": 1.0267, "step": 1689 }, { "epoch": 0.2108020456529874, "grad_norm": 0.0, "learning_rate": 1.833484221700044e-05, "loss": 1.0687, "step": 1690 }, { "epoch": 0.21092678059124362, "grad_norm": 0.0, "learning_rate": 1.8332609199074832e-05, "loss": 1.0291, "step": 1691 }, { "epoch": 0.2110515155294998, "grad_norm": 0.0, "learning_rate": 1.8330374821057342e-05, "loss": 1.0953, "step": 1692 }, { "epoch": 0.211176250467756, "grad_norm": 0.0, "learning_rate": 1.8328139083312675e-05, "loss": 1.0832, "step": 1693 }, { "epoch": 0.21130098540601222, "grad_norm": 0.0, "learning_rate": 1.8325901986205756e-05, "loss": 1.0494, "step": 1694 }, { "epoch": 0.21142572034426843, "grad_norm": 0.0, "learning_rate": 1.8323663530101737e-05, "loss": 1.0809, "step": 1695 }, { "epoch": 0.21155045528252464, "grad_norm": 0.0, "learning_rate": 1.8321423715365992e-05, "loss": 1.0677, "step": 1696 }, { "epoch": 0.21167519022078085, "grad_norm": 0.0, "learning_rate": 1.8319182542364117e-05, "loss": 1.0394, "step": 1697 }, { "epoch": 0.21179992515903703, "grad_norm": 0.0, "learning_rate": 1.8316940011461925e-05, "loss": 1.0475, "step": 1698 }, { "epoch": 0.21192466009729324, "grad_norm": 0.0, "learning_rate": 1.8314696123025456e-05, "loss": 1.0775, "step": 1699 }, { "epoch": 0.21204939503554945, "grad_norm": 0.0, "learning_rate": 1.8312450877420964e-05, "loss": 1.0693, "step": 1700 }, { "epoch": 0.21217412997380566, "grad_norm": 0.0, "learning_rate": 1.8310204275014932e-05, "loss": 1.0285, "step": 1701 }, { "epoch": 0.21229886491206187, "grad_norm": 0.0, "learning_rate": 1.8307956316174066e-05, "loss": 1.0729, "step": 1702 }, { "epoch": 0.21242359985031808, "grad_norm": 0.0, "learning_rate": 1.8305707001265287e-05, "loss": 1.0228, "step": 1703 }, { "epoch": 0.21254833478857427, "grad_norm": 0.0, "learning_rate": 1.830345633065574e-05, "loss": 1.0825, "step": 1704 }, { "epoch": 0.21267306972683048, "grad_norm": 0.0, "learning_rate": 1.830120430471279e-05, "loss": 1.0604, "step": 1705 }, { "epoch": 0.2127978046650867, "grad_norm": 0.0, "learning_rate": 1.8298950923804024e-05, "loss": 1.077, "step": 1706 }, { "epoch": 0.2129225396033429, "grad_norm": 0.0, "learning_rate": 1.8296696188297253e-05, "loss": 1.0313, "step": 1707 }, { "epoch": 0.2130472745415991, "grad_norm": 0.0, "learning_rate": 1.8294440098560508e-05, "loss": 1.0716, "step": 1708 }, { "epoch": 0.21317200947985532, "grad_norm": 0.0, "learning_rate": 1.8292182654962035e-05, "loss": 1.0835, "step": 1709 }, { "epoch": 0.2132967444181115, "grad_norm": 0.0, "learning_rate": 1.828992385787031e-05, "loss": 1.075, "step": 1710 }, { "epoch": 0.2134214793563677, "grad_norm": 0.0, "learning_rate": 1.828766370765402e-05, "loss": 1.0415, "step": 1711 }, { "epoch": 0.21354621429462392, "grad_norm": 0.0, "learning_rate": 1.828540220468208e-05, "loss": 1.0789, "step": 1712 }, { "epoch": 0.21367094923288013, "grad_norm": 0.0, "learning_rate": 1.8283139349323632e-05, "loss": 1.0117, "step": 1713 }, { "epoch": 0.21379568417113634, "grad_norm": 0.0, "learning_rate": 1.8280875141948026e-05, "loss": 1.0335, "step": 1714 }, { "epoch": 0.21392041910939255, "grad_norm": 0.0, "learning_rate": 1.827860958292483e-05, "loss": 1.127, "step": 1715 }, { "epoch": 0.21404515404764873, "grad_norm": 0.0, "learning_rate": 1.8276342672623854e-05, "loss": 1.0576, "step": 1716 }, { "epoch": 0.21416988898590494, "grad_norm": 0.0, "learning_rate": 1.8274074411415104e-05, "loss": 1.0973, "step": 1717 }, { "epoch": 0.21429462392416115, "grad_norm": 0.0, "learning_rate": 1.8271804799668828e-05, "loss": 1.0323, "step": 1718 }, { "epoch": 0.21441935886241736, "grad_norm": 0.0, "learning_rate": 1.8269533837755472e-05, "loss": 1.0776, "step": 1719 }, { "epoch": 0.21454409380067357, "grad_norm": 0.0, "learning_rate": 1.8267261526045723e-05, "loss": 1.0785, "step": 1720 }, { "epoch": 0.21466882873892978, "grad_norm": 0.0, "learning_rate": 1.8264987864910475e-05, "loss": 1.1081, "step": 1721 }, { "epoch": 0.21479356367718597, "grad_norm": 0.0, "learning_rate": 1.826271285472085e-05, "loss": 1.0186, "step": 1722 }, { "epoch": 0.21491829861544218, "grad_norm": 0.0, "learning_rate": 1.8260436495848185e-05, "loss": 1.0309, "step": 1723 }, { "epoch": 0.2150430335536984, "grad_norm": 0.0, "learning_rate": 1.8258158788664038e-05, "loss": 1.0893, "step": 1724 }, { "epoch": 0.2151677684919546, "grad_norm": 0.0, "learning_rate": 1.825587973354019e-05, "loss": 1.0594, "step": 1725 }, { "epoch": 0.2152925034302108, "grad_norm": 0.0, "learning_rate": 1.8253599330848638e-05, "loss": 1.0323, "step": 1726 }, { "epoch": 0.21541723836846702, "grad_norm": 0.0, "learning_rate": 1.825131758096161e-05, "loss": 1.0757, "step": 1727 }, { "epoch": 0.2155419733067232, "grad_norm": 0.0, "learning_rate": 1.8249034484251532e-05, "loss": 1.0111, "step": 1728 }, { "epoch": 0.2156667082449794, "grad_norm": 0.0, "learning_rate": 1.824675004109107e-05, "loss": 1.0685, "step": 1729 }, { "epoch": 0.21579144318323562, "grad_norm": 0.0, "learning_rate": 1.8244464251853103e-05, "loss": 1.0949, "step": 1730 }, { "epoch": 0.21591617812149183, "grad_norm": 0.0, "learning_rate": 1.8242177116910726e-05, "loss": 1.0654, "step": 1731 }, { "epoch": 0.21604091305974804, "grad_norm": 0.0, "learning_rate": 1.8239888636637263e-05, "loss": 1.0486, "step": 1732 }, { "epoch": 0.21616564799800425, "grad_norm": 0.0, "learning_rate": 1.8237598811406244e-05, "loss": 1.0951, "step": 1733 }, { "epoch": 0.21629038293626043, "grad_norm": 0.0, "learning_rate": 1.8235307641591435e-05, "loss": 1.0359, "step": 1734 }, { "epoch": 0.21641511787451664, "grad_norm": 0.0, "learning_rate": 1.8233015127566805e-05, "loss": 0.9988, "step": 1735 }, { "epoch": 0.21653985281277285, "grad_norm": 0.0, "learning_rate": 1.823072126970655e-05, "loss": 1.0466, "step": 1736 }, { "epoch": 0.21666458775102906, "grad_norm": 0.0, "learning_rate": 1.8228426068385094e-05, "loss": 1.0734, "step": 1737 }, { "epoch": 0.21678932268928527, "grad_norm": 0.0, "learning_rate": 1.8226129523977066e-05, "loss": 1.0767, "step": 1738 }, { "epoch": 0.21691405762754148, "grad_norm": 0.0, "learning_rate": 1.822383163685732e-05, "loss": 1.0911, "step": 1739 }, { "epoch": 0.21703879256579767, "grad_norm": 0.0, "learning_rate": 1.8221532407400926e-05, "loss": 1.0609, "step": 1740 }, { "epoch": 0.21716352750405388, "grad_norm": 0.0, "learning_rate": 1.8219231835983184e-05, "loss": 1.0646, "step": 1741 }, { "epoch": 0.2172882624423101, "grad_norm": 0.0, "learning_rate": 1.8216929922979596e-05, "loss": 1.0392, "step": 1742 }, { "epoch": 0.2174129973805663, "grad_norm": 0.0, "learning_rate": 1.82146266687659e-05, "loss": 1.0475, "step": 1743 }, { "epoch": 0.2175377323188225, "grad_norm": 0.0, "learning_rate": 1.8212322073718042e-05, "loss": 1.0326, "step": 1744 }, { "epoch": 0.21766246725707872, "grad_norm": 0.0, "learning_rate": 1.8210016138212186e-05, "loss": 1.0787, "step": 1745 }, { "epoch": 0.2177872021953349, "grad_norm": 0.0, "learning_rate": 1.8207708862624727e-05, "loss": 1.0443, "step": 1746 }, { "epoch": 0.2179119371335911, "grad_norm": 0.0, "learning_rate": 1.8205400247332266e-05, "loss": 1.0383, "step": 1747 }, { "epoch": 0.21803667207184732, "grad_norm": 0.0, "learning_rate": 1.8203090292711626e-05, "loss": 0.9978, "step": 1748 }, { "epoch": 0.21816140701010353, "grad_norm": 0.0, "learning_rate": 1.820077899913985e-05, "loss": 1.0588, "step": 1749 }, { "epoch": 0.21828614194835974, "grad_norm": 0.0, "learning_rate": 1.8198466366994208e-05, "loss": 1.0815, "step": 1750 }, { "epoch": 0.21841087688661595, "grad_norm": 0.0, "learning_rate": 1.8196152396652165e-05, "loss": 1.1064, "step": 1751 }, { "epoch": 0.21853561182487213, "grad_norm": 0.0, "learning_rate": 1.8193837088491432e-05, "loss": 1.0391, "step": 1752 }, { "epoch": 0.21866034676312834, "grad_norm": 0.0, "learning_rate": 1.819152044288992e-05, "loss": 1.102, "step": 1753 }, { "epoch": 0.21878508170138455, "grad_norm": 0.0, "learning_rate": 1.818920246022576e-05, "loss": 1.0462, "step": 1754 }, { "epoch": 0.21890981663964076, "grad_norm": 0.0, "learning_rate": 1.8186883140877317e-05, "loss": 1.0641, "step": 1755 }, { "epoch": 0.21903455157789697, "grad_norm": 0.0, "learning_rate": 1.818456248522315e-05, "loss": 1.0448, "step": 1756 }, { "epoch": 0.21915928651615318, "grad_norm": 0.0, "learning_rate": 1.8182240493642056e-05, "loss": 1.0791, "step": 1757 }, { "epoch": 0.21928402145440937, "grad_norm": 0.0, "learning_rate": 1.8179917166513042e-05, "loss": 1.0948, "step": 1758 }, { "epoch": 0.21940875639266558, "grad_norm": 0.0, "learning_rate": 1.817759250421533e-05, "loss": 1.0094, "step": 1759 }, { "epoch": 0.2195334913309218, "grad_norm": 0.0, "learning_rate": 1.8175266507128365e-05, "loss": 1.0816, "step": 1760 }, { "epoch": 0.219658226269178, "grad_norm": 0.0, "learning_rate": 1.8172939175631807e-05, "loss": 1.0272, "step": 1761 }, { "epoch": 0.2197829612074342, "grad_norm": 0.0, "learning_rate": 1.817061051010554e-05, "loss": 1.0652, "step": 1762 }, { "epoch": 0.21990769614569042, "grad_norm": 0.0, "learning_rate": 1.8168280510929656e-05, "loss": 1.0679, "step": 1763 }, { "epoch": 0.2200324310839466, "grad_norm": 0.0, "learning_rate": 1.816594917848447e-05, "loss": 1.0148, "step": 1764 }, { "epoch": 0.2201571660222028, "grad_norm": 0.0, "learning_rate": 1.816361651315052e-05, "loss": 1.0828, "step": 1765 }, { "epoch": 0.22028190096045902, "grad_norm": 0.0, "learning_rate": 1.816128251530855e-05, "loss": 1.0452, "step": 1766 }, { "epoch": 0.22040663589871523, "grad_norm": 0.0, "learning_rate": 1.8158947185339525e-05, "loss": 1.089, "step": 1767 }, { "epoch": 0.22053137083697144, "grad_norm": 0.0, "learning_rate": 1.8156610523624637e-05, "loss": 1.0167, "step": 1768 }, { "epoch": 0.22065610577522765, "grad_norm": 0.0, "learning_rate": 1.8154272530545283e-05, "loss": 1.0865, "step": 1769 }, { "epoch": 0.22078084071348383, "grad_norm": 0.0, "learning_rate": 1.8151933206483087e-05, "loss": 1.0165, "step": 1770 }, { "epoch": 0.22090557565174004, "grad_norm": 0.0, "learning_rate": 1.8149592551819883e-05, "loss": 1.0798, "step": 1771 }, { "epoch": 0.22103031058999625, "grad_norm": 0.0, "learning_rate": 1.814725056693772e-05, "loss": 1.06, "step": 1772 }, { "epoch": 0.22115504552825246, "grad_norm": 0.0, "learning_rate": 1.8144907252218875e-05, "loss": 1.079, "step": 1773 }, { "epoch": 0.22127978046650867, "grad_norm": 0.0, "learning_rate": 1.8142562608045834e-05, "loss": 1.0981, "step": 1774 }, { "epoch": 0.22140451540476488, "grad_norm": 0.0, "learning_rate": 1.8140216634801304e-05, "loss": 1.0974, "step": 1775 }, { "epoch": 0.22152925034302107, "grad_norm": 0.0, "learning_rate": 1.8137869332868207e-05, "loss": 1.1015, "step": 1776 }, { "epoch": 0.22165398528127728, "grad_norm": 0.0, "learning_rate": 1.8135520702629677e-05, "loss": 1.0685, "step": 1777 }, { "epoch": 0.2217787202195335, "grad_norm": 0.0, "learning_rate": 1.8133170744469074e-05, "loss": 1.0343, "step": 1778 }, { "epoch": 0.2219034551577897, "grad_norm": 0.0, "learning_rate": 1.813081945876997e-05, "loss": 1.0254, "step": 1779 }, { "epoch": 0.2220281900960459, "grad_norm": 0.0, "learning_rate": 1.8128466845916156e-05, "loss": 1.0472, "step": 1780 }, { "epoch": 0.22215292503430212, "grad_norm": 0.0, "learning_rate": 1.8126112906291632e-05, "loss": 1.117, "step": 1781 }, { "epoch": 0.2222776599725583, "grad_norm": 0.0, "learning_rate": 1.812375764028062e-05, "loss": 1.0663, "step": 1782 }, { "epoch": 0.2224023949108145, "grad_norm": 0.0, "learning_rate": 1.812140104826757e-05, "loss": 0.9966, "step": 1783 }, { "epoch": 0.22252712984907072, "grad_norm": 0.0, "learning_rate": 1.8119043130637127e-05, "loss": 1.0028, "step": 1784 }, { "epoch": 0.22265186478732693, "grad_norm": 0.0, "learning_rate": 1.8116683887774164e-05, "loss": 1.1522, "step": 1785 }, { "epoch": 0.22277659972558314, "grad_norm": 0.0, "learning_rate": 1.811432332006377e-05, "loss": 1.0181, "step": 1786 }, { "epoch": 0.22290133466383935, "grad_norm": 0.0, "learning_rate": 1.811196142789125e-05, "loss": 1.0021, "step": 1787 }, { "epoch": 0.22302606960209553, "grad_norm": 0.0, "learning_rate": 1.810959821164212e-05, "loss": 1.0306, "step": 1788 }, { "epoch": 0.22315080454035174, "grad_norm": 0.0, "learning_rate": 1.8107233671702123e-05, "loss": 1.0308, "step": 1789 }, { "epoch": 0.22327553947860795, "grad_norm": 0.0, "learning_rate": 1.810486780845721e-05, "loss": 1.0576, "step": 1790 }, { "epoch": 0.22340027441686416, "grad_norm": 0.0, "learning_rate": 1.8102500622293546e-05, "loss": 1.0626, "step": 1791 }, { "epoch": 0.22352500935512037, "grad_norm": 0.0, "learning_rate": 1.810013211359752e-05, "loss": 1.0759, "step": 1792 }, { "epoch": 0.22364974429337658, "grad_norm": 0.0, "learning_rate": 1.8097762282755726e-05, "loss": 1.0497, "step": 1793 }, { "epoch": 0.22377447923163277, "grad_norm": 0.0, "learning_rate": 1.809539113015499e-05, "loss": 1.0741, "step": 1794 }, { "epoch": 0.22389921416988898, "grad_norm": 0.0, "learning_rate": 1.8093018656182333e-05, "loss": 1.1189, "step": 1795 }, { "epoch": 0.2240239491081452, "grad_norm": 0.0, "learning_rate": 1.8090644861225008e-05, "loss": 1.0378, "step": 1796 }, { "epoch": 0.2241486840464014, "grad_norm": 0.0, "learning_rate": 1.808826974567048e-05, "loss": 1.0766, "step": 1797 }, { "epoch": 0.2242734189846576, "grad_norm": 0.0, "learning_rate": 1.808589330990642e-05, "loss": 1.0382, "step": 1798 }, { "epoch": 0.22439815392291382, "grad_norm": 0.0, "learning_rate": 1.8083515554320734e-05, "loss": 1.0745, "step": 1799 }, { "epoch": 0.22452288886117, "grad_norm": 0.0, "learning_rate": 1.8081136479301522e-05, "loss": 1.0518, "step": 1800 }, { "epoch": 0.2246476237994262, "grad_norm": 0.0, "learning_rate": 1.8078756085237112e-05, "loss": 1.1021, "step": 1801 }, { "epoch": 0.22477235873768242, "grad_norm": 0.0, "learning_rate": 1.8076374372516045e-05, "loss": 1.0914, "step": 1802 }, { "epoch": 0.22489709367593863, "grad_norm": 0.0, "learning_rate": 1.8073991341527078e-05, "loss": 1.0106, "step": 1803 }, { "epoch": 0.22502182861419484, "grad_norm": 0.0, "learning_rate": 1.807160699265918e-05, "loss": 1.0367, "step": 1804 }, { "epoch": 0.22514656355245105, "grad_norm": 0.0, "learning_rate": 1.8069221326301534e-05, "loss": 1.0063, "step": 1805 }, { "epoch": 0.22527129849070723, "grad_norm": 0.0, "learning_rate": 1.8066834342843546e-05, "loss": 1.0144, "step": 1806 }, { "epoch": 0.22539603342896344, "grad_norm": 0.0, "learning_rate": 1.806444604267483e-05, "loss": 1.0192, "step": 1807 }, { "epoch": 0.22552076836721965, "grad_norm": 0.0, "learning_rate": 1.8062056426185214e-05, "loss": 1.0716, "step": 1808 }, { "epoch": 0.22564550330547586, "grad_norm": 0.0, "learning_rate": 1.8059665493764745e-05, "loss": 1.0638, "step": 1809 }, { "epoch": 0.22577023824373207, "grad_norm": 0.0, "learning_rate": 1.8057273245803686e-05, "loss": 1.0258, "step": 1810 }, { "epoch": 0.22589497318198828, "grad_norm": 0.0, "learning_rate": 1.805487968269251e-05, "loss": 1.0302, "step": 1811 }, { "epoch": 0.22601970812024447, "grad_norm": 0.0, "learning_rate": 1.8052484804821907e-05, "loss": 1.0575, "step": 1812 }, { "epoch": 0.22614444305850068, "grad_norm": 0.0, "learning_rate": 1.8050088612582785e-05, "loss": 1.0584, "step": 1813 }, { "epoch": 0.2262691779967569, "grad_norm": 0.0, "learning_rate": 1.8047691106366256e-05, "loss": 1.0707, "step": 1814 }, { "epoch": 0.2263939129350131, "grad_norm": 0.0, "learning_rate": 1.804529228656366e-05, "loss": 1.0364, "step": 1815 }, { "epoch": 0.2265186478732693, "grad_norm": 0.0, "learning_rate": 1.8042892153566543e-05, "loss": 1.0766, "step": 1816 }, { "epoch": 0.22664338281152552, "grad_norm": 0.0, "learning_rate": 1.8040490707766664e-05, "loss": 1.0434, "step": 1817 }, { "epoch": 0.2267681177497817, "grad_norm": 0.0, "learning_rate": 1.8038087949556003e-05, "loss": 1.0752, "step": 1818 }, { "epoch": 0.2268928526880379, "grad_norm": 0.0, "learning_rate": 1.803568387932675e-05, "loss": 1.0541, "step": 1819 }, { "epoch": 0.22701758762629412, "grad_norm": 0.0, "learning_rate": 1.803327849747131e-05, "loss": 1.0537, "step": 1820 }, { "epoch": 0.22714232256455033, "grad_norm": 0.0, "learning_rate": 1.80308718043823e-05, "loss": 1.0695, "step": 1821 }, { "epoch": 0.22726705750280654, "grad_norm": 0.0, "learning_rate": 1.8028463800452553e-05, "loss": 1.0857, "step": 1822 }, { "epoch": 0.22739179244106275, "grad_norm": 0.0, "learning_rate": 1.8026054486075116e-05, "loss": 1.0704, "step": 1823 }, { "epoch": 0.22751652737931893, "grad_norm": 0.0, "learning_rate": 1.8023643861643255e-05, "loss": 1.0532, "step": 1824 }, { "epoch": 0.22764126231757514, "grad_norm": 0.0, "learning_rate": 1.802123192755044e-05, "loss": 1.1274, "step": 1825 }, { "epoch": 0.22776599725583135, "grad_norm": 0.0, "learning_rate": 1.801881868419036e-05, "loss": 1.0894, "step": 1826 }, { "epoch": 0.22789073219408756, "grad_norm": 0.0, "learning_rate": 1.801640413195692e-05, "loss": 1.0532, "step": 1827 }, { "epoch": 0.22801546713234377, "grad_norm": 0.0, "learning_rate": 1.8013988271244227e-05, "loss": 1.0669, "step": 1828 }, { "epoch": 0.22814020207059998, "grad_norm": 0.0, "learning_rate": 1.8011571102446622e-05, "loss": 1.0504, "step": 1829 }, { "epoch": 0.22826493700885617, "grad_norm": 0.0, "learning_rate": 1.800915262595864e-05, "loss": 1.0608, "step": 1830 }, { "epoch": 0.22838967194711238, "grad_norm": 0.0, "learning_rate": 1.800673284217504e-05, "loss": 1.0586, "step": 1831 }, { "epoch": 0.2285144068853686, "grad_norm": 0.0, "learning_rate": 1.8004311751490792e-05, "loss": 1.0126, "step": 1832 }, { "epoch": 0.2286391418236248, "grad_norm": 0.0, "learning_rate": 1.800188935430108e-05, "loss": 1.0164, "step": 1833 }, { "epoch": 0.228763876761881, "grad_norm": 0.0, "learning_rate": 1.7999465651001297e-05, "loss": 1.0937, "step": 1834 }, { "epoch": 0.22888861170013722, "grad_norm": 0.0, "learning_rate": 1.7997040641987057e-05, "loss": 1.0739, "step": 1835 }, { "epoch": 0.2290133466383934, "grad_norm": 0.0, "learning_rate": 1.7994614327654178e-05, "loss": 1.0069, "step": 1836 }, { "epoch": 0.2291380815766496, "grad_norm": 0.0, "learning_rate": 1.7992186708398697e-05, "loss": 1.0046, "step": 1837 }, { "epoch": 0.22926281651490582, "grad_norm": 0.0, "learning_rate": 1.7989757784616862e-05, "loss": 1.0585, "step": 1838 }, { "epoch": 0.22938755145316203, "grad_norm": 0.0, "learning_rate": 1.798732755670514e-05, "loss": 1.0752, "step": 1839 }, { "epoch": 0.22951228639141824, "grad_norm": 0.0, "learning_rate": 1.79848960250602e-05, "loss": 1.0277, "step": 1840 }, { "epoch": 0.22963702132967445, "grad_norm": 0.0, "learning_rate": 1.7982463190078928e-05, "loss": 1.0604, "step": 1841 }, { "epoch": 0.22976175626793063, "grad_norm": 0.0, "learning_rate": 1.7980029052158432e-05, "loss": 1.0631, "step": 1842 }, { "epoch": 0.22988649120618684, "grad_norm": 0.0, "learning_rate": 1.7977593611696017e-05, "loss": 1.0158, "step": 1843 }, { "epoch": 0.23001122614444305, "grad_norm": 0.0, "learning_rate": 1.7975156869089207e-05, "loss": 1.0368, "step": 1844 }, { "epoch": 0.23013596108269926, "grad_norm": 0.0, "learning_rate": 1.7972718824735748e-05, "loss": 1.0525, "step": 1845 }, { "epoch": 0.23026069602095547, "grad_norm": 0.0, "learning_rate": 1.7970279479033583e-05, "loss": 1.0644, "step": 1846 }, { "epoch": 0.23038543095921168, "grad_norm": 0.0, "learning_rate": 1.7967838832380878e-05, "loss": 1.0251, "step": 1847 }, { "epoch": 0.23051016589746787, "grad_norm": 0.0, "learning_rate": 1.7965396885176008e-05, "loss": 1.1198, "step": 1848 }, { "epoch": 0.23063490083572408, "grad_norm": 0.0, "learning_rate": 1.7962953637817556e-05, "loss": 1.0072, "step": 1849 }, { "epoch": 0.2307596357739803, "grad_norm": 0.0, "learning_rate": 1.796050909070433e-05, "loss": 1.0572, "step": 1850 }, { "epoch": 0.2308843707122365, "grad_norm": 0.0, "learning_rate": 1.795806324423534e-05, "loss": 1.0542, "step": 1851 }, { "epoch": 0.2310091056504927, "grad_norm": 0.0, "learning_rate": 1.79556160988098e-05, "loss": 1.0456, "step": 1852 }, { "epoch": 0.23113384058874892, "grad_norm": 0.0, "learning_rate": 1.7953167654827157e-05, "loss": 1.0407, "step": 1853 }, { "epoch": 0.23125857552700513, "grad_norm": 0.0, "learning_rate": 1.795071791268705e-05, "loss": 1.0536, "step": 1854 }, { "epoch": 0.2313833104652613, "grad_norm": 0.0, "learning_rate": 1.7948266872789348e-05, "loss": 1.0666, "step": 1855 }, { "epoch": 0.23150804540351752, "grad_norm": 0.0, "learning_rate": 1.7945814535534116e-05, "loss": 1.0382, "step": 1856 }, { "epoch": 0.23163278034177373, "grad_norm": 0.0, "learning_rate": 1.794336090132164e-05, "loss": 1.036, "step": 1857 }, { "epoch": 0.23175751528002994, "grad_norm": 0.0, "learning_rate": 1.794090597055241e-05, "loss": 1.0613, "step": 1858 }, { "epoch": 0.23188225021828615, "grad_norm": 0.0, "learning_rate": 1.7938449743627143e-05, "loss": 1.0689, "step": 1859 }, { "epoch": 0.23200698515654236, "grad_norm": 0.0, "learning_rate": 1.7935992220946746e-05, "loss": 1.0841, "step": 1860 }, { "epoch": 0.23213172009479854, "grad_norm": 0.0, "learning_rate": 1.7933533402912354e-05, "loss": 1.0845, "step": 1861 }, { "epoch": 0.23225645503305475, "grad_norm": 0.0, "learning_rate": 1.7931073289925308e-05, "loss": 1.0859, "step": 1862 }, { "epoch": 0.23238118997131096, "grad_norm": 0.0, "learning_rate": 1.7928611882387157e-05, "loss": 1.0575, "step": 1863 }, { "epoch": 0.23250592490956717, "grad_norm": 0.0, "learning_rate": 1.792614918069967e-05, "loss": 1.0638, "step": 1864 }, { "epoch": 0.23263065984782338, "grad_norm": 0.0, "learning_rate": 1.792368518526482e-05, "loss": 1.0422, "step": 1865 }, { "epoch": 0.2327553947860796, "grad_norm": 0.0, "learning_rate": 1.7921219896484793e-05, "loss": 1.0571, "step": 1866 }, { "epoch": 0.23288012972433578, "grad_norm": 0.0, "learning_rate": 1.7918753314761987e-05, "loss": 1.0613, "step": 1867 }, { "epoch": 0.233004864662592, "grad_norm": 0.0, "learning_rate": 1.7916285440499005e-05, "loss": 1.0913, "step": 1868 }, { "epoch": 0.2331295996008482, "grad_norm": 0.0, "learning_rate": 1.7913816274098678e-05, "loss": 1.0973, "step": 1869 }, { "epoch": 0.2332543345391044, "grad_norm": 0.0, "learning_rate": 1.791134581596402e-05, "loss": 1.0691, "step": 1870 }, { "epoch": 0.23337906947736062, "grad_norm": 0.0, "learning_rate": 1.7908874066498286e-05, "loss": 0.9701, "step": 1871 }, { "epoch": 0.23350380441561683, "grad_norm": 0.0, "learning_rate": 1.7906401026104923e-05, "loss": 1.0584, "step": 1872 }, { "epoch": 0.233628539353873, "grad_norm": 0.0, "learning_rate": 1.7903926695187595e-05, "loss": 1.084, "step": 1873 }, { "epoch": 0.23375327429212922, "grad_norm": 0.0, "learning_rate": 1.7901451074150173e-05, "loss": 1.0734, "step": 1874 }, { "epoch": 0.23387800923038543, "grad_norm": 0.0, "learning_rate": 1.789897416339674e-05, "loss": 1.0825, "step": 1875 }, { "epoch": 0.23400274416864164, "grad_norm": 0.0, "learning_rate": 1.7896495963331588e-05, "loss": 1.033, "step": 1876 }, { "epoch": 0.23412747910689785, "grad_norm": 0.0, "learning_rate": 1.789401647435923e-05, "loss": 1.003, "step": 1877 }, { "epoch": 0.23425221404515406, "grad_norm": 0.0, "learning_rate": 1.7891535696884372e-05, "loss": 1.1092, "step": 1878 }, { "epoch": 0.23437694898341024, "grad_norm": 0.0, "learning_rate": 1.7889053631311947e-05, "loss": 1.1016, "step": 1879 }, { "epoch": 0.23450168392166645, "grad_norm": 0.0, "learning_rate": 1.7886570278047087e-05, "loss": 1.0772, "step": 1880 }, { "epoch": 0.23462641885992266, "grad_norm": 0.0, "learning_rate": 1.788408563749514e-05, "loss": 1.0347, "step": 1881 }, { "epoch": 0.23475115379817887, "grad_norm": 0.0, "learning_rate": 1.7881599710061657e-05, "loss": 1.0842, "step": 1882 }, { "epoch": 0.23487588873643508, "grad_norm": 0.0, "learning_rate": 1.7879112496152408e-05, "loss": 1.0596, "step": 1883 }, { "epoch": 0.2350006236746913, "grad_norm": 0.0, "learning_rate": 1.787662399617337e-05, "loss": 1.0836, "step": 1884 }, { "epoch": 0.23512535861294748, "grad_norm": 0.0, "learning_rate": 1.7874134210530725e-05, "loss": 1.0551, "step": 1885 }, { "epoch": 0.2352500935512037, "grad_norm": 0.0, "learning_rate": 1.7871643139630872e-05, "loss": 1.0454, "step": 1886 }, { "epoch": 0.2353748284894599, "grad_norm": 0.0, "learning_rate": 1.7869150783880415e-05, "loss": 1.0806, "step": 1887 }, { "epoch": 0.2354995634277161, "grad_norm": 0.0, "learning_rate": 1.786665714368617e-05, "loss": 1.0546, "step": 1888 }, { "epoch": 0.23562429836597232, "grad_norm": 0.0, "learning_rate": 1.7864162219455164e-05, "loss": 1.0266, "step": 1889 }, { "epoch": 0.23574903330422853, "grad_norm": 0.0, "learning_rate": 1.786166601159463e-05, "loss": 1.019, "step": 1890 }, { "epoch": 0.2358737682424847, "grad_norm": 0.0, "learning_rate": 1.7859168520512008e-05, "loss": 1.0535, "step": 1891 }, { "epoch": 0.23599850318074092, "grad_norm": 0.0, "learning_rate": 1.7856669746614955e-05, "loss": 1.0523, "step": 1892 }, { "epoch": 0.23612323811899713, "grad_norm": 0.0, "learning_rate": 1.7854169690311338e-05, "loss": 1.0672, "step": 1893 }, { "epoch": 0.23624797305725334, "grad_norm": 0.0, "learning_rate": 1.785166835200922e-05, "loss": 1.0343, "step": 1894 }, { "epoch": 0.23637270799550955, "grad_norm": 0.0, "learning_rate": 1.784916573211689e-05, "loss": 1.0745, "step": 1895 }, { "epoch": 0.23649744293376576, "grad_norm": 0.0, "learning_rate": 1.784666183104284e-05, "loss": 1.0105, "step": 1896 }, { "epoch": 0.23662217787202194, "grad_norm": 0.0, "learning_rate": 1.784415664919576e-05, "loss": 1.0205, "step": 1897 }, { "epoch": 0.23674691281027815, "grad_norm": 0.0, "learning_rate": 1.7841650186984565e-05, "loss": 1.0307, "step": 1898 }, { "epoch": 0.23687164774853436, "grad_norm": 0.0, "learning_rate": 1.7839142444818375e-05, "loss": 1.1057, "step": 1899 }, { "epoch": 0.23699638268679057, "grad_norm": 0.0, "learning_rate": 1.7836633423106515e-05, "loss": 1.0036, "step": 1900 }, { "epoch": 0.23712111762504678, "grad_norm": 0.0, "learning_rate": 1.783412312225852e-05, "loss": 1.0777, "step": 1901 }, { "epoch": 0.237245852563303, "grad_norm": 0.0, "learning_rate": 1.783161154268413e-05, "loss": 1.113, "step": 1902 }, { "epoch": 0.23737058750155918, "grad_norm": 0.0, "learning_rate": 1.7829098684793303e-05, "loss": 1.0341, "step": 1903 }, { "epoch": 0.2374953224398154, "grad_norm": 0.0, "learning_rate": 1.7826584548996205e-05, "loss": 1.1264, "step": 1904 }, { "epoch": 0.2376200573780716, "grad_norm": 0.0, "learning_rate": 1.78240691357032e-05, "loss": 1.0705, "step": 1905 }, { "epoch": 0.2377447923163278, "grad_norm": 0.0, "learning_rate": 1.782155244532487e-05, "loss": 1.0731, "step": 1906 }, { "epoch": 0.23786952725458402, "grad_norm": 0.0, "learning_rate": 1.7819034478271993e-05, "loss": 1.0722, "step": 1907 }, { "epoch": 0.23799426219284023, "grad_norm": 0.0, "learning_rate": 1.7816515234955578e-05, "loss": 1.0335, "step": 1908 }, { "epoch": 0.2381189971310964, "grad_norm": 0.0, "learning_rate": 1.7813994715786825e-05, "loss": 1.0423, "step": 1909 }, { "epoch": 0.23824373206935262, "grad_norm": 0.0, "learning_rate": 1.7811472921177143e-05, "loss": 1.0686, "step": 1910 }, { "epoch": 0.23836846700760883, "grad_norm": 0.0, "learning_rate": 1.7808949851538152e-05, "loss": 1.0398, "step": 1911 }, { "epoch": 0.23849320194586504, "grad_norm": 0.0, "learning_rate": 1.7806425507281684e-05, "loss": 1.046, "step": 1912 }, { "epoch": 0.23861793688412125, "grad_norm": 0.0, "learning_rate": 1.7803899888819777e-05, "loss": 1.0593, "step": 1913 }, { "epoch": 0.23874267182237746, "grad_norm": 0.0, "learning_rate": 1.780137299656467e-05, "loss": 1.0556, "step": 1914 }, { "epoch": 0.23886740676063364, "grad_norm": 0.0, "learning_rate": 1.7798844830928818e-05, "loss": 1.0759, "step": 1915 }, { "epoch": 0.23899214169888985, "grad_norm": 0.0, "learning_rate": 1.7796315392324884e-05, "loss": 1.0449, "step": 1916 }, { "epoch": 0.23911687663714606, "grad_norm": 0.0, "learning_rate": 1.7793784681165734e-05, "loss": 1.0227, "step": 1917 }, { "epoch": 0.23924161157540227, "grad_norm": 0.0, "learning_rate": 1.7791252697864444e-05, "loss": 1.0389, "step": 1918 }, { "epoch": 0.23936634651365848, "grad_norm": 0.0, "learning_rate": 1.7788719442834293e-05, "loss": 0.991, "step": 1919 }, { "epoch": 0.2394910814519147, "grad_norm": 0.0, "learning_rate": 1.7786184916488784e-05, "loss": 1.0585, "step": 1920 }, { "epoch": 0.23961581639017088, "grad_norm": 0.0, "learning_rate": 1.7783649119241603e-05, "loss": 1.0001, "step": 1921 }, { "epoch": 0.2397405513284271, "grad_norm": 0.0, "learning_rate": 1.778111205150666e-05, "loss": 1.0545, "step": 1922 }, { "epoch": 0.2398652862666833, "grad_norm": 0.0, "learning_rate": 1.777857371369807e-05, "loss": 1.0389, "step": 1923 }, { "epoch": 0.2399900212049395, "grad_norm": 0.0, "learning_rate": 1.7776034106230156e-05, "loss": 1.0438, "step": 1924 }, { "epoch": 0.24011475614319572, "grad_norm": 0.0, "learning_rate": 1.777349322951744e-05, "loss": 1.0849, "step": 1925 }, { "epoch": 0.24023949108145193, "grad_norm": 0.0, "learning_rate": 1.7770951083974662e-05, "loss": 1.0171, "step": 1926 }, { "epoch": 0.2403642260197081, "grad_norm": 0.0, "learning_rate": 1.7768407670016766e-05, "loss": 1.0455, "step": 1927 }, { "epoch": 0.24048896095796432, "grad_norm": 0.0, "learning_rate": 1.7765862988058893e-05, "loss": 1.0357, "step": 1928 }, { "epoch": 0.24061369589622053, "grad_norm": 0.0, "learning_rate": 1.7763317038516407e-05, "loss": 1.0214, "step": 1929 }, { "epoch": 0.24073843083447674, "grad_norm": 0.0, "learning_rate": 1.7760769821804867e-05, "loss": 0.9917, "step": 1930 }, { "epoch": 0.24086316577273295, "grad_norm": 0.0, "learning_rate": 1.7758221338340043e-05, "loss": 1.009, "step": 1931 }, { "epoch": 0.24098790071098916, "grad_norm": 0.0, "learning_rate": 1.7755671588537913e-05, "loss": 1.0064, "step": 1932 }, { "epoch": 0.24111263564924534, "grad_norm": 0.0, "learning_rate": 1.775312057281466e-05, "loss": 1.0238, "step": 1933 }, { "epoch": 0.24123737058750155, "grad_norm": 0.0, "learning_rate": 1.7750568291586676e-05, "loss": 1.0815, "step": 1934 }, { "epoch": 0.24136210552575776, "grad_norm": 0.0, "learning_rate": 1.7748014745270556e-05, "loss": 1.0509, "step": 1935 }, { "epoch": 0.24148684046401397, "grad_norm": 0.0, "learning_rate": 1.7745459934283103e-05, "loss": 1.0606, "step": 1936 }, { "epoch": 0.24161157540227018, "grad_norm": 0.0, "learning_rate": 1.7742903859041324e-05, "loss": 1.0353, "step": 1937 }, { "epoch": 0.2417363103405264, "grad_norm": 0.0, "learning_rate": 1.7740346519962443e-05, "loss": 1.0705, "step": 1938 }, { "epoch": 0.24186104527878258, "grad_norm": 0.0, "learning_rate": 1.7737787917463875e-05, "loss": 1.04, "step": 1939 }, { "epoch": 0.2419857802170388, "grad_norm": 0.0, "learning_rate": 1.7735228051963248e-05, "loss": 1.0318, "step": 1940 }, { "epoch": 0.242110515155295, "grad_norm": 0.0, "learning_rate": 1.77326669238784e-05, "loss": 1.0293, "step": 1941 }, { "epoch": 0.2422352500935512, "grad_norm": 0.0, "learning_rate": 1.773010453362737e-05, "loss": 1.0164, "step": 1942 }, { "epoch": 0.24235998503180742, "grad_norm": 0.0, "learning_rate": 1.7727540881628407e-05, "loss": 1.0253, "step": 1943 }, { "epoch": 0.24248471997006363, "grad_norm": 0.0, "learning_rate": 1.7724975968299963e-05, "loss": 1.0181, "step": 1944 }, { "epoch": 0.2426094549083198, "grad_norm": 0.0, "learning_rate": 1.7722409794060692e-05, "loss": 1.0122, "step": 1945 }, { "epoch": 0.24273418984657602, "grad_norm": 0.0, "learning_rate": 1.7719842359329463e-05, "loss": 1.0059, "step": 1946 }, { "epoch": 0.24285892478483223, "grad_norm": 0.0, "learning_rate": 1.7717273664525347e-05, "loss": 1.0058, "step": 1947 }, { "epoch": 0.24298365972308844, "grad_norm": 0.0, "learning_rate": 1.7714703710067615e-05, "loss": 1.0389, "step": 1948 }, { "epoch": 0.24310839466134465, "grad_norm": 0.0, "learning_rate": 1.771213249637575e-05, "loss": 1.1012, "step": 1949 }, { "epoch": 0.24323312959960086, "grad_norm": 0.0, "learning_rate": 1.7709560023869444e-05, "loss": 1.0652, "step": 1950 }, { "epoch": 0.24335786453785704, "grad_norm": 0.0, "learning_rate": 1.770698629296858e-05, "loss": 1.0384, "step": 1951 }, { "epoch": 0.24348259947611325, "grad_norm": 0.0, "learning_rate": 1.7704411304093267e-05, "loss": 1.0513, "step": 1952 }, { "epoch": 0.24360733441436946, "grad_norm": 0.0, "learning_rate": 1.77018350576638e-05, "loss": 1.0316, "step": 1953 }, { "epoch": 0.24373206935262567, "grad_norm": 0.0, "learning_rate": 1.7699257554100688e-05, "loss": 1.0608, "step": 1954 }, { "epoch": 0.24385680429088188, "grad_norm": 0.0, "learning_rate": 1.7696678793824644e-05, "loss": 1.0107, "step": 1955 }, { "epoch": 0.2439815392291381, "grad_norm": 0.0, "learning_rate": 1.769409877725659e-05, "loss": 1.0363, "step": 1956 }, { "epoch": 0.24410627416739428, "grad_norm": 0.0, "learning_rate": 1.7691517504817652e-05, "loss": 1.079, "step": 1957 }, { "epoch": 0.2442310091056505, "grad_norm": 0.0, "learning_rate": 1.7688934976929152e-05, "loss": 1.0403, "step": 1958 }, { "epoch": 0.2443557440439067, "grad_norm": 0.0, "learning_rate": 1.768635119401263e-05, "loss": 1.0248, "step": 1959 }, { "epoch": 0.2444804789821629, "grad_norm": 0.0, "learning_rate": 1.7683766156489817e-05, "loss": 1.0669, "step": 1960 }, { "epoch": 0.24460521392041912, "grad_norm": 0.0, "learning_rate": 1.768117986478266e-05, "loss": 1.0017, "step": 1961 }, { "epoch": 0.24472994885867533, "grad_norm": 0.0, "learning_rate": 1.7678592319313313e-05, "loss": 1.0614, "step": 1962 }, { "epoch": 0.2448546837969315, "grad_norm": 0.0, "learning_rate": 1.7676003520504117e-05, "loss": 1.0339, "step": 1963 }, { "epoch": 0.24497941873518772, "grad_norm": 0.0, "learning_rate": 1.767341346877764e-05, "loss": 1.0492, "step": 1964 }, { "epoch": 0.24510415367344393, "grad_norm": 0.0, "learning_rate": 1.7670822164556642e-05, "loss": 1.0699, "step": 1965 }, { "epoch": 0.24522888861170014, "grad_norm": 0.0, "learning_rate": 1.7668229608264077e-05, "loss": 1.0203, "step": 1966 }, { "epoch": 0.24535362354995635, "grad_norm": 0.0, "learning_rate": 1.7665635800323134e-05, "loss": 1.0448, "step": 1967 }, { "epoch": 0.24547835848821256, "grad_norm": 0.0, "learning_rate": 1.7663040741157172e-05, "loss": 1.0418, "step": 1968 }, { "epoch": 0.24560309342646874, "grad_norm": 0.0, "learning_rate": 1.766044443118978e-05, "loss": 1.0342, "step": 1969 }, { "epoch": 0.24572782836472495, "grad_norm": 0.0, "learning_rate": 1.765784687084474e-05, "loss": 1.0018, "step": 1970 }, { "epoch": 0.24585256330298116, "grad_norm": 0.0, "learning_rate": 1.765524806054604e-05, "loss": 1.0264, "step": 1971 }, { "epoch": 0.24597729824123737, "grad_norm": 0.0, "learning_rate": 1.765264800071786e-05, "loss": 1.0605, "step": 1972 }, { "epoch": 0.24610203317949358, "grad_norm": 0.0, "learning_rate": 1.765004669178461e-05, "loss": 1.0491, "step": 1973 }, { "epoch": 0.2462267681177498, "grad_norm": 0.0, "learning_rate": 1.7647444134170883e-05, "loss": 1.0637, "step": 1974 }, { "epoch": 0.24635150305600598, "grad_norm": 0.0, "learning_rate": 1.764484032830148e-05, "loss": 1.0332, "step": 1975 }, { "epoch": 0.2464762379942622, "grad_norm": 0.0, "learning_rate": 1.7642235274601408e-05, "loss": 1.0574, "step": 1976 }, { "epoch": 0.2466009729325184, "grad_norm": 0.0, "learning_rate": 1.7639628973495884e-05, "loss": 0.9905, "step": 1977 }, { "epoch": 0.2467257078707746, "grad_norm": 0.0, "learning_rate": 1.763702142541032e-05, "loss": 1.0384, "step": 1978 }, { "epoch": 0.24685044280903082, "grad_norm": 0.0, "learning_rate": 1.7634412630770326e-05, "loss": 1.0585, "step": 1979 }, { "epoch": 0.24697517774728703, "grad_norm": 0.0, "learning_rate": 1.7631802590001726e-05, "loss": 1.0838, "step": 1980 }, { "epoch": 0.2470999126855432, "grad_norm": 0.0, "learning_rate": 1.7629191303530553e-05, "loss": 1.0504, "step": 1981 }, { "epoch": 0.24722464762379942, "grad_norm": 0.0, "learning_rate": 1.7626578771783026e-05, "loss": 1.0369, "step": 1982 }, { "epoch": 0.24734938256205563, "grad_norm": 0.0, "learning_rate": 1.762396499518558e-05, "loss": 1.0474, "step": 1983 }, { "epoch": 0.24747411750031184, "grad_norm": 0.0, "learning_rate": 1.762134997416484e-05, "loss": 1.0356, "step": 1984 }, { "epoch": 0.24759885243856805, "grad_norm": 0.0, "learning_rate": 1.761873370914766e-05, "loss": 1.105, "step": 1985 }, { "epoch": 0.24772358737682426, "grad_norm": 0.0, "learning_rate": 1.761611620056107e-05, "loss": 1.0509, "step": 1986 }, { "epoch": 0.24784832231508044, "grad_norm": 0.0, "learning_rate": 1.761349744883231e-05, "loss": 1.0278, "step": 1987 }, { "epoch": 0.24797305725333665, "grad_norm": 0.0, "learning_rate": 1.761087745438884e-05, "loss": 1.077, "step": 1988 }, { "epoch": 0.24809779219159286, "grad_norm": 0.0, "learning_rate": 1.7608256217658294e-05, "loss": 1.0538, "step": 1989 }, { "epoch": 0.24822252712984907, "grad_norm": 0.0, "learning_rate": 1.7605633739068532e-05, "loss": 1.0074, "step": 1990 }, { "epoch": 0.24834726206810528, "grad_norm": 0.0, "learning_rate": 1.7603010019047607e-05, "loss": 1.0436, "step": 1991 }, { "epoch": 0.2484719970063615, "grad_norm": 0.0, "learning_rate": 1.7600385058023774e-05, "loss": 1.0406, "step": 1992 }, { "epoch": 0.24859673194461768, "grad_norm": 0.0, "learning_rate": 1.7597758856425497e-05, "loss": 1.0258, "step": 1993 }, { "epoch": 0.2487214668828739, "grad_norm": 0.0, "learning_rate": 1.7595131414681434e-05, "loss": 1.0504, "step": 1994 }, { "epoch": 0.2488462018211301, "grad_norm": 0.0, "learning_rate": 1.7592502733220454e-05, "loss": 1.0988, "step": 1995 }, { "epoch": 0.2489709367593863, "grad_norm": 0.0, "learning_rate": 1.758987281247162e-05, "loss": 0.9772, "step": 1996 }, { "epoch": 0.24909567169764252, "grad_norm": 0.0, "learning_rate": 1.7587241652864206e-05, "loss": 1.0398, "step": 1997 }, { "epoch": 0.24922040663589873, "grad_norm": 0.0, "learning_rate": 1.758460925482768e-05, "loss": 1.0791, "step": 1998 }, { "epoch": 0.2493451415741549, "grad_norm": 0.0, "learning_rate": 1.7581975618791714e-05, "loss": 1.0831, "step": 1999 }, { "epoch": 0.24946987651241112, "grad_norm": 0.0, "learning_rate": 1.757934074518619e-05, "loss": 1.0541, "step": 2000 }, { "epoch": 0.24959461145066733, "grad_norm": 0.0, "learning_rate": 1.757670463444118e-05, "loss": 1.0402, "step": 2001 }, { "epoch": 0.24971934638892354, "grad_norm": 0.0, "learning_rate": 1.7574067286986968e-05, "loss": 1.0178, "step": 2002 }, { "epoch": 0.24984408132717975, "grad_norm": 0.0, "learning_rate": 1.7571428703254033e-05, "loss": 1.0368, "step": 2003 }, { "epoch": 0.24996881626543596, "grad_norm": 0.0, "learning_rate": 1.756878888367306e-05, "loss": 0.9941, "step": 2004 }, { "epoch": 0.25009355120369214, "grad_norm": 0.0, "learning_rate": 1.756614782867493e-05, "loss": 1.0632, "step": 2005 }, { "epoch": 0.25021828614194835, "grad_norm": 0.0, "learning_rate": 1.7563505538690734e-05, "loss": 1.05, "step": 2006 }, { "epoch": 0.25034302108020456, "grad_norm": 0.0, "learning_rate": 1.756086201415176e-05, "loss": 1.0007, "step": 2007 }, { "epoch": 0.2504677560184608, "grad_norm": 0.0, "learning_rate": 1.7558217255489498e-05, "loss": 1.1402, "step": 2008 }, { "epoch": 0.250592490956717, "grad_norm": 0.0, "learning_rate": 1.755557126313564e-05, "loss": 1.0883, "step": 2009 }, { "epoch": 0.2507172258949732, "grad_norm": 0.0, "learning_rate": 1.7552924037522074e-05, "loss": 1.0716, "step": 2010 }, { "epoch": 0.2508419608332294, "grad_norm": 0.0, "learning_rate": 1.7550275579080893e-05, "loss": 1.0205, "step": 2011 }, { "epoch": 0.2509666957714856, "grad_norm": 0.0, "learning_rate": 1.7547625888244403e-05, "loss": 1.0099, "step": 2012 }, { "epoch": 0.2510914307097418, "grad_norm": 0.0, "learning_rate": 1.7544974965445088e-05, "loss": 1.0875, "step": 2013 }, { "epoch": 0.251216165647998, "grad_norm": 0.0, "learning_rate": 1.7542322811115657e-05, "loss": 1.0384, "step": 2014 }, { "epoch": 0.2513409005862542, "grad_norm": 0.0, "learning_rate": 1.7539669425688997e-05, "loss": 1.0665, "step": 2015 }, { "epoch": 0.2514656355245104, "grad_norm": 0.0, "learning_rate": 1.7537014809598214e-05, "loss": 0.9946, "step": 2016 }, { "epoch": 0.2515903704627666, "grad_norm": 0.0, "learning_rate": 1.7534358963276606e-05, "loss": 1.0807, "step": 2017 }, { "epoch": 0.2517151054010228, "grad_norm": 0.0, "learning_rate": 1.753170188715768e-05, "loss": 1.0503, "step": 2018 }, { "epoch": 0.25183984033927903, "grad_norm": 0.0, "learning_rate": 1.752904358167513e-05, "loss": 1.0395, "step": 2019 }, { "epoch": 0.25196457527753524, "grad_norm": 0.0, "learning_rate": 1.752638404726286e-05, "loss": 1.0218, "step": 2020 }, { "epoch": 0.25208931021579145, "grad_norm": 0.0, "learning_rate": 1.7523723284354977e-05, "loss": 1.0505, "step": 2021 }, { "epoch": 0.25221404515404766, "grad_norm": 0.0, "learning_rate": 1.752106129338578e-05, "loss": 1.1192, "step": 2022 }, { "epoch": 0.25233878009230387, "grad_norm": 0.0, "learning_rate": 1.7518398074789776e-05, "loss": 1.0198, "step": 2023 }, { "epoch": 0.2524635150305601, "grad_norm": 0.0, "learning_rate": 1.751573362900167e-05, "loss": 1.0149, "step": 2024 }, { "epoch": 0.2525882499688163, "grad_norm": 0.0, "learning_rate": 1.751306795645636e-05, "loss": 1.0477, "step": 2025 }, { "epoch": 0.25271298490707245, "grad_norm": 0.0, "learning_rate": 1.751040105758896e-05, "loss": 1.0708, "step": 2026 }, { "epoch": 0.25283771984532866, "grad_norm": 0.0, "learning_rate": 1.750773293283477e-05, "loss": 1.0599, "step": 2027 }, { "epoch": 0.25296245478358487, "grad_norm": 0.0, "learning_rate": 1.7505063582629297e-05, "loss": 1.0773, "step": 2028 }, { "epoch": 0.2530871897218411, "grad_norm": 0.0, "learning_rate": 1.7502393007408246e-05, "loss": 1.0446, "step": 2029 }, { "epoch": 0.2532119246600973, "grad_norm": 0.0, "learning_rate": 1.7499721207607523e-05, "loss": 1.0508, "step": 2030 }, { "epoch": 0.2533366595983535, "grad_norm": 0.0, "learning_rate": 1.749704818366323e-05, "loss": 1.054, "step": 2031 }, { "epoch": 0.2534613945366097, "grad_norm": 0.0, "learning_rate": 1.7494373936011674e-05, "loss": 1.0502, "step": 2032 }, { "epoch": 0.2535861294748659, "grad_norm": 0.0, "learning_rate": 1.749169846508936e-05, "loss": 1.0714, "step": 2033 }, { "epoch": 0.25371086441312213, "grad_norm": 0.0, "learning_rate": 1.7489021771332995e-05, "loss": 1.0472, "step": 2034 }, { "epoch": 0.25383559935137834, "grad_norm": 0.0, "learning_rate": 1.7486343855179476e-05, "loss": 1.0397, "step": 2035 }, { "epoch": 0.25396033428963455, "grad_norm": 0.0, "learning_rate": 1.748366471706591e-05, "loss": 1.0257, "step": 2036 }, { "epoch": 0.25408506922789076, "grad_norm": 0.0, "learning_rate": 1.74809843574296e-05, "loss": 1.0556, "step": 2037 }, { "epoch": 0.2542098041661469, "grad_norm": 0.0, "learning_rate": 1.747830277670805e-05, "loss": 1.0969, "step": 2038 }, { "epoch": 0.2543345391044031, "grad_norm": 0.0, "learning_rate": 1.747561997533896e-05, "loss": 1.0175, "step": 2039 }, { "epoch": 0.25445927404265933, "grad_norm": 0.0, "learning_rate": 1.747293595376023e-05, "loss": 1.0243, "step": 2040 }, { "epoch": 0.25458400898091554, "grad_norm": 0.0, "learning_rate": 1.7470250712409963e-05, "loss": 1.0682, "step": 2041 }, { "epoch": 0.25470874391917175, "grad_norm": 0.0, "learning_rate": 1.7467564251726452e-05, "loss": 1.0456, "step": 2042 }, { "epoch": 0.25483347885742796, "grad_norm": 0.0, "learning_rate": 1.74648765721482e-05, "loss": 1.0018, "step": 2043 }, { "epoch": 0.2549582137956842, "grad_norm": 0.0, "learning_rate": 1.7462187674113903e-05, "loss": 1.021, "step": 2044 }, { "epoch": 0.2550829487339404, "grad_norm": 0.0, "learning_rate": 1.7459497558062456e-05, "loss": 1.0639, "step": 2045 }, { "epoch": 0.2552076836721966, "grad_norm": 0.0, "learning_rate": 1.745680622443296e-05, "loss": 1.095, "step": 2046 }, { "epoch": 0.2553324186104528, "grad_norm": 0.0, "learning_rate": 1.7454113673664695e-05, "loss": 1.0722, "step": 2047 }, { "epoch": 0.255457153548709, "grad_norm": 0.0, "learning_rate": 1.7451419906197168e-05, "loss": 1.018, "step": 2048 }, { "epoch": 0.2555818884869652, "grad_norm": 0.0, "learning_rate": 1.744872492247006e-05, "loss": 1.0902, "step": 2049 }, { "epoch": 0.2557066234252214, "grad_norm": 0.0, "learning_rate": 1.7446028722923266e-05, "loss": 1.0073, "step": 2050 }, { "epoch": 0.2558313583634776, "grad_norm": 0.0, "learning_rate": 1.7443331307996868e-05, "loss": 1.0358, "step": 2051 }, { "epoch": 0.2559560933017338, "grad_norm": 0.0, "learning_rate": 1.7440632678131157e-05, "loss": 1.0642, "step": 2052 }, { "epoch": 0.25608082823999, "grad_norm": 0.0, "learning_rate": 1.7437932833766614e-05, "loss": 0.9788, "step": 2053 }, { "epoch": 0.2562055631782462, "grad_norm": 0.0, "learning_rate": 1.7435231775343924e-05, "loss": 1.0323, "step": 2054 }, { "epoch": 0.25633029811650243, "grad_norm": 0.0, "learning_rate": 1.7432529503303967e-05, "loss": 1.0799, "step": 2055 }, { "epoch": 0.25645503305475864, "grad_norm": 0.0, "learning_rate": 1.742982601808782e-05, "loss": 1.0148, "step": 2056 }, { "epoch": 0.25657976799301485, "grad_norm": 0.0, "learning_rate": 1.7427121320136765e-05, "loss": 1.08, "step": 2057 }, { "epoch": 0.25670450293127106, "grad_norm": 0.0, "learning_rate": 1.7424415409892274e-05, "loss": 1.0112, "step": 2058 }, { "epoch": 0.25682923786952727, "grad_norm": 0.0, "learning_rate": 1.7421708287796017e-05, "loss": 1.045, "step": 2059 }, { "epoch": 0.2569539728077835, "grad_norm": 0.0, "learning_rate": 1.741899995428987e-05, "loss": 1.0506, "step": 2060 }, { "epoch": 0.2570787077460397, "grad_norm": 0.0, "learning_rate": 1.74162904098159e-05, "loss": 0.986, "step": 2061 }, { "epoch": 0.25720344268429585, "grad_norm": 0.0, "learning_rate": 1.741357965481637e-05, "loss": 1.0139, "step": 2062 }, { "epoch": 0.25732817762255206, "grad_norm": 0.0, "learning_rate": 1.741086768973374e-05, "loss": 1.0351, "step": 2063 }, { "epoch": 0.25745291256080827, "grad_norm": 0.0, "learning_rate": 1.7408154515010684e-05, "loss": 0.9952, "step": 2064 }, { "epoch": 0.2575776474990645, "grad_norm": 0.0, "learning_rate": 1.740544013109005e-05, "loss": 0.9996, "step": 2065 }, { "epoch": 0.2577023824373207, "grad_norm": 0.0, "learning_rate": 1.7402724538414895e-05, "loss": 1.0306, "step": 2066 }, { "epoch": 0.2578271173755769, "grad_norm": 0.0, "learning_rate": 1.740000773742847e-05, "loss": 1.0632, "step": 2067 }, { "epoch": 0.2579518523138331, "grad_norm": 0.0, "learning_rate": 1.7397289728574234e-05, "loss": 1.0592, "step": 2068 }, { "epoch": 0.2580765872520893, "grad_norm": 0.0, "learning_rate": 1.7394570512295832e-05, "loss": 1.0702, "step": 2069 }, { "epoch": 0.25820132219034553, "grad_norm": 0.0, "learning_rate": 1.73918500890371e-05, "loss": 0.9931, "step": 2070 }, { "epoch": 0.25832605712860174, "grad_norm": 0.0, "learning_rate": 1.738912845924209e-05, "loss": 1.0194, "step": 2071 }, { "epoch": 0.25845079206685795, "grad_norm": 0.0, "learning_rate": 1.738640562335504e-05, "loss": 1.0778, "step": 2072 }, { "epoch": 0.25857552700511416, "grad_norm": 0.0, "learning_rate": 1.7383681581820374e-05, "loss": 1.0613, "step": 2073 }, { "epoch": 0.2587002619433703, "grad_norm": 0.0, "learning_rate": 1.7380956335082735e-05, "loss": 1.0577, "step": 2074 }, { "epoch": 0.2588249968816265, "grad_norm": 0.0, "learning_rate": 1.737822988358695e-05, "loss": 0.9935, "step": 2075 }, { "epoch": 0.25894973181988273, "grad_norm": 0.0, "learning_rate": 1.7375502227778044e-05, "loss": 0.9846, "step": 2076 }, { "epoch": 0.25907446675813894, "grad_norm": 0.0, "learning_rate": 1.737277336810124e-05, "loss": 1.0452, "step": 2077 }, { "epoch": 0.25919920169639515, "grad_norm": 0.0, "learning_rate": 1.7370043305001957e-05, "loss": 1.073, "step": 2078 }, { "epoch": 0.25932393663465136, "grad_norm": 0.0, "learning_rate": 1.7367312038925804e-05, "loss": 1.0515, "step": 2079 }, { "epoch": 0.2594486715729076, "grad_norm": 0.0, "learning_rate": 1.7364579570318602e-05, "loss": 1.0362, "step": 2080 }, { "epoch": 0.2595734065111638, "grad_norm": 0.0, "learning_rate": 1.7361845899626355e-05, "loss": 0.9715, "step": 2081 }, { "epoch": 0.25969814144942, "grad_norm": 0.0, "learning_rate": 1.7359111027295262e-05, "loss": 1.0001, "step": 2082 }, { "epoch": 0.2598228763876762, "grad_norm": 0.0, "learning_rate": 1.7356374953771727e-05, "loss": 0.9842, "step": 2083 }, { "epoch": 0.2599476113259324, "grad_norm": 0.0, "learning_rate": 1.735363767950235e-05, "loss": 1.0044, "step": 2084 }, { "epoch": 0.2600723462641886, "grad_norm": 0.0, "learning_rate": 1.7350899204933917e-05, "loss": 1.041, "step": 2085 }, { "epoch": 0.2601970812024448, "grad_norm": 0.0, "learning_rate": 1.7348159530513424e-05, "loss": 1.0394, "step": 2086 }, { "epoch": 0.260321816140701, "grad_norm": 0.0, "learning_rate": 1.7345418656688043e-05, "loss": 0.9928, "step": 2087 }, { "epoch": 0.2604465510789572, "grad_norm": 0.0, "learning_rate": 1.734267658390516e-05, "loss": 1.0208, "step": 2088 }, { "epoch": 0.2605712860172134, "grad_norm": 0.0, "learning_rate": 1.7339933312612353e-05, "loss": 1.0699, "step": 2089 }, { "epoch": 0.2606960209554696, "grad_norm": 0.0, "learning_rate": 1.733718884325739e-05, "loss": 1.0412, "step": 2090 }, { "epoch": 0.26082075589372583, "grad_norm": 0.0, "learning_rate": 1.7334443176288243e-05, "loss": 1.0697, "step": 2091 }, { "epoch": 0.26094549083198204, "grad_norm": 0.0, "learning_rate": 1.7331696312153062e-05, "loss": 1.078, "step": 2092 }, { "epoch": 0.26107022577023825, "grad_norm": 0.0, "learning_rate": 1.7328948251300216e-05, "loss": 1.0429, "step": 2093 }, { "epoch": 0.26119496070849446, "grad_norm": 0.0, "learning_rate": 1.7326198994178255e-05, "loss": 1.0895, "step": 2094 }, { "epoch": 0.26131969564675067, "grad_norm": 0.0, "learning_rate": 1.7323448541235922e-05, "loss": 1.0625, "step": 2095 }, { "epoch": 0.2614444305850069, "grad_norm": 0.0, "learning_rate": 1.7320696892922164e-05, "loss": 1.1006, "step": 2096 }, { "epoch": 0.2615691655232631, "grad_norm": 0.0, "learning_rate": 1.7317944049686125e-05, "loss": 1.0483, "step": 2097 }, { "epoch": 0.26169390046151925, "grad_norm": 0.0, "learning_rate": 1.7315190011977126e-05, "loss": 1.0591, "step": 2098 }, { "epoch": 0.26181863539977546, "grad_norm": 0.0, "learning_rate": 1.7312434780244704e-05, "loss": 1.0325, "step": 2099 }, { "epoch": 0.26194337033803167, "grad_norm": 0.0, "learning_rate": 1.7309678354938583e-05, "loss": 1.021, "step": 2100 }, { "epoch": 0.2620681052762879, "grad_norm": 0.0, "learning_rate": 1.7306920736508676e-05, "loss": 0.9987, "step": 2101 }, { "epoch": 0.2621928402145441, "grad_norm": 0.0, "learning_rate": 1.7304161925405097e-05, "loss": 1.0558, "step": 2102 }, { "epoch": 0.2623175751528003, "grad_norm": 0.0, "learning_rate": 1.7301401922078162e-05, "loss": 1.081, "step": 2103 }, { "epoch": 0.2624423100910565, "grad_norm": 0.0, "learning_rate": 1.7298640726978357e-05, "loss": 1.0436, "step": 2104 }, { "epoch": 0.2625670450293127, "grad_norm": 0.0, "learning_rate": 1.7295878340556394e-05, "loss": 1.0958, "step": 2105 }, { "epoch": 0.26269177996756893, "grad_norm": 0.0, "learning_rate": 1.7293114763263154e-05, "loss": 1.0205, "step": 2106 }, { "epoch": 0.26281651490582514, "grad_norm": 0.0, "learning_rate": 1.7290349995549726e-05, "loss": 0.9927, "step": 2107 }, { "epoch": 0.26294124984408135, "grad_norm": 0.0, "learning_rate": 1.7287584037867394e-05, "loss": 1.0033, "step": 2108 }, { "epoch": 0.26306598478233756, "grad_norm": 0.0, "learning_rate": 1.7284816890667627e-05, "loss": 1.0375, "step": 2109 }, { "epoch": 0.2631907197205937, "grad_norm": 0.0, "learning_rate": 1.7282048554402095e-05, "loss": 1.0207, "step": 2110 }, { "epoch": 0.2633154546588499, "grad_norm": 0.0, "learning_rate": 1.7279279029522654e-05, "loss": 1.0704, "step": 2111 }, { "epoch": 0.26344018959710613, "grad_norm": 0.0, "learning_rate": 1.7276508316481367e-05, "loss": 0.9905, "step": 2112 }, { "epoch": 0.26356492453536234, "grad_norm": 0.0, "learning_rate": 1.7273736415730488e-05, "loss": 1.029, "step": 2113 }, { "epoch": 0.26368965947361855, "grad_norm": 0.0, "learning_rate": 1.7270963327722455e-05, "loss": 1.0445, "step": 2114 }, { "epoch": 0.26381439441187476, "grad_norm": 0.0, "learning_rate": 1.7268189052909905e-05, "loss": 1.0506, "step": 2115 }, { "epoch": 0.263939129350131, "grad_norm": 0.0, "learning_rate": 1.726541359174567e-05, "loss": 1.0249, "step": 2116 }, { "epoch": 0.2640638642883872, "grad_norm": 0.0, "learning_rate": 1.726263694468278e-05, "loss": 1.043, "step": 2117 }, { "epoch": 0.2641885992266434, "grad_norm": 0.0, "learning_rate": 1.7259859112174452e-05, "loss": 1.0287, "step": 2118 }, { "epoch": 0.2643133341648996, "grad_norm": 0.0, "learning_rate": 1.7257080094674092e-05, "loss": 1.0448, "step": 2119 }, { "epoch": 0.2644380691031558, "grad_norm": 0.0, "learning_rate": 1.7254299892635313e-05, "loss": 1.0267, "step": 2120 }, { "epoch": 0.264562804041412, "grad_norm": 0.0, "learning_rate": 1.7251518506511914e-05, "loss": 1.0485, "step": 2121 }, { "epoch": 0.2646875389796682, "grad_norm": 0.0, "learning_rate": 1.7248735936757882e-05, "loss": 1.0694, "step": 2122 }, { "epoch": 0.2648122739179244, "grad_norm": 0.0, "learning_rate": 1.7245952183827403e-05, "loss": 1.0693, "step": 2123 }, { "epoch": 0.2649370088561806, "grad_norm": 0.0, "learning_rate": 1.724316724817486e-05, "loss": 1.0311, "step": 2124 }, { "epoch": 0.2650617437944368, "grad_norm": 0.0, "learning_rate": 1.7240381130254826e-05, "loss": 1.0325, "step": 2125 }, { "epoch": 0.265186478732693, "grad_norm": 0.0, "learning_rate": 1.7237593830522063e-05, "loss": 0.9892, "step": 2126 }, { "epoch": 0.26531121367094923, "grad_norm": 0.0, "learning_rate": 1.7234805349431526e-05, "loss": 1.0152, "step": 2127 }, { "epoch": 0.26543594860920544, "grad_norm": 0.0, "learning_rate": 1.7232015687438365e-05, "loss": 1.0378, "step": 2128 }, { "epoch": 0.26556068354746165, "grad_norm": 0.0, "learning_rate": 1.722922484499793e-05, "loss": 1.0679, "step": 2129 }, { "epoch": 0.26568541848571786, "grad_norm": 0.0, "learning_rate": 1.722643282256575e-05, "loss": 1.0302, "step": 2130 }, { "epoch": 0.26581015342397407, "grad_norm": 0.0, "learning_rate": 1.7223639620597556e-05, "loss": 1.0699, "step": 2131 }, { "epoch": 0.2659348883622303, "grad_norm": 0.0, "learning_rate": 1.7220845239549272e-05, "loss": 1.0538, "step": 2132 }, { "epoch": 0.2660596233004865, "grad_norm": 0.0, "learning_rate": 1.7218049679877003e-05, "loss": 1.0489, "step": 2133 }, { "epoch": 0.26618435823874265, "grad_norm": 0.0, "learning_rate": 1.7215252942037067e-05, "loss": 1.0483, "step": 2134 }, { "epoch": 0.26630909317699886, "grad_norm": 0.0, "learning_rate": 1.721245502648595e-05, "loss": 0.9804, "step": 2135 }, { "epoch": 0.26643382811525507, "grad_norm": 0.0, "learning_rate": 1.7209655933680348e-05, "loss": 1.0565, "step": 2136 }, { "epoch": 0.2665585630535113, "grad_norm": 0.0, "learning_rate": 1.7206855664077147e-05, "loss": 1.0517, "step": 2137 }, { "epoch": 0.2666832979917675, "grad_norm": 0.0, "learning_rate": 1.7204054218133418e-05, "loss": 1.1176, "step": 2138 }, { "epoch": 0.2668080329300237, "grad_norm": 0.0, "learning_rate": 1.7201251596306424e-05, "loss": 1.0041, "step": 2139 }, { "epoch": 0.2669327678682799, "grad_norm": 0.0, "learning_rate": 1.719844779905363e-05, "loss": 1.0355, "step": 2140 }, { "epoch": 0.2670575028065361, "grad_norm": 0.0, "learning_rate": 1.719564282683268e-05, "loss": 1.0079, "step": 2141 }, { "epoch": 0.26718223774479233, "grad_norm": 0.0, "learning_rate": 1.719283668010142e-05, "loss": 1.0486, "step": 2142 }, { "epoch": 0.26730697268304854, "grad_norm": 0.0, "learning_rate": 1.7190029359317884e-05, "loss": 1.0339, "step": 2143 }, { "epoch": 0.26743170762130475, "grad_norm": 0.0, "learning_rate": 1.7187220864940296e-05, "loss": 1.0123, "step": 2144 }, { "epoch": 0.26755644255956096, "grad_norm": 0.0, "learning_rate": 1.7184411197427077e-05, "loss": 1.0841, "step": 2145 }, { "epoch": 0.2676811774978171, "grad_norm": 0.0, "learning_rate": 1.7181600357236828e-05, "loss": 1.0549, "step": 2146 }, { "epoch": 0.2678059124360733, "grad_norm": 0.0, "learning_rate": 1.717878834482836e-05, "loss": 1.081, "step": 2147 }, { "epoch": 0.26793064737432953, "grad_norm": 0.0, "learning_rate": 1.717597516066065e-05, "loss": 1.0499, "step": 2148 }, { "epoch": 0.26805538231258574, "grad_norm": 0.0, "learning_rate": 1.7173160805192895e-05, "loss": 1.032, "step": 2149 }, { "epoch": 0.26818011725084195, "grad_norm": 0.0, "learning_rate": 1.7170345278884463e-05, "loss": 1.0324, "step": 2150 }, { "epoch": 0.26830485218909816, "grad_norm": 0.0, "learning_rate": 1.716752858219491e-05, "loss": 1.0209, "step": 2151 }, { "epoch": 0.2684295871273544, "grad_norm": 0.0, "learning_rate": 1.7164710715584007e-05, "loss": 0.9964, "step": 2152 }, { "epoch": 0.2685543220656106, "grad_norm": 0.0, "learning_rate": 1.7161891679511695e-05, "loss": 1.0085, "step": 2153 }, { "epoch": 0.2686790570038668, "grad_norm": 0.0, "learning_rate": 1.7159071474438106e-05, "loss": 0.9963, "step": 2154 }, { "epoch": 0.268803791942123, "grad_norm": 0.0, "learning_rate": 1.7156250100823575e-05, "loss": 1.0962, "step": 2155 }, { "epoch": 0.2689285268803792, "grad_norm": 0.0, "learning_rate": 1.7153427559128618e-05, "loss": 1.0332, "step": 2156 }, { "epoch": 0.2690532618186354, "grad_norm": 0.0, "learning_rate": 1.715060384981395e-05, "loss": 1.049, "step": 2157 }, { "epoch": 0.2691779967568916, "grad_norm": 0.0, "learning_rate": 1.7147778973340466e-05, "loss": 1.0364, "step": 2158 }, { "epoch": 0.2693027316951478, "grad_norm": 0.0, "learning_rate": 1.714495293016926e-05, "loss": 0.9749, "step": 2159 }, { "epoch": 0.269427466633404, "grad_norm": 0.0, "learning_rate": 1.714212572076161e-05, "loss": 1.0424, "step": 2160 }, { "epoch": 0.2695522015716602, "grad_norm": 0.0, "learning_rate": 1.7139297345578992e-05, "loss": 1.0395, "step": 2161 }, { "epoch": 0.2696769365099164, "grad_norm": 0.0, "learning_rate": 1.713646780508307e-05, "loss": 1.0376, "step": 2162 }, { "epoch": 0.26980167144817263, "grad_norm": 0.0, "learning_rate": 1.7133637099735686e-05, "loss": 1.0269, "step": 2163 }, { "epoch": 0.26992640638642884, "grad_norm": 0.0, "learning_rate": 1.7130805229998894e-05, "loss": 1.0745, "step": 2164 }, { "epoch": 0.27005114132468505, "grad_norm": 0.0, "learning_rate": 1.712797219633492e-05, "loss": 1.0544, "step": 2165 }, { "epoch": 0.27017587626294126, "grad_norm": 0.0, "learning_rate": 1.7125137999206188e-05, "loss": 1.0075, "step": 2166 }, { "epoch": 0.27030061120119747, "grad_norm": 0.0, "learning_rate": 1.7122302639075313e-05, "loss": 1.0248, "step": 2167 }, { "epoch": 0.2704253461394537, "grad_norm": 0.0, "learning_rate": 1.7119466116405093e-05, "loss": 1.0545, "step": 2168 }, { "epoch": 0.2705500810777099, "grad_norm": 0.0, "learning_rate": 1.7116628431658525e-05, "loss": 1.0202, "step": 2169 }, { "epoch": 0.27067481601596605, "grad_norm": 0.0, "learning_rate": 1.7113789585298786e-05, "loss": 1.0505, "step": 2170 }, { "epoch": 0.27079955095422226, "grad_norm": 0.0, "learning_rate": 1.7110949577789247e-05, "loss": 1.0464, "step": 2171 }, { "epoch": 0.27092428589247847, "grad_norm": 0.0, "learning_rate": 1.7108108409593473e-05, "loss": 1.0601, "step": 2172 }, { "epoch": 0.2710490208307347, "grad_norm": 0.0, "learning_rate": 1.710526608117521e-05, "loss": 1.0619, "step": 2173 }, { "epoch": 0.2711737557689909, "grad_norm": 0.0, "learning_rate": 1.7102422592998403e-05, "loss": 1.0149, "step": 2174 }, { "epoch": 0.2712984907072471, "grad_norm": 0.0, "learning_rate": 1.7099577945527177e-05, "loss": 1.0522, "step": 2175 }, { "epoch": 0.2714232256455033, "grad_norm": 0.0, "learning_rate": 1.7096732139225853e-05, "loss": 1.1013, "step": 2176 }, { "epoch": 0.2715479605837595, "grad_norm": 0.0, "learning_rate": 1.709388517455893e-05, "loss": 1.0446, "step": 2177 }, { "epoch": 0.27167269552201573, "grad_norm": 0.0, "learning_rate": 1.7091037051991113e-05, "loss": 1.0505, "step": 2178 }, { "epoch": 0.27179743046027194, "grad_norm": 0.0, "learning_rate": 1.7088187771987286e-05, "loss": 1.0534, "step": 2179 }, { "epoch": 0.27192216539852815, "grad_norm": 0.0, "learning_rate": 1.7085337335012524e-05, "loss": 1.0076, "step": 2180 }, { "epoch": 0.27204690033678436, "grad_norm": 0.0, "learning_rate": 1.708248574153209e-05, "loss": 1.0013, "step": 2181 }, { "epoch": 0.2721716352750405, "grad_norm": 0.0, "learning_rate": 1.707963299201143e-05, "loss": 1.0926, "step": 2182 }, { "epoch": 0.2722963702132967, "grad_norm": 0.0, "learning_rate": 1.707677908691619e-05, "loss": 1.0205, "step": 2183 }, { "epoch": 0.27242110515155293, "grad_norm": 0.0, "learning_rate": 1.7073924026712202e-05, "loss": 1.073, "step": 2184 }, { "epoch": 0.27254584008980914, "grad_norm": 0.0, "learning_rate": 1.7071067811865477e-05, "loss": 1.0273, "step": 2185 }, { "epoch": 0.27267057502806535, "grad_norm": 0.0, "learning_rate": 1.7068210442842226e-05, "loss": 1.059, "step": 2186 }, { "epoch": 0.27279530996632156, "grad_norm": 0.0, "learning_rate": 1.7065351920108838e-05, "loss": 0.9956, "step": 2187 }, { "epoch": 0.2729200449045778, "grad_norm": 0.0, "learning_rate": 1.7062492244131907e-05, "loss": 1.0386, "step": 2188 }, { "epoch": 0.273044779842834, "grad_norm": 0.0, "learning_rate": 1.705963141537819e-05, "loss": 1.0091, "step": 2189 }, { "epoch": 0.2731695147810902, "grad_norm": 0.0, "learning_rate": 1.705676943431466e-05, "loss": 0.9931, "step": 2190 }, { "epoch": 0.2732942497193464, "grad_norm": 0.0, "learning_rate": 1.7053906301408457e-05, "loss": 1.0548, "step": 2191 }, { "epoch": 0.2734189846576026, "grad_norm": 0.0, "learning_rate": 1.7051042017126916e-05, "loss": 0.9886, "step": 2192 }, { "epoch": 0.2735437195958588, "grad_norm": 0.0, "learning_rate": 1.7048176581937562e-05, "loss": 0.9875, "step": 2193 }, { "epoch": 0.273668454534115, "grad_norm": 0.0, "learning_rate": 1.7045309996308107e-05, "loss": 1.0535, "step": 2194 }, { "epoch": 0.2737931894723712, "grad_norm": 0.0, "learning_rate": 1.704244226070645e-05, "loss": 0.9821, "step": 2195 }, { "epoch": 0.2739179244106274, "grad_norm": 0.0, "learning_rate": 1.7039573375600675e-05, "loss": 0.9991, "step": 2196 }, { "epoch": 0.2740426593488836, "grad_norm": 0.0, "learning_rate": 1.703670334145906e-05, "loss": 0.9826, "step": 2197 }, { "epoch": 0.2741673942871398, "grad_norm": 0.0, "learning_rate": 1.7033832158750065e-05, "loss": 1.0357, "step": 2198 }, { "epoch": 0.27429212922539603, "grad_norm": 0.0, "learning_rate": 1.703095982794234e-05, "loss": 0.9983, "step": 2199 }, { "epoch": 0.27441686416365224, "grad_norm": 0.0, "learning_rate": 1.7028086349504722e-05, "loss": 1.0385, "step": 2200 }, { "epoch": 0.27454159910190845, "grad_norm": 0.0, "learning_rate": 1.7025211723906233e-05, "loss": 0.9922, "step": 2201 }, { "epoch": 0.27466633404016466, "grad_norm": 0.0, "learning_rate": 1.7022335951616085e-05, "loss": 1.0187, "step": 2202 }, { "epoch": 0.27479106897842087, "grad_norm": 0.0, "learning_rate": 1.7019459033103684e-05, "loss": 0.9928, "step": 2203 }, { "epoch": 0.2749158039166771, "grad_norm": 0.0, "learning_rate": 1.7016580968838604e-05, "loss": 1.0882, "step": 2204 }, { "epoch": 0.2750405388549333, "grad_norm": 0.0, "learning_rate": 1.7013701759290622e-05, "loss": 1.0305, "step": 2205 }, { "epoch": 0.27516527379318945, "grad_norm": 0.0, "learning_rate": 1.70108214049297e-05, "loss": 1.0257, "step": 2206 }, { "epoch": 0.27529000873144566, "grad_norm": 0.0, "learning_rate": 1.7007939906225985e-05, "loss": 1.0525, "step": 2207 }, { "epoch": 0.27541474366970187, "grad_norm": 0.0, "learning_rate": 1.7005057263649806e-05, "loss": 1.0075, "step": 2208 }, { "epoch": 0.2755394786079581, "grad_norm": 0.0, "learning_rate": 1.7002173477671685e-05, "loss": 1.0337, "step": 2209 }, { "epoch": 0.2756642135462143, "grad_norm": 0.0, "learning_rate": 1.699928854876233e-05, "loss": 1.0646, "step": 2210 }, { "epoch": 0.2757889484844705, "grad_norm": 0.0, "learning_rate": 1.6996402477392635e-05, "loss": 1.0233, "step": 2211 }, { "epoch": 0.2759136834227267, "grad_norm": 0.0, "learning_rate": 1.699351526403367e-05, "loss": 0.9703, "step": 2212 }, { "epoch": 0.2760384183609829, "grad_norm": 0.0, "learning_rate": 1.6990626909156716e-05, "loss": 1.0333, "step": 2213 }, { "epoch": 0.27616315329923913, "grad_norm": 0.0, "learning_rate": 1.6987737413233214e-05, "loss": 1.0067, "step": 2214 }, { "epoch": 0.27628788823749534, "grad_norm": 0.0, "learning_rate": 1.6984846776734806e-05, "loss": 0.9981, "step": 2215 }, { "epoch": 0.27641262317575155, "grad_norm": 0.0, "learning_rate": 1.6981955000133318e-05, "loss": 1.0132, "step": 2216 }, { "epoch": 0.27653735811400776, "grad_norm": 0.0, "learning_rate": 1.6979062083900762e-05, "loss": 1.0439, "step": 2217 }, { "epoch": 0.2766620930522639, "grad_norm": 0.0, "learning_rate": 1.697616802850933e-05, "loss": 1.045, "step": 2218 }, { "epoch": 0.2767868279905201, "grad_norm": 0.0, "learning_rate": 1.6973272834431407e-05, "loss": 1.0732, "step": 2219 }, { "epoch": 0.27691156292877633, "grad_norm": 0.0, "learning_rate": 1.6970376502139563e-05, "loss": 1.0227, "step": 2220 }, { "epoch": 0.27703629786703254, "grad_norm": 0.0, "learning_rate": 1.6967479032106552e-05, "loss": 1.0334, "step": 2221 }, { "epoch": 0.27716103280528875, "grad_norm": 0.0, "learning_rate": 1.6964580424805314e-05, "loss": 1.049, "step": 2222 }, { "epoch": 0.27728576774354496, "grad_norm": 0.0, "learning_rate": 1.6961680680708974e-05, "loss": 0.9975, "step": 2223 }, { "epoch": 0.2774105026818012, "grad_norm": 0.0, "learning_rate": 1.6958779800290847e-05, "loss": 1.0186, "step": 2224 }, { "epoch": 0.2775352376200574, "grad_norm": 0.0, "learning_rate": 1.6955877784024418e-05, "loss": 1.0505, "step": 2225 }, { "epoch": 0.2776599725583136, "grad_norm": 0.0, "learning_rate": 1.6952974632383385e-05, "loss": 1.0351, "step": 2226 }, { "epoch": 0.2777847074965698, "grad_norm": 0.0, "learning_rate": 1.6950070345841608e-05, "loss": 1.0464, "step": 2227 }, { "epoch": 0.277909442434826, "grad_norm": 0.0, "learning_rate": 1.694716492487314e-05, "loss": 1.0185, "step": 2228 }, { "epoch": 0.2780341773730822, "grad_norm": 0.0, "learning_rate": 1.694425836995222e-05, "loss": 1.0251, "step": 2229 }, { "epoch": 0.2781589123113384, "grad_norm": 0.0, "learning_rate": 1.694135068155327e-05, "loss": 1.0055, "step": 2230 }, { "epoch": 0.2782836472495946, "grad_norm": 0.0, "learning_rate": 1.6938441860150895e-05, "loss": 0.9866, "step": 2231 }, { "epoch": 0.2784083821878508, "grad_norm": 0.0, "learning_rate": 1.6935531906219895e-05, "loss": 1.1086, "step": 2232 }, { "epoch": 0.278533117126107, "grad_norm": 0.0, "learning_rate": 1.6932620820235243e-05, "loss": 1.056, "step": 2233 }, { "epoch": 0.2786578520643632, "grad_norm": 0.0, "learning_rate": 1.6929708602672104e-05, "loss": 1.0002, "step": 2234 }, { "epoch": 0.27878258700261943, "grad_norm": 0.0, "learning_rate": 1.6926795254005823e-05, "loss": 1.0297, "step": 2235 }, { "epoch": 0.27890732194087564, "grad_norm": 0.0, "learning_rate": 1.6923880774711934e-05, "loss": 1.0433, "step": 2236 }, { "epoch": 0.27903205687913185, "grad_norm": 0.0, "learning_rate": 1.692096516526615e-05, "loss": 1.0558, "step": 2237 }, { "epoch": 0.27915679181738806, "grad_norm": 0.0, "learning_rate": 1.6918048426144374e-05, "loss": 1.0444, "step": 2238 }, { "epoch": 0.27928152675564427, "grad_norm": 0.0, "learning_rate": 1.6915130557822698e-05, "loss": 0.9788, "step": 2239 }, { "epoch": 0.2794062616939005, "grad_norm": 0.0, "learning_rate": 1.6912211560777377e-05, "loss": 1.0077, "step": 2240 }, { "epoch": 0.2795309966321567, "grad_norm": 0.0, "learning_rate": 1.690929143548488e-05, "loss": 1.0146, "step": 2241 }, { "epoch": 0.27965573157041285, "grad_norm": 0.0, "learning_rate": 1.6906370182421836e-05, "loss": 0.9622, "step": 2242 }, { "epoch": 0.27978046650866906, "grad_norm": 0.0, "learning_rate": 1.6903447802065066e-05, "loss": 1.0155, "step": 2243 }, { "epoch": 0.27990520144692527, "grad_norm": 0.0, "learning_rate": 1.6900524294891587e-05, "loss": 1.0151, "step": 2244 }, { "epoch": 0.2800299363851815, "grad_norm": 0.0, "learning_rate": 1.6897599661378578e-05, "loss": 1.0334, "step": 2245 }, { "epoch": 0.2801546713234377, "grad_norm": 0.0, "learning_rate": 1.689467390200342e-05, "loss": 1.0062, "step": 2246 }, { "epoch": 0.2802794062616939, "grad_norm": 0.0, "learning_rate": 1.689174701724366e-05, "loss": 1.0281, "step": 2247 }, { "epoch": 0.2804041411999501, "grad_norm": 0.0, "learning_rate": 1.6888819007577054e-05, "loss": 0.9931, "step": 2248 }, { "epoch": 0.2805288761382063, "grad_norm": 0.0, "learning_rate": 1.6885889873481514e-05, "loss": 1.0306, "step": 2249 }, { "epoch": 0.28065361107646253, "grad_norm": 0.0, "learning_rate": 1.688295961543516e-05, "loss": 1.0182, "step": 2250 }, { "epoch": 0.28077834601471874, "grad_norm": 0.0, "learning_rate": 1.6880028233916277e-05, "loss": 1.0193, "step": 2251 }, { "epoch": 0.28090308095297495, "grad_norm": 0.0, "learning_rate": 1.6877095729403338e-05, "loss": 0.9956, "step": 2252 }, { "epoch": 0.28102781589123116, "grad_norm": 0.0, "learning_rate": 1.687416210237501e-05, "loss": 1.0124, "step": 2253 }, { "epoch": 0.2811525508294873, "grad_norm": 0.0, "learning_rate": 1.6871227353310127e-05, "loss": 1.0179, "step": 2254 }, { "epoch": 0.2812772857677435, "grad_norm": 0.0, "learning_rate": 1.686829148268772e-05, "loss": 1.0405, "step": 2255 }, { "epoch": 0.28140202070599973, "grad_norm": 0.0, "learning_rate": 1.6865354490986995e-05, "loss": 0.9925, "step": 2256 }, { "epoch": 0.28152675564425594, "grad_norm": 0.0, "learning_rate": 1.686241637868734e-05, "loss": 1.0484, "step": 2257 }, { "epoch": 0.28165149058251215, "grad_norm": 0.0, "learning_rate": 1.685947714626833e-05, "loss": 1.0004, "step": 2258 }, { "epoch": 0.28177622552076836, "grad_norm": 0.0, "learning_rate": 1.6856536794209725e-05, "loss": 0.9401, "step": 2259 }, { "epoch": 0.2819009604590246, "grad_norm": 0.0, "learning_rate": 1.6853595322991464e-05, "loss": 1.0136, "step": 2260 }, { "epoch": 0.2820256953972808, "grad_norm": 0.0, "learning_rate": 1.6850652733093666e-05, "loss": 1.0258, "step": 2261 }, { "epoch": 0.282150430335537, "grad_norm": 0.0, "learning_rate": 1.684770902499664e-05, "loss": 1.0055, "step": 2262 }, { "epoch": 0.2822751652737932, "grad_norm": 0.0, "learning_rate": 1.6844764199180868e-05, "loss": 0.9902, "step": 2263 }, { "epoch": 0.2823999002120494, "grad_norm": 0.0, "learning_rate": 1.6841818256127027e-05, "loss": 1.0207, "step": 2264 }, { "epoch": 0.2825246351503056, "grad_norm": 0.0, "learning_rate": 1.6838871196315962e-05, "loss": 1.0362, "step": 2265 }, { "epoch": 0.2826493700885618, "grad_norm": 0.0, "learning_rate": 1.6835923020228714e-05, "loss": 1.0816, "step": 2266 }, { "epoch": 0.282774105026818, "grad_norm": 0.0, "learning_rate": 1.6832973728346497e-05, "loss": 1.0409, "step": 2267 }, { "epoch": 0.2828988399650742, "grad_norm": 0.0, "learning_rate": 1.683002332115071e-05, "loss": 1.0271, "step": 2268 }, { "epoch": 0.2830235749033304, "grad_norm": 0.0, "learning_rate": 1.6827071799122928e-05, "loss": 1.0429, "step": 2269 }, { "epoch": 0.2831483098415866, "grad_norm": 0.0, "learning_rate": 1.682411916274492e-05, "loss": 1.0512, "step": 2270 }, { "epoch": 0.28327304477984283, "grad_norm": 0.0, "learning_rate": 1.682116541249863e-05, "loss": 0.9693, "step": 2271 }, { "epoch": 0.28339777971809904, "grad_norm": 0.0, "learning_rate": 1.6818210548866188e-05, "loss": 0.9948, "step": 2272 }, { "epoch": 0.28352251465635525, "grad_norm": 0.0, "learning_rate": 1.6815254572329895e-05, "loss": 1.0251, "step": 2273 }, { "epoch": 0.28364724959461146, "grad_norm": 0.0, "learning_rate": 1.6812297483372243e-05, "loss": 1.0298, "step": 2274 }, { "epoch": 0.28377198453286767, "grad_norm": 0.0, "learning_rate": 1.6809339282475905e-05, "loss": 1.0606, "step": 2275 }, { "epoch": 0.2838967194711239, "grad_norm": 0.0, "learning_rate": 1.6806379970123736e-05, "loss": 0.9482, "step": 2276 }, { "epoch": 0.2840214544093801, "grad_norm": 0.0, "learning_rate": 1.6803419546798766e-05, "loss": 1.044, "step": 2277 }, { "epoch": 0.28414618934763625, "grad_norm": 0.0, "learning_rate": 1.6800458012984213e-05, "loss": 1.0825, "step": 2278 }, { "epoch": 0.28427092428589246, "grad_norm": 0.0, "learning_rate": 1.6797495369163477e-05, "loss": 1.0572, "step": 2279 }, { "epoch": 0.28439565922414867, "grad_norm": 0.0, "learning_rate": 1.679453161582013e-05, "loss": 1.0083, "step": 2280 }, { "epoch": 0.2845203941624049, "grad_norm": 0.0, "learning_rate": 1.6791566753437935e-05, "loss": 0.9875, "step": 2281 }, { "epoch": 0.2846451291006611, "grad_norm": 0.0, "learning_rate": 1.6788600782500828e-05, "loss": 1.0413, "step": 2282 }, { "epoch": 0.2847698640389173, "grad_norm": 0.0, "learning_rate": 1.6785633703492936e-05, "loss": 1.0202, "step": 2283 }, { "epoch": 0.2848945989771735, "grad_norm": 0.0, "learning_rate": 1.678266551689856e-05, "loss": 1.0311, "step": 2284 }, { "epoch": 0.2850193339154297, "grad_norm": 0.0, "learning_rate": 1.677969622320218e-05, "loss": 1.0199, "step": 2285 }, { "epoch": 0.28514406885368593, "grad_norm": 0.0, "learning_rate": 1.6776725822888466e-05, "loss": 1.0151, "step": 2286 }, { "epoch": 0.28526880379194214, "grad_norm": 0.0, "learning_rate": 1.677375431644225e-05, "loss": 1.0655, "step": 2287 }, { "epoch": 0.28539353873019835, "grad_norm": 0.0, "learning_rate": 1.677078170434857e-05, "loss": 1.077, "step": 2288 }, { "epoch": 0.28551827366845456, "grad_norm": 0.0, "learning_rate": 1.676780798709262e-05, "loss": 1.0368, "step": 2289 }, { "epoch": 0.2856430086067107, "grad_norm": 0.0, "learning_rate": 1.6764833165159796e-05, "loss": 1.0522, "step": 2290 }, { "epoch": 0.2857677435449669, "grad_norm": 0.0, "learning_rate": 1.6761857239035658e-05, "loss": 1.0127, "step": 2291 }, { "epoch": 0.28589247848322313, "grad_norm": 0.0, "learning_rate": 1.6758880209205952e-05, "loss": 1.0405, "step": 2292 }, { "epoch": 0.28601721342147934, "grad_norm": 0.0, "learning_rate": 1.6755902076156606e-05, "loss": 1.0336, "step": 2293 }, { "epoch": 0.28614194835973555, "grad_norm": 0.0, "learning_rate": 1.6752922840373724e-05, "loss": 1.0287, "step": 2294 }, { "epoch": 0.28626668329799176, "grad_norm": 0.0, "learning_rate": 1.6749942502343594e-05, "loss": 1.0679, "step": 2295 }, { "epoch": 0.286391418236248, "grad_norm": 0.0, "learning_rate": 1.6746961062552686e-05, "loss": 1.0688, "step": 2296 }, { "epoch": 0.2865161531745042, "grad_norm": 0.0, "learning_rate": 1.674397852148764e-05, "loss": 0.9617, "step": 2297 }, { "epoch": 0.2866408881127604, "grad_norm": 0.0, "learning_rate": 1.6740994879635284e-05, "loss": 0.9916, "step": 2298 }, { "epoch": 0.2867656230510166, "grad_norm": 0.0, "learning_rate": 1.6738010137482628e-05, "loss": 1.0038, "step": 2299 }, { "epoch": 0.2868903579892728, "grad_norm": 0.0, "learning_rate": 1.673502429551685e-05, "loss": 1.0137, "step": 2300 }, { "epoch": 0.287015092927529, "grad_norm": 0.0, "learning_rate": 1.673203735422532e-05, "loss": 1.0545, "step": 2301 }, { "epoch": 0.2871398278657852, "grad_norm": 0.0, "learning_rate": 1.6729049314095578e-05, "loss": 1.0169, "step": 2302 }, { "epoch": 0.2872645628040414, "grad_norm": 0.0, "learning_rate": 1.672606017561535e-05, "loss": 1.0199, "step": 2303 }, { "epoch": 0.2873892977422976, "grad_norm": 0.0, "learning_rate": 1.672306993927254e-05, "loss": 1.0529, "step": 2304 }, { "epoch": 0.2875140326805538, "grad_norm": 0.0, "learning_rate": 1.6720078605555227e-05, "loss": 1.0676, "step": 2305 }, { "epoch": 0.28763876761881, "grad_norm": 0.0, "learning_rate": 1.671708617495167e-05, "loss": 1.0542, "step": 2306 }, { "epoch": 0.28776350255706623, "grad_norm": 0.0, "learning_rate": 1.671409264795032e-05, "loss": 0.994, "step": 2307 }, { "epoch": 0.28788823749532244, "grad_norm": 0.0, "learning_rate": 1.6711098025039782e-05, "loss": 1.0224, "step": 2308 }, { "epoch": 0.28801297243357865, "grad_norm": 0.0, "learning_rate": 1.6708102306708864e-05, "loss": 1.0598, "step": 2309 }, { "epoch": 0.28813770737183486, "grad_norm": 0.0, "learning_rate": 1.6705105493446535e-05, "loss": 1.0208, "step": 2310 }, { "epoch": 0.28826244231009107, "grad_norm": 0.0, "learning_rate": 1.670210758574196e-05, "loss": 1.0292, "step": 2311 }, { "epoch": 0.2883871772483473, "grad_norm": 0.0, "learning_rate": 1.6699108584084463e-05, "loss": 1.0208, "step": 2312 }, { "epoch": 0.2885119121866035, "grad_norm": 0.0, "learning_rate": 1.6696108488963568e-05, "loss": 1.0272, "step": 2313 }, { "epoch": 0.28863664712485965, "grad_norm": 0.0, "learning_rate": 1.6693107300868953e-05, "loss": 1.0766, "step": 2314 }, { "epoch": 0.28876138206311586, "grad_norm": 0.0, "learning_rate": 1.66901050202905e-05, "loss": 1.0099, "step": 2315 }, { "epoch": 0.28888611700137207, "grad_norm": 0.0, "learning_rate": 1.6687101647718245e-05, "loss": 0.994, "step": 2316 }, { "epoch": 0.2890108519396283, "grad_norm": 0.0, "learning_rate": 1.6684097183642425e-05, "loss": 1.0413, "step": 2317 }, { "epoch": 0.2891355868778845, "grad_norm": 0.0, "learning_rate": 1.6681091628553437e-05, "loss": 1.0023, "step": 2318 }, { "epoch": 0.2892603218161407, "grad_norm": 0.0, "learning_rate": 1.667808498294187e-05, "loss": 1.0259, "step": 2319 }, { "epoch": 0.2893850567543969, "grad_norm": 0.0, "learning_rate": 1.6675077247298475e-05, "loss": 1.0317, "step": 2320 }, { "epoch": 0.2895097916926531, "grad_norm": 0.0, "learning_rate": 1.6672068422114195e-05, "loss": 0.9926, "step": 2321 }, { "epoch": 0.28963452663090933, "grad_norm": 0.0, "learning_rate": 1.666905850788015e-05, "loss": 0.9816, "step": 2322 }, { "epoch": 0.28975926156916554, "grad_norm": 0.0, "learning_rate": 1.666604750508763e-05, "loss": 1.0344, "step": 2323 }, { "epoch": 0.28988399650742175, "grad_norm": 0.0, "learning_rate": 1.666303541422811e-05, "loss": 0.9785, "step": 2324 }, { "epoch": 0.29000873144567796, "grad_norm": 0.0, "learning_rate": 1.666002223579323e-05, "loss": 0.9879, "step": 2325 }, { "epoch": 0.2901334663839341, "grad_norm": 0.0, "learning_rate": 1.6657007970274824e-05, "loss": 1.0147, "step": 2326 }, { "epoch": 0.2902582013221903, "grad_norm": 0.0, "learning_rate": 1.66539926181649e-05, "loss": 1.0073, "step": 2327 }, { "epoch": 0.29038293626044653, "grad_norm": 0.0, "learning_rate": 1.665097617995563e-05, "loss": 1.0125, "step": 2328 }, { "epoch": 0.29050767119870274, "grad_norm": 0.0, "learning_rate": 1.6647958656139377e-05, "loss": 1.0361, "step": 2329 }, { "epoch": 0.29063240613695895, "grad_norm": 0.0, "learning_rate": 1.6644940047208683e-05, "loss": 1.0575, "step": 2330 }, { "epoch": 0.29075714107521516, "grad_norm": 0.0, "learning_rate": 1.664192035365625e-05, "loss": 1.0472, "step": 2331 }, { "epoch": 0.2908818760134714, "grad_norm": 0.0, "learning_rate": 1.6638899575974975e-05, "loss": 1.0345, "step": 2332 }, { "epoch": 0.2910066109517276, "grad_norm": 0.0, "learning_rate": 1.6635877714657925e-05, "loss": 1.0259, "step": 2333 }, { "epoch": 0.2911313458899838, "grad_norm": 0.0, "learning_rate": 1.6632854770198337e-05, "loss": 1.0354, "step": 2334 }, { "epoch": 0.29125608082824, "grad_norm": 0.0, "learning_rate": 1.6629830743089645e-05, "loss": 1.0009, "step": 2335 }, { "epoch": 0.2913808157664962, "grad_norm": 0.0, "learning_rate": 1.6626805633825432e-05, "loss": 1.0139, "step": 2336 }, { "epoch": 0.2915055507047524, "grad_norm": 0.0, "learning_rate": 1.662377944289948e-05, "loss": 1.0245, "step": 2337 }, { "epoch": 0.2916302856430086, "grad_norm": 0.0, "learning_rate": 1.662075217080574e-05, "loss": 0.9966, "step": 2338 }, { "epoch": 0.2917550205812648, "grad_norm": 0.0, "learning_rate": 1.6617723818038337e-05, "loss": 0.9838, "step": 2339 }, { "epoch": 0.291879755519521, "grad_norm": 0.0, "learning_rate": 1.6614694385091575e-05, "loss": 1.02, "step": 2340 }, { "epoch": 0.2920044904577772, "grad_norm": 0.0, "learning_rate": 1.6611663872459932e-05, "loss": 1.0288, "step": 2341 }, { "epoch": 0.2921292253960334, "grad_norm": 0.0, "learning_rate": 1.660863228063807e-05, "loss": 1.0041, "step": 2342 }, { "epoch": 0.29225396033428963, "grad_norm": 0.0, "learning_rate": 1.6605599610120816e-05, "loss": 1.0245, "step": 2343 }, { "epoch": 0.29237869527254584, "grad_norm": 0.0, "learning_rate": 1.6602565861403176e-05, "loss": 1.0032, "step": 2344 }, { "epoch": 0.29250343021080205, "grad_norm": 0.0, "learning_rate": 1.659953103498034e-05, "loss": 0.9969, "step": 2345 }, { "epoch": 0.29262816514905826, "grad_norm": 0.0, "learning_rate": 1.6596495131347667e-05, "loss": 1.0124, "step": 2346 }, { "epoch": 0.29275290008731447, "grad_norm": 0.0, "learning_rate": 1.659345815100069e-05, "loss": 1.0403, "step": 2347 }, { "epoch": 0.2928776350255707, "grad_norm": 0.0, "learning_rate": 1.6590420094435125e-05, "loss": 0.9636, "step": 2348 }, { "epoch": 0.2930023699638269, "grad_norm": 0.0, "learning_rate": 1.6587380962146854e-05, "loss": 1.0059, "step": 2349 }, { "epoch": 0.29312710490208305, "grad_norm": 0.0, "learning_rate": 1.6584340754631945e-05, "loss": 1.0682, "step": 2350 }, { "epoch": 0.29325183984033926, "grad_norm": 0.0, "learning_rate": 1.6581299472386633e-05, "loss": 1.0183, "step": 2351 }, { "epoch": 0.29337657477859547, "grad_norm": 0.0, "learning_rate": 1.6578257115907334e-05, "loss": 1.0179, "step": 2352 }, { "epoch": 0.2935013097168517, "grad_norm": 0.0, "learning_rate": 1.657521368569064e-05, "loss": 1.065, "step": 2353 }, { "epoch": 0.2936260446551079, "grad_norm": 0.0, "learning_rate": 1.657216918223331e-05, "loss": 0.9933, "step": 2354 }, { "epoch": 0.2937507795933641, "grad_norm": 0.0, "learning_rate": 1.6569123606032284e-05, "loss": 1.0508, "step": 2355 }, { "epoch": 0.2938755145316203, "grad_norm": 0.0, "learning_rate": 1.656607695758468e-05, "loss": 0.9886, "step": 2356 }, { "epoch": 0.2940002494698765, "grad_norm": 0.0, "learning_rate": 1.6563029237387787e-05, "loss": 0.9817, "step": 2357 }, { "epoch": 0.29412498440813273, "grad_norm": 0.0, "learning_rate": 1.6559980445939068e-05, "loss": 1.025, "step": 2358 }, { "epoch": 0.29424971934638894, "grad_norm": 0.0, "learning_rate": 1.6556930583736167e-05, "loss": 1.0131, "step": 2359 }, { "epoch": 0.29437445428464515, "grad_norm": 0.0, "learning_rate": 1.6553879651276894e-05, "loss": 1.0271, "step": 2360 }, { "epoch": 0.29449918922290136, "grad_norm": 0.0, "learning_rate": 1.6550827649059238e-05, "loss": 1.0384, "step": 2361 }, { "epoch": 0.2946239241611575, "grad_norm": 0.0, "learning_rate": 1.6547774577581365e-05, "loss": 1.0047, "step": 2362 }, { "epoch": 0.2947486590994137, "grad_norm": 0.0, "learning_rate": 1.6544720437341616e-05, "loss": 1.0226, "step": 2363 }, { "epoch": 0.29487339403766993, "grad_norm": 0.0, "learning_rate": 1.6541665228838496e-05, "loss": 0.9743, "step": 2364 }, { "epoch": 0.29499812897592614, "grad_norm": 0.0, "learning_rate": 1.6538608952570698e-05, "loss": 0.9861, "step": 2365 }, { "epoch": 0.29512286391418235, "grad_norm": 0.0, "learning_rate": 1.6535551609037083e-05, "loss": 1.0048, "step": 2366 }, { "epoch": 0.29524759885243856, "grad_norm": 0.0, "learning_rate": 1.653249319873668e-05, "loss": 1.0002, "step": 2367 }, { "epoch": 0.2953723337906948, "grad_norm": 0.0, "learning_rate": 1.652943372216871e-05, "loss": 1.0351, "step": 2368 }, { "epoch": 0.295497068728951, "grad_norm": 0.0, "learning_rate": 1.6526373179832546e-05, "loss": 1.0409, "step": 2369 }, { "epoch": 0.2956218036672072, "grad_norm": 0.0, "learning_rate": 1.6523311572227753e-05, "loss": 0.9831, "step": 2370 }, { "epoch": 0.2957465386054634, "grad_norm": 0.0, "learning_rate": 1.6520248899854057e-05, "loss": 1.0408, "step": 2371 }, { "epoch": 0.2958712735437196, "grad_norm": 0.0, "learning_rate": 1.6517185163211367e-05, "loss": 1.0264, "step": 2372 }, { "epoch": 0.2959960084819758, "grad_norm": 0.0, "learning_rate": 1.6514120362799763e-05, "loss": 0.9919, "step": 2373 }, { "epoch": 0.296120743420232, "grad_norm": 0.0, "learning_rate": 1.6511054499119493e-05, "loss": 0.9897, "step": 2374 }, { "epoch": 0.2962454783584882, "grad_norm": 0.0, "learning_rate": 1.6507987572670986e-05, "loss": 1.0611, "step": 2375 }, { "epoch": 0.2963702132967444, "grad_norm": 0.0, "learning_rate": 1.6504919583954845e-05, "loss": 1.0452, "step": 2376 }, { "epoch": 0.2964949482350006, "grad_norm": 0.0, "learning_rate": 1.650185053347184e-05, "loss": 1.0485, "step": 2377 }, { "epoch": 0.2966196831732568, "grad_norm": 0.0, "learning_rate": 1.6498780421722912e-05, "loss": 1.0598, "step": 2378 }, { "epoch": 0.29674441811151303, "grad_norm": 0.0, "learning_rate": 1.6495709249209185e-05, "loss": 1.0163, "step": 2379 }, { "epoch": 0.29686915304976924, "grad_norm": 0.0, "learning_rate": 1.649263701643196e-05, "loss": 1.0265, "step": 2380 }, { "epoch": 0.29699388798802545, "grad_norm": 0.0, "learning_rate": 1.648956372389269e-05, "loss": 1.0057, "step": 2381 }, { "epoch": 0.29711862292628166, "grad_norm": 0.0, "learning_rate": 1.648648937209302e-05, "loss": 1.0496, "step": 2382 }, { "epoch": 0.29724335786453787, "grad_norm": 0.0, "learning_rate": 1.6483413961534764e-05, "loss": 0.9819, "step": 2383 }, { "epoch": 0.2973680928027941, "grad_norm": 0.0, "learning_rate": 1.64803374927199e-05, "loss": 1.0014, "step": 2384 }, { "epoch": 0.2974928277410503, "grad_norm": 0.0, "learning_rate": 1.647725996615059e-05, "loss": 1.0174, "step": 2385 }, { "epoch": 0.29761756267930645, "grad_norm": 0.0, "learning_rate": 1.6474181382329165e-05, "loss": 1.0011, "step": 2386 }, { "epoch": 0.29774229761756266, "grad_norm": 0.0, "learning_rate": 1.647110174175812e-05, "loss": 1.015, "step": 2387 }, { "epoch": 0.29786703255581887, "grad_norm": 0.0, "learning_rate": 1.6468021044940142e-05, "loss": 1.0096, "step": 2388 }, { "epoch": 0.2979917674940751, "grad_norm": 0.0, "learning_rate": 1.6464939292378066e-05, "loss": 0.9837, "step": 2389 }, { "epoch": 0.2981165024323313, "grad_norm": 0.0, "learning_rate": 1.6461856484574917e-05, "loss": 0.9836, "step": 2390 }, { "epoch": 0.2982412373705875, "grad_norm": 0.0, "learning_rate": 1.6458772622033894e-05, "loss": 1.0068, "step": 2391 }, { "epoch": 0.2983659723088437, "grad_norm": 0.0, "learning_rate": 1.6455687705258348e-05, "loss": 1.0248, "step": 2392 }, { "epoch": 0.2984907072470999, "grad_norm": 0.0, "learning_rate": 1.6452601734751827e-05, "loss": 1.0562, "step": 2393 }, { "epoch": 0.29861544218535613, "grad_norm": 0.0, "learning_rate": 1.644951471101803e-05, "loss": 1.0007, "step": 2394 }, { "epoch": 0.29874017712361234, "grad_norm": 0.0, "learning_rate": 1.6446426634560838e-05, "loss": 1.0401, "step": 2395 }, { "epoch": 0.29886491206186855, "grad_norm": 0.0, "learning_rate": 1.644333750588431e-05, "loss": 0.9795, "step": 2396 }, { "epoch": 0.29898964700012476, "grad_norm": 0.0, "learning_rate": 1.6440247325492665e-05, "loss": 1.0668, "step": 2397 }, { "epoch": 0.2991143819383809, "grad_norm": 0.0, "learning_rate": 1.6437156093890297e-05, "loss": 0.9592, "step": 2398 }, { "epoch": 0.2992391168766371, "grad_norm": 0.0, "learning_rate": 1.6434063811581774e-05, "loss": 0.9987, "step": 2399 }, { "epoch": 0.29936385181489333, "grad_norm": 0.0, "learning_rate": 1.6430970479071837e-05, "loss": 1.0369, "step": 2400 }, { "epoch": 0.29948858675314954, "grad_norm": 0.0, "learning_rate": 1.6427876096865394e-05, "loss": 0.9575, "step": 2401 }, { "epoch": 0.29961332169140575, "grad_norm": 0.0, "learning_rate": 1.6424780665467526e-05, "loss": 1.0227, "step": 2402 }, { "epoch": 0.29973805662966196, "grad_norm": 0.0, "learning_rate": 1.642168418538349e-05, "loss": 1.0428, "step": 2403 }, { "epoch": 0.2998627915679182, "grad_norm": 0.0, "learning_rate": 1.6418586657118698e-05, "loss": 0.9911, "step": 2404 }, { "epoch": 0.2999875265061744, "grad_norm": 0.0, "learning_rate": 1.6415488081178753e-05, "loss": 1.008, "step": 2405 }, { "epoch": 0.3001122614444306, "grad_norm": 0.0, "learning_rate": 1.641238845806942e-05, "loss": 1.0141, "step": 2406 }, { "epoch": 0.3002369963826868, "grad_norm": 0.0, "learning_rate": 1.6409287788296637e-05, "loss": 0.9955, "step": 2407 }, { "epoch": 0.300361731320943, "grad_norm": 0.0, "learning_rate": 1.640618607236651e-05, "loss": 1.0393, "step": 2408 }, { "epoch": 0.3004864662591992, "grad_norm": 0.0, "learning_rate": 1.640308331078532e-05, "loss": 0.9804, "step": 2409 }, { "epoch": 0.3006112011974554, "grad_norm": 0.0, "learning_rate": 1.6399979504059506e-05, "loss": 1.0451, "step": 2410 }, { "epoch": 0.3007359361357116, "grad_norm": 0.0, "learning_rate": 1.63968746526957e-05, "loss": 0.9881, "step": 2411 }, { "epoch": 0.3008606710739678, "grad_norm": 0.0, "learning_rate": 1.639376875720068e-05, "loss": 1.0557, "step": 2412 }, { "epoch": 0.300985406012224, "grad_norm": 0.0, "learning_rate": 1.6390661818081417e-05, "loss": 1.0807, "step": 2413 }, { "epoch": 0.3011101409504802, "grad_norm": 0.0, "learning_rate": 1.6387553835845038e-05, "loss": 1.0597, "step": 2414 }, { "epoch": 0.30123487588873643, "grad_norm": 0.0, "learning_rate": 1.6384444810998843e-05, "loss": 1.0127, "step": 2415 }, { "epoch": 0.30135961082699264, "grad_norm": 0.0, "learning_rate": 1.6381334744050304e-05, "loss": 1.018, "step": 2416 }, { "epoch": 0.30148434576524885, "grad_norm": 0.0, "learning_rate": 1.637822363550706e-05, "loss": 1.0214, "step": 2417 }, { "epoch": 0.30160908070350506, "grad_norm": 0.0, "learning_rate": 1.637511148587693e-05, "loss": 1.0481, "step": 2418 }, { "epoch": 0.30173381564176127, "grad_norm": 0.0, "learning_rate": 1.6371998295667885e-05, "loss": 0.9625, "step": 2419 }, { "epoch": 0.3018585505800175, "grad_norm": 0.0, "learning_rate": 1.6368884065388085e-05, "loss": 0.9967, "step": 2420 }, { "epoch": 0.3019832855182737, "grad_norm": 0.0, "learning_rate": 1.6365768795545844e-05, "loss": 1.0345, "step": 2421 }, { "epoch": 0.30210802045652985, "grad_norm": 0.0, "learning_rate": 1.636265248664965e-05, "loss": 0.9973, "step": 2422 }, { "epoch": 0.30223275539478606, "grad_norm": 0.0, "learning_rate": 1.6359535139208178e-05, "loss": 0.9979, "step": 2423 }, { "epoch": 0.30235749033304227, "grad_norm": 0.0, "learning_rate": 1.6356416753730244e-05, "loss": 1.048, "step": 2424 }, { "epoch": 0.3024822252712985, "grad_norm": 0.0, "learning_rate": 1.635329733072485e-05, "loss": 1.0165, "step": 2425 }, { "epoch": 0.3026069602095547, "grad_norm": 0.0, "learning_rate": 1.635017687070117e-05, "loss": 1.0066, "step": 2426 }, { "epoch": 0.3027316951478109, "grad_norm": 0.0, "learning_rate": 1.6347055374168536e-05, "loss": 1.011, "step": 2427 }, { "epoch": 0.3028564300860671, "grad_norm": 0.0, "learning_rate": 1.6343932841636455e-05, "loss": 1.0102, "step": 2428 }, { "epoch": 0.3029811650243233, "grad_norm": 0.0, "learning_rate": 1.6340809273614606e-05, "loss": 1.047, "step": 2429 }, { "epoch": 0.30310589996257953, "grad_norm": 0.0, "learning_rate": 1.6337684670612834e-05, "loss": 1.0546, "step": 2430 }, { "epoch": 0.30323063490083574, "grad_norm": 0.0, "learning_rate": 1.6334559033141152e-05, "loss": 1.0154, "step": 2431 }, { "epoch": 0.30335536983909195, "grad_norm": 0.0, "learning_rate": 1.633143236170974e-05, "loss": 1.0009, "step": 2432 }, { "epoch": 0.30348010477734816, "grad_norm": 0.0, "learning_rate": 1.6328304656828953e-05, "loss": 1.0109, "step": 2433 }, { "epoch": 0.3036048397156043, "grad_norm": 0.0, "learning_rate": 1.6325175919009307e-05, "loss": 1.0341, "step": 2434 }, { "epoch": 0.3037295746538605, "grad_norm": 0.0, "learning_rate": 1.63220461487615e-05, "loss": 1.016, "step": 2435 }, { "epoch": 0.30385430959211673, "grad_norm": 0.0, "learning_rate": 1.631891534659638e-05, "loss": 1.0428, "step": 2436 }, { "epoch": 0.30397904453037294, "grad_norm": 0.0, "learning_rate": 1.6315783513024977e-05, "loss": 1.0559, "step": 2437 }, { "epoch": 0.30410377946862915, "grad_norm": 0.0, "learning_rate": 1.6312650648558485e-05, "loss": 0.9891, "step": 2438 }, { "epoch": 0.30422851440688536, "grad_norm": 0.0, "learning_rate": 1.6309516753708264e-05, "loss": 0.9947, "step": 2439 }, { "epoch": 0.3043532493451416, "grad_norm": 0.0, "learning_rate": 1.630638182898585e-05, "loss": 1.0301, "step": 2440 }, { "epoch": 0.3044779842833978, "grad_norm": 0.0, "learning_rate": 1.6303245874902936e-05, "loss": 1.0273, "step": 2441 }, { "epoch": 0.304602719221654, "grad_norm": 0.0, "learning_rate": 1.6300108891971395e-05, "loss": 0.9869, "step": 2442 }, { "epoch": 0.3047274541599102, "grad_norm": 0.0, "learning_rate": 1.6296970880703257e-05, "loss": 1.0428, "step": 2443 }, { "epoch": 0.3048521890981664, "grad_norm": 0.0, "learning_rate": 1.6293831841610725e-05, "loss": 1.0374, "step": 2444 }, { "epoch": 0.3049769240364226, "grad_norm": 0.0, "learning_rate": 1.6290691775206168e-05, "loss": 1.0274, "step": 2445 }, { "epoch": 0.3051016589746788, "grad_norm": 0.0, "learning_rate": 1.6287550682002126e-05, "loss": 1.0411, "step": 2446 }, { "epoch": 0.305226393912935, "grad_norm": 0.0, "learning_rate": 1.6284408562511305e-05, "loss": 1.0266, "step": 2447 }, { "epoch": 0.3053511288511912, "grad_norm": 0.0, "learning_rate": 1.628126541724658e-05, "loss": 1.0041, "step": 2448 }, { "epoch": 0.3054758637894474, "grad_norm": 0.0, "learning_rate": 1.627812124672099e-05, "loss": 1.0221, "step": 2449 }, { "epoch": 0.3056005987277036, "grad_norm": 0.0, "learning_rate": 1.6274976051447734e-05, "loss": 1.0, "step": 2450 }, { "epoch": 0.30572533366595983, "grad_norm": 0.0, "learning_rate": 1.62718298319402e-05, "loss": 1.052, "step": 2451 }, { "epoch": 0.30585006860421604, "grad_norm": 0.0, "learning_rate": 1.6268682588711927e-05, "loss": 1.0807, "step": 2452 }, { "epoch": 0.30597480354247225, "grad_norm": 0.0, "learning_rate": 1.626553432227662e-05, "loss": 0.984, "step": 2453 }, { "epoch": 0.30609953848072846, "grad_norm": 0.0, "learning_rate": 1.6262385033148164e-05, "loss": 0.9954, "step": 2454 }, { "epoch": 0.30622427341898467, "grad_norm": 0.0, "learning_rate": 1.6259234721840595e-05, "loss": 1.0011, "step": 2455 }, { "epoch": 0.3063490083572409, "grad_norm": 0.0, "learning_rate": 1.625608338886812e-05, "loss": 1.0323, "step": 2456 }, { "epoch": 0.3064737432954971, "grad_norm": 0.0, "learning_rate": 1.6252931034745122e-05, "loss": 1.0077, "step": 2457 }, { "epoch": 0.30659847823375325, "grad_norm": 0.0, "learning_rate": 1.6249777659986146e-05, "loss": 0.9702, "step": 2458 }, { "epoch": 0.30672321317200946, "grad_norm": 0.0, "learning_rate": 1.62466232651059e-05, "loss": 0.9739, "step": 2459 }, { "epoch": 0.30684794811026567, "grad_norm": 0.0, "learning_rate": 1.6243467850619262e-05, "loss": 1.0168, "step": 2460 }, { "epoch": 0.3069726830485219, "grad_norm": 0.0, "learning_rate": 1.6240311417041274e-05, "loss": 1.0536, "step": 2461 }, { "epoch": 0.3070974179867781, "grad_norm": 0.0, "learning_rate": 1.623715396488714e-05, "loss": 0.9934, "step": 2462 }, { "epoch": 0.3072221529250343, "grad_norm": 0.0, "learning_rate": 1.6233995494672242e-05, "loss": 0.9783, "step": 2463 }, { "epoch": 0.3073468878632905, "grad_norm": 0.0, "learning_rate": 1.6230836006912127e-05, "loss": 1.0294, "step": 2464 }, { "epoch": 0.3074716228015467, "grad_norm": 0.0, "learning_rate": 1.6227675502122492e-05, "loss": 1.0033, "step": 2465 }, { "epoch": 0.30759635773980293, "grad_norm": 0.0, "learning_rate": 1.622451398081922e-05, "loss": 1.0207, "step": 2466 }, { "epoch": 0.30772109267805914, "grad_norm": 0.0, "learning_rate": 1.6221351443518343e-05, "loss": 1.0155, "step": 2467 }, { "epoch": 0.30784582761631535, "grad_norm": 0.0, "learning_rate": 1.621818789073607e-05, "loss": 1.0469, "step": 2468 }, { "epoch": 0.30797056255457156, "grad_norm": 0.0, "learning_rate": 1.621502332298878e-05, "loss": 0.9771, "step": 2469 }, { "epoch": 0.30809529749282777, "grad_norm": 0.0, "learning_rate": 1.6211857740793e-05, "loss": 0.9978, "step": 2470 }, { "epoch": 0.3082200324310839, "grad_norm": 0.0, "learning_rate": 1.620869114466544e-05, "loss": 0.9917, "step": 2471 }, { "epoch": 0.30834476736934013, "grad_norm": 0.0, "learning_rate": 1.6205523535122963e-05, "loss": 1.0526, "step": 2472 }, { "epoch": 0.30846950230759634, "grad_norm": 0.0, "learning_rate": 1.6202354912682602e-05, "loss": 1.06, "step": 2473 }, { "epoch": 0.30859423724585255, "grad_norm": 0.0, "learning_rate": 1.619918527786156e-05, "loss": 1.0306, "step": 2474 }, { "epoch": 0.30871897218410876, "grad_norm": 0.0, "learning_rate": 1.6196014631177203e-05, "loss": 0.975, "step": 2475 }, { "epoch": 0.308843707122365, "grad_norm": 0.0, "learning_rate": 1.6192842973147053e-05, "loss": 1.0581, "step": 2476 }, { "epoch": 0.3089684420606212, "grad_norm": 0.0, "learning_rate": 1.6189670304288814e-05, "loss": 1.0027, "step": 2477 }, { "epoch": 0.3090931769988774, "grad_norm": 0.0, "learning_rate": 1.6186496625120338e-05, "loss": 1.0669, "step": 2478 }, { "epoch": 0.3092179119371336, "grad_norm": 0.0, "learning_rate": 1.6183321936159654e-05, "loss": 0.9937, "step": 2479 }, { "epoch": 0.3093426468753898, "grad_norm": 0.0, "learning_rate": 1.6180146237924945e-05, "loss": 1.0345, "step": 2480 }, { "epoch": 0.309467381813646, "grad_norm": 0.0, "learning_rate": 1.6176969530934573e-05, "loss": 0.9873, "step": 2481 }, { "epoch": 0.30959211675190224, "grad_norm": 0.0, "learning_rate": 1.6173791815707053e-05, "loss": 1.0618, "step": 2482 }, { "epoch": 0.3097168516901584, "grad_norm": 0.0, "learning_rate": 1.6170613092761064e-05, "loss": 0.9951, "step": 2483 }, { "epoch": 0.3098415866284146, "grad_norm": 0.0, "learning_rate": 1.6167433362615463e-05, "loss": 0.9702, "step": 2484 }, { "epoch": 0.3099663215666708, "grad_norm": 0.0, "learning_rate": 1.6164252625789255e-05, "loss": 0.9784, "step": 2485 }, { "epoch": 0.310091056504927, "grad_norm": 0.0, "learning_rate": 1.616107088280162e-05, "loss": 0.994, "step": 2486 }, { "epoch": 0.31021579144318323, "grad_norm": 0.0, "learning_rate": 1.6157888134171896e-05, "loss": 0.9925, "step": 2487 }, { "epoch": 0.31034052638143944, "grad_norm": 0.0, "learning_rate": 1.6154704380419587e-05, "loss": 1.0161, "step": 2488 }, { "epoch": 0.31046526131969565, "grad_norm": 0.0, "learning_rate": 1.615151962206437e-05, "loss": 1.0179, "step": 2489 }, { "epoch": 0.31058999625795186, "grad_norm": 0.0, "learning_rate": 1.6148333859626063e-05, "loss": 1.0141, "step": 2490 }, { "epoch": 0.31071473119620807, "grad_norm": 0.0, "learning_rate": 1.6145147093624677e-05, "loss": 1.0485, "step": 2491 }, { "epoch": 0.3108394661344643, "grad_norm": 0.0, "learning_rate": 1.6141959324580366e-05, "loss": 0.9776, "step": 2492 }, { "epoch": 0.3109642010727205, "grad_norm": 0.0, "learning_rate": 1.613877055301346e-05, "loss": 1.0243, "step": 2493 }, { "epoch": 0.3110889360109767, "grad_norm": 0.0, "learning_rate": 1.6135580779444438e-05, "loss": 1.049, "step": 2494 }, { "epoch": 0.31121367094923286, "grad_norm": 0.0, "learning_rate": 1.6132390004393954e-05, "loss": 1.0525, "step": 2495 }, { "epoch": 0.31133840588748907, "grad_norm": 0.0, "learning_rate": 1.6129198228382826e-05, "loss": 1.0552, "step": 2496 }, { "epoch": 0.3114631408257453, "grad_norm": 0.0, "learning_rate": 1.6126005451932028e-05, "loss": 0.9828, "step": 2497 }, { "epoch": 0.3115878757640015, "grad_norm": 0.0, "learning_rate": 1.6122811675562712e-05, "loss": 1.0339, "step": 2498 }, { "epoch": 0.3117126107022577, "grad_norm": 0.0, "learning_rate": 1.611961689979617e-05, "loss": 1.0209, "step": 2499 }, { "epoch": 0.3118373456405139, "grad_norm": 0.0, "learning_rate": 1.6116421125153876e-05, "loss": 1.0252, "step": 2500 }, { "epoch": 0.3119620805787701, "grad_norm": 0.0, "learning_rate": 1.611322435215746e-05, "loss": 1.036, "step": 2501 }, { "epoch": 0.31208681551702633, "grad_norm": 0.0, "learning_rate": 1.6110026581328715e-05, "loss": 0.9988, "step": 2502 }, { "epoch": 0.31221155045528254, "grad_norm": 0.0, "learning_rate": 1.61068278131896e-05, "loss": 0.9658, "step": 2503 }, { "epoch": 0.31233628539353875, "grad_norm": 0.0, "learning_rate": 1.6103628048262236e-05, "loss": 1.0228, "step": 2504 }, { "epoch": 0.31246102033179496, "grad_norm": 0.0, "learning_rate": 1.61004272870689e-05, "loss": 0.9389, "step": 2505 }, { "epoch": 0.31258575527005117, "grad_norm": 0.0, "learning_rate": 1.6097225530132044e-05, "loss": 1.0081, "step": 2506 }, { "epoch": 0.3127104902083073, "grad_norm": 0.0, "learning_rate": 1.609402277797427e-05, "loss": 1.0306, "step": 2507 }, { "epoch": 0.31283522514656353, "grad_norm": 0.0, "learning_rate": 1.6090819031118344e-05, "loss": 0.9998, "step": 2508 }, { "epoch": 0.31295996008481974, "grad_norm": 0.0, "learning_rate": 1.608761429008721e-05, "loss": 1.0615, "step": 2509 }, { "epoch": 0.31308469502307595, "grad_norm": 0.0, "learning_rate": 1.6084408555403952e-05, "loss": 1.0166, "step": 2510 }, { "epoch": 0.31320942996133216, "grad_norm": 0.0, "learning_rate": 1.608120182759183e-05, "loss": 0.9958, "step": 2511 }, { "epoch": 0.3133341648995884, "grad_norm": 0.0, "learning_rate": 1.6077994107174267e-05, "loss": 1.0546, "step": 2512 }, { "epoch": 0.3134588998378446, "grad_norm": 0.0, "learning_rate": 1.6074785394674835e-05, "loss": 0.9834, "step": 2513 }, { "epoch": 0.3135836347761008, "grad_norm": 0.0, "learning_rate": 1.607157569061729e-05, "loss": 0.9852, "step": 2514 }, { "epoch": 0.313708369714357, "grad_norm": 0.0, "learning_rate": 1.6068364995525522e-05, "loss": 1.0413, "step": 2515 }, { "epoch": 0.3138331046526132, "grad_norm": 0.0, "learning_rate": 1.6065153309923607e-05, "loss": 1.0214, "step": 2516 }, { "epoch": 0.3139578395908694, "grad_norm": 0.0, "learning_rate": 1.606194063433577e-05, "loss": 0.9851, "step": 2517 }, { "epoch": 0.31408257452912564, "grad_norm": 0.0, "learning_rate": 1.60587269692864e-05, "loss": 0.9813, "step": 2518 }, { "epoch": 0.3142073094673818, "grad_norm": 0.0, "learning_rate": 1.605551231530005e-05, "loss": 0.9957, "step": 2519 }, { "epoch": 0.314332044405638, "grad_norm": 0.0, "learning_rate": 1.6052296672901433e-05, "loss": 1.0111, "step": 2520 }, { "epoch": 0.3144567793438942, "grad_norm": 0.0, "learning_rate": 1.6049080042615417e-05, "loss": 0.9929, "step": 2521 }, { "epoch": 0.3145815142821504, "grad_norm": 0.0, "learning_rate": 1.6045862424967046e-05, "loss": 0.9869, "step": 2522 }, { "epoch": 0.31470624922040663, "grad_norm": 0.0, "learning_rate": 1.604264382048151e-05, "loss": 0.9884, "step": 2523 }, { "epoch": 0.31483098415866284, "grad_norm": 0.0, "learning_rate": 1.6039424229684168e-05, "loss": 1.0231, "step": 2524 }, { "epoch": 0.31495571909691905, "grad_norm": 0.0, "learning_rate": 1.6036203653100538e-05, "loss": 1.0537, "step": 2525 }, { "epoch": 0.31508045403517526, "grad_norm": 0.0, "learning_rate": 1.6032982091256302e-05, "loss": 1.0585, "step": 2526 }, { "epoch": 0.31520518897343147, "grad_norm": 0.0, "learning_rate": 1.6029759544677298e-05, "loss": 1.0329, "step": 2527 }, { "epoch": 0.3153299239116877, "grad_norm": 0.0, "learning_rate": 1.6026536013889526e-05, "loss": 1.0321, "step": 2528 }, { "epoch": 0.3154546588499439, "grad_norm": 0.0, "learning_rate": 1.602331149941915e-05, "loss": 1.0478, "step": 2529 }, { "epoch": 0.3155793937882001, "grad_norm": 0.0, "learning_rate": 1.602008600179249e-05, "loss": 1.0217, "step": 2530 }, { "epoch": 0.31570412872645626, "grad_norm": 0.0, "learning_rate": 1.6016859521536024e-05, "loss": 1.0548, "step": 2531 }, { "epoch": 0.31582886366471247, "grad_norm": 0.0, "learning_rate": 1.6013632059176405e-05, "loss": 1.0491, "step": 2532 }, { "epoch": 0.3159535986029687, "grad_norm": 0.0, "learning_rate": 1.601040361524043e-05, "loss": 0.9926, "step": 2533 }, { "epoch": 0.3160783335412249, "grad_norm": 0.0, "learning_rate": 1.6007174190255062e-05, "loss": 1.0202, "step": 2534 }, { "epoch": 0.3162030684794811, "grad_norm": 0.0, "learning_rate": 1.600394378474743e-05, "loss": 0.981, "step": 2535 }, { "epoch": 0.3163278034177373, "grad_norm": 0.0, "learning_rate": 1.6000712399244813e-05, "loss": 1.0019, "step": 2536 }, { "epoch": 0.3164525383559935, "grad_norm": 0.0, "learning_rate": 1.5997480034274652e-05, "loss": 0.9475, "step": 2537 }, { "epoch": 0.31657727329424973, "grad_norm": 0.0, "learning_rate": 1.5994246690364557e-05, "loss": 1.0027, "step": 2538 }, { "epoch": 0.31670200823250594, "grad_norm": 0.0, "learning_rate": 1.5991012368042286e-05, "loss": 1.0111, "step": 2539 }, { "epoch": 0.31682674317076215, "grad_norm": 0.0, "learning_rate": 1.5987777067835762e-05, "loss": 1.0557, "step": 2540 }, { "epoch": 0.31695147810901836, "grad_norm": 0.0, "learning_rate": 1.598454079027307e-05, "loss": 0.9889, "step": 2541 }, { "epoch": 0.31707621304727457, "grad_norm": 0.0, "learning_rate": 1.598130353588246e-05, "loss": 1.0487, "step": 2542 }, { "epoch": 0.3172009479855307, "grad_norm": 0.0, "learning_rate": 1.5978065305192316e-05, "loss": 1.0004, "step": 2543 }, { "epoch": 0.31732568292378693, "grad_norm": 0.0, "learning_rate": 1.5974826098731213e-05, "loss": 0.9892, "step": 2544 }, { "epoch": 0.31745041786204314, "grad_norm": 0.0, "learning_rate": 1.5971585917027864e-05, "loss": 0.9392, "step": 2545 }, { "epoch": 0.31757515280029935, "grad_norm": 0.0, "learning_rate": 1.596834476061115e-05, "loss": 1.0153, "step": 2546 }, { "epoch": 0.31769988773855556, "grad_norm": 0.0, "learning_rate": 1.5965102630010112e-05, "loss": 0.9817, "step": 2547 }, { "epoch": 0.3178246226768118, "grad_norm": 0.0, "learning_rate": 1.596185952575394e-05, "loss": 0.9983, "step": 2548 }, { "epoch": 0.317949357615068, "grad_norm": 0.0, "learning_rate": 1.5958615448372003e-05, "loss": 1.0619, "step": 2549 }, { "epoch": 0.3180740925533242, "grad_norm": 0.0, "learning_rate": 1.5955370398393806e-05, "loss": 1.0442, "step": 2550 }, { "epoch": 0.3181988274915804, "grad_norm": 0.0, "learning_rate": 1.5952124376349025e-05, "loss": 1.0214, "step": 2551 }, { "epoch": 0.3183235624298366, "grad_norm": 0.0, "learning_rate": 1.5948877382767497e-05, "loss": 0.9828, "step": 2552 }, { "epoch": 0.3184482973680928, "grad_norm": 0.0, "learning_rate": 1.5945629418179205e-05, "loss": 1.0358, "step": 2553 }, { "epoch": 0.31857303230634904, "grad_norm": 0.0, "learning_rate": 1.5942380483114305e-05, "loss": 1.0445, "step": 2554 }, { "epoch": 0.3186977672446052, "grad_norm": 0.0, "learning_rate": 1.5939130578103104e-05, "loss": 1.069, "step": 2555 }, { "epoch": 0.3188225021828614, "grad_norm": 0.0, "learning_rate": 1.5935879703676067e-05, "loss": 1.0102, "step": 2556 }, { "epoch": 0.3189472371211176, "grad_norm": 0.0, "learning_rate": 1.5932627860363824e-05, "loss": 0.9692, "step": 2557 }, { "epoch": 0.3190719720593738, "grad_norm": 0.0, "learning_rate": 1.592937504869715e-05, "loss": 1.0272, "step": 2558 }, { "epoch": 0.31919670699763003, "grad_norm": 0.0, "learning_rate": 1.592612126920699e-05, "loss": 1.0056, "step": 2559 }, { "epoch": 0.31932144193588624, "grad_norm": 0.0, "learning_rate": 1.592286652242444e-05, "loss": 1.007, "step": 2560 }, { "epoch": 0.31944617687414245, "grad_norm": 0.0, "learning_rate": 1.591961080888076e-05, "loss": 1.0677, "step": 2561 }, { "epoch": 0.31957091181239866, "grad_norm": 0.0, "learning_rate": 1.5916354129107364e-05, "loss": 1.0062, "step": 2562 }, { "epoch": 0.31969564675065487, "grad_norm": 0.0, "learning_rate": 1.5913096483635827e-05, "loss": 0.9742, "step": 2563 }, { "epoch": 0.3198203816889111, "grad_norm": 0.0, "learning_rate": 1.590983787299787e-05, "loss": 1.0364, "step": 2564 }, { "epoch": 0.3199451166271673, "grad_norm": 0.0, "learning_rate": 1.590657829772539e-05, "loss": 1.0192, "step": 2565 }, { "epoch": 0.3200698515654235, "grad_norm": 0.0, "learning_rate": 1.5903317758350424e-05, "loss": 1.0484, "step": 2566 }, { "epoch": 0.32019458650367966, "grad_norm": 0.0, "learning_rate": 1.5900056255405184e-05, "loss": 1.0025, "step": 2567 }, { "epoch": 0.32031932144193587, "grad_norm": 0.0, "learning_rate": 1.589679378942202e-05, "loss": 0.9388, "step": 2568 }, { "epoch": 0.3204440563801921, "grad_norm": 0.0, "learning_rate": 1.589353036093345e-05, "loss": 1.0127, "step": 2569 }, { "epoch": 0.3205687913184483, "grad_norm": 0.0, "learning_rate": 1.5890265970472153e-05, "loss": 1.0195, "step": 2570 }, { "epoch": 0.3206935262567045, "grad_norm": 0.0, "learning_rate": 1.5887000618570955e-05, "loss": 1.0047, "step": 2571 }, { "epoch": 0.3208182611949607, "grad_norm": 0.0, "learning_rate": 1.5883734305762846e-05, "loss": 1.0053, "step": 2572 }, { "epoch": 0.3209429961332169, "grad_norm": 0.0, "learning_rate": 1.5880467032580974e-05, "loss": 0.9613, "step": 2573 }, { "epoch": 0.32106773107147313, "grad_norm": 0.0, "learning_rate": 1.587719879955863e-05, "loss": 1.0245, "step": 2574 }, { "epoch": 0.32119246600972934, "grad_norm": 0.0, "learning_rate": 1.5873929607229283e-05, "loss": 1.0025, "step": 2575 }, { "epoch": 0.32131720094798555, "grad_norm": 0.0, "learning_rate": 1.5870659456126542e-05, "loss": 1.0279, "step": 2576 }, { "epoch": 0.32144193588624176, "grad_norm": 0.0, "learning_rate": 1.586738834678418e-05, "loss": 1.0534, "step": 2577 }, { "epoch": 0.32156667082449797, "grad_norm": 0.0, "learning_rate": 1.5864116279736124e-05, "loss": 1.031, "step": 2578 }, { "epoch": 0.3216914057627541, "grad_norm": 0.0, "learning_rate": 1.5860843255516457e-05, "loss": 1.0364, "step": 2579 }, { "epoch": 0.32181614070101033, "grad_norm": 0.0, "learning_rate": 1.585756927465942e-05, "loss": 1.0211, "step": 2580 }, { "epoch": 0.32194087563926654, "grad_norm": 0.0, "learning_rate": 1.5854294337699407e-05, "loss": 1.0328, "step": 2581 }, { "epoch": 0.32206561057752275, "grad_norm": 0.0, "learning_rate": 1.5851018445170973e-05, "loss": 1.0587, "step": 2582 }, { "epoch": 0.32219034551577896, "grad_norm": 0.0, "learning_rate": 1.5847741597608825e-05, "loss": 1.0187, "step": 2583 }, { "epoch": 0.3223150804540352, "grad_norm": 0.0, "learning_rate": 1.584446379554783e-05, "loss": 0.9978, "step": 2584 }, { "epoch": 0.3224398153922914, "grad_norm": 0.0, "learning_rate": 1.5841185039523004e-05, "loss": 1.0153, "step": 2585 }, { "epoch": 0.3225645503305476, "grad_norm": 0.0, "learning_rate": 1.5837905330069525e-05, "loss": 1.0243, "step": 2586 }, { "epoch": 0.3226892852688038, "grad_norm": 0.0, "learning_rate": 1.583462466772272e-05, "loss": 1.0225, "step": 2587 }, { "epoch": 0.32281402020706, "grad_norm": 0.0, "learning_rate": 1.5831343053018082e-05, "loss": 1.0413, "step": 2588 }, { "epoch": 0.3229387551453162, "grad_norm": 0.0, "learning_rate": 1.582806048649125e-05, "loss": 1.0011, "step": 2589 }, { "epoch": 0.32306349008357244, "grad_norm": 0.0, "learning_rate": 1.5824776968678024e-05, "loss": 0.9667, "step": 2590 }, { "epoch": 0.3231882250218286, "grad_norm": 0.0, "learning_rate": 1.5821492500114353e-05, "loss": 0.975, "step": 2591 }, { "epoch": 0.3233129599600848, "grad_norm": 0.0, "learning_rate": 1.5818207081336345e-05, "loss": 1.0046, "step": 2592 }, { "epoch": 0.323437694898341, "grad_norm": 0.0, "learning_rate": 1.5814920712880267e-05, "loss": 1.0814, "step": 2593 }, { "epoch": 0.3235624298365972, "grad_norm": 0.0, "learning_rate": 1.5811633395282538e-05, "loss": 0.9965, "step": 2594 }, { "epoch": 0.32368716477485343, "grad_norm": 0.0, "learning_rate": 1.5808345129079726e-05, "loss": 0.9569, "step": 2595 }, { "epoch": 0.32381189971310964, "grad_norm": 0.0, "learning_rate": 1.580505591480856e-05, "loss": 1.0159, "step": 2596 }, { "epoch": 0.32393663465136585, "grad_norm": 0.0, "learning_rate": 1.5801765753005923e-05, "loss": 1.0349, "step": 2597 }, { "epoch": 0.32406136958962206, "grad_norm": 0.0, "learning_rate": 1.579847464420886e-05, "loss": 1.0623, "step": 2598 }, { "epoch": 0.32418610452787827, "grad_norm": 0.0, "learning_rate": 1.5795182588954553e-05, "loss": 1.0163, "step": 2599 }, { "epoch": 0.3243108394661345, "grad_norm": 0.0, "learning_rate": 1.5791889587780347e-05, "loss": 1.0243, "step": 2600 }, { "epoch": 0.3244355744043907, "grad_norm": 0.0, "learning_rate": 1.578859564122375e-05, "loss": 1.0512, "step": 2601 }, { "epoch": 0.3245603093426469, "grad_norm": 0.0, "learning_rate": 1.578530074982242e-05, "loss": 0.9724, "step": 2602 }, { "epoch": 0.32468504428090306, "grad_norm": 0.0, "learning_rate": 1.5782004914114155e-05, "loss": 1.0165, "step": 2603 }, { "epoch": 0.32480977921915927, "grad_norm": 0.0, "learning_rate": 1.5778708134636928e-05, "loss": 0.9998, "step": 2604 }, { "epoch": 0.3249345141574155, "grad_norm": 0.0, "learning_rate": 1.577541041192885e-05, "loss": 1.0343, "step": 2605 }, { "epoch": 0.3250592490956717, "grad_norm": 0.0, "learning_rate": 1.57721117465282e-05, "loss": 1.0035, "step": 2606 }, { "epoch": 0.3251839840339279, "grad_norm": 0.0, "learning_rate": 1.5768812138973394e-05, "loss": 1.0304, "step": 2607 }, { "epoch": 0.3253087189721841, "grad_norm": 0.0, "learning_rate": 1.576551158980302e-05, "loss": 1.022, "step": 2608 }, { "epoch": 0.3254334539104403, "grad_norm": 0.0, "learning_rate": 1.5762210099555804e-05, "loss": 1.0522, "step": 2609 }, { "epoch": 0.32555818884869653, "grad_norm": 0.0, "learning_rate": 1.5758907668770637e-05, "loss": 1.0108, "step": 2610 }, { "epoch": 0.32568292378695274, "grad_norm": 0.0, "learning_rate": 1.5755604297986556e-05, "loss": 1.0151, "step": 2611 }, { "epoch": 0.32580765872520895, "grad_norm": 0.0, "learning_rate": 1.5752299987742757e-05, "loss": 1.0331, "step": 2612 }, { "epoch": 0.32593239366346516, "grad_norm": 0.0, "learning_rate": 1.5748994738578582e-05, "loss": 0.9443, "step": 2613 }, { "epoch": 0.32605712860172137, "grad_norm": 0.0, "learning_rate": 1.5745688551033538e-05, "loss": 0.9856, "step": 2614 }, { "epoch": 0.3261818635399775, "grad_norm": 0.0, "learning_rate": 1.574238142564727e-05, "loss": 1.0215, "step": 2615 }, { "epoch": 0.32630659847823373, "grad_norm": 0.0, "learning_rate": 1.5739073362959594e-05, "loss": 0.9971, "step": 2616 }, { "epoch": 0.32643133341648994, "grad_norm": 0.0, "learning_rate": 1.573576436351046e-05, "loss": 1.0112, "step": 2617 }, { "epoch": 0.32655606835474615, "grad_norm": 0.0, "learning_rate": 1.573245442783999e-05, "loss": 0.9812, "step": 2618 }, { "epoch": 0.32668080329300236, "grad_norm": 0.0, "learning_rate": 1.5729143556488438e-05, "loss": 1.0275, "step": 2619 }, { "epoch": 0.3268055382312586, "grad_norm": 0.0, "learning_rate": 1.572583174999623e-05, "loss": 1.0038, "step": 2620 }, { "epoch": 0.3269302731695148, "grad_norm": 0.0, "learning_rate": 1.572251900890393e-05, "loss": 1.007, "step": 2621 }, { "epoch": 0.327055008107771, "grad_norm": 0.0, "learning_rate": 1.5719205333752263e-05, "loss": 0.9774, "step": 2622 }, { "epoch": 0.3271797430460272, "grad_norm": 0.0, "learning_rate": 1.5715890725082106e-05, "loss": 1.0112, "step": 2623 }, { "epoch": 0.3273044779842834, "grad_norm": 0.0, "learning_rate": 1.5712575183434486e-05, "loss": 0.9699, "step": 2624 }, { "epoch": 0.3274292129225396, "grad_norm": 0.0, "learning_rate": 1.5709258709350585e-05, "loss": 0.9986, "step": 2625 }, { "epoch": 0.32755394786079584, "grad_norm": 0.0, "learning_rate": 1.570594130337173e-05, "loss": 0.9975, "step": 2626 }, { "epoch": 0.327678682799052, "grad_norm": 0.0, "learning_rate": 1.570262296603941e-05, "loss": 1.0167, "step": 2627 }, { "epoch": 0.3278034177373082, "grad_norm": 0.0, "learning_rate": 1.569930369789526e-05, "loss": 1.0209, "step": 2628 }, { "epoch": 0.3279281526755644, "grad_norm": 0.0, "learning_rate": 1.5695983499481065e-05, "loss": 0.9849, "step": 2629 }, { "epoch": 0.3280528876138206, "grad_norm": 0.0, "learning_rate": 1.5692662371338772e-05, "loss": 0.9992, "step": 2630 }, { "epoch": 0.32817762255207683, "grad_norm": 0.0, "learning_rate": 1.5689340314010467e-05, "loss": 1.0177, "step": 2631 }, { "epoch": 0.32830235749033304, "grad_norm": 0.0, "learning_rate": 1.5686017328038393e-05, "loss": 0.983, "step": 2632 }, { "epoch": 0.32842709242858925, "grad_norm": 0.0, "learning_rate": 1.568269341396495e-05, "loss": 0.985, "step": 2633 }, { "epoch": 0.32855182736684546, "grad_norm": 0.0, "learning_rate": 1.567936857233268e-05, "loss": 0.9748, "step": 2634 }, { "epoch": 0.32867656230510167, "grad_norm": 0.0, "learning_rate": 1.567604280368429e-05, "loss": 0.9637, "step": 2635 }, { "epoch": 0.3288012972433579, "grad_norm": 0.0, "learning_rate": 1.5672716108562614e-05, "loss": 1.0336, "step": 2636 }, { "epoch": 0.3289260321816141, "grad_norm": 0.0, "learning_rate": 1.5669388487510663e-05, "loss": 1.0201, "step": 2637 }, { "epoch": 0.3290507671198703, "grad_norm": 0.0, "learning_rate": 1.566605994107159e-05, "loss": 0.9937, "step": 2638 }, { "epoch": 0.32917550205812646, "grad_norm": 0.0, "learning_rate": 1.5662730469788688e-05, "loss": 1.0079, "step": 2639 }, { "epoch": 0.32930023699638267, "grad_norm": 0.0, "learning_rate": 1.565940007420542e-05, "loss": 1.0063, "step": 2640 }, { "epoch": 0.3294249719346389, "grad_norm": 0.0, "learning_rate": 1.5656068754865388e-05, "loss": 1.0341, "step": 2641 }, { "epoch": 0.3295497068728951, "grad_norm": 0.0, "learning_rate": 1.5652736512312345e-05, "loss": 1.0027, "step": 2642 }, { "epoch": 0.3296744418111513, "grad_norm": 0.0, "learning_rate": 1.5649403347090196e-05, "loss": 1.0074, "step": 2643 }, { "epoch": 0.3297991767494075, "grad_norm": 0.0, "learning_rate": 1.5646069259743007e-05, "loss": 1.0157, "step": 2644 }, { "epoch": 0.3299239116876637, "grad_norm": 0.0, "learning_rate": 1.564273425081497e-05, "loss": 1.0167, "step": 2645 }, { "epoch": 0.33004864662591993, "grad_norm": 0.0, "learning_rate": 1.563939832085046e-05, "loss": 0.9613, "step": 2646 }, { "epoch": 0.33017338156417614, "grad_norm": 0.0, "learning_rate": 1.5636061470393968e-05, "loss": 1.0092, "step": 2647 }, { "epoch": 0.33029811650243235, "grad_norm": 0.0, "learning_rate": 1.5632723699990165e-05, "loss": 0.9869, "step": 2648 }, { "epoch": 0.33042285144068856, "grad_norm": 0.0, "learning_rate": 1.5629385010183852e-05, "loss": 0.9851, "step": 2649 }, { "epoch": 0.33054758637894477, "grad_norm": 0.0, "learning_rate": 1.5626045401519986e-05, "loss": 0.9859, "step": 2650 }, { "epoch": 0.3306723213172009, "grad_norm": 0.0, "learning_rate": 1.562270487454368e-05, "loss": 0.9885, "step": 2651 }, { "epoch": 0.33079705625545713, "grad_norm": 0.0, "learning_rate": 1.5619363429800195e-05, "loss": 1.0053, "step": 2652 }, { "epoch": 0.33092179119371334, "grad_norm": 0.0, "learning_rate": 1.561602106783493e-05, "loss": 1.0278, "step": 2653 }, { "epoch": 0.33104652613196955, "grad_norm": 0.0, "learning_rate": 1.5612677789193452e-05, "loss": 1.0602, "step": 2654 }, { "epoch": 0.33117126107022576, "grad_norm": 0.0, "learning_rate": 1.5609333594421458e-05, "loss": 1.0249, "step": 2655 }, { "epoch": 0.331295996008482, "grad_norm": 0.0, "learning_rate": 1.5605988484064818e-05, "loss": 1.0241, "step": 2656 }, { "epoch": 0.3314207309467382, "grad_norm": 0.0, "learning_rate": 1.5602642458669527e-05, "loss": 0.9801, "step": 2657 }, { "epoch": 0.3315454658849944, "grad_norm": 0.0, "learning_rate": 1.559929551878175e-05, "loss": 1.0203, "step": 2658 }, { "epoch": 0.3316702008232506, "grad_norm": 0.0, "learning_rate": 1.559594766494778e-05, "loss": 1.0043, "step": 2659 }, { "epoch": 0.3317949357615068, "grad_norm": 0.0, "learning_rate": 1.5592598897714078e-05, "loss": 0.9899, "step": 2660 }, { "epoch": 0.331919670699763, "grad_norm": 0.0, "learning_rate": 1.558924921762725e-05, "loss": 0.97, "step": 2661 }, { "epoch": 0.33204440563801924, "grad_norm": 0.0, "learning_rate": 1.5585898625234047e-05, "loss": 0.971, "step": 2662 }, { "epoch": 0.3321691405762754, "grad_norm": 0.0, "learning_rate": 1.5582547121081368e-05, "loss": 1.021, "step": 2663 }, { "epoch": 0.3322938755145316, "grad_norm": 0.0, "learning_rate": 1.5579194705716263e-05, "loss": 1.0446, "step": 2664 }, { "epoch": 0.3324186104527878, "grad_norm": 0.0, "learning_rate": 1.557584137968593e-05, "loss": 1.0086, "step": 2665 }, { "epoch": 0.332543345391044, "grad_norm": 0.0, "learning_rate": 1.5572487143537717e-05, "loss": 0.9814, "step": 2666 }, { "epoch": 0.33266808032930023, "grad_norm": 0.0, "learning_rate": 1.556913199781912e-05, "loss": 0.9872, "step": 2667 }, { "epoch": 0.33279281526755644, "grad_norm": 0.0, "learning_rate": 1.556577594307779e-05, "loss": 0.9912, "step": 2668 }, { "epoch": 0.33291755020581265, "grad_norm": 0.0, "learning_rate": 1.556241897986151e-05, "loss": 0.9919, "step": 2669 }, { "epoch": 0.33304228514406886, "grad_norm": 0.0, "learning_rate": 1.5559061108718225e-05, "loss": 0.9407, "step": 2670 }, { "epoch": 0.33316702008232507, "grad_norm": 0.0, "learning_rate": 1.5555702330196024e-05, "loss": 1.0375, "step": 2671 }, { "epoch": 0.3332917550205813, "grad_norm": 0.0, "learning_rate": 1.5552342644843142e-05, "loss": 1.0659, "step": 2672 }, { "epoch": 0.3334164899588375, "grad_norm": 0.0, "learning_rate": 1.554898205320797e-05, "loss": 1.0327, "step": 2673 }, { "epoch": 0.3335412248970937, "grad_norm": 0.0, "learning_rate": 1.5545620555839036e-05, "loss": 0.9462, "step": 2674 }, { "epoch": 0.33366595983534986, "grad_norm": 0.0, "learning_rate": 1.5542258153285025e-05, "loss": 0.9917, "step": 2675 }, { "epoch": 0.33379069477360607, "grad_norm": 0.0, "learning_rate": 1.553889484609476e-05, "loss": 0.9822, "step": 2676 }, { "epoch": 0.3339154297118623, "grad_norm": 0.0, "learning_rate": 1.5535530634817227e-05, "loss": 1.0097, "step": 2677 }, { "epoch": 0.3340401646501185, "grad_norm": 0.0, "learning_rate": 1.5532165520001537e-05, "loss": 1.0045, "step": 2678 }, { "epoch": 0.3341648995883747, "grad_norm": 0.0, "learning_rate": 1.5528799502196975e-05, "loss": 0.9702, "step": 2679 }, { "epoch": 0.3342896345266309, "grad_norm": 0.0, "learning_rate": 1.552543258195295e-05, "loss": 1.0045, "step": 2680 }, { "epoch": 0.3344143694648871, "grad_norm": 0.0, "learning_rate": 1.5522064759819037e-05, "loss": 0.9841, "step": 2681 }, { "epoch": 0.33453910440314333, "grad_norm": 0.0, "learning_rate": 1.551869603634494e-05, "loss": 1.0172, "step": 2682 }, { "epoch": 0.33466383934139954, "grad_norm": 0.0, "learning_rate": 1.5515326412080527e-05, "loss": 1.0014, "step": 2683 }, { "epoch": 0.33478857427965575, "grad_norm": 0.0, "learning_rate": 1.5511955887575803e-05, "loss": 1.0391, "step": 2684 }, { "epoch": 0.33491330921791196, "grad_norm": 0.0, "learning_rate": 1.5508584463380923e-05, "loss": 0.9775, "step": 2685 }, { "epoch": 0.33503804415616817, "grad_norm": 0.0, "learning_rate": 1.5505212140046186e-05, "loss": 1.0322, "step": 2686 }, { "epoch": 0.3351627790944243, "grad_norm": 0.0, "learning_rate": 1.5501838918122043e-05, "loss": 1.007, "step": 2687 }, { "epoch": 0.33528751403268053, "grad_norm": 0.0, "learning_rate": 1.5498464798159088e-05, "loss": 0.9868, "step": 2688 }, { "epoch": 0.33541224897093674, "grad_norm": 0.0, "learning_rate": 1.5495089780708062e-05, "loss": 1.0296, "step": 2689 }, { "epoch": 0.33553698390919295, "grad_norm": 0.0, "learning_rate": 1.5491713866319852e-05, "loss": 1.0114, "step": 2690 }, { "epoch": 0.33566171884744916, "grad_norm": 0.0, "learning_rate": 1.5488337055545497e-05, "loss": 1.0364, "step": 2691 }, { "epoch": 0.3357864537857054, "grad_norm": 0.0, "learning_rate": 1.5484959348936174e-05, "loss": 0.9922, "step": 2692 }, { "epoch": 0.3359111887239616, "grad_norm": 0.0, "learning_rate": 1.5481580747043208e-05, "loss": 1.045, "step": 2693 }, { "epoch": 0.3360359236622178, "grad_norm": 0.0, "learning_rate": 1.5478201250418074e-05, "loss": 0.9512, "step": 2694 }, { "epoch": 0.336160658600474, "grad_norm": 0.0, "learning_rate": 1.5474820859612394e-05, "loss": 0.9585, "step": 2695 }, { "epoch": 0.3362853935387302, "grad_norm": 0.0, "learning_rate": 1.547143957517793e-05, "loss": 1.0041, "step": 2696 }, { "epoch": 0.3364101284769864, "grad_norm": 0.0, "learning_rate": 1.546805739766659e-05, "loss": 1.0173, "step": 2697 }, { "epoch": 0.33653486341524264, "grad_norm": 0.0, "learning_rate": 1.5464674327630437e-05, "loss": 1.031, "step": 2698 }, { "epoch": 0.3366595983534988, "grad_norm": 0.0, "learning_rate": 1.546129036562167e-05, "loss": 1.0128, "step": 2699 }, { "epoch": 0.336784333291755, "grad_norm": 0.0, "learning_rate": 1.5457905512192634e-05, "loss": 0.9932, "step": 2700 }, { "epoch": 0.3369090682300112, "grad_norm": 0.0, "learning_rate": 1.5454519767895826e-05, "loss": 1.0376, "step": 2701 }, { "epoch": 0.3370338031682674, "grad_norm": 0.0, "learning_rate": 1.5451133133283886e-05, "loss": 0.9697, "step": 2702 }, { "epoch": 0.33715853810652363, "grad_norm": 0.0, "learning_rate": 1.5447745608909595e-05, "loss": 0.9579, "step": 2703 }, { "epoch": 0.33728327304477984, "grad_norm": 0.0, "learning_rate": 1.5444357195325885e-05, "loss": 0.9458, "step": 2704 }, { "epoch": 0.33740800798303605, "grad_norm": 0.0, "learning_rate": 1.5440967893085827e-05, "loss": 0.967, "step": 2705 }, { "epoch": 0.33753274292129226, "grad_norm": 0.0, "learning_rate": 1.5437577702742646e-05, "loss": 1.0007, "step": 2706 }, { "epoch": 0.33765747785954847, "grad_norm": 0.0, "learning_rate": 1.54341866248497e-05, "loss": 1.0336, "step": 2707 }, { "epoch": 0.3377822127978047, "grad_norm": 0.0, "learning_rate": 1.5430794659960505e-05, "loss": 1.0902, "step": 2708 }, { "epoch": 0.3379069477360609, "grad_norm": 0.0, "learning_rate": 1.5427401808628714e-05, "loss": 0.964, "step": 2709 }, { "epoch": 0.3380316826743171, "grad_norm": 0.0, "learning_rate": 1.542400807140812e-05, "loss": 0.9523, "step": 2710 }, { "epoch": 0.33815641761257326, "grad_norm": 0.0, "learning_rate": 1.5420613448852667e-05, "loss": 0.9623, "step": 2711 }, { "epoch": 0.33828115255082947, "grad_norm": 0.0, "learning_rate": 1.541721794151645e-05, "loss": 0.9673, "step": 2712 }, { "epoch": 0.3384058874890857, "grad_norm": 0.0, "learning_rate": 1.5413821549953697e-05, "loss": 1.0096, "step": 2713 }, { "epoch": 0.3385306224273419, "grad_norm": 0.0, "learning_rate": 1.541042427471879e-05, "loss": 0.9572, "step": 2714 }, { "epoch": 0.3386553573655981, "grad_norm": 0.0, "learning_rate": 1.5407026116366238e-05, "loss": 1.0304, "step": 2715 }, { "epoch": 0.3387800923038543, "grad_norm": 0.0, "learning_rate": 1.5403627075450717e-05, "loss": 1.0092, "step": 2716 }, { "epoch": 0.3389048272421105, "grad_norm": 0.0, "learning_rate": 1.540022715252703e-05, "loss": 1.0069, "step": 2717 }, { "epoch": 0.33902956218036673, "grad_norm": 0.0, "learning_rate": 1.5396826348150134e-05, "loss": 0.9823, "step": 2718 }, { "epoch": 0.33915429711862294, "grad_norm": 0.0, "learning_rate": 1.539342466287513e-05, "loss": 1.0404, "step": 2719 }, { "epoch": 0.33927903205687915, "grad_norm": 0.0, "learning_rate": 1.5390022097257248e-05, "loss": 1.0079, "step": 2720 }, { "epoch": 0.33940376699513536, "grad_norm": 0.0, "learning_rate": 1.538661865185188e-05, "loss": 0.9925, "step": 2721 }, { "epoch": 0.33952850193339157, "grad_norm": 0.0, "learning_rate": 1.538321432721455e-05, "loss": 1.0124, "step": 2722 }, { "epoch": 0.3396532368716477, "grad_norm": 0.0, "learning_rate": 1.5379809123900936e-05, "loss": 1.035, "step": 2723 }, { "epoch": 0.33977797180990393, "grad_norm": 0.0, "learning_rate": 1.5376403042466846e-05, "loss": 1.0668, "step": 2724 }, { "epoch": 0.33990270674816014, "grad_norm": 0.0, "learning_rate": 1.5372996083468242e-05, "loss": 1.0162, "step": 2725 }, { "epoch": 0.34002744168641635, "grad_norm": 0.0, "learning_rate": 1.5369588247461224e-05, "loss": 0.9666, "step": 2726 }, { "epoch": 0.34015217662467256, "grad_norm": 0.0, "learning_rate": 1.5366179535002033e-05, "loss": 1.0114, "step": 2727 }, { "epoch": 0.3402769115629288, "grad_norm": 0.0, "learning_rate": 1.5362769946647068e-05, "loss": 0.9696, "step": 2728 }, { "epoch": 0.340401646501185, "grad_norm": 0.0, "learning_rate": 1.535935948295285e-05, "loss": 1.0251, "step": 2729 }, { "epoch": 0.3405263814394412, "grad_norm": 0.0, "learning_rate": 1.5355948144476056e-05, "loss": 0.943, "step": 2730 }, { "epoch": 0.3406511163776974, "grad_norm": 0.0, "learning_rate": 1.5352535931773506e-05, "loss": 0.9749, "step": 2731 }, { "epoch": 0.3407758513159536, "grad_norm": 0.0, "learning_rate": 1.534912284540215e-05, "loss": 0.9967, "step": 2732 }, { "epoch": 0.3409005862542098, "grad_norm": 0.0, "learning_rate": 1.5345708885919095e-05, "loss": 1.0092, "step": 2733 }, { "epoch": 0.34102532119246604, "grad_norm": 0.0, "learning_rate": 1.534229405388159e-05, "loss": 1.0158, "step": 2734 }, { "epoch": 0.3411500561307222, "grad_norm": 0.0, "learning_rate": 1.533887834984701e-05, "loss": 0.9763, "step": 2735 }, { "epoch": 0.3412747910689784, "grad_norm": 0.0, "learning_rate": 1.5335461774372898e-05, "loss": 1.0088, "step": 2736 }, { "epoch": 0.3413995260072346, "grad_norm": 0.0, "learning_rate": 1.5332044328016916e-05, "loss": 0.9853, "step": 2737 }, { "epoch": 0.3415242609454908, "grad_norm": 0.0, "learning_rate": 1.5328626011336874e-05, "loss": 1.0425, "step": 2738 }, { "epoch": 0.34164899588374703, "grad_norm": 0.0, "learning_rate": 1.532520682489074e-05, "loss": 1.0607, "step": 2739 }, { "epoch": 0.34177373082200324, "grad_norm": 0.0, "learning_rate": 1.5321786769236604e-05, "loss": 1.0344, "step": 2740 }, { "epoch": 0.34189846576025945, "grad_norm": 0.0, "learning_rate": 1.5318365844932702e-05, "loss": 1.0292, "step": 2741 }, { "epoch": 0.34202320069851566, "grad_norm": 0.0, "learning_rate": 1.531494405253742e-05, "loss": 0.9895, "step": 2742 }, { "epoch": 0.34214793563677187, "grad_norm": 0.0, "learning_rate": 1.5311521392609283e-05, "loss": 1.0076, "step": 2743 }, { "epoch": 0.3422726705750281, "grad_norm": 0.0, "learning_rate": 1.530809786570695e-05, "loss": 1.0054, "step": 2744 }, { "epoch": 0.3423974055132843, "grad_norm": 0.0, "learning_rate": 1.5304673472389227e-05, "loss": 1.014, "step": 2745 }, { "epoch": 0.3425221404515405, "grad_norm": 0.0, "learning_rate": 1.5301248213215065e-05, "loss": 1.0007, "step": 2746 }, { "epoch": 0.34264687538979666, "grad_norm": 0.0, "learning_rate": 1.5297822088743552e-05, "loss": 0.9896, "step": 2747 }, { "epoch": 0.34277161032805287, "grad_norm": 0.0, "learning_rate": 1.5294395099533913e-05, "loss": 1.0277, "step": 2748 }, { "epoch": 0.3428963452663091, "grad_norm": 0.0, "learning_rate": 1.5290967246145528e-05, "loss": 0.9899, "step": 2749 }, { "epoch": 0.3430210802045653, "grad_norm": 0.0, "learning_rate": 1.52875385291379e-05, "loss": 1.0151, "step": 2750 }, { "epoch": 0.3431458151428215, "grad_norm": 0.0, "learning_rate": 1.5284108949070687e-05, "loss": 0.9973, "step": 2751 }, { "epoch": 0.3432705500810777, "grad_norm": 0.0, "learning_rate": 1.528067850650368e-05, "loss": 0.9389, "step": 2752 }, { "epoch": 0.3433952850193339, "grad_norm": 0.0, "learning_rate": 1.5277247201996818e-05, "loss": 0.9974, "step": 2753 }, { "epoch": 0.34352001995759013, "grad_norm": 0.0, "learning_rate": 1.5273815036110177e-05, "loss": 1.0054, "step": 2754 }, { "epoch": 0.34364475489584634, "grad_norm": 0.0, "learning_rate": 1.5270382009403963e-05, "loss": 0.9171, "step": 2755 }, { "epoch": 0.34376948983410255, "grad_norm": 0.0, "learning_rate": 1.5266948122438544e-05, "loss": 1.0608, "step": 2756 }, { "epoch": 0.34389422477235876, "grad_norm": 0.0, "learning_rate": 1.5263513375774413e-05, "loss": 1.0108, "step": 2757 }, { "epoch": 0.34401895971061497, "grad_norm": 0.0, "learning_rate": 1.5260077769972204e-05, "loss": 0.9701, "step": 2758 }, { "epoch": 0.3441436946488711, "grad_norm": 0.0, "learning_rate": 1.52566413055927e-05, "loss": 0.9974, "step": 2759 }, { "epoch": 0.34426842958712733, "grad_norm": 0.0, "learning_rate": 1.5253203983196812e-05, "loss": 1.0154, "step": 2760 }, { "epoch": 0.34439316452538354, "grad_norm": 0.0, "learning_rate": 1.5249765803345602e-05, "loss": 1.0549, "step": 2761 }, { "epoch": 0.34451789946363975, "grad_norm": 0.0, "learning_rate": 1.524632676660027e-05, "loss": 1.0566, "step": 2762 }, { "epoch": 0.34464263440189596, "grad_norm": 0.0, "learning_rate": 1.5242886873522149e-05, "loss": 0.9954, "step": 2763 }, { "epoch": 0.3447673693401522, "grad_norm": 0.0, "learning_rate": 1.5239446124672717e-05, "loss": 0.9936, "step": 2764 }, { "epoch": 0.3448921042784084, "grad_norm": 0.0, "learning_rate": 1.5236004520613592e-05, "loss": 0.975, "step": 2765 }, { "epoch": 0.3450168392166646, "grad_norm": 0.0, "learning_rate": 1.5232562061906533e-05, "loss": 0.9909, "step": 2766 }, { "epoch": 0.3451415741549208, "grad_norm": 0.0, "learning_rate": 1.5229118749113432e-05, "loss": 1.08, "step": 2767 }, { "epoch": 0.345266309093177, "grad_norm": 0.0, "learning_rate": 1.5225674582796321e-05, "loss": 0.9894, "step": 2768 }, { "epoch": 0.3453910440314332, "grad_norm": 0.0, "learning_rate": 1.5222229563517385e-05, "loss": 1.0201, "step": 2769 }, { "epoch": 0.34551577896968944, "grad_norm": 0.0, "learning_rate": 1.5218783691838935e-05, "loss": 1.0026, "step": 2770 }, { "epoch": 0.3456405139079456, "grad_norm": 0.0, "learning_rate": 1.5215336968323415e-05, "loss": 0.9778, "step": 2771 }, { "epoch": 0.3457652488462018, "grad_norm": 0.0, "learning_rate": 1.5211889393533427e-05, "loss": 0.9991, "step": 2772 }, { "epoch": 0.345889983784458, "grad_norm": 0.0, "learning_rate": 1.52084409680317e-05, "loss": 1.0153, "step": 2773 }, { "epoch": 0.3460147187227142, "grad_norm": 0.0, "learning_rate": 1.5204991692381099e-05, "loss": 0.9828, "step": 2774 }, { "epoch": 0.34613945366097043, "grad_norm": 0.0, "learning_rate": 1.5201541567144639e-05, "loss": 1.0393, "step": 2775 }, { "epoch": 0.34626418859922664, "grad_norm": 0.0, "learning_rate": 1.5198090592885469e-05, "loss": 0.9762, "step": 2776 }, { "epoch": 0.34638892353748285, "grad_norm": 0.0, "learning_rate": 1.5194638770166865e-05, "loss": 1.0246, "step": 2777 }, { "epoch": 0.34651365847573906, "grad_norm": 0.0, "learning_rate": 1.5191186099552261e-05, "loss": 1.0668, "step": 2778 }, { "epoch": 0.34663839341399527, "grad_norm": 0.0, "learning_rate": 1.5187732581605217e-05, "loss": 1.0404, "step": 2779 }, { "epoch": 0.3467631283522515, "grad_norm": 0.0, "learning_rate": 1.5184278216889431e-05, "loss": 0.9436, "step": 2780 }, { "epoch": 0.3468878632905077, "grad_norm": 0.0, "learning_rate": 1.5180823005968746e-05, "loss": 1.046, "step": 2781 }, { "epoch": 0.3470125982287639, "grad_norm": 0.0, "learning_rate": 1.517736694940714e-05, "loss": 0.9809, "step": 2782 }, { "epoch": 0.34713733316702006, "grad_norm": 0.0, "learning_rate": 1.5173910047768728e-05, "loss": 0.99, "step": 2783 }, { "epoch": 0.34726206810527627, "grad_norm": 0.0, "learning_rate": 1.517045230161776e-05, "loss": 1.009, "step": 2784 }, { "epoch": 0.3473868030435325, "grad_norm": 0.0, "learning_rate": 1.5166993711518631e-05, "loss": 0.9996, "step": 2785 }, { "epoch": 0.3475115379817887, "grad_norm": 0.0, "learning_rate": 1.5163534278035869e-05, "loss": 0.9775, "step": 2786 }, { "epoch": 0.3476362729200449, "grad_norm": 0.0, "learning_rate": 1.5160074001734142e-05, "loss": 1.0312, "step": 2787 }, { "epoch": 0.3477610078583011, "grad_norm": 0.0, "learning_rate": 1.515661288317825e-05, "loss": 1.0535, "step": 2788 }, { "epoch": 0.3478857427965573, "grad_norm": 0.0, "learning_rate": 1.5153150922933141e-05, "loss": 0.977, "step": 2789 }, { "epoch": 0.34801047773481353, "grad_norm": 0.0, "learning_rate": 1.514968812156389e-05, "loss": 0.9796, "step": 2790 }, { "epoch": 0.34813521267306974, "grad_norm": 0.0, "learning_rate": 1.5146224479635715e-05, "loss": 1.0475, "step": 2791 }, { "epoch": 0.34825994761132595, "grad_norm": 0.0, "learning_rate": 1.5142759997713969e-05, "loss": 1.0165, "step": 2792 }, { "epoch": 0.34838468254958216, "grad_norm": 0.0, "learning_rate": 1.5139294676364146e-05, "loss": 0.9553, "step": 2793 }, { "epoch": 0.34850941748783837, "grad_norm": 0.0, "learning_rate": 1.5135828516151868e-05, "loss": 0.9831, "step": 2794 }, { "epoch": 0.3486341524260945, "grad_norm": 0.0, "learning_rate": 1.5132361517642902e-05, "loss": 1.0291, "step": 2795 }, { "epoch": 0.34875888736435073, "grad_norm": 0.0, "learning_rate": 1.5128893681403152e-05, "loss": 1.0299, "step": 2796 }, { "epoch": 0.34888362230260694, "grad_norm": 0.0, "learning_rate": 1.5125425007998653e-05, "loss": 1.033, "step": 2797 }, { "epoch": 0.34900835724086315, "grad_norm": 0.0, "learning_rate": 1.5121955497995581e-05, "loss": 0.9655, "step": 2798 }, { "epoch": 0.34913309217911936, "grad_norm": 0.0, "learning_rate": 1.5118485151960252e-05, "loss": 0.9885, "step": 2799 }, { "epoch": 0.3492578271173756, "grad_norm": 0.0, "learning_rate": 1.5115013970459108e-05, "loss": 1.0125, "step": 2800 }, { "epoch": 0.3493825620556318, "grad_norm": 0.0, "learning_rate": 1.5111541954058733e-05, "loss": 1.0099, "step": 2801 }, { "epoch": 0.349507296993888, "grad_norm": 0.0, "learning_rate": 1.5108069103325852e-05, "loss": 0.9761, "step": 2802 }, { "epoch": 0.3496320319321442, "grad_norm": 0.0, "learning_rate": 1.5104595418827317e-05, "loss": 0.984, "step": 2803 }, { "epoch": 0.3497567668704004, "grad_norm": 0.0, "learning_rate": 1.5101120901130126e-05, "loss": 0.9781, "step": 2804 }, { "epoch": 0.3498815018086566, "grad_norm": 0.0, "learning_rate": 1.5097645550801404e-05, "loss": 0.9944, "step": 2805 }, { "epoch": 0.35000623674691284, "grad_norm": 0.0, "learning_rate": 1.509416936840842e-05, "loss": 0.927, "step": 2806 }, { "epoch": 0.350130971685169, "grad_norm": 0.0, "learning_rate": 1.509069235451857e-05, "loss": 1.0058, "step": 2807 }, { "epoch": 0.3502557066234252, "grad_norm": 0.0, "learning_rate": 1.5087214509699388e-05, "loss": 1.0173, "step": 2808 }, { "epoch": 0.3503804415616814, "grad_norm": 0.0, "learning_rate": 1.5083735834518556e-05, "loss": 1.0044, "step": 2809 }, { "epoch": 0.3505051764999376, "grad_norm": 0.0, "learning_rate": 1.5080256329543877e-05, "loss": 0.9838, "step": 2810 }, { "epoch": 0.35062991143819383, "grad_norm": 0.0, "learning_rate": 1.5076775995343286e-05, "loss": 0.9809, "step": 2811 }, { "epoch": 0.35075464637645004, "grad_norm": 0.0, "learning_rate": 1.5073294832484872e-05, "loss": 1.0173, "step": 2812 }, { "epoch": 0.35087938131470625, "grad_norm": 0.0, "learning_rate": 1.5069812841536844e-05, "loss": 0.9986, "step": 2813 }, { "epoch": 0.35100411625296246, "grad_norm": 0.0, "learning_rate": 1.5066330023067549e-05, "loss": 1.0107, "step": 2814 }, { "epoch": 0.35112885119121867, "grad_norm": 0.0, "learning_rate": 1.5062846377645476e-05, "loss": 0.9821, "step": 2815 }, { "epoch": 0.3512535861294749, "grad_norm": 0.0, "learning_rate": 1.5059361905839236e-05, "loss": 0.9763, "step": 2816 }, { "epoch": 0.3513783210677311, "grad_norm": 0.0, "learning_rate": 1.505587660821759e-05, "loss": 1.0172, "step": 2817 }, { "epoch": 0.3515030560059873, "grad_norm": 0.0, "learning_rate": 1.5052390485349424e-05, "loss": 0.9506, "step": 2818 }, { "epoch": 0.35162779094424346, "grad_norm": 0.0, "learning_rate": 1.5048903537803759e-05, "loss": 1.0249, "step": 2819 }, { "epoch": 0.35175252588249967, "grad_norm": 0.0, "learning_rate": 1.5045415766149755e-05, "loss": 0.976, "step": 2820 }, { "epoch": 0.3518772608207559, "grad_norm": 0.0, "learning_rate": 1.5041927170956707e-05, "loss": 1.0174, "step": 2821 }, { "epoch": 0.3520019957590121, "grad_norm": 0.0, "learning_rate": 1.5038437752794035e-05, "loss": 0.9818, "step": 2822 }, { "epoch": 0.3521267306972683, "grad_norm": 0.0, "learning_rate": 1.5034947512231304e-05, "loss": 0.9849, "step": 2823 }, { "epoch": 0.3522514656355245, "grad_norm": 0.0, "learning_rate": 1.5031456449838207e-05, "loss": 1.0105, "step": 2824 }, { "epoch": 0.3523762005737807, "grad_norm": 0.0, "learning_rate": 1.5027964566184578e-05, "loss": 0.9842, "step": 2825 }, { "epoch": 0.35250093551203693, "grad_norm": 0.0, "learning_rate": 1.5024471861840378e-05, "loss": 0.934, "step": 2826 }, { "epoch": 0.35262567045029314, "grad_norm": 0.0, "learning_rate": 1.5020978337375702e-05, "loss": 0.9912, "step": 2827 }, { "epoch": 0.35275040538854935, "grad_norm": 0.0, "learning_rate": 1.5017483993360782e-05, "loss": 0.9825, "step": 2828 }, { "epoch": 0.35287514032680556, "grad_norm": 0.0, "learning_rate": 1.5013988830365986e-05, "loss": 1.0161, "step": 2829 }, { "epoch": 0.35299987526506177, "grad_norm": 0.0, "learning_rate": 1.501049284896181e-05, "loss": 1.0253, "step": 2830 }, { "epoch": 0.3531246102033179, "grad_norm": 0.0, "learning_rate": 1.5006996049718887e-05, "loss": 1.0009, "step": 2831 }, { "epoch": 0.35324934514157413, "grad_norm": 0.0, "learning_rate": 1.5003498433207987e-05, "loss": 0.9749, "step": 2832 }, { "epoch": 0.35337408007983034, "grad_norm": 0.0, "learning_rate": 1.5000000000000002e-05, "loss": 0.9799, "step": 2833 }, { "epoch": 0.35349881501808655, "grad_norm": 0.0, "learning_rate": 1.4996500750665968e-05, "loss": 1.0003, "step": 2834 }, { "epoch": 0.35362354995634276, "grad_norm": 0.0, "learning_rate": 1.4993000685777051e-05, "loss": 0.9438, "step": 2835 }, { "epoch": 0.353748284894599, "grad_norm": 0.0, "learning_rate": 1.4989499805904548e-05, "loss": 0.9778, "step": 2836 }, { "epoch": 0.3538730198328552, "grad_norm": 0.0, "learning_rate": 1.498599811161989e-05, "loss": 0.9979, "step": 2837 }, { "epoch": 0.3539977547711114, "grad_norm": 0.0, "learning_rate": 1.4982495603494648e-05, "loss": 1.0163, "step": 2838 }, { "epoch": 0.3541224897093676, "grad_norm": 0.0, "learning_rate": 1.4978992282100518e-05, "loss": 1.0, "step": 2839 }, { "epoch": 0.3542472246476238, "grad_norm": 0.0, "learning_rate": 1.497548814800932e-05, "loss": 1.0519, "step": 2840 }, { "epoch": 0.35437195958588, "grad_norm": 0.0, "learning_rate": 1.497198320179303e-05, "loss": 0.9876, "step": 2841 }, { "epoch": 0.35449669452413624, "grad_norm": 0.0, "learning_rate": 1.4968477444023739e-05, "loss": 1.0283, "step": 2842 }, { "epoch": 0.3546214294623924, "grad_norm": 0.0, "learning_rate": 1.496497087527367e-05, "loss": 0.9821, "step": 2843 }, { "epoch": 0.3547461644006486, "grad_norm": 0.0, "learning_rate": 1.4961463496115194e-05, "loss": 0.9752, "step": 2844 }, { "epoch": 0.3548708993389048, "grad_norm": 0.0, "learning_rate": 1.4957955307120796e-05, "loss": 1.0184, "step": 2845 }, { "epoch": 0.354995634277161, "grad_norm": 0.0, "learning_rate": 1.4954446308863098e-05, "loss": 0.9585, "step": 2846 }, { "epoch": 0.35512036921541723, "grad_norm": 0.0, "learning_rate": 1.4950936501914863e-05, "loss": 0.9566, "step": 2847 }, { "epoch": 0.35524510415367344, "grad_norm": 0.0, "learning_rate": 1.4947425886848983e-05, "loss": 0.961, "step": 2848 }, { "epoch": 0.35536983909192965, "grad_norm": 0.0, "learning_rate": 1.494391446423847e-05, "loss": 0.9629, "step": 2849 }, { "epoch": 0.35549457403018586, "grad_norm": 0.0, "learning_rate": 1.4940402234656484e-05, "loss": 0.9813, "step": 2850 }, { "epoch": 0.35561930896844207, "grad_norm": 0.0, "learning_rate": 1.4936889198676303e-05, "loss": 0.9803, "step": 2851 }, { "epoch": 0.3557440439066983, "grad_norm": 0.0, "learning_rate": 1.4933375356871349e-05, "loss": 1.0245, "step": 2852 }, { "epoch": 0.3558687788449545, "grad_norm": 0.0, "learning_rate": 1.4929860709815166e-05, "loss": 0.9981, "step": 2853 }, { "epoch": 0.3559935137832107, "grad_norm": 0.0, "learning_rate": 1.4926345258081433e-05, "loss": 0.9748, "step": 2854 }, { "epoch": 0.35611824872146686, "grad_norm": 0.0, "learning_rate": 1.4922829002243968e-05, "loss": 0.9947, "step": 2855 }, { "epoch": 0.35624298365972307, "grad_norm": 0.0, "learning_rate": 1.49193119428767e-05, "loss": 1.0089, "step": 2856 }, { "epoch": 0.3563677185979793, "grad_norm": 0.0, "learning_rate": 1.4915794080553706e-05, "loss": 0.9712, "step": 2857 }, { "epoch": 0.3564924535362355, "grad_norm": 0.0, "learning_rate": 1.4912275415849195e-05, "loss": 0.9885, "step": 2858 }, { "epoch": 0.3566171884744917, "grad_norm": 0.0, "learning_rate": 1.4908755949337499e-05, "loss": 1.0503, "step": 2859 }, { "epoch": 0.3567419234127479, "grad_norm": 0.0, "learning_rate": 1.4905235681593079e-05, "loss": 1.0304, "step": 2860 }, { "epoch": 0.3568666583510041, "grad_norm": 0.0, "learning_rate": 1.490171461319054e-05, "loss": 1.0446, "step": 2861 }, { "epoch": 0.35699139328926033, "grad_norm": 0.0, "learning_rate": 1.4898192744704605e-05, "loss": 0.9798, "step": 2862 }, { "epoch": 0.35711612822751654, "grad_norm": 0.0, "learning_rate": 1.4894670076710127e-05, "loss": 0.9971, "step": 2863 }, { "epoch": 0.35724086316577275, "grad_norm": 0.0, "learning_rate": 1.4891146609782102e-05, "loss": 0.9864, "step": 2864 }, { "epoch": 0.35736559810402896, "grad_norm": 0.0, "learning_rate": 1.4887622344495643e-05, "loss": 0.9692, "step": 2865 }, { "epoch": 0.35749033304228517, "grad_norm": 0.0, "learning_rate": 1.4884097281426004e-05, "loss": 1.0061, "step": 2866 }, { "epoch": 0.3576150679805413, "grad_norm": 0.0, "learning_rate": 1.488057142114856e-05, "loss": 1.0409, "step": 2867 }, { "epoch": 0.35773980291879753, "grad_norm": 0.0, "learning_rate": 1.4877044764238821e-05, "loss": 1.0067, "step": 2868 }, { "epoch": 0.35786453785705374, "grad_norm": 0.0, "learning_rate": 1.4873517311272425e-05, "loss": 1.0363, "step": 2869 }, { "epoch": 0.35798927279530995, "grad_norm": 0.0, "learning_rate": 1.4869989062825143e-05, "loss": 0.9545, "step": 2870 }, { "epoch": 0.35811400773356616, "grad_norm": 0.0, "learning_rate": 1.4866460019472877e-05, "loss": 0.9931, "step": 2871 }, { "epoch": 0.3582387426718224, "grad_norm": 0.0, "learning_rate": 1.4862930181791654e-05, "loss": 1.0165, "step": 2872 }, { "epoch": 0.3583634776100786, "grad_norm": 0.0, "learning_rate": 1.4859399550357629e-05, "loss": 0.963, "step": 2873 }, { "epoch": 0.3584882125483348, "grad_norm": 0.0, "learning_rate": 1.4855868125747093e-05, "loss": 0.9482, "step": 2874 }, { "epoch": 0.358612947486591, "grad_norm": 0.0, "learning_rate": 1.4852335908536464e-05, "loss": 0.9399, "step": 2875 }, { "epoch": 0.3587376824248472, "grad_norm": 0.0, "learning_rate": 1.4848802899302288e-05, "loss": 0.9738, "step": 2876 }, { "epoch": 0.3588624173631034, "grad_norm": 0.0, "learning_rate": 1.4845269098621237e-05, "loss": 0.9525, "step": 2877 }, { "epoch": 0.35898715230135964, "grad_norm": 0.0, "learning_rate": 1.484173450707013e-05, "loss": 1.0399, "step": 2878 }, { "epoch": 0.3591118872396158, "grad_norm": 0.0, "learning_rate": 1.4838199125225885e-05, "loss": 1.0152, "step": 2879 }, { "epoch": 0.359236622177872, "grad_norm": 0.0, "learning_rate": 1.4834662953665572e-05, "loss": 0.9662, "step": 2880 }, { "epoch": 0.3593613571161282, "grad_norm": 0.0, "learning_rate": 1.4831125992966386e-05, "loss": 1.0186, "step": 2881 }, { "epoch": 0.3594860920543844, "grad_norm": 0.0, "learning_rate": 1.4827588243705647e-05, "loss": 0.9814, "step": 2882 }, { "epoch": 0.35961082699264063, "grad_norm": 0.0, "learning_rate": 1.48240497064608e-05, "loss": 0.9667, "step": 2883 }, { "epoch": 0.35973556193089684, "grad_norm": 0.0, "learning_rate": 1.4820510381809432e-05, "loss": 0.9956, "step": 2884 }, { "epoch": 0.35986029686915305, "grad_norm": 0.0, "learning_rate": 1.4816970270329242e-05, "loss": 0.9922, "step": 2885 }, { "epoch": 0.35998503180740926, "grad_norm": 0.0, "learning_rate": 1.481342937259807e-05, "loss": 1.0126, "step": 2886 }, { "epoch": 0.36010976674566547, "grad_norm": 0.0, "learning_rate": 1.4809887689193878e-05, "loss": 0.9904, "step": 2887 }, { "epoch": 0.3602345016839217, "grad_norm": 0.0, "learning_rate": 1.480634522069476e-05, "loss": 0.9835, "step": 2888 }, { "epoch": 0.3603592366221779, "grad_norm": 0.0, "learning_rate": 1.4802801967678932e-05, "loss": 1.0033, "step": 2889 }, { "epoch": 0.3604839715604341, "grad_norm": 0.0, "learning_rate": 1.4799257930724747e-05, "loss": 0.9804, "step": 2890 }, { "epoch": 0.36060870649869026, "grad_norm": 0.0, "learning_rate": 1.4795713110410679e-05, "loss": 0.9716, "step": 2891 }, { "epoch": 0.36073344143694647, "grad_norm": 0.0, "learning_rate": 1.4792167507315332e-05, "loss": 0.9334, "step": 2892 }, { "epoch": 0.3608581763752027, "grad_norm": 0.0, "learning_rate": 1.4788621122017434e-05, "loss": 0.9822, "step": 2893 }, { "epoch": 0.3609829113134589, "grad_norm": 0.0, "learning_rate": 1.4785073955095855e-05, "loss": 0.9925, "step": 2894 }, { "epoch": 0.3611076462517151, "grad_norm": 0.0, "learning_rate": 1.4781526007129575e-05, "loss": 1.0099, "step": 2895 }, { "epoch": 0.3612323811899713, "grad_norm": 0.0, "learning_rate": 1.4777977278697704e-05, "loss": 1.0453, "step": 2896 }, { "epoch": 0.3613571161282275, "grad_norm": 0.0, "learning_rate": 1.4774427770379492e-05, "loss": 0.9504, "step": 2897 }, { "epoch": 0.36148185106648373, "grad_norm": 0.0, "learning_rate": 1.4770877482754304e-05, "loss": 1.0245, "step": 2898 }, { "epoch": 0.36160658600473994, "grad_norm": 0.0, "learning_rate": 1.4767326416401639e-05, "loss": 1.0735, "step": 2899 }, { "epoch": 0.36173132094299615, "grad_norm": 0.0, "learning_rate": 1.476377457190112e-05, "loss": 0.9711, "step": 2900 }, { "epoch": 0.36185605588125236, "grad_norm": 0.0, "learning_rate": 1.4760221949832497e-05, "loss": 0.9436, "step": 2901 }, { "epoch": 0.36198079081950857, "grad_norm": 0.0, "learning_rate": 1.4756668550775643e-05, "loss": 1.0249, "step": 2902 }, { "epoch": 0.3621055257577647, "grad_norm": 0.0, "learning_rate": 1.475311437531057e-05, "loss": 1.0056, "step": 2903 }, { "epoch": 0.36223026069602093, "grad_norm": 0.0, "learning_rate": 1.4749559424017405e-05, "loss": 1.0012, "step": 2904 }, { "epoch": 0.36235499563427714, "grad_norm": 0.0, "learning_rate": 1.4746003697476406e-05, "loss": 0.9912, "step": 2905 }, { "epoch": 0.36247973057253335, "grad_norm": 0.0, "learning_rate": 1.4742447196267957e-05, "loss": 0.9979, "step": 2906 }, { "epoch": 0.36260446551078956, "grad_norm": 0.0, "learning_rate": 1.473888992097257e-05, "loss": 1.0139, "step": 2907 }, { "epoch": 0.3627292004490458, "grad_norm": 0.0, "learning_rate": 1.4735331872170882e-05, "loss": 0.9583, "step": 2908 }, { "epoch": 0.362853935387302, "grad_norm": 0.0, "learning_rate": 1.4731773050443654e-05, "loss": 0.9923, "step": 2909 }, { "epoch": 0.3629786703255582, "grad_norm": 0.0, "learning_rate": 1.4728213456371777e-05, "loss": 0.989, "step": 2910 }, { "epoch": 0.3631034052638144, "grad_norm": 0.0, "learning_rate": 1.4724653090536266e-05, "loss": 1.0014, "step": 2911 }, { "epoch": 0.3632281402020706, "grad_norm": 0.0, "learning_rate": 1.4721091953518264e-05, "loss": 0.922, "step": 2912 }, { "epoch": 0.3633528751403268, "grad_norm": 0.0, "learning_rate": 1.4717530045899038e-05, "loss": 0.9941, "step": 2913 }, { "epoch": 0.36347761007858304, "grad_norm": 0.0, "learning_rate": 1.4713967368259981e-05, "loss": 1.0087, "step": 2914 }, { "epoch": 0.3636023450168392, "grad_norm": 0.0, "learning_rate": 1.4710403921182605e-05, "loss": 0.9785, "step": 2915 }, { "epoch": 0.3637270799550954, "grad_norm": 0.0, "learning_rate": 1.4706839705248564e-05, "loss": 1.0208, "step": 2916 }, { "epoch": 0.3638518148933516, "grad_norm": 0.0, "learning_rate": 1.4703274721039626e-05, "loss": 0.9719, "step": 2917 }, { "epoch": 0.3639765498316078, "grad_norm": 0.0, "learning_rate": 1.4699708969137684e-05, "loss": 0.9952, "step": 2918 }, { "epoch": 0.36410128476986403, "grad_norm": 0.0, "learning_rate": 1.4696142450124758e-05, "loss": 0.9925, "step": 2919 }, { "epoch": 0.36422601970812024, "grad_norm": 0.0, "learning_rate": 1.4692575164582997e-05, "loss": 1.0146, "step": 2920 }, { "epoch": 0.36435075464637645, "grad_norm": 0.0, "learning_rate": 1.4689007113094667e-05, "loss": 1.0125, "step": 2921 }, { "epoch": 0.36447548958463266, "grad_norm": 0.0, "learning_rate": 1.468543829624217e-05, "loss": 1.0407, "step": 2922 }, { "epoch": 0.36460022452288887, "grad_norm": 0.0, "learning_rate": 1.4681868714608021e-05, "loss": 1.0328, "step": 2923 }, { "epoch": 0.3647249594611451, "grad_norm": 0.0, "learning_rate": 1.467829836877487e-05, "loss": 0.9793, "step": 2924 }, { "epoch": 0.3648496943994013, "grad_norm": 0.0, "learning_rate": 1.4674727259325487e-05, "loss": 1.0099, "step": 2925 }, { "epoch": 0.3649744293376575, "grad_norm": 0.0, "learning_rate": 1.4671155386842764e-05, "loss": 0.9585, "step": 2926 }, { "epoch": 0.36509916427591366, "grad_norm": 0.0, "learning_rate": 1.4667582751909723e-05, "loss": 1.0338, "step": 2927 }, { "epoch": 0.36522389921416987, "grad_norm": 0.0, "learning_rate": 1.466400935510951e-05, "loss": 0.933, "step": 2928 }, { "epoch": 0.3653486341524261, "grad_norm": 0.0, "learning_rate": 1.4660435197025391e-05, "loss": 0.9981, "step": 2929 }, { "epoch": 0.3654733690906823, "grad_norm": 0.0, "learning_rate": 1.4656860278240756e-05, "loss": 0.9828, "step": 2930 }, { "epoch": 0.3655981040289385, "grad_norm": 0.0, "learning_rate": 1.4653284599339128e-05, "loss": 0.976, "step": 2931 }, { "epoch": 0.3657228389671947, "grad_norm": 0.0, "learning_rate": 1.4649708160904142e-05, "loss": 0.9517, "step": 2932 }, { "epoch": 0.3658475739054509, "grad_norm": 0.0, "learning_rate": 1.4646130963519567e-05, "loss": 0.9611, "step": 2933 }, { "epoch": 0.36597230884370713, "grad_norm": 0.0, "learning_rate": 1.4642553007769292e-05, "loss": 0.9719, "step": 2934 }, { "epoch": 0.36609704378196334, "grad_norm": 0.0, "learning_rate": 1.4638974294237325e-05, "loss": 1.0356, "step": 2935 }, { "epoch": 0.36622177872021955, "grad_norm": 0.0, "learning_rate": 1.4635394823507807e-05, "loss": 0.9767, "step": 2936 }, { "epoch": 0.36634651365847576, "grad_norm": 0.0, "learning_rate": 1.4631814596164995e-05, "loss": 0.9099, "step": 2937 }, { "epoch": 0.36647124859673197, "grad_norm": 0.0, "learning_rate": 1.4628233612793277e-05, "loss": 0.9964, "step": 2938 }, { "epoch": 0.3665959835349881, "grad_norm": 0.0, "learning_rate": 1.4624651873977152e-05, "loss": 1.0148, "step": 2939 }, { "epoch": 0.36672071847324433, "grad_norm": 0.0, "learning_rate": 1.4621069380301256e-05, "loss": 1.0399, "step": 2940 }, { "epoch": 0.36684545341150054, "grad_norm": 0.0, "learning_rate": 1.4617486132350343e-05, "loss": 0.9567, "step": 2941 }, { "epoch": 0.36697018834975675, "grad_norm": 0.0, "learning_rate": 1.4613902130709282e-05, "loss": 0.9391, "step": 2942 }, { "epoch": 0.36709492328801296, "grad_norm": 0.0, "learning_rate": 1.461031737596308e-05, "loss": 1.0167, "step": 2943 }, { "epoch": 0.3672196582262692, "grad_norm": 0.0, "learning_rate": 1.4606731868696857e-05, "loss": 0.9683, "step": 2944 }, { "epoch": 0.3673443931645254, "grad_norm": 0.0, "learning_rate": 1.460314560949586e-05, "loss": 0.9692, "step": 2945 }, { "epoch": 0.3674691281027816, "grad_norm": 0.0, "learning_rate": 1.4599558598945452e-05, "loss": 0.9964, "step": 2946 }, { "epoch": 0.3675938630410378, "grad_norm": 0.0, "learning_rate": 1.4595970837631126e-05, "loss": 0.9467, "step": 2947 }, { "epoch": 0.367718597979294, "grad_norm": 0.0, "learning_rate": 1.4592382326138498e-05, "loss": 0.9851, "step": 2948 }, { "epoch": 0.3678433329175502, "grad_norm": 0.0, "learning_rate": 1.45887930650533e-05, "loss": 0.9967, "step": 2949 }, { "epoch": 0.36796806785580644, "grad_norm": 0.0, "learning_rate": 1.458520305496139e-05, "loss": 0.9696, "step": 2950 }, { "epoch": 0.3680928027940626, "grad_norm": 0.0, "learning_rate": 1.4581612296448752e-05, "loss": 1.0167, "step": 2951 }, { "epoch": 0.3682175377323188, "grad_norm": 0.0, "learning_rate": 1.4578020790101484e-05, "loss": 1.0345, "step": 2952 }, { "epoch": 0.368342272670575, "grad_norm": 0.0, "learning_rate": 1.4574428536505809e-05, "loss": 1.0029, "step": 2953 }, { "epoch": 0.3684670076088312, "grad_norm": 0.0, "learning_rate": 1.457083553624808e-05, "loss": 0.959, "step": 2954 }, { "epoch": 0.36859174254708743, "grad_norm": 0.0, "learning_rate": 1.4567241789914758e-05, "loss": 0.9828, "step": 2955 }, { "epoch": 0.36871647748534364, "grad_norm": 0.0, "learning_rate": 1.4563647298092438e-05, "loss": 0.9585, "step": 2956 }, { "epoch": 0.36884121242359985, "grad_norm": 0.0, "learning_rate": 1.456005206136783e-05, "loss": 0.9818, "step": 2957 }, { "epoch": 0.36896594736185606, "grad_norm": 0.0, "learning_rate": 1.455645608032777e-05, "loss": 1.0333, "step": 2958 }, { "epoch": 0.36909068230011227, "grad_norm": 0.0, "learning_rate": 1.4552859355559205e-05, "loss": 1.03, "step": 2959 }, { "epoch": 0.3692154172383685, "grad_norm": 0.0, "learning_rate": 1.454926188764922e-05, "loss": 1.0312, "step": 2960 }, { "epoch": 0.3693401521766247, "grad_norm": 0.0, "learning_rate": 1.4545663677185007e-05, "loss": 0.9644, "step": 2961 }, { "epoch": 0.3694648871148809, "grad_norm": 0.0, "learning_rate": 1.4542064724753886e-05, "loss": 1.0274, "step": 2962 }, { "epoch": 0.36958962205313706, "grad_norm": 0.0, "learning_rate": 1.4538465030943298e-05, "loss": 1.0076, "step": 2963 }, { "epoch": 0.36971435699139327, "grad_norm": 0.0, "learning_rate": 1.4534864596340806e-05, "loss": 0.9479, "step": 2964 }, { "epoch": 0.3698390919296495, "grad_norm": 0.0, "learning_rate": 1.4531263421534083e-05, "loss": 1.0408, "step": 2965 }, { "epoch": 0.3699638268679057, "grad_norm": 0.0, "learning_rate": 1.452766150711094e-05, "loss": 1.0255, "step": 2966 }, { "epoch": 0.3700885618061619, "grad_norm": 0.0, "learning_rate": 1.4524058853659297e-05, "loss": 0.9856, "step": 2967 }, { "epoch": 0.3702132967444181, "grad_norm": 0.0, "learning_rate": 1.45204554617672e-05, "loss": 1.0415, "step": 2968 }, { "epoch": 0.3703380316826743, "grad_norm": 0.0, "learning_rate": 1.4516851332022808e-05, "loss": 0.9953, "step": 2969 }, { "epoch": 0.37046276662093053, "grad_norm": 0.0, "learning_rate": 1.451324646501441e-05, "loss": 1.0328, "step": 2970 }, { "epoch": 0.37058750155918674, "grad_norm": 0.0, "learning_rate": 1.4509640861330415e-05, "loss": 0.9855, "step": 2971 }, { "epoch": 0.37071223649744295, "grad_norm": 0.0, "learning_rate": 1.4506034521559337e-05, "loss": 0.9958, "step": 2972 }, { "epoch": 0.37083697143569916, "grad_norm": 0.0, "learning_rate": 1.4502427446289833e-05, "loss": 1.0174, "step": 2973 }, { "epoch": 0.37096170637395537, "grad_norm": 0.0, "learning_rate": 1.4498819636110665e-05, "loss": 0.9964, "step": 2974 }, { "epoch": 0.3710864413122115, "grad_norm": 0.0, "learning_rate": 1.4495211091610716e-05, "loss": 1.0322, "step": 2975 }, { "epoch": 0.37121117625046773, "grad_norm": 0.0, "learning_rate": 1.4491601813378995e-05, "loss": 0.9638, "step": 2976 }, { "epoch": 0.37133591118872394, "grad_norm": 0.0, "learning_rate": 1.4487991802004625e-05, "loss": 0.9745, "step": 2977 }, { "epoch": 0.37146064612698015, "grad_norm": 0.0, "learning_rate": 1.4484381058076849e-05, "loss": 0.9817, "step": 2978 }, { "epoch": 0.37158538106523636, "grad_norm": 0.0, "learning_rate": 1.4480769582185036e-05, "loss": 1.0031, "step": 2979 }, { "epoch": 0.3717101160034926, "grad_norm": 0.0, "learning_rate": 1.4477157374918665e-05, "loss": 1.0024, "step": 2980 }, { "epoch": 0.3718348509417488, "grad_norm": 0.0, "learning_rate": 1.4473544436867344e-05, "loss": 1.0173, "step": 2981 }, { "epoch": 0.371959585880005, "grad_norm": 0.0, "learning_rate": 1.4469930768620792e-05, "loss": 0.9673, "step": 2982 }, { "epoch": 0.3720843208182612, "grad_norm": 0.0, "learning_rate": 1.4466316370768853e-05, "loss": 0.9887, "step": 2983 }, { "epoch": 0.3722090557565174, "grad_norm": 0.0, "learning_rate": 1.446270124390149e-05, "loss": 0.9361, "step": 2984 }, { "epoch": 0.3723337906947736, "grad_norm": 0.0, "learning_rate": 1.4459085388608778e-05, "loss": 0.9535, "step": 2985 }, { "epoch": 0.37245852563302984, "grad_norm": 0.0, "learning_rate": 1.4455468805480918e-05, "loss": 0.992, "step": 2986 }, { "epoch": 0.372583260571286, "grad_norm": 0.0, "learning_rate": 1.4451851495108224e-05, "loss": 0.9618, "step": 2987 }, { "epoch": 0.3727079955095422, "grad_norm": 0.0, "learning_rate": 1.4448233458081137e-05, "loss": 0.9839, "step": 2988 }, { "epoch": 0.3728327304477984, "grad_norm": 0.0, "learning_rate": 1.4444614694990212e-05, "loss": 0.9588, "step": 2989 }, { "epoch": 0.3729574653860546, "grad_norm": 0.0, "learning_rate": 1.4440995206426118e-05, "loss": 1.0113, "step": 2990 }, { "epoch": 0.37308220032431083, "grad_norm": 0.0, "learning_rate": 1.4437374992979654e-05, "loss": 0.9995, "step": 2991 }, { "epoch": 0.37320693526256704, "grad_norm": 0.0, "learning_rate": 1.4433754055241722e-05, "loss": 1.015, "step": 2992 }, { "epoch": 0.37333167020082325, "grad_norm": 0.0, "learning_rate": 1.4430132393803353e-05, "loss": 0.9556, "step": 2993 }, { "epoch": 0.37345640513907946, "grad_norm": 0.0, "learning_rate": 1.4426510009255697e-05, "loss": 1.0231, "step": 2994 }, { "epoch": 0.37358114007733567, "grad_norm": 0.0, "learning_rate": 1.4422886902190014e-05, "loss": 0.9579, "step": 2995 }, { "epoch": 0.3737058750155919, "grad_norm": 0.0, "learning_rate": 1.4419263073197689e-05, "loss": 1.0043, "step": 2996 }, { "epoch": 0.3738306099538481, "grad_norm": 0.0, "learning_rate": 1.4415638522870223e-05, "loss": 0.9983, "step": 2997 }, { "epoch": 0.3739553448921043, "grad_norm": 0.0, "learning_rate": 1.441201325179923e-05, "loss": 1.0113, "step": 2998 }, { "epoch": 0.37408007983036046, "grad_norm": 0.0, "learning_rate": 1.4408387260576449e-05, "loss": 0.9667, "step": 2999 }, { "epoch": 0.37420481476861667, "grad_norm": 0.0, "learning_rate": 1.4404760549793733e-05, "loss": 1.0073, "step": 3000 }, { "epoch": 0.3743295497068729, "grad_norm": 0.0, "learning_rate": 1.440113312004305e-05, "loss": 0.9697, "step": 3001 }, { "epoch": 0.3744542846451291, "grad_norm": 0.0, "learning_rate": 1.4397504971916491e-05, "loss": 0.9757, "step": 3002 }, { "epoch": 0.3745790195833853, "grad_norm": 0.0, "learning_rate": 1.4393876106006259e-05, "loss": 0.9475, "step": 3003 }, { "epoch": 0.3747037545216415, "grad_norm": 0.0, "learning_rate": 1.4390246522904681e-05, "loss": 1.0173, "step": 3004 }, { "epoch": 0.3748284894598977, "grad_norm": 0.0, "learning_rate": 1.4386616223204189e-05, "loss": 0.9797, "step": 3005 }, { "epoch": 0.37495322439815393, "grad_norm": 0.0, "learning_rate": 1.4382985207497344e-05, "loss": 1.0025, "step": 3006 }, { "epoch": 0.37507795933641014, "grad_norm": 0.0, "learning_rate": 1.4379353476376821e-05, "loss": 1.0467, "step": 3007 }, { "epoch": 0.37520269427466635, "grad_norm": 0.0, "learning_rate": 1.4375721030435408e-05, "loss": 0.9644, "step": 3008 }, { "epoch": 0.37532742921292256, "grad_norm": 0.0, "learning_rate": 1.437208787026601e-05, "loss": 0.987, "step": 3009 }, { "epoch": 0.37545216415117877, "grad_norm": 0.0, "learning_rate": 1.4368453996461651e-05, "loss": 1.0218, "step": 3010 }, { "epoch": 0.3755768990894349, "grad_norm": 0.0, "learning_rate": 1.436481940961547e-05, "loss": 1.0074, "step": 3011 }, { "epoch": 0.37570163402769113, "grad_norm": 0.0, "learning_rate": 1.4361184110320729e-05, "loss": 1.0206, "step": 3012 }, { "epoch": 0.37582636896594734, "grad_norm": 0.0, "learning_rate": 1.4357548099170794e-05, "loss": 0.9973, "step": 3013 }, { "epoch": 0.37595110390420355, "grad_norm": 0.0, "learning_rate": 1.435391137675916e-05, "loss": 0.9567, "step": 3014 }, { "epoch": 0.37607583884245976, "grad_norm": 0.0, "learning_rate": 1.4350273943679422e-05, "loss": 1.027, "step": 3015 }, { "epoch": 0.376200573780716, "grad_norm": 0.0, "learning_rate": 1.434663580052531e-05, "loss": 1.0106, "step": 3016 }, { "epoch": 0.3763253087189722, "grad_norm": 0.0, "learning_rate": 1.4342996947890657e-05, "loss": 0.9757, "step": 3017 }, { "epoch": 0.3764500436572284, "grad_norm": 0.0, "learning_rate": 1.4339357386369414e-05, "loss": 0.9881, "step": 3018 }, { "epoch": 0.3765747785954846, "grad_norm": 0.0, "learning_rate": 1.4335717116555654e-05, "loss": 0.937, "step": 3019 }, { "epoch": 0.3766995135337408, "grad_norm": 0.0, "learning_rate": 1.4332076139043557e-05, "loss": 1.0019, "step": 3020 }, { "epoch": 0.376824248471997, "grad_norm": 0.0, "learning_rate": 1.432843445442742e-05, "loss": 1.0064, "step": 3021 }, { "epoch": 0.37694898341025324, "grad_norm": 0.0, "learning_rate": 1.4324792063301662e-05, "loss": 0.9876, "step": 3022 }, { "epoch": 0.3770737183485094, "grad_norm": 0.0, "learning_rate": 1.4321148966260814e-05, "loss": 0.986, "step": 3023 }, { "epoch": 0.3771984532867656, "grad_norm": 0.0, "learning_rate": 1.4317505163899518e-05, "loss": 0.9915, "step": 3024 }, { "epoch": 0.3773231882250218, "grad_norm": 0.0, "learning_rate": 1.4313860656812537e-05, "loss": 1.025, "step": 3025 }, { "epoch": 0.377447923163278, "grad_norm": 0.0, "learning_rate": 1.4310215445594741e-05, "loss": 0.9767, "step": 3026 }, { "epoch": 0.37757265810153423, "grad_norm": 0.0, "learning_rate": 1.430656953084113e-05, "loss": 0.971, "step": 3027 }, { "epoch": 0.37769739303979044, "grad_norm": 0.0, "learning_rate": 1.43029229131468e-05, "loss": 0.9737, "step": 3028 }, { "epoch": 0.37782212797804665, "grad_norm": 0.0, "learning_rate": 1.4299275593106973e-05, "loss": 1.0346, "step": 3029 }, { "epoch": 0.37794686291630286, "grad_norm": 0.0, "learning_rate": 1.4295627571316989e-05, "loss": 1.0182, "step": 3030 }, { "epoch": 0.37807159785455907, "grad_norm": 0.0, "learning_rate": 1.4291978848372292e-05, "loss": 0.9776, "step": 3031 }, { "epoch": 0.3781963327928153, "grad_norm": 0.0, "learning_rate": 1.4288329424868446e-05, "loss": 0.9522, "step": 3032 }, { "epoch": 0.3783210677310715, "grad_norm": 0.0, "learning_rate": 1.428467930140113e-05, "loss": 0.9716, "step": 3033 }, { "epoch": 0.3784458026693277, "grad_norm": 0.0, "learning_rate": 1.4281028478566135e-05, "loss": 1.0126, "step": 3034 }, { "epoch": 0.37857053760758386, "grad_norm": 0.0, "learning_rate": 1.4277376956959368e-05, "loss": 1.0134, "step": 3035 }, { "epoch": 0.37869527254584007, "grad_norm": 0.0, "learning_rate": 1.427372473717685e-05, "loss": 0.9809, "step": 3036 }, { "epoch": 0.3788200074840963, "grad_norm": 0.0, "learning_rate": 1.4270071819814717e-05, "loss": 0.9358, "step": 3037 }, { "epoch": 0.3789447424223525, "grad_norm": 0.0, "learning_rate": 1.4266418205469212e-05, "loss": 0.9935, "step": 3038 }, { "epoch": 0.3790694773606087, "grad_norm": 0.0, "learning_rate": 1.42627638947367e-05, "loss": 0.9901, "step": 3039 }, { "epoch": 0.3791942122988649, "grad_norm": 0.0, "learning_rate": 1.425910888821366e-05, "loss": 0.9794, "step": 3040 }, { "epoch": 0.3793189472371211, "grad_norm": 0.0, "learning_rate": 1.4255453186496674e-05, "loss": 1.0176, "step": 3041 }, { "epoch": 0.37944368217537733, "grad_norm": 0.0, "learning_rate": 1.4251796790182453e-05, "loss": 0.9847, "step": 3042 }, { "epoch": 0.37956841711363354, "grad_norm": 0.0, "learning_rate": 1.4248139699867807e-05, "loss": 0.9724, "step": 3043 }, { "epoch": 0.37969315205188975, "grad_norm": 0.0, "learning_rate": 1.4244481916149665e-05, "loss": 0.9954, "step": 3044 }, { "epoch": 0.37981788699014596, "grad_norm": 0.0, "learning_rate": 1.4240823439625074e-05, "loss": 0.9797, "step": 3045 }, { "epoch": 0.37994262192840217, "grad_norm": 0.0, "learning_rate": 1.4237164270891185e-05, "loss": 1.0049, "step": 3046 }, { "epoch": 0.3800673568666583, "grad_norm": 0.0, "learning_rate": 1.4233504410545271e-05, "loss": 0.9807, "step": 3047 }, { "epoch": 0.38019209180491453, "grad_norm": 0.0, "learning_rate": 1.4229843859184709e-05, "loss": 1.0228, "step": 3048 }, { "epoch": 0.38031682674317074, "grad_norm": 0.0, "learning_rate": 1.4226182617406996e-05, "loss": 1.0118, "step": 3049 }, { "epoch": 0.38044156168142695, "grad_norm": 0.0, "learning_rate": 1.4222520685809739e-05, "loss": 0.9834, "step": 3050 }, { "epoch": 0.38056629661968316, "grad_norm": 0.0, "learning_rate": 1.4218858064990655e-05, "loss": 0.9491, "step": 3051 }, { "epoch": 0.3806910315579394, "grad_norm": 0.0, "learning_rate": 1.421519475554758e-05, "loss": 1.0132, "step": 3052 }, { "epoch": 0.3808157664961956, "grad_norm": 0.0, "learning_rate": 1.4211530758078455e-05, "loss": 0.9888, "step": 3053 }, { "epoch": 0.3809405014344518, "grad_norm": 0.0, "learning_rate": 1.4207866073181343e-05, "loss": 1.0014, "step": 3054 }, { "epoch": 0.381065236372708, "grad_norm": 0.0, "learning_rate": 1.4204200701454403e-05, "loss": 1.0023, "step": 3055 }, { "epoch": 0.3811899713109642, "grad_norm": 0.0, "learning_rate": 1.420053464349592e-05, "loss": 1.0, "step": 3056 }, { "epoch": 0.3813147062492204, "grad_norm": 0.0, "learning_rate": 1.4196867899904292e-05, "loss": 0.9602, "step": 3057 }, { "epoch": 0.38143944118747664, "grad_norm": 0.0, "learning_rate": 1.4193200471278019e-05, "loss": 1.0216, "step": 3058 }, { "epoch": 0.3815641761257328, "grad_norm": 0.0, "learning_rate": 1.4189532358215718e-05, "loss": 1.0133, "step": 3059 }, { "epoch": 0.381688911063989, "grad_norm": 0.0, "learning_rate": 1.4185863561316121e-05, "loss": 0.9568, "step": 3060 }, { "epoch": 0.3818136460022452, "grad_norm": 0.0, "learning_rate": 1.4182194081178065e-05, "loss": 0.9772, "step": 3061 }, { "epoch": 0.3819383809405014, "grad_norm": 0.0, "learning_rate": 1.4178523918400502e-05, "loss": 0.9679, "step": 3062 }, { "epoch": 0.38206311587875763, "grad_norm": 0.0, "learning_rate": 1.4174853073582498e-05, "loss": 0.9641, "step": 3063 }, { "epoch": 0.38218785081701384, "grad_norm": 0.0, "learning_rate": 1.4171181547323221e-05, "loss": 0.9453, "step": 3064 }, { "epoch": 0.38231258575527005, "grad_norm": 0.0, "learning_rate": 1.4167509340221961e-05, "loss": 0.9787, "step": 3065 }, { "epoch": 0.38243732069352626, "grad_norm": 0.0, "learning_rate": 1.4163836452878115e-05, "loss": 0.9799, "step": 3066 }, { "epoch": 0.38256205563178247, "grad_norm": 0.0, "learning_rate": 1.4160162885891193e-05, "loss": 0.9952, "step": 3067 }, { "epoch": 0.3826867905700387, "grad_norm": 0.0, "learning_rate": 1.4156488639860806e-05, "loss": 0.9505, "step": 3068 }, { "epoch": 0.3828115255082949, "grad_norm": 0.0, "learning_rate": 1.415281371538669e-05, "loss": 0.9478, "step": 3069 }, { "epoch": 0.3829362604465511, "grad_norm": 0.0, "learning_rate": 1.4149138113068687e-05, "loss": 0.9617, "step": 3070 }, { "epoch": 0.38306099538480726, "grad_norm": 0.0, "learning_rate": 1.4145461833506745e-05, "loss": 1.0045, "step": 3071 }, { "epoch": 0.38318573032306347, "grad_norm": 0.0, "learning_rate": 1.4141784877300921e-05, "loss": 0.9752, "step": 3072 }, { "epoch": 0.3833104652613197, "grad_norm": 0.0, "learning_rate": 1.4138107245051394e-05, "loss": 0.9869, "step": 3073 }, { "epoch": 0.3834352001995759, "grad_norm": 0.0, "learning_rate": 1.4134428937358441e-05, "loss": 0.9524, "step": 3074 }, { "epoch": 0.3835599351378321, "grad_norm": 0.0, "learning_rate": 1.4130749954822458e-05, "loss": 0.9767, "step": 3075 }, { "epoch": 0.3836846700760883, "grad_norm": 0.0, "learning_rate": 1.4127070298043949e-05, "loss": 1.0055, "step": 3076 }, { "epoch": 0.3838094050143445, "grad_norm": 0.0, "learning_rate": 1.4123389967623525e-05, "loss": 0.9509, "step": 3077 }, { "epoch": 0.38393413995260073, "grad_norm": 0.0, "learning_rate": 1.4119708964161905e-05, "loss": 0.9935, "step": 3078 }, { "epoch": 0.38405887489085694, "grad_norm": 0.0, "learning_rate": 1.4116027288259924e-05, "loss": 1.002, "step": 3079 }, { "epoch": 0.38418360982911315, "grad_norm": 0.0, "learning_rate": 1.4112344940518528e-05, "loss": 0.9392, "step": 3080 }, { "epoch": 0.38430834476736936, "grad_norm": 0.0, "learning_rate": 1.4108661921538766e-05, "loss": 0.9782, "step": 3081 }, { "epoch": 0.38443307970562557, "grad_norm": 0.0, "learning_rate": 1.4104978231921798e-05, "loss": 1.0338, "step": 3082 }, { "epoch": 0.3845578146438817, "grad_norm": 0.0, "learning_rate": 1.4101293872268897e-05, "loss": 1.0271, "step": 3083 }, { "epoch": 0.38468254958213793, "grad_norm": 0.0, "learning_rate": 1.4097608843181445e-05, "loss": 0.9719, "step": 3084 }, { "epoch": 0.38480728452039414, "grad_norm": 0.0, "learning_rate": 1.4093923145260926e-05, "loss": 0.9556, "step": 3085 }, { "epoch": 0.38493201945865035, "grad_norm": 0.0, "learning_rate": 1.4090236779108946e-05, "loss": 0.9877, "step": 3086 }, { "epoch": 0.38505675439690656, "grad_norm": 0.0, "learning_rate": 1.408654974532721e-05, "loss": 0.9998, "step": 3087 }, { "epoch": 0.3851814893351628, "grad_norm": 0.0, "learning_rate": 1.4082862044517529e-05, "loss": 0.9605, "step": 3088 }, { "epoch": 0.385306224273419, "grad_norm": 0.0, "learning_rate": 1.4079173677281836e-05, "loss": 0.9805, "step": 3089 }, { "epoch": 0.3854309592116752, "grad_norm": 0.0, "learning_rate": 1.4075484644222166e-05, "loss": 0.9659, "step": 3090 }, { "epoch": 0.3855556941499314, "grad_norm": 0.0, "learning_rate": 1.4071794945940657e-05, "loss": 0.9757, "step": 3091 }, { "epoch": 0.3856804290881876, "grad_norm": 0.0, "learning_rate": 1.4068104583039563e-05, "loss": 0.9821, "step": 3092 }, { "epoch": 0.3858051640264438, "grad_norm": 0.0, "learning_rate": 1.4064413556121253e-05, "loss": 0.984, "step": 3093 }, { "epoch": 0.38592989896470004, "grad_norm": 0.0, "learning_rate": 1.4060721865788178e-05, "loss": 0.9969, "step": 3094 }, { "epoch": 0.38605463390295625, "grad_norm": 0.0, "learning_rate": 1.4057029512642926e-05, "loss": 0.94, "step": 3095 }, { "epoch": 0.3861793688412124, "grad_norm": 0.0, "learning_rate": 1.4053336497288183e-05, "loss": 0.9915, "step": 3096 }, { "epoch": 0.3863041037794686, "grad_norm": 0.0, "learning_rate": 1.4049642820326737e-05, "loss": 0.9828, "step": 3097 }, { "epoch": 0.3864288387177248, "grad_norm": 0.0, "learning_rate": 1.4045948482361494e-05, "loss": 0.981, "step": 3098 }, { "epoch": 0.38655357365598103, "grad_norm": 0.0, "learning_rate": 1.4042253483995459e-05, "loss": 0.9483, "step": 3099 }, { "epoch": 0.38667830859423724, "grad_norm": 0.0, "learning_rate": 1.4038557825831754e-05, "loss": 0.9687, "step": 3100 }, { "epoch": 0.38680304353249345, "grad_norm": 0.0, "learning_rate": 1.4034861508473595e-05, "loss": 1.0194, "step": 3101 }, { "epoch": 0.38692777847074966, "grad_norm": 0.0, "learning_rate": 1.4031164532524322e-05, "loss": 0.9242, "step": 3102 }, { "epoch": 0.38705251340900587, "grad_norm": 0.0, "learning_rate": 1.4027466898587375e-05, "loss": 0.9959, "step": 3103 }, { "epoch": 0.3871772483472621, "grad_norm": 0.0, "learning_rate": 1.4023768607266294e-05, "loss": 1.0282, "step": 3104 }, { "epoch": 0.3873019832855183, "grad_norm": 0.0, "learning_rate": 1.4020069659164741e-05, "loss": 0.9968, "step": 3105 }, { "epoch": 0.3874267182237745, "grad_norm": 0.0, "learning_rate": 1.4016370054886474e-05, "loss": 0.9893, "step": 3106 }, { "epoch": 0.3875514531620307, "grad_norm": 0.0, "learning_rate": 1.4012669795035359e-05, "loss": 0.9615, "step": 3107 }, { "epoch": 0.38767618810028687, "grad_norm": 0.0, "learning_rate": 1.4008968880215377e-05, "loss": 0.9916, "step": 3108 }, { "epoch": 0.3878009230385431, "grad_norm": 0.0, "learning_rate": 1.4005267311030608e-05, "loss": 0.9918, "step": 3109 }, { "epoch": 0.3879256579767993, "grad_norm": 0.0, "learning_rate": 1.4001565088085242e-05, "loss": 0.9782, "step": 3110 }, { "epoch": 0.3880503929150555, "grad_norm": 0.0, "learning_rate": 1.3997862211983573e-05, "loss": 0.9948, "step": 3111 }, { "epoch": 0.3881751278533117, "grad_norm": 0.0, "learning_rate": 1.3994158683330006e-05, "loss": 0.9972, "step": 3112 }, { "epoch": 0.3882998627915679, "grad_norm": 0.0, "learning_rate": 1.399045450272905e-05, "loss": 0.9498, "step": 3113 }, { "epoch": 0.38842459772982413, "grad_norm": 0.0, "learning_rate": 1.398674967078532e-05, "loss": 0.994, "step": 3114 }, { "epoch": 0.38854933266808034, "grad_norm": 0.0, "learning_rate": 1.3983044188103541e-05, "loss": 1.0164, "step": 3115 }, { "epoch": 0.38867406760633655, "grad_norm": 0.0, "learning_rate": 1.3979338055288536e-05, "loss": 0.987, "step": 3116 }, { "epoch": 0.38879880254459276, "grad_norm": 0.0, "learning_rate": 1.3975631272945243e-05, "loss": 0.9887, "step": 3117 }, { "epoch": 0.38892353748284897, "grad_norm": 0.0, "learning_rate": 1.3971923841678698e-05, "loss": 0.9671, "step": 3118 }, { "epoch": 0.3890482724211052, "grad_norm": 0.0, "learning_rate": 1.3968215762094054e-05, "loss": 0.9526, "step": 3119 }, { "epoch": 0.38917300735936133, "grad_norm": 0.0, "learning_rate": 1.3964507034796558e-05, "loss": 1.0171, "step": 3120 }, { "epoch": 0.38929774229761754, "grad_norm": 0.0, "learning_rate": 1.396079766039157e-05, "loss": 0.9535, "step": 3121 }, { "epoch": 0.38942247723587375, "grad_norm": 0.0, "learning_rate": 1.3957087639484552e-05, "loss": 0.9566, "step": 3122 }, { "epoch": 0.38954721217412996, "grad_norm": 0.0, "learning_rate": 1.3953376972681075e-05, "loss": 0.9827, "step": 3123 }, { "epoch": 0.3896719471123862, "grad_norm": 0.0, "learning_rate": 1.394966566058681e-05, "loss": 0.9889, "step": 3124 }, { "epoch": 0.3897966820506424, "grad_norm": 0.0, "learning_rate": 1.3945953703807538e-05, "loss": 0.9427, "step": 3125 }, { "epoch": 0.3899214169888986, "grad_norm": 0.0, "learning_rate": 1.394224110294915e-05, "loss": 0.9887, "step": 3126 }, { "epoch": 0.3900461519271548, "grad_norm": 0.0, "learning_rate": 1.3938527858617625e-05, "loss": 0.9784, "step": 3127 }, { "epoch": 0.390170886865411, "grad_norm": 0.0, "learning_rate": 1.3934813971419066e-05, "loss": 0.985, "step": 3128 }, { "epoch": 0.3902956218036672, "grad_norm": 0.0, "learning_rate": 1.3931099441959671e-05, "loss": 1.0162, "step": 3129 }, { "epoch": 0.39042035674192344, "grad_norm": 0.0, "learning_rate": 1.3927384270845744e-05, "loss": 0.9494, "step": 3130 }, { "epoch": 0.39054509168017965, "grad_norm": 0.0, "learning_rate": 1.3923668458683692e-05, "loss": 0.9986, "step": 3131 }, { "epoch": 0.3906698266184358, "grad_norm": 0.0, "learning_rate": 1.3919952006080037e-05, "loss": 0.9688, "step": 3132 }, { "epoch": 0.390794561556692, "grad_norm": 0.0, "learning_rate": 1.3916234913641393e-05, "loss": 1.0352, "step": 3133 }, { "epoch": 0.3909192964949482, "grad_norm": 0.0, "learning_rate": 1.391251718197448e-05, "loss": 0.9956, "step": 3134 }, { "epoch": 0.39104403143320443, "grad_norm": 0.0, "learning_rate": 1.3908798811686131e-05, "loss": 1.0133, "step": 3135 }, { "epoch": 0.39116876637146064, "grad_norm": 0.0, "learning_rate": 1.3905079803383277e-05, "loss": 1.0227, "step": 3136 }, { "epoch": 0.39129350130971685, "grad_norm": 0.0, "learning_rate": 1.390136015767295e-05, "loss": 0.9668, "step": 3137 }, { "epoch": 0.39141823624797306, "grad_norm": 0.0, "learning_rate": 1.3897639875162297e-05, "loss": 0.9886, "step": 3138 }, { "epoch": 0.39154297118622927, "grad_norm": 0.0, "learning_rate": 1.3893918956458554e-05, "loss": 1.0659, "step": 3139 }, { "epoch": 0.3916677061244855, "grad_norm": 0.0, "learning_rate": 1.3890197402169073e-05, "loss": 0.9825, "step": 3140 }, { "epoch": 0.3917924410627417, "grad_norm": 0.0, "learning_rate": 1.3886475212901301e-05, "loss": 0.9552, "step": 3141 }, { "epoch": 0.3919171760009979, "grad_norm": 0.0, "learning_rate": 1.38827523892628e-05, "loss": 0.9722, "step": 3142 }, { "epoch": 0.3920419109392541, "grad_norm": 0.0, "learning_rate": 1.3879028931861227e-05, "loss": 1.0113, "step": 3143 }, { "epoch": 0.39216664587751027, "grad_norm": 0.0, "learning_rate": 1.387530484130434e-05, "loss": 0.9917, "step": 3144 }, { "epoch": 0.3922913808157665, "grad_norm": 0.0, "learning_rate": 1.3871580118200007e-05, "loss": 0.9677, "step": 3145 }, { "epoch": 0.3924161157540227, "grad_norm": 0.0, "learning_rate": 1.3867854763156198e-05, "loss": 1.0043, "step": 3146 }, { "epoch": 0.3925408506922789, "grad_norm": 0.0, "learning_rate": 1.3864128776780983e-05, "loss": 1.0103, "step": 3147 }, { "epoch": 0.3926655856305351, "grad_norm": 0.0, "learning_rate": 1.3860402159682535e-05, "loss": 0.9191, "step": 3148 }, { "epoch": 0.3927903205687913, "grad_norm": 0.0, "learning_rate": 1.385667491246914e-05, "loss": 1.0082, "step": 3149 }, { "epoch": 0.39291505550704753, "grad_norm": 0.0, "learning_rate": 1.385294703574917e-05, "loss": 1.0177, "step": 3150 }, { "epoch": 0.39303979044530374, "grad_norm": 0.0, "learning_rate": 1.3849218530131112e-05, "loss": 1.0003, "step": 3151 }, { "epoch": 0.39316452538355995, "grad_norm": 0.0, "learning_rate": 1.3845489396223552e-05, "loss": 0.9592, "step": 3152 }, { "epoch": 0.39328926032181616, "grad_norm": 0.0, "learning_rate": 1.3841759634635177e-05, "loss": 0.994, "step": 3153 }, { "epoch": 0.39341399526007237, "grad_norm": 0.0, "learning_rate": 1.383802924597478e-05, "loss": 0.9481, "step": 3154 }, { "epoch": 0.3935387301983286, "grad_norm": 0.0, "learning_rate": 1.3834298230851252e-05, "loss": 1.0097, "step": 3155 }, { "epoch": 0.39366346513658473, "grad_norm": 0.0, "learning_rate": 1.3830566589873595e-05, "loss": 1.0487, "step": 3156 }, { "epoch": 0.39378820007484094, "grad_norm": 0.0, "learning_rate": 1.3826834323650899e-05, "loss": 0.9492, "step": 3157 }, { "epoch": 0.39391293501309715, "grad_norm": 0.0, "learning_rate": 1.3823101432792368e-05, "loss": 0.9724, "step": 3158 }, { "epoch": 0.39403766995135336, "grad_norm": 0.0, "learning_rate": 1.3819367917907304e-05, "loss": 1.037, "step": 3159 }, { "epoch": 0.3941624048896096, "grad_norm": 0.0, "learning_rate": 1.381563377960511e-05, "loss": 1.0117, "step": 3160 }, { "epoch": 0.3942871398278658, "grad_norm": 0.0, "learning_rate": 1.3811899018495291e-05, "loss": 0.9664, "step": 3161 }, { "epoch": 0.394411874766122, "grad_norm": 0.0, "learning_rate": 1.3808163635187454e-05, "loss": 1.0267, "step": 3162 }, { "epoch": 0.3945366097043782, "grad_norm": 0.0, "learning_rate": 1.380442763029131e-05, "loss": 0.9626, "step": 3163 }, { "epoch": 0.3946613446426344, "grad_norm": 0.0, "learning_rate": 1.3800691004416669e-05, "loss": 0.952, "step": 3164 }, { "epoch": 0.3947860795808906, "grad_norm": 0.0, "learning_rate": 1.379695375817344e-05, "loss": 0.9734, "step": 3165 }, { "epoch": 0.39491081451914684, "grad_norm": 0.0, "learning_rate": 1.3793215892171636e-05, "loss": 0.9918, "step": 3166 }, { "epoch": 0.39503554945740305, "grad_norm": 0.0, "learning_rate": 1.3789477407021372e-05, "loss": 0.9655, "step": 3167 }, { "epoch": 0.3951602843956592, "grad_norm": 0.0, "learning_rate": 1.3785738303332862e-05, "loss": 0.9759, "step": 3168 }, { "epoch": 0.3952850193339154, "grad_norm": 0.0, "learning_rate": 1.3781998581716427e-05, "loss": 0.9183, "step": 3169 }, { "epoch": 0.3954097542721716, "grad_norm": 0.0, "learning_rate": 1.3778258242782477e-05, "loss": 1.0107, "step": 3170 }, { "epoch": 0.39553448921042783, "grad_norm": 0.0, "learning_rate": 1.3774517287141531e-05, "loss": 0.9764, "step": 3171 }, { "epoch": 0.39565922414868404, "grad_norm": 0.0, "learning_rate": 1.3770775715404212e-05, "loss": 0.9632, "step": 3172 }, { "epoch": 0.39578395908694025, "grad_norm": 0.0, "learning_rate": 1.3767033528181232e-05, "loss": 0.9521, "step": 3173 }, { "epoch": 0.39590869402519646, "grad_norm": 0.0, "learning_rate": 1.3763290726083419e-05, "loss": 0.9208, "step": 3174 }, { "epoch": 0.39603342896345267, "grad_norm": 0.0, "learning_rate": 1.3759547309721681e-05, "loss": 1.043, "step": 3175 }, { "epoch": 0.3961581639017089, "grad_norm": 0.0, "learning_rate": 1.375580327970705e-05, "loss": 0.9707, "step": 3176 }, { "epoch": 0.3962828988399651, "grad_norm": 0.0, "learning_rate": 1.3752058636650638e-05, "loss": 1.0261, "step": 3177 }, { "epoch": 0.3964076337782213, "grad_norm": 0.0, "learning_rate": 1.3748313381163667e-05, "loss": 0.9977, "step": 3178 }, { "epoch": 0.3965323687164775, "grad_norm": 0.0, "learning_rate": 1.374456751385746e-05, "loss": 1.0012, "step": 3179 }, { "epoch": 0.39665710365473367, "grad_norm": 0.0, "learning_rate": 1.3740821035343432e-05, "loss": 0.9868, "step": 3180 }, { "epoch": 0.3967818385929899, "grad_norm": 0.0, "learning_rate": 1.3737073946233106e-05, "loss": 1.01, "step": 3181 }, { "epoch": 0.3969065735312461, "grad_norm": 0.0, "learning_rate": 1.37333262471381e-05, "loss": 0.962, "step": 3182 }, { "epoch": 0.3970313084695023, "grad_norm": 0.0, "learning_rate": 1.372957793867014e-05, "loss": 1.0073, "step": 3183 }, { "epoch": 0.3971560434077585, "grad_norm": 0.0, "learning_rate": 1.372582902144103e-05, "loss": 0.9918, "step": 3184 }, { "epoch": 0.3972807783460147, "grad_norm": 0.0, "learning_rate": 1.3722079496062702e-05, "loss": 0.9594, "step": 3185 }, { "epoch": 0.39740551328427093, "grad_norm": 0.0, "learning_rate": 1.3718329363147165e-05, "loss": 0.9517, "step": 3186 }, { "epoch": 0.39753024822252714, "grad_norm": 0.0, "learning_rate": 1.3714578623306535e-05, "loss": 0.9567, "step": 3187 }, { "epoch": 0.39765498316078335, "grad_norm": 0.0, "learning_rate": 1.371082727715303e-05, "loss": 0.9545, "step": 3188 }, { "epoch": 0.39777971809903956, "grad_norm": 0.0, "learning_rate": 1.3707075325298971e-05, "loss": 1.0223, "step": 3189 }, { "epoch": 0.39790445303729577, "grad_norm": 0.0, "learning_rate": 1.3703322768356758e-05, "loss": 0.9758, "step": 3190 }, { "epoch": 0.398029187975552, "grad_norm": 0.0, "learning_rate": 1.3699569606938909e-05, "loss": 0.9617, "step": 3191 }, { "epoch": 0.39815392291380813, "grad_norm": 0.0, "learning_rate": 1.3695815841658035e-05, "loss": 0.9634, "step": 3192 }, { "epoch": 0.39827865785206434, "grad_norm": 0.0, "learning_rate": 1.3692061473126845e-05, "loss": 0.9851, "step": 3193 }, { "epoch": 0.39840339279032055, "grad_norm": 0.0, "learning_rate": 1.3688306501958149e-05, "loss": 1.0094, "step": 3194 }, { "epoch": 0.39852812772857676, "grad_norm": 0.0, "learning_rate": 1.3684550928764846e-05, "loss": 0.965, "step": 3195 }, { "epoch": 0.398652862666833, "grad_norm": 0.0, "learning_rate": 1.3680794754159947e-05, "loss": 0.9625, "step": 3196 }, { "epoch": 0.3987775976050892, "grad_norm": 0.0, "learning_rate": 1.3677037978756551e-05, "loss": 0.9661, "step": 3197 }, { "epoch": 0.3989023325433454, "grad_norm": 0.0, "learning_rate": 1.3673280603167856e-05, "loss": 0.9705, "step": 3198 }, { "epoch": 0.3990270674816016, "grad_norm": 0.0, "learning_rate": 1.3669522628007168e-05, "loss": 0.9842, "step": 3199 }, { "epoch": 0.3991518024198578, "grad_norm": 0.0, "learning_rate": 1.366576405388788e-05, "loss": 0.9805, "step": 3200 }, { "epoch": 0.399276537358114, "grad_norm": 0.0, "learning_rate": 1.3662004881423481e-05, "loss": 0.9413, "step": 3201 }, { "epoch": 0.39940127229637024, "grad_norm": 0.0, "learning_rate": 1.3658245111227571e-05, "loss": 0.9387, "step": 3202 }, { "epoch": 0.39952600723462645, "grad_norm": 0.0, "learning_rate": 1.3654484743913829e-05, "loss": 1.0036, "step": 3203 }, { "epoch": 0.3996507421728826, "grad_norm": 0.0, "learning_rate": 1.3650723780096051e-05, "loss": 0.9709, "step": 3204 }, { "epoch": 0.3997754771111388, "grad_norm": 0.0, "learning_rate": 1.364696222038812e-05, "loss": 0.9495, "step": 3205 }, { "epoch": 0.399900212049395, "grad_norm": 0.0, "learning_rate": 1.3643200065404015e-05, "loss": 0.9752, "step": 3206 }, { "epoch": 0.40002494698765123, "grad_norm": 0.0, "learning_rate": 1.3639437315757813e-05, "loss": 0.9768, "step": 3207 }, { "epoch": 0.40014968192590744, "grad_norm": 0.0, "learning_rate": 1.363567397206369e-05, "loss": 0.9364, "step": 3208 }, { "epoch": 0.40027441686416365, "grad_norm": 0.0, "learning_rate": 1.3631910034935923e-05, "loss": 0.966, "step": 3209 }, { "epoch": 0.40039915180241986, "grad_norm": 0.0, "learning_rate": 1.3628145504988875e-05, "loss": 1.0138, "step": 3210 }, { "epoch": 0.40052388674067607, "grad_norm": 0.0, "learning_rate": 1.3624380382837017e-05, "loss": 0.9138, "step": 3211 }, { "epoch": 0.4006486216789323, "grad_norm": 0.0, "learning_rate": 1.3620614669094915e-05, "loss": 1.0184, "step": 3212 }, { "epoch": 0.4007733566171885, "grad_norm": 0.0, "learning_rate": 1.3616848364377217e-05, "loss": 0.9764, "step": 3213 }, { "epoch": 0.4008980915554447, "grad_norm": 0.0, "learning_rate": 1.3613081469298687e-05, "loss": 1.0089, "step": 3214 }, { "epoch": 0.4010228264937009, "grad_norm": 0.0, "learning_rate": 1.3609313984474178e-05, "loss": 0.992, "step": 3215 }, { "epoch": 0.40114756143195707, "grad_norm": 0.0, "learning_rate": 1.3605545910518636e-05, "loss": 0.9763, "step": 3216 }, { "epoch": 0.4012722963702133, "grad_norm": 0.0, "learning_rate": 1.3601777248047105e-05, "loss": 0.9488, "step": 3217 }, { "epoch": 0.4013970313084695, "grad_norm": 0.0, "learning_rate": 1.3598007997674726e-05, "loss": 1.0048, "step": 3218 }, { "epoch": 0.4015217662467257, "grad_norm": 0.0, "learning_rate": 1.3594238160016741e-05, "loss": 1.0139, "step": 3219 }, { "epoch": 0.4016465011849819, "grad_norm": 0.0, "learning_rate": 1.3590467735688475e-05, "loss": 0.9416, "step": 3220 }, { "epoch": 0.4017712361232381, "grad_norm": 0.0, "learning_rate": 1.3586696725305357e-05, "loss": 0.9595, "step": 3221 }, { "epoch": 0.40189597106149433, "grad_norm": 0.0, "learning_rate": 1.358292512948292e-05, "loss": 1.0081, "step": 3222 }, { "epoch": 0.40202070599975054, "grad_norm": 0.0, "learning_rate": 1.357915294883677e-05, "loss": 0.9855, "step": 3223 }, { "epoch": 0.40214544093800675, "grad_norm": 0.0, "learning_rate": 1.3575380183982633e-05, "loss": 0.9934, "step": 3224 }, { "epoch": 0.40227017587626296, "grad_norm": 0.0, "learning_rate": 1.3571606835536315e-05, "loss": 1.0198, "step": 3225 }, { "epoch": 0.40239491081451917, "grad_norm": 0.0, "learning_rate": 1.3567832904113718e-05, "loss": 0.9924, "step": 3226 }, { "epoch": 0.4025196457527754, "grad_norm": 0.0, "learning_rate": 1.356405839033085e-05, "loss": 0.966, "step": 3227 }, { "epoch": 0.40264438069103153, "grad_norm": 0.0, "learning_rate": 1.3560283294803801e-05, "loss": 1.004, "step": 3228 }, { "epoch": 0.40276911562928774, "grad_norm": 0.0, "learning_rate": 1.3556507618148769e-05, "loss": 0.9422, "step": 3229 }, { "epoch": 0.40289385056754395, "grad_norm": 0.0, "learning_rate": 1.3552731360982028e-05, "loss": 0.9666, "step": 3230 }, { "epoch": 0.40301858550580016, "grad_norm": 0.0, "learning_rate": 1.3548954523919969e-05, "loss": 1.0024, "step": 3231 }, { "epoch": 0.4031433204440564, "grad_norm": 0.0, "learning_rate": 1.3545177107579062e-05, "loss": 0.9876, "step": 3232 }, { "epoch": 0.4032680553823126, "grad_norm": 0.0, "learning_rate": 1.3541399112575877e-05, "loss": 0.9556, "step": 3233 }, { "epoch": 0.4033927903205688, "grad_norm": 0.0, "learning_rate": 1.3537620539527079e-05, "loss": 0.9703, "step": 3234 }, { "epoch": 0.403517525258825, "grad_norm": 0.0, "learning_rate": 1.3533841389049429e-05, "loss": 0.997, "step": 3235 }, { "epoch": 0.4036422601970812, "grad_norm": 0.0, "learning_rate": 1.3530061661759772e-05, "loss": 0.9855, "step": 3236 }, { "epoch": 0.4037669951353374, "grad_norm": 0.0, "learning_rate": 1.3526281358275061e-05, "loss": 0.9595, "step": 3237 }, { "epoch": 0.40389173007359364, "grad_norm": 0.0, "learning_rate": 1.3522500479212337e-05, "loss": 0.977, "step": 3238 }, { "epoch": 0.40401646501184985, "grad_norm": 0.0, "learning_rate": 1.3518719025188734e-05, "loss": 0.9571, "step": 3239 }, { "epoch": 0.404141199950106, "grad_norm": 0.0, "learning_rate": 1.3514936996821475e-05, "loss": 0.9967, "step": 3240 }, { "epoch": 0.4042659348883622, "grad_norm": 0.0, "learning_rate": 1.351115439472789e-05, "loss": 0.9753, "step": 3241 }, { "epoch": 0.4043906698266184, "grad_norm": 0.0, "learning_rate": 1.3507371219525391e-05, "loss": 0.9819, "step": 3242 }, { "epoch": 0.40451540476487463, "grad_norm": 0.0, "learning_rate": 1.350358747183149e-05, "loss": 0.9919, "step": 3243 }, { "epoch": 0.40464013970313084, "grad_norm": 0.0, "learning_rate": 1.3499803152263787e-05, "loss": 1.041, "step": 3244 }, { "epoch": 0.40476487464138705, "grad_norm": 0.0, "learning_rate": 1.3496018261439985e-05, "loss": 0.9784, "step": 3245 }, { "epoch": 0.40488960957964326, "grad_norm": 0.0, "learning_rate": 1.3492232799977862e-05, "loss": 0.9538, "step": 3246 }, { "epoch": 0.40501434451789947, "grad_norm": 0.0, "learning_rate": 1.3488446768495309e-05, "loss": 0.9814, "step": 3247 }, { "epoch": 0.4051390794561557, "grad_norm": 0.0, "learning_rate": 1.3484660167610302e-05, "loss": 1.0288, "step": 3248 }, { "epoch": 0.4052638143944119, "grad_norm": 0.0, "learning_rate": 1.3480872997940906e-05, "loss": 1.0033, "step": 3249 }, { "epoch": 0.4053885493326681, "grad_norm": 0.0, "learning_rate": 1.3477085260105284e-05, "loss": 0.9826, "step": 3250 }, { "epoch": 0.4055132842709243, "grad_norm": 0.0, "learning_rate": 1.3473296954721695e-05, "loss": 1.0501, "step": 3251 }, { "epoch": 0.40563801920918047, "grad_norm": 0.0, "learning_rate": 1.3469508082408478e-05, "loss": 0.982, "step": 3252 }, { "epoch": 0.4057627541474367, "grad_norm": 0.0, "learning_rate": 1.3465718643784076e-05, "loss": 0.978, "step": 3253 }, { "epoch": 0.4058874890856929, "grad_norm": 0.0, "learning_rate": 1.3461928639467021e-05, "loss": 0.9963, "step": 3254 }, { "epoch": 0.4060122240239491, "grad_norm": 0.0, "learning_rate": 1.3458138070075942e-05, "loss": 1.0015, "step": 3255 }, { "epoch": 0.4061369589622053, "grad_norm": 0.0, "learning_rate": 1.3454346936229547e-05, "loss": 1.0006, "step": 3256 }, { "epoch": 0.4062616939004615, "grad_norm": 0.0, "learning_rate": 1.3450555238546648e-05, "loss": 0.9848, "step": 3257 }, { "epoch": 0.40638642883871773, "grad_norm": 0.0, "learning_rate": 1.3446762977646147e-05, "loss": 0.9208, "step": 3258 }, { "epoch": 0.40651116377697394, "grad_norm": 0.0, "learning_rate": 1.3442970154147035e-05, "loss": 0.9922, "step": 3259 }, { "epoch": 0.40663589871523015, "grad_norm": 0.0, "learning_rate": 1.3439176768668396e-05, "loss": 0.9871, "step": 3260 }, { "epoch": 0.40676063365348636, "grad_norm": 0.0, "learning_rate": 1.3435382821829408e-05, "loss": 0.9676, "step": 3261 }, { "epoch": 0.40688536859174257, "grad_norm": 0.0, "learning_rate": 1.3431588314249341e-05, "loss": 0.991, "step": 3262 }, { "epoch": 0.4070101035299988, "grad_norm": 0.0, "learning_rate": 1.3427793246547547e-05, "loss": 1.0465, "step": 3263 }, { "epoch": 0.40713483846825493, "grad_norm": 0.0, "learning_rate": 1.3423997619343483e-05, "loss": 0.9862, "step": 3264 }, { "epoch": 0.40725957340651114, "grad_norm": 0.0, "learning_rate": 1.342020143325669e-05, "loss": 0.9564, "step": 3265 }, { "epoch": 0.40738430834476735, "grad_norm": 0.0, "learning_rate": 1.3416404688906797e-05, "loss": 0.9957, "step": 3266 }, { "epoch": 0.40750904328302356, "grad_norm": 0.0, "learning_rate": 1.3412607386913532e-05, "loss": 0.9201, "step": 3267 }, { "epoch": 0.4076337782212798, "grad_norm": 0.0, "learning_rate": 1.3408809527896714e-05, "loss": 1.0086, "step": 3268 }, { "epoch": 0.407758513159536, "grad_norm": 0.0, "learning_rate": 1.3405011112476244e-05, "loss": 0.9862, "step": 3269 }, { "epoch": 0.4078832480977922, "grad_norm": 0.0, "learning_rate": 1.340121214127212e-05, "loss": 0.9405, "step": 3270 }, { "epoch": 0.4080079830360484, "grad_norm": 0.0, "learning_rate": 1.339741261490443e-05, "loss": 0.9376, "step": 3271 }, { "epoch": 0.4081327179743046, "grad_norm": 0.0, "learning_rate": 1.3393612533993354e-05, "loss": 0.9668, "step": 3272 }, { "epoch": 0.4082574529125608, "grad_norm": 0.0, "learning_rate": 1.338981189915916e-05, "loss": 0.9637, "step": 3273 }, { "epoch": 0.40838218785081704, "grad_norm": 0.0, "learning_rate": 1.3386010711022206e-05, "loss": 0.9999, "step": 3274 }, { "epoch": 0.40850692278907325, "grad_norm": 0.0, "learning_rate": 1.3382208970202943e-05, "loss": 0.9754, "step": 3275 }, { "epoch": 0.4086316577273294, "grad_norm": 0.0, "learning_rate": 1.3378406677321914e-05, "loss": 0.9569, "step": 3276 }, { "epoch": 0.4087563926655856, "grad_norm": 0.0, "learning_rate": 1.3374603832999741e-05, "loss": 0.9692, "step": 3277 }, { "epoch": 0.4088811276038418, "grad_norm": 0.0, "learning_rate": 1.3370800437857154e-05, "loss": 0.9739, "step": 3278 }, { "epoch": 0.40900586254209803, "grad_norm": 0.0, "learning_rate": 1.3366996492514956e-05, "loss": 0.9653, "step": 3279 }, { "epoch": 0.40913059748035424, "grad_norm": 0.0, "learning_rate": 1.336319199759405e-05, "loss": 1.0045, "step": 3280 }, { "epoch": 0.40925533241861045, "grad_norm": 0.0, "learning_rate": 1.3359386953715423e-05, "loss": 0.9631, "step": 3281 }, { "epoch": 0.40938006735686666, "grad_norm": 0.0, "learning_rate": 1.3355581361500156e-05, "loss": 0.9982, "step": 3282 }, { "epoch": 0.40950480229512287, "grad_norm": 0.0, "learning_rate": 1.3351775221569416e-05, "loss": 0.9522, "step": 3283 }, { "epoch": 0.4096295372333791, "grad_norm": 0.0, "learning_rate": 1.3347968534544467e-05, "loss": 1.0326, "step": 3284 }, { "epoch": 0.4097542721716353, "grad_norm": 0.0, "learning_rate": 1.3344161301046648e-05, "loss": 1.0264, "step": 3285 }, { "epoch": 0.4098790071098915, "grad_norm": 0.0, "learning_rate": 1.3340353521697396e-05, "loss": 0.945, "step": 3286 }, { "epoch": 0.4100037420481477, "grad_norm": 0.0, "learning_rate": 1.333654519711824e-05, "loss": 0.9292, "step": 3287 }, { "epoch": 0.41012847698640387, "grad_norm": 0.0, "learning_rate": 1.3332736327930798e-05, "loss": 0.9846, "step": 3288 }, { "epoch": 0.4102532119246601, "grad_norm": 0.0, "learning_rate": 1.3328926914756765e-05, "loss": 0.9719, "step": 3289 }, { "epoch": 0.4103779468629163, "grad_norm": 0.0, "learning_rate": 1.3325116958217944e-05, "loss": 1.0035, "step": 3290 }, { "epoch": 0.4105026818011725, "grad_norm": 0.0, "learning_rate": 1.3321306458936204e-05, "loss": 0.9325, "step": 3291 }, { "epoch": 0.4106274167394287, "grad_norm": 0.0, "learning_rate": 1.3317495417533523e-05, "loss": 0.966, "step": 3292 }, { "epoch": 0.4107521516776849, "grad_norm": 0.0, "learning_rate": 1.3313683834631954e-05, "loss": 0.9942, "step": 3293 }, { "epoch": 0.41087688661594113, "grad_norm": 0.0, "learning_rate": 1.3309871710853646e-05, "loss": 0.9606, "step": 3294 }, { "epoch": 0.41100162155419734, "grad_norm": 0.0, "learning_rate": 1.3306059046820836e-05, "loss": 1.0202, "step": 3295 }, { "epoch": 0.41112635649245355, "grad_norm": 0.0, "learning_rate": 1.3302245843155843e-05, "loss": 0.9717, "step": 3296 }, { "epoch": 0.41125109143070976, "grad_norm": 0.0, "learning_rate": 1.3298432100481078e-05, "loss": 0.9596, "step": 3297 }, { "epoch": 0.41137582636896597, "grad_norm": 0.0, "learning_rate": 1.3294617819419044e-05, "loss": 0.9696, "step": 3298 }, { "epoch": 0.4115005613072222, "grad_norm": 0.0, "learning_rate": 1.3290803000592323e-05, "loss": 1.0161, "step": 3299 }, { "epoch": 0.41162529624547833, "grad_norm": 0.0, "learning_rate": 1.328698764462359e-05, "loss": 0.9859, "step": 3300 }, { "epoch": 0.41175003118373454, "grad_norm": 0.0, "learning_rate": 1.3283171752135614e-05, "loss": 1.0292, "step": 3301 }, { "epoch": 0.41187476612199075, "grad_norm": 0.0, "learning_rate": 1.3279355323751234e-05, "loss": 0.9557, "step": 3302 }, { "epoch": 0.41199950106024696, "grad_norm": 0.0, "learning_rate": 1.3275538360093397e-05, "loss": 1.0402, "step": 3303 }, { "epoch": 0.4121242359985032, "grad_norm": 0.0, "learning_rate": 1.327172086178512e-05, "loss": 1.013, "step": 3304 }, { "epoch": 0.4122489709367594, "grad_norm": 0.0, "learning_rate": 1.3267902829449523e-05, "loss": 1.0211, "step": 3305 }, { "epoch": 0.4123737058750156, "grad_norm": 0.0, "learning_rate": 1.3264084263709798e-05, "loss": 0.9713, "step": 3306 }, { "epoch": 0.4124984408132718, "grad_norm": 0.0, "learning_rate": 1.3260265165189234e-05, "loss": 0.9679, "step": 3307 }, { "epoch": 0.412623175751528, "grad_norm": 0.0, "learning_rate": 1.3256445534511209e-05, "loss": 0.958, "step": 3308 }, { "epoch": 0.4127479106897842, "grad_norm": 0.0, "learning_rate": 1.3252625372299172e-05, "loss": 0.9831, "step": 3309 }, { "epoch": 0.41287264562804044, "grad_norm": 0.0, "learning_rate": 1.3248804679176679e-05, "loss": 0.9826, "step": 3310 }, { "epoch": 0.41299738056629665, "grad_norm": 0.0, "learning_rate": 1.324498345576736e-05, "loss": 1.0188, "step": 3311 }, { "epoch": 0.4131221155045528, "grad_norm": 0.0, "learning_rate": 1.3241161702694933e-05, "loss": 0.9678, "step": 3312 }, { "epoch": 0.413246850442809, "grad_norm": 0.0, "learning_rate": 1.3237339420583213e-05, "loss": 0.918, "step": 3313 }, { "epoch": 0.4133715853810652, "grad_norm": 0.0, "learning_rate": 1.3233516610056082e-05, "loss": 0.9525, "step": 3314 }, { "epoch": 0.41349632031932143, "grad_norm": 0.0, "learning_rate": 1.3229693271737525e-05, "loss": 0.9662, "step": 3315 }, { "epoch": 0.41362105525757764, "grad_norm": 0.0, "learning_rate": 1.3225869406251603e-05, "loss": 0.9819, "step": 3316 }, { "epoch": 0.41374579019583385, "grad_norm": 0.0, "learning_rate": 1.3222045014222474e-05, "loss": 1.0782, "step": 3317 }, { "epoch": 0.41387052513409006, "grad_norm": 0.0, "learning_rate": 1.3218220096274373e-05, "loss": 0.9712, "step": 3318 }, { "epoch": 0.41399526007234627, "grad_norm": 0.0, "learning_rate": 1.3214394653031616e-05, "loss": 0.9891, "step": 3319 }, { "epoch": 0.4141199950106025, "grad_norm": 0.0, "learning_rate": 1.3210568685118621e-05, "loss": 0.9657, "step": 3320 }, { "epoch": 0.4142447299488587, "grad_norm": 0.0, "learning_rate": 1.3206742193159878e-05, "loss": 1.0027, "step": 3321 }, { "epoch": 0.4143694648871149, "grad_norm": 0.0, "learning_rate": 1.3202915177779967e-05, "loss": 0.9132, "step": 3322 }, { "epoch": 0.4144941998253711, "grad_norm": 0.0, "learning_rate": 1.3199087639603553e-05, "loss": 0.9376, "step": 3323 }, { "epoch": 0.41461893476362727, "grad_norm": 0.0, "learning_rate": 1.319525957925539e-05, "loss": 1.0008, "step": 3324 }, { "epoch": 0.4147436697018835, "grad_norm": 0.0, "learning_rate": 1.319143099736031e-05, "loss": 0.949, "step": 3325 }, { "epoch": 0.4148684046401397, "grad_norm": 0.0, "learning_rate": 1.3187601894543235e-05, "loss": 0.935, "step": 3326 }, { "epoch": 0.4149931395783959, "grad_norm": 0.0, "learning_rate": 1.3183772271429174e-05, "loss": 0.9845, "step": 3327 }, { "epoch": 0.4151178745166521, "grad_norm": 0.0, "learning_rate": 1.3179942128643216e-05, "loss": 0.9711, "step": 3328 }, { "epoch": 0.4152426094549083, "grad_norm": 0.0, "learning_rate": 1.3176111466810532e-05, "loss": 1.0098, "step": 3329 }, { "epoch": 0.41536734439316453, "grad_norm": 0.0, "learning_rate": 1.3172280286556394e-05, "loss": 0.9763, "step": 3330 }, { "epoch": 0.41549207933142074, "grad_norm": 0.0, "learning_rate": 1.3168448588506137e-05, "loss": 0.9655, "step": 3331 }, { "epoch": 0.41561681426967695, "grad_norm": 0.0, "learning_rate": 1.3164616373285194e-05, "loss": 0.9915, "step": 3332 }, { "epoch": 0.41574154920793316, "grad_norm": 0.0, "learning_rate": 1.316078364151908e-05, "loss": 0.9332, "step": 3333 }, { "epoch": 0.41586628414618937, "grad_norm": 0.0, "learning_rate": 1.3156950393833393e-05, "loss": 1.0026, "step": 3334 }, { "epoch": 0.4159910190844456, "grad_norm": 0.0, "learning_rate": 1.3153116630853813e-05, "loss": 0.9388, "step": 3335 }, { "epoch": 0.41611575402270173, "grad_norm": 0.0, "learning_rate": 1.3149282353206112e-05, "loss": 0.9679, "step": 3336 }, { "epoch": 0.41624048896095794, "grad_norm": 0.0, "learning_rate": 1.3145447561516138e-05, "loss": 0.9391, "step": 3337 }, { "epoch": 0.41636522389921415, "grad_norm": 0.0, "learning_rate": 1.3141612256409825e-05, "loss": 0.9821, "step": 3338 }, { "epoch": 0.41648995883747036, "grad_norm": 0.0, "learning_rate": 1.3137776438513193e-05, "loss": 0.9738, "step": 3339 }, { "epoch": 0.4166146937757266, "grad_norm": 0.0, "learning_rate": 1.3133940108452341e-05, "loss": 0.9998, "step": 3340 }, { "epoch": 0.4167394287139828, "grad_norm": 0.0, "learning_rate": 1.3130103266853462e-05, "loss": 1.0078, "step": 3341 }, { "epoch": 0.416864163652239, "grad_norm": 0.0, "learning_rate": 1.3126265914342814e-05, "loss": 0.9664, "step": 3342 }, { "epoch": 0.4169888985904952, "grad_norm": 0.0, "learning_rate": 1.3122428051546759e-05, "loss": 0.9335, "step": 3343 }, { "epoch": 0.4171136335287514, "grad_norm": 0.0, "learning_rate": 1.3118589679091732e-05, "loss": 0.9833, "step": 3344 }, { "epoch": 0.4172383684670076, "grad_norm": 0.0, "learning_rate": 1.3114750797604248e-05, "loss": 0.9464, "step": 3345 }, { "epoch": 0.41736310340526384, "grad_norm": 0.0, "learning_rate": 1.3110911407710909e-05, "loss": 0.9253, "step": 3346 }, { "epoch": 0.41748783834352005, "grad_norm": 0.0, "learning_rate": 1.3107071510038407e-05, "loss": 0.9595, "step": 3347 }, { "epoch": 0.4176125732817762, "grad_norm": 0.0, "learning_rate": 1.3103231105213505e-05, "loss": 1.0093, "step": 3348 }, { "epoch": 0.4177373082200324, "grad_norm": 0.0, "learning_rate": 1.3099390193863053e-05, "loss": 1.0055, "step": 3349 }, { "epoch": 0.4178620431582886, "grad_norm": 0.0, "learning_rate": 1.3095548776613987e-05, "loss": 1.002, "step": 3350 }, { "epoch": 0.41798677809654483, "grad_norm": 0.0, "learning_rate": 1.3091706854093323e-05, "loss": 0.972, "step": 3351 }, { "epoch": 0.41811151303480104, "grad_norm": 0.0, "learning_rate": 1.3087864426928158e-05, "loss": 1.0143, "step": 3352 }, { "epoch": 0.41823624797305725, "grad_norm": 0.0, "learning_rate": 1.3084021495745673e-05, "loss": 0.981, "step": 3353 }, { "epoch": 0.41836098291131346, "grad_norm": 0.0, "learning_rate": 1.3080178061173135e-05, "loss": 0.9726, "step": 3354 }, { "epoch": 0.41848571784956967, "grad_norm": 0.0, "learning_rate": 1.3076334123837884e-05, "loss": 1.0101, "step": 3355 }, { "epoch": 0.4186104527878259, "grad_norm": 0.0, "learning_rate": 1.3072489684367348e-05, "loss": 0.9661, "step": 3356 }, { "epoch": 0.4187351877260821, "grad_norm": 0.0, "learning_rate": 1.3068644743389043e-05, "loss": 0.9868, "step": 3357 }, { "epoch": 0.4188599226643383, "grad_norm": 0.0, "learning_rate": 1.3064799301530556e-05, "loss": 0.9527, "step": 3358 }, { "epoch": 0.4189846576025945, "grad_norm": 0.0, "learning_rate": 1.3060953359419556e-05, "loss": 0.9969, "step": 3359 }, { "epoch": 0.41910939254085067, "grad_norm": 0.0, "learning_rate": 1.3057106917683803e-05, "loss": 0.9082, "step": 3360 }, { "epoch": 0.4192341274791069, "grad_norm": 0.0, "learning_rate": 1.3053259976951134e-05, "loss": 0.9957, "step": 3361 }, { "epoch": 0.4193588624173631, "grad_norm": 0.0, "learning_rate": 1.3049412537849459e-05, "loss": 0.9363, "step": 3362 }, { "epoch": 0.4194835973556193, "grad_norm": 0.0, "learning_rate": 1.3045564601006789e-05, "loss": 0.9809, "step": 3363 }, { "epoch": 0.4196083322938755, "grad_norm": 0.0, "learning_rate": 1.3041716167051197e-05, "loss": 0.957, "step": 3364 }, { "epoch": 0.4197330672321317, "grad_norm": 0.0, "learning_rate": 1.3037867236610841e-05, "loss": 0.9818, "step": 3365 }, { "epoch": 0.41985780217038793, "grad_norm": 0.0, "learning_rate": 1.303401781031397e-05, "loss": 0.9462, "step": 3366 }, { "epoch": 0.41998253710864414, "grad_norm": 0.0, "learning_rate": 1.3030167888788908e-05, "loss": 1.0061, "step": 3367 }, { "epoch": 0.42010727204690035, "grad_norm": 0.0, "learning_rate": 1.3026317472664052e-05, "loss": 0.9403, "step": 3368 }, { "epoch": 0.42023200698515656, "grad_norm": 0.0, "learning_rate": 1.3022466562567894e-05, "loss": 0.9711, "step": 3369 }, { "epoch": 0.42035674192341277, "grad_norm": 0.0, "learning_rate": 1.3018615159128995e-05, "loss": 1.0123, "step": 3370 }, { "epoch": 0.420481476861669, "grad_norm": 0.0, "learning_rate": 1.3014763262976007e-05, "loss": 0.9474, "step": 3371 }, { "epoch": 0.42060621179992513, "grad_norm": 0.0, "learning_rate": 1.3010910874737648e-05, "loss": 0.9982, "step": 3372 }, { "epoch": 0.42073094673818134, "grad_norm": 0.0, "learning_rate": 1.300705799504273e-05, "loss": 0.9958, "step": 3373 }, { "epoch": 0.42085568167643755, "grad_norm": 0.0, "learning_rate": 1.3003204624520145e-05, "loss": 0.9617, "step": 3374 }, { "epoch": 0.42098041661469376, "grad_norm": 0.0, "learning_rate": 1.2999350763798853e-05, "loss": 1.0738, "step": 3375 }, { "epoch": 0.42110515155295, "grad_norm": 0.0, "learning_rate": 1.29954964135079e-05, "loss": 0.9649, "step": 3376 }, { "epoch": 0.4212298864912062, "grad_norm": 0.0, "learning_rate": 1.2991641574276419e-05, "loss": 0.96, "step": 3377 }, { "epoch": 0.4213546214294624, "grad_norm": 0.0, "learning_rate": 1.2987786246733611e-05, "loss": 0.9957, "step": 3378 }, { "epoch": 0.4214793563677186, "grad_norm": 0.0, "learning_rate": 1.2983930431508766e-05, "loss": 0.9929, "step": 3379 }, { "epoch": 0.4216040913059748, "grad_norm": 0.0, "learning_rate": 1.2980074129231254e-05, "loss": 0.948, "step": 3380 }, { "epoch": 0.421728826244231, "grad_norm": 0.0, "learning_rate": 1.2976217340530517e-05, "loss": 0.9329, "step": 3381 }, { "epoch": 0.42185356118248724, "grad_norm": 0.0, "learning_rate": 1.2972360066036078e-05, "loss": 0.9542, "step": 3382 }, { "epoch": 0.42197829612074345, "grad_norm": 0.0, "learning_rate": 1.2968502306377541e-05, "loss": 1.0123, "step": 3383 }, { "epoch": 0.4221030310589996, "grad_norm": 0.0, "learning_rate": 1.2964644062184597e-05, "loss": 0.9891, "step": 3384 }, { "epoch": 0.4222277659972558, "grad_norm": 0.0, "learning_rate": 1.2960785334087e-05, "loss": 0.936, "step": 3385 }, { "epoch": 0.422352500935512, "grad_norm": 0.0, "learning_rate": 1.2956926122714597e-05, "loss": 0.9238, "step": 3386 }, { "epoch": 0.42247723587376823, "grad_norm": 0.0, "learning_rate": 1.295306642869731e-05, "loss": 0.9796, "step": 3387 }, { "epoch": 0.42260197081202444, "grad_norm": 0.0, "learning_rate": 1.2949206252665132e-05, "loss": 0.9864, "step": 3388 }, { "epoch": 0.42272670575028065, "grad_norm": 0.0, "learning_rate": 1.2945345595248142e-05, "loss": 0.9495, "step": 3389 }, { "epoch": 0.42285144068853686, "grad_norm": 0.0, "learning_rate": 1.2941484457076503e-05, "loss": 0.9546, "step": 3390 }, { "epoch": 0.42297617562679307, "grad_norm": 0.0, "learning_rate": 1.2937622838780444e-05, "loss": 0.9985, "step": 3391 }, { "epoch": 0.4231009105650493, "grad_norm": 0.0, "learning_rate": 1.2933760740990284e-05, "loss": 0.9158, "step": 3392 }, { "epoch": 0.4232256455033055, "grad_norm": 0.0, "learning_rate": 1.2929898164336407e-05, "loss": 0.9402, "step": 3393 }, { "epoch": 0.4233503804415617, "grad_norm": 0.0, "learning_rate": 1.2926035109449293e-05, "loss": 0.9277, "step": 3394 }, { "epoch": 0.4234751153798179, "grad_norm": 0.0, "learning_rate": 1.2922171576959479e-05, "loss": 0.9485, "step": 3395 }, { "epoch": 0.42359985031807407, "grad_norm": 0.0, "learning_rate": 1.2918307567497596e-05, "loss": 0.9937, "step": 3396 }, { "epoch": 0.4237245852563303, "grad_norm": 0.0, "learning_rate": 1.2914443081694353e-05, "loss": 0.9304, "step": 3397 }, { "epoch": 0.4238493201945865, "grad_norm": 0.0, "learning_rate": 1.2910578120180524e-05, "loss": 0.9483, "step": 3398 }, { "epoch": 0.4239740551328427, "grad_norm": 0.0, "learning_rate": 1.2906712683586968e-05, "loss": 0.9738, "step": 3399 }, { "epoch": 0.4240987900710989, "grad_norm": 0.0, "learning_rate": 1.2902846772544625e-05, "loss": 0.9746, "step": 3400 }, { "epoch": 0.4242235250093551, "grad_norm": 0.0, "learning_rate": 1.289898038768451e-05, "loss": 0.9915, "step": 3401 }, { "epoch": 0.42434825994761133, "grad_norm": 0.0, "learning_rate": 1.289511352963771e-05, "loss": 0.9905, "step": 3402 }, { "epoch": 0.42447299488586754, "grad_norm": 0.0, "learning_rate": 1.28912461990354e-05, "loss": 0.9626, "step": 3403 }, { "epoch": 0.42459772982412375, "grad_norm": 0.0, "learning_rate": 1.2887378396508822e-05, "loss": 0.964, "step": 3404 }, { "epoch": 0.42472246476237996, "grad_norm": 0.0, "learning_rate": 1.2883510122689297e-05, "loss": 0.9625, "step": 3405 }, { "epoch": 0.42484719970063617, "grad_norm": 0.0, "learning_rate": 1.2879641378208229e-05, "loss": 0.9928, "step": 3406 }, { "epoch": 0.4249719346388924, "grad_norm": 0.0, "learning_rate": 1.2875772163697094e-05, "loss": 0.9273, "step": 3407 }, { "epoch": 0.42509666957714853, "grad_norm": 0.0, "learning_rate": 1.2871902479787439e-05, "loss": 0.9696, "step": 3408 }, { "epoch": 0.42522140451540474, "grad_norm": 0.0, "learning_rate": 1.2868032327110904e-05, "loss": 0.9538, "step": 3409 }, { "epoch": 0.42534613945366095, "grad_norm": 0.0, "learning_rate": 1.2864161706299187e-05, "loss": 0.9322, "step": 3410 }, { "epoch": 0.42547087439191716, "grad_norm": 0.0, "learning_rate": 1.286029061798408e-05, "loss": 0.9607, "step": 3411 }, { "epoch": 0.4255956093301734, "grad_norm": 0.0, "learning_rate": 1.285641906279743e-05, "loss": 1.0039, "step": 3412 }, { "epoch": 0.4257203442684296, "grad_norm": 0.0, "learning_rate": 1.2852547041371181e-05, "loss": 1.01, "step": 3413 }, { "epoch": 0.4258450792066858, "grad_norm": 0.0, "learning_rate": 1.2848674554337346e-05, "loss": 0.9915, "step": 3414 }, { "epoch": 0.425969814144942, "grad_norm": 0.0, "learning_rate": 1.2844801602328007e-05, "loss": 0.977, "step": 3415 }, { "epoch": 0.4260945490831982, "grad_norm": 0.0, "learning_rate": 1.2840928185975327e-05, "loss": 1.0159, "step": 3416 }, { "epoch": 0.4262192840214544, "grad_norm": 0.0, "learning_rate": 1.2837054305911551e-05, "loss": 0.9784, "step": 3417 }, { "epoch": 0.42634401895971064, "grad_norm": 0.0, "learning_rate": 1.2833179962768988e-05, "loss": 0.9816, "step": 3418 }, { "epoch": 0.42646875389796685, "grad_norm": 0.0, "learning_rate": 1.2829305157180033e-05, "loss": 0.9545, "step": 3419 }, { "epoch": 0.426593488836223, "grad_norm": 0.0, "learning_rate": 1.282542988977715e-05, "loss": 0.9429, "step": 3420 }, { "epoch": 0.4267182237744792, "grad_norm": 0.0, "learning_rate": 1.2821554161192879e-05, "loss": 0.9452, "step": 3421 }, { "epoch": 0.4268429587127354, "grad_norm": 0.0, "learning_rate": 1.2817677972059838e-05, "loss": 0.9926, "step": 3422 }, { "epoch": 0.42696769365099163, "grad_norm": 0.0, "learning_rate": 1.2813801323010719e-05, "loss": 0.9275, "step": 3423 }, { "epoch": 0.42709242858924784, "grad_norm": 0.0, "learning_rate": 1.280992421467829e-05, "loss": 1.0094, "step": 3424 }, { "epoch": 0.42721716352750405, "grad_norm": 0.0, "learning_rate": 1.280604664769539e-05, "loss": 0.9568, "step": 3425 }, { "epoch": 0.42734189846576026, "grad_norm": 0.0, "learning_rate": 1.2802168622694938e-05, "loss": 0.9901, "step": 3426 }, { "epoch": 0.42746663340401647, "grad_norm": 0.0, "learning_rate": 1.2798290140309924e-05, "loss": 0.9292, "step": 3427 }, { "epoch": 0.4275913683422727, "grad_norm": 0.0, "learning_rate": 1.2794411201173414e-05, "loss": 0.999, "step": 3428 }, { "epoch": 0.4277161032805289, "grad_norm": 0.0, "learning_rate": 1.2790531805918546e-05, "loss": 0.9397, "step": 3429 }, { "epoch": 0.4278408382187851, "grad_norm": 0.0, "learning_rate": 1.2786651955178545e-05, "loss": 1.0181, "step": 3430 }, { "epoch": 0.4279655731570413, "grad_norm": 0.0, "learning_rate": 1.2782771649586689e-05, "loss": 0.9423, "step": 3431 }, { "epoch": 0.42809030809529747, "grad_norm": 0.0, "learning_rate": 1.2778890889776349e-05, "loss": 1.0065, "step": 3432 }, { "epoch": 0.4282150430335537, "grad_norm": 0.0, "learning_rate": 1.2775009676380959e-05, "loss": 1.0111, "step": 3433 }, { "epoch": 0.4283397779718099, "grad_norm": 0.0, "learning_rate": 1.2771128010034032e-05, "loss": 0.9946, "step": 3434 }, { "epoch": 0.4284645129100661, "grad_norm": 0.0, "learning_rate": 1.2767245891369156e-05, "loss": 0.9979, "step": 3435 }, { "epoch": 0.4285892478483223, "grad_norm": 0.0, "learning_rate": 1.2763363321019986e-05, "loss": 0.9975, "step": 3436 }, { "epoch": 0.4287139827865785, "grad_norm": 0.0, "learning_rate": 1.2759480299620262e-05, "loss": 0.9689, "step": 3437 }, { "epoch": 0.42883871772483473, "grad_norm": 0.0, "learning_rate": 1.2755596827803784e-05, "loss": 0.93, "step": 3438 }, { "epoch": 0.42896345266309094, "grad_norm": 0.0, "learning_rate": 1.2751712906204434e-05, "loss": 0.9797, "step": 3439 }, { "epoch": 0.42908818760134715, "grad_norm": 0.0, "learning_rate": 1.274782853545617e-05, "loss": 0.9024, "step": 3440 }, { "epoch": 0.42921292253960336, "grad_norm": 0.0, "learning_rate": 1.2743943716193017e-05, "loss": 0.9292, "step": 3441 }, { "epoch": 0.42933765747785957, "grad_norm": 0.0, "learning_rate": 1.2740058449049073e-05, "loss": 0.9948, "step": 3442 }, { "epoch": 0.4294623924161158, "grad_norm": 0.0, "learning_rate": 1.2736172734658516e-05, "loss": 0.9626, "step": 3443 }, { "epoch": 0.42958712735437193, "grad_norm": 0.0, "learning_rate": 1.273228657365559e-05, "loss": 0.9573, "step": 3444 }, { "epoch": 0.42971186229262814, "grad_norm": 0.0, "learning_rate": 1.2728399966674612e-05, "loss": 0.9927, "step": 3445 }, { "epoch": 0.42983659723088435, "grad_norm": 0.0, "learning_rate": 1.2724512914349979e-05, "loss": 0.9902, "step": 3446 }, { "epoch": 0.42996133216914056, "grad_norm": 0.0, "learning_rate": 1.2720625417316153e-05, "loss": 0.95, "step": 3447 }, { "epoch": 0.4300860671073968, "grad_norm": 0.0, "learning_rate": 1.271673747620767e-05, "loss": 0.9866, "step": 3448 }, { "epoch": 0.430210802045653, "grad_norm": 0.0, "learning_rate": 1.2712849091659147e-05, "loss": 0.9574, "step": 3449 }, { "epoch": 0.4303355369839092, "grad_norm": 0.0, "learning_rate": 1.2708960264305261e-05, "loss": 0.9589, "step": 3450 }, { "epoch": 0.4304602719221654, "grad_norm": 0.0, "learning_rate": 1.2705070994780763e-05, "loss": 0.9908, "step": 3451 }, { "epoch": 0.4305850068604216, "grad_norm": 0.0, "learning_rate": 1.2701181283720487e-05, "loss": 0.9819, "step": 3452 }, { "epoch": 0.4307097417986778, "grad_norm": 0.0, "learning_rate": 1.269729113175933e-05, "loss": 0.9634, "step": 3453 }, { "epoch": 0.43083447673693404, "grad_norm": 0.0, "learning_rate": 1.2693400539532263e-05, "loss": 0.969, "step": 3454 }, { "epoch": 0.43095921167519025, "grad_norm": 0.0, "learning_rate": 1.2689509507674324e-05, "loss": 0.9598, "step": 3455 }, { "epoch": 0.4310839466134464, "grad_norm": 0.0, "learning_rate": 1.2685618036820633e-05, "loss": 0.9557, "step": 3456 }, { "epoch": 0.4312086815517026, "grad_norm": 0.0, "learning_rate": 1.2681726127606374e-05, "loss": 0.969, "step": 3457 }, { "epoch": 0.4313334164899588, "grad_norm": 0.0, "learning_rate": 1.2677833780666806e-05, "loss": 0.9605, "step": 3458 }, { "epoch": 0.43145815142821503, "grad_norm": 0.0, "learning_rate": 1.2673940996637255e-05, "loss": 1.0218, "step": 3459 }, { "epoch": 0.43158288636647124, "grad_norm": 0.0, "learning_rate": 1.267004777615313e-05, "loss": 0.9712, "step": 3460 }, { "epoch": 0.43170762130472745, "grad_norm": 0.0, "learning_rate": 1.266615411984989e-05, "loss": 0.9409, "step": 3461 }, { "epoch": 0.43183235624298366, "grad_norm": 0.0, "learning_rate": 1.2662260028363084e-05, "loss": 0.9351, "step": 3462 }, { "epoch": 0.43195709118123987, "grad_norm": 0.0, "learning_rate": 1.2658365502328329e-05, "loss": 0.9786, "step": 3463 }, { "epoch": 0.4320818261194961, "grad_norm": 0.0, "learning_rate": 1.2654470542381305e-05, "loss": 0.9518, "step": 3464 }, { "epoch": 0.4322065610577523, "grad_norm": 0.0, "learning_rate": 1.2650575149157773e-05, "loss": 0.9123, "step": 3465 }, { "epoch": 0.4323312959960085, "grad_norm": 0.0, "learning_rate": 1.2646679323293553e-05, "loss": 1.0002, "step": 3466 }, { "epoch": 0.4324560309342647, "grad_norm": 0.0, "learning_rate": 1.2642783065424546e-05, "loss": 1.0283, "step": 3467 }, { "epoch": 0.43258076587252087, "grad_norm": 0.0, "learning_rate": 1.2638886376186719e-05, "loss": 0.9589, "step": 3468 }, { "epoch": 0.4327055008107771, "grad_norm": 0.0, "learning_rate": 1.2634989256216108e-05, "loss": 0.967, "step": 3469 }, { "epoch": 0.4328302357490333, "grad_norm": 0.0, "learning_rate": 1.2631091706148824e-05, "loss": 0.9934, "step": 3470 }, { "epoch": 0.4329549706872895, "grad_norm": 0.0, "learning_rate": 1.2627193726621045e-05, "loss": 1.0199, "step": 3471 }, { "epoch": 0.4330797056255457, "grad_norm": 0.0, "learning_rate": 1.2623295318269018e-05, "loss": 0.9486, "step": 3472 }, { "epoch": 0.4332044405638019, "grad_norm": 0.0, "learning_rate": 1.261939648172906e-05, "loss": 0.9873, "step": 3473 }, { "epoch": 0.43332917550205813, "grad_norm": 0.0, "learning_rate": 1.2615497217637565e-05, "loss": 1.0065, "step": 3474 }, { "epoch": 0.43345391044031434, "grad_norm": 0.0, "learning_rate": 1.2611597526630983e-05, "loss": 0.9482, "step": 3475 }, { "epoch": 0.43357864537857055, "grad_norm": 0.0, "learning_rate": 1.2607697409345851e-05, "loss": 0.9828, "step": 3476 }, { "epoch": 0.43370338031682676, "grad_norm": 0.0, "learning_rate": 1.2603796866418762e-05, "loss": 0.9745, "step": 3477 }, { "epoch": 0.43382811525508297, "grad_norm": 0.0, "learning_rate": 1.2599895898486381e-05, "loss": 0.9499, "step": 3478 }, { "epoch": 0.4339528501933392, "grad_norm": 0.0, "learning_rate": 1.2595994506185446e-05, "loss": 0.966, "step": 3479 }, { "epoch": 0.43407758513159533, "grad_norm": 0.0, "learning_rate": 1.2592092690152765e-05, "loss": 0.928, "step": 3480 }, { "epoch": 0.43420232006985154, "grad_norm": 0.0, "learning_rate": 1.2588190451025209e-05, "loss": 0.9769, "step": 3481 }, { "epoch": 0.43432705500810775, "grad_norm": 0.0, "learning_rate": 1.2584287789439725e-05, "loss": 0.9664, "step": 3482 }, { "epoch": 0.43445178994636396, "grad_norm": 0.0, "learning_rate": 1.2580384706033324e-05, "loss": 0.9561, "step": 3483 }, { "epoch": 0.4345765248846202, "grad_norm": 0.0, "learning_rate": 1.2576481201443087e-05, "loss": 0.9593, "step": 3484 }, { "epoch": 0.4347012598228764, "grad_norm": 0.0, "learning_rate": 1.2572577276306163e-05, "loss": 0.9454, "step": 3485 }, { "epoch": 0.4348259947611326, "grad_norm": 0.0, "learning_rate": 1.2568672931259778e-05, "loss": 0.9535, "step": 3486 }, { "epoch": 0.4349507296993888, "grad_norm": 0.0, "learning_rate": 1.256476816694121e-05, "loss": 0.9495, "step": 3487 }, { "epoch": 0.435075464637645, "grad_norm": 0.0, "learning_rate": 1.2560862983987825e-05, "loss": 1.038, "step": 3488 }, { "epoch": 0.4352001995759012, "grad_norm": 0.0, "learning_rate": 1.2556957383037038e-05, "loss": 0.9563, "step": 3489 }, { "epoch": 0.43532493451415744, "grad_norm": 0.0, "learning_rate": 1.2553051364726347e-05, "loss": 0.9618, "step": 3490 }, { "epoch": 0.43544966945241365, "grad_norm": 0.0, "learning_rate": 1.254914492969331e-05, "loss": 0.9761, "step": 3491 }, { "epoch": 0.4355744043906698, "grad_norm": 0.0, "learning_rate": 1.2545238078575557e-05, "loss": 1.0031, "step": 3492 }, { "epoch": 0.435699139328926, "grad_norm": 0.0, "learning_rate": 1.2541330812010786e-05, "loss": 0.9482, "step": 3493 }, { "epoch": 0.4358238742671822, "grad_norm": 0.0, "learning_rate": 1.253742313063676e-05, "loss": 0.9401, "step": 3494 }, { "epoch": 0.43594860920543843, "grad_norm": 0.0, "learning_rate": 1.253351503509131e-05, "loss": 0.9929, "step": 3495 }, { "epoch": 0.43607334414369464, "grad_norm": 0.0, "learning_rate": 1.2529606526012338e-05, "loss": 1.0057, "step": 3496 }, { "epoch": 0.43619807908195085, "grad_norm": 0.0, "learning_rate": 1.2525697604037807e-05, "loss": 1.006, "step": 3497 }, { "epoch": 0.43632281402020706, "grad_norm": 0.0, "learning_rate": 1.2521788269805757e-05, "loss": 0.9039, "step": 3498 }, { "epoch": 0.43644754895846327, "grad_norm": 0.0, "learning_rate": 1.2517878523954287e-05, "loss": 0.9701, "step": 3499 }, { "epoch": 0.4365722838967195, "grad_norm": 0.0, "learning_rate": 1.2513968367121567e-05, "loss": 1.0131, "step": 3500 }, { "epoch": 0.4366970188349757, "grad_norm": 0.0, "learning_rate": 1.2510057799945832e-05, "loss": 0.9729, "step": 3501 }, { "epoch": 0.4368217537732319, "grad_norm": 0.0, "learning_rate": 1.2506146823065386e-05, "loss": 1.0178, "step": 3502 }, { "epoch": 0.4369464887114881, "grad_norm": 0.0, "learning_rate": 1.2502235437118597e-05, "loss": 0.9274, "step": 3503 }, { "epoch": 0.43707122364974427, "grad_norm": 0.0, "learning_rate": 1.2498323642743906e-05, "loss": 0.9752, "step": 3504 }, { "epoch": 0.4371959585880005, "grad_norm": 0.0, "learning_rate": 1.2494411440579814e-05, "loss": 0.9826, "step": 3505 }, { "epoch": 0.4373206935262567, "grad_norm": 0.0, "learning_rate": 1.2490498831264892e-05, "loss": 0.9658, "step": 3506 }, { "epoch": 0.4374454284645129, "grad_norm": 0.0, "learning_rate": 1.2486585815437775e-05, "loss": 0.9483, "step": 3507 }, { "epoch": 0.4375701634027691, "grad_norm": 0.0, "learning_rate": 1.2482672393737164e-05, "loss": 0.9848, "step": 3508 }, { "epoch": 0.4376948983410253, "grad_norm": 0.0, "learning_rate": 1.2478758566801834e-05, "loss": 0.947, "step": 3509 }, { "epoch": 0.43781963327928153, "grad_norm": 0.0, "learning_rate": 1.2474844335270613e-05, "loss": 0.9839, "step": 3510 }, { "epoch": 0.43794436821753774, "grad_norm": 0.0, "learning_rate": 1.247092969978241e-05, "loss": 0.9765, "step": 3511 }, { "epoch": 0.43806910315579395, "grad_norm": 0.0, "learning_rate": 1.2467014660976185e-05, "loss": 1.0252, "step": 3512 }, { "epoch": 0.43819383809405016, "grad_norm": 0.0, "learning_rate": 1.2463099219490977e-05, "loss": 1.0336, "step": 3513 }, { "epoch": 0.43831857303230637, "grad_norm": 0.0, "learning_rate": 1.245918337596588e-05, "loss": 1.0399, "step": 3514 }, { "epoch": 0.4384433079705626, "grad_norm": 0.0, "learning_rate": 1.2455267131040058e-05, "loss": 0.998, "step": 3515 }, { "epoch": 0.43856804290881873, "grad_norm": 0.0, "learning_rate": 1.245135048535275e-05, "loss": 1.0037, "step": 3516 }, { "epoch": 0.43869277784707494, "grad_norm": 0.0, "learning_rate": 1.2447433439543239e-05, "loss": 0.9442, "step": 3517 }, { "epoch": 0.43881751278533115, "grad_norm": 0.0, "learning_rate": 1.2443515994250892e-05, "loss": 0.9792, "step": 3518 }, { "epoch": 0.43894224772358736, "grad_norm": 0.0, "learning_rate": 1.2439598150115138e-05, "loss": 0.9353, "step": 3519 }, { "epoch": 0.4390669826618436, "grad_norm": 0.0, "learning_rate": 1.243567990777546e-05, "loss": 0.9689, "step": 3520 }, { "epoch": 0.4391917176000998, "grad_norm": 0.0, "learning_rate": 1.2431761267871419e-05, "loss": 0.9947, "step": 3521 }, { "epoch": 0.439316452538356, "grad_norm": 0.0, "learning_rate": 1.2427842231042636e-05, "loss": 0.9636, "step": 3522 }, { "epoch": 0.4394411874766122, "grad_norm": 0.0, "learning_rate": 1.2423922797928794e-05, "loss": 0.9197, "step": 3523 }, { "epoch": 0.4395659224148684, "grad_norm": 0.0, "learning_rate": 1.2420002969169642e-05, "loss": 1.0126, "step": 3524 }, { "epoch": 0.4396906573531246, "grad_norm": 0.0, "learning_rate": 1.2416082745405001e-05, "loss": 0.9496, "step": 3525 }, { "epoch": 0.43981539229138084, "grad_norm": 0.0, "learning_rate": 1.2412162127274748e-05, "loss": 0.9894, "step": 3526 }, { "epoch": 0.43994012722963705, "grad_norm": 0.0, "learning_rate": 1.240824111541882e-05, "loss": 0.9462, "step": 3527 }, { "epoch": 0.4400648621678932, "grad_norm": 0.0, "learning_rate": 1.2404319710477238e-05, "loss": 0.9094, "step": 3528 }, { "epoch": 0.4401895971061494, "grad_norm": 0.0, "learning_rate": 1.2400397913090061e-05, "loss": 0.9447, "step": 3529 }, { "epoch": 0.4403143320444056, "grad_norm": 0.0, "learning_rate": 1.2396475723897432e-05, "loss": 0.9984, "step": 3530 }, { "epoch": 0.44043906698266183, "grad_norm": 0.0, "learning_rate": 1.239255314353955e-05, "loss": 0.9543, "step": 3531 }, { "epoch": 0.44056380192091804, "grad_norm": 0.0, "learning_rate": 1.2388630172656681e-05, "loss": 0.9943, "step": 3532 }, { "epoch": 0.44068853685917425, "grad_norm": 0.0, "learning_rate": 1.238470681188915e-05, "loss": 0.9707, "step": 3533 }, { "epoch": 0.44081327179743046, "grad_norm": 0.0, "learning_rate": 1.2380783061877348e-05, "loss": 0.9753, "step": 3534 }, { "epoch": 0.44093800673568667, "grad_norm": 0.0, "learning_rate": 1.2376858923261732e-05, "loss": 0.9293, "step": 3535 }, { "epoch": 0.4410627416739429, "grad_norm": 0.0, "learning_rate": 1.2372934396682822e-05, "loss": 1.022, "step": 3536 }, { "epoch": 0.4411874766121991, "grad_norm": 0.0, "learning_rate": 1.2369009482781191e-05, "loss": 0.9523, "step": 3537 }, { "epoch": 0.4413122115504553, "grad_norm": 0.0, "learning_rate": 1.2365084182197493e-05, "loss": 0.9512, "step": 3538 }, { "epoch": 0.4414369464887115, "grad_norm": 0.0, "learning_rate": 1.2361158495572436e-05, "loss": 0.9812, "step": 3539 }, { "epoch": 0.44156168142696767, "grad_norm": 0.0, "learning_rate": 1.2357232423546786e-05, "loss": 0.9633, "step": 3540 }, { "epoch": 0.4416864163652239, "grad_norm": 0.0, "learning_rate": 1.2353305966761377e-05, "loss": 1.0156, "step": 3541 }, { "epoch": 0.4418111513034801, "grad_norm": 0.0, "learning_rate": 1.2349379125857109e-05, "loss": 0.993, "step": 3542 }, { "epoch": 0.4419358862417363, "grad_norm": 0.0, "learning_rate": 1.2345451901474941e-05, "loss": 1.0018, "step": 3543 }, { "epoch": 0.4420606211799925, "grad_norm": 0.0, "learning_rate": 1.2341524294255893e-05, "loss": 0.9537, "step": 3544 }, { "epoch": 0.4421853561182487, "grad_norm": 0.0, "learning_rate": 1.2337596304841053e-05, "loss": 0.9651, "step": 3545 }, { "epoch": 0.44231009105650493, "grad_norm": 0.0, "learning_rate": 1.2333667933871565e-05, "loss": 0.9624, "step": 3546 }, { "epoch": 0.44243482599476114, "grad_norm": 0.0, "learning_rate": 1.2329739181988635e-05, "loss": 0.9792, "step": 3547 }, { "epoch": 0.44255956093301735, "grad_norm": 0.0, "learning_rate": 1.2325810049833539e-05, "loss": 0.9534, "step": 3548 }, { "epoch": 0.44268429587127356, "grad_norm": 0.0, "learning_rate": 1.2321880538047615e-05, "loss": 1.0021, "step": 3549 }, { "epoch": 0.44280903080952977, "grad_norm": 0.0, "learning_rate": 1.231795064727225e-05, "loss": 0.9226, "step": 3550 }, { "epoch": 0.442933765747786, "grad_norm": 0.0, "learning_rate": 1.23140203781489e-05, "loss": 1.0251, "step": 3551 }, { "epoch": 0.44305850068604213, "grad_norm": 0.0, "learning_rate": 1.2310089731319093e-05, "loss": 0.9552, "step": 3552 }, { "epoch": 0.44318323562429834, "grad_norm": 0.0, "learning_rate": 1.2306158707424402e-05, "loss": 0.9304, "step": 3553 }, { "epoch": 0.44330797056255455, "grad_norm": 0.0, "learning_rate": 1.2302227307106474e-05, "loss": 0.9634, "step": 3554 }, { "epoch": 0.44343270550081076, "grad_norm": 0.0, "learning_rate": 1.229829553100701e-05, "loss": 0.9898, "step": 3555 }, { "epoch": 0.443557440439067, "grad_norm": 0.0, "learning_rate": 1.2294363379767777e-05, "loss": 0.9973, "step": 3556 }, { "epoch": 0.4436821753773232, "grad_norm": 0.0, "learning_rate": 1.2290430854030597e-05, "loss": 0.9487, "step": 3557 }, { "epoch": 0.4438069103155794, "grad_norm": 0.0, "learning_rate": 1.2286497954437362e-05, "loss": 1.0067, "step": 3558 }, { "epoch": 0.4439316452538356, "grad_norm": 0.0, "learning_rate": 1.2282564681630018e-05, "loss": 0.9397, "step": 3559 }, { "epoch": 0.4440563801920918, "grad_norm": 0.0, "learning_rate": 1.2278631036250575e-05, "loss": 1.028, "step": 3560 }, { "epoch": 0.444181115130348, "grad_norm": 0.0, "learning_rate": 1.2274697018941101e-05, "loss": 0.97, "step": 3561 }, { "epoch": 0.44430585006860424, "grad_norm": 0.0, "learning_rate": 1.2270762630343734e-05, "loss": 0.9506, "step": 3562 }, { "epoch": 0.44443058500686045, "grad_norm": 0.0, "learning_rate": 1.2266827871100658e-05, "loss": 1.0113, "step": 3563 }, { "epoch": 0.4445553199451166, "grad_norm": 0.0, "learning_rate": 1.2262892741854124e-05, "loss": 0.9949, "step": 3564 }, { "epoch": 0.4446800548833728, "grad_norm": 0.0, "learning_rate": 1.2258957243246447e-05, "loss": 0.9504, "step": 3565 }, { "epoch": 0.444804789821629, "grad_norm": 0.0, "learning_rate": 1.2255021375920005e-05, "loss": 0.9439, "step": 3566 }, { "epoch": 0.44492952475988523, "grad_norm": 0.0, "learning_rate": 1.2251085140517222e-05, "loss": 0.9601, "step": 3567 }, { "epoch": 0.44505425969814144, "grad_norm": 0.0, "learning_rate": 1.2247148537680595e-05, "loss": 0.9292, "step": 3568 }, { "epoch": 0.44517899463639765, "grad_norm": 0.0, "learning_rate": 1.2243211568052678e-05, "loss": 0.9659, "step": 3569 }, { "epoch": 0.44530372957465386, "grad_norm": 0.0, "learning_rate": 1.2239274232276079e-05, "loss": 0.9722, "step": 3570 }, { "epoch": 0.44542846451291007, "grad_norm": 0.0, "learning_rate": 1.2235336530993475e-05, "loss": 1.0019, "step": 3571 }, { "epoch": 0.4455531994511663, "grad_norm": 0.0, "learning_rate": 1.22313984648476e-05, "loss": 0.9643, "step": 3572 }, { "epoch": 0.4456779343894225, "grad_norm": 0.0, "learning_rate": 1.2227460034481239e-05, "loss": 0.9801, "step": 3573 }, { "epoch": 0.4458026693276787, "grad_norm": 0.0, "learning_rate": 1.2223521240537247e-05, "loss": 0.9663, "step": 3574 }, { "epoch": 0.4459274042659349, "grad_norm": 0.0, "learning_rate": 1.221958208365853e-05, "loss": 0.9519, "step": 3575 }, { "epoch": 0.44605213920419107, "grad_norm": 0.0, "learning_rate": 1.2215642564488066e-05, "loss": 0.9248, "step": 3576 }, { "epoch": 0.4461768741424473, "grad_norm": 0.0, "learning_rate": 1.2211702683668878e-05, "loss": 0.9829, "step": 3577 }, { "epoch": 0.4463016090807035, "grad_norm": 0.0, "learning_rate": 1.2207762441844055e-05, "loss": 0.9714, "step": 3578 }, { "epoch": 0.4464263440189597, "grad_norm": 0.0, "learning_rate": 1.2203821839656748e-05, "loss": 0.9718, "step": 3579 }, { "epoch": 0.4465510789572159, "grad_norm": 0.0, "learning_rate": 1.2199880877750157e-05, "loss": 0.9699, "step": 3580 }, { "epoch": 0.4466758138954721, "grad_norm": 0.0, "learning_rate": 1.2195939556767544e-05, "loss": 0.964, "step": 3581 }, { "epoch": 0.44680054883372833, "grad_norm": 0.0, "learning_rate": 1.2191997877352239e-05, "loss": 0.9915, "step": 3582 }, { "epoch": 0.44692528377198454, "grad_norm": 0.0, "learning_rate": 1.2188055840147621e-05, "loss": 0.9708, "step": 3583 }, { "epoch": 0.44705001871024075, "grad_norm": 0.0, "learning_rate": 1.218411344579713e-05, "loss": 0.9064, "step": 3584 }, { "epoch": 0.44717475364849696, "grad_norm": 0.0, "learning_rate": 1.218017069494426e-05, "loss": 0.9405, "step": 3585 }, { "epoch": 0.44729948858675317, "grad_norm": 0.0, "learning_rate": 1.2176227588232575e-05, "loss": 0.9254, "step": 3586 }, { "epoch": 0.4474242235250094, "grad_norm": 0.0, "learning_rate": 1.217228412630568e-05, "loss": 0.9418, "step": 3587 }, { "epoch": 0.44754895846326553, "grad_norm": 0.0, "learning_rate": 1.2168340309807257e-05, "loss": 1.0066, "step": 3588 }, { "epoch": 0.44767369340152174, "grad_norm": 0.0, "learning_rate": 1.2164396139381029e-05, "loss": 0.991, "step": 3589 }, { "epoch": 0.44779842833977795, "grad_norm": 0.0, "learning_rate": 1.2160451615670793e-05, "loss": 0.9328, "step": 3590 }, { "epoch": 0.44792316327803416, "grad_norm": 0.0, "learning_rate": 1.2156506739320381e-05, "loss": 0.9746, "step": 3591 }, { "epoch": 0.4480478982162904, "grad_norm": 0.0, "learning_rate": 1.2152561510973709e-05, "loss": 0.9838, "step": 3592 }, { "epoch": 0.4481726331545466, "grad_norm": 0.0, "learning_rate": 1.214861593127473e-05, "loss": 0.936, "step": 3593 }, { "epoch": 0.4482973680928028, "grad_norm": 0.0, "learning_rate": 1.2144670000867462e-05, "loss": 1.0212, "step": 3594 }, { "epoch": 0.448422103031059, "grad_norm": 0.0, "learning_rate": 1.2140723720395989e-05, "loss": 0.977, "step": 3595 }, { "epoch": 0.4485468379693152, "grad_norm": 0.0, "learning_rate": 1.2136777090504434e-05, "loss": 0.9165, "step": 3596 }, { "epoch": 0.4486715729075714, "grad_norm": 0.0, "learning_rate": 1.2132830111836991e-05, "loss": 0.9736, "step": 3597 }, { "epoch": 0.44879630784582764, "grad_norm": 0.0, "learning_rate": 1.2128882785037905e-05, "loss": 0.935, "step": 3598 }, { "epoch": 0.44892104278408385, "grad_norm": 0.0, "learning_rate": 1.2124935110751481e-05, "loss": 0.9958, "step": 3599 }, { "epoch": 0.44904577772234, "grad_norm": 0.0, "learning_rate": 1.2120987089622077e-05, "loss": 0.9337, "step": 3600 }, { "epoch": 0.4491705126605962, "grad_norm": 0.0, "learning_rate": 1.211703872229411e-05, "loss": 0.9409, "step": 3601 }, { "epoch": 0.4492952475988524, "grad_norm": 0.0, "learning_rate": 1.2113090009412054e-05, "loss": 0.9656, "step": 3602 }, { "epoch": 0.44941998253710863, "grad_norm": 0.0, "learning_rate": 1.2109140951620435e-05, "loss": 0.9994, "step": 3603 }, { "epoch": 0.44954471747536484, "grad_norm": 0.0, "learning_rate": 1.2105191549563842e-05, "loss": 0.9808, "step": 3604 }, { "epoch": 0.44966945241362105, "grad_norm": 0.0, "learning_rate": 1.2101241803886921e-05, "loss": 0.9782, "step": 3605 }, { "epoch": 0.44979418735187726, "grad_norm": 0.0, "learning_rate": 1.209729171523436e-05, "loss": 1.0256, "step": 3606 }, { "epoch": 0.44991892229013347, "grad_norm": 0.0, "learning_rate": 1.2093341284250922e-05, "loss": 0.9873, "step": 3607 }, { "epoch": 0.4500436572283897, "grad_norm": 0.0, "learning_rate": 1.2089390511581411e-05, "loss": 0.934, "step": 3608 }, { "epoch": 0.4501683921666459, "grad_norm": 0.0, "learning_rate": 1.2085439397870699e-05, "loss": 0.976, "step": 3609 }, { "epoch": 0.4502931271049021, "grad_norm": 0.0, "learning_rate": 1.20814879437637e-05, "loss": 0.9288, "step": 3610 }, { "epoch": 0.4504178620431583, "grad_norm": 0.0, "learning_rate": 1.2077536149905395e-05, "loss": 0.9451, "step": 3611 }, { "epoch": 0.45054259698141447, "grad_norm": 0.0, "learning_rate": 1.2073584016940822e-05, "loss": 0.9981, "step": 3612 }, { "epoch": 0.4506673319196707, "grad_norm": 0.0, "learning_rate": 1.2069631545515056e-05, "loss": 0.9219, "step": 3613 }, { "epoch": 0.4507920668579269, "grad_norm": 0.0, "learning_rate": 1.2065678736273247e-05, "loss": 0.9685, "step": 3614 }, { "epoch": 0.4509168017961831, "grad_norm": 0.0, "learning_rate": 1.2061725589860595e-05, "loss": 0.9645, "step": 3615 }, { "epoch": 0.4510415367344393, "grad_norm": 0.0, "learning_rate": 1.205777210692235e-05, "loss": 1.002, "step": 3616 }, { "epoch": 0.4511662716726955, "grad_norm": 0.0, "learning_rate": 1.205381828810382e-05, "loss": 0.999, "step": 3617 }, { "epoch": 0.45129100661095173, "grad_norm": 0.0, "learning_rate": 1.2049864134050372e-05, "loss": 1.0243, "step": 3618 }, { "epoch": 0.45141574154920794, "grad_norm": 0.0, "learning_rate": 1.2045909645407419e-05, "loss": 1.0007, "step": 3619 }, { "epoch": 0.45154047648746415, "grad_norm": 0.0, "learning_rate": 1.2041954822820434e-05, "loss": 1.0168, "step": 3620 }, { "epoch": 0.45166521142572036, "grad_norm": 0.0, "learning_rate": 1.2037999666934943e-05, "loss": 0.9692, "step": 3621 }, { "epoch": 0.45178994636397657, "grad_norm": 0.0, "learning_rate": 1.2034044178396534e-05, "loss": 0.9963, "step": 3622 }, { "epoch": 0.4519146813022328, "grad_norm": 0.0, "learning_rate": 1.2030088357850831e-05, "loss": 0.9316, "step": 3623 }, { "epoch": 0.45203941624048893, "grad_norm": 0.0, "learning_rate": 1.2026132205943534e-05, "loss": 0.9618, "step": 3624 }, { "epoch": 0.45216415117874514, "grad_norm": 0.0, "learning_rate": 1.2022175723320382e-05, "loss": 0.9683, "step": 3625 }, { "epoch": 0.45228888611700135, "grad_norm": 0.0, "learning_rate": 1.201821891062717e-05, "loss": 0.9712, "step": 3626 }, { "epoch": 0.45241362105525756, "grad_norm": 0.0, "learning_rate": 1.2014261768509757e-05, "loss": 1.0183, "step": 3627 }, { "epoch": 0.4525383559935138, "grad_norm": 0.0, "learning_rate": 1.2010304297614042e-05, "loss": 0.9932, "step": 3628 }, { "epoch": 0.45266309093177, "grad_norm": 0.0, "learning_rate": 1.2006346498585988e-05, "loss": 1.0142, "step": 3629 }, { "epoch": 0.4527878258700262, "grad_norm": 0.0, "learning_rate": 1.2002388372071603e-05, "loss": 0.9171, "step": 3630 }, { "epoch": 0.4529125608082824, "grad_norm": 0.0, "learning_rate": 1.1998429918716954e-05, "loss": 0.9815, "step": 3631 }, { "epoch": 0.4530372957465386, "grad_norm": 0.0, "learning_rate": 1.1994471139168166e-05, "loss": 0.9453, "step": 3632 }, { "epoch": 0.4531620306847948, "grad_norm": 0.0, "learning_rate": 1.1990512034071407e-05, "loss": 0.9519, "step": 3633 }, { "epoch": 0.45328676562305104, "grad_norm": 0.0, "learning_rate": 1.19865526040729e-05, "loss": 0.9518, "step": 3634 }, { "epoch": 0.45341150056130725, "grad_norm": 0.0, "learning_rate": 1.1982592849818933e-05, "loss": 1.004, "step": 3635 }, { "epoch": 0.4535362354995634, "grad_norm": 0.0, "learning_rate": 1.197863277195583e-05, "loss": 0.9694, "step": 3636 }, { "epoch": 0.4536609704378196, "grad_norm": 0.0, "learning_rate": 1.1974672371129977e-05, "loss": 0.9898, "step": 3637 }, { "epoch": 0.4537857053760758, "grad_norm": 0.0, "learning_rate": 1.1970711647987812e-05, "loss": 0.9935, "step": 3638 }, { "epoch": 0.45391044031433203, "grad_norm": 0.0, "learning_rate": 1.1966750603175825e-05, "loss": 0.9232, "step": 3639 }, { "epoch": 0.45403517525258824, "grad_norm": 0.0, "learning_rate": 1.1962789237340555e-05, "loss": 0.9883, "step": 3640 }, { "epoch": 0.45415991019084445, "grad_norm": 0.0, "learning_rate": 1.1958827551128604e-05, "loss": 0.9457, "step": 3641 }, { "epoch": 0.45428464512910066, "grad_norm": 0.0, "learning_rate": 1.1954865545186616e-05, "loss": 0.9598, "step": 3642 }, { "epoch": 0.45440938006735687, "grad_norm": 0.0, "learning_rate": 1.1950903220161286e-05, "loss": 1.0123, "step": 3643 }, { "epoch": 0.4545341150056131, "grad_norm": 0.0, "learning_rate": 1.1946940576699367e-05, "loss": 0.9619, "step": 3644 }, { "epoch": 0.4546588499438693, "grad_norm": 0.0, "learning_rate": 1.1942977615447666e-05, "loss": 0.9281, "step": 3645 }, { "epoch": 0.4547835848821255, "grad_norm": 0.0, "learning_rate": 1.1939014337053036e-05, "loss": 1.0266, "step": 3646 }, { "epoch": 0.4549083198203817, "grad_norm": 0.0, "learning_rate": 1.1935050742162384e-05, "loss": 0.9428, "step": 3647 }, { "epoch": 0.45503305475863787, "grad_norm": 0.0, "learning_rate": 1.1931086831422667e-05, "loss": 0.9763, "step": 3648 }, { "epoch": 0.4551577896968941, "grad_norm": 0.0, "learning_rate": 1.1927122605480899e-05, "loss": 0.9202, "step": 3649 }, { "epoch": 0.4552825246351503, "grad_norm": 0.0, "learning_rate": 1.1923158064984137e-05, "loss": 0.9502, "step": 3650 }, { "epoch": 0.4554072595734065, "grad_norm": 0.0, "learning_rate": 1.1919193210579497e-05, "loss": 0.9717, "step": 3651 }, { "epoch": 0.4555319945116627, "grad_norm": 0.0, "learning_rate": 1.1915228042914144e-05, "loss": 0.9259, "step": 3652 }, { "epoch": 0.4556567294499189, "grad_norm": 0.0, "learning_rate": 1.1911262562635291e-05, "loss": 0.9759, "step": 3653 }, { "epoch": 0.45578146438817513, "grad_norm": 0.0, "learning_rate": 1.1907296770390204e-05, "loss": 0.9554, "step": 3654 }, { "epoch": 0.45590619932643134, "grad_norm": 0.0, "learning_rate": 1.1903330666826202e-05, "loss": 0.987, "step": 3655 }, { "epoch": 0.45603093426468755, "grad_norm": 0.0, "learning_rate": 1.1899364252590652e-05, "loss": 0.9398, "step": 3656 }, { "epoch": 0.45615566920294376, "grad_norm": 0.0, "learning_rate": 1.1895397528330975e-05, "loss": 0.9202, "step": 3657 }, { "epoch": 0.45628040414119997, "grad_norm": 0.0, "learning_rate": 1.1891430494694637e-05, "loss": 0.9873, "step": 3658 }, { "epoch": 0.4564051390794562, "grad_norm": 0.0, "learning_rate": 1.1887463152329162e-05, "loss": 0.9719, "step": 3659 }, { "epoch": 0.45652987401771233, "grad_norm": 0.0, "learning_rate": 1.1883495501882114e-05, "loss": 0.9658, "step": 3660 }, { "epoch": 0.45665460895596854, "grad_norm": 0.0, "learning_rate": 1.187952754400112e-05, "loss": 0.9444, "step": 3661 }, { "epoch": 0.45677934389422475, "grad_norm": 0.0, "learning_rate": 1.1875559279333847e-05, "loss": 0.9569, "step": 3662 }, { "epoch": 0.45690407883248096, "grad_norm": 0.0, "learning_rate": 1.187159070852802e-05, "loss": 0.9787, "step": 3663 }, { "epoch": 0.4570288137707372, "grad_norm": 0.0, "learning_rate": 1.1867621832231401e-05, "loss": 0.939, "step": 3664 }, { "epoch": 0.4571535487089934, "grad_norm": 0.0, "learning_rate": 1.1863652651091824e-05, "loss": 0.9601, "step": 3665 }, { "epoch": 0.4572782836472496, "grad_norm": 0.0, "learning_rate": 1.1859683165757146e-05, "loss": 0.9617, "step": 3666 }, { "epoch": 0.4574030185855058, "grad_norm": 0.0, "learning_rate": 1.1855713376875295e-05, "loss": 1.0165, "step": 3667 }, { "epoch": 0.457527753523762, "grad_norm": 0.0, "learning_rate": 1.1851743285094239e-05, "loss": 0.9373, "step": 3668 }, { "epoch": 0.4576524884620182, "grad_norm": 0.0, "learning_rate": 1.1847772891061996e-05, "loss": 0.9735, "step": 3669 }, { "epoch": 0.45777722340027444, "grad_norm": 0.0, "learning_rate": 1.1843802195426634e-05, "loss": 0.9725, "step": 3670 }, { "epoch": 0.45790195833853065, "grad_norm": 0.0, "learning_rate": 1.1839831198836274e-05, "loss": 0.938, "step": 3671 }, { "epoch": 0.4580266932767868, "grad_norm": 0.0, "learning_rate": 1.1835859901939079e-05, "loss": 0.9826, "step": 3672 }, { "epoch": 0.458151428215043, "grad_norm": 0.0, "learning_rate": 1.1831888305383268e-05, "loss": 1.0022, "step": 3673 }, { "epoch": 0.4582761631532992, "grad_norm": 0.0, "learning_rate": 1.18279164098171e-05, "loss": 0.9562, "step": 3674 }, { "epoch": 0.45840089809155543, "grad_norm": 0.0, "learning_rate": 1.1823944215888899e-05, "loss": 0.985, "step": 3675 }, { "epoch": 0.45852563302981164, "grad_norm": 0.0, "learning_rate": 1.1819971724247017e-05, "loss": 0.9706, "step": 3676 }, { "epoch": 0.45865036796806785, "grad_norm": 0.0, "learning_rate": 1.1815998935539865e-05, "loss": 0.9604, "step": 3677 }, { "epoch": 0.45877510290632406, "grad_norm": 0.0, "learning_rate": 1.1812025850415912e-05, "loss": 0.8965, "step": 3678 }, { "epoch": 0.45889983784458027, "grad_norm": 0.0, "learning_rate": 1.1808052469523654e-05, "loss": 1.0053, "step": 3679 }, { "epoch": 0.4590245727828365, "grad_norm": 0.0, "learning_rate": 1.1804078793511655e-05, "loss": 0.9182, "step": 3680 }, { "epoch": 0.4591493077210927, "grad_norm": 0.0, "learning_rate": 1.1800104823028515e-05, "loss": 0.9636, "step": 3681 }, { "epoch": 0.4592740426593489, "grad_norm": 0.0, "learning_rate": 1.1796130558722889e-05, "loss": 0.9524, "step": 3682 }, { "epoch": 0.4593987775976051, "grad_norm": 0.0, "learning_rate": 1.1792156001243473e-05, "loss": 0.9885, "step": 3683 }, { "epoch": 0.45952351253586127, "grad_norm": 0.0, "learning_rate": 1.1788181151239018e-05, "loss": 0.992, "step": 3684 }, { "epoch": 0.4596482474741175, "grad_norm": 0.0, "learning_rate": 1.1784206009358323e-05, "loss": 0.999, "step": 3685 }, { "epoch": 0.4597729824123737, "grad_norm": 0.0, "learning_rate": 1.1780230576250227e-05, "loss": 0.9668, "step": 3686 }, { "epoch": 0.4598977173506299, "grad_norm": 0.0, "learning_rate": 1.1776254852563619e-05, "loss": 0.9723, "step": 3687 }, { "epoch": 0.4600224522888861, "grad_norm": 0.0, "learning_rate": 1.1772278838947442e-05, "loss": 0.9348, "step": 3688 }, { "epoch": 0.4601471872271423, "grad_norm": 0.0, "learning_rate": 1.1768302536050675e-05, "loss": 0.9465, "step": 3689 }, { "epoch": 0.46027192216539853, "grad_norm": 0.0, "learning_rate": 1.1764325944522359e-05, "loss": 1.0134, "step": 3690 }, { "epoch": 0.46039665710365474, "grad_norm": 0.0, "learning_rate": 1.1760349065011572e-05, "loss": 0.9797, "step": 3691 }, { "epoch": 0.46052139204191095, "grad_norm": 0.0, "learning_rate": 1.1756371898167437e-05, "loss": 0.9678, "step": 3692 }, { "epoch": 0.46064612698016716, "grad_norm": 0.0, "learning_rate": 1.1752394444639132e-05, "loss": 1.049, "step": 3693 }, { "epoch": 0.46077086191842337, "grad_norm": 0.0, "learning_rate": 1.1748416705075873e-05, "loss": 0.9399, "step": 3694 }, { "epoch": 0.4608955968566796, "grad_norm": 0.0, "learning_rate": 1.1744438680126936e-05, "loss": 0.9358, "step": 3695 }, { "epoch": 0.46102033179493573, "grad_norm": 0.0, "learning_rate": 1.1740460370441624e-05, "loss": 0.9816, "step": 3696 }, { "epoch": 0.46114506673319194, "grad_norm": 0.0, "learning_rate": 1.1736481776669307e-05, "loss": 0.9783, "step": 3697 }, { "epoch": 0.46126980167144815, "grad_norm": 0.0, "learning_rate": 1.1732502899459384e-05, "loss": 0.9702, "step": 3698 }, { "epoch": 0.46139453660970436, "grad_norm": 0.0, "learning_rate": 1.1728523739461312e-05, "loss": 0.9767, "step": 3699 }, { "epoch": 0.4615192715479606, "grad_norm": 0.0, "learning_rate": 1.1724544297324592e-05, "loss": 0.9547, "step": 3700 }, { "epoch": 0.4616440064862168, "grad_norm": 0.0, "learning_rate": 1.1720564573698765e-05, "loss": 0.9849, "step": 3701 }, { "epoch": 0.461768741424473, "grad_norm": 0.0, "learning_rate": 1.1716584569233425e-05, "loss": 0.9256, "step": 3702 }, { "epoch": 0.4618934763627292, "grad_norm": 0.0, "learning_rate": 1.1712604284578211e-05, "loss": 0.9105, "step": 3703 }, { "epoch": 0.4620182113009854, "grad_norm": 0.0, "learning_rate": 1.1708623720382798e-05, "loss": 0.9767, "step": 3704 }, { "epoch": 0.4621429462392416, "grad_norm": 0.0, "learning_rate": 1.1704642877296926e-05, "loss": 0.992, "step": 3705 }, { "epoch": 0.46226768117749784, "grad_norm": 0.0, "learning_rate": 1.1700661755970357e-05, "loss": 0.9844, "step": 3706 }, { "epoch": 0.46239241611575405, "grad_norm": 0.0, "learning_rate": 1.1696680357052918e-05, "loss": 0.9491, "step": 3707 }, { "epoch": 0.46251715105401026, "grad_norm": 0.0, "learning_rate": 1.1692698681194474e-05, "loss": 0.985, "step": 3708 }, { "epoch": 0.4626418859922664, "grad_norm": 0.0, "learning_rate": 1.1688716729044931e-05, "loss": 0.9671, "step": 3709 }, { "epoch": 0.4627666209305226, "grad_norm": 0.0, "learning_rate": 1.1684734501254242e-05, "loss": 0.9645, "step": 3710 }, { "epoch": 0.46289135586877883, "grad_norm": 0.0, "learning_rate": 1.1680751998472415e-05, "loss": 0.9833, "step": 3711 }, { "epoch": 0.46301609080703504, "grad_norm": 0.0, "learning_rate": 1.1676769221349489e-05, "loss": 0.9284, "step": 3712 }, { "epoch": 0.46314082574529125, "grad_norm": 0.0, "learning_rate": 1.1672786170535553e-05, "loss": 0.9516, "step": 3713 }, { "epoch": 0.46326556068354746, "grad_norm": 0.0, "learning_rate": 1.1668802846680747e-05, "loss": 0.971, "step": 3714 }, { "epoch": 0.46339029562180367, "grad_norm": 0.0, "learning_rate": 1.1664819250435246e-05, "loss": 0.9167, "step": 3715 }, { "epoch": 0.4635150305600599, "grad_norm": 0.0, "learning_rate": 1.166083538244927e-05, "loss": 0.9503, "step": 3716 }, { "epoch": 0.4636397654983161, "grad_norm": 0.0, "learning_rate": 1.165685124337309e-05, "loss": 0.9572, "step": 3717 }, { "epoch": 0.4637645004365723, "grad_norm": 0.0, "learning_rate": 1.1652866833857021e-05, "loss": 0.9256, "step": 3718 }, { "epoch": 0.4638892353748285, "grad_norm": 0.0, "learning_rate": 1.1648882154551416e-05, "loss": 0.9467, "step": 3719 }, { "epoch": 0.4640139703130847, "grad_norm": 0.0, "learning_rate": 1.1644897206106673e-05, "loss": 0.94, "step": 3720 }, { "epoch": 0.4641387052513409, "grad_norm": 0.0, "learning_rate": 1.1640911989173242e-05, "loss": 0.9616, "step": 3721 }, { "epoch": 0.4642634401895971, "grad_norm": 0.0, "learning_rate": 1.1636926504401605e-05, "loss": 0.9638, "step": 3722 }, { "epoch": 0.4643881751278533, "grad_norm": 0.0, "learning_rate": 1.1632940752442294e-05, "loss": 1.0578, "step": 3723 }, { "epoch": 0.4645129100661095, "grad_norm": 0.0, "learning_rate": 1.162895473394589e-05, "loss": 0.9832, "step": 3724 }, { "epoch": 0.4646376450043657, "grad_norm": 0.0, "learning_rate": 1.1624968449563005e-05, "loss": 0.9844, "step": 3725 }, { "epoch": 0.46476237994262193, "grad_norm": 0.0, "learning_rate": 1.1620981899944304e-05, "loss": 0.9672, "step": 3726 }, { "epoch": 0.46488711488087814, "grad_norm": 0.0, "learning_rate": 1.1616995085740492e-05, "loss": 0.9397, "step": 3727 }, { "epoch": 0.46501184981913435, "grad_norm": 0.0, "learning_rate": 1.161300800760232e-05, "loss": 0.9585, "step": 3728 }, { "epoch": 0.46513658475739056, "grad_norm": 0.0, "learning_rate": 1.1609020666180574e-05, "loss": 0.9757, "step": 3729 }, { "epoch": 0.46526131969564677, "grad_norm": 0.0, "learning_rate": 1.1605033062126092e-05, "loss": 0.9645, "step": 3730 }, { "epoch": 0.465386054633903, "grad_norm": 0.0, "learning_rate": 1.1601045196089758e-05, "loss": 0.9578, "step": 3731 }, { "epoch": 0.4655107895721592, "grad_norm": 0.0, "learning_rate": 1.1597057068722478e-05, "loss": 0.9343, "step": 3732 }, { "epoch": 0.46563552451041534, "grad_norm": 0.0, "learning_rate": 1.1593068680675227e-05, "loss": 0.9254, "step": 3733 }, { "epoch": 0.46576025944867155, "grad_norm": 0.0, "learning_rate": 1.1589080032599007e-05, "loss": 0.9349, "step": 3734 }, { "epoch": 0.46588499438692776, "grad_norm": 0.0, "learning_rate": 1.1585091125144863e-05, "loss": 1.006, "step": 3735 }, { "epoch": 0.466009729325184, "grad_norm": 0.0, "learning_rate": 1.1581101958963887e-05, "loss": 0.9077, "step": 3736 }, { "epoch": 0.4661344642634402, "grad_norm": 0.0, "learning_rate": 1.1577112534707213e-05, "loss": 0.9565, "step": 3737 }, { "epoch": 0.4662591992016964, "grad_norm": 0.0, "learning_rate": 1.1573122853026015e-05, "loss": 0.95, "step": 3738 }, { "epoch": 0.4663839341399526, "grad_norm": 0.0, "learning_rate": 1.1569132914571509e-05, "loss": 0.9482, "step": 3739 }, { "epoch": 0.4665086690782088, "grad_norm": 0.0, "learning_rate": 1.1565142719994953e-05, "loss": 0.9021, "step": 3740 }, { "epoch": 0.466633404016465, "grad_norm": 0.0, "learning_rate": 1.156115226994765e-05, "loss": 0.9783, "step": 3741 }, { "epoch": 0.46675813895472124, "grad_norm": 0.0, "learning_rate": 1.155716156508094e-05, "loss": 0.9683, "step": 3742 }, { "epoch": 0.46688287389297745, "grad_norm": 0.0, "learning_rate": 1.1553170606046201e-05, "loss": 0.949, "step": 3743 }, { "epoch": 0.46700760883123366, "grad_norm": 0.0, "learning_rate": 1.1549179393494872e-05, "loss": 0.9667, "step": 3744 }, { "epoch": 0.4671323437694898, "grad_norm": 0.0, "learning_rate": 1.1545187928078407e-05, "loss": 0.9398, "step": 3745 }, { "epoch": 0.467257078707746, "grad_norm": 0.0, "learning_rate": 1.1541196210448317e-05, "loss": 0.9789, "step": 3746 }, { "epoch": 0.46738181364600223, "grad_norm": 0.0, "learning_rate": 1.1537204241256155e-05, "loss": 0.9683, "step": 3747 }, { "epoch": 0.46750654858425844, "grad_norm": 0.0, "learning_rate": 1.1533212021153509e-05, "loss": 0.9174, "step": 3748 }, { "epoch": 0.46763128352251465, "grad_norm": 0.0, "learning_rate": 1.1529219550792005e-05, "loss": 1.0049, "step": 3749 }, { "epoch": 0.46775601846077086, "grad_norm": 0.0, "learning_rate": 1.1525226830823322e-05, "loss": 0.954, "step": 3750 }, { "epoch": 0.46788075339902707, "grad_norm": 0.0, "learning_rate": 1.1521233861899168e-05, "loss": 0.931, "step": 3751 }, { "epoch": 0.4680054883372833, "grad_norm": 0.0, "learning_rate": 1.1517240644671297e-05, "loss": 0.9671, "step": 3752 }, { "epoch": 0.4681302232755395, "grad_norm": 0.0, "learning_rate": 1.1513247179791505e-05, "loss": 0.9868, "step": 3753 }, { "epoch": 0.4682549582137957, "grad_norm": 0.0, "learning_rate": 1.1509253467911625e-05, "loss": 0.9877, "step": 3754 }, { "epoch": 0.4683796931520519, "grad_norm": 0.0, "learning_rate": 1.150525950968353e-05, "loss": 0.9152, "step": 3755 }, { "epoch": 0.4685044280903081, "grad_norm": 0.0, "learning_rate": 1.1501265305759136e-05, "loss": 0.9465, "step": 3756 }, { "epoch": 0.4686291630285643, "grad_norm": 0.0, "learning_rate": 1.14972708567904e-05, "loss": 0.955, "step": 3757 }, { "epoch": 0.4687538979668205, "grad_norm": 0.0, "learning_rate": 1.149327616342931e-05, "loss": 1.0019, "step": 3758 }, { "epoch": 0.4688786329050767, "grad_norm": 0.0, "learning_rate": 1.1489281226327906e-05, "loss": 1.0068, "step": 3759 }, { "epoch": 0.4690033678433329, "grad_norm": 0.0, "learning_rate": 1.1485286046138259e-05, "loss": 0.932, "step": 3760 }, { "epoch": 0.4691281027815891, "grad_norm": 0.0, "learning_rate": 1.1481290623512491e-05, "loss": 0.9817, "step": 3761 }, { "epoch": 0.46925283771984533, "grad_norm": 0.0, "learning_rate": 1.1477294959102744e-05, "loss": 0.9425, "step": 3762 }, { "epoch": 0.46937757265810154, "grad_norm": 0.0, "learning_rate": 1.1473299053561222e-05, "loss": 0.9206, "step": 3763 }, { "epoch": 0.46950230759635775, "grad_norm": 0.0, "learning_rate": 1.1469302907540152e-05, "loss": 0.9481, "step": 3764 }, { "epoch": 0.46962704253461396, "grad_norm": 0.0, "learning_rate": 1.1465306521691805e-05, "loss": 0.9327, "step": 3765 }, { "epoch": 0.46975177747287017, "grad_norm": 0.0, "learning_rate": 1.1461309896668493e-05, "loss": 0.9412, "step": 3766 }, { "epoch": 0.4698765124111264, "grad_norm": 0.0, "learning_rate": 1.1457313033122568e-05, "loss": 0.9642, "step": 3767 }, { "epoch": 0.4700012473493826, "grad_norm": 0.0, "learning_rate": 1.1453315931706417e-05, "loss": 0.9847, "step": 3768 }, { "epoch": 0.47012598228763874, "grad_norm": 0.0, "learning_rate": 1.1449318593072468e-05, "loss": 0.9211, "step": 3769 }, { "epoch": 0.47025071722589495, "grad_norm": 0.0, "learning_rate": 1.1445321017873187e-05, "loss": 0.9341, "step": 3770 }, { "epoch": 0.47037545216415116, "grad_norm": 0.0, "learning_rate": 1.1441323206761084e-05, "loss": 0.9648, "step": 3771 }, { "epoch": 0.4705001871024074, "grad_norm": 0.0, "learning_rate": 1.1437325160388692e-05, "loss": 0.9826, "step": 3772 }, { "epoch": 0.4706249220406636, "grad_norm": 0.0, "learning_rate": 1.1433326879408599e-05, "loss": 0.979, "step": 3773 }, { "epoch": 0.4707496569789198, "grad_norm": 0.0, "learning_rate": 1.1429328364473431e-05, "loss": 1.0074, "step": 3774 }, { "epoch": 0.470874391917176, "grad_norm": 0.0, "learning_rate": 1.1425329616235836e-05, "loss": 0.9325, "step": 3775 }, { "epoch": 0.4709991268554322, "grad_norm": 0.0, "learning_rate": 1.1421330635348518e-05, "loss": 0.9631, "step": 3776 }, { "epoch": 0.4711238617936884, "grad_norm": 0.0, "learning_rate": 1.1417331422464206e-05, "loss": 0.9208, "step": 3777 }, { "epoch": 0.47124859673194464, "grad_norm": 0.0, "learning_rate": 1.1413331978235677e-05, "loss": 0.9405, "step": 3778 }, { "epoch": 0.47137333167020085, "grad_norm": 0.0, "learning_rate": 1.140933230331574e-05, "loss": 0.9559, "step": 3779 }, { "epoch": 0.47149806660845706, "grad_norm": 0.0, "learning_rate": 1.1405332398357239e-05, "loss": 0.9563, "step": 3780 }, { "epoch": 0.4716228015467132, "grad_norm": 0.0, "learning_rate": 1.1401332264013063e-05, "loss": 0.9993, "step": 3781 }, { "epoch": 0.4717475364849694, "grad_norm": 0.0, "learning_rate": 1.1397331900936137e-05, "loss": 0.9478, "step": 3782 }, { "epoch": 0.47187227142322563, "grad_norm": 0.0, "learning_rate": 1.1393331309779417e-05, "loss": 0.9361, "step": 3783 }, { "epoch": 0.47199700636148184, "grad_norm": 0.0, "learning_rate": 1.13893304911959e-05, "loss": 0.927, "step": 3784 }, { "epoch": 0.47212174129973805, "grad_norm": 0.0, "learning_rate": 1.1385329445838623e-05, "loss": 0.97, "step": 3785 }, { "epoch": 0.47224647623799426, "grad_norm": 0.0, "learning_rate": 1.1381328174360655e-05, "loss": 1.0228, "step": 3786 }, { "epoch": 0.47237121117625047, "grad_norm": 0.0, "learning_rate": 1.1377326677415108e-05, "loss": 0.9361, "step": 3787 }, { "epoch": 0.4724959461145067, "grad_norm": 0.0, "learning_rate": 1.1373324955655127e-05, "loss": 0.9198, "step": 3788 }, { "epoch": 0.4726206810527629, "grad_norm": 0.0, "learning_rate": 1.1369323009733888e-05, "loss": 0.9594, "step": 3789 }, { "epoch": 0.4727454159910191, "grad_norm": 0.0, "learning_rate": 1.1365320840304615e-05, "loss": 0.951, "step": 3790 }, { "epoch": 0.4728701509292753, "grad_norm": 0.0, "learning_rate": 1.1361318448020563e-05, "loss": 1.0014, "step": 3791 }, { "epoch": 0.4729948858675315, "grad_norm": 0.0, "learning_rate": 1.1357315833535016e-05, "loss": 0.9829, "step": 3792 }, { "epoch": 0.4731196208057877, "grad_norm": 0.0, "learning_rate": 1.1353312997501313e-05, "loss": 0.9905, "step": 3793 }, { "epoch": 0.4732443557440439, "grad_norm": 0.0, "learning_rate": 1.134930994057281e-05, "loss": 0.9971, "step": 3794 }, { "epoch": 0.4733690906823001, "grad_norm": 0.0, "learning_rate": 1.1345306663402909e-05, "loss": 0.9543, "step": 3795 }, { "epoch": 0.4734938256205563, "grad_norm": 0.0, "learning_rate": 1.1341303166645043e-05, "loss": 0.9691, "step": 3796 }, { "epoch": 0.4736185605588125, "grad_norm": 0.0, "learning_rate": 1.133729945095269e-05, "loss": 0.9722, "step": 3797 }, { "epoch": 0.47374329549706873, "grad_norm": 0.0, "learning_rate": 1.133329551697935e-05, "loss": 0.9076, "step": 3798 }, { "epoch": 0.47386803043532494, "grad_norm": 0.0, "learning_rate": 1.1329291365378574e-05, "loss": 0.9719, "step": 3799 }, { "epoch": 0.47399276537358115, "grad_norm": 0.0, "learning_rate": 1.1325286996803932e-05, "loss": 0.9598, "step": 3800 }, { "epoch": 0.47411750031183736, "grad_norm": 0.0, "learning_rate": 1.1321282411909044e-05, "loss": 0.9925, "step": 3801 }, { "epoch": 0.47424223525009357, "grad_norm": 0.0, "learning_rate": 1.1317277611347554e-05, "loss": 0.9266, "step": 3802 }, { "epoch": 0.4743669701883498, "grad_norm": 0.0, "learning_rate": 1.131327259577315e-05, "loss": 0.9891, "step": 3803 }, { "epoch": 0.474491705126606, "grad_norm": 0.0, "learning_rate": 1.1309267365839556e-05, "loss": 0.9592, "step": 3804 }, { "epoch": 0.47461644006486214, "grad_norm": 0.0, "learning_rate": 1.130526192220052e-05, "loss": 0.9899, "step": 3805 }, { "epoch": 0.47474117500311835, "grad_norm": 0.0, "learning_rate": 1.1301256265509827e-05, "loss": 0.9372, "step": 3806 }, { "epoch": 0.47486590994137456, "grad_norm": 0.0, "learning_rate": 1.1297250396421314e-05, "loss": 0.9671, "step": 3807 }, { "epoch": 0.4749906448796308, "grad_norm": 0.0, "learning_rate": 1.1293244315588827e-05, "loss": 0.941, "step": 3808 }, { "epoch": 0.475115379817887, "grad_norm": 0.0, "learning_rate": 1.1289238023666267e-05, "loss": 0.9071, "step": 3809 }, { "epoch": 0.4752401147561432, "grad_norm": 0.0, "learning_rate": 1.1285231521307562e-05, "loss": 0.9689, "step": 3810 }, { "epoch": 0.4753648496943994, "grad_norm": 0.0, "learning_rate": 1.1281224809166673e-05, "loss": 1.0019, "step": 3811 }, { "epoch": 0.4754895846326556, "grad_norm": 0.0, "learning_rate": 1.1277217887897592e-05, "loss": 1.0149, "step": 3812 }, { "epoch": 0.4756143195709118, "grad_norm": 0.0, "learning_rate": 1.1273210758154354e-05, "loss": 0.9078, "step": 3813 }, { "epoch": 0.47573905450916804, "grad_norm": 0.0, "learning_rate": 1.1269203420591024e-05, "loss": 0.9556, "step": 3814 }, { "epoch": 0.47586378944742425, "grad_norm": 0.0, "learning_rate": 1.1265195875861698e-05, "loss": 0.9222, "step": 3815 }, { "epoch": 0.47598852438568046, "grad_norm": 0.0, "learning_rate": 1.1261188124620512e-05, "loss": 0.9685, "step": 3816 }, { "epoch": 0.4761132593239366, "grad_norm": 0.0, "learning_rate": 1.125718016752163e-05, "loss": 0.9776, "step": 3817 }, { "epoch": 0.4762379942621928, "grad_norm": 0.0, "learning_rate": 1.1253172005219249e-05, "loss": 0.931, "step": 3818 }, { "epoch": 0.47636272920044903, "grad_norm": 0.0, "learning_rate": 1.1249163638367603e-05, "loss": 0.8946, "step": 3819 }, { "epoch": 0.47648746413870524, "grad_norm": 0.0, "learning_rate": 1.1245155067620963e-05, "loss": 0.9534, "step": 3820 }, { "epoch": 0.47661219907696145, "grad_norm": 0.0, "learning_rate": 1.1241146293633625e-05, "loss": 0.923, "step": 3821 }, { "epoch": 0.47673693401521766, "grad_norm": 0.0, "learning_rate": 1.1237137317059923e-05, "loss": 0.9775, "step": 3822 }, { "epoch": 0.47686166895347387, "grad_norm": 0.0, "learning_rate": 1.1233128138554222e-05, "loss": 0.9928, "step": 3823 }, { "epoch": 0.4769864038917301, "grad_norm": 0.0, "learning_rate": 1.1229118758770923e-05, "loss": 0.8817, "step": 3824 }, { "epoch": 0.4771111388299863, "grad_norm": 0.0, "learning_rate": 1.1225109178364456e-05, "loss": 0.947, "step": 3825 }, { "epoch": 0.4772358737682425, "grad_norm": 0.0, "learning_rate": 1.1221099397989283e-05, "loss": 0.9872, "step": 3826 }, { "epoch": 0.4773606087064987, "grad_norm": 0.0, "learning_rate": 1.1217089418299914e-05, "loss": 0.9364, "step": 3827 }, { "epoch": 0.4774853436447549, "grad_norm": 0.0, "learning_rate": 1.1213079239950861e-05, "loss": 0.954, "step": 3828 }, { "epoch": 0.4776100785830111, "grad_norm": 0.0, "learning_rate": 1.1209068863596695e-05, "loss": 0.9241, "step": 3829 }, { "epoch": 0.4777348135212673, "grad_norm": 0.0, "learning_rate": 1.1205058289892012e-05, "loss": 0.969, "step": 3830 }, { "epoch": 0.4778595484595235, "grad_norm": 0.0, "learning_rate": 1.1201047519491434e-05, "loss": 0.9923, "step": 3831 }, { "epoch": 0.4779842833977797, "grad_norm": 0.0, "learning_rate": 1.1197036553049626e-05, "loss": 0.9319, "step": 3832 }, { "epoch": 0.4781090183360359, "grad_norm": 0.0, "learning_rate": 1.1193025391221275e-05, "loss": 0.907, "step": 3833 }, { "epoch": 0.47823375327429213, "grad_norm": 0.0, "learning_rate": 1.1189014034661106e-05, "loss": 0.9779, "step": 3834 }, { "epoch": 0.47835848821254834, "grad_norm": 0.0, "learning_rate": 1.1185002484023867e-05, "loss": 0.9296, "step": 3835 }, { "epoch": 0.47848322315080455, "grad_norm": 0.0, "learning_rate": 1.1180990739964348e-05, "loss": 0.9629, "step": 3836 }, { "epoch": 0.47860795808906076, "grad_norm": 0.0, "learning_rate": 1.1176978803137372e-05, "loss": 0.9963, "step": 3837 }, { "epoch": 0.47873269302731697, "grad_norm": 0.0, "learning_rate": 1.1172966674197785e-05, "loss": 0.9625, "step": 3838 }, { "epoch": 0.4788574279655732, "grad_norm": 0.0, "learning_rate": 1.1168954353800463e-05, "loss": 0.9598, "step": 3839 }, { "epoch": 0.4789821629038294, "grad_norm": 0.0, "learning_rate": 1.1164941842600323e-05, "loss": 0.9092, "step": 3840 }, { "epoch": 0.47910689784208554, "grad_norm": 0.0, "learning_rate": 1.1160929141252303e-05, "loss": 0.9655, "step": 3841 }, { "epoch": 0.47923163278034175, "grad_norm": 0.0, "learning_rate": 1.1156916250411383e-05, "loss": 0.9672, "step": 3842 }, { "epoch": 0.47935636771859796, "grad_norm": 0.0, "learning_rate": 1.1152903170732565e-05, "loss": 0.9531, "step": 3843 }, { "epoch": 0.4794811026568542, "grad_norm": 0.0, "learning_rate": 1.1148889902870888e-05, "loss": 0.9435, "step": 3844 }, { "epoch": 0.4796058375951104, "grad_norm": 0.0, "learning_rate": 1.1144876447481409e-05, "loss": 0.9408, "step": 3845 }, { "epoch": 0.4797305725333666, "grad_norm": 0.0, "learning_rate": 1.1140862805219235e-05, "loss": 0.9843, "step": 3846 }, { "epoch": 0.4798553074716228, "grad_norm": 0.0, "learning_rate": 1.1136848976739492e-05, "loss": 0.9744, "step": 3847 }, { "epoch": 0.479980042409879, "grad_norm": 0.0, "learning_rate": 1.1132834962697335e-05, "loss": 0.9308, "step": 3848 }, { "epoch": 0.4801047773481352, "grad_norm": 0.0, "learning_rate": 1.1128820763747949e-05, "loss": 0.9222, "step": 3849 }, { "epoch": 0.48022951228639144, "grad_norm": 0.0, "learning_rate": 1.1124806380546564e-05, "loss": 0.9672, "step": 3850 }, { "epoch": 0.48035424722464765, "grad_norm": 0.0, "learning_rate": 1.1120791813748419e-05, "loss": 0.9977, "step": 3851 }, { "epoch": 0.48047898216290386, "grad_norm": 0.0, "learning_rate": 1.1116777064008793e-05, "loss": 0.991, "step": 3852 }, { "epoch": 0.48060371710116, "grad_norm": 0.0, "learning_rate": 1.1112762131983e-05, "loss": 0.921, "step": 3853 }, { "epoch": 0.4807284520394162, "grad_norm": 0.0, "learning_rate": 1.1108747018326369e-05, "loss": 0.9436, "step": 3854 }, { "epoch": 0.48085318697767243, "grad_norm": 0.0, "learning_rate": 1.1104731723694278e-05, "loss": 0.9733, "step": 3855 }, { "epoch": 0.48097792191592864, "grad_norm": 0.0, "learning_rate": 1.1100716248742119e-05, "loss": 0.9286, "step": 3856 }, { "epoch": 0.48110265685418485, "grad_norm": 0.0, "learning_rate": 1.1096700594125318e-05, "loss": 0.9382, "step": 3857 }, { "epoch": 0.48122739179244106, "grad_norm": 0.0, "learning_rate": 1.1092684760499332e-05, "loss": 1.0201, "step": 3858 }, { "epoch": 0.48135212673069727, "grad_norm": 0.0, "learning_rate": 1.1088668748519646e-05, "loss": 0.9911, "step": 3859 }, { "epoch": 0.4814768616689535, "grad_norm": 0.0, "learning_rate": 1.1084652558841777e-05, "loss": 0.9733, "step": 3860 }, { "epoch": 0.4816015966072097, "grad_norm": 0.0, "learning_rate": 1.1080636192121267e-05, "loss": 0.9855, "step": 3861 }, { "epoch": 0.4817263315454659, "grad_norm": 0.0, "learning_rate": 1.1076619649013687e-05, "loss": 0.9667, "step": 3862 }, { "epoch": 0.4818510664837221, "grad_norm": 0.0, "learning_rate": 1.1072602930174639e-05, "loss": 0.9884, "step": 3863 }, { "epoch": 0.4819758014219783, "grad_norm": 0.0, "learning_rate": 1.1068586036259752e-05, "loss": 0.9103, "step": 3864 }, { "epoch": 0.4821005363602345, "grad_norm": 0.0, "learning_rate": 1.1064568967924683e-05, "loss": 0.9322, "step": 3865 }, { "epoch": 0.4822252712984907, "grad_norm": 0.0, "learning_rate": 1.1060551725825124e-05, "loss": 0.9297, "step": 3866 }, { "epoch": 0.4823500062367469, "grad_norm": 0.0, "learning_rate": 1.1056534310616787e-05, "loss": 0.9706, "step": 3867 }, { "epoch": 0.4824747411750031, "grad_norm": 0.0, "learning_rate": 1.1052516722955412e-05, "loss": 1.004, "step": 3868 }, { "epoch": 0.4825994761132593, "grad_norm": 0.0, "learning_rate": 1.1048498963496776e-05, "loss": 0.9647, "step": 3869 }, { "epoch": 0.48272421105151553, "grad_norm": 0.0, "learning_rate": 1.104448103289668e-05, "loss": 0.9769, "step": 3870 }, { "epoch": 0.48284894598977174, "grad_norm": 0.0, "learning_rate": 1.1040462931810942e-05, "loss": 0.9903, "step": 3871 }, { "epoch": 0.48297368092802795, "grad_norm": 0.0, "learning_rate": 1.103644466089543e-05, "loss": 0.945, "step": 3872 }, { "epoch": 0.48309841586628416, "grad_norm": 0.0, "learning_rate": 1.1032426220806018e-05, "loss": 0.9402, "step": 3873 }, { "epoch": 0.48322315080454037, "grad_norm": 0.0, "learning_rate": 1.1028407612198618e-05, "loss": 0.9069, "step": 3874 }, { "epoch": 0.4833478857427966, "grad_norm": 0.0, "learning_rate": 1.102438883572917e-05, "loss": 0.9577, "step": 3875 }, { "epoch": 0.4834726206810528, "grad_norm": 0.0, "learning_rate": 1.102036989205364e-05, "loss": 0.8837, "step": 3876 }, { "epoch": 0.48359735561930894, "grad_norm": 0.0, "learning_rate": 1.101635078182802e-05, "loss": 0.9674, "step": 3877 }, { "epoch": 0.48372209055756515, "grad_norm": 0.0, "learning_rate": 1.1012331505708332e-05, "loss": 0.9578, "step": 3878 }, { "epoch": 0.48384682549582136, "grad_norm": 0.0, "learning_rate": 1.100831206435062e-05, "loss": 1.0, "step": 3879 }, { "epoch": 0.4839715604340776, "grad_norm": 0.0, "learning_rate": 1.1004292458410959e-05, "loss": 0.9657, "step": 3880 }, { "epoch": 0.4840962953723338, "grad_norm": 0.0, "learning_rate": 1.100027268854545e-05, "loss": 0.9706, "step": 3881 }, { "epoch": 0.48422103031059, "grad_norm": 0.0, "learning_rate": 1.0996252755410224e-05, "loss": 1.008, "step": 3882 }, { "epoch": 0.4843457652488462, "grad_norm": 0.0, "learning_rate": 1.0992232659661433e-05, "loss": 0.9431, "step": 3883 }, { "epoch": 0.4844705001871024, "grad_norm": 0.0, "learning_rate": 1.0988212401955259e-05, "loss": 0.9376, "step": 3884 }, { "epoch": 0.4845952351253586, "grad_norm": 0.0, "learning_rate": 1.0984191982947902e-05, "loss": 0.9706, "step": 3885 }, { "epoch": 0.48471997006361484, "grad_norm": 0.0, "learning_rate": 1.098017140329561e-05, "loss": 0.9596, "step": 3886 }, { "epoch": 0.48484470500187105, "grad_norm": 0.0, "learning_rate": 1.0976150663654627e-05, "loss": 0.913, "step": 3887 }, { "epoch": 0.48496943994012726, "grad_norm": 0.0, "learning_rate": 1.0972129764681252e-05, "loss": 1.0045, "step": 3888 }, { "epoch": 0.4850941748783834, "grad_norm": 0.0, "learning_rate": 1.0968108707031792e-05, "loss": 0.9479, "step": 3889 }, { "epoch": 0.4852189098166396, "grad_norm": 0.0, "learning_rate": 1.0964087491362588e-05, "loss": 0.9772, "step": 3890 }, { "epoch": 0.48534364475489583, "grad_norm": 0.0, "learning_rate": 1.0960066118329995e-05, "loss": 0.9601, "step": 3891 }, { "epoch": 0.48546837969315204, "grad_norm": 0.0, "learning_rate": 1.0956044588590408e-05, "loss": 0.945, "step": 3892 }, { "epoch": 0.48559311463140825, "grad_norm": 0.0, "learning_rate": 1.0952022902800248e-05, "loss": 0.9052, "step": 3893 }, { "epoch": 0.48571784956966446, "grad_norm": 0.0, "learning_rate": 1.0948001061615945e-05, "loss": 0.96, "step": 3894 }, { "epoch": 0.48584258450792067, "grad_norm": 0.0, "learning_rate": 1.0943979065693974e-05, "loss": 0.9605, "step": 3895 }, { "epoch": 0.4859673194461769, "grad_norm": 0.0, "learning_rate": 1.0939956915690819e-05, "loss": 0.9764, "step": 3896 }, { "epoch": 0.4860920543844331, "grad_norm": 0.0, "learning_rate": 1.0935934612263002e-05, "loss": 0.9297, "step": 3897 }, { "epoch": 0.4862167893226893, "grad_norm": 0.0, "learning_rate": 1.0931912156067058e-05, "loss": 0.9226, "step": 3898 }, { "epoch": 0.4863415242609455, "grad_norm": 0.0, "learning_rate": 1.0927889547759555e-05, "loss": 0.9625, "step": 3899 }, { "epoch": 0.4864662591992017, "grad_norm": 0.0, "learning_rate": 1.0923866787997093e-05, "loss": 0.968, "step": 3900 }, { "epoch": 0.4865909941374579, "grad_norm": 0.0, "learning_rate": 1.0919843877436276e-05, "loss": 0.9646, "step": 3901 }, { "epoch": 0.4867157290757141, "grad_norm": 0.0, "learning_rate": 1.0915820816733748e-05, "loss": 0.9583, "step": 3902 }, { "epoch": 0.4868404640139703, "grad_norm": 0.0, "learning_rate": 1.091179760654618e-05, "loss": 0.979, "step": 3903 }, { "epoch": 0.4869651989522265, "grad_norm": 0.0, "learning_rate": 1.0907774247530252e-05, "loss": 0.9392, "step": 3904 }, { "epoch": 0.4870899338904827, "grad_norm": 0.0, "learning_rate": 1.0903750740342682e-05, "loss": 0.9846, "step": 3905 }, { "epoch": 0.48721466882873893, "grad_norm": 0.0, "learning_rate": 1.0899727085640209e-05, "loss": 0.9757, "step": 3906 }, { "epoch": 0.48733940376699514, "grad_norm": 0.0, "learning_rate": 1.0895703284079595e-05, "loss": 0.9683, "step": 3907 }, { "epoch": 0.48746413870525135, "grad_norm": 0.0, "learning_rate": 1.0891679336317622e-05, "loss": 0.9979, "step": 3908 }, { "epoch": 0.48758887364350756, "grad_norm": 0.0, "learning_rate": 1.08876552430111e-05, "loss": 0.9209, "step": 3909 }, { "epoch": 0.48771360858176377, "grad_norm": 0.0, "learning_rate": 1.088363100481687e-05, "loss": 0.9516, "step": 3910 }, { "epoch": 0.48783834352002, "grad_norm": 0.0, "learning_rate": 1.0879606622391781e-05, "loss": 0.9694, "step": 3911 }, { "epoch": 0.4879630784582762, "grad_norm": 0.0, "learning_rate": 1.087558209639272e-05, "loss": 0.9694, "step": 3912 }, { "epoch": 0.48808781339653234, "grad_norm": 0.0, "learning_rate": 1.0871557427476585e-05, "loss": 0.9634, "step": 3913 }, { "epoch": 0.48821254833478855, "grad_norm": 0.0, "learning_rate": 1.0867532616300305e-05, "loss": 0.9592, "step": 3914 }, { "epoch": 0.48833728327304476, "grad_norm": 0.0, "learning_rate": 1.0863507663520831e-05, "loss": 0.9771, "step": 3915 }, { "epoch": 0.488462018211301, "grad_norm": 0.0, "learning_rate": 1.085948256979514e-05, "loss": 0.9707, "step": 3916 }, { "epoch": 0.4885867531495572, "grad_norm": 0.0, "learning_rate": 1.0855457335780224e-05, "loss": 0.962, "step": 3917 }, { "epoch": 0.4887114880878134, "grad_norm": 0.0, "learning_rate": 1.0851431962133106e-05, "loss": 0.9139, "step": 3918 }, { "epoch": 0.4888362230260696, "grad_norm": 0.0, "learning_rate": 1.0847406449510827e-05, "loss": 0.9126, "step": 3919 }, { "epoch": 0.4889609579643258, "grad_norm": 0.0, "learning_rate": 1.0843380798570452e-05, "loss": 0.9019, "step": 3920 }, { "epoch": 0.489085692902582, "grad_norm": 0.0, "learning_rate": 1.0839355009969068e-05, "loss": 0.9677, "step": 3921 }, { "epoch": 0.48921042784083824, "grad_norm": 0.0, "learning_rate": 1.0835329084363787e-05, "loss": 0.9314, "step": 3922 }, { "epoch": 0.48933516277909445, "grad_norm": 0.0, "learning_rate": 1.0831303022411745e-05, "loss": 0.9767, "step": 3923 }, { "epoch": 0.48945989771735066, "grad_norm": 0.0, "learning_rate": 1.082727682477009e-05, "loss": 0.947, "step": 3924 }, { "epoch": 0.4895846326556068, "grad_norm": 0.0, "learning_rate": 1.0823250492096e-05, "loss": 0.9161, "step": 3925 }, { "epoch": 0.489709367593863, "grad_norm": 0.0, "learning_rate": 1.0819224025046677e-05, "loss": 0.9562, "step": 3926 }, { "epoch": 0.48983410253211923, "grad_norm": 0.0, "learning_rate": 1.081519742427934e-05, "loss": 0.9412, "step": 3927 }, { "epoch": 0.48995883747037544, "grad_norm": 0.0, "learning_rate": 1.0811170690451232e-05, "loss": 0.9313, "step": 3928 }, { "epoch": 0.49008357240863165, "grad_norm": 0.0, "learning_rate": 1.0807143824219622e-05, "loss": 0.9777, "step": 3929 }, { "epoch": 0.49020830734688786, "grad_norm": 0.0, "learning_rate": 1.0803116826241789e-05, "loss": 0.9613, "step": 3930 }, { "epoch": 0.49033304228514407, "grad_norm": 0.0, "learning_rate": 1.0799089697175041e-05, "loss": 0.9558, "step": 3931 }, { "epoch": 0.4904577772234003, "grad_norm": 0.0, "learning_rate": 1.0795062437676712e-05, "loss": 0.9099, "step": 3932 }, { "epoch": 0.4905825121616565, "grad_norm": 0.0, "learning_rate": 1.0791035048404148e-05, "loss": 0.9569, "step": 3933 }, { "epoch": 0.4907072470999127, "grad_norm": 0.0, "learning_rate": 1.0787007530014723e-05, "loss": 0.9577, "step": 3934 }, { "epoch": 0.4908319820381689, "grad_norm": 0.0, "learning_rate": 1.0782979883165832e-05, "loss": 0.9624, "step": 3935 }, { "epoch": 0.4909567169764251, "grad_norm": 0.0, "learning_rate": 1.0778952108514882e-05, "loss": 0.9895, "step": 3936 }, { "epoch": 0.4910814519146813, "grad_norm": 0.0, "learning_rate": 1.077492420671931e-05, "loss": 0.9889, "step": 3937 }, { "epoch": 0.4912061868529375, "grad_norm": 0.0, "learning_rate": 1.0770896178436572e-05, "loss": 0.9694, "step": 3938 }, { "epoch": 0.4913309217911937, "grad_norm": 0.0, "learning_rate": 1.0766868024324146e-05, "loss": 1.0258, "step": 3939 }, { "epoch": 0.4914556567294499, "grad_norm": 0.0, "learning_rate": 1.0762839745039526e-05, "loss": 0.9964, "step": 3940 }, { "epoch": 0.4915803916677061, "grad_norm": 0.0, "learning_rate": 1.0758811341240226e-05, "loss": 1.0452, "step": 3941 }, { "epoch": 0.49170512660596233, "grad_norm": 0.0, "learning_rate": 1.0754782813583784e-05, "loss": 0.9506, "step": 3942 }, { "epoch": 0.49182986154421854, "grad_norm": 0.0, "learning_rate": 1.0750754162727763e-05, "loss": 0.9921, "step": 3943 }, { "epoch": 0.49195459648247475, "grad_norm": 0.0, "learning_rate": 1.0746725389329733e-05, "loss": 0.951, "step": 3944 }, { "epoch": 0.49207933142073096, "grad_norm": 0.0, "learning_rate": 1.0742696494047295e-05, "loss": 0.9118, "step": 3945 }, { "epoch": 0.49220406635898717, "grad_norm": 0.0, "learning_rate": 1.0738667477538072e-05, "loss": 0.946, "step": 3946 }, { "epoch": 0.4923288012972434, "grad_norm": 0.0, "learning_rate": 1.0734638340459688e-05, "loss": 0.9108, "step": 3947 }, { "epoch": 0.4924535362354996, "grad_norm": 0.0, "learning_rate": 1.0730609083469808e-05, "loss": 0.9887, "step": 3948 }, { "epoch": 0.49257827117375574, "grad_norm": 0.0, "learning_rate": 1.0726579707226108e-05, "loss": 0.9034, "step": 3949 }, { "epoch": 0.49270300611201195, "grad_norm": 0.0, "learning_rate": 1.0722550212386282e-05, "loss": 0.9181, "step": 3950 }, { "epoch": 0.49282774105026816, "grad_norm": 0.0, "learning_rate": 1.0718520599608046e-05, "loss": 0.9475, "step": 3951 }, { "epoch": 0.4929524759885244, "grad_norm": 0.0, "learning_rate": 1.0714490869549134e-05, "loss": 0.9278, "step": 3952 }, { "epoch": 0.4930772109267806, "grad_norm": 0.0, "learning_rate": 1.0710461022867303e-05, "loss": 0.9341, "step": 3953 }, { "epoch": 0.4932019458650368, "grad_norm": 0.0, "learning_rate": 1.0706431060220318e-05, "loss": 0.9401, "step": 3954 }, { "epoch": 0.493326680803293, "grad_norm": 0.0, "learning_rate": 1.0702400982265977e-05, "loss": 0.935, "step": 3955 }, { "epoch": 0.4934514157415492, "grad_norm": 0.0, "learning_rate": 1.069837078966209e-05, "loss": 1.0162, "step": 3956 }, { "epoch": 0.4935761506798054, "grad_norm": 0.0, "learning_rate": 1.0694340483066483e-05, "loss": 0.9545, "step": 3957 }, { "epoch": 0.49370088561806164, "grad_norm": 0.0, "learning_rate": 1.0690310063137003e-05, "loss": 0.9531, "step": 3958 }, { "epoch": 0.49382562055631785, "grad_norm": 0.0, "learning_rate": 1.068627953053152e-05, "loss": 0.9886, "step": 3959 }, { "epoch": 0.49395035549457406, "grad_norm": 0.0, "learning_rate": 1.0682248885907916e-05, "loss": 1.0141, "step": 3960 }, { "epoch": 0.4940750904328302, "grad_norm": 0.0, "learning_rate": 1.0678218129924093e-05, "loss": 0.9634, "step": 3961 }, { "epoch": 0.4941998253710864, "grad_norm": 0.0, "learning_rate": 1.0674187263237977e-05, "loss": 0.9422, "step": 3962 }, { "epoch": 0.49432456030934263, "grad_norm": 0.0, "learning_rate": 1.0670156286507503e-05, "loss": 0.9833, "step": 3963 }, { "epoch": 0.49444929524759884, "grad_norm": 0.0, "learning_rate": 1.0666125200390627e-05, "loss": 0.9321, "step": 3964 }, { "epoch": 0.49457403018585505, "grad_norm": 0.0, "learning_rate": 1.0662094005545322e-05, "loss": 0.9767, "step": 3965 }, { "epoch": 0.49469876512411126, "grad_norm": 0.0, "learning_rate": 1.065806270262959e-05, "loss": 0.998, "step": 3966 }, { "epoch": 0.49482350006236747, "grad_norm": 0.0, "learning_rate": 1.0654031292301432e-05, "loss": 1.0105, "step": 3967 }, { "epoch": 0.4949482350006237, "grad_norm": 0.0, "learning_rate": 1.064999977521888e-05, "loss": 0.9239, "step": 3968 }, { "epoch": 0.4950729699388799, "grad_norm": 0.0, "learning_rate": 1.064596815203998e-05, "loss": 0.9348, "step": 3969 }, { "epoch": 0.4951977048771361, "grad_norm": 0.0, "learning_rate": 1.0641936423422789e-05, "loss": 0.9702, "step": 3970 }, { "epoch": 0.4953224398153923, "grad_norm": 0.0, "learning_rate": 1.0637904590025392e-05, "loss": 0.9978, "step": 3971 }, { "epoch": 0.4954471747536485, "grad_norm": 0.0, "learning_rate": 1.0633872652505885e-05, "loss": 1.0193, "step": 3972 }, { "epoch": 0.4955719096919047, "grad_norm": 0.0, "learning_rate": 1.0629840611522382e-05, "loss": 0.9318, "step": 3973 }, { "epoch": 0.4956966446301609, "grad_norm": 0.0, "learning_rate": 1.0625808467733012e-05, "loss": 0.9812, "step": 3974 }, { "epoch": 0.4958213795684171, "grad_norm": 0.0, "learning_rate": 1.0621776221795923e-05, "loss": 0.956, "step": 3975 }, { "epoch": 0.4959461145066733, "grad_norm": 0.0, "learning_rate": 1.0617743874369282e-05, "loss": 0.9142, "step": 3976 }, { "epoch": 0.4960708494449295, "grad_norm": 0.0, "learning_rate": 1.0613711426111264e-05, "loss": 0.9664, "step": 3977 }, { "epoch": 0.49619558438318573, "grad_norm": 0.0, "learning_rate": 1.0609678877680069e-05, "loss": 0.9619, "step": 3978 }, { "epoch": 0.49632031932144194, "grad_norm": 0.0, "learning_rate": 1.0605646229733917e-05, "loss": 0.9916, "step": 3979 }, { "epoch": 0.49644505425969815, "grad_norm": 0.0, "learning_rate": 1.0601613482931028e-05, "loss": 0.9648, "step": 3980 }, { "epoch": 0.49656978919795436, "grad_norm": 0.0, "learning_rate": 1.0597580637929652e-05, "loss": 1.0033, "step": 3981 }, { "epoch": 0.49669452413621057, "grad_norm": 0.0, "learning_rate": 1.0593547695388054e-05, "loss": 0.9568, "step": 3982 }, { "epoch": 0.4968192590744668, "grad_norm": 0.0, "learning_rate": 1.0589514655964506e-05, "loss": 0.9721, "step": 3983 }, { "epoch": 0.496943994012723, "grad_norm": 0.0, "learning_rate": 1.0585481520317305e-05, "loss": 1.0029, "step": 3984 }, { "epoch": 0.49706872895097914, "grad_norm": 0.0, "learning_rate": 1.0581448289104759e-05, "loss": 1.0147, "step": 3985 }, { "epoch": 0.49719346388923535, "grad_norm": 0.0, "learning_rate": 1.0577414962985198e-05, "loss": 0.9656, "step": 3986 }, { "epoch": 0.49731819882749156, "grad_norm": 0.0, "learning_rate": 1.0573381542616954e-05, "loss": 0.8953, "step": 3987 }, { "epoch": 0.4974429337657478, "grad_norm": 0.0, "learning_rate": 1.0569348028658388e-05, "loss": 0.9929, "step": 3988 }, { "epoch": 0.497567668704004, "grad_norm": 0.0, "learning_rate": 1.0565314421767871e-05, "loss": 0.9874, "step": 3989 }, { "epoch": 0.4976924036422602, "grad_norm": 0.0, "learning_rate": 1.056128072260379e-05, "loss": 0.9307, "step": 3990 }, { "epoch": 0.4978171385805164, "grad_norm": 0.0, "learning_rate": 1.0557246931824545e-05, "loss": 1.0005, "step": 3991 }, { "epoch": 0.4979418735187726, "grad_norm": 0.0, "learning_rate": 1.0553213050088553e-05, "loss": 0.9684, "step": 3992 }, { "epoch": 0.4980666084570288, "grad_norm": 0.0, "learning_rate": 1.0549179078054243e-05, "loss": 0.9474, "step": 3993 }, { "epoch": 0.49819134339528504, "grad_norm": 0.0, "learning_rate": 1.0545145016380065e-05, "loss": 0.9187, "step": 3994 }, { "epoch": 0.49831607833354125, "grad_norm": 0.0, "learning_rate": 1.0541110865724475e-05, "loss": 0.9652, "step": 3995 }, { "epoch": 0.49844081327179746, "grad_norm": 0.0, "learning_rate": 1.0537076626745953e-05, "loss": 0.9892, "step": 3996 }, { "epoch": 0.4985655482100536, "grad_norm": 0.0, "learning_rate": 1.0533042300102984e-05, "loss": 0.9161, "step": 3997 }, { "epoch": 0.4986902831483098, "grad_norm": 0.0, "learning_rate": 1.0529007886454075e-05, "loss": 0.9359, "step": 3998 }, { "epoch": 0.49881501808656603, "grad_norm": 0.0, "learning_rate": 1.0524973386457742e-05, "loss": 0.9745, "step": 3999 }, { "epoch": 0.49893975302482224, "grad_norm": 0.0, "learning_rate": 1.0520938800772516e-05, "loss": 1.0079, "step": 4000 }, { "epoch": 0.49906448796307845, "grad_norm": 0.0, "learning_rate": 1.0516904130056946e-05, "loss": 0.9459, "step": 4001 }, { "epoch": 0.49918922290133466, "grad_norm": 0.0, "learning_rate": 1.0512869374969593e-05, "loss": 0.9739, "step": 4002 }, { "epoch": 0.49931395783959087, "grad_norm": 0.0, "learning_rate": 1.0508834536169028e-05, "loss": 0.9257, "step": 4003 }, { "epoch": 0.4994386927778471, "grad_norm": 0.0, "learning_rate": 1.0504799614313837e-05, "loss": 0.9673, "step": 4004 }, { "epoch": 0.4995634277161033, "grad_norm": 0.0, "learning_rate": 1.0500764610062627e-05, "loss": 0.998, "step": 4005 }, { "epoch": 0.4996881626543595, "grad_norm": 0.0, "learning_rate": 1.0496729524074002e-05, "loss": 0.9912, "step": 4006 }, { "epoch": 0.4998128975926157, "grad_norm": 0.0, "learning_rate": 1.0492694357006598e-05, "loss": 0.9683, "step": 4007 }, { "epoch": 0.4999376325308719, "grad_norm": 0.0, "learning_rate": 1.0488659109519059e-05, "loss": 0.9466, "step": 4008 }, { "epoch": 0.5000623674691281, "grad_norm": 0.0, "learning_rate": 1.0484623782270032e-05, "loss": 0.9679, "step": 4009 }, { "epoch": 0.5001871024073843, "grad_norm": 0.0, "learning_rate": 1.0480588375918185e-05, "loss": 0.951, "step": 4010 }, { "epoch": 0.5003118373456406, "grad_norm": 0.0, "learning_rate": 1.0476552891122197e-05, "loss": 0.9913, "step": 4011 }, { "epoch": 0.5004365722838967, "grad_norm": 0.0, "learning_rate": 1.047251732854077e-05, "loss": 0.9463, "step": 4012 }, { "epoch": 0.500561307222153, "grad_norm": 0.0, "learning_rate": 1.04684816888326e-05, "loss": 0.963, "step": 4013 }, { "epoch": 0.5006860421604091, "grad_norm": 0.0, "learning_rate": 1.0464445972656408e-05, "loss": 0.9677, "step": 4014 }, { "epoch": 0.5008107770986653, "grad_norm": 0.0, "learning_rate": 1.0460410180670921e-05, "loss": 0.9878, "step": 4015 }, { "epoch": 0.5009355120369215, "grad_norm": 0.0, "learning_rate": 1.0456374313534891e-05, "loss": 0.9436, "step": 4016 }, { "epoch": 0.5010602469751777, "grad_norm": 0.0, "learning_rate": 1.0452338371907065e-05, "loss": 0.905, "step": 4017 }, { "epoch": 0.501184981913434, "grad_norm": 0.0, "learning_rate": 1.0448302356446214e-05, "loss": 0.9537, "step": 4018 }, { "epoch": 0.5013097168516901, "grad_norm": 0.0, "learning_rate": 1.0444266267811118e-05, "loss": 0.9721, "step": 4019 }, { "epoch": 0.5014344517899464, "grad_norm": 0.0, "learning_rate": 1.0440230106660565e-05, "loss": 0.9332, "step": 4020 }, { "epoch": 0.5015591867282025, "grad_norm": 0.0, "learning_rate": 1.0436193873653362e-05, "loss": 0.9367, "step": 4021 }, { "epoch": 0.5016839216664588, "grad_norm": 0.0, "learning_rate": 1.0432157569448323e-05, "loss": 0.9608, "step": 4022 }, { "epoch": 0.501808656604715, "grad_norm": 0.0, "learning_rate": 1.042812119470427e-05, "loss": 0.9861, "step": 4023 }, { "epoch": 0.5019333915429712, "grad_norm": 0.0, "learning_rate": 1.0424084750080049e-05, "loss": 0.9829, "step": 4024 }, { "epoch": 0.5020581264812274, "grad_norm": 0.0, "learning_rate": 1.0420048236234506e-05, "loss": 0.9917, "step": 4025 }, { "epoch": 0.5021828614194837, "grad_norm": 0.0, "learning_rate": 1.04160116538265e-05, "loss": 0.9988, "step": 4026 }, { "epoch": 0.5023075963577398, "grad_norm": 0.0, "learning_rate": 1.0411975003514905e-05, "loss": 0.9876, "step": 4027 }, { "epoch": 0.502432331295996, "grad_norm": 0.0, "learning_rate": 1.04079382859586e-05, "loss": 0.9532, "step": 4028 }, { "epoch": 0.5025570662342522, "grad_norm": 0.0, "learning_rate": 1.0403901501816489e-05, "loss": 0.9775, "step": 4029 }, { "epoch": 0.5026818011725084, "grad_norm": 0.0, "learning_rate": 1.0399864651747467e-05, "loss": 0.9604, "step": 4030 }, { "epoch": 0.5028065361107646, "grad_norm": 0.0, "learning_rate": 1.0395827736410453e-05, "loss": 0.9102, "step": 4031 }, { "epoch": 0.5029312710490208, "grad_norm": 0.0, "learning_rate": 1.0391790756464377e-05, "loss": 0.9514, "step": 4032 }, { "epoch": 0.5030560059872771, "grad_norm": 0.0, "learning_rate": 1.038775371256817e-05, "loss": 0.9995, "step": 4033 }, { "epoch": 0.5031807409255332, "grad_norm": 0.0, "learning_rate": 1.038371660538078e-05, "loss": 0.9175, "step": 4034 }, { "epoch": 0.5033054758637895, "grad_norm": 0.0, "learning_rate": 1.037967943556117e-05, "loss": 1.0132, "step": 4035 }, { "epoch": 0.5034302108020456, "grad_norm": 0.0, "learning_rate": 1.0375642203768303e-05, "loss": 0.9797, "step": 4036 }, { "epoch": 0.5035549457403019, "grad_norm": 0.0, "learning_rate": 1.0371604910661158e-05, "loss": 0.9376, "step": 4037 }, { "epoch": 0.5036796806785581, "grad_norm": 0.0, "learning_rate": 1.0367567556898723e-05, "loss": 0.9818, "step": 4038 }, { "epoch": 0.5038044156168142, "grad_norm": 0.0, "learning_rate": 1.036353014314e-05, "loss": 0.9486, "step": 4039 }, { "epoch": 0.5039291505550705, "grad_norm": 0.0, "learning_rate": 1.0359492670043988e-05, "loss": 0.9894, "step": 4040 }, { "epoch": 0.5040538854933266, "grad_norm": 0.0, "learning_rate": 1.035545513826971e-05, "loss": 0.9947, "step": 4041 }, { "epoch": 0.5041786204315829, "grad_norm": 0.0, "learning_rate": 1.0351417548476199e-05, "loss": 0.9606, "step": 4042 }, { "epoch": 0.5043033553698391, "grad_norm": 0.0, "learning_rate": 1.0347379901322478e-05, "loss": 0.9357, "step": 4043 }, { "epoch": 0.5044280903080953, "grad_norm": 0.0, "learning_rate": 1.0343342197467601e-05, "loss": 1.0328, "step": 4044 }, { "epoch": 0.5045528252463515, "grad_norm": 0.0, "learning_rate": 1.0339304437570626e-05, "loss": 0.9422, "step": 4045 }, { "epoch": 0.5046775601846077, "grad_norm": 0.0, "learning_rate": 1.0335266622290608e-05, "loss": 1.0152, "step": 4046 }, { "epoch": 0.5048022951228639, "grad_norm": 0.0, "learning_rate": 1.0331228752286624e-05, "loss": 0.9187, "step": 4047 }, { "epoch": 0.5049270300611202, "grad_norm": 0.0, "learning_rate": 1.0327190828217763e-05, "loss": 0.9345, "step": 4048 }, { "epoch": 0.5050517649993763, "grad_norm": 0.0, "learning_rate": 1.0323152850743107e-05, "loss": 0.9496, "step": 4049 }, { "epoch": 0.5051764999376326, "grad_norm": 0.0, "learning_rate": 1.031911482052176e-05, "loss": 0.8995, "step": 4050 }, { "epoch": 0.5053012348758887, "grad_norm": 0.0, "learning_rate": 1.0315076738212829e-05, "loss": 0.9326, "step": 4051 }, { "epoch": 0.5054259698141449, "grad_norm": 0.0, "learning_rate": 1.0311038604475431e-05, "loss": 0.9429, "step": 4052 }, { "epoch": 0.5055507047524012, "grad_norm": 0.0, "learning_rate": 1.0307000419968693e-05, "loss": 0.9597, "step": 4053 }, { "epoch": 0.5056754396906573, "grad_norm": 0.0, "learning_rate": 1.0302962185351746e-05, "loss": 0.9326, "step": 4054 }, { "epoch": 0.5058001746289136, "grad_norm": 0.0, "learning_rate": 1.0298923901283736e-05, "loss": 0.9667, "step": 4055 }, { "epoch": 0.5059249095671697, "grad_norm": 0.0, "learning_rate": 1.0294885568423805e-05, "loss": 1.0133, "step": 4056 }, { "epoch": 0.506049644505426, "grad_norm": 0.0, "learning_rate": 1.0290847187431115e-05, "loss": 0.942, "step": 4057 }, { "epoch": 0.5061743794436822, "grad_norm": 0.0, "learning_rate": 1.0286808758964834e-05, "loss": 0.9353, "step": 4058 }, { "epoch": 0.5062991143819384, "grad_norm": 0.0, "learning_rate": 1.0282770283684133e-05, "loss": 0.9309, "step": 4059 }, { "epoch": 0.5064238493201946, "grad_norm": 0.0, "learning_rate": 1.0278731762248194e-05, "loss": 0.9854, "step": 4060 }, { "epoch": 0.5065485842584508, "grad_norm": 0.0, "learning_rate": 1.0274693195316202e-05, "loss": 0.9602, "step": 4061 }, { "epoch": 0.506673319196707, "grad_norm": 0.0, "learning_rate": 1.0270654583547358e-05, "loss": 0.9669, "step": 4062 }, { "epoch": 0.5067980541349631, "grad_norm": 0.0, "learning_rate": 1.0266615927600858e-05, "loss": 0.9417, "step": 4063 }, { "epoch": 0.5069227890732194, "grad_norm": 0.0, "learning_rate": 1.0262577228135921e-05, "loss": 0.9351, "step": 4064 }, { "epoch": 0.5070475240114756, "grad_norm": 0.0, "learning_rate": 1.0258538485811765e-05, "loss": 0.9261, "step": 4065 }, { "epoch": 0.5071722589497318, "grad_norm": 0.0, "learning_rate": 1.0254499701287604e-05, "loss": 0.9323, "step": 4066 }, { "epoch": 0.507296993887988, "grad_norm": 0.0, "learning_rate": 1.0250460875222676e-05, "loss": 0.9645, "step": 4067 }, { "epoch": 0.5074217288262443, "grad_norm": 0.0, "learning_rate": 1.0246422008276223e-05, "loss": 0.9214, "step": 4068 }, { "epoch": 0.5075464637645004, "grad_norm": 0.0, "learning_rate": 1.0242383101107483e-05, "loss": 0.9917, "step": 4069 }, { "epoch": 0.5076711987027567, "grad_norm": 0.0, "learning_rate": 1.0238344154375712e-05, "loss": 0.9562, "step": 4070 }, { "epoch": 0.5077959336410128, "grad_norm": 0.0, "learning_rate": 1.0234305168740164e-05, "loss": 0.9778, "step": 4071 }, { "epoch": 0.5079206685792691, "grad_norm": 0.0, "learning_rate": 1.0230266144860108e-05, "loss": 0.9324, "step": 4072 }, { "epoch": 0.5080454035175253, "grad_norm": 0.0, "learning_rate": 1.0226227083394811e-05, "loss": 0.9689, "step": 4073 }, { "epoch": 0.5081701384557815, "grad_norm": 0.0, "learning_rate": 1.0222187985003552e-05, "loss": 0.9327, "step": 4074 }, { "epoch": 0.5082948733940377, "grad_norm": 0.0, "learning_rate": 1.0218148850345613e-05, "loss": 0.975, "step": 4075 }, { "epoch": 0.5084196083322938, "grad_norm": 0.0, "learning_rate": 1.0214109680080282e-05, "loss": 0.9818, "step": 4076 }, { "epoch": 0.5085443432705501, "grad_norm": 0.0, "learning_rate": 1.021007047486685e-05, "loss": 0.9176, "step": 4077 }, { "epoch": 0.5086690782088062, "grad_norm": 0.0, "learning_rate": 1.0206031235364627e-05, "loss": 0.9742, "step": 4078 }, { "epoch": 0.5087938131470625, "grad_norm": 0.0, "learning_rate": 1.0201991962232906e-05, "loss": 0.9557, "step": 4079 }, { "epoch": 0.5089185480853187, "grad_norm": 0.0, "learning_rate": 1.0197952656131008e-05, "loss": 0.9299, "step": 4080 }, { "epoch": 0.5090432830235749, "grad_norm": 0.0, "learning_rate": 1.0193913317718245e-05, "loss": 0.951, "step": 4081 }, { "epoch": 0.5091680179618311, "grad_norm": 0.0, "learning_rate": 1.0189873947653941e-05, "loss": 0.9084, "step": 4082 }, { "epoch": 0.5092927529000874, "grad_norm": 0.0, "learning_rate": 1.0185834546597421e-05, "loss": 0.9881, "step": 4083 }, { "epoch": 0.5094174878383435, "grad_norm": 0.0, "learning_rate": 1.0181795115208017e-05, "loss": 0.9773, "step": 4084 }, { "epoch": 0.5095422227765998, "grad_norm": 0.0, "learning_rate": 1.0177755654145071e-05, "loss": 0.9482, "step": 4085 }, { "epoch": 0.5096669577148559, "grad_norm": 0.0, "learning_rate": 1.0173716164067918e-05, "loss": 0.9301, "step": 4086 }, { "epoch": 0.5097916926531121, "grad_norm": 0.0, "learning_rate": 1.0169676645635908e-05, "loss": 0.9737, "step": 4087 }, { "epoch": 0.5099164275913683, "grad_norm": 0.0, "learning_rate": 1.0165637099508395e-05, "loss": 0.9956, "step": 4088 }, { "epoch": 0.5100411625296245, "grad_norm": 0.0, "learning_rate": 1.0161597526344728e-05, "loss": 0.9757, "step": 4089 }, { "epoch": 0.5101658974678808, "grad_norm": 0.0, "learning_rate": 1.015755792680427e-05, "loss": 0.9886, "step": 4090 }, { "epoch": 0.5102906324061369, "grad_norm": 0.0, "learning_rate": 1.015351830154639e-05, "loss": 1.0141, "step": 4091 }, { "epoch": 0.5104153673443932, "grad_norm": 0.0, "learning_rate": 1.0149478651230451e-05, "loss": 0.9088, "step": 4092 }, { "epoch": 0.5105401022826493, "grad_norm": 0.0, "learning_rate": 1.014543897651583e-05, "loss": 0.9907, "step": 4093 }, { "epoch": 0.5106648372209056, "grad_norm": 0.0, "learning_rate": 1.0141399278061897e-05, "loss": 1.0239, "step": 4094 }, { "epoch": 0.5107895721591618, "grad_norm": 0.0, "learning_rate": 1.0137359556528041e-05, "loss": 0.9018, "step": 4095 }, { "epoch": 0.510914307097418, "grad_norm": 0.0, "learning_rate": 1.0133319812573642e-05, "loss": 0.9534, "step": 4096 }, { "epoch": 0.5110390420356742, "grad_norm": 0.0, "learning_rate": 1.0129280046858085e-05, "loss": 0.9542, "step": 4097 }, { "epoch": 0.5111637769739305, "grad_norm": 0.0, "learning_rate": 1.0125240260040772e-05, "loss": 0.9765, "step": 4098 }, { "epoch": 0.5112885119121866, "grad_norm": 0.0, "learning_rate": 1.0121200452781086e-05, "loss": 0.9776, "step": 4099 }, { "epoch": 0.5114132468504428, "grad_norm": 0.0, "learning_rate": 1.0117160625738432e-05, "loss": 0.9888, "step": 4100 }, { "epoch": 0.511537981788699, "grad_norm": 0.0, "learning_rate": 1.011312077957221e-05, "loss": 0.9556, "step": 4101 }, { "epoch": 0.5116627167269552, "grad_norm": 0.0, "learning_rate": 1.0109080914941825e-05, "loss": 0.9636, "step": 4102 }, { "epoch": 0.5117874516652114, "grad_norm": 0.0, "learning_rate": 1.0105041032506685e-05, "loss": 0.9788, "step": 4103 }, { "epoch": 0.5119121866034676, "grad_norm": 0.0, "learning_rate": 1.01010011329262e-05, "loss": 0.923, "step": 4104 }, { "epoch": 0.5120369215417239, "grad_norm": 0.0, "learning_rate": 1.0096961216859786e-05, "loss": 0.9314, "step": 4105 }, { "epoch": 0.51216165647998, "grad_norm": 0.0, "learning_rate": 1.0092921284966854e-05, "loss": 0.9461, "step": 4106 }, { "epoch": 0.5122863914182363, "grad_norm": 0.0, "learning_rate": 1.0088881337906825e-05, "loss": 0.9782, "step": 4107 }, { "epoch": 0.5124111263564924, "grad_norm": 0.0, "learning_rate": 1.0084841376339128e-05, "loss": 0.94, "step": 4108 }, { "epoch": 0.5125358612947487, "grad_norm": 0.0, "learning_rate": 1.0080801400923172e-05, "loss": 0.9533, "step": 4109 }, { "epoch": 0.5126605962330049, "grad_norm": 0.0, "learning_rate": 1.0076761412318395e-05, "loss": 0.9891, "step": 4110 }, { "epoch": 0.512785331171261, "grad_norm": 0.0, "learning_rate": 1.007272141118422e-05, "loss": 0.9464, "step": 4111 }, { "epoch": 0.5129100661095173, "grad_norm": 0.0, "learning_rate": 1.0068681398180074e-05, "loss": 0.948, "step": 4112 }, { "epoch": 0.5130348010477734, "grad_norm": 0.0, "learning_rate": 1.0064641373965394e-05, "loss": 0.9286, "step": 4113 }, { "epoch": 0.5131595359860297, "grad_norm": 0.0, "learning_rate": 1.0060601339199613e-05, "loss": 0.9929, "step": 4114 }, { "epoch": 0.5132842709242859, "grad_norm": 0.0, "learning_rate": 1.0056561294542166e-05, "loss": 0.9634, "step": 4115 }, { "epoch": 0.5134090058625421, "grad_norm": 0.0, "learning_rate": 1.0052521240652489e-05, "loss": 0.9715, "step": 4116 }, { "epoch": 0.5135337408007983, "grad_norm": 0.0, "learning_rate": 1.004848117819002e-05, "loss": 0.9341, "step": 4117 }, { "epoch": 0.5136584757390545, "grad_norm": 0.0, "learning_rate": 1.0044441107814203e-05, "loss": 0.986, "step": 4118 }, { "epoch": 0.5137832106773107, "grad_norm": 0.0, "learning_rate": 1.0040401030184474e-05, "loss": 0.9272, "step": 4119 }, { "epoch": 0.513907945615567, "grad_norm": 0.0, "learning_rate": 1.003636094596028e-05, "loss": 0.9437, "step": 4120 }, { "epoch": 0.5140326805538231, "grad_norm": 0.0, "learning_rate": 1.0032320855801063e-05, "loss": 0.9101, "step": 4121 }, { "epoch": 0.5141574154920794, "grad_norm": 0.0, "learning_rate": 1.002828076036627e-05, "loss": 0.9389, "step": 4122 }, { "epoch": 0.5142821504303355, "grad_norm": 0.0, "learning_rate": 1.0024240660315341e-05, "loss": 0.9317, "step": 4123 }, { "epoch": 0.5144068853685917, "grad_norm": 0.0, "learning_rate": 1.0020200556307727e-05, "loss": 0.938, "step": 4124 }, { "epoch": 0.514531620306848, "grad_norm": 0.0, "learning_rate": 1.001616044900287e-05, "loss": 0.9914, "step": 4125 }, { "epoch": 0.5146563552451041, "grad_norm": 0.0, "learning_rate": 1.0012120339060221e-05, "loss": 1.0269, "step": 4126 }, { "epoch": 0.5147810901833604, "grad_norm": 0.0, "learning_rate": 1.0008080227139229e-05, "loss": 0.9137, "step": 4127 }, { "epoch": 0.5149058251216165, "grad_norm": 0.0, "learning_rate": 1.000404011389934e-05, "loss": 0.9118, "step": 4128 }, { "epoch": 0.5150305600598728, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 0.9367, "step": 4129 }, { "epoch": 0.515155294998129, "grad_norm": 0.0, "learning_rate": 9.995959886100665e-06, "loss": 0.9434, "step": 4130 }, { "epoch": 0.5152800299363852, "grad_norm": 0.0, "learning_rate": 9.991919772860775e-06, "loss": 0.9181, "step": 4131 }, { "epoch": 0.5154047648746414, "grad_norm": 0.0, "learning_rate": 9.987879660939782e-06, "loss": 0.9477, "step": 4132 }, { "epoch": 0.5155294998128976, "grad_norm": 0.0, "learning_rate": 9.98383955099713e-06, "loss": 0.9584, "step": 4133 }, { "epoch": 0.5156542347511538, "grad_norm": 0.0, "learning_rate": 9.979799443692276e-06, "loss": 0.9012, "step": 4134 }, { "epoch": 0.51577896968941, "grad_norm": 0.0, "learning_rate": 9.975759339684665e-06, "loss": 0.9617, "step": 4135 }, { "epoch": 0.5159037046276662, "grad_norm": 0.0, "learning_rate": 9.971719239633734e-06, "loss": 0.9176, "step": 4136 }, { "epoch": 0.5160284395659224, "grad_norm": 0.0, "learning_rate": 9.96767914419894e-06, "loss": 0.9773, "step": 4137 }, { "epoch": 0.5161531745041786, "grad_norm": 0.0, "learning_rate": 9.963639054039722e-06, "loss": 1.006, "step": 4138 }, { "epoch": 0.5162779094424348, "grad_norm": 0.0, "learning_rate": 9.959598969815528e-06, "loss": 0.8844, "step": 4139 }, { "epoch": 0.5164026443806911, "grad_norm": 0.0, "learning_rate": 9.9555588921858e-06, "loss": 0.9162, "step": 4140 }, { "epoch": 0.5165273793189472, "grad_norm": 0.0, "learning_rate": 9.951518821809984e-06, "loss": 0.9851, "step": 4141 }, { "epoch": 0.5166521142572035, "grad_norm": 0.0, "learning_rate": 9.947478759347518e-06, "loss": 0.9109, "step": 4142 }, { "epoch": 0.5167768491954596, "grad_norm": 0.0, "learning_rate": 9.943438705457837e-06, "loss": 0.9232, "step": 4143 }, { "epoch": 0.5169015841337159, "grad_norm": 0.0, "learning_rate": 9.93939866080039e-06, "loss": 0.9559, "step": 4144 }, { "epoch": 0.517026319071972, "grad_norm": 0.0, "learning_rate": 9.935358626034607e-06, "loss": 0.9676, "step": 4145 }, { "epoch": 0.5171510540102283, "grad_norm": 0.0, "learning_rate": 9.93131860181993e-06, "loss": 0.8908, "step": 4146 }, { "epoch": 0.5172757889484845, "grad_norm": 0.0, "learning_rate": 9.927278588815786e-06, "loss": 0.9467, "step": 4147 }, { "epoch": 0.5174005238867406, "grad_norm": 0.0, "learning_rate": 9.923238587681609e-06, "loss": 0.9621, "step": 4148 }, { "epoch": 0.5175252588249969, "grad_norm": 0.0, "learning_rate": 9.91919859907683e-06, "loss": 0.9231, "step": 4149 }, { "epoch": 0.517649993763253, "grad_norm": 0.0, "learning_rate": 9.915158623660877e-06, "loss": 0.9393, "step": 4150 }, { "epoch": 0.5177747287015093, "grad_norm": 0.0, "learning_rate": 9.911118662093176e-06, "loss": 0.9805, "step": 4151 }, { "epoch": 0.5178994636397655, "grad_norm": 0.0, "learning_rate": 9.90707871503315e-06, "loss": 0.9437, "step": 4152 }, { "epoch": 0.5180241985780217, "grad_norm": 0.0, "learning_rate": 9.903038783140216e-06, "loss": 0.9659, "step": 4153 }, { "epoch": 0.5181489335162779, "grad_norm": 0.0, "learning_rate": 9.898998867073802e-06, "loss": 0.9528, "step": 4154 }, { "epoch": 0.5182736684545342, "grad_norm": 0.0, "learning_rate": 9.894958967493317e-06, "loss": 0.9584, "step": 4155 }, { "epoch": 0.5183984033927903, "grad_norm": 0.0, "learning_rate": 9.890919085058179e-06, "loss": 1.0016, "step": 4156 }, { "epoch": 0.5185231383310466, "grad_norm": 0.0, "learning_rate": 9.886879220427791e-06, "loss": 0.9524, "step": 4157 }, { "epoch": 0.5186478732693027, "grad_norm": 0.0, "learning_rate": 9.882839374261572e-06, "loss": 0.9842, "step": 4158 }, { "epoch": 0.5187726082075589, "grad_norm": 0.0, "learning_rate": 9.878799547218919e-06, "loss": 0.9752, "step": 4159 }, { "epoch": 0.5188973431458151, "grad_norm": 0.0, "learning_rate": 9.874759739959231e-06, "loss": 0.9757, "step": 4160 }, { "epoch": 0.5190220780840713, "grad_norm": 0.0, "learning_rate": 9.870719953141917e-06, "loss": 1.0116, "step": 4161 }, { "epoch": 0.5191468130223276, "grad_norm": 0.0, "learning_rate": 9.86668018742636e-06, "loss": 0.9933, "step": 4162 }, { "epoch": 0.5192715479605837, "grad_norm": 0.0, "learning_rate": 9.862640443471962e-06, "loss": 0.9573, "step": 4163 }, { "epoch": 0.51939628289884, "grad_norm": 0.0, "learning_rate": 9.858600721938105e-06, "loss": 0.9645, "step": 4164 }, { "epoch": 0.5195210178370961, "grad_norm": 0.0, "learning_rate": 9.854561023484174e-06, "loss": 0.9069, "step": 4165 }, { "epoch": 0.5196457527753524, "grad_norm": 0.0, "learning_rate": 9.850521348769552e-06, "loss": 0.9161, "step": 4166 }, { "epoch": 0.5197704877136086, "grad_norm": 0.0, "learning_rate": 9.846481698453613e-06, "loss": 0.8889, "step": 4167 }, { "epoch": 0.5198952226518648, "grad_norm": 0.0, "learning_rate": 9.842442073195733e-06, "loss": 0.9003, "step": 4168 }, { "epoch": 0.520019957590121, "grad_norm": 0.0, "learning_rate": 9.838402473655277e-06, "loss": 0.9444, "step": 4169 }, { "epoch": 0.5201446925283773, "grad_norm": 0.0, "learning_rate": 9.834362900491609e-06, "loss": 0.9693, "step": 4170 }, { "epoch": 0.5202694274666334, "grad_norm": 0.0, "learning_rate": 9.830323354364097e-06, "loss": 0.9667, "step": 4171 }, { "epoch": 0.5203941624048896, "grad_norm": 0.0, "learning_rate": 9.826283835932084e-06, "loss": 0.9453, "step": 4172 }, { "epoch": 0.5205188973431458, "grad_norm": 0.0, "learning_rate": 9.822244345854932e-06, "loss": 0.9356, "step": 4173 }, { "epoch": 0.520643632281402, "grad_norm": 0.0, "learning_rate": 9.818204884791983e-06, "loss": 0.9728, "step": 4174 }, { "epoch": 0.5207683672196582, "grad_norm": 0.0, "learning_rate": 9.814165453402582e-06, "loss": 0.9554, "step": 4175 }, { "epoch": 0.5208931021579144, "grad_norm": 0.0, "learning_rate": 9.810126052346062e-06, "loss": 0.9124, "step": 4176 }, { "epoch": 0.5210178370961707, "grad_norm": 0.0, "learning_rate": 9.806086682281759e-06, "loss": 0.9654, "step": 4177 }, { "epoch": 0.5211425720344268, "grad_norm": 0.0, "learning_rate": 9.802047343868997e-06, "loss": 0.9393, "step": 4178 }, { "epoch": 0.5212673069726831, "grad_norm": 0.0, "learning_rate": 9.798008037767094e-06, "loss": 0.9665, "step": 4179 }, { "epoch": 0.5213920419109392, "grad_norm": 0.0, "learning_rate": 9.793968764635378e-06, "loss": 0.9501, "step": 4180 }, { "epoch": 0.5215167768491955, "grad_norm": 0.0, "learning_rate": 9.789929525133153e-06, "loss": 0.9615, "step": 4181 }, { "epoch": 0.5216415117874517, "grad_norm": 0.0, "learning_rate": 9.785890319919721e-06, "loss": 0.9723, "step": 4182 }, { "epoch": 0.5217662467257078, "grad_norm": 0.0, "learning_rate": 9.78185114965439e-06, "loss": 0.9455, "step": 4183 }, { "epoch": 0.5218909816639641, "grad_norm": 0.0, "learning_rate": 9.77781201499645e-06, "loss": 0.9198, "step": 4184 }, { "epoch": 0.5220157166022202, "grad_norm": 0.0, "learning_rate": 9.77377291660519e-06, "loss": 0.948, "step": 4185 }, { "epoch": 0.5221404515404765, "grad_norm": 0.0, "learning_rate": 9.769733855139895e-06, "loss": 0.9923, "step": 4186 }, { "epoch": 0.5222651864787327, "grad_norm": 0.0, "learning_rate": 9.765694831259839e-06, "loss": 0.9496, "step": 4187 }, { "epoch": 0.5223899214169889, "grad_norm": 0.0, "learning_rate": 9.761655845624293e-06, "loss": 0.9239, "step": 4188 }, { "epoch": 0.5225146563552451, "grad_norm": 0.0, "learning_rate": 9.757616898892517e-06, "loss": 0.9177, "step": 4189 }, { "epoch": 0.5226393912935013, "grad_norm": 0.0, "learning_rate": 9.75357799172378e-06, "loss": 0.9767, "step": 4190 }, { "epoch": 0.5227641262317575, "grad_norm": 0.0, "learning_rate": 9.749539124777323e-06, "loss": 0.959, "step": 4191 }, { "epoch": 0.5228888611700138, "grad_norm": 0.0, "learning_rate": 9.7455002987124e-06, "loss": 0.9712, "step": 4192 }, { "epoch": 0.5230135961082699, "grad_norm": 0.0, "learning_rate": 9.741461514188242e-06, "loss": 0.9816, "step": 4193 }, { "epoch": 0.5231383310465262, "grad_norm": 0.0, "learning_rate": 9.73742277186408e-06, "loss": 0.9554, "step": 4194 }, { "epoch": 0.5232630659847823, "grad_norm": 0.0, "learning_rate": 9.733384072399145e-06, "loss": 0.9452, "step": 4195 }, { "epoch": 0.5233878009230385, "grad_norm": 0.0, "learning_rate": 9.729345416452644e-06, "loss": 0.9395, "step": 4196 }, { "epoch": 0.5235125358612948, "grad_norm": 0.0, "learning_rate": 9.725306804683803e-06, "loss": 0.9764, "step": 4197 }, { "epoch": 0.5236372707995509, "grad_norm": 0.0, "learning_rate": 9.721268237751813e-06, "loss": 0.9182, "step": 4198 }, { "epoch": 0.5237620057378072, "grad_norm": 0.0, "learning_rate": 9.717229716315868e-06, "loss": 0.9716, "step": 4199 }, { "epoch": 0.5238867406760633, "grad_norm": 0.0, "learning_rate": 9.713191241035169e-06, "loss": 1.0092, "step": 4200 }, { "epoch": 0.5240114756143196, "grad_norm": 0.0, "learning_rate": 9.709152812568886e-06, "loss": 0.9284, "step": 4201 }, { "epoch": 0.5241362105525758, "grad_norm": 0.0, "learning_rate": 9.705114431576198e-06, "loss": 0.9245, "step": 4202 }, { "epoch": 0.524260945490832, "grad_norm": 0.0, "learning_rate": 9.70107609871627e-06, "loss": 0.9536, "step": 4203 }, { "epoch": 0.5243856804290882, "grad_norm": 0.0, "learning_rate": 9.697037814648255e-06, "loss": 0.9801, "step": 4204 }, { "epoch": 0.5245104153673444, "grad_norm": 0.0, "learning_rate": 9.69299958003131e-06, "loss": 0.9647, "step": 4205 }, { "epoch": 0.5246351503056006, "grad_norm": 0.0, "learning_rate": 9.688961395524569e-06, "loss": 0.9629, "step": 4206 }, { "epoch": 0.5247598852438567, "grad_norm": 0.0, "learning_rate": 9.684923261787175e-06, "loss": 0.9328, "step": 4207 }, { "epoch": 0.524884620182113, "grad_norm": 0.0, "learning_rate": 9.68088517947824e-06, "loss": 0.9976, "step": 4208 }, { "epoch": 0.5250093551203692, "grad_norm": 0.0, "learning_rate": 9.676847149256894e-06, "loss": 0.9986, "step": 4209 }, { "epoch": 0.5251340900586254, "grad_norm": 0.0, "learning_rate": 9.67280917178224e-06, "loss": 0.9834, "step": 4210 }, { "epoch": 0.5252588249968816, "grad_norm": 0.0, "learning_rate": 9.668771247713376e-06, "loss": 0.959, "step": 4211 }, { "epoch": 0.5253835599351379, "grad_norm": 0.0, "learning_rate": 9.664733377709395e-06, "loss": 0.9622, "step": 4212 }, { "epoch": 0.525508294873394, "grad_norm": 0.0, "learning_rate": 9.660695562429379e-06, "loss": 0.9562, "step": 4213 }, { "epoch": 0.5256330298116503, "grad_norm": 0.0, "learning_rate": 9.6566578025324e-06, "loss": 0.9359, "step": 4214 }, { "epoch": 0.5257577647499064, "grad_norm": 0.0, "learning_rate": 9.652620098677527e-06, "loss": 0.8896, "step": 4215 }, { "epoch": 0.5258824996881627, "grad_norm": 0.0, "learning_rate": 9.648582451523805e-06, "loss": 1.0019, "step": 4216 }, { "epoch": 0.5260072346264189, "grad_norm": 0.0, "learning_rate": 9.644544861730293e-06, "loss": 0.984, "step": 4217 }, { "epoch": 0.5261319695646751, "grad_norm": 0.0, "learning_rate": 9.640507329956012e-06, "loss": 0.9314, "step": 4218 }, { "epoch": 0.5262567045029313, "grad_norm": 0.0, "learning_rate": 9.636469856860005e-06, "loss": 0.9177, "step": 4219 }, { "epoch": 0.5263814394411874, "grad_norm": 0.0, "learning_rate": 9.63243244310128e-06, "loss": 0.9817, "step": 4220 }, { "epoch": 0.5265061743794437, "grad_norm": 0.0, "learning_rate": 9.628395089338844e-06, "loss": 0.9281, "step": 4221 }, { "epoch": 0.5266309093176998, "grad_norm": 0.0, "learning_rate": 9.624357796231702e-06, "loss": 0.9234, "step": 4222 }, { "epoch": 0.5267556442559561, "grad_norm": 0.0, "learning_rate": 9.620320564438834e-06, "loss": 0.9901, "step": 4223 }, { "epoch": 0.5268803791942123, "grad_norm": 0.0, "learning_rate": 9.616283394619223e-06, "loss": 0.9427, "step": 4224 }, { "epoch": 0.5270051141324685, "grad_norm": 0.0, "learning_rate": 9.612246287431832e-06, "loss": 0.9371, "step": 4225 }, { "epoch": 0.5271298490707247, "grad_norm": 0.0, "learning_rate": 9.608209243535627e-06, "loss": 0.9661, "step": 4226 }, { "epoch": 0.527254584008981, "grad_norm": 0.0, "learning_rate": 9.60417226358955e-06, "loss": 0.9249, "step": 4227 }, { "epoch": 0.5273793189472371, "grad_norm": 0.0, "learning_rate": 9.600135348252535e-06, "loss": 0.9566, "step": 4228 }, { "epoch": 0.5275040538854934, "grad_norm": 0.0, "learning_rate": 9.596098498183515e-06, "loss": 0.9437, "step": 4229 }, { "epoch": 0.5276287888237495, "grad_norm": 0.0, "learning_rate": 9.5920617140414e-06, "loss": 0.913, "step": 4230 }, { "epoch": 0.5277535237620057, "grad_norm": 0.0, "learning_rate": 9.588024996485098e-06, "loss": 0.9389, "step": 4231 }, { "epoch": 0.527878258700262, "grad_norm": 0.0, "learning_rate": 9.583988346173504e-06, "loss": 1.0094, "step": 4232 }, { "epoch": 0.5280029936385181, "grad_norm": 0.0, "learning_rate": 9.579951763765498e-06, "loss": 0.9672, "step": 4233 }, { "epoch": 0.5281277285767744, "grad_norm": 0.0, "learning_rate": 9.575915249919955e-06, "loss": 0.9574, "step": 4234 }, { "epoch": 0.5282524635150305, "grad_norm": 0.0, "learning_rate": 9.57187880529573e-06, "loss": 0.905, "step": 4235 }, { "epoch": 0.5283771984532868, "grad_norm": 0.0, "learning_rate": 9.56784243055168e-06, "loss": 0.9948, "step": 4236 }, { "epoch": 0.5285019333915429, "grad_norm": 0.0, "learning_rate": 9.563806126346643e-06, "loss": 0.9607, "step": 4237 }, { "epoch": 0.5286266683297992, "grad_norm": 0.0, "learning_rate": 9.559769893339437e-06, "loss": 0.9477, "step": 4238 }, { "epoch": 0.5287514032680554, "grad_norm": 0.0, "learning_rate": 9.555733732188887e-06, "loss": 0.9118, "step": 4239 }, { "epoch": 0.5288761382063116, "grad_norm": 0.0, "learning_rate": 9.551697643553787e-06, "loss": 0.9585, "step": 4240 }, { "epoch": 0.5290008731445678, "grad_norm": 0.0, "learning_rate": 9.547661628092938e-06, "loss": 0.9267, "step": 4241 }, { "epoch": 0.529125608082824, "grad_norm": 0.0, "learning_rate": 9.54362568646511e-06, "loss": 0.9643, "step": 4242 }, { "epoch": 0.5292503430210802, "grad_norm": 0.0, "learning_rate": 9.53958981932908e-06, "loss": 0.9768, "step": 4243 }, { "epoch": 0.5293750779593364, "grad_norm": 0.0, "learning_rate": 9.535554027343599e-06, "loss": 0.9475, "step": 4244 }, { "epoch": 0.5294998128975926, "grad_norm": 0.0, "learning_rate": 9.531518311167402e-06, "loss": 0.9265, "step": 4245 }, { "epoch": 0.5296245478358488, "grad_norm": 0.0, "learning_rate": 9.527482671459233e-06, "loss": 0.974, "step": 4246 }, { "epoch": 0.529749282774105, "grad_norm": 0.0, "learning_rate": 9.523447108877801e-06, "loss": 0.9532, "step": 4247 }, { "epoch": 0.5298740177123612, "grad_norm": 0.0, "learning_rate": 9.519411624081818e-06, "loss": 0.9601, "step": 4248 }, { "epoch": 0.5299987526506175, "grad_norm": 0.0, "learning_rate": 9.515376217729973e-06, "loss": 0.9278, "step": 4249 }, { "epoch": 0.5301234875888736, "grad_norm": 0.0, "learning_rate": 9.511340890480944e-06, "loss": 0.923, "step": 4250 }, { "epoch": 0.5302482225271299, "grad_norm": 0.0, "learning_rate": 9.507305642993403e-06, "loss": 0.9708, "step": 4251 }, { "epoch": 0.530372957465386, "grad_norm": 0.0, "learning_rate": 9.503270475925997e-06, "loss": 0.9271, "step": 4252 }, { "epoch": 0.5304976924036423, "grad_norm": 0.0, "learning_rate": 9.499235389937378e-06, "loss": 0.9529, "step": 4253 }, { "epoch": 0.5306224273418985, "grad_norm": 0.0, "learning_rate": 9.495200385686168e-06, "loss": 0.9676, "step": 4254 }, { "epoch": 0.5307471622801546, "grad_norm": 0.0, "learning_rate": 9.491165463830975e-06, "loss": 0.9228, "step": 4255 }, { "epoch": 0.5308718972184109, "grad_norm": 0.0, "learning_rate": 9.48713062503041e-06, "loss": 0.9553, "step": 4256 }, { "epoch": 0.530996632156667, "grad_norm": 0.0, "learning_rate": 9.483095869943056e-06, "loss": 0.9783, "step": 4257 }, { "epoch": 0.5311213670949233, "grad_norm": 0.0, "learning_rate": 9.479061199227486e-06, "loss": 0.9528, "step": 4258 }, { "epoch": 0.5312461020331795, "grad_norm": 0.0, "learning_rate": 9.475026613542264e-06, "loss": 0.9341, "step": 4259 }, { "epoch": 0.5313708369714357, "grad_norm": 0.0, "learning_rate": 9.47099211354593e-06, "loss": 0.9928, "step": 4260 }, { "epoch": 0.5314955719096919, "grad_norm": 0.0, "learning_rate": 9.46695769989702e-06, "loss": 0.947, "step": 4261 }, { "epoch": 0.5316203068479481, "grad_norm": 0.0, "learning_rate": 9.462923373254049e-06, "loss": 1.0086, "step": 4262 }, { "epoch": 0.5317450417862043, "grad_norm": 0.0, "learning_rate": 9.458889134275527e-06, "loss": 0.9385, "step": 4263 }, { "epoch": 0.5318697767244606, "grad_norm": 0.0, "learning_rate": 9.454854983619936e-06, "loss": 0.9242, "step": 4264 }, { "epoch": 0.5319945116627167, "grad_norm": 0.0, "learning_rate": 9.450820921945758e-06, "loss": 0.9537, "step": 4265 }, { "epoch": 0.532119246600973, "grad_norm": 0.0, "learning_rate": 9.44678694991145e-06, "loss": 0.948, "step": 4266 }, { "epoch": 0.5322439815392291, "grad_norm": 0.0, "learning_rate": 9.442753068175458e-06, "loss": 0.9236, "step": 4267 }, { "epoch": 0.5323687164774853, "grad_norm": 0.0, "learning_rate": 9.438719277396213e-06, "loss": 0.9757, "step": 4268 }, { "epoch": 0.5324934514157416, "grad_norm": 0.0, "learning_rate": 9.43468557823213e-06, "loss": 0.9265, "step": 4269 }, { "epoch": 0.5326181863539977, "grad_norm": 0.0, "learning_rate": 9.430651971341616e-06, "loss": 0.9465, "step": 4270 }, { "epoch": 0.532742921292254, "grad_norm": 0.0, "learning_rate": 9.426618457383051e-06, "loss": 0.9057, "step": 4271 }, { "epoch": 0.5328676562305101, "grad_norm": 0.0, "learning_rate": 9.422585037014805e-06, "loss": 0.9399, "step": 4272 }, { "epoch": 0.5329923911687664, "grad_norm": 0.0, "learning_rate": 9.418551710895243e-06, "loss": 1.0215, "step": 4273 }, { "epoch": 0.5331171261070226, "grad_norm": 0.0, "learning_rate": 9.414518479682697e-06, "loss": 1.0108, "step": 4274 }, { "epoch": 0.5332418610452788, "grad_norm": 0.0, "learning_rate": 9.410485344035498e-06, "loss": 0.9492, "step": 4275 }, { "epoch": 0.533366595983535, "grad_norm": 0.0, "learning_rate": 9.406452304611951e-06, "loss": 0.9306, "step": 4276 }, { "epoch": 0.5334913309217912, "grad_norm": 0.0, "learning_rate": 9.40241936207035e-06, "loss": 0.9009, "step": 4277 }, { "epoch": 0.5336160658600474, "grad_norm": 0.0, "learning_rate": 9.398386517068975e-06, "loss": 0.966, "step": 4278 }, { "epoch": 0.5337408007983035, "grad_norm": 0.0, "learning_rate": 9.394353770266084e-06, "loss": 0.9885, "step": 4279 }, { "epoch": 0.5338655357365598, "grad_norm": 0.0, "learning_rate": 9.390321122319933e-06, "loss": 0.9444, "step": 4280 }, { "epoch": 0.533990270674816, "grad_norm": 0.0, "learning_rate": 9.386288573888736e-06, "loss": 0.9523, "step": 4281 }, { "epoch": 0.5341150056130722, "grad_norm": 0.0, "learning_rate": 9.382256125630722e-06, "loss": 0.9578, "step": 4282 }, { "epoch": 0.5342397405513284, "grad_norm": 0.0, "learning_rate": 9.37822377820408e-06, "loss": 0.9626, "step": 4283 }, { "epoch": 0.5343644754895847, "grad_norm": 0.0, "learning_rate": 9.374191532266991e-06, "loss": 0.9524, "step": 4284 }, { "epoch": 0.5344892104278408, "grad_norm": 0.0, "learning_rate": 9.370159388477623e-06, "loss": 0.9489, "step": 4285 }, { "epoch": 0.5346139453660971, "grad_norm": 0.0, "learning_rate": 9.366127347494117e-06, "loss": 0.9202, "step": 4286 }, { "epoch": 0.5347386803043532, "grad_norm": 0.0, "learning_rate": 9.362095409974612e-06, "loss": 0.9252, "step": 4287 }, { "epoch": 0.5348634152426095, "grad_norm": 0.0, "learning_rate": 9.358063576577216e-06, "loss": 0.9499, "step": 4288 }, { "epoch": 0.5349881501808657, "grad_norm": 0.0, "learning_rate": 9.354031847960023e-06, "loss": 0.9346, "step": 4289 }, { "epoch": 0.5351128851191219, "grad_norm": 0.0, "learning_rate": 9.350000224781125e-06, "loss": 0.9101, "step": 4290 }, { "epoch": 0.5352376200573781, "grad_norm": 0.0, "learning_rate": 9.34596870769857e-06, "loss": 0.9602, "step": 4291 }, { "epoch": 0.5353623549956342, "grad_norm": 0.0, "learning_rate": 9.341937297370414e-06, "loss": 0.9028, "step": 4292 }, { "epoch": 0.5354870899338905, "grad_norm": 0.0, "learning_rate": 9.33790599445468e-06, "loss": 0.9448, "step": 4293 }, { "epoch": 0.5356118248721466, "grad_norm": 0.0, "learning_rate": 9.333874799609378e-06, "loss": 0.9334, "step": 4294 }, { "epoch": 0.5357365598104029, "grad_norm": 0.0, "learning_rate": 9.329843713492502e-06, "loss": 0.9123, "step": 4295 }, { "epoch": 0.5358612947486591, "grad_norm": 0.0, "learning_rate": 9.325812736762025e-06, "loss": 0.9288, "step": 4296 }, { "epoch": 0.5359860296869153, "grad_norm": 0.0, "learning_rate": 9.321781870075908e-06, "loss": 0.9348, "step": 4297 }, { "epoch": 0.5361107646251715, "grad_norm": 0.0, "learning_rate": 9.317751114092084e-06, "loss": 0.9612, "step": 4298 }, { "epoch": 0.5362354995634278, "grad_norm": 0.0, "learning_rate": 9.313720469468482e-06, "loss": 0.9932, "step": 4299 }, { "epoch": 0.5363602345016839, "grad_norm": 0.0, "learning_rate": 9.309689936863002e-06, "loss": 0.9686, "step": 4300 }, { "epoch": 0.5364849694399402, "grad_norm": 0.0, "learning_rate": 9.305659516933519e-06, "loss": 0.9814, "step": 4301 }, { "epoch": 0.5366097043781963, "grad_norm": 0.0, "learning_rate": 9.301629210337914e-06, "loss": 0.9477, "step": 4302 }, { "epoch": 0.5367344393164525, "grad_norm": 0.0, "learning_rate": 9.297599017734023e-06, "loss": 0.9815, "step": 4303 }, { "epoch": 0.5368591742547087, "grad_norm": 0.0, "learning_rate": 9.293568939779685e-06, "loss": 0.9638, "step": 4304 }, { "epoch": 0.5369839091929649, "grad_norm": 0.0, "learning_rate": 9.289538977132702e-06, "loss": 0.9263, "step": 4305 }, { "epoch": 0.5371086441312212, "grad_norm": 0.0, "learning_rate": 9.285509130450869e-06, "loss": 0.9509, "step": 4306 }, { "epoch": 0.5372333790694773, "grad_norm": 0.0, "learning_rate": 9.281479400391958e-06, "loss": 0.9564, "step": 4307 }, { "epoch": 0.5373581140077336, "grad_norm": 0.0, "learning_rate": 9.27744978761372e-06, "loss": 0.9209, "step": 4308 }, { "epoch": 0.5374828489459897, "grad_norm": 0.0, "learning_rate": 9.273420292773895e-06, "loss": 0.9428, "step": 4309 }, { "epoch": 0.537607583884246, "grad_norm": 0.0, "learning_rate": 9.269390916530197e-06, "loss": 0.9859, "step": 4310 }, { "epoch": 0.5377323188225022, "grad_norm": 0.0, "learning_rate": 9.265361659540315e-06, "loss": 0.9895, "step": 4311 }, { "epoch": 0.5378570537607584, "grad_norm": 0.0, "learning_rate": 9.261332522461934e-06, "loss": 0.8889, "step": 4312 }, { "epoch": 0.5379817886990146, "grad_norm": 0.0, "learning_rate": 9.257303505952705e-06, "loss": 0.9487, "step": 4313 }, { "epoch": 0.5381065236372709, "grad_norm": 0.0, "learning_rate": 9.253274610670269e-06, "loss": 0.9161, "step": 4314 }, { "epoch": 0.538231258575527, "grad_norm": 0.0, "learning_rate": 9.249245837272237e-06, "loss": 0.9442, "step": 4315 }, { "epoch": 0.5383559935137832, "grad_norm": 0.0, "learning_rate": 9.245217186416218e-06, "loss": 0.9533, "step": 4316 }, { "epoch": 0.5384807284520394, "grad_norm": 0.0, "learning_rate": 9.241188658759779e-06, "loss": 0.9418, "step": 4317 }, { "epoch": 0.5386054633902956, "grad_norm": 0.0, "learning_rate": 9.237160254960477e-06, "loss": 0.9193, "step": 4318 }, { "epoch": 0.5387301983285518, "grad_norm": 0.0, "learning_rate": 9.233131975675857e-06, "loss": 0.9479, "step": 4319 }, { "epoch": 0.538854933266808, "grad_norm": 0.0, "learning_rate": 9.229103821563428e-06, "loss": 0.9608, "step": 4320 }, { "epoch": 0.5389796682050643, "grad_norm": 0.0, "learning_rate": 9.225075793280693e-06, "loss": 0.9468, "step": 4321 }, { "epoch": 0.5391044031433204, "grad_norm": 0.0, "learning_rate": 9.221047891485122e-06, "loss": 0.9361, "step": 4322 }, { "epoch": 0.5392291380815767, "grad_norm": 0.0, "learning_rate": 9.217020116834173e-06, "loss": 0.9632, "step": 4323 }, { "epoch": 0.5393538730198328, "grad_norm": 0.0, "learning_rate": 9.212992469985278e-06, "loss": 0.9288, "step": 4324 }, { "epoch": 0.5394786079580891, "grad_norm": 0.0, "learning_rate": 9.208964951595852e-06, "loss": 0.944, "step": 4325 }, { "epoch": 0.5396033428963453, "grad_norm": 0.0, "learning_rate": 9.204937562323293e-06, "loss": 0.9607, "step": 4326 }, { "epoch": 0.5397280778346015, "grad_norm": 0.0, "learning_rate": 9.200910302824964e-06, "loss": 0.9942, "step": 4327 }, { "epoch": 0.5398528127728577, "grad_norm": 0.0, "learning_rate": 9.196883173758215e-06, "loss": 0.959, "step": 4328 }, { "epoch": 0.5399775477111138, "grad_norm": 0.0, "learning_rate": 9.192856175780383e-06, "loss": 0.9679, "step": 4329 }, { "epoch": 0.5401022826493701, "grad_norm": 0.0, "learning_rate": 9.188829309548768e-06, "loss": 0.9429, "step": 4330 }, { "epoch": 0.5402270175876263, "grad_norm": 0.0, "learning_rate": 9.184802575720664e-06, "loss": 0.9162, "step": 4331 }, { "epoch": 0.5403517525258825, "grad_norm": 0.0, "learning_rate": 9.180775974953325e-06, "loss": 0.9678, "step": 4332 }, { "epoch": 0.5404764874641387, "grad_norm": 0.0, "learning_rate": 9.176749507904002e-06, "loss": 0.9248, "step": 4333 }, { "epoch": 0.5406012224023949, "grad_norm": 0.0, "learning_rate": 9.172723175229915e-06, "loss": 0.9713, "step": 4334 }, { "epoch": 0.5407259573406511, "grad_norm": 0.0, "learning_rate": 9.168696977588256e-06, "loss": 0.9537, "step": 4335 }, { "epoch": 0.5408506922789074, "grad_norm": 0.0, "learning_rate": 9.164670915636214e-06, "loss": 0.9766, "step": 4336 }, { "epoch": 0.5409754272171635, "grad_norm": 0.0, "learning_rate": 9.160644990030932e-06, "loss": 0.9872, "step": 4337 }, { "epoch": 0.5411001621554198, "grad_norm": 0.0, "learning_rate": 9.156619201429551e-06, "loss": 0.9363, "step": 4338 }, { "epoch": 0.5412248970936759, "grad_norm": 0.0, "learning_rate": 9.152593550489178e-06, "loss": 0.8979, "step": 4339 }, { "epoch": 0.5413496320319321, "grad_norm": 0.0, "learning_rate": 9.148568037866898e-06, "loss": 0.9812, "step": 4340 }, { "epoch": 0.5414743669701884, "grad_norm": 0.0, "learning_rate": 9.14454266421978e-06, "loss": 0.9382, "step": 4341 }, { "epoch": 0.5415991019084445, "grad_norm": 0.0, "learning_rate": 9.140517430204864e-06, "loss": 0.9991, "step": 4342 }, { "epoch": 0.5417238368467008, "grad_norm": 0.0, "learning_rate": 9.136492336479172e-06, "loss": 0.9449, "step": 4343 }, { "epoch": 0.5418485717849569, "grad_norm": 0.0, "learning_rate": 9.132467383699702e-06, "loss": 0.9698, "step": 4344 }, { "epoch": 0.5419733067232132, "grad_norm": 0.0, "learning_rate": 9.128442572523418e-06, "loss": 0.9457, "step": 4345 }, { "epoch": 0.5420980416614694, "grad_norm": 0.0, "learning_rate": 9.124417903607286e-06, "loss": 0.9315, "step": 4346 }, { "epoch": 0.5422227765997256, "grad_norm": 0.0, "learning_rate": 9.120393377608219e-06, "loss": 0.9493, "step": 4347 }, { "epoch": 0.5423475115379818, "grad_norm": 0.0, "learning_rate": 9.116368995183132e-06, "loss": 0.9763, "step": 4348 }, { "epoch": 0.542472246476238, "grad_norm": 0.0, "learning_rate": 9.1123447569889e-06, "loss": 1.0093, "step": 4349 }, { "epoch": 0.5425969814144942, "grad_norm": 0.0, "learning_rate": 9.108320663682381e-06, "loss": 0.912, "step": 4350 }, { "epoch": 0.5427217163527505, "grad_norm": 0.0, "learning_rate": 9.10429671592041e-06, "loss": 0.9321, "step": 4351 }, { "epoch": 0.5428464512910066, "grad_norm": 0.0, "learning_rate": 9.100272914359793e-06, "loss": 0.9247, "step": 4352 }, { "epoch": 0.5429711862292628, "grad_norm": 0.0, "learning_rate": 9.096249259657322e-06, "loss": 0.8941, "step": 4353 }, { "epoch": 0.543095921167519, "grad_norm": 0.0, "learning_rate": 9.09222575246975e-06, "loss": 0.9674, "step": 4354 }, { "epoch": 0.5432206561057752, "grad_norm": 0.0, "learning_rate": 9.088202393453823e-06, "loss": 0.9368, "step": 4355 }, { "epoch": 0.5433453910440315, "grad_norm": 0.0, "learning_rate": 9.084179183266254e-06, "loss": 0.9525, "step": 4356 }, { "epoch": 0.5434701259822876, "grad_norm": 0.0, "learning_rate": 9.080156122563726e-06, "loss": 0.919, "step": 4357 }, { "epoch": 0.5435948609205439, "grad_norm": 0.0, "learning_rate": 9.07613321200291e-06, "loss": 0.9183, "step": 4358 }, { "epoch": 0.5437195958588, "grad_norm": 0.0, "learning_rate": 9.072110452240445e-06, "loss": 0.8995, "step": 4359 }, { "epoch": 0.5438443307970563, "grad_norm": 0.0, "learning_rate": 9.068087843932945e-06, "loss": 0.9379, "step": 4360 }, { "epoch": 0.5439690657353125, "grad_norm": 0.0, "learning_rate": 9.064065387737005e-06, "loss": 0.8884, "step": 4361 }, { "epoch": 0.5440938006735687, "grad_norm": 0.0, "learning_rate": 9.060043084309184e-06, "loss": 0.9546, "step": 4362 }, { "epoch": 0.5442185356118249, "grad_norm": 0.0, "learning_rate": 9.05602093430603e-06, "loss": 0.9132, "step": 4363 }, { "epoch": 0.544343270550081, "grad_norm": 0.0, "learning_rate": 9.051998938384055e-06, "loss": 0.925, "step": 4364 }, { "epoch": 0.5444680054883373, "grad_norm": 0.0, "learning_rate": 9.047977097199756e-06, "loss": 0.8782, "step": 4365 }, { "epoch": 0.5445927404265934, "grad_norm": 0.0, "learning_rate": 9.04395541140959e-06, "loss": 0.886, "step": 4366 }, { "epoch": 0.5447174753648497, "grad_norm": 0.0, "learning_rate": 9.039933881670008e-06, "loss": 0.9511, "step": 4367 }, { "epoch": 0.5448422103031059, "grad_norm": 0.0, "learning_rate": 9.035912508637419e-06, "loss": 0.9265, "step": 4368 }, { "epoch": 0.5449669452413621, "grad_norm": 0.0, "learning_rate": 9.03189129296821e-06, "loss": 0.9664, "step": 4369 }, { "epoch": 0.5450916801796183, "grad_norm": 0.0, "learning_rate": 9.027870235318751e-06, "loss": 0.9657, "step": 4370 }, { "epoch": 0.5452164151178746, "grad_norm": 0.0, "learning_rate": 9.023849336345371e-06, "loss": 0.9913, "step": 4371 }, { "epoch": 0.5453411500561307, "grad_norm": 0.0, "learning_rate": 9.019828596704394e-06, "loss": 0.9457, "step": 4372 }, { "epoch": 0.545465884994387, "grad_norm": 0.0, "learning_rate": 9.015808017052103e-06, "loss": 0.9243, "step": 4373 }, { "epoch": 0.5455906199326431, "grad_norm": 0.0, "learning_rate": 9.011787598044746e-06, "loss": 0.9233, "step": 4374 }, { "epoch": 0.5457153548708994, "grad_norm": 0.0, "learning_rate": 9.007767340338572e-06, "loss": 0.9859, "step": 4375 }, { "epoch": 0.5458400898091555, "grad_norm": 0.0, "learning_rate": 9.003747244589778e-06, "loss": 0.9579, "step": 4376 }, { "epoch": 0.5459648247474117, "grad_norm": 0.0, "learning_rate": 8.999727311454552e-06, "loss": 0.9429, "step": 4377 }, { "epoch": 0.546089559685668, "grad_norm": 0.0, "learning_rate": 8.995707541589045e-06, "loss": 0.9537, "step": 4378 }, { "epoch": 0.5462142946239241, "grad_norm": 0.0, "learning_rate": 8.991687935649385e-06, "loss": 0.946, "step": 4379 }, { "epoch": 0.5463390295621804, "grad_norm": 0.0, "learning_rate": 8.987668494291673e-06, "loss": 0.9217, "step": 4380 }, { "epoch": 0.5464637645004365, "grad_norm": 0.0, "learning_rate": 8.983649218171981e-06, "loss": 0.9821, "step": 4381 }, { "epoch": 0.5465884994386928, "grad_norm": 0.0, "learning_rate": 8.979630107946362e-06, "loss": 0.9184, "step": 4382 }, { "epoch": 0.546713234376949, "grad_norm": 0.0, "learning_rate": 8.975611164270832e-06, "loss": 0.9465, "step": 4383 }, { "epoch": 0.5468379693152052, "grad_norm": 0.0, "learning_rate": 8.971592387801384e-06, "loss": 0.9021, "step": 4384 }, { "epoch": 0.5469627042534614, "grad_norm": 0.0, "learning_rate": 8.967573779193987e-06, "loss": 0.9368, "step": 4385 }, { "epoch": 0.5470874391917177, "grad_norm": 0.0, "learning_rate": 8.963555339104574e-06, "loss": 0.9154, "step": 4386 }, { "epoch": 0.5472121741299738, "grad_norm": 0.0, "learning_rate": 8.95953706818906e-06, "loss": 0.8944, "step": 4387 }, { "epoch": 0.54733690906823, "grad_norm": 0.0, "learning_rate": 8.955518967103324e-06, "loss": 0.9611, "step": 4388 }, { "epoch": 0.5474616440064862, "grad_norm": 0.0, "learning_rate": 8.951501036503225e-06, "loss": 0.9338, "step": 4389 }, { "epoch": 0.5475863789447424, "grad_norm": 0.0, "learning_rate": 8.947483277044593e-06, "loss": 0.9219, "step": 4390 }, { "epoch": 0.5477111138829986, "grad_norm": 0.0, "learning_rate": 8.943465689383215e-06, "loss": 0.9944, "step": 4391 }, { "epoch": 0.5478358488212548, "grad_norm": 0.0, "learning_rate": 8.93944827417488e-06, "loss": 0.953, "step": 4392 }, { "epoch": 0.5479605837595111, "grad_norm": 0.0, "learning_rate": 8.935431032075317e-06, "loss": 0.9475, "step": 4393 }, { "epoch": 0.5480853186977672, "grad_norm": 0.0, "learning_rate": 8.931413963740251e-06, "loss": 0.9708, "step": 4394 }, { "epoch": 0.5482100536360235, "grad_norm": 0.0, "learning_rate": 8.927397069825366e-06, "loss": 0.982, "step": 4395 }, { "epoch": 0.5483347885742796, "grad_norm": 0.0, "learning_rate": 8.923380350986317e-06, "loss": 0.9641, "step": 4396 }, { "epoch": 0.5484595235125359, "grad_norm": 0.0, "learning_rate": 8.919363807878738e-06, "loss": 0.907, "step": 4397 }, { "epoch": 0.5485842584507921, "grad_norm": 0.0, "learning_rate": 8.915347441158223e-06, "loss": 0.9182, "step": 4398 }, { "epoch": 0.5487089933890483, "grad_norm": 0.0, "learning_rate": 8.911331251480357e-06, "loss": 0.976, "step": 4399 }, { "epoch": 0.5488337283273045, "grad_norm": 0.0, "learning_rate": 8.90731523950067e-06, "loss": 0.9562, "step": 4400 }, { "epoch": 0.5489584632655606, "grad_norm": 0.0, "learning_rate": 8.903299405874685e-06, "loss": 0.9573, "step": 4401 }, { "epoch": 0.5490831982038169, "grad_norm": 0.0, "learning_rate": 8.899283751257885e-06, "loss": 0.9833, "step": 4402 }, { "epoch": 0.5492079331420731, "grad_norm": 0.0, "learning_rate": 8.895268276305724e-06, "loss": 0.927, "step": 4403 }, { "epoch": 0.5493326680803293, "grad_norm": 0.0, "learning_rate": 8.891252981673633e-06, "loss": 0.9845, "step": 4404 }, { "epoch": 0.5494574030185855, "grad_norm": 0.0, "learning_rate": 8.887237868017004e-06, "loss": 0.964, "step": 4405 }, { "epoch": 0.5495821379568417, "grad_norm": 0.0, "learning_rate": 8.88322293599121e-06, "loss": 0.9446, "step": 4406 }, { "epoch": 0.5497068728950979, "grad_norm": 0.0, "learning_rate": 8.879208186251586e-06, "loss": 0.8941, "step": 4407 }, { "epoch": 0.5498316078333542, "grad_norm": 0.0, "learning_rate": 8.875193619453438e-06, "loss": 0.9326, "step": 4408 }, { "epoch": 0.5499563427716103, "grad_norm": 0.0, "learning_rate": 8.871179236252053e-06, "loss": 0.923, "step": 4409 }, { "epoch": 0.5500810777098666, "grad_norm": 0.0, "learning_rate": 8.867165037302667e-06, "loss": 0.9323, "step": 4410 }, { "epoch": 0.5502058126481227, "grad_norm": 0.0, "learning_rate": 8.863151023260512e-06, "loss": 0.9415, "step": 4411 }, { "epoch": 0.5503305475863789, "grad_norm": 0.0, "learning_rate": 8.859137194780768e-06, "loss": 0.9488, "step": 4412 }, { "epoch": 0.5504552825246352, "grad_norm": 0.0, "learning_rate": 8.855123552518593e-06, "loss": 0.9297, "step": 4413 }, { "epoch": 0.5505800174628913, "grad_norm": 0.0, "learning_rate": 8.851110097129119e-06, "loss": 0.948, "step": 4414 }, { "epoch": 0.5507047524011476, "grad_norm": 0.0, "learning_rate": 8.847096829267437e-06, "loss": 0.9055, "step": 4415 }, { "epoch": 0.5508294873394037, "grad_norm": 0.0, "learning_rate": 8.84308374958862e-06, "loss": 0.9479, "step": 4416 }, { "epoch": 0.55095422227766, "grad_norm": 0.0, "learning_rate": 8.839070858747697e-06, "loss": 0.9504, "step": 4417 }, { "epoch": 0.5510789572159162, "grad_norm": 0.0, "learning_rate": 8.83505815739968e-06, "loss": 0.9267, "step": 4418 }, { "epoch": 0.5512036921541724, "grad_norm": 0.0, "learning_rate": 8.831045646199544e-06, "loss": 0.9939, "step": 4419 }, { "epoch": 0.5513284270924286, "grad_norm": 0.0, "learning_rate": 8.827033325802218e-06, "loss": 0.8871, "step": 4420 }, { "epoch": 0.5514531620306848, "grad_norm": 0.0, "learning_rate": 8.82302119686263e-06, "loss": 0.8951, "step": 4421 }, { "epoch": 0.551577896968941, "grad_norm": 0.0, "learning_rate": 8.819009260035652e-06, "loss": 0.9508, "step": 4422 }, { "epoch": 0.5517026319071973, "grad_norm": 0.0, "learning_rate": 8.814997515976137e-06, "loss": 0.961, "step": 4423 }, { "epoch": 0.5518273668454534, "grad_norm": 0.0, "learning_rate": 8.810985965338901e-06, "loss": 0.9696, "step": 4424 }, { "epoch": 0.5519521017837096, "grad_norm": 0.0, "learning_rate": 8.806974608778728e-06, "loss": 0.9518, "step": 4425 }, { "epoch": 0.5520768367219658, "grad_norm": 0.0, "learning_rate": 8.802963446950378e-06, "loss": 0.8907, "step": 4426 }, { "epoch": 0.552201571660222, "grad_norm": 0.0, "learning_rate": 8.798952480508564e-06, "loss": 0.9745, "step": 4427 }, { "epoch": 0.5523263065984783, "grad_norm": 0.0, "learning_rate": 8.794941710107991e-06, "loss": 0.9348, "step": 4428 }, { "epoch": 0.5524510415367344, "grad_norm": 0.0, "learning_rate": 8.79093113640331e-06, "loss": 0.9979, "step": 4429 }, { "epoch": 0.5525757764749907, "grad_norm": 0.0, "learning_rate": 8.786920760049142e-06, "loss": 0.9131, "step": 4430 }, { "epoch": 0.5527005114132468, "grad_norm": 0.0, "learning_rate": 8.782910581700093e-06, "loss": 0.9649, "step": 4431 }, { "epoch": 0.5528252463515031, "grad_norm": 0.0, "learning_rate": 8.778900602010715e-06, "loss": 0.972, "step": 4432 }, { "epoch": 0.5529499812897593, "grad_norm": 0.0, "learning_rate": 8.774890821635548e-06, "loss": 0.9317, "step": 4433 }, { "epoch": 0.5530747162280155, "grad_norm": 0.0, "learning_rate": 8.770881241229079e-06, "loss": 0.9288, "step": 4434 }, { "epoch": 0.5531994511662717, "grad_norm": 0.0, "learning_rate": 8.76687186144578e-06, "loss": 0.9626, "step": 4435 }, { "epoch": 0.5533241861045278, "grad_norm": 0.0, "learning_rate": 8.762862682940082e-06, "loss": 0.8851, "step": 4436 }, { "epoch": 0.5534489210427841, "grad_norm": 0.0, "learning_rate": 8.758853706366375e-06, "loss": 0.9554, "step": 4437 }, { "epoch": 0.5535736559810402, "grad_norm": 0.0, "learning_rate": 8.754844932379038e-06, "loss": 0.9778, "step": 4438 }, { "epoch": 0.5536983909192965, "grad_norm": 0.0, "learning_rate": 8.750836361632398e-06, "loss": 0.9265, "step": 4439 }, { "epoch": 0.5538231258575527, "grad_norm": 0.0, "learning_rate": 8.746827994780755e-06, "loss": 0.9776, "step": 4440 }, { "epoch": 0.5539478607958089, "grad_norm": 0.0, "learning_rate": 8.742819832478376e-06, "loss": 0.9061, "step": 4441 }, { "epoch": 0.5540725957340651, "grad_norm": 0.0, "learning_rate": 8.738811875379491e-06, "loss": 0.9463, "step": 4442 }, { "epoch": 0.5541973306723214, "grad_norm": 0.0, "learning_rate": 8.734804124138303e-06, "loss": 0.9334, "step": 4443 }, { "epoch": 0.5543220656105775, "grad_norm": 0.0, "learning_rate": 8.730796579408976e-06, "loss": 0.9494, "step": 4444 }, { "epoch": 0.5544468005488338, "grad_norm": 0.0, "learning_rate": 8.72678924184565e-06, "loss": 0.9605, "step": 4445 }, { "epoch": 0.5545715354870899, "grad_norm": 0.0, "learning_rate": 8.722782112102413e-06, "loss": 0.9458, "step": 4446 }, { "epoch": 0.5546962704253462, "grad_norm": 0.0, "learning_rate": 8.71877519083333e-06, "loss": 0.9312, "step": 4447 }, { "epoch": 0.5548210053636023, "grad_norm": 0.0, "learning_rate": 8.714768478692441e-06, "loss": 0.9468, "step": 4448 }, { "epoch": 0.5549457403018585, "grad_norm": 0.0, "learning_rate": 8.710761976333734e-06, "loss": 0.9237, "step": 4449 }, { "epoch": 0.5550704752401148, "grad_norm": 0.0, "learning_rate": 8.706755684411176e-06, "loss": 0.9448, "step": 4450 }, { "epoch": 0.5551952101783709, "grad_norm": 0.0, "learning_rate": 8.702749603578691e-06, "loss": 0.8897, "step": 4451 }, { "epoch": 0.5553199451166272, "grad_norm": 0.0, "learning_rate": 8.698743734490174e-06, "loss": 0.9253, "step": 4452 }, { "epoch": 0.5554446800548833, "grad_norm": 0.0, "learning_rate": 8.694738077799487e-06, "loss": 0.8929, "step": 4453 }, { "epoch": 0.5555694149931396, "grad_norm": 0.0, "learning_rate": 8.690732634160446e-06, "loss": 0.9496, "step": 4454 }, { "epoch": 0.5556941499313958, "grad_norm": 0.0, "learning_rate": 8.686727404226852e-06, "loss": 0.9638, "step": 4455 }, { "epoch": 0.555818884869652, "grad_norm": 0.0, "learning_rate": 8.682722388652446e-06, "loss": 0.9502, "step": 4456 }, { "epoch": 0.5559436198079082, "grad_norm": 0.0, "learning_rate": 8.678717588090959e-06, "loss": 0.9354, "step": 4457 }, { "epoch": 0.5560683547461645, "grad_norm": 0.0, "learning_rate": 8.674713003196071e-06, "loss": 0.9446, "step": 4458 }, { "epoch": 0.5561930896844206, "grad_norm": 0.0, "learning_rate": 8.67070863462143e-06, "loss": 0.9843, "step": 4459 }, { "epoch": 0.5563178246226768, "grad_norm": 0.0, "learning_rate": 8.666704483020652e-06, "loss": 0.936, "step": 4460 }, { "epoch": 0.556442559560933, "grad_norm": 0.0, "learning_rate": 8.662700549047313e-06, "loss": 0.9872, "step": 4461 }, { "epoch": 0.5565672944991892, "grad_norm": 0.0, "learning_rate": 8.658696833354959e-06, "loss": 0.9621, "step": 4462 }, { "epoch": 0.5566920294374454, "grad_norm": 0.0, "learning_rate": 8.654693336597096e-06, "loss": 0.9365, "step": 4463 }, { "epoch": 0.5568167643757016, "grad_norm": 0.0, "learning_rate": 8.650690059427192e-06, "loss": 0.9683, "step": 4464 }, { "epoch": 0.5569414993139579, "grad_norm": 0.0, "learning_rate": 8.646687002498692e-06, "loss": 0.9937, "step": 4465 }, { "epoch": 0.557066234252214, "grad_norm": 0.0, "learning_rate": 8.642684166464984e-06, "loss": 0.8877, "step": 4466 }, { "epoch": 0.5571909691904703, "grad_norm": 0.0, "learning_rate": 8.638681551979442e-06, "loss": 0.9705, "step": 4467 }, { "epoch": 0.5573157041287264, "grad_norm": 0.0, "learning_rate": 8.634679159695387e-06, "loss": 0.9457, "step": 4468 }, { "epoch": 0.5574404390669827, "grad_norm": 0.0, "learning_rate": 8.630676990266115e-06, "loss": 0.9968, "step": 4469 }, { "epoch": 0.5575651740052389, "grad_norm": 0.0, "learning_rate": 8.626675044344878e-06, "loss": 0.9441, "step": 4470 }, { "epoch": 0.5576899089434951, "grad_norm": 0.0, "learning_rate": 8.622673322584894e-06, "loss": 0.911, "step": 4471 }, { "epoch": 0.5578146438817513, "grad_norm": 0.0, "learning_rate": 8.618671825639348e-06, "loss": 0.9293, "step": 4472 }, { "epoch": 0.5579393788200074, "grad_norm": 0.0, "learning_rate": 8.614670554161377e-06, "loss": 0.9506, "step": 4473 }, { "epoch": 0.5580641137582637, "grad_norm": 0.0, "learning_rate": 8.610669508804101e-06, "loss": 0.9925, "step": 4474 }, { "epoch": 0.5581888486965199, "grad_norm": 0.0, "learning_rate": 8.606668690220588e-06, "loss": 0.9176, "step": 4475 }, { "epoch": 0.5583135836347761, "grad_norm": 0.0, "learning_rate": 8.602668099063865e-06, "loss": 0.9493, "step": 4476 }, { "epoch": 0.5584383185730323, "grad_norm": 0.0, "learning_rate": 8.598667735986939e-06, "loss": 1.0171, "step": 4477 }, { "epoch": 0.5585630535112885, "grad_norm": 0.0, "learning_rate": 8.594667601642763e-06, "loss": 0.935, "step": 4478 }, { "epoch": 0.5586877884495447, "grad_norm": 0.0, "learning_rate": 8.590667696684266e-06, "loss": 0.9462, "step": 4479 }, { "epoch": 0.558812523387801, "grad_norm": 0.0, "learning_rate": 8.586668021764328e-06, "loss": 0.9436, "step": 4480 }, { "epoch": 0.5589372583260571, "grad_norm": 0.0, "learning_rate": 8.582668577535797e-06, "loss": 0.9308, "step": 4481 }, { "epoch": 0.5590619932643134, "grad_norm": 0.0, "learning_rate": 8.578669364651489e-06, "loss": 0.9745, "step": 4482 }, { "epoch": 0.5591867282025695, "grad_norm": 0.0, "learning_rate": 8.574670383764166e-06, "loss": 0.9036, "step": 4483 }, { "epoch": 0.5593114631408257, "grad_norm": 0.0, "learning_rate": 8.570671635526572e-06, "loss": 0.9576, "step": 4484 }, { "epoch": 0.559436198079082, "grad_norm": 0.0, "learning_rate": 8.5666731205914e-06, "loss": 0.9263, "step": 4485 }, { "epoch": 0.5595609330173381, "grad_norm": 0.0, "learning_rate": 8.562674839611311e-06, "loss": 0.9935, "step": 4486 }, { "epoch": 0.5596856679555944, "grad_norm": 0.0, "learning_rate": 8.558676793238922e-06, "loss": 0.9481, "step": 4487 }, { "epoch": 0.5598104028938505, "grad_norm": 0.0, "learning_rate": 8.554678982126814e-06, "loss": 0.951, "step": 4488 }, { "epoch": 0.5599351378321068, "grad_norm": 0.0, "learning_rate": 8.550681406927534e-06, "loss": 0.9467, "step": 4489 }, { "epoch": 0.560059872770363, "grad_norm": 0.0, "learning_rate": 8.546684068293583e-06, "loss": 0.925, "step": 4490 }, { "epoch": 0.5601846077086192, "grad_norm": 0.0, "learning_rate": 8.542686966877435e-06, "loss": 0.9433, "step": 4491 }, { "epoch": 0.5603093426468754, "grad_norm": 0.0, "learning_rate": 8.53869010333151e-06, "loss": 0.9449, "step": 4492 }, { "epoch": 0.5604340775851316, "grad_norm": 0.0, "learning_rate": 8.534693478308196e-06, "loss": 0.883, "step": 4493 }, { "epoch": 0.5605588125233878, "grad_norm": 0.0, "learning_rate": 8.530697092459851e-06, "loss": 0.9615, "step": 4494 }, { "epoch": 0.5606835474616441, "grad_norm": 0.0, "learning_rate": 8.52670094643878e-06, "loss": 0.956, "step": 4495 }, { "epoch": 0.5608082823999002, "grad_norm": 0.0, "learning_rate": 8.522705040897257e-06, "loss": 0.9292, "step": 4496 }, { "epoch": 0.5609330173381564, "grad_norm": 0.0, "learning_rate": 8.518709376487515e-06, "loss": 0.9648, "step": 4497 }, { "epoch": 0.5610577522764126, "grad_norm": 0.0, "learning_rate": 8.514713953861743e-06, "loss": 0.9091, "step": 4498 }, { "epoch": 0.5611824872146688, "grad_norm": 0.0, "learning_rate": 8.510718773672099e-06, "loss": 0.9552, "step": 4499 }, { "epoch": 0.5613072221529251, "grad_norm": 0.0, "learning_rate": 8.506723836570692e-06, "loss": 0.9333, "step": 4500 }, { "epoch": 0.5614319570911812, "grad_norm": 0.0, "learning_rate": 8.502729143209605e-06, "loss": 0.9448, "step": 4501 }, { "epoch": 0.5615566920294375, "grad_norm": 0.0, "learning_rate": 8.498734694240869e-06, "loss": 0.9329, "step": 4502 }, { "epoch": 0.5616814269676936, "grad_norm": 0.0, "learning_rate": 8.494740490316471e-06, "loss": 0.9264, "step": 4503 }, { "epoch": 0.5618061619059499, "grad_norm": 0.0, "learning_rate": 8.490746532088378e-06, "loss": 0.8917, "step": 4504 }, { "epoch": 0.561930896844206, "grad_norm": 0.0, "learning_rate": 8.486752820208496e-06, "loss": 0.9819, "step": 4505 }, { "epoch": 0.5620556317824623, "grad_norm": 0.0, "learning_rate": 8.482759355328704e-06, "loss": 0.9836, "step": 4506 }, { "epoch": 0.5621803667207185, "grad_norm": 0.0, "learning_rate": 8.478766138100834e-06, "loss": 0.9706, "step": 4507 }, { "epoch": 0.5623051016589746, "grad_norm": 0.0, "learning_rate": 8.474773169176683e-06, "loss": 0.9146, "step": 4508 }, { "epoch": 0.5624298365972309, "grad_norm": 0.0, "learning_rate": 8.470780449208e-06, "loss": 0.9721, "step": 4509 }, { "epoch": 0.562554571535487, "grad_norm": 0.0, "learning_rate": 8.466787978846493e-06, "loss": 0.8869, "step": 4510 }, { "epoch": 0.5626793064737433, "grad_norm": 0.0, "learning_rate": 8.462795758743846e-06, "loss": 0.9053, "step": 4511 }, { "epoch": 0.5628040414119995, "grad_norm": 0.0, "learning_rate": 8.458803789551683e-06, "loss": 0.9066, "step": 4512 }, { "epoch": 0.5629287763502557, "grad_norm": 0.0, "learning_rate": 8.454812071921597e-06, "loss": 0.9282, "step": 4513 }, { "epoch": 0.5630535112885119, "grad_norm": 0.0, "learning_rate": 8.450820606505133e-06, "loss": 0.8972, "step": 4514 }, { "epoch": 0.5631782462267682, "grad_norm": 0.0, "learning_rate": 8.4468293939538e-06, "loss": 0.9553, "step": 4515 }, { "epoch": 0.5633029811650243, "grad_norm": 0.0, "learning_rate": 8.442838434919066e-06, "loss": 0.9291, "step": 4516 }, { "epoch": 0.5634277161032806, "grad_norm": 0.0, "learning_rate": 8.438847730052352e-06, "loss": 0.9078, "step": 4517 }, { "epoch": 0.5635524510415367, "grad_norm": 0.0, "learning_rate": 8.43485728000505e-06, "loss": 0.9388, "step": 4518 }, { "epoch": 0.563677185979793, "grad_norm": 0.0, "learning_rate": 8.430867085428496e-06, "loss": 0.8979, "step": 4519 }, { "epoch": 0.5638019209180491, "grad_norm": 0.0, "learning_rate": 8.426877146973987e-06, "loss": 0.9298, "step": 4520 }, { "epoch": 0.5639266558563053, "grad_norm": 0.0, "learning_rate": 8.422887465292789e-06, "loss": 0.8972, "step": 4521 }, { "epoch": 0.5640513907945616, "grad_norm": 0.0, "learning_rate": 8.418898041036115e-06, "loss": 0.9743, "step": 4522 }, { "epoch": 0.5641761257328177, "grad_norm": 0.0, "learning_rate": 8.414908874855142e-06, "loss": 1.0071, "step": 4523 }, { "epoch": 0.564300860671074, "grad_norm": 0.0, "learning_rate": 8.410919967400996e-06, "loss": 0.9045, "step": 4524 }, { "epoch": 0.5644255956093301, "grad_norm": 0.0, "learning_rate": 8.406931319324776e-06, "loss": 0.8844, "step": 4525 }, { "epoch": 0.5645503305475864, "grad_norm": 0.0, "learning_rate": 8.402942931277525e-06, "loss": 0.9738, "step": 4526 }, { "epoch": 0.5646750654858426, "grad_norm": 0.0, "learning_rate": 8.398954803910245e-06, "loss": 0.9846, "step": 4527 }, { "epoch": 0.5647998004240988, "grad_norm": 0.0, "learning_rate": 8.39496693787391e-06, "loss": 0.8857, "step": 4528 }, { "epoch": 0.564924535362355, "grad_norm": 0.0, "learning_rate": 8.390979333819427e-06, "loss": 0.9429, "step": 4529 }, { "epoch": 0.5650492703006113, "grad_norm": 0.0, "learning_rate": 8.386991992397684e-06, "loss": 0.9242, "step": 4530 }, { "epoch": 0.5651740052388674, "grad_norm": 0.0, "learning_rate": 8.383004914259511e-06, "loss": 0.9515, "step": 4531 }, { "epoch": 0.5652987401771236, "grad_norm": 0.0, "learning_rate": 8.379018100055697e-06, "loss": 0.93, "step": 4532 }, { "epoch": 0.5654234751153798, "grad_norm": 0.0, "learning_rate": 8.375031550437e-06, "loss": 0.883, "step": 4533 }, { "epoch": 0.565548210053636, "grad_norm": 0.0, "learning_rate": 8.371045266054114e-06, "loss": 0.9259, "step": 4534 }, { "epoch": 0.5656729449918922, "grad_norm": 0.0, "learning_rate": 8.367059247557708e-06, "loss": 0.9507, "step": 4535 }, { "epoch": 0.5657976799301484, "grad_norm": 0.0, "learning_rate": 8.3630734955984e-06, "loss": 0.9372, "step": 4536 }, { "epoch": 0.5659224148684047, "grad_norm": 0.0, "learning_rate": 8.35908801082676e-06, "loss": 0.9851, "step": 4537 }, { "epoch": 0.5660471498066608, "grad_norm": 0.0, "learning_rate": 8.35510279389333e-06, "loss": 0.9031, "step": 4538 }, { "epoch": 0.5661718847449171, "grad_norm": 0.0, "learning_rate": 8.351117845448586e-06, "loss": 1.0056, "step": 4539 }, { "epoch": 0.5662966196831732, "grad_norm": 0.0, "learning_rate": 8.34713316614298e-06, "loss": 0.904, "step": 4540 }, { "epoch": 0.5664213546214295, "grad_norm": 0.0, "learning_rate": 8.34314875662691e-06, "loss": 0.9614, "step": 4541 }, { "epoch": 0.5665460895596857, "grad_norm": 0.0, "learning_rate": 8.339164617550732e-06, "loss": 0.9234, "step": 4542 }, { "epoch": 0.5666708244979419, "grad_norm": 0.0, "learning_rate": 8.335180749564759e-06, "loss": 0.944, "step": 4543 }, { "epoch": 0.5667955594361981, "grad_norm": 0.0, "learning_rate": 8.331197153319256e-06, "loss": 0.9401, "step": 4544 }, { "epoch": 0.5669202943744542, "grad_norm": 0.0, "learning_rate": 8.327213829464448e-06, "loss": 0.9148, "step": 4545 }, { "epoch": 0.5670450293127105, "grad_norm": 0.0, "learning_rate": 8.323230778650511e-06, "loss": 0.9448, "step": 4546 }, { "epoch": 0.5671697642509667, "grad_norm": 0.0, "learning_rate": 8.319248001527586e-06, "loss": 0.924, "step": 4547 }, { "epoch": 0.5672944991892229, "grad_norm": 0.0, "learning_rate": 8.315265498745762e-06, "loss": 0.9891, "step": 4548 }, { "epoch": 0.5674192341274791, "grad_norm": 0.0, "learning_rate": 8.311283270955072e-06, "loss": 0.9398, "step": 4549 }, { "epoch": 0.5675439690657353, "grad_norm": 0.0, "learning_rate": 8.307301318805531e-06, "loss": 1.0083, "step": 4550 }, { "epoch": 0.5676687040039915, "grad_norm": 0.0, "learning_rate": 8.303319642947083e-06, "loss": 0.9258, "step": 4551 }, { "epoch": 0.5677934389422478, "grad_norm": 0.0, "learning_rate": 8.299338244029646e-06, "loss": 0.9249, "step": 4552 }, { "epoch": 0.5679181738805039, "grad_norm": 0.0, "learning_rate": 8.295357122703081e-06, "loss": 0.9261, "step": 4553 }, { "epoch": 0.5680429088187602, "grad_norm": 0.0, "learning_rate": 8.291376279617203e-06, "loss": 0.9784, "step": 4554 }, { "epoch": 0.5681676437570163, "grad_norm": 0.0, "learning_rate": 8.287395715421796e-06, "loss": 0.9206, "step": 4555 }, { "epoch": 0.5682923786952725, "grad_norm": 0.0, "learning_rate": 8.283415430766576e-06, "loss": 0.9945, "step": 4556 }, { "epoch": 0.5684171136335288, "grad_norm": 0.0, "learning_rate": 8.279435426301238e-06, "loss": 0.9407, "step": 4557 }, { "epoch": 0.5685418485717849, "grad_norm": 0.0, "learning_rate": 8.27545570267541e-06, "loss": 1.0006, "step": 4558 }, { "epoch": 0.5686665835100412, "grad_norm": 0.0, "learning_rate": 8.27147626053869e-06, "loss": 0.9517, "step": 4559 }, { "epoch": 0.5687913184482973, "grad_norm": 0.0, "learning_rate": 8.26749710054062e-06, "loss": 1.0103, "step": 4560 }, { "epoch": 0.5689160533865536, "grad_norm": 0.0, "learning_rate": 8.263518223330698e-06, "loss": 0.9287, "step": 4561 }, { "epoch": 0.5690407883248098, "grad_norm": 0.0, "learning_rate": 8.25953962955838e-06, "loss": 0.9389, "step": 4562 }, { "epoch": 0.569165523263066, "grad_norm": 0.0, "learning_rate": 8.255561319873066e-06, "loss": 0.9494, "step": 4563 }, { "epoch": 0.5692902582013222, "grad_norm": 0.0, "learning_rate": 8.251583294924129e-06, "loss": 0.9696, "step": 4564 }, { "epoch": 0.5694149931395784, "grad_norm": 0.0, "learning_rate": 8.247605555360873e-06, "loss": 0.9305, "step": 4565 }, { "epoch": 0.5695397280778346, "grad_norm": 0.0, "learning_rate": 8.243628101832564e-06, "loss": 0.9147, "step": 4566 }, { "epoch": 0.5696644630160909, "grad_norm": 0.0, "learning_rate": 8.239650934988432e-06, "loss": 0.9635, "step": 4567 }, { "epoch": 0.569789197954347, "grad_norm": 0.0, "learning_rate": 8.235674055477641e-06, "loss": 0.8758, "step": 4568 }, { "epoch": 0.5699139328926032, "grad_norm": 0.0, "learning_rate": 8.231697463949327e-06, "loss": 0.9161, "step": 4569 }, { "epoch": 0.5700386678308594, "grad_norm": 0.0, "learning_rate": 8.227721161052564e-06, "loss": 0.9156, "step": 4570 }, { "epoch": 0.5701634027691156, "grad_norm": 0.0, "learning_rate": 8.223745147436385e-06, "loss": 0.9769, "step": 4571 }, { "epoch": 0.5702881377073719, "grad_norm": 0.0, "learning_rate": 8.21976942374978e-06, "loss": 0.9342, "step": 4572 }, { "epoch": 0.570412872645628, "grad_norm": 0.0, "learning_rate": 8.215793990641679e-06, "loss": 0.9666, "step": 4573 }, { "epoch": 0.5705376075838843, "grad_norm": 0.0, "learning_rate": 8.211818848760983e-06, "loss": 0.9431, "step": 4574 }, { "epoch": 0.5706623425221404, "grad_norm": 0.0, "learning_rate": 8.207843998756527e-06, "loss": 0.9115, "step": 4575 }, { "epoch": 0.5707870774603967, "grad_norm": 0.0, "learning_rate": 8.203869441277114e-06, "loss": 0.9266, "step": 4576 }, { "epoch": 0.5709118123986529, "grad_norm": 0.0, "learning_rate": 8.199895176971489e-06, "loss": 1.0261, "step": 4577 }, { "epoch": 0.5710365473369091, "grad_norm": 0.0, "learning_rate": 8.195921206488348e-06, "loss": 0.9282, "step": 4578 }, { "epoch": 0.5711612822751653, "grad_norm": 0.0, "learning_rate": 8.191947530476349e-06, "loss": 0.889, "step": 4579 }, { "epoch": 0.5712860172134214, "grad_norm": 0.0, "learning_rate": 8.187974149584093e-06, "loss": 0.9417, "step": 4580 }, { "epoch": 0.5714107521516777, "grad_norm": 0.0, "learning_rate": 8.184001064460137e-06, "loss": 0.9101, "step": 4581 }, { "epoch": 0.5715354870899338, "grad_norm": 0.0, "learning_rate": 8.180028275752988e-06, "loss": 0.9237, "step": 4582 }, { "epoch": 0.5716602220281901, "grad_norm": 0.0, "learning_rate": 8.176055784111103e-06, "loss": 1.0013, "step": 4583 }, { "epoch": 0.5717849569664463, "grad_norm": 0.0, "learning_rate": 8.172083590182902e-06, "loss": 0.9881, "step": 4584 }, { "epoch": 0.5719096919047025, "grad_norm": 0.0, "learning_rate": 8.168111694616733e-06, "loss": 0.9656, "step": 4585 }, { "epoch": 0.5720344268429587, "grad_norm": 0.0, "learning_rate": 8.164140098060923e-06, "loss": 0.9267, "step": 4586 }, { "epoch": 0.572159161781215, "grad_norm": 0.0, "learning_rate": 8.160168801163729e-06, "loss": 0.9723, "step": 4587 }, { "epoch": 0.5722838967194711, "grad_norm": 0.0, "learning_rate": 8.156197804573368e-06, "loss": 0.9097, "step": 4588 }, { "epoch": 0.5724086316577274, "grad_norm": 0.0, "learning_rate": 8.152227108938009e-06, "loss": 0.9404, "step": 4589 }, { "epoch": 0.5725333665959835, "grad_norm": 0.0, "learning_rate": 8.148256714905764e-06, "loss": 0.9327, "step": 4590 }, { "epoch": 0.5726581015342398, "grad_norm": 0.0, "learning_rate": 8.144286623124708e-06, "loss": 0.899, "step": 4591 }, { "epoch": 0.572782836472496, "grad_norm": 0.0, "learning_rate": 8.140316834242854e-06, "loss": 0.9143, "step": 4592 }, { "epoch": 0.5729075714107521, "grad_norm": 0.0, "learning_rate": 8.13634734890818e-06, "loss": 0.9937, "step": 4593 }, { "epoch": 0.5730323063490084, "grad_norm": 0.0, "learning_rate": 8.1323781677686e-06, "loss": 0.9675, "step": 4594 }, { "epoch": 0.5731570412872645, "grad_norm": 0.0, "learning_rate": 8.128409291471984e-06, "loss": 0.9542, "step": 4595 }, { "epoch": 0.5732817762255208, "grad_norm": 0.0, "learning_rate": 8.124440720666156e-06, "loss": 0.9324, "step": 4596 }, { "epoch": 0.5734065111637769, "grad_norm": 0.0, "learning_rate": 8.120472455998882e-06, "loss": 0.9923, "step": 4597 }, { "epoch": 0.5735312461020332, "grad_norm": 0.0, "learning_rate": 8.11650449811789e-06, "loss": 0.9463, "step": 4598 }, { "epoch": 0.5736559810402894, "grad_norm": 0.0, "learning_rate": 8.112536847670844e-06, "loss": 0.9743, "step": 4599 }, { "epoch": 0.5737807159785456, "grad_norm": 0.0, "learning_rate": 8.108569505305366e-06, "loss": 0.9724, "step": 4600 }, { "epoch": 0.5739054509168018, "grad_norm": 0.0, "learning_rate": 8.10460247166903e-06, "loss": 0.8949, "step": 4601 }, { "epoch": 0.574030185855058, "grad_norm": 0.0, "learning_rate": 8.100635747409347e-06, "loss": 0.9439, "step": 4602 }, { "epoch": 0.5741549207933142, "grad_norm": 0.0, "learning_rate": 8.0966693331738e-06, "loss": 0.9413, "step": 4603 }, { "epoch": 0.5742796557315704, "grad_norm": 0.0, "learning_rate": 8.0927032296098e-06, "loss": 0.9312, "step": 4604 }, { "epoch": 0.5744043906698266, "grad_norm": 0.0, "learning_rate": 8.088737437364712e-06, "loss": 0.9263, "step": 4605 }, { "epoch": 0.5745291256080828, "grad_norm": 0.0, "learning_rate": 8.08477195708586e-06, "loss": 0.9498, "step": 4606 }, { "epoch": 0.574653860546339, "grad_norm": 0.0, "learning_rate": 8.080806789420504e-06, "loss": 0.931, "step": 4607 }, { "epoch": 0.5747785954845952, "grad_norm": 0.0, "learning_rate": 8.076841935015866e-06, "loss": 0.9985, "step": 4608 }, { "epoch": 0.5749033304228515, "grad_norm": 0.0, "learning_rate": 8.072877394519103e-06, "loss": 0.9688, "step": 4609 }, { "epoch": 0.5750280653611076, "grad_norm": 0.0, "learning_rate": 8.068913168577336e-06, "loss": 0.9332, "step": 4610 }, { "epoch": 0.5751528002993639, "grad_norm": 0.0, "learning_rate": 8.064949257837622e-06, "loss": 0.9158, "step": 4611 }, { "epoch": 0.57527753523762, "grad_norm": 0.0, "learning_rate": 8.060985662946967e-06, "loss": 0.8925, "step": 4612 }, { "epoch": 0.5754022701758763, "grad_norm": 0.0, "learning_rate": 8.057022384552336e-06, "loss": 0.8866, "step": 4613 }, { "epoch": 0.5755270051141325, "grad_norm": 0.0, "learning_rate": 8.053059423300635e-06, "loss": 0.9282, "step": 4614 }, { "epoch": 0.5756517400523887, "grad_norm": 0.0, "learning_rate": 8.04909677983872e-06, "loss": 0.9357, "step": 4615 }, { "epoch": 0.5757764749906449, "grad_norm": 0.0, "learning_rate": 8.04513445481339e-06, "loss": 0.8948, "step": 4616 }, { "epoch": 0.575901209928901, "grad_norm": 0.0, "learning_rate": 8.041172448871399e-06, "loss": 0.8618, "step": 4617 }, { "epoch": 0.5760259448671573, "grad_norm": 0.0, "learning_rate": 8.037210762659447e-06, "loss": 0.94, "step": 4618 }, { "epoch": 0.5761506798054135, "grad_norm": 0.0, "learning_rate": 8.033249396824177e-06, "loss": 0.9382, "step": 4619 }, { "epoch": 0.5762754147436697, "grad_norm": 0.0, "learning_rate": 8.02928835201219e-06, "loss": 0.9283, "step": 4620 }, { "epoch": 0.5764001496819259, "grad_norm": 0.0, "learning_rate": 8.025327628870028e-06, "loss": 0.957, "step": 4621 }, { "epoch": 0.5765248846201821, "grad_norm": 0.0, "learning_rate": 8.021367228044172e-06, "loss": 0.9328, "step": 4622 }, { "epoch": 0.5766496195584383, "grad_norm": 0.0, "learning_rate": 8.01740715018107e-06, "loss": 1.008, "step": 4623 }, { "epoch": 0.5767743544966946, "grad_norm": 0.0, "learning_rate": 8.0134473959271e-06, "loss": 0.9907, "step": 4624 }, { "epoch": 0.5768990894349507, "grad_norm": 0.0, "learning_rate": 8.009487965928597e-06, "loss": 0.9268, "step": 4625 }, { "epoch": 0.577023824373207, "grad_norm": 0.0, "learning_rate": 8.005528860831835e-06, "loss": 0.8837, "step": 4626 }, { "epoch": 0.5771485593114631, "grad_norm": 0.0, "learning_rate": 8.001570081283048e-06, "loss": 0.9497, "step": 4627 }, { "epoch": 0.5772732942497193, "grad_norm": 0.0, "learning_rate": 7.997611627928403e-06, "loss": 0.994, "step": 4628 }, { "epoch": 0.5773980291879756, "grad_norm": 0.0, "learning_rate": 7.993653501414015e-06, "loss": 0.9672, "step": 4629 }, { "epoch": 0.5775227641262317, "grad_norm": 0.0, "learning_rate": 7.989695702385961e-06, "loss": 0.9156, "step": 4630 }, { "epoch": 0.577647499064488, "grad_norm": 0.0, "learning_rate": 7.985738231490245e-06, "loss": 0.9143, "step": 4631 }, { "epoch": 0.5777722340027441, "grad_norm": 0.0, "learning_rate": 7.981781089372832e-06, "loss": 0.9194, "step": 4632 }, { "epoch": 0.5778969689410004, "grad_norm": 0.0, "learning_rate": 7.977824276679623e-06, "loss": 0.9778, "step": 4633 }, { "epoch": 0.5780217038792566, "grad_norm": 0.0, "learning_rate": 7.973867794056469e-06, "loss": 0.9647, "step": 4634 }, { "epoch": 0.5781464388175128, "grad_norm": 0.0, "learning_rate": 7.969911642149172e-06, "loss": 0.8971, "step": 4635 }, { "epoch": 0.578271173755769, "grad_norm": 0.0, "learning_rate": 7.96595582160347e-06, "loss": 0.923, "step": 4636 }, { "epoch": 0.5783959086940252, "grad_norm": 0.0, "learning_rate": 7.962000333065058e-06, "loss": 1.0058, "step": 4637 }, { "epoch": 0.5785206436322814, "grad_norm": 0.0, "learning_rate": 7.95804517717957e-06, "loss": 0.918, "step": 4638 }, { "epoch": 0.5786453785705377, "grad_norm": 0.0, "learning_rate": 7.954090354592583e-06, "loss": 0.9627, "step": 4639 }, { "epoch": 0.5787701135087938, "grad_norm": 0.0, "learning_rate": 7.95013586594963e-06, "loss": 0.909, "step": 4640 }, { "epoch": 0.57889484844705, "grad_norm": 0.0, "learning_rate": 7.946181711896181e-06, "loss": 0.9402, "step": 4641 }, { "epoch": 0.5790195833853062, "grad_norm": 0.0, "learning_rate": 7.942227893077652e-06, "loss": 0.9594, "step": 4642 }, { "epoch": 0.5791443183235624, "grad_norm": 0.0, "learning_rate": 7.938274410139406e-06, "loss": 0.9242, "step": 4643 }, { "epoch": 0.5792690532618187, "grad_norm": 0.0, "learning_rate": 7.934321263726754e-06, "loss": 0.8933, "step": 4644 }, { "epoch": 0.5793937882000748, "grad_norm": 0.0, "learning_rate": 7.930368454484949e-06, "loss": 0.9271, "step": 4645 }, { "epoch": 0.5795185231383311, "grad_norm": 0.0, "learning_rate": 7.926415983059181e-06, "loss": 0.9599, "step": 4646 }, { "epoch": 0.5796432580765872, "grad_norm": 0.0, "learning_rate": 7.922463850094607e-06, "loss": 0.9298, "step": 4647 }, { "epoch": 0.5797679930148435, "grad_norm": 0.0, "learning_rate": 7.918512056236301e-06, "loss": 0.9141, "step": 4648 }, { "epoch": 0.5798927279530997, "grad_norm": 0.0, "learning_rate": 7.914560602129304e-06, "loss": 0.9201, "step": 4649 }, { "epoch": 0.5800174628913559, "grad_norm": 0.0, "learning_rate": 7.91060948841859e-06, "loss": 0.9645, "step": 4650 }, { "epoch": 0.5801421978296121, "grad_norm": 0.0, "learning_rate": 7.90665871574908e-06, "loss": 0.9375, "step": 4651 }, { "epoch": 0.5802669327678682, "grad_norm": 0.0, "learning_rate": 7.902708284765643e-06, "loss": 0.9472, "step": 4652 }, { "epoch": 0.5803916677061245, "grad_norm": 0.0, "learning_rate": 7.898758196113084e-06, "loss": 0.9759, "step": 4653 }, { "epoch": 0.5805164026443806, "grad_norm": 0.0, "learning_rate": 7.89480845043616e-06, "loss": 1.0059, "step": 4654 }, { "epoch": 0.5806411375826369, "grad_norm": 0.0, "learning_rate": 7.89085904837957e-06, "loss": 0.9356, "step": 4655 }, { "epoch": 0.5807658725208931, "grad_norm": 0.0, "learning_rate": 7.886909990587949e-06, "loss": 0.9149, "step": 4656 }, { "epoch": 0.5808906074591493, "grad_norm": 0.0, "learning_rate": 7.882961277705897e-06, "loss": 0.933, "step": 4657 }, { "epoch": 0.5810153423974055, "grad_norm": 0.0, "learning_rate": 7.879012910377925e-06, "loss": 0.9452, "step": 4658 }, { "epoch": 0.5811400773356618, "grad_norm": 0.0, "learning_rate": 7.875064889248522e-06, "loss": 1.0153, "step": 4659 }, { "epoch": 0.5812648122739179, "grad_norm": 0.0, "learning_rate": 7.871117214962096e-06, "loss": 0.9634, "step": 4660 }, { "epoch": 0.5813895472121742, "grad_norm": 0.0, "learning_rate": 7.86716988816301e-06, "loss": 0.9606, "step": 4661 }, { "epoch": 0.5815142821504303, "grad_norm": 0.0, "learning_rate": 7.863222909495569e-06, "loss": 0.8842, "step": 4662 }, { "epoch": 0.5816390170886866, "grad_norm": 0.0, "learning_rate": 7.859276279604014e-06, "loss": 0.9313, "step": 4663 }, { "epoch": 0.5817637520269427, "grad_norm": 0.0, "learning_rate": 7.85532999913254e-06, "loss": 0.9228, "step": 4664 }, { "epoch": 0.5818884869651989, "grad_norm": 0.0, "learning_rate": 7.851384068725272e-06, "loss": 0.9249, "step": 4665 }, { "epoch": 0.5820132219034552, "grad_norm": 0.0, "learning_rate": 7.847438489026296e-06, "loss": 0.9179, "step": 4666 }, { "epoch": 0.5821379568417113, "grad_norm": 0.0, "learning_rate": 7.843493260679624e-06, "loss": 0.9277, "step": 4667 }, { "epoch": 0.5822626917799676, "grad_norm": 0.0, "learning_rate": 7.839548384329212e-06, "loss": 0.9675, "step": 4668 }, { "epoch": 0.5823874267182237, "grad_norm": 0.0, "learning_rate": 7.835603860618973e-06, "loss": 0.9612, "step": 4669 }, { "epoch": 0.58251216165648, "grad_norm": 0.0, "learning_rate": 7.831659690192745e-06, "loss": 0.9259, "step": 4670 }, { "epoch": 0.5826368965947362, "grad_norm": 0.0, "learning_rate": 7.827715873694321e-06, "loss": 0.9353, "step": 4671 }, { "epoch": 0.5827616315329924, "grad_norm": 0.0, "learning_rate": 7.823772411767432e-06, "loss": 0.9581, "step": 4672 }, { "epoch": 0.5828863664712486, "grad_norm": 0.0, "learning_rate": 7.819829305055743e-06, "loss": 0.9377, "step": 4673 }, { "epoch": 0.5830111014095049, "grad_norm": 0.0, "learning_rate": 7.815886554202876e-06, "loss": 0.9015, "step": 4674 }, { "epoch": 0.583135836347761, "grad_norm": 0.0, "learning_rate": 7.81194415985238e-06, "loss": 0.9159, "step": 4675 }, { "epoch": 0.5832605712860172, "grad_norm": 0.0, "learning_rate": 7.808002122647763e-06, "loss": 0.9816, "step": 4676 }, { "epoch": 0.5833853062242734, "grad_norm": 0.0, "learning_rate": 7.804060443232456e-06, "loss": 0.9556, "step": 4677 }, { "epoch": 0.5835100411625296, "grad_norm": 0.0, "learning_rate": 7.800119122249847e-06, "loss": 0.9114, "step": 4678 }, { "epoch": 0.5836347761007858, "grad_norm": 0.0, "learning_rate": 7.796178160343255e-06, "loss": 0.9775, "step": 4679 }, { "epoch": 0.583759511039042, "grad_norm": 0.0, "learning_rate": 7.792237558155944e-06, "loss": 0.9019, "step": 4680 }, { "epoch": 0.5838842459772983, "grad_norm": 0.0, "learning_rate": 7.788297316331123e-06, "loss": 0.9315, "step": 4681 }, { "epoch": 0.5840089809155544, "grad_norm": 0.0, "learning_rate": 7.784357435511933e-06, "loss": 0.9299, "step": 4682 }, { "epoch": 0.5841337158538107, "grad_norm": 0.0, "learning_rate": 7.780417916341471e-06, "loss": 0.9309, "step": 4683 }, { "epoch": 0.5842584507920668, "grad_norm": 0.0, "learning_rate": 7.77647875946276e-06, "loss": 0.9086, "step": 4684 }, { "epoch": 0.5843831857303231, "grad_norm": 0.0, "learning_rate": 7.772539965518764e-06, "loss": 0.8944, "step": 4685 }, { "epoch": 0.5845079206685793, "grad_norm": 0.0, "learning_rate": 7.768601535152404e-06, "loss": 0.9276, "step": 4686 }, { "epoch": 0.5846326556068355, "grad_norm": 0.0, "learning_rate": 7.764663469006526e-06, "loss": 0.9469, "step": 4687 }, { "epoch": 0.5847573905450917, "grad_norm": 0.0, "learning_rate": 7.760725767723923e-06, "loss": 0.9541, "step": 4688 }, { "epoch": 0.5848821254833478, "grad_norm": 0.0, "learning_rate": 7.756788431947327e-06, "loss": 0.9893, "step": 4689 }, { "epoch": 0.5850068604216041, "grad_norm": 0.0, "learning_rate": 7.752851462319408e-06, "loss": 0.9395, "step": 4690 }, { "epoch": 0.5851315953598603, "grad_norm": 0.0, "learning_rate": 7.748914859482781e-06, "loss": 0.9483, "step": 4691 }, { "epoch": 0.5852563302981165, "grad_norm": 0.0, "learning_rate": 7.744978624079995e-06, "loss": 0.9725, "step": 4692 }, { "epoch": 0.5853810652363727, "grad_norm": 0.0, "learning_rate": 7.741042756753554e-06, "loss": 0.9644, "step": 4693 }, { "epoch": 0.5855058001746289, "grad_norm": 0.0, "learning_rate": 7.737107258145878e-06, "loss": 0.9289, "step": 4694 }, { "epoch": 0.5856305351128851, "grad_norm": 0.0, "learning_rate": 7.733172128899345e-06, "loss": 0.929, "step": 4695 }, { "epoch": 0.5857552700511414, "grad_norm": 0.0, "learning_rate": 7.72923736965627e-06, "loss": 0.9928, "step": 4696 }, { "epoch": 0.5858800049893975, "grad_norm": 0.0, "learning_rate": 7.725302981058899e-06, "loss": 0.9213, "step": 4697 }, { "epoch": 0.5860047399276538, "grad_norm": 0.0, "learning_rate": 7.721368963749426e-06, "loss": 0.9271, "step": 4698 }, { "epoch": 0.5861294748659099, "grad_norm": 0.0, "learning_rate": 7.717435318369984e-06, "loss": 0.9005, "step": 4699 }, { "epoch": 0.5862542098041661, "grad_norm": 0.0, "learning_rate": 7.713502045562641e-06, "loss": 0.8722, "step": 4700 }, { "epoch": 0.5863789447424224, "grad_norm": 0.0, "learning_rate": 7.709569145969406e-06, "loss": 0.9903, "step": 4701 }, { "epoch": 0.5865036796806785, "grad_norm": 0.0, "learning_rate": 7.705636620232225e-06, "loss": 0.9416, "step": 4702 }, { "epoch": 0.5866284146189348, "grad_norm": 0.0, "learning_rate": 7.701704468992994e-06, "loss": 0.8964, "step": 4703 }, { "epoch": 0.5867531495571909, "grad_norm": 0.0, "learning_rate": 7.697772692893526e-06, "loss": 0.9391, "step": 4704 }, { "epoch": 0.5868778844954472, "grad_norm": 0.0, "learning_rate": 7.6938412925756e-06, "loss": 0.9144, "step": 4705 }, { "epoch": 0.5870026194337034, "grad_norm": 0.0, "learning_rate": 7.68991026868091e-06, "loss": 0.9476, "step": 4706 }, { "epoch": 0.5871273543719596, "grad_norm": 0.0, "learning_rate": 7.685979621851102e-06, "loss": 0.9307, "step": 4707 }, { "epoch": 0.5872520893102158, "grad_norm": 0.0, "learning_rate": 7.682049352727756e-06, "loss": 0.9625, "step": 4708 }, { "epoch": 0.587376824248472, "grad_norm": 0.0, "learning_rate": 7.67811946195239e-06, "loss": 0.9771, "step": 4709 }, { "epoch": 0.5875015591867282, "grad_norm": 0.0, "learning_rate": 7.674189950166463e-06, "loss": 0.9542, "step": 4710 }, { "epoch": 0.5876262941249845, "grad_norm": 0.0, "learning_rate": 7.670260818011365e-06, "loss": 0.9627, "step": 4711 }, { "epoch": 0.5877510290632406, "grad_norm": 0.0, "learning_rate": 7.666332066128439e-06, "loss": 0.995, "step": 4712 }, { "epoch": 0.5878757640014968, "grad_norm": 0.0, "learning_rate": 7.662403695158953e-06, "loss": 0.9042, "step": 4713 }, { "epoch": 0.588000498939753, "grad_norm": 0.0, "learning_rate": 7.658475705744109e-06, "loss": 0.9011, "step": 4714 }, { "epoch": 0.5881252338780092, "grad_norm": 0.0, "learning_rate": 7.654548098525062e-06, "loss": 0.9518, "step": 4715 }, { "epoch": 0.5882499688162655, "grad_norm": 0.0, "learning_rate": 7.650620874142893e-06, "loss": 0.9821, "step": 4716 }, { "epoch": 0.5883747037545216, "grad_norm": 0.0, "learning_rate": 7.646694033238626e-06, "loss": 0.9489, "step": 4717 }, { "epoch": 0.5884994386927779, "grad_norm": 0.0, "learning_rate": 7.64276757645322e-06, "loss": 0.9408, "step": 4718 }, { "epoch": 0.588624173631034, "grad_norm": 0.0, "learning_rate": 7.638841504427567e-06, "loss": 0.9407, "step": 4719 }, { "epoch": 0.5887489085692903, "grad_norm": 0.0, "learning_rate": 7.634915817802509e-06, "loss": 0.9064, "step": 4720 }, { "epoch": 0.5888736435075465, "grad_norm": 0.0, "learning_rate": 7.630990517218809e-06, "loss": 0.9285, "step": 4721 }, { "epoch": 0.5889983784458027, "grad_norm": 0.0, "learning_rate": 7.627065603317183e-06, "loss": 0.9517, "step": 4722 }, { "epoch": 0.5891231133840589, "grad_norm": 0.0, "learning_rate": 7.623141076738271e-06, "loss": 0.9319, "step": 4723 }, { "epoch": 0.589247848322315, "grad_norm": 0.0, "learning_rate": 7.619216938122653e-06, "loss": 0.9037, "step": 4724 }, { "epoch": 0.5893725832605713, "grad_norm": 0.0, "learning_rate": 7.615293188110854e-06, "loss": 0.9823, "step": 4725 }, { "epoch": 0.5894973181988274, "grad_norm": 0.0, "learning_rate": 7.6113698273433215e-06, "loss": 0.9251, "step": 4726 }, { "epoch": 0.5896220531370837, "grad_norm": 0.0, "learning_rate": 7.607446856460452e-06, "loss": 0.9244, "step": 4727 }, { "epoch": 0.5897467880753399, "grad_norm": 0.0, "learning_rate": 7.603524276102568e-06, "loss": 0.9536, "step": 4728 }, { "epoch": 0.5898715230135961, "grad_norm": 0.0, "learning_rate": 7.599602086909943e-06, "loss": 0.9361, "step": 4729 }, { "epoch": 0.5899962579518523, "grad_norm": 0.0, "learning_rate": 7.595680289522769e-06, "loss": 0.9646, "step": 4730 }, { "epoch": 0.5901209928901086, "grad_norm": 0.0, "learning_rate": 7.591758884581179e-06, "loss": 0.962, "step": 4731 }, { "epoch": 0.5902457278283647, "grad_norm": 0.0, "learning_rate": 7.5878378727252565e-06, "loss": 0.8979, "step": 4732 }, { "epoch": 0.590370462766621, "grad_norm": 0.0, "learning_rate": 7.5839172545950005e-06, "loss": 0.914, "step": 4733 }, { "epoch": 0.5904951977048771, "grad_norm": 0.0, "learning_rate": 7.57999703083036e-06, "loss": 0.9272, "step": 4734 }, { "epoch": 0.5906199326431334, "grad_norm": 0.0, "learning_rate": 7.576077202071211e-06, "loss": 0.9396, "step": 4735 }, { "epoch": 0.5907446675813895, "grad_norm": 0.0, "learning_rate": 7.5721577689573685e-06, "loss": 0.9641, "step": 4736 }, { "epoch": 0.5908694025196457, "grad_norm": 0.0, "learning_rate": 7.568238732128586e-06, "loss": 0.9195, "step": 4737 }, { "epoch": 0.590994137457902, "grad_norm": 0.0, "learning_rate": 7.564320092224542e-06, "loss": 0.9502, "step": 4738 }, { "epoch": 0.5911188723961581, "grad_norm": 0.0, "learning_rate": 7.5604018498848656e-06, "loss": 0.924, "step": 4739 }, { "epoch": 0.5912436073344144, "grad_norm": 0.0, "learning_rate": 7.556484005749112e-06, "loss": 0.9665, "step": 4740 }, { "epoch": 0.5913683422726705, "grad_norm": 0.0, "learning_rate": 7.552566560456762e-06, "loss": 0.9344, "step": 4741 }, { "epoch": 0.5914930772109268, "grad_norm": 0.0, "learning_rate": 7.548649514647255e-06, "loss": 0.8884, "step": 4742 }, { "epoch": 0.591617812149183, "grad_norm": 0.0, "learning_rate": 7.544732868959942e-06, "loss": 0.9017, "step": 4743 }, { "epoch": 0.5917425470874392, "grad_norm": 0.0, "learning_rate": 7.5408166240341244e-06, "loss": 0.9187, "step": 4744 }, { "epoch": 0.5918672820256954, "grad_norm": 0.0, "learning_rate": 7.536900780509029e-06, "loss": 0.9477, "step": 4745 }, { "epoch": 0.5919920169639517, "grad_norm": 0.0, "learning_rate": 7.5329853390238186e-06, "loss": 0.9121, "step": 4746 }, { "epoch": 0.5921167519022078, "grad_norm": 0.0, "learning_rate": 7.529070300217596e-06, "loss": 0.9281, "step": 4747 }, { "epoch": 0.592241486840464, "grad_norm": 0.0, "learning_rate": 7.525155664729387e-06, "loss": 0.8883, "step": 4748 }, { "epoch": 0.5923662217787202, "grad_norm": 0.0, "learning_rate": 7.52124143319817e-06, "loss": 0.9194, "step": 4749 }, { "epoch": 0.5924909567169764, "grad_norm": 0.0, "learning_rate": 7.5173276062628364e-06, "loss": 0.9566, "step": 4750 }, { "epoch": 0.5926156916552326, "grad_norm": 0.0, "learning_rate": 7.513414184562229e-06, "loss": 0.961, "step": 4751 }, { "epoch": 0.5927404265934888, "grad_norm": 0.0, "learning_rate": 7.509501168735113e-06, "loss": 0.944, "step": 4752 }, { "epoch": 0.5928651615317451, "grad_norm": 0.0, "learning_rate": 7.505588559420188e-06, "loss": 0.9604, "step": 4753 }, { "epoch": 0.5929898964700012, "grad_norm": 0.0, "learning_rate": 7.501676357256097e-06, "loss": 0.9708, "step": 4754 }, { "epoch": 0.5931146314082575, "grad_norm": 0.0, "learning_rate": 7.497764562881404e-06, "loss": 0.9405, "step": 4755 }, { "epoch": 0.5932393663465136, "grad_norm": 0.0, "learning_rate": 7.493853176934618e-06, "loss": 0.9511, "step": 4756 }, { "epoch": 0.5933641012847699, "grad_norm": 0.0, "learning_rate": 7.4899422000541735e-06, "loss": 0.9095, "step": 4757 }, { "epoch": 0.5934888362230261, "grad_norm": 0.0, "learning_rate": 7.4860316328784345e-06, "loss": 0.9567, "step": 4758 }, { "epoch": 0.5936135711612823, "grad_norm": 0.0, "learning_rate": 7.482121476045716e-06, "loss": 0.9022, "step": 4759 }, { "epoch": 0.5937383060995385, "grad_norm": 0.0, "learning_rate": 7.478211730194245e-06, "loss": 0.9579, "step": 4760 }, { "epoch": 0.5938630410377946, "grad_norm": 0.0, "learning_rate": 7.474302395962196e-06, "loss": 0.9282, "step": 4761 }, { "epoch": 0.5939877759760509, "grad_norm": 0.0, "learning_rate": 7.4703934739876675e-06, "loss": 0.9562, "step": 4762 }, { "epoch": 0.5941125109143071, "grad_norm": 0.0, "learning_rate": 7.466484964908693e-06, "loss": 0.9097, "step": 4763 }, { "epoch": 0.5942372458525633, "grad_norm": 0.0, "learning_rate": 7.462576869363245e-06, "loss": 0.9525, "step": 4764 }, { "epoch": 0.5943619807908195, "grad_norm": 0.0, "learning_rate": 7.458669187989214e-06, "loss": 0.9353, "step": 4765 }, { "epoch": 0.5944867157290757, "grad_norm": 0.0, "learning_rate": 7.454761921424446e-06, "loss": 0.9091, "step": 4766 }, { "epoch": 0.5946114506673319, "grad_norm": 0.0, "learning_rate": 7.45085507030669e-06, "loss": 0.9644, "step": 4767 }, { "epoch": 0.5947361856055882, "grad_norm": 0.0, "learning_rate": 7.446948635273655e-06, "loss": 0.9305, "step": 4768 }, { "epoch": 0.5948609205438443, "grad_norm": 0.0, "learning_rate": 7.443042616962966e-06, "loss": 0.9682, "step": 4769 }, { "epoch": 0.5949856554821006, "grad_norm": 0.0, "learning_rate": 7.439137016012179e-06, "loss": 0.9628, "step": 4770 }, { "epoch": 0.5951103904203567, "grad_norm": 0.0, "learning_rate": 7.435231833058791e-06, "loss": 0.907, "step": 4771 }, { "epoch": 0.5952351253586129, "grad_norm": 0.0, "learning_rate": 7.431327068740225e-06, "loss": 0.9153, "step": 4772 }, { "epoch": 0.5953598602968692, "grad_norm": 0.0, "learning_rate": 7.427422723693839e-06, "loss": 0.9276, "step": 4773 }, { "epoch": 0.5954845952351253, "grad_norm": 0.0, "learning_rate": 7.423518798556919e-06, "loss": 0.9286, "step": 4774 }, { "epoch": 0.5956093301733816, "grad_norm": 0.0, "learning_rate": 7.4196152939666784e-06, "loss": 0.9413, "step": 4775 }, { "epoch": 0.5957340651116377, "grad_norm": 0.0, "learning_rate": 7.415712210560279e-06, "loss": 0.9484, "step": 4776 }, { "epoch": 0.595858800049894, "grad_norm": 0.0, "learning_rate": 7.411809548974792e-06, "loss": 0.9648, "step": 4777 }, { "epoch": 0.5959835349881502, "grad_norm": 0.0, "learning_rate": 7.407907309847238e-06, "loss": 0.8932, "step": 4778 }, { "epoch": 0.5961082699264064, "grad_norm": 0.0, "learning_rate": 7.404005493814557e-06, "loss": 0.9511, "step": 4779 }, { "epoch": 0.5962330048646626, "grad_norm": 0.0, "learning_rate": 7.400104101513622e-06, "loss": 0.9646, "step": 4780 }, { "epoch": 0.5963577398029188, "grad_norm": 0.0, "learning_rate": 7.396203133581243e-06, "loss": 0.9328, "step": 4781 }, { "epoch": 0.596482474741175, "grad_norm": 0.0, "learning_rate": 7.392302590654152e-06, "loss": 0.9681, "step": 4782 }, { "epoch": 0.5966072096794313, "grad_norm": 0.0, "learning_rate": 7.3884024733690195e-06, "loss": 0.9561, "step": 4783 }, { "epoch": 0.5967319446176874, "grad_norm": 0.0, "learning_rate": 7.384502782362437e-06, "loss": 0.9627, "step": 4784 }, { "epoch": 0.5968566795559436, "grad_norm": 0.0, "learning_rate": 7.380603518270942e-06, "loss": 0.9849, "step": 4785 }, { "epoch": 0.5969814144941998, "grad_norm": 0.0, "learning_rate": 7.376704681730988e-06, "loss": 0.9413, "step": 4786 }, { "epoch": 0.597106149432456, "grad_norm": 0.0, "learning_rate": 7.372806273378958e-06, "loss": 0.9, "step": 4787 }, { "epoch": 0.5972308843707123, "grad_norm": 0.0, "learning_rate": 7.368908293851178e-06, "loss": 0.8921, "step": 4788 }, { "epoch": 0.5973556193089684, "grad_norm": 0.0, "learning_rate": 7.365010743783894e-06, "loss": 0.9255, "step": 4789 }, { "epoch": 0.5974803542472247, "grad_norm": 0.0, "learning_rate": 7.361113623813284e-06, "loss": 0.9246, "step": 4790 }, { "epoch": 0.5976050891854808, "grad_norm": 0.0, "learning_rate": 7.3572169345754575e-06, "loss": 0.9047, "step": 4791 }, { "epoch": 0.5977298241237371, "grad_norm": 0.0, "learning_rate": 7.3533206767064505e-06, "loss": 0.9936, "step": 4792 }, { "epoch": 0.5978545590619933, "grad_norm": 0.0, "learning_rate": 7.3494248508422316e-06, "loss": 0.9209, "step": 4793 }, { "epoch": 0.5979792940002495, "grad_norm": 0.0, "learning_rate": 7.345529457618694e-06, "loss": 0.9068, "step": 4794 }, { "epoch": 0.5981040289385057, "grad_norm": 0.0, "learning_rate": 7.341634497671673e-06, "loss": 0.936, "step": 4795 }, { "epoch": 0.5982287638767618, "grad_norm": 0.0, "learning_rate": 7.33773997163692e-06, "loss": 0.9284, "step": 4796 }, { "epoch": 0.5983534988150181, "grad_norm": 0.0, "learning_rate": 7.333845880150112e-06, "loss": 0.9727, "step": 4797 }, { "epoch": 0.5984782337532742, "grad_norm": 0.0, "learning_rate": 7.3299522238468755e-06, "loss": 0.9345, "step": 4798 }, { "epoch": 0.5986029686915305, "grad_norm": 0.0, "learning_rate": 7.326059003362744e-06, "loss": 0.911, "step": 4799 }, { "epoch": 0.5987277036297867, "grad_norm": 0.0, "learning_rate": 7.322166219333197e-06, "loss": 0.974, "step": 4800 }, { "epoch": 0.5988524385680429, "grad_norm": 0.0, "learning_rate": 7.3182738723936255e-06, "loss": 0.9205, "step": 4801 }, { "epoch": 0.5989771735062991, "grad_norm": 0.0, "learning_rate": 7.31438196317937e-06, "loss": 0.9153, "step": 4802 }, { "epoch": 0.5991019084445554, "grad_norm": 0.0, "learning_rate": 7.31049049232568e-06, "loss": 0.9929, "step": 4803 }, { "epoch": 0.5992266433828115, "grad_norm": 0.0, "learning_rate": 7.306599460467741e-06, "loss": 0.8853, "step": 4804 }, { "epoch": 0.5993513783210678, "grad_norm": 0.0, "learning_rate": 7.3027088682406725e-06, "loss": 0.958, "step": 4805 }, { "epoch": 0.5994761132593239, "grad_norm": 0.0, "learning_rate": 7.298818716279513e-06, "loss": 0.9751, "step": 4806 }, { "epoch": 0.5996008481975802, "grad_norm": 0.0, "learning_rate": 7.294929005219238e-06, "loss": 0.9006, "step": 4807 }, { "epoch": 0.5997255831358363, "grad_norm": 0.0, "learning_rate": 7.291039735694745e-06, "loss": 0.8898, "step": 4808 }, { "epoch": 0.5998503180740925, "grad_norm": 0.0, "learning_rate": 7.287150908340857e-06, "loss": 0.9368, "step": 4809 }, { "epoch": 0.5999750530123488, "grad_norm": 0.0, "learning_rate": 7.2832625237923315e-06, "loss": 0.9734, "step": 4810 }, { "epoch": 0.6000997879506049, "grad_norm": 0.0, "learning_rate": 7.279374582683849e-06, "loss": 0.9431, "step": 4811 }, { "epoch": 0.6002245228888612, "grad_norm": 0.0, "learning_rate": 7.275487085650026e-06, "loss": 0.9536, "step": 4812 }, { "epoch": 0.6003492578271173, "grad_norm": 0.0, "learning_rate": 7.271600033325393e-06, "loss": 0.9155, "step": 4813 }, { "epoch": 0.6004739927653736, "grad_norm": 0.0, "learning_rate": 7.267713426344414e-06, "loss": 0.9114, "step": 4814 }, { "epoch": 0.6005987277036298, "grad_norm": 0.0, "learning_rate": 7.263827265341488e-06, "loss": 0.9632, "step": 4815 }, { "epoch": 0.600723462641886, "grad_norm": 0.0, "learning_rate": 7.259941550950928e-06, "loss": 0.9554, "step": 4816 }, { "epoch": 0.6008481975801422, "grad_norm": 0.0, "learning_rate": 7.256056283806987e-06, "loss": 0.9105, "step": 4817 }, { "epoch": 0.6009729325183985, "grad_norm": 0.0, "learning_rate": 7.252171464543831e-06, "loss": 0.9427, "step": 4818 }, { "epoch": 0.6010976674566546, "grad_norm": 0.0, "learning_rate": 7.248287093795568e-06, "loss": 0.9522, "step": 4819 }, { "epoch": 0.6012224023949108, "grad_norm": 0.0, "learning_rate": 7.244403172196222e-06, "loss": 0.9322, "step": 4820 }, { "epoch": 0.601347137333167, "grad_norm": 0.0, "learning_rate": 7.24051970037974e-06, "loss": 0.9279, "step": 4821 }, { "epoch": 0.6014718722714232, "grad_norm": 0.0, "learning_rate": 7.236636678980018e-06, "loss": 0.9295, "step": 4822 }, { "epoch": 0.6015966072096794, "grad_norm": 0.0, "learning_rate": 7.232754108630846e-06, "loss": 0.9248, "step": 4823 }, { "epoch": 0.6017213421479356, "grad_norm": 0.0, "learning_rate": 7.2288719899659685e-06, "loss": 0.948, "step": 4824 }, { "epoch": 0.6018460770861919, "grad_norm": 0.0, "learning_rate": 7.224990323619045e-06, "loss": 0.9259, "step": 4825 }, { "epoch": 0.601970812024448, "grad_norm": 0.0, "learning_rate": 7.221109110223654e-06, "loss": 0.9213, "step": 4826 }, { "epoch": 0.6020955469627043, "grad_norm": 0.0, "learning_rate": 7.217228350413315e-06, "loss": 0.8995, "step": 4827 }, { "epoch": 0.6022202819009604, "grad_norm": 0.0, "learning_rate": 7.213348044821459e-06, "loss": 0.9426, "step": 4828 }, { "epoch": 0.6023450168392167, "grad_norm": 0.0, "learning_rate": 7.2094681940814556e-06, "loss": 0.8974, "step": 4829 }, { "epoch": 0.6024697517774729, "grad_norm": 0.0, "learning_rate": 7.205588798826593e-06, "loss": 0.8937, "step": 4830 }, { "epoch": 0.6025944867157291, "grad_norm": 0.0, "learning_rate": 7.201709859690081e-06, "loss": 0.9783, "step": 4831 }, { "epoch": 0.6027192216539853, "grad_norm": 0.0, "learning_rate": 7.197831377305069e-06, "loss": 0.8707, "step": 4832 }, { "epoch": 0.6028439565922414, "grad_norm": 0.0, "learning_rate": 7.193953352304612e-06, "loss": 0.9677, "step": 4833 }, { "epoch": 0.6029686915304977, "grad_norm": 0.0, "learning_rate": 7.190075785321713e-06, "loss": 0.9711, "step": 4834 }, { "epoch": 0.6030934264687539, "grad_norm": 0.0, "learning_rate": 7.186198676989283e-06, "loss": 0.9465, "step": 4835 }, { "epoch": 0.6032181614070101, "grad_norm": 0.0, "learning_rate": 7.182322027940165e-06, "loss": 0.9211, "step": 4836 }, { "epoch": 0.6033428963452663, "grad_norm": 0.0, "learning_rate": 7.178445838807124e-06, "loss": 0.9605, "step": 4837 }, { "epoch": 0.6034676312835225, "grad_norm": 0.0, "learning_rate": 7.174570110222854e-06, "loss": 0.9639, "step": 4838 }, { "epoch": 0.6035923662217787, "grad_norm": 0.0, "learning_rate": 7.170694842819971e-06, "loss": 0.9258, "step": 4839 }, { "epoch": 0.603717101160035, "grad_norm": 0.0, "learning_rate": 7.1668200372310124e-06, "loss": 0.9354, "step": 4840 }, { "epoch": 0.6038418360982911, "grad_norm": 0.0, "learning_rate": 7.162945694088451e-06, "loss": 0.9293, "step": 4841 }, { "epoch": 0.6039665710365474, "grad_norm": 0.0, "learning_rate": 7.159071814024675e-06, "loss": 0.962, "step": 4842 }, { "epoch": 0.6040913059748035, "grad_norm": 0.0, "learning_rate": 7.155198397671997e-06, "loss": 0.9664, "step": 4843 }, { "epoch": 0.6042160409130597, "grad_norm": 0.0, "learning_rate": 7.151325445662658e-06, "loss": 0.9542, "step": 4844 }, { "epoch": 0.604340775851316, "grad_norm": 0.0, "learning_rate": 7.14745295862882e-06, "loss": 0.9042, "step": 4845 }, { "epoch": 0.6044655107895721, "grad_norm": 0.0, "learning_rate": 7.1435809372025725e-06, "loss": 0.9372, "step": 4846 }, { "epoch": 0.6045902457278284, "grad_norm": 0.0, "learning_rate": 7.139709382015927e-06, "loss": 0.9321, "step": 4847 }, { "epoch": 0.6047149806660845, "grad_norm": 0.0, "learning_rate": 7.135838293700814e-06, "loss": 0.9448, "step": 4848 }, { "epoch": 0.6048397156043408, "grad_norm": 0.0, "learning_rate": 7.131967672889101e-06, "loss": 0.9125, "step": 4849 }, { "epoch": 0.604964450542597, "grad_norm": 0.0, "learning_rate": 7.12809752021256e-06, "loss": 0.9559, "step": 4850 }, { "epoch": 0.6050891854808532, "grad_norm": 0.0, "learning_rate": 7.12422783630291e-06, "loss": 0.9519, "step": 4851 }, { "epoch": 0.6052139204191094, "grad_norm": 0.0, "learning_rate": 7.120358621791772e-06, "loss": 0.9341, "step": 4852 }, { "epoch": 0.6053386553573656, "grad_norm": 0.0, "learning_rate": 7.116489877310705e-06, "loss": 0.9655, "step": 4853 }, { "epoch": 0.6054633902956218, "grad_norm": 0.0, "learning_rate": 7.1126216034911826e-06, "loss": 0.9352, "step": 4854 }, { "epoch": 0.6055881252338781, "grad_norm": 0.0, "learning_rate": 7.1087538009646025e-06, "loss": 0.918, "step": 4855 }, { "epoch": 0.6057128601721342, "grad_norm": 0.0, "learning_rate": 7.104886470362292e-06, "loss": 0.8742, "step": 4856 }, { "epoch": 0.6058375951103904, "grad_norm": 0.0, "learning_rate": 7.101019612315492e-06, "loss": 0.9289, "step": 4857 }, { "epoch": 0.6059623300486466, "grad_norm": 0.0, "learning_rate": 7.097153227455379e-06, "loss": 0.9275, "step": 4858 }, { "epoch": 0.6060870649869028, "grad_norm": 0.0, "learning_rate": 7.0932873164130375e-06, "loss": 0.9231, "step": 4859 }, { "epoch": 0.6062117999251591, "grad_norm": 0.0, "learning_rate": 7.08942187981948e-06, "loss": 0.9262, "step": 4860 }, { "epoch": 0.6063365348634152, "grad_norm": 0.0, "learning_rate": 7.085556918305652e-06, "loss": 0.9575, "step": 4861 }, { "epoch": 0.6064612698016715, "grad_norm": 0.0, "learning_rate": 7.081692432502404e-06, "loss": 0.9789, "step": 4862 }, { "epoch": 0.6065860047399276, "grad_norm": 0.0, "learning_rate": 7.077828423040525e-06, "loss": 0.9399, "step": 4863 }, { "epoch": 0.6067107396781839, "grad_norm": 0.0, "learning_rate": 7.073964890550713e-06, "loss": 0.9227, "step": 4864 }, { "epoch": 0.60683547461644, "grad_norm": 0.0, "learning_rate": 7.070101835663594e-06, "loss": 0.8746, "step": 4865 }, { "epoch": 0.6069602095546963, "grad_norm": 0.0, "learning_rate": 7.066239259009722e-06, "loss": 0.9236, "step": 4866 }, { "epoch": 0.6070849444929525, "grad_norm": 0.0, "learning_rate": 7.062377161219556e-06, "loss": 0.9346, "step": 4867 }, { "epoch": 0.6072096794312086, "grad_norm": 0.0, "learning_rate": 7.058515542923499e-06, "loss": 0.9244, "step": 4868 }, { "epoch": 0.6073344143694649, "grad_norm": 0.0, "learning_rate": 7.054654404751858e-06, "loss": 0.893, "step": 4869 }, { "epoch": 0.607459149307721, "grad_norm": 0.0, "learning_rate": 7.050793747334872e-06, "loss": 0.9436, "step": 4870 }, { "epoch": 0.6075838842459773, "grad_norm": 0.0, "learning_rate": 7.046933571302697e-06, "loss": 0.9022, "step": 4871 }, { "epoch": 0.6077086191842335, "grad_norm": 0.0, "learning_rate": 7.043073877285405e-06, "loss": 0.8585, "step": 4872 }, { "epoch": 0.6078333541224897, "grad_norm": 0.0, "learning_rate": 7.039214665913004e-06, "loss": 0.9234, "step": 4873 }, { "epoch": 0.6079580890607459, "grad_norm": 0.0, "learning_rate": 7.035355937815407e-06, "loss": 0.893, "step": 4874 }, { "epoch": 0.6080828239990022, "grad_norm": 0.0, "learning_rate": 7.031497693622461e-06, "loss": 0.9628, "step": 4875 }, { "epoch": 0.6082075589372583, "grad_norm": 0.0, "learning_rate": 7.027639933963928e-06, "loss": 0.9274, "step": 4876 }, { "epoch": 0.6083322938755146, "grad_norm": 0.0, "learning_rate": 7.023782659469484e-06, "loss": 0.8855, "step": 4877 }, { "epoch": 0.6084570288137707, "grad_norm": 0.0, "learning_rate": 7.019925870768747e-06, "loss": 0.9314, "step": 4878 }, { "epoch": 0.608581763752027, "grad_norm": 0.0, "learning_rate": 7.016069568491233e-06, "loss": 0.9338, "step": 4879 }, { "epoch": 0.6087064986902831, "grad_norm": 0.0, "learning_rate": 7.01221375326639e-06, "loss": 0.8651, "step": 4880 }, { "epoch": 0.6088312336285393, "grad_norm": 0.0, "learning_rate": 7.008358425723586e-06, "loss": 0.9435, "step": 4881 }, { "epoch": 0.6089559685667956, "grad_norm": 0.0, "learning_rate": 7.004503586492103e-06, "loss": 0.9161, "step": 4882 }, { "epoch": 0.6090807035050517, "grad_norm": 0.0, "learning_rate": 7.000649236201153e-06, "loss": 0.9309, "step": 4883 }, { "epoch": 0.609205438443308, "grad_norm": 0.0, "learning_rate": 6.996795375479856e-06, "loss": 0.887, "step": 4884 }, { "epoch": 0.6093301733815641, "grad_norm": 0.0, "learning_rate": 6.992942004957271e-06, "loss": 0.9493, "step": 4885 }, { "epoch": 0.6094549083198204, "grad_norm": 0.0, "learning_rate": 6.9890891252623515e-06, "loss": 0.9022, "step": 4886 }, { "epoch": 0.6095796432580766, "grad_norm": 0.0, "learning_rate": 6.985236737023997e-06, "loss": 0.9341, "step": 4887 }, { "epoch": 0.6097043781963328, "grad_norm": 0.0, "learning_rate": 6.981384840871007e-06, "loss": 0.948, "step": 4888 }, { "epoch": 0.609829113134589, "grad_norm": 0.0, "learning_rate": 6.977533437432109e-06, "loss": 0.9197, "step": 4889 }, { "epoch": 0.6099538480728453, "grad_norm": 0.0, "learning_rate": 6.973682527335951e-06, "loss": 0.9252, "step": 4890 }, { "epoch": 0.6100785830111014, "grad_norm": 0.0, "learning_rate": 6.9698321112110966e-06, "loss": 0.9661, "step": 4891 }, { "epoch": 0.6102033179493576, "grad_norm": 0.0, "learning_rate": 6.965982189686033e-06, "loss": 0.9431, "step": 4892 }, { "epoch": 0.6103280528876138, "grad_norm": 0.0, "learning_rate": 6.962132763389162e-06, "loss": 0.9091, "step": 4893 }, { "epoch": 0.61045278782587, "grad_norm": 0.0, "learning_rate": 6.958283832948807e-06, "loss": 0.9472, "step": 4894 }, { "epoch": 0.6105775227641262, "grad_norm": 0.0, "learning_rate": 6.954435398993215e-06, "loss": 0.9437, "step": 4895 }, { "epoch": 0.6107022577023824, "grad_norm": 0.0, "learning_rate": 6.950587462150539e-06, "loss": 0.8938, "step": 4896 }, { "epoch": 0.6108269926406387, "grad_norm": 0.0, "learning_rate": 6.94674002304887e-06, "loss": 0.918, "step": 4897 }, { "epoch": 0.6109517275788948, "grad_norm": 0.0, "learning_rate": 6.9428930823162e-06, "loss": 0.8929, "step": 4898 }, { "epoch": 0.6110764625171511, "grad_norm": 0.0, "learning_rate": 6.939046640580447e-06, "loss": 0.9633, "step": 4899 }, { "epoch": 0.6112011974554072, "grad_norm": 0.0, "learning_rate": 6.935200698469448e-06, "loss": 0.9242, "step": 4900 }, { "epoch": 0.6113259323936635, "grad_norm": 0.0, "learning_rate": 6.931355256610959e-06, "loss": 0.9742, "step": 4901 }, { "epoch": 0.6114506673319197, "grad_norm": 0.0, "learning_rate": 6.9275103156326536e-06, "loss": 0.9537, "step": 4902 }, { "epoch": 0.6115754022701759, "grad_norm": 0.0, "learning_rate": 6.923665876162118e-06, "loss": 0.9479, "step": 4903 }, { "epoch": 0.6117001372084321, "grad_norm": 0.0, "learning_rate": 6.9198219388268685e-06, "loss": 0.9, "step": 4904 }, { "epoch": 0.6118248721466882, "grad_norm": 0.0, "learning_rate": 6.915978504254331e-06, "loss": 0.912, "step": 4905 }, { "epoch": 0.6119496070849445, "grad_norm": 0.0, "learning_rate": 6.912135573071845e-06, "loss": 0.9201, "step": 4906 }, { "epoch": 0.6120743420232007, "grad_norm": 0.0, "learning_rate": 6.9082931459066814e-06, "loss": 0.9103, "step": 4907 }, { "epoch": 0.6121990769614569, "grad_norm": 0.0, "learning_rate": 6.904451223386015e-06, "loss": 0.925, "step": 4908 }, { "epoch": 0.6123238118997131, "grad_norm": 0.0, "learning_rate": 6.90060980613695e-06, "loss": 0.8952, "step": 4909 }, { "epoch": 0.6124485468379693, "grad_norm": 0.0, "learning_rate": 6.8967688947864995e-06, "loss": 0.9373, "step": 4910 }, { "epoch": 0.6125732817762255, "grad_norm": 0.0, "learning_rate": 6.892928489961596e-06, "loss": 0.9037, "step": 4911 }, { "epoch": 0.6126980167144818, "grad_norm": 0.0, "learning_rate": 6.889088592289092e-06, "loss": 0.9313, "step": 4912 }, { "epoch": 0.6128227516527379, "grad_norm": 0.0, "learning_rate": 6.885249202395754e-06, "loss": 0.8975, "step": 4913 }, { "epoch": 0.6129474865909942, "grad_norm": 0.0, "learning_rate": 6.881410320908271e-06, "loss": 0.9545, "step": 4914 }, { "epoch": 0.6130722215292503, "grad_norm": 0.0, "learning_rate": 6.877571948453245e-06, "loss": 0.9639, "step": 4915 }, { "epoch": 0.6131969564675065, "grad_norm": 0.0, "learning_rate": 6.873734085657188e-06, "loss": 0.8952, "step": 4916 }, { "epoch": 0.6133216914057628, "grad_norm": 0.0, "learning_rate": 6.8698967331465435e-06, "loss": 0.8847, "step": 4917 }, { "epoch": 0.6134464263440189, "grad_norm": 0.0, "learning_rate": 6.866059891547661e-06, "loss": 0.9739, "step": 4918 }, { "epoch": 0.6135711612822752, "grad_norm": 0.0, "learning_rate": 6.862223561486811e-06, "loss": 0.9775, "step": 4919 }, { "epoch": 0.6136958962205313, "grad_norm": 0.0, "learning_rate": 6.858387743590175e-06, "loss": 0.9101, "step": 4920 }, { "epoch": 0.6138206311587876, "grad_norm": 0.0, "learning_rate": 6.854552438483866e-06, "loss": 0.9588, "step": 4921 }, { "epoch": 0.6139453660970438, "grad_norm": 0.0, "learning_rate": 6.850717646793892e-06, "loss": 0.9568, "step": 4922 }, { "epoch": 0.6140701010353, "grad_norm": 0.0, "learning_rate": 6.846883369146187e-06, "loss": 0.9252, "step": 4923 }, { "epoch": 0.6141948359735562, "grad_norm": 0.0, "learning_rate": 6.84304960616661e-06, "loss": 0.9554, "step": 4924 }, { "epoch": 0.6143195709118124, "grad_norm": 0.0, "learning_rate": 6.839216358480922e-06, "loss": 0.9088, "step": 4925 }, { "epoch": 0.6144443058500686, "grad_norm": 0.0, "learning_rate": 6.835383626714809e-06, "loss": 0.9207, "step": 4926 }, { "epoch": 0.6145690407883249, "grad_norm": 0.0, "learning_rate": 6.831551411493868e-06, "loss": 0.9293, "step": 4927 }, { "epoch": 0.614693775726581, "grad_norm": 0.0, "learning_rate": 6.82771971344361e-06, "loss": 0.9668, "step": 4928 }, { "epoch": 0.6148185106648372, "grad_norm": 0.0, "learning_rate": 6.823888533189469e-06, "loss": 0.9651, "step": 4929 }, { "epoch": 0.6149432456030934, "grad_norm": 0.0, "learning_rate": 6.820057871356786e-06, "loss": 0.8876, "step": 4930 }, { "epoch": 0.6150679805413496, "grad_norm": 0.0, "learning_rate": 6.81622772857083e-06, "loss": 0.9076, "step": 4931 }, { "epoch": 0.6151927154796059, "grad_norm": 0.0, "learning_rate": 6.812398105456769e-06, "loss": 0.9202, "step": 4932 }, { "epoch": 0.615317450417862, "grad_norm": 0.0, "learning_rate": 6.8085690026396915e-06, "loss": 0.9332, "step": 4933 }, { "epoch": 0.6154421853561183, "grad_norm": 0.0, "learning_rate": 6.804740420744613e-06, "loss": 0.928, "step": 4934 }, { "epoch": 0.6155669202943744, "grad_norm": 0.0, "learning_rate": 6.800912360396447e-06, "loss": 0.9712, "step": 4935 }, { "epoch": 0.6156916552326307, "grad_norm": 0.0, "learning_rate": 6.797084822220036e-06, "loss": 0.8845, "step": 4936 }, { "epoch": 0.6158163901708869, "grad_norm": 0.0, "learning_rate": 6.793257806840123e-06, "loss": 0.9291, "step": 4937 }, { "epoch": 0.6159411251091431, "grad_norm": 0.0, "learning_rate": 6.7894313148813814e-06, "loss": 0.9268, "step": 4938 }, { "epoch": 0.6160658600473993, "grad_norm": 0.0, "learning_rate": 6.785605346968387e-06, "loss": 0.9133, "step": 4939 }, { "epoch": 0.6161905949856555, "grad_norm": 0.0, "learning_rate": 6.781779903725629e-06, "loss": 0.8899, "step": 4940 }, { "epoch": 0.6163153299239117, "grad_norm": 0.0, "learning_rate": 6.777954985777529e-06, "loss": 0.9168, "step": 4941 }, { "epoch": 0.6164400648621678, "grad_norm": 0.0, "learning_rate": 6.7741305937483955e-06, "loss": 0.8869, "step": 4942 }, { "epoch": 0.6165647998004241, "grad_norm": 0.0, "learning_rate": 6.770306728262478e-06, "loss": 0.9354, "step": 4943 }, { "epoch": 0.6166895347386803, "grad_norm": 0.0, "learning_rate": 6.766483389943921e-06, "loss": 0.9184, "step": 4944 }, { "epoch": 0.6168142696769365, "grad_norm": 0.0, "learning_rate": 6.762660579416791e-06, "loss": 0.8831, "step": 4945 }, { "epoch": 0.6169390046151927, "grad_norm": 0.0, "learning_rate": 6.758838297305068e-06, "loss": 0.8932, "step": 4946 }, { "epoch": 0.617063739553449, "grad_norm": 0.0, "learning_rate": 6.755016544232642e-06, "loss": 0.9566, "step": 4947 }, { "epoch": 0.6171884744917051, "grad_norm": 0.0, "learning_rate": 6.751195320823325e-06, "loss": 0.9034, "step": 4948 }, { "epoch": 0.6173132094299614, "grad_norm": 0.0, "learning_rate": 6.747374627700832e-06, "loss": 0.9234, "step": 4949 }, { "epoch": 0.6174379443682175, "grad_norm": 0.0, "learning_rate": 6.743554465488795e-06, "loss": 0.9355, "step": 4950 }, { "epoch": 0.6175626793064738, "grad_norm": 0.0, "learning_rate": 6.73973483481077e-06, "loss": 0.9749, "step": 4951 }, { "epoch": 0.61768741424473, "grad_norm": 0.0, "learning_rate": 6.7359157362902025e-06, "loss": 0.9079, "step": 4952 }, { "epoch": 0.6178121491829861, "grad_norm": 0.0, "learning_rate": 6.73209717055048e-06, "loss": 0.8748, "step": 4953 }, { "epoch": 0.6179368841212424, "grad_norm": 0.0, "learning_rate": 6.72827913821488e-06, "loss": 0.928, "step": 4954 }, { "epoch": 0.6180616190594985, "grad_norm": 0.0, "learning_rate": 6.7244616399066065e-06, "loss": 0.9665, "step": 4955 }, { "epoch": 0.6181863539977548, "grad_norm": 0.0, "learning_rate": 6.720644676248769e-06, "loss": 0.9132, "step": 4956 }, { "epoch": 0.6183110889360109, "grad_norm": 0.0, "learning_rate": 6.716828247864391e-06, "loss": 0.9665, "step": 4957 }, { "epoch": 0.6184358238742672, "grad_norm": 0.0, "learning_rate": 6.713012355376413e-06, "loss": 1.0286, "step": 4958 }, { "epoch": 0.6185605588125234, "grad_norm": 0.0, "learning_rate": 6.7091969994076785e-06, "loss": 0.9791, "step": 4959 }, { "epoch": 0.6186852937507796, "grad_norm": 0.0, "learning_rate": 6.705382180580959e-06, "loss": 0.9402, "step": 4960 }, { "epoch": 0.6188100286890358, "grad_norm": 0.0, "learning_rate": 6.701567899518924e-06, "loss": 0.9588, "step": 4961 }, { "epoch": 0.618934763627292, "grad_norm": 0.0, "learning_rate": 6.697754156844159e-06, "loss": 0.9154, "step": 4962 }, { "epoch": 0.6190594985655482, "grad_norm": 0.0, "learning_rate": 6.693940953179166e-06, "loss": 0.9103, "step": 4963 }, { "epoch": 0.6191842335038045, "grad_norm": 0.0, "learning_rate": 6.690128289146354e-06, "loss": 0.9243, "step": 4964 }, { "epoch": 0.6193089684420606, "grad_norm": 0.0, "learning_rate": 6.6863161653680484e-06, "loss": 0.9256, "step": 4965 }, { "epoch": 0.6194337033803168, "grad_norm": 0.0, "learning_rate": 6.682504582466482e-06, "loss": 0.9859, "step": 4966 }, { "epoch": 0.619558438318573, "grad_norm": 0.0, "learning_rate": 6.678693541063799e-06, "loss": 0.9362, "step": 4967 }, { "epoch": 0.6196831732568292, "grad_norm": 0.0, "learning_rate": 6.674883041782062e-06, "loss": 0.9577, "step": 4968 }, { "epoch": 0.6198079081950855, "grad_norm": 0.0, "learning_rate": 6.671073085243234e-06, "loss": 0.8799, "step": 4969 }, { "epoch": 0.6199326431333416, "grad_norm": 0.0, "learning_rate": 6.667263672069204e-06, "loss": 0.9681, "step": 4970 }, { "epoch": 0.6200573780715979, "grad_norm": 0.0, "learning_rate": 6.6634548028817596e-06, "loss": 0.8977, "step": 4971 }, { "epoch": 0.620182113009854, "grad_norm": 0.0, "learning_rate": 6.659646478302606e-06, "loss": 0.895, "step": 4972 }, { "epoch": 0.6203068479481103, "grad_norm": 0.0, "learning_rate": 6.655838698953359e-06, "loss": 0.9323, "step": 4973 }, { "epoch": 0.6204315828863665, "grad_norm": 0.0, "learning_rate": 6.652031465455537e-06, "loss": 0.904, "step": 4974 }, { "epoch": 0.6205563178246227, "grad_norm": 0.0, "learning_rate": 6.648224778430585e-06, "loss": 0.9049, "step": 4975 }, { "epoch": 0.6206810527628789, "grad_norm": 0.0, "learning_rate": 6.644418638499845e-06, "loss": 0.9146, "step": 4976 }, { "epoch": 0.620805787701135, "grad_norm": 0.0, "learning_rate": 6.640613046284581e-06, "loss": 0.9123, "step": 4977 }, { "epoch": 0.6209305226393913, "grad_norm": 0.0, "learning_rate": 6.636808002405955e-06, "loss": 0.9104, "step": 4978 }, { "epoch": 0.6210552575776475, "grad_norm": 0.0, "learning_rate": 6.633003507485045e-06, "loss": 0.9613, "step": 4979 }, { "epoch": 0.6211799925159037, "grad_norm": 0.0, "learning_rate": 6.6291995621428494e-06, "loss": 0.9188, "step": 4980 }, { "epoch": 0.6213047274541599, "grad_norm": 0.0, "learning_rate": 6.6253961670002595e-06, "loss": 0.9263, "step": 4981 }, { "epoch": 0.6214294623924161, "grad_norm": 0.0, "learning_rate": 6.62159332267809e-06, "loss": 0.9655, "step": 4982 }, { "epoch": 0.6215541973306723, "grad_norm": 0.0, "learning_rate": 6.6177910297970605e-06, "loss": 0.9359, "step": 4983 }, { "epoch": 0.6216789322689286, "grad_norm": 0.0, "learning_rate": 6.613989288977798e-06, "loss": 0.9601, "step": 4984 }, { "epoch": 0.6218036672071847, "grad_norm": 0.0, "learning_rate": 6.610188100840844e-06, "loss": 0.891, "step": 4985 }, { "epoch": 0.621928402145441, "grad_norm": 0.0, "learning_rate": 6.6063874660066475e-06, "loss": 0.9459, "step": 4986 }, { "epoch": 0.6220531370836971, "grad_norm": 0.0, "learning_rate": 6.602587385095571e-06, "loss": 0.9721, "step": 4987 }, { "epoch": 0.6221778720219534, "grad_norm": 0.0, "learning_rate": 6.598787858727885e-06, "loss": 0.9332, "step": 4988 }, { "epoch": 0.6223026069602096, "grad_norm": 0.0, "learning_rate": 6.594988887523759e-06, "loss": 0.9301, "step": 4989 }, { "epoch": 0.6224273418984657, "grad_norm": 0.0, "learning_rate": 6.591190472103288e-06, "loss": 0.9148, "step": 4990 }, { "epoch": 0.622552076836722, "grad_norm": 0.0, "learning_rate": 6.587392613086468e-06, "loss": 0.9478, "step": 4991 }, { "epoch": 0.6226768117749781, "grad_norm": 0.0, "learning_rate": 6.5835953110932045e-06, "loss": 0.951, "step": 4992 }, { "epoch": 0.6228015467132344, "grad_norm": 0.0, "learning_rate": 6.579798566743314e-06, "loss": 0.903, "step": 4993 }, { "epoch": 0.6229262816514906, "grad_norm": 0.0, "learning_rate": 6.576002380656521e-06, "loss": 0.8939, "step": 4994 }, { "epoch": 0.6230510165897468, "grad_norm": 0.0, "learning_rate": 6.572206753452457e-06, "loss": 0.8804, "step": 4995 }, { "epoch": 0.623175751528003, "grad_norm": 0.0, "learning_rate": 6.568411685750662e-06, "loss": 0.9856, "step": 4996 }, { "epoch": 0.6233004864662592, "grad_norm": 0.0, "learning_rate": 6.564617178170593e-06, "loss": 0.8853, "step": 4997 }, { "epoch": 0.6234252214045154, "grad_norm": 0.0, "learning_rate": 6.560823231331606e-06, "loss": 0.8701, "step": 4998 }, { "epoch": 0.6235499563427717, "grad_norm": 0.0, "learning_rate": 6.557029845852969e-06, "loss": 0.9239, "step": 4999 }, { "epoch": 0.6236746912810278, "grad_norm": 0.0, "learning_rate": 6.553237022353857e-06, "loss": 0.9321, "step": 5000 }, { "epoch": 0.623799426219284, "grad_norm": 0.0, "learning_rate": 6.549444761453355e-06, "loss": 0.9135, "step": 5001 }, { "epoch": 0.6239241611575402, "grad_norm": 0.0, "learning_rate": 6.545653063770458e-06, "loss": 0.9688, "step": 5002 }, { "epoch": 0.6240488960957964, "grad_norm": 0.0, "learning_rate": 6.541861929924061e-06, "loss": 0.9161, "step": 5003 }, { "epoch": 0.6241736310340527, "grad_norm": 0.0, "learning_rate": 6.538071360532981e-06, "loss": 0.9542, "step": 5004 }, { "epoch": 0.6242983659723088, "grad_norm": 0.0, "learning_rate": 6.534281356215928e-06, "loss": 0.8754, "step": 5005 }, { "epoch": 0.6244231009105651, "grad_norm": 0.0, "learning_rate": 6.5304919175915235e-06, "loss": 0.8912, "step": 5006 }, { "epoch": 0.6245478358488212, "grad_norm": 0.0, "learning_rate": 6.526703045278308e-06, "loss": 0.8878, "step": 5007 }, { "epoch": 0.6246725707870775, "grad_norm": 0.0, "learning_rate": 6.522914739894716e-06, "loss": 0.9181, "step": 5008 }, { "epoch": 0.6247973057253337, "grad_norm": 0.0, "learning_rate": 6.519127002059096e-06, "loss": 0.9243, "step": 5009 }, { "epoch": 0.6249220406635899, "grad_norm": 0.0, "learning_rate": 6.5153398323897e-06, "loss": 0.9472, "step": 5010 }, { "epoch": 0.6250467756018461, "grad_norm": 0.0, "learning_rate": 6.5115532315046935e-06, "loss": 0.9212, "step": 5011 }, { "epoch": 0.6251715105401023, "grad_norm": 0.0, "learning_rate": 6.5077672000221425e-06, "loss": 0.9141, "step": 5012 }, { "epoch": 0.6252962454783585, "grad_norm": 0.0, "learning_rate": 6.503981738560019e-06, "loss": 0.9173, "step": 5013 }, { "epoch": 0.6254209804166146, "grad_norm": 0.0, "learning_rate": 6.500196847736217e-06, "loss": 0.9319, "step": 5014 }, { "epoch": 0.6255457153548709, "grad_norm": 0.0, "learning_rate": 6.496412528168512e-06, "loss": 0.9467, "step": 5015 }, { "epoch": 0.6256704502931271, "grad_norm": 0.0, "learning_rate": 6.4926287804746105e-06, "loss": 0.8847, "step": 5016 }, { "epoch": 0.6257951852313833, "grad_norm": 0.0, "learning_rate": 6.488845605272114e-06, "loss": 0.9483, "step": 5017 }, { "epoch": 0.6259199201696395, "grad_norm": 0.0, "learning_rate": 6.485063003178526e-06, "loss": 0.9396, "step": 5018 }, { "epoch": 0.6260446551078958, "grad_norm": 0.0, "learning_rate": 6.481280974811273e-06, "loss": 0.9546, "step": 5019 }, { "epoch": 0.6261693900461519, "grad_norm": 0.0, "learning_rate": 6.4774995207876654e-06, "loss": 0.873, "step": 5020 }, { "epoch": 0.6262941249844082, "grad_norm": 0.0, "learning_rate": 6.473718641724942e-06, "loss": 0.9057, "step": 5021 }, { "epoch": 0.6264188599226643, "grad_norm": 0.0, "learning_rate": 6.4699383382402326e-06, "loss": 0.9534, "step": 5022 }, { "epoch": 0.6265435948609206, "grad_norm": 0.0, "learning_rate": 6.466158610950575e-06, "loss": 0.8971, "step": 5023 }, { "epoch": 0.6266683297991767, "grad_norm": 0.0, "learning_rate": 6.462379460472925e-06, "loss": 0.9439, "step": 5024 }, { "epoch": 0.6267930647374329, "grad_norm": 0.0, "learning_rate": 6.4586008874241245e-06, "loss": 0.9471, "step": 5025 }, { "epoch": 0.6269177996756892, "grad_norm": 0.0, "learning_rate": 6.454822892420941e-06, "loss": 0.8951, "step": 5026 }, { "epoch": 0.6270425346139453, "grad_norm": 0.0, "learning_rate": 6.4510454760800335e-06, "loss": 0.9465, "step": 5027 }, { "epoch": 0.6271672695522016, "grad_norm": 0.0, "learning_rate": 6.4472686390179735e-06, "loss": 0.9077, "step": 5028 }, { "epoch": 0.6272920044904577, "grad_norm": 0.0, "learning_rate": 6.443492381851237e-06, "loss": 0.9832, "step": 5029 }, { "epoch": 0.627416739428714, "grad_norm": 0.0, "learning_rate": 6.4397167051962e-06, "loss": 0.9208, "step": 5030 }, { "epoch": 0.6275414743669702, "grad_norm": 0.0, "learning_rate": 6.435941609669154e-06, "loss": 0.9231, "step": 5031 }, { "epoch": 0.6276662093052264, "grad_norm": 0.0, "learning_rate": 6.432167095886282e-06, "loss": 0.9102, "step": 5032 }, { "epoch": 0.6277909442434826, "grad_norm": 0.0, "learning_rate": 6.428393164463689e-06, "loss": 0.9666, "step": 5033 }, { "epoch": 0.6279156791817389, "grad_norm": 0.0, "learning_rate": 6.424619816017372e-06, "loss": 0.9727, "step": 5034 }, { "epoch": 0.628040414119995, "grad_norm": 0.0, "learning_rate": 6.42084705116323e-06, "loss": 0.908, "step": 5035 }, { "epoch": 0.6281651490582513, "grad_norm": 0.0, "learning_rate": 6.417074870517085e-06, "loss": 0.9528, "step": 5036 }, { "epoch": 0.6282898839965074, "grad_norm": 0.0, "learning_rate": 6.413303274694642e-06, "loss": 0.9708, "step": 5037 }, { "epoch": 0.6284146189347636, "grad_norm": 0.0, "learning_rate": 6.409532264311529e-06, "loss": 0.9735, "step": 5038 }, { "epoch": 0.6285393538730198, "grad_norm": 0.0, "learning_rate": 6.4057618399832645e-06, "loss": 0.9287, "step": 5039 }, { "epoch": 0.628664088811276, "grad_norm": 0.0, "learning_rate": 6.4019920023252745e-06, "loss": 0.8874, "step": 5040 }, { "epoch": 0.6287888237495323, "grad_norm": 0.0, "learning_rate": 6.3982227519528986e-06, "loss": 0.9003, "step": 5041 }, { "epoch": 0.6289135586877884, "grad_norm": 0.0, "learning_rate": 6.394454089481366e-06, "loss": 0.896, "step": 5042 }, { "epoch": 0.6290382936260447, "grad_norm": 0.0, "learning_rate": 6.390686015525825e-06, "loss": 0.9412, "step": 5043 }, { "epoch": 0.6291630285643008, "grad_norm": 0.0, "learning_rate": 6.3869185307013135e-06, "loss": 0.905, "step": 5044 }, { "epoch": 0.6292877635025571, "grad_norm": 0.0, "learning_rate": 6.383151635622786e-06, "loss": 0.9483, "step": 5045 }, { "epoch": 0.6294124984408133, "grad_norm": 0.0, "learning_rate": 6.379385330905092e-06, "loss": 0.9469, "step": 5046 }, { "epoch": 0.6295372333790695, "grad_norm": 0.0, "learning_rate": 6.375619617162985e-06, "loss": 0.9561, "step": 5047 }, { "epoch": 0.6296619683173257, "grad_norm": 0.0, "learning_rate": 6.371854495011128e-06, "loss": 0.9955, "step": 5048 }, { "epoch": 0.6297867032555818, "grad_norm": 0.0, "learning_rate": 6.368089965064079e-06, "loss": 0.9334, "step": 5049 }, { "epoch": 0.6299114381938381, "grad_norm": 0.0, "learning_rate": 6.364326027936314e-06, "loss": 0.9345, "step": 5050 }, { "epoch": 0.6300361731320943, "grad_norm": 0.0, "learning_rate": 6.360562684242193e-06, "loss": 0.9667, "step": 5051 }, { "epoch": 0.6301609080703505, "grad_norm": 0.0, "learning_rate": 6.3567999345959875e-06, "loss": 0.9397, "step": 5052 }, { "epoch": 0.6302856430086067, "grad_norm": 0.0, "learning_rate": 6.353037779611882e-06, "loss": 0.9596, "step": 5053 }, { "epoch": 0.6304103779468629, "grad_norm": 0.0, "learning_rate": 6.349276219903948e-06, "loss": 0.9251, "step": 5054 }, { "epoch": 0.6305351128851191, "grad_norm": 0.0, "learning_rate": 6.345515256086172e-06, "loss": 0.926, "step": 5055 }, { "epoch": 0.6306598478233754, "grad_norm": 0.0, "learning_rate": 6.3417548887724354e-06, "loss": 0.9722, "step": 5056 }, { "epoch": 0.6307845827616315, "grad_norm": 0.0, "learning_rate": 6.337995118576521e-06, "loss": 0.9059, "step": 5057 }, { "epoch": 0.6309093176998878, "grad_norm": 0.0, "learning_rate": 6.334235946112126e-06, "loss": 0.9539, "step": 5058 }, { "epoch": 0.6310340526381439, "grad_norm": 0.0, "learning_rate": 6.330477371992831e-06, "loss": 0.9538, "step": 5059 }, { "epoch": 0.6311587875764002, "grad_norm": 0.0, "learning_rate": 6.326719396832146e-06, "loss": 0.9118, "step": 5060 }, { "epoch": 0.6312835225146564, "grad_norm": 0.0, "learning_rate": 6.3229620212434504e-06, "loss": 0.9366, "step": 5061 }, { "epoch": 0.6314082574529125, "grad_norm": 0.0, "learning_rate": 6.319205245840057e-06, "loss": 0.8726, "step": 5062 }, { "epoch": 0.6315329923911688, "grad_norm": 0.0, "learning_rate": 6.315449071235158e-06, "loss": 0.908, "step": 5063 }, { "epoch": 0.6316577273294249, "grad_norm": 0.0, "learning_rate": 6.311693498041855e-06, "loss": 0.884, "step": 5064 }, { "epoch": 0.6317824622676812, "grad_norm": 0.0, "learning_rate": 6.3079385268731575e-06, "loss": 0.9652, "step": 5065 }, { "epoch": 0.6319071972059374, "grad_norm": 0.0, "learning_rate": 6.304184158341966e-06, "loss": 0.9518, "step": 5066 }, { "epoch": 0.6320319321441936, "grad_norm": 0.0, "learning_rate": 6.300430393061094e-06, "loss": 0.9371, "step": 5067 }, { "epoch": 0.6321566670824498, "grad_norm": 0.0, "learning_rate": 6.2966772316432464e-06, "loss": 0.9126, "step": 5068 }, { "epoch": 0.632281402020706, "grad_norm": 0.0, "learning_rate": 6.292924674701032e-06, "loss": 0.9027, "step": 5069 }, { "epoch": 0.6324061369589622, "grad_norm": 0.0, "learning_rate": 6.289172722846971e-06, "loss": 0.9255, "step": 5070 }, { "epoch": 0.6325308718972185, "grad_norm": 0.0, "learning_rate": 6.2854213766934655e-06, "loss": 0.93, "step": 5071 }, { "epoch": 0.6326556068354746, "grad_norm": 0.0, "learning_rate": 6.281670636852839e-06, "loss": 0.9091, "step": 5072 }, { "epoch": 0.6327803417737308, "grad_norm": 0.0, "learning_rate": 6.277920503937303e-06, "loss": 0.8966, "step": 5073 }, { "epoch": 0.632905076711987, "grad_norm": 0.0, "learning_rate": 6.274170978558971e-06, "loss": 1.0057, "step": 5074 }, { "epoch": 0.6330298116502432, "grad_norm": 0.0, "learning_rate": 6.270422061329866e-06, "loss": 0.9432, "step": 5075 }, { "epoch": 0.6331545465884995, "grad_norm": 0.0, "learning_rate": 6.2666737528619e-06, "loss": 0.9481, "step": 5076 }, { "epoch": 0.6332792815267556, "grad_norm": 0.0, "learning_rate": 6.262926053766898e-06, "loss": 0.9518, "step": 5077 }, { "epoch": 0.6334040164650119, "grad_norm": 0.0, "learning_rate": 6.25917896465657e-06, "loss": 0.8952, "step": 5078 }, { "epoch": 0.633528751403268, "grad_norm": 0.0, "learning_rate": 6.255432486142543e-06, "loss": 0.9361, "step": 5079 }, { "epoch": 0.6336534863415243, "grad_norm": 0.0, "learning_rate": 6.251686618836336e-06, "loss": 0.8998, "step": 5080 }, { "epoch": 0.6337782212797805, "grad_norm": 0.0, "learning_rate": 6.247941363349365e-06, "loss": 0.9123, "step": 5081 }, { "epoch": 0.6339029562180367, "grad_norm": 0.0, "learning_rate": 6.244196720292954e-06, "loss": 0.9105, "step": 5082 }, { "epoch": 0.6340276911562929, "grad_norm": 0.0, "learning_rate": 6.2404526902783205e-06, "loss": 0.9111, "step": 5083 }, { "epoch": 0.6341524260945491, "grad_norm": 0.0, "learning_rate": 6.236709273916585e-06, "loss": 0.9133, "step": 5084 }, { "epoch": 0.6342771610328053, "grad_norm": 0.0, "learning_rate": 6.232966471818771e-06, "loss": 0.8972, "step": 5085 }, { "epoch": 0.6344018959710614, "grad_norm": 0.0, "learning_rate": 6.229224284595792e-06, "loss": 0.9144, "step": 5086 }, { "epoch": 0.6345266309093177, "grad_norm": 0.0, "learning_rate": 6.225482712858473e-06, "loss": 0.9683, "step": 5087 }, { "epoch": 0.6346513658475739, "grad_norm": 0.0, "learning_rate": 6.2217417572175255e-06, "loss": 0.9336, "step": 5088 }, { "epoch": 0.6347761007858301, "grad_norm": 0.0, "learning_rate": 6.218001418283577e-06, "loss": 0.9216, "step": 5089 }, { "epoch": 0.6349008357240863, "grad_norm": 0.0, "learning_rate": 6.214261696667139e-06, "loss": 0.9474, "step": 5090 }, { "epoch": 0.6350255706623426, "grad_norm": 0.0, "learning_rate": 6.2105225929786296e-06, "loss": 0.8929, "step": 5091 }, { "epoch": 0.6351503056005987, "grad_norm": 0.0, "learning_rate": 6.206784107828367e-06, "loss": 0.916, "step": 5092 }, { "epoch": 0.635275040538855, "grad_norm": 0.0, "learning_rate": 6.203046241826563e-06, "loss": 0.8898, "step": 5093 }, { "epoch": 0.6353997754771111, "grad_norm": 0.0, "learning_rate": 6.199308995583335e-06, "loss": 0.8908, "step": 5094 }, { "epoch": 0.6355245104153674, "grad_norm": 0.0, "learning_rate": 6.195572369708689e-06, "loss": 0.9297, "step": 5095 }, { "epoch": 0.6356492453536235, "grad_norm": 0.0, "learning_rate": 6.191836364812548e-06, "loss": 0.9091, "step": 5096 }, { "epoch": 0.6357739802918797, "grad_norm": 0.0, "learning_rate": 6.188100981504712e-06, "loss": 0.9056, "step": 5097 }, { "epoch": 0.635898715230136, "grad_norm": 0.0, "learning_rate": 6.1843662203948905e-06, "loss": 0.9548, "step": 5098 }, { "epoch": 0.6360234501683921, "grad_norm": 0.0, "learning_rate": 6.180632082092699e-06, "loss": 0.9287, "step": 5099 }, { "epoch": 0.6361481851066484, "grad_norm": 0.0, "learning_rate": 6.176898567207633e-06, "loss": 0.9186, "step": 5100 }, { "epoch": 0.6362729200449045, "grad_norm": 0.0, "learning_rate": 6.173165676349103e-06, "loss": 0.9331, "step": 5101 }, { "epoch": 0.6363976549831608, "grad_norm": 0.0, "learning_rate": 6.16943341012641e-06, "loss": 0.8877, "step": 5102 }, { "epoch": 0.636522389921417, "grad_norm": 0.0, "learning_rate": 6.165701769148748e-06, "loss": 0.9774, "step": 5103 }, { "epoch": 0.6366471248596732, "grad_norm": 0.0, "learning_rate": 6.161970754025224e-06, "loss": 0.9027, "step": 5104 }, { "epoch": 0.6367718597979294, "grad_norm": 0.0, "learning_rate": 6.158240365364823e-06, "loss": 0.9683, "step": 5105 }, { "epoch": 0.6368965947361857, "grad_norm": 0.0, "learning_rate": 6.154510603776451e-06, "loss": 0.9379, "step": 5106 }, { "epoch": 0.6370213296744418, "grad_norm": 0.0, "learning_rate": 6.150781469868892e-06, "loss": 0.9247, "step": 5107 }, { "epoch": 0.6371460646126981, "grad_norm": 0.0, "learning_rate": 6.147052964250831e-06, "loss": 0.9442, "step": 5108 }, { "epoch": 0.6372707995509542, "grad_norm": 0.0, "learning_rate": 6.143325087530863e-06, "loss": 0.9259, "step": 5109 }, { "epoch": 0.6373955344892104, "grad_norm": 0.0, "learning_rate": 6.139597840317464e-06, "loss": 0.9298, "step": 5110 }, { "epoch": 0.6375202694274666, "grad_norm": 0.0, "learning_rate": 6.13587122321902e-06, "loss": 0.943, "step": 5111 }, { "epoch": 0.6376450043657228, "grad_norm": 0.0, "learning_rate": 6.1321452368438035e-06, "loss": 0.9514, "step": 5112 }, { "epoch": 0.6377697393039791, "grad_norm": 0.0, "learning_rate": 6.1284198817999964e-06, "loss": 0.9485, "step": 5113 }, { "epoch": 0.6378944742422352, "grad_norm": 0.0, "learning_rate": 6.1246951586956635e-06, "loss": 0.9311, "step": 5114 }, { "epoch": 0.6380192091804915, "grad_norm": 0.0, "learning_rate": 6.1209710681387745e-06, "loss": 0.9241, "step": 5115 }, { "epoch": 0.6381439441187476, "grad_norm": 0.0, "learning_rate": 6.117247610737202e-06, "loss": 0.9782, "step": 5116 }, { "epoch": 0.6382686790570039, "grad_norm": 0.0, "learning_rate": 6.113524787098699e-06, "loss": 0.9135, "step": 5117 }, { "epoch": 0.6383934139952601, "grad_norm": 0.0, "learning_rate": 6.109802597830932e-06, "loss": 0.9416, "step": 5118 }, { "epoch": 0.6385181489335163, "grad_norm": 0.0, "learning_rate": 6.106081043541452e-06, "loss": 0.9339, "step": 5119 }, { "epoch": 0.6386428838717725, "grad_norm": 0.0, "learning_rate": 6.102360124837707e-06, "loss": 0.9342, "step": 5120 }, { "epoch": 0.6387676188100286, "grad_norm": 0.0, "learning_rate": 6.098639842327052e-06, "loss": 0.9523, "step": 5121 }, { "epoch": 0.6388923537482849, "grad_norm": 0.0, "learning_rate": 6.094920196616726e-06, "loss": 0.9707, "step": 5122 }, { "epoch": 0.6390170886865411, "grad_norm": 0.0, "learning_rate": 6.091201188313871e-06, "loss": 0.929, "step": 5123 }, { "epoch": 0.6391418236247973, "grad_norm": 0.0, "learning_rate": 6.087482818025523e-06, "loss": 0.8739, "step": 5124 }, { "epoch": 0.6392665585630535, "grad_norm": 0.0, "learning_rate": 6.083765086358611e-06, "loss": 0.911, "step": 5125 }, { "epoch": 0.6393912935013097, "grad_norm": 0.0, "learning_rate": 6.080047993919966e-06, "loss": 0.9157, "step": 5126 }, { "epoch": 0.6395160284395659, "grad_norm": 0.0, "learning_rate": 6.076331541316308e-06, "loss": 0.8973, "step": 5127 }, { "epoch": 0.6396407633778222, "grad_norm": 0.0, "learning_rate": 6.072615729154261e-06, "loss": 0.9747, "step": 5128 }, { "epoch": 0.6397654983160783, "grad_norm": 0.0, "learning_rate": 6.0689005580403325e-06, "loss": 0.9183, "step": 5129 }, { "epoch": 0.6398902332543346, "grad_norm": 0.0, "learning_rate": 6.065186028580938e-06, "loss": 0.9644, "step": 5130 }, { "epoch": 0.6400149681925907, "grad_norm": 0.0, "learning_rate": 6.0614721413823794e-06, "loss": 0.9236, "step": 5131 }, { "epoch": 0.640139703130847, "grad_norm": 0.0, "learning_rate": 6.0577588970508534e-06, "loss": 0.9156, "step": 5132 }, { "epoch": 0.6402644380691032, "grad_norm": 0.0, "learning_rate": 6.054046296192464e-06, "loss": 0.9521, "step": 5133 }, { "epoch": 0.6403891730073593, "grad_norm": 0.0, "learning_rate": 6.050334339413191e-06, "loss": 0.9435, "step": 5134 }, { "epoch": 0.6405139079456156, "grad_norm": 0.0, "learning_rate": 6.046623027318928e-06, "loss": 0.943, "step": 5135 }, { "epoch": 0.6406386428838717, "grad_norm": 0.0, "learning_rate": 6.042912360515452e-06, "loss": 0.8812, "step": 5136 }, { "epoch": 0.640763377822128, "grad_norm": 0.0, "learning_rate": 6.039202339608432e-06, "loss": 0.939, "step": 5137 }, { "epoch": 0.6408881127603842, "grad_norm": 0.0, "learning_rate": 6.0354929652034445e-06, "loss": 0.9625, "step": 5138 }, { "epoch": 0.6410128476986404, "grad_norm": 0.0, "learning_rate": 6.031784237905949e-06, "loss": 0.9457, "step": 5139 }, { "epoch": 0.6411375826368966, "grad_norm": 0.0, "learning_rate": 6.028076158321304e-06, "loss": 0.9555, "step": 5140 }, { "epoch": 0.6412623175751528, "grad_norm": 0.0, "learning_rate": 6.024368727054763e-06, "loss": 0.9364, "step": 5141 }, { "epoch": 0.641387052513409, "grad_norm": 0.0, "learning_rate": 6.020661944711466e-06, "loss": 0.903, "step": 5142 }, { "epoch": 0.6415117874516653, "grad_norm": 0.0, "learning_rate": 6.016955811896464e-06, "loss": 0.9763, "step": 5143 }, { "epoch": 0.6416365223899214, "grad_norm": 0.0, "learning_rate": 6.013250329214681e-06, "loss": 0.9002, "step": 5144 }, { "epoch": 0.6417612573281776, "grad_norm": 0.0, "learning_rate": 6.009545497270952e-06, "loss": 0.9636, "step": 5145 }, { "epoch": 0.6418859922664338, "grad_norm": 0.0, "learning_rate": 6.005841316669996e-06, "loss": 0.9091, "step": 5146 }, { "epoch": 0.64201072720469, "grad_norm": 0.0, "learning_rate": 6.00213778801643e-06, "loss": 0.9238, "step": 5147 }, { "epoch": 0.6421354621429463, "grad_norm": 0.0, "learning_rate": 5.9984349119147635e-06, "loss": 0.93, "step": 5148 }, { "epoch": 0.6422601970812024, "grad_norm": 0.0, "learning_rate": 5.994732688969396e-06, "loss": 0.9465, "step": 5149 }, { "epoch": 0.6423849320194587, "grad_norm": 0.0, "learning_rate": 5.991031119784627e-06, "loss": 0.9534, "step": 5150 }, { "epoch": 0.6425096669577148, "grad_norm": 0.0, "learning_rate": 5.987330204964641e-06, "loss": 0.8806, "step": 5151 }, { "epoch": 0.6426344018959711, "grad_norm": 0.0, "learning_rate": 5.983629945113529e-06, "loss": 0.9138, "step": 5152 }, { "epoch": 0.6427591368342273, "grad_norm": 0.0, "learning_rate": 5.979930340835265e-06, "loss": 0.9194, "step": 5153 }, { "epoch": 0.6428838717724835, "grad_norm": 0.0, "learning_rate": 5.976231392733707e-06, "loss": 0.8898, "step": 5154 }, { "epoch": 0.6430086067107397, "grad_norm": 0.0, "learning_rate": 5.97253310141263e-06, "loss": 0.9098, "step": 5155 }, { "epoch": 0.6431333416489959, "grad_norm": 0.0, "learning_rate": 5.968835467475679e-06, "loss": 0.9265, "step": 5156 }, { "epoch": 0.6432580765872521, "grad_norm": 0.0, "learning_rate": 5.965138491526408e-06, "loss": 0.9285, "step": 5157 }, { "epoch": 0.6433828115255082, "grad_norm": 0.0, "learning_rate": 5.961442174168253e-06, "loss": 0.8771, "step": 5158 }, { "epoch": 0.6435075464637645, "grad_norm": 0.0, "learning_rate": 5.957746516004543e-06, "loss": 0.9386, "step": 5159 }, { "epoch": 0.6436322814020207, "grad_norm": 0.0, "learning_rate": 5.954051517638511e-06, "loss": 0.9123, "step": 5160 }, { "epoch": 0.6437570163402769, "grad_norm": 0.0, "learning_rate": 5.950357179673264e-06, "loss": 0.976, "step": 5161 }, { "epoch": 0.6438817512785331, "grad_norm": 0.0, "learning_rate": 5.94666350271182e-06, "loss": 0.9599, "step": 5162 }, { "epoch": 0.6440064862167894, "grad_norm": 0.0, "learning_rate": 5.942970487357073e-06, "loss": 0.9413, "step": 5163 }, { "epoch": 0.6441312211550455, "grad_norm": 0.0, "learning_rate": 5.939278134211824e-06, "loss": 0.935, "step": 5164 }, { "epoch": 0.6442559560933018, "grad_norm": 0.0, "learning_rate": 5.935586443878754e-06, "loss": 0.905, "step": 5165 }, { "epoch": 0.6443806910315579, "grad_norm": 0.0, "learning_rate": 5.931895416960436e-06, "loss": 0.9522, "step": 5166 }, { "epoch": 0.6445054259698142, "grad_norm": 0.0, "learning_rate": 5.928205054059345e-06, "loss": 0.9306, "step": 5167 }, { "epoch": 0.6446301609080703, "grad_norm": 0.0, "learning_rate": 5.924515355777833e-06, "loss": 0.9586, "step": 5168 }, { "epoch": 0.6447548958463265, "grad_norm": 0.0, "learning_rate": 5.920826322718165e-06, "loss": 0.9439, "step": 5169 }, { "epoch": 0.6448796307845828, "grad_norm": 0.0, "learning_rate": 5.917137955482474e-06, "loss": 0.9056, "step": 5170 }, { "epoch": 0.6450043657228389, "grad_norm": 0.0, "learning_rate": 5.9134502546727944e-06, "loss": 0.9332, "step": 5171 }, { "epoch": 0.6451291006610952, "grad_norm": 0.0, "learning_rate": 5.909763220891057e-06, "loss": 0.8942, "step": 5172 }, { "epoch": 0.6452538355993513, "grad_norm": 0.0, "learning_rate": 5.9060768547390746e-06, "loss": 0.9971, "step": 5173 }, { "epoch": 0.6453785705376076, "grad_norm": 0.0, "learning_rate": 5.902391156818558e-06, "loss": 0.9386, "step": 5174 }, { "epoch": 0.6455033054758638, "grad_norm": 0.0, "learning_rate": 5.898706127731106e-06, "loss": 0.9404, "step": 5175 }, { "epoch": 0.64562804041412, "grad_norm": 0.0, "learning_rate": 5.895021768078204e-06, "loss": 0.9385, "step": 5176 }, { "epoch": 0.6457527753523762, "grad_norm": 0.0, "learning_rate": 5.891338078461238e-06, "loss": 0.9364, "step": 5177 }, { "epoch": 0.6458775102906325, "grad_norm": 0.0, "learning_rate": 5.887655059481473e-06, "loss": 0.9091, "step": 5178 }, { "epoch": 0.6460022452288886, "grad_norm": 0.0, "learning_rate": 5.8839727117400795e-06, "loss": 0.9343, "step": 5179 }, { "epoch": 0.6461269801671449, "grad_norm": 0.0, "learning_rate": 5.880291035838097e-06, "loss": 0.9518, "step": 5180 }, { "epoch": 0.646251715105401, "grad_norm": 0.0, "learning_rate": 5.87661003237648e-06, "loss": 0.9435, "step": 5181 }, { "epoch": 0.6463764500436572, "grad_norm": 0.0, "learning_rate": 5.872929701956054e-06, "loss": 0.9032, "step": 5182 }, { "epoch": 0.6465011849819134, "grad_norm": 0.0, "learning_rate": 5.869250045177542e-06, "loss": 0.9465, "step": 5183 }, { "epoch": 0.6466259199201696, "grad_norm": 0.0, "learning_rate": 5.865571062641562e-06, "loss": 0.9252, "step": 5184 }, { "epoch": 0.6467506548584259, "grad_norm": 0.0, "learning_rate": 5.8618927549486095e-06, "loss": 0.9348, "step": 5185 }, { "epoch": 0.646875389796682, "grad_norm": 0.0, "learning_rate": 5.858215122699082e-06, "loss": 0.8962, "step": 5186 }, { "epoch": 0.6470001247349383, "grad_norm": 0.0, "learning_rate": 5.854538166493262e-06, "loss": 0.9022, "step": 5187 }, { "epoch": 0.6471248596731944, "grad_norm": 0.0, "learning_rate": 5.850861886931314e-06, "loss": 0.8805, "step": 5188 }, { "epoch": 0.6472495946114507, "grad_norm": 0.0, "learning_rate": 5.847186284613312e-06, "loss": 0.9351, "step": 5189 }, { "epoch": 0.6473743295497069, "grad_norm": 0.0, "learning_rate": 5.8435113601391935e-06, "loss": 0.9432, "step": 5190 }, { "epoch": 0.6474990644879631, "grad_norm": 0.0, "learning_rate": 5.839837114108811e-06, "loss": 0.9227, "step": 5191 }, { "epoch": 0.6476237994262193, "grad_norm": 0.0, "learning_rate": 5.836163547121887e-06, "loss": 0.9532, "step": 5192 }, { "epoch": 0.6477485343644754, "grad_norm": 0.0, "learning_rate": 5.832490659778041e-06, "loss": 0.9274, "step": 5193 }, { "epoch": 0.6478732693027317, "grad_norm": 0.0, "learning_rate": 5.828818452676786e-06, "loss": 0.9436, "step": 5194 }, { "epoch": 0.6479980042409879, "grad_norm": 0.0, "learning_rate": 5.825146926417504e-06, "loss": 0.8971, "step": 5195 }, { "epoch": 0.6481227391792441, "grad_norm": 0.0, "learning_rate": 5.821476081599502e-06, "loss": 0.9939, "step": 5196 }, { "epoch": 0.6482474741175003, "grad_norm": 0.0, "learning_rate": 5.817805918821937e-06, "loss": 0.8779, "step": 5197 }, { "epoch": 0.6483722090557565, "grad_norm": 0.0, "learning_rate": 5.814136438683879e-06, "loss": 0.9384, "step": 5198 }, { "epoch": 0.6484969439940127, "grad_norm": 0.0, "learning_rate": 5.810467641784285e-06, "loss": 0.935, "step": 5199 }, { "epoch": 0.648621678932269, "grad_norm": 0.0, "learning_rate": 5.806799528721985e-06, "loss": 0.9694, "step": 5200 }, { "epoch": 0.6487464138705251, "grad_norm": 0.0, "learning_rate": 5.80313210009571e-06, "loss": 0.9065, "step": 5201 }, { "epoch": 0.6488711488087814, "grad_norm": 0.0, "learning_rate": 5.7994653565040806e-06, "loss": 0.9526, "step": 5202 }, { "epoch": 0.6489958837470375, "grad_norm": 0.0, "learning_rate": 5.795799298545599e-06, "loss": 0.8874, "step": 5203 }, { "epoch": 0.6491206186852938, "grad_norm": 0.0, "learning_rate": 5.792133926818664e-06, "loss": 0.9618, "step": 5204 }, { "epoch": 0.64924535362355, "grad_norm": 0.0, "learning_rate": 5.788469241921546e-06, "loss": 0.9105, "step": 5205 }, { "epoch": 0.6493700885618061, "grad_norm": 0.0, "learning_rate": 5.7848052444524215e-06, "loss": 0.9262, "step": 5206 }, { "epoch": 0.6494948235000624, "grad_norm": 0.0, "learning_rate": 5.781141935009344e-06, "loss": 0.9298, "step": 5207 }, { "epoch": 0.6496195584383185, "grad_norm": 0.0, "learning_rate": 5.777479314190266e-06, "loss": 0.9188, "step": 5208 }, { "epoch": 0.6497442933765748, "grad_norm": 0.0, "learning_rate": 5.773817382593008e-06, "loss": 0.9482, "step": 5209 }, { "epoch": 0.649869028314831, "grad_norm": 0.0, "learning_rate": 5.770156140815293e-06, "loss": 0.9303, "step": 5210 }, { "epoch": 0.6499937632530872, "grad_norm": 0.0, "learning_rate": 5.7664955894547345e-06, "loss": 0.9316, "step": 5211 }, { "epoch": 0.6501184981913434, "grad_norm": 0.0, "learning_rate": 5.762835729108815e-06, "loss": 0.9449, "step": 5212 }, { "epoch": 0.6502432331295996, "grad_norm": 0.0, "learning_rate": 5.75917656037493e-06, "loss": 0.9176, "step": 5213 }, { "epoch": 0.6503679680678558, "grad_norm": 0.0, "learning_rate": 5.755518083850338e-06, "loss": 0.9307, "step": 5214 }, { "epoch": 0.6504927030061121, "grad_norm": 0.0, "learning_rate": 5.751860300132195e-06, "loss": 0.9102, "step": 5215 }, { "epoch": 0.6506174379443682, "grad_norm": 0.0, "learning_rate": 5.7482032098175515e-06, "loss": 0.9304, "step": 5216 }, { "epoch": 0.6507421728826244, "grad_norm": 0.0, "learning_rate": 5.744546813503327e-06, "loss": 0.9393, "step": 5217 }, { "epoch": 0.6508669078208806, "grad_norm": 0.0, "learning_rate": 5.740891111786342e-06, "loss": 0.926, "step": 5218 }, { "epoch": 0.6509916427591368, "grad_norm": 0.0, "learning_rate": 5.7372361052633e-06, "loss": 0.8736, "step": 5219 }, { "epoch": 0.6511163776973931, "grad_norm": 0.0, "learning_rate": 5.733581794530793e-06, "loss": 0.933, "step": 5220 }, { "epoch": 0.6512411126356492, "grad_norm": 0.0, "learning_rate": 5.729928180185288e-06, "loss": 0.9216, "step": 5221 }, { "epoch": 0.6513658475739055, "grad_norm": 0.0, "learning_rate": 5.7262752628231525e-06, "loss": 0.8986, "step": 5222 }, { "epoch": 0.6514905825121616, "grad_norm": 0.0, "learning_rate": 5.722623043040634e-06, "loss": 0.8854, "step": 5223 }, { "epoch": 0.6516153174504179, "grad_norm": 0.0, "learning_rate": 5.718971521433867e-06, "loss": 0.8735, "step": 5224 }, { "epoch": 0.651740052388674, "grad_norm": 0.0, "learning_rate": 5.715320698598875e-06, "loss": 0.9413, "step": 5225 }, { "epoch": 0.6518647873269303, "grad_norm": 0.0, "learning_rate": 5.711670575131558e-06, "loss": 0.9172, "step": 5226 }, { "epoch": 0.6519895222651865, "grad_norm": 0.0, "learning_rate": 5.708021151627712e-06, "loss": 0.942, "step": 5227 }, { "epoch": 0.6521142572034427, "grad_norm": 0.0, "learning_rate": 5.704372428683016e-06, "loss": 0.9743, "step": 5228 }, { "epoch": 0.6522389921416989, "grad_norm": 0.0, "learning_rate": 5.700724406893025e-06, "loss": 0.9002, "step": 5229 }, { "epoch": 0.652363727079955, "grad_norm": 0.0, "learning_rate": 5.697077086853205e-06, "loss": 0.9444, "step": 5230 }, { "epoch": 0.6524884620182113, "grad_norm": 0.0, "learning_rate": 5.693430469158878e-06, "loss": 0.8815, "step": 5231 }, { "epoch": 0.6526131969564675, "grad_norm": 0.0, "learning_rate": 5.689784554405258e-06, "loss": 0.9128, "step": 5232 }, { "epoch": 0.6527379318947237, "grad_norm": 0.0, "learning_rate": 5.686139343187468e-06, "loss": 0.8808, "step": 5233 }, { "epoch": 0.6528626668329799, "grad_norm": 0.0, "learning_rate": 5.682494836100485e-06, "loss": 0.9132, "step": 5234 }, { "epoch": 0.6529874017712362, "grad_norm": 0.0, "learning_rate": 5.6788510337391875e-06, "loss": 0.861, "step": 5235 }, { "epoch": 0.6531121367094923, "grad_norm": 0.0, "learning_rate": 5.675207936698337e-06, "loss": 0.9151, "step": 5236 }, { "epoch": 0.6532368716477486, "grad_norm": 0.0, "learning_rate": 5.671565545572584e-06, "loss": 0.9229, "step": 5237 }, { "epoch": 0.6533616065860047, "grad_norm": 0.0, "learning_rate": 5.667923860956448e-06, "loss": 0.9392, "step": 5238 }, { "epoch": 0.653486341524261, "grad_norm": 0.0, "learning_rate": 5.66428288344435e-06, "loss": 0.925, "step": 5239 }, { "epoch": 0.6536110764625171, "grad_norm": 0.0, "learning_rate": 5.660642613630587e-06, "loss": 0.9286, "step": 5240 }, { "epoch": 0.6537358114007733, "grad_norm": 0.0, "learning_rate": 5.657003052109344e-06, "loss": 0.9113, "step": 5241 }, { "epoch": 0.6538605463390296, "grad_norm": 0.0, "learning_rate": 5.6533641994746936e-06, "loss": 0.9329, "step": 5242 }, { "epoch": 0.6539852812772857, "grad_norm": 0.0, "learning_rate": 5.649726056320579e-06, "loss": 0.9539, "step": 5243 }, { "epoch": 0.654110016215542, "grad_norm": 0.0, "learning_rate": 5.6460886232408445e-06, "loss": 0.8781, "step": 5244 }, { "epoch": 0.6542347511537981, "grad_norm": 0.0, "learning_rate": 5.642451900829209e-06, "loss": 0.8897, "step": 5245 }, { "epoch": 0.6543594860920544, "grad_norm": 0.0, "learning_rate": 5.63881588967927e-06, "loss": 0.9473, "step": 5246 }, { "epoch": 0.6544842210303106, "grad_norm": 0.0, "learning_rate": 5.63518059038453e-06, "loss": 0.9048, "step": 5247 }, { "epoch": 0.6546089559685668, "grad_norm": 0.0, "learning_rate": 5.631546003538355e-06, "loss": 0.9013, "step": 5248 }, { "epoch": 0.654733690906823, "grad_norm": 0.0, "learning_rate": 5.627912129733992e-06, "loss": 0.9169, "step": 5249 }, { "epoch": 0.6548584258450793, "grad_norm": 0.0, "learning_rate": 5.624278969564597e-06, "loss": 0.8939, "step": 5250 }, { "epoch": 0.6549831607833354, "grad_norm": 0.0, "learning_rate": 5.620646523623181e-06, "loss": 0.9679, "step": 5251 }, { "epoch": 0.6551078957215917, "grad_norm": 0.0, "learning_rate": 5.617014792502657e-06, "loss": 0.8899, "step": 5252 }, { "epoch": 0.6552326306598478, "grad_norm": 0.0, "learning_rate": 5.613383776795811e-06, "loss": 0.9479, "step": 5253 }, { "epoch": 0.655357365598104, "grad_norm": 0.0, "learning_rate": 5.609753477095324e-06, "loss": 0.9063, "step": 5254 }, { "epoch": 0.6554821005363602, "grad_norm": 0.0, "learning_rate": 5.606123893993742e-06, "loss": 0.9154, "step": 5255 }, { "epoch": 0.6556068354746164, "grad_norm": 0.0, "learning_rate": 5.602495028083511e-06, "loss": 0.9453, "step": 5256 }, { "epoch": 0.6557315704128727, "grad_norm": 0.0, "learning_rate": 5.598866879956955e-06, "loss": 0.9258, "step": 5257 }, { "epoch": 0.6558563053511288, "grad_norm": 0.0, "learning_rate": 5.5952394502062694e-06, "loss": 0.9059, "step": 5258 }, { "epoch": 0.6559810402893851, "grad_norm": 0.0, "learning_rate": 5.591612739423555e-06, "loss": 0.9085, "step": 5259 }, { "epoch": 0.6561057752276412, "grad_norm": 0.0, "learning_rate": 5.587986748200773e-06, "loss": 0.8987, "step": 5260 }, { "epoch": 0.6562305101658975, "grad_norm": 0.0, "learning_rate": 5.584361477129779e-06, "loss": 0.9408, "step": 5261 }, { "epoch": 0.6563552451041537, "grad_norm": 0.0, "learning_rate": 5.580736926802315e-06, "loss": 0.8975, "step": 5262 }, { "epoch": 0.6564799800424099, "grad_norm": 0.0, "learning_rate": 5.5771130978099896e-06, "loss": 0.8984, "step": 5263 }, { "epoch": 0.6566047149806661, "grad_norm": 0.0, "learning_rate": 5.573489990744305e-06, "loss": 0.933, "step": 5264 }, { "epoch": 0.6567294499189222, "grad_norm": 0.0, "learning_rate": 5.569867606196652e-06, "loss": 0.9423, "step": 5265 }, { "epoch": 0.6568541848571785, "grad_norm": 0.0, "learning_rate": 5.566245944758279e-06, "loss": 0.9248, "step": 5266 }, { "epoch": 0.6569789197954347, "grad_norm": 0.0, "learning_rate": 5.562625007020351e-06, "loss": 0.9762, "step": 5267 }, { "epoch": 0.6571036547336909, "grad_norm": 0.0, "learning_rate": 5.559004793573883e-06, "loss": 0.9146, "step": 5268 }, { "epoch": 0.6572283896719471, "grad_norm": 0.0, "learning_rate": 5.555385305009791e-06, "loss": 0.94, "step": 5269 }, { "epoch": 0.6573531246102033, "grad_norm": 0.0, "learning_rate": 5.5517665419188635e-06, "loss": 0.9202, "step": 5270 }, { "epoch": 0.6574778595484595, "grad_norm": 0.0, "learning_rate": 5.548148504891779e-06, "loss": 0.9087, "step": 5271 }, { "epoch": 0.6576025944867158, "grad_norm": 0.0, "learning_rate": 5.5445311945190875e-06, "loss": 0.9177, "step": 5272 }, { "epoch": 0.6577273294249719, "grad_norm": 0.0, "learning_rate": 5.540914611391226e-06, "loss": 0.9765, "step": 5273 }, { "epoch": 0.6578520643632282, "grad_norm": 0.0, "learning_rate": 5.5372987560985155e-06, "loss": 0.9277, "step": 5274 }, { "epoch": 0.6579767993014843, "grad_norm": 0.0, "learning_rate": 5.533683629231146e-06, "loss": 0.858, "step": 5275 }, { "epoch": 0.6581015342397406, "grad_norm": 0.0, "learning_rate": 5.5300692313792095e-06, "loss": 0.947, "step": 5276 }, { "epoch": 0.6582262691779968, "grad_norm": 0.0, "learning_rate": 5.5264555631326625e-06, "loss": 0.9492, "step": 5277 }, { "epoch": 0.6583510041162529, "grad_norm": 0.0, "learning_rate": 5.522842625081335e-06, "loss": 0.8912, "step": 5278 }, { "epoch": 0.6584757390545092, "grad_norm": 0.0, "learning_rate": 5.5192304178149695e-06, "loss": 0.8956, "step": 5279 }, { "epoch": 0.6586004739927653, "grad_norm": 0.0, "learning_rate": 5.515618941923154e-06, "loss": 0.9439, "step": 5280 }, { "epoch": 0.6587252089310216, "grad_norm": 0.0, "learning_rate": 5.512008197995379e-06, "loss": 0.9176, "step": 5281 }, { "epoch": 0.6588499438692778, "grad_norm": 0.0, "learning_rate": 5.508398186621011e-06, "loss": 0.9071, "step": 5282 }, { "epoch": 0.658974678807534, "grad_norm": 0.0, "learning_rate": 5.504788908389287e-06, "loss": 0.9169, "step": 5283 }, { "epoch": 0.6590994137457902, "grad_norm": 0.0, "learning_rate": 5.501180363889338e-06, "loss": 0.922, "step": 5284 }, { "epoch": 0.6592241486840464, "grad_norm": 0.0, "learning_rate": 5.497572553710167e-06, "loss": 0.9315, "step": 5285 }, { "epoch": 0.6593488836223026, "grad_norm": 0.0, "learning_rate": 5.493965478440662e-06, "loss": 0.9318, "step": 5286 }, { "epoch": 0.6594736185605589, "grad_norm": 0.0, "learning_rate": 5.490359138669588e-06, "loss": 0.909, "step": 5287 }, { "epoch": 0.659598353498815, "grad_norm": 0.0, "learning_rate": 5.486753534985591e-06, "loss": 0.9249, "step": 5288 }, { "epoch": 0.6597230884370712, "grad_norm": 0.0, "learning_rate": 5.483148667977194e-06, "loss": 0.9413, "step": 5289 }, { "epoch": 0.6598478233753274, "grad_norm": 0.0, "learning_rate": 5.479544538232804e-06, "loss": 0.9541, "step": 5290 }, { "epoch": 0.6599725583135836, "grad_norm": 0.0, "learning_rate": 5.4759411463407075e-06, "loss": 0.9458, "step": 5291 }, { "epoch": 0.6600972932518399, "grad_norm": 0.0, "learning_rate": 5.47233849288906e-06, "loss": 0.9066, "step": 5292 }, { "epoch": 0.660222028190096, "grad_norm": 0.0, "learning_rate": 5.468736578465918e-06, "loss": 0.9135, "step": 5293 }, { "epoch": 0.6603467631283523, "grad_norm": 0.0, "learning_rate": 5.465135403659201e-06, "loss": 0.9516, "step": 5294 }, { "epoch": 0.6604714980666084, "grad_norm": 0.0, "learning_rate": 5.461534969056701e-06, "loss": 0.9331, "step": 5295 }, { "epoch": 0.6605962330048647, "grad_norm": 0.0, "learning_rate": 5.457935275246116e-06, "loss": 0.8872, "step": 5296 }, { "epoch": 0.6607209679431209, "grad_norm": 0.0, "learning_rate": 5.454336322814995e-06, "loss": 0.9489, "step": 5297 }, { "epoch": 0.6608457028813771, "grad_norm": 0.0, "learning_rate": 5.450738112350782e-06, "loss": 0.9075, "step": 5298 }, { "epoch": 0.6609704378196333, "grad_norm": 0.0, "learning_rate": 5.447140644440798e-06, "loss": 0.9418, "step": 5299 }, { "epoch": 0.6610951727578895, "grad_norm": 0.0, "learning_rate": 5.443543919672236e-06, "loss": 0.9279, "step": 5300 }, { "epoch": 0.6612199076961457, "grad_norm": 0.0, "learning_rate": 5.4399479386321714e-06, "loss": 0.9027, "step": 5301 }, { "epoch": 0.6613446426344018, "grad_norm": 0.0, "learning_rate": 5.436352701907563e-06, "loss": 0.966, "step": 5302 }, { "epoch": 0.6614693775726581, "grad_norm": 0.0, "learning_rate": 5.432758210085247e-06, "loss": 0.8905, "step": 5303 }, { "epoch": 0.6615941125109143, "grad_norm": 0.0, "learning_rate": 5.429164463751921e-06, "loss": 0.8639, "step": 5304 }, { "epoch": 0.6617188474491705, "grad_norm": 0.0, "learning_rate": 5.4255714634941934e-06, "loss": 0.9463, "step": 5305 }, { "epoch": 0.6618435823874267, "grad_norm": 0.0, "learning_rate": 5.42197920989852e-06, "loss": 0.9027, "step": 5306 }, { "epoch": 0.661968317325683, "grad_norm": 0.0, "learning_rate": 5.418387703551251e-06, "loss": 0.9243, "step": 5307 }, { "epoch": 0.6620930522639391, "grad_norm": 0.0, "learning_rate": 5.414796945038614e-06, "loss": 0.9335, "step": 5308 }, { "epoch": 0.6622177872021954, "grad_norm": 0.0, "learning_rate": 5.411206934946703e-06, "loss": 0.9131, "step": 5309 }, { "epoch": 0.6623425221404515, "grad_norm": 0.0, "learning_rate": 5.407617673861505e-06, "loss": 0.9224, "step": 5310 }, { "epoch": 0.6624672570787078, "grad_norm": 0.0, "learning_rate": 5.404029162368878e-06, "loss": 0.9101, "step": 5311 }, { "epoch": 0.662591992016964, "grad_norm": 0.0, "learning_rate": 5.400441401054548e-06, "loss": 0.9368, "step": 5312 }, { "epoch": 0.6627167269552201, "grad_norm": 0.0, "learning_rate": 5.3968543905041446e-06, "loss": 0.9087, "step": 5313 }, { "epoch": 0.6628414618934764, "grad_norm": 0.0, "learning_rate": 5.393268131303144e-06, "loss": 0.9178, "step": 5314 }, { "epoch": 0.6629661968317325, "grad_norm": 0.0, "learning_rate": 5.38968262403692e-06, "loss": 0.9019, "step": 5315 }, { "epoch": 0.6630909317699888, "grad_norm": 0.0, "learning_rate": 5.386097869290722e-06, "loss": 0.9364, "step": 5316 }, { "epoch": 0.6632156667082449, "grad_norm": 0.0, "learning_rate": 5.382513867649663e-06, "loss": 0.9122, "step": 5317 }, { "epoch": 0.6633404016465012, "grad_norm": 0.0, "learning_rate": 5.378930619698747e-06, "loss": 1.0189, "step": 5318 }, { "epoch": 0.6634651365847574, "grad_norm": 0.0, "learning_rate": 5.37534812602285e-06, "loss": 0.9303, "step": 5319 }, { "epoch": 0.6635898715230136, "grad_norm": 0.0, "learning_rate": 5.3717663872067295e-06, "loss": 0.9296, "step": 5320 }, { "epoch": 0.6637146064612698, "grad_norm": 0.0, "learning_rate": 5.3681854038350044e-06, "loss": 0.8876, "step": 5321 }, { "epoch": 0.663839341399526, "grad_norm": 0.0, "learning_rate": 5.364605176492196e-06, "loss": 0.9211, "step": 5322 }, { "epoch": 0.6639640763377822, "grad_norm": 0.0, "learning_rate": 5.3610257057626765e-06, "loss": 0.9373, "step": 5323 }, { "epoch": 0.6640888112760385, "grad_norm": 0.0, "learning_rate": 5.3574469922307105e-06, "loss": 0.8926, "step": 5324 }, { "epoch": 0.6642135462142946, "grad_norm": 0.0, "learning_rate": 5.353869036480437e-06, "loss": 0.9649, "step": 5325 }, { "epoch": 0.6643382811525508, "grad_norm": 0.0, "learning_rate": 5.35029183909586e-06, "loss": 0.8905, "step": 5326 }, { "epoch": 0.664463016090807, "grad_norm": 0.0, "learning_rate": 5.346715400660875e-06, "loss": 0.8629, "step": 5327 }, { "epoch": 0.6645877510290632, "grad_norm": 0.0, "learning_rate": 5.343139721759248e-06, "loss": 0.9118, "step": 5328 }, { "epoch": 0.6647124859673195, "grad_norm": 0.0, "learning_rate": 5.339564802974615e-06, "loss": 0.9056, "step": 5329 }, { "epoch": 0.6648372209055756, "grad_norm": 0.0, "learning_rate": 5.335990644890493e-06, "loss": 0.884, "step": 5330 }, { "epoch": 0.6649619558438319, "grad_norm": 0.0, "learning_rate": 5.332417248090278e-06, "loss": 0.9268, "step": 5331 }, { "epoch": 0.665086690782088, "grad_norm": 0.0, "learning_rate": 5.328844613157236e-06, "loss": 0.8844, "step": 5332 }, { "epoch": 0.6652114257203443, "grad_norm": 0.0, "learning_rate": 5.325272740674517e-06, "loss": 0.9096, "step": 5333 }, { "epoch": 0.6653361606586005, "grad_norm": 0.0, "learning_rate": 5.321701631225133e-06, "loss": 0.9037, "step": 5334 }, { "epoch": 0.6654608955968567, "grad_norm": 0.0, "learning_rate": 5.318131285391981e-06, "loss": 0.9876, "step": 5335 }, { "epoch": 0.6655856305351129, "grad_norm": 0.0, "learning_rate": 5.314561703757832e-06, "loss": 0.9016, "step": 5336 }, { "epoch": 0.665710365473369, "grad_norm": 0.0, "learning_rate": 5.310992886905337e-06, "loss": 0.9746, "step": 5337 }, { "epoch": 0.6658351004116253, "grad_norm": 0.0, "learning_rate": 5.307424835417003e-06, "loss": 0.9462, "step": 5338 }, { "epoch": 0.6659598353498815, "grad_norm": 0.0, "learning_rate": 5.3038575498752444e-06, "loss": 0.9368, "step": 5339 }, { "epoch": 0.6660845702881377, "grad_norm": 0.0, "learning_rate": 5.300291030862322e-06, "loss": 0.9147, "step": 5340 }, { "epoch": 0.6662093052263939, "grad_norm": 0.0, "learning_rate": 5.296725278960375e-06, "loss": 0.9397, "step": 5341 }, { "epoch": 0.6663340401646501, "grad_norm": 0.0, "learning_rate": 5.293160294751438e-06, "loss": 0.9542, "step": 5342 }, { "epoch": 0.6664587751029063, "grad_norm": 0.0, "learning_rate": 5.289596078817396e-06, "loss": 0.9134, "step": 5343 }, { "epoch": 0.6665835100411626, "grad_norm": 0.0, "learning_rate": 5.286032631740023e-06, "loss": 0.9215, "step": 5344 }, { "epoch": 0.6667082449794187, "grad_norm": 0.0, "learning_rate": 5.282469954100968e-06, "loss": 0.9446, "step": 5345 }, { "epoch": 0.666832979917675, "grad_norm": 0.0, "learning_rate": 5.278908046481739e-06, "loss": 0.9566, "step": 5346 }, { "epoch": 0.6669577148559311, "grad_norm": 0.0, "learning_rate": 5.275346909463737e-06, "loss": 0.9377, "step": 5347 }, { "epoch": 0.6670824497941874, "grad_norm": 0.0, "learning_rate": 5.271786543628225e-06, "loss": 0.8979, "step": 5348 }, { "epoch": 0.6672071847324436, "grad_norm": 0.0, "learning_rate": 5.268226949556347e-06, "loss": 0.8913, "step": 5349 }, { "epoch": 0.6673319196706997, "grad_norm": 0.0, "learning_rate": 5.264668127829122e-06, "loss": 0.97, "step": 5350 }, { "epoch": 0.667456654608956, "grad_norm": 0.0, "learning_rate": 5.261110079027433e-06, "loss": 0.8922, "step": 5351 }, { "epoch": 0.6675813895472121, "grad_norm": 0.0, "learning_rate": 5.257552803732044e-06, "loss": 0.9172, "step": 5352 }, { "epoch": 0.6677061244854684, "grad_norm": 0.0, "learning_rate": 5.253996302523596e-06, "loss": 0.9109, "step": 5353 }, { "epoch": 0.6678308594237246, "grad_norm": 0.0, "learning_rate": 5.250440575982601e-06, "loss": 0.9607, "step": 5354 }, { "epoch": 0.6679555943619808, "grad_norm": 0.0, "learning_rate": 5.246885624689434e-06, "loss": 0.9237, "step": 5355 }, { "epoch": 0.668080329300237, "grad_norm": 0.0, "learning_rate": 5.243331449224359e-06, "loss": 0.911, "step": 5356 }, { "epoch": 0.6682050642384932, "grad_norm": 0.0, "learning_rate": 5.239778050167511e-06, "loss": 0.9371, "step": 5357 }, { "epoch": 0.6683297991767494, "grad_norm": 0.0, "learning_rate": 5.236225428098882e-06, "loss": 0.8861, "step": 5358 }, { "epoch": 0.6684545341150057, "grad_norm": 0.0, "learning_rate": 5.232673583598365e-06, "loss": 0.8691, "step": 5359 }, { "epoch": 0.6685792690532618, "grad_norm": 0.0, "learning_rate": 5.229122517245698e-06, "loss": 0.9249, "step": 5360 }, { "epoch": 0.668704003991518, "grad_norm": 0.0, "learning_rate": 5.2255722296205104e-06, "loss": 0.9056, "step": 5361 }, { "epoch": 0.6688287389297742, "grad_norm": 0.0, "learning_rate": 5.2220227213023e-06, "loss": 0.8859, "step": 5362 }, { "epoch": 0.6689534738680304, "grad_norm": 0.0, "learning_rate": 5.218473992870432e-06, "loss": 0.9037, "step": 5363 }, { "epoch": 0.6690782088062867, "grad_norm": 0.0, "learning_rate": 5.214926044904147e-06, "loss": 0.9441, "step": 5364 }, { "epoch": 0.6692029437445428, "grad_norm": 0.0, "learning_rate": 5.211378877982565e-06, "loss": 0.9552, "step": 5365 }, { "epoch": 0.6693276786827991, "grad_norm": 0.0, "learning_rate": 5.207832492684674e-06, "loss": 0.8454, "step": 5366 }, { "epoch": 0.6694524136210552, "grad_norm": 0.0, "learning_rate": 5.204286889589325e-06, "loss": 0.9179, "step": 5367 }, { "epoch": 0.6695771485593115, "grad_norm": 0.0, "learning_rate": 5.200742069275255e-06, "loss": 0.9022, "step": 5368 }, { "epoch": 0.6697018834975677, "grad_norm": 0.0, "learning_rate": 5.197198032321069e-06, "loss": 0.8773, "step": 5369 }, { "epoch": 0.6698266184358239, "grad_norm": 0.0, "learning_rate": 5.193654779305241e-06, "loss": 0.9066, "step": 5370 }, { "epoch": 0.6699513533740801, "grad_norm": 0.0, "learning_rate": 5.190112310806126e-06, "loss": 0.9315, "step": 5371 }, { "epoch": 0.6700760883123363, "grad_norm": 0.0, "learning_rate": 5.186570627401932e-06, "loss": 0.9305, "step": 5372 }, { "epoch": 0.6702008232505925, "grad_norm": 0.0, "learning_rate": 5.18302972967076e-06, "loss": 0.9117, "step": 5373 }, { "epoch": 0.6703255581888486, "grad_norm": 0.0, "learning_rate": 5.179489618190574e-06, "loss": 0.8818, "step": 5374 }, { "epoch": 0.6704502931271049, "grad_norm": 0.0, "learning_rate": 5.175950293539198e-06, "loss": 0.8803, "step": 5375 }, { "epoch": 0.6705750280653611, "grad_norm": 0.0, "learning_rate": 5.172411756294358e-06, "loss": 0.9047, "step": 5376 }, { "epoch": 0.6706997630036173, "grad_norm": 0.0, "learning_rate": 5.168874007033615e-06, "loss": 0.9762, "step": 5377 }, { "epoch": 0.6708244979418735, "grad_norm": 0.0, "learning_rate": 5.165337046334429e-06, "loss": 0.8949, "step": 5378 }, { "epoch": 0.6709492328801298, "grad_norm": 0.0, "learning_rate": 5.16180087477412e-06, "loss": 0.9083, "step": 5379 }, { "epoch": 0.6710739678183859, "grad_norm": 0.0, "learning_rate": 5.1582654929298745e-06, "loss": 0.9533, "step": 5380 }, { "epoch": 0.6711987027566422, "grad_norm": 0.0, "learning_rate": 5.154730901378762e-06, "loss": 0.8954, "step": 5381 }, { "epoch": 0.6713234376948983, "grad_norm": 0.0, "learning_rate": 5.151197100697714e-06, "loss": 0.896, "step": 5382 }, { "epoch": 0.6714481726331546, "grad_norm": 0.0, "learning_rate": 5.14766409146354e-06, "loss": 0.945, "step": 5383 }, { "epoch": 0.6715729075714107, "grad_norm": 0.0, "learning_rate": 5.14413187425291e-06, "loss": 0.9058, "step": 5384 }, { "epoch": 0.6716976425096669, "grad_norm": 0.0, "learning_rate": 5.1406004496423735e-06, "loss": 0.9362, "step": 5385 }, { "epoch": 0.6718223774479232, "grad_norm": 0.0, "learning_rate": 5.137069818208352e-06, "loss": 0.9307, "step": 5386 }, { "epoch": 0.6719471123861793, "grad_norm": 0.0, "learning_rate": 5.133539980527123e-06, "loss": 0.988, "step": 5387 }, { "epoch": 0.6720718473244356, "grad_norm": 0.0, "learning_rate": 5.130010937174858e-06, "loss": 0.9175, "step": 5388 }, { "epoch": 0.6721965822626917, "grad_norm": 0.0, "learning_rate": 5.1264826887275774e-06, "loss": 0.9412, "step": 5389 }, { "epoch": 0.672321317200948, "grad_norm": 0.0, "learning_rate": 5.122955235761182e-06, "loss": 0.9463, "step": 5390 }, { "epoch": 0.6724460521392042, "grad_norm": 0.0, "learning_rate": 5.119428578851447e-06, "loss": 0.8866, "step": 5391 }, { "epoch": 0.6725707870774604, "grad_norm": 0.0, "learning_rate": 5.115902718574001e-06, "loss": 0.9389, "step": 5392 }, { "epoch": 0.6726955220157166, "grad_norm": 0.0, "learning_rate": 5.112377655504359e-06, "loss": 0.8928, "step": 5393 }, { "epoch": 0.6728202569539729, "grad_norm": 0.0, "learning_rate": 5.1088533902179e-06, "loss": 0.951, "step": 5394 }, { "epoch": 0.672944991892229, "grad_norm": 0.0, "learning_rate": 5.105329923289872e-06, "loss": 0.9529, "step": 5395 }, { "epoch": 0.6730697268304853, "grad_norm": 0.0, "learning_rate": 5.1018072552954e-06, "loss": 0.9002, "step": 5396 }, { "epoch": 0.6731944617687414, "grad_norm": 0.0, "learning_rate": 5.098285386809463e-06, "loss": 0.9162, "step": 5397 }, { "epoch": 0.6733191967069976, "grad_norm": 0.0, "learning_rate": 5.094764318406921e-06, "loss": 0.9081, "step": 5398 }, { "epoch": 0.6734439316452538, "grad_norm": 0.0, "learning_rate": 5.0912440506625026e-06, "loss": 0.9509, "step": 5399 }, { "epoch": 0.67356866658351, "grad_norm": 0.0, "learning_rate": 5.087724584150808e-06, "loss": 0.937, "step": 5400 }, { "epoch": 0.6736934015217663, "grad_norm": 0.0, "learning_rate": 5.084205919446295e-06, "loss": 0.8796, "step": 5401 }, { "epoch": 0.6738181364600224, "grad_norm": 0.0, "learning_rate": 5.080688057123304e-06, "loss": 0.9696, "step": 5402 }, { "epoch": 0.6739428713982787, "grad_norm": 0.0, "learning_rate": 5.0771709977560404e-06, "loss": 0.906, "step": 5403 }, { "epoch": 0.6740676063365348, "grad_norm": 0.0, "learning_rate": 5.073654741918565e-06, "loss": 0.915, "step": 5404 }, { "epoch": 0.6741923412747911, "grad_norm": 0.0, "learning_rate": 5.070139290184837e-06, "loss": 0.9657, "step": 5405 }, { "epoch": 0.6743170762130473, "grad_norm": 0.0, "learning_rate": 5.066624643128653e-06, "loss": 0.9045, "step": 5406 }, { "epoch": 0.6744418111513035, "grad_norm": 0.0, "learning_rate": 5.063110801323697e-06, "loss": 0.9155, "step": 5407 }, { "epoch": 0.6745665460895597, "grad_norm": 0.0, "learning_rate": 5.059597765343521e-06, "loss": 0.9356, "step": 5408 }, { "epoch": 0.6746912810278158, "grad_norm": 0.0, "learning_rate": 5.056085535761532e-06, "loss": 0.913, "step": 5409 }, { "epoch": 0.6748160159660721, "grad_norm": 0.0, "learning_rate": 5.052574113151021e-06, "loss": 0.9559, "step": 5410 }, { "epoch": 0.6749407509043283, "grad_norm": 0.0, "learning_rate": 5.049063498085136e-06, "loss": 0.9126, "step": 5411 }, { "epoch": 0.6750654858425845, "grad_norm": 0.0, "learning_rate": 5.045553691136906e-06, "loss": 0.9169, "step": 5412 }, { "epoch": 0.6751902207808407, "grad_norm": 0.0, "learning_rate": 5.042044692879211e-06, "loss": 0.9102, "step": 5413 }, { "epoch": 0.6753149557190969, "grad_norm": 0.0, "learning_rate": 5.03853650388481e-06, "loss": 0.9376, "step": 5414 }, { "epoch": 0.6754396906573531, "grad_norm": 0.0, "learning_rate": 5.035029124726329e-06, "loss": 0.905, "step": 5415 }, { "epoch": 0.6755644255956094, "grad_norm": 0.0, "learning_rate": 5.031522555976263e-06, "loss": 0.9026, "step": 5416 }, { "epoch": 0.6756891605338655, "grad_norm": 0.0, "learning_rate": 5.0280167982069735e-06, "loss": 0.8835, "step": 5417 }, { "epoch": 0.6758138954721218, "grad_norm": 0.0, "learning_rate": 5.024511851990681e-06, "loss": 0.9192, "step": 5418 }, { "epoch": 0.6759386304103779, "grad_norm": 0.0, "learning_rate": 5.021007717899486e-06, "loss": 0.941, "step": 5419 }, { "epoch": 0.6760633653486342, "grad_norm": 0.0, "learning_rate": 5.017504396505355e-06, "loss": 0.9614, "step": 5420 }, { "epoch": 0.6761881002868904, "grad_norm": 0.0, "learning_rate": 5.0140018883801076e-06, "loss": 0.8937, "step": 5421 }, { "epoch": 0.6763128352251465, "grad_norm": 0.0, "learning_rate": 5.010500194095456e-06, "loss": 0.917, "step": 5422 }, { "epoch": 0.6764375701634028, "grad_norm": 0.0, "learning_rate": 5.006999314222951e-06, "loss": 0.9186, "step": 5423 }, { "epoch": 0.6765623051016589, "grad_norm": 0.0, "learning_rate": 5.003499249334034e-06, "loss": 0.9096, "step": 5424 }, { "epoch": 0.6766870400399152, "grad_norm": 0.0, "learning_rate": 5.000000000000003e-06, "loss": 0.9124, "step": 5425 }, { "epoch": 0.6768117749781714, "grad_norm": 0.0, "learning_rate": 4.996501566792017e-06, "loss": 0.9346, "step": 5426 }, { "epoch": 0.6769365099164276, "grad_norm": 0.0, "learning_rate": 4.993003950281114e-06, "loss": 0.9265, "step": 5427 }, { "epoch": 0.6770612448546838, "grad_norm": 0.0, "learning_rate": 4.98950715103819e-06, "loss": 0.8493, "step": 5428 }, { "epoch": 0.67718597979294, "grad_norm": 0.0, "learning_rate": 4.986011169634017e-06, "loss": 0.8918, "step": 5429 }, { "epoch": 0.6773107147311962, "grad_norm": 0.0, "learning_rate": 4.982516006639221e-06, "loss": 0.8991, "step": 5430 }, { "epoch": 0.6774354496694525, "grad_norm": 0.0, "learning_rate": 4.9790216626243015e-06, "loss": 0.9461, "step": 5431 }, { "epoch": 0.6775601846077086, "grad_norm": 0.0, "learning_rate": 4.975528138159629e-06, "loss": 0.9028, "step": 5432 }, { "epoch": 0.6776849195459648, "grad_norm": 0.0, "learning_rate": 4.972035433815422e-06, "loss": 0.958, "step": 5433 }, { "epoch": 0.677809654484221, "grad_norm": 0.0, "learning_rate": 4.968543550161795e-06, "loss": 0.9383, "step": 5434 }, { "epoch": 0.6779343894224772, "grad_norm": 0.0, "learning_rate": 4.965052487768699e-06, "loss": 0.879, "step": 5435 }, { "epoch": 0.6780591243607335, "grad_norm": 0.0, "learning_rate": 4.961562247205968e-06, "loss": 0.8807, "step": 5436 }, { "epoch": 0.6781838592989896, "grad_norm": 0.0, "learning_rate": 4.958072829043299e-06, "loss": 0.9858, "step": 5437 }, { "epoch": 0.6783085942372459, "grad_norm": 0.0, "learning_rate": 4.954584233850247e-06, "loss": 0.9021, "step": 5438 }, { "epoch": 0.678433329175502, "grad_norm": 0.0, "learning_rate": 4.951096462196243e-06, "loss": 0.9266, "step": 5439 }, { "epoch": 0.6785580641137583, "grad_norm": 0.0, "learning_rate": 4.9476095146505775e-06, "loss": 0.9106, "step": 5440 }, { "epoch": 0.6786827990520145, "grad_norm": 0.0, "learning_rate": 4.944123391782411e-06, "loss": 0.9066, "step": 5441 }, { "epoch": 0.6788075339902707, "grad_norm": 0.0, "learning_rate": 4.940638094160767e-06, "loss": 0.9169, "step": 5442 }, { "epoch": 0.6789322689285269, "grad_norm": 0.0, "learning_rate": 4.9371536223545295e-06, "loss": 0.9207, "step": 5443 }, { "epoch": 0.6790570038667831, "grad_norm": 0.0, "learning_rate": 4.933669976932452e-06, "loss": 0.9296, "step": 5444 }, { "epoch": 0.6791817388050393, "grad_norm": 0.0, "learning_rate": 4.930187158463158e-06, "loss": 0.8947, "step": 5445 }, { "epoch": 0.6793064737432954, "grad_norm": 0.0, "learning_rate": 4.926705167515132e-06, "loss": 1.0408, "step": 5446 }, { "epoch": 0.6794312086815517, "grad_norm": 0.0, "learning_rate": 4.923224004656716e-06, "loss": 0.9057, "step": 5447 }, { "epoch": 0.6795559436198079, "grad_norm": 0.0, "learning_rate": 4.919743670456127e-06, "loss": 0.9436, "step": 5448 }, { "epoch": 0.6796806785580641, "grad_norm": 0.0, "learning_rate": 4.916264165481448e-06, "loss": 0.9554, "step": 5449 }, { "epoch": 0.6798054134963203, "grad_norm": 0.0, "learning_rate": 4.912785490300609e-06, "loss": 0.9242, "step": 5450 }, { "epoch": 0.6799301484345766, "grad_norm": 0.0, "learning_rate": 4.9093076454814335e-06, "loss": 0.8832, "step": 5451 }, { "epoch": 0.6800548833728327, "grad_norm": 0.0, "learning_rate": 4.9058306315915826e-06, "loss": 0.9139, "step": 5452 }, { "epoch": 0.680179618311089, "grad_norm": 0.0, "learning_rate": 4.902354449198596e-06, "loss": 0.9475, "step": 5453 }, { "epoch": 0.6803043532493451, "grad_norm": 0.0, "learning_rate": 4.8988790988698774e-06, "loss": 0.905, "step": 5454 }, { "epoch": 0.6804290881876014, "grad_norm": 0.0, "learning_rate": 4.895404581172686e-06, "loss": 0.9953, "step": 5455 }, { "epoch": 0.6805538231258575, "grad_norm": 0.0, "learning_rate": 4.891930896674152e-06, "loss": 0.9483, "step": 5456 }, { "epoch": 0.6806785580641137, "grad_norm": 0.0, "learning_rate": 4.888458045941269e-06, "loss": 0.9538, "step": 5457 }, { "epoch": 0.68080329300237, "grad_norm": 0.0, "learning_rate": 4.884986029540898e-06, "loss": 0.933, "step": 5458 }, { "epoch": 0.6809280279406261, "grad_norm": 0.0, "learning_rate": 4.881514848039752e-06, "loss": 0.9078, "step": 5459 }, { "epoch": 0.6810527628788824, "grad_norm": 0.0, "learning_rate": 4.878044502004419e-06, "loss": 0.9232, "step": 5460 }, { "epoch": 0.6811774978171385, "grad_norm": 0.0, "learning_rate": 4.874574992001348e-06, "loss": 0.8729, "step": 5461 }, { "epoch": 0.6813022327553948, "grad_norm": 0.0, "learning_rate": 4.871106318596849e-06, "loss": 0.917, "step": 5462 }, { "epoch": 0.681426967693651, "grad_norm": 0.0, "learning_rate": 4.867638482357101e-06, "loss": 0.9011, "step": 5463 }, { "epoch": 0.6815517026319072, "grad_norm": 0.0, "learning_rate": 4.864171483848136e-06, "loss": 0.9809, "step": 5464 }, { "epoch": 0.6816764375701634, "grad_norm": 0.0, "learning_rate": 4.860705323635857e-06, "loss": 0.9579, "step": 5465 }, { "epoch": 0.6818011725084197, "grad_norm": 0.0, "learning_rate": 4.8572400022860345e-06, "loss": 0.9287, "step": 5466 }, { "epoch": 0.6819259074466758, "grad_norm": 0.0, "learning_rate": 4.8537755203642845e-06, "loss": 0.9135, "step": 5467 }, { "epoch": 0.6820506423849321, "grad_norm": 0.0, "learning_rate": 4.850311878436113e-06, "loss": 0.9654, "step": 5468 }, { "epoch": 0.6821753773231882, "grad_norm": 0.0, "learning_rate": 4.846849077066864e-06, "loss": 0.9123, "step": 5469 }, { "epoch": 0.6823001122614444, "grad_norm": 0.0, "learning_rate": 4.843387116821749e-06, "loss": 0.9296, "step": 5470 }, { "epoch": 0.6824248471997006, "grad_norm": 0.0, "learning_rate": 4.839925998265863e-06, "loss": 0.9041, "step": 5471 }, { "epoch": 0.6825495821379568, "grad_norm": 0.0, "learning_rate": 4.836465721964134e-06, "loss": 0.8676, "step": 5472 }, { "epoch": 0.6826743170762131, "grad_norm": 0.0, "learning_rate": 4.8330062884813714e-06, "loss": 0.8845, "step": 5473 }, { "epoch": 0.6827990520144692, "grad_norm": 0.0, "learning_rate": 4.829547698382246e-06, "loss": 0.9179, "step": 5474 }, { "epoch": 0.6829237869527255, "grad_norm": 0.0, "learning_rate": 4.8260899522312775e-06, "loss": 0.9164, "step": 5475 }, { "epoch": 0.6830485218909816, "grad_norm": 0.0, "learning_rate": 4.822633050592863e-06, "loss": 0.9421, "step": 5476 }, { "epoch": 0.6831732568292379, "grad_norm": 0.0, "learning_rate": 4.819176994031255e-06, "loss": 0.9228, "step": 5477 }, { "epoch": 0.6832979917674941, "grad_norm": 0.0, "learning_rate": 4.815721783110569e-06, "loss": 0.9095, "step": 5478 }, { "epoch": 0.6834227267057503, "grad_norm": 0.0, "learning_rate": 4.812267418394784e-06, "loss": 0.8997, "step": 5479 }, { "epoch": 0.6835474616440065, "grad_norm": 0.0, "learning_rate": 4.808813900447742e-06, "loss": 0.923, "step": 5480 }, { "epoch": 0.6836721965822626, "grad_norm": 0.0, "learning_rate": 4.805361229833136e-06, "loss": 0.9399, "step": 5481 }, { "epoch": 0.6837969315205189, "grad_norm": 0.0, "learning_rate": 4.801909407114535e-06, "loss": 0.9168, "step": 5482 }, { "epoch": 0.6839216664587751, "grad_norm": 0.0, "learning_rate": 4.798458432855364e-06, "loss": 0.9344, "step": 5483 }, { "epoch": 0.6840464013970313, "grad_norm": 0.0, "learning_rate": 4.795008307618904e-06, "loss": 0.8857, "step": 5484 }, { "epoch": 0.6841711363352875, "grad_norm": 0.0, "learning_rate": 4.791559031968304e-06, "loss": 0.9197, "step": 5485 }, { "epoch": 0.6842958712735437, "grad_norm": 0.0, "learning_rate": 4.7881106064665785e-06, "loss": 0.9332, "step": 5486 }, { "epoch": 0.6844206062117999, "grad_norm": 0.0, "learning_rate": 4.784663031676585e-06, "loss": 0.8623, "step": 5487 }, { "epoch": 0.6845453411500562, "grad_norm": 0.0, "learning_rate": 4.781216308161072e-06, "loss": 0.9097, "step": 5488 }, { "epoch": 0.6846700760883123, "grad_norm": 0.0, "learning_rate": 4.7777704364826175e-06, "loss": 0.9346, "step": 5489 }, { "epoch": 0.6847948110265686, "grad_norm": 0.0, "learning_rate": 4.774325417203678e-06, "loss": 0.9114, "step": 5490 }, { "epoch": 0.6849195459648247, "grad_norm": 0.0, "learning_rate": 4.770881250886574e-06, "loss": 0.9565, "step": 5491 }, { "epoch": 0.685044280903081, "grad_norm": 0.0, "learning_rate": 4.767437938093472e-06, "loss": 0.9139, "step": 5492 }, { "epoch": 0.6851690158413372, "grad_norm": 0.0, "learning_rate": 4.7639954793864095e-06, "loss": 0.9135, "step": 5493 }, { "epoch": 0.6852937507795933, "grad_norm": 0.0, "learning_rate": 4.760553875327284e-06, "loss": 0.9071, "step": 5494 }, { "epoch": 0.6854184857178496, "grad_norm": 0.0, "learning_rate": 4.7571131264778555e-06, "loss": 0.8622, "step": 5495 }, { "epoch": 0.6855432206561057, "grad_norm": 0.0, "learning_rate": 4.753673233399731e-06, "loss": 0.911, "step": 5496 }, { "epoch": 0.685667955594362, "grad_norm": 0.0, "learning_rate": 4.7502341966544e-06, "loss": 0.9356, "step": 5497 }, { "epoch": 0.6857926905326182, "grad_norm": 0.0, "learning_rate": 4.7467960168031905e-06, "loss": 0.9471, "step": 5498 }, { "epoch": 0.6859174254708744, "grad_norm": 0.0, "learning_rate": 4.743358694407303e-06, "loss": 0.8808, "step": 5499 }, { "epoch": 0.6860421604091306, "grad_norm": 0.0, "learning_rate": 4.7399222300278e-06, "loss": 0.8683, "step": 5500 }, { "epoch": 0.6861668953473868, "grad_norm": 0.0, "learning_rate": 4.736486624225591e-06, "loss": 0.9099, "step": 5501 }, { "epoch": 0.686291630285643, "grad_norm": 0.0, "learning_rate": 4.733051877561458e-06, "loss": 0.9519, "step": 5502 }, { "epoch": 0.6864163652238993, "grad_norm": 0.0, "learning_rate": 4.729617990596041e-06, "loss": 0.9428, "step": 5503 }, { "epoch": 0.6865411001621554, "grad_norm": 0.0, "learning_rate": 4.726184963889826e-06, "loss": 0.9136, "step": 5504 }, { "epoch": 0.6866658351004116, "grad_norm": 0.0, "learning_rate": 4.722752798003184e-06, "loss": 0.9558, "step": 5505 }, { "epoch": 0.6867905700386678, "grad_norm": 0.0, "learning_rate": 4.7193214934963204e-06, "loss": 0.9434, "step": 5506 }, { "epoch": 0.686915304976924, "grad_norm": 0.0, "learning_rate": 4.715891050929316e-06, "loss": 0.9415, "step": 5507 }, { "epoch": 0.6870400399151803, "grad_norm": 0.0, "learning_rate": 4.7124614708621055e-06, "loss": 0.9083, "step": 5508 }, { "epoch": 0.6871647748534364, "grad_norm": 0.0, "learning_rate": 4.7090327538544765e-06, "loss": 0.9236, "step": 5509 }, { "epoch": 0.6872895097916927, "grad_norm": 0.0, "learning_rate": 4.705604900466088e-06, "loss": 0.9359, "step": 5510 }, { "epoch": 0.6874142447299488, "grad_norm": 0.0, "learning_rate": 4.702177911256452e-06, "loss": 0.897, "step": 5511 }, { "epoch": 0.6875389796682051, "grad_norm": 0.0, "learning_rate": 4.6987517867849395e-06, "loss": 0.9395, "step": 5512 }, { "epoch": 0.6876637146064613, "grad_norm": 0.0, "learning_rate": 4.6953265276107725e-06, "loss": 0.9198, "step": 5513 }, { "epoch": 0.6877884495447175, "grad_norm": 0.0, "learning_rate": 4.691902134293054e-06, "loss": 0.9157, "step": 5514 }, { "epoch": 0.6879131844829737, "grad_norm": 0.0, "learning_rate": 4.688478607390723e-06, "loss": 0.8705, "step": 5515 }, { "epoch": 0.6880379194212299, "grad_norm": 0.0, "learning_rate": 4.685055947462579e-06, "loss": 0.9505, "step": 5516 }, { "epoch": 0.6881626543594861, "grad_norm": 0.0, "learning_rate": 4.681634155067301e-06, "loss": 0.9031, "step": 5517 }, { "epoch": 0.6882873892977422, "grad_norm": 0.0, "learning_rate": 4.6782132307634e-06, "loss": 0.8416, "step": 5518 }, { "epoch": 0.6884121242359985, "grad_norm": 0.0, "learning_rate": 4.674793175109261e-06, "loss": 0.9211, "step": 5519 }, { "epoch": 0.6885368591742547, "grad_norm": 0.0, "learning_rate": 4.6713739886631285e-06, "loss": 0.9324, "step": 5520 }, { "epoch": 0.6886615941125109, "grad_norm": 0.0, "learning_rate": 4.66795567198309e-06, "loss": 0.9477, "step": 5521 }, { "epoch": 0.6887863290507671, "grad_norm": 0.0, "learning_rate": 4.664538225627107e-06, "loss": 0.8675, "step": 5522 }, { "epoch": 0.6889110639890234, "grad_norm": 0.0, "learning_rate": 4.66112165015299e-06, "loss": 0.8696, "step": 5523 }, { "epoch": 0.6890357989272795, "grad_norm": 0.0, "learning_rate": 4.657705946118414e-06, "loss": 0.9055, "step": 5524 }, { "epoch": 0.6891605338655358, "grad_norm": 0.0, "learning_rate": 4.654291114080908e-06, "loss": 0.8742, "step": 5525 }, { "epoch": 0.6892852688037919, "grad_norm": 0.0, "learning_rate": 4.650877154597853e-06, "loss": 0.9267, "step": 5526 }, { "epoch": 0.6894100037420482, "grad_norm": 0.0, "learning_rate": 4.6474640682265e-06, "loss": 0.9828, "step": 5527 }, { "epoch": 0.6895347386803043, "grad_norm": 0.0, "learning_rate": 4.644051855523945e-06, "loss": 0.8863, "step": 5528 }, { "epoch": 0.6896594736185605, "grad_norm": 0.0, "learning_rate": 4.640640517047154e-06, "loss": 0.8695, "step": 5529 }, { "epoch": 0.6897842085568168, "grad_norm": 0.0, "learning_rate": 4.6372300533529314e-06, "loss": 0.9166, "step": 5530 }, { "epoch": 0.6899089434950729, "grad_norm": 0.0, "learning_rate": 4.633820464997967e-06, "loss": 0.8954, "step": 5531 }, { "epoch": 0.6900336784333292, "grad_norm": 0.0, "learning_rate": 4.630411752538784e-06, "loss": 0.9244, "step": 5532 }, { "epoch": 0.6901584133715853, "grad_norm": 0.0, "learning_rate": 4.627003916531761e-06, "loss": 0.9462, "step": 5533 }, { "epoch": 0.6902831483098416, "grad_norm": 0.0, "learning_rate": 4.623596957533159e-06, "loss": 0.9393, "step": 5534 }, { "epoch": 0.6904078832480978, "grad_norm": 0.0, "learning_rate": 4.620190876099068e-06, "loss": 0.888, "step": 5535 }, { "epoch": 0.690532618186354, "grad_norm": 0.0, "learning_rate": 4.6167856727854495e-06, "loss": 0.8911, "step": 5536 }, { "epoch": 0.6906573531246102, "grad_norm": 0.0, "learning_rate": 4.613381348148125e-06, "loss": 0.8869, "step": 5537 }, { "epoch": 0.6907820880628665, "grad_norm": 0.0, "learning_rate": 4.609977902742755e-06, "loss": 0.9454, "step": 5538 }, { "epoch": 0.6909068230011226, "grad_norm": 0.0, "learning_rate": 4.606575337124874e-06, "loss": 0.8968, "step": 5539 }, { "epoch": 0.6910315579393789, "grad_norm": 0.0, "learning_rate": 4.603173651849865e-06, "loss": 0.9262, "step": 5540 }, { "epoch": 0.691156292877635, "grad_norm": 0.0, "learning_rate": 4.599772847472973e-06, "loss": 0.9412, "step": 5541 }, { "epoch": 0.6912810278158912, "grad_norm": 0.0, "learning_rate": 4.5963729245492875e-06, "loss": 0.9253, "step": 5542 }, { "epoch": 0.6914057627541474, "grad_norm": 0.0, "learning_rate": 4.592973883633765e-06, "loss": 0.9316, "step": 5543 }, { "epoch": 0.6915304976924036, "grad_norm": 0.0, "learning_rate": 4.589575725281216e-06, "loss": 0.9238, "step": 5544 }, { "epoch": 0.6916552326306599, "grad_norm": 0.0, "learning_rate": 4.586178450046304e-06, "loss": 0.8824, "step": 5545 }, { "epoch": 0.691779967568916, "grad_norm": 0.0, "learning_rate": 4.582782058483554e-06, "loss": 0.8531, "step": 5546 }, { "epoch": 0.6919047025071723, "grad_norm": 0.0, "learning_rate": 4.579386551147336e-06, "loss": 0.9489, "step": 5547 }, { "epoch": 0.6920294374454284, "grad_norm": 0.0, "learning_rate": 4.575991928591885e-06, "loss": 1.0001, "step": 5548 }, { "epoch": 0.6921541723836847, "grad_norm": 0.0, "learning_rate": 4.572598191371294e-06, "loss": 0.9202, "step": 5549 }, { "epoch": 0.6922789073219409, "grad_norm": 0.0, "learning_rate": 4.569205340039495e-06, "loss": 0.9127, "step": 5550 }, { "epoch": 0.6924036422601971, "grad_norm": 0.0, "learning_rate": 4.565813375150302e-06, "loss": 0.9101, "step": 5551 }, { "epoch": 0.6925283771984533, "grad_norm": 0.0, "learning_rate": 4.562422297257356e-06, "loss": 0.9489, "step": 5552 }, { "epoch": 0.6926531121367095, "grad_norm": 0.0, "learning_rate": 4.559032106914173e-06, "loss": 0.8652, "step": 5553 }, { "epoch": 0.6927778470749657, "grad_norm": 0.0, "learning_rate": 4.5556428046741195e-06, "loss": 0.8766, "step": 5554 }, { "epoch": 0.6929025820132219, "grad_norm": 0.0, "learning_rate": 4.552254391090408e-06, "loss": 0.8917, "step": 5555 }, { "epoch": 0.6930273169514781, "grad_norm": 0.0, "learning_rate": 4.548866866716118e-06, "loss": 0.971, "step": 5556 }, { "epoch": 0.6931520518897343, "grad_norm": 0.0, "learning_rate": 4.545480232104176e-06, "loss": 0.9532, "step": 5557 }, { "epoch": 0.6932767868279905, "grad_norm": 0.0, "learning_rate": 4.542094487807372e-06, "loss": 0.8886, "step": 5558 }, { "epoch": 0.6934015217662467, "grad_norm": 0.0, "learning_rate": 4.538709634378335e-06, "loss": 0.9351, "step": 5559 }, { "epoch": 0.693526256704503, "grad_norm": 0.0, "learning_rate": 4.535325672369567e-06, "loss": 0.9153, "step": 5560 }, { "epoch": 0.6936509916427591, "grad_norm": 0.0, "learning_rate": 4.531942602333416e-06, "loss": 0.9519, "step": 5561 }, { "epoch": 0.6937757265810154, "grad_norm": 0.0, "learning_rate": 4.528560424822072e-06, "loss": 0.8818, "step": 5562 }, { "epoch": 0.6939004615192715, "grad_norm": 0.0, "learning_rate": 4.52517914038761e-06, "loss": 0.8959, "step": 5563 }, { "epoch": 0.6940251964575278, "grad_norm": 0.0, "learning_rate": 4.521798749581927e-06, "loss": 0.8509, "step": 5564 }, { "epoch": 0.694149931395784, "grad_norm": 0.0, "learning_rate": 4.518419252956794e-06, "loss": 0.9566, "step": 5565 }, { "epoch": 0.6942746663340401, "grad_norm": 0.0, "learning_rate": 4.5150406510638326e-06, "loss": 0.9149, "step": 5566 }, { "epoch": 0.6943994012722964, "grad_norm": 0.0, "learning_rate": 4.511662944454507e-06, "loss": 0.9174, "step": 5567 }, { "epoch": 0.6945241362105525, "grad_norm": 0.0, "learning_rate": 4.508286133680149e-06, "loss": 0.8963, "step": 5568 }, { "epoch": 0.6946488711488088, "grad_norm": 0.0, "learning_rate": 4.504910219291941e-06, "loss": 0.9363, "step": 5569 }, { "epoch": 0.694773606087065, "grad_norm": 0.0, "learning_rate": 4.501535201840913e-06, "loss": 0.8914, "step": 5570 }, { "epoch": 0.6948983410253212, "grad_norm": 0.0, "learning_rate": 4.498161081877961e-06, "loss": 0.895, "step": 5571 }, { "epoch": 0.6950230759635774, "grad_norm": 0.0, "learning_rate": 4.494787859953817e-06, "loss": 0.942, "step": 5572 }, { "epoch": 0.6951478109018336, "grad_norm": 0.0, "learning_rate": 4.491415536619081e-06, "loss": 0.9435, "step": 5573 }, { "epoch": 0.6952725458400898, "grad_norm": 0.0, "learning_rate": 4.488044112424198e-06, "loss": 0.9259, "step": 5574 }, { "epoch": 0.6953972807783461, "grad_norm": 0.0, "learning_rate": 4.484673587919477e-06, "loss": 0.9498, "step": 5575 }, { "epoch": 0.6955220157166022, "grad_norm": 0.0, "learning_rate": 4.481303963655061e-06, "loss": 0.9352, "step": 5576 }, { "epoch": 0.6956467506548585, "grad_norm": 0.0, "learning_rate": 4.477935240180966e-06, "loss": 0.9119, "step": 5577 }, { "epoch": 0.6957714855931146, "grad_norm": 0.0, "learning_rate": 4.474567418047053e-06, "loss": 0.9193, "step": 5578 }, { "epoch": 0.6958962205313708, "grad_norm": 0.0, "learning_rate": 4.471200497803025e-06, "loss": 0.9344, "step": 5579 }, { "epoch": 0.6960209554696271, "grad_norm": 0.0, "learning_rate": 4.467834479998464e-06, "loss": 0.8643, "step": 5580 }, { "epoch": 0.6961456904078832, "grad_norm": 0.0, "learning_rate": 4.464469365182778e-06, "loss": 0.9312, "step": 5581 }, { "epoch": 0.6962704253461395, "grad_norm": 0.0, "learning_rate": 4.461105153905241e-06, "loss": 0.945, "step": 5582 }, { "epoch": 0.6963951602843956, "grad_norm": 0.0, "learning_rate": 4.457741846714981e-06, "loss": 0.875, "step": 5583 }, { "epoch": 0.6965198952226519, "grad_norm": 0.0, "learning_rate": 4.454379444160966e-06, "loss": 0.9396, "step": 5584 }, { "epoch": 0.696644630160908, "grad_norm": 0.0, "learning_rate": 4.4510179467920325e-06, "loss": 0.941, "step": 5585 }, { "epoch": 0.6967693650991643, "grad_norm": 0.0, "learning_rate": 4.447657355156858e-06, "loss": 0.9618, "step": 5586 }, { "epoch": 0.6968941000374205, "grad_norm": 0.0, "learning_rate": 4.444297669803981e-06, "loss": 0.9212, "step": 5587 }, { "epoch": 0.6970188349756767, "grad_norm": 0.0, "learning_rate": 4.440938891281779e-06, "loss": 0.941, "step": 5588 }, { "epoch": 0.6971435699139329, "grad_norm": 0.0, "learning_rate": 4.437581020138493e-06, "loss": 0.9181, "step": 5589 }, { "epoch": 0.697268304852189, "grad_norm": 0.0, "learning_rate": 4.434224056922212e-06, "loss": 0.8808, "step": 5590 }, { "epoch": 0.6973930397904453, "grad_norm": 0.0, "learning_rate": 4.430868002180877e-06, "loss": 0.9068, "step": 5591 }, { "epoch": 0.6975177747287015, "grad_norm": 0.0, "learning_rate": 4.427512856462285e-06, "loss": 0.9411, "step": 5592 }, { "epoch": 0.6976425096669577, "grad_norm": 0.0, "learning_rate": 4.424158620314073e-06, "loss": 0.9059, "step": 5593 }, { "epoch": 0.6977672446052139, "grad_norm": 0.0, "learning_rate": 4.42080529428374e-06, "loss": 0.9142, "step": 5594 }, { "epoch": 0.6978919795434702, "grad_norm": 0.0, "learning_rate": 4.417452878918638e-06, "loss": 0.9088, "step": 5595 }, { "epoch": 0.6980167144817263, "grad_norm": 0.0, "learning_rate": 4.414101374765953e-06, "loss": 0.8844, "step": 5596 }, { "epoch": 0.6981414494199826, "grad_norm": 0.0, "learning_rate": 4.410750782372751e-06, "loss": 0.9195, "step": 5597 }, { "epoch": 0.6982661843582387, "grad_norm": 0.0, "learning_rate": 4.407401102285923e-06, "loss": 0.9731, "step": 5598 }, { "epoch": 0.698390919296495, "grad_norm": 0.0, "learning_rate": 4.4040523350522224e-06, "loss": 0.9417, "step": 5599 }, { "epoch": 0.6985156542347511, "grad_norm": 0.0, "learning_rate": 4.400704481218259e-06, "loss": 0.8774, "step": 5600 }, { "epoch": 0.6986403891730074, "grad_norm": 0.0, "learning_rate": 4.3973575413304755e-06, "loss": 0.9091, "step": 5601 }, { "epoch": 0.6987651241112636, "grad_norm": 0.0, "learning_rate": 4.394011515935186e-06, "loss": 0.9036, "step": 5602 }, { "epoch": 0.6988898590495197, "grad_norm": 0.0, "learning_rate": 4.390666405578541e-06, "loss": 0.9043, "step": 5603 }, { "epoch": 0.699014593987776, "grad_norm": 0.0, "learning_rate": 4.3873222108065535e-06, "loss": 0.9204, "step": 5604 }, { "epoch": 0.6991393289260321, "grad_norm": 0.0, "learning_rate": 4.3839789321650724e-06, "loss": 0.9142, "step": 5605 }, { "epoch": 0.6992640638642884, "grad_norm": 0.0, "learning_rate": 4.380636570199809e-06, "loss": 0.9274, "step": 5606 }, { "epoch": 0.6993887988025446, "grad_norm": 0.0, "learning_rate": 4.37729512545632e-06, "loss": 0.9323, "step": 5607 }, { "epoch": 0.6995135337408008, "grad_norm": 0.0, "learning_rate": 4.373954598480015e-06, "loss": 0.9895, "step": 5608 }, { "epoch": 0.699638268679057, "grad_norm": 0.0, "learning_rate": 4.3706149898161546e-06, "loss": 0.9682, "step": 5609 }, { "epoch": 0.6997630036173133, "grad_norm": 0.0, "learning_rate": 4.36727630000984e-06, "loss": 0.9194, "step": 5610 }, { "epoch": 0.6998877385555694, "grad_norm": 0.0, "learning_rate": 4.363938529606034e-06, "loss": 0.9412, "step": 5611 }, { "epoch": 0.7000124734938257, "grad_norm": 0.0, "learning_rate": 4.360601679149546e-06, "loss": 0.9467, "step": 5612 }, { "epoch": 0.7001372084320818, "grad_norm": 0.0, "learning_rate": 4.357265749185027e-06, "loss": 0.8696, "step": 5613 }, { "epoch": 0.700261943370338, "grad_norm": 0.0, "learning_rate": 4.353930740256997e-06, "loss": 0.8962, "step": 5614 }, { "epoch": 0.7003866783085942, "grad_norm": 0.0, "learning_rate": 4.350596652909803e-06, "loss": 0.9595, "step": 5615 }, { "epoch": 0.7005114132468504, "grad_norm": 0.0, "learning_rate": 4.347263487687656e-06, "loss": 0.9271, "step": 5616 }, { "epoch": 0.7006361481851067, "grad_norm": 0.0, "learning_rate": 4.343931245134616e-06, "loss": 0.9462, "step": 5617 }, { "epoch": 0.7007608831233628, "grad_norm": 0.0, "learning_rate": 4.340599925794582e-06, "loss": 0.9014, "step": 5618 }, { "epoch": 0.7008856180616191, "grad_norm": 0.0, "learning_rate": 4.337269530211313e-06, "loss": 0.9099, "step": 5619 }, { "epoch": 0.7010103529998752, "grad_norm": 0.0, "learning_rate": 4.333940058928413e-06, "loss": 0.9377, "step": 5620 }, { "epoch": 0.7011350879381315, "grad_norm": 0.0, "learning_rate": 4.33061151248934e-06, "loss": 0.8721, "step": 5621 }, { "epoch": 0.7012598228763877, "grad_norm": 0.0, "learning_rate": 4.3272838914373885e-06, "loss": 0.9101, "step": 5622 }, { "epoch": 0.7013845578146439, "grad_norm": 0.0, "learning_rate": 4.323957196315714e-06, "loss": 0.8572, "step": 5623 }, { "epoch": 0.7015092927529001, "grad_norm": 0.0, "learning_rate": 4.320631427667321e-06, "loss": 0.9495, "step": 5624 }, { "epoch": 0.7016340276911563, "grad_norm": 0.0, "learning_rate": 4.317306586035049e-06, "loss": 0.9214, "step": 5625 }, { "epoch": 0.7017587626294125, "grad_norm": 0.0, "learning_rate": 4.313982671961609e-06, "loss": 0.9333, "step": 5626 }, { "epoch": 0.7018834975676687, "grad_norm": 0.0, "learning_rate": 4.3106596859895355e-06, "loss": 0.9292, "step": 5627 }, { "epoch": 0.7020082325059249, "grad_norm": 0.0, "learning_rate": 4.30733762866123e-06, "loss": 0.9032, "step": 5628 }, { "epoch": 0.7021329674441811, "grad_norm": 0.0, "learning_rate": 4.304016500518937e-06, "loss": 0.9651, "step": 5629 }, { "epoch": 0.7022577023824373, "grad_norm": 0.0, "learning_rate": 4.300696302104745e-06, "loss": 0.9431, "step": 5630 }, { "epoch": 0.7023824373206935, "grad_norm": 0.0, "learning_rate": 4.297377033960593e-06, "loss": 0.8985, "step": 5631 }, { "epoch": 0.7025071722589498, "grad_norm": 0.0, "learning_rate": 4.294058696628272e-06, "loss": 0.9211, "step": 5632 }, { "epoch": 0.7026319071972059, "grad_norm": 0.0, "learning_rate": 4.2907412906494175e-06, "loss": 0.9403, "step": 5633 }, { "epoch": 0.7027566421354622, "grad_norm": 0.0, "learning_rate": 4.287424816565517e-06, "loss": 0.8723, "step": 5634 }, { "epoch": 0.7028813770737183, "grad_norm": 0.0, "learning_rate": 4.284109274917897e-06, "loss": 0.9252, "step": 5635 }, { "epoch": 0.7030061120119746, "grad_norm": 0.0, "learning_rate": 4.28079466624774e-06, "loss": 0.9429, "step": 5636 }, { "epoch": 0.7031308469502308, "grad_norm": 0.0, "learning_rate": 4.277480991096073e-06, "loss": 0.9297, "step": 5637 }, { "epoch": 0.7032555818884869, "grad_norm": 0.0, "learning_rate": 4.274168250003776e-06, "loss": 0.8649, "step": 5638 }, { "epoch": 0.7033803168267432, "grad_norm": 0.0, "learning_rate": 4.270856443511565e-06, "loss": 0.9004, "step": 5639 }, { "epoch": 0.7035050517649993, "grad_norm": 0.0, "learning_rate": 4.267545572160014e-06, "loss": 0.904, "step": 5640 }, { "epoch": 0.7036297867032556, "grad_norm": 0.0, "learning_rate": 4.264235636489542e-06, "loss": 0.9086, "step": 5641 }, { "epoch": 0.7037545216415118, "grad_norm": 0.0, "learning_rate": 4.260926637040405e-06, "loss": 0.927, "step": 5642 }, { "epoch": 0.703879256579768, "grad_norm": 0.0, "learning_rate": 4.2576185743527294e-06, "loss": 0.8241, "step": 5643 }, { "epoch": 0.7040039915180242, "grad_norm": 0.0, "learning_rate": 4.254311448966468e-06, "loss": 0.8581, "step": 5644 }, { "epoch": 0.7041287264562804, "grad_norm": 0.0, "learning_rate": 4.251005261421418e-06, "loss": 0.8891, "step": 5645 }, { "epoch": 0.7042534613945366, "grad_norm": 0.0, "learning_rate": 4.247700012257249e-06, "loss": 0.9239, "step": 5646 }, { "epoch": 0.7043781963327929, "grad_norm": 0.0, "learning_rate": 4.244395702013448e-06, "loss": 0.9391, "step": 5647 }, { "epoch": 0.704502931271049, "grad_norm": 0.0, "learning_rate": 4.2410923312293665e-06, "loss": 0.9307, "step": 5648 }, { "epoch": 0.7046276662093053, "grad_norm": 0.0, "learning_rate": 4.237789900444197e-06, "loss": 0.9276, "step": 5649 }, { "epoch": 0.7047524011475614, "grad_norm": 0.0, "learning_rate": 4.234488410196985e-06, "loss": 0.9418, "step": 5650 }, { "epoch": 0.7048771360858176, "grad_norm": 0.0, "learning_rate": 4.231187861026608e-06, "loss": 0.9104, "step": 5651 }, { "epoch": 0.7050018710240739, "grad_norm": 0.0, "learning_rate": 4.2278882534718035e-06, "loss": 0.9075, "step": 5652 }, { "epoch": 0.70512660596233, "grad_norm": 0.0, "learning_rate": 4.22458958807115e-06, "loss": 0.8972, "step": 5653 }, { "epoch": 0.7052513409005863, "grad_norm": 0.0, "learning_rate": 4.221291865363073e-06, "loss": 0.9006, "step": 5654 }, { "epoch": 0.7053760758388424, "grad_norm": 0.0, "learning_rate": 4.2179950858858465e-06, "loss": 0.924, "step": 5655 }, { "epoch": 0.7055008107770987, "grad_norm": 0.0, "learning_rate": 4.214699250177583e-06, "loss": 0.898, "step": 5656 }, { "epoch": 0.7056255457153549, "grad_norm": 0.0, "learning_rate": 4.211404358776249e-06, "loss": 0.9549, "step": 5657 }, { "epoch": 0.7057502806536111, "grad_norm": 0.0, "learning_rate": 4.208110412219656e-06, "loss": 0.8477, "step": 5658 }, { "epoch": 0.7058750155918673, "grad_norm": 0.0, "learning_rate": 4.20481741104545e-06, "loss": 0.9342, "step": 5659 }, { "epoch": 0.7059997505301235, "grad_norm": 0.0, "learning_rate": 4.201525355791144e-06, "loss": 0.899, "step": 5660 }, { "epoch": 0.7061244854683797, "grad_norm": 0.0, "learning_rate": 4.198234246994079e-06, "loss": 0.9104, "step": 5661 }, { "epoch": 0.7062492204066358, "grad_norm": 0.0, "learning_rate": 4.194944085191441e-06, "loss": 0.9207, "step": 5662 }, { "epoch": 0.7063739553448921, "grad_norm": 0.0, "learning_rate": 4.1916548709202784e-06, "loss": 0.8626, "step": 5663 }, { "epoch": 0.7064986902831483, "grad_norm": 0.0, "learning_rate": 4.1883666047174665e-06, "loss": 0.9062, "step": 5664 }, { "epoch": 0.7066234252214045, "grad_norm": 0.0, "learning_rate": 4.185079287119733e-06, "loss": 0.9054, "step": 5665 }, { "epoch": 0.7067481601596607, "grad_norm": 0.0, "learning_rate": 4.181792918663655e-06, "loss": 0.9361, "step": 5666 }, { "epoch": 0.706872895097917, "grad_norm": 0.0, "learning_rate": 4.178507499885651e-06, "loss": 0.9133, "step": 5667 }, { "epoch": 0.7069976300361731, "grad_norm": 0.0, "learning_rate": 4.17522303132198e-06, "loss": 0.9161, "step": 5668 }, { "epoch": 0.7071223649744294, "grad_norm": 0.0, "learning_rate": 4.171939513508751e-06, "loss": 0.9463, "step": 5669 }, { "epoch": 0.7072470999126855, "grad_norm": 0.0, "learning_rate": 4.168656946981922e-06, "loss": 0.8916, "step": 5670 }, { "epoch": 0.7073718348509418, "grad_norm": 0.0, "learning_rate": 4.1653753322772795e-06, "loss": 0.9137, "step": 5671 }, { "epoch": 0.707496569789198, "grad_norm": 0.0, "learning_rate": 4.162094669930479e-06, "loss": 0.946, "step": 5672 }, { "epoch": 0.7076213047274542, "grad_norm": 0.0, "learning_rate": 4.158814960476999e-06, "loss": 0.9205, "step": 5673 }, { "epoch": 0.7077460396657104, "grad_norm": 0.0, "learning_rate": 4.1555362044521715e-06, "loss": 0.9582, "step": 5674 }, { "epoch": 0.7078707746039665, "grad_norm": 0.0, "learning_rate": 4.152258402391178e-06, "loss": 0.9258, "step": 5675 }, { "epoch": 0.7079955095422228, "grad_norm": 0.0, "learning_rate": 4.14898155482903e-06, "loss": 0.9239, "step": 5676 }, { "epoch": 0.7081202444804789, "grad_norm": 0.0, "learning_rate": 4.1457056623005954e-06, "loss": 0.8917, "step": 5677 }, { "epoch": 0.7082449794187352, "grad_norm": 0.0, "learning_rate": 4.142430725340587e-06, "loss": 0.9817, "step": 5678 }, { "epoch": 0.7083697143569914, "grad_norm": 0.0, "learning_rate": 4.139156744483545e-06, "loss": 0.8832, "step": 5679 }, { "epoch": 0.7084944492952476, "grad_norm": 0.0, "learning_rate": 4.13588372026388e-06, "loss": 0.8772, "step": 5680 }, { "epoch": 0.7086191842335038, "grad_norm": 0.0, "learning_rate": 4.132611653215822e-06, "loss": 0.9318, "step": 5681 }, { "epoch": 0.70874391917176, "grad_norm": 0.0, "learning_rate": 4.129340543873459e-06, "loss": 0.8697, "step": 5682 }, { "epoch": 0.7088686541100162, "grad_norm": 0.0, "learning_rate": 4.126070392770718e-06, "loss": 0.9466, "step": 5683 }, { "epoch": 0.7089933890482725, "grad_norm": 0.0, "learning_rate": 4.122801200441372e-06, "loss": 0.9006, "step": 5684 }, { "epoch": 0.7091181239865286, "grad_norm": 0.0, "learning_rate": 4.119532967419031e-06, "loss": 0.9406, "step": 5685 }, { "epoch": 0.7092428589247848, "grad_norm": 0.0, "learning_rate": 4.116265694237155e-06, "loss": 0.9361, "step": 5686 }, { "epoch": 0.709367593863041, "grad_norm": 0.0, "learning_rate": 4.112999381429049e-06, "loss": 0.8742, "step": 5687 }, { "epoch": 0.7094923288012972, "grad_norm": 0.0, "learning_rate": 4.1097340295278475e-06, "loss": 0.9531, "step": 5688 }, { "epoch": 0.7096170637395535, "grad_norm": 0.0, "learning_rate": 4.106469639066552e-06, "loss": 0.8974, "step": 5689 }, { "epoch": 0.7097417986778096, "grad_norm": 0.0, "learning_rate": 4.103206210577983e-06, "loss": 0.9572, "step": 5690 }, { "epoch": 0.7098665336160659, "grad_norm": 0.0, "learning_rate": 4.0999437445948185e-06, "loss": 0.8795, "step": 5691 }, { "epoch": 0.709991268554322, "grad_norm": 0.0, "learning_rate": 4.096682241649578e-06, "loss": 0.8974, "step": 5692 }, { "epoch": 0.7101160034925783, "grad_norm": 0.0, "learning_rate": 4.093421702274614e-06, "loss": 0.9446, "step": 5693 }, { "epoch": 0.7102407384308345, "grad_norm": 0.0, "learning_rate": 4.090162127002131e-06, "loss": 0.9555, "step": 5694 }, { "epoch": 0.7103654733690907, "grad_norm": 0.0, "learning_rate": 4.086903516364179e-06, "loss": 0.9725, "step": 5695 }, { "epoch": 0.7104902083073469, "grad_norm": 0.0, "learning_rate": 4.083645870892639e-06, "loss": 0.8969, "step": 5696 }, { "epoch": 0.7106149432456031, "grad_norm": 0.0, "learning_rate": 4.080389191119241e-06, "loss": 0.898, "step": 5697 }, { "epoch": 0.7107396781838593, "grad_norm": 0.0, "learning_rate": 4.077133477575562e-06, "loss": 0.935, "step": 5698 }, { "epoch": 0.7108644131221155, "grad_norm": 0.0, "learning_rate": 4.073878730793012e-06, "loss": 0.8836, "step": 5699 }, { "epoch": 0.7109891480603717, "grad_norm": 0.0, "learning_rate": 4.0706249513028516e-06, "loss": 0.9319, "step": 5700 }, { "epoch": 0.7111138829986279, "grad_norm": 0.0, "learning_rate": 4.067372139636181e-06, "loss": 0.934, "step": 5701 }, { "epoch": 0.7112386179368841, "grad_norm": 0.0, "learning_rate": 4.064120296323935e-06, "loss": 0.8865, "step": 5702 }, { "epoch": 0.7113633528751403, "grad_norm": 0.0, "learning_rate": 4.060869421896897e-06, "loss": 0.9401, "step": 5703 }, { "epoch": 0.7114880878133966, "grad_norm": 0.0, "learning_rate": 4.057619516885699e-06, "loss": 0.8878, "step": 5704 }, { "epoch": 0.7116128227516527, "grad_norm": 0.0, "learning_rate": 4.054370581820796e-06, "loss": 0.9371, "step": 5705 }, { "epoch": 0.711737557689909, "grad_norm": 0.0, "learning_rate": 4.051122617232508e-06, "loss": 0.9084, "step": 5706 }, { "epoch": 0.7118622926281651, "grad_norm": 0.0, "learning_rate": 4.047875623650981e-06, "loss": 0.9181, "step": 5707 }, { "epoch": 0.7119870275664214, "grad_norm": 0.0, "learning_rate": 4.0446296016061955e-06, "loss": 0.8848, "step": 5708 }, { "epoch": 0.7121117625046776, "grad_norm": 0.0, "learning_rate": 4.041384551628002e-06, "loss": 0.9695, "step": 5709 }, { "epoch": 0.7122364974429337, "grad_norm": 0.0, "learning_rate": 4.038140474246061e-06, "loss": 0.8751, "step": 5710 }, { "epoch": 0.71236123238119, "grad_norm": 0.0, "learning_rate": 4.034897369989891e-06, "loss": 0.9076, "step": 5711 }, { "epoch": 0.7124859673194461, "grad_norm": 0.0, "learning_rate": 4.031655239388854e-06, "loss": 0.9706, "step": 5712 }, { "epoch": 0.7126107022577024, "grad_norm": 0.0, "learning_rate": 4.028414082972141e-06, "loss": 0.8907, "step": 5713 }, { "epoch": 0.7127354371959586, "grad_norm": 0.0, "learning_rate": 4.025173901268791e-06, "loss": 0.9124, "step": 5714 }, { "epoch": 0.7128601721342148, "grad_norm": 0.0, "learning_rate": 4.021934694807685e-06, "loss": 0.9622, "step": 5715 }, { "epoch": 0.712984907072471, "grad_norm": 0.0, "learning_rate": 4.018696464117543e-06, "loss": 0.9129, "step": 5716 }, { "epoch": 0.7131096420107272, "grad_norm": 0.0, "learning_rate": 4.015459209726929e-06, "loss": 0.9443, "step": 5717 }, { "epoch": 0.7132343769489834, "grad_norm": 0.0, "learning_rate": 4.012222932164239e-06, "loss": 0.886, "step": 5718 }, { "epoch": 0.7133591118872397, "grad_norm": 0.0, "learning_rate": 4.008987631957717e-06, "loss": 0.9211, "step": 5719 }, { "epoch": 0.7134838468254958, "grad_norm": 0.0, "learning_rate": 4.005753309635446e-06, "loss": 0.8903, "step": 5720 }, { "epoch": 0.7136085817637521, "grad_norm": 0.0, "learning_rate": 4.002519965725352e-06, "loss": 0.9058, "step": 5721 }, { "epoch": 0.7137333167020082, "grad_norm": 0.0, "learning_rate": 3.999287600755192e-06, "loss": 0.9001, "step": 5722 }, { "epoch": 0.7138580516402644, "grad_norm": 0.0, "learning_rate": 3.996056215252573e-06, "loss": 0.8837, "step": 5723 }, { "epoch": 0.7139827865785207, "grad_norm": 0.0, "learning_rate": 3.992825809744941e-06, "loss": 0.9504, "step": 5724 }, { "epoch": 0.7141075215167768, "grad_norm": 0.0, "learning_rate": 3.989596384759571e-06, "loss": 0.9359, "step": 5725 }, { "epoch": 0.7142322564550331, "grad_norm": 0.0, "learning_rate": 3.986367940823598e-06, "loss": 0.9203, "step": 5726 }, { "epoch": 0.7143569913932892, "grad_norm": 0.0, "learning_rate": 3.983140478463977e-06, "loss": 0.8978, "step": 5727 }, { "epoch": 0.7144817263315455, "grad_norm": 0.0, "learning_rate": 3.979913998207514e-06, "loss": 0.8955, "step": 5728 }, { "epoch": 0.7146064612698017, "grad_norm": 0.0, "learning_rate": 3.976688500580857e-06, "loss": 0.8813, "step": 5729 }, { "epoch": 0.7147311962080579, "grad_norm": 0.0, "learning_rate": 3.973463986110478e-06, "loss": 0.8805, "step": 5730 }, { "epoch": 0.7148559311463141, "grad_norm": 0.0, "learning_rate": 3.970240455322705e-06, "loss": 0.9185, "step": 5731 }, { "epoch": 0.7149806660845703, "grad_norm": 0.0, "learning_rate": 3.9670179087437e-06, "loss": 0.9674, "step": 5732 }, { "epoch": 0.7151054010228265, "grad_norm": 0.0, "learning_rate": 3.963796346899465e-06, "loss": 0.9504, "step": 5733 }, { "epoch": 0.7152301359610826, "grad_norm": 0.0, "learning_rate": 3.9605757703158355e-06, "loss": 0.9665, "step": 5734 }, { "epoch": 0.7153548708993389, "grad_norm": 0.0, "learning_rate": 3.957356179518493e-06, "loss": 0.9022, "step": 5735 }, { "epoch": 0.7154796058375951, "grad_norm": 0.0, "learning_rate": 3.954137575032956e-06, "loss": 0.9096, "step": 5736 }, { "epoch": 0.7156043407758513, "grad_norm": 0.0, "learning_rate": 3.950919957384583e-06, "loss": 0.9055, "step": 5737 }, { "epoch": 0.7157290757141075, "grad_norm": 0.0, "learning_rate": 3.947703327098572e-06, "loss": 0.9328, "step": 5738 }, { "epoch": 0.7158538106523638, "grad_norm": 0.0, "learning_rate": 3.944487684699952e-06, "loss": 0.9042, "step": 5739 }, { "epoch": 0.7159785455906199, "grad_norm": 0.0, "learning_rate": 3.9412730307136e-06, "loss": 0.8821, "step": 5740 }, { "epoch": 0.7161032805288762, "grad_norm": 0.0, "learning_rate": 3.938059365664234e-06, "loss": 0.931, "step": 5741 }, { "epoch": 0.7162280154671323, "grad_norm": 0.0, "learning_rate": 3.934846690076393e-06, "loss": 0.9324, "step": 5742 }, { "epoch": 0.7163527504053886, "grad_norm": 0.0, "learning_rate": 3.93163500447448e-06, "loss": 0.9245, "step": 5743 }, { "epoch": 0.7164774853436447, "grad_norm": 0.0, "learning_rate": 3.9284243093827136e-06, "loss": 0.9189, "step": 5744 }, { "epoch": 0.716602220281901, "grad_norm": 0.0, "learning_rate": 3.925214605325164e-06, "loss": 0.9071, "step": 5745 }, { "epoch": 0.7167269552201572, "grad_norm": 0.0, "learning_rate": 3.922005892825737e-06, "loss": 0.8847, "step": 5746 }, { "epoch": 0.7168516901584133, "grad_norm": 0.0, "learning_rate": 3.918798172408173e-06, "loss": 0.9196, "step": 5747 }, { "epoch": 0.7169764250966696, "grad_norm": 0.0, "learning_rate": 3.9155914445960504e-06, "loss": 0.8998, "step": 5748 }, { "epoch": 0.7171011600349257, "grad_norm": 0.0, "learning_rate": 3.912385709912794e-06, "loss": 0.945, "step": 5749 }, { "epoch": 0.717225894973182, "grad_norm": 0.0, "learning_rate": 3.90918096888166e-06, "loss": 0.9155, "step": 5750 }, { "epoch": 0.7173506299114382, "grad_norm": 0.0, "learning_rate": 3.905977222025736e-06, "loss": 0.9098, "step": 5751 }, { "epoch": 0.7174753648496944, "grad_norm": 0.0, "learning_rate": 3.9027744698679595e-06, "loss": 0.9427, "step": 5752 }, { "epoch": 0.7176000997879506, "grad_norm": 0.0, "learning_rate": 3.8995727129311035e-06, "loss": 0.9382, "step": 5753 }, { "epoch": 0.7177248347262069, "grad_norm": 0.0, "learning_rate": 3.896371951737764e-06, "loss": 0.9258, "step": 5754 }, { "epoch": 0.717849569664463, "grad_norm": 0.0, "learning_rate": 3.893172186810401e-06, "loss": 0.902, "step": 5755 }, { "epoch": 0.7179743046027193, "grad_norm": 0.0, "learning_rate": 3.889973418671287e-06, "loss": 0.9654, "step": 5756 }, { "epoch": 0.7180990395409754, "grad_norm": 0.0, "learning_rate": 3.886775647842542e-06, "loss": 0.8619, "step": 5757 }, { "epoch": 0.7182237744792316, "grad_norm": 0.0, "learning_rate": 3.88357887484613e-06, "loss": 0.9294, "step": 5758 }, { "epoch": 0.7183485094174878, "grad_norm": 0.0, "learning_rate": 3.880383100203835e-06, "loss": 0.8533, "step": 5759 }, { "epoch": 0.718473244355744, "grad_norm": 0.0, "learning_rate": 3.877188324437293e-06, "loss": 0.9064, "step": 5760 }, { "epoch": 0.7185979792940003, "grad_norm": 0.0, "learning_rate": 3.873994548067972e-06, "loss": 0.9381, "step": 5761 }, { "epoch": 0.7187227142322564, "grad_norm": 0.0, "learning_rate": 3.870801771617176e-06, "loss": 0.8826, "step": 5762 }, { "epoch": 0.7188474491705127, "grad_norm": 0.0, "learning_rate": 3.867609995606051e-06, "loss": 0.9063, "step": 5763 }, { "epoch": 0.7189721841087688, "grad_norm": 0.0, "learning_rate": 3.864419220555568e-06, "loss": 0.932, "step": 5764 }, { "epoch": 0.7190969190470251, "grad_norm": 0.0, "learning_rate": 3.861229446986545e-06, "loss": 0.9086, "step": 5765 }, { "epoch": 0.7192216539852813, "grad_norm": 0.0, "learning_rate": 3.858040675419633e-06, "loss": 0.9069, "step": 5766 }, { "epoch": 0.7193463889235375, "grad_norm": 0.0, "learning_rate": 3.854852906375326e-06, "loss": 0.9414, "step": 5767 }, { "epoch": 0.7194711238617937, "grad_norm": 0.0, "learning_rate": 3.851666140373937e-06, "loss": 0.8929, "step": 5768 }, { "epoch": 0.71959585880005, "grad_norm": 0.0, "learning_rate": 3.848480377935635e-06, "loss": 0.9668, "step": 5769 }, { "epoch": 0.7197205937383061, "grad_norm": 0.0, "learning_rate": 3.845295619580417e-06, "loss": 0.9099, "step": 5770 }, { "epoch": 0.7198453286765623, "grad_norm": 0.0, "learning_rate": 3.842111865828105e-06, "loss": 0.8917, "step": 5771 }, { "epoch": 0.7199700636148185, "grad_norm": 0.0, "learning_rate": 3.838929117198385e-06, "loss": 0.9152, "step": 5772 }, { "epoch": 0.7200947985530747, "grad_norm": 0.0, "learning_rate": 3.835747374210747e-06, "loss": 0.9275, "step": 5773 }, { "epoch": 0.7202195334913309, "grad_norm": 0.0, "learning_rate": 3.83256663738454e-06, "loss": 0.9287, "step": 5774 }, { "epoch": 0.7203442684295871, "grad_norm": 0.0, "learning_rate": 3.829386907238939e-06, "loss": 0.9437, "step": 5775 }, { "epoch": 0.7204690033678434, "grad_norm": 0.0, "learning_rate": 3.826208184292952e-06, "loss": 0.9384, "step": 5776 }, { "epoch": 0.7205937383060995, "grad_norm": 0.0, "learning_rate": 3.823030469065431e-06, "loss": 0.9429, "step": 5777 }, { "epoch": 0.7207184732443558, "grad_norm": 0.0, "learning_rate": 3.819853762075057e-06, "loss": 0.8827, "step": 5778 }, { "epoch": 0.7208432081826119, "grad_norm": 0.0, "learning_rate": 3.816678063840353e-06, "loss": 0.9009, "step": 5779 }, { "epoch": 0.7209679431208682, "grad_norm": 0.0, "learning_rate": 3.8135033748796667e-06, "loss": 0.8927, "step": 5780 }, { "epoch": 0.7210926780591244, "grad_norm": 0.0, "learning_rate": 3.81032969571119e-06, "loss": 0.8921, "step": 5781 }, { "epoch": 0.7212174129973805, "grad_norm": 0.0, "learning_rate": 3.8071570268529477e-06, "loss": 0.8967, "step": 5782 }, { "epoch": 0.7213421479356368, "grad_norm": 0.0, "learning_rate": 3.803985368822799e-06, "loss": 0.9297, "step": 5783 }, { "epoch": 0.7214668828738929, "grad_norm": 0.0, "learning_rate": 3.800814722138443e-06, "loss": 0.912, "step": 5784 }, { "epoch": 0.7215916178121492, "grad_norm": 0.0, "learning_rate": 3.797645087317401e-06, "loss": 0.8987, "step": 5785 }, { "epoch": 0.7217163527504054, "grad_norm": 0.0, "learning_rate": 3.7944764648770404e-06, "loss": 0.9313, "step": 5786 }, { "epoch": 0.7218410876886616, "grad_norm": 0.0, "learning_rate": 3.791308855334566e-06, "loss": 0.89, "step": 5787 }, { "epoch": 0.7219658226269178, "grad_norm": 0.0, "learning_rate": 3.788142259206998e-06, "loss": 0.9337, "step": 5788 }, { "epoch": 0.722090557565174, "grad_norm": 0.0, "learning_rate": 3.7849766770112217e-06, "loss": 0.8874, "step": 5789 }, { "epoch": 0.7222152925034302, "grad_norm": 0.0, "learning_rate": 3.781812109263928e-06, "loss": 0.8844, "step": 5790 }, { "epoch": 0.7223400274416865, "grad_norm": 0.0, "learning_rate": 3.7786485564816577e-06, "loss": 0.8732, "step": 5791 }, { "epoch": 0.7224647623799426, "grad_norm": 0.0, "learning_rate": 3.775486019180785e-06, "loss": 0.9625, "step": 5792 }, { "epoch": 0.7225894973181989, "grad_norm": 0.0, "learning_rate": 3.772324497877511e-06, "loss": 0.933, "step": 5793 }, { "epoch": 0.722714232256455, "grad_norm": 0.0, "learning_rate": 3.7691639930878767e-06, "loss": 0.879, "step": 5794 }, { "epoch": 0.7228389671947112, "grad_norm": 0.0, "learning_rate": 3.766004505327757e-06, "loss": 0.9386, "step": 5795 }, { "epoch": 0.7229637021329675, "grad_norm": 0.0, "learning_rate": 3.7628460351128638e-06, "loss": 0.8814, "step": 5796 }, { "epoch": 0.7230884370712236, "grad_norm": 0.0, "learning_rate": 3.7596885829587325e-06, "loss": 0.9613, "step": 5797 }, { "epoch": 0.7232131720094799, "grad_norm": 0.0, "learning_rate": 3.7565321493807417e-06, "loss": 0.963, "step": 5798 }, { "epoch": 0.723337906947736, "grad_norm": 0.0, "learning_rate": 3.7533767348941043e-06, "loss": 0.9169, "step": 5799 }, { "epoch": 0.7234626418859923, "grad_norm": 0.0, "learning_rate": 3.750222340013854e-06, "loss": 0.8727, "step": 5800 }, { "epoch": 0.7235873768242485, "grad_norm": 0.0, "learning_rate": 3.74706896525488e-06, "loss": 0.9267, "step": 5801 }, { "epoch": 0.7237121117625047, "grad_norm": 0.0, "learning_rate": 3.7439166111318827e-06, "loss": 0.9467, "step": 5802 }, { "epoch": 0.7238368467007609, "grad_norm": 0.0, "learning_rate": 3.7407652781594094e-06, "loss": 0.8974, "step": 5803 }, { "epoch": 0.7239615816390171, "grad_norm": 0.0, "learning_rate": 3.7376149668518414e-06, "loss": 0.8569, "step": 5804 }, { "epoch": 0.7240863165772733, "grad_norm": 0.0, "learning_rate": 3.73446567772338e-06, "loss": 0.9718, "step": 5805 }, { "epoch": 0.7242110515155294, "grad_norm": 0.0, "learning_rate": 3.7313174112880745e-06, "loss": 0.9381, "step": 5806 }, { "epoch": 0.7243357864537857, "grad_norm": 0.0, "learning_rate": 3.728170168059799e-06, "loss": 0.9196, "step": 5807 }, { "epoch": 0.7244605213920419, "grad_norm": 0.0, "learning_rate": 3.725023948552264e-06, "loss": 0.9117, "step": 5808 }, { "epoch": 0.7245852563302981, "grad_norm": 0.0, "learning_rate": 3.7218787532790167e-06, "loss": 0.9157, "step": 5809 }, { "epoch": 0.7247099912685543, "grad_norm": 0.0, "learning_rate": 3.7187345827534226e-06, "loss": 0.9196, "step": 5810 }, { "epoch": 0.7248347262068106, "grad_norm": 0.0, "learning_rate": 3.7155914374886946e-06, "loss": 0.896, "step": 5811 }, { "epoch": 0.7249594611450667, "grad_norm": 0.0, "learning_rate": 3.7124493179978737e-06, "loss": 0.9526, "step": 5812 }, { "epoch": 0.725084196083323, "grad_norm": 0.0, "learning_rate": 3.709308224793835e-06, "loss": 0.9277, "step": 5813 }, { "epoch": 0.7252089310215791, "grad_norm": 0.0, "learning_rate": 3.7061681583892782e-06, "loss": 0.9171, "step": 5814 }, { "epoch": 0.7253336659598354, "grad_norm": 0.0, "learning_rate": 3.703029119296745e-06, "loss": 0.8983, "step": 5815 }, { "epoch": 0.7254584008980915, "grad_norm": 0.0, "learning_rate": 3.699891108028608e-06, "loss": 0.894, "step": 5816 }, { "epoch": 0.7255831358363478, "grad_norm": 0.0, "learning_rate": 3.6967541250970605e-06, "loss": 0.9496, "step": 5817 }, { "epoch": 0.725707870774604, "grad_norm": 0.0, "learning_rate": 3.693618171014152e-06, "loss": 0.9334, "step": 5818 }, { "epoch": 0.7258326057128601, "grad_norm": 0.0, "learning_rate": 3.690483246291735e-06, "loss": 0.9242, "step": 5819 }, { "epoch": 0.7259573406511164, "grad_norm": 0.0, "learning_rate": 3.687349351441516e-06, "loss": 0.9334, "step": 5820 }, { "epoch": 0.7260820755893725, "grad_norm": 0.0, "learning_rate": 3.6842164869750265e-06, "loss": 0.8922, "step": 5821 }, { "epoch": 0.7262068105276288, "grad_norm": 0.0, "learning_rate": 3.681084653403624e-06, "loss": 0.9357, "step": 5822 }, { "epoch": 0.726331545465885, "grad_norm": 0.0, "learning_rate": 3.677953851238504e-06, "loss": 0.8529, "step": 5823 }, { "epoch": 0.7264562804041412, "grad_norm": 0.0, "learning_rate": 3.674824080990693e-06, "loss": 0.9246, "step": 5824 }, { "epoch": 0.7265810153423974, "grad_norm": 0.0, "learning_rate": 3.6716953431710525e-06, "loss": 0.9153, "step": 5825 }, { "epoch": 0.7267057502806537, "grad_norm": 0.0, "learning_rate": 3.6685676382902647e-06, "loss": 0.9429, "step": 5826 }, { "epoch": 0.7268304852189098, "grad_norm": 0.0, "learning_rate": 3.665440966858852e-06, "loss": 0.9112, "step": 5827 }, { "epoch": 0.7269552201571661, "grad_norm": 0.0, "learning_rate": 3.662315329387167e-06, "loss": 0.9228, "step": 5828 }, { "epoch": 0.7270799550954222, "grad_norm": 0.0, "learning_rate": 3.6591907263853933e-06, "loss": 0.8718, "step": 5829 }, { "epoch": 0.7272046900336784, "grad_norm": 0.0, "learning_rate": 3.6560671583635467e-06, "loss": 0.8943, "step": 5830 }, { "epoch": 0.7273294249719346, "grad_norm": 0.0, "learning_rate": 3.652944625831466e-06, "loss": 0.868, "step": 5831 }, { "epoch": 0.7274541599101908, "grad_norm": 0.0, "learning_rate": 3.6498231292988317e-06, "loss": 0.9149, "step": 5832 }, { "epoch": 0.7275788948484471, "grad_norm": 0.0, "learning_rate": 3.646702669275152e-06, "loss": 0.8946, "step": 5833 }, { "epoch": 0.7277036297867032, "grad_norm": 0.0, "learning_rate": 3.6435832462697564e-06, "loss": 0.9112, "step": 5834 }, { "epoch": 0.7278283647249595, "grad_norm": 0.0, "learning_rate": 3.6404648607918247e-06, "loss": 0.9356, "step": 5835 }, { "epoch": 0.7279530996632156, "grad_norm": 0.0, "learning_rate": 3.637347513350352e-06, "loss": 0.8688, "step": 5836 }, { "epoch": 0.7280778346014719, "grad_norm": 0.0, "learning_rate": 3.6342312044541596e-06, "loss": 0.9469, "step": 5837 }, { "epoch": 0.7282025695397281, "grad_norm": 0.0, "learning_rate": 3.631115934611922e-06, "loss": 0.9533, "step": 5838 }, { "epoch": 0.7283273044779843, "grad_norm": 0.0, "learning_rate": 3.628001704332118e-06, "loss": 0.9127, "step": 5839 }, { "epoch": 0.7284520394162405, "grad_norm": 0.0, "learning_rate": 3.6248885141230737e-06, "loss": 0.9276, "step": 5840 }, { "epoch": 0.7285767743544967, "grad_norm": 0.0, "learning_rate": 3.6217763644929393e-06, "loss": 0.9196, "step": 5841 }, { "epoch": 0.7287015092927529, "grad_norm": 0.0, "learning_rate": 3.6186652559497006e-06, "loss": 0.9473, "step": 5842 }, { "epoch": 0.7288262442310091, "grad_norm": 0.0, "learning_rate": 3.615555189001161e-06, "loss": 0.9433, "step": 5843 }, { "epoch": 0.7289509791692653, "grad_norm": 0.0, "learning_rate": 3.6124461641549648e-06, "loss": 0.9552, "step": 5844 }, { "epoch": 0.7290757141075215, "grad_norm": 0.0, "learning_rate": 3.6093381819185837e-06, "loss": 0.8897, "step": 5845 }, { "epoch": 0.7292004490457777, "grad_norm": 0.0, "learning_rate": 3.6062312427993195e-06, "loss": 0.9629, "step": 5846 }, { "epoch": 0.7293251839840339, "grad_norm": 0.0, "learning_rate": 3.603125347304306e-06, "loss": 0.9053, "step": 5847 }, { "epoch": 0.7294499189222902, "grad_norm": 0.0, "learning_rate": 3.600020495940496e-06, "loss": 0.9402, "step": 5848 }, { "epoch": 0.7295746538605463, "grad_norm": 0.0, "learning_rate": 3.5969166892146844e-06, "loss": 0.9029, "step": 5849 }, { "epoch": 0.7296993887988026, "grad_norm": 0.0, "learning_rate": 3.593813927633494e-06, "loss": 0.9139, "step": 5850 }, { "epoch": 0.7298241237370587, "grad_norm": 0.0, "learning_rate": 3.5907122117033653e-06, "loss": 0.8779, "step": 5851 }, { "epoch": 0.729948858675315, "grad_norm": 0.0, "learning_rate": 3.58761154193058e-06, "loss": 0.9361, "step": 5852 }, { "epoch": 0.7300735936135712, "grad_norm": 0.0, "learning_rate": 3.5845119188212518e-06, "loss": 0.9064, "step": 5853 }, { "epoch": 0.7301983285518273, "grad_norm": 0.0, "learning_rate": 3.5814133428813035e-06, "loss": 0.9485, "step": 5854 }, { "epoch": 0.7303230634900836, "grad_norm": 0.0, "learning_rate": 3.5783158146165177e-06, "loss": 0.9257, "step": 5855 }, { "epoch": 0.7304477984283397, "grad_norm": 0.0, "learning_rate": 3.575219334532476e-06, "loss": 0.8881, "step": 5856 }, { "epoch": 0.730572533366596, "grad_norm": 0.0, "learning_rate": 3.5721239031346067e-06, "loss": 0.927, "step": 5857 }, { "epoch": 0.7306972683048522, "grad_norm": 0.0, "learning_rate": 3.5690295209281634e-06, "loss": 0.903, "step": 5858 }, { "epoch": 0.7308220032431084, "grad_norm": 0.0, "learning_rate": 3.5659361884182285e-06, "loss": 0.9706, "step": 5859 }, { "epoch": 0.7309467381813646, "grad_norm": 0.0, "learning_rate": 3.5628439061097063e-06, "loss": 0.9311, "step": 5860 }, { "epoch": 0.7310714731196208, "grad_norm": 0.0, "learning_rate": 3.5597526745073387e-06, "loss": 0.9701, "step": 5861 }, { "epoch": 0.731196208057877, "grad_norm": 0.0, "learning_rate": 3.556662494115695e-06, "loss": 0.9137, "step": 5862 }, { "epoch": 0.7313209429961333, "grad_norm": 0.0, "learning_rate": 3.553573365439161e-06, "loss": 0.9547, "step": 5863 }, { "epoch": 0.7314456779343894, "grad_norm": 0.0, "learning_rate": 3.550485288981975e-06, "loss": 0.9812, "step": 5864 }, { "epoch": 0.7315704128726457, "grad_norm": 0.0, "learning_rate": 3.5473982652481773e-06, "loss": 0.9164, "step": 5865 }, { "epoch": 0.7316951478109018, "grad_norm": 0.0, "learning_rate": 3.544312294741652e-06, "loss": 0.9206, "step": 5866 }, { "epoch": 0.731819882749158, "grad_norm": 0.0, "learning_rate": 3.541227377966111e-06, "loss": 0.9516, "step": 5867 }, { "epoch": 0.7319446176874143, "grad_norm": 0.0, "learning_rate": 3.538143515425083e-06, "loss": 0.8635, "step": 5868 }, { "epoch": 0.7320693526256704, "grad_norm": 0.0, "learning_rate": 3.5350607076219358e-06, "loss": 0.9196, "step": 5869 }, { "epoch": 0.7321940875639267, "grad_norm": 0.0, "learning_rate": 3.531978955059864e-06, "loss": 0.8977, "step": 5870 }, { "epoch": 0.7323188225021828, "grad_norm": 0.0, "learning_rate": 3.5288982582418783e-06, "loss": 0.9292, "step": 5871 }, { "epoch": 0.7324435574404391, "grad_norm": 0.0, "learning_rate": 3.5258186176708385e-06, "loss": 0.9451, "step": 5872 }, { "epoch": 0.7325682923786953, "grad_norm": 0.0, "learning_rate": 3.522740033849411e-06, "loss": 0.9188, "step": 5873 }, { "epoch": 0.7326930273169515, "grad_norm": 0.0, "learning_rate": 3.5196625072801007e-06, "loss": 0.895, "step": 5874 }, { "epoch": 0.7328177622552077, "grad_norm": 0.0, "learning_rate": 3.5165860384652374e-06, "loss": 0.8268, "step": 5875 }, { "epoch": 0.7329424971934639, "grad_norm": 0.0, "learning_rate": 3.513510627906982e-06, "loss": 0.9149, "step": 5876 }, { "epoch": 0.7330672321317201, "grad_norm": 0.0, "learning_rate": 3.510436276107312e-06, "loss": 0.874, "step": 5877 }, { "epoch": 0.7331919670699762, "grad_norm": 0.0, "learning_rate": 3.5073629835680433e-06, "loss": 0.877, "step": 5878 }, { "epoch": 0.7333167020082325, "grad_norm": 0.0, "learning_rate": 3.5042907507908177e-06, "loss": 0.8525, "step": 5879 }, { "epoch": 0.7334414369464887, "grad_norm": 0.0, "learning_rate": 3.5012195782770896e-06, "loss": 0.9396, "step": 5880 }, { "epoch": 0.7335661718847449, "grad_norm": 0.0, "learning_rate": 3.4981494665281666e-06, "loss": 0.936, "step": 5881 }, { "epoch": 0.7336909068230011, "grad_norm": 0.0, "learning_rate": 3.4950804160451624e-06, "loss": 0.9376, "step": 5882 }, { "epoch": 0.7338156417612574, "grad_norm": 0.0, "learning_rate": 3.4920124273290148e-06, "loss": 0.8693, "step": 5883 }, { "epoch": 0.7339403766995135, "grad_norm": 0.0, "learning_rate": 3.4889455008805107e-06, "loss": 0.9365, "step": 5884 }, { "epoch": 0.7340651116377698, "grad_norm": 0.0, "learning_rate": 3.4858796372002412e-06, "loss": 0.9389, "step": 5885 }, { "epoch": 0.7341898465760259, "grad_norm": 0.0, "learning_rate": 3.482814836788635e-06, "loss": 0.9116, "step": 5886 }, { "epoch": 0.7343145815142822, "grad_norm": 0.0, "learning_rate": 3.4797511001459474e-06, "loss": 0.8644, "step": 5887 }, { "epoch": 0.7344393164525383, "grad_norm": 0.0, "learning_rate": 3.4766884277722524e-06, "loss": 0.8721, "step": 5888 }, { "epoch": 0.7345640513907946, "grad_norm": 0.0, "learning_rate": 3.473626820167457e-06, "loss": 0.8809, "step": 5889 }, { "epoch": 0.7346887863290508, "grad_norm": 0.0, "learning_rate": 3.470566277831293e-06, "loss": 0.9303, "step": 5890 }, { "epoch": 0.7348135212673069, "grad_norm": 0.0, "learning_rate": 3.4675068012633195e-06, "loss": 0.9267, "step": 5891 }, { "epoch": 0.7349382562055632, "grad_norm": 0.0, "learning_rate": 3.464448390962919e-06, "loss": 0.8901, "step": 5892 }, { "epoch": 0.7350629911438193, "grad_norm": 0.0, "learning_rate": 3.4613910474293045e-06, "loss": 0.8936, "step": 5893 }, { "epoch": 0.7351877260820756, "grad_norm": 0.0, "learning_rate": 3.4583347711615055e-06, "loss": 0.8997, "step": 5894 }, { "epoch": 0.7353124610203318, "grad_norm": 0.0, "learning_rate": 3.4552795626583855e-06, "loss": 0.8789, "step": 5895 }, { "epoch": 0.735437195958588, "grad_norm": 0.0, "learning_rate": 3.452225422418637e-06, "loss": 0.9725, "step": 5896 }, { "epoch": 0.7355619308968442, "grad_norm": 0.0, "learning_rate": 3.4491723509407603e-06, "loss": 0.9517, "step": 5897 }, { "epoch": 0.7356866658351005, "grad_norm": 0.0, "learning_rate": 3.4461203487231087e-06, "loss": 0.9172, "step": 5898 }, { "epoch": 0.7358114007733566, "grad_norm": 0.0, "learning_rate": 3.4430694162638378e-06, "loss": 0.8449, "step": 5899 }, { "epoch": 0.7359361357116129, "grad_norm": 0.0, "learning_rate": 3.4400195540609314e-06, "loss": 0.9364, "step": 5900 }, { "epoch": 0.736060870649869, "grad_norm": 0.0, "learning_rate": 3.4369707626122163e-06, "loss": 0.8798, "step": 5901 }, { "epoch": 0.7361856055881252, "grad_norm": 0.0, "learning_rate": 3.4339230424153225e-06, "loss": 0.9294, "step": 5902 }, { "epoch": 0.7363103405263814, "grad_norm": 0.0, "learning_rate": 3.430876393967718e-06, "loss": 0.9681, "step": 5903 }, { "epoch": 0.7364350754646376, "grad_norm": 0.0, "learning_rate": 3.4278308177666964e-06, "loss": 0.928, "step": 5904 }, { "epoch": 0.7365598104028939, "grad_norm": 0.0, "learning_rate": 3.424786314309365e-06, "loss": 0.9659, "step": 5905 }, { "epoch": 0.73668454534115, "grad_norm": 0.0, "learning_rate": 3.4217428840926683e-06, "loss": 0.877, "step": 5906 }, { "epoch": 0.7368092802794063, "grad_norm": 0.0, "learning_rate": 3.418700527613369e-06, "loss": 0.9108, "step": 5907 }, { "epoch": 0.7369340152176624, "grad_norm": 0.0, "learning_rate": 3.4156592453680604e-06, "loss": 0.9057, "step": 5908 }, { "epoch": 0.7370587501559187, "grad_norm": 0.0, "learning_rate": 3.4126190378531465e-06, "loss": 0.9127, "step": 5909 }, { "epoch": 0.7371834850941749, "grad_norm": 0.0, "learning_rate": 3.4095799055648793e-06, "loss": 0.9212, "step": 5910 }, { "epoch": 0.7373082200324311, "grad_norm": 0.0, "learning_rate": 3.4065418489993118e-06, "loss": 0.9149, "step": 5911 }, { "epoch": 0.7374329549706873, "grad_norm": 0.0, "learning_rate": 3.4035048686523344e-06, "loss": 0.8771, "step": 5912 }, { "epoch": 0.7375576899089435, "grad_norm": 0.0, "learning_rate": 3.4004689650196633e-06, "loss": 0.9328, "step": 5913 }, { "epoch": 0.7376824248471997, "grad_norm": 0.0, "learning_rate": 3.3974341385968256e-06, "loss": 0.9066, "step": 5914 }, { "epoch": 0.7378071597854559, "grad_norm": 0.0, "learning_rate": 3.3944003898791877e-06, "loss": 0.9112, "step": 5915 }, { "epoch": 0.7379318947237121, "grad_norm": 0.0, "learning_rate": 3.3913677193619356e-06, "loss": 0.922, "step": 5916 }, { "epoch": 0.7380566296619683, "grad_norm": 0.0, "learning_rate": 3.388336127540067e-06, "loss": 0.95, "step": 5917 }, { "epoch": 0.7381813646002245, "grad_norm": 0.0, "learning_rate": 3.3853056149084283e-06, "loss": 0.9196, "step": 5918 }, { "epoch": 0.7383060995384807, "grad_norm": 0.0, "learning_rate": 3.382276181961666e-06, "loss": 0.9222, "step": 5919 }, { "epoch": 0.738430834476737, "grad_norm": 0.0, "learning_rate": 3.3792478291942623e-06, "loss": 0.9095, "step": 5920 }, { "epoch": 0.7385555694149931, "grad_norm": 0.0, "learning_rate": 3.3762205571005236e-06, "loss": 0.9135, "step": 5921 }, { "epoch": 0.7386803043532494, "grad_norm": 0.0, "learning_rate": 3.3731943661745725e-06, "loss": 0.9087, "step": 5922 }, { "epoch": 0.7388050392915055, "grad_norm": 0.0, "learning_rate": 3.3701692569103607e-06, "loss": 0.8772, "step": 5923 }, { "epoch": 0.7389297742297618, "grad_norm": 0.0, "learning_rate": 3.3671452298016637e-06, "loss": 0.9226, "step": 5924 }, { "epoch": 0.739054509168018, "grad_norm": 0.0, "learning_rate": 3.364122285342082e-06, "loss": 0.9108, "step": 5925 }, { "epoch": 0.7391792441062741, "grad_norm": 0.0, "learning_rate": 3.3611004240250257e-06, "loss": 0.9229, "step": 5926 }, { "epoch": 0.7393039790445304, "grad_norm": 0.0, "learning_rate": 3.358079646343753e-06, "loss": 0.9232, "step": 5927 }, { "epoch": 0.7394287139827865, "grad_norm": 0.0, "learning_rate": 3.355059952791323e-06, "loss": 0.8677, "step": 5928 }, { "epoch": 0.7395534489210428, "grad_norm": 0.0, "learning_rate": 3.3520413438606215e-06, "loss": 0.8614, "step": 5929 }, { "epoch": 0.739678183859299, "grad_norm": 0.0, "learning_rate": 3.3490238200443727e-06, "loss": 0.9178, "step": 5930 }, { "epoch": 0.7398029187975552, "grad_norm": 0.0, "learning_rate": 3.3460073818351023e-06, "loss": 0.9405, "step": 5931 }, { "epoch": 0.7399276537358114, "grad_norm": 0.0, "learning_rate": 3.342992029725175e-06, "loss": 0.8983, "step": 5932 }, { "epoch": 0.7400523886740676, "grad_norm": 0.0, "learning_rate": 3.339977764206773e-06, "loss": 0.9192, "step": 5933 }, { "epoch": 0.7401771236123238, "grad_norm": 0.0, "learning_rate": 3.3369645857718958e-06, "loss": 0.8924, "step": 5934 }, { "epoch": 0.7403018585505801, "grad_norm": 0.0, "learning_rate": 3.3339524949123714e-06, "loss": 0.9372, "step": 5935 }, { "epoch": 0.7404265934888362, "grad_norm": 0.0, "learning_rate": 3.33094149211985e-06, "loss": 0.8959, "step": 5936 }, { "epoch": 0.7405513284270925, "grad_norm": 0.0, "learning_rate": 3.3279315778858034e-06, "loss": 0.8889, "step": 5937 }, { "epoch": 0.7406760633653486, "grad_norm": 0.0, "learning_rate": 3.324922752701528e-06, "loss": 0.8865, "step": 5938 }, { "epoch": 0.7408007983036048, "grad_norm": 0.0, "learning_rate": 3.321915017058135e-06, "loss": 0.9008, "step": 5939 }, { "epoch": 0.7409255332418611, "grad_norm": 0.0, "learning_rate": 3.318908371446563e-06, "loss": 0.8971, "step": 5940 }, { "epoch": 0.7410502681801172, "grad_norm": 0.0, "learning_rate": 3.3159028163575757e-06, "loss": 0.9115, "step": 5941 }, { "epoch": 0.7411750031183735, "grad_norm": 0.0, "learning_rate": 3.312898352281756e-06, "loss": 0.9124, "step": 5942 }, { "epoch": 0.7412997380566296, "grad_norm": 0.0, "learning_rate": 3.3098949797095e-06, "loss": 0.9044, "step": 5943 }, { "epoch": 0.7414244729948859, "grad_norm": 0.0, "learning_rate": 3.306892699131047e-06, "loss": 0.8948, "step": 5944 }, { "epoch": 0.741549207933142, "grad_norm": 0.0, "learning_rate": 3.3038915110364377e-06, "loss": 0.9039, "step": 5945 }, { "epoch": 0.7416739428713983, "grad_norm": 0.0, "learning_rate": 3.300891415915535e-06, "loss": 0.9319, "step": 5946 }, { "epoch": 0.7417986778096545, "grad_norm": 0.0, "learning_rate": 3.2978924142580427e-06, "loss": 0.9598, "step": 5947 }, { "epoch": 0.7419234127479107, "grad_norm": 0.0, "learning_rate": 3.2948945065534655e-06, "loss": 0.8994, "step": 5948 }, { "epoch": 0.7420481476861669, "grad_norm": 0.0, "learning_rate": 3.2918976932911385e-06, "loss": 0.8925, "step": 5949 }, { "epoch": 0.742172882624423, "grad_norm": 0.0, "learning_rate": 3.2889019749602213e-06, "loss": 0.9115, "step": 5950 }, { "epoch": 0.7422976175626793, "grad_norm": 0.0, "learning_rate": 3.2859073520496854e-06, "loss": 0.8979, "step": 5951 }, { "epoch": 0.7424223525009355, "grad_norm": 0.0, "learning_rate": 3.2829138250483304e-06, "loss": 0.948, "step": 5952 }, { "epoch": 0.7425470874391917, "grad_norm": 0.0, "learning_rate": 3.279921394444776e-06, "loss": 0.9125, "step": 5953 }, { "epoch": 0.7426718223774479, "grad_norm": 0.0, "learning_rate": 3.2769300607274625e-06, "loss": 0.9079, "step": 5954 }, { "epoch": 0.7427965573157042, "grad_norm": 0.0, "learning_rate": 3.273939824384653e-06, "loss": 0.9546, "step": 5955 }, { "epoch": 0.7429212922539603, "grad_norm": 0.0, "learning_rate": 3.2709506859044248e-06, "loss": 0.9178, "step": 5956 }, { "epoch": 0.7430460271922166, "grad_norm": 0.0, "learning_rate": 3.267962645774684e-06, "loss": 0.9348, "step": 5957 }, { "epoch": 0.7431707621304727, "grad_norm": 0.0, "learning_rate": 3.264975704483151e-06, "loss": 0.9374, "step": 5958 }, { "epoch": 0.743295497068729, "grad_norm": 0.0, "learning_rate": 3.2619898625173763e-06, "loss": 0.8804, "step": 5959 }, { "epoch": 0.7434202320069851, "grad_norm": 0.0, "learning_rate": 3.2590051203647176e-06, "loss": 0.9169, "step": 5960 }, { "epoch": 0.7435449669452414, "grad_norm": 0.0, "learning_rate": 3.256021478512362e-06, "loss": 0.8955, "step": 5961 }, { "epoch": 0.7436697018834976, "grad_norm": 0.0, "learning_rate": 3.253038937447319e-06, "loss": 0.9449, "step": 5962 }, { "epoch": 0.7437944368217537, "grad_norm": 0.0, "learning_rate": 3.250057497656406e-06, "loss": 0.851, "step": 5963 }, { "epoch": 0.74391917176001, "grad_norm": 0.0, "learning_rate": 3.2470771596262797e-06, "loss": 0.9054, "step": 5964 }, { "epoch": 0.7440439066982661, "grad_norm": 0.0, "learning_rate": 3.2440979238433977e-06, "loss": 0.9556, "step": 5965 }, { "epoch": 0.7441686416365224, "grad_norm": 0.0, "learning_rate": 3.2411197907940504e-06, "loss": 0.889, "step": 5966 }, { "epoch": 0.7442933765747786, "grad_norm": 0.0, "learning_rate": 3.238142760964348e-06, "loss": 0.9103, "step": 5967 }, { "epoch": 0.7444181115130348, "grad_norm": 0.0, "learning_rate": 3.2351668348402077e-06, "loss": 0.9121, "step": 5968 }, { "epoch": 0.744542846451291, "grad_norm": 0.0, "learning_rate": 3.2321920129073815e-06, "loss": 0.9369, "step": 5969 }, { "epoch": 0.7446675813895473, "grad_norm": 0.0, "learning_rate": 3.229218295651433e-06, "loss": 0.9098, "step": 5970 }, { "epoch": 0.7447923163278034, "grad_norm": 0.0, "learning_rate": 3.2262456835577525e-06, "loss": 0.9007, "step": 5971 }, { "epoch": 0.7449170512660597, "grad_norm": 0.0, "learning_rate": 3.2232741771115394e-06, "loss": 0.917, "step": 5972 }, { "epoch": 0.7450417862043158, "grad_norm": 0.0, "learning_rate": 3.220303776797821e-06, "loss": 0.9223, "step": 5973 }, { "epoch": 0.745166521142572, "grad_norm": 0.0, "learning_rate": 3.217334483101441e-06, "loss": 0.9149, "step": 5974 }, { "epoch": 0.7452912560808282, "grad_norm": 0.0, "learning_rate": 3.2143662965070634e-06, "loss": 0.8904, "step": 5975 }, { "epoch": 0.7454159910190844, "grad_norm": 0.0, "learning_rate": 3.2113992174991738e-06, "loss": 0.8728, "step": 5976 }, { "epoch": 0.7455407259573407, "grad_norm": 0.0, "learning_rate": 3.2084332465620692e-06, "loss": 0.8937, "step": 5977 }, { "epoch": 0.7456654608955968, "grad_norm": 0.0, "learning_rate": 3.2054683841798727e-06, "loss": 0.9415, "step": 5978 }, { "epoch": 0.7457901958338531, "grad_norm": 0.0, "learning_rate": 3.2025046308365284e-06, "loss": 0.8695, "step": 5979 }, { "epoch": 0.7459149307721092, "grad_norm": 0.0, "learning_rate": 3.1995419870157853e-06, "loss": 0.9321, "step": 5980 }, { "epoch": 0.7460396657103655, "grad_norm": 0.0, "learning_rate": 3.1965804532012357e-06, "loss": 0.8844, "step": 5981 }, { "epoch": 0.7461644006486217, "grad_norm": 0.0, "learning_rate": 3.1936200298762653e-06, "loss": 0.9502, "step": 5982 }, { "epoch": 0.7462891355868779, "grad_norm": 0.0, "learning_rate": 3.1906607175240943e-06, "loss": 0.9208, "step": 5983 }, { "epoch": 0.7464138705251341, "grad_norm": 0.0, "learning_rate": 3.187702516627761e-06, "loss": 0.9551, "step": 5984 }, { "epoch": 0.7465386054633903, "grad_norm": 0.0, "learning_rate": 3.1847454276701097e-06, "loss": 0.8954, "step": 5985 }, { "epoch": 0.7466633404016465, "grad_norm": 0.0, "learning_rate": 3.1817894511338152e-06, "loss": 0.8946, "step": 5986 }, { "epoch": 0.7467880753399027, "grad_norm": 0.0, "learning_rate": 3.1788345875013704e-06, "loss": 0.9102, "step": 5987 }, { "epoch": 0.7469128102781589, "grad_norm": 0.0, "learning_rate": 3.175880837255084e-06, "loss": 0.9249, "step": 5988 }, { "epoch": 0.7470375452164151, "grad_norm": 0.0, "learning_rate": 3.1729282008770767e-06, "loss": 0.8915, "step": 5989 }, { "epoch": 0.7471622801546713, "grad_norm": 0.0, "learning_rate": 3.169976678849296e-06, "loss": 0.9789, "step": 5990 }, { "epoch": 0.7472870150929275, "grad_norm": 0.0, "learning_rate": 3.1670262716535092e-06, "loss": 0.889, "step": 5991 }, { "epoch": 0.7474117500311838, "grad_norm": 0.0, "learning_rate": 3.1640769797712865e-06, "loss": 0.8803, "step": 5992 }, { "epoch": 0.7475364849694399, "grad_norm": 0.0, "learning_rate": 3.1611288036840393e-06, "loss": 0.8917, "step": 5993 }, { "epoch": 0.7476612199076962, "grad_norm": 0.0, "learning_rate": 3.1581817438729755e-06, "loss": 0.9205, "step": 5994 }, { "epoch": 0.7477859548459523, "grad_norm": 0.0, "learning_rate": 3.155235800819132e-06, "loss": 0.8391, "step": 5995 }, { "epoch": 0.7479106897842086, "grad_norm": 0.0, "learning_rate": 3.152290975003366e-06, "loss": 0.9229, "step": 5996 }, { "epoch": 0.7480354247224648, "grad_norm": 0.0, "learning_rate": 3.1493472669063373e-06, "loss": 0.9357, "step": 5997 }, { "epoch": 0.7481601596607209, "grad_norm": 0.0, "learning_rate": 3.1464046770085398e-06, "loss": 0.9685, "step": 5998 }, { "epoch": 0.7482848945989772, "grad_norm": 0.0, "learning_rate": 3.143463205790277e-06, "loss": 0.9484, "step": 5999 }, { "epoch": 0.7484096295372333, "grad_norm": 0.0, "learning_rate": 3.1405228537316713e-06, "loss": 0.9347, "step": 6000 }, { "epoch": 0.7485343644754896, "grad_norm": 0.0, "learning_rate": 3.1375836213126653e-06, "loss": 0.9302, "step": 6001 }, { "epoch": 0.7486590994137458, "grad_norm": 0.0, "learning_rate": 3.1346455090130103e-06, "loss": 0.9319, "step": 6002 }, { "epoch": 0.748783834352002, "grad_norm": 0.0, "learning_rate": 3.131708517312283e-06, "loss": 0.893, "step": 6003 }, { "epoch": 0.7489085692902582, "grad_norm": 0.0, "learning_rate": 3.1287726466898737e-06, "loss": 0.9326, "step": 6004 }, { "epoch": 0.7490333042285144, "grad_norm": 0.0, "learning_rate": 3.1258378976249938e-06, "loss": 0.917, "step": 6005 }, { "epoch": 0.7491580391667706, "grad_norm": 0.0, "learning_rate": 3.122904270596663e-06, "loss": 0.8947, "step": 6006 }, { "epoch": 0.7492827741050269, "grad_norm": 0.0, "learning_rate": 3.119971766083726e-06, "loss": 0.8854, "step": 6007 }, { "epoch": 0.749407509043283, "grad_norm": 0.0, "learning_rate": 3.1170403845648443e-06, "loss": 0.8804, "step": 6008 }, { "epoch": 0.7495322439815393, "grad_norm": 0.0, "learning_rate": 3.1141101265184838e-06, "loss": 0.9266, "step": 6009 }, { "epoch": 0.7496569789197954, "grad_norm": 0.0, "learning_rate": 3.11118099242295e-06, "loss": 0.8957, "step": 6010 }, { "epoch": 0.7497817138580516, "grad_norm": 0.0, "learning_rate": 3.1082529827563435e-06, "loss": 0.9164, "step": 6011 }, { "epoch": 0.7499064487963079, "grad_norm": 0.0, "learning_rate": 3.1053260979965837e-06, "loss": 0.8879, "step": 6012 }, { "epoch": 0.750031183734564, "grad_norm": 0.0, "learning_rate": 3.102400338621425e-06, "loss": 0.864, "step": 6013 }, { "epoch": 0.7501559186728203, "grad_norm": 0.0, "learning_rate": 3.099475705108417e-06, "loss": 0.9273, "step": 6014 }, { "epoch": 0.7502806536110764, "grad_norm": 0.0, "learning_rate": 3.096552197934933e-06, "loss": 0.9336, "step": 6015 }, { "epoch": 0.7504053885493327, "grad_norm": 0.0, "learning_rate": 3.0936298175781664e-06, "loss": 0.9161, "step": 6016 }, { "epoch": 0.7505301234875889, "grad_norm": 0.0, "learning_rate": 3.090708564515125e-06, "loss": 0.9161, "step": 6017 }, { "epoch": 0.7506548584258451, "grad_norm": 0.0, "learning_rate": 3.0877884392226244e-06, "loss": 0.9252, "step": 6018 }, { "epoch": 0.7507795933641013, "grad_norm": 0.0, "learning_rate": 3.0848694421773075e-06, "loss": 0.9238, "step": 6019 }, { "epoch": 0.7509043283023575, "grad_norm": 0.0, "learning_rate": 3.081951573855626e-06, "loss": 0.8933, "step": 6020 }, { "epoch": 0.7510290632406137, "grad_norm": 0.0, "learning_rate": 3.0790348347338507e-06, "loss": 0.9152, "step": 6021 }, { "epoch": 0.7511537981788698, "grad_norm": 0.0, "learning_rate": 3.076119225288071e-06, "loss": 0.9422, "step": 6022 }, { "epoch": 0.7512785331171261, "grad_norm": 0.0, "learning_rate": 3.0732047459941793e-06, "loss": 0.8952, "step": 6023 }, { "epoch": 0.7514032680553823, "grad_norm": 0.0, "learning_rate": 3.070291397327898e-06, "loss": 0.9463, "step": 6024 }, { "epoch": 0.7515280029936385, "grad_norm": 0.0, "learning_rate": 3.0673791797647602e-06, "loss": 0.9185, "step": 6025 }, { "epoch": 0.7516527379318947, "grad_norm": 0.0, "learning_rate": 3.064468093780104e-06, "loss": 0.8995, "step": 6026 }, { "epoch": 0.751777472870151, "grad_norm": 0.0, "learning_rate": 3.0615581398491056e-06, "loss": 0.8675, "step": 6027 }, { "epoch": 0.7519022078084071, "grad_norm": 0.0, "learning_rate": 3.058649318446736e-06, "loss": 0.9198, "step": 6028 }, { "epoch": 0.7520269427466634, "grad_norm": 0.0, "learning_rate": 3.0557416300477805e-06, "loss": 0.8518, "step": 6029 }, { "epoch": 0.7521516776849195, "grad_norm": 0.0, "learning_rate": 3.052835075126862e-06, "loss": 0.8997, "step": 6030 }, { "epoch": 0.7522764126231758, "grad_norm": 0.0, "learning_rate": 3.0499296541583935e-06, "loss": 0.8863, "step": 6031 }, { "epoch": 0.752401147561432, "grad_norm": 0.0, "learning_rate": 3.047025367616615e-06, "loss": 0.9342, "step": 6032 }, { "epoch": 0.7525258824996882, "grad_norm": 0.0, "learning_rate": 3.04412221597558e-06, "loss": 0.9076, "step": 6033 }, { "epoch": 0.7526506174379444, "grad_norm": 0.0, "learning_rate": 3.0412201997091596e-06, "loss": 0.8985, "step": 6034 }, { "epoch": 0.7527753523762005, "grad_norm": 0.0, "learning_rate": 3.0383193192910287e-06, "loss": 0.8797, "step": 6035 }, { "epoch": 0.7529000873144568, "grad_norm": 0.0, "learning_rate": 3.035419575194689e-06, "loss": 0.9514, "step": 6036 }, { "epoch": 0.7530248222527129, "grad_norm": 0.0, "learning_rate": 3.032520967893453e-06, "loss": 0.8931, "step": 6037 }, { "epoch": 0.7531495571909692, "grad_norm": 0.0, "learning_rate": 3.029623497860438e-06, "loss": 0.9457, "step": 6038 }, { "epoch": 0.7532742921292254, "grad_norm": 0.0, "learning_rate": 3.0267271655685958e-06, "loss": 0.9256, "step": 6039 }, { "epoch": 0.7533990270674816, "grad_norm": 0.0, "learning_rate": 3.0238319714906737e-06, "loss": 0.9153, "step": 6040 }, { "epoch": 0.7535237620057378, "grad_norm": 0.0, "learning_rate": 3.0209379160992414e-06, "loss": 0.9165, "step": 6041 }, { "epoch": 0.753648496943994, "grad_norm": 0.0, "learning_rate": 3.0180449998666862e-06, "loss": 0.899, "step": 6042 }, { "epoch": 0.7537732318822502, "grad_norm": 0.0, "learning_rate": 3.015153223265197e-06, "loss": 0.913, "step": 6043 }, { "epoch": 0.7538979668205065, "grad_norm": 0.0, "learning_rate": 3.012262586766789e-06, "loss": 0.8598, "step": 6044 }, { "epoch": 0.7540227017587626, "grad_norm": 0.0, "learning_rate": 3.0093730908432894e-06, "loss": 0.8747, "step": 6045 }, { "epoch": 0.7541474366970188, "grad_norm": 0.0, "learning_rate": 3.0064847359663284e-06, "loss": 0.8655, "step": 6046 }, { "epoch": 0.754272171635275, "grad_norm": 0.0, "learning_rate": 3.00359752260737e-06, "loss": 0.863, "step": 6047 }, { "epoch": 0.7543969065735312, "grad_norm": 0.0, "learning_rate": 3.000711451237671e-06, "loss": 0.924, "step": 6048 }, { "epoch": 0.7545216415117875, "grad_norm": 0.0, "learning_rate": 2.9978265223283152e-06, "loss": 0.9125, "step": 6049 }, { "epoch": 0.7546463764500436, "grad_norm": 0.0, "learning_rate": 2.994942736350194e-06, "loss": 0.9602, "step": 6050 }, { "epoch": 0.7547711113882999, "grad_norm": 0.0, "learning_rate": 2.9920600937740175e-06, "loss": 0.9065, "step": 6051 }, { "epoch": 0.754895846326556, "grad_norm": 0.0, "learning_rate": 2.9891785950703e-06, "loss": 0.9802, "step": 6052 }, { "epoch": 0.7550205812648123, "grad_norm": 0.0, "learning_rate": 2.986298240709378e-06, "loss": 0.9815, "step": 6053 }, { "epoch": 0.7551453162030685, "grad_norm": 0.0, "learning_rate": 2.983419031161401e-06, "loss": 0.9187, "step": 6054 }, { "epoch": 0.7552700511413247, "grad_norm": 0.0, "learning_rate": 2.980540966896317e-06, "loss": 0.9525, "step": 6055 }, { "epoch": 0.7553947860795809, "grad_norm": 0.0, "learning_rate": 2.9776640483839146e-06, "loss": 0.9127, "step": 6056 }, { "epoch": 0.7555195210178371, "grad_norm": 0.0, "learning_rate": 2.9747882760937684e-06, "loss": 0.8579, "step": 6057 }, { "epoch": 0.7556442559560933, "grad_norm": 0.0, "learning_rate": 2.9719136504952793e-06, "loss": 0.9292, "step": 6058 }, { "epoch": 0.7557689908943495, "grad_norm": 0.0, "learning_rate": 2.9690401720576634e-06, "loss": 0.8996, "step": 6059 }, { "epoch": 0.7558937258326057, "grad_norm": 0.0, "learning_rate": 2.966167841249937e-06, "loss": 0.9068, "step": 6060 }, { "epoch": 0.7560184607708619, "grad_norm": 0.0, "learning_rate": 2.9632966585409415e-06, "loss": 0.8952, "step": 6061 }, { "epoch": 0.7561431957091181, "grad_norm": 0.0, "learning_rate": 2.9604266243993287e-06, "loss": 0.9394, "step": 6062 }, { "epoch": 0.7562679306473743, "grad_norm": 0.0, "learning_rate": 2.957557739293554e-06, "loss": 0.9033, "step": 6063 }, { "epoch": 0.7563926655856306, "grad_norm": 0.0, "learning_rate": 2.9546900036918956e-06, "loss": 0.9265, "step": 6064 }, { "epoch": 0.7565174005238867, "grad_norm": 0.0, "learning_rate": 2.9518234180624393e-06, "loss": 0.9282, "step": 6065 }, { "epoch": 0.756642135462143, "grad_norm": 0.0, "learning_rate": 2.948957982873085e-06, "loss": 0.899, "step": 6066 }, { "epoch": 0.7567668704003991, "grad_norm": 0.0, "learning_rate": 2.9460936985915444e-06, "loss": 0.9529, "step": 6067 }, { "epoch": 0.7568916053386554, "grad_norm": 0.0, "learning_rate": 2.9432305656853423e-06, "loss": 0.9377, "step": 6068 }, { "epoch": 0.7570163402769116, "grad_norm": 0.0, "learning_rate": 2.940368584621809e-06, "loss": 0.9526, "step": 6069 }, { "epoch": 0.7571410752151677, "grad_norm": 0.0, "learning_rate": 2.9375077558680954e-06, "loss": 0.946, "step": 6070 }, { "epoch": 0.757265810153424, "grad_norm": 0.0, "learning_rate": 2.9346480798911636e-06, "loss": 0.9173, "step": 6071 }, { "epoch": 0.7573905450916801, "grad_norm": 0.0, "learning_rate": 2.931789557157775e-06, "loss": 0.8712, "step": 6072 }, { "epoch": 0.7575152800299364, "grad_norm": 0.0, "learning_rate": 2.9289321881345257e-06, "loss": 0.8973, "step": 6073 }, { "epoch": 0.7576400149681926, "grad_norm": 0.0, "learning_rate": 2.9260759732878042e-06, "loss": 0.8909, "step": 6074 }, { "epoch": 0.7577647499064488, "grad_norm": 0.0, "learning_rate": 2.9232209130838094e-06, "loss": 0.9203, "step": 6075 }, { "epoch": 0.757889484844705, "grad_norm": 0.0, "learning_rate": 2.9203670079885725e-06, "loss": 0.9193, "step": 6076 }, { "epoch": 0.7580142197829612, "grad_norm": 0.0, "learning_rate": 2.917514258467914e-06, "loss": 0.9116, "step": 6077 }, { "epoch": 0.7581389547212174, "grad_norm": 0.0, "learning_rate": 2.9146626649874764e-06, "loss": 0.9091, "step": 6078 }, { "epoch": 0.7582636896594737, "grad_norm": 0.0, "learning_rate": 2.9118122280127158e-06, "loss": 0.8933, "step": 6079 }, { "epoch": 0.7583884245977298, "grad_norm": 0.0, "learning_rate": 2.9089629480088886e-06, "loss": 0.9698, "step": 6080 }, { "epoch": 0.7585131595359861, "grad_norm": 0.0, "learning_rate": 2.906114825441072e-06, "loss": 0.8538, "step": 6081 }, { "epoch": 0.7586378944742422, "grad_norm": 0.0, "learning_rate": 2.9032678607741526e-06, "loss": 0.9152, "step": 6082 }, { "epoch": 0.7587626294124984, "grad_norm": 0.0, "learning_rate": 2.9004220544728243e-06, "loss": 0.9145, "step": 6083 }, { "epoch": 0.7588873643507547, "grad_norm": 0.0, "learning_rate": 2.897577407001597e-06, "loss": 0.9393, "step": 6084 }, { "epoch": 0.7590120992890108, "grad_norm": 0.0, "learning_rate": 2.8947339188247916e-06, "loss": 0.9195, "step": 6085 }, { "epoch": 0.7591368342272671, "grad_norm": 0.0, "learning_rate": 2.8918915904065305e-06, "loss": 0.9069, "step": 6086 }, { "epoch": 0.7592615691655232, "grad_norm": 0.0, "learning_rate": 2.8890504222107553e-06, "loss": 0.8874, "step": 6087 }, { "epoch": 0.7593863041037795, "grad_norm": 0.0, "learning_rate": 2.8862104147012203e-06, "loss": 0.9347, "step": 6088 }, { "epoch": 0.7595110390420357, "grad_norm": 0.0, "learning_rate": 2.8833715683414796e-06, "loss": 0.9676, "step": 6089 }, { "epoch": 0.7596357739802919, "grad_norm": 0.0, "learning_rate": 2.8805338835949093e-06, "loss": 0.9172, "step": 6090 }, { "epoch": 0.7597605089185481, "grad_norm": 0.0, "learning_rate": 2.877697360924693e-06, "loss": 0.9044, "step": 6091 }, { "epoch": 0.7598852438568043, "grad_norm": 0.0, "learning_rate": 2.874862000793812e-06, "loss": 0.9041, "step": 6092 }, { "epoch": 0.7600099787950605, "grad_norm": 0.0, "learning_rate": 2.872027803665084e-06, "loss": 0.8964, "step": 6093 }, { "epoch": 0.7601347137333166, "grad_norm": 0.0, "learning_rate": 2.8691947700011103e-06, "loss": 0.8687, "step": 6094 }, { "epoch": 0.7602594486715729, "grad_norm": 0.0, "learning_rate": 2.8663629002643157e-06, "loss": 0.9108, "step": 6095 }, { "epoch": 0.7603841836098291, "grad_norm": 0.0, "learning_rate": 2.863532194916937e-06, "loss": 0.9251, "step": 6096 }, { "epoch": 0.7605089185480853, "grad_norm": 0.0, "learning_rate": 2.8607026544210115e-06, "loss": 0.9137, "step": 6097 }, { "epoch": 0.7606336534863415, "grad_norm": 0.0, "learning_rate": 2.857874279238393e-06, "loss": 0.9575, "step": 6098 }, { "epoch": 0.7607583884245978, "grad_norm": 0.0, "learning_rate": 2.855047069830743e-06, "loss": 0.8949, "step": 6099 }, { "epoch": 0.7608831233628539, "grad_norm": 0.0, "learning_rate": 2.8522210266595386e-06, "loss": 0.8936, "step": 6100 }, { "epoch": 0.7610078583011102, "grad_norm": 0.0, "learning_rate": 2.849396150186051e-06, "loss": 0.9086, "step": 6101 }, { "epoch": 0.7611325932393663, "grad_norm": 0.0, "learning_rate": 2.8465724408713834e-06, "loss": 0.8997, "step": 6102 }, { "epoch": 0.7612573281776226, "grad_norm": 0.0, "learning_rate": 2.8437498991764266e-06, "loss": 0.9041, "step": 6103 }, { "epoch": 0.7613820631158787, "grad_norm": 0.0, "learning_rate": 2.8409285255618948e-06, "loss": 0.8882, "step": 6104 }, { "epoch": 0.761506798054135, "grad_norm": 0.0, "learning_rate": 2.83810832048831e-06, "loss": 0.8993, "step": 6105 }, { "epoch": 0.7616315329923912, "grad_norm": 0.0, "learning_rate": 2.835289284415994e-06, "loss": 0.9242, "step": 6106 }, { "epoch": 0.7617562679306473, "grad_norm": 0.0, "learning_rate": 2.8324714178050895e-06, "loss": 0.9274, "step": 6107 }, { "epoch": 0.7618810028689036, "grad_norm": 0.0, "learning_rate": 2.8296547211155435e-06, "loss": 0.8718, "step": 6108 }, { "epoch": 0.7620057378071597, "grad_norm": 0.0, "learning_rate": 2.826839194807105e-06, "loss": 0.9256, "step": 6109 }, { "epoch": 0.762130472745416, "grad_norm": 0.0, "learning_rate": 2.8240248393393498e-06, "loss": 0.9099, "step": 6110 }, { "epoch": 0.7622552076836722, "grad_norm": 0.0, "learning_rate": 2.821211655171644e-06, "loss": 0.885, "step": 6111 }, { "epoch": 0.7623799426219284, "grad_norm": 0.0, "learning_rate": 2.818399642763171e-06, "loss": 0.9123, "step": 6112 }, { "epoch": 0.7625046775601846, "grad_norm": 0.0, "learning_rate": 2.815588802572927e-06, "loss": 0.9242, "step": 6113 }, { "epoch": 0.7626294124984409, "grad_norm": 0.0, "learning_rate": 2.8127791350597067e-06, "loss": 0.975, "step": 6114 }, { "epoch": 0.762754147436697, "grad_norm": 0.0, "learning_rate": 2.809970640682119e-06, "loss": 0.9072, "step": 6115 }, { "epoch": 0.7628788823749533, "grad_norm": 0.0, "learning_rate": 2.8071633198985825e-06, "loss": 0.8814, "step": 6116 }, { "epoch": 0.7630036173132094, "grad_norm": 0.0, "learning_rate": 2.8043571731673256e-06, "loss": 0.9143, "step": 6117 }, { "epoch": 0.7631283522514656, "grad_norm": 0.0, "learning_rate": 2.8015522009463736e-06, "loss": 0.9149, "step": 6118 }, { "epoch": 0.7632530871897218, "grad_norm": 0.0, "learning_rate": 2.7987484036935797e-06, "loss": 0.9046, "step": 6119 }, { "epoch": 0.763377822127978, "grad_norm": 0.0, "learning_rate": 2.795945781866589e-06, "loss": 0.9209, "step": 6120 }, { "epoch": 0.7635025570662343, "grad_norm": 0.0, "learning_rate": 2.793144335922854e-06, "loss": 0.9731, "step": 6121 }, { "epoch": 0.7636272920044904, "grad_norm": 0.0, "learning_rate": 2.790344066319652e-06, "loss": 0.9288, "step": 6122 }, { "epoch": 0.7637520269427467, "grad_norm": 0.0, "learning_rate": 2.7875449735140516e-06, "loss": 0.9335, "step": 6123 }, { "epoch": 0.7638767618810028, "grad_norm": 0.0, "learning_rate": 2.7847470579629356e-06, "loss": 0.9211, "step": 6124 }, { "epoch": 0.7640014968192591, "grad_norm": 0.0, "learning_rate": 2.7819503201229993e-06, "loss": 0.9087, "step": 6125 }, { "epoch": 0.7641262317575153, "grad_norm": 0.0, "learning_rate": 2.779154760450732e-06, "loss": 0.9081, "step": 6126 }, { "epoch": 0.7642509666957715, "grad_norm": 0.0, "learning_rate": 2.776360379402445e-06, "loss": 0.9482, "step": 6127 }, { "epoch": 0.7643757016340277, "grad_norm": 0.0, "learning_rate": 2.7735671774342508e-06, "loss": 0.9323, "step": 6128 }, { "epoch": 0.764500436572284, "grad_norm": 0.0, "learning_rate": 2.770775155002071e-06, "loss": 0.9283, "step": 6129 }, { "epoch": 0.7646251715105401, "grad_norm": 0.0, "learning_rate": 2.767984312561637e-06, "loss": 0.9036, "step": 6130 }, { "epoch": 0.7647499064487963, "grad_norm": 0.0, "learning_rate": 2.7651946505684778e-06, "loss": 0.9386, "step": 6131 }, { "epoch": 0.7648746413870525, "grad_norm": 0.0, "learning_rate": 2.7624061694779403e-06, "loss": 0.9301, "step": 6132 }, { "epoch": 0.7649993763253087, "grad_norm": 0.0, "learning_rate": 2.7596188697451742e-06, "loss": 0.9133, "step": 6133 }, { "epoch": 0.7651241112635649, "grad_norm": 0.0, "learning_rate": 2.7568327518251405e-06, "loss": 0.9144, "step": 6134 }, { "epoch": 0.7652488462018211, "grad_norm": 0.0, "learning_rate": 2.754047816172595e-06, "loss": 0.8939, "step": 6135 }, { "epoch": 0.7653735811400774, "grad_norm": 0.0, "learning_rate": 2.751264063242122e-06, "loss": 0.8747, "step": 6136 }, { "epoch": 0.7654983160783335, "grad_norm": 0.0, "learning_rate": 2.7484814934880923e-06, "loss": 0.9412, "step": 6137 }, { "epoch": 0.7656230510165898, "grad_norm": 0.0, "learning_rate": 2.7457001073646874e-06, "loss": 0.9515, "step": 6138 }, { "epoch": 0.7657477859548459, "grad_norm": 0.0, "learning_rate": 2.7429199053259105e-06, "loss": 0.9561, "step": 6139 }, { "epoch": 0.7658725208931022, "grad_norm": 0.0, "learning_rate": 2.7401408878255518e-06, "loss": 0.8855, "step": 6140 }, { "epoch": 0.7659972558313584, "grad_norm": 0.0, "learning_rate": 2.7373630553172204e-06, "loss": 0.9259, "step": 6141 }, { "epoch": 0.7661219907696145, "grad_norm": 0.0, "learning_rate": 2.7345864082543317e-06, "loss": 0.8887, "step": 6142 }, { "epoch": 0.7662467257078708, "grad_norm": 0.0, "learning_rate": 2.7318109470900987e-06, "loss": 0.9155, "step": 6143 }, { "epoch": 0.7663714606461269, "grad_norm": 0.0, "learning_rate": 2.7290366722775486e-06, "loss": 0.8714, "step": 6144 }, { "epoch": 0.7664961955843832, "grad_norm": 0.0, "learning_rate": 2.726263584269513e-06, "loss": 0.9241, "step": 6145 }, { "epoch": 0.7666209305226394, "grad_norm": 0.0, "learning_rate": 2.7234916835186343e-06, "loss": 0.9237, "step": 6146 }, { "epoch": 0.7667456654608956, "grad_norm": 0.0, "learning_rate": 2.7207209704773485e-06, "loss": 0.9035, "step": 6147 }, { "epoch": 0.7668704003991518, "grad_norm": 0.0, "learning_rate": 2.7179514455979104e-06, "loss": 0.8991, "step": 6148 }, { "epoch": 0.766995135337408, "grad_norm": 0.0, "learning_rate": 2.7151831093323756e-06, "loss": 0.9149, "step": 6149 }, { "epoch": 0.7671198702756642, "grad_norm": 0.0, "learning_rate": 2.7124159621326074e-06, "loss": 0.9639, "step": 6150 }, { "epoch": 0.7672446052139205, "grad_norm": 0.0, "learning_rate": 2.709650004450275e-06, "loss": 0.8806, "step": 6151 }, { "epoch": 0.7673693401521766, "grad_norm": 0.0, "learning_rate": 2.706885236736848e-06, "loss": 0.9032, "step": 6152 }, { "epoch": 0.7674940750904329, "grad_norm": 0.0, "learning_rate": 2.7041216594436093e-06, "loss": 0.9161, "step": 6153 }, { "epoch": 0.767618810028689, "grad_norm": 0.0, "learning_rate": 2.7013592730216464e-06, "loss": 0.9119, "step": 6154 }, { "epoch": 0.7677435449669452, "grad_norm": 0.0, "learning_rate": 2.698598077921841e-06, "loss": 0.9148, "step": 6155 }, { "epoch": 0.7678682799052015, "grad_norm": 0.0, "learning_rate": 2.695838074594904e-06, "loss": 0.8967, "step": 6156 }, { "epoch": 0.7679930148434576, "grad_norm": 0.0, "learning_rate": 2.693079263491326e-06, "loss": 0.9111, "step": 6157 }, { "epoch": 0.7681177497817139, "grad_norm": 0.0, "learning_rate": 2.6903216450614187e-06, "loss": 0.9449, "step": 6158 }, { "epoch": 0.76824248471997, "grad_norm": 0.0, "learning_rate": 2.6875652197552993e-06, "loss": 0.895, "step": 6159 }, { "epoch": 0.7683672196582263, "grad_norm": 0.0, "learning_rate": 2.6848099880228774e-06, "loss": 0.9567, "step": 6160 }, { "epoch": 0.7684919545964825, "grad_norm": 0.0, "learning_rate": 2.6820559503138797e-06, "loss": 0.8631, "step": 6161 }, { "epoch": 0.7686166895347387, "grad_norm": 0.0, "learning_rate": 2.6793031070778363e-06, "loss": 0.909, "step": 6162 }, { "epoch": 0.7687414244729949, "grad_norm": 0.0, "learning_rate": 2.6765514587640815e-06, "loss": 0.8608, "step": 6163 }, { "epoch": 0.7688661594112511, "grad_norm": 0.0, "learning_rate": 2.67380100582175e-06, "loss": 0.9229, "step": 6164 }, { "epoch": 0.7689908943495073, "grad_norm": 0.0, "learning_rate": 2.6710517486997856e-06, "loss": 0.9197, "step": 6165 }, { "epoch": 0.7691156292877634, "grad_norm": 0.0, "learning_rate": 2.6683036878469413e-06, "loss": 0.8847, "step": 6166 }, { "epoch": 0.7692403642260197, "grad_norm": 0.0, "learning_rate": 2.6655568237117602e-06, "loss": 0.8928, "step": 6167 }, { "epoch": 0.7693650991642759, "grad_norm": 0.0, "learning_rate": 2.6628111567426107e-06, "loss": 0.9262, "step": 6168 }, { "epoch": 0.7694898341025321, "grad_norm": 0.0, "learning_rate": 2.660066687387648e-06, "loss": 0.9048, "step": 6169 }, { "epoch": 0.7696145690407883, "grad_norm": 0.0, "learning_rate": 2.6573234160948404e-06, "loss": 0.8483, "step": 6170 }, { "epoch": 0.7697393039790446, "grad_norm": 0.0, "learning_rate": 2.6545813433119615e-06, "loss": 0.9287, "step": 6171 }, { "epoch": 0.7698640389173007, "grad_norm": 0.0, "learning_rate": 2.651840469486582e-06, "loss": 0.9552, "step": 6172 }, { "epoch": 0.769988773855557, "grad_norm": 0.0, "learning_rate": 2.649100795066084e-06, "loss": 0.9319, "step": 6173 }, { "epoch": 0.7701135087938131, "grad_norm": 0.0, "learning_rate": 2.646362320497651e-06, "loss": 0.916, "step": 6174 }, { "epoch": 0.7702382437320694, "grad_norm": 0.0, "learning_rate": 2.6436250462282707e-06, "loss": 0.9052, "step": 6175 }, { "epoch": 0.7703629786703255, "grad_norm": 0.0, "learning_rate": 2.6408889727047405e-06, "loss": 0.8774, "step": 6176 }, { "epoch": 0.7704877136085818, "grad_norm": 0.0, "learning_rate": 2.6381541003736486e-06, "loss": 0.9288, "step": 6177 }, { "epoch": 0.770612448546838, "grad_norm": 0.0, "learning_rate": 2.635420429681398e-06, "loss": 0.9005, "step": 6178 }, { "epoch": 0.7707371834850941, "grad_norm": 0.0, "learning_rate": 2.632687961074194e-06, "loss": 0.9641, "step": 6179 }, { "epoch": 0.7708619184233504, "grad_norm": 0.0, "learning_rate": 2.629956694998046e-06, "loss": 0.9061, "step": 6180 }, { "epoch": 0.7709866533616065, "grad_norm": 0.0, "learning_rate": 2.6272266318987606e-06, "loss": 0.8961, "step": 6181 }, { "epoch": 0.7711113882998628, "grad_norm": 0.0, "learning_rate": 2.6244977722219557e-06, "loss": 0.9203, "step": 6182 }, { "epoch": 0.771236123238119, "grad_norm": 0.0, "learning_rate": 2.6217701164130514e-06, "loss": 0.8929, "step": 6183 }, { "epoch": 0.7713608581763752, "grad_norm": 0.0, "learning_rate": 2.6190436649172637e-06, "loss": 0.891, "step": 6184 }, { "epoch": 0.7714855931146314, "grad_norm": 0.0, "learning_rate": 2.6163184181796285e-06, "loss": 0.8932, "step": 6185 }, { "epoch": 0.7716103280528877, "grad_norm": 0.0, "learning_rate": 2.613594376644966e-06, "loss": 0.9136, "step": 6186 }, { "epoch": 0.7717350629911438, "grad_norm": 0.0, "learning_rate": 2.610871540757911e-06, "loss": 0.9202, "step": 6187 }, { "epoch": 0.7718597979294001, "grad_norm": 0.0, "learning_rate": 2.608149910962903e-06, "loss": 0.8953, "step": 6188 }, { "epoch": 0.7719845328676562, "grad_norm": 0.0, "learning_rate": 2.6054294877041732e-06, "loss": 0.9181, "step": 6189 }, { "epoch": 0.7721092678059125, "grad_norm": 0.0, "learning_rate": 2.602710271425767e-06, "loss": 0.9215, "step": 6190 }, { "epoch": 0.7722340027441686, "grad_norm": 0.0, "learning_rate": 2.599992262571529e-06, "loss": 0.8957, "step": 6191 }, { "epoch": 0.7723587376824248, "grad_norm": 0.0, "learning_rate": 2.5972754615851116e-06, "loss": 0.9027, "step": 6192 }, { "epoch": 0.7724834726206811, "grad_norm": 0.0, "learning_rate": 2.594559868909956e-06, "loss": 0.8852, "step": 6193 }, { "epoch": 0.7726082075589372, "grad_norm": 0.0, "learning_rate": 2.5918454849893202e-06, "loss": 0.9313, "step": 6194 }, { "epoch": 0.7727329424971935, "grad_norm": 0.0, "learning_rate": 2.5891323102662593e-06, "loss": 0.9337, "step": 6195 }, { "epoch": 0.7728576774354496, "grad_norm": 0.0, "learning_rate": 2.5864203451836333e-06, "loss": 0.9046, "step": 6196 }, { "epoch": 0.7729824123737059, "grad_norm": 0.0, "learning_rate": 2.583709590184105e-06, "loss": 0.9276, "step": 6197 }, { "epoch": 0.7731071473119621, "grad_norm": 0.0, "learning_rate": 2.5810000457101312e-06, "loss": 0.8807, "step": 6198 }, { "epoch": 0.7732318822502183, "grad_norm": 0.0, "learning_rate": 2.578291712203983e-06, "loss": 0.8911, "step": 6199 }, { "epoch": 0.7733566171884745, "grad_norm": 0.0, "learning_rate": 2.5755845901077305e-06, "loss": 0.8253, "step": 6200 }, { "epoch": 0.7734813521267307, "grad_norm": 0.0, "learning_rate": 2.5728786798632344e-06, "loss": 0.9254, "step": 6201 }, { "epoch": 0.7736060870649869, "grad_norm": 0.0, "learning_rate": 2.5701739819121807e-06, "loss": 0.9247, "step": 6202 }, { "epoch": 0.7737308220032431, "grad_norm": 0.0, "learning_rate": 2.5674704966960384e-06, "loss": 0.8779, "step": 6203 }, { "epoch": 0.7738555569414993, "grad_norm": 0.0, "learning_rate": 2.5647682246560778e-06, "loss": 0.8673, "step": 6204 }, { "epoch": 0.7739802918797555, "grad_norm": 0.0, "learning_rate": 2.5620671662333907e-06, "loss": 0.9657, "step": 6205 }, { "epoch": 0.7741050268180117, "grad_norm": 0.0, "learning_rate": 2.5593673218688474e-06, "loss": 0.8751, "step": 6206 }, { "epoch": 0.7742297617562679, "grad_norm": 0.0, "learning_rate": 2.556668692003135e-06, "loss": 0.9229, "step": 6207 }, { "epoch": 0.7743544966945242, "grad_norm": 0.0, "learning_rate": 2.5539712770767377e-06, "loss": 0.934, "step": 6208 }, { "epoch": 0.7744792316327803, "grad_norm": 0.0, "learning_rate": 2.5512750775299434e-06, "loss": 0.9228, "step": 6209 }, { "epoch": 0.7746039665710366, "grad_norm": 0.0, "learning_rate": 2.548580093802835e-06, "loss": 0.9161, "step": 6210 }, { "epoch": 0.7747287015092927, "grad_norm": 0.0, "learning_rate": 2.545886326335305e-06, "loss": 0.8768, "step": 6211 }, { "epoch": 0.774853436447549, "grad_norm": 0.0, "learning_rate": 2.5431937755670433e-06, "loss": 0.9018, "step": 6212 }, { "epoch": 0.7749781713858052, "grad_norm": 0.0, "learning_rate": 2.540502441937542e-06, "loss": 0.9616, "step": 6213 }, { "epoch": 0.7751029063240614, "grad_norm": 0.0, "learning_rate": 2.5378123258861e-06, "loss": 0.9065, "step": 6214 }, { "epoch": 0.7752276412623176, "grad_norm": 0.0, "learning_rate": 2.5351234278518022e-06, "loss": 0.908, "step": 6215 }, { "epoch": 0.7753523762005737, "grad_norm": 0.0, "learning_rate": 2.5324357482735495e-06, "loss": 0.9281, "step": 6216 }, { "epoch": 0.77547711113883, "grad_norm": 0.0, "learning_rate": 2.529749287590042e-06, "loss": 0.9102, "step": 6217 }, { "epoch": 0.7756018460770862, "grad_norm": 0.0, "learning_rate": 2.52706404623977e-06, "loss": 0.9195, "step": 6218 }, { "epoch": 0.7757265810153424, "grad_norm": 0.0, "learning_rate": 2.5243800246610418e-06, "loss": 0.8649, "step": 6219 }, { "epoch": 0.7758513159535986, "grad_norm": 0.0, "learning_rate": 2.5216972232919533e-06, "loss": 0.9044, "step": 6220 }, { "epoch": 0.7759760508918548, "grad_norm": 0.0, "learning_rate": 2.5190156425703993e-06, "loss": 0.9296, "step": 6221 }, { "epoch": 0.776100785830111, "grad_norm": 0.0, "learning_rate": 2.5163352829340925e-06, "loss": 0.8633, "step": 6222 }, { "epoch": 0.7762255207683673, "grad_norm": 0.0, "learning_rate": 2.5136561448205267e-06, "loss": 0.9154, "step": 6223 }, { "epoch": 0.7763502557066234, "grad_norm": 0.0, "learning_rate": 2.510978228667008e-06, "loss": 0.899, "step": 6224 }, { "epoch": 0.7764749906448797, "grad_norm": 0.0, "learning_rate": 2.50830153491064e-06, "loss": 0.9052, "step": 6225 }, { "epoch": 0.7765997255831358, "grad_norm": 0.0, "learning_rate": 2.5056260639883278e-06, "loss": 0.9156, "step": 6226 }, { "epoch": 0.776724460521392, "grad_norm": 0.0, "learning_rate": 2.5029518163367715e-06, "loss": 0.9228, "step": 6227 }, { "epoch": 0.7768491954596483, "grad_norm": 0.0, "learning_rate": 2.5002787923924797e-06, "loss": 0.8699, "step": 6228 }, { "epoch": 0.7769739303979044, "grad_norm": 0.0, "learning_rate": 2.497606992591758e-06, "loss": 0.9029, "step": 6229 }, { "epoch": 0.7770986653361607, "grad_norm": 0.0, "learning_rate": 2.4949364173707026e-06, "loss": 0.872, "step": 6230 }, { "epoch": 0.7772234002744168, "grad_norm": 0.0, "learning_rate": 2.4922670671652315e-06, "loss": 0.8817, "step": 6231 }, { "epoch": 0.7773481352126731, "grad_norm": 0.0, "learning_rate": 2.4895989424110423e-06, "loss": 0.9069, "step": 6232 }, { "epoch": 0.7774728701509293, "grad_norm": 0.0, "learning_rate": 2.4869320435436407e-06, "loss": 0.9023, "step": 6233 }, { "epoch": 0.7775976050891855, "grad_norm": 0.0, "learning_rate": 2.484266370998336e-06, "loss": 0.9081, "step": 6234 }, { "epoch": 0.7777223400274417, "grad_norm": 0.0, "learning_rate": 2.4816019252102274e-06, "loss": 0.9325, "step": 6235 }, { "epoch": 0.7778470749656979, "grad_norm": 0.0, "learning_rate": 2.4789387066142222e-06, "loss": 0.8939, "step": 6236 }, { "epoch": 0.7779718099039541, "grad_norm": 0.0, "learning_rate": 2.4762767156450285e-06, "loss": 0.9042, "step": 6237 }, { "epoch": 0.7780965448422104, "grad_norm": 0.0, "learning_rate": 2.4736159527371395e-06, "loss": 0.8853, "step": 6238 }, { "epoch": 0.7782212797804665, "grad_norm": 0.0, "learning_rate": 2.4709564183248734e-06, "loss": 0.9045, "step": 6239 }, { "epoch": 0.7783460147187227, "grad_norm": 0.0, "learning_rate": 2.4682981128423223e-06, "loss": 0.9094, "step": 6240 }, { "epoch": 0.7784707496569789, "grad_norm": 0.0, "learning_rate": 2.4656410367233928e-06, "loss": 0.8991, "step": 6241 }, { "epoch": 0.7785954845952351, "grad_norm": 0.0, "learning_rate": 2.4629851904017852e-06, "loss": 0.8825, "step": 6242 }, { "epoch": 0.7787202195334914, "grad_norm": 0.0, "learning_rate": 2.460330574311006e-06, "loss": 0.8867, "step": 6243 }, { "epoch": 0.7788449544717475, "grad_norm": 0.0, "learning_rate": 2.4576771888843478e-06, "loss": 0.8724, "step": 6244 }, { "epoch": 0.7789696894100038, "grad_norm": 0.0, "learning_rate": 2.4550250345549122e-06, "loss": 0.8971, "step": 6245 }, { "epoch": 0.7790944243482599, "grad_norm": 0.0, "learning_rate": 2.4523741117556023e-06, "loss": 0.9172, "step": 6246 }, { "epoch": 0.7792191592865162, "grad_norm": 0.0, "learning_rate": 2.4497244209191062e-06, "loss": 0.8729, "step": 6247 }, { "epoch": 0.7793438942247723, "grad_norm": 0.0, "learning_rate": 2.4470759624779317e-06, "loss": 0.8811, "step": 6248 }, { "epoch": 0.7794686291630286, "grad_norm": 0.0, "learning_rate": 2.4444287368643682e-06, "loss": 0.9145, "step": 6249 }, { "epoch": 0.7795933641012848, "grad_norm": 0.0, "learning_rate": 2.4417827445105035e-06, "loss": 0.9145, "step": 6250 }, { "epoch": 0.7797180990395409, "grad_norm": 0.0, "learning_rate": 2.439137985848242e-06, "loss": 0.8923, "step": 6251 }, { "epoch": 0.7798428339777972, "grad_norm": 0.0, "learning_rate": 2.436494461309268e-06, "loss": 0.963, "step": 6252 }, { "epoch": 0.7799675689160533, "grad_norm": 0.0, "learning_rate": 2.4338521713250717e-06, "loss": 0.8685, "step": 6253 }, { "epoch": 0.7800923038543096, "grad_norm": 0.0, "learning_rate": 2.431211116326946e-06, "loss": 0.9113, "step": 6254 }, { "epoch": 0.7802170387925658, "grad_norm": 0.0, "learning_rate": 2.428571296745971e-06, "loss": 0.8842, "step": 6255 }, { "epoch": 0.780341773730822, "grad_norm": 0.0, "learning_rate": 2.425932713013035e-06, "loss": 0.8641, "step": 6256 }, { "epoch": 0.7804665086690782, "grad_norm": 0.0, "learning_rate": 2.423295365558821e-06, "loss": 0.8839, "step": 6257 }, { "epoch": 0.7805912436073345, "grad_norm": 0.0, "learning_rate": 2.420659254813812e-06, "loss": 0.9302, "step": 6258 }, { "epoch": 0.7807159785455906, "grad_norm": 0.0, "learning_rate": 2.418024381208286e-06, "loss": 0.9567, "step": 6259 }, { "epoch": 0.7808407134838469, "grad_norm": 0.0, "learning_rate": 2.4153907451723246e-06, "loss": 0.9469, "step": 6260 }, { "epoch": 0.780965448422103, "grad_norm": 0.0, "learning_rate": 2.4127583471357974e-06, "loss": 0.909, "step": 6261 }, { "epoch": 0.7810901833603593, "grad_norm": 0.0, "learning_rate": 2.4101271875283818e-06, "loss": 0.9432, "step": 6262 }, { "epoch": 0.7812149182986154, "grad_norm": 0.0, "learning_rate": 2.4074972667795505e-06, "loss": 0.9092, "step": 6263 }, { "epoch": 0.7813396532368716, "grad_norm": 0.0, "learning_rate": 2.4048685853185673e-06, "loss": 0.9353, "step": 6264 }, { "epoch": 0.7814643881751279, "grad_norm": 0.0, "learning_rate": 2.4022411435745076e-06, "loss": 0.8972, "step": 6265 }, { "epoch": 0.781589123113384, "grad_norm": 0.0, "learning_rate": 2.3996149419762315e-06, "loss": 0.8765, "step": 6266 }, { "epoch": 0.7817138580516403, "grad_norm": 0.0, "learning_rate": 2.3969899809523955e-06, "loss": 0.8577, "step": 6267 }, { "epoch": 0.7818385929898964, "grad_norm": 0.0, "learning_rate": 2.3943662609314723e-06, "loss": 0.9306, "step": 6268 }, { "epoch": 0.7819633279281527, "grad_norm": 0.0, "learning_rate": 2.391743782341709e-06, "loss": 0.9337, "step": 6269 }, { "epoch": 0.7820880628664089, "grad_norm": 0.0, "learning_rate": 2.389122545611163e-06, "loss": 0.9105, "step": 6270 }, { "epoch": 0.7822127978046651, "grad_norm": 0.0, "learning_rate": 2.3865025511676896e-06, "loss": 0.8821, "step": 6271 }, { "epoch": 0.7823375327429213, "grad_norm": 0.0, "learning_rate": 2.383883799438933e-06, "loss": 0.8425, "step": 6272 }, { "epoch": 0.7824622676811775, "grad_norm": 0.0, "learning_rate": 2.3812662908523407e-06, "loss": 0.8852, "step": 6273 }, { "epoch": 0.7825870026194337, "grad_norm": 0.0, "learning_rate": 2.3786500258351576e-06, "loss": 0.9224, "step": 6274 }, { "epoch": 0.7827117375576899, "grad_norm": 0.0, "learning_rate": 2.3760350048144253e-06, "loss": 0.8413, "step": 6275 }, { "epoch": 0.7828364724959461, "grad_norm": 0.0, "learning_rate": 2.373421228216974e-06, "loss": 0.8523, "step": 6276 }, { "epoch": 0.7829612074342023, "grad_norm": 0.0, "learning_rate": 2.37080869646945e-06, "loss": 0.964, "step": 6277 }, { "epoch": 0.7830859423724585, "grad_norm": 0.0, "learning_rate": 2.368197409998273e-06, "loss": 0.8717, "step": 6278 }, { "epoch": 0.7832106773107147, "grad_norm": 0.0, "learning_rate": 2.365587369229676e-06, "loss": 0.9056, "step": 6279 }, { "epoch": 0.783335412248971, "grad_norm": 0.0, "learning_rate": 2.362978574589686e-06, "loss": 0.8971, "step": 6280 }, { "epoch": 0.7834601471872271, "grad_norm": 0.0, "learning_rate": 2.360371026504118e-06, "loss": 0.9048, "step": 6281 }, { "epoch": 0.7835848821254834, "grad_norm": 0.0, "learning_rate": 2.357764725398591e-06, "loss": 0.9292, "step": 6282 }, { "epoch": 0.7837096170637395, "grad_norm": 0.0, "learning_rate": 2.355159671698526e-06, "loss": 0.9525, "step": 6283 }, { "epoch": 0.7838343520019958, "grad_norm": 0.0, "learning_rate": 2.3525558658291203e-06, "loss": 0.9014, "step": 6284 }, { "epoch": 0.783959086940252, "grad_norm": 0.0, "learning_rate": 2.3499533082153937e-06, "loss": 0.9452, "step": 6285 }, { "epoch": 0.7840838218785082, "grad_norm": 0.0, "learning_rate": 2.3473519992821427e-06, "loss": 0.9125, "step": 6286 }, { "epoch": 0.7842085568167644, "grad_norm": 0.0, "learning_rate": 2.3447519394539654e-06, "loss": 0.9, "step": 6287 }, { "epoch": 0.7843332917550205, "grad_norm": 0.0, "learning_rate": 2.3421531291552636e-06, "loss": 0.8562, "step": 6288 }, { "epoch": 0.7844580266932768, "grad_norm": 0.0, "learning_rate": 2.339555568810221e-06, "loss": 0.9, "step": 6289 }, { "epoch": 0.784582761631533, "grad_norm": 0.0, "learning_rate": 2.336959258842828e-06, "loss": 0.9028, "step": 6290 }, { "epoch": 0.7847074965697892, "grad_norm": 0.0, "learning_rate": 2.3343641996768686e-06, "loss": 0.9134, "step": 6291 }, { "epoch": 0.7848322315080454, "grad_norm": 0.0, "learning_rate": 2.331770391735925e-06, "loss": 0.9179, "step": 6292 }, { "epoch": 0.7849569664463016, "grad_norm": 0.0, "learning_rate": 2.3291778354433614e-06, "loss": 0.8765, "step": 6293 }, { "epoch": 0.7850817013845578, "grad_norm": 0.0, "learning_rate": 2.3265865312223612e-06, "loss": 0.8591, "step": 6294 }, { "epoch": 0.7852064363228141, "grad_norm": 0.0, "learning_rate": 2.3239964794958825e-06, "loss": 0.9264, "step": 6295 }, { "epoch": 0.7853311712610702, "grad_norm": 0.0, "learning_rate": 2.321407680686689e-06, "loss": 0.9015, "step": 6296 }, { "epoch": 0.7854559061993265, "grad_norm": 0.0, "learning_rate": 2.318820135217341e-06, "loss": 0.9097, "step": 6297 }, { "epoch": 0.7855806411375826, "grad_norm": 0.0, "learning_rate": 2.316233843510186e-06, "loss": 0.9086, "step": 6298 }, { "epoch": 0.7857053760758388, "grad_norm": 0.0, "learning_rate": 2.313648805987374e-06, "loss": 0.8724, "step": 6299 }, { "epoch": 0.7858301110140951, "grad_norm": 0.0, "learning_rate": 2.311065023070852e-06, "loss": 0.9049, "step": 6300 }, { "epoch": 0.7859548459523512, "grad_norm": 0.0, "learning_rate": 2.3084824951823515e-06, "loss": 0.9201, "step": 6301 }, { "epoch": 0.7860795808906075, "grad_norm": 0.0, "learning_rate": 2.305901222743411e-06, "loss": 0.9356, "step": 6302 }, { "epoch": 0.7862043158288636, "grad_norm": 0.0, "learning_rate": 2.3033212061753562e-06, "loss": 0.875, "step": 6303 }, { "epoch": 0.7863290507671199, "grad_norm": 0.0, "learning_rate": 2.3007424458993143e-06, "loss": 0.8912, "step": 6304 }, { "epoch": 0.786453785705376, "grad_norm": 0.0, "learning_rate": 2.298164942336205e-06, "loss": 0.8785, "step": 6305 }, { "epoch": 0.7865785206436323, "grad_norm": 0.0, "learning_rate": 2.2955886959067363e-06, "loss": 0.9298, "step": 6306 }, { "epoch": 0.7867032555818885, "grad_norm": 0.0, "learning_rate": 2.2930137070314196e-06, "loss": 0.9677, "step": 6307 }, { "epoch": 0.7868279905201447, "grad_norm": 0.0, "learning_rate": 2.2904399761305573e-06, "loss": 0.8513, "step": 6308 }, { "epoch": 0.7869527254584009, "grad_norm": 0.0, "learning_rate": 2.2878675036242515e-06, "loss": 0.8998, "step": 6309 }, { "epoch": 0.7870774603966572, "grad_norm": 0.0, "learning_rate": 2.285296289932385e-06, "loss": 0.8901, "step": 6310 }, { "epoch": 0.7872021953349133, "grad_norm": 0.0, "learning_rate": 2.2827263354746553e-06, "loss": 0.9237, "step": 6311 }, { "epoch": 0.7873269302731695, "grad_norm": 0.0, "learning_rate": 2.2801576406705404e-06, "loss": 0.8631, "step": 6312 }, { "epoch": 0.7874516652114257, "grad_norm": 0.0, "learning_rate": 2.2775902059393087e-06, "loss": 0.9339, "step": 6313 }, { "epoch": 0.7875764001496819, "grad_norm": 0.0, "learning_rate": 2.2750240317000417e-06, "loss": 0.8978, "step": 6314 }, { "epoch": 0.7877011350879382, "grad_norm": 0.0, "learning_rate": 2.272459118371595e-06, "loss": 0.9348, "step": 6315 }, { "epoch": 0.7878258700261943, "grad_norm": 0.0, "learning_rate": 2.26989546637263e-06, "loss": 0.8874, "step": 6316 }, { "epoch": 0.7879506049644506, "grad_norm": 0.0, "learning_rate": 2.267333076121604e-06, "loss": 0.9024, "step": 6317 }, { "epoch": 0.7880753399027067, "grad_norm": 0.0, "learning_rate": 2.264771948036756e-06, "loss": 0.9054, "step": 6318 }, { "epoch": 0.788200074840963, "grad_norm": 0.0, "learning_rate": 2.262212082536129e-06, "loss": 0.9462, "step": 6319 }, { "epoch": 0.7883248097792191, "grad_norm": 0.0, "learning_rate": 2.2596534800375604e-06, "loss": 0.9321, "step": 6320 }, { "epoch": 0.7884495447174754, "grad_norm": 0.0, "learning_rate": 2.2570961409586756e-06, "loss": 0.9075, "step": 6321 }, { "epoch": 0.7885742796557316, "grad_norm": 0.0, "learning_rate": 2.2545400657169013e-06, "loss": 0.9127, "step": 6322 }, { "epoch": 0.7886990145939877, "grad_norm": 0.0, "learning_rate": 2.2519852547294473e-06, "loss": 0.9158, "step": 6323 }, { "epoch": 0.788823749532244, "grad_norm": 0.0, "learning_rate": 2.2494317084133265e-06, "loss": 0.8903, "step": 6324 }, { "epoch": 0.7889484844705001, "grad_norm": 0.0, "learning_rate": 2.246879427185341e-06, "loss": 0.9308, "step": 6325 }, { "epoch": 0.7890732194087564, "grad_norm": 0.0, "learning_rate": 2.2443284114620913e-06, "loss": 0.8902, "step": 6326 }, { "epoch": 0.7891979543470126, "grad_norm": 0.0, "learning_rate": 2.2417786616599613e-06, "loss": 0.8977, "step": 6327 }, { "epoch": 0.7893226892852688, "grad_norm": 0.0, "learning_rate": 2.2392301781951374e-06, "loss": 0.891, "step": 6328 }, { "epoch": 0.789447424223525, "grad_norm": 0.0, "learning_rate": 2.2366829614835995e-06, "loss": 0.8803, "step": 6329 }, { "epoch": 0.7895721591617813, "grad_norm": 0.0, "learning_rate": 2.234137011941108e-06, "loss": 0.885, "step": 6330 }, { "epoch": 0.7896968941000374, "grad_norm": 0.0, "learning_rate": 2.2315923299832388e-06, "loss": 0.9197, "step": 6331 }, { "epoch": 0.7898216290382937, "grad_norm": 0.0, "learning_rate": 2.2290489160253383e-06, "loss": 0.8835, "step": 6332 }, { "epoch": 0.7899463639765498, "grad_norm": 0.0, "learning_rate": 2.2265067704825593e-06, "loss": 0.9336, "step": 6333 }, { "epoch": 0.7900710989148061, "grad_norm": 0.0, "learning_rate": 2.223965893769847e-06, "loss": 0.889, "step": 6334 }, { "epoch": 0.7901958338530622, "grad_norm": 0.0, "learning_rate": 2.221426286301931e-06, "loss": 0.8873, "step": 6335 }, { "epoch": 0.7903205687913184, "grad_norm": 0.0, "learning_rate": 2.218887948493341e-06, "loss": 0.9146, "step": 6336 }, { "epoch": 0.7904453037295747, "grad_norm": 0.0, "learning_rate": 2.2163508807584e-06, "loss": 0.8991, "step": 6337 }, { "epoch": 0.7905700386678308, "grad_norm": 0.0, "learning_rate": 2.2138150835112217e-06, "loss": 0.8985, "step": 6338 }, { "epoch": 0.7906947736060871, "grad_norm": 0.0, "learning_rate": 2.211280557165708e-06, "loss": 0.9184, "step": 6339 }, { "epoch": 0.7908195085443432, "grad_norm": 0.0, "learning_rate": 2.20874730213556e-06, "loss": 0.8619, "step": 6340 }, { "epoch": 0.7909442434825995, "grad_norm": 0.0, "learning_rate": 2.206215318834268e-06, "loss": 0.8868, "step": 6341 }, { "epoch": 0.7910689784208557, "grad_norm": 0.0, "learning_rate": 2.203684607675116e-06, "loss": 0.9263, "step": 6342 }, { "epoch": 0.7911937133591119, "grad_norm": 0.0, "learning_rate": 2.201155169071184e-06, "loss": 0.9139, "step": 6343 }, { "epoch": 0.7913184482973681, "grad_norm": 0.0, "learning_rate": 2.198627003435332e-06, "loss": 0.9152, "step": 6344 }, { "epoch": 0.7914431832356243, "grad_norm": 0.0, "learning_rate": 2.196100111180225e-06, "loss": 0.9046, "step": 6345 }, { "epoch": 0.7915679181738805, "grad_norm": 0.0, "learning_rate": 2.193574492718319e-06, "loss": 0.9297, "step": 6346 }, { "epoch": 0.7916926531121367, "grad_norm": 0.0, "learning_rate": 2.1910501484618473e-06, "loss": 0.9004, "step": 6347 }, { "epoch": 0.7918173880503929, "grad_norm": 0.0, "learning_rate": 2.188527078822861e-06, "loss": 0.9782, "step": 6348 }, { "epoch": 0.7919421229886491, "grad_norm": 0.0, "learning_rate": 2.1860052842131775e-06, "loss": 0.8592, "step": 6349 }, { "epoch": 0.7920668579269053, "grad_norm": 0.0, "learning_rate": 2.1834847650444223e-06, "loss": 0.9235, "step": 6350 }, { "epoch": 0.7921915928651615, "grad_norm": 0.0, "learning_rate": 2.180965521728009e-06, "loss": 0.8872, "step": 6351 }, { "epoch": 0.7923163278034178, "grad_norm": 0.0, "learning_rate": 2.178447554675136e-06, "loss": 0.9169, "step": 6352 }, { "epoch": 0.7924410627416739, "grad_norm": 0.0, "learning_rate": 2.1759308642968024e-06, "loss": 0.8938, "step": 6353 }, { "epoch": 0.7925657976799302, "grad_norm": 0.0, "learning_rate": 2.1734154510037965e-06, "loss": 0.8601, "step": 6354 }, { "epoch": 0.7926905326181863, "grad_norm": 0.0, "learning_rate": 2.170901315206698e-06, "loss": 0.9305, "step": 6355 }, { "epoch": 0.7928152675564426, "grad_norm": 0.0, "learning_rate": 2.1683884573158732e-06, "loss": 0.8768, "step": 6356 }, { "epoch": 0.7929400024946988, "grad_norm": 0.0, "learning_rate": 2.165876877741485e-06, "loss": 0.9486, "step": 6357 }, { "epoch": 0.793064737432955, "grad_norm": 0.0, "learning_rate": 2.163366576893491e-06, "loss": 0.8676, "step": 6358 }, { "epoch": 0.7931894723712112, "grad_norm": 0.0, "learning_rate": 2.1608575551816257e-06, "loss": 0.9228, "step": 6359 }, { "epoch": 0.7933142073094673, "grad_norm": 0.0, "learning_rate": 2.158349813015438e-06, "loss": 0.8627, "step": 6360 }, { "epoch": 0.7934389422477236, "grad_norm": 0.0, "learning_rate": 2.155843350804243e-06, "loss": 0.9327, "step": 6361 }, { "epoch": 0.7935636771859798, "grad_norm": 0.0, "learning_rate": 2.153338168957165e-06, "loss": 0.8848, "step": 6362 }, { "epoch": 0.793688412124236, "grad_norm": 0.0, "learning_rate": 2.150834267883113e-06, "loss": 0.9029, "step": 6363 }, { "epoch": 0.7938131470624922, "grad_norm": 0.0, "learning_rate": 2.1483316479907824e-06, "loss": 0.9516, "step": 6364 }, { "epoch": 0.7939378820007484, "grad_norm": 0.0, "learning_rate": 2.1458303096886657e-06, "loss": 0.9197, "step": 6365 }, { "epoch": 0.7940626169390046, "grad_norm": 0.0, "learning_rate": 2.1433302533850453e-06, "loss": 0.9197, "step": 6366 }, { "epoch": 0.7941873518772609, "grad_norm": 0.0, "learning_rate": 2.1408314794879925e-06, "loss": 0.9042, "step": 6367 }, { "epoch": 0.794312086815517, "grad_norm": 0.0, "learning_rate": 2.1383339884053743e-06, "loss": 0.9341, "step": 6368 }, { "epoch": 0.7944368217537733, "grad_norm": 0.0, "learning_rate": 2.135837780544838e-06, "loss": 0.8793, "step": 6369 }, { "epoch": 0.7945615566920294, "grad_norm": 0.0, "learning_rate": 2.1333428563138304e-06, "loss": 0.8937, "step": 6370 }, { "epoch": 0.7946862916302856, "grad_norm": 0.0, "learning_rate": 2.1308492161195848e-06, "loss": 0.8805, "step": 6371 }, { "epoch": 0.7948110265685419, "grad_norm": 0.0, "learning_rate": 2.1283568603691306e-06, "loss": 0.9824, "step": 6372 }, { "epoch": 0.794935761506798, "grad_norm": 0.0, "learning_rate": 2.125865789469278e-06, "loss": 0.9077, "step": 6373 }, { "epoch": 0.7950604964450543, "grad_norm": 0.0, "learning_rate": 2.1233760038266336e-06, "loss": 0.8802, "step": 6374 }, { "epoch": 0.7951852313833104, "grad_norm": 0.0, "learning_rate": 2.120887503847596e-06, "loss": 0.9124, "step": 6375 }, { "epoch": 0.7953099663215667, "grad_norm": 0.0, "learning_rate": 2.118400289938345e-06, "loss": 0.8788, "step": 6376 }, { "epoch": 0.7954347012598229, "grad_norm": 0.0, "learning_rate": 2.1159143625048652e-06, "loss": 0.9083, "step": 6377 }, { "epoch": 0.7955594361980791, "grad_norm": 0.0, "learning_rate": 2.1134297219529155e-06, "loss": 0.8723, "step": 6378 }, { "epoch": 0.7956841711363353, "grad_norm": 0.0, "learning_rate": 2.110946368688055e-06, "loss": 0.9095, "step": 6379 }, { "epoch": 0.7958089060745915, "grad_norm": 0.0, "learning_rate": 2.1084643031156306e-06, "loss": 0.9159, "step": 6380 }, { "epoch": 0.7959336410128477, "grad_norm": 0.0, "learning_rate": 2.1059835256407746e-06, "loss": 0.923, "step": 6381 }, { "epoch": 0.796058375951104, "grad_norm": 0.0, "learning_rate": 2.1035040366684145e-06, "loss": 0.9278, "step": 6382 }, { "epoch": 0.7961831108893601, "grad_norm": 0.0, "learning_rate": 2.1010258366032645e-06, "loss": 0.8936, "step": 6383 }, { "epoch": 0.7963078458276163, "grad_norm": 0.0, "learning_rate": 2.0985489258498326e-06, "loss": 0.9301, "step": 6384 }, { "epoch": 0.7964325807658725, "grad_norm": 0.0, "learning_rate": 2.0960733048124082e-06, "loss": 0.8908, "step": 6385 }, { "epoch": 0.7965573157041287, "grad_norm": 0.0, "learning_rate": 2.093598973895078e-06, "loss": 0.9054, "step": 6386 }, { "epoch": 0.796682050642385, "grad_norm": 0.0, "learning_rate": 2.091125933501713e-06, "loss": 0.8583, "step": 6387 }, { "epoch": 0.7968067855806411, "grad_norm": 0.0, "learning_rate": 2.0886541840359776e-06, "loss": 0.8937, "step": 6388 }, { "epoch": 0.7969315205188974, "grad_norm": 0.0, "learning_rate": 2.0861837259013262e-06, "loss": 0.9192, "step": 6389 }, { "epoch": 0.7970562554571535, "grad_norm": 0.0, "learning_rate": 2.0837145595009946e-06, "loss": 0.9333, "step": 6390 }, { "epoch": 0.7971809903954098, "grad_norm": 0.0, "learning_rate": 2.0812466852380153e-06, "loss": 0.9235, "step": 6391 }, { "epoch": 0.797305725333666, "grad_norm": 0.0, "learning_rate": 2.07878010351521e-06, "loss": 0.9168, "step": 6392 }, { "epoch": 0.7974304602719222, "grad_norm": 0.0, "learning_rate": 2.0763148147351785e-06, "loss": 0.895, "step": 6393 }, { "epoch": 0.7975551952101784, "grad_norm": 0.0, "learning_rate": 2.0738508193003305e-06, "loss": 0.9073, "step": 6394 }, { "epoch": 0.7976799301484345, "grad_norm": 0.0, "learning_rate": 2.0713881176128438e-06, "loss": 0.8956, "step": 6395 }, { "epoch": 0.7978046650866908, "grad_norm": 0.0, "learning_rate": 2.0689267100746947e-06, "loss": 0.9083, "step": 6396 }, { "epoch": 0.7979294000249469, "grad_norm": 0.0, "learning_rate": 2.0664665970876496e-06, "loss": 0.862, "step": 6397 }, { "epoch": 0.7980541349632032, "grad_norm": 0.0, "learning_rate": 2.0640077790532576e-06, "loss": 0.8689, "step": 6398 }, { "epoch": 0.7981788699014594, "grad_norm": 0.0, "learning_rate": 2.061550256372861e-06, "loss": 0.9156, "step": 6399 }, { "epoch": 0.7983036048397156, "grad_norm": 0.0, "learning_rate": 2.0590940294475893e-06, "loss": 0.8484, "step": 6400 }, { "epoch": 0.7984283397779718, "grad_norm": 0.0, "learning_rate": 2.0566390986783646e-06, "loss": 0.9556, "step": 6401 }, { "epoch": 0.798553074716228, "grad_norm": 0.0, "learning_rate": 2.0541854644658867e-06, "loss": 0.9226, "step": 6402 }, { "epoch": 0.7986778096544842, "grad_norm": 0.0, "learning_rate": 2.0517331272106543e-06, "loss": 0.9098, "step": 6403 }, { "epoch": 0.7988025445927405, "grad_norm": 0.0, "learning_rate": 2.0492820873129528e-06, "loss": 0.9269, "step": 6404 }, { "epoch": 0.7989272795309966, "grad_norm": 0.0, "learning_rate": 2.0468323451728446e-06, "loss": 0.9249, "step": 6405 }, { "epoch": 0.7990520144692529, "grad_norm": 0.0, "learning_rate": 2.0443839011902023e-06, "loss": 0.8758, "step": 6406 }, { "epoch": 0.799176749407509, "grad_norm": 0.0, "learning_rate": 2.0419367557646653e-06, "loss": 0.9091, "step": 6407 }, { "epoch": 0.7993014843457652, "grad_norm": 0.0, "learning_rate": 2.039490909295669e-06, "loss": 0.9286, "step": 6408 }, { "epoch": 0.7994262192840215, "grad_norm": 0.0, "learning_rate": 2.0370463621824445e-06, "loss": 0.8951, "step": 6409 }, { "epoch": 0.7995509542222776, "grad_norm": 0.0, "learning_rate": 2.0346031148239954e-06, "loss": 0.8587, "step": 6410 }, { "epoch": 0.7996756891605339, "grad_norm": 0.0, "learning_rate": 2.0321611676191244e-06, "loss": 0.9462, "step": 6411 }, { "epoch": 0.79980042409879, "grad_norm": 0.0, "learning_rate": 2.0297205209664194e-06, "loss": 0.908, "step": 6412 }, { "epoch": 0.7999251590370463, "grad_norm": 0.0, "learning_rate": 2.027281175264254e-06, "loss": 0.9498, "step": 6413 }, { "epoch": 0.8000498939753025, "grad_norm": 0.0, "learning_rate": 2.024843130910795e-06, "loss": 0.9254, "step": 6414 }, { "epoch": 0.8001746289135587, "grad_norm": 0.0, "learning_rate": 2.0224063883039868e-06, "loss": 0.8991, "step": 6415 }, { "epoch": 0.8002993638518149, "grad_norm": 0.0, "learning_rate": 2.0199709478415707e-06, "loss": 0.8747, "step": 6416 }, { "epoch": 0.8004240987900711, "grad_norm": 0.0, "learning_rate": 2.01753680992107e-06, "loss": 0.9521, "step": 6417 }, { "epoch": 0.8005488337283273, "grad_norm": 0.0, "learning_rate": 2.0151039749398027e-06, "loss": 0.9352, "step": 6418 }, { "epoch": 0.8006735686665835, "grad_norm": 0.0, "learning_rate": 2.012672443294861e-06, "loss": 0.8717, "step": 6419 }, { "epoch": 0.8007983036048397, "grad_norm": 0.0, "learning_rate": 2.010242215383137e-06, "loss": 0.8619, "step": 6420 }, { "epoch": 0.8009230385430959, "grad_norm": 0.0, "learning_rate": 2.007813291601306e-06, "loss": 0.8839, "step": 6421 }, { "epoch": 0.8010477734813521, "grad_norm": 0.0, "learning_rate": 2.005385672345822e-06, "loss": 0.8976, "step": 6422 }, { "epoch": 0.8011725084196083, "grad_norm": 0.0, "learning_rate": 2.002959358012946e-06, "loss": 0.9293, "step": 6423 }, { "epoch": 0.8012972433578646, "grad_norm": 0.0, "learning_rate": 2.0005343489987038e-06, "loss": 0.8669, "step": 6424 }, { "epoch": 0.8014219782961207, "grad_norm": 0.0, "learning_rate": 1.9981106456989207e-06, "loss": 0.9532, "step": 6425 }, { "epoch": 0.801546713234377, "grad_norm": 0.0, "learning_rate": 1.99568824850921e-06, "loss": 0.8662, "step": 6426 }, { "epoch": 0.8016714481726331, "grad_norm": 0.0, "learning_rate": 1.993267157824962e-06, "loss": 0.8719, "step": 6427 }, { "epoch": 0.8017961831108894, "grad_norm": 0.0, "learning_rate": 1.9908473740413612e-06, "loss": 0.8969, "step": 6428 }, { "epoch": 0.8019209180491456, "grad_norm": 0.0, "learning_rate": 1.9884288975533806e-06, "loss": 0.8931, "step": 6429 }, { "epoch": 0.8020456529874018, "grad_norm": 0.0, "learning_rate": 1.9860117287557756e-06, "loss": 0.901, "step": 6430 }, { "epoch": 0.802170387925658, "grad_norm": 0.0, "learning_rate": 1.9835958680430854e-06, "loss": 0.9038, "step": 6431 }, { "epoch": 0.8022951228639141, "grad_norm": 0.0, "learning_rate": 1.9811813158096427e-06, "loss": 0.906, "step": 6432 }, { "epoch": 0.8024198578021704, "grad_norm": 0.0, "learning_rate": 1.9787680724495617e-06, "loss": 0.8885, "step": 6433 }, { "epoch": 0.8025445927404266, "grad_norm": 0.0, "learning_rate": 1.9763561383567463e-06, "loss": 0.9115, "step": 6434 }, { "epoch": 0.8026693276786828, "grad_norm": 0.0, "learning_rate": 1.9739455139248864e-06, "loss": 0.9305, "step": 6435 }, { "epoch": 0.802794062616939, "grad_norm": 0.0, "learning_rate": 1.971536199547451e-06, "loss": 0.8825, "step": 6436 }, { "epoch": 0.8029187975551952, "grad_norm": 0.0, "learning_rate": 1.9691281956177054e-06, "loss": 0.925, "step": 6437 }, { "epoch": 0.8030435324934514, "grad_norm": 0.0, "learning_rate": 1.966721502528697e-06, "loss": 0.9341, "step": 6438 }, { "epoch": 0.8031682674317077, "grad_norm": 0.0, "learning_rate": 1.964316120673252e-06, "loss": 0.8979, "step": 6439 }, { "epoch": 0.8032930023699638, "grad_norm": 0.0, "learning_rate": 1.9619120504440004e-06, "loss": 0.9135, "step": 6440 }, { "epoch": 0.8034177373082201, "grad_norm": 0.0, "learning_rate": 1.9595092922333414e-06, "loss": 0.8712, "step": 6441 }, { "epoch": 0.8035424722464762, "grad_norm": 0.0, "learning_rate": 1.957107846433459e-06, "loss": 0.8463, "step": 6442 }, { "epoch": 0.8036672071847324, "grad_norm": 0.0, "learning_rate": 1.9547077134363423e-06, "loss": 0.8886, "step": 6443 }, { "epoch": 0.8037919421229887, "grad_norm": 0.0, "learning_rate": 1.9523088936337453e-06, "loss": 0.9117, "step": 6444 }, { "epoch": 0.8039166770612448, "grad_norm": 0.0, "learning_rate": 1.949911387417217e-06, "loss": 0.9733, "step": 6445 }, { "epoch": 0.8040414119995011, "grad_norm": 0.0, "learning_rate": 1.9475151951780956e-06, "loss": 0.9316, "step": 6446 }, { "epoch": 0.8041661469377572, "grad_norm": 0.0, "learning_rate": 1.945120317307493e-06, "loss": 0.8964, "step": 6447 }, { "epoch": 0.8042908818760135, "grad_norm": 0.0, "learning_rate": 1.9427267541963167e-06, "loss": 0.9312, "step": 6448 }, { "epoch": 0.8044156168142697, "grad_norm": 0.0, "learning_rate": 1.9403345062352574e-06, "loss": 0.9094, "step": 6449 }, { "epoch": 0.8045403517525259, "grad_norm": 0.0, "learning_rate": 1.937943573814789e-06, "loss": 0.9164, "step": 6450 }, { "epoch": 0.8046650866907821, "grad_norm": 0.0, "learning_rate": 1.9355539573251737e-06, "loss": 0.8984, "step": 6451 }, { "epoch": 0.8047898216290383, "grad_norm": 0.0, "learning_rate": 1.9331656571564582e-06, "loss": 0.9022, "step": 6452 }, { "epoch": 0.8049145565672945, "grad_norm": 0.0, "learning_rate": 1.930778673698468e-06, "loss": 0.945, "step": 6453 }, { "epoch": 0.8050392915055508, "grad_norm": 0.0, "learning_rate": 1.928393007340823e-06, "loss": 0.8845, "step": 6454 }, { "epoch": 0.8051640264438069, "grad_norm": 0.0, "learning_rate": 1.9260086584729253e-06, "loss": 0.8993, "step": 6455 }, { "epoch": 0.8052887613820631, "grad_norm": 0.0, "learning_rate": 1.923625627483956e-06, "loss": 0.891, "step": 6456 }, { "epoch": 0.8054134963203193, "grad_norm": 0.0, "learning_rate": 1.9212439147628893e-06, "loss": 0.9146, "step": 6457 }, { "epoch": 0.8055382312585755, "grad_norm": 0.0, "learning_rate": 1.9188635206984818e-06, "loss": 0.9154, "step": 6458 }, { "epoch": 0.8056629661968318, "grad_norm": 0.0, "learning_rate": 1.9164844456792675e-06, "loss": 0.911, "step": 6459 }, { "epoch": 0.8057877011350879, "grad_norm": 0.0, "learning_rate": 1.914106690093581e-06, "loss": 0.9082, "step": 6460 }, { "epoch": 0.8059124360733442, "grad_norm": 0.0, "learning_rate": 1.9117302543295235e-06, "loss": 0.8648, "step": 6461 }, { "epoch": 0.8060371710116003, "grad_norm": 0.0, "learning_rate": 1.909355138774994e-06, "loss": 0.9046, "step": 6462 }, { "epoch": 0.8061619059498566, "grad_norm": 0.0, "learning_rate": 1.906981343817671e-06, "loss": 0.8904, "step": 6463 }, { "epoch": 0.8062866408881127, "grad_norm": 0.0, "learning_rate": 1.904608869845015e-06, "loss": 0.9428, "step": 6464 }, { "epoch": 0.806411375826369, "grad_norm": 0.0, "learning_rate": 1.9022377172442753e-06, "loss": 0.9265, "step": 6465 }, { "epoch": 0.8065361107646252, "grad_norm": 0.0, "learning_rate": 1.8998678864024823e-06, "loss": 0.907, "step": 6466 }, { "epoch": 0.8066608457028813, "grad_norm": 0.0, "learning_rate": 1.8974993777064566e-06, "loss": 0.9047, "step": 6467 }, { "epoch": 0.8067855806411376, "grad_norm": 0.0, "learning_rate": 1.89513219154279e-06, "loss": 0.8617, "step": 6468 }, { "epoch": 0.8069103155793937, "grad_norm": 0.0, "learning_rate": 1.8927663282978781e-06, "loss": 0.9459, "step": 6469 }, { "epoch": 0.80703505051765, "grad_norm": 0.0, "learning_rate": 1.8904017883578806e-06, "loss": 0.9275, "step": 6470 }, { "epoch": 0.8071597854559062, "grad_norm": 0.0, "learning_rate": 1.8880385721087534e-06, "loss": 0.9517, "step": 6471 }, { "epoch": 0.8072845203941624, "grad_norm": 0.0, "learning_rate": 1.8856766799362346e-06, "loss": 0.9355, "step": 6472 }, { "epoch": 0.8074092553324186, "grad_norm": 0.0, "learning_rate": 1.8833161122258404e-06, "loss": 0.9395, "step": 6473 }, { "epoch": 0.8075339902706749, "grad_norm": 0.0, "learning_rate": 1.8809568693628765e-06, "loss": 0.9226, "step": 6474 }, { "epoch": 0.807658725208931, "grad_norm": 0.0, "learning_rate": 1.8785989517324355e-06, "loss": 0.9188, "step": 6475 }, { "epoch": 0.8077834601471873, "grad_norm": 0.0, "learning_rate": 1.876242359719378e-06, "loss": 0.8872, "step": 6476 }, { "epoch": 0.8079081950854434, "grad_norm": 0.0, "learning_rate": 1.873887093708372e-06, "loss": 0.9373, "step": 6477 }, { "epoch": 0.8080329300236997, "grad_norm": 0.0, "learning_rate": 1.8715331540838488e-06, "loss": 0.9574, "step": 6478 }, { "epoch": 0.8081576649619558, "grad_norm": 0.0, "learning_rate": 1.8691805412300311e-06, "loss": 0.9381, "step": 6479 }, { "epoch": 0.808282399900212, "grad_norm": 0.0, "learning_rate": 1.8668292555309287e-06, "loss": 0.9056, "step": 6480 }, { "epoch": 0.8084071348384683, "grad_norm": 0.0, "learning_rate": 1.8644792973703252e-06, "loss": 0.9025, "step": 6481 }, { "epoch": 0.8085318697767244, "grad_norm": 0.0, "learning_rate": 1.8621306671317963e-06, "loss": 0.8387, "step": 6482 }, { "epoch": 0.8086566047149807, "grad_norm": 0.0, "learning_rate": 1.8597833651986962e-06, "loss": 0.877, "step": 6483 }, { "epoch": 0.8087813396532368, "grad_norm": 0.0, "learning_rate": 1.8574373919541678e-06, "loss": 0.9417, "step": 6484 }, { "epoch": 0.8089060745914931, "grad_norm": 0.0, "learning_rate": 1.8550927477811254e-06, "loss": 0.9219, "step": 6485 }, { "epoch": 0.8090308095297493, "grad_norm": 0.0, "learning_rate": 1.852749433062282e-06, "loss": 0.881, "step": 6486 }, { "epoch": 0.8091555444680055, "grad_norm": 0.0, "learning_rate": 1.8504074481801237e-06, "loss": 0.8944, "step": 6487 }, { "epoch": 0.8092802794062617, "grad_norm": 0.0, "learning_rate": 1.8480667935169138e-06, "loss": 0.8822, "step": 6488 }, { "epoch": 0.809405014344518, "grad_norm": 0.0, "learning_rate": 1.8457274694547178e-06, "loss": 0.8701, "step": 6489 }, { "epoch": 0.8095297492827741, "grad_norm": 0.0, "learning_rate": 1.8433894763753634e-06, "loss": 0.8582, "step": 6490 }, { "epoch": 0.8096544842210303, "grad_norm": 0.0, "learning_rate": 1.8410528146604746e-06, "loss": 0.9473, "step": 6491 }, { "epoch": 0.8097792191592865, "grad_norm": 0.0, "learning_rate": 1.8387174846914546e-06, "loss": 0.9225, "step": 6492 }, { "epoch": 0.8099039540975427, "grad_norm": 0.0, "learning_rate": 1.8363834868494833e-06, "loss": 0.908, "step": 6493 }, { "epoch": 0.8100286890357989, "grad_norm": 0.0, "learning_rate": 1.8340508215155307e-06, "loss": 0.9427, "step": 6494 }, { "epoch": 0.8101534239740551, "grad_norm": 0.0, "learning_rate": 1.831719489070346e-06, "loss": 0.9387, "step": 6495 }, { "epoch": 0.8102781589123114, "grad_norm": 0.0, "learning_rate": 1.829389489894462e-06, "loss": 0.9216, "step": 6496 }, { "epoch": 0.8104028938505675, "grad_norm": 0.0, "learning_rate": 1.8270608243681953e-06, "loss": 0.945, "step": 6497 }, { "epoch": 0.8105276287888238, "grad_norm": 0.0, "learning_rate": 1.8247334928716386e-06, "loss": 0.8993, "step": 6498 }, { "epoch": 0.8106523637270799, "grad_norm": 0.0, "learning_rate": 1.8224074957846728e-06, "loss": 0.935, "step": 6499 }, { "epoch": 0.8107770986653362, "grad_norm": 0.0, "learning_rate": 1.8200828334869603e-06, "loss": 0.941, "step": 6500 }, { "epoch": 0.8109018336035924, "grad_norm": 0.0, "learning_rate": 1.8177595063579466e-06, "loss": 0.9066, "step": 6501 }, { "epoch": 0.8110265685418486, "grad_norm": 0.0, "learning_rate": 1.8154375147768488e-06, "loss": 0.91, "step": 6502 }, { "epoch": 0.8111513034801048, "grad_norm": 0.0, "learning_rate": 1.8131168591226854e-06, "loss": 0.8908, "step": 6503 }, { "epoch": 0.8112760384183609, "grad_norm": 0.0, "learning_rate": 1.8107975397742406e-06, "loss": 0.9649, "step": 6504 }, { "epoch": 0.8114007733566172, "grad_norm": 0.0, "learning_rate": 1.808479557110081e-06, "loss": 0.9223, "step": 6505 }, { "epoch": 0.8115255082948734, "grad_norm": 0.0, "learning_rate": 1.80616291150857e-06, "loss": 0.8908, "step": 6506 }, { "epoch": 0.8116502432331296, "grad_norm": 0.0, "learning_rate": 1.8038476033478347e-06, "loss": 0.892, "step": 6507 }, { "epoch": 0.8117749781713858, "grad_norm": 0.0, "learning_rate": 1.8015336330057943e-06, "loss": 0.9625, "step": 6508 }, { "epoch": 0.811899713109642, "grad_norm": 0.0, "learning_rate": 1.7992210008601507e-06, "loss": 0.9075, "step": 6509 }, { "epoch": 0.8120244480478982, "grad_norm": 0.0, "learning_rate": 1.7969097072883767e-06, "loss": 0.8933, "step": 6510 }, { "epoch": 0.8121491829861545, "grad_norm": 0.0, "learning_rate": 1.7945997526677371e-06, "loss": 0.8724, "step": 6511 }, { "epoch": 0.8122739179244106, "grad_norm": 0.0, "learning_rate": 1.7922911373752749e-06, "loss": 0.9189, "step": 6512 }, { "epoch": 0.8123986528626669, "grad_norm": 0.0, "learning_rate": 1.7899838617878163e-06, "loss": 0.894, "step": 6513 }, { "epoch": 0.812523387800923, "grad_norm": 0.0, "learning_rate": 1.7876779262819633e-06, "loss": 0.8762, "step": 6514 }, { "epoch": 0.8126481227391792, "grad_norm": 0.0, "learning_rate": 1.7853733312341036e-06, "loss": 0.926, "step": 6515 }, { "epoch": 0.8127728576774355, "grad_norm": 0.0, "learning_rate": 1.7830700770204058e-06, "loss": 0.8767, "step": 6516 }, { "epoch": 0.8128975926156916, "grad_norm": 0.0, "learning_rate": 1.7807681640168194e-06, "loss": 0.907, "step": 6517 }, { "epoch": 0.8130223275539479, "grad_norm": 0.0, "learning_rate": 1.778467592599077e-06, "loss": 0.9304, "step": 6518 }, { "epoch": 0.813147062492204, "grad_norm": 0.0, "learning_rate": 1.7761683631426841e-06, "loss": 0.9228, "step": 6519 }, { "epoch": 0.8132717974304603, "grad_norm": 0.0, "learning_rate": 1.7738704760229352e-06, "loss": 0.9448, "step": 6520 }, { "epoch": 0.8133965323687165, "grad_norm": 0.0, "learning_rate": 1.7715739316149083e-06, "loss": 0.8559, "step": 6521 }, { "epoch": 0.8135212673069727, "grad_norm": 0.0, "learning_rate": 1.7692787302934466e-06, "loss": 0.8561, "step": 6522 }, { "epoch": 0.8136460022452289, "grad_norm": 0.0, "learning_rate": 1.7669848724331984e-06, "loss": 0.8639, "step": 6523 }, { "epoch": 0.8137707371834851, "grad_norm": 0.0, "learning_rate": 1.7646923584085685e-06, "loss": 0.856, "step": 6524 }, { "epoch": 0.8138954721217413, "grad_norm": 0.0, "learning_rate": 1.7624011885937565e-06, "loss": 0.9371, "step": 6525 }, { "epoch": 0.8140202070599976, "grad_norm": 0.0, "learning_rate": 1.760111363362741e-06, "loss": 0.8945, "step": 6526 }, { "epoch": 0.8141449419982537, "grad_norm": 0.0, "learning_rate": 1.7578228830892762e-06, "loss": 0.9665, "step": 6527 }, { "epoch": 0.8142696769365099, "grad_norm": 0.0, "learning_rate": 1.7555357481469003e-06, "loss": 0.9126, "step": 6528 }, { "epoch": 0.8143944118747661, "grad_norm": 0.0, "learning_rate": 1.7532499589089324e-06, "loss": 0.9447, "step": 6529 }, { "epoch": 0.8145191468130223, "grad_norm": 0.0, "learning_rate": 1.7509655157484728e-06, "loss": 0.8812, "step": 6530 }, { "epoch": 0.8146438817512786, "grad_norm": 0.0, "learning_rate": 1.7486824190383966e-06, "loss": 0.9351, "step": 6531 }, { "epoch": 0.8147686166895347, "grad_norm": 0.0, "learning_rate": 1.7464006691513624e-06, "loss": 0.9038, "step": 6532 }, { "epoch": 0.814893351627791, "grad_norm": 0.0, "learning_rate": 1.7441202664598144e-06, "loss": 0.9117, "step": 6533 }, { "epoch": 0.8150180865660471, "grad_norm": 0.0, "learning_rate": 1.7418412113359639e-06, "loss": 0.8912, "step": 6534 }, { "epoch": 0.8151428215043034, "grad_norm": 0.0, "learning_rate": 1.7395635041518188e-06, "loss": 0.8758, "step": 6535 }, { "epoch": 0.8152675564425595, "grad_norm": 0.0, "learning_rate": 1.737287145279153e-06, "loss": 0.9183, "step": 6536 }, { "epoch": 0.8153922913808158, "grad_norm": 0.0, "learning_rate": 1.7350121350895254e-06, "loss": 0.9329, "step": 6537 }, { "epoch": 0.815517026319072, "grad_norm": 0.0, "learning_rate": 1.7327384739542807e-06, "loss": 0.9356, "step": 6538 }, { "epoch": 0.8156417612573281, "grad_norm": 0.0, "learning_rate": 1.7304661622445296e-06, "loss": 0.8731, "step": 6539 }, { "epoch": 0.8157664961955844, "grad_norm": 0.0, "learning_rate": 1.7281952003311753e-06, "loss": 0.8812, "step": 6540 }, { "epoch": 0.8158912311338405, "grad_norm": 0.0, "learning_rate": 1.7259255885848946e-06, "loss": 0.8555, "step": 6541 }, { "epoch": 0.8160159660720968, "grad_norm": 0.0, "learning_rate": 1.723657327376147e-06, "loss": 0.93, "step": 6542 }, { "epoch": 0.816140701010353, "grad_norm": 0.0, "learning_rate": 1.7213904170751706e-06, "loss": 0.8464, "step": 6543 }, { "epoch": 0.8162654359486092, "grad_norm": 0.0, "learning_rate": 1.7191248580519792e-06, "loss": 0.85, "step": 6544 }, { "epoch": 0.8163901708868654, "grad_norm": 0.0, "learning_rate": 1.7168606506763696e-06, "loss": 0.8824, "step": 6545 }, { "epoch": 0.8165149058251217, "grad_norm": 0.0, "learning_rate": 1.7145977953179194e-06, "loss": 0.8896, "step": 6546 }, { "epoch": 0.8166396407633778, "grad_norm": 0.0, "learning_rate": 1.7123362923459853e-06, "loss": 0.8715, "step": 6547 }, { "epoch": 0.8167643757016341, "grad_norm": 0.0, "learning_rate": 1.7100761421296962e-06, "loss": 0.9, "step": 6548 }, { "epoch": 0.8168891106398902, "grad_norm": 0.0, "learning_rate": 1.7078173450379688e-06, "loss": 0.9338, "step": 6549 }, { "epoch": 0.8170138455781465, "grad_norm": 0.0, "learning_rate": 1.7055599014394974e-06, "loss": 0.8864, "step": 6550 }, { "epoch": 0.8171385805164026, "grad_norm": 0.0, "learning_rate": 1.7033038117027466e-06, "loss": 0.8806, "step": 6551 }, { "epoch": 0.8172633154546588, "grad_norm": 0.0, "learning_rate": 1.7010490761959774e-06, "loss": 0.8677, "step": 6552 }, { "epoch": 0.8173880503929151, "grad_norm": 0.0, "learning_rate": 1.698795695287212e-06, "loss": 0.856, "step": 6553 }, { "epoch": 0.8175127853311712, "grad_norm": 0.0, "learning_rate": 1.6965436693442616e-06, "loss": 0.9074, "step": 6554 }, { "epoch": 0.8176375202694275, "grad_norm": 0.0, "learning_rate": 1.6942929987347156e-06, "loss": 0.9331, "step": 6555 }, { "epoch": 0.8177622552076836, "grad_norm": 0.0, "learning_rate": 1.6920436838259357e-06, "loss": 0.9754, "step": 6556 }, { "epoch": 0.8178869901459399, "grad_norm": 0.0, "learning_rate": 1.6897957249850682e-06, "loss": 0.9548, "step": 6557 }, { "epoch": 0.8180117250841961, "grad_norm": 0.0, "learning_rate": 1.6875491225790385e-06, "loss": 0.8601, "step": 6558 }, { "epoch": 0.8181364600224523, "grad_norm": 0.0, "learning_rate": 1.6853038769745466e-06, "loss": 0.8761, "step": 6559 }, { "epoch": 0.8182611949607085, "grad_norm": 0.0, "learning_rate": 1.6830599885380783e-06, "loss": 0.8649, "step": 6560 }, { "epoch": 0.8183859298989647, "grad_norm": 0.0, "learning_rate": 1.680817457635885e-06, "loss": 0.9318, "step": 6561 }, { "epoch": 0.8185106648372209, "grad_norm": 0.0, "learning_rate": 1.678576284634008e-06, "loss": 0.9044, "step": 6562 }, { "epoch": 0.8186353997754771, "grad_norm": 0.0, "learning_rate": 1.6763364698982632e-06, "loss": 0.9205, "step": 6563 }, { "epoch": 0.8187601347137333, "grad_norm": 0.0, "learning_rate": 1.6740980137942474e-06, "loss": 0.8993, "step": 6564 }, { "epoch": 0.8188848696519895, "grad_norm": 0.0, "learning_rate": 1.671860916687329e-06, "loss": 0.9478, "step": 6565 }, { "epoch": 0.8190096045902457, "grad_norm": 0.0, "learning_rate": 1.66962517894266e-06, "loss": 0.9393, "step": 6566 }, { "epoch": 0.8191343395285019, "grad_norm": 0.0, "learning_rate": 1.6673908009251705e-06, "loss": 0.9056, "step": 6567 }, { "epoch": 0.8192590744667582, "grad_norm": 0.0, "learning_rate": 1.6651577829995625e-06, "loss": 0.9448, "step": 6568 }, { "epoch": 0.8193838094050143, "grad_norm": 0.0, "learning_rate": 1.6629261255303286e-06, "loss": 0.8742, "step": 6569 }, { "epoch": 0.8195085443432706, "grad_norm": 0.0, "learning_rate": 1.6606958288817255e-06, "loss": 0.8881, "step": 6570 }, { "epoch": 0.8196332792815267, "grad_norm": 0.0, "learning_rate": 1.6584668934177962e-06, "loss": 0.8992, "step": 6571 }, { "epoch": 0.819758014219783, "grad_norm": 0.0, "learning_rate": 1.6562393195023608e-06, "loss": 0.9399, "step": 6572 }, { "epoch": 0.8198827491580392, "grad_norm": 0.0, "learning_rate": 1.6540131074990106e-06, "loss": 0.8726, "step": 6573 }, { "epoch": 0.8200074840962954, "grad_norm": 0.0, "learning_rate": 1.651788257771123e-06, "loss": 0.9444, "step": 6574 }, { "epoch": 0.8201322190345516, "grad_norm": 0.0, "learning_rate": 1.6495647706818486e-06, "loss": 0.9274, "step": 6575 }, { "epoch": 0.8202569539728077, "grad_norm": 0.0, "learning_rate": 1.6473426465941189e-06, "loss": 0.9487, "step": 6576 }, { "epoch": 0.820381688911064, "grad_norm": 0.0, "learning_rate": 1.6451218858706374e-06, "loss": 0.8645, "step": 6577 }, { "epoch": 0.8205064238493202, "grad_norm": 0.0, "learning_rate": 1.6429024888738887e-06, "loss": 0.8868, "step": 6578 }, { "epoch": 0.8206311587875764, "grad_norm": 0.0, "learning_rate": 1.640684455966135e-06, "loss": 0.8931, "step": 6579 }, { "epoch": 0.8207558937258326, "grad_norm": 0.0, "learning_rate": 1.6384677875094145e-06, "loss": 0.9173, "step": 6580 }, { "epoch": 0.8208806286640888, "grad_norm": 0.0, "learning_rate": 1.636252483865548e-06, "loss": 0.9416, "step": 6581 }, { "epoch": 0.821005363602345, "grad_norm": 0.0, "learning_rate": 1.6340385453961216e-06, "loss": 0.893, "step": 6582 }, { "epoch": 0.8211300985406013, "grad_norm": 0.0, "learning_rate": 1.6318259724625096e-06, "loss": 0.9111, "step": 6583 }, { "epoch": 0.8212548334788574, "grad_norm": 0.0, "learning_rate": 1.629614765425861e-06, "loss": 0.9261, "step": 6584 }, { "epoch": 0.8213795684171137, "grad_norm": 0.0, "learning_rate": 1.6274049246470958e-06, "loss": 0.8913, "step": 6585 }, { "epoch": 0.8215043033553698, "grad_norm": 0.0, "learning_rate": 1.6251964504869221e-06, "loss": 0.9258, "step": 6586 }, { "epoch": 0.821629038293626, "grad_norm": 0.0, "learning_rate": 1.622989343305813e-06, "loss": 0.9209, "step": 6587 }, { "epoch": 0.8217537732318823, "grad_norm": 0.0, "learning_rate": 1.6207836034640268e-06, "loss": 0.9005, "step": 6588 }, { "epoch": 0.8218785081701384, "grad_norm": 0.0, "learning_rate": 1.6185792313215977e-06, "loss": 0.9078, "step": 6589 }, { "epoch": 0.8220032431083947, "grad_norm": 0.0, "learning_rate": 1.6163762272383298e-06, "loss": 0.8895, "step": 6590 }, { "epoch": 0.8221279780466508, "grad_norm": 0.0, "learning_rate": 1.6141745915738127e-06, "loss": 0.8694, "step": 6591 }, { "epoch": 0.8222527129849071, "grad_norm": 0.0, "learning_rate": 1.6119743246874064e-06, "loss": 0.9034, "step": 6592 }, { "epoch": 0.8223774479231633, "grad_norm": 0.0, "learning_rate": 1.6097754269382538e-06, "loss": 0.9086, "step": 6593 }, { "epoch": 0.8225021828614195, "grad_norm": 0.0, "learning_rate": 1.6075778986852663e-06, "loss": 0.9251, "step": 6594 }, { "epoch": 0.8226269177996757, "grad_norm": 0.0, "learning_rate": 1.6053817402871363e-06, "loss": 0.904, "step": 6595 }, { "epoch": 0.8227516527379319, "grad_norm": 0.0, "learning_rate": 1.6031869521023369e-06, "loss": 0.8865, "step": 6596 }, { "epoch": 0.8228763876761881, "grad_norm": 0.0, "learning_rate": 1.6009935344891036e-06, "loss": 0.8542, "step": 6597 }, { "epoch": 0.8230011226144444, "grad_norm": 0.0, "learning_rate": 1.598801487805469e-06, "loss": 0.8959, "step": 6598 }, { "epoch": 0.8231258575527005, "grad_norm": 0.0, "learning_rate": 1.5966108124092217e-06, "loss": 0.8864, "step": 6599 }, { "epoch": 0.8232505924909567, "grad_norm": 0.0, "learning_rate": 1.5944215086579384e-06, "loss": 0.9262, "step": 6600 }, { "epoch": 0.8233753274292129, "grad_norm": 0.0, "learning_rate": 1.5922335769089703e-06, "loss": 0.8385, "step": 6601 }, { "epoch": 0.8235000623674691, "grad_norm": 0.0, "learning_rate": 1.5900470175194383e-06, "loss": 0.899, "step": 6602 }, { "epoch": 0.8236247973057254, "grad_norm": 0.0, "learning_rate": 1.5878618308462479e-06, "loss": 0.9328, "step": 6603 }, { "epoch": 0.8237495322439815, "grad_norm": 0.0, "learning_rate": 1.585678017246075e-06, "loss": 0.8837, "step": 6604 }, { "epoch": 0.8238742671822378, "grad_norm": 0.0, "learning_rate": 1.5834955770753723e-06, "loss": 0.9123, "step": 6605 }, { "epoch": 0.8239990021204939, "grad_norm": 0.0, "learning_rate": 1.5813145106903737e-06, "loss": 0.9178, "step": 6606 }, { "epoch": 0.8241237370587502, "grad_norm": 0.0, "learning_rate": 1.579134818447079e-06, "loss": 0.9158, "step": 6607 }, { "epoch": 0.8242484719970063, "grad_norm": 0.0, "learning_rate": 1.5769565007012699e-06, "loss": 0.9061, "step": 6608 }, { "epoch": 0.8243732069352626, "grad_norm": 0.0, "learning_rate": 1.5747795578085046e-06, "loss": 0.9077, "step": 6609 }, { "epoch": 0.8244979418735188, "grad_norm": 0.0, "learning_rate": 1.5726039901241152e-06, "loss": 0.8893, "step": 6610 }, { "epoch": 0.8246226768117749, "grad_norm": 0.0, "learning_rate": 1.5704297980032068e-06, "loss": 0.9387, "step": 6611 }, { "epoch": 0.8247474117500312, "grad_norm": 0.0, "learning_rate": 1.5682569818006632e-06, "loss": 0.8946, "step": 6612 }, { "epoch": 0.8248721466882873, "grad_norm": 0.0, "learning_rate": 1.566085541871145e-06, "loss": 0.899, "step": 6613 }, { "epoch": 0.8249968816265436, "grad_norm": 0.0, "learning_rate": 1.5639154785690801e-06, "loss": 0.9553, "step": 6614 }, { "epoch": 0.8251216165647998, "grad_norm": 0.0, "learning_rate": 1.5617467922486863e-06, "loss": 0.8938, "step": 6615 }, { "epoch": 0.825246351503056, "grad_norm": 0.0, "learning_rate": 1.5595794832639421e-06, "loss": 0.8875, "step": 6616 }, { "epoch": 0.8253710864413122, "grad_norm": 0.0, "learning_rate": 1.557413551968604e-06, "loss": 0.9238, "step": 6617 }, { "epoch": 0.8254958213795685, "grad_norm": 0.0, "learning_rate": 1.5552489987162155e-06, "loss": 0.8887, "step": 6618 }, { "epoch": 0.8256205563178246, "grad_norm": 0.0, "learning_rate": 1.5530858238600789e-06, "loss": 0.9367, "step": 6619 }, { "epoch": 0.8257452912560809, "grad_norm": 0.0, "learning_rate": 1.5509240277532821e-06, "loss": 0.8723, "step": 6620 }, { "epoch": 0.825870026194337, "grad_norm": 0.0, "learning_rate": 1.5487636107486825e-06, "loss": 0.931, "step": 6621 }, { "epoch": 0.8259947611325933, "grad_norm": 0.0, "learning_rate": 1.5466045731989199e-06, "loss": 0.8833, "step": 6622 }, { "epoch": 0.8261194960708494, "grad_norm": 0.0, "learning_rate": 1.5444469154563968e-06, "loss": 0.8777, "step": 6623 }, { "epoch": 0.8262442310091056, "grad_norm": 0.0, "learning_rate": 1.5422906378733006e-06, "loss": 0.99, "step": 6624 }, { "epoch": 0.8263689659473619, "grad_norm": 0.0, "learning_rate": 1.5401357408015893e-06, "loss": 0.9183, "step": 6625 }, { "epoch": 0.826493700885618, "grad_norm": 0.0, "learning_rate": 1.5379822245929976e-06, "loss": 0.8479, "step": 6626 }, { "epoch": 0.8266184358238743, "grad_norm": 0.0, "learning_rate": 1.5358300895990352e-06, "loss": 0.9229, "step": 6627 }, { "epoch": 0.8267431707621304, "grad_norm": 0.0, "learning_rate": 1.53367933617098e-06, "loss": 0.8665, "step": 6628 }, { "epoch": 0.8268679057003867, "grad_norm": 0.0, "learning_rate": 1.5315299646598924e-06, "loss": 0.9153, "step": 6629 }, { "epoch": 0.8269926406386429, "grad_norm": 0.0, "learning_rate": 1.5293819754166062e-06, "loss": 0.9358, "step": 6630 }, { "epoch": 0.8271173755768991, "grad_norm": 0.0, "learning_rate": 1.5272353687917197e-06, "loss": 0.9014, "step": 6631 }, { "epoch": 0.8272421105151553, "grad_norm": 0.0, "learning_rate": 1.5250901451356226e-06, "loss": 0.9012, "step": 6632 }, { "epoch": 0.8273668454534115, "grad_norm": 0.0, "learning_rate": 1.522946304798465e-06, "loss": 0.8695, "step": 6633 }, { "epoch": 0.8274915803916677, "grad_norm": 0.0, "learning_rate": 1.5208038481301713e-06, "loss": 0.9328, "step": 6634 }, { "epoch": 0.8276163153299239, "grad_norm": 0.0, "learning_rate": 1.518662775480454e-06, "loss": 0.9222, "step": 6635 }, { "epoch": 0.8277410502681801, "grad_norm": 0.0, "learning_rate": 1.5165230871987823e-06, "loss": 0.9225, "step": 6636 }, { "epoch": 0.8278657852064363, "grad_norm": 0.0, "learning_rate": 1.5143847836344105e-06, "loss": 0.893, "step": 6637 }, { "epoch": 0.8279905201446925, "grad_norm": 0.0, "learning_rate": 1.5122478651363626e-06, "loss": 0.9188, "step": 6638 }, { "epoch": 0.8281152550829487, "grad_norm": 0.0, "learning_rate": 1.5101123320534406e-06, "loss": 0.9218, "step": 6639 }, { "epoch": 0.828239990021205, "grad_norm": 0.0, "learning_rate": 1.5079781847342122e-06, "loss": 0.8453, "step": 6640 }, { "epoch": 0.8283647249594611, "grad_norm": 0.0, "learning_rate": 1.505845423527027e-06, "loss": 0.8703, "step": 6641 }, { "epoch": 0.8284894598977174, "grad_norm": 0.0, "learning_rate": 1.5037140487800073e-06, "loss": 0.9197, "step": 6642 }, { "epoch": 0.8286141948359735, "grad_norm": 0.0, "learning_rate": 1.5015840608410403e-06, "loss": 0.9075, "step": 6643 }, { "epoch": 0.8287389297742298, "grad_norm": 0.0, "learning_rate": 1.4994554600578036e-06, "loss": 0.9277, "step": 6644 }, { "epoch": 0.828863664712486, "grad_norm": 0.0, "learning_rate": 1.4973282467777306e-06, "loss": 0.8771, "step": 6645 }, { "epoch": 0.8289883996507422, "grad_norm": 0.0, "learning_rate": 1.4952024213480387e-06, "loss": 0.8954, "step": 6646 }, { "epoch": 0.8291131345889984, "grad_norm": 0.0, "learning_rate": 1.4930779841157184e-06, "loss": 0.8889, "step": 6647 }, { "epoch": 0.8292378695272545, "grad_norm": 0.0, "learning_rate": 1.4909549354275277e-06, "loss": 0.8935, "step": 6648 }, { "epoch": 0.8293626044655108, "grad_norm": 0.0, "learning_rate": 1.4888332756300027e-06, "loss": 0.9418, "step": 6649 }, { "epoch": 0.829487339403767, "grad_norm": 0.0, "learning_rate": 1.4867130050694556e-06, "loss": 0.867, "step": 6650 }, { "epoch": 0.8296120743420232, "grad_norm": 0.0, "learning_rate": 1.4845941240919603e-06, "loss": 0.8927, "step": 6651 }, { "epoch": 0.8297368092802794, "grad_norm": 0.0, "learning_rate": 1.4824766330433793e-06, "loss": 0.9163, "step": 6652 }, { "epoch": 0.8298615442185356, "grad_norm": 0.0, "learning_rate": 1.480360532269336e-06, "loss": 0.9276, "step": 6653 }, { "epoch": 0.8299862791567918, "grad_norm": 0.0, "learning_rate": 1.478245822115233e-06, "loss": 0.9527, "step": 6654 }, { "epoch": 0.8301110140950481, "grad_norm": 0.0, "learning_rate": 1.4761325029262442e-06, "loss": 0.8961, "step": 6655 }, { "epoch": 0.8302357490333042, "grad_norm": 0.0, "learning_rate": 1.474020575047318e-06, "loss": 0.9274, "step": 6656 }, { "epoch": 0.8303604839715605, "grad_norm": 0.0, "learning_rate": 1.47191003882317e-06, "loss": 0.9028, "step": 6657 }, { "epoch": 0.8304852189098166, "grad_norm": 0.0, "learning_rate": 1.4698008945982966e-06, "loss": 0.9205, "step": 6658 }, { "epoch": 0.8306099538480728, "grad_norm": 0.0, "learning_rate": 1.4676931427169639e-06, "loss": 0.9456, "step": 6659 }, { "epoch": 0.8307346887863291, "grad_norm": 0.0, "learning_rate": 1.465586783523204e-06, "loss": 0.9437, "step": 6660 }, { "epoch": 0.8308594237245852, "grad_norm": 0.0, "learning_rate": 1.4634818173608378e-06, "loss": 0.9067, "step": 6661 }, { "epoch": 0.8309841586628415, "grad_norm": 0.0, "learning_rate": 1.4613782445734404e-06, "loss": 0.8968, "step": 6662 }, { "epoch": 0.8311088936010976, "grad_norm": 0.0, "learning_rate": 1.4592760655043713e-06, "loss": 0.9218, "step": 6663 }, { "epoch": 0.8312336285393539, "grad_norm": 0.0, "learning_rate": 1.457175280496761e-06, "loss": 0.9335, "step": 6664 }, { "epoch": 0.83135836347761, "grad_norm": 0.0, "learning_rate": 1.4550758898935047e-06, "loss": 0.9082, "step": 6665 }, { "epoch": 0.8314830984158663, "grad_norm": 0.0, "learning_rate": 1.4529778940372806e-06, "loss": 0.9298, "step": 6666 }, { "epoch": 0.8316078333541225, "grad_norm": 0.0, "learning_rate": 1.4508812932705364e-06, "loss": 0.8999, "step": 6667 }, { "epoch": 0.8317325682923787, "grad_norm": 0.0, "learning_rate": 1.4487860879354842e-06, "loss": 0.8592, "step": 6668 }, { "epoch": 0.8318573032306349, "grad_norm": 0.0, "learning_rate": 1.4466922783741167e-06, "loss": 0.9199, "step": 6669 }, { "epoch": 0.8319820381688912, "grad_norm": 0.0, "learning_rate": 1.4445998649281978e-06, "loss": 0.8868, "step": 6670 }, { "epoch": 0.8321067731071473, "grad_norm": 0.0, "learning_rate": 1.4425088479392612e-06, "loss": 0.8762, "step": 6671 }, { "epoch": 0.8322315080454035, "grad_norm": 0.0, "learning_rate": 1.440419227748614e-06, "loss": 0.8749, "step": 6672 }, { "epoch": 0.8323562429836597, "grad_norm": 0.0, "learning_rate": 1.4383310046973365e-06, "loss": 0.9303, "step": 6673 }, { "epoch": 0.8324809779219159, "grad_norm": 0.0, "learning_rate": 1.4362441791262748e-06, "loss": 0.9118, "step": 6674 }, { "epoch": 0.8326057128601722, "grad_norm": 0.0, "learning_rate": 1.4341587513760546e-06, "loss": 0.9268, "step": 6675 }, { "epoch": 0.8327304477984283, "grad_norm": 0.0, "learning_rate": 1.4320747217870722e-06, "loss": 0.9025, "step": 6676 }, { "epoch": 0.8328551827366846, "grad_norm": 0.0, "learning_rate": 1.4299920906994868e-06, "loss": 0.8946, "step": 6677 }, { "epoch": 0.8329799176749407, "grad_norm": 0.0, "learning_rate": 1.4279108584532453e-06, "loss": 0.8903, "step": 6678 }, { "epoch": 0.833104652613197, "grad_norm": 0.0, "learning_rate": 1.4258310253880537e-06, "loss": 0.9156, "step": 6679 }, { "epoch": 0.8332293875514531, "grad_norm": 0.0, "learning_rate": 1.423752591843387e-06, "loss": 0.8789, "step": 6680 }, { "epoch": 0.8333541224897094, "grad_norm": 0.0, "learning_rate": 1.4216755581585073e-06, "loss": 0.897, "step": 6681 }, { "epoch": 0.8334788574279656, "grad_norm": 0.0, "learning_rate": 1.4195999246724335e-06, "loss": 0.8808, "step": 6682 }, { "epoch": 0.8336035923662217, "grad_norm": 0.0, "learning_rate": 1.4175256917239621e-06, "loss": 0.9183, "step": 6683 }, { "epoch": 0.833728327304478, "grad_norm": 0.0, "learning_rate": 1.4154528596516625e-06, "loss": 0.8744, "step": 6684 }, { "epoch": 0.8338530622427341, "grad_norm": 0.0, "learning_rate": 1.4133814287938707e-06, "loss": 0.8933, "step": 6685 }, { "epoch": 0.8339777971809904, "grad_norm": 0.0, "learning_rate": 1.4113113994886962e-06, "loss": 0.8338, "step": 6686 }, { "epoch": 0.8341025321192466, "grad_norm": 0.0, "learning_rate": 1.4092427720740209e-06, "loss": 0.8709, "step": 6687 }, { "epoch": 0.8342272670575028, "grad_norm": 0.0, "learning_rate": 1.407175546887497e-06, "loss": 0.9199, "step": 6688 }, { "epoch": 0.834352001995759, "grad_norm": 0.0, "learning_rate": 1.40510972426655e-06, "loss": 0.9417, "step": 6689 }, { "epoch": 0.8344767369340153, "grad_norm": 0.0, "learning_rate": 1.4030453045483693e-06, "loss": 0.9061, "step": 6690 }, { "epoch": 0.8346014718722714, "grad_norm": 0.0, "learning_rate": 1.4009822880699231e-06, "loss": 0.8893, "step": 6691 }, { "epoch": 0.8347262068105277, "grad_norm": 0.0, "learning_rate": 1.3989206751679484e-06, "loss": 0.9022, "step": 6692 }, { "epoch": 0.8348509417487838, "grad_norm": 0.0, "learning_rate": 1.3968604661789532e-06, "loss": 0.9107, "step": 6693 }, { "epoch": 0.8349756766870401, "grad_norm": 0.0, "learning_rate": 1.3948016614392113e-06, "loss": 0.883, "step": 6694 }, { "epoch": 0.8351004116252962, "grad_norm": 0.0, "learning_rate": 1.3927442612847741e-06, "loss": 0.8859, "step": 6695 }, { "epoch": 0.8352251465635524, "grad_norm": 0.0, "learning_rate": 1.390688266051463e-06, "loss": 0.862, "step": 6696 }, { "epoch": 0.8353498815018087, "grad_norm": 0.0, "learning_rate": 1.388633676074862e-06, "loss": 0.9394, "step": 6697 }, { "epoch": 0.8354746164400648, "grad_norm": 0.0, "learning_rate": 1.3865804916903414e-06, "loss": 0.9308, "step": 6698 }, { "epoch": 0.8355993513783211, "grad_norm": 0.0, "learning_rate": 1.3845287132330254e-06, "loss": 0.8698, "step": 6699 }, { "epoch": 0.8357240863165772, "grad_norm": 0.0, "learning_rate": 1.3824783410378183e-06, "loss": 0.8889, "step": 6700 }, { "epoch": 0.8358488212548335, "grad_norm": 0.0, "learning_rate": 1.3804293754393948e-06, "loss": 0.8907, "step": 6701 }, { "epoch": 0.8359735561930897, "grad_norm": 0.0, "learning_rate": 1.3783818167721918e-06, "loss": 0.9426, "step": 6702 }, { "epoch": 0.8360982911313459, "grad_norm": 0.0, "learning_rate": 1.3763356653704274e-06, "loss": 0.9249, "step": 6703 }, { "epoch": 0.8362230260696021, "grad_norm": 0.0, "learning_rate": 1.3742909215680833e-06, "loss": 0.9236, "step": 6704 }, { "epoch": 0.8363477610078583, "grad_norm": 0.0, "learning_rate": 1.372247585698916e-06, "loss": 0.8859, "step": 6705 }, { "epoch": 0.8364724959461145, "grad_norm": 0.0, "learning_rate": 1.370205658096445e-06, "loss": 0.8651, "step": 6706 }, { "epoch": 0.8365972308843707, "grad_norm": 0.0, "learning_rate": 1.3681651390939666e-06, "loss": 0.9227, "step": 6707 }, { "epoch": 0.8367219658226269, "grad_norm": 0.0, "learning_rate": 1.366126029024545e-06, "loss": 0.9209, "step": 6708 }, { "epoch": 0.8368467007608831, "grad_norm": 0.0, "learning_rate": 1.3640883282210127e-06, "loss": 0.904, "step": 6709 }, { "epoch": 0.8369714356991393, "grad_norm": 0.0, "learning_rate": 1.3620520370159795e-06, "loss": 0.9049, "step": 6710 }, { "epoch": 0.8370961706373955, "grad_norm": 0.0, "learning_rate": 1.3600171557418117e-06, "loss": 0.8741, "step": 6711 }, { "epoch": 0.8372209055756518, "grad_norm": 0.0, "learning_rate": 1.357983684730657e-06, "loss": 0.8992, "step": 6712 }, { "epoch": 0.8373456405139079, "grad_norm": 0.0, "learning_rate": 1.3559516243144323e-06, "loss": 0.901, "step": 6713 }, { "epoch": 0.8374703754521642, "grad_norm": 0.0, "learning_rate": 1.3539209748248116e-06, "loss": 0.9285, "step": 6714 }, { "epoch": 0.8375951103904203, "grad_norm": 0.0, "learning_rate": 1.3518917365932594e-06, "loss": 0.8731, "step": 6715 }, { "epoch": 0.8377198453286766, "grad_norm": 0.0, "learning_rate": 1.3498639099509915e-06, "loss": 0.9369, "step": 6716 }, { "epoch": 0.8378445802669328, "grad_norm": 0.0, "learning_rate": 1.347837495229002e-06, "loss": 0.8943, "step": 6717 }, { "epoch": 0.837969315205189, "grad_norm": 0.0, "learning_rate": 1.3458124927580552e-06, "loss": 0.8758, "step": 6718 }, { "epoch": 0.8380940501434452, "grad_norm": 0.0, "learning_rate": 1.343788902868678e-06, "loss": 0.8891, "step": 6719 }, { "epoch": 0.8382187850817013, "grad_norm": 0.0, "learning_rate": 1.341766725891175e-06, "loss": 0.9218, "step": 6720 }, { "epoch": 0.8383435200199576, "grad_norm": 0.0, "learning_rate": 1.339745962155613e-06, "loss": 0.9277, "step": 6721 }, { "epoch": 0.8384682549582138, "grad_norm": 0.0, "learning_rate": 1.3377266119918375e-06, "loss": 0.8897, "step": 6722 }, { "epoch": 0.83859298989647, "grad_norm": 0.0, "learning_rate": 1.3357086757294512e-06, "loss": 0.8947, "step": 6723 }, { "epoch": 0.8387177248347262, "grad_norm": 0.0, "learning_rate": 1.3336921536978343e-06, "loss": 0.8988, "step": 6724 }, { "epoch": 0.8388424597729824, "grad_norm": 0.0, "learning_rate": 1.3316770462261365e-06, "loss": 0.9402, "step": 6725 }, { "epoch": 0.8389671947112386, "grad_norm": 0.0, "learning_rate": 1.3296633536432668e-06, "loss": 0.8712, "step": 6726 }, { "epoch": 0.8390919296494949, "grad_norm": 0.0, "learning_rate": 1.3276510762779215e-06, "loss": 0.8683, "step": 6727 }, { "epoch": 0.839216664587751, "grad_norm": 0.0, "learning_rate": 1.3256402144585456e-06, "loss": 0.8919, "step": 6728 }, { "epoch": 0.8393413995260073, "grad_norm": 0.0, "learning_rate": 1.3236307685133675e-06, "loss": 0.911, "step": 6729 }, { "epoch": 0.8394661344642634, "grad_norm": 0.0, "learning_rate": 1.3216227387703795e-06, "loss": 0.8931, "step": 6730 }, { "epoch": 0.8395908694025196, "grad_norm": 0.0, "learning_rate": 1.3196161255573392e-06, "loss": 0.9012, "step": 6731 }, { "epoch": 0.8397156043407759, "grad_norm": 0.0, "learning_rate": 1.3176109292017781e-06, "loss": 0.8697, "step": 6732 }, { "epoch": 0.839840339279032, "grad_norm": 0.0, "learning_rate": 1.3156071500309953e-06, "loss": 0.8986, "step": 6733 }, { "epoch": 0.8399650742172883, "grad_norm": 0.0, "learning_rate": 1.3136047883720582e-06, "loss": 0.9494, "step": 6734 }, { "epoch": 0.8400898091555444, "grad_norm": 0.0, "learning_rate": 1.3116038445518043e-06, "loss": 0.8832, "step": 6735 }, { "epoch": 0.8402145440938007, "grad_norm": 0.0, "learning_rate": 1.3096043188968333e-06, "loss": 0.923, "step": 6736 }, { "epoch": 0.8403392790320569, "grad_norm": 0.0, "learning_rate": 1.307606211733522e-06, "loss": 0.876, "step": 6737 }, { "epoch": 0.8404640139703131, "grad_norm": 0.0, "learning_rate": 1.3056095233880105e-06, "loss": 0.8987, "step": 6738 }, { "epoch": 0.8405887489085693, "grad_norm": 0.0, "learning_rate": 1.3036142541862119e-06, "loss": 0.8867, "step": 6739 }, { "epoch": 0.8407134838468255, "grad_norm": 0.0, "learning_rate": 1.3016204044537995e-06, "loss": 0.8826, "step": 6740 }, { "epoch": 0.8408382187850817, "grad_norm": 0.0, "learning_rate": 1.2996279745162211e-06, "loss": 0.8844, "step": 6741 }, { "epoch": 0.840962953723338, "grad_norm": 0.0, "learning_rate": 1.2976369646986954e-06, "loss": 0.9209, "step": 6742 }, { "epoch": 0.8410876886615941, "grad_norm": 0.0, "learning_rate": 1.2956473753261978e-06, "loss": 0.9671, "step": 6743 }, { "epoch": 0.8412124235998503, "grad_norm": 0.0, "learning_rate": 1.2936592067234888e-06, "loss": 0.8605, "step": 6744 }, { "epoch": 0.8413371585381065, "grad_norm": 0.0, "learning_rate": 1.2916724592150799e-06, "loss": 0.9455, "step": 6745 }, { "epoch": 0.8414618934763627, "grad_norm": 0.0, "learning_rate": 1.2896871331252615e-06, "loss": 0.9076, "step": 6746 }, { "epoch": 0.841586628414619, "grad_norm": 0.0, "learning_rate": 1.287703228778091e-06, "loss": 0.914, "step": 6747 }, { "epoch": 0.8417113633528751, "grad_norm": 0.0, "learning_rate": 1.2857207464973876e-06, "loss": 0.889, "step": 6748 }, { "epoch": 0.8418360982911314, "grad_norm": 0.0, "learning_rate": 1.283739686606743e-06, "loss": 0.8356, "step": 6749 }, { "epoch": 0.8419608332293875, "grad_norm": 0.0, "learning_rate": 1.2817600494295179e-06, "loss": 0.9667, "step": 6750 }, { "epoch": 0.8420855681676438, "grad_norm": 0.0, "learning_rate": 1.2797818352888413e-06, "loss": 0.8597, "step": 6751 }, { "epoch": 0.8422103031059, "grad_norm": 0.0, "learning_rate": 1.2778050445076018e-06, "loss": 0.8927, "step": 6752 }, { "epoch": 0.8423350380441562, "grad_norm": 0.0, "learning_rate": 1.2758296774084633e-06, "loss": 0.9371, "step": 6753 }, { "epoch": 0.8424597729824124, "grad_norm": 0.0, "learning_rate": 1.273855734313858e-06, "loss": 0.9225, "step": 6754 }, { "epoch": 0.8425845079206685, "grad_norm": 0.0, "learning_rate": 1.2718832155459803e-06, "loss": 0.8781, "step": 6755 }, { "epoch": 0.8427092428589248, "grad_norm": 0.0, "learning_rate": 1.2699121214267995e-06, "loss": 0.8744, "step": 6756 }, { "epoch": 0.8428339777971809, "grad_norm": 0.0, "learning_rate": 1.2679424522780426e-06, "loss": 0.887, "step": 6757 }, { "epoch": 0.8429587127354372, "grad_norm": 0.0, "learning_rate": 1.2659742084212111e-06, "loss": 0.9444, "step": 6758 }, { "epoch": 0.8430834476736934, "grad_norm": 0.0, "learning_rate": 1.2640073901775752e-06, "loss": 0.8816, "step": 6759 }, { "epoch": 0.8432081826119496, "grad_norm": 0.0, "learning_rate": 1.262041997868162e-06, "loss": 0.8959, "step": 6760 }, { "epoch": 0.8433329175502058, "grad_norm": 0.0, "learning_rate": 1.2600780318137806e-06, "loss": 0.9314, "step": 6761 }, { "epoch": 0.843457652488462, "grad_norm": 0.0, "learning_rate": 1.2581154923349958e-06, "loss": 0.8796, "step": 6762 }, { "epoch": 0.8435823874267182, "grad_norm": 0.0, "learning_rate": 1.2561543797521436e-06, "loss": 0.9398, "step": 6763 }, { "epoch": 0.8437071223649745, "grad_norm": 0.0, "learning_rate": 1.2541946943853289e-06, "loss": 0.9099, "step": 6764 }, { "epoch": 0.8438318573032306, "grad_norm": 0.0, "learning_rate": 1.2522364365544204e-06, "loss": 0.874, "step": 6765 }, { "epoch": 0.8439565922414869, "grad_norm": 0.0, "learning_rate": 1.2502796065790534e-06, "loss": 0.8297, "step": 6766 }, { "epoch": 0.844081327179743, "grad_norm": 0.0, "learning_rate": 1.2483242047786347e-06, "loss": 0.8874, "step": 6767 }, { "epoch": 0.8442060621179992, "grad_norm": 0.0, "learning_rate": 1.2463702314723369e-06, "loss": 0.8626, "step": 6768 }, { "epoch": 0.8443307970562555, "grad_norm": 0.0, "learning_rate": 1.2444176869790925e-06, "loss": 0.9193, "step": 6769 }, { "epoch": 0.8444555319945116, "grad_norm": 0.0, "learning_rate": 1.2424665716176087e-06, "loss": 0.8973, "step": 6770 }, { "epoch": 0.8445802669327679, "grad_norm": 0.0, "learning_rate": 1.2405168857063587e-06, "loss": 0.9439, "step": 6771 }, { "epoch": 0.844705001871024, "grad_norm": 0.0, "learning_rate": 1.2385686295635735e-06, "loss": 0.935, "step": 6772 }, { "epoch": 0.8448297368092803, "grad_norm": 0.0, "learning_rate": 1.2366218035072674e-06, "loss": 0.9056, "step": 6773 }, { "epoch": 0.8449544717475365, "grad_norm": 0.0, "learning_rate": 1.234676407855203e-06, "loss": 0.9071, "step": 6774 }, { "epoch": 0.8450792066857927, "grad_norm": 0.0, "learning_rate": 1.2327324429249232e-06, "loss": 0.8939, "step": 6775 }, { "epoch": 0.8452039416240489, "grad_norm": 0.0, "learning_rate": 1.2307899090337317e-06, "loss": 0.9218, "step": 6776 }, { "epoch": 0.8453286765623051, "grad_norm": 0.0, "learning_rate": 1.2288488064986947e-06, "loss": 0.8809, "step": 6777 }, { "epoch": 0.8454534115005613, "grad_norm": 0.0, "learning_rate": 1.2269091356366525e-06, "loss": 0.9348, "step": 6778 }, { "epoch": 0.8455781464388175, "grad_norm": 0.0, "learning_rate": 1.2249708967642082e-06, "loss": 0.9314, "step": 6779 }, { "epoch": 0.8457028813770737, "grad_norm": 0.0, "learning_rate": 1.22303409019773e-06, "loss": 0.9333, "step": 6780 }, { "epoch": 0.8458276163153299, "grad_norm": 0.0, "learning_rate": 1.2210987162533571e-06, "loss": 0.9153, "step": 6781 }, { "epoch": 0.8459523512535861, "grad_norm": 0.0, "learning_rate": 1.219164775246986e-06, "loss": 0.895, "step": 6782 }, { "epoch": 0.8460770861918423, "grad_norm": 0.0, "learning_rate": 1.2172322674942872e-06, "loss": 0.9179, "step": 6783 }, { "epoch": 0.8462018211300986, "grad_norm": 0.0, "learning_rate": 1.215301193310695e-06, "loss": 0.9268, "step": 6784 }, { "epoch": 0.8463265560683547, "grad_norm": 0.0, "learning_rate": 1.213371553011411e-06, "loss": 0.9118, "step": 6785 }, { "epoch": 0.846451291006611, "grad_norm": 0.0, "learning_rate": 1.2114433469113963e-06, "loss": 0.9224, "step": 6786 }, { "epoch": 0.8465760259448671, "grad_norm": 0.0, "learning_rate": 1.2095165753253868e-06, "loss": 0.8994, "step": 6787 }, { "epoch": 0.8467007608831234, "grad_norm": 0.0, "learning_rate": 1.20759123856788e-06, "loss": 0.8803, "step": 6788 }, { "epoch": 0.8468254958213796, "grad_norm": 0.0, "learning_rate": 1.2056673369531357e-06, "loss": 0.9247, "step": 6789 }, { "epoch": 0.8469502307596358, "grad_norm": 0.0, "learning_rate": 1.2037448707951894e-06, "loss": 0.9125, "step": 6790 }, { "epoch": 0.847074965697892, "grad_norm": 0.0, "learning_rate": 1.2018238404078298e-06, "loss": 0.91, "step": 6791 }, { "epoch": 0.8471997006361481, "grad_norm": 0.0, "learning_rate": 1.1999042461046207e-06, "loss": 0.8867, "step": 6792 }, { "epoch": 0.8473244355744044, "grad_norm": 0.0, "learning_rate": 1.1979860881988903e-06, "loss": 0.9302, "step": 6793 }, { "epoch": 0.8474491705126606, "grad_norm": 0.0, "learning_rate": 1.1960693670037248e-06, "loss": 0.9102, "step": 6794 }, { "epoch": 0.8475739054509168, "grad_norm": 0.0, "learning_rate": 1.1941540828319852e-06, "loss": 0.9282, "step": 6795 }, { "epoch": 0.847698640389173, "grad_norm": 0.0, "learning_rate": 1.1922402359962936e-06, "loss": 0.8542, "step": 6796 }, { "epoch": 0.8478233753274292, "grad_norm": 0.0, "learning_rate": 1.190327826809039e-06, "loss": 0.9462, "step": 6797 }, { "epoch": 0.8479481102656854, "grad_norm": 0.0, "learning_rate": 1.1884168555823728e-06, "loss": 0.9517, "step": 6798 }, { "epoch": 0.8480728452039417, "grad_norm": 0.0, "learning_rate": 1.186507322628213e-06, "loss": 0.891, "step": 6799 }, { "epoch": 0.8481975801421978, "grad_norm": 0.0, "learning_rate": 1.1845992282582464e-06, "loss": 0.9184, "step": 6800 }, { "epoch": 0.8483223150804541, "grad_norm": 0.0, "learning_rate": 1.18269257278392e-06, "loss": 0.912, "step": 6801 }, { "epoch": 0.8484470500187102, "grad_norm": 0.0, "learning_rate": 1.1807873565164507e-06, "loss": 0.8901, "step": 6802 }, { "epoch": 0.8485717849569665, "grad_norm": 0.0, "learning_rate": 1.1788835797668142e-06, "loss": 0.8955, "step": 6803 }, { "epoch": 0.8486965198952227, "grad_norm": 0.0, "learning_rate": 1.176981242845756e-06, "loss": 0.9812, "step": 6804 }, { "epoch": 0.8488212548334788, "grad_norm": 0.0, "learning_rate": 1.1750803460637895e-06, "loss": 0.9305, "step": 6805 }, { "epoch": 0.8489459897717351, "grad_norm": 0.0, "learning_rate": 1.1731808897311803e-06, "loss": 0.8985, "step": 6806 }, { "epoch": 0.8490707247099912, "grad_norm": 0.0, "learning_rate": 1.1712828741579763e-06, "loss": 0.9302, "step": 6807 }, { "epoch": 0.8491954596482475, "grad_norm": 0.0, "learning_rate": 1.1693862996539795e-06, "loss": 0.8647, "step": 6808 }, { "epoch": 0.8493201945865037, "grad_norm": 0.0, "learning_rate": 1.1674911665287514e-06, "loss": 0.9543, "step": 6809 }, { "epoch": 0.8494449295247599, "grad_norm": 0.0, "learning_rate": 1.1655974750916366e-06, "loss": 0.849, "step": 6810 }, { "epoch": 0.8495696644630161, "grad_norm": 0.0, "learning_rate": 1.1637052256517245e-06, "loss": 0.905, "step": 6811 }, { "epoch": 0.8496943994012723, "grad_norm": 0.0, "learning_rate": 1.1618144185178825e-06, "loss": 0.8634, "step": 6812 }, { "epoch": 0.8498191343395285, "grad_norm": 0.0, "learning_rate": 1.1599250539987362e-06, "loss": 0.9291, "step": 6813 }, { "epoch": 0.8499438692777848, "grad_norm": 0.0, "learning_rate": 1.1580371324026807e-06, "loss": 0.8566, "step": 6814 }, { "epoch": 0.8500686042160409, "grad_norm": 0.0, "learning_rate": 1.1561506540378674e-06, "loss": 0.8892, "step": 6815 }, { "epoch": 0.8501933391542971, "grad_norm": 0.0, "learning_rate": 1.1542656192122192e-06, "loss": 0.9006, "step": 6816 }, { "epoch": 0.8503180740925533, "grad_norm": 0.0, "learning_rate": 1.152382028233422e-06, "loss": 0.9244, "step": 6817 }, { "epoch": 0.8504428090308095, "grad_norm": 0.0, "learning_rate": 1.1504998814089252e-06, "loss": 0.9657, "step": 6818 }, { "epoch": 0.8505675439690658, "grad_norm": 0.0, "learning_rate": 1.1486191790459445e-06, "loss": 0.907, "step": 6819 }, { "epoch": 0.8506922789073219, "grad_norm": 0.0, "learning_rate": 1.146739921451453e-06, "loss": 0.8903, "step": 6820 }, { "epoch": 0.8508170138455782, "grad_norm": 0.0, "learning_rate": 1.1448621089321964e-06, "loss": 0.9136, "step": 6821 }, { "epoch": 0.8509417487838343, "grad_norm": 0.0, "learning_rate": 1.1429857417946822e-06, "loss": 0.9131, "step": 6822 }, { "epoch": 0.8510664837220906, "grad_norm": 0.0, "learning_rate": 1.141110820345177e-06, "loss": 0.9215, "step": 6823 }, { "epoch": 0.8511912186603467, "grad_norm": 0.0, "learning_rate": 1.1392373448897164e-06, "loss": 0.9151, "step": 6824 }, { "epoch": 0.851315953598603, "grad_norm": 0.0, "learning_rate": 1.1373653157341035e-06, "loss": 0.8913, "step": 6825 }, { "epoch": 0.8514406885368592, "grad_norm": 0.0, "learning_rate": 1.1354947331838917e-06, "loss": 0.8728, "step": 6826 }, { "epoch": 0.8515654234751154, "grad_norm": 0.0, "learning_rate": 1.1336255975444165e-06, "loss": 0.8782, "step": 6827 }, { "epoch": 0.8516901584133716, "grad_norm": 0.0, "learning_rate": 1.131757909120762e-06, "loss": 0.9105, "step": 6828 }, { "epoch": 0.8518148933516277, "grad_norm": 0.0, "learning_rate": 1.129891668217783e-06, "loss": 0.8742, "step": 6829 }, { "epoch": 0.851939628289884, "grad_norm": 0.0, "learning_rate": 1.128026875140099e-06, "loss": 0.9327, "step": 6830 }, { "epoch": 0.8520643632281402, "grad_norm": 0.0, "learning_rate": 1.1261635301920914e-06, "loss": 0.8867, "step": 6831 }, { "epoch": 0.8521890981663964, "grad_norm": 0.0, "learning_rate": 1.1243016336779021e-06, "loss": 0.9148, "step": 6832 }, { "epoch": 0.8523138331046526, "grad_norm": 0.0, "learning_rate": 1.1224411859014417e-06, "loss": 0.9064, "step": 6833 }, { "epoch": 0.8524385680429089, "grad_norm": 0.0, "learning_rate": 1.1205821871663825e-06, "loss": 0.9092, "step": 6834 }, { "epoch": 0.852563302981165, "grad_norm": 0.0, "learning_rate": 1.1187246377761563e-06, "loss": 0.9145, "step": 6835 }, { "epoch": 0.8526880379194213, "grad_norm": 0.0, "learning_rate": 1.1168685380339684e-06, "loss": 0.898, "step": 6836 }, { "epoch": 0.8528127728576774, "grad_norm": 0.0, "learning_rate": 1.1150138882427763e-06, "loss": 0.9444, "step": 6837 }, { "epoch": 0.8529375077959337, "grad_norm": 0.0, "learning_rate": 1.1131606887053058e-06, "loss": 0.9304, "step": 6838 }, { "epoch": 0.8530622427341898, "grad_norm": 0.0, "learning_rate": 1.1113089397240495e-06, "loss": 0.8863, "step": 6839 }, { "epoch": 0.853186977672446, "grad_norm": 0.0, "learning_rate": 1.1094586416012555e-06, "loss": 0.9343, "step": 6840 }, { "epoch": 0.8533117126107023, "grad_norm": 0.0, "learning_rate": 1.1076097946389398e-06, "loss": 0.8755, "step": 6841 }, { "epoch": 0.8534364475489584, "grad_norm": 0.0, "learning_rate": 1.1057623991388833e-06, "loss": 0.9597, "step": 6842 }, { "epoch": 0.8535611824872147, "grad_norm": 0.0, "learning_rate": 1.1039164554026217e-06, "loss": 0.9074, "step": 6843 }, { "epoch": 0.8536859174254708, "grad_norm": 0.0, "learning_rate": 1.102071963731468e-06, "loss": 0.9128, "step": 6844 }, { "epoch": 0.8538106523637271, "grad_norm": 0.0, "learning_rate": 1.100228924426483e-06, "loss": 0.9204, "step": 6845 }, { "epoch": 0.8539353873019833, "grad_norm": 0.0, "learning_rate": 1.098387337788499e-06, "loss": 0.9075, "step": 6846 }, { "epoch": 0.8540601222402395, "grad_norm": 0.0, "learning_rate": 1.0965472041181102e-06, "loss": 0.8952, "step": 6847 }, { "epoch": 0.8541848571784957, "grad_norm": 0.0, "learning_rate": 1.094708523715675e-06, "loss": 0.8625, "step": 6848 }, { "epoch": 0.854309592116752, "grad_norm": 0.0, "learning_rate": 1.092871296881307e-06, "loss": 0.883, "step": 6849 }, { "epoch": 0.8544343270550081, "grad_norm": 0.0, "learning_rate": 1.0910355239148917e-06, "loss": 0.8766, "step": 6850 }, { "epoch": 0.8545590619932644, "grad_norm": 0.0, "learning_rate": 1.089201205116074e-06, "loss": 0.902, "step": 6851 }, { "epoch": 0.8546837969315205, "grad_norm": 0.0, "learning_rate": 1.087368340784256e-06, "loss": 0.9283, "step": 6852 }, { "epoch": 0.8548085318697767, "grad_norm": 0.0, "learning_rate": 1.0855369312186148e-06, "loss": 0.8959, "step": 6853 }, { "epoch": 0.8549332668080329, "grad_norm": 0.0, "learning_rate": 1.0837069767180785e-06, "loss": 0.895, "step": 6854 }, { "epoch": 0.8550580017462891, "grad_norm": 0.0, "learning_rate": 1.0818784775813385e-06, "loss": 0.9043, "step": 6855 }, { "epoch": 0.8551827366845454, "grad_norm": 0.0, "learning_rate": 1.0800514341068592e-06, "loss": 0.8901, "step": 6856 }, { "epoch": 0.8553074716228015, "grad_norm": 0.0, "learning_rate": 1.078225846592854e-06, "loss": 0.8736, "step": 6857 }, { "epoch": 0.8554322065610578, "grad_norm": 0.0, "learning_rate": 1.0764017153373086e-06, "loss": 0.915, "step": 6858 }, { "epoch": 0.8555569414993139, "grad_norm": 0.0, "learning_rate": 1.0745790406379675e-06, "loss": 0.8981, "step": 6859 }, { "epoch": 0.8556816764375702, "grad_norm": 0.0, "learning_rate": 1.072757822792333e-06, "loss": 0.9034, "step": 6860 }, { "epoch": 0.8558064113758264, "grad_norm": 0.0, "learning_rate": 1.0709380620976762e-06, "loss": 0.8834, "step": 6861 }, { "epoch": 0.8559311463140826, "grad_norm": 0.0, "learning_rate": 1.0691197588510284e-06, "loss": 0.8909, "step": 6862 }, { "epoch": 0.8560558812523388, "grad_norm": 0.0, "learning_rate": 1.067302913349182e-06, "loss": 0.9016, "step": 6863 }, { "epoch": 0.8561806161905949, "grad_norm": 0.0, "learning_rate": 1.0654875258886921e-06, "loss": 0.9068, "step": 6864 }, { "epoch": 0.8563053511288512, "grad_norm": 0.0, "learning_rate": 1.0636735967658785e-06, "loss": 0.8573, "step": 6865 }, { "epoch": 0.8564300860671074, "grad_norm": 0.0, "learning_rate": 1.0618611262768152e-06, "loss": 0.8961, "step": 6866 }, { "epoch": 0.8565548210053636, "grad_norm": 0.0, "learning_rate": 1.0600501147173447e-06, "loss": 0.9083, "step": 6867 }, { "epoch": 0.8566795559436198, "grad_norm": 0.0, "learning_rate": 1.0582405623830726e-06, "loss": 0.9376, "step": 6868 }, { "epoch": 0.856804290881876, "grad_norm": 0.0, "learning_rate": 1.0564324695693572e-06, "loss": 0.8748, "step": 6869 }, { "epoch": 0.8569290258201322, "grad_norm": 0.0, "learning_rate": 1.0546258365713323e-06, "loss": 0.9256, "step": 6870 }, { "epoch": 0.8570537607583885, "grad_norm": 0.0, "learning_rate": 1.0528206636838834e-06, "loss": 0.8314, "step": 6871 }, { "epoch": 0.8571784956966446, "grad_norm": 0.0, "learning_rate": 1.0510169512016544e-06, "loss": 0.9014, "step": 6872 }, { "epoch": 0.8573032306349009, "grad_norm": 0.0, "learning_rate": 1.0492146994190667e-06, "loss": 0.9138, "step": 6873 }, { "epoch": 0.857427965573157, "grad_norm": 0.0, "learning_rate": 1.0474139086302848e-06, "loss": 0.8274, "step": 6874 }, { "epoch": 0.8575527005114133, "grad_norm": 0.0, "learning_rate": 1.0456145791292471e-06, "loss": 0.9244, "step": 6875 }, { "epoch": 0.8576774354496695, "grad_norm": 0.0, "learning_rate": 1.0438167112096508e-06, "loss": 0.9079, "step": 6876 }, { "epoch": 0.8578021703879256, "grad_norm": 0.0, "learning_rate": 1.042020305164949e-06, "loss": 0.8605, "step": 6877 }, { "epoch": 0.8579269053261819, "grad_norm": 0.0, "learning_rate": 1.040225361288364e-06, "loss": 0.8412, "step": 6878 }, { "epoch": 0.858051640264438, "grad_norm": 0.0, "learning_rate": 1.0384318798728732e-06, "loss": 0.8897, "step": 6879 }, { "epoch": 0.8581763752026943, "grad_norm": 0.0, "learning_rate": 1.0366398612112227e-06, "loss": 0.9044, "step": 6880 }, { "epoch": 0.8583011101409505, "grad_norm": 0.0, "learning_rate": 1.0348493055959064e-06, "loss": 0.8626, "step": 6881 }, { "epoch": 0.8584258450792067, "grad_norm": 0.0, "learning_rate": 1.0330602133191992e-06, "loss": 0.9261, "step": 6882 }, { "epoch": 0.8585505800174629, "grad_norm": 0.0, "learning_rate": 1.0312725846731174e-06, "loss": 0.8448, "step": 6883 }, { "epoch": 0.8586753149557191, "grad_norm": 0.0, "learning_rate": 1.0294864199494503e-06, "loss": 0.93, "step": 6884 }, { "epoch": 0.8588000498939753, "grad_norm": 0.0, "learning_rate": 1.0277017194397466e-06, "loss": 0.9158, "step": 6885 }, { "epoch": 0.8589247848322316, "grad_norm": 0.0, "learning_rate": 1.0259184834353108e-06, "loss": 0.8561, "step": 6886 }, { "epoch": 0.8590495197704877, "grad_norm": 0.0, "learning_rate": 1.0241367122272139e-06, "loss": 0.9068, "step": 6887 }, { "epoch": 0.8591742547087439, "grad_norm": 0.0, "learning_rate": 1.0223564061062884e-06, "loss": 0.8963, "step": 6888 }, { "epoch": 0.8592989896470001, "grad_norm": 0.0, "learning_rate": 1.0205775653631178e-06, "loss": 0.9327, "step": 6889 }, { "epoch": 0.8594237245852563, "grad_norm": 0.0, "learning_rate": 1.018800190288063e-06, "loss": 0.9215, "step": 6890 }, { "epoch": 0.8595484595235126, "grad_norm": 0.0, "learning_rate": 1.0170242811712295e-06, "loss": 0.8622, "step": 6891 }, { "epoch": 0.8596731944617687, "grad_norm": 0.0, "learning_rate": 1.0152498383024922e-06, "loss": 0.8792, "step": 6892 }, { "epoch": 0.859797929400025, "grad_norm": 0.0, "learning_rate": 1.0134768619714885e-06, "loss": 0.9126, "step": 6893 }, { "epoch": 0.8599226643382811, "grad_norm": 0.0, "learning_rate": 1.0117053524676078e-06, "loss": 0.9288, "step": 6894 }, { "epoch": 0.8600473992765374, "grad_norm": 0.0, "learning_rate": 1.009935310080007e-06, "loss": 0.9472, "step": 6895 }, { "epoch": 0.8601721342147935, "grad_norm": 0.0, "learning_rate": 1.0081667350976022e-06, "loss": 0.864, "step": 6896 }, { "epoch": 0.8602968691530498, "grad_norm": 0.0, "learning_rate": 1.0063996278090704e-06, "loss": 0.9097, "step": 6897 }, { "epoch": 0.860421604091306, "grad_norm": 0.0, "learning_rate": 1.0046339885028434e-06, "loss": 0.8675, "step": 6898 }, { "epoch": 0.8605463390295622, "grad_norm": 0.0, "learning_rate": 1.0028698174671248e-06, "loss": 0.8729, "step": 6899 }, { "epoch": 0.8606710739678184, "grad_norm": 0.0, "learning_rate": 1.0011071149898688e-06, "loss": 0.8943, "step": 6900 }, { "epoch": 0.8607958089060745, "grad_norm": 0.0, "learning_rate": 9.993458813587885e-07, "loss": 0.9296, "step": 6901 }, { "epoch": 0.8609205438443308, "grad_norm": 0.0, "learning_rate": 9.975861168613688e-07, "loss": 0.924, "step": 6902 }, { "epoch": 0.861045278782587, "grad_norm": 0.0, "learning_rate": 9.958278217848428e-07, "loss": 0.8943, "step": 6903 }, { "epoch": 0.8611700137208432, "grad_norm": 0.0, "learning_rate": 9.94070996416211e-07, "loss": 0.9084, "step": 6904 }, { "epoch": 0.8612947486590994, "grad_norm": 0.0, "learning_rate": 9.92315641042232e-07, "loss": 0.8974, "step": 6905 }, { "epoch": 0.8614194835973557, "grad_norm": 0.0, "learning_rate": 9.905617559494208e-07, "loss": 0.8789, "step": 6906 }, { "epoch": 0.8615442185356118, "grad_norm": 0.0, "learning_rate": 9.888093414240574e-07, "loss": 0.9129, "step": 6907 }, { "epoch": 0.8616689534738681, "grad_norm": 0.0, "learning_rate": 9.870583977521807e-07, "loss": 0.8843, "step": 6908 }, { "epoch": 0.8617936884121242, "grad_norm": 0.0, "learning_rate": 9.85308925219588e-07, "loss": 0.9082, "step": 6909 }, { "epoch": 0.8619184233503805, "grad_norm": 0.0, "learning_rate": 9.835609241118404e-07, "loss": 0.9198, "step": 6910 }, { "epoch": 0.8620431582886366, "grad_norm": 0.0, "learning_rate": 9.818143947142512e-07, "loss": 0.8787, "step": 6911 }, { "epoch": 0.8621678932268928, "grad_norm": 0.0, "learning_rate": 9.800693373118998e-07, "loss": 0.942, "step": 6912 }, { "epoch": 0.8622926281651491, "grad_norm": 0.0, "learning_rate": 9.783257521896228e-07, "loss": 0.9106, "step": 6913 }, { "epoch": 0.8624173631034052, "grad_norm": 0.0, "learning_rate": 9.765836396320216e-07, "loss": 0.8806, "step": 6914 }, { "epoch": 0.8625420980416615, "grad_norm": 0.0, "learning_rate": 9.748429999234444e-07, "loss": 0.9062, "step": 6915 }, { "epoch": 0.8626668329799176, "grad_norm": 0.0, "learning_rate": 9.73103833348017e-07, "loss": 0.851, "step": 6916 }, { "epoch": 0.8627915679181739, "grad_norm": 0.0, "learning_rate": 9.713661401896101e-07, "loss": 0.9581, "step": 6917 }, { "epoch": 0.8629163028564301, "grad_norm": 0.0, "learning_rate": 9.696299207318571e-07, "loss": 0.8685, "step": 6918 }, { "epoch": 0.8630410377946863, "grad_norm": 0.0, "learning_rate": 9.678951752581577e-07, "loss": 0.8783, "step": 6919 }, { "epoch": 0.8631657727329425, "grad_norm": 0.0, "learning_rate": 9.661619040516623e-07, "loss": 0.9038, "step": 6920 }, { "epoch": 0.8632905076711987, "grad_norm": 0.0, "learning_rate": 9.644301073952866e-07, "loss": 0.9334, "step": 6921 }, { "epoch": 0.8634152426094549, "grad_norm": 0.0, "learning_rate": 9.626997855717045e-07, "loss": 0.9104, "step": 6922 }, { "epoch": 0.8635399775477112, "grad_norm": 0.0, "learning_rate": 9.609709388633447e-07, "loss": 0.8893, "step": 6923 }, { "epoch": 0.8636647124859673, "grad_norm": 0.0, "learning_rate": 9.592435675523991e-07, "loss": 0.9286, "step": 6924 }, { "epoch": 0.8637894474242235, "grad_norm": 0.0, "learning_rate": 9.575176719208211e-07, "loss": 0.8489, "step": 6925 }, { "epoch": 0.8639141823624797, "grad_norm": 0.0, "learning_rate": 9.557932522503176e-07, "loss": 0.8833, "step": 6926 }, { "epoch": 0.8640389173007359, "grad_norm": 0.0, "learning_rate": 9.540703088223614e-07, "loss": 0.9026, "step": 6927 }, { "epoch": 0.8641636522389922, "grad_norm": 0.0, "learning_rate": 9.523488419181737e-07, "loss": 0.9375, "step": 6928 }, { "epoch": 0.8642883871772483, "grad_norm": 0.0, "learning_rate": 9.506288518187468e-07, "loss": 0.8773, "step": 6929 }, { "epoch": 0.8644131221155046, "grad_norm": 0.0, "learning_rate": 9.489103388048238e-07, "loss": 0.8814, "step": 6930 }, { "epoch": 0.8645378570537607, "grad_norm": 0.0, "learning_rate": 9.471933031569125e-07, "loss": 0.8873, "step": 6931 }, { "epoch": 0.864662591992017, "grad_norm": 0.0, "learning_rate": 9.45477745155271e-07, "loss": 0.86, "step": 6932 }, { "epoch": 0.8647873269302732, "grad_norm": 0.0, "learning_rate": 9.437636650799264e-07, "loss": 0.9326, "step": 6933 }, { "epoch": 0.8649120618685294, "grad_norm": 0.0, "learning_rate": 9.420510632106594e-07, "loss": 0.9104, "step": 6934 }, { "epoch": 0.8650367968067856, "grad_norm": 0.0, "learning_rate": 9.403399398270041e-07, "loss": 0.8711, "step": 6935 }, { "epoch": 0.8651615317450417, "grad_norm": 0.0, "learning_rate": 9.386302952082671e-07, "loss": 0.9023, "step": 6936 }, { "epoch": 0.865286266683298, "grad_norm": 0.0, "learning_rate": 9.369221296335007e-07, "loss": 0.9024, "step": 6937 }, { "epoch": 0.8654110016215542, "grad_norm": 0.0, "learning_rate": 9.352154433815209e-07, "loss": 0.9198, "step": 6938 }, { "epoch": 0.8655357365598104, "grad_norm": 0.0, "learning_rate": 9.335102367309057e-07, "loss": 0.8641, "step": 6939 }, { "epoch": 0.8656604714980666, "grad_norm": 0.0, "learning_rate": 9.318065099599815e-07, "loss": 0.9374, "step": 6940 }, { "epoch": 0.8657852064363228, "grad_norm": 0.0, "learning_rate": 9.301042633468438e-07, "loss": 0.9136, "step": 6941 }, { "epoch": 0.865909941374579, "grad_norm": 0.0, "learning_rate": 9.2840349716934e-07, "loss": 0.9501, "step": 6942 }, { "epoch": 0.8660346763128353, "grad_norm": 0.0, "learning_rate": 9.267042117050817e-07, "loss": 0.9623, "step": 6943 }, { "epoch": 0.8661594112510914, "grad_norm": 0.0, "learning_rate": 9.250064072314302e-07, "loss": 0.9139, "step": 6944 }, { "epoch": 0.8662841461893477, "grad_norm": 0.0, "learning_rate": 9.233100840255127e-07, "loss": 0.889, "step": 6945 }, { "epoch": 0.8664088811276038, "grad_norm": 0.0, "learning_rate": 9.216152423642122e-07, "loss": 0.925, "step": 6946 }, { "epoch": 0.8665336160658601, "grad_norm": 0.0, "learning_rate": 9.199218825241673e-07, "loss": 0.9498, "step": 6947 }, { "epoch": 0.8666583510041163, "grad_norm": 0.0, "learning_rate": 9.182300047817816e-07, "loss": 0.8929, "step": 6948 }, { "epoch": 0.8667830859423724, "grad_norm": 0.0, "learning_rate": 9.165396094132073e-07, "loss": 0.9379, "step": 6949 }, { "epoch": 0.8669078208806287, "grad_norm": 0.0, "learning_rate": 9.148506966943615e-07, "loss": 0.8752, "step": 6950 }, { "epoch": 0.8670325558188848, "grad_norm": 0.0, "learning_rate": 9.131632669009194e-07, "loss": 0.871, "step": 6951 }, { "epoch": 0.8671572907571411, "grad_norm": 0.0, "learning_rate": 9.114773203083061e-07, "loss": 0.8757, "step": 6952 }, { "epoch": 0.8672820256953973, "grad_norm": 0.0, "learning_rate": 9.097928571917181e-07, "loss": 0.9036, "step": 6953 }, { "epoch": 0.8674067606336535, "grad_norm": 0.0, "learning_rate": 9.081098778260966e-07, "loss": 0.9284, "step": 6954 }, { "epoch": 0.8675314955719097, "grad_norm": 0.0, "learning_rate": 9.064283824861486e-07, "loss": 0.8183, "step": 6955 }, { "epoch": 0.8676562305101659, "grad_norm": 0.0, "learning_rate": 9.047483714463379e-07, "loss": 0.9764, "step": 6956 }, { "epoch": 0.8677809654484221, "grad_norm": 0.0, "learning_rate": 9.030698449808805e-07, "loss": 0.8869, "step": 6957 }, { "epoch": 0.8679057003866784, "grad_norm": 0.0, "learning_rate": 9.013928033637576e-07, "loss": 0.9355, "step": 6958 }, { "epoch": 0.8680304353249345, "grad_norm": 0.0, "learning_rate": 8.997172468687033e-07, "loss": 0.8907, "step": 6959 }, { "epoch": 0.8681551702631907, "grad_norm": 0.0, "learning_rate": 8.980431757692132e-07, "loss": 0.9041, "step": 6960 }, { "epoch": 0.8682799052014469, "grad_norm": 0.0, "learning_rate": 8.963705903385344e-07, "loss": 0.8934, "step": 6961 }, { "epoch": 0.8684046401397031, "grad_norm": 0.0, "learning_rate": 8.946994908496753e-07, "loss": 0.8377, "step": 6962 }, { "epoch": 0.8685293750779594, "grad_norm": 0.0, "learning_rate": 8.930298775754054e-07, "loss": 0.8507, "step": 6963 }, { "epoch": 0.8686541100162155, "grad_norm": 0.0, "learning_rate": 8.91361750788241e-07, "loss": 0.8969, "step": 6964 }, { "epoch": 0.8687788449544718, "grad_norm": 0.0, "learning_rate": 8.896951107604701e-07, "loss": 0.9294, "step": 6965 }, { "epoch": 0.8689035798927279, "grad_norm": 0.0, "learning_rate": 8.880299577641249e-07, "loss": 0.9186, "step": 6966 }, { "epoch": 0.8690283148309842, "grad_norm": 0.0, "learning_rate": 8.863662920710015e-07, "loss": 0.918, "step": 6967 }, { "epoch": 0.8691530497692403, "grad_norm": 0.0, "learning_rate": 8.847041139526546e-07, "loss": 0.9162, "step": 6968 }, { "epoch": 0.8692777847074966, "grad_norm": 0.0, "learning_rate": 8.830434236803909e-07, "loss": 0.9074, "step": 6969 }, { "epoch": 0.8694025196457528, "grad_norm": 0.0, "learning_rate": 8.813842215252766e-07, "loss": 0.8985, "step": 6970 }, { "epoch": 0.869527254584009, "grad_norm": 0.0, "learning_rate": 8.797265077581362e-07, "loss": 0.9525, "step": 6971 }, { "epoch": 0.8696519895222652, "grad_norm": 0.0, "learning_rate": 8.780702826495502e-07, "loss": 0.8857, "step": 6972 }, { "epoch": 0.8697767244605213, "grad_norm": 0.0, "learning_rate": 8.764155464698598e-07, "loss": 0.9371, "step": 6973 }, { "epoch": 0.8699014593987776, "grad_norm": 0.0, "learning_rate": 8.747622994891536e-07, "loss": 0.9085, "step": 6974 }, { "epoch": 0.8700261943370338, "grad_norm": 0.0, "learning_rate": 8.731105419772868e-07, "loss": 0.9053, "step": 6975 }, { "epoch": 0.87015092927529, "grad_norm": 0.0, "learning_rate": 8.714602742038658e-07, "loss": 0.8931, "step": 6976 }, { "epoch": 0.8702756642135462, "grad_norm": 0.0, "learning_rate": 8.698114964382598e-07, "loss": 0.9657, "step": 6977 }, { "epoch": 0.8704003991518025, "grad_norm": 0.0, "learning_rate": 8.681642089495867e-07, "loss": 0.9253, "step": 6978 }, { "epoch": 0.8705251340900586, "grad_norm": 0.0, "learning_rate": 8.665184120067271e-07, "loss": 0.9063, "step": 6979 }, { "epoch": 0.8706498690283149, "grad_norm": 0.0, "learning_rate": 8.648741058783184e-07, "loss": 0.8842, "step": 6980 }, { "epoch": 0.870774603966571, "grad_norm": 0.0, "learning_rate": 8.632312908327467e-07, "loss": 0.8707, "step": 6981 }, { "epoch": 0.8708993389048273, "grad_norm": 0.0, "learning_rate": 8.615899671381689e-07, "loss": 0.8978, "step": 6982 }, { "epoch": 0.8710240738430834, "grad_norm": 0.0, "learning_rate": 8.599501350624861e-07, "loss": 0.901, "step": 6983 }, { "epoch": 0.8711488087813396, "grad_norm": 0.0, "learning_rate": 8.583117948733566e-07, "loss": 0.921, "step": 6984 }, { "epoch": 0.8712735437195959, "grad_norm": 0.0, "learning_rate": 8.566749468382074e-07, "loss": 0.8723, "step": 6985 }, { "epoch": 0.871398278657852, "grad_norm": 0.0, "learning_rate": 8.55039591224206e-07, "loss": 0.9148, "step": 6986 }, { "epoch": 0.8715230135961083, "grad_norm": 0.0, "learning_rate": 8.534057282982877e-07, "loss": 0.8554, "step": 6987 }, { "epoch": 0.8716477485343644, "grad_norm": 0.0, "learning_rate": 8.517733583271382e-07, "loss": 0.8634, "step": 6988 }, { "epoch": 0.8717724834726207, "grad_norm": 0.0, "learning_rate": 8.501424815772041e-07, "loss": 0.8788, "step": 6989 }, { "epoch": 0.8718972184108769, "grad_norm": 0.0, "learning_rate": 8.485130983146816e-07, "loss": 0.8947, "step": 6990 }, { "epoch": 0.8720219533491331, "grad_norm": 0.0, "learning_rate": 8.468852088055291e-07, "loss": 0.8321, "step": 6991 }, { "epoch": 0.8721466882873893, "grad_norm": 0.0, "learning_rate": 8.452588133154593e-07, "loss": 0.9384, "step": 6992 }, { "epoch": 0.8722714232256455, "grad_norm": 0.0, "learning_rate": 8.436339121099413e-07, "loss": 0.8862, "step": 6993 }, { "epoch": 0.8723961581639017, "grad_norm": 0.0, "learning_rate": 8.420105054542005e-07, "loss": 0.9008, "step": 6994 }, { "epoch": 0.872520893102158, "grad_norm": 0.0, "learning_rate": 8.40388593613215e-07, "loss": 0.911, "step": 6995 }, { "epoch": 0.8726456280404141, "grad_norm": 0.0, "learning_rate": 8.387681768517242e-07, "loss": 0.9297, "step": 6996 }, { "epoch": 0.8727703629786703, "grad_norm": 0.0, "learning_rate": 8.37149255434222e-07, "loss": 0.8583, "step": 6997 }, { "epoch": 0.8728950979169265, "grad_norm": 0.0, "learning_rate": 8.355318296249515e-07, "loss": 0.888, "step": 6998 }, { "epoch": 0.8730198328551827, "grad_norm": 0.0, "learning_rate": 8.339158996879248e-07, "loss": 0.9295, "step": 6999 }, { "epoch": 0.873144567793439, "grad_norm": 0.0, "learning_rate": 8.323014658869e-07, "loss": 0.9154, "step": 7000 }, { "epoch": 0.8732693027316951, "grad_norm": 0.0, "learning_rate": 8.306885284853872e-07, "loss": 0.8878, "step": 7001 }, { "epoch": 0.8733940376699514, "grad_norm": 0.0, "learning_rate": 8.29077087746668e-07, "loss": 0.8918, "step": 7002 }, { "epoch": 0.8735187726082075, "grad_norm": 0.0, "learning_rate": 8.274671439337645e-07, "loss": 0.8781, "step": 7003 }, { "epoch": 0.8736435075464638, "grad_norm": 0.0, "learning_rate": 8.258586973094606e-07, "loss": 0.8833, "step": 7004 }, { "epoch": 0.87376824248472, "grad_norm": 0.0, "learning_rate": 8.242517481362966e-07, "loss": 0.8957, "step": 7005 }, { "epoch": 0.8738929774229762, "grad_norm": 0.0, "learning_rate": 8.226462966765681e-07, "loss": 0.9262, "step": 7006 }, { "epoch": 0.8740177123612324, "grad_norm": 0.0, "learning_rate": 8.21042343192322e-07, "loss": 0.9017, "step": 7007 }, { "epoch": 0.8741424472994885, "grad_norm": 0.0, "learning_rate": 8.194398879453647e-07, "loss": 0.9593, "step": 7008 }, { "epoch": 0.8742671822377448, "grad_norm": 0.0, "learning_rate": 8.178389311972612e-07, "loss": 0.9225, "step": 7009 }, { "epoch": 0.874391917176001, "grad_norm": 0.0, "learning_rate": 8.162394732093215e-07, "loss": 0.9477, "step": 7010 }, { "epoch": 0.8745166521142572, "grad_norm": 0.0, "learning_rate": 8.146415142426245e-07, "loss": 0.9166, "step": 7011 }, { "epoch": 0.8746413870525134, "grad_norm": 0.0, "learning_rate": 8.130450545579926e-07, "loss": 0.8976, "step": 7012 }, { "epoch": 0.8747661219907696, "grad_norm": 0.0, "learning_rate": 8.114500944160086e-07, "loss": 0.859, "step": 7013 }, { "epoch": 0.8748908569290258, "grad_norm": 0.0, "learning_rate": 8.098566340770131e-07, "loss": 0.9434, "step": 7014 }, { "epoch": 0.8750155918672821, "grad_norm": 0.0, "learning_rate": 8.082646738010957e-07, "loss": 0.8747, "step": 7015 }, { "epoch": 0.8751403268055382, "grad_norm": 0.0, "learning_rate": 8.066742138481066e-07, "loss": 0.8966, "step": 7016 }, { "epoch": 0.8752650617437945, "grad_norm": 0.0, "learning_rate": 8.050852544776489e-07, "loss": 0.8715, "step": 7017 }, { "epoch": 0.8753897966820506, "grad_norm": 0.0, "learning_rate": 8.034977959490775e-07, "loss": 0.8961, "step": 7018 }, { "epoch": 0.8755145316203069, "grad_norm": 0.0, "learning_rate": 8.019118385215107e-07, "loss": 0.876, "step": 7019 }, { "epoch": 0.8756392665585631, "grad_norm": 0.0, "learning_rate": 8.003273824538138e-07, "loss": 0.9036, "step": 7020 }, { "epoch": 0.8757640014968192, "grad_norm": 0.0, "learning_rate": 7.987444280046098e-07, "loss": 0.9325, "step": 7021 }, { "epoch": 0.8758887364350755, "grad_norm": 0.0, "learning_rate": 7.971629754322774e-07, "loss": 0.8872, "step": 7022 }, { "epoch": 0.8760134713733316, "grad_norm": 0.0, "learning_rate": 7.955830249949525e-07, "loss": 0.9262, "step": 7023 }, { "epoch": 0.8761382063115879, "grad_norm": 0.0, "learning_rate": 7.940045769505167e-07, "loss": 0.9182, "step": 7024 }, { "epoch": 0.876262941249844, "grad_norm": 0.0, "learning_rate": 7.924276315566171e-07, "loss": 0.9059, "step": 7025 }, { "epoch": 0.8763876761881003, "grad_norm": 0.0, "learning_rate": 7.9085218907065e-07, "loss": 0.9152, "step": 7026 }, { "epoch": 0.8765124111263565, "grad_norm": 0.0, "learning_rate": 7.892782497497642e-07, "loss": 0.8744, "step": 7027 }, { "epoch": 0.8766371460646127, "grad_norm": 0.0, "learning_rate": 7.877058138508731e-07, "loss": 0.9161, "step": 7028 }, { "epoch": 0.8767618810028689, "grad_norm": 0.0, "learning_rate": 7.861348816306314e-07, "loss": 0.8621, "step": 7029 }, { "epoch": 0.8768866159411252, "grad_norm": 0.0, "learning_rate": 7.845654533454572e-07, "loss": 0.8915, "step": 7030 }, { "epoch": 0.8770113508793813, "grad_norm": 0.0, "learning_rate": 7.829975292515224e-07, "loss": 0.9104, "step": 7031 }, { "epoch": 0.8771360858176375, "grad_norm": 0.0, "learning_rate": 7.814311096047478e-07, "loss": 0.941, "step": 7032 }, { "epoch": 0.8772608207558937, "grad_norm": 0.0, "learning_rate": 7.798661946608166e-07, "loss": 0.9021, "step": 7033 }, { "epoch": 0.8773855556941499, "grad_norm": 0.0, "learning_rate": 7.783027846751611e-07, "loss": 0.9334, "step": 7034 }, { "epoch": 0.8775102906324062, "grad_norm": 0.0, "learning_rate": 7.767408799029675e-07, "loss": 0.8856, "step": 7035 }, { "epoch": 0.8776350255706623, "grad_norm": 0.0, "learning_rate": 7.751804805991792e-07, "loss": 0.9069, "step": 7036 }, { "epoch": 0.8777597605089186, "grad_norm": 0.0, "learning_rate": 7.736215870184927e-07, "loss": 0.869, "step": 7037 }, { "epoch": 0.8778844954471747, "grad_norm": 0.0, "learning_rate": 7.720641994153588e-07, "loss": 0.8713, "step": 7038 }, { "epoch": 0.878009230385431, "grad_norm": 0.0, "learning_rate": 7.705083180439832e-07, "loss": 0.9099, "step": 7039 }, { "epoch": 0.8781339653236871, "grad_norm": 0.0, "learning_rate": 7.68953943158326e-07, "loss": 0.8793, "step": 7040 }, { "epoch": 0.8782587002619434, "grad_norm": 0.0, "learning_rate": 7.674010750120964e-07, "loss": 0.9462, "step": 7041 }, { "epoch": 0.8783834352001996, "grad_norm": 0.0, "learning_rate": 7.65849713858764e-07, "loss": 0.9359, "step": 7042 }, { "epoch": 0.8785081701384558, "grad_norm": 0.0, "learning_rate": 7.642998599515517e-07, "loss": 0.8695, "step": 7043 }, { "epoch": 0.878632905076712, "grad_norm": 0.0, "learning_rate": 7.627515135434293e-07, "loss": 0.9376, "step": 7044 }, { "epoch": 0.8787576400149681, "grad_norm": 0.0, "learning_rate": 7.612046748871327e-07, "loss": 0.8557, "step": 7045 }, { "epoch": 0.8788823749532244, "grad_norm": 0.0, "learning_rate": 7.59659344235143e-07, "loss": 0.9316, "step": 7046 }, { "epoch": 0.8790071098914806, "grad_norm": 0.0, "learning_rate": 7.581155218396918e-07, "loss": 0.8795, "step": 7047 }, { "epoch": 0.8791318448297368, "grad_norm": 0.0, "learning_rate": 7.565732079527765e-07, "loss": 0.8967, "step": 7048 }, { "epoch": 0.879256579767993, "grad_norm": 0.0, "learning_rate": 7.550324028261391e-07, "loss": 0.9366, "step": 7049 }, { "epoch": 0.8793813147062493, "grad_norm": 0.0, "learning_rate": 7.534931067112772e-07, "loss": 0.908, "step": 7050 }, { "epoch": 0.8795060496445054, "grad_norm": 0.0, "learning_rate": 7.519553198594453e-07, "loss": 0.8828, "step": 7051 }, { "epoch": 0.8796307845827617, "grad_norm": 0.0, "learning_rate": 7.504190425216451e-07, "loss": 0.8864, "step": 7052 }, { "epoch": 0.8797555195210178, "grad_norm": 0.0, "learning_rate": 7.48884274948638e-07, "loss": 0.9045, "step": 7053 }, { "epoch": 0.8798802544592741, "grad_norm": 0.0, "learning_rate": 7.47351017390936e-07, "loss": 0.8915, "step": 7054 }, { "epoch": 0.8800049893975302, "grad_norm": 0.0, "learning_rate": 7.458192700988065e-07, "loss": 0.8924, "step": 7055 }, { "epoch": 0.8801297243357864, "grad_norm": 0.0, "learning_rate": 7.442890333222685e-07, "loss": 0.9038, "step": 7056 }, { "epoch": 0.8802544592740427, "grad_norm": 0.0, "learning_rate": 7.427603073110967e-07, "loss": 0.8641, "step": 7057 }, { "epoch": 0.8803791942122988, "grad_norm": 0.0, "learning_rate": 7.412330923148148e-07, "loss": 0.8745, "step": 7058 }, { "epoch": 0.8805039291505551, "grad_norm": 0.0, "learning_rate": 7.397073885827055e-07, "loss": 0.8628, "step": 7059 }, { "epoch": 0.8806286640888112, "grad_norm": 0.0, "learning_rate": 7.381831963638019e-07, "loss": 0.9094, "step": 7060 }, { "epoch": 0.8807533990270675, "grad_norm": 0.0, "learning_rate": 7.366605159068874e-07, "loss": 0.9138, "step": 7061 }, { "epoch": 0.8808781339653237, "grad_norm": 0.0, "learning_rate": 7.351393474605051e-07, "loss": 0.9378, "step": 7062 }, { "epoch": 0.8810028689035799, "grad_norm": 0.0, "learning_rate": 7.336196912729488e-07, "loss": 0.8596, "step": 7063 }, { "epoch": 0.8811276038418361, "grad_norm": 0.0, "learning_rate": 7.321015475922599e-07, "loss": 0.8884, "step": 7064 }, { "epoch": 0.8812523387800923, "grad_norm": 0.0, "learning_rate": 7.305849166662426e-07, "loss": 0.9192, "step": 7065 }, { "epoch": 0.8813770737183485, "grad_norm": 0.0, "learning_rate": 7.290697987424477e-07, "loss": 0.9202, "step": 7066 }, { "epoch": 0.8815018086566048, "grad_norm": 0.0, "learning_rate": 7.275561940681796e-07, "loss": 0.9148, "step": 7067 }, { "epoch": 0.8816265435948609, "grad_norm": 0.0, "learning_rate": 7.260441028904996e-07, "loss": 0.869, "step": 7068 }, { "epoch": 0.8817512785331171, "grad_norm": 0.0, "learning_rate": 7.245335254562147e-07, "loss": 0.9158, "step": 7069 }, { "epoch": 0.8818760134713733, "grad_norm": 0.0, "learning_rate": 7.23024462011892e-07, "loss": 0.9235, "step": 7070 }, { "epoch": 0.8820007484096295, "grad_norm": 0.0, "learning_rate": 7.21516912803849e-07, "loss": 0.9076, "step": 7071 }, { "epoch": 0.8821254833478858, "grad_norm": 0.0, "learning_rate": 7.200108780781556e-07, "loss": 0.9267, "step": 7072 }, { "epoch": 0.8822502182861419, "grad_norm": 0.0, "learning_rate": 7.185063580806318e-07, "loss": 0.96, "step": 7073 }, { "epoch": 0.8823749532243982, "grad_norm": 0.0, "learning_rate": 7.170033530568587e-07, "loss": 0.9198, "step": 7074 }, { "epoch": 0.8824996881626543, "grad_norm": 0.0, "learning_rate": 7.155018632521593e-07, "loss": 0.896, "step": 7075 }, { "epoch": 0.8826244231009106, "grad_norm": 0.0, "learning_rate": 7.140018889116163e-07, "loss": 0.905, "step": 7076 }, { "epoch": 0.8827491580391668, "grad_norm": 0.0, "learning_rate": 7.12503430280066e-07, "loss": 0.8942, "step": 7077 }, { "epoch": 0.882873892977423, "grad_norm": 0.0, "learning_rate": 7.110064876020906e-07, "loss": 0.9353, "step": 7078 }, { "epoch": 0.8829986279156792, "grad_norm": 0.0, "learning_rate": 7.095110611220312e-07, "loss": 0.8922, "step": 7079 }, { "epoch": 0.8831233628539353, "grad_norm": 0.0, "learning_rate": 7.080171510839795e-07, "loss": 0.8476, "step": 7080 }, { "epoch": 0.8832480977921916, "grad_norm": 0.0, "learning_rate": 7.065247577317747e-07, "loss": 0.8553, "step": 7081 }, { "epoch": 0.8833728327304478, "grad_norm": 0.0, "learning_rate": 7.050338813090207e-07, "loss": 0.922, "step": 7082 }, { "epoch": 0.883497567668704, "grad_norm": 0.0, "learning_rate": 7.035445220590597e-07, "loss": 0.9053, "step": 7083 }, { "epoch": 0.8836223026069602, "grad_norm": 0.0, "learning_rate": 7.02056680224995e-07, "loss": 0.8988, "step": 7084 }, { "epoch": 0.8837470375452164, "grad_norm": 0.0, "learning_rate": 7.005703560496823e-07, "loss": 0.8892, "step": 7085 }, { "epoch": 0.8838717724834726, "grad_norm": 0.0, "learning_rate": 6.990855497757221e-07, "loss": 0.9113, "step": 7086 }, { "epoch": 0.8839965074217289, "grad_norm": 0.0, "learning_rate": 6.976022616454759e-07, "loss": 0.9485, "step": 7087 }, { "epoch": 0.884121242359985, "grad_norm": 0.0, "learning_rate": 6.961204919010523e-07, "loss": 0.9455, "step": 7088 }, { "epoch": 0.8842459772982413, "grad_norm": 0.0, "learning_rate": 6.946402407843156e-07, "loss": 0.9347, "step": 7089 }, { "epoch": 0.8843707122364974, "grad_norm": 0.0, "learning_rate": 6.931615085368748e-07, "loss": 0.9015, "step": 7090 }, { "epoch": 0.8844954471747537, "grad_norm": 0.0, "learning_rate": 6.916842954001035e-07, "loss": 0.8859, "step": 7091 }, { "epoch": 0.8846201821130099, "grad_norm": 0.0, "learning_rate": 6.902086016151166e-07, "loss": 0.9246, "step": 7092 }, { "epoch": 0.884744917051266, "grad_norm": 0.0, "learning_rate": 6.887344274227803e-07, "loss": 0.9216, "step": 7093 }, { "epoch": 0.8848696519895223, "grad_norm": 0.0, "learning_rate": 6.872617730637255e-07, "loss": 0.9205, "step": 7094 }, { "epoch": 0.8849943869277784, "grad_norm": 0.0, "learning_rate": 6.857906387783197e-07, "loss": 0.9383, "step": 7095 }, { "epoch": 0.8851191218660347, "grad_norm": 0.0, "learning_rate": 6.843210248066922e-07, "loss": 0.8575, "step": 7096 }, { "epoch": 0.8852438568042909, "grad_norm": 0.0, "learning_rate": 6.828529313887211e-07, "loss": 0.8909, "step": 7097 }, { "epoch": 0.8853685917425471, "grad_norm": 0.0, "learning_rate": 6.813863587640345e-07, "loss": 0.857, "step": 7098 }, { "epoch": 0.8854933266808033, "grad_norm": 0.0, "learning_rate": 6.799213071720156e-07, "loss": 0.8616, "step": 7099 }, { "epoch": 0.8856180616190595, "grad_norm": 0.0, "learning_rate": 6.784577768517963e-07, "loss": 0.9264, "step": 7100 }, { "epoch": 0.8857427965573157, "grad_norm": 0.0, "learning_rate": 6.769957680422634e-07, "loss": 0.8783, "step": 7101 }, { "epoch": 0.885867531495572, "grad_norm": 0.0, "learning_rate": 6.755352809820537e-07, "loss": 0.8784, "step": 7102 }, { "epoch": 0.8859922664338281, "grad_norm": 0.0, "learning_rate": 6.740763159095532e-07, "loss": 0.8801, "step": 7103 }, { "epoch": 0.8861170013720843, "grad_norm": 0.0, "learning_rate": 6.726188730629035e-07, "loss": 0.8634, "step": 7104 }, { "epoch": 0.8862417363103405, "grad_norm": 0.0, "learning_rate": 6.711629526799946e-07, "loss": 0.9062, "step": 7105 }, { "epoch": 0.8863664712485967, "grad_norm": 0.0, "learning_rate": 6.697085549984727e-07, "loss": 0.8676, "step": 7106 }, { "epoch": 0.886491206186853, "grad_norm": 0.0, "learning_rate": 6.682556802557272e-07, "loss": 0.8642, "step": 7107 }, { "epoch": 0.8866159411251091, "grad_norm": 0.0, "learning_rate": 6.668043286889092e-07, "loss": 0.9215, "step": 7108 }, { "epoch": 0.8867406760633654, "grad_norm": 0.0, "learning_rate": 6.653545005349138e-07, "loss": 0.9014, "step": 7109 }, { "epoch": 0.8868654110016215, "grad_norm": 0.0, "learning_rate": 6.63906196030385e-07, "loss": 0.8935, "step": 7110 }, { "epoch": 0.8869901459398778, "grad_norm": 0.0, "learning_rate": 6.624594154117303e-07, "loss": 0.9067, "step": 7111 }, { "epoch": 0.887114880878134, "grad_norm": 0.0, "learning_rate": 6.610141589150953e-07, "loss": 0.8867, "step": 7112 }, { "epoch": 0.8872396158163902, "grad_norm": 0.0, "learning_rate": 6.595704267763848e-07, "loss": 0.9005, "step": 7113 }, { "epoch": 0.8873643507546464, "grad_norm": 0.0, "learning_rate": 6.581282192312521e-07, "loss": 0.9888, "step": 7114 }, { "epoch": 0.8874890856929026, "grad_norm": 0.0, "learning_rate": 6.566875365151004e-07, "loss": 0.8895, "step": 7115 }, { "epoch": 0.8876138206311588, "grad_norm": 0.0, "learning_rate": 6.552483788630858e-07, "loss": 0.8679, "step": 7116 }, { "epoch": 0.8877385555694149, "grad_norm": 0.0, "learning_rate": 6.538107465101162e-07, "loss": 0.8807, "step": 7117 }, { "epoch": 0.8878632905076712, "grad_norm": 0.0, "learning_rate": 6.523746396908493e-07, "loss": 0.9327, "step": 7118 }, { "epoch": 0.8879880254459274, "grad_norm": 0.0, "learning_rate": 6.50940058639693e-07, "loss": 0.9099, "step": 7119 }, { "epoch": 0.8881127603841836, "grad_norm": 0.0, "learning_rate": 6.495070035908068e-07, "loss": 0.8917, "step": 7120 }, { "epoch": 0.8882374953224398, "grad_norm": 0.0, "learning_rate": 6.480754747781037e-07, "loss": 0.8744, "step": 7121 }, { "epoch": 0.888362230260696, "grad_norm": 0.0, "learning_rate": 6.466454724352422e-07, "loss": 0.8649, "step": 7122 }, { "epoch": 0.8884869651989522, "grad_norm": 0.0, "learning_rate": 6.45216996795639e-07, "loss": 0.8668, "step": 7123 }, { "epoch": 0.8886117001372085, "grad_norm": 0.0, "learning_rate": 6.437900480924519e-07, "loss": 0.8683, "step": 7124 }, { "epoch": 0.8887364350754646, "grad_norm": 0.0, "learning_rate": 6.423646265585993e-07, "loss": 0.9056, "step": 7125 }, { "epoch": 0.8888611700137209, "grad_norm": 0.0, "learning_rate": 6.409407324267448e-07, "loss": 0.9306, "step": 7126 }, { "epoch": 0.888985904951977, "grad_norm": 0.0, "learning_rate": 6.395183659293003e-07, "loss": 0.8771, "step": 7127 }, { "epoch": 0.8891106398902332, "grad_norm": 0.0, "learning_rate": 6.38097527298438e-07, "loss": 0.8783, "step": 7128 }, { "epoch": 0.8892353748284895, "grad_norm": 0.0, "learning_rate": 6.36678216766069e-07, "loss": 0.928, "step": 7129 }, { "epoch": 0.8893601097667456, "grad_norm": 0.0, "learning_rate": 6.352604345638647e-07, "loss": 0.849, "step": 7130 }, { "epoch": 0.8894848447050019, "grad_norm": 0.0, "learning_rate": 6.33844180923242e-07, "loss": 0.8808, "step": 7131 }, { "epoch": 0.889609579643258, "grad_norm": 0.0, "learning_rate": 6.324294560753663e-07, "loss": 0.8724, "step": 7132 }, { "epoch": 0.8897343145815143, "grad_norm": 0.0, "learning_rate": 6.310162602511593e-07, "loss": 0.9039, "step": 7133 }, { "epoch": 0.8898590495197705, "grad_norm": 0.0, "learning_rate": 6.296045936812889e-07, "loss": 0.912, "step": 7134 }, { "epoch": 0.8899837844580267, "grad_norm": 0.0, "learning_rate": 6.281944565961772e-07, "loss": 0.9116, "step": 7135 }, { "epoch": 0.8901085193962829, "grad_norm": 0.0, "learning_rate": 6.267858492259904e-07, "loss": 0.9256, "step": 7136 }, { "epoch": 0.8902332543345391, "grad_norm": 0.0, "learning_rate": 6.253787718006499e-07, "loss": 0.9292, "step": 7137 }, { "epoch": 0.8903579892727953, "grad_norm": 0.0, "learning_rate": 6.239732245498287e-07, "loss": 0.9062, "step": 7138 }, { "epoch": 0.8904827242110516, "grad_norm": 0.0, "learning_rate": 6.225692077029421e-07, "loss": 0.9039, "step": 7139 }, { "epoch": 0.8906074591493077, "grad_norm": 0.0, "learning_rate": 6.211667214891682e-07, "loss": 0.8733, "step": 7140 }, { "epoch": 0.8907321940875639, "grad_norm": 0.0, "learning_rate": 6.197657661374223e-07, "loss": 0.895, "step": 7141 }, { "epoch": 0.8908569290258201, "grad_norm": 0.0, "learning_rate": 6.183663418763774e-07, "loss": 0.865, "step": 7142 }, { "epoch": 0.8909816639640763, "grad_norm": 0.0, "learning_rate": 6.169684489344574e-07, "loss": 0.8622, "step": 7143 }, { "epoch": 0.8911063989023326, "grad_norm": 0.0, "learning_rate": 6.155720875398297e-07, "loss": 0.8816, "step": 7144 }, { "epoch": 0.8912311338405887, "grad_norm": 0.0, "learning_rate": 6.141772579204175e-07, "loss": 0.9683, "step": 7145 }, { "epoch": 0.891355868778845, "grad_norm": 0.0, "learning_rate": 6.12783960303892e-07, "loss": 0.9106, "step": 7146 }, { "epoch": 0.8914806037171011, "grad_norm": 0.0, "learning_rate": 6.113921949176749e-07, "loss": 0.9217, "step": 7147 }, { "epoch": 0.8916053386553574, "grad_norm": 0.0, "learning_rate": 6.100019619889374e-07, "loss": 0.964, "step": 7148 }, { "epoch": 0.8917300735936136, "grad_norm": 0.0, "learning_rate": 6.086132617445995e-07, "loss": 0.8911, "step": 7149 }, { "epoch": 0.8918548085318698, "grad_norm": 0.0, "learning_rate": 6.072260944113328e-07, "loss": 0.8892, "step": 7150 }, { "epoch": 0.891979543470126, "grad_norm": 0.0, "learning_rate": 6.058404602155576e-07, "loss": 0.8817, "step": 7151 }, { "epoch": 0.8921042784083821, "grad_norm": 0.0, "learning_rate": 6.044563593834473e-07, "loss": 0.8742, "step": 7152 }, { "epoch": 0.8922290133466384, "grad_norm": 0.0, "learning_rate": 6.030737921409169e-07, "loss": 0.8943, "step": 7153 }, { "epoch": 0.8923537482848946, "grad_norm": 0.0, "learning_rate": 6.01692758713639e-07, "loss": 0.8634, "step": 7154 }, { "epoch": 0.8924784832231508, "grad_norm": 0.0, "learning_rate": 6.003132593270345e-07, "loss": 0.9119, "step": 7155 }, { "epoch": 0.892603218161407, "grad_norm": 0.0, "learning_rate": 5.989352942062676e-07, "loss": 0.9218, "step": 7156 }, { "epoch": 0.8927279530996632, "grad_norm": 0.0, "learning_rate": 5.975588635762631e-07, "loss": 0.8478, "step": 7157 }, { "epoch": 0.8928526880379194, "grad_norm": 0.0, "learning_rate": 5.961839676616854e-07, "loss": 0.9027, "step": 7158 }, { "epoch": 0.8929774229761757, "grad_norm": 0.0, "learning_rate": 5.948106066869519e-07, "loss": 0.9285, "step": 7159 }, { "epoch": 0.8931021579144318, "grad_norm": 0.0, "learning_rate": 5.934387808762332e-07, "loss": 0.9507, "step": 7160 }, { "epoch": 0.8932268928526881, "grad_norm": 0.0, "learning_rate": 5.920684904534424e-07, "loss": 0.9344, "step": 7161 }, { "epoch": 0.8933516277909442, "grad_norm": 0.0, "learning_rate": 5.906997356422461e-07, "loss": 0.9213, "step": 7162 }, { "epoch": 0.8934763627292005, "grad_norm": 0.0, "learning_rate": 5.893325166660601e-07, "loss": 0.9003, "step": 7163 }, { "epoch": 0.8936010976674567, "grad_norm": 0.0, "learning_rate": 5.879668337480505e-07, "loss": 0.8852, "step": 7164 }, { "epoch": 0.8937258326057128, "grad_norm": 0.0, "learning_rate": 5.866026871111275e-07, "loss": 0.9388, "step": 7165 }, { "epoch": 0.8938505675439691, "grad_norm": 0.0, "learning_rate": 5.852400769779576e-07, "loss": 0.9145, "step": 7166 }, { "epoch": 0.8939753024822252, "grad_norm": 0.0, "learning_rate": 5.838790035709507e-07, "loss": 0.9562, "step": 7167 }, { "epoch": 0.8941000374204815, "grad_norm": 0.0, "learning_rate": 5.825194671122702e-07, "loss": 0.8612, "step": 7168 }, { "epoch": 0.8942247723587377, "grad_norm": 0.0, "learning_rate": 5.811614678238276e-07, "loss": 0.8921, "step": 7169 }, { "epoch": 0.8943495072969939, "grad_norm": 0.0, "learning_rate": 5.798050059272786e-07, "loss": 0.9066, "step": 7170 }, { "epoch": 0.8944742422352501, "grad_norm": 0.0, "learning_rate": 5.784500816440353e-07, "loss": 0.8579, "step": 7171 }, { "epoch": 0.8945989771735063, "grad_norm": 0.0, "learning_rate": 5.77096695195255e-07, "loss": 0.8784, "step": 7172 }, { "epoch": 0.8947237121117625, "grad_norm": 0.0, "learning_rate": 5.757448468018423e-07, "loss": 0.9211, "step": 7173 }, { "epoch": 0.8948484470500188, "grad_norm": 0.0, "learning_rate": 5.743945366844572e-07, "loss": 0.9145, "step": 7174 }, { "epoch": 0.8949731819882749, "grad_norm": 0.0, "learning_rate": 5.730457650635024e-07, "loss": 0.8543, "step": 7175 }, { "epoch": 0.8950979169265311, "grad_norm": 0.0, "learning_rate": 5.716985321591272e-07, "loss": 0.8829, "step": 7176 }, { "epoch": 0.8952226518647873, "grad_norm": 0.0, "learning_rate": 5.703528381912415e-07, "loss": 0.8728, "step": 7177 }, { "epoch": 0.8953473868030435, "grad_norm": 0.0, "learning_rate": 5.690086833794906e-07, "loss": 0.9117, "step": 7178 }, { "epoch": 0.8954721217412998, "grad_norm": 0.0, "learning_rate": 5.676660679432766e-07, "loss": 0.92, "step": 7179 }, { "epoch": 0.8955968566795559, "grad_norm": 0.0, "learning_rate": 5.663249921017477e-07, "loss": 0.9202, "step": 7180 }, { "epoch": 0.8957215916178122, "grad_norm": 0.0, "learning_rate": 5.649854560738033e-07, "loss": 0.9218, "step": 7181 }, { "epoch": 0.8958463265560683, "grad_norm": 0.0, "learning_rate": 5.636474600780872e-07, "loss": 0.8951, "step": 7182 }, { "epoch": 0.8959710614943246, "grad_norm": 0.0, "learning_rate": 5.623110043329937e-07, "loss": 0.8825, "step": 7183 }, { "epoch": 0.8960957964325807, "grad_norm": 0.0, "learning_rate": 5.609760890566673e-07, "loss": 0.9077, "step": 7184 }, { "epoch": 0.896220531370837, "grad_norm": 0.0, "learning_rate": 5.596427144670002e-07, "loss": 0.9116, "step": 7185 }, { "epoch": 0.8963452663090932, "grad_norm": 0.0, "learning_rate": 5.583108807816329e-07, "loss": 0.8539, "step": 7186 }, { "epoch": 0.8964700012473494, "grad_norm": 0.0, "learning_rate": 5.569805882179524e-07, "loss": 0.9241, "step": 7187 }, { "epoch": 0.8965947361856056, "grad_norm": 0.0, "learning_rate": 5.556518369930963e-07, "loss": 0.9078, "step": 7188 }, { "epoch": 0.8967194711238617, "grad_norm": 0.0, "learning_rate": 5.543246273239533e-07, "loss": 0.8964, "step": 7189 }, { "epoch": 0.896844206062118, "grad_norm": 0.0, "learning_rate": 5.529989594271523e-07, "loss": 0.9167, "step": 7190 }, { "epoch": 0.8969689410003742, "grad_norm": 0.0, "learning_rate": 5.516748335190825e-07, "loss": 0.8754, "step": 7191 }, { "epoch": 0.8970936759386304, "grad_norm": 0.0, "learning_rate": 5.503522498158698e-07, "loss": 0.8906, "step": 7192 }, { "epoch": 0.8972184108768866, "grad_norm": 0.0, "learning_rate": 5.490312085333904e-07, "loss": 0.8806, "step": 7193 }, { "epoch": 0.8973431458151429, "grad_norm": 0.0, "learning_rate": 5.477117098872797e-07, "loss": 0.8683, "step": 7194 }, { "epoch": 0.897467880753399, "grad_norm": 0.0, "learning_rate": 5.463937540929065e-07, "loss": 0.8776, "step": 7195 }, { "epoch": 0.8975926156916553, "grad_norm": 0.0, "learning_rate": 5.450773413653976e-07, "loss": 0.8858, "step": 7196 }, { "epoch": 0.8977173506299114, "grad_norm": 0.0, "learning_rate": 5.437624719196222e-07, "loss": 0.9138, "step": 7197 }, { "epoch": 0.8978420855681677, "grad_norm": 0.0, "learning_rate": 5.424491459702053e-07, "loss": 0.8459, "step": 7198 }, { "epoch": 0.8979668205064238, "grad_norm": 0.0, "learning_rate": 5.411373637315087e-07, "loss": 0.8426, "step": 7199 }, { "epoch": 0.89809155544468, "grad_norm": 0.0, "learning_rate": 5.398271254176502e-07, "loss": 0.931, "step": 7200 }, { "epoch": 0.8982162903829363, "grad_norm": 0.0, "learning_rate": 5.385184312424973e-07, "loss": 0.8968, "step": 7201 }, { "epoch": 0.8983410253211924, "grad_norm": 0.0, "learning_rate": 5.37211281419655e-07, "loss": 0.9135, "step": 7202 }, { "epoch": 0.8984657602594487, "grad_norm": 0.0, "learning_rate": 5.359056761624903e-07, "loss": 0.9114, "step": 7203 }, { "epoch": 0.8985904951977048, "grad_norm": 0.0, "learning_rate": 5.346016156841072e-07, "loss": 0.9116, "step": 7204 }, { "epoch": 0.8987152301359611, "grad_norm": 0.0, "learning_rate": 5.332991001973609e-07, "loss": 0.9268, "step": 7205 }, { "epoch": 0.8988399650742173, "grad_norm": 0.0, "learning_rate": 5.319981299148569e-07, "loss": 0.8891, "step": 7206 }, { "epoch": 0.8989647000124735, "grad_norm": 0.0, "learning_rate": 5.306987050489442e-07, "loss": 0.9017, "step": 7207 }, { "epoch": 0.8990894349507297, "grad_norm": 0.0, "learning_rate": 5.294008258117223e-07, "loss": 0.8815, "step": 7208 }, { "epoch": 0.899214169888986, "grad_norm": 0.0, "learning_rate": 5.281044924150391e-07, "loss": 0.9195, "step": 7209 }, { "epoch": 0.8993389048272421, "grad_norm": 0.0, "learning_rate": 5.268097050704857e-07, "loss": 0.8855, "step": 7210 }, { "epoch": 0.8994636397654984, "grad_norm": 0.0, "learning_rate": 5.255164639894084e-07, "loss": 0.8736, "step": 7211 }, { "epoch": 0.8995883747037545, "grad_norm": 0.0, "learning_rate": 5.242247693828939e-07, "loss": 0.9449, "step": 7212 }, { "epoch": 0.8997131096420107, "grad_norm": 0.0, "learning_rate": 5.229346214617792e-07, "loss": 0.8741, "step": 7213 }, { "epoch": 0.8998378445802669, "grad_norm": 0.0, "learning_rate": 5.216460204366502e-07, "loss": 0.8851, "step": 7214 }, { "epoch": 0.8999625795185231, "grad_norm": 0.0, "learning_rate": 5.203589665178399e-07, "loss": 0.8927, "step": 7215 }, { "epoch": 0.9000873144567794, "grad_norm": 0.0, "learning_rate": 5.190734599154257e-07, "loss": 0.919, "step": 7216 }, { "epoch": 0.9002120493950355, "grad_norm": 0.0, "learning_rate": 5.177895008392353e-07, "loss": 0.9188, "step": 7217 }, { "epoch": 0.9003367843332918, "grad_norm": 0.0, "learning_rate": 5.165070894988444e-07, "loss": 0.9608, "step": 7218 }, { "epoch": 0.9004615192715479, "grad_norm": 0.0, "learning_rate": 5.152262261035723e-07, "loss": 0.9113, "step": 7219 }, { "epoch": 0.9005862542098042, "grad_norm": 0.0, "learning_rate": 5.139469108624917e-07, "loss": 0.8857, "step": 7220 }, { "epoch": 0.9007109891480604, "grad_norm": 0.0, "learning_rate": 5.126691439844178e-07, "loss": 0.9039, "step": 7221 }, { "epoch": 0.9008357240863166, "grad_norm": 0.0, "learning_rate": 5.113929256779105e-07, "loss": 0.8615, "step": 7222 }, { "epoch": 0.9009604590245728, "grad_norm": 0.0, "learning_rate": 5.101182561512874e-07, "loss": 0.8558, "step": 7223 }, { "epoch": 0.9010851939628289, "grad_norm": 0.0, "learning_rate": 5.088451356126023e-07, "loss": 0.8726, "step": 7224 }, { "epoch": 0.9012099289010852, "grad_norm": 0.0, "learning_rate": 5.075735642696611e-07, "loss": 0.8453, "step": 7225 }, { "epoch": 0.9013346638393414, "grad_norm": 0.0, "learning_rate": 5.063035423300189e-07, "loss": 0.9495, "step": 7226 }, { "epoch": 0.9014593987775976, "grad_norm": 0.0, "learning_rate": 5.05035070000971e-07, "loss": 0.9087, "step": 7227 }, { "epoch": 0.9015841337158538, "grad_norm": 0.0, "learning_rate": 5.037681474895673e-07, "loss": 0.8933, "step": 7228 }, { "epoch": 0.90170886865411, "grad_norm": 0.0, "learning_rate": 5.025027750026001e-07, "loss": 0.9071, "step": 7229 }, { "epoch": 0.9018336035923662, "grad_norm": 0.0, "learning_rate": 5.012389527466111e-07, "loss": 0.9128, "step": 7230 }, { "epoch": 0.9019583385306225, "grad_norm": 0.0, "learning_rate": 4.999766809278872e-07, "loss": 0.8867, "step": 7231 }, { "epoch": 0.9020830734688786, "grad_norm": 0.0, "learning_rate": 4.987159597524649e-07, "loss": 0.8539, "step": 7232 }, { "epoch": 0.9022078084071349, "grad_norm": 0.0, "learning_rate": 4.974567894261218e-07, "loss": 0.8345, "step": 7233 }, { "epoch": 0.902332543345391, "grad_norm": 0.0, "learning_rate": 4.961991701543889e-07, "loss": 0.88, "step": 7234 }, { "epoch": 0.9024572782836473, "grad_norm": 0.0, "learning_rate": 4.949431021425422e-07, "loss": 0.8648, "step": 7235 }, { "epoch": 0.9025820132219035, "grad_norm": 0.0, "learning_rate": 4.936885855955997e-07, "loss": 0.8706, "step": 7236 }, { "epoch": 0.9027067481601596, "grad_norm": 0.0, "learning_rate": 4.924356207183356e-07, "loss": 0.9001, "step": 7237 }, { "epoch": 0.9028314830984159, "grad_norm": 0.0, "learning_rate": 4.911842077152618e-07, "loss": 0.9489, "step": 7238 }, { "epoch": 0.902956218036672, "grad_norm": 0.0, "learning_rate": 4.899343467906392e-07, "loss": 0.8877, "step": 7239 }, { "epoch": 0.9030809529749283, "grad_norm": 0.0, "learning_rate": 4.886860381484814e-07, "loss": 0.8613, "step": 7240 }, { "epoch": 0.9032056879131845, "grad_norm": 0.0, "learning_rate": 4.874392819925389e-07, "loss": 0.9414, "step": 7241 }, { "epoch": 0.9033304228514407, "grad_norm": 0.0, "learning_rate": 4.861940785263164e-07, "loss": 0.902, "step": 7242 }, { "epoch": 0.9034551577896969, "grad_norm": 0.0, "learning_rate": 4.849504279530637e-07, "loss": 0.8701, "step": 7243 }, { "epoch": 0.9035798927279531, "grad_norm": 0.0, "learning_rate": 4.837083304757717e-07, "loss": 0.921, "step": 7244 }, { "epoch": 0.9037046276662093, "grad_norm": 0.0, "learning_rate": 4.82467786297186e-07, "loss": 0.8491, "step": 7245 }, { "epoch": 0.9038293626044656, "grad_norm": 0.0, "learning_rate": 4.812287956197925e-07, "loss": 0.8865, "step": 7246 }, { "epoch": 0.9039540975427217, "grad_norm": 0.0, "learning_rate": 4.799913586458271e-07, "loss": 0.921, "step": 7247 }, { "epoch": 0.9040788324809779, "grad_norm": 0.0, "learning_rate": 4.787554755772683e-07, "loss": 0.8605, "step": 7248 }, { "epoch": 0.9042035674192341, "grad_norm": 0.0, "learning_rate": 4.775211466158469e-07, "loss": 0.884, "step": 7249 }, { "epoch": 0.9043283023574903, "grad_norm": 0.0, "learning_rate": 4.76288371963034e-07, "loss": 0.9059, "step": 7250 }, { "epoch": 0.9044530372957466, "grad_norm": 0.0, "learning_rate": 4.750571518200509e-07, "loss": 0.8863, "step": 7251 }, { "epoch": 0.9045777722340027, "grad_norm": 0.0, "learning_rate": 4.7382748638786336e-07, "loss": 0.8797, "step": 7252 }, { "epoch": 0.904702507172259, "grad_norm": 0.0, "learning_rate": 4.7259937586718205e-07, "loss": 0.8966, "step": 7253 }, { "epoch": 0.9048272421105151, "grad_norm": 0.0, "learning_rate": 4.713728204584678e-07, "loss": 0.9068, "step": 7254 }, { "epoch": 0.9049519770487714, "grad_norm": 0.0, "learning_rate": 4.7014782036192473e-07, "loss": 0.8811, "step": 7255 }, { "epoch": 0.9050767119870275, "grad_norm": 0.0, "learning_rate": 4.6892437577750195e-07, "loss": 0.946, "step": 7256 }, { "epoch": 0.9052014469252838, "grad_norm": 0.0, "learning_rate": 4.677024869049007e-07, "loss": 0.9249, "step": 7257 }, { "epoch": 0.90532618186354, "grad_norm": 0.0, "learning_rate": 4.6648215394355935e-07, "loss": 0.8559, "step": 7258 }, { "epoch": 0.9054509168017962, "grad_norm": 0.0, "learning_rate": 4.652633770926696e-07, "loss": 0.9264, "step": 7259 }, { "epoch": 0.9055756517400524, "grad_norm": 0.0, "learning_rate": 4.640461565511678e-07, "loss": 0.9101, "step": 7260 }, { "epoch": 0.9057003866783085, "grad_norm": 0.0, "learning_rate": 4.628304925177318e-07, "loss": 0.8991, "step": 7261 }, { "epoch": 0.9058251216165648, "grad_norm": 0.0, "learning_rate": 4.6161638519078956e-07, "loss": 0.8572, "step": 7262 }, { "epoch": 0.905949856554821, "grad_norm": 0.0, "learning_rate": 4.604038347685147e-07, "loss": 0.8777, "step": 7263 }, { "epoch": 0.9060745914930772, "grad_norm": 0.0, "learning_rate": 4.591928414488278e-07, "loss": 0.8836, "step": 7264 }, { "epoch": 0.9061993264313334, "grad_norm": 0.0, "learning_rate": 4.579834054293886e-07, "loss": 0.9031, "step": 7265 }, { "epoch": 0.9063240613695897, "grad_norm": 0.0, "learning_rate": 4.567755269076124e-07, "loss": 0.8895, "step": 7266 }, { "epoch": 0.9064487963078458, "grad_norm": 0.0, "learning_rate": 4.555692060806527e-07, "loss": 0.8654, "step": 7267 }, { "epoch": 0.9065735312461021, "grad_norm": 0.0, "learning_rate": 4.543644431454131e-07, "loss": 0.8792, "step": 7268 }, { "epoch": 0.9066982661843582, "grad_norm": 0.0, "learning_rate": 4.5316123829854063e-07, "loss": 0.945, "step": 7269 }, { "epoch": 0.9068230011226145, "grad_norm": 0.0, "learning_rate": 4.519595917364272e-07, "loss": 0.911, "step": 7270 }, { "epoch": 0.9069477360608706, "grad_norm": 0.0, "learning_rate": 4.507595036552137e-07, "loss": 0.9051, "step": 7271 }, { "epoch": 0.9070724709991268, "grad_norm": 0.0, "learning_rate": 4.495609742507856e-07, "loss": 0.8828, "step": 7272 }, { "epoch": 0.9071972059373831, "grad_norm": 0.0, "learning_rate": 4.4836400371876975e-07, "loss": 0.8941, "step": 7273 }, { "epoch": 0.9073219408756392, "grad_norm": 0.0, "learning_rate": 4.4716859225454433e-07, "loss": 0.8965, "step": 7274 }, { "epoch": 0.9074466758138955, "grad_norm": 0.0, "learning_rate": 4.4597474005323105e-07, "loss": 0.8671, "step": 7275 }, { "epoch": 0.9075714107521516, "grad_norm": 0.0, "learning_rate": 4.44782447309694e-07, "loss": 0.9482, "step": 7276 }, { "epoch": 0.9076961456904079, "grad_norm": 0.0, "learning_rate": 4.4359171421854973e-07, "loss": 0.8912, "step": 7277 }, { "epoch": 0.9078208806286641, "grad_norm": 0.0, "learning_rate": 4.424025409741517e-07, "loss": 0.8995, "step": 7278 }, { "epoch": 0.9079456155669203, "grad_norm": 0.0, "learning_rate": 4.412149277706046e-07, "loss": 0.8982, "step": 7279 }, { "epoch": 0.9080703505051765, "grad_norm": 0.0, "learning_rate": 4.400288748017578e-07, "loss": 0.9225, "step": 7280 }, { "epoch": 0.9081950854434327, "grad_norm": 0.0, "learning_rate": 4.388443822612043e-07, "loss": 0.8503, "step": 7281 }, { "epoch": 0.9083198203816889, "grad_norm": 0.0, "learning_rate": 4.376614503422805e-07, "loss": 0.8687, "step": 7282 }, { "epoch": 0.9084445553199452, "grad_norm": 0.0, "learning_rate": 4.364800792380763e-07, "loss": 0.9048, "step": 7283 }, { "epoch": 0.9085692902582013, "grad_norm": 0.0, "learning_rate": 4.3530026914141767e-07, "loss": 0.8618, "step": 7284 }, { "epoch": 0.9086940251964575, "grad_norm": 0.0, "learning_rate": 4.3412202024487706e-07, "loss": 0.9358, "step": 7285 }, { "epoch": 0.9088187601347137, "grad_norm": 0.0, "learning_rate": 4.3294533274078063e-07, "loss": 0.8739, "step": 7286 }, { "epoch": 0.9089434950729699, "grad_norm": 0.0, "learning_rate": 4.317702068211882e-07, "loss": 0.8642, "step": 7287 }, { "epoch": 0.9090682300112262, "grad_norm": 0.0, "learning_rate": 4.305966426779118e-07, "loss": 0.8918, "step": 7288 }, { "epoch": 0.9091929649494823, "grad_norm": 0.0, "learning_rate": 4.294246405025071e-07, "loss": 0.8761, "step": 7289 }, { "epoch": 0.9093176998877386, "grad_norm": 0.0, "learning_rate": 4.2825420048627333e-07, "loss": 0.8589, "step": 7290 }, { "epoch": 0.9094424348259947, "grad_norm": 0.0, "learning_rate": 4.270853228202565e-07, "loss": 0.9139, "step": 7291 }, { "epoch": 0.909567169764251, "grad_norm": 0.0, "learning_rate": 4.2591800769524625e-07, "loss": 0.8803, "step": 7292 }, { "epoch": 0.9096919047025072, "grad_norm": 0.0, "learning_rate": 4.247522553017791e-07, "loss": 0.8846, "step": 7293 }, { "epoch": 0.9098166396407634, "grad_norm": 0.0, "learning_rate": 4.2358806583013503e-07, "loss": 0.8802, "step": 7294 }, { "epoch": 0.9099413745790196, "grad_norm": 0.0, "learning_rate": 4.2242543947033863e-07, "loss": 0.8655, "step": 7295 }, { "epoch": 0.9100661095172757, "grad_norm": 0.0, "learning_rate": 4.2126437641215823e-07, "loss": 0.926, "step": 7296 }, { "epoch": 0.910190844455532, "grad_norm": 0.0, "learning_rate": 4.2010487684511105e-07, "loss": 0.906, "step": 7297 }, { "epoch": 0.9103155793937882, "grad_norm": 0.0, "learning_rate": 4.189469409584568e-07, "loss": 0.9212, "step": 7298 }, { "epoch": 0.9104403143320444, "grad_norm": 0.0, "learning_rate": 4.1779056894119763e-07, "loss": 0.9373, "step": 7299 }, { "epoch": 0.9105650492703006, "grad_norm": 0.0, "learning_rate": 4.166357609820837e-07, "loss": 0.8797, "step": 7300 }, { "epoch": 0.9106897842085568, "grad_norm": 0.0, "learning_rate": 4.154825172696098e-07, "loss": 0.9436, "step": 7301 }, { "epoch": 0.910814519146813, "grad_norm": 0.0, "learning_rate": 4.143308379920108e-07, "loss": 0.9297, "step": 7302 }, { "epoch": 0.9109392540850693, "grad_norm": 0.0, "learning_rate": 4.131807233372753e-07, "loss": 0.9129, "step": 7303 }, { "epoch": 0.9110639890233254, "grad_norm": 0.0, "learning_rate": 4.120321734931265e-07, "loss": 0.9704, "step": 7304 }, { "epoch": 0.9111887239615817, "grad_norm": 0.0, "learning_rate": 4.1088518864703884e-07, "loss": 0.8871, "step": 7305 }, { "epoch": 0.9113134588998378, "grad_norm": 0.0, "learning_rate": 4.0973976898622923e-07, "loss": 0.8868, "step": 7306 }, { "epoch": 0.9114381938380941, "grad_norm": 0.0, "learning_rate": 4.0859591469765704e-07, "loss": 0.8702, "step": 7307 }, { "epoch": 0.9115629287763503, "grad_norm": 0.0, "learning_rate": 4.0745362596803063e-07, "loss": 0.9075, "step": 7308 }, { "epoch": 0.9116876637146064, "grad_norm": 0.0, "learning_rate": 4.063129029837987e-07, "loss": 0.9223, "step": 7309 }, { "epoch": 0.9118123986528627, "grad_norm": 0.0, "learning_rate": 4.0517374593115666e-07, "loss": 0.8843, "step": 7310 }, { "epoch": 0.9119371335911188, "grad_norm": 0.0, "learning_rate": 4.0403615499604365e-07, "loss": 0.8915, "step": 7311 }, { "epoch": 0.9120618685293751, "grad_norm": 0.0, "learning_rate": 4.029001303641422e-07, "loss": 0.8863, "step": 7312 }, { "epoch": 0.9121866034676313, "grad_norm": 0.0, "learning_rate": 4.017656722208807e-07, "loss": 0.8587, "step": 7313 }, { "epoch": 0.9123113384058875, "grad_norm": 0.0, "learning_rate": 4.006327807514321e-07, "loss": 0.93, "step": 7314 }, { "epoch": 0.9124360733441437, "grad_norm": 0.0, "learning_rate": 3.99501456140714e-07, "loss": 0.9696, "step": 7315 }, { "epoch": 0.9125608082823999, "grad_norm": 0.0, "learning_rate": 3.983716985733832e-07, "loss": 0.8744, "step": 7316 }, { "epoch": 0.9126855432206561, "grad_norm": 0.0, "learning_rate": 3.9724350823384774e-07, "loss": 0.9268, "step": 7317 }, { "epoch": 0.9128102781589124, "grad_norm": 0.0, "learning_rate": 3.961168853062569e-07, "loss": 0.8998, "step": 7318 }, { "epoch": 0.9129350130971685, "grad_norm": 0.0, "learning_rate": 3.949918299745004e-07, "loss": 0.9351, "step": 7319 }, { "epoch": 0.9130597480354247, "grad_norm": 0.0, "learning_rate": 3.9386834242222114e-07, "loss": 0.9033, "step": 7320 }, { "epoch": 0.9131844829736809, "grad_norm": 0.0, "learning_rate": 3.9274642283279486e-07, "loss": 0.9036, "step": 7321 }, { "epoch": 0.9133092179119371, "grad_norm": 0.0, "learning_rate": 3.916260713893516e-07, "loss": 0.8771, "step": 7322 }, { "epoch": 0.9134339528501934, "grad_norm": 0.0, "learning_rate": 3.9050728827475957e-07, "loss": 0.8732, "step": 7323 }, { "epoch": 0.9135586877884495, "grad_norm": 0.0, "learning_rate": 3.893900736716305e-07, "loss": 0.8968, "step": 7324 }, { "epoch": 0.9136834227267058, "grad_norm": 0.0, "learning_rate": 3.8827442776232403e-07, "loss": 0.9269, "step": 7325 }, { "epoch": 0.9138081576649619, "grad_norm": 0.0, "learning_rate": 3.871603507289412e-07, "loss": 0.9112, "step": 7326 }, { "epoch": 0.9139328926032182, "grad_norm": 0.0, "learning_rate": 3.8604784275332765e-07, "loss": 0.8995, "step": 7327 }, { "epoch": 0.9140576275414743, "grad_norm": 0.0, "learning_rate": 3.8493690401707253e-07, "loss": 0.9161, "step": 7328 }, { "epoch": 0.9141823624797306, "grad_norm": 0.0, "learning_rate": 3.838275347015086e-07, "loss": 0.8739, "step": 7329 }, { "epoch": 0.9143070974179868, "grad_norm": 0.0, "learning_rate": 3.827197349877143e-07, "loss": 0.8893, "step": 7330 }, { "epoch": 0.914431832356243, "grad_norm": 0.0, "learning_rate": 3.816135050565073e-07, "loss": 0.8423, "step": 7331 }, { "epoch": 0.9145565672944992, "grad_norm": 0.0, "learning_rate": 3.8050884508845756e-07, "loss": 0.8688, "step": 7332 }, { "epoch": 0.9146813022327553, "grad_norm": 0.0, "learning_rate": 3.794057552638686e-07, "loss": 0.8632, "step": 7333 }, { "epoch": 0.9148060371710116, "grad_norm": 0.0, "learning_rate": 3.783042357627942e-07, "loss": 0.8979, "step": 7334 }, { "epoch": 0.9149307721092678, "grad_norm": 0.0, "learning_rate": 3.772042867650305e-07, "loss": 0.9351, "step": 7335 }, { "epoch": 0.915055507047524, "grad_norm": 0.0, "learning_rate": 3.761059084501162e-07, "loss": 0.8753, "step": 7336 }, { "epoch": 0.9151802419857802, "grad_norm": 0.0, "learning_rate": 3.7500910099733443e-07, "loss": 0.8898, "step": 7337 }, { "epoch": 0.9153049769240365, "grad_norm": 0.0, "learning_rate": 3.739138645857121e-07, "loss": 0.8834, "step": 7338 }, { "epoch": 0.9154297118622926, "grad_norm": 0.0, "learning_rate": 3.7282019939401835e-07, "loss": 0.9774, "step": 7339 }, { "epoch": 0.9155544468005489, "grad_norm": 0.0, "learning_rate": 3.717281056007704e-07, "loss": 0.9196, "step": 7340 }, { "epoch": 0.915679181738805, "grad_norm": 0.0, "learning_rate": 3.706375833842213e-07, "loss": 0.902, "step": 7341 }, { "epoch": 0.9158039166770613, "grad_norm": 0.0, "learning_rate": 3.6954863292237297e-07, "loss": 0.9557, "step": 7342 }, { "epoch": 0.9159286516153174, "grad_norm": 0.0, "learning_rate": 3.684612543929711e-07, "loss": 0.8764, "step": 7343 }, { "epoch": 0.9160533865535736, "grad_norm": 0.0, "learning_rate": 3.6737544797350254e-07, "loss": 0.8967, "step": 7344 }, { "epoch": 0.9161781214918299, "grad_norm": 0.0, "learning_rate": 3.662912138411967e-07, "loss": 0.8926, "step": 7345 }, { "epoch": 0.916302856430086, "grad_norm": 0.0, "learning_rate": 3.6520855217302973e-07, "loss": 0.9201, "step": 7346 }, { "epoch": 0.9164275913683423, "grad_norm": 0.0, "learning_rate": 3.6412746314572035e-07, "loss": 0.8884, "step": 7347 }, { "epoch": 0.9165523263065984, "grad_norm": 0.0, "learning_rate": 3.630479469357251e-07, "loss": 0.8925, "step": 7348 }, { "epoch": 0.9166770612448547, "grad_norm": 0.0, "learning_rate": 3.6197000371925307e-07, "loss": 0.8872, "step": 7349 }, { "epoch": 0.9168017961831109, "grad_norm": 0.0, "learning_rate": 3.6089363367224907e-07, "loss": 0.8595, "step": 7350 }, { "epoch": 0.9169265311213671, "grad_norm": 0.0, "learning_rate": 3.5981883697040363e-07, "loss": 0.872, "step": 7351 }, { "epoch": 0.9170512660596233, "grad_norm": 0.0, "learning_rate": 3.587456137891532e-07, "loss": 0.9063, "step": 7352 }, { "epoch": 0.9171760009978795, "grad_norm": 0.0, "learning_rate": 3.576739643036731e-07, "loss": 0.8968, "step": 7353 }, { "epoch": 0.9173007359361357, "grad_norm": 0.0, "learning_rate": 3.566038886888823e-07, "loss": 0.8876, "step": 7354 }, { "epoch": 0.917425470874392, "grad_norm": 0.0, "learning_rate": 3.555353871194467e-07, "loss": 0.8938, "step": 7355 }, { "epoch": 0.9175502058126481, "grad_norm": 0.0, "learning_rate": 3.544684597697723e-07, "loss": 0.8969, "step": 7356 }, { "epoch": 0.9176749407509043, "grad_norm": 0.0, "learning_rate": 3.5340310681400646e-07, "loss": 0.923, "step": 7357 }, { "epoch": 0.9177996756891605, "grad_norm": 0.0, "learning_rate": 3.5233932842604237e-07, "loss": 0.8867, "step": 7358 }, { "epoch": 0.9179244106274167, "grad_norm": 0.0, "learning_rate": 3.512771247795166e-07, "loss": 0.9104, "step": 7359 }, { "epoch": 0.918049145565673, "grad_norm": 0.0, "learning_rate": 3.5021649604780714e-07, "loss": 0.9001, "step": 7360 }, { "epoch": 0.9181738805039291, "grad_norm": 0.0, "learning_rate": 3.491574424040356e-07, "loss": 0.8423, "step": 7361 }, { "epoch": 0.9182986154421854, "grad_norm": 0.0, "learning_rate": 3.4809996402106473e-07, "loss": 0.8892, "step": 7362 }, { "epoch": 0.9184233503804415, "grad_norm": 0.0, "learning_rate": 3.470440610715031e-07, "loss": 0.8957, "step": 7363 }, { "epoch": 0.9185480853186978, "grad_norm": 0.0, "learning_rate": 3.4598973372770186e-07, "loss": 0.9158, "step": 7364 }, { "epoch": 0.918672820256954, "grad_norm": 0.0, "learning_rate": 3.4493698216174986e-07, "loss": 0.9003, "step": 7365 }, { "epoch": 0.9187975551952102, "grad_norm": 0.0, "learning_rate": 3.4388580654548755e-07, "loss": 0.8386, "step": 7366 }, { "epoch": 0.9189222901334664, "grad_norm": 0.0, "learning_rate": 3.428362070504887e-07, "loss": 0.8474, "step": 7367 }, { "epoch": 0.9190470250717225, "grad_norm": 0.0, "learning_rate": 3.4178818384807746e-07, "loss": 0.8877, "step": 7368 }, { "epoch": 0.9191717600099788, "grad_norm": 0.0, "learning_rate": 3.4074173710931804e-07, "loss": 0.8841, "step": 7369 }, { "epoch": 0.919296494948235, "grad_norm": 0.0, "learning_rate": 3.396968670050138e-07, "loss": 0.9, "step": 7370 }, { "epoch": 0.9194212298864912, "grad_norm": 0.0, "learning_rate": 3.3865357370571616e-07, "loss": 0.8675, "step": 7371 }, { "epoch": 0.9195459648247474, "grad_norm": 0.0, "learning_rate": 3.3761185738171665e-07, "loss": 0.8677, "step": 7372 }, { "epoch": 0.9196706997630036, "grad_norm": 0.0, "learning_rate": 3.3657171820305035e-07, "loss": 0.899, "step": 7373 }, { "epoch": 0.9197954347012598, "grad_norm": 0.0, "learning_rate": 3.3553315633949144e-07, "loss": 0.9082, "step": 7374 }, { "epoch": 0.9199201696395161, "grad_norm": 0.0, "learning_rate": 3.3449617196056217e-07, "loss": 0.842, "step": 7375 }, { "epoch": 0.9200449045777722, "grad_norm": 0.0, "learning_rate": 3.3346076523552486e-07, "loss": 0.8963, "step": 7376 }, { "epoch": 0.9201696395160285, "grad_norm": 0.0, "learning_rate": 3.3242693633337986e-07, "loss": 0.8884, "step": 7377 }, { "epoch": 0.9202943744542846, "grad_norm": 0.0, "learning_rate": 3.3139468542288e-07, "loss": 0.9053, "step": 7378 }, { "epoch": 0.9204191093925409, "grad_norm": 0.0, "learning_rate": 3.303640126725094e-07, "loss": 0.8664, "step": 7379 }, { "epoch": 0.9205438443307971, "grad_norm": 0.0, "learning_rate": 3.2933491825050237e-07, "loss": 0.8656, "step": 7380 }, { "epoch": 0.9206685792690532, "grad_norm": 0.0, "learning_rate": 3.283074023248345e-07, "loss": 0.9353, "step": 7381 }, { "epoch": 0.9207933142073095, "grad_norm": 0.0, "learning_rate": 3.2728146506321944e-07, "loss": 0.9221, "step": 7382 }, { "epoch": 0.9209180491455656, "grad_norm": 0.0, "learning_rate": 3.262571066331166e-07, "loss": 0.8847, "step": 7383 }, { "epoch": 0.9210427840838219, "grad_norm": 0.0, "learning_rate": 3.2523432720172776e-07, "loss": 0.8498, "step": 7384 }, { "epoch": 0.921167519022078, "grad_norm": 0.0, "learning_rate": 3.2421312693599605e-07, "loss": 0.9386, "step": 7385 }, { "epoch": 0.9212922539603343, "grad_norm": 0.0, "learning_rate": 3.231935060026092e-07, "loss": 0.8934, "step": 7386 }, { "epoch": 0.9214169888985905, "grad_norm": 0.0, "learning_rate": 3.2217546456799086e-07, "loss": 0.8891, "step": 7387 }, { "epoch": 0.9215417238368467, "grad_norm": 0.0, "learning_rate": 3.211590027983136e-07, "loss": 0.8851, "step": 7388 }, { "epoch": 0.9216664587751029, "grad_norm": 0.0, "learning_rate": 3.201441208594891e-07, "loss": 0.9241, "step": 7389 }, { "epoch": 0.9217911937133592, "grad_norm": 0.0, "learning_rate": 3.191308189171738e-07, "loss": 0.8694, "step": 7390 }, { "epoch": 0.9219159286516153, "grad_norm": 0.0, "learning_rate": 3.1811909713676093e-07, "loss": 0.9128, "step": 7391 }, { "epoch": 0.9220406635898715, "grad_norm": 0.0, "learning_rate": 3.171089556833895e-07, "loss": 0.8938, "step": 7392 }, { "epoch": 0.9221653985281277, "grad_norm": 0.0, "learning_rate": 3.161003947219421e-07, "loss": 0.9072, "step": 7393 }, { "epoch": 0.9222901334663839, "grad_norm": 0.0, "learning_rate": 3.150934144170381e-07, "loss": 0.8745, "step": 7394 }, { "epoch": 0.9224148684046402, "grad_norm": 0.0, "learning_rate": 3.140880149330461e-07, "loss": 0.8937, "step": 7395 }, { "epoch": 0.9225396033428963, "grad_norm": 0.0, "learning_rate": 3.1308419643406915e-07, "loss": 0.8846, "step": 7396 }, { "epoch": 0.9226643382811526, "grad_norm": 0.0, "learning_rate": 3.1208195908395747e-07, "loss": 0.9338, "step": 7397 }, { "epoch": 0.9227890732194087, "grad_norm": 0.0, "learning_rate": 3.1108130304630223e-07, "loss": 0.8792, "step": 7398 }, { "epoch": 0.922913808157665, "grad_norm": 0.0, "learning_rate": 3.1008222848443293e-07, "loss": 0.931, "step": 7399 }, { "epoch": 0.9230385430959211, "grad_norm": 0.0, "learning_rate": 3.090847355614257e-07, "loss": 0.9059, "step": 7400 }, { "epoch": 0.9231632780341774, "grad_norm": 0.0, "learning_rate": 3.080888244400959e-07, "loss": 0.9079, "step": 7401 }, { "epoch": 0.9232880129724336, "grad_norm": 0.0, "learning_rate": 3.070944952830035e-07, "loss": 0.9114, "step": 7402 }, { "epoch": 0.9234127479106898, "grad_norm": 0.0, "learning_rate": 3.061017482524442e-07, "loss": 0.9354, "step": 7403 }, { "epoch": 0.923537482848946, "grad_norm": 0.0, "learning_rate": 3.051105835104629e-07, "loss": 0.9345, "step": 7404 }, { "epoch": 0.9236622177872021, "grad_norm": 0.0, "learning_rate": 3.0412100121884005e-07, "loss": 0.8351, "step": 7405 }, { "epoch": 0.9237869527254584, "grad_norm": 0.0, "learning_rate": 3.0313300153910097e-07, "loss": 0.9563, "step": 7406 }, { "epoch": 0.9239116876637146, "grad_norm": 0.0, "learning_rate": 3.0214658463251443e-07, "loss": 0.871, "step": 7407 }, { "epoch": 0.9240364226019708, "grad_norm": 0.0, "learning_rate": 3.0116175066008613e-07, "loss": 0.8778, "step": 7408 }, { "epoch": 0.924161157540227, "grad_norm": 0.0, "learning_rate": 3.001784997825652e-07, "loss": 0.895, "step": 7409 }, { "epoch": 0.9242858924784833, "grad_norm": 0.0, "learning_rate": 2.991968321604466e-07, "loss": 0.8792, "step": 7410 }, { "epoch": 0.9244106274167394, "grad_norm": 0.0, "learning_rate": 2.982167479539577e-07, "loss": 0.9286, "step": 7411 }, { "epoch": 0.9245353623549957, "grad_norm": 0.0, "learning_rate": 2.9723824732307946e-07, "loss": 0.8801, "step": 7412 }, { "epoch": 0.9246600972932518, "grad_norm": 0.0, "learning_rate": 2.9626133042752303e-07, "loss": 0.8526, "step": 7413 }, { "epoch": 0.9247848322315081, "grad_norm": 0.0, "learning_rate": 2.9528599742674635e-07, "loss": 0.9459, "step": 7414 }, { "epoch": 0.9249095671697642, "grad_norm": 0.0, "learning_rate": 2.94312248479951e-07, "loss": 0.87, "step": 7415 }, { "epoch": 0.9250343021080205, "grad_norm": 0.0, "learning_rate": 2.9334008374607423e-07, "loss": 0.914, "step": 7416 }, { "epoch": 0.9251590370462767, "grad_norm": 0.0, "learning_rate": 2.9236950338380033e-07, "loss": 0.9526, "step": 7417 }, { "epoch": 0.9252837719845328, "grad_norm": 0.0, "learning_rate": 2.9140050755155134e-07, "loss": 0.8616, "step": 7418 }, { "epoch": 0.9254085069227891, "grad_norm": 0.0, "learning_rate": 2.9043309640749193e-07, "loss": 0.8633, "step": 7419 }, { "epoch": 0.9255332418610452, "grad_norm": 0.0, "learning_rate": 2.894672701095269e-07, "loss": 0.8667, "step": 7420 }, { "epoch": 0.9256579767993015, "grad_norm": 0.0, "learning_rate": 2.885030288153057e-07, "loss": 0.9011, "step": 7421 }, { "epoch": 0.9257827117375577, "grad_norm": 0.0, "learning_rate": 2.875403726822146e-07, "loss": 0.884, "step": 7422 }, { "epoch": 0.9259074466758139, "grad_norm": 0.0, "learning_rate": 2.865793018673857e-07, "loss": 0.8934, "step": 7423 }, { "epoch": 0.9260321816140701, "grad_norm": 0.0, "learning_rate": 2.85619816527688e-07, "loss": 0.8621, "step": 7424 }, { "epoch": 0.9261569165523263, "grad_norm": 0.0, "learning_rate": 2.846619168197351e-07, "loss": 0.8823, "step": 7425 }, { "epoch": 0.9262816514905825, "grad_norm": 0.0, "learning_rate": 2.8370560289987856e-07, "loss": 0.8418, "step": 7426 }, { "epoch": 0.9264063864288388, "grad_norm": 0.0, "learning_rate": 2.827508749242147e-07, "loss": 0.8895, "step": 7427 }, { "epoch": 0.9265311213670949, "grad_norm": 0.0, "learning_rate": 2.817977330485788e-07, "loss": 0.8996, "step": 7428 }, { "epoch": 0.9266558563053511, "grad_norm": 0.0, "learning_rate": 2.8084617742854645e-07, "loss": 0.9208, "step": 7429 }, { "epoch": 0.9267805912436073, "grad_norm": 0.0, "learning_rate": 2.798962082194379e-07, "loss": 0.9421, "step": 7430 }, { "epoch": 0.9269053261818635, "grad_norm": 0.0, "learning_rate": 2.789478255763078e-07, "loss": 0.8318, "step": 7431 }, { "epoch": 0.9270300611201198, "grad_norm": 0.0, "learning_rate": 2.780010296539615e-07, "loss": 0.9212, "step": 7432 }, { "epoch": 0.9271547960583759, "grad_norm": 0.0, "learning_rate": 2.7705582060693627e-07, "loss": 0.9186, "step": 7433 }, { "epoch": 0.9272795309966322, "grad_norm": 0.0, "learning_rate": 2.7611219858951543e-07, "loss": 0.8703, "step": 7434 }, { "epoch": 0.9274042659348883, "grad_norm": 0.0, "learning_rate": 2.7517016375572137e-07, "loss": 0.9037, "step": 7435 }, { "epoch": 0.9275290008731446, "grad_norm": 0.0, "learning_rate": 2.7422971625931884e-07, "loss": 0.8975, "step": 7436 }, { "epoch": 0.9276537358114008, "grad_norm": 0.0, "learning_rate": 2.732908562538106e-07, "loss": 0.8994, "step": 7437 }, { "epoch": 0.927778470749657, "grad_norm": 0.0, "learning_rate": 2.72353583892444e-07, "loss": 0.8952, "step": 7438 }, { "epoch": 0.9279032056879132, "grad_norm": 0.0, "learning_rate": 2.714178993282068e-07, "loss": 0.8638, "step": 7439 }, { "epoch": 0.9280279406261694, "grad_norm": 0.0, "learning_rate": 2.7048380271382124e-07, "loss": 0.8724, "step": 7440 }, { "epoch": 0.9281526755644256, "grad_norm": 0.0, "learning_rate": 2.6955129420176193e-07, "loss": 0.9504, "step": 7441 }, { "epoch": 0.9282774105026818, "grad_norm": 0.0, "learning_rate": 2.686203739442328e-07, "loss": 0.8856, "step": 7442 }, { "epoch": 0.928402145440938, "grad_norm": 0.0, "learning_rate": 2.6769104209318665e-07, "loss": 0.8873, "step": 7443 }, { "epoch": 0.9285268803791942, "grad_norm": 0.0, "learning_rate": 2.6676329880031213e-07, "loss": 0.9414, "step": 7444 }, { "epoch": 0.9286516153174504, "grad_norm": 0.0, "learning_rate": 2.658371442170404e-07, "loss": 0.8791, "step": 7445 }, { "epoch": 0.9287763502557066, "grad_norm": 0.0, "learning_rate": 2.649125784945439e-07, "loss": 0.8711, "step": 7446 }, { "epoch": 0.9289010851939629, "grad_norm": 0.0, "learning_rate": 2.6398960178373513e-07, "loss": 0.8688, "step": 7447 }, { "epoch": 0.929025820132219, "grad_norm": 0.0, "learning_rate": 2.6306821423526474e-07, "loss": 0.8938, "step": 7448 }, { "epoch": 0.9291505550704753, "grad_norm": 0.0, "learning_rate": 2.6214841599953024e-07, "loss": 0.9234, "step": 7449 }, { "epoch": 0.9292752900087314, "grad_norm": 0.0, "learning_rate": 2.612302072266637e-07, "loss": 0.9034, "step": 7450 }, { "epoch": 0.9294000249469877, "grad_norm": 0.0, "learning_rate": 2.603135880665397e-07, "loss": 0.9246, "step": 7451 }, { "epoch": 0.9295247598852439, "grad_norm": 0.0, "learning_rate": 2.593985586687764e-07, "loss": 0.8776, "step": 7452 }, { "epoch": 0.9296494948235, "grad_norm": 0.0, "learning_rate": 2.5848511918272535e-07, "loss": 0.9365, "step": 7453 }, { "epoch": 0.9297742297617563, "grad_norm": 0.0, "learning_rate": 2.575732697574851e-07, "loss": 0.9067, "step": 7454 }, { "epoch": 0.9298989647000124, "grad_norm": 0.0, "learning_rate": 2.5666301054189323e-07, "loss": 0.8993, "step": 7455 }, { "epoch": 0.9300236996382687, "grad_norm": 0.0, "learning_rate": 2.557543416845265e-07, "loss": 0.855, "step": 7456 }, { "epoch": 0.9301484345765249, "grad_norm": 0.0, "learning_rate": 2.548472633337007e-07, "loss": 0.9184, "step": 7457 }, { "epoch": 0.9302731695147811, "grad_norm": 0.0, "learning_rate": 2.5394177563747736e-07, "loss": 0.903, "step": 7458 }, { "epoch": 0.9303979044530373, "grad_norm": 0.0, "learning_rate": 2.530378787436527e-07, "loss": 0.9007, "step": 7459 }, { "epoch": 0.9305226393912935, "grad_norm": 0.0, "learning_rate": 2.5213557279976317e-07, "loss": 0.8215, "step": 7460 }, { "epoch": 0.9306473743295497, "grad_norm": 0.0, "learning_rate": 2.512348579530932e-07, "loss": 0.9348, "step": 7461 }, { "epoch": 0.930772109267806, "grad_norm": 0.0, "learning_rate": 2.503357343506585e-07, "loss": 0.9138, "step": 7462 }, { "epoch": 0.9308968442060621, "grad_norm": 0.0, "learning_rate": 2.494382021392194e-07, "loss": 0.8914, "step": 7463 }, { "epoch": 0.9310215791443184, "grad_norm": 0.0, "learning_rate": 2.485422614652766e-07, "loss": 0.879, "step": 7464 }, { "epoch": 0.9311463140825745, "grad_norm": 0.0, "learning_rate": 2.4764791247506967e-07, "loss": 0.8431, "step": 7465 }, { "epoch": 0.9312710490208307, "grad_norm": 0.0, "learning_rate": 2.467551553145775e-07, "loss": 0.8611, "step": 7466 }, { "epoch": 0.931395783959087, "grad_norm": 0.0, "learning_rate": 2.458639901295223e-07, "loss": 0.8962, "step": 7467 }, { "epoch": 0.9315205188973431, "grad_norm": 0.0, "learning_rate": 2.449744170653645e-07, "loss": 0.9378, "step": 7468 }, { "epoch": 0.9316452538355994, "grad_norm": 0.0, "learning_rate": 2.4408643626730565e-07, "loss": 0.8594, "step": 7469 }, { "epoch": 0.9317699887738555, "grad_norm": 0.0, "learning_rate": 2.432000478802854e-07, "loss": 0.8856, "step": 7470 }, { "epoch": 0.9318947237121118, "grad_norm": 0.0, "learning_rate": 2.4231525204898354e-07, "loss": 0.8759, "step": 7471 }, { "epoch": 0.932019458650368, "grad_norm": 0.0, "learning_rate": 2.414320489178223e-07, "loss": 0.9563, "step": 7472 }, { "epoch": 0.9321441935886242, "grad_norm": 0.0, "learning_rate": 2.405504386309643e-07, "loss": 0.9137, "step": 7473 }, { "epoch": 0.9322689285268804, "grad_norm": 0.0, "learning_rate": 2.3967042133230756e-07, "loss": 0.9255, "step": 7474 }, { "epoch": 0.9323936634651366, "grad_norm": 0.0, "learning_rate": 2.387919971654962e-07, "loss": 0.8995, "step": 7475 }, { "epoch": 0.9325183984033928, "grad_norm": 0.0, "learning_rate": 2.3791516627390877e-07, "loss": 0.9135, "step": 7476 }, { "epoch": 0.9326431333416489, "grad_norm": 0.0, "learning_rate": 2.370399288006664e-07, "loss": 0.9318, "step": 7477 }, { "epoch": 0.9327678682799052, "grad_norm": 0.0, "learning_rate": 2.3616628488863147e-07, "loss": 0.9006, "step": 7478 }, { "epoch": 0.9328926032181614, "grad_norm": 0.0, "learning_rate": 2.3529423468040324e-07, "loss": 0.8881, "step": 7479 }, { "epoch": 0.9330173381564176, "grad_norm": 0.0, "learning_rate": 2.344237783183223e-07, "loss": 0.9048, "step": 7480 }, { "epoch": 0.9331420730946738, "grad_norm": 0.0, "learning_rate": 2.3355491594447055e-07, "loss": 0.9148, "step": 7481 }, { "epoch": 0.93326680803293, "grad_norm": 0.0, "learning_rate": 2.3268764770066564e-07, "loss": 0.9373, "step": 7482 }, { "epoch": 0.9333915429711862, "grad_norm": 0.0, "learning_rate": 2.3182197372846994e-07, "loss": 0.8942, "step": 7483 }, { "epoch": 0.9335162779094425, "grad_norm": 0.0, "learning_rate": 2.3095789416918258e-07, "loss": 0.8659, "step": 7484 }, { "epoch": 0.9336410128476986, "grad_norm": 0.0, "learning_rate": 2.300954091638441e-07, "loss": 0.8843, "step": 7485 }, { "epoch": 0.9337657477859549, "grad_norm": 0.0, "learning_rate": 2.292345188532308e-07, "loss": 0.9311, "step": 7486 }, { "epoch": 0.933890482724211, "grad_norm": 0.0, "learning_rate": 2.2837522337786467e-07, "loss": 0.9732, "step": 7487 }, { "epoch": 0.9340152176624673, "grad_norm": 0.0, "learning_rate": 2.2751752287800354e-07, "loss": 0.9255, "step": 7488 }, { "epoch": 0.9341399526007235, "grad_norm": 0.0, "learning_rate": 2.2666141749364434e-07, "loss": 0.8712, "step": 7489 }, { "epoch": 0.9342646875389796, "grad_norm": 0.0, "learning_rate": 2.2580690736452858e-07, "loss": 0.9262, "step": 7490 }, { "epoch": 0.9343894224772359, "grad_norm": 0.0, "learning_rate": 2.2495399263012919e-07, "loss": 0.8903, "step": 7491 }, { "epoch": 0.934514157415492, "grad_norm": 0.0, "learning_rate": 2.2410267342966695e-07, "loss": 0.8995, "step": 7492 }, { "epoch": 0.9346388923537483, "grad_norm": 0.0, "learning_rate": 2.2325294990209744e-07, "loss": 0.8804, "step": 7493 }, { "epoch": 0.9347636272920045, "grad_norm": 0.0, "learning_rate": 2.2240482218611524e-07, "loss": 0.9049, "step": 7494 }, { "epoch": 0.9348883622302607, "grad_norm": 0.0, "learning_rate": 2.2155829042015963e-07, "loss": 0.8846, "step": 7495 }, { "epoch": 0.9350130971685169, "grad_norm": 0.0, "learning_rate": 2.207133547424045e-07, "loss": 0.9314, "step": 7496 }, { "epoch": 0.9351378321067731, "grad_norm": 0.0, "learning_rate": 2.198700152907629e-07, "loss": 0.9429, "step": 7497 }, { "epoch": 0.9352625670450293, "grad_norm": 0.0, "learning_rate": 2.1902827220289137e-07, "loss": 0.9296, "step": 7498 }, { "epoch": 0.9353873019832856, "grad_norm": 0.0, "learning_rate": 2.181881256161822e-07, "loss": 0.8922, "step": 7499 }, { "epoch": 0.9355120369215417, "grad_norm": 0.0, "learning_rate": 2.1734957566776903e-07, "loss": 0.9488, "step": 7500 }, { "epoch": 0.9356367718597979, "grad_norm": 0.0, "learning_rate": 2.1651262249452465e-07, "loss": 0.8941, "step": 7501 }, { "epoch": 0.9357615067980541, "grad_norm": 0.0, "learning_rate": 2.1567726623306084e-07, "loss": 0.829, "step": 7502 }, { "epoch": 0.9358862417363103, "grad_norm": 0.0, "learning_rate": 2.1484350701972745e-07, "loss": 0.9013, "step": 7503 }, { "epoch": 0.9360109766745666, "grad_norm": 0.0, "learning_rate": 2.140113449906167e-07, "loss": 0.8815, "step": 7504 }, { "epoch": 0.9361357116128227, "grad_norm": 0.0, "learning_rate": 2.1318078028155886e-07, "loss": 0.9027, "step": 7505 }, { "epoch": 0.936260446551079, "grad_norm": 0.0, "learning_rate": 2.123518130281199e-07, "loss": 0.9177, "step": 7506 }, { "epoch": 0.9363851814893351, "grad_norm": 0.0, "learning_rate": 2.1152444336561272e-07, "loss": 0.9073, "step": 7507 }, { "epoch": 0.9365099164275914, "grad_norm": 0.0, "learning_rate": 2.106986714290804e-07, "loss": 0.9127, "step": 7508 }, { "epoch": 0.9366346513658476, "grad_norm": 0.0, "learning_rate": 2.0987449735331178e-07, "loss": 0.8471, "step": 7509 }, { "epoch": 0.9367593863041038, "grad_norm": 0.0, "learning_rate": 2.090519212728348e-07, "loss": 0.8948, "step": 7510 }, { "epoch": 0.93688412124236, "grad_norm": 0.0, "learning_rate": 2.0823094332191097e-07, "loss": 0.9388, "step": 7511 }, { "epoch": 0.9370088561806162, "grad_norm": 0.0, "learning_rate": 2.074115636345464e-07, "loss": 0.8949, "step": 7512 }, { "epoch": 0.9371335911188724, "grad_norm": 0.0, "learning_rate": 2.0659378234448524e-07, "loss": 0.9098, "step": 7513 }, { "epoch": 0.9372583260571286, "grad_norm": 0.0, "learning_rate": 2.0577759958520848e-07, "loss": 0.8785, "step": 7514 }, { "epoch": 0.9373830609953848, "grad_norm": 0.0, "learning_rate": 2.049630154899396e-07, "loss": 0.8663, "step": 7515 }, { "epoch": 0.937507795933641, "grad_norm": 0.0, "learning_rate": 2.0415003019163659e-07, "loss": 0.9339, "step": 7516 }, { "epoch": 0.9376325308718972, "grad_norm": 0.0, "learning_rate": 2.0333864382300116e-07, "loss": 0.9135, "step": 7517 }, { "epoch": 0.9377572658101534, "grad_norm": 0.0, "learning_rate": 2.025288565164707e-07, "loss": 0.8269, "step": 7518 }, { "epoch": 0.9378820007484097, "grad_norm": 0.0, "learning_rate": 2.01720668404225e-07, "loss": 0.8628, "step": 7519 }, { "epoch": 0.9380067356866658, "grad_norm": 0.0, "learning_rate": 2.0091407961817967e-07, "loss": 0.8911, "step": 7520 }, { "epoch": 0.9381314706249221, "grad_norm": 0.0, "learning_rate": 2.0010909028998826e-07, "loss": 0.8801, "step": 7521 }, { "epoch": 0.9382562055631782, "grad_norm": 0.0, "learning_rate": 1.9930570055104903e-07, "loss": 0.9371, "step": 7522 }, { "epoch": 0.9383809405014345, "grad_norm": 0.0, "learning_rate": 1.9850391053249153e-07, "loss": 0.9138, "step": 7523 }, { "epoch": 0.9385056754396907, "grad_norm": 0.0, "learning_rate": 1.9770372036519214e-07, "loss": 0.8872, "step": 7524 }, { "epoch": 0.9386304103779468, "grad_norm": 0.0, "learning_rate": 1.9690513017975976e-07, "loss": 0.9266, "step": 7525 }, { "epoch": 0.9387551453162031, "grad_norm": 0.0, "learning_rate": 1.9610814010654343e-07, "loss": 0.93, "step": 7526 }, { "epoch": 0.9388798802544592, "grad_norm": 0.0, "learning_rate": 1.953127502756358e-07, "loss": 0.8915, "step": 7527 }, { "epoch": 0.9390046151927155, "grad_norm": 0.0, "learning_rate": 1.945189608168596e-07, "loss": 0.8825, "step": 7528 }, { "epoch": 0.9391293501309717, "grad_norm": 0.0, "learning_rate": 1.9372677185978462e-07, "loss": 0.902, "step": 7529 }, { "epoch": 0.9392540850692279, "grad_norm": 0.0, "learning_rate": 1.9293618353371512e-07, "loss": 0.8839, "step": 7530 }, { "epoch": 0.9393788200074841, "grad_norm": 0.0, "learning_rate": 1.921471959676957e-07, "loss": 0.943, "step": 7531 }, { "epoch": 0.9395035549457403, "grad_norm": 0.0, "learning_rate": 1.913598092905089e-07, "loss": 0.9413, "step": 7532 }, { "epoch": 0.9396282898839965, "grad_norm": 0.0, "learning_rate": 1.9057402363067413e-07, "loss": 0.9131, "step": 7533 }, { "epoch": 0.9397530248222528, "grad_norm": 0.0, "learning_rate": 1.8978983911645322e-07, "loss": 0.8776, "step": 7534 }, { "epoch": 0.9398777597605089, "grad_norm": 0.0, "learning_rate": 1.890072558758449e-07, "loss": 0.8929, "step": 7535 }, { "epoch": 0.9400024946987652, "grad_norm": 0.0, "learning_rate": 1.88226274036587e-07, "loss": 0.9101, "step": 7536 }, { "epoch": 0.9401272296370213, "grad_norm": 0.0, "learning_rate": 1.874468937261531e-07, "loss": 0.9418, "step": 7537 }, { "epoch": 0.9402519645752775, "grad_norm": 0.0, "learning_rate": 1.8666911507176032e-07, "loss": 0.8994, "step": 7538 }, { "epoch": 0.9403766995135338, "grad_norm": 0.0, "learning_rate": 1.8589293820036048e-07, "loss": 0.9639, "step": 7539 }, { "epoch": 0.9405014344517899, "grad_norm": 0.0, "learning_rate": 1.851183632386444e-07, "loss": 0.9067, "step": 7540 }, { "epoch": 0.9406261693900462, "grad_norm": 0.0, "learning_rate": 1.8434539031304433e-07, "loss": 0.8969, "step": 7541 }, { "epoch": 0.9407509043283023, "grad_norm": 0.0, "learning_rate": 1.835740195497271e-07, "loss": 0.9227, "step": 7542 }, { "epoch": 0.9408756392665586, "grad_norm": 0.0, "learning_rate": 1.8280425107460088e-07, "loss": 0.9154, "step": 7543 }, { "epoch": 0.9410003742048147, "grad_norm": 0.0, "learning_rate": 1.8203608501331183e-07, "loss": 0.9572, "step": 7544 }, { "epoch": 0.941125109143071, "grad_norm": 0.0, "learning_rate": 1.8126952149124188e-07, "loss": 0.9075, "step": 7545 }, { "epoch": 0.9412498440813272, "grad_norm": 0.0, "learning_rate": 1.805045606335143e-07, "loss": 0.8909, "step": 7546 }, { "epoch": 0.9413745790195834, "grad_norm": 0.0, "learning_rate": 1.797412025649914e-07, "loss": 0.9335, "step": 7547 }, { "epoch": 0.9414993139578396, "grad_norm": 0.0, "learning_rate": 1.789794474102724e-07, "loss": 0.9451, "step": 7548 }, { "epoch": 0.9416240488960957, "grad_norm": 0.0, "learning_rate": 1.7821929529369343e-07, "loss": 0.8611, "step": 7549 }, { "epoch": 0.941748783834352, "grad_norm": 0.0, "learning_rate": 1.7746074633933075e-07, "loss": 0.9088, "step": 7550 }, { "epoch": 0.9418735187726082, "grad_norm": 0.0, "learning_rate": 1.7670380067099868e-07, "loss": 0.9104, "step": 7551 }, { "epoch": 0.9419982537108644, "grad_norm": 0.0, "learning_rate": 1.7594845841225062e-07, "loss": 0.8983, "step": 7552 }, { "epoch": 0.9421229886491206, "grad_norm": 0.0, "learning_rate": 1.7519471968637792e-07, "loss": 0.8798, "step": 7553 }, { "epoch": 0.9422477235873769, "grad_norm": 0.0, "learning_rate": 1.7444258461640885e-07, "loss": 0.9278, "step": 7554 }, { "epoch": 0.942372458525633, "grad_norm": 0.0, "learning_rate": 1.7369205332510964e-07, "loss": 0.8571, "step": 7555 }, { "epoch": 0.9424971934638893, "grad_norm": 0.0, "learning_rate": 1.7294312593498897e-07, "loss": 0.8594, "step": 7556 }, { "epoch": 0.9426219284021454, "grad_norm": 0.0, "learning_rate": 1.7219580256828683e-07, "loss": 0.952, "step": 7557 }, { "epoch": 0.9427466633404017, "grad_norm": 0.0, "learning_rate": 1.7145008334698898e-07, "loss": 0.8899, "step": 7558 }, { "epoch": 0.9428713982786578, "grad_norm": 0.0, "learning_rate": 1.707059683928125e-07, "loss": 0.848, "step": 7559 }, { "epoch": 0.9429961332169141, "grad_norm": 0.0, "learning_rate": 1.6996345782721692e-07, "loss": 0.8932, "step": 7560 }, { "epoch": 0.9431208681551703, "grad_norm": 0.0, "learning_rate": 1.6922255177140078e-07, "loss": 0.9069, "step": 7561 }, { "epoch": 0.9432456030934264, "grad_norm": 0.0, "learning_rate": 1.684832503462952e-07, "loss": 0.9193, "step": 7562 }, { "epoch": 0.9433703380316827, "grad_norm": 0.0, "learning_rate": 1.677455536725736e-07, "loss": 0.9228, "step": 7563 }, { "epoch": 0.9434950729699388, "grad_norm": 0.0, "learning_rate": 1.6700946187064858e-07, "loss": 0.8607, "step": 7564 }, { "epoch": 0.9436198079081951, "grad_norm": 0.0, "learning_rate": 1.6627497506066737e-07, "loss": 0.9416, "step": 7565 }, { "epoch": 0.9437445428464513, "grad_norm": 0.0, "learning_rate": 1.6554209336251738e-07, "loss": 0.9042, "step": 7566 }, { "epoch": 0.9438692777847075, "grad_norm": 0.0, "learning_rate": 1.648108168958229e-07, "loss": 0.9139, "step": 7567 }, { "epoch": 0.9439940127229637, "grad_norm": 0.0, "learning_rate": 1.6408114577994627e-07, "loss": 0.9037, "step": 7568 }, { "epoch": 0.94411874766122, "grad_norm": 0.0, "learning_rate": 1.6335308013398888e-07, "loss": 0.8511, "step": 7569 }, { "epoch": 0.9442434825994761, "grad_norm": 0.0, "learning_rate": 1.626266200767901e-07, "loss": 0.8994, "step": 7570 }, { "epoch": 0.9443682175377324, "grad_norm": 0.0, "learning_rate": 1.6190176572692506e-07, "loss": 0.9138, "step": 7571 }, { "epoch": 0.9444929524759885, "grad_norm": 0.0, "learning_rate": 1.6117851720270805e-07, "loss": 0.8811, "step": 7572 }, { "epoch": 0.9446176874142447, "grad_norm": 0.0, "learning_rate": 1.6045687462219352e-07, "loss": 0.8582, "step": 7573 }, { "epoch": 0.9447424223525009, "grad_norm": 0.0, "learning_rate": 1.5973683810317054e-07, "loss": 0.8444, "step": 7574 }, { "epoch": 0.9448671572907571, "grad_norm": 0.0, "learning_rate": 1.5901840776316623e-07, "loss": 0.8555, "step": 7575 }, { "epoch": 0.9449918922290134, "grad_norm": 0.0, "learning_rate": 1.5830158371944793e-07, "loss": 0.9203, "step": 7576 }, { "epoch": 0.9451166271672695, "grad_norm": 0.0, "learning_rate": 1.5758636608901977e-07, "loss": 0.9209, "step": 7577 }, { "epoch": 0.9452413621055258, "grad_norm": 0.0, "learning_rate": 1.5687275498862174e-07, "loss": 0.9287, "step": 7578 }, { "epoch": 0.9453660970437819, "grad_norm": 0.0, "learning_rate": 1.56160750534734e-07, "loss": 0.9617, "step": 7579 }, { "epoch": 0.9454908319820382, "grad_norm": 0.0, "learning_rate": 1.5545035284357357e-07, "loss": 0.9329, "step": 7580 }, { "epoch": 0.9456155669202944, "grad_norm": 0.0, "learning_rate": 1.5474156203109548e-07, "loss": 0.8667, "step": 7581 }, { "epoch": 0.9457403018585506, "grad_norm": 0.0, "learning_rate": 1.5403437821299272e-07, "loss": 0.9338, "step": 7582 }, { "epoch": 0.9458650367968068, "grad_norm": 0.0, "learning_rate": 1.53328801504693e-07, "loss": 0.9413, "step": 7583 }, { "epoch": 0.945989771735063, "grad_norm": 0.0, "learning_rate": 1.5262483202136747e-07, "loss": 0.929, "step": 7584 }, { "epoch": 0.9461145066733192, "grad_norm": 0.0, "learning_rate": 1.519224698779198e-07, "loss": 0.9473, "step": 7585 }, { "epoch": 0.9462392416115754, "grad_norm": 0.0, "learning_rate": 1.5122171518899274e-07, "loss": 0.9266, "step": 7586 }, { "epoch": 0.9463639765498316, "grad_norm": 0.0, "learning_rate": 1.5052256806897037e-07, "loss": 0.9455, "step": 7587 }, { "epoch": 0.9464887114880878, "grad_norm": 0.0, "learning_rate": 1.4982502863196692e-07, "loss": 0.8761, "step": 7588 }, { "epoch": 0.946613446426344, "grad_norm": 0.0, "learning_rate": 1.491290969918402e-07, "loss": 0.9291, "step": 7589 }, { "epoch": 0.9467381813646002, "grad_norm": 0.0, "learning_rate": 1.4843477326218493e-07, "loss": 0.8982, "step": 7590 }, { "epoch": 0.9468629163028565, "grad_norm": 0.0, "learning_rate": 1.4774205755633041e-07, "loss": 0.8605, "step": 7591 }, { "epoch": 0.9469876512411126, "grad_norm": 0.0, "learning_rate": 1.4705094998734738e-07, "loss": 0.9081, "step": 7592 }, { "epoch": 0.9471123861793689, "grad_norm": 0.0, "learning_rate": 1.4636145066803886e-07, "loss": 0.8844, "step": 7593 }, { "epoch": 0.947237121117625, "grad_norm": 0.0, "learning_rate": 1.4567355971095266e-07, "loss": 0.9255, "step": 7594 }, { "epoch": 0.9473618560558813, "grad_norm": 0.0, "learning_rate": 1.449872772283656e-07, "loss": 0.8577, "step": 7595 }, { "epoch": 0.9474865909941375, "grad_norm": 0.0, "learning_rate": 1.4430260333229919e-07, "loss": 0.9313, "step": 7596 }, { "epoch": 0.9476113259323936, "grad_norm": 0.0, "learning_rate": 1.4361953813450846e-07, "loss": 0.9021, "step": 7597 }, { "epoch": 0.9477360608706499, "grad_norm": 0.0, "learning_rate": 1.429380817464865e-07, "loss": 0.9226, "step": 7598 }, { "epoch": 0.947860795808906, "grad_norm": 0.0, "learning_rate": 1.422582342794665e-07, "loss": 0.8746, "step": 7599 }, { "epoch": 0.9479855307471623, "grad_norm": 0.0, "learning_rate": 1.4157999584441417e-07, "loss": 0.9176, "step": 7600 }, { "epoch": 0.9481102656854185, "grad_norm": 0.0, "learning_rate": 1.409033665520354e-07, "loss": 0.8661, "step": 7601 }, { "epoch": 0.9482350006236747, "grad_norm": 0.0, "learning_rate": 1.4022834651277407e-07, "loss": 0.8692, "step": 7602 }, { "epoch": 0.9483597355619309, "grad_norm": 0.0, "learning_rate": 1.395549358368087e-07, "loss": 0.8369, "step": 7603 }, { "epoch": 0.9484844705001871, "grad_norm": 0.0, "learning_rate": 1.3888313463406022e-07, "loss": 0.8579, "step": 7604 }, { "epoch": 0.9486092054384433, "grad_norm": 0.0, "learning_rate": 1.3821294301418097e-07, "loss": 0.9185, "step": 7605 }, { "epoch": 0.9487339403766996, "grad_norm": 0.0, "learning_rate": 1.3754436108656234e-07, "loss": 0.887, "step": 7606 }, { "epoch": 0.9488586753149557, "grad_norm": 0.0, "learning_rate": 1.3687738896033588e-07, "loss": 0.9236, "step": 7607 }, { "epoch": 0.948983410253212, "grad_norm": 0.0, "learning_rate": 1.3621202674436783e-07, "loss": 0.8483, "step": 7608 }, { "epoch": 0.9491081451914681, "grad_norm": 0.0, "learning_rate": 1.3554827454726137e-07, "loss": 0.8624, "step": 7609 }, { "epoch": 0.9492328801297243, "grad_norm": 0.0, "learning_rate": 1.348861324773576e-07, "loss": 0.9151, "step": 7610 }, { "epoch": 0.9493576150679806, "grad_norm": 0.0, "learning_rate": 1.342256006427356e-07, "loss": 0.9069, "step": 7611 }, { "epoch": 0.9494823500062367, "grad_norm": 0.0, "learning_rate": 1.3356667915121025e-07, "loss": 0.9221, "step": 7612 }, { "epoch": 0.949607084944493, "grad_norm": 0.0, "learning_rate": 1.3290936811033438e-07, "loss": 0.8767, "step": 7613 }, { "epoch": 0.9497318198827491, "grad_norm": 0.0, "learning_rate": 1.3225366762739778e-07, "loss": 0.9172, "step": 7614 }, { "epoch": 0.9498565548210054, "grad_norm": 0.0, "learning_rate": 1.315995778094259e-07, "loss": 0.8875, "step": 7615 }, { "epoch": 0.9499812897592615, "grad_norm": 0.0, "learning_rate": 1.3094709876318447e-07, "loss": 0.9052, "step": 7616 }, { "epoch": 0.9501060246975178, "grad_norm": 0.0, "learning_rate": 1.3029623059517493e-07, "loss": 0.9255, "step": 7617 }, { "epoch": 0.950230759635774, "grad_norm": 0.0, "learning_rate": 1.2964697341163234e-07, "loss": 0.9131, "step": 7618 }, { "epoch": 0.9503554945740302, "grad_norm": 0.0, "learning_rate": 1.2899932731853637e-07, "loss": 0.8915, "step": 7619 }, { "epoch": 0.9504802295122864, "grad_norm": 0.0, "learning_rate": 1.2835329242159467e-07, "loss": 0.908, "step": 7620 }, { "epoch": 0.9506049644505425, "grad_norm": 0.0, "learning_rate": 1.2770886882625954e-07, "loss": 0.8914, "step": 7621 }, { "epoch": 0.9507296993887988, "grad_norm": 0.0, "learning_rate": 1.2706605663771575e-07, "loss": 0.938, "step": 7622 }, { "epoch": 0.950854434327055, "grad_norm": 0.0, "learning_rate": 1.2642485596088606e-07, "loss": 0.905, "step": 7623 }, { "epoch": 0.9509791692653112, "grad_norm": 0.0, "learning_rate": 1.2578526690043337e-07, "loss": 0.9486, "step": 7624 }, { "epoch": 0.9511039042035674, "grad_norm": 0.0, "learning_rate": 1.2514728956075084e-07, "loss": 0.9075, "step": 7625 }, { "epoch": 0.9512286391418237, "grad_norm": 0.0, "learning_rate": 1.245109240459741e-07, "loss": 0.8558, "step": 7626 }, { "epoch": 0.9513533740800798, "grad_norm": 0.0, "learning_rate": 1.2387617045997446e-07, "loss": 0.83, "step": 7627 }, { "epoch": 0.9514781090183361, "grad_norm": 0.0, "learning_rate": 1.2324302890636131e-07, "loss": 0.9527, "step": 7628 }, { "epoch": 0.9516028439565922, "grad_norm": 0.0, "learning_rate": 1.226114994884753e-07, "loss": 0.8275, "step": 7629 }, { "epoch": 0.9517275788948485, "grad_norm": 0.0, "learning_rate": 1.219815823094006e-07, "loss": 0.9063, "step": 7630 }, { "epoch": 0.9518523138331046, "grad_norm": 0.0, "learning_rate": 1.2135327747195613e-07, "loss": 0.9119, "step": 7631 }, { "epoch": 0.9519770487713609, "grad_norm": 0.0, "learning_rate": 1.2072658507869428e-07, "loss": 0.8869, "step": 7632 }, { "epoch": 0.9521017837096171, "grad_norm": 0.0, "learning_rate": 1.201015052319099e-07, "loss": 0.9219, "step": 7633 }, { "epoch": 0.9522265186478732, "grad_norm": 0.0, "learning_rate": 1.1947803803363135e-07, "loss": 0.8855, "step": 7634 }, { "epoch": 0.9523512535861295, "grad_norm": 0.0, "learning_rate": 1.1885618358562278e-07, "loss": 0.892, "step": 7635 }, { "epoch": 0.9524759885243856, "grad_norm": 0.0, "learning_rate": 1.1823594198938749e-07, "loss": 0.9188, "step": 7636 }, { "epoch": 0.9526007234626419, "grad_norm": 0.0, "learning_rate": 1.1761731334616333e-07, "loss": 0.9172, "step": 7637 }, { "epoch": 0.9527254584008981, "grad_norm": 0.0, "learning_rate": 1.1700029775692845e-07, "loss": 0.9467, "step": 7638 }, { "epoch": 0.9528501933391543, "grad_norm": 0.0, "learning_rate": 1.1638489532239339e-07, "loss": 0.8491, "step": 7639 }, { "epoch": 0.9529749282774105, "grad_norm": 0.0, "learning_rate": 1.157711061430078e-07, "loss": 0.9475, "step": 7640 }, { "epoch": 0.9530996632156667, "grad_norm": 0.0, "learning_rate": 1.151589303189582e-07, "loss": 0.8778, "step": 7641 }, { "epoch": 0.9532243981539229, "grad_norm": 0.0, "learning_rate": 1.1454836795016577e-07, "loss": 0.9326, "step": 7642 }, { "epoch": 0.9533491330921792, "grad_norm": 0.0, "learning_rate": 1.1393941913629081e-07, "loss": 0.9006, "step": 7643 }, { "epoch": 0.9534738680304353, "grad_norm": 0.0, "learning_rate": 1.1333208397672823e-07, "loss": 0.9291, "step": 7644 }, { "epoch": 0.9535986029686915, "grad_norm": 0.0, "learning_rate": 1.127263625706132e-07, "loss": 0.8597, "step": 7645 }, { "epoch": 0.9537233379069477, "grad_norm": 0.0, "learning_rate": 1.1212225501680995e-07, "loss": 0.8884, "step": 7646 }, { "epoch": 0.9538480728452039, "grad_norm": 0.0, "learning_rate": 1.1151976141392851e-07, "loss": 0.8921, "step": 7647 }, { "epoch": 0.9539728077834602, "grad_norm": 0.0, "learning_rate": 1.1091888186030908e-07, "loss": 0.942, "step": 7648 }, { "epoch": 0.9540975427217163, "grad_norm": 0.0, "learning_rate": 1.1031961645402878e-07, "loss": 0.8983, "step": 7649 }, { "epoch": 0.9542222776599726, "grad_norm": 0.0, "learning_rate": 1.0972196529290602e-07, "loss": 0.9207, "step": 7650 }, { "epoch": 0.9543470125982287, "grad_norm": 0.0, "learning_rate": 1.0912592847449056e-07, "loss": 0.8908, "step": 7651 }, { "epoch": 0.954471747536485, "grad_norm": 0.0, "learning_rate": 1.0853150609607122e-07, "loss": 0.8591, "step": 7652 }, { "epoch": 0.9545964824747412, "grad_norm": 0.0, "learning_rate": 1.0793869825467262e-07, "loss": 0.9064, "step": 7653 }, { "epoch": 0.9547212174129974, "grad_norm": 0.0, "learning_rate": 1.0734750504705516e-07, "loss": 0.926, "step": 7654 }, { "epoch": 0.9548459523512536, "grad_norm": 0.0, "learning_rate": 1.0675792656971828e-07, "loss": 0.8967, "step": 7655 }, { "epoch": 0.9549706872895098, "grad_norm": 0.0, "learning_rate": 1.0616996291889504e-07, "loss": 0.8772, "step": 7656 }, { "epoch": 0.955095422227766, "grad_norm": 0.0, "learning_rate": 1.055836141905553e-07, "loss": 0.8822, "step": 7657 }, { "epoch": 0.9552201571660222, "grad_norm": 0.0, "learning_rate": 1.0499888048040585e-07, "loss": 0.9095, "step": 7658 }, { "epoch": 0.9553448921042784, "grad_norm": 0.0, "learning_rate": 1.0441576188389146e-07, "loss": 0.9051, "step": 7659 }, { "epoch": 0.9554696270425346, "grad_norm": 0.0, "learning_rate": 1.0383425849619155e-07, "loss": 0.8806, "step": 7660 }, { "epoch": 0.9555943619807908, "grad_norm": 0.0, "learning_rate": 1.032543704122213e-07, "loss": 0.9049, "step": 7661 }, { "epoch": 0.955719096919047, "grad_norm": 0.0, "learning_rate": 1.0267609772663278e-07, "loss": 0.902, "step": 7662 }, { "epoch": 0.9558438318573033, "grad_norm": 0.0, "learning_rate": 1.0209944053381605e-07, "loss": 0.9329, "step": 7663 }, { "epoch": 0.9559685667955594, "grad_norm": 0.0, "learning_rate": 1.0152439892789356e-07, "loss": 0.9231, "step": 7664 }, { "epoch": 0.9560933017338157, "grad_norm": 0.0, "learning_rate": 1.0095097300273026e-07, "loss": 0.904, "step": 7665 }, { "epoch": 0.9562180366720718, "grad_norm": 0.0, "learning_rate": 1.0037916285192129e-07, "loss": 0.9254, "step": 7666 }, { "epoch": 0.9563427716103281, "grad_norm": 0.0, "learning_rate": 9.980896856880084e-08, "loss": 0.8829, "step": 7667 }, { "epoch": 0.9564675065485843, "grad_norm": 0.0, "learning_rate": 9.924039024643895e-08, "loss": 0.8809, "step": 7668 }, { "epoch": 0.9565922414868404, "grad_norm": 0.0, "learning_rate": 9.867342797764135e-08, "loss": 0.9055, "step": 7669 }, { "epoch": 0.9567169764250967, "grad_norm": 0.0, "learning_rate": 9.810808185495291e-08, "loss": 0.8878, "step": 7670 }, { "epoch": 0.9568417113633528, "grad_norm": 0.0, "learning_rate": 9.754435197064981e-08, "loss": 0.8701, "step": 7671 }, { "epoch": 0.9569664463016091, "grad_norm": 0.0, "learning_rate": 9.698223841674847e-08, "loss": 0.905, "step": 7672 }, { "epoch": 0.9570911812398653, "grad_norm": 0.0, "learning_rate": 9.642174128499882e-08, "loss": 0.9274, "step": 7673 }, { "epoch": 0.9572159161781215, "grad_norm": 0.0, "learning_rate": 9.586286066688877e-08, "loss": 0.9317, "step": 7674 }, { "epoch": 0.9573406511163777, "grad_norm": 0.0, "learning_rate": 9.530559665364203e-08, "loss": 0.9305, "step": 7675 }, { "epoch": 0.9574653860546339, "grad_norm": 0.0, "learning_rate": 9.474994933621807e-08, "loss": 0.8765, "step": 7676 }, { "epoch": 0.9575901209928901, "grad_norm": 0.0, "learning_rate": 9.41959188053132e-08, "loss": 0.8806, "step": 7677 }, { "epoch": 0.9577148559311464, "grad_norm": 0.0, "learning_rate": 9.364350515135623e-08, "loss": 0.889, "step": 7678 }, { "epoch": 0.9578395908694025, "grad_norm": 0.0, "learning_rate": 9.309270846451835e-08, "loss": 0.9496, "step": 7679 }, { "epoch": 0.9579643258076588, "grad_norm": 0.0, "learning_rate": 9.254352883470207e-08, "loss": 0.9457, "step": 7680 }, { "epoch": 0.9580890607459149, "grad_norm": 0.0, "learning_rate": 9.199596635154684e-08, "loss": 0.8631, "step": 7681 }, { "epoch": 0.9582137956841711, "grad_norm": 0.0, "learning_rate": 9.145002110443002e-08, "loss": 0.8919, "step": 7682 }, { "epoch": 0.9583385306224274, "grad_norm": 0.0, "learning_rate": 9.090569318246256e-08, "loss": 0.9048, "step": 7683 }, { "epoch": 0.9584632655606835, "grad_norm": 0.0, "learning_rate": 9.036298267449228e-08, "loss": 0.8901, "step": 7684 }, { "epoch": 0.9585880004989398, "grad_norm": 0.0, "learning_rate": 8.982188966910498e-08, "loss": 0.9972, "step": 7685 }, { "epoch": 0.9587127354371959, "grad_norm": 0.0, "learning_rate": 8.928241425461781e-08, "loss": 0.9174, "step": 7686 }, { "epoch": 0.9588374703754522, "grad_norm": 0.0, "learning_rate": 8.874455651909031e-08, "loss": 0.8634, "step": 7687 }, { "epoch": 0.9589622053137083, "grad_norm": 0.0, "learning_rate": 8.820831655031115e-08, "loss": 0.8793, "step": 7688 }, { "epoch": 0.9590869402519646, "grad_norm": 0.0, "learning_rate": 8.76736944358103e-08, "loss": 0.8837, "step": 7689 }, { "epoch": 0.9592116751902208, "grad_norm": 0.0, "learning_rate": 8.714069026285022e-08, "loss": 0.9178, "step": 7690 }, { "epoch": 0.959336410128477, "grad_norm": 0.0, "learning_rate": 8.660930411843127e-08, "loss": 0.8979, "step": 7691 }, { "epoch": 0.9594611450667332, "grad_norm": 0.0, "learning_rate": 8.607953608928965e-08, "loss": 0.8992, "step": 7692 }, { "epoch": 0.9595858800049893, "grad_norm": 0.0, "learning_rate": 8.555138626189619e-08, "loss": 0.8898, "step": 7693 }, { "epoch": 0.9597106149432456, "grad_norm": 0.0, "learning_rate": 8.502485472245859e-08, "loss": 0.9016, "step": 7694 }, { "epoch": 0.9598353498815018, "grad_norm": 0.0, "learning_rate": 8.44999415569192e-08, "loss": 0.9199, "step": 7695 }, { "epoch": 0.959960084819758, "grad_norm": 0.0, "learning_rate": 8.39766468509573e-08, "loss": 0.8971, "step": 7696 }, { "epoch": 0.9600848197580142, "grad_norm": 0.0, "learning_rate": 8.345497068998897e-08, "loss": 0.9349, "step": 7697 }, { "epoch": 0.9602095546962705, "grad_norm": 0.0, "learning_rate": 8.29349131591628e-08, "loss": 0.9186, "step": 7698 }, { "epoch": 0.9603342896345266, "grad_norm": 0.0, "learning_rate": 8.241647434336864e-08, "loss": 0.8457, "step": 7699 }, { "epoch": 0.9604590245727829, "grad_norm": 0.0, "learning_rate": 8.189965432722546e-08, "loss": 0.8777, "step": 7700 }, { "epoch": 0.960583759511039, "grad_norm": 0.0, "learning_rate": 8.138445319509248e-08, "loss": 0.8905, "step": 7701 }, { "epoch": 0.9607084944492953, "grad_norm": 0.0, "learning_rate": 8.087087103106461e-08, "loss": 0.9152, "step": 7702 }, { "epoch": 0.9608332293875514, "grad_norm": 0.0, "learning_rate": 8.035890791896928e-08, "loss": 0.8667, "step": 7703 }, { "epoch": 0.9609579643258077, "grad_norm": 0.0, "learning_rate": 7.984856394237294e-08, "loss": 0.8869, "step": 7704 }, { "epoch": 0.9610826992640639, "grad_norm": 0.0, "learning_rate": 7.933983918457677e-08, "loss": 0.975, "step": 7705 }, { "epoch": 0.96120743420232, "grad_norm": 0.0, "learning_rate": 7.883273372861766e-08, "loss": 0.9293, "step": 7706 }, { "epoch": 0.9613321691405763, "grad_norm": 0.0, "learning_rate": 7.832724765726828e-08, "loss": 0.8765, "step": 7707 }, { "epoch": 0.9614569040788324, "grad_norm": 0.0, "learning_rate": 7.782338105303489e-08, "loss": 0.9171, "step": 7708 }, { "epoch": 0.9615816390170887, "grad_norm": 0.0, "learning_rate": 7.732113399816276e-08, "loss": 0.9198, "step": 7709 }, { "epoch": 0.9617063739553449, "grad_norm": 0.0, "learning_rate": 7.68205065746297e-08, "loss": 0.8547, "step": 7710 }, { "epoch": 0.9618311088936011, "grad_norm": 0.0, "learning_rate": 7.632149886415363e-08, "loss": 0.8904, "step": 7711 }, { "epoch": 0.9619558438318573, "grad_norm": 0.0, "learning_rate": 7.582411094818276e-08, "loss": 0.8791, "step": 7712 }, { "epoch": 0.9620805787701135, "grad_norm": 0.0, "learning_rate": 7.532834290790436e-08, "loss": 0.8689, "step": 7713 }, { "epoch": 0.9622053137083697, "grad_norm": 0.0, "learning_rate": 7.483419482423926e-08, "loss": 0.9058, "step": 7714 }, { "epoch": 0.962330048646626, "grad_norm": 0.0, "learning_rate": 7.434166677784626e-08, "loss": 0.8769, "step": 7715 }, { "epoch": 0.9624547835848821, "grad_norm": 0.0, "learning_rate": 7.385075884911775e-08, "loss": 0.881, "step": 7716 }, { "epoch": 0.9625795185231383, "grad_norm": 0.0, "learning_rate": 7.336147111818181e-08, "loss": 0.9376, "step": 7717 }, { "epoch": 0.9627042534613945, "grad_norm": 0.0, "learning_rate": 7.287380366490238e-08, "loss": 0.9145, "step": 7718 }, { "epoch": 0.9628289883996507, "grad_norm": 0.0, "learning_rate": 7.238775656888019e-08, "loss": 0.8818, "step": 7719 }, { "epoch": 0.962953723337907, "grad_norm": 0.0, "learning_rate": 7.19033299094496e-08, "loss": 0.9109, "step": 7720 }, { "epoch": 0.9630784582761631, "grad_norm": 0.0, "learning_rate": 7.142052376568176e-08, "loss": 0.9415, "step": 7721 }, { "epoch": 0.9632031932144194, "grad_norm": 0.0, "learning_rate": 7.093933821638255e-08, "loss": 0.9449, "step": 7722 }, { "epoch": 0.9633279281526755, "grad_norm": 0.0, "learning_rate": 7.045977334009357e-08, "loss": 0.9573, "step": 7723 }, { "epoch": 0.9634526630909318, "grad_norm": 0.0, "learning_rate": 6.998182921509222e-08, "loss": 0.9048, "step": 7724 }, { "epoch": 0.963577398029188, "grad_norm": 0.0, "learning_rate": 6.950550591938943e-08, "loss": 0.9145, "step": 7725 }, { "epoch": 0.9637021329674442, "grad_norm": 0.0, "learning_rate": 6.903080353073522e-08, "loss": 0.8535, "step": 7726 }, { "epoch": 0.9638268679057004, "grad_norm": 0.0, "learning_rate": 6.855772212661204e-08, "loss": 0.8912, "step": 7727 }, { "epoch": 0.9639516028439566, "grad_norm": 0.0, "learning_rate": 6.808626178423927e-08, "loss": 0.9118, "step": 7728 }, { "epoch": 0.9640763377822128, "grad_norm": 0.0, "learning_rate": 6.761642258056977e-08, "loss": 0.8686, "step": 7729 }, { "epoch": 0.964201072720469, "grad_norm": 0.0, "learning_rate": 6.714820459229554e-08, "loss": 0.907, "step": 7730 }, { "epoch": 0.9643258076587252, "grad_norm": 0.0, "learning_rate": 6.668160789583878e-08, "loss": 0.9098, "step": 7731 }, { "epoch": 0.9644505425969814, "grad_norm": 0.0, "learning_rate": 6.621663256736078e-08, "loss": 0.9226, "step": 7732 }, { "epoch": 0.9645752775352376, "grad_norm": 0.0, "learning_rate": 6.575327868275972e-08, "loss": 0.8909, "step": 7733 }, { "epoch": 0.9647000124734938, "grad_norm": 0.0, "learning_rate": 6.529154631766288e-08, "loss": 0.9185, "step": 7734 }, { "epoch": 0.9648247474117501, "grad_norm": 0.0, "learning_rate": 6.483143554743776e-08, "loss": 0.8981, "step": 7735 }, { "epoch": 0.9649494823500062, "grad_norm": 0.0, "learning_rate": 6.437294644718872e-08, "loss": 0.8773, "step": 7736 }, { "epoch": 0.9650742172882625, "grad_norm": 0.0, "learning_rate": 6.391607909174813e-08, "loss": 0.8735, "step": 7737 }, { "epoch": 0.9651989522265186, "grad_norm": 0.0, "learning_rate": 6.346083355569188e-08, "loss": 0.9003, "step": 7738 }, { "epoch": 0.9653236871647749, "grad_norm": 0.0, "learning_rate": 6.30072099133261e-08, "loss": 0.8931, "step": 7739 }, { "epoch": 0.9654484221030311, "grad_norm": 0.0, "learning_rate": 6.255520823869487e-08, "loss": 0.9295, "step": 7740 }, { "epoch": 0.9655731570412872, "grad_norm": 0.0, "learning_rate": 6.210482860557366e-08, "loss": 0.8875, "step": 7741 }, { "epoch": 0.9656978919795435, "grad_norm": 0.0, "learning_rate": 6.165607108747807e-08, "loss": 0.9029, "step": 7742 }, { "epoch": 0.9658226269177996, "grad_norm": 0.0, "learning_rate": 6.120893575765618e-08, "loss": 0.8948, "step": 7743 }, { "epoch": 0.9659473618560559, "grad_norm": 0.0, "learning_rate": 6.076342268909075e-08, "loss": 0.8757, "step": 7744 }, { "epoch": 0.966072096794312, "grad_norm": 0.0, "learning_rate": 6.031953195450136e-08, "loss": 0.8849, "step": 7745 }, { "epoch": 0.9661968317325683, "grad_norm": 0.0, "learning_rate": 5.987726362634339e-08, "loss": 0.8985, "step": 7746 }, { "epoch": 0.9663215666708245, "grad_norm": 0.0, "learning_rate": 5.943661777680354e-08, "loss": 0.8624, "step": 7747 }, { "epoch": 0.9664463016090807, "grad_norm": 0.0, "learning_rate": 5.8997594477809836e-08, "loss": 0.8854, "step": 7748 }, { "epoch": 0.9665710365473369, "grad_norm": 0.0, "learning_rate": 5.8560193801018294e-08, "loss": 0.9344, "step": 7749 }, { "epoch": 0.9666957714855932, "grad_norm": 0.0, "learning_rate": 5.812441581782624e-08, "loss": 0.8512, "step": 7750 }, { "epoch": 0.9668205064238493, "grad_norm": 0.0, "learning_rate": 5.769026059936233e-08, "loss": 0.953, "step": 7751 }, { "epoch": 0.9669452413621056, "grad_norm": 0.0, "learning_rate": 5.7257728216492114e-08, "loss": 0.8709, "step": 7752 }, { "epoch": 0.9670699763003617, "grad_norm": 0.0, "learning_rate": 5.682681873981577e-08, "loss": 0.9364, "step": 7753 }, { "epoch": 0.9671947112386179, "grad_norm": 0.0, "learning_rate": 5.6397532239668154e-08, "loss": 0.9063, "step": 7754 }, { "epoch": 0.9673194461768742, "grad_norm": 0.0, "learning_rate": 5.5969868786120983e-08, "loss": 0.8966, "step": 7755 }, { "epoch": 0.9674441811151303, "grad_norm": 0.0, "learning_rate": 5.554382844897843e-08, "loss": 0.8591, "step": 7756 }, { "epoch": 0.9675689160533866, "grad_norm": 0.0, "learning_rate": 5.5119411297781534e-08, "loss": 0.9179, "step": 7757 }, { "epoch": 0.9676936509916427, "grad_norm": 0.0, "learning_rate": 5.4696617401805984e-08, "loss": 0.8999, "step": 7758 }, { "epoch": 0.967818385929899, "grad_norm": 0.0, "learning_rate": 5.427544683006103e-08, "loss": 0.9069, "step": 7759 }, { "epoch": 0.9679431208681551, "grad_norm": 0.0, "learning_rate": 5.385589965129501e-08, "loss": 0.9313, "step": 7760 }, { "epoch": 0.9680678558064114, "grad_norm": 0.0, "learning_rate": 5.3437975933985366e-08, "loss": 0.8845, "step": 7761 }, { "epoch": 0.9681925907446676, "grad_norm": 0.0, "learning_rate": 5.302167574635087e-08, "loss": 0.8739, "step": 7762 }, { "epoch": 0.9683173256829238, "grad_norm": 0.0, "learning_rate": 5.260699915633938e-08, "loss": 0.8518, "step": 7763 }, { "epoch": 0.96844206062118, "grad_norm": 0.0, "learning_rate": 5.219394623163898e-08, "loss": 0.8796, "step": 7764 }, { "epoch": 0.9685667955594361, "grad_norm": 0.0, "learning_rate": 5.178251703967019e-08, "loss": 0.9339, "step": 7765 }, { "epoch": 0.9686915304976924, "grad_norm": 0.0, "learning_rate": 5.137271164758595e-08, "loss": 0.9018, "step": 7766 }, { "epoch": 0.9688162654359486, "grad_norm": 0.0, "learning_rate": 5.096453012227942e-08, "loss": 0.8565, "step": 7767 }, { "epoch": 0.9689410003742048, "grad_norm": 0.0, "learning_rate": 5.0557972530375086e-08, "loss": 0.9718, "step": 7768 }, { "epoch": 0.969065735312461, "grad_norm": 0.0, "learning_rate": 5.015303893823431e-08, "loss": 0.8612, "step": 7769 }, { "epoch": 0.9691904702507173, "grad_norm": 0.0, "learning_rate": 4.974972941195089e-08, "loss": 0.8851, "step": 7770 }, { "epoch": 0.9693152051889734, "grad_norm": 0.0, "learning_rate": 4.93480440173566e-08, "loss": 0.9046, "step": 7771 }, { "epoch": 0.9694399401272297, "grad_norm": 0.0, "learning_rate": 4.894798282001567e-08, "loss": 0.9093, "step": 7772 }, { "epoch": 0.9695646750654858, "grad_norm": 0.0, "learning_rate": 4.85495458852292e-08, "loss": 0.8918, "step": 7773 }, { "epoch": 0.9696894100037421, "grad_norm": 0.0, "learning_rate": 4.815273327803183e-08, "loss": 0.9367, "step": 7774 }, { "epoch": 0.9698141449419982, "grad_norm": 0.0, "learning_rate": 4.775754506319175e-08, "loss": 0.9284, "step": 7775 }, { "epoch": 0.9699388798802545, "grad_norm": 0.0, "learning_rate": 4.736398130521513e-08, "loss": 0.8901, "step": 7776 }, { "epoch": 0.9700636148185107, "grad_norm": 0.0, "learning_rate": 4.6972042068341714e-08, "loss": 0.86, "step": 7777 }, { "epoch": 0.9701883497567668, "grad_norm": 0.0, "learning_rate": 4.6581727416544766e-08, "loss": 0.9244, "step": 7778 }, { "epoch": 0.9703130846950231, "grad_norm": 0.0, "learning_rate": 4.619303741353554e-08, "loss": 0.9051, "step": 7779 }, { "epoch": 0.9704378196332792, "grad_norm": 0.0, "learning_rate": 4.580597212275661e-08, "loss": 0.9047, "step": 7780 }, { "epoch": 0.9705625545715355, "grad_norm": 0.0, "learning_rate": 4.5420531607385244e-08, "loss": 0.896, "step": 7781 }, { "epoch": 0.9706872895097917, "grad_norm": 0.0, "learning_rate": 4.503671593033776e-08, "loss": 0.9003, "step": 7782 }, { "epoch": 0.9708120244480479, "grad_norm": 0.0, "learning_rate": 4.4654525154260717e-08, "loss": 0.9246, "step": 7783 }, { "epoch": 0.9709367593863041, "grad_norm": 0.0, "learning_rate": 4.4273959341538666e-08, "loss": 0.8897, "step": 7784 }, { "epoch": 0.9710614943245603, "grad_norm": 0.0, "learning_rate": 4.389501855428857e-08, "loss": 0.9427, "step": 7785 }, { "epoch": 0.9711862292628165, "grad_norm": 0.0, "learning_rate": 4.35177028543643e-08, "loss": 0.8948, "step": 7786 }, { "epoch": 0.9713109642010728, "grad_norm": 0.0, "learning_rate": 4.314201230335102e-08, "loss": 0.8841, "step": 7787 }, { "epoch": 0.9714356991393289, "grad_norm": 0.0, "learning_rate": 4.2767946962573025e-08, "loss": 0.9072, "step": 7788 }, { "epoch": 0.9715604340775851, "grad_norm": 0.0, "learning_rate": 4.2395506893085916e-08, "loss": 0.8639, "step": 7789 }, { "epoch": 0.9716851690158413, "grad_norm": 0.0, "learning_rate": 4.202469215568217e-08, "loss": 0.9051, "step": 7790 }, { "epoch": 0.9718099039540975, "grad_norm": 0.0, "learning_rate": 4.1655502810888926e-08, "loss": 0.9275, "step": 7791 }, { "epoch": 0.9719346388923538, "grad_norm": 0.0, "learning_rate": 4.128793891896465e-08, "loss": 0.9081, "step": 7792 }, { "epoch": 0.9720593738306099, "grad_norm": 0.0, "learning_rate": 4.0922000539906914e-08, "loss": 0.8862, "step": 7793 }, { "epoch": 0.9721841087688662, "grad_norm": 0.0, "learning_rate": 4.0557687733445704e-08, "loss": 0.9036, "step": 7794 }, { "epoch": 0.9723088437071223, "grad_norm": 0.0, "learning_rate": 4.019500055904568e-08, "loss": 0.9608, "step": 7795 }, { "epoch": 0.9724335786453786, "grad_norm": 0.0, "learning_rate": 3.983393907590727e-08, "loss": 0.8862, "step": 7796 }, { "epoch": 0.9725583135836348, "grad_norm": 0.0, "learning_rate": 3.947450334296443e-08, "loss": 0.8921, "step": 7797 }, { "epoch": 0.972683048521891, "grad_norm": 0.0, "learning_rate": 3.911669341888469e-08, "loss": 0.9224, "step": 7798 }, { "epoch": 0.9728077834601472, "grad_norm": 0.0, "learning_rate": 3.876050936207465e-08, "loss": 0.9133, "step": 7799 }, { "epoch": 0.9729325183984034, "grad_norm": 0.0, "learning_rate": 3.8405951230670035e-08, "loss": 0.9125, "step": 7800 }, { "epoch": 0.9730572533366596, "grad_norm": 0.0, "learning_rate": 3.805301908254455e-08, "loss": 0.9039, "step": 7801 }, { "epoch": 0.9731819882749158, "grad_norm": 0.0, "learning_rate": 3.770171297530545e-08, "loss": 0.8742, "step": 7802 }, { "epoch": 0.973306723213172, "grad_norm": 0.0, "learning_rate": 3.735203296629575e-08, "loss": 0.8548, "step": 7803 }, { "epoch": 0.9734314581514282, "grad_norm": 0.0, "learning_rate": 3.70039791125909e-08, "loss": 0.8835, "step": 7804 }, { "epoch": 0.9735561930896844, "grad_norm": 0.0, "learning_rate": 3.665755147100214e-08, "loss": 0.8854, "step": 7805 }, { "epoch": 0.9736809280279406, "grad_norm": 0.0, "learning_rate": 3.6312750098076445e-08, "loss": 0.9352, "step": 7806 }, { "epoch": 0.9738056629661969, "grad_norm": 0.0, "learning_rate": 3.5969575050091024e-08, "loss": 0.9126, "step": 7807 }, { "epoch": 0.973930397904453, "grad_norm": 0.0, "learning_rate": 3.56280263830644e-08, "loss": 0.877, "step": 7808 }, { "epoch": 0.9740551328427093, "grad_norm": 0.0, "learning_rate": 3.528810415274309e-08, "loss": 0.9235, "step": 7809 }, { "epoch": 0.9741798677809654, "grad_norm": 0.0, "learning_rate": 3.4949808414612705e-08, "loss": 0.9081, "step": 7810 }, { "epoch": 0.9743046027192217, "grad_norm": 0.0, "learning_rate": 3.461313922389131e-08, "loss": 0.8242, "step": 7811 }, { "epoch": 0.9744293376574779, "grad_norm": 0.0, "learning_rate": 3.427809663553161e-08, "loss": 0.8448, "step": 7812 }, { "epoch": 0.974554072595734, "grad_norm": 0.0, "learning_rate": 3.394468070422097e-08, "loss": 0.9392, "step": 7813 }, { "epoch": 0.9746788075339903, "grad_norm": 0.0, "learning_rate": 3.36128914843814e-08, "loss": 0.9167, "step": 7814 }, { "epoch": 0.9748035424722464, "grad_norm": 0.0, "learning_rate": 3.328272903016849e-08, "loss": 0.8646, "step": 7815 }, { "epoch": 0.9749282774105027, "grad_norm": 0.0, "learning_rate": 3.2954193395473566e-08, "loss": 0.8904, "step": 7816 }, { "epoch": 0.9750530123487589, "grad_norm": 0.0, "learning_rate": 3.262728463392262e-08, "loss": 0.8839, "step": 7817 }, { "epoch": 0.9751777472870151, "grad_norm": 0.0, "learning_rate": 3.230200279887519e-08, "loss": 0.9266, "step": 7818 }, { "epoch": 0.9753024822252713, "grad_norm": 0.0, "learning_rate": 3.197834794342436e-08, "loss": 0.9223, "step": 7819 }, { "epoch": 0.9754272171635275, "grad_norm": 0.0, "learning_rate": 3.1656320120400094e-08, "loss": 0.8816, "step": 7820 }, { "epoch": 0.9755519521017837, "grad_norm": 0.0, "learning_rate": 3.13359193823648e-08, "loss": 0.9249, "step": 7821 }, { "epoch": 0.97567668704004, "grad_norm": 0.0, "learning_rate": 3.101714578161552e-08, "loss": 0.8956, "step": 7822 }, { "epoch": 0.9758014219782961, "grad_norm": 0.0, "learning_rate": 3.069999937018508e-08, "loss": 0.8872, "step": 7823 }, { "epoch": 0.9759261569165524, "grad_norm": 0.0, "learning_rate": 3.038448019983986e-08, "loss": 0.9167, "step": 7824 }, { "epoch": 0.9760508918548085, "grad_norm": 0.0, "learning_rate": 3.0070588322079765e-08, "loss": 0.8777, "step": 7825 }, { "epoch": 0.9761756267930647, "grad_norm": 0.0, "learning_rate": 2.9758323788140475e-08, "loss": 0.8657, "step": 7826 }, { "epoch": 0.976300361731321, "grad_norm": 0.0, "learning_rate": 2.9447686648990115e-08, "loss": 0.8782, "step": 7827 }, { "epoch": 0.9764250966695771, "grad_norm": 0.0, "learning_rate": 2.9138676955333676e-08, "loss": 0.9222, "step": 7828 }, { "epoch": 0.9765498316078334, "grad_norm": 0.0, "learning_rate": 2.8831294757609707e-08, "loss": 0.8781, "step": 7829 }, { "epoch": 0.9766745665460895, "grad_norm": 0.0, "learning_rate": 2.852554010598918e-08, "loss": 0.9293, "step": 7830 }, { "epoch": 0.9767993014843458, "grad_norm": 0.0, "learning_rate": 2.8221413050381064e-08, "loss": 0.9366, "step": 7831 }, { "epoch": 0.976924036422602, "grad_norm": 0.0, "learning_rate": 2.791891364042565e-08, "loss": 0.971, "step": 7832 }, { "epoch": 0.9770487713608582, "grad_norm": 0.0, "learning_rate": 2.7618041925496775e-08, "loss": 0.8669, "step": 7833 }, { "epoch": 0.9771735062991144, "grad_norm": 0.0, "learning_rate": 2.7318797954707378e-08, "loss": 0.8849, "step": 7834 }, { "epoch": 0.9772982412373706, "grad_norm": 0.0, "learning_rate": 2.7021181776899495e-08, "loss": 0.9066, "step": 7835 }, { "epoch": 0.9774229761756268, "grad_norm": 0.0, "learning_rate": 2.6725193440650942e-08, "loss": 0.9093, "step": 7836 }, { "epoch": 0.9775477111138829, "grad_norm": 0.0, "learning_rate": 2.643083299427751e-08, "loss": 0.9029, "step": 7837 }, { "epoch": 0.9776724460521392, "grad_norm": 0.0, "learning_rate": 2.6138100485822994e-08, "loss": 0.94, "step": 7838 }, { "epoch": 0.9777971809903954, "grad_norm": 0.0, "learning_rate": 2.5846995963071387e-08, "loss": 0.894, "step": 7839 }, { "epoch": 0.9779219159286516, "grad_norm": 0.0, "learning_rate": 2.5557519473535796e-08, "loss": 0.8974, "step": 7840 }, { "epoch": 0.9780466508669078, "grad_norm": 0.0, "learning_rate": 2.5269671064467315e-08, "loss": 0.8398, "step": 7841 }, { "epoch": 0.978171385805164, "grad_norm": 0.0, "learning_rate": 2.498345078285058e-08, "loss": 0.9112, "step": 7842 }, { "epoch": 0.9782961207434202, "grad_norm": 0.0, "learning_rate": 2.469885867540378e-08, "loss": 0.9315, "step": 7843 }, { "epoch": 0.9784208556816765, "grad_norm": 0.0, "learning_rate": 2.4415894788578642e-08, "loss": 0.9008, "step": 7844 }, { "epoch": 0.9785455906199326, "grad_norm": 0.0, "learning_rate": 2.4134559168562666e-08, "loss": 0.8574, "step": 7845 }, { "epoch": 0.9786703255581889, "grad_norm": 0.0, "learning_rate": 2.3854851861276895e-08, "loss": 0.9293, "step": 7846 }, { "epoch": 0.978795060496445, "grad_norm": 0.0, "learning_rate": 2.357677291237592e-08, "loss": 0.8705, "step": 7847 }, { "epoch": 0.9789197954347013, "grad_norm": 0.0, "learning_rate": 2.3300322367251215e-08, "loss": 0.8992, "step": 7848 }, { "epoch": 0.9790445303729575, "grad_norm": 0.0, "learning_rate": 2.302550027102335e-08, "loss": 0.9168, "step": 7849 }, { "epoch": 0.9791692653112136, "grad_norm": 0.0, "learning_rate": 2.2752306668553102e-08, "loss": 0.8829, "step": 7850 }, { "epoch": 0.9792940002494699, "grad_norm": 0.0, "learning_rate": 2.2480741604430368e-08, "loss": 0.9285, "step": 7851 }, { "epoch": 0.979418735187726, "grad_norm": 0.0, "learning_rate": 2.2210805122983013e-08, "loss": 0.8863, "step": 7852 }, { "epoch": 0.9795434701259823, "grad_norm": 0.0, "learning_rate": 2.1942497268270245e-08, "loss": 0.8889, "step": 7853 }, { "epoch": 0.9796682050642385, "grad_norm": 0.0, "learning_rate": 2.1675818084089252e-08, "loss": 0.8897, "step": 7854 }, { "epoch": 0.9797929400024947, "grad_norm": 0.0, "learning_rate": 2.1410767613965212e-08, "loss": 0.9029, "step": 7855 }, { "epoch": 0.9799176749407509, "grad_norm": 0.0, "learning_rate": 2.1147345901162407e-08, "loss": 0.9085, "step": 7856 }, { "epoch": 0.9800424098790071, "grad_norm": 0.0, "learning_rate": 2.088555298867978e-08, "loss": 0.9842, "step": 7857 }, { "epoch": 0.9801671448172633, "grad_norm": 0.0, "learning_rate": 2.062538891924537e-08, "loss": 0.859, "step": 7858 }, { "epoch": 0.9802918797555196, "grad_norm": 0.0, "learning_rate": 2.036685373532743e-08, "loss": 0.8708, "step": 7859 }, { "epoch": 0.9804166146937757, "grad_norm": 0.0, "learning_rate": 2.0109947479123315e-08, "loss": 0.9026, "step": 7860 }, { "epoch": 0.9805413496320319, "grad_norm": 0.0, "learning_rate": 1.985467019256726e-08, "loss": 0.8809, "step": 7861 }, { "epoch": 0.9806660845702881, "grad_norm": 0.0, "learning_rate": 1.9601021917327045e-08, "loss": 0.9482, "step": 7862 }, { "epoch": 0.9807908195085443, "grad_norm": 0.0, "learning_rate": 1.9349002694805107e-08, "loss": 0.8909, "step": 7863 }, { "epoch": 0.9809155544468006, "grad_norm": 0.0, "learning_rate": 1.909861256613632e-08, "loss": 0.8499, "step": 7864 }, { "epoch": 0.9810402893850567, "grad_norm": 0.0, "learning_rate": 1.8849851572191325e-08, "loss": 0.9245, "step": 7865 }, { "epoch": 0.981165024323313, "grad_norm": 0.0, "learning_rate": 1.8602719753573196e-08, "loss": 0.9164, "step": 7866 }, { "epoch": 0.9812897592615691, "grad_norm": 0.0, "learning_rate": 1.8357217150620775e-08, "loss": 0.8717, "step": 7867 }, { "epoch": 0.9814144941998254, "grad_norm": 0.0, "learning_rate": 1.811334380340757e-08, "loss": 0.8924, "step": 7868 }, { "epoch": 0.9815392291380816, "grad_norm": 0.0, "learning_rate": 1.7871099751737287e-08, "loss": 0.9328, "step": 7869 }, { "epoch": 0.9816639640763378, "grad_norm": 0.0, "learning_rate": 1.7630485035151635e-08, "loss": 0.9034, "step": 7870 }, { "epoch": 0.981788699014594, "grad_norm": 0.0, "learning_rate": 1.739149969292586e-08, "loss": 0.8827, "step": 7871 }, { "epoch": 0.9819134339528502, "grad_norm": 0.0, "learning_rate": 1.7154143764067653e-08, "loss": 0.8815, "step": 7872 }, { "epoch": 0.9820381688911064, "grad_norm": 0.0, "learning_rate": 1.6918417287318245e-08, "loss": 0.8992, "step": 7873 }, { "epoch": 0.9821629038293626, "grad_norm": 0.0, "learning_rate": 1.6684320301155744e-08, "loss": 0.9141, "step": 7874 }, { "epoch": 0.9822876387676188, "grad_norm": 0.0, "learning_rate": 1.6451852843789585e-08, "loss": 0.8656, "step": 7875 }, { "epoch": 0.982412373705875, "grad_norm": 0.0, "learning_rate": 1.622101495316497e-08, "loss": 0.9087, "step": 7876 }, { "epoch": 0.9825371086441312, "grad_norm": 0.0, "learning_rate": 1.599180666696065e-08, "loss": 0.8736, "step": 7877 }, { "epoch": 0.9826618435823874, "grad_norm": 0.0, "learning_rate": 1.5764228022588924e-08, "loss": 0.9154, "step": 7878 }, { "epoch": 0.9827865785206437, "grad_norm": 0.0, "learning_rate": 1.5538279057196736e-08, "loss": 0.8951, "step": 7879 }, { "epoch": 0.9829113134588998, "grad_norm": 0.0, "learning_rate": 1.5313959807663482e-08, "loss": 0.8482, "step": 7880 }, { "epoch": 0.9830360483971561, "grad_norm": 0.0, "learning_rate": 1.509127031060431e-08, "loss": 0.9096, "step": 7881 }, { "epoch": 0.9831607833354122, "grad_norm": 0.0, "learning_rate": 1.487021060236904e-08, "loss": 0.8942, "step": 7882 }, { "epoch": 0.9832855182736685, "grad_norm": 0.0, "learning_rate": 1.4650780719038804e-08, "loss": 0.8728, "step": 7883 }, { "epoch": 0.9834102532119247, "grad_norm": 0.0, "learning_rate": 1.44329806964294e-08, "loss": 0.9246, "step": 7884 }, { "epoch": 0.9835349881501808, "grad_norm": 0.0, "learning_rate": 1.4216810570093498e-08, "loss": 0.9511, "step": 7885 }, { "epoch": 0.9836597230884371, "grad_norm": 0.0, "learning_rate": 1.4002270375313986e-08, "loss": 0.9524, "step": 7886 }, { "epoch": 0.9837844580266932, "grad_norm": 0.0, "learning_rate": 1.378936014710841e-08, "loss": 0.9212, "step": 7887 }, { "epoch": 0.9839091929649495, "grad_norm": 0.0, "learning_rate": 1.3578079920231192e-08, "loss": 0.8966, "step": 7888 }, { "epoch": 0.9840339279032057, "grad_norm": 0.0, "learning_rate": 1.3368429729168075e-08, "loss": 0.8629, "step": 7889 }, { "epoch": 0.9841586628414619, "grad_norm": 0.0, "learning_rate": 1.3160409608137249e-08, "loss": 0.8892, "step": 7890 }, { "epoch": 0.9842833977797181, "grad_norm": 0.0, "learning_rate": 1.2954019591095989e-08, "loss": 0.9285, "step": 7891 }, { "epoch": 0.9844081327179743, "grad_norm": 0.0, "learning_rate": 1.2749259711729577e-08, "loss": 0.8867, "step": 7892 }, { "epoch": 0.9845328676562305, "grad_norm": 0.0, "learning_rate": 1.2546130003461276e-08, "loss": 0.9011, "step": 7893 }, { "epoch": 0.9846576025944868, "grad_norm": 0.0, "learning_rate": 1.2344630499447895e-08, "loss": 0.9644, "step": 7894 }, { "epoch": 0.9847823375327429, "grad_norm": 0.0, "learning_rate": 1.2144761232577573e-08, "loss": 0.9497, "step": 7895 }, { "epoch": 0.9849070724709992, "grad_norm": 0.0, "learning_rate": 1.1946522235474212e-08, "loss": 0.9315, "step": 7896 }, { "epoch": 0.9850318074092553, "grad_norm": 0.0, "learning_rate": 1.1749913540496372e-08, "loss": 0.886, "step": 7897 }, { "epoch": 0.9851565423475115, "grad_norm": 0.0, "learning_rate": 1.155493517973394e-08, "loss": 0.9215, "step": 7898 }, { "epoch": 0.9852812772857678, "grad_norm": 0.0, "learning_rate": 1.1361587185014788e-08, "loss": 0.8804, "step": 7899 }, { "epoch": 0.9854060122240239, "grad_norm": 0.0, "learning_rate": 1.1169869587895899e-08, "loss": 0.9167, "step": 7900 }, { "epoch": 0.9855307471622802, "grad_norm": 0.0, "learning_rate": 1.0979782419671126e-08, "loss": 0.835, "step": 7901 }, { "epoch": 0.9856554821005363, "grad_norm": 0.0, "learning_rate": 1.0791325711367872e-08, "loss": 0.8562, "step": 7902 }, { "epoch": 0.9857802170387926, "grad_norm": 0.0, "learning_rate": 1.060449949374709e-08, "loss": 0.8799, "step": 7903 }, { "epoch": 0.9859049519770487, "grad_norm": 0.0, "learning_rate": 1.041930379730327e-08, "loss": 0.8679, "step": 7904 }, { "epoch": 0.986029686915305, "grad_norm": 0.0, "learning_rate": 1.023573865226446e-08, "loss": 0.9221, "step": 7905 }, { "epoch": 0.9861544218535612, "grad_norm": 0.0, "learning_rate": 1.0053804088594465e-08, "loss": 0.9329, "step": 7906 }, { "epoch": 0.9862791567918174, "grad_norm": 0.0, "learning_rate": 9.873500135987312e-09, "loss": 0.9115, "step": 7907 }, { "epoch": 0.9864038917300736, "grad_norm": 0.0, "learning_rate": 9.694826823876125e-09, "loss": 0.8862, "step": 7908 }, { "epoch": 0.9865286266683297, "grad_norm": 0.0, "learning_rate": 9.517784181422018e-09, "loss": 0.9125, "step": 7909 }, { "epoch": 0.986653361606586, "grad_norm": 0.0, "learning_rate": 9.34237223752521e-09, "loss": 0.9544, "step": 7910 }, { "epoch": 0.9867780965448422, "grad_norm": 0.0, "learning_rate": 9.16859102081613e-09, "loss": 0.8789, "step": 7911 }, { "epoch": 0.9869028314830984, "grad_norm": 0.0, "learning_rate": 8.996440559659869e-09, "loss": 0.9045, "step": 7912 }, { "epoch": 0.9870275664213546, "grad_norm": 0.0, "learning_rate": 8.82592088215728e-09, "loss": 0.8678, "step": 7913 }, { "epoch": 0.9871523013596109, "grad_norm": 0.0, "learning_rate": 8.657032016139433e-09, "loss": 0.8873, "step": 7914 }, { "epoch": 0.987277036297867, "grad_norm": 0.0, "learning_rate": 8.489773989174276e-09, "loss": 0.9097, "step": 7915 }, { "epoch": 0.9874017712361233, "grad_norm": 0.0, "learning_rate": 8.324146828563307e-09, "loss": 0.9168, "step": 7916 }, { "epoch": 0.9875265061743794, "grad_norm": 0.0, "learning_rate": 8.160150561340452e-09, "loss": 0.8912, "step": 7917 }, { "epoch": 0.9876512411126357, "grad_norm": 0.0, "learning_rate": 7.997785214273191e-09, "loss": 0.9099, "step": 7918 }, { "epoch": 0.9877759760508918, "grad_norm": 0.0, "learning_rate": 7.837050813863655e-09, "loss": 0.8735, "step": 7919 }, { "epoch": 0.9879007109891481, "grad_norm": 0.0, "learning_rate": 7.677947386349749e-09, "loss": 0.9124, "step": 7920 }, { "epoch": 0.9880254459274043, "grad_norm": 0.0, "learning_rate": 7.520474957699586e-09, "loss": 0.9486, "step": 7921 }, { "epoch": 0.9881501808656604, "grad_norm": 0.0, "learning_rate": 7.3646335536159406e-09, "loss": 0.8811, "step": 7922 }, { "epoch": 0.9882749158039167, "grad_norm": 0.0, "learning_rate": 7.210423199538464e-09, "loss": 0.8515, "step": 7923 }, { "epoch": 0.9883996507421728, "grad_norm": 0.0, "learning_rate": 7.05784392063591e-09, "loss": 0.8952, "step": 7924 }, { "epoch": 0.9885243856804291, "grad_norm": 0.0, "learning_rate": 6.906895741813913e-09, "loss": 0.8921, "step": 7925 }, { "epoch": 0.9886491206186853, "grad_norm": 0.0, "learning_rate": 6.757578687710542e-09, "loss": 0.8559, "step": 7926 }, { "epoch": 0.9887738555569415, "grad_norm": 0.0, "learning_rate": 6.609892782699634e-09, "loss": 0.9066, "step": 7927 }, { "epoch": 0.9888985904951977, "grad_norm": 0.0, "learning_rate": 6.463838050885241e-09, "loss": 0.8654, "step": 7928 }, { "epoch": 0.989023325433454, "grad_norm": 0.0, "learning_rate": 6.319414516108291e-09, "loss": 0.8514, "step": 7929 }, { "epoch": 0.9891480603717101, "grad_norm": 0.0, "learning_rate": 6.176622201942151e-09, "loss": 0.9158, "step": 7930 }, { "epoch": 0.9892727953099664, "grad_norm": 0.0, "learning_rate": 6.035461131693732e-09, "loss": 0.8874, "step": 7931 }, { "epoch": 0.9893975302482225, "grad_norm": 0.0, "learning_rate": 5.895931328405713e-09, "loss": 0.918, "step": 7932 }, { "epoch": 0.9895222651864787, "grad_norm": 0.0, "learning_rate": 5.758032814850989e-09, "loss": 0.8788, "step": 7933 }, { "epoch": 0.9896470001247349, "grad_norm": 0.0, "learning_rate": 5.6217656135382214e-09, "loss": 0.9283, "step": 7934 }, { "epoch": 0.9897717350629911, "grad_norm": 0.0, "learning_rate": 5.4871297467107285e-09, "loss": 0.9546, "step": 7935 }, { "epoch": 0.9898964700012474, "grad_norm": 0.0, "learning_rate": 5.354125236343155e-09, "loss": 0.8538, "step": 7936 }, { "epoch": 0.9900212049395035, "grad_norm": 0.0, "learning_rate": 5.222752104147022e-09, "loss": 0.9094, "step": 7937 }, { "epoch": 0.9901459398777598, "grad_norm": 0.0, "learning_rate": 5.093010371564066e-09, "loss": 0.9857, "step": 7938 }, { "epoch": 0.9902706748160159, "grad_norm": 0.0, "learning_rate": 4.964900059771794e-09, "loss": 0.8774, "step": 7939 }, { "epoch": 0.9903954097542722, "grad_norm": 0.0, "learning_rate": 4.838421189681253e-09, "loss": 0.8955, "step": 7940 }, { "epoch": 0.9905201446925284, "grad_norm": 0.0, "learning_rate": 4.713573781938152e-09, "loss": 0.9069, "step": 7941 }, { "epoch": 0.9906448796307846, "grad_norm": 0.0, "learning_rate": 4.5903578569184146e-09, "loss": 0.9288, "step": 7942 }, { "epoch": 0.9907696145690408, "grad_norm": 0.0, "learning_rate": 4.468773434734841e-09, "loss": 0.9408, "step": 7943 }, { "epoch": 0.990894349507297, "grad_norm": 0.0, "learning_rate": 4.348820535233778e-09, "loss": 0.8892, "step": 7944 }, { "epoch": 0.9910190844455532, "grad_norm": 0.0, "learning_rate": 4.230499177994007e-09, "loss": 0.8844, "step": 7945 }, { "epoch": 0.9911438193838094, "grad_norm": 0.0, "learning_rate": 4.1138093823289705e-09, "loss": 0.9492, "step": 7946 }, { "epoch": 0.9912685543220656, "grad_norm": 0.0, "learning_rate": 3.998751167284543e-09, "loss": 0.885, "step": 7947 }, { "epoch": 0.9913932892603218, "grad_norm": 0.0, "learning_rate": 3.885324551642367e-09, "loss": 0.9219, "step": 7948 }, { "epoch": 0.991518024198578, "grad_norm": 0.0, "learning_rate": 3.773529553916522e-09, "loss": 0.8798, "step": 7949 }, { "epoch": 0.9916427591368342, "grad_norm": 0.0, "learning_rate": 3.6633661923524133e-09, "loss": 0.8823, "step": 7950 }, { "epoch": 0.9917674940750905, "grad_norm": 0.0, "learning_rate": 3.5548344849345438e-09, "loss": 0.8941, "step": 7951 }, { "epoch": 0.9918922290133466, "grad_norm": 0.0, "learning_rate": 3.447934449375412e-09, "loss": 0.8966, "step": 7952 }, { "epoch": 0.9920169639516029, "grad_norm": 0.0, "learning_rate": 3.3426661031255024e-09, "loss": 0.902, "step": 7953 }, { "epoch": 0.992141698889859, "grad_norm": 0.0, "learning_rate": 3.239029463367738e-09, "loss": 0.9254, "step": 7954 }, { "epoch": 0.9922664338281153, "grad_norm": 0.0, "learning_rate": 3.137024547016365e-09, "loss": 0.8662, "step": 7955 }, { "epoch": 0.9923911687663715, "grad_norm": 0.0, "learning_rate": 3.0366513707213995e-09, "loss": 0.865, "step": 7956 }, { "epoch": 0.9925159037046276, "grad_norm": 0.0, "learning_rate": 2.9379099508675125e-09, "loss": 0.8738, "step": 7957 }, { "epoch": 0.9926406386428839, "grad_norm": 0.0, "learning_rate": 2.8408003035718114e-09, "loss": 0.9308, "step": 7958 }, { "epoch": 0.99276537358114, "grad_norm": 0.0, "learning_rate": 2.7453224446838402e-09, "loss": 0.8843, "step": 7959 }, { "epoch": 0.9928901085193963, "grad_norm": 0.0, "learning_rate": 2.6514763897889096e-09, "loss": 0.9275, "step": 7960 }, { "epoch": 0.9930148434576525, "grad_norm": 0.0, "learning_rate": 2.559262154204767e-09, "loss": 0.8948, "step": 7961 }, { "epoch": 0.9931395783959087, "grad_norm": 0.0, "learning_rate": 2.4686797529827055e-09, "loss": 0.9405, "step": 7962 }, { "epoch": 0.9932643133341649, "grad_norm": 0.0, "learning_rate": 2.379729200908676e-09, "loss": 0.9327, "step": 7963 }, { "epoch": 0.9933890482724211, "grad_norm": 0.0, "learning_rate": 2.2924105125021746e-09, "loss": 0.8432, "step": 7964 }, { "epoch": 0.9935137832106773, "grad_norm": 0.0, "learning_rate": 2.2067237020140243e-09, "loss": 0.8749, "step": 7965 }, { "epoch": 0.9936385181489336, "grad_norm": 0.0, "learning_rate": 2.122668783431925e-09, "loss": 0.8908, "step": 7966 }, { "epoch": 0.9937632530871897, "grad_norm": 0.0, "learning_rate": 2.0402457704749024e-09, "loss": 0.8775, "step": 7967 }, { "epoch": 0.993887988025446, "grad_norm": 0.0, "learning_rate": 1.9594546765977495e-09, "loss": 0.8641, "step": 7968 }, { "epoch": 0.9940127229637021, "grad_norm": 0.0, "learning_rate": 1.8802955149865854e-09, "loss": 0.9346, "step": 7969 }, { "epoch": 0.9941374579019583, "grad_norm": 0.0, "learning_rate": 1.8027682985610751e-09, "loss": 0.928, "step": 7970 }, { "epoch": 0.9942621928402146, "grad_norm": 0.0, "learning_rate": 1.7268730399777612e-09, "loss": 0.894, "step": 7971 }, { "epoch": 0.9943869277784707, "grad_norm": 0.0, "learning_rate": 1.652609751624512e-09, "loss": 0.8259, "step": 7972 }, { "epoch": 0.994511662716727, "grad_norm": 0.0, "learning_rate": 1.5799784456216328e-09, "loss": 0.8796, "step": 7973 }, { "epoch": 0.9946363976549831, "grad_norm": 0.0, "learning_rate": 1.5089791338251947e-09, "loss": 0.9803, "step": 7974 }, { "epoch": 0.9947611325932394, "grad_norm": 0.0, "learning_rate": 1.4396118278237058e-09, "loss": 0.8724, "step": 7975 }, { "epoch": 0.9948858675314955, "grad_norm": 0.0, "learning_rate": 1.3718765389392208e-09, "loss": 0.9159, "step": 7976 }, { "epoch": 0.9950106024697518, "grad_norm": 0.0, "learning_rate": 1.3057732782295606e-09, "loss": 0.906, "step": 7977 }, { "epoch": 0.995135337408008, "grad_norm": 0.0, "learning_rate": 1.2413020564827626e-09, "loss": 0.8765, "step": 7978 }, { "epoch": 0.9952600723462642, "grad_norm": 0.0, "learning_rate": 1.178462884222631e-09, "loss": 0.9213, "step": 7979 }, { "epoch": 0.9953848072845204, "grad_norm": 0.0, "learning_rate": 1.1172557717065157e-09, "loss": 0.8926, "step": 7980 }, { "epoch": 0.9955095422227765, "grad_norm": 0.0, "learning_rate": 1.0576807289253143e-09, "loss": 0.9095, "step": 7981 }, { "epoch": 0.9956342771610328, "grad_norm": 0.0, "learning_rate": 9.997377656012496e-10, "loss": 0.8696, "step": 7982 }, { "epoch": 0.995759012099289, "grad_norm": 0.0, "learning_rate": 9.43426891193422e-10, "loss": 0.9217, "step": 7983 }, { "epoch": 0.9958837470375452, "grad_norm": 0.0, "learning_rate": 8.887481148944776e-10, "loss": 0.9402, "step": 7984 }, { "epoch": 0.9960084819758014, "grad_norm": 0.0, "learning_rate": 8.357014456272794e-10, "loss": 0.8834, "step": 7985 }, { "epoch": 0.9961332169140577, "grad_norm": 0.0, "learning_rate": 7.842868920504564e-10, "loss": 0.8635, "step": 7986 }, { "epoch": 0.9962579518523138, "grad_norm": 0.0, "learning_rate": 7.34504462557295e-10, "loss": 0.8803, "step": 7987 }, { "epoch": 0.9963826867905701, "grad_norm": 0.0, "learning_rate": 6.863541652735173e-10, "loss": 0.8678, "step": 7988 }, { "epoch": 0.9965074217288262, "grad_norm": 0.0, "learning_rate": 6.398360080572819e-10, "loss": 0.9099, "step": 7989 }, { "epoch": 0.9966321566670825, "grad_norm": 0.0, "learning_rate": 5.949499985025142e-10, "loss": 0.9496, "step": 7990 }, { "epoch": 0.9967568916053386, "grad_norm": 0.0, "learning_rate": 5.516961439355761e-10, "loss": 0.9352, "step": 7991 }, { "epoch": 0.9968816265435949, "grad_norm": 0.0, "learning_rate": 5.100744514163758e-10, "loss": 0.9465, "step": 7992 }, { "epoch": 0.9970063614818511, "grad_norm": 0.0, "learning_rate": 4.700849277383679e-10, "loss": 0.9046, "step": 7993 }, { "epoch": 0.9971310964201072, "grad_norm": 0.0, "learning_rate": 4.317275794296638e-10, "loss": 0.94, "step": 7994 }, { "epoch": 0.9972558313583635, "grad_norm": 0.0, "learning_rate": 3.950024127508112e-10, "loss": 0.943, "step": 7995 }, { "epoch": 0.9973805662966196, "grad_norm": 0.0, "learning_rate": 3.5990943369590413e-10, "loss": 0.9167, "step": 7996 }, { "epoch": 0.9975053012348759, "grad_norm": 0.0, "learning_rate": 3.2644864799369346e-10, "loss": 0.8945, "step": 7997 }, { "epoch": 0.9976300361731321, "grad_norm": 0.0, "learning_rate": 2.94620061104256e-10, "loss": 0.9371, "step": 7998 }, { "epoch": 0.9977547711113883, "grad_norm": 0.0, "learning_rate": 2.64423678225656e-10, "loss": 0.8776, "step": 7999 }, { "epoch": 0.9978795060496445, "grad_norm": 0.0, "learning_rate": 2.358595042839529e-10, "loss": 0.8685, "step": 8000 }, { "epoch": 0.9980042409879007, "grad_norm": 0.0, "learning_rate": 2.0892754394208348e-10, "loss": 0.9104, "step": 8001 }, { "epoch": 0.9981289759261569, "grad_norm": 0.0, "learning_rate": 1.8362780159764115e-10, "loss": 0.8852, "step": 8002 }, { "epoch": 0.9982537108644132, "grad_norm": 0.0, "learning_rate": 1.5996028137843511e-10, "loss": 0.8884, "step": 8003 }, { "epoch": 0.9983784458026693, "grad_norm": 0.0, "learning_rate": 1.379249871491517e-10, "loss": 0.8657, "step": 8004 }, { "epoch": 0.9985031807409255, "grad_norm": 0.0, "learning_rate": 1.1752192250469307e-10, "loss": 0.8889, "step": 8005 }, { "epoch": 0.9986279156791817, "grad_norm": 0.0, "learning_rate": 9.875109077683853e-11, "loss": 0.898, "step": 8006 }, { "epoch": 0.9987526506174379, "grad_norm": 0.0, "learning_rate": 8.16124950286934e-11, "loss": 0.8613, "step": 8007 }, { "epoch": 0.9988773855556942, "grad_norm": 0.0, "learning_rate": 6.610613805690947e-11, "loss": 0.9277, "step": 8008 }, { "epoch": 0.9990021204939503, "grad_norm": 0.0, "learning_rate": 5.223202239501568e-11, "loss": 0.9622, "step": 8009 }, { "epoch": 0.9991268554322066, "grad_norm": 0.0, "learning_rate": 3.999015030564657e-11, "loss": 0.9087, "step": 8010 }, { "epoch": 0.9992515903704627, "grad_norm": 0.0, "learning_rate": 2.938052378720357e-11, "loss": 0.8766, "step": 8011 }, { "epoch": 0.999376325308719, "grad_norm": 0.0, "learning_rate": 2.0403144571634615e-11, "loss": 0.8852, "step": 8012 }, { "epoch": 0.9995010602469752, "grad_norm": 0.0, "learning_rate": 1.3058014124434082e-11, "loss": 0.906, "step": 8013 }, { "epoch": 0.9996257951852314, "grad_norm": 0.0, "learning_rate": 7.345133644642843e-12, "loss": 0.8907, "step": 8014 }, { "epoch": 0.9997505301234876, "grad_norm": 0.0, "learning_rate": 3.2645040637380163e-12, "loss": 0.941, "step": 8015 }, { "epoch": 0.9998752650617438, "grad_norm": 0.0, "learning_rate": 8.161260489636391e-13, "loss": 0.9176, "step": 8016 }, { "epoch": 1.0, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.8931, "step": 8017 }, { "epoch": 1.0, "step": 8017, "total_flos": 7.133618647935201e+20, "train_loss": 1.0184825783405158, "train_runtime": 70667.3144, "train_samples_per_second": 116.156, "train_steps_per_second": 0.113 } ], "logging_steps": 1.0, "max_steps": 8017, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.133618647935201e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }