{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.33104200338831224, "eval_steps": 500, "global_step": 1700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00019473059022841898, "grad_norm": 0.97265625, "learning_rate": 1.0000000000000002e-06, "loss": 3.0625, "step": 1 }, { "epoch": 0.00038946118045683797, "grad_norm": 0.95703125, "learning_rate": 2.0000000000000003e-06, "loss": 3.0592, "step": 2 }, { "epoch": 0.0005841917706852569, "grad_norm": 0.91796875, "learning_rate": 3e-06, "loss": 3.0624, "step": 3 }, { "epoch": 0.0007789223609136759, "grad_norm": 0.8515625, "learning_rate": 4.000000000000001e-06, "loss": 3.0779, "step": 4 }, { "epoch": 0.0009736529511420949, "grad_norm": 0.82421875, "learning_rate": 5e-06, "loss": 3.0704, "step": 5 }, { "epoch": 0.0011683835413705138, "grad_norm": 0.84375, "learning_rate": 6e-06, "loss": 3.0795, "step": 6 }, { "epoch": 0.0013631141315989328, "grad_norm": 0.94140625, "learning_rate": 7e-06, "loss": 3.0531, "step": 7 }, { "epoch": 0.0015578447218273519, "grad_norm": 0.91015625, "learning_rate": 8.000000000000001e-06, "loss": 3.0557, "step": 8 }, { "epoch": 0.001752575312055771, "grad_norm": 0.80859375, "learning_rate": 9e-06, "loss": 3.0463, "step": 9 }, { "epoch": 0.0019473059022841897, "grad_norm": 0.77734375, "learning_rate": 1e-05, "loss": 3.0676, "step": 10 }, { "epoch": 0.0021420364925126088, "grad_norm": 0.83203125, "learning_rate": 1.1000000000000001e-05, "loss": 3.0434, "step": 11 }, { "epoch": 0.0023367670827410276, "grad_norm": 1.0390625, "learning_rate": 1.2e-05, "loss": 3.0498, "step": 12 }, { "epoch": 0.002531497672969447, "grad_norm": 1.71875, "learning_rate": 1.3000000000000001e-05, "loss": 3.0523, "step": 13 }, { "epoch": 0.0027262282631978657, "grad_norm": 1.3671875, "learning_rate": 1.4e-05, "loss": 3.0558, "step": 14 }, { "epoch": 0.002920958853426285, "grad_norm": 0.90234375, "learning_rate": 1.5000000000000002e-05, "loss": 3.0365, "step": 15 }, { "epoch": 0.0031156894436547037, "grad_norm": 0.85546875, "learning_rate": 1.6000000000000003e-05, "loss": 3.0144, "step": 16 }, { "epoch": 0.0033104200338831226, "grad_norm": 0.95703125, "learning_rate": 1.7e-05, "loss": 3.0222, "step": 17 }, { "epoch": 0.003505150624111542, "grad_norm": 0.95703125, "learning_rate": 1.8e-05, "loss": 3.018, "step": 18 }, { "epoch": 0.0036998812143399606, "grad_norm": 1.2421875, "learning_rate": 1.9e-05, "loss": 3.0269, "step": 19 }, { "epoch": 0.0038946118045683794, "grad_norm": 2.015625, "learning_rate": 2e-05, "loss": 3.0331, "step": 20 }, { "epoch": 0.004089342394796799, "grad_norm": 1.1640625, "learning_rate": 2.1000000000000002e-05, "loss": 3.0341, "step": 21 }, { "epoch": 0.0042840729850252175, "grad_norm": 0.99609375, "learning_rate": 2.2000000000000003e-05, "loss": 2.9996, "step": 22 }, { "epoch": 0.004478803575253636, "grad_norm": 1.2734375, "learning_rate": 2.3e-05, "loss": 3.022, "step": 23 }, { "epoch": 0.004673534165482055, "grad_norm": 1.796875, "learning_rate": 2.4e-05, "loss": 2.9947, "step": 24 }, { "epoch": 0.004868264755710475, "grad_norm": 1.8203125, "learning_rate": 2.5e-05, "loss": 3.0017, "step": 25 }, { "epoch": 0.005062995345938894, "grad_norm": 1.1171875, "learning_rate": 2.6000000000000002e-05, "loss": 3.0014, "step": 26 }, { "epoch": 0.0052577259361673125, "grad_norm": 1.4921875, "learning_rate": 2.7000000000000002e-05, "loss": 3.0155, "step": 27 }, { "epoch": 0.005452456526395731, "grad_norm": 1.609375, "learning_rate": 2.8e-05, "loss": 3.0046, "step": 28 }, { "epoch": 0.00564718711662415, "grad_norm": 1.4765625, "learning_rate": 2.9e-05, "loss": 3.0053, "step": 29 }, { "epoch": 0.00584191770685257, "grad_norm": 1.7578125, "learning_rate": 3.0000000000000004e-05, "loss": 2.9963, "step": 30 }, { "epoch": 0.006036648297080989, "grad_norm": 2.09375, "learning_rate": 3.1e-05, "loss": 2.9719, "step": 31 }, { "epoch": 0.0062313788873094075, "grad_norm": 1.5, "learning_rate": 3.2000000000000005e-05, "loss": 2.9868, "step": 32 }, { "epoch": 0.006426109477537826, "grad_norm": 1.8984375, "learning_rate": 3.3e-05, "loss": 2.9856, "step": 33 }, { "epoch": 0.006620840067766245, "grad_norm": 1.5390625, "learning_rate": 3.4e-05, "loss": 3.0007, "step": 34 }, { "epoch": 0.006815570657994665, "grad_norm": 1.359375, "learning_rate": 3.5000000000000004e-05, "loss": 2.9758, "step": 35 }, { "epoch": 0.007010301248223084, "grad_norm": 2.359375, "learning_rate": 3.6e-05, "loss": 2.9728, "step": 36 }, { "epoch": 0.007205031838451502, "grad_norm": 2.0625, "learning_rate": 3.7000000000000005e-05, "loss": 2.9665, "step": 37 }, { "epoch": 0.007399762428679921, "grad_norm": 1.8984375, "learning_rate": 3.8e-05, "loss": 2.9688, "step": 38 }, { "epoch": 0.00759449301890834, "grad_norm": 1.21875, "learning_rate": 3.9e-05, "loss": 2.988, "step": 39 }, { "epoch": 0.007789223609136759, "grad_norm": 2.0, "learning_rate": 4e-05, "loss": 2.9744, "step": 40 }, { "epoch": 0.007983954199365179, "grad_norm": 1.8359375, "learning_rate": 3.9999996198006636e-05, "loss": 2.9476, "step": 41 }, { "epoch": 0.008178684789593597, "grad_norm": 2.75, "learning_rate": 3.9999984792027976e-05, "loss": 2.942, "step": 42 }, { "epoch": 0.008373415379822016, "grad_norm": 1.578125, "learning_rate": 3.999996578206837e-05, "loss": 2.9624, "step": 43 }, { "epoch": 0.008568145970050435, "grad_norm": 3.15625, "learning_rate": 3.999993916813503e-05, "loss": 2.966, "step": 44 }, { "epoch": 0.008762876560278854, "grad_norm": 1.515625, "learning_rate": 3.9999904950238085e-05, "loss": 2.9464, "step": 45 }, { "epoch": 0.008957607150507273, "grad_norm": 4.25, "learning_rate": 3.9999863128390545e-05, "loss": 2.9666, "step": 46 }, { "epoch": 0.009152337740735692, "grad_norm": 3.15625, "learning_rate": 3.9999813702608305e-05, "loss": 2.9594, "step": 47 }, { "epoch": 0.00934706833096411, "grad_norm": 3.703125, "learning_rate": 3.9999756672910155e-05, "loss": 2.9664, "step": 48 }, { "epoch": 0.009541798921192531, "grad_norm": 3.296875, "learning_rate": 3.999969203931779e-05, "loss": 2.9415, "step": 49 }, { "epoch": 0.00973652951142095, "grad_norm": 2.890625, "learning_rate": 3.9999619801855775e-05, "loss": 2.9606, "step": 50 }, { "epoch": 0.009931260101649369, "grad_norm": 2.8125, "learning_rate": 3.999953996055157e-05, "loss": 2.9471, "step": 51 }, { "epoch": 0.010125990691877787, "grad_norm": 2.328125, "learning_rate": 3.9999452515435545e-05, "loss": 2.9346, "step": 52 }, { "epoch": 0.010320721282106206, "grad_norm": 3.296875, "learning_rate": 3.9999357466540934e-05, "loss": 2.9223, "step": 53 }, { "epoch": 0.010515451872334625, "grad_norm": 2.265625, "learning_rate": 3.999925481390388e-05, "loss": 2.9388, "step": 54 }, { "epoch": 0.010710182462563044, "grad_norm": 3.265625, "learning_rate": 3.99991445575634e-05, "loss": 2.9453, "step": 55 }, { "epoch": 0.010904913052791463, "grad_norm": 2.265625, "learning_rate": 3.9999026697561426e-05, "loss": 2.9142, "step": 56 }, { "epoch": 0.011099643643019881, "grad_norm": 2.9375, "learning_rate": 3.999890123394277e-05, "loss": 2.9351, "step": 57 }, { "epoch": 0.0112943742332483, "grad_norm": 2.40625, "learning_rate": 3.999876816675513e-05, "loss": 2.9431, "step": 58 }, { "epoch": 0.01148910482347672, "grad_norm": 1.8046875, "learning_rate": 3.99986274960491e-05, "loss": 2.9416, "step": 59 }, { "epoch": 0.01168383541370514, "grad_norm": 4.28125, "learning_rate": 3.9998479221878155e-05, "loss": 2.9497, "step": 60 }, { "epoch": 0.011878566003933558, "grad_norm": 3.3125, "learning_rate": 3.999832334429867e-05, "loss": 2.9299, "step": 61 }, { "epoch": 0.012073296594161977, "grad_norm": 5.0625, "learning_rate": 3.9998159863369916e-05, "loss": 2.951, "step": 62 }, { "epoch": 0.012268027184390396, "grad_norm": 4.25, "learning_rate": 3.999798877915404e-05, "loss": 2.9284, "step": 63 }, { "epoch": 0.012462757774618815, "grad_norm": 3.984375, "learning_rate": 3.99978100917161e-05, "loss": 2.9504, "step": 64 }, { "epoch": 0.012657488364847234, "grad_norm": 3.921875, "learning_rate": 3.999762380112403e-05, "loss": 2.927, "step": 65 }, { "epoch": 0.012852218955075653, "grad_norm": 2.0625, "learning_rate": 3.999742990744864e-05, "loss": 2.9422, "step": 66 }, { "epoch": 0.013046949545304071, "grad_norm": 2.140625, "learning_rate": 3.9997228410763675e-05, "loss": 2.9264, "step": 67 }, { "epoch": 0.01324168013553249, "grad_norm": 3.15625, "learning_rate": 3.9997019311145724e-05, "loss": 2.9341, "step": 68 }, { "epoch": 0.013436410725760909, "grad_norm": 2.6875, "learning_rate": 3.999680260867429e-05, "loss": 2.9183, "step": 69 }, { "epoch": 0.01363114131598933, "grad_norm": 3.96875, "learning_rate": 3.9996578303431775e-05, "loss": 2.9261, "step": 70 }, { "epoch": 0.013825871906217748, "grad_norm": 3.484375, "learning_rate": 3.999634639550344e-05, "loss": 2.9119, "step": 71 }, { "epoch": 0.014020602496446167, "grad_norm": 2.765625, "learning_rate": 3.999610688497748e-05, "loss": 2.9234, "step": 72 }, { "epoch": 0.014215333086674586, "grad_norm": 2.546875, "learning_rate": 3.9995859771944936e-05, "loss": 2.9181, "step": 73 }, { "epoch": 0.014410063676903005, "grad_norm": 3.3125, "learning_rate": 3.9995605056499775e-05, "loss": 2.8889, "step": 74 }, { "epoch": 0.014604794267131424, "grad_norm": 3.0625, "learning_rate": 3.9995342738738824e-05, "loss": 2.9338, "step": 75 }, { "epoch": 0.014799524857359843, "grad_norm": 3.265625, "learning_rate": 3.9995072818761836e-05, "loss": 2.9161, "step": 76 }, { "epoch": 0.014994255447588261, "grad_norm": 3.328125, "learning_rate": 3.999479529667142e-05, "loss": 2.9041, "step": 77 }, { "epoch": 0.01518898603781668, "grad_norm": 1.84375, "learning_rate": 3.999451017257309e-05, "loss": 2.9224, "step": 78 }, { "epoch": 0.015383716628045099, "grad_norm": 1.6484375, "learning_rate": 3.999421744657525e-05, "loss": 2.9374, "step": 79 }, { "epoch": 0.015578447218273518, "grad_norm": 3.484375, "learning_rate": 3.9993917118789205e-05, "loss": 2.9133, "step": 80 }, { "epoch": 0.01577317780850194, "grad_norm": 2.5625, "learning_rate": 3.999360918932913e-05, "loss": 2.9206, "step": 81 }, { "epoch": 0.015967908398730357, "grad_norm": 4.125, "learning_rate": 3.99932936583121e-05, "loss": 2.9109, "step": 82 }, { "epoch": 0.016162638988958776, "grad_norm": 4.125, "learning_rate": 3.9992970525858084e-05, "loss": 2.8969, "step": 83 }, { "epoch": 0.016357369579187195, "grad_norm": 1.2578125, "learning_rate": 3.999263979208993e-05, "loss": 2.899, "step": 84 }, { "epoch": 0.016552100169415614, "grad_norm": 2.484375, "learning_rate": 3.9992301457133384e-05, "loss": 2.9096, "step": 85 }, { "epoch": 0.016746830759644032, "grad_norm": 1.796875, "learning_rate": 3.999195552111709e-05, "loss": 2.894, "step": 86 }, { "epoch": 0.01694156134987245, "grad_norm": 1.578125, "learning_rate": 3.999160198417256e-05, "loss": 2.8972, "step": 87 }, { "epoch": 0.01713629194010087, "grad_norm": 2.671875, "learning_rate": 3.999124084643422e-05, "loss": 2.9006, "step": 88 }, { "epoch": 0.01733102253032929, "grad_norm": 1.4453125, "learning_rate": 3.9990872108039364e-05, "loss": 2.8865, "step": 89 }, { "epoch": 0.017525753120557708, "grad_norm": 4.65625, "learning_rate": 3.999049576912819e-05, "loss": 2.929, "step": 90 }, { "epoch": 0.017720483710786127, "grad_norm": 4.09375, "learning_rate": 3.9990111829843795e-05, "loss": 2.9034, "step": 91 }, { "epoch": 0.017915214301014545, "grad_norm": 3.046875, "learning_rate": 3.9989720290332126e-05, "loss": 2.9342, "step": 92 }, { "epoch": 0.018109944891242964, "grad_norm": 3.0, "learning_rate": 3.998932115074207e-05, "loss": 2.9049, "step": 93 }, { "epoch": 0.018304675481471383, "grad_norm": 2.625, "learning_rate": 3.9988914411225366e-05, "loss": 2.8866, "step": 94 }, { "epoch": 0.018499406071699802, "grad_norm": 2.375, "learning_rate": 3.998850007193666e-05, "loss": 2.9089, "step": 95 }, { "epoch": 0.01869413666192822, "grad_norm": 2.921875, "learning_rate": 3.998807813303348e-05, "loss": 2.9015, "step": 96 }, { "epoch": 0.018888867252156643, "grad_norm": 2.40625, "learning_rate": 3.9987648594676246e-05, "loss": 2.8994, "step": 97 }, { "epoch": 0.019083597842385062, "grad_norm": 3.546875, "learning_rate": 3.998721145702829e-05, "loss": 2.9017, "step": 98 }, { "epoch": 0.01927832843261348, "grad_norm": 3.09375, "learning_rate": 3.9986766720255786e-05, "loss": 2.9012, "step": 99 }, { "epoch": 0.0194730590228419, "grad_norm": 2.65625, "learning_rate": 3.9986314384527823e-05, "loss": 2.8928, "step": 100 }, { "epoch": 0.019667789613070318, "grad_norm": 2.53125, "learning_rate": 3.998585445001639e-05, "loss": 2.8996, "step": 101 }, { "epoch": 0.019862520203298737, "grad_norm": 2.546875, "learning_rate": 3.9985386916896354e-05, "loss": 2.9069, "step": 102 }, { "epoch": 0.020057250793527156, "grad_norm": 2.203125, "learning_rate": 3.998491178534546e-05, "loss": 2.9112, "step": 103 }, { "epoch": 0.020251981383755575, "grad_norm": 3.09375, "learning_rate": 3.998442905554437e-05, "loss": 2.8826, "step": 104 }, { "epoch": 0.020446711973983994, "grad_norm": 2.78125, "learning_rate": 3.9983938727676605e-05, "loss": 2.9088, "step": 105 }, { "epoch": 0.020641442564212412, "grad_norm": 2.59375, "learning_rate": 3.998344080192858e-05, "loss": 2.878, "step": 106 }, { "epoch": 0.02083617315444083, "grad_norm": 2.46875, "learning_rate": 3.998293527848962e-05, "loss": 2.901, "step": 107 }, { "epoch": 0.02103090374466925, "grad_norm": 2.359375, "learning_rate": 3.998242215755193e-05, "loss": 2.8678, "step": 108 }, { "epoch": 0.02122563433489767, "grad_norm": 2.0625, "learning_rate": 3.9981901439310575e-05, "loss": 2.8947, "step": 109 }, { "epoch": 0.021420364925126088, "grad_norm": 3.0, "learning_rate": 3.998137312396355e-05, "loss": 2.8832, "step": 110 }, { "epoch": 0.021615095515354506, "grad_norm": 2.5, "learning_rate": 3.998083721171172e-05, "loss": 2.8863, "step": 111 }, { "epoch": 0.021809826105582925, "grad_norm": 3.0625, "learning_rate": 3.998029370275883e-05, "loss": 2.8958, "step": 112 }, { "epoch": 0.022004556695811344, "grad_norm": 2.859375, "learning_rate": 3.997974259731153e-05, "loss": 2.8797, "step": 113 }, { "epoch": 0.022199287286039763, "grad_norm": 2.359375, "learning_rate": 3.997918389557933e-05, "loss": 2.8945, "step": 114 }, { "epoch": 0.02239401787626818, "grad_norm": 2.125, "learning_rate": 3.997861759777468e-05, "loss": 2.8896, "step": 115 }, { "epoch": 0.0225887484664966, "grad_norm": 2.875, "learning_rate": 3.997804370411286e-05, "loss": 2.8889, "step": 116 }, { "epoch": 0.02278347905672502, "grad_norm": 2.578125, "learning_rate": 3.997746221481208e-05, "loss": 2.9048, "step": 117 }, { "epoch": 0.02297820964695344, "grad_norm": 2.734375, "learning_rate": 3.997687313009341e-05, "loss": 2.8841, "step": 118 }, { "epoch": 0.02317294023718186, "grad_norm": 2.515625, "learning_rate": 3.9976276450180833e-05, "loss": 2.8869, "step": 119 }, { "epoch": 0.02336767082741028, "grad_norm": 2.640625, "learning_rate": 3.9975672175301194e-05, "loss": 2.8733, "step": 120 }, { "epoch": 0.023562401417638698, "grad_norm": 2.390625, "learning_rate": 3.997506030568424e-05, "loss": 2.891, "step": 121 }, { "epoch": 0.023757132007867117, "grad_norm": 2.859375, "learning_rate": 3.997444084156261e-05, "loss": 2.8863, "step": 122 }, { "epoch": 0.023951862598095536, "grad_norm": 2.671875, "learning_rate": 3.9973813783171815e-05, "loss": 2.8925, "step": 123 }, { "epoch": 0.024146593188323955, "grad_norm": 2.625, "learning_rate": 3.997317913075027e-05, "loss": 2.8833, "step": 124 }, { "epoch": 0.024341323778552373, "grad_norm": 2.359375, "learning_rate": 3.997253688453927e-05, "loss": 2.8847, "step": 125 }, { "epoch": 0.024536054368780792, "grad_norm": 2.578125, "learning_rate": 3.9971887044782985e-05, "loss": 2.888, "step": 126 }, { "epoch": 0.02473078495900921, "grad_norm": 2.125, "learning_rate": 3.997122961172849e-05, "loss": 2.8671, "step": 127 }, { "epoch": 0.02492551554923763, "grad_norm": 3.015625, "learning_rate": 3.9970564585625754e-05, "loss": 2.8693, "step": 128 }, { "epoch": 0.02512024613946605, "grad_norm": 2.65625, "learning_rate": 3.9969891966727593e-05, "loss": 2.8651, "step": 129 }, { "epoch": 0.025314976729694467, "grad_norm": 2.640625, "learning_rate": 3.996921175528976e-05, "loss": 2.8721, "step": 130 }, { "epoch": 0.025509707319922886, "grad_norm": 2.40625, "learning_rate": 3.996852395157086e-05, "loss": 2.8592, "step": 131 }, { "epoch": 0.025704437910151305, "grad_norm": 2.609375, "learning_rate": 3.9967828555832395e-05, "loss": 2.8701, "step": 132 }, { "epoch": 0.025899168500379724, "grad_norm": 2.34375, "learning_rate": 3.996712556833876e-05, "loss": 2.8901, "step": 133 }, { "epoch": 0.026093899090608143, "grad_norm": 2.5625, "learning_rate": 3.9966414989357226e-05, "loss": 2.8688, "step": 134 }, { "epoch": 0.02628862968083656, "grad_norm": 2.25, "learning_rate": 3.996569681915795e-05, "loss": 2.8651, "step": 135 }, { "epoch": 0.02648336027106498, "grad_norm": 2.46875, "learning_rate": 3.9964971058013986e-05, "loss": 2.872, "step": 136 }, { "epoch": 0.0266780908612934, "grad_norm": 2.125, "learning_rate": 3.996423770620127e-05, "loss": 2.8867, "step": 137 }, { "epoch": 0.026872821451521818, "grad_norm": 2.671875, "learning_rate": 3.996349676399862e-05, "loss": 2.9028, "step": 138 }, { "epoch": 0.027067552041750237, "grad_norm": 2.265625, "learning_rate": 3.996274823168774e-05, "loss": 2.8654, "step": 139 }, { "epoch": 0.02726228263197866, "grad_norm": 2.734375, "learning_rate": 3.996199210955322e-05, "loss": 2.8561, "step": 140 }, { "epoch": 0.027457013222207078, "grad_norm": 2.453125, "learning_rate": 3.996122839788254e-05, "loss": 2.8661, "step": 141 }, { "epoch": 0.027651743812435497, "grad_norm": 2.40625, "learning_rate": 3.996045709696606e-05, "loss": 2.8655, "step": 142 }, { "epoch": 0.027846474402663916, "grad_norm": 2.203125, "learning_rate": 3.995967820709704e-05, "loss": 2.8586, "step": 143 }, { "epoch": 0.028041204992892334, "grad_norm": 2.421875, "learning_rate": 3.9958891728571585e-05, "loss": 2.8472, "step": 144 }, { "epoch": 0.028235935583120753, "grad_norm": 2.15625, "learning_rate": 3.9958097661688746e-05, "loss": 2.8924, "step": 145 }, { "epoch": 0.028430666173349172, "grad_norm": 2.390625, "learning_rate": 3.9957296006750406e-05, "loss": 2.8624, "step": 146 }, { "epoch": 0.02862539676357759, "grad_norm": 2.09375, "learning_rate": 3.9956486764061365e-05, "loss": 2.8657, "step": 147 }, { "epoch": 0.02882012735380601, "grad_norm": 2.484375, "learning_rate": 3.9955669933929294e-05, "loss": 2.8625, "step": 148 }, { "epoch": 0.02901485794403443, "grad_norm": 2.171875, "learning_rate": 3.9954845516664736e-05, "loss": 2.8548, "step": 149 }, { "epoch": 0.029209588534262847, "grad_norm": 2.5625, "learning_rate": 3.995401351258117e-05, "loss": 2.8624, "step": 150 }, { "epoch": 0.029404319124491266, "grad_norm": 2.453125, "learning_rate": 3.995317392199488e-05, "loss": 2.8623, "step": 151 }, { "epoch": 0.029599049714719685, "grad_norm": 2.453125, "learning_rate": 3.99523267452251e-05, "loss": 2.8664, "step": 152 }, { "epoch": 0.029793780304948104, "grad_norm": 2.328125, "learning_rate": 3.9951471982593936e-05, "loss": 2.8531, "step": 153 }, { "epoch": 0.029988510895176523, "grad_norm": 2.21875, "learning_rate": 3.995060963442635e-05, "loss": 2.8745, "step": 154 }, { "epoch": 0.03018324148540494, "grad_norm": 2.09375, "learning_rate": 3.994973970105021e-05, "loss": 2.8563, "step": 155 }, { "epoch": 0.03037797207563336, "grad_norm": 1.953125, "learning_rate": 3.994886218279627e-05, "loss": 2.8517, "step": 156 }, { "epoch": 0.03057270266586178, "grad_norm": 2.15625, "learning_rate": 3.994797707999816e-05, "loss": 2.8422, "step": 157 }, { "epoch": 0.030767433256090198, "grad_norm": 1.765625, "learning_rate": 3.9947084392992386e-05, "loss": 2.8434, "step": 158 }, { "epoch": 0.030962163846318617, "grad_norm": 3.125, "learning_rate": 3.994618412211836e-05, "loss": 2.8664, "step": 159 }, { "epoch": 0.031156894436547036, "grad_norm": 2.71875, "learning_rate": 3.994527626771836e-05, "loss": 2.859, "step": 160 }, { "epoch": 0.03135162502677546, "grad_norm": 2.484375, "learning_rate": 3.9944360830137554e-05, "loss": 2.8459, "step": 161 }, { "epoch": 0.03154635561700388, "grad_norm": 2.421875, "learning_rate": 3.994343780972399e-05, "loss": 2.8534, "step": 162 }, { "epoch": 0.031741086207232296, "grad_norm": 2.046875, "learning_rate": 3.994250720682859e-05, "loss": 2.8518, "step": 163 }, { "epoch": 0.031935816797460714, "grad_norm": 1.59375, "learning_rate": 3.994156902180518e-05, "loss": 2.8317, "step": 164 }, { "epoch": 0.03213054738768913, "grad_norm": 2.171875, "learning_rate": 3.9940623255010454e-05, "loss": 2.8486, "step": 165 }, { "epoch": 0.03232527797791755, "grad_norm": 1.34375, "learning_rate": 3.993966990680399e-05, "loss": 2.8575, "step": 166 }, { "epoch": 0.03252000856814597, "grad_norm": 2.984375, "learning_rate": 3.9938708977548256e-05, "loss": 2.8517, "step": 167 }, { "epoch": 0.03271473915837439, "grad_norm": 2.421875, "learning_rate": 3.993774046760859e-05, "loss": 2.8867, "step": 168 }, { "epoch": 0.03290946974860281, "grad_norm": 2.984375, "learning_rate": 3.993676437735322e-05, "loss": 2.8349, "step": 169 }, { "epoch": 0.03310420033883123, "grad_norm": 2.875, "learning_rate": 3.993578070715326e-05, "loss": 2.8626, "step": 170 }, { "epoch": 0.033298930929059646, "grad_norm": 1.8359375, "learning_rate": 3.993478945738269e-05, "loss": 2.8534, "step": 171 }, { "epoch": 0.033493661519288065, "grad_norm": 2.046875, "learning_rate": 3.99337906284184e-05, "loss": 2.8419, "step": 172 }, { "epoch": 0.033688392109516484, "grad_norm": 1.703125, "learning_rate": 3.993278422064012e-05, "loss": 2.8403, "step": 173 }, { "epoch": 0.0338831226997449, "grad_norm": 1.8359375, "learning_rate": 3.9931770234430504e-05, "loss": 2.8521, "step": 174 }, { "epoch": 0.03407785328997332, "grad_norm": 1.3515625, "learning_rate": 3.993074867017507e-05, "loss": 2.848, "step": 175 }, { "epoch": 0.03427258388020174, "grad_norm": 2.546875, "learning_rate": 3.9929719528262205e-05, "loss": 2.8589, "step": 176 }, { "epoch": 0.03446731447043016, "grad_norm": 2.046875, "learning_rate": 3.9928682809083195e-05, "loss": 2.8713, "step": 177 }, { "epoch": 0.03466204506065858, "grad_norm": 2.375, "learning_rate": 3.99276385130322e-05, "loss": 2.8502, "step": 178 }, { "epoch": 0.034856775650887, "grad_norm": 2.390625, "learning_rate": 3.992658664050626e-05, "loss": 2.8415, "step": 179 }, { "epoch": 0.035051506241115415, "grad_norm": 1.7109375, "learning_rate": 3.9925527191905285e-05, "loss": 2.8511, "step": 180 }, { "epoch": 0.035246236831343834, "grad_norm": 2.09375, "learning_rate": 3.9924460167632094e-05, "loss": 2.8261, "step": 181 }, { "epoch": 0.03544096742157225, "grad_norm": 1.515625, "learning_rate": 3.9923385568092364e-05, "loss": 2.8649, "step": 182 }, { "epoch": 0.03563569801180067, "grad_norm": 2.75, "learning_rate": 3.992230339369465e-05, "loss": 2.8341, "step": 183 }, { "epoch": 0.03583042860202909, "grad_norm": 2.328125, "learning_rate": 3.99212136448504e-05, "loss": 2.8325, "step": 184 }, { "epoch": 0.03602515919225751, "grad_norm": 2.609375, "learning_rate": 3.9920116321973935e-05, "loss": 2.8139, "step": 185 }, { "epoch": 0.03621988978248593, "grad_norm": 2.03125, "learning_rate": 3.9919011425482456e-05, "loss": 2.8263, "step": 186 }, { "epoch": 0.03641462037271435, "grad_norm": 2.453125, "learning_rate": 3.9917898955796044e-05, "loss": 2.8474, "step": 187 }, { "epoch": 0.036609350962942766, "grad_norm": 1.5, "learning_rate": 3.991677891333765e-05, "loss": 2.8471, "step": 188 }, { "epoch": 0.036804081553171185, "grad_norm": 3.203125, "learning_rate": 3.991565129853314e-05, "loss": 2.848, "step": 189 }, { "epoch": 0.036998812143399604, "grad_norm": 2.671875, "learning_rate": 3.9914516111811204e-05, "loss": 2.8344, "step": 190 }, { "epoch": 0.03719354273362802, "grad_norm": 3.140625, "learning_rate": 3.991337335360345e-05, "loss": 2.8606, "step": 191 }, { "epoch": 0.03738827332385644, "grad_norm": 3.09375, "learning_rate": 3.9912223024344354e-05, "loss": 2.8294, "step": 192 }, { "epoch": 0.03758300391408486, "grad_norm": 2.046875, "learning_rate": 3.991106512447127e-05, "loss": 2.8365, "step": 193 }, { "epoch": 0.037777734504313286, "grad_norm": 2.25, "learning_rate": 3.990989965442444e-05, "loss": 2.8336, "step": 194 }, { "epoch": 0.037972465094541705, "grad_norm": 1.6953125, "learning_rate": 3.9908726614646955e-05, "loss": 2.8225, "step": 195 }, { "epoch": 0.038167195684770124, "grad_norm": 2.546875, "learning_rate": 3.990754600558482e-05, "loss": 2.845, "step": 196 }, { "epoch": 0.03836192627499854, "grad_norm": 1.9609375, "learning_rate": 3.9906357827686895e-05, "loss": 2.8078, "step": 197 }, { "epoch": 0.03855665686522696, "grad_norm": 2.953125, "learning_rate": 3.990516208140493e-05, "loss": 2.8399, "step": 198 }, { "epoch": 0.03875138745545538, "grad_norm": 2.765625, "learning_rate": 3.990395876719354e-05, "loss": 2.8332, "step": 199 }, { "epoch": 0.0389461180456838, "grad_norm": 2.421875, "learning_rate": 3.990274788551023e-05, "loss": 2.8359, "step": 200 }, { "epoch": 0.03914084863591222, "grad_norm": 2.390625, "learning_rate": 3.990152943681537e-05, "loss": 2.8253, "step": 201 }, { "epoch": 0.039335579226140636, "grad_norm": 2.640625, "learning_rate": 3.990030342157222e-05, "loss": 2.8394, "step": 202 }, { "epoch": 0.039530309816369055, "grad_norm": 1.8671875, "learning_rate": 3.9899069840246906e-05, "loss": 2.8302, "step": 203 }, { "epoch": 0.039725040406597474, "grad_norm": 2.546875, "learning_rate": 3.9897828693308434e-05, "loss": 2.8443, "step": 204 }, { "epoch": 0.03991977099682589, "grad_norm": 1.71875, "learning_rate": 3.989657998122869e-05, "loss": 2.8171, "step": 205 }, { "epoch": 0.04011450158705431, "grad_norm": 2.96875, "learning_rate": 3.989532370448244e-05, "loss": 2.8084, "step": 206 }, { "epoch": 0.04030923217728273, "grad_norm": 2.40625, "learning_rate": 3.98940598635473e-05, "loss": 2.8457, "step": 207 }, { "epoch": 0.04050396276751115, "grad_norm": 2.609375, "learning_rate": 3.989278845890381e-05, "loss": 2.8221, "step": 208 }, { "epoch": 0.04069869335773957, "grad_norm": 2.40625, "learning_rate": 3.9891509491035334e-05, "loss": 2.8287, "step": 209 }, { "epoch": 0.04089342394796799, "grad_norm": 2.28125, "learning_rate": 3.989022296042814e-05, "loss": 2.7986, "step": 210 }, { "epoch": 0.041088154538196406, "grad_norm": 2.03125, "learning_rate": 3.988892886757137e-05, "loss": 2.8082, "step": 211 }, { "epoch": 0.041282885128424825, "grad_norm": 2.71875, "learning_rate": 3.988762721295703e-05, "loss": 2.8256, "step": 212 }, { "epoch": 0.041477615718653243, "grad_norm": 2.265625, "learning_rate": 3.988631799708002e-05, "loss": 2.8246, "step": 213 }, { "epoch": 0.04167234630888166, "grad_norm": 2.9375, "learning_rate": 3.988500122043809e-05, "loss": 2.8291, "step": 214 }, { "epoch": 0.04186707689911008, "grad_norm": 2.8125, "learning_rate": 3.9883676883531895e-05, "loss": 2.8105, "step": 215 }, { "epoch": 0.0420618074893385, "grad_norm": 1.9375, "learning_rate": 3.988234498686493e-05, "loss": 2.7985, "step": 216 }, { "epoch": 0.04225653807956692, "grad_norm": 1.9296875, "learning_rate": 3.988100553094358e-05, "loss": 2.8031, "step": 217 }, { "epoch": 0.04245126866979534, "grad_norm": 2.203125, "learning_rate": 3.987965851627713e-05, "loss": 2.8259, "step": 218 }, { "epoch": 0.042645999260023756, "grad_norm": 1.8046875, "learning_rate": 3.987830394337768e-05, "loss": 2.7989, "step": 219 }, { "epoch": 0.042840729850252175, "grad_norm": 2.9375, "learning_rate": 3.987694181276027e-05, "loss": 2.811, "step": 220 }, { "epoch": 0.043035460440480594, "grad_norm": 2.796875, "learning_rate": 3.987557212494275e-05, "loss": 2.8235, "step": 221 }, { "epoch": 0.04323019103070901, "grad_norm": 1.609375, "learning_rate": 3.98741948804459e-05, "loss": 2.8029, "step": 222 }, { "epoch": 0.04342492162093743, "grad_norm": 1.796875, "learning_rate": 3.987281007979333e-05, "loss": 2.8179, "step": 223 }, { "epoch": 0.04361965221116585, "grad_norm": 1.8671875, "learning_rate": 3.9871417723511555e-05, "loss": 2.8174, "step": 224 }, { "epoch": 0.04381438280139427, "grad_norm": 1.2578125, "learning_rate": 3.987001781212994e-05, "loss": 2.8341, "step": 225 }, { "epoch": 0.04400911339162269, "grad_norm": 3.1875, "learning_rate": 3.9868610346180726e-05, "loss": 2.8261, "step": 226 }, { "epoch": 0.04420384398185111, "grad_norm": 2.6875, "learning_rate": 3.986719532619904e-05, "loss": 2.814, "step": 227 }, { "epoch": 0.044398574572079526, "grad_norm": 2.40625, "learning_rate": 3.9865772752722866e-05, "loss": 2.8147, "step": 228 }, { "epoch": 0.044593305162307945, "grad_norm": 2.375, "learning_rate": 3.986434262629307e-05, "loss": 2.8104, "step": 229 }, { "epoch": 0.04478803575253636, "grad_norm": 1.96875, "learning_rate": 3.986290494745338e-05, "loss": 2.8118, "step": 230 }, { "epoch": 0.04498276634276478, "grad_norm": 1.8046875, "learning_rate": 3.98614597167504e-05, "loss": 2.8133, "step": 231 }, { "epoch": 0.0451774969329932, "grad_norm": 2.546875, "learning_rate": 3.9860006934733605e-05, "loss": 2.8165, "step": 232 }, { "epoch": 0.04537222752322162, "grad_norm": 2.265625, "learning_rate": 3.985854660195535e-05, "loss": 2.8021, "step": 233 }, { "epoch": 0.04556695811345004, "grad_norm": 2.53125, "learning_rate": 3.985707871897085e-05, "loss": 2.8255, "step": 234 }, { "epoch": 0.04576168870367846, "grad_norm": 2.5625, "learning_rate": 3.985560328633819e-05, "loss": 2.8181, "step": 235 }, { "epoch": 0.04595641929390688, "grad_norm": 1.8671875, "learning_rate": 3.9854120304618327e-05, "loss": 2.8204, "step": 236 }, { "epoch": 0.0461511498841353, "grad_norm": 2.0625, "learning_rate": 3.985262977437509e-05, "loss": 2.7901, "step": 237 }, { "epoch": 0.04634588047436372, "grad_norm": 1.859375, "learning_rate": 3.985113169617518e-05, "loss": 2.7858, "step": 238 }, { "epoch": 0.04654061106459214, "grad_norm": 1.6015625, "learning_rate": 3.9849626070588165e-05, "loss": 2.8177, "step": 239 }, { "epoch": 0.04673534165482056, "grad_norm": 2.8125, "learning_rate": 3.984811289818649e-05, "loss": 2.813, "step": 240 }, { "epoch": 0.04693007224504898, "grad_norm": 2.46875, "learning_rate": 3.984659217954545e-05, "loss": 2.7943, "step": 241 }, { "epoch": 0.047124802835277396, "grad_norm": 2.234375, "learning_rate": 3.9845063915243224e-05, "loss": 2.8103, "step": 242 }, { "epoch": 0.047319533425505815, "grad_norm": 2.234375, "learning_rate": 3.984352810586086e-05, "loss": 2.8177, "step": 243 }, { "epoch": 0.047514264015734234, "grad_norm": 2.0625, "learning_rate": 3.984198475198227e-05, "loss": 2.7931, "step": 244 }, { "epoch": 0.04770899460596265, "grad_norm": 1.7578125, "learning_rate": 3.9840433854194234e-05, "loss": 2.8071, "step": 245 }, { "epoch": 0.04790372519619107, "grad_norm": 2.328125, "learning_rate": 3.983887541308641e-05, "loss": 2.8016, "step": 246 }, { "epoch": 0.04809845578641949, "grad_norm": 1.96875, "learning_rate": 3.983730942925131e-05, "loss": 2.81, "step": 247 }, { "epoch": 0.04829318637664791, "grad_norm": 2.453125, "learning_rate": 3.983573590328432e-05, "loss": 2.807, "step": 248 }, { "epoch": 0.04848791696687633, "grad_norm": 2.125, "learning_rate": 3.98341548357837e-05, "loss": 2.8085, "step": 249 }, { "epoch": 0.04868264755710475, "grad_norm": 2.25, "learning_rate": 3.9832566227350564e-05, "loss": 2.788, "step": 250 }, { "epoch": 0.048877378147333166, "grad_norm": 2.015625, "learning_rate": 3.98309700785889e-05, "loss": 2.786, "step": 251 }, { "epoch": 0.049072108737561584, "grad_norm": 2.390625, "learning_rate": 3.9829366390105566e-05, "loss": 2.808, "step": 252 }, { "epoch": 0.04926683932779, "grad_norm": 2.0, "learning_rate": 3.982775516251028e-05, "loss": 2.807, "step": 253 }, { "epoch": 0.04946156991801842, "grad_norm": 2.171875, "learning_rate": 3.982613639641564e-05, "loss": 2.7798, "step": 254 }, { "epoch": 0.04965630050824684, "grad_norm": 1.921875, "learning_rate": 3.982451009243708e-05, "loss": 2.8126, "step": 255 }, { "epoch": 0.04985103109847526, "grad_norm": 2.25, "learning_rate": 3.982287625119293e-05, "loss": 2.8028, "step": 256 }, { "epoch": 0.05004576168870368, "grad_norm": 1.8125, "learning_rate": 3.982123487330438e-05, "loss": 2.8078, "step": 257 }, { "epoch": 0.0502404922789321, "grad_norm": 1.9296875, "learning_rate": 3.9819585959395486e-05, "loss": 2.7725, "step": 258 }, { "epoch": 0.050435222869160516, "grad_norm": 1.578125, "learning_rate": 3.9817929510093144e-05, "loss": 2.8028, "step": 259 }, { "epoch": 0.050629953459388935, "grad_norm": 2.140625, "learning_rate": 3.981626552602715e-05, "loss": 2.7998, "step": 260 }, { "epoch": 0.050824684049617354, "grad_norm": 1.6953125, "learning_rate": 3.981459400783014e-05, "loss": 2.7972, "step": 261 }, { "epoch": 0.05101941463984577, "grad_norm": 2.796875, "learning_rate": 3.981291495613764e-05, "loss": 2.809, "step": 262 }, { "epoch": 0.05121414523007419, "grad_norm": 2.71875, "learning_rate": 3.981122837158801e-05, "loss": 2.7838, "step": 263 }, { "epoch": 0.05140887582030261, "grad_norm": 1.4765625, "learning_rate": 3.980953425482248e-05, "loss": 2.7956, "step": 264 }, { "epoch": 0.05160360641053103, "grad_norm": 1.8125, "learning_rate": 3.980783260648518e-05, "loss": 2.7875, "step": 265 }, { "epoch": 0.05179833700075945, "grad_norm": 1.6484375, "learning_rate": 3.980612342722306e-05, "loss": 2.775, "step": 266 }, { "epoch": 0.05199306759098787, "grad_norm": 1.421875, "learning_rate": 3.980440671768594e-05, "loss": 2.7914, "step": 267 }, { "epoch": 0.052187798181216286, "grad_norm": 2.546875, "learning_rate": 3.980268247852653e-05, "loss": 2.7803, "step": 268 }, { "epoch": 0.052382528771444704, "grad_norm": 2.015625, "learning_rate": 3.980095071040037e-05, "loss": 2.7797, "step": 269 }, { "epoch": 0.05257725936167312, "grad_norm": 2.796875, "learning_rate": 3.979921141396588e-05, "loss": 2.7891, "step": 270 }, { "epoch": 0.05277198995190154, "grad_norm": 2.609375, "learning_rate": 3.9797464589884344e-05, "loss": 2.7975, "step": 271 }, { "epoch": 0.05296672054212996, "grad_norm": 1.6796875, "learning_rate": 3.97957102388199e-05, "loss": 2.7746, "step": 272 }, { "epoch": 0.05316145113235838, "grad_norm": 1.6484375, "learning_rate": 3.979394836143955e-05, "loss": 2.7739, "step": 273 }, { "epoch": 0.0533561817225868, "grad_norm": 1.4609375, "learning_rate": 3.979217895841317e-05, "loss": 2.79, "step": 274 }, { "epoch": 0.05355091231281522, "grad_norm": 1.3515625, "learning_rate": 3.979040203041347e-05, "loss": 2.7767, "step": 275 }, { "epoch": 0.053745642903043636, "grad_norm": 1.7578125, "learning_rate": 3.978861757811604e-05, "loss": 2.7691, "step": 276 }, { "epoch": 0.053940373493272055, "grad_norm": 1.25, "learning_rate": 3.978682560219934e-05, "loss": 2.7946, "step": 277 }, { "epoch": 0.054135104083500474, "grad_norm": 2.171875, "learning_rate": 3.9785026103344664e-05, "loss": 2.7638, "step": 278 }, { "epoch": 0.0543298346737289, "grad_norm": 1.546875, "learning_rate": 3.978321908223619e-05, "loss": 2.7949, "step": 279 }, { "epoch": 0.05452456526395732, "grad_norm": 3.4375, "learning_rate": 3.978140453956093e-05, "loss": 2.8131, "step": 280 }, { "epoch": 0.05471929585418574, "grad_norm": 3.328125, "learning_rate": 3.977958247600879e-05, "loss": 2.7778, "step": 281 }, { "epoch": 0.054914026444414156, "grad_norm": 1.4609375, "learning_rate": 3.9777752892272515e-05, "loss": 2.7709, "step": 282 }, { "epoch": 0.055108757034642575, "grad_norm": 2.8125, "learning_rate": 3.97759157890477e-05, "loss": 2.7963, "step": 283 }, { "epoch": 0.055303487624870994, "grad_norm": 2.234375, "learning_rate": 3.977407116703283e-05, "loss": 2.7975, "step": 284 }, { "epoch": 0.05549821821509941, "grad_norm": 2.3125, "learning_rate": 3.977221902692921e-05, "loss": 2.7718, "step": 285 }, { "epoch": 0.05569294880532783, "grad_norm": 2.390625, "learning_rate": 3.977035936944102e-05, "loss": 2.7864, "step": 286 }, { "epoch": 0.05588767939555625, "grad_norm": 1.84375, "learning_rate": 3.976849219527532e-05, "loss": 2.7984, "step": 287 }, { "epoch": 0.05608240998578467, "grad_norm": 2.09375, "learning_rate": 3.9766617505141996e-05, "loss": 2.7621, "step": 288 }, { "epoch": 0.05627714057601309, "grad_norm": 1.609375, "learning_rate": 3.9764735299753804e-05, "loss": 2.78, "step": 289 }, { "epoch": 0.05647187116624151, "grad_norm": 2.296875, "learning_rate": 3.976284557982636e-05, "loss": 2.7936, "step": 290 }, { "epoch": 0.056666601756469925, "grad_norm": 1.796875, "learning_rate": 3.976094834607814e-05, "loss": 2.7811, "step": 291 }, { "epoch": 0.056861332346698344, "grad_norm": 2.703125, "learning_rate": 3.9759043599230464e-05, "loss": 2.7803, "step": 292 }, { "epoch": 0.05705606293692676, "grad_norm": 2.140625, "learning_rate": 3.975713134000751e-05, "loss": 2.7814, "step": 293 }, { "epoch": 0.05725079352715518, "grad_norm": 2.453125, "learning_rate": 3.975521156913633e-05, "loss": 2.7846, "step": 294 }, { "epoch": 0.0574455241173836, "grad_norm": 1.8046875, "learning_rate": 3.975328428734681e-05, "loss": 2.748, "step": 295 }, { "epoch": 0.05764025470761202, "grad_norm": 2.59375, "learning_rate": 3.97513494953717e-05, "loss": 2.7715, "step": 296 }, { "epoch": 0.05783498529784044, "grad_norm": 1.8984375, "learning_rate": 3.974940719394662e-05, "loss": 2.7667, "step": 297 }, { "epoch": 0.05802971588806886, "grad_norm": 3.078125, "learning_rate": 3.9747457383810016e-05, "loss": 2.7667, "step": 298 }, { "epoch": 0.058224446478297276, "grad_norm": 2.78125, "learning_rate": 3.9745500065703215e-05, "loss": 2.7784, "step": 299 }, { "epoch": 0.058419177068525695, "grad_norm": 2.28125, "learning_rate": 3.9743535240370385e-05, "loss": 2.7495, "step": 300 }, { "epoch": 0.058613907658754114, "grad_norm": 2.25, "learning_rate": 3.974156290855855e-05, "loss": 2.7728, "step": 301 }, { "epoch": 0.05880863824898253, "grad_norm": 2.453125, "learning_rate": 3.973958307101759e-05, "loss": 2.7698, "step": 302 }, { "epoch": 0.05900336883921095, "grad_norm": 2.15625, "learning_rate": 3.973759572850024e-05, "loss": 2.7699, "step": 303 }, { "epoch": 0.05919809942943937, "grad_norm": 2.53125, "learning_rate": 3.973560088176209e-05, "loss": 2.7987, "step": 304 }, { "epoch": 0.05939283001966779, "grad_norm": 2.3125, "learning_rate": 3.973359853156156e-05, "loss": 2.7683, "step": 305 }, { "epoch": 0.05958756060989621, "grad_norm": 2.15625, "learning_rate": 3.9731588678659966e-05, "loss": 2.746, "step": 306 }, { "epoch": 0.059782291200124626, "grad_norm": 1.9921875, "learning_rate": 3.972957132382144e-05, "loss": 2.7646, "step": 307 }, { "epoch": 0.059977021790353045, "grad_norm": 2.546875, "learning_rate": 3.9727546467812984e-05, "loss": 2.7698, "step": 308 }, { "epoch": 0.060171752380581464, "grad_norm": 2.328125, "learning_rate": 3.972551411140444e-05, "loss": 2.7469, "step": 309 }, { "epoch": 0.06036648297080988, "grad_norm": 2.453125, "learning_rate": 3.9723474255368516e-05, "loss": 2.7641, "step": 310 }, { "epoch": 0.0605612135610383, "grad_norm": 2.234375, "learning_rate": 3.972142690048077e-05, "loss": 2.7547, "step": 311 }, { "epoch": 0.06075594415126672, "grad_norm": 2.171875, "learning_rate": 3.971937204751958e-05, "loss": 2.7617, "step": 312 }, { "epoch": 0.06095067474149514, "grad_norm": 2.046875, "learning_rate": 3.9717309697266224e-05, "loss": 2.7612, "step": 313 }, { "epoch": 0.06114540533172356, "grad_norm": 2.359375, "learning_rate": 3.9715239850504795e-05, "loss": 2.7504, "step": 314 }, { "epoch": 0.06134013592195198, "grad_norm": 2.15625, "learning_rate": 3.9713162508022254e-05, "loss": 2.7592, "step": 315 }, { "epoch": 0.061534866512180396, "grad_norm": 2.15625, "learning_rate": 3.97110776706084e-05, "loss": 2.763, "step": 316 }, { "epoch": 0.061729597102408815, "grad_norm": 2.0625, "learning_rate": 3.970898533905589e-05, "loss": 2.7518, "step": 317 }, { "epoch": 0.061924327692637234, "grad_norm": 2.203125, "learning_rate": 3.9706885514160224e-05, "loss": 2.745, "step": 318 }, { "epoch": 0.06211905828286565, "grad_norm": 1.9609375, "learning_rate": 3.9704778196719756e-05, "loss": 2.7815, "step": 319 }, { "epoch": 0.06231378887309407, "grad_norm": 2.3125, "learning_rate": 3.970266338753569e-05, "loss": 2.7587, "step": 320 }, { "epoch": 0.06250851946332249, "grad_norm": 1.9921875, "learning_rate": 3.970054108741206e-05, "loss": 2.7711, "step": 321 }, { "epoch": 0.06270325005355092, "grad_norm": 2.203125, "learning_rate": 3.969841129715579e-05, "loss": 2.7447, "step": 322 }, { "epoch": 0.06289798064377933, "grad_norm": 2.015625, "learning_rate": 3.9696274017576595e-05, "loss": 2.7521, "step": 323 }, { "epoch": 0.06309271123400775, "grad_norm": 2.28125, "learning_rate": 3.969412924948709e-05, "loss": 2.7688, "step": 324 }, { "epoch": 0.06328744182423617, "grad_norm": 2.25, "learning_rate": 3.9691976993702706e-05, "loss": 2.7484, "step": 325 }, { "epoch": 0.06348217241446459, "grad_norm": 1.984375, "learning_rate": 3.968981725104172e-05, "loss": 2.7565, "step": 326 }, { "epoch": 0.063676903004693, "grad_norm": 1.875, "learning_rate": 3.968765002232529e-05, "loss": 2.7532, "step": 327 }, { "epoch": 0.06387163359492143, "grad_norm": 2.234375, "learning_rate": 3.9685475308377364e-05, "loss": 2.7582, "step": 328 }, { "epoch": 0.06406636418514984, "grad_norm": 1.984375, "learning_rate": 3.968329311002479e-05, "loss": 2.7563, "step": 329 }, { "epoch": 0.06426109477537827, "grad_norm": 2.265625, "learning_rate": 3.9681103428097225e-05, "loss": 2.7577, "step": 330 }, { "epoch": 0.06445582536560668, "grad_norm": 2.234375, "learning_rate": 3.967890626342719e-05, "loss": 2.7584, "step": 331 }, { "epoch": 0.0646505559558351, "grad_norm": 2.03125, "learning_rate": 3.9676701616850045e-05, "loss": 2.754, "step": 332 }, { "epoch": 0.06484528654606352, "grad_norm": 1.90625, "learning_rate": 3.9674489489204e-05, "loss": 2.7401, "step": 333 }, { "epoch": 0.06504001713629194, "grad_norm": 2.078125, "learning_rate": 3.9672269881330095e-05, "loss": 2.76, "step": 334 }, { "epoch": 0.06523474772652035, "grad_norm": 1.859375, "learning_rate": 3.967004279407223e-05, "loss": 2.7586, "step": 335 }, { "epoch": 0.06542947831674878, "grad_norm": 2.265625, "learning_rate": 3.966780822827714e-05, "loss": 2.7424, "step": 336 }, { "epoch": 0.06562420890697719, "grad_norm": 2.109375, "learning_rate": 3.966556618479441e-05, "loss": 2.7741, "step": 337 }, { "epoch": 0.06581893949720562, "grad_norm": 2.109375, "learning_rate": 3.966331666447645e-05, "loss": 2.7579, "step": 338 }, { "epoch": 0.06601367008743403, "grad_norm": 1.9609375, "learning_rate": 3.9661059668178546e-05, "loss": 2.7644, "step": 339 }, { "epoch": 0.06620840067766245, "grad_norm": 2.0625, "learning_rate": 3.965879519675879e-05, "loss": 2.7143, "step": 340 }, { "epoch": 0.06640313126789087, "grad_norm": 1.828125, "learning_rate": 3.9656523251078135e-05, "loss": 2.7482, "step": 341 }, { "epoch": 0.06659786185811929, "grad_norm": 2.125, "learning_rate": 3.9654243832000384e-05, "loss": 2.7462, "step": 342 }, { "epoch": 0.0667925924483477, "grad_norm": 1.9453125, "learning_rate": 3.965195694039216e-05, "loss": 2.7609, "step": 343 }, { "epoch": 0.06698732303857613, "grad_norm": 2.171875, "learning_rate": 3.964966257712294e-05, "loss": 2.7451, "step": 344 }, { "epoch": 0.06718205362880456, "grad_norm": 1.96875, "learning_rate": 3.964736074306504e-05, "loss": 2.7562, "step": 345 }, { "epoch": 0.06737678421903297, "grad_norm": 2.21875, "learning_rate": 3.9645051439093616e-05, "loss": 2.739, "step": 346 }, { "epoch": 0.0675715148092614, "grad_norm": 1.984375, "learning_rate": 3.9642734666086664e-05, "loss": 2.7597, "step": 347 }, { "epoch": 0.0677662453994898, "grad_norm": 2.109375, "learning_rate": 3.964041042492502e-05, "loss": 2.7549, "step": 348 }, { "epoch": 0.06796097598971823, "grad_norm": 1.8671875, "learning_rate": 3.963807871649236e-05, "loss": 2.7629, "step": 349 }, { "epoch": 0.06815570657994664, "grad_norm": 2.21875, "learning_rate": 3.9635739541675195e-05, "loss": 2.7625, "step": 350 }, { "epoch": 0.06835043717017507, "grad_norm": 2.0, "learning_rate": 3.9633392901362884e-05, "loss": 2.761, "step": 351 }, { "epoch": 0.06854516776040348, "grad_norm": 2.34375, "learning_rate": 3.96310387964476e-05, "loss": 2.7469, "step": 352 }, { "epoch": 0.0687398983506319, "grad_norm": 2.1875, "learning_rate": 3.96286772278244e-05, "loss": 2.7396, "step": 353 }, { "epoch": 0.06893462894086032, "grad_norm": 1.9609375, "learning_rate": 3.962630819639112e-05, "loss": 2.735, "step": 354 }, { "epoch": 0.06912935953108874, "grad_norm": 1.7890625, "learning_rate": 3.9623931703048497e-05, "loss": 2.761, "step": 355 }, { "epoch": 0.06932409012131716, "grad_norm": 2.34375, "learning_rate": 3.962154774870005e-05, "loss": 2.7588, "step": 356 }, { "epoch": 0.06951882071154558, "grad_norm": 2.015625, "learning_rate": 3.961915633425216e-05, "loss": 2.7265, "step": 357 }, { "epoch": 0.069713551301774, "grad_norm": 2.46875, "learning_rate": 3.961675746061405e-05, "loss": 2.7473, "step": 358 }, { "epoch": 0.06990828189200242, "grad_norm": 2.21875, "learning_rate": 3.961435112869775e-05, "loss": 2.7514, "step": 359 }, { "epoch": 0.07010301248223083, "grad_norm": 2.265625, "learning_rate": 3.9611937339418164e-05, "loss": 2.7634, "step": 360 }, { "epoch": 0.07029774307245926, "grad_norm": 2.015625, "learning_rate": 3.9609516093693016e-05, "loss": 2.7656, "step": 361 }, { "epoch": 0.07049247366268767, "grad_norm": 2.328125, "learning_rate": 3.960708739244285e-05, "loss": 2.766, "step": 362 }, { "epoch": 0.0706872042529161, "grad_norm": 2.015625, "learning_rate": 3.9604651236591064e-05, "loss": 2.7408, "step": 363 }, { "epoch": 0.0708819348431445, "grad_norm": 2.5625, "learning_rate": 3.9602207627063875e-05, "loss": 2.7465, "step": 364 }, { "epoch": 0.07107666543337293, "grad_norm": 2.25, "learning_rate": 3.959975656479036e-05, "loss": 2.7491, "step": 365 }, { "epoch": 0.07127139602360134, "grad_norm": 2.4375, "learning_rate": 3.9597298050702385e-05, "loss": 2.7458, "step": 366 }, { "epoch": 0.07146612661382977, "grad_norm": 2.234375, "learning_rate": 3.959483208573469e-05, "loss": 2.7252, "step": 367 }, { "epoch": 0.07166085720405818, "grad_norm": 2.421875, "learning_rate": 3.9592358670824835e-05, "loss": 2.7255, "step": 368 }, { "epoch": 0.07185558779428661, "grad_norm": 2.203125, "learning_rate": 3.958987780691321e-05, "loss": 2.7454, "step": 369 }, { "epoch": 0.07205031838451502, "grad_norm": 2.578125, "learning_rate": 3.958738949494303e-05, "loss": 2.7425, "step": 370 }, { "epoch": 0.07224504897474344, "grad_norm": 2.421875, "learning_rate": 3.9584893735860356e-05, "loss": 2.7341, "step": 371 }, { "epoch": 0.07243977956497186, "grad_norm": 2.203125, "learning_rate": 3.958239053061407e-05, "loss": 2.7321, "step": 372 }, { "epoch": 0.07263451015520028, "grad_norm": 2.015625, "learning_rate": 3.95798798801559e-05, "loss": 2.7465, "step": 373 }, { "epoch": 0.0728292407454287, "grad_norm": 2.3125, "learning_rate": 3.9577361785440375e-05, "loss": 2.75, "step": 374 }, { "epoch": 0.07302397133565712, "grad_norm": 2.03125, "learning_rate": 3.957483624742489e-05, "loss": 2.7682, "step": 375 }, { "epoch": 0.07321870192588553, "grad_norm": 2.5, "learning_rate": 3.9572303267069646e-05, "loss": 2.7491, "step": 376 }, { "epoch": 0.07341343251611396, "grad_norm": 2.453125, "learning_rate": 3.956976284533767e-05, "loss": 2.7305, "step": 377 }, { "epoch": 0.07360816310634237, "grad_norm": 1.890625, "learning_rate": 3.956721498319486e-05, "loss": 2.7461, "step": 378 }, { "epoch": 0.0738028936965708, "grad_norm": 1.890625, "learning_rate": 3.956465968160988e-05, "loss": 2.7662, "step": 379 }, { "epoch": 0.07399762428679921, "grad_norm": 1.921875, "learning_rate": 3.956209694155426e-05, "loss": 2.7107, "step": 380 }, { "epoch": 0.07419235487702763, "grad_norm": 1.7578125, "learning_rate": 3.9559526764002364e-05, "loss": 2.7418, "step": 381 }, { "epoch": 0.07438708546725604, "grad_norm": 2.4375, "learning_rate": 3.955694914993136e-05, "loss": 2.7114, "step": 382 }, { "epoch": 0.07458181605748447, "grad_norm": 2.171875, "learning_rate": 3.955436410032126e-05, "loss": 2.7266, "step": 383 }, { "epoch": 0.07477654664771288, "grad_norm": 2.0, "learning_rate": 3.9551771616154904e-05, "loss": 2.7181, "step": 384 }, { "epoch": 0.07497127723794131, "grad_norm": 2.015625, "learning_rate": 3.954917169841794e-05, "loss": 2.7378, "step": 385 }, { "epoch": 0.07516600782816972, "grad_norm": 1.8671875, "learning_rate": 3.954656434809886e-05, "loss": 2.7383, "step": 386 }, { "epoch": 0.07536073841839815, "grad_norm": 1.6328125, "learning_rate": 3.954394956618899e-05, "loss": 2.7201, "step": 387 }, { "epoch": 0.07555546900862657, "grad_norm": 2.328125, "learning_rate": 3.954132735368244e-05, "loss": 2.7214, "step": 388 }, { "epoch": 0.07575019959885498, "grad_norm": 2.15625, "learning_rate": 3.9538697711576204e-05, "loss": 2.7281, "step": 389 }, { "epoch": 0.07594493018908341, "grad_norm": 2.109375, "learning_rate": 3.9536060640870054e-05, "loss": 2.7301, "step": 390 }, { "epoch": 0.07613966077931182, "grad_norm": 2.078125, "learning_rate": 3.95334161425666e-05, "loss": 2.7327, "step": 391 }, { "epoch": 0.07633439136954025, "grad_norm": 1.9453125, "learning_rate": 3.953076421767129e-05, "loss": 2.7518, "step": 392 }, { "epoch": 0.07652912195976866, "grad_norm": 1.7421875, "learning_rate": 3.9528104867192375e-05, "loss": 2.7392, "step": 393 }, { "epoch": 0.07672385254999708, "grad_norm": 2.09375, "learning_rate": 3.952543809214094e-05, "loss": 2.738, "step": 394 }, { "epoch": 0.0769185831402255, "grad_norm": 1.7578125, "learning_rate": 3.952276389353089e-05, "loss": 2.7102, "step": 395 }, { "epoch": 0.07711331373045392, "grad_norm": 2.3125, "learning_rate": 3.952008227237896e-05, "loss": 2.7204, "step": 396 }, { "epoch": 0.07730804432068233, "grad_norm": 2.21875, "learning_rate": 3.951739322970469e-05, "loss": 2.7353, "step": 397 }, { "epoch": 0.07750277491091076, "grad_norm": 1.8046875, "learning_rate": 3.9514696766530474e-05, "loss": 2.721, "step": 398 }, { "epoch": 0.07769750550113917, "grad_norm": 1.5546875, "learning_rate": 3.951199288388147e-05, "loss": 2.7347, "step": 399 }, { "epoch": 0.0778922360913676, "grad_norm": 2.28125, "learning_rate": 3.9509281582785725e-05, "loss": 2.7236, "step": 400 }, { "epoch": 0.07808696668159601, "grad_norm": 2.046875, "learning_rate": 3.950656286427406e-05, "loss": 2.7279, "step": 401 }, { "epoch": 0.07828169727182444, "grad_norm": 2.109375, "learning_rate": 3.950383672938013e-05, "loss": 2.7401, "step": 402 }, { "epoch": 0.07847642786205285, "grad_norm": 1.96875, "learning_rate": 3.950110317914041e-05, "loss": 2.7379, "step": 403 }, { "epoch": 0.07867115845228127, "grad_norm": 2.015625, "learning_rate": 3.949836221459419e-05, "loss": 2.7411, "step": 404 }, { "epoch": 0.07886588904250968, "grad_norm": 1.90625, "learning_rate": 3.9495613836783595e-05, "loss": 2.7416, "step": 405 }, { "epoch": 0.07906061963273811, "grad_norm": 2.046875, "learning_rate": 3.949285804675354e-05, "loss": 2.7151, "step": 406 }, { "epoch": 0.07925535022296652, "grad_norm": 1.96875, "learning_rate": 3.94900948455518e-05, "loss": 2.732, "step": 407 }, { "epoch": 0.07945008081319495, "grad_norm": 2.0, "learning_rate": 3.948732423422891e-05, "loss": 2.7233, "step": 408 }, { "epoch": 0.07964481140342336, "grad_norm": 1.765625, "learning_rate": 3.9484546213838276e-05, "loss": 2.7343, "step": 409 }, { "epoch": 0.07983954199365179, "grad_norm": 2.203125, "learning_rate": 3.94817607854361e-05, "loss": 2.7015, "step": 410 }, { "epoch": 0.0800342725838802, "grad_norm": 1.984375, "learning_rate": 3.9478967950081386e-05, "loss": 2.7251, "step": 411 }, { "epoch": 0.08022900317410862, "grad_norm": 1.984375, "learning_rate": 3.947616770883598e-05, "loss": 2.7163, "step": 412 }, { "epoch": 0.08042373376433704, "grad_norm": 2.03125, "learning_rate": 3.947336006276452e-05, "loss": 2.7153, "step": 413 }, { "epoch": 0.08061846435456546, "grad_norm": 1.8125, "learning_rate": 3.9470545012934495e-05, "loss": 2.7189, "step": 414 }, { "epoch": 0.08081319494479387, "grad_norm": 1.53125, "learning_rate": 3.9467722560416156e-05, "loss": 2.7194, "step": 415 }, { "epoch": 0.0810079255350223, "grad_norm": 2.3125, "learning_rate": 3.946489270628262e-05, "loss": 2.7202, "step": 416 }, { "epoch": 0.08120265612525071, "grad_norm": 1.953125, "learning_rate": 3.946205545160978e-05, "loss": 2.7067, "step": 417 }, { "epoch": 0.08139738671547914, "grad_norm": 2.265625, "learning_rate": 3.945921079747637e-05, "loss": 2.7156, "step": 418 }, { "epoch": 0.08159211730570755, "grad_norm": 2.28125, "learning_rate": 3.9456358744963914e-05, "loss": 2.7257, "step": 419 }, { "epoch": 0.08178684789593597, "grad_norm": 1.5078125, "learning_rate": 3.945349929515678e-05, "loss": 2.7084, "step": 420 }, { "epoch": 0.08198157848616439, "grad_norm": 1.3984375, "learning_rate": 3.9450632449142106e-05, "loss": 2.7142, "step": 421 }, { "epoch": 0.08217630907639281, "grad_norm": 2.0625, "learning_rate": 3.9447758208009876e-05, "loss": 2.7337, "step": 422 }, { "epoch": 0.08237103966662122, "grad_norm": 1.65625, "learning_rate": 3.944487657285287e-05, "loss": 2.7216, "step": 423 }, { "epoch": 0.08256577025684965, "grad_norm": 2.78125, "learning_rate": 3.944198754476669e-05, "loss": 2.7214, "step": 424 }, { "epoch": 0.08276050084707806, "grad_norm": 2.703125, "learning_rate": 3.9439091124849745e-05, "loss": 2.72, "step": 425 }, { "epoch": 0.08295523143730649, "grad_norm": 1.265625, "learning_rate": 3.943618731420324e-05, "loss": 2.714, "step": 426 }, { "epoch": 0.0831499620275349, "grad_norm": 1.484375, "learning_rate": 3.943327611393121e-05, "loss": 2.7319, "step": 427 }, { "epoch": 0.08334469261776332, "grad_norm": 1.734375, "learning_rate": 3.9430357525140495e-05, "loss": 2.7207, "step": 428 }, { "epoch": 0.08353942320799174, "grad_norm": 1.1484375, "learning_rate": 3.9427431548940734e-05, "loss": 2.7368, "step": 429 }, { "epoch": 0.08373415379822016, "grad_norm": 2.96875, "learning_rate": 3.9424498186444376e-05, "loss": 2.7301, "step": 430 }, { "epoch": 0.08392888438844859, "grad_norm": 2.578125, "learning_rate": 3.9421557438766697e-05, "loss": 2.7317, "step": 431 }, { "epoch": 0.084123614978677, "grad_norm": 2.046875, "learning_rate": 3.941860930702576e-05, "loss": 2.722, "step": 432 }, { "epoch": 0.08431834556890543, "grad_norm": 2.265625, "learning_rate": 3.941565379234244e-05, "loss": 2.7366, "step": 433 }, { "epoch": 0.08451307615913384, "grad_norm": 1.2421875, "learning_rate": 3.941269089584043e-05, "loss": 2.7072, "step": 434 }, { "epoch": 0.08470780674936226, "grad_norm": 1.5, "learning_rate": 3.9409720618646205e-05, "loss": 2.7394, "step": 435 }, { "epoch": 0.08490253733959068, "grad_norm": 1.6328125, "learning_rate": 3.940674296188909e-05, "loss": 2.7141, "step": 436 }, { "epoch": 0.0850972679298191, "grad_norm": 1.09375, "learning_rate": 3.940375792670116e-05, "loss": 2.7222, "step": 437 }, { "epoch": 0.08529199852004751, "grad_norm": 2.984375, "learning_rate": 3.940076551421734e-05, "loss": 2.7358, "step": 438 }, { "epoch": 0.08548672911027594, "grad_norm": 2.578125, "learning_rate": 3.9397765725575336e-05, "loss": 2.6998, "step": 439 }, { "epoch": 0.08568145970050435, "grad_norm": 2.140625, "learning_rate": 3.939475856191567e-05, "loss": 2.7203, "step": 440 }, { "epoch": 0.08587619029073278, "grad_norm": 2.15625, "learning_rate": 3.9391744024381665e-05, "loss": 2.7161, "step": 441 }, { "epoch": 0.08607092088096119, "grad_norm": 1.578125, "learning_rate": 3.938872211411944e-05, "loss": 2.7172, "step": 442 }, { "epoch": 0.08626565147118961, "grad_norm": 1.453125, "learning_rate": 3.938569283227793e-05, "loss": 2.7161, "step": 443 }, { "epoch": 0.08646038206141803, "grad_norm": 2.125, "learning_rate": 3.938265618000885e-05, "loss": 2.7215, "step": 444 }, { "epoch": 0.08665511265164645, "grad_norm": 1.7421875, "learning_rate": 3.937961215846676e-05, "loss": 2.704, "step": 445 }, { "epoch": 0.08684984324187486, "grad_norm": 2.34375, "learning_rate": 3.937656076880897e-05, "loss": 2.7228, "step": 446 }, { "epoch": 0.08704457383210329, "grad_norm": 2.140625, "learning_rate": 3.937350201219563e-05, "loss": 2.7281, "step": 447 }, { "epoch": 0.0872393044223317, "grad_norm": 1.7421875, "learning_rate": 3.9370435889789674e-05, "loss": 2.7229, "step": 448 }, { "epoch": 0.08743403501256013, "grad_norm": 1.6796875, "learning_rate": 3.9367362402756836e-05, "loss": 2.7229, "step": 449 }, { "epoch": 0.08762876560278854, "grad_norm": 1.8359375, "learning_rate": 3.9364281552265664e-05, "loss": 2.7102, "step": 450 }, { "epoch": 0.08782349619301696, "grad_norm": 1.6015625, "learning_rate": 3.9361193339487484e-05, "loss": 2.7025, "step": 451 }, { "epoch": 0.08801822678324538, "grad_norm": 2.125, "learning_rate": 3.9358097765596445e-05, "loss": 2.7212, "step": 452 }, { "epoch": 0.0882129573734738, "grad_norm": 1.796875, "learning_rate": 3.935499483176947e-05, "loss": 2.7282, "step": 453 }, { "epoch": 0.08840768796370221, "grad_norm": 2.078125, "learning_rate": 3.935188453918629e-05, "loss": 2.7163, "step": 454 }, { "epoch": 0.08860241855393064, "grad_norm": 1.8984375, "learning_rate": 3.934876688902945e-05, "loss": 2.7122, "step": 455 }, { "epoch": 0.08879714914415905, "grad_norm": 1.84375, "learning_rate": 3.934564188248427e-05, "loss": 2.7304, "step": 456 }, { "epoch": 0.08899187973438748, "grad_norm": 1.671875, "learning_rate": 3.934250952073887e-05, "loss": 2.7093, "step": 457 }, { "epoch": 0.08918661032461589, "grad_norm": 2.0, "learning_rate": 3.933936980498419e-05, "loss": 2.6912, "step": 458 }, { "epoch": 0.08938134091484431, "grad_norm": 1.6875, "learning_rate": 3.933622273641393e-05, "loss": 2.6993, "step": 459 }, { "epoch": 0.08957607150507273, "grad_norm": 2.09375, "learning_rate": 3.933306831622461e-05, "loss": 2.7167, "step": 460 }, { "epoch": 0.08977080209530115, "grad_norm": 1.8671875, "learning_rate": 3.932990654561554e-05, "loss": 2.7129, "step": 461 }, { "epoch": 0.08996553268552956, "grad_norm": 1.8046875, "learning_rate": 3.9326737425788824e-05, "loss": 2.6996, "step": 462 }, { "epoch": 0.09016026327575799, "grad_norm": 1.6953125, "learning_rate": 3.932356095794935e-05, "loss": 2.7033, "step": 463 }, { "epoch": 0.0903549938659864, "grad_norm": 1.953125, "learning_rate": 3.9320377143304816e-05, "loss": 2.7025, "step": 464 }, { "epoch": 0.09054972445621483, "grad_norm": 1.8125, "learning_rate": 3.931718598306571e-05, "loss": 2.7123, "step": 465 }, { "epoch": 0.09074445504644324, "grad_norm": 1.9375, "learning_rate": 3.93139874784453e-05, "loss": 2.697, "step": 466 }, { "epoch": 0.09093918563667167, "grad_norm": 1.640625, "learning_rate": 3.9310781630659666e-05, "loss": 2.7097, "step": 467 }, { "epoch": 0.09113391622690008, "grad_norm": 2.0625, "learning_rate": 3.9307568440927665e-05, "loss": 2.6997, "step": 468 }, { "epoch": 0.0913286468171285, "grad_norm": 1.828125, "learning_rate": 3.930434791047094e-05, "loss": 2.7047, "step": 469 }, { "epoch": 0.09152337740735692, "grad_norm": 2.015625, "learning_rate": 3.930112004051395e-05, "loss": 2.7126, "step": 470 }, { "epoch": 0.09171810799758534, "grad_norm": 1.7109375, "learning_rate": 3.929788483228392e-05, "loss": 2.7118, "step": 471 }, { "epoch": 0.09191283858781377, "grad_norm": 2.03125, "learning_rate": 3.929464228701087e-05, "loss": 2.7257, "step": 472 }, { "epoch": 0.09210756917804218, "grad_norm": 1.6640625, "learning_rate": 3.9291392405927624e-05, "loss": 2.7023, "step": 473 }, { "epoch": 0.0923022997682706, "grad_norm": 2.046875, "learning_rate": 3.928813519026978e-05, "loss": 2.6788, "step": 474 }, { "epoch": 0.09249703035849902, "grad_norm": 1.7109375, "learning_rate": 3.928487064127573e-05, "loss": 2.7217, "step": 475 }, { "epoch": 0.09269176094872744, "grad_norm": 2.296875, "learning_rate": 3.928159876018665e-05, "loss": 2.7107, "step": 476 }, { "epoch": 0.09288649153895585, "grad_norm": 1.953125, "learning_rate": 3.9278319548246505e-05, "loss": 2.7205, "step": 477 }, { "epoch": 0.09308122212918428, "grad_norm": 2.078125, "learning_rate": 3.927503300670206e-05, "loss": 2.7072, "step": 478 }, { "epoch": 0.09327595271941269, "grad_norm": 1.8671875, "learning_rate": 3.927173913680285e-05, "loss": 2.6888, "step": 479 }, { "epoch": 0.09347068330964112, "grad_norm": 2.078125, "learning_rate": 3.9268437939801195e-05, "loss": 2.6925, "step": 480 }, { "epoch": 0.09366541389986953, "grad_norm": 1.640625, "learning_rate": 3.9265129416952216e-05, "loss": 2.6936, "step": 481 }, { "epoch": 0.09386014449009795, "grad_norm": 2.421875, "learning_rate": 3.9261813569513816e-05, "loss": 2.7304, "step": 482 }, { "epoch": 0.09405487508032637, "grad_norm": 2.0625, "learning_rate": 3.9258490398746665e-05, "loss": 2.6862, "step": 483 }, { "epoch": 0.09424960567055479, "grad_norm": 2.5, "learning_rate": 3.9255159905914244e-05, "loss": 2.6904, "step": 484 }, { "epoch": 0.0944443362607832, "grad_norm": 2.203125, "learning_rate": 3.925182209228279e-05, "loss": 2.7077, "step": 485 }, { "epoch": 0.09463906685101163, "grad_norm": 2.25, "learning_rate": 3.924847695912135e-05, "loss": 2.7149, "step": 486 }, { "epoch": 0.09483379744124004, "grad_norm": 2.125, "learning_rate": 3.924512450770173e-05, "loss": 2.7006, "step": 487 }, { "epoch": 0.09502852803146847, "grad_norm": 2.078125, "learning_rate": 3.924176473929854e-05, "loss": 2.7121, "step": 488 }, { "epoch": 0.09522325862169688, "grad_norm": 1.890625, "learning_rate": 3.9238397655189165e-05, "loss": 2.6932, "step": 489 }, { "epoch": 0.0954179892119253, "grad_norm": 2.28125, "learning_rate": 3.923502325665375e-05, "loss": 2.7218, "step": 490 }, { "epoch": 0.09561271980215372, "grad_norm": 1.9140625, "learning_rate": 3.923164154497525e-05, "loss": 2.6892, "step": 491 }, { "epoch": 0.09580745039238214, "grad_norm": 2.421875, "learning_rate": 3.922825252143939e-05, "loss": 2.691, "step": 492 }, { "epoch": 0.09600218098261055, "grad_norm": 2.203125, "learning_rate": 3.922485618733468e-05, "loss": 2.699, "step": 493 }, { "epoch": 0.09619691157283898, "grad_norm": 2.0, "learning_rate": 3.92214525439524e-05, "loss": 2.6965, "step": 494 }, { "epoch": 0.09639164216306739, "grad_norm": 1.75, "learning_rate": 3.92180415925866e-05, "loss": 2.7039, "step": 495 }, { "epoch": 0.09658637275329582, "grad_norm": 2.34375, "learning_rate": 3.9214623334534136e-05, "loss": 2.6724, "step": 496 }, { "epoch": 0.09678110334352423, "grad_norm": 2.046875, "learning_rate": 3.921119777109463e-05, "loss": 2.6996, "step": 497 }, { "epoch": 0.09697583393375266, "grad_norm": 2.34375, "learning_rate": 3.920776490357047e-05, "loss": 2.6998, "step": 498 }, { "epoch": 0.09717056452398107, "grad_norm": 2.234375, "learning_rate": 3.920432473326683e-05, "loss": 2.6903, "step": 499 }, { "epoch": 0.0973652951142095, "grad_norm": 1.8359375, "learning_rate": 3.9200877261491664e-05, "loss": 2.687, "step": 500 }, { "epoch": 0.0975600257044379, "grad_norm": 1.671875, "learning_rate": 3.9197422489555694e-05, "loss": 2.6841, "step": 501 }, { "epoch": 0.09775475629466633, "grad_norm": 2.265625, "learning_rate": 3.9193960418772434e-05, "loss": 2.6863, "step": 502 }, { "epoch": 0.09794948688489474, "grad_norm": 2.328125, "learning_rate": 3.919049105045815e-05, "loss": 2.6962, "step": 503 }, { "epoch": 0.09814421747512317, "grad_norm": 1.6171875, "learning_rate": 3.9187014385931895e-05, "loss": 2.7143, "step": 504 }, { "epoch": 0.09833894806535158, "grad_norm": 1.3515625, "learning_rate": 3.918353042651549e-05, "loss": 2.6927, "step": 505 }, { "epoch": 0.09853367865558, "grad_norm": 2.609375, "learning_rate": 3.918003917353355e-05, "loss": 2.7081, "step": 506 }, { "epoch": 0.09872840924580842, "grad_norm": 2.4375, "learning_rate": 3.917654062831343e-05, "loss": 2.6944, "step": 507 }, { "epoch": 0.09892313983603684, "grad_norm": 2.015625, "learning_rate": 3.9173034792185286e-05, "loss": 2.6908, "step": 508 }, { "epoch": 0.09911787042626526, "grad_norm": 1.953125, "learning_rate": 3.916952166648202e-05, "loss": 2.6684, "step": 509 }, { "epoch": 0.09931260101649368, "grad_norm": 1.640625, "learning_rate": 3.916600125253934e-05, "loss": 2.6886, "step": 510 }, { "epoch": 0.0995073316067221, "grad_norm": 1.5, "learning_rate": 3.9162473551695695e-05, "loss": 2.6996, "step": 511 }, { "epoch": 0.09970206219695052, "grad_norm": 4.53125, "learning_rate": 3.915893856529231e-05, "loss": 2.7046, "step": 512 }, { "epoch": 0.09989679278717893, "grad_norm": 3.40625, "learning_rate": 3.915539629467319e-05, "loss": 2.7045, "step": 513 }, { "epoch": 0.10009152337740736, "grad_norm": 2.984375, "learning_rate": 3.915184674118511e-05, "loss": 2.7074, "step": 514 }, { "epoch": 0.10028625396763578, "grad_norm": 2.578125, "learning_rate": 3.9148289906177595e-05, "loss": 2.6798, "step": 515 }, { "epoch": 0.1004809845578642, "grad_norm": 2.46875, "learning_rate": 3.914472579100296e-05, "loss": 2.6958, "step": 516 }, { "epoch": 0.10067571514809262, "grad_norm": 1.90625, "learning_rate": 3.9141154397016273e-05, "loss": 2.6877, "step": 517 }, { "epoch": 0.10087044573832103, "grad_norm": 1.9921875, "learning_rate": 3.9137575725575376e-05, "loss": 2.6889, "step": 518 }, { "epoch": 0.10106517632854946, "grad_norm": 1.0, "learning_rate": 3.9133989778040885e-05, "loss": 2.692, "step": 519 }, { "epoch": 0.10125990691877787, "grad_norm": 2.046875, "learning_rate": 3.913039655577617e-05, "loss": 2.7028, "step": 520 }, { "epoch": 0.1014546375090063, "grad_norm": 1.21875, "learning_rate": 3.912679606014737e-05, "loss": 2.705, "step": 521 }, { "epoch": 0.10164936809923471, "grad_norm": 2.484375, "learning_rate": 3.9123188292523396e-05, "loss": 2.6904, "step": 522 }, { "epoch": 0.10184409868946313, "grad_norm": 1.8515625, "learning_rate": 3.911957325427592e-05, "loss": 2.6709, "step": 523 }, { "epoch": 0.10203882927969155, "grad_norm": 2.84375, "learning_rate": 3.911595094677937e-05, "loss": 2.6937, "step": 524 }, { "epoch": 0.10223355986991997, "grad_norm": 2.734375, "learning_rate": 3.911232137141094e-05, "loss": 2.6945, "step": 525 }, { "epoch": 0.10242829046014838, "grad_norm": 1.71875, "learning_rate": 3.910868452955061e-05, "loss": 2.6891, "step": 526 }, { "epoch": 0.10262302105037681, "grad_norm": 1.78125, "learning_rate": 3.91050404225811e-05, "loss": 2.7044, "step": 527 }, { "epoch": 0.10281775164060522, "grad_norm": 1.625, "learning_rate": 3.9101389051887884e-05, "loss": 2.6946, "step": 528 }, { "epoch": 0.10301248223083365, "grad_norm": 1.453125, "learning_rate": 3.9097730418859214e-05, "loss": 2.6905, "step": 529 }, { "epoch": 0.10320721282106206, "grad_norm": 1.9453125, "learning_rate": 3.909406452488611e-05, "loss": 2.6865, "step": 530 }, { "epoch": 0.10340194341129048, "grad_norm": 1.3515625, "learning_rate": 3.909039137136234e-05, "loss": 2.6897, "step": 531 }, { "epoch": 0.1035966740015189, "grad_norm": 2.8125, "learning_rate": 3.908671095968443e-05, "loss": 2.7116, "step": 532 }, { "epoch": 0.10379140459174732, "grad_norm": 2.4375, "learning_rate": 3.908302329125168e-05, "loss": 2.6837, "step": 533 }, { "epoch": 0.10398613518197573, "grad_norm": 2.125, "learning_rate": 3.9079328367466115e-05, "loss": 2.7053, "step": 534 }, { "epoch": 0.10418086577220416, "grad_norm": 2.0625, "learning_rate": 3.907562618973257e-05, "loss": 2.679, "step": 535 }, { "epoch": 0.10437559636243257, "grad_norm": 1.84375, "learning_rate": 3.9071916759458595e-05, "loss": 2.7068, "step": 536 }, { "epoch": 0.104570326952661, "grad_norm": 1.609375, "learning_rate": 3.906820007805451e-05, "loss": 2.6759, "step": 537 }, { "epoch": 0.10476505754288941, "grad_norm": 2.0625, "learning_rate": 3.906447614693341e-05, "loss": 2.6758, "step": 538 }, { "epoch": 0.10495978813311783, "grad_norm": 1.7421875, "learning_rate": 3.906074496751112e-05, "loss": 2.6989, "step": 539 }, { "epoch": 0.10515451872334625, "grad_norm": 2.046875, "learning_rate": 3.905700654120624e-05, "loss": 2.6617, "step": 540 }, { "epoch": 0.10534924931357467, "grad_norm": 1.7890625, "learning_rate": 3.905326086944011e-05, "loss": 2.6789, "step": 541 }, { "epoch": 0.10554397990380308, "grad_norm": 1.890625, "learning_rate": 3.904950795363683e-05, "loss": 2.6862, "step": 542 }, { "epoch": 0.10573871049403151, "grad_norm": 1.546875, "learning_rate": 3.904574779522326e-05, "loss": 2.6735, "step": 543 }, { "epoch": 0.10593344108425992, "grad_norm": 2.109375, "learning_rate": 3.904198039562902e-05, "loss": 2.6881, "step": 544 }, { "epoch": 0.10612817167448835, "grad_norm": 1.609375, "learning_rate": 3.9038205756286454e-05, "loss": 2.6647, "step": 545 }, { "epoch": 0.10632290226471676, "grad_norm": 2.65625, "learning_rate": 3.903442387863069e-05, "loss": 2.6868, "step": 546 }, { "epoch": 0.10651763285494519, "grad_norm": 2.46875, "learning_rate": 3.9030634764099584e-05, "loss": 2.6766, "step": 547 }, { "epoch": 0.1067123634451736, "grad_norm": 1.859375, "learning_rate": 3.902683841413377e-05, "loss": 2.686, "step": 548 }, { "epoch": 0.10690709403540202, "grad_norm": 1.875, "learning_rate": 3.902303483017661e-05, "loss": 2.6768, "step": 549 }, { "epoch": 0.10710182462563043, "grad_norm": 1.640625, "learning_rate": 3.9019224013674215e-05, "loss": 2.6748, "step": 550 }, { "epoch": 0.10729655521585886, "grad_norm": 1.4140625, "learning_rate": 3.901540596607547e-05, "loss": 2.6757, "step": 551 }, { "epoch": 0.10749128580608727, "grad_norm": 2.078125, "learning_rate": 3.901158068883199e-05, "loss": 2.7015, "step": 552 }, { "epoch": 0.1076860163963157, "grad_norm": 1.671875, "learning_rate": 3.9007748183398145e-05, "loss": 2.671, "step": 553 }, { "epoch": 0.10788074698654411, "grad_norm": 2.40625, "learning_rate": 3.9003908451231036e-05, "loss": 2.6531, "step": 554 }, { "epoch": 0.10807547757677254, "grad_norm": 2.3125, "learning_rate": 3.900006149379054e-05, "loss": 2.6771, "step": 555 }, { "epoch": 0.10827020816700095, "grad_norm": 1.5625, "learning_rate": 3.899620731253927e-05, "loss": 2.6793, "step": 556 }, { "epoch": 0.10846493875722937, "grad_norm": 1.6640625, "learning_rate": 3.899234590894258e-05, "loss": 2.696, "step": 557 }, { "epoch": 0.1086596693474578, "grad_norm": 1.71875, "learning_rate": 3.8988477284468566e-05, "loss": 2.6835, "step": 558 }, { "epoch": 0.10885439993768621, "grad_norm": 1.3671875, "learning_rate": 3.8984601440588086e-05, "loss": 2.6724, "step": 559 }, { "epoch": 0.10904913052791464, "grad_norm": 2.21875, "learning_rate": 3.898071837877472e-05, "loss": 2.6852, "step": 560 }, { "epoch": 0.10924386111814305, "grad_norm": 1.890625, "learning_rate": 3.897682810050483e-05, "loss": 2.6818, "step": 561 }, { "epoch": 0.10943859170837147, "grad_norm": 2.078125, "learning_rate": 3.897293060725747e-05, "loss": 2.6863, "step": 562 }, { "epoch": 0.10963332229859989, "grad_norm": 1.984375, "learning_rate": 3.8969025900514486e-05, "loss": 2.6638, "step": 563 }, { "epoch": 0.10982805288882831, "grad_norm": 1.4140625, "learning_rate": 3.896511398176043e-05, "loss": 2.679, "step": 564 }, { "epoch": 0.11002278347905672, "grad_norm": 1.4453125, "learning_rate": 3.8961194852482614e-05, "loss": 2.6811, "step": 565 }, { "epoch": 0.11021751406928515, "grad_norm": 1.84375, "learning_rate": 3.8957268514171096e-05, "loss": 2.6766, "step": 566 }, { "epoch": 0.11041224465951356, "grad_norm": 1.4296875, "learning_rate": 3.895333496831866e-05, "loss": 2.6819, "step": 567 }, { "epoch": 0.11060697524974199, "grad_norm": 2.515625, "learning_rate": 3.8949394216420835e-05, "loss": 2.6757, "step": 568 }, { "epoch": 0.1108017058399704, "grad_norm": 2.359375, "learning_rate": 3.89454462599759e-05, "loss": 2.6869, "step": 569 }, { "epoch": 0.11099643643019882, "grad_norm": 1.484375, "learning_rate": 3.8941491100484855e-05, "loss": 2.683, "step": 570 }, { "epoch": 0.11119116702042724, "grad_norm": 1.6484375, "learning_rate": 3.893752873945147e-05, "loss": 2.6797, "step": 571 }, { "epoch": 0.11138589761065566, "grad_norm": 1.5859375, "learning_rate": 3.893355917838221e-05, "loss": 2.6708, "step": 572 }, { "epoch": 0.11158062820088407, "grad_norm": 1.265625, "learning_rate": 3.892958241878631e-05, "loss": 2.6459, "step": 573 }, { "epoch": 0.1117753587911125, "grad_norm": 2.40625, "learning_rate": 3.892559846217572e-05, "loss": 2.6645, "step": 574 }, { "epoch": 0.11197008938134091, "grad_norm": 2.125, "learning_rate": 3.8921607310065154e-05, "loss": 2.6589, "step": 575 }, { "epoch": 0.11216481997156934, "grad_norm": 1.7734375, "learning_rate": 3.891760896397204e-05, "loss": 2.6633, "step": 576 }, { "epoch": 0.11235955056179775, "grad_norm": 1.765625, "learning_rate": 3.8913603425416546e-05, "loss": 2.6846, "step": 577 }, { "epoch": 0.11255428115202618, "grad_norm": 1.46875, "learning_rate": 3.890959069592157e-05, "loss": 2.6624, "step": 578 }, { "epoch": 0.11274901174225459, "grad_norm": 1.25, "learning_rate": 3.890557077701275e-05, "loss": 2.6788, "step": 579 }, { "epoch": 0.11294374233248301, "grad_norm": 2.0625, "learning_rate": 3.890154367021846e-05, "loss": 2.6805, "step": 580 }, { "epoch": 0.11313847292271142, "grad_norm": 1.71875, "learning_rate": 3.8897509377069804e-05, "loss": 2.6679, "step": 581 }, { "epoch": 0.11333320351293985, "grad_norm": 2.1875, "learning_rate": 3.889346789910062e-05, "loss": 2.6729, "step": 582 }, { "epoch": 0.11352793410316826, "grad_norm": 2.0625, "learning_rate": 3.888941923784747e-05, "loss": 2.6516, "step": 583 }, { "epoch": 0.11372266469339669, "grad_norm": 1.390625, "learning_rate": 3.888536339484965e-05, "loss": 2.6774, "step": 584 }, { "epoch": 0.1139173952836251, "grad_norm": 1.40625, "learning_rate": 3.888130037164919e-05, "loss": 2.646, "step": 585 }, { "epoch": 0.11411212587385353, "grad_norm": 1.625, "learning_rate": 3.887723016979086e-05, "loss": 2.673, "step": 586 }, { "epoch": 0.11430685646408194, "grad_norm": 1.2578125, "learning_rate": 3.887315279082213e-05, "loss": 2.6507, "step": 587 }, { "epoch": 0.11450158705431036, "grad_norm": 2.46875, "learning_rate": 3.886906823629323e-05, "loss": 2.6773, "step": 588 }, { "epoch": 0.11469631764453878, "grad_norm": 2.296875, "learning_rate": 3.88649765077571e-05, "loss": 2.6679, "step": 589 }, { "epoch": 0.1148910482347672, "grad_norm": 1.53125, "learning_rate": 3.886087760676942e-05, "loss": 2.6876, "step": 590 }, { "epoch": 0.11508577882499561, "grad_norm": 1.734375, "learning_rate": 3.885677153488858e-05, "loss": 2.687, "step": 591 }, { "epoch": 0.11528050941522404, "grad_norm": 1.4296875, "learning_rate": 3.88526582936757e-05, "loss": 2.6523, "step": 592 }, { "epoch": 0.11547524000545245, "grad_norm": 1.2421875, "learning_rate": 3.884853788469465e-05, "loss": 2.6543, "step": 593 }, { "epoch": 0.11566997059568088, "grad_norm": 1.984375, "learning_rate": 3.884441030951199e-05, "loss": 2.6719, "step": 594 }, { "epoch": 0.11586470118590929, "grad_norm": 1.6015625, "learning_rate": 3.884027556969703e-05, "loss": 2.6796, "step": 595 }, { "epoch": 0.11605943177613771, "grad_norm": 2.328125, "learning_rate": 3.88361336668218e-05, "loss": 2.6492, "step": 596 }, { "epoch": 0.11625416236636613, "grad_norm": 2.171875, "learning_rate": 3.883198460246104e-05, "loss": 2.679, "step": 597 }, { "epoch": 0.11644889295659455, "grad_norm": 1.3984375, "learning_rate": 3.882782837819222e-05, "loss": 2.6618, "step": 598 }, { "epoch": 0.11664362354682296, "grad_norm": 1.390625, "learning_rate": 3.882366499559554e-05, "loss": 2.6688, "step": 599 }, { "epoch": 0.11683835413705139, "grad_norm": 1.5546875, "learning_rate": 3.881949445625391e-05, "loss": 2.6594, "step": 600 }, { "epoch": 0.11703308472727982, "grad_norm": 1.171875, "learning_rate": 3.881531676175297e-05, "loss": 2.6838, "step": 601 }, { "epoch": 0.11722781531750823, "grad_norm": 2.28125, "learning_rate": 3.881113191368108e-05, "loss": 2.661, "step": 602 }, { "epoch": 0.11742254590773665, "grad_norm": 2.046875, "learning_rate": 3.880693991362931e-05, "loss": 2.6572, "step": 603 }, { "epoch": 0.11761727649796506, "grad_norm": 1.703125, "learning_rate": 3.880274076319146e-05, "loss": 2.6652, "step": 604 }, { "epoch": 0.11781200708819349, "grad_norm": 1.8125, "learning_rate": 3.8798534463964036e-05, "loss": 2.6715, "step": 605 }, { "epoch": 0.1180067376784219, "grad_norm": 1.421875, "learning_rate": 3.879432101754629e-05, "loss": 2.663, "step": 606 }, { "epoch": 0.11820146826865033, "grad_norm": 1.2578125, "learning_rate": 3.8790100425540144e-05, "loss": 2.6672, "step": 607 }, { "epoch": 0.11839619885887874, "grad_norm": 1.796875, "learning_rate": 3.878587268955028e-05, "loss": 2.6639, "step": 608 }, { "epoch": 0.11859092944910717, "grad_norm": 1.3515625, "learning_rate": 3.878163781118408e-05, "loss": 2.6562, "step": 609 }, { "epoch": 0.11878566003933558, "grad_norm": 2.140625, "learning_rate": 3.8777395792051644e-05, "loss": 2.6381, "step": 610 }, { "epoch": 0.118980390629564, "grad_norm": 1.953125, "learning_rate": 3.877314663376578e-05, "loss": 2.6648, "step": 611 }, { "epoch": 0.11917512121979242, "grad_norm": 1.6171875, "learning_rate": 3.8768890337942016e-05, "loss": 2.6649, "step": 612 }, { "epoch": 0.11936985181002084, "grad_norm": 1.5, "learning_rate": 3.8764626906198594e-05, "loss": 2.6466, "step": 613 }, { "epoch": 0.11956458240024925, "grad_norm": 1.859375, "learning_rate": 3.876035634015647e-05, "loss": 2.6616, "step": 614 }, { "epoch": 0.11975931299047768, "grad_norm": 1.515625, "learning_rate": 3.875607864143931e-05, "loss": 2.6442, "step": 615 }, { "epoch": 0.11995404358070609, "grad_norm": 2.15625, "learning_rate": 3.875179381167348e-05, "loss": 2.6782, "step": 616 }, { "epoch": 0.12014877417093452, "grad_norm": 1.8984375, "learning_rate": 3.874750185248808e-05, "loss": 2.6581, "step": 617 }, { "epoch": 0.12034350476116293, "grad_norm": 1.6875, "learning_rate": 3.8743202765514914e-05, "loss": 2.6498, "step": 618 }, { "epoch": 0.12053823535139135, "grad_norm": 1.4765625, "learning_rate": 3.873889655238848e-05, "loss": 2.6659, "step": 619 }, { "epoch": 0.12073296594161977, "grad_norm": 1.625, "learning_rate": 3.873458321474602e-05, "loss": 2.6682, "step": 620 }, { "epoch": 0.12092769653184819, "grad_norm": 1.3046875, "learning_rate": 3.873026275422744e-05, "loss": 2.644, "step": 621 }, { "epoch": 0.1211224271220766, "grad_norm": 2.046875, "learning_rate": 3.872593517247538e-05, "loss": 2.6594, "step": 622 }, { "epoch": 0.12131715771230503, "grad_norm": 1.796875, "learning_rate": 3.872160047113519e-05, "loss": 2.6616, "step": 623 }, { "epoch": 0.12151188830253344, "grad_norm": 1.890625, "learning_rate": 3.8717258651854914e-05, "loss": 2.6474, "step": 624 }, { "epoch": 0.12170661889276187, "grad_norm": 1.734375, "learning_rate": 3.871290971628532e-05, "loss": 2.6511, "step": 625 }, { "epoch": 0.12190134948299028, "grad_norm": 1.7265625, "learning_rate": 3.870855366607986e-05, "loss": 2.6804, "step": 626 }, { "epoch": 0.1220960800732187, "grad_norm": 1.5390625, "learning_rate": 3.8704190502894696e-05, "loss": 2.6632, "step": 627 }, { "epoch": 0.12229081066344712, "grad_norm": 1.8515625, "learning_rate": 3.869982022838871e-05, "loss": 2.6615, "step": 628 }, { "epoch": 0.12248554125367554, "grad_norm": 1.609375, "learning_rate": 3.8695442844223485e-05, "loss": 2.6748, "step": 629 }, { "epoch": 0.12268027184390395, "grad_norm": 2.140625, "learning_rate": 3.869105835206328e-05, "loss": 2.6571, "step": 630 }, { "epoch": 0.12287500243413238, "grad_norm": 1.8828125, "learning_rate": 3.868666675357509e-05, "loss": 2.6524, "step": 631 }, { "epoch": 0.12306973302436079, "grad_norm": 1.8359375, "learning_rate": 3.8682268050428594e-05, "loss": 2.6533, "step": 632 }, { "epoch": 0.12326446361458922, "grad_norm": 1.609375, "learning_rate": 3.8677862244296175e-05, "loss": 2.655, "step": 633 }, { "epoch": 0.12345919420481763, "grad_norm": 1.7578125, "learning_rate": 3.8673449336852916e-05, "loss": 2.6527, "step": 634 }, { "epoch": 0.12365392479504606, "grad_norm": 1.4375, "learning_rate": 3.866902932977661e-05, "loss": 2.6763, "step": 635 }, { "epoch": 0.12384865538527447, "grad_norm": 2.046875, "learning_rate": 3.866460222474773e-05, "loss": 2.6608, "step": 636 }, { "epoch": 0.12404338597550289, "grad_norm": 1.6484375, "learning_rate": 3.866016802344946e-05, "loss": 2.6479, "step": 637 }, { "epoch": 0.1242381165657313, "grad_norm": 2.25, "learning_rate": 3.865572672756769e-05, "loss": 2.656, "step": 638 }, { "epoch": 0.12443284715595973, "grad_norm": 1.9375, "learning_rate": 3.865127833879099e-05, "loss": 2.6697, "step": 639 }, { "epoch": 0.12462757774618814, "grad_norm": 1.96875, "learning_rate": 3.864682285881063e-05, "loss": 2.6664, "step": 640 }, { "epoch": 0.12482230833641657, "grad_norm": 1.7265625, "learning_rate": 3.864236028932059e-05, "loss": 2.662, "step": 641 }, { "epoch": 0.12501703892664498, "grad_norm": 2.015625, "learning_rate": 3.8637890632017534e-05, "loss": 2.6386, "step": 642 }, { "epoch": 0.1252117695168734, "grad_norm": 1.640625, "learning_rate": 3.8633413888600815e-05, "loss": 2.6466, "step": 643 }, { "epoch": 0.12540650010710183, "grad_norm": 2.3125, "learning_rate": 3.86289300607725e-05, "loss": 2.665, "step": 644 }, { "epoch": 0.12560123069733026, "grad_norm": 1.875, "learning_rate": 3.862443915023733e-05, "loss": 2.6505, "step": 645 }, { "epoch": 0.12579596128755866, "grad_norm": 2.34375, "learning_rate": 3.861994115870274e-05, "loss": 2.6715, "step": 646 }, { "epoch": 0.12599069187778708, "grad_norm": 2.265625, "learning_rate": 3.861543608787888e-05, "loss": 2.6575, "step": 647 }, { "epoch": 0.1261854224680155, "grad_norm": 1.71875, "learning_rate": 3.861092393947856e-05, "loss": 2.6604, "step": 648 }, { "epoch": 0.12638015305824393, "grad_norm": 1.6484375, "learning_rate": 3.86064047152173e-05, "loss": 2.6351, "step": 649 }, { "epoch": 0.12657488364847233, "grad_norm": 1.8359375, "learning_rate": 3.860187841681331e-05, "loss": 2.6526, "step": 650 }, { "epoch": 0.12676961423870076, "grad_norm": 1.46875, "learning_rate": 3.8597345045987486e-05, "loss": 2.664, "step": 651 }, { "epoch": 0.12696434482892918, "grad_norm": 2.46875, "learning_rate": 3.8592804604463404e-05, "loss": 2.6433, "step": 652 }, { "epoch": 0.1271590754191576, "grad_norm": 2.28125, "learning_rate": 3.858825709396735e-05, "loss": 2.6552, "step": 653 }, { "epoch": 0.127353806009386, "grad_norm": 1.796875, "learning_rate": 3.858370251622827e-05, "loss": 2.6661, "step": 654 }, { "epoch": 0.12754853659961443, "grad_norm": 1.8203125, "learning_rate": 3.857914087297782e-05, "loss": 2.6458, "step": 655 }, { "epoch": 0.12774326718984286, "grad_norm": 1.828125, "learning_rate": 3.857457216595033e-05, "loss": 2.6577, "step": 656 }, { "epoch": 0.12793799778007128, "grad_norm": 1.5703125, "learning_rate": 3.856999639688283e-05, "loss": 2.6389, "step": 657 }, { "epoch": 0.12813272837029968, "grad_norm": 2.109375, "learning_rate": 3.8565413567515006e-05, "loss": 2.6449, "step": 658 }, { "epoch": 0.1283274589605281, "grad_norm": 1.9375, "learning_rate": 3.856082367958927e-05, "loss": 2.6369, "step": 659 }, { "epoch": 0.12852218955075653, "grad_norm": 1.953125, "learning_rate": 3.855622673485066e-05, "loss": 2.632, "step": 660 }, { "epoch": 0.12871692014098496, "grad_norm": 1.859375, "learning_rate": 3.855162273504697e-05, "loss": 2.6554, "step": 661 }, { "epoch": 0.12891165073121336, "grad_norm": 1.6875, "learning_rate": 3.854701168192862e-05, "loss": 2.6512, "step": 662 }, { "epoch": 0.12910638132144178, "grad_norm": 1.59375, "learning_rate": 3.854239357724872e-05, "loss": 2.6367, "step": 663 }, { "epoch": 0.1293011119116702, "grad_norm": 1.9140625, "learning_rate": 3.853776842276308e-05, "loss": 2.6361, "step": 664 }, { "epoch": 0.12949584250189863, "grad_norm": 1.546875, "learning_rate": 3.853313622023019e-05, "loss": 2.6443, "step": 665 }, { "epoch": 0.12969057309212703, "grad_norm": 2.03125, "learning_rate": 3.8528496971411195e-05, "loss": 2.6386, "step": 666 }, { "epoch": 0.12988530368235546, "grad_norm": 1.8984375, "learning_rate": 3.852385067806994e-05, "loss": 2.6532, "step": 667 }, { "epoch": 0.13008003427258388, "grad_norm": 1.625, "learning_rate": 3.851919734197294e-05, "loss": 2.626, "step": 668 }, { "epoch": 0.1302747648628123, "grad_norm": 1.59375, "learning_rate": 3.851453696488939e-05, "loss": 2.6581, "step": 669 }, { "epoch": 0.1304694954530407, "grad_norm": 1.8046875, "learning_rate": 3.850986954859118e-05, "loss": 2.6416, "step": 670 }, { "epoch": 0.13066422604326913, "grad_norm": 1.40625, "learning_rate": 3.850519509485283e-05, "loss": 2.6329, "step": 671 }, { "epoch": 0.13085895663349756, "grad_norm": 2.390625, "learning_rate": 3.850051360545158e-05, "loss": 2.6494, "step": 672 }, { "epoch": 0.13105368722372598, "grad_norm": 2.0625, "learning_rate": 3.849582508216733e-05, "loss": 2.6599, "step": 673 }, { "epoch": 0.13124841781395438, "grad_norm": 1.8203125, "learning_rate": 3.849112952678265e-05, "loss": 2.6392, "step": 674 }, { "epoch": 0.1314431484041828, "grad_norm": 1.78125, "learning_rate": 3.848642694108279e-05, "loss": 2.6531, "step": 675 }, { "epoch": 0.13163787899441123, "grad_norm": 1.5234375, "learning_rate": 3.848171732685566e-05, "loss": 2.6283, "step": 676 }, { "epoch": 0.13183260958463966, "grad_norm": 1.28125, "learning_rate": 3.8477000685891866e-05, "loss": 2.6424, "step": 677 }, { "epoch": 0.13202734017486806, "grad_norm": 2.203125, "learning_rate": 3.847227701998467e-05, "loss": 2.653, "step": 678 }, { "epoch": 0.13222207076509648, "grad_norm": 1.859375, "learning_rate": 3.8467546330929995e-05, "loss": 2.6495, "step": 679 }, { "epoch": 0.1324168013553249, "grad_norm": 2.265625, "learning_rate": 3.846280862052646e-05, "loss": 2.6455, "step": 680 }, { "epoch": 0.13261153194555333, "grad_norm": 2.265625, "learning_rate": 3.845806389057532e-05, "loss": 2.6616, "step": 681 }, { "epoch": 0.13280626253578173, "grad_norm": 1.2421875, "learning_rate": 3.845331214288054e-05, "loss": 2.6401, "step": 682 }, { "epoch": 0.13300099312601016, "grad_norm": 1.359375, "learning_rate": 3.8448553379248724e-05, "loss": 2.6653, "step": 683 }, { "epoch": 0.13319572371623858, "grad_norm": 1.6875, "learning_rate": 3.8443787601489145e-05, "loss": 2.6463, "step": 684 }, { "epoch": 0.133390454306467, "grad_norm": 1.25, "learning_rate": 3.843901481141375e-05, "loss": 2.63, "step": 685 }, { "epoch": 0.1335851848966954, "grad_norm": 2.84375, "learning_rate": 3.843423501083715e-05, "loss": 2.6326, "step": 686 }, { "epoch": 0.13377991548692383, "grad_norm": 2.8125, "learning_rate": 3.842944820157663e-05, "loss": 2.6498, "step": 687 }, { "epoch": 0.13397464607715226, "grad_norm": 1.03125, "learning_rate": 3.842465438545213e-05, "loss": 2.6426, "step": 688 }, { "epoch": 0.13416937666738069, "grad_norm": 1.6875, "learning_rate": 3.841985356428625e-05, "loss": 2.651, "step": 689 }, { "epoch": 0.1343641072576091, "grad_norm": 1.109375, "learning_rate": 3.841504573990426e-05, "loss": 2.6384, "step": 690 }, { "epoch": 0.1345588378478375, "grad_norm": 1.3671875, "learning_rate": 3.841023091413409e-05, "loss": 2.6358, "step": 691 }, { "epoch": 0.13475356843806593, "grad_norm": 1.28125, "learning_rate": 3.840540908880634e-05, "loss": 2.6452, "step": 692 }, { "epoch": 0.13494829902829436, "grad_norm": 1.1171875, "learning_rate": 3.840058026575426e-05, "loss": 2.6144, "step": 693 }, { "epoch": 0.1351430296185228, "grad_norm": 1.40625, "learning_rate": 3.8395744446813765e-05, "loss": 2.6419, "step": 694 }, { "epoch": 0.13533776020875118, "grad_norm": 0.99609375, "learning_rate": 3.839090163382344e-05, "loss": 2.6616, "step": 695 }, { "epoch": 0.1355324907989796, "grad_norm": 1.8046875, "learning_rate": 3.83860518286245e-05, "loss": 2.6595, "step": 696 }, { "epoch": 0.13572722138920804, "grad_norm": 1.1484375, "learning_rate": 3.838119503306086e-05, "loss": 2.663, "step": 697 }, { "epoch": 0.13592195197943646, "grad_norm": 1.90625, "learning_rate": 3.837633124897905e-05, "loss": 2.6043, "step": 698 }, { "epoch": 0.13611668256966486, "grad_norm": 1.5078125, "learning_rate": 3.837146047822829e-05, "loss": 2.649, "step": 699 }, { "epoch": 0.13631141315989329, "grad_norm": 1.9609375, "learning_rate": 3.836658272266044e-05, "loss": 2.6512, "step": 700 }, { "epoch": 0.1365061437501217, "grad_norm": 1.625, "learning_rate": 3.836169798413002e-05, "loss": 2.6409, "step": 701 }, { "epoch": 0.13670087434035014, "grad_norm": 1.9296875, "learning_rate": 3.8356806264494205e-05, "loss": 2.6307, "step": 702 }, { "epoch": 0.13689560493057853, "grad_norm": 1.546875, "learning_rate": 3.8351907565612824e-05, "loss": 2.6392, "step": 703 }, { "epoch": 0.13709033552080696, "grad_norm": 1.8203125, "learning_rate": 3.834700188934836e-05, "loss": 2.6623, "step": 704 }, { "epoch": 0.1372850661110354, "grad_norm": 1.46875, "learning_rate": 3.834208923756594e-05, "loss": 2.6541, "step": 705 }, { "epoch": 0.1374797967012638, "grad_norm": 1.6640625, "learning_rate": 3.8337169612133355e-05, "loss": 2.64, "step": 706 }, { "epoch": 0.1376745272914922, "grad_norm": 1.4296875, "learning_rate": 3.833224301492105e-05, "loss": 2.6577, "step": 707 }, { "epoch": 0.13786925788172064, "grad_norm": 1.859375, "learning_rate": 3.8327309447802114e-05, "loss": 2.6638, "step": 708 }, { "epoch": 0.13806398847194906, "grad_norm": 1.3984375, "learning_rate": 3.8322368912652275e-05, "loss": 2.626, "step": 709 }, { "epoch": 0.1382587190621775, "grad_norm": 2.125, "learning_rate": 3.831742141134993e-05, "loss": 2.6703, "step": 710 }, { "epoch": 0.13845344965240589, "grad_norm": 1.84375, "learning_rate": 3.831246694577611e-05, "loss": 2.6508, "step": 711 }, { "epoch": 0.1386481802426343, "grad_norm": 2.0, "learning_rate": 3.83075055178145e-05, "loss": 2.6474, "step": 712 }, { "epoch": 0.13884291083286274, "grad_norm": 1.8046875, "learning_rate": 3.830253712935144e-05, "loss": 2.6394, "step": 713 }, { "epoch": 0.13903764142309116, "grad_norm": 1.6875, "learning_rate": 3.8297561782275893e-05, "loss": 2.6607, "step": 714 }, { "epoch": 0.13923237201331956, "grad_norm": 1.5, "learning_rate": 3.82925794784795e-05, "loss": 2.6444, "step": 715 }, { "epoch": 0.139427102603548, "grad_norm": 1.53125, "learning_rate": 3.8287590219856524e-05, "loss": 2.641, "step": 716 }, { "epoch": 0.1396218331937764, "grad_norm": 1.203125, "learning_rate": 3.8282594008303866e-05, "loss": 2.6586, "step": 717 }, { "epoch": 0.13981656378400484, "grad_norm": 1.4609375, "learning_rate": 3.82775908457211e-05, "loss": 2.6224, "step": 718 }, { "epoch": 0.14001129437423324, "grad_norm": 1.2265625, "learning_rate": 3.827258073401041e-05, "loss": 2.6356, "step": 719 }, { "epoch": 0.14020602496446166, "grad_norm": 1.546875, "learning_rate": 3.8267563675076644e-05, "loss": 2.6399, "step": 720 }, { "epoch": 0.1404007555546901, "grad_norm": 1.0859375, "learning_rate": 3.826253967082729e-05, "loss": 2.6377, "step": 721 }, { "epoch": 0.1405954861449185, "grad_norm": 1.9375, "learning_rate": 3.8257508723172464e-05, "loss": 2.6365, "step": 722 }, { "epoch": 0.1407902167351469, "grad_norm": 1.5625, "learning_rate": 3.8252470834024925e-05, "loss": 2.6582, "step": 723 }, { "epoch": 0.14098494732537534, "grad_norm": 2.0, "learning_rate": 3.824742600530008e-05, "loss": 2.633, "step": 724 }, { "epoch": 0.14117967791560376, "grad_norm": 1.84375, "learning_rate": 3.824237423891598e-05, "loss": 2.6319, "step": 725 }, { "epoch": 0.1413744085058322, "grad_norm": 1.5859375, "learning_rate": 3.823731553679328e-05, "loss": 2.6185, "step": 726 }, { "epoch": 0.1415691390960606, "grad_norm": 1.5625, "learning_rate": 3.8232249900855315e-05, "loss": 2.6416, "step": 727 }, { "epoch": 0.141763869686289, "grad_norm": 1.359375, "learning_rate": 3.8227177333028035e-05, "loss": 2.6204, "step": 728 }, { "epoch": 0.14195860027651744, "grad_norm": 1.40625, "learning_rate": 3.822209783524001e-05, "loss": 2.6509, "step": 729 }, { "epoch": 0.14215333086674586, "grad_norm": 1.328125, "learning_rate": 3.821701140942248e-05, "loss": 2.623, "step": 730 }, { "epoch": 0.1423480614569743, "grad_norm": 1.1875, "learning_rate": 3.821191805750929e-05, "loss": 2.6383, "step": 731 }, { "epoch": 0.1425427920472027, "grad_norm": 1.40625, "learning_rate": 3.820681778143694e-05, "loss": 2.6333, "step": 732 }, { "epoch": 0.1427375226374311, "grad_norm": 1.1640625, "learning_rate": 3.820171058314454e-05, "loss": 2.6126, "step": 733 }, { "epoch": 0.14293225322765954, "grad_norm": 1.484375, "learning_rate": 3.8196596464573857e-05, "loss": 2.6236, "step": 734 }, { "epoch": 0.14312698381788796, "grad_norm": 1.2109375, "learning_rate": 3.819147542766925e-05, "loss": 2.6436, "step": 735 }, { "epoch": 0.14332171440811636, "grad_norm": 1.5078125, "learning_rate": 3.8186347474377766e-05, "loss": 2.6375, "step": 736 }, { "epoch": 0.1435164449983448, "grad_norm": 1.171875, "learning_rate": 3.818121260664903e-05, "loss": 2.6299, "step": 737 }, { "epoch": 0.14371117558857321, "grad_norm": 1.5234375, "learning_rate": 3.8176070826435315e-05, "loss": 2.6281, "step": 738 }, { "epoch": 0.14390590617880164, "grad_norm": 1.4296875, "learning_rate": 3.8170922135691525e-05, "loss": 2.6311, "step": 739 }, { "epoch": 0.14410063676903004, "grad_norm": 1.40625, "learning_rate": 3.816576653637519e-05, "loss": 2.6281, "step": 740 }, { "epoch": 0.14429536735925846, "grad_norm": 1.2734375, "learning_rate": 3.816060403044647e-05, "loss": 2.6514, "step": 741 }, { "epoch": 0.1444900979494869, "grad_norm": 1.3125, "learning_rate": 3.815543461986814e-05, "loss": 2.657, "step": 742 }, { "epoch": 0.14468482853971532, "grad_norm": 1.15625, "learning_rate": 3.8150258306605616e-05, "loss": 2.6295, "step": 743 }, { "epoch": 0.1448795591299437, "grad_norm": 1.5390625, "learning_rate": 3.814507509262691e-05, "loss": 2.6369, "step": 744 }, { "epoch": 0.14507428972017214, "grad_norm": 1.171875, "learning_rate": 3.8139884979902696e-05, "loss": 2.6159, "step": 745 }, { "epoch": 0.14526902031040057, "grad_norm": 1.7109375, "learning_rate": 3.813468797040624e-05, "loss": 2.6359, "step": 746 }, { "epoch": 0.145463750900629, "grad_norm": 1.5390625, "learning_rate": 3.812948406611345e-05, "loss": 2.6319, "step": 747 }, { "epoch": 0.1456584814908574, "grad_norm": 1.421875, "learning_rate": 3.8124273269002834e-05, "loss": 2.6277, "step": 748 }, { "epoch": 0.14585321208108581, "grad_norm": 1.28125, "learning_rate": 3.8119055581055545e-05, "loss": 2.6151, "step": 749 }, { "epoch": 0.14604794267131424, "grad_norm": 1.0859375, "learning_rate": 3.8113831004255343e-05, "loss": 2.6108, "step": 750 }, { "epoch": 0.14624267326154267, "grad_norm": 1.2421875, "learning_rate": 3.8108599540588604e-05, "loss": 2.6297, "step": 751 }, { "epoch": 0.14643740385177106, "grad_norm": 0.95703125, "learning_rate": 3.810336119204433e-05, "loss": 2.6323, "step": 752 }, { "epoch": 0.1466321344419995, "grad_norm": 1.25, "learning_rate": 3.809811596061414e-05, "loss": 2.6296, "step": 753 }, { "epoch": 0.14682686503222792, "grad_norm": 1.1640625, "learning_rate": 3.8092863848292256e-05, "loss": 2.6193, "step": 754 }, { "epoch": 0.14702159562245634, "grad_norm": 1.4296875, "learning_rate": 3.808760485707555e-05, "loss": 2.6339, "step": 755 }, { "epoch": 0.14721632621268474, "grad_norm": 1.0625, "learning_rate": 3.8082338988963465e-05, "loss": 2.6367, "step": 756 }, { "epoch": 0.14741105680291317, "grad_norm": 1.359375, "learning_rate": 3.807706624595809e-05, "loss": 2.6058, "step": 757 }, { "epoch": 0.1476057873931416, "grad_norm": 1.0625, "learning_rate": 3.8071786630064115e-05, "loss": 2.6202, "step": 758 }, { "epoch": 0.14780051798337002, "grad_norm": 1.53125, "learning_rate": 3.8066500143288856e-05, "loss": 2.636, "step": 759 }, { "epoch": 0.14799524857359841, "grad_norm": 1.265625, "learning_rate": 3.806120678764222e-05, "loss": 2.6268, "step": 760 }, { "epoch": 0.14818997916382684, "grad_norm": 1.4609375, "learning_rate": 3.805590656513674e-05, "loss": 2.6347, "step": 761 }, { "epoch": 0.14838470975405527, "grad_norm": 1.2578125, "learning_rate": 3.8050599477787564e-05, "loss": 2.6117, "step": 762 }, { "epoch": 0.1485794403442837, "grad_norm": 1.34375, "learning_rate": 3.804528552761244e-05, "loss": 2.6346, "step": 763 }, { "epoch": 0.1487741709345121, "grad_norm": 1.2421875, "learning_rate": 3.803996471663171e-05, "loss": 2.6181, "step": 764 }, { "epoch": 0.14896890152474052, "grad_norm": 1.0546875, "learning_rate": 3.803463704686838e-05, "loss": 2.6196, "step": 765 }, { "epoch": 0.14916363211496894, "grad_norm": 1.3125, "learning_rate": 3.8029302520348e-05, "loss": 2.6245, "step": 766 }, { "epoch": 0.14935836270519737, "grad_norm": 1.15625, "learning_rate": 3.802396113909875e-05, "loss": 2.6178, "step": 767 }, { "epoch": 0.14955309329542577, "grad_norm": 1.21875, "learning_rate": 3.801861290515144e-05, "loss": 2.6309, "step": 768 }, { "epoch": 0.1497478238856542, "grad_norm": 0.94921875, "learning_rate": 3.801325782053945e-05, "loss": 2.6408, "step": 769 }, { "epoch": 0.14994255447588262, "grad_norm": 1.34375, "learning_rate": 3.800789588729878e-05, "loss": 2.6137, "step": 770 }, { "epoch": 0.15013728506611104, "grad_norm": 1.0859375, "learning_rate": 3.800252710746803e-05, "loss": 2.6187, "step": 771 }, { "epoch": 0.15033201565633944, "grad_norm": 1.2890625, "learning_rate": 3.7997151483088425e-05, "loss": 2.6183, "step": 772 }, { "epoch": 0.15052674624656787, "grad_norm": 1.0390625, "learning_rate": 3.799176901620377e-05, "loss": 2.6231, "step": 773 }, { "epoch": 0.1507214768367963, "grad_norm": 1.5, "learning_rate": 3.798637970886045e-05, "loss": 2.6136, "step": 774 }, { "epoch": 0.15091620742702472, "grad_norm": 1.234375, "learning_rate": 3.7980983563107505e-05, "loss": 2.6364, "step": 775 }, { "epoch": 0.15111093801725314, "grad_norm": 1.4453125, "learning_rate": 3.797558058099653e-05, "loss": 2.6123, "step": 776 }, { "epoch": 0.15130566860748154, "grad_norm": 1.265625, "learning_rate": 3.797017076458174e-05, "loss": 2.6155, "step": 777 }, { "epoch": 0.15150039919770997, "grad_norm": 1.3515625, "learning_rate": 3.796475411591995e-05, "loss": 2.6195, "step": 778 }, { "epoch": 0.1516951297879384, "grad_norm": 1.2265625, "learning_rate": 3.7959330637070545e-05, "loss": 2.6241, "step": 779 }, { "epoch": 0.15188986037816682, "grad_norm": 1.1640625, "learning_rate": 3.795390033009556e-05, "loss": 2.6329, "step": 780 }, { "epoch": 0.15208459096839522, "grad_norm": 1.046875, "learning_rate": 3.794846319705957e-05, "loss": 2.6221, "step": 781 }, { "epoch": 0.15227932155862364, "grad_norm": 1.2109375, "learning_rate": 3.794301924002977e-05, "loss": 2.6052, "step": 782 }, { "epoch": 0.15247405214885207, "grad_norm": 1.1015625, "learning_rate": 3.793756846107596e-05, "loss": 2.6014, "step": 783 }, { "epoch": 0.1526687827390805, "grad_norm": 1.3671875, "learning_rate": 3.793211086227052e-05, "loss": 2.6186, "step": 784 }, { "epoch": 0.1528635133293089, "grad_norm": 0.9296875, "learning_rate": 3.792664644568842e-05, "loss": 2.6166, "step": 785 }, { "epoch": 0.15305824391953732, "grad_norm": 1.5625, "learning_rate": 3.792117521340723e-05, "loss": 2.5918, "step": 786 }, { "epoch": 0.15325297450976574, "grad_norm": 1.328125, "learning_rate": 3.791569716750711e-05, "loss": 2.6242, "step": 787 }, { "epoch": 0.15344770509999417, "grad_norm": 1.1640625, "learning_rate": 3.791021231007082e-05, "loss": 2.6234, "step": 788 }, { "epoch": 0.15364243569022257, "grad_norm": 1.4453125, "learning_rate": 3.790472064318367e-05, "loss": 2.6173, "step": 789 }, { "epoch": 0.153837166280451, "grad_norm": 0.8828125, "learning_rate": 3.789922216893362e-05, "loss": 2.5962, "step": 790 }, { "epoch": 0.15403189687067942, "grad_norm": 1.390625, "learning_rate": 3.789371688941117e-05, "loss": 2.5859, "step": 791 }, { "epoch": 0.15422662746090784, "grad_norm": 0.90625, "learning_rate": 3.788820480670942e-05, "loss": 2.6311, "step": 792 }, { "epoch": 0.15442135805113624, "grad_norm": 1.3671875, "learning_rate": 3.788268592292407e-05, "loss": 2.6127, "step": 793 }, { "epoch": 0.15461608864136467, "grad_norm": 1.171875, "learning_rate": 3.787716024015339e-05, "loss": 2.6194, "step": 794 }, { "epoch": 0.1548108192315931, "grad_norm": 0.9765625, "learning_rate": 3.787162776049825e-05, "loss": 2.625, "step": 795 }, { "epoch": 0.15500554982182152, "grad_norm": 1.265625, "learning_rate": 3.786608848606208e-05, "loss": 2.6314, "step": 796 }, { "epoch": 0.15520028041204992, "grad_norm": 0.95703125, "learning_rate": 3.786054241895092e-05, "loss": 2.6172, "step": 797 }, { "epoch": 0.15539501100227834, "grad_norm": 1.3828125, "learning_rate": 3.785498956127338e-05, "loss": 2.6323, "step": 798 }, { "epoch": 0.15558974159250677, "grad_norm": 0.94140625, "learning_rate": 3.784942991514065e-05, "loss": 2.6157, "step": 799 }, { "epoch": 0.1557844721827352, "grad_norm": 1.3125, "learning_rate": 3.78438634826665e-05, "loss": 2.6031, "step": 800 }, { "epoch": 0.1559792027729636, "grad_norm": 1.171875, "learning_rate": 3.7838290265967294e-05, "loss": 2.6035, "step": 801 }, { "epoch": 0.15617393336319202, "grad_norm": 1.2265625, "learning_rate": 3.783271026716196e-05, "loss": 2.6212, "step": 802 }, { "epoch": 0.15636866395342044, "grad_norm": 1.3671875, "learning_rate": 3.7827123488372e-05, "loss": 2.6063, "step": 803 }, { "epoch": 0.15656339454364887, "grad_norm": 1.0703125, "learning_rate": 3.782152993172153e-05, "loss": 2.6018, "step": 804 }, { "epoch": 0.15675812513387727, "grad_norm": 1.96875, "learning_rate": 3.781592959933719e-05, "loss": 2.5956, "step": 805 }, { "epoch": 0.1569528557241057, "grad_norm": 1.359375, "learning_rate": 3.781032249334823e-05, "loss": 2.5966, "step": 806 }, { "epoch": 0.15714758631433412, "grad_norm": 2.40625, "learning_rate": 3.7804708615886475e-05, "loss": 2.6398, "step": 807 }, { "epoch": 0.15734231690456255, "grad_norm": 1.953125, "learning_rate": 3.77990879690863e-05, "loss": 2.6086, "step": 808 }, { "epoch": 0.15753704749479094, "grad_norm": 2.28125, "learning_rate": 3.779346055508469e-05, "loss": 2.6323, "step": 809 }, { "epoch": 0.15773177808501937, "grad_norm": 2.140625, "learning_rate": 3.778782637602119e-05, "loss": 2.6206, "step": 810 }, { "epoch": 0.1579265086752478, "grad_norm": 1.6953125, "learning_rate": 3.778218543403789e-05, "loss": 2.6005, "step": 811 }, { "epoch": 0.15812123926547622, "grad_norm": 1.59375, "learning_rate": 3.7776537731279486e-05, "loss": 2.6496, "step": 812 }, { "epoch": 0.15831596985570462, "grad_norm": 1.84375, "learning_rate": 3.777088326989322e-05, "loss": 2.602, "step": 813 }, { "epoch": 0.15851070044593304, "grad_norm": 1.484375, "learning_rate": 3.776522205202892e-05, "loss": 2.6059, "step": 814 }, { "epoch": 0.15870543103616147, "grad_norm": 2.203125, "learning_rate": 3.7759554079838985e-05, "loss": 2.6009, "step": 815 }, { "epoch": 0.1589001616263899, "grad_norm": 2.0625, "learning_rate": 3.775387935547836e-05, "loss": 2.6057, "step": 816 }, { "epoch": 0.15909489221661832, "grad_norm": 1.828125, "learning_rate": 3.774819788110459e-05, "loss": 2.603, "step": 817 }, { "epoch": 0.15928962280684672, "grad_norm": 1.78125, "learning_rate": 3.774250965887775e-05, "loss": 2.5996, "step": 818 }, { "epoch": 0.15948435339707515, "grad_norm": 1.5390625, "learning_rate": 3.77368146909605e-05, "loss": 2.5989, "step": 819 }, { "epoch": 0.15967908398730357, "grad_norm": 1.5, "learning_rate": 3.773111297951808e-05, "loss": 2.5956, "step": 820 }, { "epoch": 0.159873814577532, "grad_norm": 1.828125, "learning_rate": 3.772540452671825e-05, "loss": 2.6217, "step": 821 }, { "epoch": 0.1600685451677604, "grad_norm": 1.4765625, "learning_rate": 3.771968933473138e-05, "loss": 2.6071, "step": 822 }, { "epoch": 0.16026327575798882, "grad_norm": 2.03125, "learning_rate": 3.771396740573038e-05, "loss": 2.6055, "step": 823 }, { "epoch": 0.16045800634821725, "grad_norm": 1.8828125, "learning_rate": 3.7708238741890724e-05, "loss": 2.6055, "step": 824 }, { "epoch": 0.16065273693844567, "grad_norm": 1.59375, "learning_rate": 3.770250334539044e-05, "loss": 2.5986, "step": 825 }, { "epoch": 0.16084746752867407, "grad_norm": 1.40625, "learning_rate": 3.769676121841012e-05, "loss": 2.6104, "step": 826 }, { "epoch": 0.1610421981189025, "grad_norm": 1.90625, "learning_rate": 3.7691012363132926e-05, "loss": 2.5941, "step": 827 }, { "epoch": 0.16123692870913092, "grad_norm": 1.6953125, "learning_rate": 3.7685256781744565e-05, "loss": 2.6057, "step": 828 }, { "epoch": 0.16143165929935935, "grad_norm": 1.921875, "learning_rate": 3.7679494476433306e-05, "loss": 2.5887, "step": 829 }, { "epoch": 0.16162638988958775, "grad_norm": 1.859375, "learning_rate": 3.767372544938996e-05, "loss": 2.5898, "step": 830 }, { "epoch": 0.16182112047981617, "grad_norm": 1.421875, "learning_rate": 3.7667949702807935e-05, "loss": 2.5778, "step": 831 }, { "epoch": 0.1620158510700446, "grad_norm": 1.2890625, "learning_rate": 3.766216723888315e-05, "loss": 2.6034, "step": 832 }, { "epoch": 0.16221058166027302, "grad_norm": 1.9765625, "learning_rate": 3.7656378059814086e-05, "loss": 2.6007, "step": 833 }, { "epoch": 0.16240531225050142, "grad_norm": 1.75, "learning_rate": 3.76505821678018e-05, "loss": 2.6121, "step": 834 }, { "epoch": 0.16260004284072985, "grad_norm": 1.8984375, "learning_rate": 3.764477956504988e-05, "loss": 2.5991, "step": 835 }, { "epoch": 0.16279477343095827, "grad_norm": 1.8671875, "learning_rate": 3.763897025376447e-05, "loss": 2.5936, "step": 836 }, { "epoch": 0.1629895040211867, "grad_norm": 1.3125, "learning_rate": 3.763315423615427e-05, "loss": 2.5897, "step": 837 }, { "epoch": 0.1631842346114151, "grad_norm": 1.203125, "learning_rate": 3.762733151443052e-05, "loss": 2.5903, "step": 838 }, { "epoch": 0.16337896520164352, "grad_norm": 1.890625, "learning_rate": 3.762150209080702e-05, "loss": 2.6236, "step": 839 }, { "epoch": 0.16357369579187195, "grad_norm": 1.5234375, "learning_rate": 3.761566596750011e-05, "loss": 2.5881, "step": 840 }, { "epoch": 0.16376842638210037, "grad_norm": 2.15625, "learning_rate": 3.760982314672869e-05, "loss": 2.5709, "step": 841 }, { "epoch": 0.16396315697232877, "grad_norm": 2.078125, "learning_rate": 3.7603973630714184e-05, "loss": 2.581, "step": 842 }, { "epoch": 0.1641578875625572, "grad_norm": 1.171875, "learning_rate": 3.759811742168058e-05, "loss": 2.5867, "step": 843 }, { "epoch": 0.16435261815278562, "grad_norm": 1.2421875, "learning_rate": 3.75922545218544e-05, "loss": 2.5865, "step": 844 }, { "epoch": 0.16454734874301405, "grad_norm": 1.84375, "learning_rate": 3.758638493346472e-05, "loss": 2.5918, "step": 845 }, { "epoch": 0.16474207933324245, "grad_norm": 1.515625, "learning_rate": 3.758050865874315e-05, "loss": 2.5979, "step": 846 }, { "epoch": 0.16493680992347087, "grad_norm": 2.125, "learning_rate": 3.7574625699923856e-05, "loss": 2.6103, "step": 847 }, { "epoch": 0.1651315405136993, "grad_norm": 2.03125, "learning_rate": 3.756873605924353e-05, "loss": 2.5925, "step": 848 }, { "epoch": 0.16532627110392772, "grad_norm": 1.203125, "learning_rate": 3.756283973894139e-05, "loss": 2.5903, "step": 849 }, { "epoch": 0.16552100169415612, "grad_norm": 1.171875, "learning_rate": 3.755693674125924e-05, "loss": 2.5953, "step": 850 }, { "epoch": 0.16571573228438455, "grad_norm": 1.703125, "learning_rate": 3.755102706844138e-05, "loss": 2.5923, "step": 851 }, { "epoch": 0.16591046287461297, "grad_norm": 1.2734375, "learning_rate": 3.754511072273467e-05, "loss": 2.5877, "step": 852 }, { "epoch": 0.1661051934648414, "grad_norm": 2.5625, "learning_rate": 3.7539187706388506e-05, "loss": 2.5853, "step": 853 }, { "epoch": 0.1662999240550698, "grad_norm": 2.5, "learning_rate": 3.7533258021654804e-05, "loss": 2.6113, "step": 854 }, { "epoch": 0.16649465464529822, "grad_norm": 0.8515625, "learning_rate": 3.7527321670788023e-05, "loss": 2.5966, "step": 855 }, { "epoch": 0.16668938523552665, "grad_norm": 1.734375, "learning_rate": 3.7521378656045174e-05, "loss": 2.5832, "step": 856 }, { "epoch": 0.16688411582575507, "grad_norm": 0.84765625, "learning_rate": 3.751542897968579e-05, "loss": 2.587, "step": 857 }, { "epoch": 0.16707884641598347, "grad_norm": 1.734375, "learning_rate": 3.750947264397191e-05, "loss": 2.6037, "step": 858 }, { "epoch": 0.1672735770062119, "grad_norm": 1.1015625, "learning_rate": 3.7503509651168154e-05, "loss": 2.5926, "step": 859 }, { "epoch": 0.16746830759644032, "grad_norm": 2.359375, "learning_rate": 3.749754000354163e-05, "loss": 2.6014, "step": 860 }, { "epoch": 0.16766303818666875, "grad_norm": 2.046875, "learning_rate": 3.7491563703362e-05, "loss": 2.6108, "step": 861 }, { "epoch": 0.16785776877689718, "grad_norm": 1.6484375, "learning_rate": 3.7485580752901455e-05, "loss": 2.5987, "step": 862 }, { "epoch": 0.16805249936712557, "grad_norm": 1.6171875, "learning_rate": 3.747959115443471e-05, "loss": 2.6041, "step": 863 }, { "epoch": 0.168247229957354, "grad_norm": 1.3125, "learning_rate": 3.747359491023899e-05, "loss": 2.6048, "step": 864 }, { "epoch": 0.16844196054758243, "grad_norm": 1.2890625, "learning_rate": 3.746759202259408e-05, "loss": 2.5816, "step": 865 }, { "epoch": 0.16863669113781085, "grad_norm": 1.359375, "learning_rate": 3.7461582493782266e-05, "loss": 2.5977, "step": 866 }, { "epoch": 0.16883142172803925, "grad_norm": 1.0625, "learning_rate": 3.7455566326088374e-05, "loss": 2.5649, "step": 867 }, { "epoch": 0.16902615231826768, "grad_norm": 1.1875, "learning_rate": 3.744954352179973e-05, "loss": 2.5808, "step": 868 }, { "epoch": 0.1692208829084961, "grad_norm": 1.0859375, "learning_rate": 3.744351408320622e-05, "loss": 2.5943, "step": 869 }, { "epoch": 0.16941561349872453, "grad_norm": 1.015625, "learning_rate": 3.743747801260023e-05, "loss": 2.5988, "step": 870 }, { "epoch": 0.16961034408895292, "grad_norm": 1.2890625, "learning_rate": 3.743143531227666e-05, "loss": 2.6178, "step": 871 }, { "epoch": 0.16980507467918135, "grad_norm": 0.9140625, "learning_rate": 3.742538598453294e-05, "loss": 2.5826, "step": 872 }, { "epoch": 0.16999980526940978, "grad_norm": 1.28125, "learning_rate": 3.741933003166903e-05, "loss": 2.5879, "step": 873 }, { "epoch": 0.1701945358596382, "grad_norm": 0.98046875, "learning_rate": 3.741326745598739e-05, "loss": 2.591, "step": 874 }, { "epoch": 0.1703892664498666, "grad_norm": 1.296875, "learning_rate": 3.740719825979302e-05, "loss": 2.5926, "step": 875 }, { "epoch": 0.17058399704009503, "grad_norm": 1.1015625, "learning_rate": 3.740112244539341e-05, "loss": 2.6006, "step": 876 }, { "epoch": 0.17077872763032345, "grad_norm": 1.234375, "learning_rate": 3.739504001509859e-05, "loss": 2.5969, "step": 877 }, { "epoch": 0.17097345822055188, "grad_norm": 1.1484375, "learning_rate": 3.738895097122109e-05, "loss": 2.5747, "step": 878 }, { "epoch": 0.17116818881078028, "grad_norm": 1.3515625, "learning_rate": 3.738285531607597e-05, "loss": 2.5874, "step": 879 }, { "epoch": 0.1713629194010087, "grad_norm": 1.0546875, "learning_rate": 3.737675305198078e-05, "loss": 2.5885, "step": 880 }, { "epoch": 0.17155764999123713, "grad_norm": 1.6484375, "learning_rate": 3.7370644181255616e-05, "loss": 2.599, "step": 881 }, { "epoch": 0.17175238058146555, "grad_norm": 1.171875, "learning_rate": 3.7364528706223045e-05, "loss": 2.5876, "step": 882 }, { "epoch": 0.17194711117169395, "grad_norm": 1.796875, "learning_rate": 3.735840662920818e-05, "loss": 2.6105, "step": 883 }, { "epoch": 0.17214184176192238, "grad_norm": 1.4921875, "learning_rate": 3.735227795253862e-05, "loss": 2.5754, "step": 884 }, { "epoch": 0.1723365723521508, "grad_norm": 1.65625, "learning_rate": 3.7346142678544494e-05, "loss": 2.6086, "step": 885 }, { "epoch": 0.17253130294237923, "grad_norm": 1.421875, "learning_rate": 3.734000080955843e-05, "loss": 2.5988, "step": 886 }, { "epoch": 0.17272603353260763, "grad_norm": 1.6484375, "learning_rate": 3.733385234791556e-05, "loss": 2.5978, "step": 887 }, { "epoch": 0.17292076412283605, "grad_norm": 1.21875, "learning_rate": 3.732769729595352e-05, "loss": 2.6004, "step": 888 }, { "epoch": 0.17311549471306448, "grad_norm": 2.03125, "learning_rate": 3.7321535656012456e-05, "loss": 2.5987, "step": 889 }, { "epoch": 0.1733102253032929, "grad_norm": 1.6328125, "learning_rate": 3.731536743043503e-05, "loss": 2.61, "step": 890 }, { "epoch": 0.1735049558935213, "grad_norm": 1.9453125, "learning_rate": 3.7309192621566394e-05, "loss": 2.5962, "step": 891 }, { "epoch": 0.17369968648374973, "grad_norm": 1.8984375, "learning_rate": 3.730301123175419e-05, "loss": 2.6114, "step": 892 }, { "epoch": 0.17389441707397815, "grad_norm": 1.3984375, "learning_rate": 3.72968232633486e-05, "loss": 2.5827, "step": 893 }, { "epoch": 0.17408914766420658, "grad_norm": 1.3125, "learning_rate": 3.729062871870228e-05, "loss": 2.5964, "step": 894 }, { "epoch": 0.17428387825443498, "grad_norm": 1.5078125, "learning_rate": 3.7284427600170384e-05, "loss": 2.579, "step": 895 }, { "epoch": 0.1744786088446634, "grad_norm": 1.21875, "learning_rate": 3.727821991011058e-05, "loss": 2.597, "step": 896 }, { "epoch": 0.17467333943489183, "grad_norm": 2.125, "learning_rate": 3.7272005650883026e-05, "loss": 2.5936, "step": 897 }, { "epoch": 0.17486807002512025, "grad_norm": 1.859375, "learning_rate": 3.7265784824850375e-05, "loss": 2.5859, "step": 898 }, { "epoch": 0.17506280061534865, "grad_norm": 1.6015625, "learning_rate": 3.725955743437779e-05, "loss": 2.5946, "step": 899 }, { "epoch": 0.17525753120557708, "grad_norm": 1.578125, "learning_rate": 3.725332348183291e-05, "loss": 2.5893, "step": 900 }, { "epoch": 0.1754522617958055, "grad_norm": 1.390625, "learning_rate": 3.7247082969585884e-05, "loss": 2.5967, "step": 901 }, { "epoch": 0.17564699238603393, "grad_norm": 1.2734375, "learning_rate": 3.7240835900009356e-05, "loss": 2.5908, "step": 902 }, { "epoch": 0.17584172297626235, "grad_norm": 1.5234375, "learning_rate": 3.7234582275478454e-05, "loss": 2.5986, "step": 903 }, { "epoch": 0.17603645356649075, "grad_norm": 1.296875, "learning_rate": 3.72283220983708e-05, "loss": 2.5938, "step": 904 }, { "epoch": 0.17623118415671918, "grad_norm": 1.7734375, "learning_rate": 3.722205537106651e-05, "loss": 2.5849, "step": 905 }, { "epoch": 0.1764259147469476, "grad_norm": 1.3828125, "learning_rate": 3.7215782095948194e-05, "loss": 2.5781, "step": 906 }, { "epoch": 0.17662064533717603, "grad_norm": 2.109375, "learning_rate": 3.7209502275400934e-05, "loss": 2.5674, "step": 907 }, { "epoch": 0.17681537592740443, "grad_norm": 1.8828125, "learning_rate": 3.7203215911812324e-05, "loss": 2.5952, "step": 908 }, { "epoch": 0.17701010651763285, "grad_norm": 1.8046875, "learning_rate": 3.719692300757244e-05, "loss": 2.5735, "step": 909 }, { "epoch": 0.17720483710786128, "grad_norm": 1.8125, "learning_rate": 3.719062356507383e-05, "loss": 2.582, "step": 910 }, { "epoch": 0.1773995676980897, "grad_norm": 1.5703125, "learning_rate": 3.718431758671154e-05, "loss": 2.5931, "step": 911 }, { "epoch": 0.1775942982883181, "grad_norm": 1.53125, "learning_rate": 3.717800507488311e-05, "loss": 2.5936, "step": 912 }, { "epoch": 0.17778902887854653, "grad_norm": 1.7578125, "learning_rate": 3.7171686031988535e-05, "loss": 2.5788, "step": 913 }, { "epoch": 0.17798375946877495, "grad_norm": 1.4609375, "learning_rate": 3.716536046043032e-05, "loss": 2.5871, "step": 914 }, { "epoch": 0.17817849005900338, "grad_norm": 1.9921875, "learning_rate": 3.715902836261344e-05, "loss": 2.5852, "step": 915 }, { "epoch": 0.17837322064923178, "grad_norm": 1.8515625, "learning_rate": 3.715268974094536e-05, "loss": 2.6022, "step": 916 }, { "epoch": 0.1785679512394602, "grad_norm": 1.3828125, "learning_rate": 3.714634459783602e-05, "loss": 2.572, "step": 917 }, { "epoch": 0.17876268182968863, "grad_norm": 1.484375, "learning_rate": 3.7139992935697834e-05, "loss": 2.577, "step": 918 }, { "epoch": 0.17895741241991706, "grad_norm": 1.1171875, "learning_rate": 3.71336347569457e-05, "loss": 2.5989, "step": 919 }, { "epoch": 0.17915214301014545, "grad_norm": 1.1484375, "learning_rate": 3.7127270063996996e-05, "loss": 2.5796, "step": 920 }, { "epoch": 0.17934687360037388, "grad_norm": 1.3671875, "learning_rate": 3.712089885927157e-05, "loss": 2.5922, "step": 921 }, { "epoch": 0.1795416041906023, "grad_norm": 1.0078125, "learning_rate": 3.711452114519175e-05, "loss": 2.5922, "step": 922 }, { "epoch": 0.17973633478083073, "grad_norm": 1.8671875, "learning_rate": 3.710813692418235e-05, "loss": 2.5812, "step": 923 }, { "epoch": 0.17993106537105913, "grad_norm": 1.4453125, "learning_rate": 3.710174619867063e-05, "loss": 2.5583, "step": 924 }, { "epoch": 0.18012579596128755, "grad_norm": 1.9296875, "learning_rate": 3.709534897108635e-05, "loss": 2.5884, "step": 925 }, { "epoch": 0.18032052655151598, "grad_norm": 1.7421875, "learning_rate": 3.708894524386174e-05, "loss": 2.5698, "step": 926 }, { "epoch": 0.1805152571417444, "grad_norm": 1.4140625, "learning_rate": 3.708253501943147e-05, "loss": 2.5647, "step": 927 }, { "epoch": 0.1807099877319728, "grad_norm": 1.34375, "learning_rate": 3.707611830023272e-05, "loss": 2.5576, "step": 928 }, { "epoch": 0.18090471832220123, "grad_norm": 1.46875, "learning_rate": 3.7069695088705114e-05, "loss": 2.5785, "step": 929 }, { "epoch": 0.18109944891242966, "grad_norm": 1.125, "learning_rate": 3.706326538729076e-05, "loss": 2.5638, "step": 930 }, { "epoch": 0.18129417950265808, "grad_norm": 2.078125, "learning_rate": 3.705682919843422e-05, "loss": 2.5727, "step": 931 }, { "epoch": 0.18148891009288648, "grad_norm": 1.7890625, "learning_rate": 3.705038652458253e-05, "loss": 2.5993, "step": 932 }, { "epoch": 0.1816836406831149, "grad_norm": 1.6796875, "learning_rate": 3.704393736818519e-05, "loss": 2.5928, "step": 933 }, { "epoch": 0.18187837127334333, "grad_norm": 1.640625, "learning_rate": 3.703748173169417e-05, "loss": 2.5917, "step": 934 }, { "epoch": 0.18207310186357176, "grad_norm": 1.4296875, "learning_rate": 3.70310196175639e-05, "loss": 2.5711, "step": 935 }, { "epoch": 0.18226783245380015, "grad_norm": 1.1953125, "learning_rate": 3.7024551028251256e-05, "loss": 2.5911, "step": 936 }, { "epoch": 0.18246256304402858, "grad_norm": 1.671875, "learning_rate": 3.701807596621561e-05, "loss": 2.5751, "step": 937 }, { "epoch": 0.182657293634257, "grad_norm": 1.28125, "learning_rate": 3.701159443391876e-05, "loss": 2.5744, "step": 938 }, { "epoch": 0.18285202422448543, "grad_norm": 2.09375, "learning_rate": 3.7005106433825e-05, "loss": 2.5894, "step": 939 }, { "epoch": 0.18304675481471383, "grad_norm": 1.859375, "learning_rate": 3.6998611968401044e-05, "loss": 2.5525, "step": 940 }, { "epoch": 0.18324148540494226, "grad_norm": 1.5625, "learning_rate": 3.6992111040116095e-05, "loss": 2.5748, "step": 941 }, { "epoch": 0.18343621599517068, "grad_norm": 1.5234375, "learning_rate": 3.69856036514418e-05, "loss": 2.5566, "step": 942 }, { "epoch": 0.1836309465853991, "grad_norm": 1.4609375, "learning_rate": 3.6979089804852254e-05, "loss": 2.5643, "step": 943 }, { "epoch": 0.18382567717562753, "grad_norm": 1.3359375, "learning_rate": 3.6972569502824034e-05, "loss": 2.5686, "step": 944 }, { "epoch": 0.18402040776585593, "grad_norm": 1.671875, "learning_rate": 3.696604274783614e-05, "loss": 2.5501, "step": 945 }, { "epoch": 0.18421513835608436, "grad_norm": 1.40625, "learning_rate": 3.6959509542370045e-05, "loss": 2.6066, "step": 946 }, { "epoch": 0.18440986894631278, "grad_norm": 1.7734375, "learning_rate": 3.695296988890968e-05, "loss": 2.5824, "step": 947 }, { "epoch": 0.1846045995365412, "grad_norm": 1.5703125, "learning_rate": 3.6946423789941394e-05, "loss": 2.5888, "step": 948 }, { "epoch": 0.1847993301267696, "grad_norm": 1.625, "learning_rate": 3.6939871247954024e-05, "loss": 2.5632, "step": 949 }, { "epoch": 0.18499406071699803, "grad_norm": 1.3359375, "learning_rate": 3.693331226543885e-05, "loss": 2.5799, "step": 950 }, { "epoch": 0.18518879130722646, "grad_norm": 1.7421875, "learning_rate": 3.6926746844889574e-05, "loss": 2.5812, "step": 951 }, { "epoch": 0.18538352189745488, "grad_norm": 1.4453125, "learning_rate": 3.692017498880238e-05, "loss": 2.5825, "step": 952 }, { "epoch": 0.18557825248768328, "grad_norm": 1.8515625, "learning_rate": 3.6913596699675866e-05, "loss": 2.5754, "step": 953 }, { "epoch": 0.1857729830779117, "grad_norm": 1.65625, "learning_rate": 3.690701198001112e-05, "loss": 2.5989, "step": 954 }, { "epoch": 0.18596771366814013, "grad_norm": 1.6171875, "learning_rate": 3.690042083231162e-05, "loss": 2.5748, "step": 955 }, { "epoch": 0.18616244425836856, "grad_norm": 1.3671875, "learning_rate": 3.6893823259083335e-05, "loss": 2.5648, "step": 956 }, { "epoch": 0.18635717484859696, "grad_norm": 1.7890625, "learning_rate": 3.688721926283465e-05, "loss": 2.5634, "step": 957 }, { "epoch": 0.18655190543882538, "grad_norm": 1.4140625, "learning_rate": 3.68806088460764e-05, "loss": 2.5693, "step": 958 }, { "epoch": 0.1867466360290538, "grad_norm": 2.078125, "learning_rate": 3.687399201132186e-05, "loss": 2.5696, "step": 959 }, { "epoch": 0.18694136661928223, "grad_norm": 1.984375, "learning_rate": 3.6867368761086755e-05, "loss": 2.5833, "step": 960 }, { "epoch": 0.18713609720951063, "grad_norm": 1.484375, "learning_rate": 3.686073909788923e-05, "loss": 2.5639, "step": 961 }, { "epoch": 0.18733082779973906, "grad_norm": 1.421875, "learning_rate": 3.685410302424988e-05, "loss": 2.5853, "step": 962 }, { "epoch": 0.18752555838996748, "grad_norm": 1.7421875, "learning_rate": 3.684746054269174e-05, "loss": 2.5727, "step": 963 }, { "epoch": 0.1877202889801959, "grad_norm": 1.4375, "learning_rate": 3.684081165574028e-05, "loss": 2.5907, "step": 964 }, { "epoch": 0.1879150195704243, "grad_norm": 2.234375, "learning_rate": 3.683415636592339e-05, "loss": 2.5791, "step": 965 }, { "epoch": 0.18810975016065273, "grad_norm": 2.140625, "learning_rate": 3.682749467577142e-05, "loss": 2.5647, "step": 966 }, { "epoch": 0.18830448075088116, "grad_norm": 1.1171875, "learning_rate": 3.682082658781713e-05, "loss": 2.5944, "step": 967 }, { "epoch": 0.18849921134110958, "grad_norm": 1.2421875, "learning_rate": 3.681415210459573e-05, "loss": 2.5656, "step": 968 }, { "epoch": 0.18869394193133798, "grad_norm": 1.7265625, "learning_rate": 3.680747122864485e-05, "loss": 2.5786, "step": 969 }, { "epoch": 0.1888886725215664, "grad_norm": 1.3046875, "learning_rate": 3.680078396250456e-05, "loss": 2.5806, "step": 970 }, { "epoch": 0.18908340311179483, "grad_norm": 2.5, "learning_rate": 3.679409030871735e-05, "loss": 2.5476, "step": 971 }, { "epoch": 0.18927813370202326, "grad_norm": 2.5, "learning_rate": 3.678739026982814e-05, "loss": 2.5604, "step": 972 }, { "epoch": 0.18947286429225166, "grad_norm": 0.96875, "learning_rate": 3.678068384838428e-05, "loss": 2.575, "step": 973 }, { "epoch": 0.18966759488248008, "grad_norm": 1.65625, "learning_rate": 3.677397104693556e-05, "loss": 2.5489, "step": 974 }, { "epoch": 0.1898623254727085, "grad_norm": 0.99609375, "learning_rate": 3.676725186803416e-05, "loss": 2.5664, "step": 975 }, { "epoch": 0.19005705606293694, "grad_norm": 1.453125, "learning_rate": 3.6760526314234735e-05, "loss": 2.5796, "step": 976 }, { "epoch": 0.19025178665316533, "grad_norm": 1.1328125, "learning_rate": 3.6753794388094305e-05, "loss": 2.5721, "step": 977 }, { "epoch": 0.19044651724339376, "grad_norm": 1.2421875, "learning_rate": 3.6747056092172376e-05, "loss": 2.5646, "step": 978 }, { "epoch": 0.19064124783362218, "grad_norm": 1.265625, "learning_rate": 3.674031142903082e-05, "loss": 2.5701, "step": 979 }, { "epoch": 0.1908359784238506, "grad_norm": 1.0703125, "learning_rate": 3.673356040123396e-05, "loss": 2.5769, "step": 980 }, { "epoch": 0.191030709014079, "grad_norm": 1.390625, "learning_rate": 3.672680301134853e-05, "loss": 2.5766, "step": 981 }, { "epoch": 0.19122543960430743, "grad_norm": 1.0703125, "learning_rate": 3.672003926194369e-05, "loss": 2.593, "step": 982 }, { "epoch": 0.19142017019453586, "grad_norm": 1.3203125, "learning_rate": 3.671326915559102e-05, "loss": 2.5453, "step": 983 }, { "epoch": 0.19161490078476429, "grad_norm": 1.0703125, "learning_rate": 3.67064926948645e-05, "loss": 2.562, "step": 984 }, { "epoch": 0.19180963137499268, "grad_norm": 1.421875, "learning_rate": 3.6699709882340535e-05, "loss": 2.5703, "step": 985 }, { "epoch": 0.1920043619652211, "grad_norm": 0.96875, "learning_rate": 3.669292072059794e-05, "loss": 2.5428, "step": 986 }, { "epoch": 0.19219909255544954, "grad_norm": 1.6953125, "learning_rate": 3.668612521221797e-05, "loss": 2.566, "step": 987 }, { "epoch": 0.19239382314567796, "grad_norm": 1.265625, "learning_rate": 3.6679323359784254e-05, "loss": 2.5726, "step": 988 }, { "epoch": 0.1925885537359064, "grad_norm": 1.9296875, "learning_rate": 3.6672515165882864e-05, "loss": 2.5719, "step": 989 }, { "epoch": 0.19278328432613479, "grad_norm": 1.6171875, "learning_rate": 3.6665700633102264e-05, "loss": 2.5755, "step": 990 }, { "epoch": 0.1929780149163632, "grad_norm": 1.7109375, "learning_rate": 3.665887976403333e-05, "loss": 2.5924, "step": 991 }, { "epoch": 0.19317274550659164, "grad_norm": 1.421875, "learning_rate": 3.6652052561269364e-05, "loss": 2.587, "step": 992 }, { "epoch": 0.19336747609682006, "grad_norm": 1.6796875, "learning_rate": 3.664521902740606e-05, "loss": 2.5606, "step": 993 }, { "epoch": 0.19356220668704846, "grad_norm": 1.390625, "learning_rate": 3.663837916504152e-05, "loss": 2.5867, "step": 994 }, { "epoch": 0.19375693727727689, "grad_norm": 1.734375, "learning_rate": 3.6631532976776254e-05, "loss": 2.5751, "step": 995 }, { "epoch": 0.1939516678675053, "grad_norm": 1.4296875, "learning_rate": 3.662468046521318e-05, "loss": 2.5722, "step": 996 }, { "epoch": 0.19414639845773374, "grad_norm": 1.9140625, "learning_rate": 3.6617821632957626e-05, "loss": 2.5813, "step": 997 }, { "epoch": 0.19434112904796214, "grad_norm": 1.59375, "learning_rate": 3.66109564826173e-05, "loss": 2.5675, "step": 998 }, { "epoch": 0.19453585963819056, "grad_norm": 1.9375, "learning_rate": 3.660408501680234e-05, "loss": 2.5711, "step": 999 }, { "epoch": 0.194730590228419, "grad_norm": 1.765625, "learning_rate": 3.659720723812527e-05, "loss": 2.5406, "step": 1000 }, { "epoch": 0.1949253208186474, "grad_norm": 1.6484375, "learning_rate": 3.6590323149201015e-05, "loss": 2.5652, "step": 1001 }, { "epoch": 0.1951200514088758, "grad_norm": 1.5234375, "learning_rate": 3.65834327526469e-05, "loss": 2.5701, "step": 1002 }, { "epoch": 0.19531478199910424, "grad_norm": 1.3515625, "learning_rate": 3.6576536051082656e-05, "loss": 2.5672, "step": 1003 }, { "epoch": 0.19550951258933266, "grad_norm": 1.265625, "learning_rate": 3.656963304713039e-05, "loss": 2.5627, "step": 1004 }, { "epoch": 0.1957042431795611, "grad_norm": 1.5078125, "learning_rate": 3.656272374341463e-05, "loss": 2.5577, "step": 1005 }, { "epoch": 0.1958989737697895, "grad_norm": 1.171875, "learning_rate": 3.6555808142562294e-05, "loss": 2.5666, "step": 1006 }, { "epoch": 0.1960937043600179, "grad_norm": 2.109375, "learning_rate": 3.654888624720268e-05, "loss": 2.5885, "step": 1007 }, { "epoch": 0.19628843495024634, "grad_norm": 1.75, "learning_rate": 3.6541958059967495e-05, "loss": 2.565, "step": 1008 }, { "epoch": 0.19648316554047476, "grad_norm": 1.859375, "learning_rate": 3.653502358349082e-05, "loss": 2.5759, "step": 1009 }, { "epoch": 0.19667789613070316, "grad_norm": 1.7265625, "learning_rate": 3.652808282040915e-05, "loss": 2.5707, "step": 1010 }, { "epoch": 0.1968726267209316, "grad_norm": 1.4296875, "learning_rate": 3.652113577336135e-05, "loss": 2.5651, "step": 1011 }, { "epoch": 0.19706735731116, "grad_norm": 1.4609375, "learning_rate": 3.6514182444988685e-05, "loss": 2.5589, "step": 1012 }, { "epoch": 0.19726208790138844, "grad_norm": 1.515625, "learning_rate": 3.6507222837934806e-05, "loss": 2.5647, "step": 1013 }, { "epoch": 0.19745681849161684, "grad_norm": 1.3515625, "learning_rate": 3.6500256954845754e-05, "loss": 2.5722, "step": 1014 }, { "epoch": 0.19765154908184526, "grad_norm": 1.7734375, "learning_rate": 3.649328479836996e-05, "loss": 2.5644, "step": 1015 }, { "epoch": 0.1978462796720737, "grad_norm": 1.6640625, "learning_rate": 3.648630637115821e-05, "loss": 2.5615, "step": 1016 }, { "epoch": 0.19804101026230211, "grad_norm": 1.453125, "learning_rate": 3.6479321675863715e-05, "loss": 2.5346, "step": 1017 }, { "epoch": 0.1982357408525305, "grad_norm": 1.40625, "learning_rate": 3.647233071514205e-05, "loss": 2.5652, "step": 1018 }, { "epoch": 0.19843047144275894, "grad_norm": 1.578125, "learning_rate": 3.6465333491651175e-05, "loss": 2.5512, "step": 1019 }, { "epoch": 0.19862520203298736, "grad_norm": 1.3828125, "learning_rate": 3.6458330008051426e-05, "loss": 2.57, "step": 1020 }, { "epoch": 0.1988199326232158, "grad_norm": 1.921875, "learning_rate": 3.645132026700552e-05, "loss": 2.5618, "step": 1021 }, { "epoch": 0.1990146632134442, "grad_norm": 1.71875, "learning_rate": 3.644430427117857e-05, "loss": 2.5616, "step": 1022 }, { "epoch": 0.1992093938036726, "grad_norm": 1.5078125, "learning_rate": 3.6437282023238025e-05, "loss": 2.5624, "step": 1023 }, { "epoch": 0.19940412439390104, "grad_norm": 1.40625, "learning_rate": 3.643025352585377e-05, "loss": 2.5812, "step": 1024 }, { "epoch": 0.19959885498412946, "grad_norm": 1.5703125, "learning_rate": 3.642321878169801e-05, "loss": 2.5584, "step": 1025 }, { "epoch": 0.19979358557435786, "grad_norm": 1.3984375, "learning_rate": 3.641617779344537e-05, "loss": 2.5681, "step": 1026 }, { "epoch": 0.1999883161645863, "grad_norm": 1.7109375, "learning_rate": 3.640913056377282e-05, "loss": 2.5672, "step": 1027 }, { "epoch": 0.20018304675481471, "grad_norm": 1.5546875, "learning_rate": 3.640207709535971e-05, "loss": 2.5459, "step": 1028 }, { "epoch": 0.20037777734504314, "grad_norm": 1.5859375, "learning_rate": 3.6395017390887775e-05, "loss": 2.5611, "step": 1029 }, { "epoch": 0.20057250793527157, "grad_norm": 1.5, "learning_rate": 3.638795145304109e-05, "loss": 2.5566, "step": 1030 }, { "epoch": 0.20076723852549996, "grad_norm": 1.515625, "learning_rate": 3.6380879284506134e-05, "loss": 2.5664, "step": 1031 }, { "epoch": 0.2009619691157284, "grad_norm": 1.3984375, "learning_rate": 3.6373800887971744e-05, "loss": 2.5627, "step": 1032 }, { "epoch": 0.20115669970595682, "grad_norm": 1.765625, "learning_rate": 3.636671626612911e-05, "loss": 2.5553, "step": 1033 }, { "epoch": 0.20135143029618524, "grad_norm": 1.703125, "learning_rate": 3.635962542167181e-05, "loss": 2.5773, "step": 1034 }, { "epoch": 0.20154616088641364, "grad_norm": 1.421875, "learning_rate": 3.635252835729577e-05, "loss": 2.5612, "step": 1035 }, { "epoch": 0.20174089147664206, "grad_norm": 1.5, "learning_rate": 3.6345425075699295e-05, "loss": 2.5646, "step": 1036 }, { "epoch": 0.2019356220668705, "grad_norm": 1.4765625, "learning_rate": 3.633831557958305e-05, "loss": 2.5552, "step": 1037 }, { "epoch": 0.20213035265709892, "grad_norm": 1.2578125, "learning_rate": 3.6331199871650055e-05, "loss": 2.5649, "step": 1038 }, { "epoch": 0.20232508324732731, "grad_norm": 1.921875, "learning_rate": 3.63240779546057e-05, "loss": 2.5576, "step": 1039 }, { "epoch": 0.20251981383755574, "grad_norm": 1.765625, "learning_rate": 3.631694983115773e-05, "loss": 2.5617, "step": 1040 }, { "epoch": 0.20271454442778417, "grad_norm": 1.4921875, "learning_rate": 3.630981550401627e-05, "loss": 2.5611, "step": 1041 }, { "epoch": 0.2029092750180126, "grad_norm": 1.40625, "learning_rate": 3.630267497589376e-05, "loss": 2.5549, "step": 1042 }, { "epoch": 0.203104005608241, "grad_norm": 1.546875, "learning_rate": 3.629552824950504e-05, "loss": 2.5857, "step": 1043 }, { "epoch": 0.20329873619846942, "grad_norm": 1.3671875, "learning_rate": 3.6288375327567286e-05, "loss": 2.5728, "step": 1044 }, { "epoch": 0.20349346678869784, "grad_norm": 1.875, "learning_rate": 3.628121621280004e-05, "loss": 2.5613, "step": 1045 }, { "epoch": 0.20368819737892627, "grad_norm": 1.7734375, "learning_rate": 3.6274050907925186e-05, "loss": 2.5439, "step": 1046 }, { "epoch": 0.20388292796915466, "grad_norm": 1.296875, "learning_rate": 3.6266879415666964e-05, "loss": 2.5526, "step": 1047 }, { "epoch": 0.2040776585593831, "grad_norm": 1.28125, "learning_rate": 3.625970173875198e-05, "loss": 2.5684, "step": 1048 }, { "epoch": 0.20427238914961152, "grad_norm": 1.6015625, "learning_rate": 3.6252517879909175e-05, "loss": 2.5858, "step": 1049 }, { "epoch": 0.20446711973983994, "grad_norm": 1.4453125, "learning_rate": 3.624532784186986e-05, "loss": 2.5783, "step": 1050 }, { "epoch": 0.20466185033006834, "grad_norm": 1.84375, "learning_rate": 3.623813162736767e-05, "loss": 2.5656, "step": 1051 }, { "epoch": 0.20485658092029677, "grad_norm": 1.7890625, "learning_rate": 3.62309292391386e-05, "loss": 2.5531, "step": 1052 }, { "epoch": 0.2050513115105252, "grad_norm": 1.2578125, "learning_rate": 3.6223720679921e-05, "loss": 2.5686, "step": 1053 }, { "epoch": 0.20524604210075362, "grad_norm": 1.234375, "learning_rate": 3.6216505952455564e-05, "loss": 2.5447, "step": 1054 }, { "epoch": 0.20544077269098202, "grad_norm": 1.625, "learning_rate": 3.620928505948531e-05, "loss": 2.5511, "step": 1055 }, { "epoch": 0.20563550328121044, "grad_norm": 1.390625, "learning_rate": 3.620205800375563e-05, "loss": 2.5359, "step": 1056 }, { "epoch": 0.20583023387143887, "grad_norm": 2.015625, "learning_rate": 3.619482478801425e-05, "loss": 2.5701, "step": 1057 }, { "epoch": 0.2060249644616673, "grad_norm": 1.8359375, "learning_rate": 3.6187585415011224e-05, "loss": 2.5427, "step": 1058 }, { "epoch": 0.2062196950518957, "grad_norm": 1.3203125, "learning_rate": 3.6180339887498953e-05, "loss": 2.5706, "step": 1059 }, { "epoch": 0.20641442564212412, "grad_norm": 1.3515625, "learning_rate": 3.617308820823219e-05, "loss": 2.5526, "step": 1060 }, { "epoch": 0.20660915623235254, "grad_norm": 1.578125, "learning_rate": 3.616583037996802e-05, "loss": 2.5572, "step": 1061 }, { "epoch": 0.20680388682258097, "grad_norm": 1.3828125, "learning_rate": 3.615856640546586e-05, "loss": 2.5355, "step": 1062 }, { "epoch": 0.20699861741280937, "grad_norm": 1.734375, "learning_rate": 3.6151296287487464e-05, "loss": 2.5614, "step": 1063 }, { "epoch": 0.2071933480030378, "grad_norm": 1.6640625, "learning_rate": 3.614402002879693e-05, "loss": 2.5574, "step": 1064 }, { "epoch": 0.20738807859326622, "grad_norm": 1.40625, "learning_rate": 3.6136737632160696e-05, "loss": 2.5609, "step": 1065 }, { "epoch": 0.20758280918349464, "grad_norm": 1.3359375, "learning_rate": 3.612944910034751e-05, "loss": 2.5538, "step": 1066 }, { "epoch": 0.20777753977372304, "grad_norm": 1.5703125, "learning_rate": 3.612215443612849e-05, "loss": 2.566, "step": 1067 }, { "epoch": 0.20797227036395147, "grad_norm": 1.375, "learning_rate": 3.6114853642277024e-05, "loss": 2.5535, "step": 1068 }, { "epoch": 0.2081670009541799, "grad_norm": 1.765625, "learning_rate": 3.610754672156891e-05, "loss": 2.5758, "step": 1069 }, { "epoch": 0.20836173154440832, "grad_norm": 1.6875, "learning_rate": 3.61002336767822e-05, "loss": 2.5433, "step": 1070 }, { "epoch": 0.20855646213463672, "grad_norm": 1.4375, "learning_rate": 3.609291451069733e-05, "loss": 2.5467, "step": 1071 }, { "epoch": 0.20875119272486514, "grad_norm": 1.390625, "learning_rate": 3.608558922609704e-05, "loss": 2.5798, "step": 1072 }, { "epoch": 0.20894592331509357, "grad_norm": 1.5390625, "learning_rate": 3.607825782576639e-05, "loss": 2.5621, "step": 1073 }, { "epoch": 0.209140653905322, "grad_norm": 1.359375, "learning_rate": 3.607092031249278e-05, "loss": 2.5388, "step": 1074 }, { "epoch": 0.20933538449555042, "grad_norm": 1.7109375, "learning_rate": 3.606357668906592e-05, "loss": 2.5432, "step": 1075 }, { "epoch": 0.20953011508577882, "grad_norm": 1.46875, "learning_rate": 3.6056226958277863e-05, "loss": 2.5396, "step": 1076 }, { "epoch": 0.20972484567600724, "grad_norm": 1.640625, "learning_rate": 3.604887112292297e-05, "loss": 2.5672, "step": 1077 }, { "epoch": 0.20991957626623567, "grad_norm": 1.5234375, "learning_rate": 3.604150918579791e-05, "loss": 2.556, "step": 1078 }, { "epoch": 0.2101143068564641, "grad_norm": 1.59375, "learning_rate": 3.6034141149701696e-05, "loss": 2.5513, "step": 1079 }, { "epoch": 0.2103090374466925, "grad_norm": 1.4609375, "learning_rate": 3.602676701743565e-05, "loss": 2.5325, "step": 1080 }, { "epoch": 0.21050376803692092, "grad_norm": 1.5703125, "learning_rate": 3.601938679180343e-05, "loss": 2.5287, "step": 1081 }, { "epoch": 0.21069849862714934, "grad_norm": 1.390625, "learning_rate": 3.6012000475610965e-05, "loss": 2.5526, "step": 1082 }, { "epoch": 0.21089322921737777, "grad_norm": 1.6015625, "learning_rate": 3.600460807166653e-05, "loss": 2.5346, "step": 1083 }, { "epoch": 0.21108795980760617, "grad_norm": 1.5078125, "learning_rate": 3.599720958278073e-05, "loss": 2.5446, "step": 1084 }, { "epoch": 0.2112826903978346, "grad_norm": 1.609375, "learning_rate": 3.598980501176646e-05, "loss": 2.5594, "step": 1085 }, { "epoch": 0.21147742098806302, "grad_norm": 1.53125, "learning_rate": 3.5982394361438926e-05, "loss": 2.5424, "step": 1086 }, { "epoch": 0.21167215157829145, "grad_norm": 1.484375, "learning_rate": 3.597497763461565e-05, "loss": 2.5478, "step": 1087 }, { "epoch": 0.21186688216851984, "grad_norm": 1.359375, "learning_rate": 3.596755483411648e-05, "loss": 2.5446, "step": 1088 }, { "epoch": 0.21206161275874827, "grad_norm": 1.5546875, "learning_rate": 3.596012596276355e-05, "loss": 2.5494, "step": 1089 }, { "epoch": 0.2122563433489767, "grad_norm": 1.3671875, "learning_rate": 3.595269102338131e-05, "loss": 2.5268, "step": 1090 }, { "epoch": 0.21245107393920512, "grad_norm": 1.796875, "learning_rate": 3.594525001879652e-05, "loss": 2.5424, "step": 1091 }, { "epoch": 0.21264580452943352, "grad_norm": 1.703125, "learning_rate": 3.593780295183826e-05, "loss": 2.5377, "step": 1092 }, { "epoch": 0.21284053511966194, "grad_norm": 1.328125, "learning_rate": 3.5930349825337883e-05, "loss": 2.5588, "step": 1093 }, { "epoch": 0.21303526570989037, "grad_norm": 1.3046875, "learning_rate": 3.5922890642129064e-05, "loss": 2.5528, "step": 1094 }, { "epoch": 0.2132299963001188, "grad_norm": 1.578125, "learning_rate": 3.591542540504779e-05, "loss": 2.5519, "step": 1095 }, { "epoch": 0.2134247268903472, "grad_norm": 1.4140625, "learning_rate": 3.5907954116932326e-05, "loss": 2.5769, "step": 1096 }, { "epoch": 0.21361945748057562, "grad_norm": 1.7265625, "learning_rate": 3.590047678062326e-05, "loss": 2.5394, "step": 1097 }, { "epoch": 0.21381418807080405, "grad_norm": 1.7109375, "learning_rate": 3.5892993398963466e-05, "loss": 2.5832, "step": 1098 }, { "epoch": 0.21400891866103247, "grad_norm": 1.3515625, "learning_rate": 3.5885503974798125e-05, "loss": 2.5689, "step": 1099 }, { "epoch": 0.21420364925126087, "grad_norm": 1.3203125, "learning_rate": 3.5878008510974705e-05, "loss": 2.5566, "step": 1100 }, { "epoch": 0.2143983798414893, "grad_norm": 1.453125, "learning_rate": 3.5870507010342984e-05, "loss": 2.5382, "step": 1101 }, { "epoch": 0.21459311043171772, "grad_norm": 1.2421875, "learning_rate": 3.5862999475755024e-05, "loss": 2.5539, "step": 1102 }, { "epoch": 0.21478784102194615, "grad_norm": 1.984375, "learning_rate": 3.5855485910065177e-05, "loss": 2.5547, "step": 1103 }, { "epoch": 0.21498257161217454, "grad_norm": 1.8515625, "learning_rate": 3.584796631613011e-05, "loss": 2.5699, "step": 1104 }, { "epoch": 0.21517730220240297, "grad_norm": 1.25, "learning_rate": 3.584044069680876e-05, "loss": 2.5673, "step": 1105 }, { "epoch": 0.2153720327926314, "grad_norm": 1.1328125, "learning_rate": 3.583290905496236e-05, "loss": 2.5436, "step": 1106 }, { "epoch": 0.21556676338285982, "grad_norm": 1.5234375, "learning_rate": 3.582537139345444e-05, "loss": 2.5652, "step": 1107 }, { "epoch": 0.21576149397308822, "grad_norm": 1.1484375, "learning_rate": 3.5817827715150814e-05, "loss": 2.5557, "step": 1108 }, { "epoch": 0.21595622456331665, "grad_norm": 2.046875, "learning_rate": 3.581027802291957e-05, "loss": 2.5513, "step": 1109 }, { "epoch": 0.21615095515354507, "grad_norm": 1.90625, "learning_rate": 3.580272231963112e-05, "loss": 2.545, "step": 1110 }, { "epoch": 0.2163456857437735, "grad_norm": 1.2578125, "learning_rate": 3.579516060815811e-05, "loss": 2.5484, "step": 1111 }, { "epoch": 0.2165404163340019, "grad_norm": 1.1875, "learning_rate": 3.578759289137553e-05, "loss": 2.5365, "step": 1112 }, { "epoch": 0.21673514692423032, "grad_norm": 1.609375, "learning_rate": 3.57800191721606e-05, "loss": 2.5459, "step": 1113 }, { "epoch": 0.21692987751445875, "grad_norm": 1.234375, "learning_rate": 3.577243945339284e-05, "loss": 2.5694, "step": 1114 }, { "epoch": 0.21712460810468717, "grad_norm": 2.015625, "learning_rate": 3.576485373795406e-05, "loss": 2.5564, "step": 1115 }, { "epoch": 0.2173193386949156, "grad_norm": 1.921875, "learning_rate": 3.575726202872835e-05, "loss": 2.5484, "step": 1116 }, { "epoch": 0.217514069285144, "grad_norm": 1.203125, "learning_rate": 3.5749664328602064e-05, "loss": 2.5619, "step": 1117 }, { "epoch": 0.21770879987537242, "grad_norm": 1.2265625, "learning_rate": 3.574206064046384e-05, "loss": 2.5377, "step": 1118 }, { "epoch": 0.21790353046560085, "grad_norm": 1.34375, "learning_rate": 3.5734450967204606e-05, "loss": 2.5396, "step": 1119 }, { "epoch": 0.21809826105582927, "grad_norm": 1.1171875, "learning_rate": 3.572683531171755e-05, "loss": 2.5416, "step": 1120 }, { "epoch": 0.21829299164605767, "grad_norm": 2.015625, "learning_rate": 3.5719213676898144e-05, "loss": 2.5407, "step": 1121 }, { "epoch": 0.2184877222362861, "grad_norm": 1.859375, "learning_rate": 3.571158606564412e-05, "loss": 2.5647, "step": 1122 }, { "epoch": 0.21868245282651452, "grad_norm": 1.4609375, "learning_rate": 3.570395248085549e-05, "loss": 2.5505, "step": 1123 }, { "epoch": 0.21887718341674295, "grad_norm": 1.421875, "learning_rate": 3.5696312925434546e-05, "loss": 2.5591, "step": 1124 }, { "epoch": 0.21907191400697135, "grad_norm": 1.28125, "learning_rate": 3.568866740228584e-05, "loss": 2.5515, "step": 1125 }, { "epoch": 0.21926664459719977, "grad_norm": 1.1953125, "learning_rate": 3.568101591431619e-05, "loss": 2.5624, "step": 1126 }, { "epoch": 0.2194613751874282, "grad_norm": 1.7890625, "learning_rate": 3.567335846443469e-05, "loss": 2.5403, "step": 1127 }, { "epoch": 0.21965610577765662, "grad_norm": 1.484375, "learning_rate": 3.5665695055552696e-05, "loss": 2.5614, "step": 1128 }, { "epoch": 0.21985083636788502, "grad_norm": 1.7421875, "learning_rate": 3.565802569058383e-05, "loss": 2.5646, "step": 1129 }, { "epoch": 0.22004556695811345, "grad_norm": 1.625, "learning_rate": 3.565035037244398e-05, "loss": 2.5536, "step": 1130 }, { "epoch": 0.22024029754834187, "grad_norm": 1.3671875, "learning_rate": 3.56426691040513e-05, "loss": 2.5502, "step": 1131 }, { "epoch": 0.2204350281385703, "grad_norm": 1.375, "learning_rate": 3.563498188832621e-05, "loss": 2.5323, "step": 1132 }, { "epoch": 0.2206297587287987, "grad_norm": 1.453125, "learning_rate": 3.562728872819136e-05, "loss": 2.5473, "step": 1133 }, { "epoch": 0.22082448931902712, "grad_norm": 1.2421875, "learning_rate": 3.561958962657171e-05, "loss": 2.558, "step": 1134 }, { "epoch": 0.22101921990925555, "grad_norm": 1.6953125, "learning_rate": 3.5611884586394445e-05, "loss": 2.5398, "step": 1135 }, { "epoch": 0.22121395049948397, "grad_norm": 1.515625, "learning_rate": 3.5604173610589006e-05, "loss": 2.5471, "step": 1136 }, { "epoch": 0.22140868108971237, "grad_norm": 1.6328125, "learning_rate": 3.5596456702087114e-05, "loss": 2.5735, "step": 1137 }, { "epoch": 0.2216034116799408, "grad_norm": 1.546875, "learning_rate": 3.558873386382273e-05, "loss": 2.5515, "step": 1138 }, { "epoch": 0.22179814227016922, "grad_norm": 1.40625, "learning_rate": 3.558100509873207e-05, "loss": 2.5508, "step": 1139 }, { "epoch": 0.22199287286039765, "grad_norm": 1.4921875, "learning_rate": 3.55732704097536e-05, "loss": 2.5598, "step": 1140 }, { "epoch": 0.22218760345062605, "grad_norm": 1.53125, "learning_rate": 3.556552979982805e-05, "loss": 2.5425, "step": 1141 }, { "epoch": 0.22238233404085447, "grad_norm": 1.3984375, "learning_rate": 3.555778327189839e-05, "loss": 2.5531, "step": 1142 }, { "epoch": 0.2225770646310829, "grad_norm": 1.53125, "learning_rate": 3.5550030828909856e-05, "loss": 2.5707, "step": 1143 }, { "epoch": 0.22277179522131133, "grad_norm": 1.4453125, "learning_rate": 3.554227247380991e-05, "loss": 2.5315, "step": 1144 }, { "epoch": 0.22296652581153972, "grad_norm": 1.4375, "learning_rate": 3.553450820954828e-05, "loss": 2.5588, "step": 1145 }, { "epoch": 0.22316125640176815, "grad_norm": 1.3359375, "learning_rate": 3.5526738039076934e-05, "loss": 2.5611, "step": 1146 }, { "epoch": 0.22335598699199657, "grad_norm": 1.609375, "learning_rate": 3.551896196535008e-05, "loss": 2.5491, "step": 1147 }, { "epoch": 0.223550717582225, "grad_norm": 1.5078125, "learning_rate": 3.551117999132418e-05, "loss": 2.5559, "step": 1148 }, { "epoch": 0.2237454481724534, "grad_norm": 1.5859375, "learning_rate": 3.550339211995793e-05, "loss": 2.5626, "step": 1149 }, { "epoch": 0.22394017876268182, "grad_norm": 1.4609375, "learning_rate": 3.549559835421228e-05, "loss": 2.5366, "step": 1150 }, { "epoch": 0.22413490935291025, "grad_norm": 1.375, "learning_rate": 3.548779869705041e-05, "loss": 2.5544, "step": 1151 }, { "epoch": 0.22432963994313868, "grad_norm": 1.265625, "learning_rate": 3.5479993151437754e-05, "loss": 2.5494, "step": 1152 }, { "epoch": 0.22452437053336707, "grad_norm": 1.4921875, "learning_rate": 3.547218172034196e-05, "loss": 2.5546, "step": 1153 }, { "epoch": 0.2247191011235955, "grad_norm": 1.328125, "learning_rate": 3.546436440673294e-05, "loss": 2.5212, "step": 1154 }, { "epoch": 0.22491383171382393, "grad_norm": 1.6640625, "learning_rate": 3.545654121358284e-05, "loss": 2.547, "step": 1155 }, { "epoch": 0.22510856230405235, "grad_norm": 1.5234375, "learning_rate": 3.5448712143866016e-05, "loss": 2.5549, "step": 1156 }, { "epoch": 0.22530329289428075, "grad_norm": 1.4453125, "learning_rate": 3.544087720055908e-05, "loss": 2.535, "step": 1157 }, { "epoch": 0.22549802348450917, "grad_norm": 1.40625, "learning_rate": 3.543303638664087e-05, "loss": 2.533, "step": 1158 }, { "epoch": 0.2256927540747376, "grad_norm": 1.546875, "learning_rate": 3.542518970509246e-05, "loss": 2.5299, "step": 1159 }, { "epoch": 0.22588748466496603, "grad_norm": 1.34375, "learning_rate": 3.541733715889716e-05, "loss": 2.5469, "step": 1160 }, { "epoch": 0.22608221525519445, "grad_norm": 1.6875, "learning_rate": 3.540947875104049e-05, "loss": 2.5253, "step": 1161 }, { "epoch": 0.22627694584542285, "grad_norm": 1.484375, "learning_rate": 3.540161448451023e-05, "loss": 2.5393, "step": 1162 }, { "epoch": 0.22647167643565128, "grad_norm": 1.453125, "learning_rate": 3.539374436229635e-05, "loss": 2.5538, "step": 1163 }, { "epoch": 0.2266664070258797, "grad_norm": 1.34375, "learning_rate": 3.5385868387391076e-05, "loss": 2.5387, "step": 1164 }, { "epoch": 0.22686113761610813, "grad_norm": 1.3671875, "learning_rate": 3.5377986562788844e-05, "loss": 2.5283, "step": 1165 }, { "epoch": 0.22705586820633653, "grad_norm": 1.296875, "learning_rate": 3.537009889148632e-05, "loss": 2.5525, "step": 1166 }, { "epoch": 0.22725059879656495, "grad_norm": 1.625, "learning_rate": 3.5362205376482386e-05, "loss": 2.5341, "step": 1167 }, { "epoch": 0.22744532938679338, "grad_norm": 1.46875, "learning_rate": 3.535430602077816e-05, "loss": 2.5464, "step": 1168 }, { "epoch": 0.2276400599770218, "grad_norm": 1.5078125, "learning_rate": 3.534640082737697e-05, "loss": 2.5389, "step": 1169 }, { "epoch": 0.2278347905672502, "grad_norm": 1.375, "learning_rate": 3.533848979928436e-05, "loss": 2.5453, "step": 1170 }, { "epoch": 0.22802952115747863, "grad_norm": 1.34375, "learning_rate": 3.533057293950811e-05, "loss": 2.5227, "step": 1171 }, { "epoch": 0.22822425174770705, "grad_norm": 1.171875, "learning_rate": 3.532265025105819e-05, "loss": 2.5617, "step": 1172 }, { "epoch": 0.22841898233793548, "grad_norm": 1.671875, "learning_rate": 3.53147217369468e-05, "loss": 2.526, "step": 1173 }, { "epoch": 0.22861371292816388, "grad_norm": 1.453125, "learning_rate": 3.5306787400188376e-05, "loss": 2.5293, "step": 1174 }, { "epoch": 0.2288084435183923, "grad_norm": 1.578125, "learning_rate": 3.5298847243799525e-05, "loss": 2.5378, "step": 1175 }, { "epoch": 0.22900317410862073, "grad_norm": 1.453125, "learning_rate": 3.5290901270799104e-05, "loss": 2.5319, "step": 1176 }, { "epoch": 0.22919790469884915, "grad_norm": 1.3359375, "learning_rate": 3.528294948420816e-05, "loss": 2.5647, "step": 1177 }, { "epoch": 0.22939263528907755, "grad_norm": 1.21875, "learning_rate": 3.5274991887049954e-05, "loss": 2.5349, "step": 1178 }, { "epoch": 0.22958736587930598, "grad_norm": 1.5, "learning_rate": 3.526702848234996e-05, "loss": 2.5412, "step": 1179 }, { "epoch": 0.2297820964695344, "grad_norm": 1.4453125, "learning_rate": 3.5259059273135874e-05, "loss": 2.5493, "step": 1180 }, { "epoch": 0.22997682705976283, "grad_norm": 1.46875, "learning_rate": 3.525108426243757e-05, "loss": 2.534, "step": 1181 }, { "epoch": 0.23017155764999123, "grad_norm": 1.4609375, "learning_rate": 3.524310345328713e-05, "loss": 2.5267, "step": 1182 }, { "epoch": 0.23036628824021965, "grad_norm": 1.390625, "learning_rate": 3.523511684871888e-05, "loss": 2.5381, "step": 1183 }, { "epoch": 0.23056101883044808, "grad_norm": 1.25, "learning_rate": 3.522712445176931e-05, "loss": 2.5353, "step": 1184 }, { "epoch": 0.2307557494206765, "grad_norm": 1.578125, "learning_rate": 3.521912626547712e-05, "loss": 2.5254, "step": 1185 }, { "epoch": 0.2309504800109049, "grad_norm": 1.390625, "learning_rate": 3.5211122292883216e-05, "loss": 2.5589, "step": 1186 }, { "epoch": 0.23114521060113333, "grad_norm": 1.5546875, "learning_rate": 3.52031125370307e-05, "loss": 2.5619, "step": 1187 }, { "epoch": 0.23133994119136175, "grad_norm": 1.453125, "learning_rate": 3.519509700096489e-05, "loss": 2.5369, "step": 1188 }, { "epoch": 0.23153467178159018, "grad_norm": 1.4375, "learning_rate": 3.518707568773327e-05, "loss": 2.5586, "step": 1189 }, { "epoch": 0.23172940237181858, "grad_norm": 1.3515625, "learning_rate": 3.517904860038555e-05, "loss": 2.5442, "step": 1190 }, { "epoch": 0.231924132962047, "grad_norm": 1.3515625, "learning_rate": 3.5171015741973615e-05, "loss": 2.5302, "step": 1191 }, { "epoch": 0.23211886355227543, "grad_norm": 1.25, "learning_rate": 3.516297711555156e-05, "loss": 2.5362, "step": 1192 }, { "epoch": 0.23231359414250385, "grad_norm": 1.5625, "learning_rate": 3.515493272417566e-05, "loss": 2.5433, "step": 1193 }, { "epoch": 0.23250832473273225, "grad_norm": 1.390625, "learning_rate": 3.514688257090439e-05, "loss": 2.547, "step": 1194 }, { "epoch": 0.23270305532296068, "grad_norm": 1.671875, "learning_rate": 3.513882665879842e-05, "loss": 2.5369, "step": 1195 }, { "epoch": 0.2328977859131891, "grad_norm": 1.578125, "learning_rate": 3.513076499092058e-05, "loss": 2.535, "step": 1196 }, { "epoch": 0.23309251650341753, "grad_norm": 1.1953125, "learning_rate": 3.512269757033594e-05, "loss": 2.5515, "step": 1197 }, { "epoch": 0.23328724709364593, "grad_norm": 1.1875, "learning_rate": 3.511462440011171e-05, "loss": 2.544, "step": 1198 }, { "epoch": 0.23348197768387435, "grad_norm": 1.3671875, "learning_rate": 3.51065454833173e-05, "loss": 2.5418, "step": 1199 }, { "epoch": 0.23367670827410278, "grad_norm": 1.1015625, "learning_rate": 3.509846082302432e-05, "loss": 2.5445, "step": 1200 }, { "epoch": 0.2338714388643312, "grad_norm": 1.8046875, "learning_rate": 3.5090370422306555e-05, "loss": 2.537, "step": 1201 }, { "epoch": 0.23406616945455963, "grad_norm": 1.578125, "learning_rate": 3.508227428423996e-05, "loss": 2.5526, "step": 1202 }, { "epoch": 0.23426090004478803, "grad_norm": 1.3515625, "learning_rate": 3.5074172411902677e-05, "loss": 2.5334, "step": 1203 }, { "epoch": 0.23445563063501645, "grad_norm": 1.359375, "learning_rate": 3.506606480837505e-05, "loss": 2.551, "step": 1204 }, { "epoch": 0.23465036122524488, "grad_norm": 1.453125, "learning_rate": 3.505795147673957e-05, "loss": 2.5456, "step": 1205 }, { "epoch": 0.2348450918154733, "grad_norm": 1.359375, "learning_rate": 3.5049832420080934e-05, "loss": 2.5417, "step": 1206 }, { "epoch": 0.2350398224057017, "grad_norm": 1.3984375, "learning_rate": 3.504170764148598e-05, "loss": 2.5351, "step": 1207 }, { "epoch": 0.23523455299593013, "grad_norm": 1.296875, "learning_rate": 3.503357714404376e-05, "loss": 2.5344, "step": 1208 }, { "epoch": 0.23542928358615856, "grad_norm": 1.3984375, "learning_rate": 3.502544093084549e-05, "loss": 2.5403, "step": 1209 }, { "epoch": 0.23562401417638698, "grad_norm": 1.25, "learning_rate": 3.5017299004984536e-05, "loss": 2.5389, "step": 1210 }, { "epoch": 0.23581874476661538, "grad_norm": 1.59375, "learning_rate": 3.500915136955646e-05, "loss": 2.5383, "step": 1211 }, { "epoch": 0.2360134753568438, "grad_norm": 1.3671875, "learning_rate": 3.500099802765899e-05, "loss": 2.5413, "step": 1212 }, { "epoch": 0.23620820594707223, "grad_norm": 1.515625, "learning_rate": 3.4992838982392016e-05, "loss": 2.5292, "step": 1213 }, { "epoch": 0.23640293653730066, "grad_norm": 1.5234375, "learning_rate": 3.4984674236857605e-05, "loss": 2.538, "step": 1214 }, { "epoch": 0.23659766712752905, "grad_norm": 1.1953125, "learning_rate": 3.497650379415999e-05, "loss": 2.5321, "step": 1215 }, { "epoch": 0.23679239771775748, "grad_norm": 1.140625, "learning_rate": 3.496832765740556e-05, "loss": 2.5356, "step": 1216 }, { "epoch": 0.2369871283079859, "grad_norm": 1.4296875, "learning_rate": 3.496014582970289e-05, "loss": 2.5271, "step": 1217 }, { "epoch": 0.23718185889821433, "grad_norm": 1.1796875, "learning_rate": 3.4951958314162696e-05, "loss": 2.5406, "step": 1218 }, { "epoch": 0.23737658948844273, "grad_norm": 1.8046875, "learning_rate": 3.494376511389786e-05, "loss": 2.5507, "step": 1219 }, { "epoch": 0.23757132007867116, "grad_norm": 1.6953125, "learning_rate": 3.4935566232023444e-05, "loss": 2.5472, "step": 1220 }, { "epoch": 0.23776605066889958, "grad_norm": 1.15625, "learning_rate": 3.492736167165665e-05, "loss": 2.5426, "step": 1221 }, { "epoch": 0.237960781259128, "grad_norm": 1.3359375, "learning_rate": 3.491915143591685e-05, "loss": 2.5365, "step": 1222 }, { "epoch": 0.2381555118493564, "grad_norm": 1.1953125, "learning_rate": 3.491093552792557e-05, "loss": 2.5415, "step": 1223 }, { "epoch": 0.23835024243958483, "grad_norm": 0.9921875, "learning_rate": 3.4902713950806494e-05, "loss": 2.5406, "step": 1224 }, { "epoch": 0.23854497302981326, "grad_norm": 1.6640625, "learning_rate": 3.4894486707685454e-05, "loss": 2.5249, "step": 1225 }, { "epoch": 0.23873970362004168, "grad_norm": 1.46875, "learning_rate": 3.488625380169044e-05, "loss": 2.5296, "step": 1226 }, { "epoch": 0.23893443421027008, "grad_norm": 1.640625, "learning_rate": 3.4878015235951614e-05, "loss": 2.5197, "step": 1227 }, { "epoch": 0.2391291648004985, "grad_norm": 1.640625, "learning_rate": 3.486977101360126e-05, "loss": 2.5379, "step": 1228 }, { "epoch": 0.23932389539072693, "grad_norm": 1.15625, "learning_rate": 3.486152113777383e-05, "loss": 2.553, "step": 1229 }, { "epoch": 0.23951862598095536, "grad_norm": 1.15625, "learning_rate": 3.485326561160591e-05, "loss": 2.5288, "step": 1230 }, { "epoch": 0.23971335657118376, "grad_norm": 1.2578125, "learning_rate": 3.484500443823627e-05, "loss": 2.5397, "step": 1231 }, { "epoch": 0.23990808716141218, "grad_norm": 1.0625, "learning_rate": 3.4836737620805774e-05, "loss": 2.5417, "step": 1232 }, { "epoch": 0.2401028177516406, "grad_norm": 1.7109375, "learning_rate": 3.482846516245749e-05, "loss": 2.5443, "step": 1233 }, { "epoch": 0.24029754834186903, "grad_norm": 1.421875, "learning_rate": 3.482018706633657e-05, "loss": 2.5086, "step": 1234 }, { "epoch": 0.24049227893209743, "grad_norm": 1.546875, "learning_rate": 3.481190333559037e-05, "loss": 2.526, "step": 1235 }, { "epoch": 0.24068700952232586, "grad_norm": 1.4453125, "learning_rate": 3.4803613973368334e-05, "loss": 2.5334, "step": 1236 }, { "epoch": 0.24088174011255428, "grad_norm": 1.2265625, "learning_rate": 3.4795318982822084e-05, "loss": 2.532, "step": 1237 }, { "epoch": 0.2410764707027827, "grad_norm": 1.1484375, "learning_rate": 3.4787018367105374e-05, "loss": 2.5517, "step": 1238 }, { "epoch": 0.2412712012930111, "grad_norm": 1.453125, "learning_rate": 3.477871212937409e-05, "loss": 2.5459, "step": 1239 }, { "epoch": 0.24146593188323953, "grad_norm": 1.203125, "learning_rate": 3.477040027278626e-05, "loss": 2.5402, "step": 1240 }, { "epoch": 0.24166066247346796, "grad_norm": 1.6015625, "learning_rate": 3.4762082800502034e-05, "loss": 2.5474, "step": 1241 }, { "epoch": 0.24185539306369638, "grad_norm": 1.4375, "learning_rate": 3.475375971568372e-05, "loss": 2.5225, "step": 1242 }, { "epoch": 0.24205012365392478, "grad_norm": 1.4453125, "learning_rate": 3.4745431021495744e-05, "loss": 2.5604, "step": 1243 }, { "epoch": 0.2422448542441532, "grad_norm": 1.3828125, "learning_rate": 3.473709672110467e-05, "loss": 2.5455, "step": 1244 }, { "epoch": 0.24243958483438163, "grad_norm": 1.3125, "learning_rate": 3.472875681767921e-05, "loss": 2.5193, "step": 1245 }, { "epoch": 0.24263431542461006, "grad_norm": 1.09375, "learning_rate": 3.472041131439016e-05, "loss": 2.5284, "step": 1246 }, { "epoch": 0.24282904601483848, "grad_norm": 1.4765625, "learning_rate": 3.471206021441051e-05, "loss": 2.5134, "step": 1247 }, { "epoch": 0.24302377660506688, "grad_norm": 1.2734375, "learning_rate": 3.4703703520915314e-05, "loss": 2.5306, "step": 1248 }, { "epoch": 0.2432185071952953, "grad_norm": 1.5859375, "learning_rate": 3.469534123708178e-05, "loss": 2.5214, "step": 1249 }, { "epoch": 0.24341323778552373, "grad_norm": 1.46875, "learning_rate": 3.4686973366089274e-05, "loss": 2.536, "step": 1250 }, { "epoch": 0.24360796837575216, "grad_norm": 1.21875, "learning_rate": 3.4678599911119224e-05, "loss": 2.5316, "step": 1251 }, { "epoch": 0.24380269896598056, "grad_norm": 1.1953125, "learning_rate": 3.467022087535522e-05, "loss": 2.526, "step": 1252 }, { "epoch": 0.24399742955620898, "grad_norm": 1.2109375, "learning_rate": 3.466183626198298e-05, "loss": 2.5186, "step": 1253 }, { "epoch": 0.2441921601464374, "grad_norm": 1.1015625, "learning_rate": 3.465344607419031e-05, "loss": 2.5346, "step": 1254 }, { "epoch": 0.24438689073666583, "grad_norm": 1.515625, "learning_rate": 3.4645050315167164e-05, "loss": 2.5325, "step": 1255 }, { "epoch": 0.24458162132689423, "grad_norm": 1.21875, "learning_rate": 3.46366489881056e-05, "loss": 2.5536, "step": 1256 }, { "epoch": 0.24477635191712266, "grad_norm": 1.6328125, "learning_rate": 3.46282420961998e-05, "loss": 2.5552, "step": 1257 }, { "epoch": 0.24497108250735108, "grad_norm": 1.5234375, "learning_rate": 3.4619829642646055e-05, "loss": 2.5468, "step": 1258 }, { "epoch": 0.2451658130975795, "grad_norm": 1.234375, "learning_rate": 3.4611411630642764e-05, "loss": 2.5476, "step": 1259 }, { "epoch": 0.2453605436878079, "grad_norm": 1.2265625, "learning_rate": 3.4602988063390476e-05, "loss": 2.5348, "step": 1260 }, { "epoch": 0.24555527427803633, "grad_norm": 1.2421875, "learning_rate": 3.459455894409181e-05, "loss": 2.5245, "step": 1261 }, { "epoch": 0.24575000486826476, "grad_norm": 0.99609375, "learning_rate": 3.458612427595151e-05, "loss": 2.5464, "step": 1262 }, { "epoch": 0.24594473545849319, "grad_norm": 1.484375, "learning_rate": 3.4577684062176435e-05, "loss": 2.521, "step": 1263 }, { "epoch": 0.24613946604872158, "grad_norm": 1.21875, "learning_rate": 3.456923830597555e-05, "loss": 2.513, "step": 1264 }, { "epoch": 0.24633419663895, "grad_norm": 1.7734375, "learning_rate": 3.456078701055992e-05, "loss": 2.5242, "step": 1265 }, { "epoch": 0.24652892722917844, "grad_norm": 1.703125, "learning_rate": 3.4552330179142725e-05, "loss": 2.5337, "step": 1266 }, { "epoch": 0.24672365781940686, "grad_norm": 1.0078125, "learning_rate": 3.454386781493925e-05, "loss": 2.5396, "step": 1267 }, { "epoch": 0.24691838840963526, "grad_norm": 1.140625, "learning_rate": 3.453539992116688e-05, "loss": 2.526, "step": 1268 }, { "epoch": 0.24711311899986368, "grad_norm": 1.03125, "learning_rate": 3.45269265010451e-05, "loss": 2.515, "step": 1269 }, { "epoch": 0.2473078495900921, "grad_norm": 0.9375, "learning_rate": 3.4518447557795495e-05, "loss": 2.5503, "step": 1270 }, { "epoch": 0.24750258018032054, "grad_norm": 1.2265625, "learning_rate": 3.450996309464176e-05, "loss": 2.5379, "step": 1271 }, { "epoch": 0.24769731077054893, "grad_norm": 0.87890625, "learning_rate": 3.450147311480968e-05, "loss": 2.5201, "step": 1272 }, { "epoch": 0.24789204136077736, "grad_norm": 1.7109375, "learning_rate": 3.4492977621527136e-05, "loss": 2.5526, "step": 1273 }, { "epoch": 0.24808677195100579, "grad_norm": 1.3359375, "learning_rate": 3.448447661802411e-05, "loss": 2.5307, "step": 1274 }, { "epoch": 0.2482815025412342, "grad_norm": 1.859375, "learning_rate": 3.447597010753269e-05, "loss": 2.5325, "step": 1275 }, { "epoch": 0.2484762331314626, "grad_norm": 1.8984375, "learning_rate": 3.446745809328703e-05, "loss": 2.5213, "step": 1276 }, { "epoch": 0.24867096372169104, "grad_norm": 0.85546875, "learning_rate": 3.4458940578523394e-05, "loss": 2.5148, "step": 1277 }, { "epoch": 0.24886569431191946, "grad_norm": 1.59375, "learning_rate": 3.445041756648014e-05, "loss": 2.5224, "step": 1278 }, { "epoch": 0.2490604249021479, "grad_norm": 0.90234375, "learning_rate": 3.444188906039772e-05, "loss": 2.5328, "step": 1279 }, { "epoch": 0.24925515549237628, "grad_norm": 1.9296875, "learning_rate": 3.443335506351865e-05, "loss": 2.541, "step": 1280 }, { "epoch": 0.2494498860826047, "grad_norm": 1.8203125, "learning_rate": 3.442481557908757e-05, "loss": 2.5433, "step": 1281 }, { "epoch": 0.24964461667283314, "grad_norm": 1.046875, "learning_rate": 3.441627061035116e-05, "loss": 2.5227, "step": 1282 }, { "epoch": 0.24983934726306156, "grad_norm": 1.5546875, "learning_rate": 3.4407720160558226e-05, "loss": 2.5454, "step": 1283 }, { "epoch": 0.25003407785328996, "grad_norm": 0.93359375, "learning_rate": 3.439916423295964e-05, "loss": 2.5341, "step": 1284 }, { "epoch": 0.2502288084435184, "grad_norm": 2.03125, "learning_rate": 3.439060283080838e-05, "loss": 2.5394, "step": 1285 }, { "epoch": 0.2504235390337468, "grad_norm": 1.6328125, "learning_rate": 3.438203595735945e-05, "loss": 2.5486, "step": 1286 }, { "epoch": 0.2506182696239752, "grad_norm": 1.7421875, "learning_rate": 3.4373463615869996e-05, "loss": 2.5318, "step": 1287 }, { "epoch": 0.25081300021420366, "grad_norm": 1.578125, "learning_rate": 3.43648858095992e-05, "loss": 2.5437, "step": 1288 }, { "epoch": 0.25100773080443206, "grad_norm": 1.53125, "learning_rate": 3.435630254180835e-05, "loss": 2.5391, "step": 1289 }, { "epoch": 0.2512024613946605, "grad_norm": 1.171875, "learning_rate": 3.43477138157608e-05, "loss": 2.53, "step": 1290 }, { "epoch": 0.2513971919848889, "grad_norm": 1.40625, "learning_rate": 3.4339119634721974e-05, "loss": 2.5319, "step": 1291 }, { "epoch": 0.2515919225751173, "grad_norm": 0.94140625, "learning_rate": 3.4330520001959364e-05, "loss": 2.5236, "step": 1292 }, { "epoch": 0.25178665316534576, "grad_norm": 1.734375, "learning_rate": 3.432191492074256e-05, "loss": 2.5267, "step": 1293 }, { "epoch": 0.25198138375557416, "grad_norm": 1.0234375, "learning_rate": 3.43133043943432e-05, "loss": 2.5402, "step": 1294 }, { "epoch": 0.25217611434580256, "grad_norm": 2.328125, "learning_rate": 3.4304688426035005e-05, "loss": 2.5507, "step": 1295 }, { "epoch": 0.252370844936031, "grad_norm": 2.0, "learning_rate": 3.429606701909375e-05, "loss": 2.5209, "step": 1296 }, { "epoch": 0.2525655755262594, "grad_norm": 1.7578125, "learning_rate": 3.42874401767973e-05, "loss": 2.5277, "step": 1297 }, { "epoch": 0.25276030611648787, "grad_norm": 1.7421875, "learning_rate": 3.427880790242557e-05, "loss": 2.5414, "step": 1298 }, { "epoch": 0.25295503670671626, "grad_norm": 1.5546875, "learning_rate": 3.427017019926055e-05, "loss": 2.5516, "step": 1299 }, { "epoch": 0.25314976729694466, "grad_norm": 1.375, "learning_rate": 3.426152707058628e-05, "loss": 2.5162, "step": 1300 }, { "epoch": 0.2533444978871731, "grad_norm": 1.5390625, "learning_rate": 3.425287851968888e-05, "loss": 2.5242, "step": 1301 }, { "epoch": 0.2535392284774015, "grad_norm": 1.3046875, "learning_rate": 3.424422454985651e-05, "loss": 2.5425, "step": 1302 }, { "epoch": 0.2537339590676299, "grad_norm": 1.578125, "learning_rate": 3.423556516437943e-05, "loss": 2.5286, "step": 1303 }, { "epoch": 0.25392868965785836, "grad_norm": 1.3671875, "learning_rate": 3.4226900366549906e-05, "loss": 2.5128, "step": 1304 }, { "epoch": 0.25412342024808676, "grad_norm": 1.5546875, "learning_rate": 3.4218230159662294e-05, "loss": 2.5208, "step": 1305 }, { "epoch": 0.2543181508383152, "grad_norm": 1.390625, "learning_rate": 3.420955454701301e-05, "loss": 2.5447, "step": 1306 }, { "epoch": 0.2545128814285436, "grad_norm": 1.609375, "learning_rate": 3.420087353190051e-05, "loss": 2.5309, "step": 1307 }, { "epoch": 0.254707612018772, "grad_norm": 1.4921875, "learning_rate": 3.4192187117625316e-05, "loss": 2.5426, "step": 1308 }, { "epoch": 0.25490234260900047, "grad_norm": 1.6015625, "learning_rate": 3.4183495307489984e-05, "loss": 2.5283, "step": 1309 }, { "epoch": 0.25509707319922886, "grad_norm": 1.4296875, "learning_rate": 3.417479810479915e-05, "loss": 2.5168, "step": 1310 }, { "epoch": 0.25529180378945726, "grad_norm": 1.6796875, "learning_rate": 3.4166095512859474e-05, "loss": 2.5224, "step": 1311 }, { "epoch": 0.2554865343796857, "grad_norm": 1.421875, "learning_rate": 3.415738753497968e-05, "loss": 2.5223, "step": 1312 }, { "epoch": 0.2556812649699141, "grad_norm": 1.625, "learning_rate": 3.4148674174470535e-05, "loss": 2.5325, "step": 1313 }, { "epoch": 0.25587599556014257, "grad_norm": 1.484375, "learning_rate": 3.413995543464486e-05, "loss": 2.5209, "step": 1314 }, { "epoch": 0.25607072615037096, "grad_norm": 1.640625, "learning_rate": 3.41312313188175e-05, "loss": 2.5103, "step": 1315 }, { "epoch": 0.25626545674059936, "grad_norm": 1.421875, "learning_rate": 3.412250183030536e-05, "loss": 2.5204, "step": 1316 }, { "epoch": 0.2564601873308278, "grad_norm": 1.5390625, "learning_rate": 3.411376697242739e-05, "loss": 2.5198, "step": 1317 }, { "epoch": 0.2566549179210562, "grad_norm": 1.453125, "learning_rate": 3.410502674850459e-05, "loss": 2.4981, "step": 1318 }, { "epoch": 0.2568496485112846, "grad_norm": 1.46875, "learning_rate": 3.409628116185996e-05, "loss": 2.5237, "step": 1319 }, { "epoch": 0.25704437910151307, "grad_norm": 1.3359375, "learning_rate": 3.408753021581859e-05, "loss": 2.5228, "step": 1320 }, { "epoch": 0.25723910969174146, "grad_norm": 1.5234375, "learning_rate": 3.4078773913707575e-05, "loss": 2.5192, "step": 1321 }, { "epoch": 0.2574338402819699, "grad_norm": 1.40625, "learning_rate": 3.407001225885606e-05, "loss": 2.5046, "step": 1322 }, { "epoch": 0.2576285708721983, "grad_norm": 1.421875, "learning_rate": 3.406124525459521e-05, "loss": 2.5172, "step": 1323 }, { "epoch": 0.2578233014624267, "grad_norm": 1.28125, "learning_rate": 3.405247290425823e-05, "loss": 2.5406, "step": 1324 }, { "epoch": 0.25801803205265517, "grad_norm": 1.4296875, "learning_rate": 3.4043695211180384e-05, "loss": 2.5208, "step": 1325 }, { "epoch": 0.25821276264288356, "grad_norm": 1.2890625, "learning_rate": 3.4034912178698935e-05, "loss": 2.5175, "step": 1326 }, { "epoch": 0.25840749323311196, "grad_norm": 1.5078125, "learning_rate": 3.402612381015318e-05, "loss": 2.5231, "step": 1327 }, { "epoch": 0.2586022238233404, "grad_norm": 1.296875, "learning_rate": 3.401733010888445e-05, "loss": 2.5194, "step": 1328 }, { "epoch": 0.2587969544135688, "grad_norm": 1.4140625, "learning_rate": 3.400853107823611e-05, "loss": 2.5223, "step": 1329 }, { "epoch": 0.25899168500379727, "grad_norm": 1.265625, "learning_rate": 3.399972672155356e-05, "loss": 2.5201, "step": 1330 }, { "epoch": 0.25918641559402567, "grad_norm": 1.375, "learning_rate": 3.399091704218417e-05, "loss": 2.5181, "step": 1331 }, { "epoch": 0.25938114618425406, "grad_norm": 1.3125, "learning_rate": 3.398210204347742e-05, "loss": 2.5382, "step": 1332 }, { "epoch": 0.2595758767744825, "grad_norm": 1.453125, "learning_rate": 3.397328172878473e-05, "loss": 2.5277, "step": 1333 }, { "epoch": 0.2597706073647109, "grad_norm": 1.375, "learning_rate": 3.396445610145961e-05, "loss": 2.5216, "step": 1334 }, { "epoch": 0.25996533795493937, "grad_norm": 1.3671875, "learning_rate": 3.3955625164857534e-05, "loss": 2.5368, "step": 1335 }, { "epoch": 0.26016006854516777, "grad_norm": 1.3046875, "learning_rate": 3.3946788922336025e-05, "loss": 2.5166, "step": 1336 }, { "epoch": 0.26035479913539616, "grad_norm": 1.2578125, "learning_rate": 3.3937947377254625e-05, "loss": 2.5293, "step": 1337 }, { "epoch": 0.2605495297256246, "grad_norm": 1.125, "learning_rate": 3.3929100532974876e-05, "loss": 2.5237, "step": 1338 }, { "epoch": 0.260744260315853, "grad_norm": 1.4140625, "learning_rate": 3.392024839286033e-05, "loss": 2.5181, "step": 1339 }, { "epoch": 0.2609389909060814, "grad_norm": 1.21875, "learning_rate": 3.391139096027659e-05, "loss": 2.5172, "step": 1340 }, { "epoch": 0.26113372149630987, "grad_norm": 1.6484375, "learning_rate": 3.390252823859123e-05, "loss": 2.5352, "step": 1341 }, { "epoch": 0.26132845208653827, "grad_norm": 1.5546875, "learning_rate": 3.3893660231173846e-05, "loss": 2.5215, "step": 1342 }, { "epoch": 0.2615231826767667, "grad_norm": 1.1484375, "learning_rate": 3.388478694139606e-05, "loss": 2.5187, "step": 1343 }, { "epoch": 0.2617179132669951, "grad_norm": 1.234375, "learning_rate": 3.387590837263149e-05, "loss": 2.5145, "step": 1344 }, { "epoch": 0.2619126438572235, "grad_norm": 1.046875, "learning_rate": 3.386702452825576e-05, "loss": 2.5053, "step": 1345 }, { "epoch": 0.26210737444745197, "grad_norm": 0.97265625, "learning_rate": 3.385813541164651e-05, "loss": 2.5197, "step": 1346 }, { "epoch": 0.26230210503768037, "grad_norm": 1.0546875, "learning_rate": 3.3849241026183354e-05, "loss": 2.5188, "step": 1347 }, { "epoch": 0.26249683562790876, "grad_norm": 0.80078125, "learning_rate": 3.384034137524795e-05, "loss": 2.5039, "step": 1348 }, { "epoch": 0.2626915662181372, "grad_norm": 0.92578125, "learning_rate": 3.3831436462223936e-05, "loss": 2.5147, "step": 1349 }, { "epoch": 0.2628862968083656, "grad_norm": 0.82421875, "learning_rate": 3.382252629049695e-05, "loss": 2.5333, "step": 1350 }, { "epoch": 0.26308102739859407, "grad_norm": 0.82421875, "learning_rate": 3.381361086345464e-05, "loss": 2.5165, "step": 1351 }, { "epoch": 0.26327575798882247, "grad_norm": 0.85546875, "learning_rate": 3.380469018448663e-05, "loss": 2.5211, "step": 1352 }, { "epoch": 0.26347048857905087, "grad_norm": 0.91796875, "learning_rate": 3.379576425698458e-05, "loss": 2.5172, "step": 1353 }, { "epoch": 0.2636652191692793, "grad_norm": 0.765625, "learning_rate": 3.37868330843421e-05, "loss": 2.5153, "step": 1354 }, { "epoch": 0.2638599497595077, "grad_norm": 0.91796875, "learning_rate": 3.377789666995483e-05, "loss": 2.5338, "step": 1355 }, { "epoch": 0.2640546803497361, "grad_norm": 0.8828125, "learning_rate": 3.376895501722039e-05, "loss": 2.5032, "step": 1356 }, { "epoch": 0.26424941093996457, "grad_norm": 0.890625, "learning_rate": 3.376000812953837e-05, "loss": 2.5331, "step": 1357 }, { "epoch": 0.26444414153019297, "grad_norm": 1.171875, "learning_rate": 3.375105601031039e-05, "loss": 2.5193, "step": 1358 }, { "epoch": 0.2646388721204214, "grad_norm": 0.74609375, "learning_rate": 3.3742098662940044e-05, "loss": 2.5191, "step": 1359 }, { "epoch": 0.2648336027106498, "grad_norm": 0.83203125, "learning_rate": 3.3733136090832895e-05, "loss": 2.5245, "step": 1360 }, { "epoch": 0.2650283333008782, "grad_norm": 0.88671875, "learning_rate": 3.372416829739651e-05, "loss": 2.5178, "step": 1361 }, { "epoch": 0.26522306389110667, "grad_norm": 0.96875, "learning_rate": 3.371519528604044e-05, "loss": 2.5353, "step": 1362 }, { "epoch": 0.26541779448133507, "grad_norm": 0.88671875, "learning_rate": 3.370621706017622e-05, "loss": 2.5245, "step": 1363 }, { "epoch": 0.26561252507156347, "grad_norm": 0.80078125, "learning_rate": 3.369723362321736e-05, "loss": 2.5339, "step": 1364 }, { "epoch": 0.2658072556617919, "grad_norm": 0.84765625, "learning_rate": 3.368824497857937e-05, "loss": 2.5104, "step": 1365 }, { "epoch": 0.2660019862520203, "grad_norm": 0.890625, "learning_rate": 3.3679251129679715e-05, "loss": 2.5147, "step": 1366 }, { "epoch": 0.26619671684224877, "grad_norm": 0.78515625, "learning_rate": 3.367025207993785e-05, "loss": 2.5338, "step": 1367 }, { "epoch": 0.26639144743247717, "grad_norm": 0.72265625, "learning_rate": 3.3661247832775205e-05, "loss": 2.5138, "step": 1368 }, { "epoch": 0.26658617802270557, "grad_norm": 0.69921875, "learning_rate": 3.365223839161521e-05, "loss": 2.5462, "step": 1369 }, { "epoch": 0.266780908612934, "grad_norm": 0.7421875, "learning_rate": 3.3643223759883224e-05, "loss": 2.5304, "step": 1370 }, { "epoch": 0.2669756392031624, "grad_norm": 0.78515625, "learning_rate": 3.36342039410066e-05, "loss": 2.5402, "step": 1371 }, { "epoch": 0.2671703697933908, "grad_norm": 0.6875, "learning_rate": 3.36251789384147e-05, "loss": 2.5278, "step": 1372 }, { "epoch": 0.26736510038361927, "grad_norm": 0.7265625, "learning_rate": 3.3616148755538796e-05, "loss": 2.537, "step": 1373 }, { "epoch": 0.26755983097384767, "grad_norm": 0.82421875, "learning_rate": 3.360711339581217e-05, "loss": 2.5378, "step": 1374 }, { "epoch": 0.2677545615640761, "grad_norm": 0.73046875, "learning_rate": 3.359807286267005e-05, "loss": 2.5323, "step": 1375 }, { "epoch": 0.2679492921543045, "grad_norm": 0.83203125, "learning_rate": 3.358902715954965e-05, "loss": 2.5158, "step": 1376 }, { "epoch": 0.2681440227445329, "grad_norm": 1.015625, "learning_rate": 3.357997628989013e-05, "loss": 2.5322, "step": 1377 }, { "epoch": 0.26833875333476137, "grad_norm": 1.0078125, "learning_rate": 3.3570920257132635e-05, "loss": 2.5121, "step": 1378 }, { "epoch": 0.26853348392498977, "grad_norm": 0.8828125, "learning_rate": 3.356185906472026e-05, "loss": 2.5086, "step": 1379 }, { "epoch": 0.2687282145152182, "grad_norm": 0.81640625, "learning_rate": 3.3552792716098066e-05, "loss": 2.5292, "step": 1380 }, { "epoch": 0.2689229451054466, "grad_norm": 0.8359375, "learning_rate": 3.354372121471306e-05, "loss": 2.5161, "step": 1381 }, { "epoch": 0.269117675695675, "grad_norm": 0.76953125, "learning_rate": 3.3534644564014245e-05, "loss": 2.5252, "step": 1382 }, { "epoch": 0.26931240628590347, "grad_norm": 0.83984375, "learning_rate": 3.3525562767452534e-05, "loss": 2.5215, "step": 1383 }, { "epoch": 0.26950713687613187, "grad_norm": 0.84375, "learning_rate": 3.351647582848083e-05, "loss": 2.5048, "step": 1384 }, { "epoch": 0.26970186746636027, "grad_norm": 0.81640625, "learning_rate": 3.3507383750553974e-05, "loss": 2.519, "step": 1385 }, { "epoch": 0.2698965980565887, "grad_norm": 0.7578125, "learning_rate": 3.3498286537128785e-05, "loss": 2.5113, "step": 1386 }, { "epoch": 0.2700913286468171, "grad_norm": 0.67578125, "learning_rate": 3.3489184191664e-05, "loss": 2.518, "step": 1387 }, { "epoch": 0.2702860592370456, "grad_norm": 0.8359375, "learning_rate": 3.348007671762034e-05, "loss": 2.5237, "step": 1388 }, { "epoch": 0.27048078982727397, "grad_norm": 0.828125, "learning_rate": 3.3470964118460444e-05, "loss": 2.5113, "step": 1389 }, { "epoch": 0.27067552041750237, "grad_norm": 0.8515625, "learning_rate": 3.346184639764893e-05, "loss": 2.5083, "step": 1390 }, { "epoch": 0.2708702510077308, "grad_norm": 0.7109375, "learning_rate": 3.345272355865234e-05, "loss": 2.5148, "step": 1391 }, { "epoch": 0.2710649815979592, "grad_norm": 0.69140625, "learning_rate": 3.344359560493918e-05, "loss": 2.531, "step": 1392 }, { "epoch": 0.2712597121881876, "grad_norm": 0.81640625, "learning_rate": 3.343446253997988e-05, "loss": 2.5303, "step": 1393 }, { "epoch": 0.27145444277841607, "grad_norm": 0.75390625, "learning_rate": 3.3425324367246834e-05, "loss": 2.5288, "step": 1394 }, { "epoch": 0.27164917336864447, "grad_norm": 0.703125, "learning_rate": 3.341618109021438e-05, "loss": 2.5294, "step": 1395 }, { "epoch": 0.2718439039588729, "grad_norm": 0.76171875, "learning_rate": 3.3407032712358755e-05, "loss": 2.5038, "step": 1396 }, { "epoch": 0.2720386345491013, "grad_norm": 0.70703125, "learning_rate": 3.33978792371582e-05, "loss": 2.5252, "step": 1397 }, { "epoch": 0.2722333651393297, "grad_norm": 0.71484375, "learning_rate": 3.338872066809283e-05, "loss": 2.5118, "step": 1398 }, { "epoch": 0.2724280957295582, "grad_norm": 0.73828125, "learning_rate": 3.3379557008644754e-05, "loss": 2.5309, "step": 1399 }, { "epoch": 0.27262282631978657, "grad_norm": 0.75390625, "learning_rate": 3.337038826229797e-05, "loss": 2.5421, "step": 1400 }, { "epoch": 0.27281755691001497, "grad_norm": 0.7265625, "learning_rate": 3.336121443253844e-05, "loss": 2.5019, "step": 1401 }, { "epoch": 0.2730122875002434, "grad_norm": 0.75390625, "learning_rate": 3.335203552285404e-05, "loss": 2.5362, "step": 1402 }, { "epoch": 0.2732070180904718, "grad_norm": 0.6953125, "learning_rate": 3.334285153673459e-05, "loss": 2.52, "step": 1403 }, { "epoch": 0.2734017486807003, "grad_norm": 0.7109375, "learning_rate": 3.3333662477671835e-05, "loss": 2.5193, "step": 1404 }, { "epoch": 0.27359647927092867, "grad_norm": 0.81640625, "learning_rate": 3.332446834915946e-05, "loss": 2.509, "step": 1405 }, { "epoch": 0.27379120986115707, "grad_norm": 0.63671875, "learning_rate": 3.331526915469304e-05, "loss": 2.4967, "step": 1406 }, { "epoch": 0.2739859404513855, "grad_norm": 0.7109375, "learning_rate": 3.3306064897770124e-05, "loss": 2.5192, "step": 1407 }, { "epoch": 0.2741806710416139, "grad_norm": 0.68359375, "learning_rate": 3.329685558189016e-05, "loss": 2.509, "step": 1408 }, { "epoch": 0.2743754016318423, "grad_norm": 0.6796875, "learning_rate": 3.3287641210554523e-05, "loss": 2.5219, "step": 1409 }, { "epoch": 0.2745701322220708, "grad_norm": 0.75, "learning_rate": 3.327842178726651e-05, "loss": 2.5255, "step": 1410 }, { "epoch": 0.27476486281229917, "grad_norm": 0.6171875, "learning_rate": 3.326919731553134e-05, "loss": 2.5107, "step": 1411 }, { "epoch": 0.2749595934025276, "grad_norm": 0.71875, "learning_rate": 3.325996779885615e-05, "loss": 2.5255, "step": 1412 }, { "epoch": 0.275154323992756, "grad_norm": 0.65234375, "learning_rate": 3.3250733240750006e-05, "loss": 2.5171, "step": 1413 }, { "epoch": 0.2753490545829844, "grad_norm": 0.765625, "learning_rate": 3.324149364472386e-05, "loss": 2.5173, "step": 1414 }, { "epoch": 0.2755437851732129, "grad_norm": 0.73046875, "learning_rate": 3.323224901429062e-05, "loss": 2.4991, "step": 1415 }, { "epoch": 0.27573851576344127, "grad_norm": 0.7109375, "learning_rate": 3.3222999352965074e-05, "loss": 2.5214, "step": 1416 }, { "epoch": 0.2759332463536697, "grad_norm": 0.76171875, "learning_rate": 3.321374466426395e-05, "loss": 2.504, "step": 1417 }, { "epoch": 0.2761279769438981, "grad_norm": 0.71484375, "learning_rate": 3.3204484951705864e-05, "loss": 2.4994, "step": 1418 }, { "epoch": 0.2763227075341265, "grad_norm": 0.73046875, "learning_rate": 3.3195220218811356e-05, "loss": 2.5214, "step": 1419 }, { "epoch": 0.276517438124355, "grad_norm": 0.61328125, "learning_rate": 3.318595046910288e-05, "loss": 2.517, "step": 1420 }, { "epoch": 0.2767121687145834, "grad_norm": 0.734375, "learning_rate": 3.317667570610477e-05, "loss": 2.5069, "step": 1421 }, { "epoch": 0.27690689930481177, "grad_norm": 0.77734375, "learning_rate": 3.3167395933343304e-05, "loss": 2.5204, "step": 1422 }, { "epoch": 0.2771016298950402, "grad_norm": 0.68359375, "learning_rate": 3.3158111154346635e-05, "loss": 2.5154, "step": 1423 }, { "epoch": 0.2772963604852686, "grad_norm": 0.7890625, "learning_rate": 3.314882137264483e-05, "loss": 2.5086, "step": 1424 }, { "epoch": 0.2774910910754971, "grad_norm": 0.74609375, "learning_rate": 3.313952659176986e-05, "loss": 2.5278, "step": 1425 }, { "epoch": 0.2776858216657255, "grad_norm": 0.6484375, "learning_rate": 3.31302268152556e-05, "loss": 2.5336, "step": 1426 }, { "epoch": 0.27788055225595387, "grad_norm": 0.7421875, "learning_rate": 3.31209220466378e-05, "loss": 2.5174, "step": 1427 }, { "epoch": 0.2780752828461823, "grad_norm": 0.70703125, "learning_rate": 3.311161228945415e-05, "loss": 2.5108, "step": 1428 }, { "epoch": 0.2782700134364107, "grad_norm": 0.60546875, "learning_rate": 3.3102297547244204e-05, "loss": 2.507, "step": 1429 }, { "epoch": 0.2784647440266391, "grad_norm": 0.69140625, "learning_rate": 3.309297782354942e-05, "loss": 2.5045, "step": 1430 }, { "epoch": 0.2786594746168676, "grad_norm": 0.703125, "learning_rate": 3.308365312191315e-05, "loss": 2.5147, "step": 1431 }, { "epoch": 0.278854205207096, "grad_norm": 0.78515625, "learning_rate": 3.3074323445880636e-05, "loss": 2.5291, "step": 1432 }, { "epoch": 0.2790489357973244, "grad_norm": 0.734375, "learning_rate": 3.306498879899902e-05, "loss": 2.5039, "step": 1433 }, { "epoch": 0.2792436663875528, "grad_norm": 0.62890625, "learning_rate": 3.3055649184817334e-05, "loss": 2.5414, "step": 1434 }, { "epoch": 0.2794383969777812, "grad_norm": 0.70703125, "learning_rate": 3.3046304606886476e-05, "loss": 2.5103, "step": 1435 }, { "epoch": 0.2796331275680097, "grad_norm": 0.71484375, "learning_rate": 3.3036955068759274e-05, "loss": 2.5089, "step": 1436 }, { "epoch": 0.2798278581582381, "grad_norm": 0.6796875, "learning_rate": 3.302760057399039e-05, "loss": 2.5126, "step": 1437 }, { "epoch": 0.28002258874846647, "grad_norm": 0.76171875, "learning_rate": 3.301824112613641e-05, "loss": 2.5227, "step": 1438 }, { "epoch": 0.2802173193386949, "grad_norm": 0.734375, "learning_rate": 3.300887672875579e-05, "loss": 2.5204, "step": 1439 }, { "epoch": 0.2804120499289233, "grad_norm": 0.6796875, "learning_rate": 3.299950738540886e-05, "loss": 2.5151, "step": 1440 }, { "epoch": 0.2806067805191518, "grad_norm": 0.6875, "learning_rate": 3.2990133099657856e-05, "loss": 2.5106, "step": 1441 }, { "epoch": 0.2808015111093802, "grad_norm": 0.76953125, "learning_rate": 3.298075387506685e-05, "loss": 2.5214, "step": 1442 }, { "epoch": 0.2809962416996086, "grad_norm": 0.73046875, "learning_rate": 3.297136971520184e-05, "loss": 2.5067, "step": 1443 }, { "epoch": 0.281190972289837, "grad_norm": 0.7421875, "learning_rate": 3.2961980623630656e-05, "loss": 2.5185, "step": 1444 }, { "epoch": 0.2813857028800654, "grad_norm": 0.640625, "learning_rate": 3.295258660392304e-05, "loss": 2.4911, "step": 1445 }, { "epoch": 0.2815804334702938, "grad_norm": 0.796875, "learning_rate": 3.2943187659650595e-05, "loss": 2.5035, "step": 1446 }, { "epoch": 0.2817751640605223, "grad_norm": 0.6875, "learning_rate": 3.293378379438678e-05, "loss": 2.4933, "step": 1447 }, { "epoch": 0.2819698946507507, "grad_norm": 0.6484375, "learning_rate": 3.292437501170694e-05, "loss": 2.5205, "step": 1448 }, { "epoch": 0.28216462524097913, "grad_norm": 0.67578125, "learning_rate": 3.29149613151883e-05, "loss": 2.4911, "step": 1449 }, { "epoch": 0.2823593558312075, "grad_norm": 0.6171875, "learning_rate": 3.290554270840993e-05, "loss": 2.4942, "step": 1450 }, { "epoch": 0.2825540864214359, "grad_norm": 0.63671875, "learning_rate": 3.289611919495278e-05, "loss": 2.5014, "step": 1451 }, { "epoch": 0.2827488170116644, "grad_norm": 0.6015625, "learning_rate": 3.288669077839966e-05, "loss": 2.5213, "step": 1452 }, { "epoch": 0.2829435476018928, "grad_norm": 0.671875, "learning_rate": 3.287725746233527e-05, "loss": 2.4945, "step": 1453 }, { "epoch": 0.2831382781921212, "grad_norm": 0.65625, "learning_rate": 3.286781925034612e-05, "loss": 2.4997, "step": 1454 }, { "epoch": 0.2833330087823496, "grad_norm": 0.7421875, "learning_rate": 3.2858376146020624e-05, "loss": 2.509, "step": 1455 }, { "epoch": 0.283527739372578, "grad_norm": 0.66015625, "learning_rate": 3.284892815294905e-05, "loss": 2.5077, "step": 1456 }, { "epoch": 0.2837224699628065, "grad_norm": 0.6953125, "learning_rate": 3.2839475274723516e-05, "loss": 2.513, "step": 1457 }, { "epoch": 0.2839172005530349, "grad_norm": 0.75390625, "learning_rate": 3.2830017514937996e-05, "loss": 2.5095, "step": 1458 }, { "epoch": 0.2841119311432633, "grad_norm": 0.6796875, "learning_rate": 3.2820554877188326e-05, "loss": 2.5093, "step": 1459 }, { "epoch": 0.28430666173349173, "grad_norm": 0.73828125, "learning_rate": 3.281108736507219e-05, "loss": 2.4989, "step": 1460 }, { "epoch": 0.2845013923237201, "grad_norm": 0.80859375, "learning_rate": 3.280161498218914e-05, "loss": 2.5049, "step": 1461 }, { "epoch": 0.2846961229139486, "grad_norm": 0.6953125, "learning_rate": 3.279213773214056e-05, "loss": 2.4879, "step": 1462 }, { "epoch": 0.284890853504177, "grad_norm": 0.71875, "learning_rate": 3.2782655618529707e-05, "loss": 2.5252, "step": 1463 }, { "epoch": 0.2850855840944054, "grad_norm": 0.74609375, "learning_rate": 3.2773168644961656e-05, "loss": 2.5177, "step": 1464 }, { "epoch": 0.28528031468463383, "grad_norm": 0.6875, "learning_rate": 3.2763676815043366e-05, "loss": 2.5225, "step": 1465 }, { "epoch": 0.2854750452748622, "grad_norm": 0.76953125, "learning_rate": 3.2754180132383604e-05, "loss": 2.5187, "step": 1466 }, { "epoch": 0.2856697758650906, "grad_norm": 0.70703125, "learning_rate": 3.2744678600593026e-05, "loss": 2.5111, "step": 1467 }, { "epoch": 0.2858645064553191, "grad_norm": 0.6484375, "learning_rate": 3.2735172223284087e-05, "loss": 2.4721, "step": 1468 }, { "epoch": 0.2860592370455475, "grad_norm": 0.734375, "learning_rate": 3.272566100407112e-05, "loss": 2.5104, "step": 1469 }, { "epoch": 0.28625396763577593, "grad_norm": 0.6953125, "learning_rate": 3.271614494657028e-05, "loss": 2.4985, "step": 1470 }, { "epoch": 0.28644869822600433, "grad_norm": 0.6796875, "learning_rate": 3.270662405439957e-05, "loss": 2.5256, "step": 1471 }, { "epoch": 0.2866434288162327, "grad_norm": 0.73828125, "learning_rate": 3.2697098331178814e-05, "loss": 2.4843, "step": 1472 }, { "epoch": 0.2868381594064612, "grad_norm": 0.67578125, "learning_rate": 3.268756778052969e-05, "loss": 2.505, "step": 1473 }, { "epoch": 0.2870328899966896, "grad_norm": 0.73046875, "learning_rate": 3.267803240607572e-05, "loss": 2.5107, "step": 1474 }, { "epoch": 0.287227620586918, "grad_norm": 0.7265625, "learning_rate": 3.266849221144223e-05, "loss": 2.5136, "step": 1475 }, { "epoch": 0.28742235117714643, "grad_norm": 0.765625, "learning_rate": 3.265894720025641e-05, "loss": 2.508, "step": 1476 }, { "epoch": 0.2876170817673748, "grad_norm": 0.6640625, "learning_rate": 3.264939737614726e-05, "loss": 2.5259, "step": 1477 }, { "epoch": 0.2878118123576033, "grad_norm": 0.7421875, "learning_rate": 3.2639842742745614e-05, "loss": 2.4963, "step": 1478 }, { "epoch": 0.2880065429478317, "grad_norm": 0.76171875, "learning_rate": 3.263028330368414e-05, "loss": 2.5111, "step": 1479 }, { "epoch": 0.2882012735380601, "grad_norm": 0.69921875, "learning_rate": 3.262071906259732e-05, "loss": 2.4866, "step": 1480 }, { "epoch": 0.28839600412828853, "grad_norm": 0.83984375, "learning_rate": 3.26111500231215e-05, "loss": 2.5036, "step": 1481 }, { "epoch": 0.28859073471851693, "grad_norm": 0.7890625, "learning_rate": 3.26015761888948e-05, "loss": 2.5162, "step": 1482 }, { "epoch": 0.2887854653087453, "grad_norm": 0.75, "learning_rate": 3.2591997563557184e-05, "loss": 2.503, "step": 1483 }, { "epoch": 0.2889801958989738, "grad_norm": 1.0, "learning_rate": 3.258241415075046e-05, "loss": 2.4913, "step": 1484 }, { "epoch": 0.2891749264892022, "grad_norm": 0.79296875, "learning_rate": 3.25728259541182e-05, "loss": 2.4995, "step": 1485 }, { "epoch": 0.28936965707943063, "grad_norm": 0.68359375, "learning_rate": 3.256323297730587e-05, "loss": 2.5083, "step": 1486 }, { "epoch": 0.28956438766965903, "grad_norm": 0.69921875, "learning_rate": 3.2553635223960683e-05, "loss": 2.4892, "step": 1487 }, { "epoch": 0.2897591182598874, "grad_norm": 0.703125, "learning_rate": 3.2544032697731714e-05, "loss": 2.499, "step": 1488 }, { "epoch": 0.2899538488501159, "grad_norm": 0.6875, "learning_rate": 3.253442540226983e-05, "loss": 2.4948, "step": 1489 }, { "epoch": 0.2901485794403443, "grad_norm": 0.6875, "learning_rate": 3.252481334122773e-05, "loss": 2.5079, "step": 1490 }, { "epoch": 0.2903433100305727, "grad_norm": 0.66015625, "learning_rate": 3.25151965182599e-05, "loss": 2.5014, "step": 1491 }, { "epoch": 0.29053804062080113, "grad_norm": 0.67578125, "learning_rate": 3.250557493702265e-05, "loss": 2.5138, "step": 1492 }, { "epoch": 0.29073277121102953, "grad_norm": 0.6640625, "learning_rate": 3.2495948601174106e-05, "loss": 2.4943, "step": 1493 }, { "epoch": 0.290927501801258, "grad_norm": 0.71484375, "learning_rate": 3.248631751437419e-05, "loss": 2.5035, "step": 1494 }, { "epoch": 0.2911222323914864, "grad_norm": 0.69140625, "learning_rate": 3.2476681680284633e-05, "loss": 2.4936, "step": 1495 }, { "epoch": 0.2913169629817148, "grad_norm": 0.66015625, "learning_rate": 3.2467041102568986e-05, "loss": 2.5013, "step": 1496 }, { "epoch": 0.29151169357194323, "grad_norm": 0.68359375, "learning_rate": 3.245739578489257e-05, "loss": 2.5022, "step": 1497 }, { "epoch": 0.29170642416217163, "grad_norm": 0.67578125, "learning_rate": 3.2447745730922546e-05, "loss": 2.4883, "step": 1498 }, { "epoch": 0.2919011547524, "grad_norm": 0.796875, "learning_rate": 3.2438090944327844e-05, "loss": 2.5088, "step": 1499 }, { "epoch": 0.2920958853426285, "grad_norm": 0.6875, "learning_rate": 3.242843142877922e-05, "loss": 2.5134, "step": 1500 }, { "epoch": 0.2922906159328569, "grad_norm": 0.71484375, "learning_rate": 3.24187671879492e-05, "loss": 2.4969, "step": 1501 }, { "epoch": 0.29248534652308533, "grad_norm": 0.6796875, "learning_rate": 3.240909822551214e-05, "loss": 2.4935, "step": 1502 }, { "epoch": 0.29268007711331373, "grad_norm": 0.76953125, "learning_rate": 3.239942454514417e-05, "loss": 2.504, "step": 1503 }, { "epoch": 0.29287480770354213, "grad_norm": 0.70703125, "learning_rate": 3.23897461505232e-05, "loss": 2.5054, "step": 1504 }, { "epoch": 0.2930695382937706, "grad_norm": 0.828125, "learning_rate": 3.2380063045328965e-05, "loss": 2.4854, "step": 1505 }, { "epoch": 0.293264268883999, "grad_norm": 0.95703125, "learning_rate": 3.237037523324297e-05, "loss": 2.4904, "step": 1506 }, { "epoch": 0.29345899947422743, "grad_norm": 0.85546875, "learning_rate": 3.236068271794852e-05, "loss": 2.4965, "step": 1507 }, { "epoch": 0.29365373006445583, "grad_norm": 0.7578125, "learning_rate": 3.2350985503130694e-05, "loss": 2.506, "step": 1508 }, { "epoch": 0.29384846065468423, "grad_norm": 0.9765625, "learning_rate": 3.2341283592476373e-05, "loss": 2.4938, "step": 1509 }, { "epoch": 0.2940431912449127, "grad_norm": 0.9609375, "learning_rate": 3.233157698967421e-05, "loss": 2.4953, "step": 1510 }, { "epoch": 0.2942379218351411, "grad_norm": 0.67578125, "learning_rate": 3.2321865698414665e-05, "loss": 2.501, "step": 1511 }, { "epoch": 0.2944326524253695, "grad_norm": 0.6953125, "learning_rate": 3.231214972238995e-05, "loss": 2.4985, "step": 1512 }, { "epoch": 0.29462738301559793, "grad_norm": 0.7421875, "learning_rate": 3.230242906529407e-05, "loss": 2.521, "step": 1513 }, { "epoch": 0.29482211360582633, "grad_norm": 0.625, "learning_rate": 3.2292703730822824e-05, "loss": 2.508, "step": 1514 }, { "epoch": 0.2950168441960548, "grad_norm": 0.69921875, "learning_rate": 3.2282973722673774e-05, "loss": 2.5022, "step": 1515 }, { "epoch": 0.2952115747862832, "grad_norm": 0.76171875, "learning_rate": 3.227323904454626e-05, "loss": 2.4977, "step": 1516 }, { "epoch": 0.2954063053765116, "grad_norm": 0.66015625, "learning_rate": 3.226349970014139e-05, "loss": 2.4912, "step": 1517 }, { "epoch": 0.29560103596674003, "grad_norm": 0.6953125, "learning_rate": 3.225375569316208e-05, "loss": 2.4949, "step": 1518 }, { "epoch": 0.29579576655696843, "grad_norm": 0.7890625, "learning_rate": 3.2244007027312975e-05, "loss": 2.522, "step": 1519 }, { "epoch": 0.29599049714719683, "grad_norm": 0.6484375, "learning_rate": 3.2234253706300526e-05, "loss": 2.475, "step": 1520 }, { "epoch": 0.2961852277374253, "grad_norm": 0.65625, "learning_rate": 3.2224495733832926e-05, "loss": 2.503, "step": 1521 }, { "epoch": 0.2963799583276537, "grad_norm": 0.68359375, "learning_rate": 3.221473311362016e-05, "loss": 2.5028, "step": 1522 }, { "epoch": 0.29657468891788213, "grad_norm": 0.71875, "learning_rate": 3.220496584937396e-05, "loss": 2.4968, "step": 1523 }, { "epoch": 0.29676941950811053, "grad_norm": 0.67578125, "learning_rate": 3.2195193944807845e-05, "loss": 2.5296, "step": 1524 }, { "epoch": 0.29696415009833893, "grad_norm": 0.7265625, "learning_rate": 3.2185417403637076e-05, "loss": 2.5014, "step": 1525 }, { "epoch": 0.2971588806885674, "grad_norm": 0.6484375, "learning_rate": 3.2175636229578695e-05, "loss": 2.483, "step": 1526 }, { "epoch": 0.2973536112787958, "grad_norm": 0.6953125, "learning_rate": 3.216585042635149e-05, "loss": 2.49, "step": 1527 }, { "epoch": 0.2975483418690242, "grad_norm": 0.71484375, "learning_rate": 3.2156059997676025e-05, "loss": 2.4916, "step": 1528 }, { "epoch": 0.29774307245925263, "grad_norm": 0.734375, "learning_rate": 3.214626494727461e-05, "loss": 2.4823, "step": 1529 }, { "epoch": 0.29793780304948103, "grad_norm": 0.6875, "learning_rate": 3.2136465278871324e-05, "loss": 2.5144, "step": 1530 }, { "epoch": 0.2981325336397095, "grad_norm": 0.69921875, "learning_rate": 3.212666099619198e-05, "loss": 2.4676, "step": 1531 }, { "epoch": 0.2983272642299379, "grad_norm": 0.67578125, "learning_rate": 3.211685210296417e-05, "loss": 2.5013, "step": 1532 }, { "epoch": 0.2985219948201663, "grad_norm": 0.7265625, "learning_rate": 3.2107038602917225e-05, "loss": 2.4797, "step": 1533 }, { "epoch": 0.29871672541039473, "grad_norm": 0.69921875, "learning_rate": 3.209722049978223e-05, "loss": 2.4905, "step": 1534 }, { "epoch": 0.29891145600062313, "grad_norm": 0.625, "learning_rate": 3.2087397797292034e-05, "loss": 2.4984, "step": 1535 }, { "epoch": 0.29910618659085153, "grad_norm": 0.7734375, "learning_rate": 3.2077570499181206e-05, "loss": 2.5095, "step": 1536 }, { "epoch": 0.29930091718108, "grad_norm": 0.7890625, "learning_rate": 3.206773860918609e-05, "loss": 2.4874, "step": 1537 }, { "epoch": 0.2994956477713084, "grad_norm": 0.6171875, "learning_rate": 3.2057902131044754e-05, "loss": 2.5007, "step": 1538 }, { "epoch": 0.29969037836153684, "grad_norm": 0.7734375, "learning_rate": 3.2048061068497016e-05, "loss": 2.4804, "step": 1539 }, { "epoch": 0.29988510895176523, "grad_norm": 0.78125, "learning_rate": 3.203821542528446e-05, "loss": 2.4805, "step": 1540 }, { "epoch": 0.30007983954199363, "grad_norm": 0.6328125, "learning_rate": 3.202836520515039e-05, "loss": 2.4885, "step": 1541 }, { "epoch": 0.3002745701322221, "grad_norm": 0.64453125, "learning_rate": 3.201851041183983e-05, "loss": 2.5019, "step": 1542 }, { "epoch": 0.3004693007224505, "grad_norm": 0.66015625, "learning_rate": 3.2008651049099596e-05, "loss": 2.5132, "step": 1543 }, { "epoch": 0.3006640313126789, "grad_norm": 0.66015625, "learning_rate": 3.1998787120678194e-05, "loss": 2.5079, "step": 1544 }, { "epoch": 0.30085876190290733, "grad_norm": 0.671875, "learning_rate": 3.198891863032589e-05, "loss": 2.5112, "step": 1545 }, { "epoch": 0.30105349249313573, "grad_norm": 0.60546875, "learning_rate": 3.197904558179466e-05, "loss": 2.4738, "step": 1546 }, { "epoch": 0.3012482230833642, "grad_norm": 0.6953125, "learning_rate": 3.1969167978838255e-05, "loss": 2.4986, "step": 1547 }, { "epoch": 0.3014429536735926, "grad_norm": 0.65625, "learning_rate": 3.195928582521212e-05, "loss": 2.4822, "step": 1548 }, { "epoch": 0.301637684263821, "grad_norm": 0.62890625, "learning_rate": 3.194939912467345e-05, "loss": 2.5068, "step": 1549 }, { "epoch": 0.30183241485404944, "grad_norm": 0.7109375, "learning_rate": 3.1939507880981154e-05, "loss": 2.4818, "step": 1550 }, { "epoch": 0.30202714544427783, "grad_norm": 0.7890625, "learning_rate": 3.192961209789588e-05, "loss": 2.4996, "step": 1551 }, { "epoch": 0.3022218760345063, "grad_norm": 0.6953125, "learning_rate": 3.191971177918e-05, "loss": 2.4843, "step": 1552 }, { "epoch": 0.3024166066247347, "grad_norm": 0.69140625, "learning_rate": 3.1909806928597606e-05, "loss": 2.5024, "step": 1553 }, { "epoch": 0.3026113372149631, "grad_norm": 0.69921875, "learning_rate": 3.189989754991452e-05, "loss": 2.504, "step": 1554 }, { "epoch": 0.30280606780519154, "grad_norm": 0.67578125, "learning_rate": 3.188998364689827e-05, "loss": 2.4912, "step": 1555 }, { "epoch": 0.30300079839541993, "grad_norm": 0.69140625, "learning_rate": 3.1880065223318127e-05, "loss": 2.4878, "step": 1556 }, { "epoch": 0.30319552898564833, "grad_norm": 0.71484375, "learning_rate": 3.187014228294507e-05, "loss": 2.4857, "step": 1557 }, { "epoch": 0.3033902595758768, "grad_norm": 0.66015625, "learning_rate": 3.186021482955179e-05, "loss": 2.5205, "step": 1558 }, { "epoch": 0.3035849901661052, "grad_norm": 0.78125, "learning_rate": 3.185028286691269e-05, "loss": 2.4663, "step": 1559 }, { "epoch": 0.30377972075633364, "grad_norm": 0.71484375, "learning_rate": 3.18403463988039e-05, "loss": 2.4911, "step": 1560 }, { "epoch": 0.30397445134656204, "grad_norm": 0.6796875, "learning_rate": 3.1830405429003275e-05, "loss": 2.5018, "step": 1561 }, { "epoch": 0.30416918193679043, "grad_norm": 0.72265625, "learning_rate": 3.182045996129034e-05, "loss": 2.4719, "step": 1562 }, { "epoch": 0.3043639125270189, "grad_norm": 0.64453125, "learning_rate": 3.1810509999446376e-05, "loss": 2.5045, "step": 1563 }, { "epoch": 0.3045586431172473, "grad_norm": 0.6640625, "learning_rate": 3.180055554725434e-05, "loss": 2.4907, "step": 1564 }, { "epoch": 0.3047533737074757, "grad_norm": 0.6328125, "learning_rate": 3.179059660849892e-05, "loss": 2.491, "step": 1565 }, { "epoch": 0.30494810429770414, "grad_norm": 0.64453125, "learning_rate": 3.178063318696648e-05, "loss": 2.4899, "step": 1566 }, { "epoch": 0.30514283488793253, "grad_norm": 0.70703125, "learning_rate": 3.177066528644512e-05, "loss": 2.4991, "step": 1567 }, { "epoch": 0.305337565478161, "grad_norm": 0.7265625, "learning_rate": 3.1760692910724614e-05, "loss": 2.4874, "step": 1568 }, { "epoch": 0.3055322960683894, "grad_norm": 0.6875, "learning_rate": 3.175071606359647e-05, "loss": 2.4998, "step": 1569 }, { "epoch": 0.3057270266586178, "grad_norm": 0.62890625, "learning_rate": 3.174073474885387e-05, "loss": 2.4891, "step": 1570 }, { "epoch": 0.30592175724884624, "grad_norm": 0.73046875, "learning_rate": 3.1730748970291714e-05, "loss": 2.4866, "step": 1571 }, { "epoch": 0.30611648783907464, "grad_norm": 0.69140625, "learning_rate": 3.1720758731706574e-05, "loss": 2.4887, "step": 1572 }, { "epoch": 0.30631121842930303, "grad_norm": 0.65234375, "learning_rate": 3.171076403689673e-05, "loss": 2.4978, "step": 1573 }, { "epoch": 0.3065059490195315, "grad_norm": 0.66796875, "learning_rate": 3.170076488966218e-05, "loss": 2.511, "step": 1574 }, { "epoch": 0.3067006796097599, "grad_norm": 0.62890625, "learning_rate": 3.169076129380457e-05, "loss": 2.4795, "step": 1575 }, { "epoch": 0.30689541019998834, "grad_norm": 0.6875, "learning_rate": 3.168075325312727e-05, "loss": 2.4913, "step": 1576 }, { "epoch": 0.30709014079021674, "grad_norm": 0.734375, "learning_rate": 3.1670740771435336e-05, "loss": 2.4861, "step": 1577 }, { "epoch": 0.30728487138044513, "grad_norm": 0.65234375, "learning_rate": 3.166072385253549e-05, "loss": 2.4824, "step": 1578 }, { "epoch": 0.3074796019706736, "grad_norm": 0.6875, "learning_rate": 3.165070250023618e-05, "loss": 2.5042, "step": 1579 }, { "epoch": 0.307674332560902, "grad_norm": 0.6640625, "learning_rate": 3.1640676718347505e-05, "loss": 2.4955, "step": 1580 }, { "epoch": 0.3078690631511304, "grad_norm": 0.66796875, "learning_rate": 3.163064651068126e-05, "loss": 2.4949, "step": 1581 }, { "epoch": 0.30806379374135884, "grad_norm": 0.64453125, "learning_rate": 3.162061188105092e-05, "loss": 2.4907, "step": 1582 }, { "epoch": 0.30825852433158724, "grad_norm": 0.65625, "learning_rate": 3.161057283327166e-05, "loss": 2.487, "step": 1583 }, { "epoch": 0.3084532549218157, "grad_norm": 0.640625, "learning_rate": 3.1600529371160306e-05, "loss": 2.4963, "step": 1584 }, { "epoch": 0.3086479855120441, "grad_norm": 0.69140625, "learning_rate": 3.159048149853537e-05, "loss": 2.4919, "step": 1585 }, { "epoch": 0.3088427161022725, "grad_norm": 0.6640625, "learning_rate": 3.1580429219217066e-05, "loss": 2.4813, "step": 1586 }, { "epoch": 0.30903744669250094, "grad_norm": 0.61328125, "learning_rate": 3.157037253702725e-05, "loss": 2.471, "step": 1587 }, { "epoch": 0.30923217728272934, "grad_norm": 0.73046875, "learning_rate": 3.156031145578947e-05, "loss": 2.4664, "step": 1588 }, { "epoch": 0.3094269078729578, "grad_norm": 0.6875, "learning_rate": 3.1550245979328934e-05, "loss": 2.479, "step": 1589 }, { "epoch": 0.3096216384631862, "grad_norm": 0.671875, "learning_rate": 3.1540176111472546e-05, "loss": 2.4904, "step": 1590 }, { "epoch": 0.3098163690534146, "grad_norm": 0.71484375, "learning_rate": 3.153010185604885e-05, "loss": 2.4714, "step": 1591 }, { "epoch": 0.31001109964364304, "grad_norm": 0.6171875, "learning_rate": 3.152002321688808e-05, "loss": 2.4854, "step": 1592 }, { "epoch": 0.31020583023387144, "grad_norm": 0.69921875, "learning_rate": 3.150994019782212e-05, "loss": 2.4907, "step": 1593 }, { "epoch": 0.31040056082409984, "grad_norm": 0.62890625, "learning_rate": 3.149985280268453e-05, "loss": 2.5, "step": 1594 }, { "epoch": 0.3105952914143283, "grad_norm": 0.62890625, "learning_rate": 3.148976103531053e-05, "loss": 2.4979, "step": 1595 }, { "epoch": 0.3107900220045567, "grad_norm": 0.6953125, "learning_rate": 3.147966489953701e-05, "loss": 2.4844, "step": 1596 }, { "epoch": 0.31098475259478514, "grad_norm": 0.71875, "learning_rate": 3.1469564399202504e-05, "loss": 2.486, "step": 1597 }, { "epoch": 0.31117948318501354, "grad_norm": 0.69140625, "learning_rate": 3.145945953814722e-05, "loss": 2.4762, "step": 1598 }, { "epoch": 0.31137421377524194, "grad_norm": 0.5859375, "learning_rate": 3.144935032021302e-05, "loss": 2.4789, "step": 1599 }, { "epoch": 0.3115689443654704, "grad_norm": 0.7890625, "learning_rate": 3.143923674924343e-05, "loss": 2.4745, "step": 1600 }, { "epoch": 0.3117636749556988, "grad_norm": 0.8125, "learning_rate": 3.1429118829083604e-05, "loss": 2.4839, "step": 1601 }, { "epoch": 0.3119584055459272, "grad_norm": 0.65625, "learning_rate": 3.141899656358038e-05, "loss": 2.4847, "step": 1602 }, { "epoch": 0.31215313613615564, "grad_norm": 0.67578125, "learning_rate": 3.140886995658224e-05, "loss": 2.4935, "step": 1603 }, { "epoch": 0.31234786672638404, "grad_norm": 0.6796875, "learning_rate": 3.139873901193931e-05, "loss": 2.4899, "step": 1604 }, { "epoch": 0.3125425973166125, "grad_norm": 0.62890625, "learning_rate": 3.138860373350337e-05, "loss": 2.4868, "step": 1605 }, { "epoch": 0.3127373279068409, "grad_norm": 0.61328125, "learning_rate": 3.137846412512783e-05, "loss": 2.4684, "step": 1606 }, { "epoch": 0.3129320584970693, "grad_norm": 0.63671875, "learning_rate": 3.1368320190667784e-05, "loss": 2.4962, "step": 1607 }, { "epoch": 0.31312678908729774, "grad_norm": 0.67578125, "learning_rate": 3.1358171933979946e-05, "loss": 2.4719, "step": 1608 }, { "epoch": 0.31332151967752614, "grad_norm": 0.63671875, "learning_rate": 3.134801935892267e-05, "loss": 2.4903, "step": 1609 }, { "epoch": 0.31351625026775454, "grad_norm": 0.74609375, "learning_rate": 3.133786246935596e-05, "loss": 2.4912, "step": 1610 }, { "epoch": 0.313710980857983, "grad_norm": 0.67578125, "learning_rate": 3.132770126914145e-05, "loss": 2.484, "step": 1611 }, { "epoch": 0.3139057114482114, "grad_norm": 0.65234375, "learning_rate": 3.131753576214245e-05, "loss": 2.4927, "step": 1612 }, { "epoch": 0.31410044203843984, "grad_norm": 0.69140625, "learning_rate": 3.1307365952223837e-05, "loss": 2.4849, "step": 1613 }, { "epoch": 0.31429517262866824, "grad_norm": 0.625, "learning_rate": 3.1297191843252197e-05, "loss": 2.477, "step": 1614 }, { "epoch": 0.31448990321889664, "grad_norm": 0.68359375, "learning_rate": 3.128701343909571e-05, "loss": 2.4828, "step": 1615 }, { "epoch": 0.3146846338091251, "grad_norm": 0.65625, "learning_rate": 3.1276830743624204e-05, "loss": 2.4821, "step": 1616 }, { "epoch": 0.3148793643993535, "grad_norm": 0.703125, "learning_rate": 3.1266643760709123e-05, "loss": 2.4883, "step": 1617 }, { "epoch": 0.3150740949895819, "grad_norm": 0.6875, "learning_rate": 3.125645249422356e-05, "loss": 2.4834, "step": 1618 }, { "epoch": 0.31526882557981034, "grad_norm": 0.6328125, "learning_rate": 3.124625694804222e-05, "loss": 2.4721, "step": 1619 }, { "epoch": 0.31546355617003874, "grad_norm": 0.703125, "learning_rate": 3.123605712604145e-05, "loss": 2.4981, "step": 1620 }, { "epoch": 0.3156582867602672, "grad_norm": 0.62890625, "learning_rate": 3.1225853032099215e-05, "loss": 2.4897, "step": 1621 }, { "epoch": 0.3158530173504956, "grad_norm": 0.73046875, "learning_rate": 3.12156446700951e-05, "loss": 2.4873, "step": 1622 }, { "epoch": 0.316047747940724, "grad_norm": 0.640625, "learning_rate": 3.120543204391032e-05, "loss": 2.4897, "step": 1623 }, { "epoch": 0.31624247853095244, "grad_norm": 0.765625, "learning_rate": 3.1195215157427704e-05, "loss": 2.5026, "step": 1624 }, { "epoch": 0.31643720912118084, "grad_norm": 0.71875, "learning_rate": 3.1184994014531715e-05, "loss": 2.4743, "step": 1625 }, { "epoch": 0.31663193971140924, "grad_norm": 0.69921875, "learning_rate": 3.117476861910843e-05, "loss": 2.4953, "step": 1626 }, { "epoch": 0.3168266703016377, "grad_norm": 0.7265625, "learning_rate": 3.116453897504551e-05, "loss": 2.4923, "step": 1627 }, { "epoch": 0.3170214008918661, "grad_norm": 0.66015625, "learning_rate": 3.115430508623228e-05, "loss": 2.4642, "step": 1628 }, { "epoch": 0.31721613148209454, "grad_norm": 0.75, "learning_rate": 3.114406695655966e-05, "loss": 2.476, "step": 1629 }, { "epoch": 0.31741086207232294, "grad_norm": 0.83984375, "learning_rate": 3.1133824589920164e-05, "loss": 2.4849, "step": 1630 }, { "epoch": 0.31760559266255134, "grad_norm": 0.7109375, "learning_rate": 3.1123577990207955e-05, "loss": 2.4861, "step": 1631 }, { "epoch": 0.3178003232527798, "grad_norm": 0.72265625, "learning_rate": 3.1113327161318766e-05, "loss": 2.5122, "step": 1632 }, { "epoch": 0.3179950538430082, "grad_norm": 0.76171875, "learning_rate": 3.110307210714996e-05, "loss": 2.4778, "step": 1633 }, { "epoch": 0.31818978443323664, "grad_norm": 0.6640625, "learning_rate": 3.10928128316005e-05, "loss": 2.4849, "step": 1634 }, { "epoch": 0.31838451502346504, "grad_norm": 0.86328125, "learning_rate": 3.108254933857096e-05, "loss": 2.4875, "step": 1635 }, { "epoch": 0.31857924561369344, "grad_norm": 0.77734375, "learning_rate": 3.107228163196352e-05, "loss": 2.4752, "step": 1636 }, { "epoch": 0.3187739762039219, "grad_norm": 0.7734375, "learning_rate": 3.106200971568194e-05, "loss": 2.4756, "step": 1637 }, { "epoch": 0.3189687067941503, "grad_norm": 0.76953125, "learning_rate": 3.1051733593631606e-05, "loss": 2.495, "step": 1638 }, { "epoch": 0.3191634373843787, "grad_norm": 0.7890625, "learning_rate": 3.104145326971949e-05, "loss": 2.4788, "step": 1639 }, { "epoch": 0.31935816797460714, "grad_norm": 0.7421875, "learning_rate": 3.103116874785416e-05, "loss": 2.493, "step": 1640 }, { "epoch": 0.31955289856483554, "grad_norm": 0.7890625, "learning_rate": 3.1020880031945785e-05, "loss": 2.4906, "step": 1641 }, { "epoch": 0.319747629155064, "grad_norm": 0.6953125, "learning_rate": 3.101058712590614e-05, "loss": 2.4966, "step": 1642 }, { "epoch": 0.3199423597452924, "grad_norm": 0.76953125, "learning_rate": 3.100029003364856e-05, "loss": 2.4838, "step": 1643 }, { "epoch": 0.3201370903355208, "grad_norm": 0.6953125, "learning_rate": 3.098998875908802e-05, "loss": 2.483, "step": 1644 }, { "epoch": 0.32033182092574924, "grad_norm": 0.67578125, "learning_rate": 3.0979683306141035e-05, "loss": 2.4734, "step": 1645 }, { "epoch": 0.32052655151597764, "grad_norm": 0.734375, "learning_rate": 3.0969373678725734e-05, "loss": 2.4849, "step": 1646 }, { "epoch": 0.32072128210620604, "grad_norm": 0.67578125, "learning_rate": 3.095905988076184e-05, "loss": 2.4686, "step": 1647 }, { "epoch": 0.3209160126964345, "grad_norm": 0.640625, "learning_rate": 3.094874191617065e-05, "loss": 2.4796, "step": 1648 }, { "epoch": 0.3211107432866629, "grad_norm": 0.7109375, "learning_rate": 3.093841978887504e-05, "loss": 2.4958, "step": 1649 }, { "epoch": 0.32130547387689135, "grad_norm": 0.65234375, "learning_rate": 3.092809350279949e-05, "loss": 2.4789, "step": 1650 }, { "epoch": 0.32150020446711974, "grad_norm": 0.65234375, "learning_rate": 3.091776306187003e-05, "loss": 2.4818, "step": 1651 }, { "epoch": 0.32169493505734814, "grad_norm": 0.7265625, "learning_rate": 3.09074284700143e-05, "loss": 2.4837, "step": 1652 }, { "epoch": 0.3218896656475766, "grad_norm": 0.62890625, "learning_rate": 3.08970897311615e-05, "loss": 2.467, "step": 1653 }, { "epoch": 0.322084396237805, "grad_norm": 0.73828125, "learning_rate": 3.0886746849242405e-05, "loss": 2.4797, "step": 1654 }, { "epoch": 0.3222791268280334, "grad_norm": 0.93359375, "learning_rate": 3.0876399828189377e-05, "loss": 2.4995, "step": 1655 }, { "epoch": 0.32247385741826184, "grad_norm": 0.73046875, "learning_rate": 3.086604867193636e-05, "loss": 2.4876, "step": 1656 }, { "epoch": 0.32266858800849024, "grad_norm": 0.70703125, "learning_rate": 3.085569338441883e-05, "loss": 2.4691, "step": 1657 }, { "epoch": 0.3228633185987187, "grad_norm": 0.734375, "learning_rate": 3.0845333969573886e-05, "loss": 2.4851, "step": 1658 }, { "epoch": 0.3230580491889471, "grad_norm": 0.72265625, "learning_rate": 3.0834970431340154e-05, "loss": 2.4675, "step": 1659 }, { "epoch": 0.3232527797791755, "grad_norm": 0.6640625, "learning_rate": 3.082460277365785e-05, "loss": 2.4886, "step": 1660 }, { "epoch": 0.32344751036940395, "grad_norm": 0.73046875, "learning_rate": 3.0814231000468753e-05, "loss": 2.448, "step": 1661 }, { "epoch": 0.32364224095963234, "grad_norm": 0.8359375, "learning_rate": 3.0803855115716195e-05, "loss": 2.4879, "step": 1662 }, { "epoch": 0.32383697154986074, "grad_norm": 0.609375, "learning_rate": 3.0793475123345094e-05, "loss": 2.4935, "step": 1663 }, { "epoch": 0.3240317021400892, "grad_norm": 0.70703125, "learning_rate": 3.078309102730191e-05, "loss": 2.4868, "step": 1664 }, { "epoch": 0.3242264327303176, "grad_norm": 0.71875, "learning_rate": 3.077270283153467e-05, "loss": 2.5109, "step": 1665 }, { "epoch": 0.32442116332054605, "grad_norm": 0.6328125, "learning_rate": 3.076231053999296e-05, "loss": 2.4769, "step": 1666 }, { "epoch": 0.32461589391077444, "grad_norm": 0.64453125, "learning_rate": 3.075191415662791e-05, "loss": 2.4838, "step": 1667 }, { "epoch": 0.32481062450100284, "grad_norm": 0.7578125, "learning_rate": 3.074151368539224e-05, "loss": 2.4826, "step": 1668 }, { "epoch": 0.3250053550912313, "grad_norm": 0.73046875, "learning_rate": 3.0731109130240185e-05, "loss": 2.4773, "step": 1669 }, { "epoch": 0.3252000856814597, "grad_norm": 0.671875, "learning_rate": 3.072070049512755e-05, "loss": 2.4802, "step": 1670 }, { "epoch": 0.3253948162716881, "grad_norm": 0.68359375, "learning_rate": 3.0710287784011705e-05, "loss": 2.4827, "step": 1671 }, { "epoch": 0.32558954686191655, "grad_norm": 0.765625, "learning_rate": 3.069987100085155e-05, "loss": 2.5038, "step": 1672 }, { "epoch": 0.32578427745214494, "grad_norm": 0.62890625, "learning_rate": 3.0689450149607535e-05, "loss": 2.4626, "step": 1673 }, { "epoch": 0.3259790080423734, "grad_norm": 0.76953125, "learning_rate": 3.067902523424165e-05, "loss": 2.472, "step": 1674 }, { "epoch": 0.3261737386326018, "grad_norm": 0.73828125, "learning_rate": 3.066859625871747e-05, "loss": 2.4741, "step": 1675 }, { "epoch": 0.3263684692228302, "grad_norm": 0.7421875, "learning_rate": 3.065816322700006e-05, "loss": 2.4638, "step": 1676 }, { "epoch": 0.32656319981305865, "grad_norm": 0.76953125, "learning_rate": 3.0647726143056064e-05, "loss": 2.4833, "step": 1677 }, { "epoch": 0.32675793040328704, "grad_norm": 0.69140625, "learning_rate": 3.063728501085365e-05, "loss": 2.4652, "step": 1678 }, { "epoch": 0.3269526609935155, "grad_norm": 0.8359375, "learning_rate": 3.062683983436252e-05, "loss": 2.4649, "step": 1679 }, { "epoch": 0.3271473915837439, "grad_norm": 0.73828125, "learning_rate": 3.061639061755394e-05, "loss": 2.4801, "step": 1680 }, { "epoch": 0.3273421221739723, "grad_norm": 0.80859375, "learning_rate": 3.06059373644007e-05, "loss": 2.4854, "step": 1681 }, { "epoch": 0.32753685276420075, "grad_norm": 0.890625, "learning_rate": 3.059548007887709e-05, "loss": 2.473, "step": 1682 }, { "epoch": 0.32773158335442915, "grad_norm": 0.68359375, "learning_rate": 3.0585018764958994e-05, "loss": 2.4827, "step": 1683 }, { "epoch": 0.32792631394465754, "grad_norm": 0.95703125, "learning_rate": 3.057455342662378e-05, "loss": 2.4865, "step": 1684 }, { "epoch": 0.328121044534886, "grad_norm": 0.97265625, "learning_rate": 3.0564084067850365e-05, "loss": 2.4937, "step": 1685 }, { "epoch": 0.3283157751251144, "grad_norm": 0.59765625, "learning_rate": 3.055361069261919e-05, "loss": 2.4818, "step": 1686 }, { "epoch": 0.32851050571534285, "grad_norm": 0.96484375, "learning_rate": 3.0543133304912236e-05, "loss": 2.4614, "step": 1687 }, { "epoch": 0.32870523630557125, "grad_norm": 0.8671875, "learning_rate": 3.0532651908712984e-05, "loss": 2.4969, "step": 1688 }, { "epoch": 0.32889996689579964, "grad_norm": 0.625, "learning_rate": 3.052216650800647e-05, "loss": 2.4798, "step": 1689 }, { "epoch": 0.3290946974860281, "grad_norm": 0.7734375, "learning_rate": 3.0511677106779225e-05, "loss": 2.4942, "step": 1690 }, { "epoch": 0.3292894280762565, "grad_norm": 0.671875, "learning_rate": 3.0501183709019312e-05, "loss": 2.4708, "step": 1691 }, { "epoch": 0.3294841586664849, "grad_norm": 0.703125, "learning_rate": 3.0490686318716324e-05, "loss": 2.4787, "step": 1692 }, { "epoch": 0.32967888925671335, "grad_norm": 0.63671875, "learning_rate": 3.0480184939861347e-05, "loss": 2.4891, "step": 1693 }, { "epoch": 0.32987361984694175, "grad_norm": 0.7109375, "learning_rate": 3.0469679576447015e-05, "loss": 2.4929, "step": 1694 }, { "epoch": 0.3300683504371702, "grad_norm": 0.62109375, "learning_rate": 3.045917023246745e-05, "loss": 2.4802, "step": 1695 }, { "epoch": 0.3302630810273986, "grad_norm": 0.6484375, "learning_rate": 3.0448656911918295e-05, "loss": 2.4926, "step": 1696 }, { "epoch": 0.330457811617627, "grad_norm": 0.625, "learning_rate": 3.0438139618796714e-05, "loss": 2.4752, "step": 1697 }, { "epoch": 0.33065254220785545, "grad_norm": 0.61328125, "learning_rate": 3.0427618357101375e-05, "loss": 2.4901, "step": 1698 }, { "epoch": 0.33084727279808385, "grad_norm": 0.6484375, "learning_rate": 3.041709313083245e-05, "loss": 2.482, "step": 1699 }, { "epoch": 0.33104200338831224, "grad_norm": 0.59765625, "learning_rate": 3.0406563943991626e-05, "loss": 2.4888, "step": 1700 } ], "logging_steps": 1, "max_steps": 5135, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.5490820797130342e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }