{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 2899, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00034494653328734045, "grad_norm": 0.6609077453613281, "learning_rate": 1.0000000000000002e-06, "loss": 10.3896, "step": 1 }, { "epoch": 0.0006898930665746809, "grad_norm": 0.6776272654533386, "learning_rate": 2.0000000000000003e-06, "loss": 10.3747, "step": 2 }, { "epoch": 0.0010348395998620215, "grad_norm": 0.7447559833526611, "learning_rate": 3e-06, "loss": 10.3959, "step": 3 }, { "epoch": 0.0013797861331493618, "grad_norm": 0.7730655074119568, "learning_rate": 4.000000000000001e-06, "loss": 10.39, "step": 4 }, { "epoch": 0.0017247326664367024, "grad_norm": 0.7812376618385315, "learning_rate": 5e-06, "loss": 10.3765, "step": 5 }, { "epoch": 0.002069679199724043, "grad_norm": 0.854775607585907, "learning_rate": 6e-06, "loss": 10.3793, "step": 6 }, { "epoch": 0.002414625733011383, "grad_norm": 0.7408090233802795, "learning_rate": 7.000000000000001e-06, "loss": 10.3799, "step": 7 }, { "epoch": 0.0027595722662987236, "grad_norm": 0.7635711431503296, "learning_rate": 8.000000000000001e-06, "loss": 10.3856, "step": 8 }, { "epoch": 0.003104518799586064, "grad_norm": 0.7853497862815857, "learning_rate": 9e-06, "loss": 10.3786, "step": 9 }, { "epoch": 0.0034494653328734047, "grad_norm": 0.7585586905479431, "learning_rate": 1e-05, "loss": 10.3554, "step": 10 }, { "epoch": 0.0037944118661607453, "grad_norm": 0.8145893216133118, "learning_rate": 1.1000000000000001e-05, "loss": 10.3792, "step": 11 }, { "epoch": 0.004139358399448086, "grad_norm": 0.8374782204627991, "learning_rate": 1.2e-05, "loss": 10.3807, "step": 12 }, { "epoch": 0.004484304932735426, "grad_norm": 0.7668830156326294, "learning_rate": 1.3000000000000001e-05, "loss": 10.3674, "step": 13 }, { "epoch": 0.004829251466022766, "grad_norm": 0.7636863589286804, "learning_rate": 1.4000000000000001e-05, "loss": 10.3887, "step": 14 }, { "epoch": 0.005174197999310107, "grad_norm": 0.8072705268859863, "learning_rate": 1.5e-05, "loss": 10.3833, "step": 15 }, { "epoch": 0.005519144532597447, "grad_norm": 0.8346720933914185, "learning_rate": 1.6000000000000003e-05, "loss": 10.3675, "step": 16 }, { "epoch": 0.005864091065884788, "grad_norm": 0.764621376991272, "learning_rate": 1.7000000000000003e-05, "loss": 10.3764, "step": 17 }, { "epoch": 0.006209037599172128, "grad_norm": 0.8266566395759583, "learning_rate": 1.8e-05, "loss": 10.3855, "step": 18 }, { "epoch": 0.006553984132459468, "grad_norm": 0.7850378751754761, "learning_rate": 1.9e-05, "loss": 10.3738, "step": 19 }, { "epoch": 0.006898930665746809, "grad_norm": 0.7893702387809753, "learning_rate": 2e-05, "loss": 10.3853, "step": 20 }, { "epoch": 0.0072438771990341495, "grad_norm": 0.8223876953125, "learning_rate": 2.1e-05, "loss": 10.3786, "step": 21 }, { "epoch": 0.0075888237323214905, "grad_norm": 0.8152451515197754, "learning_rate": 2.2000000000000003e-05, "loss": 10.3782, "step": 22 }, { "epoch": 0.00793377026560883, "grad_norm": 0.8822249174118042, "learning_rate": 2.3000000000000003e-05, "loss": 10.3668, "step": 23 }, { "epoch": 0.008278716798896172, "grad_norm": 1.0197700262069702, "learning_rate": 2.4e-05, "loss": 10.3814, "step": 24 }, { "epoch": 0.008623663332183512, "grad_norm": 0.8479687571525574, "learning_rate": 2.5e-05, "loss": 10.3559, "step": 25 }, { "epoch": 0.008968609865470852, "grad_norm": 0.8130446672439575, "learning_rate": 2.6000000000000002e-05, "loss": 10.385, "step": 26 }, { "epoch": 0.009313556398758192, "grad_norm": 0.8617949485778809, "learning_rate": 2.7000000000000002e-05, "loss": 10.3708, "step": 27 }, { "epoch": 0.009658502932045532, "grad_norm": 0.826323926448822, "learning_rate": 2.8000000000000003e-05, "loss": 10.3719, "step": 28 }, { "epoch": 0.010003449465332874, "grad_norm": 0.9218830466270447, "learning_rate": 2.9e-05, "loss": 10.3632, "step": 29 }, { "epoch": 0.010348395998620214, "grad_norm": 0.8265900611877441, "learning_rate": 3e-05, "loss": 10.3511, "step": 30 }, { "epoch": 0.010693342531907554, "grad_norm": 0.869763970375061, "learning_rate": 3.1e-05, "loss": 10.3784, "step": 31 }, { "epoch": 0.011038289065194894, "grad_norm": 0.8748270273208618, "learning_rate": 3.2000000000000005e-05, "loss": 10.3748, "step": 32 }, { "epoch": 0.011383235598482234, "grad_norm": 0.9415621757507324, "learning_rate": 3.3e-05, "loss": 10.3777, "step": 33 }, { "epoch": 0.011728182131769576, "grad_norm": 0.8795452117919922, "learning_rate": 3.4000000000000007e-05, "loss": 10.3634, "step": 34 }, { "epoch": 0.012073128665056916, "grad_norm": 0.9584307074546814, "learning_rate": 3.5e-05, "loss": 10.3707, "step": 35 }, { "epoch": 0.012418075198344257, "grad_norm": 0.8180338740348816, "learning_rate": 3.6e-05, "loss": 10.3616, "step": 36 }, { "epoch": 0.012763021731631597, "grad_norm": 0.8298875093460083, "learning_rate": 3.7e-05, "loss": 10.3896, "step": 37 }, { "epoch": 0.013107968264918937, "grad_norm": 0.9823697209358215, "learning_rate": 3.8e-05, "loss": 10.3874, "step": 38 }, { "epoch": 0.013452914798206279, "grad_norm": 0.9253247976303101, "learning_rate": 3.9000000000000006e-05, "loss": 10.3639, "step": 39 }, { "epoch": 0.013797861331493619, "grad_norm": 0.9450452327728271, "learning_rate": 4e-05, "loss": 10.3874, "step": 40 }, { "epoch": 0.014142807864780959, "grad_norm": 0.8728947639465332, "learning_rate": 4.1e-05, "loss": 10.3677, "step": 41 }, { "epoch": 0.014487754398068299, "grad_norm": 0.8550394177436829, "learning_rate": 4.2e-05, "loss": 10.3735, "step": 42 }, { "epoch": 0.01483270093135564, "grad_norm": 0.887096107006073, "learning_rate": 4.3e-05, "loss": 10.3676, "step": 43 }, { "epoch": 0.015177647464642981, "grad_norm": 1.0961228609085083, "learning_rate": 4.4000000000000006e-05, "loss": 10.3681, "step": 44 }, { "epoch": 0.015522593997930321, "grad_norm": 0.9618821740150452, "learning_rate": 4.5e-05, "loss": 10.3789, "step": 45 }, { "epoch": 0.01586754053121766, "grad_norm": 0.9343708157539368, "learning_rate": 4.600000000000001e-05, "loss": 10.3825, "step": 46 }, { "epoch": 0.016212487064505003, "grad_norm": 0.9412921667098999, "learning_rate": 4.7e-05, "loss": 10.3817, "step": 47 }, { "epoch": 0.016557433597792343, "grad_norm": 1.0469144582748413, "learning_rate": 4.8e-05, "loss": 10.3506, "step": 48 }, { "epoch": 0.016902380131079683, "grad_norm": 1.068739891052246, "learning_rate": 4.9e-05, "loss": 10.3372, "step": 49 }, { "epoch": 0.017247326664367024, "grad_norm": 1.1528873443603516, "learning_rate": 5e-05, "loss": 10.3483, "step": 50 }, { "epoch": 0.017592273197654364, "grad_norm": 0.7057186961174011, "learning_rate": 5.1000000000000006e-05, "loss": 10.3596, "step": 51 }, { "epoch": 0.017937219730941704, "grad_norm": 0.7338641285896301, "learning_rate": 5.2000000000000004e-05, "loss": 10.3678, "step": 52 }, { "epoch": 0.018282166264229044, "grad_norm": 0.7243222594261169, "learning_rate": 5.300000000000001e-05, "loss": 10.3669, "step": 53 }, { "epoch": 0.018627112797516384, "grad_norm": 0.712495744228363, "learning_rate": 5.4000000000000005e-05, "loss": 10.3632, "step": 54 }, { "epoch": 0.018972059330803724, "grad_norm": 0.7407525777816772, "learning_rate": 5.500000000000001e-05, "loss": 10.3715, "step": 55 }, { "epoch": 0.019317005864091064, "grad_norm": 0.7415438890457153, "learning_rate": 5.6000000000000006e-05, "loss": 10.3801, "step": 56 }, { "epoch": 0.019661952397378408, "grad_norm": 0.8786624073982239, "learning_rate": 5.6999999999999996e-05, "loss": 10.3781, "step": 57 }, { "epoch": 0.020006898930665748, "grad_norm": 0.7698060274124146, "learning_rate": 5.8e-05, "loss": 10.3719, "step": 58 }, { "epoch": 0.020351845463953088, "grad_norm": 0.7433401942253113, "learning_rate": 5.9e-05, "loss": 10.3633, "step": 59 }, { "epoch": 0.020696791997240428, "grad_norm": 0.8617119789123535, "learning_rate": 6e-05, "loss": 10.3708, "step": 60 }, { "epoch": 0.02104173853052777, "grad_norm": 0.9492126107215881, "learning_rate": 6.1e-05, "loss": 10.3524, "step": 61 }, { "epoch": 0.02138668506381511, "grad_norm": 0.8974522352218628, "learning_rate": 6.2e-05, "loss": 10.3708, "step": 62 }, { "epoch": 0.02173163159710245, "grad_norm": 0.7831613421440125, "learning_rate": 6.3e-05, "loss": 10.3636, "step": 63 }, { "epoch": 0.02207657813038979, "grad_norm": 0.8832221031188965, "learning_rate": 6.400000000000001e-05, "loss": 10.354, "step": 64 }, { "epoch": 0.02242152466367713, "grad_norm": 0.8677083253860474, "learning_rate": 6.500000000000001e-05, "loss": 10.3617, "step": 65 }, { "epoch": 0.02276647119696447, "grad_norm": 0.934693455696106, "learning_rate": 6.6e-05, "loss": 10.3556, "step": 66 }, { "epoch": 0.023111417730251813, "grad_norm": 0.9830888509750366, "learning_rate": 6.7e-05, "loss": 10.3612, "step": 67 }, { "epoch": 0.023456364263539153, "grad_norm": 1.0390836000442505, "learning_rate": 6.800000000000001e-05, "loss": 10.3761, "step": 68 }, { "epoch": 0.023801310796826493, "grad_norm": 0.9852373600006104, "learning_rate": 6.9e-05, "loss": 10.3501, "step": 69 }, { "epoch": 0.024146257330113833, "grad_norm": 0.8551622629165649, "learning_rate": 7e-05, "loss": 10.3612, "step": 70 }, { "epoch": 0.024491203863401173, "grad_norm": 0.9254797697067261, "learning_rate": 7.1e-05, "loss": 10.359, "step": 71 }, { "epoch": 0.024836150396688513, "grad_norm": 1.1132023334503174, "learning_rate": 7.2e-05, "loss": 10.3435, "step": 72 }, { "epoch": 0.025181096929975853, "grad_norm": 1.17877995967865, "learning_rate": 7.3e-05, "loss": 10.353, "step": 73 }, { "epoch": 0.025526043463263193, "grad_norm": 0.9523524641990662, "learning_rate": 7.4e-05, "loss": 10.36, "step": 74 }, { "epoch": 0.025870989996550534, "grad_norm": 0.991371214389801, "learning_rate": 7.500000000000001e-05, "loss": 10.3416, "step": 75 }, { "epoch": 0.026215936529837874, "grad_norm": 1.0377520322799683, "learning_rate": 7.6e-05, "loss": 10.3669, "step": 76 }, { "epoch": 0.026560883063125217, "grad_norm": 1.0984565019607544, "learning_rate": 7.7e-05, "loss": 10.322, "step": 77 }, { "epoch": 0.026905829596412557, "grad_norm": 1.147340178489685, "learning_rate": 7.800000000000001e-05, "loss": 10.3441, "step": 78 }, { "epoch": 0.027250776129699898, "grad_norm": 1.057822585105896, "learning_rate": 7.900000000000001e-05, "loss": 10.3325, "step": 79 }, { "epoch": 0.027595722662987238, "grad_norm": 1.1691458225250244, "learning_rate": 8e-05, "loss": 10.3458, "step": 80 }, { "epoch": 0.027940669196274578, "grad_norm": 1.2596869468688965, "learning_rate": 8.1e-05, "loss": 10.3158, "step": 81 }, { "epoch": 0.028285615729561918, "grad_norm": 1.1883245706558228, "learning_rate": 8.2e-05, "loss": 10.3267, "step": 82 }, { "epoch": 0.028630562262849258, "grad_norm": 1.2984602451324463, "learning_rate": 8.3e-05, "loss": 10.3389, "step": 83 }, { "epoch": 0.028975508796136598, "grad_norm": 1.2408357858657837, "learning_rate": 8.4e-05, "loss": 10.3261, "step": 84 }, { "epoch": 0.029320455329423938, "grad_norm": 1.3244764804840088, "learning_rate": 8.5e-05, "loss": 10.3211, "step": 85 }, { "epoch": 0.02966540186271128, "grad_norm": 1.4943519830703735, "learning_rate": 8.6e-05, "loss": 10.3072, "step": 86 }, { "epoch": 0.03001034839599862, "grad_norm": 1.3726887702941895, "learning_rate": 8.7e-05, "loss": 10.2841, "step": 87 }, { "epoch": 0.030355294929285962, "grad_norm": 1.5504422187805176, "learning_rate": 8.800000000000001e-05, "loss": 10.2865, "step": 88 }, { "epoch": 0.030700241462573302, "grad_norm": 1.499110221862793, "learning_rate": 8.900000000000001e-05, "loss": 10.2938, "step": 89 }, { "epoch": 0.031045187995860642, "grad_norm": 1.3083536624908447, "learning_rate": 9e-05, "loss": 10.2814, "step": 90 }, { "epoch": 0.03139013452914798, "grad_norm": 1.3920077085494995, "learning_rate": 9.1e-05, "loss": 10.2902, "step": 91 }, { "epoch": 0.03173508106243532, "grad_norm": 1.4502214193344116, "learning_rate": 9.200000000000001e-05, "loss": 10.2841, "step": 92 }, { "epoch": 0.03208002759572266, "grad_norm": 1.503482699394226, "learning_rate": 9.300000000000001e-05, "loss": 10.2558, "step": 93 }, { "epoch": 0.032424974129010006, "grad_norm": 1.4924733638763428, "learning_rate": 9.4e-05, "loss": 10.2587, "step": 94 }, { "epoch": 0.03276992066229734, "grad_norm": 1.4583185911178589, "learning_rate": 9.5e-05, "loss": 10.2477, "step": 95 }, { "epoch": 0.03311486719558469, "grad_norm": 1.5197653770446777, "learning_rate": 9.6e-05, "loss": 10.236, "step": 96 }, { "epoch": 0.03345981372887202, "grad_norm": 1.716496467590332, "learning_rate": 9.7e-05, "loss": 10.2214, "step": 97 }, { "epoch": 0.03380476026215937, "grad_norm": 1.5878716707229614, "learning_rate": 9.8e-05, "loss": 10.2318, "step": 98 }, { "epoch": 0.0341497067954467, "grad_norm": 1.5571258068084717, "learning_rate": 9.900000000000001e-05, "loss": 10.2292, "step": 99 }, { "epoch": 0.03449465332873405, "grad_norm": 1.6506520509719849, "learning_rate": 0.0001, "loss": 10.2108, "step": 100 }, { "epoch": 0.034839599862021384, "grad_norm": 1.0482630729675293, "learning_rate": 9.999996850555848e-05, "loss": 10.2815, "step": 101 }, { "epoch": 0.03518454639530873, "grad_norm": 1.0001797676086426, "learning_rate": 9.999987402227361e-05, "loss": 10.2609, "step": 102 }, { "epoch": 0.03552949292859607, "grad_norm": 1.1609498262405396, "learning_rate": 9.99997165502644e-05, "loss": 10.2727, "step": 103 }, { "epoch": 0.03587443946188341, "grad_norm": 1.1335071325302124, "learning_rate": 9.999949608972922e-05, "loss": 10.2701, "step": 104 }, { "epoch": 0.03621938599517075, "grad_norm": 1.193150520324707, "learning_rate": 9.999921264094586e-05, "loss": 10.2708, "step": 105 }, { "epoch": 0.03656433252845809, "grad_norm": 1.1749833822250366, "learning_rate": 9.999886620427133e-05, "loss": 10.2451, "step": 106 }, { "epoch": 0.03690927906174543, "grad_norm": 1.1283924579620361, "learning_rate": 9.99984567801421e-05, "loss": 10.2485, "step": 107 }, { "epoch": 0.03725422559503277, "grad_norm": 1.1589891910552979, "learning_rate": 9.999798436907395e-05, "loss": 10.2481, "step": 108 }, { "epoch": 0.03759917212832011, "grad_norm": 1.15898597240448, "learning_rate": 9.999744897166201e-05, "loss": 10.2464, "step": 109 }, { "epoch": 0.03794411866160745, "grad_norm": 1.2533750534057617, "learning_rate": 9.999685058858075e-05, "loss": 10.2044, "step": 110 }, { "epoch": 0.03828906519489479, "grad_norm": 1.2371543645858765, "learning_rate": 9.999618922058402e-05, "loss": 10.2436, "step": 111 }, { "epoch": 0.03863401172818213, "grad_norm": 1.1409327983856201, "learning_rate": 9.9995464868505e-05, "loss": 10.2129, "step": 112 }, { "epoch": 0.03897895826146947, "grad_norm": 1.3030234575271606, "learning_rate": 9.99946775332562e-05, "loss": 10.2095, "step": 113 }, { "epoch": 0.039323904794756816, "grad_norm": 1.194814682006836, "learning_rate": 9.999382721582948e-05, "loss": 10.2059, "step": 114 }, { "epoch": 0.03966885132804415, "grad_norm": 1.40117347240448, "learning_rate": 9.999291391729606e-05, "loss": 10.1761, "step": 115 }, { "epoch": 0.040013797861331496, "grad_norm": 1.1784946918487549, "learning_rate": 9.99919376388065e-05, "loss": 10.2012, "step": 116 }, { "epoch": 0.04035874439461883, "grad_norm": 1.2370185852050781, "learning_rate": 9.999089838159066e-05, "loss": 10.1933, "step": 117 }, { "epoch": 0.040703690927906176, "grad_norm": 1.2744057178497314, "learning_rate": 9.998979614695783e-05, "loss": 10.1672, "step": 118 }, { "epoch": 0.04104863746119351, "grad_norm": 1.2865217924118042, "learning_rate": 9.998863093629654e-05, "loss": 10.1898, "step": 119 }, { "epoch": 0.041393583994480856, "grad_norm": 1.2423728704452515, "learning_rate": 9.99874027510747e-05, "loss": 10.1716, "step": 120 }, { "epoch": 0.04173853052776819, "grad_norm": 1.341609001159668, "learning_rate": 9.998611159283954e-05, "loss": 10.1581, "step": 121 }, { "epoch": 0.04208347706105554, "grad_norm": 1.2851861715316772, "learning_rate": 9.998475746321769e-05, "loss": 10.1906, "step": 122 }, { "epoch": 0.04242842359434288, "grad_norm": 1.1925376653671265, "learning_rate": 9.998334036391498e-05, "loss": 10.1961, "step": 123 }, { "epoch": 0.04277337012763022, "grad_norm": 1.3623324632644653, "learning_rate": 9.99818602967167e-05, "loss": 10.1642, "step": 124 }, { "epoch": 0.04311831666091756, "grad_norm": 1.184838056564331, "learning_rate": 9.998031726348736e-05, "loss": 10.1793, "step": 125 }, { "epoch": 0.0434632631942049, "grad_norm": 1.369837760925293, "learning_rate": 9.997871126617085e-05, "loss": 10.1514, "step": 126 }, { "epoch": 0.04380820972749224, "grad_norm": 1.281457543373108, "learning_rate": 9.997704230679036e-05, "loss": 10.163, "step": 127 }, { "epoch": 0.04415315626077958, "grad_norm": 1.3965054750442505, "learning_rate": 9.997531038744845e-05, "loss": 10.1429, "step": 128 }, { "epoch": 0.04449810279406692, "grad_norm": 1.2722357511520386, "learning_rate": 9.997351551032692e-05, "loss": 10.138, "step": 129 }, { "epoch": 0.04484304932735426, "grad_norm": 1.3136990070343018, "learning_rate": 9.997165767768692e-05, "loss": 10.1314, "step": 130 }, { "epoch": 0.0451879958606416, "grad_norm": 1.252399206161499, "learning_rate": 9.99697368918689e-05, "loss": 10.1475, "step": 131 }, { "epoch": 0.04553294239392894, "grad_norm": 1.3466100692749023, "learning_rate": 9.996775315529265e-05, "loss": 10.1451, "step": 132 }, { "epoch": 0.04587788892721628, "grad_norm": 1.4946643114089966, "learning_rate": 9.99657064704572e-05, "loss": 10.12, "step": 133 }, { "epoch": 0.046222835460503625, "grad_norm": 1.42206609249115, "learning_rate": 9.996359683994096e-05, "loss": 10.1132, "step": 134 }, { "epoch": 0.04656778199379096, "grad_norm": 1.3316107988357544, "learning_rate": 9.996142426640155e-05, "loss": 10.1409, "step": 135 }, { "epoch": 0.046912728527078305, "grad_norm": 1.3772598505020142, "learning_rate": 9.995918875257594e-05, "loss": 10.1151, "step": 136 }, { "epoch": 0.04725767506036564, "grad_norm": 1.3657079935073853, "learning_rate": 9.995689030128041e-05, "loss": 10.1275, "step": 137 }, { "epoch": 0.047602621593652986, "grad_norm": 1.3499964475631714, "learning_rate": 9.995452891541048e-05, "loss": 10.1198, "step": 138 }, { "epoch": 0.04794756812694032, "grad_norm": 1.3011778593063354, "learning_rate": 9.995210459794098e-05, "loss": 10.1099, "step": 139 }, { "epoch": 0.048292514660227666, "grad_norm": 1.407361626625061, "learning_rate": 9.994961735192598e-05, "loss": 10.1072, "step": 140 }, { "epoch": 0.048637461193515, "grad_norm": 1.399038553237915, "learning_rate": 9.994706718049889e-05, "loss": 10.1009, "step": 141 }, { "epoch": 0.048982407726802346, "grad_norm": 1.3892508745193481, "learning_rate": 9.994445408687236e-05, "loss": 10.1054, "step": 142 }, { "epoch": 0.04932735426008968, "grad_norm": 1.4403347969055176, "learning_rate": 9.994177807433826e-05, "loss": 10.0781, "step": 143 }, { "epoch": 0.049672300793377026, "grad_norm": 1.472110629081726, "learning_rate": 9.993903914626783e-05, "loss": 10.0767, "step": 144 }, { "epoch": 0.05001724732666437, "grad_norm": 1.4474997520446777, "learning_rate": 9.993623730611147e-05, "loss": 10.0908, "step": 145 }, { "epoch": 0.05036219385995171, "grad_norm": 1.4426510334014893, "learning_rate": 9.993337255739892e-05, "loss": 10.0792, "step": 146 }, { "epoch": 0.05070714039323905, "grad_norm": 1.5582860708236694, "learning_rate": 9.993044490373907e-05, "loss": 10.0618, "step": 147 }, { "epoch": 0.05105208692652639, "grad_norm": 1.5666154623031616, "learning_rate": 9.992745434882014e-05, "loss": 10.0664, "step": 148 }, { "epoch": 0.05139703345981373, "grad_norm": 1.5812947750091553, "learning_rate": 9.992440089640958e-05, "loss": 10.0604, "step": 149 }, { "epoch": 0.05174197999310107, "grad_norm": 1.7215590476989746, "learning_rate": 9.992128455035403e-05, "loss": 10.0412, "step": 150 }, { "epoch": 0.05208692652638841, "grad_norm": 0.9909889101982117, "learning_rate": 9.991810531457943e-05, "loss": 10.1461, "step": 151 }, { "epoch": 0.05243187305967575, "grad_norm": 1.0242069959640503, "learning_rate": 9.991486319309086e-05, "loss": 10.1037, "step": 152 }, { "epoch": 0.05277681959296309, "grad_norm": 1.1299318075180054, "learning_rate": 9.991155818997273e-05, "loss": 10.1342, "step": 153 }, { "epoch": 0.053121766126250435, "grad_norm": 1.19455087184906, "learning_rate": 9.990819030938856e-05, "loss": 10.1224, "step": 154 }, { "epoch": 0.05346671265953777, "grad_norm": 1.1440625190734863, "learning_rate": 9.990475955558116e-05, "loss": 10.1076, "step": 155 }, { "epoch": 0.053811659192825115, "grad_norm": 1.1086006164550781, "learning_rate": 9.990126593287252e-05, "loss": 10.1221, "step": 156 }, { "epoch": 0.05415660572611245, "grad_norm": 1.0412251949310303, "learning_rate": 9.989770944566379e-05, "loss": 10.1014, "step": 157 }, { "epoch": 0.054501552259399795, "grad_norm": 1.1727021932601929, "learning_rate": 9.98940900984354e-05, "loss": 10.0921, "step": 158 }, { "epoch": 0.05484649879268713, "grad_norm": 1.1224405765533447, "learning_rate": 9.989040789574689e-05, "loss": 10.1181, "step": 159 }, { "epoch": 0.055191445325974475, "grad_norm": 1.233102798461914, "learning_rate": 9.988666284223703e-05, "loss": 10.0801, "step": 160 }, { "epoch": 0.05553639185926181, "grad_norm": 1.0544770956039429, "learning_rate": 9.988285494262377e-05, "loss": 10.1011, "step": 161 }, { "epoch": 0.055881338392549156, "grad_norm": 1.1918315887451172, "learning_rate": 9.987898420170418e-05, "loss": 10.0912, "step": 162 }, { "epoch": 0.05622628492583649, "grad_norm": 1.2117809057235718, "learning_rate": 9.987505062435455e-05, "loss": 10.0917, "step": 163 }, { "epoch": 0.056571231459123836, "grad_norm": 1.2464489936828613, "learning_rate": 9.987105421553032e-05, "loss": 10.0741, "step": 164 }, { "epoch": 0.05691617799241118, "grad_norm": 1.2690608501434326, "learning_rate": 9.986699498026609e-05, "loss": 10.0735, "step": 165 }, { "epoch": 0.057261124525698516, "grad_norm": 1.3744335174560547, "learning_rate": 9.986287292367555e-05, "loss": 10.0801, "step": 166 }, { "epoch": 0.05760607105898586, "grad_norm": 1.2143759727478027, "learning_rate": 9.985868805095163e-05, "loss": 10.1026, "step": 167 }, { "epoch": 0.057951017592273196, "grad_norm": 1.1724203824996948, "learning_rate": 9.985444036736628e-05, "loss": 10.0753, "step": 168 }, { "epoch": 0.05829596412556054, "grad_norm": 1.229285717010498, "learning_rate": 9.985012987827068e-05, "loss": 10.0444, "step": 169 }, { "epoch": 0.058640910658847877, "grad_norm": 1.280335545539856, "learning_rate": 9.984575658909508e-05, "loss": 10.0446, "step": 170 }, { "epoch": 0.05898585719213522, "grad_norm": 1.1907861232757568, "learning_rate": 9.984132050534885e-05, "loss": 10.0792, "step": 171 }, { "epoch": 0.05933080372542256, "grad_norm": 1.2323590517044067, "learning_rate": 9.983682163262044e-05, "loss": 10.0562, "step": 172 }, { "epoch": 0.0596757502587099, "grad_norm": 1.2697149515151978, "learning_rate": 9.983225997657749e-05, "loss": 10.0437, "step": 173 }, { "epoch": 0.06002069679199724, "grad_norm": 1.255582332611084, "learning_rate": 9.982763554296662e-05, "loss": 10.0482, "step": 174 }, { "epoch": 0.06036564332528458, "grad_norm": 1.2350727319717407, "learning_rate": 9.98229483376136e-05, "loss": 10.0387, "step": 175 }, { "epoch": 0.060710589858571924, "grad_norm": 1.3043851852416992, "learning_rate": 9.98181983664233e-05, "loss": 10.0554, "step": 176 }, { "epoch": 0.06105553639185926, "grad_norm": 1.152021884918213, "learning_rate": 9.981338563537959e-05, "loss": 10.0452, "step": 177 }, { "epoch": 0.061400482925146604, "grad_norm": 1.3231796026229858, "learning_rate": 9.980851015054543e-05, "loss": 10.036, "step": 178 }, { "epoch": 0.06174542945843394, "grad_norm": 1.3968857526779175, "learning_rate": 9.980357191806288e-05, "loss": 10.0244, "step": 179 }, { "epoch": 0.062090375991721285, "grad_norm": 1.324229121208191, "learning_rate": 9.979857094415301e-05, "loss": 10.0357, "step": 180 }, { "epoch": 0.06243532252500862, "grad_norm": 1.4936314821243286, "learning_rate": 9.979350723511594e-05, "loss": 10.0231, "step": 181 }, { "epoch": 0.06278026905829596, "grad_norm": 1.2559239864349365, "learning_rate": 9.978838079733078e-05, "loss": 10.0112, "step": 182 }, { "epoch": 0.06312521559158331, "grad_norm": 1.2411177158355713, "learning_rate": 9.978319163725574e-05, "loss": 10.0321, "step": 183 }, { "epoch": 0.06347016212487064, "grad_norm": 1.3505336046218872, "learning_rate": 9.9777939761428e-05, "loss": 10.0298, "step": 184 }, { "epoch": 0.06381510865815798, "grad_norm": 1.369786024093628, "learning_rate": 9.977262517646373e-05, "loss": 10.0121, "step": 185 }, { "epoch": 0.06416005519144533, "grad_norm": 1.348097801208496, "learning_rate": 9.976724788905817e-05, "loss": 10.0134, "step": 186 }, { "epoch": 0.06450500172473267, "grad_norm": 1.419002890586853, "learning_rate": 9.976180790598549e-05, "loss": 10.0246, "step": 187 }, { "epoch": 0.06484994825802001, "grad_norm": 1.3383148908615112, "learning_rate": 9.975630523409882e-05, "loss": 9.9983, "step": 188 }, { "epoch": 0.06519489479130734, "grad_norm": 1.3767681121826172, "learning_rate": 9.975073988033037e-05, "loss": 9.961, "step": 189 }, { "epoch": 0.06553984132459469, "grad_norm": 1.250262975692749, "learning_rate": 9.97451118516912e-05, "loss": 9.9955, "step": 190 }, { "epoch": 0.06588478785788203, "grad_norm": 1.399427056312561, "learning_rate": 9.973942115527137e-05, "loss": 10.0077, "step": 191 }, { "epoch": 0.06622973439116937, "grad_norm": 1.5137879848480225, "learning_rate": 9.973366779823994e-05, "loss": 9.9779, "step": 192 }, { "epoch": 0.0665746809244567, "grad_norm": 1.487107515335083, "learning_rate": 9.972785178784482e-05, "loss": 9.963, "step": 193 }, { "epoch": 0.06691962745774405, "grad_norm": 1.4277721643447876, "learning_rate": 9.972197313141291e-05, "loss": 9.9667, "step": 194 }, { "epoch": 0.06726457399103139, "grad_norm": 1.3553154468536377, "learning_rate": 9.971603183634998e-05, "loss": 9.977, "step": 195 }, { "epoch": 0.06760952052431873, "grad_norm": 1.3985073566436768, "learning_rate": 9.971002791014078e-05, "loss": 9.9725, "step": 196 }, { "epoch": 0.06795446705760608, "grad_norm": 1.6270498037338257, "learning_rate": 9.97039613603489e-05, "loss": 9.9501, "step": 197 }, { "epoch": 0.0682994135908934, "grad_norm": 1.6373891830444336, "learning_rate": 9.969783219461685e-05, "loss": 9.8966, "step": 198 }, { "epoch": 0.06864436012418075, "grad_norm": 1.5864276885986328, "learning_rate": 9.969164042066603e-05, "loss": 9.9174, "step": 199 }, { "epoch": 0.0689893066574681, "grad_norm": 1.659999966621399, "learning_rate": 9.968538604629667e-05, "loss": 9.9173, "step": 200 }, { "epoch": 0.06933425319075544, "grad_norm": 0.975853681564331, "learning_rate": 9.96790690793879e-05, "loss": 10.0666, "step": 201 }, { "epoch": 0.06967919972404277, "grad_norm": 1.065144658088684, "learning_rate": 9.96726895278977e-05, "loss": 10.0229, "step": 202 }, { "epoch": 0.07002414625733011, "grad_norm": 1.06117844581604, "learning_rate": 9.966624739986289e-05, "loss": 10.0301, "step": 203 }, { "epoch": 0.07036909279061745, "grad_norm": 1.0030990839004517, "learning_rate": 9.965974270339911e-05, "loss": 10.0408, "step": 204 }, { "epoch": 0.0707140393239048, "grad_norm": 1.122992753982544, "learning_rate": 9.965317544670083e-05, "loss": 10.0107, "step": 205 }, { "epoch": 0.07105898585719214, "grad_norm": 1.028238296508789, "learning_rate": 9.964654563804134e-05, "loss": 10.0143, "step": 206 }, { "epoch": 0.07140393239047947, "grad_norm": 1.094612717628479, "learning_rate": 9.963985328577273e-05, "loss": 9.9905, "step": 207 }, { "epoch": 0.07174887892376682, "grad_norm": 1.083109736442566, "learning_rate": 9.963309839832586e-05, "loss": 9.9978, "step": 208 }, { "epoch": 0.07209382545705416, "grad_norm": 1.2037516832351685, "learning_rate": 9.962628098421041e-05, "loss": 9.985, "step": 209 }, { "epoch": 0.0724387719903415, "grad_norm": 1.1501799821853638, "learning_rate": 9.961940105201477e-05, "loss": 9.9767, "step": 210 }, { "epoch": 0.07278371852362883, "grad_norm": 1.1958119869232178, "learning_rate": 9.961245861040617e-05, "loss": 10.0039, "step": 211 }, { "epoch": 0.07312866505691618, "grad_norm": 1.1737613677978516, "learning_rate": 9.96054536681305e-05, "loss": 10.0003, "step": 212 }, { "epoch": 0.07347361159020352, "grad_norm": 1.246880054473877, "learning_rate": 9.959838623401246e-05, "loss": 9.9803, "step": 213 }, { "epoch": 0.07381855812349086, "grad_norm": 1.1679513454437256, "learning_rate": 9.959125631695543e-05, "loss": 9.9684, "step": 214 }, { "epoch": 0.07416350465677819, "grad_norm": 1.2343090772628784, "learning_rate": 9.958406392594153e-05, "loss": 10.013, "step": 215 }, { "epoch": 0.07450845119006554, "grad_norm": 1.1205172538757324, "learning_rate": 9.957680907003155e-05, "loss": 10.0095, "step": 216 }, { "epoch": 0.07485339772335288, "grad_norm": 1.1602991819381714, "learning_rate": 9.956949175836503e-05, "loss": 9.9666, "step": 217 }, { "epoch": 0.07519834425664022, "grad_norm": 1.2786660194396973, "learning_rate": 9.956211200016013e-05, "loss": 9.9578, "step": 218 }, { "epoch": 0.07554329078992757, "grad_norm": 1.2090095281600952, "learning_rate": 9.955466980471373e-05, "loss": 9.9574, "step": 219 }, { "epoch": 0.0758882373232149, "grad_norm": 1.303739070892334, "learning_rate": 9.95471651814013e-05, "loss": 9.9349, "step": 220 }, { "epoch": 0.07623318385650224, "grad_norm": 1.188925862312317, "learning_rate": 9.953959813967704e-05, "loss": 9.9513, "step": 221 }, { "epoch": 0.07657813038978958, "grad_norm": 1.3651727437973022, "learning_rate": 9.953196868907373e-05, "loss": 9.9574, "step": 222 }, { "epoch": 0.07692307692307693, "grad_norm": 1.1526354551315308, "learning_rate": 9.952427683920276e-05, "loss": 9.9819, "step": 223 }, { "epoch": 0.07726802345636426, "grad_norm": 1.3217377662658691, "learning_rate": 9.951652259975418e-05, "loss": 9.961, "step": 224 }, { "epoch": 0.0776129699896516, "grad_norm": 1.2583246231079102, "learning_rate": 9.950870598049657e-05, "loss": 9.9224, "step": 225 }, { "epoch": 0.07795791652293894, "grad_norm": 1.2618720531463623, "learning_rate": 9.950082699127717e-05, "loss": 9.972, "step": 226 }, { "epoch": 0.07830286305622629, "grad_norm": 1.329683542251587, "learning_rate": 9.949288564202172e-05, "loss": 9.9146, "step": 227 }, { "epoch": 0.07864780958951363, "grad_norm": 1.3133656978607178, "learning_rate": 9.948488194273458e-05, "loss": 9.9109, "step": 228 }, { "epoch": 0.07899275612280096, "grad_norm": 1.4228748083114624, "learning_rate": 9.947681590349863e-05, "loss": 9.9242, "step": 229 }, { "epoch": 0.0793377026560883, "grad_norm": 1.2924089431762695, "learning_rate": 9.946868753447529e-05, "loss": 9.9271, "step": 230 }, { "epoch": 0.07968264918937565, "grad_norm": 1.463861107826233, "learning_rate": 9.946049684590448e-05, "loss": 9.8871, "step": 231 }, { "epoch": 0.08002759572266299, "grad_norm": 1.2419488430023193, "learning_rate": 9.945224384810465e-05, "loss": 9.9425, "step": 232 }, { "epoch": 0.08037254225595032, "grad_norm": 1.4464517831802368, "learning_rate": 9.944392855147275e-05, "loss": 9.8807, "step": 233 }, { "epoch": 0.08071748878923767, "grad_norm": 1.4393879175186157, "learning_rate": 9.94355509664842e-05, "loss": 9.9006, "step": 234 }, { "epoch": 0.08106243532252501, "grad_norm": 1.3730369806289673, "learning_rate": 9.942711110369292e-05, "loss": 9.9252, "step": 235 }, { "epoch": 0.08140738185581235, "grad_norm": 1.5222467184066772, "learning_rate": 9.941860897373121e-05, "loss": 9.8747, "step": 236 }, { "epoch": 0.0817523283890997, "grad_norm": 1.516811728477478, "learning_rate": 9.94100445873099e-05, "loss": 9.8623, "step": 237 }, { "epoch": 0.08209727492238703, "grad_norm": 1.3974412679672241, "learning_rate": 9.94014179552182e-05, "loss": 9.8805, "step": 238 }, { "epoch": 0.08244222145567437, "grad_norm": 1.4104135036468506, "learning_rate": 9.939272908832376e-05, "loss": 9.8955, "step": 239 }, { "epoch": 0.08278716798896171, "grad_norm": 1.3622193336486816, "learning_rate": 9.938397799757261e-05, "loss": 9.8692, "step": 240 }, { "epoch": 0.08313211452224906, "grad_norm": 1.4380640983581543, "learning_rate": 9.937516469398918e-05, "loss": 9.8608, "step": 241 }, { "epoch": 0.08347706105553639, "grad_norm": 1.483670711517334, "learning_rate": 9.936628918867627e-05, "loss": 9.8679, "step": 242 }, { "epoch": 0.08382200758882373, "grad_norm": 1.5402473211288452, "learning_rate": 9.935735149281504e-05, "loss": 9.8424, "step": 243 }, { "epoch": 0.08416695412211107, "grad_norm": 1.4765286445617676, "learning_rate": 9.934835161766502e-05, "loss": 9.8672, "step": 244 }, { "epoch": 0.08451190065539842, "grad_norm": 1.4256876707077026, "learning_rate": 9.933928957456404e-05, "loss": 9.8916, "step": 245 }, { "epoch": 0.08485684718868576, "grad_norm": 1.4095759391784668, "learning_rate": 9.933016537492826e-05, "loss": 9.8809, "step": 246 }, { "epoch": 0.08520179372197309, "grad_norm": 1.64491868019104, "learning_rate": 9.932097903025212e-05, "loss": 9.8161, "step": 247 }, { "epoch": 0.08554674025526043, "grad_norm": 1.608772873878479, "learning_rate": 9.93117305521084e-05, "loss": 9.8112, "step": 248 }, { "epoch": 0.08589168678854778, "grad_norm": 1.500699758529663, "learning_rate": 9.930241995214814e-05, "loss": 9.8622, "step": 249 }, { "epoch": 0.08623663332183512, "grad_norm": 1.723171591758728, "learning_rate": 9.929304724210058e-05, "loss": 9.7795, "step": 250 }, { "epoch": 0.08658157985512245, "grad_norm": 1.044055700302124, "learning_rate": 9.928361243377328e-05, "loss": 9.9278, "step": 251 }, { "epoch": 0.0869265263884098, "grad_norm": 0.9544603824615479, "learning_rate": 9.927411553905201e-05, "loss": 9.9693, "step": 252 }, { "epoch": 0.08727147292169714, "grad_norm": 1.0985561609268188, "learning_rate": 9.926455656990073e-05, "loss": 9.9014, "step": 253 }, { "epoch": 0.08761641945498448, "grad_norm": 1.0646491050720215, "learning_rate": 9.92549355383616e-05, "loss": 9.9267, "step": 254 }, { "epoch": 0.08796136598827181, "grad_norm": 1.0615615844726562, "learning_rate": 9.924525245655501e-05, "loss": 9.931, "step": 255 }, { "epoch": 0.08830631252155915, "grad_norm": 1.125076413154602, "learning_rate": 9.923550733667948e-05, "loss": 9.9225, "step": 256 }, { "epoch": 0.0886512590548465, "grad_norm": 1.1765639781951904, "learning_rate": 9.92257001910117e-05, "loss": 9.8822, "step": 257 }, { "epoch": 0.08899620558813384, "grad_norm": 1.0968784093856812, "learning_rate": 9.921583103190648e-05, "loss": 9.9152, "step": 258 }, { "epoch": 0.08934115212142119, "grad_norm": 1.183980941772461, "learning_rate": 9.920589987179676e-05, "loss": 9.9258, "step": 259 }, { "epoch": 0.08968609865470852, "grad_norm": 1.1285295486450195, "learning_rate": 9.919590672319361e-05, "loss": 9.8847, "step": 260 }, { "epoch": 0.09003104518799586, "grad_norm": 1.0491220951080322, "learning_rate": 9.918585159868617e-05, "loss": 9.9073, "step": 261 }, { "epoch": 0.0903759917212832, "grad_norm": 1.3533495664596558, "learning_rate": 9.917573451094168e-05, "loss": 9.8524, "step": 262 }, { "epoch": 0.09072093825457055, "grad_norm": 1.210127592086792, "learning_rate": 9.916555547270538e-05, "loss": 9.8654, "step": 263 }, { "epoch": 0.09106588478785788, "grad_norm": 1.2345919609069824, "learning_rate": 9.915531449680063e-05, "loss": 9.8847, "step": 264 }, { "epoch": 0.09141083132114522, "grad_norm": 1.298925757408142, "learning_rate": 9.914501159612876e-05, "loss": 9.8865, "step": 265 }, { "epoch": 0.09175577785443256, "grad_norm": 1.2874445915222168, "learning_rate": 9.913464678366917e-05, "loss": 9.8412, "step": 266 }, { "epoch": 0.0921007243877199, "grad_norm": 1.1336477994918823, "learning_rate": 9.912422007247917e-05, "loss": 9.8807, "step": 267 }, { "epoch": 0.09244567092100725, "grad_norm": 1.3368065357208252, "learning_rate": 9.911373147569414e-05, "loss": 9.8182, "step": 268 }, { "epoch": 0.09279061745429458, "grad_norm": 1.2617908716201782, "learning_rate": 9.910318100652736e-05, "loss": 9.8705, "step": 269 }, { "epoch": 0.09313556398758192, "grad_norm": 1.1415497064590454, "learning_rate": 9.909256867827006e-05, "loss": 9.8783, "step": 270 }, { "epoch": 0.09348051052086927, "grad_norm": 1.3616750240325928, "learning_rate": 9.908189450429143e-05, "loss": 9.8499, "step": 271 }, { "epoch": 0.09382545705415661, "grad_norm": 1.3078230619430542, "learning_rate": 9.907115849803859e-05, "loss": 9.8458, "step": 272 }, { "epoch": 0.09417040358744394, "grad_norm": 1.245226502418518, "learning_rate": 9.906036067303646e-05, "loss": 9.8787, "step": 273 }, { "epoch": 0.09451535012073128, "grad_norm": 1.316032886505127, "learning_rate": 9.904950104288793e-05, "loss": 9.8522, "step": 274 }, { "epoch": 0.09486029665401863, "grad_norm": 1.544091820716858, "learning_rate": 9.903857962127372e-05, "loss": 9.822, "step": 275 }, { "epoch": 0.09520524318730597, "grad_norm": 1.240692138671875, "learning_rate": 9.902759642195239e-05, "loss": 9.8909, "step": 276 }, { "epoch": 0.09555018972059331, "grad_norm": 1.4124935865402222, "learning_rate": 9.901655145876034e-05, "loss": 9.8022, "step": 277 }, { "epoch": 0.09589513625388064, "grad_norm": 1.238968849182129, "learning_rate": 9.900544474561174e-05, "loss": 9.8938, "step": 278 }, { "epoch": 0.09624008278716799, "grad_norm": 1.2817027568817139, "learning_rate": 9.89942762964986e-05, "loss": 9.8653, "step": 279 }, { "epoch": 0.09658502932045533, "grad_norm": 1.2441328763961792, "learning_rate": 9.898304612549067e-05, "loss": 9.8592, "step": 280 }, { "epoch": 0.09692997585374268, "grad_norm": 1.3548439741134644, "learning_rate": 9.89717542467355e-05, "loss": 9.8568, "step": 281 }, { "epoch": 0.09727492238703, "grad_norm": 1.3438184261322021, "learning_rate": 9.896040067445831e-05, "loss": 9.8484, "step": 282 }, { "epoch": 0.09761986892031735, "grad_norm": 1.4415910243988037, "learning_rate": 9.894898542296209e-05, "loss": 9.7968, "step": 283 }, { "epoch": 0.09796481545360469, "grad_norm": 1.273112177848816, "learning_rate": 9.893750850662752e-05, "loss": 9.8928, "step": 284 }, { "epoch": 0.09830976198689204, "grad_norm": 1.429869294166565, "learning_rate": 9.892596993991297e-05, "loss": 9.8333, "step": 285 }, { "epoch": 0.09865470852017937, "grad_norm": 1.5786771774291992, "learning_rate": 9.891436973735444e-05, "loss": 9.8043, "step": 286 }, { "epoch": 0.09899965505346671, "grad_norm": 1.3031753301620483, "learning_rate": 9.890270791356564e-05, "loss": 9.837, "step": 287 }, { "epoch": 0.09934460158675405, "grad_norm": 1.3726156949996948, "learning_rate": 9.889098448323786e-05, "loss": 9.8427, "step": 288 }, { "epoch": 0.0996895481200414, "grad_norm": 1.4839277267456055, "learning_rate": 9.887919946114001e-05, "loss": 9.7975, "step": 289 }, { "epoch": 0.10003449465332874, "grad_norm": 1.5329991579055786, "learning_rate": 9.886735286211861e-05, "loss": 9.7854, "step": 290 }, { "epoch": 0.10037944118661607, "grad_norm": 1.3865573406219482, "learning_rate": 9.885544470109774e-05, "loss": 9.7865, "step": 291 }, { "epoch": 0.10072438771990341, "grad_norm": 1.2633044719696045, "learning_rate": 9.884347499307902e-05, "loss": 9.8258, "step": 292 }, { "epoch": 0.10106933425319076, "grad_norm": 1.5327684879302979, "learning_rate": 9.883144375314164e-05, "loss": 9.7834, "step": 293 }, { "epoch": 0.1014142807864781, "grad_norm": 1.544948697090149, "learning_rate": 9.881935099644228e-05, "loss": 9.7626, "step": 294 }, { "epoch": 0.10175922731976543, "grad_norm": 1.3965122699737549, "learning_rate": 9.880719673821512e-05, "loss": 9.7893, "step": 295 }, { "epoch": 0.10210417385305277, "grad_norm": 1.5461406707763672, "learning_rate": 9.879498099377183e-05, "loss": 9.7662, "step": 296 }, { "epoch": 0.10244912038634012, "grad_norm": 1.5404688119888306, "learning_rate": 9.878270377850153e-05, "loss": 9.7516, "step": 297 }, { "epoch": 0.10279406691962746, "grad_norm": 1.6081411838531494, "learning_rate": 9.877036510787077e-05, "loss": 9.7487, "step": 298 }, { "epoch": 0.1031390134529148, "grad_norm": 1.6868000030517578, "learning_rate": 9.875796499742355e-05, "loss": 9.7492, "step": 299 }, { "epoch": 0.10348395998620213, "grad_norm": 1.6842550039291382, "learning_rate": 9.874550346278123e-05, "loss": 9.7523, "step": 300 }, { "epoch": 0.10382890651948948, "grad_norm": 0.9228402376174927, "learning_rate": 9.873298051964261e-05, "loss": 9.8752, "step": 301 }, { "epoch": 0.10417385305277682, "grad_norm": 1.069778561592102, "learning_rate": 9.872039618378378e-05, "loss": 9.8213, "step": 302 }, { "epoch": 0.10451879958606416, "grad_norm": 1.0447841882705688, "learning_rate": 9.870775047105822e-05, "loss": 9.8148, "step": 303 }, { "epoch": 0.1048637461193515, "grad_norm": 1.022067666053772, "learning_rate": 9.869504339739671e-05, "loss": 9.8546, "step": 304 }, { "epoch": 0.10520869265263884, "grad_norm": 1.168174386024475, "learning_rate": 9.868227497880733e-05, "loss": 9.8186, "step": 305 }, { "epoch": 0.10555363918592618, "grad_norm": 1.1317428350448608, "learning_rate": 9.866944523137546e-05, "loss": 9.8244, "step": 306 }, { "epoch": 0.10589858571921353, "grad_norm": 1.1827465295791626, "learning_rate": 9.865655417126374e-05, "loss": 9.8549, "step": 307 }, { "epoch": 0.10624353225250087, "grad_norm": 1.0269920825958252, "learning_rate": 9.864360181471202e-05, "loss": 9.8324, "step": 308 }, { "epoch": 0.1065884787857882, "grad_norm": 1.16238272190094, "learning_rate": 9.86305881780374e-05, "loss": 9.8568, "step": 309 }, { "epoch": 0.10693342531907554, "grad_norm": 1.1754971742630005, "learning_rate": 9.861751327763415e-05, "loss": 9.8146, "step": 310 }, { "epoch": 0.10727837185236289, "grad_norm": 1.247070550918579, "learning_rate": 9.860437712997378e-05, "loss": 9.8329, "step": 311 }, { "epoch": 0.10762331838565023, "grad_norm": 1.2488677501678467, "learning_rate": 9.859117975160488e-05, "loss": 9.7842, "step": 312 }, { "epoch": 0.10796826491893756, "grad_norm": 1.2628960609436035, "learning_rate": 9.85779211591532e-05, "loss": 9.8005, "step": 313 }, { "epoch": 0.1083132114522249, "grad_norm": 1.1880953311920166, "learning_rate": 9.856460136932166e-05, "loss": 9.8051, "step": 314 }, { "epoch": 0.10865815798551225, "grad_norm": 1.2296935319900513, "learning_rate": 9.85512203988902e-05, "loss": 9.7838, "step": 315 }, { "epoch": 0.10900310451879959, "grad_norm": 1.2373672723770142, "learning_rate": 9.853777826471589e-05, "loss": 9.7733, "step": 316 }, { "epoch": 0.10934805105208692, "grad_norm": 1.2010676860809326, "learning_rate": 9.852427498373283e-05, "loss": 9.8074, "step": 317 }, { "epoch": 0.10969299758537426, "grad_norm": 1.1413911581039429, "learning_rate": 9.851071057295213e-05, "loss": 9.8098, "step": 318 }, { "epoch": 0.11003794411866161, "grad_norm": 1.1600428819656372, "learning_rate": 9.849708504946193e-05, "loss": 9.8, "step": 319 }, { "epoch": 0.11038289065194895, "grad_norm": 1.2346547842025757, "learning_rate": 9.84833984304274e-05, "loss": 9.7597, "step": 320 }, { "epoch": 0.1107278371852363, "grad_norm": 1.3820494413375854, "learning_rate": 9.84696507330906e-05, "loss": 9.749, "step": 321 }, { "epoch": 0.11107278371852362, "grad_norm": 1.233407735824585, "learning_rate": 9.845584197477058e-05, "loss": 9.7642, "step": 322 }, { "epoch": 0.11141773025181097, "grad_norm": 1.3942359685897827, "learning_rate": 9.844197217286332e-05, "loss": 9.7944, "step": 323 }, { "epoch": 0.11176267678509831, "grad_norm": 1.2736047506332397, "learning_rate": 9.842804134484165e-05, "loss": 9.7716, "step": 324 }, { "epoch": 0.11210762331838565, "grad_norm": 1.3517143726348877, "learning_rate": 9.841404950825537e-05, "loss": 9.812, "step": 325 }, { "epoch": 0.11245256985167298, "grad_norm": 1.2531120777130127, "learning_rate": 9.839999668073105e-05, "loss": 9.7383, "step": 326 }, { "epoch": 0.11279751638496033, "grad_norm": 1.3106811046600342, "learning_rate": 9.838588287997212e-05, "loss": 9.7353, "step": 327 }, { "epoch": 0.11314246291824767, "grad_norm": 1.3290574550628662, "learning_rate": 9.837170812375885e-05, "loss": 9.7201, "step": 328 }, { "epoch": 0.11348740945153502, "grad_norm": 1.5268999338150024, "learning_rate": 9.835747242994828e-05, "loss": 9.7307, "step": 329 }, { "epoch": 0.11383235598482236, "grad_norm": 1.3327301740646362, "learning_rate": 9.83431758164742e-05, "loss": 9.7123, "step": 330 }, { "epoch": 0.11417730251810969, "grad_norm": 1.3089390993118286, "learning_rate": 9.832881830134719e-05, "loss": 9.7895, "step": 331 }, { "epoch": 0.11452224905139703, "grad_norm": 1.3884986639022827, "learning_rate": 9.83143999026545e-05, "loss": 9.7379, "step": 332 }, { "epoch": 0.11486719558468438, "grad_norm": 1.3758727312088013, "learning_rate": 9.829992063856013e-05, "loss": 9.7225, "step": 333 }, { "epoch": 0.11521214211797172, "grad_norm": 1.4772086143493652, "learning_rate": 9.828538052730473e-05, "loss": 9.7427, "step": 334 }, { "epoch": 0.11555708865125905, "grad_norm": 1.4043651819229126, "learning_rate": 9.82707795872056e-05, "loss": 9.7053, "step": 335 }, { "epoch": 0.11590203518454639, "grad_norm": 1.281566858291626, "learning_rate": 9.825611783665667e-05, "loss": 9.7229, "step": 336 }, { "epoch": 0.11624698171783374, "grad_norm": 1.4384312629699707, "learning_rate": 9.824139529412851e-05, "loss": 9.7285, "step": 337 }, { "epoch": 0.11659192825112108, "grad_norm": 1.391280174255371, "learning_rate": 9.822661197816823e-05, "loss": 9.7337, "step": 338 }, { "epoch": 0.11693687478440842, "grad_norm": 1.409494400024414, "learning_rate": 9.821176790739952e-05, "loss": 9.7505, "step": 339 }, { "epoch": 0.11728182131769575, "grad_norm": 1.260730266571045, "learning_rate": 9.819686310052263e-05, "loss": 9.7856, "step": 340 }, { "epoch": 0.1176267678509831, "grad_norm": 1.4997591972351074, "learning_rate": 9.818189757631429e-05, "loss": 9.6875, "step": 341 }, { "epoch": 0.11797171438427044, "grad_norm": 1.4658291339874268, "learning_rate": 9.816687135362771e-05, "loss": 9.7123, "step": 342 }, { "epoch": 0.11831666091755778, "grad_norm": 1.374866008758545, "learning_rate": 9.815178445139264e-05, "loss": 9.7303, "step": 343 }, { "epoch": 0.11866160745084511, "grad_norm": 1.6058346033096313, "learning_rate": 9.813663688861518e-05, "loss": 9.6672, "step": 344 }, { "epoch": 0.11900655398413246, "grad_norm": 1.5296977758407593, "learning_rate": 9.81214286843779e-05, "loss": 9.6622, "step": 345 }, { "epoch": 0.1193515005174198, "grad_norm": 1.4145252704620361, "learning_rate": 9.810615985783977e-05, "loss": 9.6812, "step": 346 }, { "epoch": 0.11969644705070714, "grad_norm": 1.5678027868270874, "learning_rate": 9.809083042823611e-05, "loss": 9.6396, "step": 347 }, { "epoch": 0.12004139358399447, "grad_norm": 1.5782277584075928, "learning_rate": 9.807544041487857e-05, "loss": 9.6638, "step": 348 }, { "epoch": 0.12038634011728182, "grad_norm": 1.6961196660995483, "learning_rate": 9.805998983715518e-05, "loss": 9.6102, "step": 349 }, { "epoch": 0.12073128665056916, "grad_norm": 1.6960567235946655, "learning_rate": 9.804447871453023e-05, "loss": 9.7084, "step": 350 }, { "epoch": 0.1210762331838565, "grad_norm": 0.730361819267273, "learning_rate": 9.802890706654426e-05, "loss": 9.884, "step": 351 }, { "epoch": 0.12142117971714385, "grad_norm": 1.076087474822998, "learning_rate": 9.801327491281411e-05, "loss": 9.7877, "step": 352 }, { "epoch": 0.12176612625043118, "grad_norm": 1.0796483755111694, "learning_rate": 9.79975822730328e-05, "loss": 9.8092, "step": 353 }, { "epoch": 0.12211107278371852, "grad_norm": 1.0125349760055542, "learning_rate": 9.798182916696956e-05, "loss": 9.7979, "step": 354 }, { "epoch": 0.12245601931700587, "grad_norm": 1.0231205224990845, "learning_rate": 9.796601561446983e-05, "loss": 9.7728, "step": 355 }, { "epoch": 0.12280096585029321, "grad_norm": 1.0823100805282593, "learning_rate": 9.795014163545515e-05, "loss": 9.7639, "step": 356 }, { "epoch": 0.12314591238358054, "grad_norm": 1.1489181518554688, "learning_rate": 9.793420724992321e-05, "loss": 9.7437, "step": 357 }, { "epoch": 0.12349085891686788, "grad_norm": 1.2228708267211914, "learning_rate": 9.79182124779478e-05, "loss": 9.7371, "step": 358 }, { "epoch": 0.12383580545015523, "grad_norm": 1.149304747581482, "learning_rate": 9.790215733967876e-05, "loss": 9.7673, "step": 359 }, { "epoch": 0.12418075198344257, "grad_norm": 1.167173147201538, "learning_rate": 9.7886041855342e-05, "loss": 9.7857, "step": 360 }, { "epoch": 0.12452569851672991, "grad_norm": 1.2003726959228516, "learning_rate": 9.786986604523946e-05, "loss": 9.7113, "step": 361 }, { "epoch": 0.12487064505001724, "grad_norm": 1.1671428680419922, "learning_rate": 9.785362992974904e-05, "loss": 9.7027, "step": 362 }, { "epoch": 0.1252155915833046, "grad_norm": 1.1911990642547607, "learning_rate": 9.783733352932467e-05, "loss": 9.783, "step": 363 }, { "epoch": 0.12556053811659193, "grad_norm": 1.1883963346481323, "learning_rate": 9.782097686449616e-05, "loss": 9.7556, "step": 364 }, { "epoch": 0.12590548464987927, "grad_norm": 1.1700190305709839, "learning_rate": 9.780455995586928e-05, "loss": 9.7978, "step": 365 }, { "epoch": 0.12625043118316662, "grad_norm": 1.2475215196609497, "learning_rate": 9.77880828241257e-05, "loss": 9.7012, "step": 366 }, { "epoch": 0.12659537771645396, "grad_norm": 1.3074865341186523, "learning_rate": 9.777154549002291e-05, "loss": 9.7239, "step": 367 }, { "epoch": 0.12694032424974128, "grad_norm": 1.0760914087295532, "learning_rate": 9.775494797439431e-05, "loss": 9.7418, "step": 368 }, { "epoch": 0.12728527078302862, "grad_norm": 1.2002527713775635, "learning_rate": 9.773829029814905e-05, "loss": 9.7252, "step": 369 }, { "epoch": 0.12763021731631596, "grad_norm": 1.2817260026931763, "learning_rate": 9.772157248227211e-05, "loss": 9.7005, "step": 370 }, { "epoch": 0.1279751638496033, "grad_norm": 1.2580500841140747, "learning_rate": 9.770479454782423e-05, "loss": 9.7851, "step": 371 }, { "epoch": 0.12832011038289065, "grad_norm": 1.1985312700271606, "learning_rate": 9.768795651594187e-05, "loss": 9.7286, "step": 372 }, { "epoch": 0.128665056916178, "grad_norm": 1.225277304649353, "learning_rate": 9.76710584078372e-05, "loss": 9.7375, "step": 373 }, { "epoch": 0.12901000344946534, "grad_norm": 1.5300991535186768, "learning_rate": 9.765410024479808e-05, "loss": 9.7107, "step": 374 }, { "epoch": 0.12935494998275268, "grad_norm": 1.3542684316635132, "learning_rate": 9.763708204818804e-05, "loss": 9.7217, "step": 375 }, { "epoch": 0.12969989651604003, "grad_norm": 1.2654287815093994, "learning_rate": 9.762000383944621e-05, "loss": 9.7224, "step": 376 }, { "epoch": 0.13004484304932734, "grad_norm": 1.3316888809204102, "learning_rate": 9.760286564008734e-05, "loss": 9.6934, "step": 377 }, { "epoch": 0.13038978958261468, "grad_norm": 1.4030239582061768, "learning_rate": 9.758566747170175e-05, "loss": 9.6717, "step": 378 }, { "epoch": 0.13073473611590203, "grad_norm": 1.372252345085144, "learning_rate": 9.75684093559553e-05, "loss": 9.6926, "step": 379 }, { "epoch": 0.13107968264918937, "grad_norm": 1.422454595565796, "learning_rate": 9.755109131458941e-05, "loss": 9.6567, "step": 380 }, { "epoch": 0.13142462918247672, "grad_norm": 1.433806300163269, "learning_rate": 9.753371336942092e-05, "loss": 9.6751, "step": 381 }, { "epoch": 0.13176957571576406, "grad_norm": 1.2416576147079468, "learning_rate": 9.751627554234219e-05, "loss": 9.7274, "step": 382 }, { "epoch": 0.1321145222490514, "grad_norm": 1.3043841123580933, "learning_rate": 9.749877785532103e-05, "loss": 9.7385, "step": 383 }, { "epoch": 0.13245946878233875, "grad_norm": 1.3732062578201294, "learning_rate": 9.748122033040061e-05, "loss": 9.6508, "step": 384 }, { "epoch": 0.1328044153156261, "grad_norm": 1.4883499145507812, "learning_rate": 9.746360298969951e-05, "loss": 9.635, "step": 385 }, { "epoch": 0.1331493618489134, "grad_norm": 1.3537193536758423, "learning_rate": 9.744592585541166e-05, "loss": 9.6717, "step": 386 }, { "epoch": 0.13349430838220075, "grad_norm": 1.470834732055664, "learning_rate": 9.742818894980634e-05, "loss": 9.6307, "step": 387 }, { "epoch": 0.1338392549154881, "grad_norm": 1.3867615461349487, "learning_rate": 9.741039229522809e-05, "loss": 9.6048, "step": 388 }, { "epoch": 0.13418420144877544, "grad_norm": 1.3195267915725708, "learning_rate": 9.739253591409673e-05, "loss": 9.6624, "step": 389 }, { "epoch": 0.13452914798206278, "grad_norm": 1.3774893283843994, "learning_rate": 9.737461982890734e-05, "loss": 9.63, "step": 390 }, { "epoch": 0.13487409451535012, "grad_norm": 1.541316270828247, "learning_rate": 9.735664406223022e-05, "loss": 9.6267, "step": 391 }, { "epoch": 0.13521904104863747, "grad_norm": 1.6289477348327637, "learning_rate": 9.733860863671081e-05, "loss": 9.6038, "step": 392 }, { "epoch": 0.1355639875819248, "grad_norm": 1.5535857677459717, "learning_rate": 9.732051357506974e-05, "loss": 9.6579, "step": 393 }, { "epoch": 0.13590893411521215, "grad_norm": 1.57676362991333, "learning_rate": 9.730235890010279e-05, "loss": 9.5814, "step": 394 }, { "epoch": 0.13625388064849947, "grad_norm": 1.4913179874420166, "learning_rate": 9.728414463468079e-05, "loss": 9.609, "step": 395 }, { "epoch": 0.1365988271817868, "grad_norm": 1.6802784204483032, "learning_rate": 9.726587080174968e-05, "loss": 9.5745, "step": 396 }, { "epoch": 0.13694377371507416, "grad_norm": 1.4532045125961304, "learning_rate": 9.724753742433042e-05, "loss": 9.688, "step": 397 }, { "epoch": 0.1372887202483615, "grad_norm": 1.6262314319610596, "learning_rate": 9.7229144525519e-05, "loss": 9.5843, "step": 398 }, { "epoch": 0.13763366678164884, "grad_norm": 1.7181068658828735, "learning_rate": 9.721069212848635e-05, "loss": 9.6142, "step": 399 }, { "epoch": 0.1379786133149362, "grad_norm": 1.8162275552749634, "learning_rate": 9.71921802564784e-05, "loss": 9.5156, "step": 400 }, { "epoch": 0.13832355984822353, "grad_norm": 0.8729615807533264, "learning_rate": 9.717360893281602e-05, "loss": 9.7578, "step": 401 }, { "epoch": 0.13866850638151088, "grad_norm": 1.138515591621399, "learning_rate": 9.715497818089492e-05, "loss": 9.6906, "step": 402 }, { "epoch": 0.13901345291479822, "grad_norm": 1.0682348012924194, "learning_rate": 9.713628802418573e-05, "loss": 9.7469, "step": 403 }, { "epoch": 0.13935839944808553, "grad_norm": 1.0238516330718994, "learning_rate": 9.711753848623389e-05, "loss": 9.7235, "step": 404 }, { "epoch": 0.13970334598137288, "grad_norm": 1.046028733253479, "learning_rate": 9.70987295906596e-05, "loss": 9.6959, "step": 405 }, { "epoch": 0.14004829251466022, "grad_norm": 1.1170198917388916, "learning_rate": 9.707986136115795e-05, "loss": 9.7301, "step": 406 }, { "epoch": 0.14039323904794757, "grad_norm": 1.259394645690918, "learning_rate": 9.706093382149868e-05, "loss": 9.6711, "step": 407 }, { "epoch": 0.1407381855812349, "grad_norm": 1.0471652746200562, "learning_rate": 9.704194699552628e-05, "loss": 9.6939, "step": 408 }, { "epoch": 0.14108313211452225, "grad_norm": 1.1050465106964111, "learning_rate": 9.702290090715996e-05, "loss": 9.7339, "step": 409 }, { "epoch": 0.1414280786478096, "grad_norm": 1.2006595134735107, "learning_rate": 9.700379558039351e-05, "loss": 9.6702, "step": 410 }, { "epoch": 0.14177302518109694, "grad_norm": 1.2607287168502808, "learning_rate": 9.698463103929542e-05, "loss": 9.6611, "step": 411 }, { "epoch": 0.14211797171438428, "grad_norm": 1.1868005990982056, "learning_rate": 9.696540730800876e-05, "loss": 9.7249, "step": 412 }, { "epoch": 0.1424629182476716, "grad_norm": 1.2063498497009277, "learning_rate": 9.694612441075115e-05, "loss": 9.6765, "step": 413 }, { "epoch": 0.14280786478095894, "grad_norm": 1.2330728769302368, "learning_rate": 9.692678237181474e-05, "loss": 9.7045, "step": 414 }, { "epoch": 0.1431528113142463, "grad_norm": 1.1720654964447021, "learning_rate": 9.690738121556622e-05, "loss": 9.6821, "step": 415 }, { "epoch": 0.14349775784753363, "grad_norm": 1.257949948310852, "learning_rate": 9.68879209664467e-05, "loss": 9.7019, "step": 416 }, { "epoch": 0.14384270438082097, "grad_norm": 1.2828700542449951, "learning_rate": 9.686840164897181e-05, "loss": 9.6634, "step": 417 }, { "epoch": 0.14418765091410832, "grad_norm": 1.3583167791366577, "learning_rate": 9.684882328773153e-05, "loss": 9.6357, "step": 418 }, { "epoch": 0.14453259744739566, "grad_norm": 1.24040687084198, "learning_rate": 9.682918590739022e-05, "loss": 9.7175, "step": 419 }, { "epoch": 0.144877543980683, "grad_norm": 1.2623732089996338, "learning_rate": 9.680948953268665e-05, "loss": 9.6945, "step": 420 }, { "epoch": 0.14522249051397035, "grad_norm": 1.2432429790496826, "learning_rate": 9.678973418843384e-05, "loss": 9.651, "step": 421 }, { "epoch": 0.14556743704725766, "grad_norm": 1.1109862327575684, "learning_rate": 9.676991989951917e-05, "loss": 9.6767, "step": 422 }, { "epoch": 0.145912383580545, "grad_norm": 1.350054383277893, "learning_rate": 9.67500466909042e-05, "loss": 9.6212, "step": 423 }, { "epoch": 0.14625733011383235, "grad_norm": 1.3729373216629028, "learning_rate": 9.673011458762479e-05, "loss": 9.6504, "step": 424 }, { "epoch": 0.1466022766471197, "grad_norm": 1.172640323638916, "learning_rate": 9.671012361479093e-05, "loss": 9.7148, "step": 425 }, { "epoch": 0.14694722318040704, "grad_norm": 1.4439337253570557, "learning_rate": 9.66900737975868e-05, "loss": 9.6306, "step": 426 }, { "epoch": 0.14729216971369438, "grad_norm": 1.2247321605682373, "learning_rate": 9.666996516127073e-05, "loss": 9.6421, "step": 427 }, { "epoch": 0.14763711624698173, "grad_norm": 1.4057327508926392, "learning_rate": 9.664979773117513e-05, "loss": 9.5413, "step": 428 }, { "epoch": 0.14798206278026907, "grad_norm": 1.3298213481903076, "learning_rate": 9.662957153270647e-05, "loss": 9.628, "step": 429 }, { "epoch": 0.14832700931355638, "grad_norm": 1.330397367477417, "learning_rate": 9.660928659134526e-05, "loss": 9.6531, "step": 430 }, { "epoch": 0.14867195584684373, "grad_norm": 1.2601134777069092, "learning_rate": 9.6588942932646e-05, "loss": 9.6714, "step": 431 }, { "epoch": 0.14901690238013107, "grad_norm": 1.3315186500549316, "learning_rate": 9.656854058223722e-05, "loss": 9.6103, "step": 432 }, { "epoch": 0.14936184891341842, "grad_norm": 1.4878896474838257, "learning_rate": 9.654807956582131e-05, "loss": 9.5996, "step": 433 }, { "epoch": 0.14970679544670576, "grad_norm": 1.3564176559448242, "learning_rate": 9.652755990917463e-05, "loss": 9.6551, "step": 434 }, { "epoch": 0.1500517419799931, "grad_norm": 1.5410569906234741, "learning_rate": 9.650698163814736e-05, "loss": 9.5719, "step": 435 }, { "epoch": 0.15039668851328045, "grad_norm": 1.3284558057785034, "learning_rate": 9.648634477866354e-05, "loss": 9.6312, "step": 436 }, { "epoch": 0.1507416350465678, "grad_norm": 1.3244214057922363, "learning_rate": 9.646564935672107e-05, "loss": 9.6812, "step": 437 }, { "epoch": 0.15108658157985513, "grad_norm": 1.446029782295227, "learning_rate": 9.644489539839153e-05, "loss": 9.6196, "step": 438 }, { "epoch": 0.15143152811314245, "grad_norm": 1.6580462455749512, "learning_rate": 9.64240829298203e-05, "loss": 9.519, "step": 439 }, { "epoch": 0.1517764746464298, "grad_norm": 1.436366319656372, "learning_rate": 9.64032119772265e-05, "loss": 9.6082, "step": 440 }, { "epoch": 0.15212142117971714, "grad_norm": 1.4805681705474854, "learning_rate": 9.638228256690285e-05, "loss": 9.6322, "step": 441 }, { "epoch": 0.15246636771300448, "grad_norm": 1.4746217727661133, "learning_rate": 9.636129472521577e-05, "loss": 9.6165, "step": 442 }, { "epoch": 0.15281131424629182, "grad_norm": 1.4036329984664917, "learning_rate": 9.634024847860527e-05, "loss": 9.6314, "step": 443 }, { "epoch": 0.15315626077957917, "grad_norm": 1.4564642906188965, "learning_rate": 9.631914385358495e-05, "loss": 9.5594, "step": 444 }, { "epoch": 0.1535012073128665, "grad_norm": 1.628414511680603, "learning_rate": 9.629798087674195e-05, "loss": 9.5453, "step": 445 }, { "epoch": 0.15384615384615385, "grad_norm": 1.4203163385391235, "learning_rate": 9.627675957473689e-05, "loss": 9.5673, "step": 446 }, { "epoch": 0.1541911003794412, "grad_norm": 1.4284478425979614, "learning_rate": 9.62554799743039e-05, "loss": 9.6228, "step": 447 }, { "epoch": 0.15453604691272851, "grad_norm": 1.5529612302780151, "learning_rate": 9.623414210225057e-05, "loss": 9.5866, "step": 448 }, { "epoch": 0.15488099344601586, "grad_norm": 1.741105556488037, "learning_rate": 9.621274598545785e-05, "loss": 9.5087, "step": 449 }, { "epoch": 0.1552259399793032, "grad_norm": 1.8047065734863281, "learning_rate": 9.61912916508801e-05, "loss": 9.5244, "step": 450 }, { "epoch": 0.15557088651259054, "grad_norm": 1.0891846418380737, "learning_rate": 9.6169779125545e-05, "loss": 9.6529, "step": 451 }, { "epoch": 0.1559158330458779, "grad_norm": 1.021998643875122, "learning_rate": 9.614820843655357e-05, "loss": 9.7033, "step": 452 }, { "epoch": 0.15626077957916523, "grad_norm": 1.0237458944320679, "learning_rate": 9.612657961108005e-05, "loss": 9.6524, "step": 453 }, { "epoch": 0.15660572611245258, "grad_norm": 1.085534691810608, "learning_rate": 9.610489267637199e-05, "loss": 9.6985, "step": 454 }, { "epoch": 0.15695067264573992, "grad_norm": 1.121556043624878, "learning_rate": 9.608314765975008e-05, "loss": 9.6726, "step": 455 }, { "epoch": 0.15729561917902726, "grad_norm": 1.1756961345672607, "learning_rate": 9.60613445886082e-05, "loss": 9.611, "step": 456 }, { "epoch": 0.15764056571231458, "grad_norm": 1.1245508193969727, "learning_rate": 9.60394834904134e-05, "loss": 9.6513, "step": 457 }, { "epoch": 0.15798551224560192, "grad_norm": 1.138670802116394, "learning_rate": 9.601756439270579e-05, "loss": 9.6294, "step": 458 }, { "epoch": 0.15833045877888927, "grad_norm": 1.0786954164505005, "learning_rate": 9.599558732309854e-05, "loss": 9.6677, "step": 459 }, { "epoch": 0.1586754053121766, "grad_norm": 1.1835142374038696, "learning_rate": 9.597355230927789e-05, "loss": 9.6656, "step": 460 }, { "epoch": 0.15902035184546395, "grad_norm": 1.2156816720962524, "learning_rate": 9.595145937900308e-05, "loss": 9.6536, "step": 461 }, { "epoch": 0.1593652983787513, "grad_norm": 1.2232426404953003, "learning_rate": 9.592930856010624e-05, "loss": 9.6273, "step": 462 }, { "epoch": 0.15971024491203864, "grad_norm": 1.159227967262268, "learning_rate": 9.590709988049251e-05, "loss": 9.6486, "step": 463 }, { "epoch": 0.16005519144532598, "grad_norm": 1.1979292631149292, "learning_rate": 9.58848333681399e-05, "loss": 9.6752, "step": 464 }, { "epoch": 0.16040013797861333, "grad_norm": 1.162811279296875, "learning_rate": 9.586250905109922e-05, "loss": 9.6441, "step": 465 }, { "epoch": 0.16074508451190064, "grad_norm": 1.238519549369812, "learning_rate": 9.584012695749418e-05, "loss": 9.6415, "step": 466 }, { "epoch": 0.161090031045188, "grad_norm": 1.3711762428283691, "learning_rate": 9.581768711552125e-05, "loss": 9.6418, "step": 467 }, { "epoch": 0.16143497757847533, "grad_norm": 1.235273838043213, "learning_rate": 9.579518955344961e-05, "loss": 9.6014, "step": 468 }, { "epoch": 0.16177992411176267, "grad_norm": 1.1799870729446411, "learning_rate": 9.57726342996212e-05, "loss": 9.664, "step": 469 }, { "epoch": 0.16212487064505002, "grad_norm": 1.305772066116333, "learning_rate": 9.575002138245064e-05, "loss": 9.6004, "step": 470 }, { "epoch": 0.16246981717833736, "grad_norm": 1.2880913019180298, "learning_rate": 9.572735083042516e-05, "loss": 9.6829, "step": 471 }, { "epoch": 0.1628147637116247, "grad_norm": 1.3265849351882935, "learning_rate": 9.570462267210461e-05, "loss": 9.6106, "step": 472 }, { "epoch": 0.16315971024491205, "grad_norm": 1.3995447158813477, "learning_rate": 9.568183693612143e-05, "loss": 9.6109, "step": 473 }, { "epoch": 0.1635046567781994, "grad_norm": 1.2574628591537476, "learning_rate": 9.565899365118058e-05, "loss": 9.6015, "step": 474 }, { "epoch": 0.1638496033114867, "grad_norm": 1.3726155757904053, "learning_rate": 9.563609284605953e-05, "loss": 9.5894, "step": 475 }, { "epoch": 0.16419454984477405, "grad_norm": 1.3234082460403442, "learning_rate": 9.561313454960818e-05, "loss": 9.5645, "step": 476 }, { "epoch": 0.1645394963780614, "grad_norm": 1.2199691534042358, "learning_rate": 9.559011879074888e-05, "loss": 9.6112, "step": 477 }, { "epoch": 0.16488444291134874, "grad_norm": 1.2691389322280884, "learning_rate": 9.556704559847639e-05, "loss": 9.6231, "step": 478 }, { "epoch": 0.16522938944463608, "grad_norm": 1.3256593942642212, "learning_rate": 9.554391500185777e-05, "loss": 9.6126, "step": 479 }, { "epoch": 0.16557433597792343, "grad_norm": 1.511549949645996, "learning_rate": 9.552072703003247e-05, "loss": 9.6111, "step": 480 }, { "epoch": 0.16591928251121077, "grad_norm": 1.3752213716506958, "learning_rate": 9.549748171221215e-05, "loss": 9.6003, "step": 481 }, { "epoch": 0.1662642290444981, "grad_norm": 1.3873335123062134, "learning_rate": 9.547417907768075e-05, "loss": 9.5684, "step": 482 }, { "epoch": 0.16660917557778546, "grad_norm": 1.3659286499023438, "learning_rate": 9.545081915579441e-05, "loss": 9.6035, "step": 483 }, { "epoch": 0.16695412211107277, "grad_norm": 1.575773000717163, "learning_rate": 9.542740197598144e-05, "loss": 9.6505, "step": 484 }, { "epoch": 0.16729906864436012, "grad_norm": 1.3886533975601196, "learning_rate": 9.540392756774225e-05, "loss": 9.5501, "step": 485 }, { "epoch": 0.16764401517764746, "grad_norm": 1.3898117542266846, "learning_rate": 9.538039596064943e-05, "loss": 9.6042, "step": 486 }, { "epoch": 0.1679889617109348, "grad_norm": 1.3748157024383545, "learning_rate": 9.535680718434755e-05, "loss": 9.5593, "step": 487 }, { "epoch": 0.16833390824422215, "grad_norm": 1.4047949314117432, "learning_rate": 9.533316126855319e-05, "loss": 9.5037, "step": 488 }, { "epoch": 0.1686788547775095, "grad_norm": 1.3030369281768799, "learning_rate": 9.530945824305498e-05, "loss": 9.5716, "step": 489 }, { "epoch": 0.16902380131079683, "grad_norm": 1.6138426065444946, "learning_rate": 9.528569813771346e-05, "loss": 9.4794, "step": 490 }, { "epoch": 0.16936874784408418, "grad_norm": 1.4896208047866821, "learning_rate": 9.526188098246108e-05, "loss": 9.5509, "step": 491 }, { "epoch": 0.16971369437737152, "grad_norm": 1.4523698091506958, "learning_rate": 9.523800680730214e-05, "loss": 9.5225, "step": 492 }, { "epoch": 0.17005864091065884, "grad_norm": 1.3837906122207642, "learning_rate": 9.521407564231283e-05, "loss": 9.5907, "step": 493 }, { "epoch": 0.17040358744394618, "grad_norm": 1.4787929058074951, "learning_rate": 9.519008751764107e-05, "loss": 9.4969, "step": 494 }, { "epoch": 0.17074853397723352, "grad_norm": 1.6119440793991089, "learning_rate": 9.516604246350658e-05, "loss": 9.4936, "step": 495 }, { "epoch": 0.17109348051052087, "grad_norm": 1.397890329360962, "learning_rate": 9.514194051020074e-05, "loss": 9.533, "step": 496 }, { "epoch": 0.1714384270438082, "grad_norm": 1.505786657333374, "learning_rate": 9.51177816880867e-05, "loss": 9.5489, "step": 497 }, { "epoch": 0.17178337357709555, "grad_norm": 1.5232138633728027, "learning_rate": 9.50935660275992e-05, "loss": 9.499, "step": 498 }, { "epoch": 0.1721283201103829, "grad_norm": 1.6428580284118652, "learning_rate": 9.506929355924456e-05, "loss": 9.4986, "step": 499 }, { "epoch": 0.17247326664367024, "grad_norm": 1.7673543691635132, "learning_rate": 9.504496431360071e-05, "loss": 9.4835, "step": 500 }, { "epoch": 0.17281821317695756, "grad_norm": 0.8733913898468018, "learning_rate": 9.50205783213171e-05, "loss": 9.6679, "step": 501 }, { "epoch": 0.1731631597102449, "grad_norm": 0.9659939408302307, "learning_rate": 9.499613561311465e-05, "loss": 9.647, "step": 502 }, { "epoch": 0.17350810624353225, "grad_norm": 1.0745805501937866, "learning_rate": 9.497163621978571e-05, "loss": 9.6277, "step": 503 }, { "epoch": 0.1738530527768196, "grad_norm": 1.1018327474594116, "learning_rate": 9.494708017219412e-05, "loss": 9.6958, "step": 504 }, { "epoch": 0.17419799931010693, "grad_norm": 0.9950918555259705, "learning_rate": 9.492246750127501e-05, "loss": 9.6551, "step": 505 }, { "epoch": 0.17454294584339428, "grad_norm": 1.1155328750610352, "learning_rate": 9.489779823803486e-05, "loss": 9.5963, "step": 506 }, { "epoch": 0.17488789237668162, "grad_norm": 1.182844877243042, "learning_rate": 9.487307241355148e-05, "loss": 9.6293, "step": 507 }, { "epoch": 0.17523283890996896, "grad_norm": 1.0601288080215454, "learning_rate": 9.484829005897392e-05, "loss": 9.6011, "step": 508 }, { "epoch": 0.1755777854432563, "grad_norm": 1.0896538496017456, "learning_rate": 9.482345120552242e-05, "loss": 9.6161, "step": 509 }, { "epoch": 0.17592273197654362, "grad_norm": 1.1434667110443115, "learning_rate": 9.47985558844884e-05, "loss": 9.6463, "step": 510 }, { "epoch": 0.17626767850983097, "grad_norm": 1.1508530378341675, "learning_rate": 9.477360412723447e-05, "loss": 9.6287, "step": 511 }, { "epoch": 0.1766126250431183, "grad_norm": 1.3060308694839478, "learning_rate": 9.474859596519424e-05, "loss": 9.59, "step": 512 }, { "epoch": 0.17695757157640565, "grad_norm": 1.1819398403167725, "learning_rate": 9.472353142987247e-05, "loss": 9.6123, "step": 513 }, { "epoch": 0.177302518109693, "grad_norm": 1.1345469951629639, "learning_rate": 9.46984105528449e-05, "loss": 9.5987, "step": 514 }, { "epoch": 0.17764746464298034, "grad_norm": 1.1536154747009277, "learning_rate": 9.467323336575826e-05, "loss": 9.5657, "step": 515 }, { "epoch": 0.17799241117626768, "grad_norm": 1.1551100015640259, "learning_rate": 9.464799990033018e-05, "loss": 9.6147, "step": 516 }, { "epoch": 0.17833735770955503, "grad_norm": 1.2950499057769775, "learning_rate": 9.462271018834923e-05, "loss": 9.5483, "step": 517 }, { "epoch": 0.17868230424284237, "grad_norm": 1.210809350013733, "learning_rate": 9.459736426167482e-05, "loss": 9.6072, "step": 518 }, { "epoch": 0.1790272507761297, "grad_norm": 1.1476471424102783, "learning_rate": 9.457196215223721e-05, "loss": 9.6183, "step": 519 }, { "epoch": 0.17937219730941703, "grad_norm": 1.2517257928848267, "learning_rate": 9.454650389203735e-05, "loss": 9.5937, "step": 520 }, { "epoch": 0.17971714384270437, "grad_norm": 1.2335177659988403, "learning_rate": 9.452098951314704e-05, "loss": 9.6255, "step": 521 }, { "epoch": 0.18006209037599172, "grad_norm": 1.3397551774978638, "learning_rate": 9.44954190477087e-05, "loss": 9.5745, "step": 522 }, { "epoch": 0.18040703690927906, "grad_norm": 1.2667393684387207, "learning_rate": 9.446979252793547e-05, "loss": 9.5642, "step": 523 }, { "epoch": 0.1807519834425664, "grad_norm": 1.269653081893921, "learning_rate": 9.444410998611099e-05, "loss": 9.5399, "step": 524 }, { "epoch": 0.18109692997585375, "grad_norm": 1.3198039531707764, "learning_rate": 9.441837145458962e-05, "loss": 9.5648, "step": 525 }, { "epoch": 0.1814418765091411, "grad_norm": 1.27424156665802, "learning_rate": 9.439257696579617e-05, "loss": 9.5742, "step": 526 }, { "epoch": 0.18178682304242844, "grad_norm": 1.2845133543014526, "learning_rate": 9.436672655222596e-05, "loss": 9.5993, "step": 527 }, { "epoch": 0.18213176957571575, "grad_norm": 1.2665520906448364, "learning_rate": 9.434082024644476e-05, "loss": 9.5995, "step": 528 }, { "epoch": 0.1824767161090031, "grad_norm": 1.2851994037628174, "learning_rate": 9.431485808108875e-05, "loss": 9.5827, "step": 529 }, { "epoch": 0.18282166264229044, "grad_norm": 1.4341552257537842, "learning_rate": 9.42888400888645e-05, "loss": 9.5422, "step": 530 }, { "epoch": 0.18316660917557778, "grad_norm": 1.3948688507080078, "learning_rate": 9.426276630254887e-05, "loss": 9.5377, "step": 531 }, { "epoch": 0.18351155570886513, "grad_norm": 1.3061037063598633, "learning_rate": 9.423663675498907e-05, "loss": 9.5961, "step": 532 }, { "epoch": 0.18385650224215247, "grad_norm": 1.6419034004211426, "learning_rate": 9.421045147910248e-05, "loss": 9.4838, "step": 533 }, { "epoch": 0.1842014487754398, "grad_norm": 1.4868634939193726, "learning_rate": 9.418421050787675e-05, "loss": 9.4782, "step": 534 }, { "epoch": 0.18454639530872716, "grad_norm": 1.3961265087127686, "learning_rate": 9.415791387436968e-05, "loss": 9.5505, "step": 535 }, { "epoch": 0.1848913418420145, "grad_norm": 1.2420654296875, "learning_rate": 9.413156161170913e-05, "loss": 9.5702, "step": 536 }, { "epoch": 0.18523628837530182, "grad_norm": 1.4459741115570068, "learning_rate": 9.410515375309316e-05, "loss": 9.5742, "step": 537 }, { "epoch": 0.18558123490858916, "grad_norm": 1.3767470121383667, "learning_rate": 9.407869033178978e-05, "loss": 9.5521, "step": 538 }, { "epoch": 0.1859261814418765, "grad_norm": 1.399837851524353, "learning_rate": 9.405217138113698e-05, "loss": 9.49, "step": 539 }, { "epoch": 0.18627112797516385, "grad_norm": 1.2332874536514282, "learning_rate": 9.402559693454278e-05, "loss": 9.5674, "step": 540 }, { "epoch": 0.1866160745084512, "grad_norm": 1.266040325164795, "learning_rate": 9.399896702548506e-05, "loss": 9.6078, "step": 541 }, { "epoch": 0.18696102104173853, "grad_norm": 1.3650012016296387, "learning_rate": 9.397228168751159e-05, "loss": 9.5375, "step": 542 }, { "epoch": 0.18730596757502588, "grad_norm": 1.4342894554138184, "learning_rate": 9.394554095423995e-05, "loss": 9.517, "step": 543 }, { "epoch": 0.18765091410831322, "grad_norm": 1.5103857517242432, "learning_rate": 9.391874485935753e-05, "loss": 9.4914, "step": 544 }, { "epoch": 0.18799586064160057, "grad_norm": 1.5459791421890259, "learning_rate": 9.389189343662146e-05, "loss": 9.5011, "step": 545 }, { "epoch": 0.18834080717488788, "grad_norm": 1.4449481964111328, "learning_rate": 9.386498671985852e-05, "loss": 9.4861, "step": 546 }, { "epoch": 0.18868575370817522, "grad_norm": 1.6004838943481445, "learning_rate": 9.383802474296526e-05, "loss": 9.4491, "step": 547 }, { "epoch": 0.18903070024146257, "grad_norm": 1.5383483171463013, "learning_rate": 9.381100753990773e-05, "loss": 9.5136, "step": 548 }, { "epoch": 0.1893756467747499, "grad_norm": 1.8076226711273193, "learning_rate": 9.37839351447216e-05, "loss": 9.4498, "step": 549 }, { "epoch": 0.18972059330803726, "grad_norm": 1.745660662651062, "learning_rate": 9.375680759151206e-05, "loss": 9.4381, "step": 550 }, { "epoch": 0.1900655398413246, "grad_norm": 0.9159271717071533, "learning_rate": 9.372962491445384e-05, "loss": 9.6563, "step": 551 }, { "epoch": 0.19041048637461194, "grad_norm": 0.9410980343818665, "learning_rate": 9.370238714779103e-05, "loss": 9.6111, "step": 552 }, { "epoch": 0.19075543290789929, "grad_norm": 1.0490546226501465, "learning_rate": 9.367509432583717e-05, "loss": 9.5987, "step": 553 }, { "epoch": 0.19110037944118663, "grad_norm": 1.0307831764221191, "learning_rate": 9.364774648297514e-05, "loss": 9.6333, "step": 554 }, { "epoch": 0.19144532597447395, "grad_norm": 1.148796796798706, "learning_rate": 9.362034365365717e-05, "loss": 9.6337, "step": 555 }, { "epoch": 0.1917902725077613, "grad_norm": 1.0962300300598145, "learning_rate": 9.35928858724047e-05, "loss": 9.605, "step": 556 }, { "epoch": 0.19213521904104863, "grad_norm": 1.1664516925811768, "learning_rate": 9.356537317380844e-05, "loss": 9.6228, "step": 557 }, { "epoch": 0.19248016557433598, "grad_norm": 1.0752300024032593, "learning_rate": 9.353780559252827e-05, "loss": 9.6528, "step": 558 }, { "epoch": 0.19282511210762332, "grad_norm": 1.3476428985595703, "learning_rate": 9.351018316329323e-05, "loss": 9.5958, "step": 559 }, { "epoch": 0.19317005864091066, "grad_norm": 1.0217633247375488, "learning_rate": 9.348250592090142e-05, "loss": 9.6622, "step": 560 }, { "epoch": 0.193515005174198, "grad_norm": 1.2857102155685425, "learning_rate": 9.345477390022002e-05, "loss": 9.5979, "step": 561 }, { "epoch": 0.19385995170748535, "grad_norm": 1.2263622283935547, "learning_rate": 9.342698713618521e-05, "loss": 9.5967, "step": 562 }, { "epoch": 0.19420489824077267, "grad_norm": 1.1881204843521118, "learning_rate": 9.339914566380212e-05, "loss": 9.5559, "step": 563 }, { "epoch": 0.19454984477406, "grad_norm": 1.288617730140686, "learning_rate": 9.337124951814485e-05, "loss": 9.5391, "step": 564 }, { "epoch": 0.19489479130734735, "grad_norm": 1.1224907636642456, "learning_rate": 9.33432987343563e-05, "loss": 9.59, "step": 565 }, { "epoch": 0.1952397378406347, "grad_norm": 1.3285510540008545, "learning_rate": 9.331529334764827e-05, "loss": 9.5726, "step": 566 }, { "epoch": 0.19558468437392204, "grad_norm": 1.209932804107666, "learning_rate": 9.328723339330132e-05, "loss": 9.585, "step": 567 }, { "epoch": 0.19592963090720938, "grad_norm": 1.1799026727676392, "learning_rate": 9.325911890666473e-05, "loss": 9.5934, "step": 568 }, { "epoch": 0.19627457744049673, "grad_norm": 1.3268376588821411, "learning_rate": 9.323094992315653e-05, "loss": 9.5686, "step": 569 }, { "epoch": 0.19661952397378407, "grad_norm": 1.270527958869934, "learning_rate": 9.320272647826336e-05, "loss": 9.5149, "step": 570 }, { "epoch": 0.19696447050707142, "grad_norm": 1.3913733959197998, "learning_rate": 9.31744486075405e-05, "loss": 9.5098, "step": 571 }, { "epoch": 0.19730941704035873, "grad_norm": 1.1947466135025024, "learning_rate": 9.314611634661177e-05, "loss": 9.6244, "step": 572 }, { "epoch": 0.19765436357364607, "grad_norm": 1.284228801727295, "learning_rate": 9.31177297311695e-05, "loss": 9.5676, "step": 573 }, { "epoch": 0.19799931010693342, "grad_norm": 1.313984751701355, "learning_rate": 9.308928879697456e-05, "loss": 9.5131, "step": 574 }, { "epoch": 0.19834425664022076, "grad_norm": 1.3125852346420288, "learning_rate": 9.306079357985616e-05, "loss": 9.5441, "step": 575 }, { "epoch": 0.1986892031735081, "grad_norm": 1.3541791439056396, "learning_rate": 9.303224411571195e-05, "loss": 9.506, "step": 576 }, { "epoch": 0.19903414970679545, "grad_norm": 1.4502359628677368, "learning_rate": 9.300364044050794e-05, "loss": 9.5573, "step": 577 }, { "epoch": 0.1993790962400828, "grad_norm": 1.2596427202224731, "learning_rate": 9.297498259027834e-05, "loss": 9.5027, "step": 578 }, { "epoch": 0.19972404277337014, "grad_norm": 1.1951390504837036, "learning_rate": 9.294627060112572e-05, "loss": 9.5988, "step": 579 }, { "epoch": 0.20006898930665748, "grad_norm": 1.3697572946548462, "learning_rate": 9.291750450922078e-05, "loss": 9.5958, "step": 580 }, { "epoch": 0.2004139358399448, "grad_norm": 1.2890172004699707, "learning_rate": 9.28886843508024e-05, "loss": 9.5881, "step": 581 }, { "epoch": 0.20075888237323214, "grad_norm": 1.365981936454773, "learning_rate": 9.285981016217759e-05, "loss": 9.5745, "step": 582 }, { "epoch": 0.20110382890651948, "grad_norm": 1.371512532234192, "learning_rate": 9.283088197972138e-05, "loss": 9.5246, "step": 583 }, { "epoch": 0.20144877543980683, "grad_norm": 1.427450180053711, "learning_rate": 9.280189983987688e-05, "loss": 9.555, "step": 584 }, { "epoch": 0.20179372197309417, "grad_norm": 1.450803518295288, "learning_rate": 9.277286377915512e-05, "loss": 9.5132, "step": 585 }, { "epoch": 0.2021386685063815, "grad_norm": 1.3853474855422974, "learning_rate": 9.274377383413508e-05, "loss": 9.5363, "step": 586 }, { "epoch": 0.20248361503966886, "grad_norm": 1.3772315979003906, "learning_rate": 9.271463004146363e-05, "loss": 9.5506, "step": 587 }, { "epoch": 0.2028285615729562, "grad_norm": 1.4147601127624512, "learning_rate": 9.268543243785547e-05, "loss": 9.4964, "step": 588 }, { "epoch": 0.20317350810624354, "grad_norm": 1.4551278352737427, "learning_rate": 9.265618106009308e-05, "loss": 9.5859, "step": 589 }, { "epoch": 0.20351845463953086, "grad_norm": 1.3521373271942139, "learning_rate": 9.262687594502671e-05, "loss": 9.5175, "step": 590 }, { "epoch": 0.2038634011728182, "grad_norm": 1.4414533376693726, "learning_rate": 9.259751712957428e-05, "loss": 9.454, "step": 591 }, { "epoch": 0.20420834770610555, "grad_norm": 1.4528087377548218, "learning_rate": 9.256810465072136e-05, "loss": 9.4803, "step": 592 }, { "epoch": 0.2045532942393929, "grad_norm": 1.6504982709884644, "learning_rate": 9.253863854552113e-05, "loss": 9.4405, "step": 593 }, { "epoch": 0.20489824077268023, "grad_norm": 1.3925122022628784, "learning_rate": 9.250911885109436e-05, "loss": 9.5588, "step": 594 }, { "epoch": 0.20524318730596758, "grad_norm": 1.4524890184402466, "learning_rate": 9.247954560462929e-05, "loss": 9.4648, "step": 595 }, { "epoch": 0.20558813383925492, "grad_norm": 1.4536553621292114, "learning_rate": 9.244991884338161e-05, "loss": 9.4829, "step": 596 }, { "epoch": 0.20593308037254227, "grad_norm": 1.6968779563903809, "learning_rate": 9.242023860467448e-05, "loss": 9.4031, "step": 597 }, { "epoch": 0.2062780269058296, "grad_norm": 1.6069278717041016, "learning_rate": 9.239050492589838e-05, "loss": 9.432, "step": 598 }, { "epoch": 0.20662297343911692, "grad_norm": 1.5400954484939575, "learning_rate": 9.236071784451117e-05, "loss": 9.4716, "step": 599 }, { "epoch": 0.20696791997240427, "grad_norm": 1.7635220289230347, "learning_rate": 9.233087739803792e-05, "loss": 9.3843, "step": 600 }, { "epoch": 0.2073128665056916, "grad_norm": 0.9172026515007019, "learning_rate": 9.230098362407095e-05, "loss": 9.6446, "step": 601 }, { "epoch": 0.20765781303897896, "grad_norm": 1.095590591430664, "learning_rate": 9.227103656026981e-05, "loss": 9.5951, "step": 602 }, { "epoch": 0.2080027595722663, "grad_norm": 1.0268243551254272, "learning_rate": 9.22410362443611e-05, "loss": 9.5898, "step": 603 }, { "epoch": 0.20834770610555364, "grad_norm": 1.0711814165115356, "learning_rate": 9.221098271413855e-05, "loss": 9.6209, "step": 604 }, { "epoch": 0.208692652638841, "grad_norm": 1.1392238140106201, "learning_rate": 9.218087600746297e-05, "loss": 9.5681, "step": 605 }, { "epoch": 0.20903759917212833, "grad_norm": 1.0311208963394165, "learning_rate": 9.215071616226207e-05, "loss": 9.6356, "step": 606 }, { "epoch": 0.20938254570541567, "grad_norm": 1.1314677000045776, "learning_rate": 9.212050321653056e-05, "loss": 9.5649, "step": 607 }, { "epoch": 0.209727492238703, "grad_norm": 1.0471137762069702, "learning_rate": 9.209023720833005e-05, "loss": 9.5673, "step": 608 }, { "epoch": 0.21007243877199033, "grad_norm": 1.2574738264083862, "learning_rate": 9.205991817578896e-05, "loss": 9.5615, "step": 609 }, { "epoch": 0.21041738530527768, "grad_norm": 1.195786476135254, "learning_rate": 9.202954615710256e-05, "loss": 9.5889, "step": 610 }, { "epoch": 0.21076233183856502, "grad_norm": 1.2348692417144775, "learning_rate": 9.19991211905328e-05, "loss": 9.5197, "step": 611 }, { "epoch": 0.21110727837185236, "grad_norm": 1.2200709581375122, "learning_rate": 9.19686433144084e-05, "loss": 9.6073, "step": 612 }, { "epoch": 0.2114522249051397, "grad_norm": 1.230139970779419, "learning_rate": 9.19381125671247e-05, "loss": 9.5837, "step": 613 }, { "epoch": 0.21179717143842705, "grad_norm": 1.3068007230758667, "learning_rate": 9.190752898714365e-05, "loss": 9.5385, "step": 614 }, { "epoch": 0.2121421179717144, "grad_norm": 1.3114107847213745, "learning_rate": 9.187689261299377e-05, "loss": 9.5578, "step": 615 }, { "epoch": 0.21248706450500174, "grad_norm": 1.1390831470489502, "learning_rate": 9.184620348327008e-05, "loss": 9.5274, "step": 616 }, { "epoch": 0.21283201103828905, "grad_norm": 1.1909688711166382, "learning_rate": 9.181546163663406e-05, "loss": 9.5915, "step": 617 }, { "epoch": 0.2131769575715764, "grad_norm": 1.1962287425994873, "learning_rate": 9.17846671118136e-05, "loss": 9.5722, "step": 618 }, { "epoch": 0.21352190410486374, "grad_norm": 1.2285292148590088, "learning_rate": 9.175381994760294e-05, "loss": 9.5933, "step": 619 }, { "epoch": 0.21386685063815108, "grad_norm": 1.3546727895736694, "learning_rate": 9.172292018286266e-05, "loss": 9.5063, "step": 620 }, { "epoch": 0.21421179717143843, "grad_norm": 1.3017284870147705, "learning_rate": 9.169196785651959e-05, "loss": 9.5152, "step": 621 }, { "epoch": 0.21455674370472577, "grad_norm": 1.3723574876785278, "learning_rate": 9.16609630075668e-05, "loss": 9.5051, "step": 622 }, { "epoch": 0.21490169023801312, "grad_norm": 1.2360388040542603, "learning_rate": 9.162990567506347e-05, "loss": 9.5724, "step": 623 }, { "epoch": 0.21524663677130046, "grad_norm": 1.2957721948623657, "learning_rate": 9.159879589813495e-05, "loss": 9.5246, "step": 624 }, { "epoch": 0.2155915833045878, "grad_norm": 1.430680751800537, "learning_rate": 9.156763371597267e-05, "loss": 9.5223, "step": 625 }, { "epoch": 0.21593652983787512, "grad_norm": 1.2834513187408447, "learning_rate": 9.1536419167834e-05, "loss": 9.539, "step": 626 }, { "epoch": 0.21628147637116246, "grad_norm": 1.2917267084121704, "learning_rate": 9.150515229304238e-05, "loss": 9.5258, "step": 627 }, { "epoch": 0.2166264229044498, "grad_norm": 1.3878052234649658, "learning_rate": 9.147383313098707e-05, "loss": 9.5223, "step": 628 }, { "epoch": 0.21697136943773715, "grad_norm": 1.3863571882247925, "learning_rate": 9.14424617211233e-05, "loss": 9.4875, "step": 629 }, { "epoch": 0.2173163159710245, "grad_norm": 1.3569782972335815, "learning_rate": 9.141103810297203e-05, "loss": 9.4716, "step": 630 }, { "epoch": 0.21766126250431184, "grad_norm": 1.3145636320114136, "learning_rate": 9.137956231612007e-05, "loss": 9.5432, "step": 631 }, { "epoch": 0.21800620903759918, "grad_norm": 1.2919973134994507, "learning_rate": 9.134803440021986e-05, "loss": 9.57, "step": 632 }, { "epoch": 0.21835115557088652, "grad_norm": 1.4465949535369873, "learning_rate": 9.131645439498963e-05, "loss": 9.4696, "step": 633 }, { "epoch": 0.21869610210417384, "grad_norm": 1.4450429677963257, "learning_rate": 9.128482234021311e-05, "loss": 9.4718, "step": 634 }, { "epoch": 0.21904104863746118, "grad_norm": 1.1756001710891724, "learning_rate": 9.12531382757397e-05, "loss": 9.6019, "step": 635 }, { "epoch": 0.21938599517074853, "grad_norm": 1.45283043384552, "learning_rate": 9.122140224148426e-05, "loss": 9.4854, "step": 636 }, { "epoch": 0.21973094170403587, "grad_norm": 1.4676939249038696, "learning_rate": 9.11896142774271e-05, "loss": 9.5498, "step": 637 }, { "epoch": 0.22007588823732321, "grad_norm": 1.3562549352645874, "learning_rate": 9.115777442361404e-05, "loss": 9.514, "step": 638 }, { "epoch": 0.22042083477061056, "grad_norm": 1.3062798976898193, "learning_rate": 9.11258827201562e-05, "loss": 9.4851, "step": 639 }, { "epoch": 0.2207657813038979, "grad_norm": 1.41761314868927, "learning_rate": 9.109393920723002e-05, "loss": 9.5105, "step": 640 }, { "epoch": 0.22111072783718524, "grad_norm": 1.4332202672958374, "learning_rate": 9.106194392507725e-05, "loss": 9.5448, "step": 641 }, { "epoch": 0.2214556743704726, "grad_norm": 1.3489359617233276, "learning_rate": 9.10298969140048e-05, "loss": 9.4664, "step": 642 }, { "epoch": 0.2218006209037599, "grad_norm": 1.4809479713439941, "learning_rate": 9.09977982143848e-05, "loss": 9.5064, "step": 643 }, { "epoch": 0.22214556743704725, "grad_norm": 1.3503596782684326, "learning_rate": 9.096564786665448e-05, "loss": 9.5502, "step": 644 }, { "epoch": 0.2224905139703346, "grad_norm": 1.4867632389068604, "learning_rate": 9.093344591131612e-05, "loss": 9.4481, "step": 645 }, { "epoch": 0.22283546050362193, "grad_norm": 1.5679457187652588, "learning_rate": 9.090119238893702e-05, "loss": 9.4593, "step": 646 }, { "epoch": 0.22318040703690928, "grad_norm": 1.50774085521698, "learning_rate": 9.086888734014945e-05, "loss": 9.4557, "step": 647 }, { "epoch": 0.22352535357019662, "grad_norm": 1.6264348030090332, "learning_rate": 9.083653080565058e-05, "loss": 9.4339, "step": 648 }, { "epoch": 0.22387030010348397, "grad_norm": 1.7029718160629272, "learning_rate": 9.080412282620248e-05, "loss": 9.416, "step": 649 }, { "epoch": 0.2242152466367713, "grad_norm": 1.7485153675079346, "learning_rate": 9.077166344263197e-05, "loss": 9.3769, "step": 650 }, { "epoch": 0.22456019317005865, "grad_norm": 0.9622684121131897, "learning_rate": 9.073915269583067e-05, "loss": 9.5603, "step": 651 }, { "epoch": 0.22490513970334597, "grad_norm": 1.1377094984054565, "learning_rate": 9.070659062675487e-05, "loss": 9.5554, "step": 652 }, { "epoch": 0.2252500862366333, "grad_norm": 0.9533647894859314, "learning_rate": 9.067397727642557e-05, "loss": 9.57, "step": 653 }, { "epoch": 0.22559503276992066, "grad_norm": 1.1317623853683472, "learning_rate": 9.064131268592831e-05, "loss": 9.5492, "step": 654 }, { "epoch": 0.225939979303208, "grad_norm": 1.1853982210159302, "learning_rate": 9.060859689641323e-05, "loss": 9.5454, "step": 655 }, { "epoch": 0.22628492583649534, "grad_norm": 1.0806140899658203, "learning_rate": 9.057582994909496e-05, "loss": 9.6048, "step": 656 }, { "epoch": 0.2266298723697827, "grad_norm": 1.0558366775512695, "learning_rate": 9.054301188525254e-05, "loss": 9.5815, "step": 657 }, { "epoch": 0.22697481890307003, "grad_norm": 1.124320387840271, "learning_rate": 9.051014274622947e-05, "loss": 9.5883, "step": 658 }, { "epoch": 0.22731976543635737, "grad_norm": 1.1820127964019775, "learning_rate": 9.04772225734335e-05, "loss": 9.5637, "step": 659 }, { "epoch": 0.22766471196964472, "grad_norm": 1.1382235288619995, "learning_rate": 9.044425140833679e-05, "loss": 9.5761, "step": 660 }, { "epoch": 0.22800965850293203, "grad_norm": 1.2138105630874634, "learning_rate": 9.041122929247566e-05, "loss": 9.5104, "step": 661 }, { "epoch": 0.22835460503621938, "grad_norm": 1.2376949787139893, "learning_rate": 9.037815626745059e-05, "loss": 9.538, "step": 662 }, { "epoch": 0.22869955156950672, "grad_norm": 1.0739209651947021, "learning_rate": 9.03450323749263e-05, "loss": 9.5949, "step": 663 }, { "epoch": 0.22904449810279406, "grad_norm": 1.1784121990203857, "learning_rate": 9.031185765663149e-05, "loss": 9.5563, "step": 664 }, { "epoch": 0.2293894446360814, "grad_norm": 1.3404618501663208, "learning_rate": 9.027863215435895e-05, "loss": 9.4811, "step": 665 }, { "epoch": 0.22973439116936875, "grad_norm": 1.2364481687545776, "learning_rate": 9.024535590996541e-05, "loss": 9.5474, "step": 666 }, { "epoch": 0.2300793377026561, "grad_norm": 1.3062660694122314, "learning_rate": 9.021202896537155e-05, "loss": 9.5248, "step": 667 }, { "epoch": 0.23042428423594344, "grad_norm": 1.19839346408844, "learning_rate": 9.017865136256191e-05, "loss": 9.5374, "step": 668 }, { "epoch": 0.23076923076923078, "grad_norm": 1.3458032608032227, "learning_rate": 9.014522314358484e-05, "loss": 9.4901, "step": 669 }, { "epoch": 0.2311141773025181, "grad_norm": 1.2809069156646729, "learning_rate": 9.011174435055247e-05, "loss": 9.5303, "step": 670 }, { "epoch": 0.23145912383580544, "grad_norm": 1.2737077474594116, "learning_rate": 9.007821502564064e-05, "loss": 9.5581, "step": 671 }, { "epoch": 0.23180407036909279, "grad_norm": 1.205348014831543, "learning_rate": 9.004463521108884e-05, "loss": 9.562, "step": 672 }, { "epoch": 0.23214901690238013, "grad_norm": 1.344906210899353, "learning_rate": 9.001100494920016e-05, "loss": 9.4788, "step": 673 }, { "epoch": 0.23249396343566747, "grad_norm": 1.2621089220046997, "learning_rate": 8.997732428234126e-05, "loss": 9.5728, "step": 674 }, { "epoch": 0.23283890996895482, "grad_norm": 1.2624657154083252, "learning_rate": 8.99435932529423e-05, "loss": 9.5437, "step": 675 }, { "epoch": 0.23318385650224216, "grad_norm": 1.3407676219940186, "learning_rate": 8.990981190349688e-05, "loss": 9.5087, "step": 676 }, { "epoch": 0.2335288030355295, "grad_norm": 1.365085244178772, "learning_rate": 8.987598027656197e-05, "loss": 9.4752, "step": 677 }, { "epoch": 0.23387374956881685, "grad_norm": 1.2666501998901367, "learning_rate": 8.984209841475791e-05, "loss": 9.5497, "step": 678 }, { "epoch": 0.23421869610210416, "grad_norm": 1.328727126121521, "learning_rate": 8.98081663607683e-05, "loss": 9.5032, "step": 679 }, { "epoch": 0.2345636426353915, "grad_norm": 1.334533929824829, "learning_rate": 8.977418415734002e-05, "loss": 9.5215, "step": 680 }, { "epoch": 0.23490858916867885, "grad_norm": 1.4917492866516113, "learning_rate": 8.974015184728304e-05, "loss": 9.4658, "step": 681 }, { "epoch": 0.2352535357019662, "grad_norm": 1.4826164245605469, "learning_rate": 8.970606947347054e-05, "loss": 9.4797, "step": 682 }, { "epoch": 0.23559848223525354, "grad_norm": 1.3176090717315674, "learning_rate": 8.967193707883872e-05, "loss": 9.4901, "step": 683 }, { "epoch": 0.23594342876854088, "grad_norm": 1.3593602180480957, "learning_rate": 8.963775470638682e-05, "loss": 9.459, "step": 684 }, { "epoch": 0.23628837530182822, "grad_norm": 1.395217776298523, "learning_rate": 8.960352239917699e-05, "loss": 9.5029, "step": 685 }, { "epoch": 0.23663332183511557, "grad_norm": 1.5413119792938232, "learning_rate": 8.956924020033439e-05, "loss": 9.4326, "step": 686 }, { "epoch": 0.2369782683684029, "grad_norm": 1.3983765840530396, "learning_rate": 8.953490815304692e-05, "loss": 9.5601, "step": 687 }, { "epoch": 0.23732321490169023, "grad_norm": 1.2415794134140015, "learning_rate": 8.950052630056534e-05, "loss": 9.5054, "step": 688 }, { "epoch": 0.23766816143497757, "grad_norm": 1.4755887985229492, "learning_rate": 8.946609468620313e-05, "loss": 9.5442, "step": 689 }, { "epoch": 0.23801310796826491, "grad_norm": 1.4634833335876465, "learning_rate": 8.94316133533365e-05, "loss": 9.4095, "step": 690 }, { "epoch": 0.23835805450155226, "grad_norm": 1.5690680742263794, "learning_rate": 8.939708234540423e-05, "loss": 9.4441, "step": 691 }, { "epoch": 0.2387030010348396, "grad_norm": 1.4769717454910278, "learning_rate": 8.936250170590772e-05, "loss": 9.4923, "step": 692 }, { "epoch": 0.23904794756812695, "grad_norm": 1.3412718772888184, "learning_rate": 8.932787147841089e-05, "loss": 9.5002, "step": 693 }, { "epoch": 0.2393928941014143, "grad_norm": 1.539059042930603, "learning_rate": 8.929319170654013e-05, "loss": 9.416, "step": 694 }, { "epoch": 0.23973784063470163, "grad_norm": 1.5159481763839722, "learning_rate": 8.925846243398423e-05, "loss": 9.4452, "step": 695 }, { "epoch": 0.24008278716798895, "grad_norm": 1.5319547653198242, "learning_rate": 8.922368370449438e-05, "loss": 9.4928, "step": 696 }, { "epoch": 0.2404277337012763, "grad_norm": 1.6070092916488647, "learning_rate": 8.918885556188401e-05, "loss": 9.4612, "step": 697 }, { "epoch": 0.24077268023456364, "grad_norm": 1.4909985065460205, "learning_rate": 8.915397805002887e-05, "loss": 9.4975, "step": 698 }, { "epoch": 0.24111762676785098, "grad_norm": 1.5712077617645264, "learning_rate": 8.911905121286684e-05, "loss": 9.4124, "step": 699 }, { "epoch": 0.24146257330113832, "grad_norm": 1.7502754926681519, "learning_rate": 8.9084075094398e-05, "loss": 9.3159, "step": 700 }, { "epoch": 0.24180751983442567, "grad_norm": 0.972281813621521, "learning_rate": 8.904904973868445e-05, "loss": 9.5847, "step": 701 }, { "epoch": 0.242152466367713, "grad_norm": 0.9288579821586609, "learning_rate": 8.901397518985038e-05, "loss": 9.6492, "step": 702 }, { "epoch": 0.24249741290100035, "grad_norm": 1.1141493320465088, "learning_rate": 8.89788514920819e-05, "loss": 9.55, "step": 703 }, { "epoch": 0.2428423594342877, "grad_norm": 1.0111804008483887, "learning_rate": 8.894367868962707e-05, "loss": 9.6488, "step": 704 }, { "epoch": 0.243187305967575, "grad_norm": 1.0936983823776245, "learning_rate": 8.890845682679579e-05, "loss": 9.5835, "step": 705 }, { "epoch": 0.24353225250086236, "grad_norm": 1.1165025234222412, "learning_rate": 8.887318594795982e-05, "loss": 9.5752, "step": 706 }, { "epoch": 0.2438771990341497, "grad_norm": 1.2050586938858032, "learning_rate": 8.883786609755255e-05, "loss": 9.5348, "step": 707 }, { "epoch": 0.24422214556743704, "grad_norm": 1.1180025339126587, "learning_rate": 8.88024973200692e-05, "loss": 9.6259, "step": 708 }, { "epoch": 0.2445670921007244, "grad_norm": 1.1547380685806274, "learning_rate": 8.876707966006656e-05, "loss": 9.5371, "step": 709 }, { "epoch": 0.24491203863401173, "grad_norm": 1.218729853630066, "learning_rate": 8.873161316216296e-05, "loss": 9.541, "step": 710 }, { "epoch": 0.24525698516729907, "grad_norm": 1.1403273344039917, "learning_rate": 8.869609787103836e-05, "loss": 9.5757, "step": 711 }, { "epoch": 0.24560193170058642, "grad_norm": 1.2201348543167114, "learning_rate": 8.86605338314341e-05, "loss": 9.5228, "step": 712 }, { "epoch": 0.24594687823387376, "grad_norm": 1.096950888633728, "learning_rate": 8.862492108815297e-05, "loss": 9.5398, "step": 713 }, { "epoch": 0.24629182476716108, "grad_norm": 0.9905299544334412, "learning_rate": 8.858925968605909e-05, "loss": 9.6028, "step": 714 }, { "epoch": 0.24663677130044842, "grad_norm": 1.2241541147232056, "learning_rate": 8.855354967007793e-05, "loss": 9.5068, "step": 715 }, { "epoch": 0.24698171783373576, "grad_norm": 1.3112493753433228, "learning_rate": 8.851779108519615e-05, "loss": 9.4628, "step": 716 }, { "epoch": 0.2473266643670231, "grad_norm": 1.1782866716384888, "learning_rate": 8.848198397646161e-05, "loss": 9.5325, "step": 717 }, { "epoch": 0.24767161090031045, "grad_norm": 1.3042865991592407, "learning_rate": 8.844612838898334e-05, "loss": 9.5256, "step": 718 }, { "epoch": 0.2480165574335978, "grad_norm": 1.2344504594802856, "learning_rate": 8.841022436793135e-05, "loss": 9.5835, "step": 719 }, { "epoch": 0.24836150396688514, "grad_norm": 1.2775154113769531, "learning_rate": 8.837427195853679e-05, "loss": 9.5256, "step": 720 }, { "epoch": 0.24870645050017248, "grad_norm": 1.3229973316192627, "learning_rate": 8.833827120609166e-05, "loss": 9.4894, "step": 721 }, { "epoch": 0.24905139703345983, "grad_norm": 1.1892383098602295, "learning_rate": 8.83022221559489e-05, "loss": 9.5601, "step": 722 }, { "epoch": 0.24939634356674714, "grad_norm": 1.3059921264648438, "learning_rate": 8.826612485352232e-05, "loss": 9.5064, "step": 723 }, { "epoch": 0.24974129010003449, "grad_norm": 1.1932693719863892, "learning_rate": 8.822997934428649e-05, "loss": 9.6054, "step": 724 }, { "epoch": 0.25008623663332186, "grad_norm": 1.3107818365097046, "learning_rate": 8.819378567377672e-05, "loss": 9.5453, "step": 725 }, { "epoch": 0.2504311831666092, "grad_norm": 1.4025793075561523, "learning_rate": 8.815754388758897e-05, "loss": 9.4636, "step": 726 }, { "epoch": 0.2507761296998965, "grad_norm": 1.3353124856948853, "learning_rate": 8.812125403137985e-05, "loss": 9.49, "step": 727 }, { "epoch": 0.25112107623318386, "grad_norm": 1.2831143140792847, "learning_rate": 8.808491615086649e-05, "loss": 9.5327, "step": 728 }, { "epoch": 0.2514660227664712, "grad_norm": 1.396451711654663, "learning_rate": 8.804853029182656e-05, "loss": 9.4794, "step": 729 }, { "epoch": 0.25181096929975855, "grad_norm": 1.2553220987319946, "learning_rate": 8.801209650009814e-05, "loss": 9.4996, "step": 730 }, { "epoch": 0.25215591583304586, "grad_norm": 1.3066761493682861, "learning_rate": 8.797561482157973e-05, "loss": 9.5276, "step": 731 }, { "epoch": 0.25250086236633323, "grad_norm": 1.443993091583252, "learning_rate": 8.79390853022301e-05, "loss": 9.4559, "step": 732 }, { "epoch": 0.25284580889962055, "grad_norm": 1.328656554222107, "learning_rate": 8.790250798806833e-05, "loss": 9.4936, "step": 733 }, { "epoch": 0.2531907554329079, "grad_norm": 1.3747369050979614, "learning_rate": 8.786588292517373e-05, "loss": 9.4605, "step": 734 }, { "epoch": 0.25353570196619524, "grad_norm": 1.3007593154907227, "learning_rate": 8.782921015968569e-05, "loss": 9.5227, "step": 735 }, { "epoch": 0.25388064849948255, "grad_norm": 1.508450984954834, "learning_rate": 8.779248973780378e-05, "loss": 9.4072, "step": 736 }, { "epoch": 0.2542255950327699, "grad_norm": 1.3896534442901611, "learning_rate": 8.775572170578757e-05, "loss": 9.4753, "step": 737 }, { "epoch": 0.25457054156605724, "grad_norm": 1.4728200435638428, "learning_rate": 8.771890610995659e-05, "loss": 9.4467, "step": 738 }, { "epoch": 0.2549154880993446, "grad_norm": 1.3297967910766602, "learning_rate": 8.768204299669029e-05, "loss": 9.5018, "step": 739 }, { "epoch": 0.2552604346326319, "grad_norm": 1.454687476158142, "learning_rate": 8.764513241242802e-05, "loss": 9.5259, "step": 740 }, { "epoch": 0.2556053811659193, "grad_norm": 1.4180147647857666, "learning_rate": 8.76081744036689e-05, "loss": 9.5011, "step": 741 }, { "epoch": 0.2559503276992066, "grad_norm": 1.5591294765472412, "learning_rate": 8.75711690169718e-05, "loss": 9.4629, "step": 742 }, { "epoch": 0.256295274232494, "grad_norm": 1.4959933757781982, "learning_rate": 8.75341162989553e-05, "loss": 9.4018, "step": 743 }, { "epoch": 0.2566402207657813, "grad_norm": 1.3860907554626465, "learning_rate": 8.749701629629756e-05, "loss": 9.4971, "step": 744 }, { "epoch": 0.2569851672990686, "grad_norm": 1.4601097106933594, "learning_rate": 8.745986905573634e-05, "loss": 9.4616, "step": 745 }, { "epoch": 0.257330113832356, "grad_norm": 1.5998722314834595, "learning_rate": 8.742267462406892e-05, "loss": 9.4879, "step": 746 }, { "epoch": 0.2576750603656433, "grad_norm": 1.7166414260864258, "learning_rate": 8.738543304815202e-05, "loss": 9.4445, "step": 747 }, { "epoch": 0.2580200068989307, "grad_norm": 1.5866833925247192, "learning_rate": 8.734814437490171e-05, "loss": 9.3825, "step": 748 }, { "epoch": 0.258364953432218, "grad_norm": 1.6864278316497803, "learning_rate": 8.731080865129344e-05, "loss": 9.3629, "step": 749 }, { "epoch": 0.25870989996550536, "grad_norm": 1.655298113822937, "learning_rate": 8.727342592436194e-05, "loss": 9.3556, "step": 750 }, { "epoch": 0.2590548464987927, "grad_norm": 0.8688000440597534, "learning_rate": 8.723599624120111e-05, "loss": 9.5912, "step": 751 }, { "epoch": 0.25939979303208005, "grad_norm": 1.0522035360336304, "learning_rate": 8.719851964896405e-05, "loss": 9.622, "step": 752 }, { "epoch": 0.25974473956536737, "grad_norm": 0.9646815657615662, "learning_rate": 8.716099619486293e-05, "loss": 9.6436, "step": 753 }, { "epoch": 0.2600896860986547, "grad_norm": 1.048831582069397, "learning_rate": 8.712342592616894e-05, "loss": 9.5588, "step": 754 }, { "epoch": 0.26043463263194205, "grad_norm": 1.0627691745758057, "learning_rate": 8.708580889021228e-05, "loss": 9.5313, "step": 755 }, { "epoch": 0.26077957916522937, "grad_norm": 1.06023371219635, "learning_rate": 8.704814513438206e-05, "loss": 9.532, "step": 756 }, { "epoch": 0.26112452569851674, "grad_norm": 1.085838794708252, "learning_rate": 8.701043470612622e-05, "loss": 9.5856, "step": 757 }, { "epoch": 0.26146947223180406, "grad_norm": 1.1486434936523438, "learning_rate": 8.697267765295153e-05, "loss": 9.6146, "step": 758 }, { "epoch": 0.26181441876509143, "grad_norm": 1.053144931793213, "learning_rate": 8.693487402242348e-05, "loss": 9.5736, "step": 759 }, { "epoch": 0.26215936529837874, "grad_norm": 1.2557427883148193, "learning_rate": 8.689702386216622e-05, "loss": 9.5062, "step": 760 }, { "epoch": 0.2625043118316661, "grad_norm": 1.2348942756652832, "learning_rate": 8.685912721986256e-05, "loss": 9.5179, "step": 761 }, { "epoch": 0.26284925836495343, "grad_norm": 1.2272906303405762, "learning_rate": 8.682118414325383e-05, "loss": 9.5305, "step": 762 }, { "epoch": 0.26319420489824075, "grad_norm": 1.2449849843978882, "learning_rate": 8.678319468013987e-05, "loss": 9.4919, "step": 763 }, { "epoch": 0.2635391514315281, "grad_norm": 1.264153003692627, "learning_rate": 8.674515887837897e-05, "loss": 9.5337, "step": 764 }, { "epoch": 0.26388409796481543, "grad_norm": 1.132380723953247, "learning_rate": 8.670707678588778e-05, "loss": 9.5491, "step": 765 }, { "epoch": 0.2642290444981028, "grad_norm": 1.2579922676086426, "learning_rate": 8.666894845064126e-05, "loss": 9.5501, "step": 766 }, { "epoch": 0.2645739910313901, "grad_norm": 1.2157853841781616, "learning_rate": 8.663077392067265e-05, "loss": 9.512, "step": 767 }, { "epoch": 0.2649189375646775, "grad_norm": 1.3099197149276733, "learning_rate": 8.659255324407334e-05, "loss": 9.4923, "step": 768 }, { "epoch": 0.2652638840979648, "grad_norm": 1.204965591430664, "learning_rate": 8.655428646899292e-05, "loss": 9.6055, "step": 769 }, { "epoch": 0.2656088306312522, "grad_norm": 1.3694841861724854, "learning_rate": 8.651597364363899e-05, "loss": 9.4878, "step": 770 }, { "epoch": 0.2659537771645395, "grad_norm": 1.263826847076416, "learning_rate": 8.647761481627721e-05, "loss": 9.5483, "step": 771 }, { "epoch": 0.2662987236978268, "grad_norm": 1.2380715608596802, "learning_rate": 8.643921003523117e-05, "loss": 9.563, "step": 772 }, { "epoch": 0.2666436702311142, "grad_norm": 1.3266398906707764, "learning_rate": 8.640075934888234e-05, "loss": 9.4974, "step": 773 }, { "epoch": 0.2669886167644015, "grad_norm": 1.2829941511154175, "learning_rate": 8.636226280567007e-05, "loss": 9.6534, "step": 774 }, { "epoch": 0.26733356329768887, "grad_norm": 1.2618184089660645, "learning_rate": 8.632372045409141e-05, "loss": 9.5707, "step": 775 }, { "epoch": 0.2676785098309762, "grad_norm": 1.2372167110443115, "learning_rate": 8.628513234270117e-05, "loss": 9.5143, "step": 776 }, { "epoch": 0.26802345636426356, "grad_norm": 1.3537949323654175, "learning_rate": 8.62464985201118e-05, "loss": 9.514, "step": 777 }, { "epoch": 0.2683684028975509, "grad_norm": 1.371895670890808, "learning_rate": 8.620781903499329e-05, "loss": 9.5151, "step": 778 }, { "epoch": 0.26871334943083824, "grad_norm": 1.3296464681625366, "learning_rate": 8.616909393607324e-05, "loss": 9.5469, "step": 779 }, { "epoch": 0.26905829596412556, "grad_norm": 1.4044578075408936, "learning_rate": 8.613032327213664e-05, "loss": 9.4461, "step": 780 }, { "epoch": 0.2694032424974129, "grad_norm": 1.4443117380142212, "learning_rate": 8.60915070920259e-05, "loss": 9.4868, "step": 781 }, { "epoch": 0.26974818903070025, "grad_norm": 1.3604815006256104, "learning_rate": 8.605264544464078e-05, "loss": 9.4855, "step": 782 }, { "epoch": 0.27009313556398756, "grad_norm": 1.2737566232681274, "learning_rate": 8.601373837893832e-05, "loss": 9.5417, "step": 783 }, { "epoch": 0.27043808209727493, "grad_norm": 1.444075345993042, "learning_rate": 8.597478594393275e-05, "loss": 9.5127, "step": 784 }, { "epoch": 0.27078302863056225, "grad_norm": 1.1705905199050903, "learning_rate": 8.593578818869553e-05, "loss": 9.5403, "step": 785 }, { "epoch": 0.2711279751638496, "grad_norm": 1.3495186567306519, "learning_rate": 8.589674516235513e-05, "loss": 9.5364, "step": 786 }, { "epoch": 0.27147292169713694, "grad_norm": 1.399301290512085, "learning_rate": 8.585765691409707e-05, "loss": 9.4696, "step": 787 }, { "epoch": 0.2718178682304243, "grad_norm": 1.4776448011398315, "learning_rate": 8.581852349316385e-05, "loss": 9.4584, "step": 788 }, { "epoch": 0.2721628147637116, "grad_norm": 1.3909804821014404, "learning_rate": 8.577934494885491e-05, "loss": 9.4188, "step": 789 }, { "epoch": 0.27250776129699894, "grad_norm": 1.6148669719696045, "learning_rate": 8.574012133052649e-05, "loss": 9.4471, "step": 790 }, { "epoch": 0.2728527078302863, "grad_norm": 1.453417420387268, "learning_rate": 8.570085268759163e-05, "loss": 9.4643, "step": 791 }, { "epoch": 0.2731976543635736, "grad_norm": 1.4808872938156128, "learning_rate": 8.566153906952007e-05, "loss": 9.3988, "step": 792 }, { "epoch": 0.273542600896861, "grad_norm": 1.3660118579864502, "learning_rate": 8.562218052583826e-05, "loss": 9.4487, "step": 793 }, { "epoch": 0.2738875474301483, "grad_norm": 1.6086071729660034, "learning_rate": 8.558277710612918e-05, "loss": 9.4475, "step": 794 }, { "epoch": 0.2742324939634357, "grad_norm": 1.5114543437957764, "learning_rate": 8.55433288600324e-05, "loss": 9.3927, "step": 795 }, { "epoch": 0.274577440496723, "grad_norm": 1.5037239789962769, "learning_rate": 8.550383583724394e-05, "loss": 9.4977, "step": 796 }, { "epoch": 0.2749223870300104, "grad_norm": 1.5446207523345947, "learning_rate": 8.546429808751621e-05, "loss": 9.4257, "step": 797 }, { "epoch": 0.2752673335632977, "grad_norm": 1.5671929121017456, "learning_rate": 8.542471566065801e-05, "loss": 9.4244, "step": 798 }, { "epoch": 0.275612280096585, "grad_norm": 1.8590857982635498, "learning_rate": 8.538508860653438e-05, "loss": 9.3651, "step": 799 }, { "epoch": 0.2759572266298724, "grad_norm": 1.864456057548523, "learning_rate": 8.534541697506661e-05, "loss": 9.3772, "step": 800 }, { "epoch": 0.2763021731631597, "grad_norm": 0.9062460660934448, "learning_rate": 8.53057008162321e-05, "loss": 9.6121, "step": 801 }, { "epoch": 0.27664711969644706, "grad_norm": 0.9802834987640381, "learning_rate": 8.526594018006443e-05, "loss": 9.5523, "step": 802 }, { "epoch": 0.2769920662297344, "grad_norm": 1.0618095397949219, "learning_rate": 8.522613511665314e-05, "loss": 9.5852, "step": 803 }, { "epoch": 0.27733701276302175, "grad_norm": 1.0446445941925049, "learning_rate": 8.518628567614373e-05, "loss": 9.5632, "step": 804 }, { "epoch": 0.27768195929630907, "grad_norm": 1.1162205934524536, "learning_rate": 8.514639190873768e-05, "loss": 9.5589, "step": 805 }, { "epoch": 0.27802690582959644, "grad_norm": 1.080688714981079, "learning_rate": 8.510645386469224e-05, "loss": 9.5131, "step": 806 }, { "epoch": 0.27837185236288375, "grad_norm": 1.093503475189209, "learning_rate": 8.506647159432047e-05, "loss": 9.6152, "step": 807 }, { "epoch": 0.27871679889617107, "grad_norm": 1.0839183330535889, "learning_rate": 8.502644514799116e-05, "loss": 9.584, "step": 808 }, { "epoch": 0.27906174542945844, "grad_norm": 1.2850983142852783, "learning_rate": 8.49863745761287e-05, "loss": 9.5141, "step": 809 }, { "epoch": 0.27940669196274576, "grad_norm": 1.2393872737884521, "learning_rate": 8.494625992921315e-05, "loss": 9.5024, "step": 810 }, { "epoch": 0.27975163849603313, "grad_norm": 1.2150235176086426, "learning_rate": 8.490610125777998e-05, "loss": 9.5355, "step": 811 }, { "epoch": 0.28009658502932044, "grad_norm": 1.0908310413360596, "learning_rate": 8.486589861242024e-05, "loss": 9.5201, "step": 812 }, { "epoch": 0.2804415315626078, "grad_norm": 1.2537291049957275, "learning_rate": 8.48256520437803e-05, "loss": 9.5507, "step": 813 }, { "epoch": 0.28078647809589513, "grad_norm": 1.3384590148925781, "learning_rate": 8.47853616025619e-05, "loss": 9.5322, "step": 814 }, { "epoch": 0.2811314246291825, "grad_norm": 1.163336992263794, "learning_rate": 8.474502733952203e-05, "loss": 9.5228, "step": 815 }, { "epoch": 0.2814763711624698, "grad_norm": 1.2593789100646973, "learning_rate": 8.470464930547291e-05, "loss": 9.489, "step": 816 }, { "epoch": 0.28182131769575713, "grad_norm": 1.0830525159835815, "learning_rate": 8.466422755128185e-05, "loss": 9.5628, "step": 817 }, { "epoch": 0.2821662642290445, "grad_norm": 1.3622268438339233, "learning_rate": 8.46237621278713e-05, "loss": 9.5389, "step": 818 }, { "epoch": 0.2825112107623318, "grad_norm": 1.237600564956665, "learning_rate": 8.45832530862187e-05, "loss": 9.4825, "step": 819 }, { "epoch": 0.2828561572956192, "grad_norm": 1.2572169303894043, "learning_rate": 8.454270047735644e-05, "loss": 9.5389, "step": 820 }, { "epoch": 0.2832011038289065, "grad_norm": 1.3330270051956177, "learning_rate": 8.450210435237174e-05, "loss": 9.5339, "step": 821 }, { "epoch": 0.2835460503621939, "grad_norm": 1.4301376342773438, "learning_rate": 8.446146476240675e-05, "loss": 9.4688, "step": 822 }, { "epoch": 0.2838909968954812, "grad_norm": 1.3089014291763306, "learning_rate": 8.44207817586583e-05, "loss": 9.531, "step": 823 }, { "epoch": 0.28423594342876857, "grad_norm": 1.1468137502670288, "learning_rate": 8.438005539237792e-05, "loss": 9.6279, "step": 824 }, { "epoch": 0.2845808899620559, "grad_norm": 1.2456337213516235, "learning_rate": 8.433928571487177e-05, "loss": 9.5898, "step": 825 }, { "epoch": 0.2849258364953432, "grad_norm": 1.2768372297286987, "learning_rate": 8.429847277750063e-05, "loss": 9.4724, "step": 826 }, { "epoch": 0.28527078302863057, "grad_norm": 1.3287653923034668, "learning_rate": 8.425761663167965e-05, "loss": 9.5066, "step": 827 }, { "epoch": 0.2856157295619179, "grad_norm": 1.379485845565796, "learning_rate": 8.421671732887854e-05, "loss": 9.4747, "step": 828 }, { "epoch": 0.28596067609520526, "grad_norm": 1.395017385482788, "learning_rate": 8.417577492062132e-05, "loss": 9.5308, "step": 829 }, { "epoch": 0.2863056226284926, "grad_norm": 1.3982423543930054, "learning_rate": 8.413478945848631e-05, "loss": 9.4779, "step": 830 }, { "epoch": 0.28665056916177994, "grad_norm": 1.3681056499481201, "learning_rate": 8.409376099410609e-05, "loss": 9.5195, "step": 831 }, { "epoch": 0.28699551569506726, "grad_norm": 1.401589274406433, "learning_rate": 8.405268957916738e-05, "loss": 9.4662, "step": 832 }, { "epoch": 0.28734046222835463, "grad_norm": 1.4695594310760498, "learning_rate": 8.401157526541108e-05, "loss": 9.4427, "step": 833 }, { "epoch": 0.28768540876164195, "grad_norm": 1.426550030708313, "learning_rate": 8.397041810463205e-05, "loss": 9.4904, "step": 834 }, { "epoch": 0.28803035529492926, "grad_norm": 1.5150681734085083, "learning_rate": 8.392921814867915e-05, "loss": 9.5391, "step": 835 }, { "epoch": 0.28837530182821663, "grad_norm": 1.4273500442504883, "learning_rate": 8.38879754494552e-05, "loss": 9.4018, "step": 836 }, { "epoch": 0.28872024836150395, "grad_norm": 1.4751886129379272, "learning_rate": 8.384669005891681e-05, "loss": 9.4735, "step": 837 }, { "epoch": 0.2890651948947913, "grad_norm": 1.4229878187179565, "learning_rate": 8.380536202907439e-05, "loss": 9.4534, "step": 838 }, { "epoch": 0.28941014142807864, "grad_norm": 1.3954477310180664, "learning_rate": 8.376399141199206e-05, "loss": 9.4548, "step": 839 }, { "epoch": 0.289755087961366, "grad_norm": 1.3925981521606445, "learning_rate": 8.372257825978762e-05, "loss": 9.4611, "step": 840 }, { "epoch": 0.2901000344946533, "grad_norm": 1.4793709516525269, "learning_rate": 8.368112262463244e-05, "loss": 9.4388, "step": 841 }, { "epoch": 0.2904449810279407, "grad_norm": 1.3460469245910645, "learning_rate": 8.363962455875136e-05, "loss": 9.5226, "step": 842 }, { "epoch": 0.290789927561228, "grad_norm": 1.399797797203064, "learning_rate": 8.359808411442277e-05, "loss": 9.485, "step": 843 }, { "epoch": 0.29113487409451533, "grad_norm": 1.6081780195236206, "learning_rate": 8.355650134397836e-05, "loss": 9.4068, "step": 844 }, { "epoch": 0.2914798206278027, "grad_norm": 1.4968883991241455, "learning_rate": 8.351487629980319e-05, "loss": 9.4218, "step": 845 }, { "epoch": 0.29182476716109, "grad_norm": 1.608202576637268, "learning_rate": 8.347320903433555e-05, "loss": 9.4663, "step": 846 }, { "epoch": 0.2921697136943774, "grad_norm": 1.6110312938690186, "learning_rate": 8.343149960006695e-05, "loss": 9.4781, "step": 847 }, { "epoch": 0.2925146602276647, "grad_norm": 1.6279586553573608, "learning_rate": 8.338974804954197e-05, "loss": 9.3999, "step": 848 }, { "epoch": 0.2928596067609521, "grad_norm": 1.801674723625183, "learning_rate": 8.33479544353583e-05, "loss": 9.4058, "step": 849 }, { "epoch": 0.2932045532942394, "grad_norm": 1.7849171161651611, "learning_rate": 8.330611881016661e-05, "loss": 9.3335, "step": 850 }, { "epoch": 0.29354949982752676, "grad_norm": 0.913504958152771, "learning_rate": 8.326424122667048e-05, "loss": 9.6455, "step": 851 }, { "epoch": 0.2938944463608141, "grad_norm": 1.0253827571868896, "learning_rate": 8.322232173762637e-05, "loss": 9.5369, "step": 852 }, { "epoch": 0.2942393928941014, "grad_norm": 0.8860166072845459, "learning_rate": 8.318036039584344e-05, "loss": 9.6341, "step": 853 }, { "epoch": 0.29458433942738876, "grad_norm": 1.1842451095581055, "learning_rate": 8.313835725418376e-05, "loss": 9.4935, "step": 854 }, { "epoch": 0.2949292859606761, "grad_norm": 1.193926215171814, "learning_rate": 8.309631236556189e-05, "loss": 9.5092, "step": 855 }, { "epoch": 0.29527423249396345, "grad_norm": 1.0631842613220215, "learning_rate": 8.305422578294504e-05, "loss": 9.6657, "step": 856 }, { "epoch": 0.29561917902725077, "grad_norm": 1.310213327407837, "learning_rate": 8.301209755935295e-05, "loss": 9.4685, "step": 857 }, { "epoch": 0.29596412556053814, "grad_norm": 1.3630036115646362, "learning_rate": 8.296992774785784e-05, "loss": 9.4808, "step": 858 }, { "epoch": 0.29630907209382545, "grad_norm": 1.0846993923187256, "learning_rate": 8.292771640158427e-05, "loss": 9.5889, "step": 859 }, { "epoch": 0.29665401862711277, "grad_norm": 1.2428042888641357, "learning_rate": 8.288546357370914e-05, "loss": 9.466, "step": 860 }, { "epoch": 0.29699896516040014, "grad_norm": 1.1466275453567505, "learning_rate": 8.284316931746166e-05, "loss": 9.5692, "step": 861 }, { "epoch": 0.29734391169368746, "grad_norm": 1.2471225261688232, "learning_rate": 8.280083368612315e-05, "loss": 9.5223, "step": 862 }, { "epoch": 0.29768885822697483, "grad_norm": 1.1912099123001099, "learning_rate": 8.275845673302713e-05, "loss": 9.5137, "step": 863 }, { "epoch": 0.29803380476026214, "grad_norm": 1.2651206254959106, "learning_rate": 8.271603851155911e-05, "loss": 9.4735, "step": 864 }, { "epoch": 0.2983787512935495, "grad_norm": 1.3276959657669067, "learning_rate": 8.267357907515661e-05, "loss": 9.4837, "step": 865 }, { "epoch": 0.29872369782683683, "grad_norm": 1.3006752729415894, "learning_rate": 8.263107847730911e-05, "loss": 9.5469, "step": 866 }, { "epoch": 0.2990686443601242, "grad_norm": 1.1855391263961792, "learning_rate": 8.25885367715579e-05, "loss": 9.5297, "step": 867 }, { "epoch": 0.2994135908934115, "grad_norm": 1.1859946250915527, "learning_rate": 8.254595401149605e-05, "loss": 9.5102, "step": 868 }, { "epoch": 0.29975853742669883, "grad_norm": 1.1786353588104248, "learning_rate": 8.250333025076842e-05, "loss": 9.5158, "step": 869 }, { "epoch": 0.3001034839599862, "grad_norm": 1.1845674514770508, "learning_rate": 8.246066554307141e-05, "loss": 9.5457, "step": 870 }, { "epoch": 0.3004484304932735, "grad_norm": 1.3296141624450684, "learning_rate": 8.241795994215312e-05, "loss": 9.5422, "step": 871 }, { "epoch": 0.3007933770265609, "grad_norm": 1.2888195514678955, "learning_rate": 8.237521350181308e-05, "loss": 9.4716, "step": 872 }, { "epoch": 0.3011383235598482, "grad_norm": 1.2534382343292236, "learning_rate": 8.233242627590232e-05, "loss": 9.4963, "step": 873 }, { "epoch": 0.3014832700931356, "grad_norm": 1.3657022714614868, "learning_rate": 8.22895983183232e-05, "loss": 9.5223, "step": 874 }, { "epoch": 0.3018282166264229, "grad_norm": 1.3497120141983032, "learning_rate": 8.224672968302946e-05, "loss": 9.5319, "step": 875 }, { "epoch": 0.30217316315971027, "grad_norm": 1.3509223461151123, "learning_rate": 8.220382042402604e-05, "loss": 9.4724, "step": 876 }, { "epoch": 0.3025181096929976, "grad_norm": 1.4304386377334595, "learning_rate": 8.216087059536903e-05, "loss": 9.5216, "step": 877 }, { "epoch": 0.3028630562262849, "grad_norm": 1.4425625801086426, "learning_rate": 8.211788025116571e-05, "loss": 9.5034, "step": 878 }, { "epoch": 0.30320800275957227, "grad_norm": 1.2742223739624023, "learning_rate": 8.207484944557436e-05, "loss": 9.5591, "step": 879 }, { "epoch": 0.3035529492928596, "grad_norm": 1.3797872066497803, "learning_rate": 8.20317782328042e-05, "loss": 9.5402, "step": 880 }, { "epoch": 0.30389789582614696, "grad_norm": 1.3795853853225708, "learning_rate": 8.198866666711535e-05, "loss": 9.5036, "step": 881 }, { "epoch": 0.3042428423594343, "grad_norm": 1.4435900449752808, "learning_rate": 8.194551480281888e-05, "loss": 9.4878, "step": 882 }, { "epoch": 0.30458778889272164, "grad_norm": 1.3728365898132324, "learning_rate": 8.19023226942765e-05, "loss": 9.5345, "step": 883 }, { "epoch": 0.30493273542600896, "grad_norm": 1.516736388206482, "learning_rate": 8.185909039590063e-05, "loss": 9.4699, "step": 884 }, { "epoch": 0.30527768195929633, "grad_norm": 1.4422507286071777, "learning_rate": 8.18158179621544e-05, "loss": 9.4944, "step": 885 }, { "epoch": 0.30562262849258365, "grad_norm": 1.424100637435913, "learning_rate": 8.177250544755146e-05, "loss": 9.4816, "step": 886 }, { "epoch": 0.30596757502587096, "grad_norm": 1.421716570854187, "learning_rate": 8.172915290665594e-05, "loss": 9.5022, "step": 887 }, { "epoch": 0.30631252155915834, "grad_norm": 1.4079738855361938, "learning_rate": 8.168576039408238e-05, "loss": 9.4492, "step": 888 }, { "epoch": 0.30665746809244565, "grad_norm": 1.3521206378936768, "learning_rate": 8.164232796449572e-05, "loss": 9.4899, "step": 889 }, { "epoch": 0.307002414625733, "grad_norm": 1.4199329614639282, "learning_rate": 8.159885567261114e-05, "loss": 9.4414, "step": 890 }, { "epoch": 0.30734736115902034, "grad_norm": 1.5063000917434692, "learning_rate": 8.155534357319408e-05, "loss": 9.4272, "step": 891 }, { "epoch": 0.3076923076923077, "grad_norm": 1.4825642108917236, "learning_rate": 8.151179172106012e-05, "loss": 9.4197, "step": 892 }, { "epoch": 0.308037254225595, "grad_norm": 1.5050302743911743, "learning_rate": 8.14682001710749e-05, "loss": 9.4654, "step": 893 }, { "epoch": 0.3083822007588824, "grad_norm": 1.4784218072891235, "learning_rate": 8.142456897815407e-05, "loss": 9.4737, "step": 894 }, { "epoch": 0.3087271472921697, "grad_norm": 1.6477797031402588, "learning_rate": 8.138089819726327e-05, "loss": 9.4824, "step": 895 }, { "epoch": 0.30907209382545703, "grad_norm": 1.5472941398620605, "learning_rate": 8.133718788341791e-05, "loss": 9.4556, "step": 896 }, { "epoch": 0.3094170403587444, "grad_norm": 1.5163496732711792, "learning_rate": 8.129343809168334e-05, "loss": 9.4168, "step": 897 }, { "epoch": 0.3097619868920317, "grad_norm": 1.538262128829956, "learning_rate": 8.12496488771745e-05, "loss": 9.437, "step": 898 }, { "epoch": 0.3101069334253191, "grad_norm": 1.7789909839630127, "learning_rate": 8.120582029505613e-05, "loss": 9.4236, "step": 899 }, { "epoch": 0.3104518799586064, "grad_norm": 1.8070483207702637, "learning_rate": 8.116195240054247e-05, "loss": 9.4368, "step": 900 }, { "epoch": 0.3107968264918938, "grad_norm": 0.9573423862457275, "learning_rate": 8.111804524889729e-05, "loss": 9.5679, "step": 901 }, { "epoch": 0.3111417730251811, "grad_norm": 1.020045518875122, "learning_rate": 8.107409889543386e-05, "loss": 9.5809, "step": 902 }, { "epoch": 0.31148671955846846, "grad_norm": 1.1108217239379883, "learning_rate": 8.103011339551482e-05, "loss": 9.5283, "step": 903 }, { "epoch": 0.3118316660917558, "grad_norm": 1.1079702377319336, "learning_rate": 8.098608880455213e-05, "loss": 9.5454, "step": 904 }, { "epoch": 0.3121766126250431, "grad_norm": 1.165088176727295, "learning_rate": 8.094202517800695e-05, "loss": 9.5713, "step": 905 }, { "epoch": 0.31252155915833046, "grad_norm": 1.1873505115509033, "learning_rate": 8.089792257138968e-05, "loss": 9.5299, "step": 906 }, { "epoch": 0.3128665056916178, "grad_norm": 1.1979339122772217, "learning_rate": 8.08537810402598e-05, "loss": 9.577, "step": 907 }, { "epoch": 0.31321145222490515, "grad_norm": 1.1789638996124268, "learning_rate": 8.080960064022581e-05, "loss": 9.6262, "step": 908 }, { "epoch": 0.31355639875819247, "grad_norm": 1.2188464403152466, "learning_rate": 8.076538142694518e-05, "loss": 9.553, "step": 909 }, { "epoch": 0.31390134529147984, "grad_norm": 1.1790921688079834, "learning_rate": 8.072112345612434e-05, "loss": 9.5114, "step": 910 }, { "epoch": 0.31424629182476715, "grad_norm": 1.131395697593689, "learning_rate": 8.067682678351845e-05, "loss": 9.5672, "step": 911 }, { "epoch": 0.3145912383580545, "grad_norm": 1.1688874959945679, "learning_rate": 8.063249146493145e-05, "loss": 9.5667, "step": 912 }, { "epoch": 0.31493618489134184, "grad_norm": 1.2182799577713013, "learning_rate": 8.058811755621603e-05, "loss": 9.5622, "step": 913 }, { "epoch": 0.31528113142462916, "grad_norm": 1.1979494094848633, "learning_rate": 8.054370511327342e-05, "loss": 9.4747, "step": 914 }, { "epoch": 0.31562607795791653, "grad_norm": 1.3254115581512451, "learning_rate": 8.049925419205344e-05, "loss": 9.5316, "step": 915 }, { "epoch": 0.31597102449120384, "grad_norm": 1.1189312934875488, "learning_rate": 8.045476484855434e-05, "loss": 9.5205, "step": 916 }, { "epoch": 0.3163159710244912, "grad_norm": 1.3546466827392578, "learning_rate": 8.041023713882286e-05, "loss": 9.5027, "step": 917 }, { "epoch": 0.31666091755777853, "grad_norm": 1.1889816522598267, "learning_rate": 8.036567111895394e-05, "loss": 9.5132, "step": 918 }, { "epoch": 0.3170058640910659, "grad_norm": 1.3260706663131714, "learning_rate": 8.032106684509091e-05, "loss": 9.4622, "step": 919 }, { "epoch": 0.3173508106243532, "grad_norm": 1.3710530996322632, "learning_rate": 8.027642437342521e-05, "loss": 9.5102, "step": 920 }, { "epoch": 0.3176957571576406, "grad_norm": 1.2274657487869263, "learning_rate": 8.023174376019644e-05, "loss": 9.5887, "step": 921 }, { "epoch": 0.3180407036909279, "grad_norm": 1.3517509698867798, "learning_rate": 8.018702506169223e-05, "loss": 9.4605, "step": 922 }, { "epoch": 0.3183856502242152, "grad_norm": 1.2345460653305054, "learning_rate": 8.014226833424823e-05, "loss": 9.5566, "step": 923 }, { "epoch": 0.3187305967575026, "grad_norm": 1.2032557725906372, "learning_rate": 8.009747363424791e-05, "loss": 9.5374, "step": 924 }, { "epoch": 0.3190755432907899, "grad_norm": 1.3755812644958496, "learning_rate": 8.005264101812268e-05, "loss": 9.4509, "step": 925 }, { "epoch": 0.3194204898240773, "grad_norm": 1.253843069076538, "learning_rate": 8.000777054235163e-05, "loss": 9.5665, "step": 926 }, { "epoch": 0.3197654363573646, "grad_norm": 1.4256430864334106, "learning_rate": 7.996286226346162e-05, "loss": 9.5317, "step": 927 }, { "epoch": 0.32011038289065197, "grad_norm": 1.4187809228897095, "learning_rate": 7.991791623802707e-05, "loss": 9.5435, "step": 928 }, { "epoch": 0.3204553294239393, "grad_norm": 1.3248265981674194, "learning_rate": 7.987293252266999e-05, "loss": 9.489, "step": 929 }, { "epoch": 0.32080027595722665, "grad_norm": 1.4751477241516113, "learning_rate": 7.982791117405986e-05, "loss": 9.4945, "step": 930 }, { "epoch": 0.32114522249051397, "grad_norm": 1.2307500839233398, "learning_rate": 7.978285224891356e-05, "loss": 9.538, "step": 931 }, { "epoch": 0.3214901690238013, "grad_norm": 1.5299421548843384, "learning_rate": 7.973775580399534e-05, "loss": 9.4755, "step": 932 }, { "epoch": 0.32183511555708866, "grad_norm": 1.2966649532318115, "learning_rate": 7.969262189611666e-05, "loss": 9.5272, "step": 933 }, { "epoch": 0.322180062090376, "grad_norm": 1.307608962059021, "learning_rate": 7.964745058213622e-05, "loss": 9.5482, "step": 934 }, { "epoch": 0.32252500862366335, "grad_norm": 1.349100947380066, "learning_rate": 7.960224191895987e-05, "loss": 9.4722, "step": 935 }, { "epoch": 0.32286995515695066, "grad_norm": 1.387831449508667, "learning_rate": 7.955699596354041e-05, "loss": 9.4829, "step": 936 }, { "epoch": 0.32321490169023803, "grad_norm": 1.3435271978378296, "learning_rate": 7.951171277287774e-05, "loss": 9.4139, "step": 937 }, { "epoch": 0.32355984822352535, "grad_norm": 1.3720812797546387, "learning_rate": 7.946639240401858e-05, "loss": 9.5294, "step": 938 }, { "epoch": 0.3239047947568127, "grad_norm": 1.3235864639282227, "learning_rate": 7.942103491405655e-05, "loss": 9.5735, "step": 939 }, { "epoch": 0.32424974129010004, "grad_norm": 1.387094259262085, "learning_rate": 7.937564036013194e-05, "loss": 9.5123, "step": 940 }, { "epoch": 0.32459468782338735, "grad_norm": 1.5132378339767456, "learning_rate": 7.933020879943185e-05, "loss": 9.4052, "step": 941 }, { "epoch": 0.3249396343566747, "grad_norm": 1.425180435180664, "learning_rate": 7.928474028918996e-05, "loss": 9.4817, "step": 942 }, { "epoch": 0.32528458088996204, "grad_norm": 1.4578827619552612, "learning_rate": 7.923923488668641e-05, "loss": 9.4453, "step": 943 }, { "epoch": 0.3256295274232494, "grad_norm": 1.6216663122177124, "learning_rate": 7.919369264924795e-05, "loss": 9.417, "step": 944 }, { "epoch": 0.3259744739565367, "grad_norm": 1.4772353172302246, "learning_rate": 7.914811363424765e-05, "loss": 9.512, "step": 945 }, { "epoch": 0.3263194204898241, "grad_norm": 1.6448122262954712, "learning_rate": 7.910249789910495e-05, "loss": 9.4389, "step": 946 }, { "epoch": 0.3266643670231114, "grad_norm": 1.4505155086517334, "learning_rate": 7.905684550128553e-05, "loss": 9.4267, "step": 947 }, { "epoch": 0.3270093135563988, "grad_norm": 1.6771926879882812, "learning_rate": 7.901115649830125e-05, "loss": 9.4435, "step": 948 }, { "epoch": 0.3273542600896861, "grad_norm": 1.53878915309906, "learning_rate": 7.896543094771011e-05, "loss": 9.4784, "step": 949 }, { "epoch": 0.3276992066229734, "grad_norm": 1.7891881465911865, "learning_rate": 7.89196689071161e-05, "loss": 9.4134, "step": 950 }, { "epoch": 0.3280441531562608, "grad_norm": 0.9297865033149719, "learning_rate": 7.887387043416927e-05, "loss": 9.5925, "step": 951 }, { "epoch": 0.3283890996895481, "grad_norm": 1.1696343421936035, "learning_rate": 7.882803558656549e-05, "loss": 9.5268, "step": 952 }, { "epoch": 0.3287340462228355, "grad_norm": 1.1735519170761108, "learning_rate": 7.878216442204647e-05, "loss": 9.4896, "step": 953 }, { "epoch": 0.3290789927561228, "grad_norm": 1.115681529045105, "learning_rate": 7.873625699839968e-05, "loss": 9.5907, "step": 954 }, { "epoch": 0.32942393928941016, "grad_norm": 1.0858581066131592, "learning_rate": 7.869031337345828e-05, "loss": 9.5537, "step": 955 }, { "epoch": 0.3297688858226975, "grad_norm": 1.0331652164459229, "learning_rate": 7.864433360510101e-05, "loss": 9.6112, "step": 956 }, { "epoch": 0.33011383235598485, "grad_norm": 1.1334502696990967, "learning_rate": 7.859831775125214e-05, "loss": 9.5681, "step": 957 }, { "epoch": 0.33045877888927216, "grad_norm": 1.0670603513717651, "learning_rate": 7.855226586988145e-05, "loss": 9.5747, "step": 958 }, { "epoch": 0.3308037254225595, "grad_norm": 1.1903663873672485, "learning_rate": 7.850617801900406e-05, "loss": 9.5893, "step": 959 }, { "epoch": 0.33114867195584685, "grad_norm": 1.1031749248504639, "learning_rate": 7.846005425668041e-05, "loss": 9.547, "step": 960 }, { "epoch": 0.33149361848913417, "grad_norm": 1.259435772895813, "learning_rate": 7.841389464101616e-05, "loss": 9.5292, "step": 961 }, { "epoch": 0.33183856502242154, "grad_norm": 1.1308194398880005, "learning_rate": 7.83676992301622e-05, "loss": 9.5453, "step": 962 }, { "epoch": 0.33218351155570885, "grad_norm": 1.3275343179702759, "learning_rate": 7.832146808231447e-05, "loss": 9.4979, "step": 963 }, { "epoch": 0.3325284580889962, "grad_norm": 1.1660116910934448, "learning_rate": 7.827520125571392e-05, "loss": 9.6073, "step": 964 }, { "epoch": 0.33287340462228354, "grad_norm": 1.347019076347351, "learning_rate": 7.822889880864649e-05, "loss": 9.5012, "step": 965 }, { "epoch": 0.3332183511555709, "grad_norm": 1.230370044708252, "learning_rate": 7.818256079944295e-05, "loss": 9.5268, "step": 966 }, { "epoch": 0.33356329768885823, "grad_norm": 1.2594881057739258, "learning_rate": 7.813618728647889e-05, "loss": 9.5, "step": 967 }, { "epoch": 0.33390824422214554, "grad_norm": 1.334810733795166, "learning_rate": 7.808977832817462e-05, "loss": 9.5172, "step": 968 }, { "epoch": 0.3342531907554329, "grad_norm": 1.1402246952056885, "learning_rate": 7.804333398299512e-05, "loss": 9.51, "step": 969 }, { "epoch": 0.33459813728872023, "grad_norm": 1.3126106262207031, "learning_rate": 7.799685430944995e-05, "loss": 9.5127, "step": 970 }, { "epoch": 0.3349430838220076, "grad_norm": 1.2847175598144531, "learning_rate": 7.795033936609314e-05, "loss": 9.5261, "step": 971 }, { "epoch": 0.3352880303552949, "grad_norm": 1.115410566329956, "learning_rate": 7.790378921152319e-05, "loss": 9.5918, "step": 972 }, { "epoch": 0.3356329768885823, "grad_norm": 1.4567341804504395, "learning_rate": 7.785720390438293e-05, "loss": 9.5061, "step": 973 }, { "epoch": 0.3359779234218696, "grad_norm": 1.3386863470077515, "learning_rate": 7.781058350335952e-05, "loss": 9.5209, "step": 974 }, { "epoch": 0.336322869955157, "grad_norm": 1.2178325653076172, "learning_rate": 7.776392806718427e-05, "loss": 9.582, "step": 975 }, { "epoch": 0.3366678164884443, "grad_norm": 1.3879817724227905, "learning_rate": 7.771723765463267e-05, "loss": 9.4433, "step": 976 }, { "epoch": 0.3370127630217316, "grad_norm": 1.2487505674362183, "learning_rate": 7.767051232452426e-05, "loss": 9.4916, "step": 977 }, { "epoch": 0.337357709555019, "grad_norm": 1.3047330379486084, "learning_rate": 7.762375213572254e-05, "loss": 9.5346, "step": 978 }, { "epoch": 0.3377026560883063, "grad_norm": 1.3577327728271484, "learning_rate": 7.7576957147135e-05, "loss": 9.4706, "step": 979 }, { "epoch": 0.33804760262159367, "grad_norm": 1.3883907794952393, "learning_rate": 7.753012741771288e-05, "loss": 9.4835, "step": 980 }, { "epoch": 0.338392549154881, "grad_norm": 1.4198276996612549, "learning_rate": 7.748326300645126e-05, "loss": 9.4446, "step": 981 }, { "epoch": 0.33873749568816836, "grad_norm": 1.4020169973373413, "learning_rate": 7.743636397238884e-05, "loss": 9.4671, "step": 982 }, { "epoch": 0.33908244222145567, "grad_norm": 1.375780463218689, "learning_rate": 7.738943037460802e-05, "loss": 9.4689, "step": 983 }, { "epoch": 0.33942738875474304, "grad_norm": 1.4482146501541138, "learning_rate": 7.734246227223464e-05, "loss": 9.451, "step": 984 }, { "epoch": 0.33977233528803036, "grad_norm": 1.3532198667526245, "learning_rate": 7.729545972443812e-05, "loss": 9.522, "step": 985 }, { "epoch": 0.3401172818213177, "grad_norm": 1.3163398504257202, "learning_rate": 7.724842279043121e-05, "loss": 9.4935, "step": 986 }, { "epoch": 0.34046222835460505, "grad_norm": 1.2999087572097778, "learning_rate": 7.720135152946997e-05, "loss": 9.5352, "step": 987 }, { "epoch": 0.34080717488789236, "grad_norm": 1.3928954601287842, "learning_rate": 7.715424600085373e-05, "loss": 9.4169, "step": 988 }, { "epoch": 0.34115212142117973, "grad_norm": 1.4478076696395874, "learning_rate": 7.710710626392499e-05, "loss": 9.4649, "step": 989 }, { "epoch": 0.34149706795446705, "grad_norm": 1.4046027660369873, "learning_rate": 7.705993237806932e-05, "loss": 9.4089, "step": 990 }, { "epoch": 0.3418420144877544, "grad_norm": 1.5680257081985474, "learning_rate": 7.701272440271534e-05, "loss": 9.4567, "step": 991 }, { "epoch": 0.34218696102104174, "grad_norm": 1.5575940608978271, "learning_rate": 7.696548239733461e-05, "loss": 9.4105, "step": 992 }, { "epoch": 0.34253190755432905, "grad_norm": 1.539071798324585, "learning_rate": 7.691820642144154e-05, "loss": 9.4895, "step": 993 }, { "epoch": 0.3428768540876164, "grad_norm": 1.5203800201416016, "learning_rate": 7.687089653459336e-05, "loss": 9.4102, "step": 994 }, { "epoch": 0.34322180062090374, "grad_norm": 1.6048343181610107, "learning_rate": 7.682355279639e-05, "loss": 9.4627, "step": 995 }, { "epoch": 0.3435667471541911, "grad_norm": 1.4530948400497437, "learning_rate": 7.677617526647403e-05, "loss": 9.4021, "step": 996 }, { "epoch": 0.3439116936874784, "grad_norm": 1.6160320043563843, "learning_rate": 7.672876400453063e-05, "loss": 9.3765, "step": 997 }, { "epoch": 0.3442566402207658, "grad_norm": 1.5577583312988281, "learning_rate": 7.668131907028746e-05, "loss": 9.396, "step": 998 }, { "epoch": 0.3446015867540531, "grad_norm": 1.5688533782958984, "learning_rate": 7.663384052351454e-05, "loss": 9.399, "step": 999 }, { "epoch": 0.3449465332873405, "grad_norm": 1.8562169075012207, "learning_rate": 7.658632842402432e-05, "loss": 9.336, "step": 1000 }, { "epoch": 0.3452914798206278, "grad_norm": 0.8915877342224121, "learning_rate": 7.653878283167148e-05, "loss": 9.5921, "step": 1001 }, { "epoch": 0.3456364263539151, "grad_norm": 1.0582910776138306, "learning_rate": 7.649120380635287e-05, "loss": 9.5403, "step": 1002 }, { "epoch": 0.3459813728872025, "grad_norm": 1.0659931898117065, "learning_rate": 7.64435914080075e-05, "loss": 9.4967, "step": 1003 }, { "epoch": 0.3463263194204898, "grad_norm": 1.1160966157913208, "learning_rate": 7.639594569661641e-05, "loss": 9.5211, "step": 1004 }, { "epoch": 0.3466712659537772, "grad_norm": 1.0769208669662476, "learning_rate": 7.634826673220259e-05, "loss": 9.5474, "step": 1005 }, { "epoch": 0.3470162124870645, "grad_norm": 1.1200584173202515, "learning_rate": 7.630055457483093e-05, "loss": 9.5184, "step": 1006 }, { "epoch": 0.34736115902035186, "grad_norm": 1.1214946508407593, "learning_rate": 7.625280928460819e-05, "loss": 9.5051, "step": 1007 }, { "epoch": 0.3477061055536392, "grad_norm": 1.2545450925827026, "learning_rate": 7.620503092168277e-05, "loss": 9.5243, "step": 1008 }, { "epoch": 0.34805105208692655, "grad_norm": 1.102694034576416, "learning_rate": 7.615721954624477e-05, "loss": 9.5558, "step": 1009 }, { "epoch": 0.34839599862021386, "grad_norm": 1.1054221391677856, "learning_rate": 7.610937521852594e-05, "loss": 9.5215, "step": 1010 }, { "epoch": 0.3487409451535012, "grad_norm": 1.1642522811889648, "learning_rate": 7.606149799879947e-05, "loss": 9.5085, "step": 1011 }, { "epoch": 0.34908589168678855, "grad_norm": 1.163087010383606, "learning_rate": 7.601358794738002e-05, "loss": 9.532, "step": 1012 }, { "epoch": 0.34943083822007587, "grad_norm": 1.2257179021835327, "learning_rate": 7.59656451246236e-05, "loss": 9.5124, "step": 1013 }, { "epoch": 0.34977578475336324, "grad_norm": 1.1685041189193726, "learning_rate": 7.59176695909275e-05, "loss": 9.5394, "step": 1014 }, { "epoch": 0.35012073128665055, "grad_norm": 1.3486417531967163, "learning_rate": 7.586966140673024e-05, "loss": 9.5435, "step": 1015 }, { "epoch": 0.3504656778199379, "grad_norm": 1.178091049194336, "learning_rate": 7.582162063251144e-05, "loss": 9.4923, "step": 1016 }, { "epoch": 0.35081062435322524, "grad_norm": 1.2380973100662231, "learning_rate": 7.57735473287918e-05, "loss": 9.5371, "step": 1017 }, { "epoch": 0.3511555708865126, "grad_norm": 1.2036174535751343, "learning_rate": 7.572544155613299e-05, "loss": 9.5282, "step": 1018 }, { "epoch": 0.35150051741979993, "grad_norm": 1.240722417831421, "learning_rate": 7.567730337513762e-05, "loss": 9.5513, "step": 1019 }, { "epoch": 0.35184546395308725, "grad_norm": 1.3504445552825928, "learning_rate": 7.562913284644906e-05, "loss": 9.5439, "step": 1020 }, { "epoch": 0.3521904104863746, "grad_norm": 1.3790074586868286, "learning_rate": 7.558093003075146e-05, "loss": 9.4826, "step": 1021 }, { "epoch": 0.35253535701966193, "grad_norm": 1.3520622253417969, "learning_rate": 7.55326949887697e-05, "loss": 9.4962, "step": 1022 }, { "epoch": 0.3528803035529493, "grad_norm": 1.0791302919387817, "learning_rate": 7.548442778126916e-05, "loss": 9.5365, "step": 1023 }, { "epoch": 0.3532252500862366, "grad_norm": 1.5069971084594727, "learning_rate": 7.543612846905579e-05, "loss": 9.5134, "step": 1024 }, { "epoch": 0.353570196619524, "grad_norm": 1.3267474174499512, "learning_rate": 7.538779711297601e-05, "loss": 9.4633, "step": 1025 }, { "epoch": 0.3539151431528113, "grad_norm": 1.2925893068313599, "learning_rate": 7.533943377391658e-05, "loss": 9.512, "step": 1026 }, { "epoch": 0.3542600896860987, "grad_norm": 1.2562297582626343, "learning_rate": 7.529103851280454e-05, "loss": 9.5156, "step": 1027 }, { "epoch": 0.354605036219386, "grad_norm": 1.2134735584259033, "learning_rate": 7.524261139060715e-05, "loss": 9.5536, "step": 1028 }, { "epoch": 0.3549499827526733, "grad_norm": 1.3246090412139893, "learning_rate": 7.519415246833186e-05, "loss": 9.4857, "step": 1029 }, { "epoch": 0.3552949292859607, "grad_norm": 1.234519362449646, "learning_rate": 7.514566180702609e-05, "loss": 9.4969, "step": 1030 }, { "epoch": 0.355639875819248, "grad_norm": 1.3051451444625854, "learning_rate": 7.509713946777731e-05, "loss": 9.5441, "step": 1031 }, { "epoch": 0.35598482235253537, "grad_norm": 1.420652985572815, "learning_rate": 7.504858551171289e-05, "loss": 9.4662, "step": 1032 }, { "epoch": 0.3563297688858227, "grad_norm": 1.3639212846755981, "learning_rate": 7.500000000000001e-05, "loss": 9.4872, "step": 1033 }, { "epoch": 0.35667471541911006, "grad_norm": 1.3951948881149292, "learning_rate": 7.49513829938456e-05, "loss": 9.4484, "step": 1034 }, { "epoch": 0.35701966195239737, "grad_norm": 1.2805373668670654, "learning_rate": 7.49027345544963e-05, "loss": 9.4841, "step": 1035 }, { "epoch": 0.35736460848568474, "grad_norm": 1.3282732963562012, "learning_rate": 7.485405474323832e-05, "loss": 9.4856, "step": 1036 }, { "epoch": 0.35770955501897206, "grad_norm": 1.301007866859436, "learning_rate": 7.48053436213974e-05, "loss": 9.4881, "step": 1037 }, { "epoch": 0.3580545015522594, "grad_norm": 1.2560900449752808, "learning_rate": 7.47566012503387e-05, "loss": 9.5065, "step": 1038 }, { "epoch": 0.35839944808554675, "grad_norm": 1.4078426361083984, "learning_rate": 7.470782769146679e-05, "loss": 9.4497, "step": 1039 }, { "epoch": 0.35874439461883406, "grad_norm": 1.4189677238464355, "learning_rate": 7.465902300622552e-05, "loss": 9.5039, "step": 1040 }, { "epoch": 0.35908934115212143, "grad_norm": 1.4221440553665161, "learning_rate": 7.461018725609794e-05, "loss": 9.4685, "step": 1041 }, { "epoch": 0.35943428768540875, "grad_norm": 1.4293962717056274, "learning_rate": 7.456132050260622e-05, "loss": 9.4399, "step": 1042 }, { "epoch": 0.3597792342186961, "grad_norm": 1.5681432485580444, "learning_rate": 7.451242280731162e-05, "loss": 9.4055, "step": 1043 }, { "epoch": 0.36012418075198344, "grad_norm": 1.488181471824646, "learning_rate": 7.446349423181435e-05, "loss": 9.4348, "step": 1044 }, { "epoch": 0.3604691272852708, "grad_norm": 1.6025547981262207, "learning_rate": 7.441453483775354e-05, "loss": 9.4014, "step": 1045 }, { "epoch": 0.3608140738185581, "grad_norm": 1.429567575454712, "learning_rate": 7.436554468680714e-05, "loss": 9.4213, "step": 1046 }, { "epoch": 0.36115902035184544, "grad_norm": 1.5469809770584106, "learning_rate": 7.431652384069186e-05, "loss": 9.4063, "step": 1047 }, { "epoch": 0.3615039668851328, "grad_norm": 1.6328766345977783, "learning_rate": 7.426747236116305e-05, "loss": 9.3885, "step": 1048 }, { "epoch": 0.3618489134184201, "grad_norm": 1.6448739767074585, "learning_rate": 7.42183903100147e-05, "loss": 9.4563, "step": 1049 }, { "epoch": 0.3621938599517075, "grad_norm": 1.7313497066497803, "learning_rate": 7.416927774907923e-05, "loss": 9.3958, "step": 1050 }, { "epoch": 0.3625388064849948, "grad_norm": 0.9497975707054138, "learning_rate": 7.41201347402276e-05, "loss": 9.6031, "step": 1051 }, { "epoch": 0.3628837530182822, "grad_norm": 0.9865735173225403, "learning_rate": 7.407096134536903e-05, "loss": 9.5727, "step": 1052 }, { "epoch": 0.3632286995515695, "grad_norm": 0.9498660564422607, "learning_rate": 7.40217576264511e-05, "loss": 9.6308, "step": 1053 }, { "epoch": 0.36357364608485687, "grad_norm": 1.051511287689209, "learning_rate": 7.397252364545954e-05, "loss": 9.6518, "step": 1054 }, { "epoch": 0.3639185926181442, "grad_norm": 1.1738197803497314, "learning_rate": 7.392325946441822e-05, "loss": 9.4939, "step": 1055 }, { "epoch": 0.3642635391514315, "grad_norm": 1.02397882938385, "learning_rate": 7.387396514538904e-05, "loss": 9.608, "step": 1056 }, { "epoch": 0.3646084856847189, "grad_norm": 1.2825227975845337, "learning_rate": 7.382464075047192e-05, "loss": 9.5337, "step": 1057 }, { "epoch": 0.3649534322180062, "grad_norm": 1.1852742433547974, "learning_rate": 7.37752863418046e-05, "loss": 9.5027, "step": 1058 }, { "epoch": 0.36529837875129356, "grad_norm": 1.1019452810287476, "learning_rate": 7.372590198156266e-05, "loss": 9.4872, "step": 1059 }, { "epoch": 0.3656433252845809, "grad_norm": 1.1270240545272827, "learning_rate": 7.367648773195944e-05, "loss": 9.6016, "step": 1060 }, { "epoch": 0.36598827181786825, "grad_norm": 1.2325217723846436, "learning_rate": 7.362704365524586e-05, "loss": 9.4971, "step": 1061 }, { "epoch": 0.36633321835115557, "grad_norm": 1.1359938383102417, "learning_rate": 7.357756981371052e-05, "loss": 9.4867, "step": 1062 }, { "epoch": 0.36667816488444294, "grad_norm": 1.264073133468628, "learning_rate": 7.352806626967943e-05, "loss": 9.4759, "step": 1063 }, { "epoch": 0.36702311141773025, "grad_norm": 1.2164510488510132, "learning_rate": 7.347853308551604e-05, "loss": 9.5165, "step": 1064 }, { "epoch": 0.36736805795101757, "grad_norm": 1.2314850091934204, "learning_rate": 7.342897032362118e-05, "loss": 9.5585, "step": 1065 }, { "epoch": 0.36771300448430494, "grad_norm": 1.1831406354904175, "learning_rate": 7.33793780464329e-05, "loss": 9.5417, "step": 1066 }, { "epoch": 0.36805795101759226, "grad_norm": 1.194286823272705, "learning_rate": 7.332975631642641e-05, "loss": 9.5301, "step": 1067 }, { "epoch": 0.3684028975508796, "grad_norm": 1.2886695861816406, "learning_rate": 7.32801051961141e-05, "loss": 9.4868, "step": 1068 }, { "epoch": 0.36874784408416694, "grad_norm": 1.3282010555267334, "learning_rate": 7.323042474804532e-05, "loss": 9.5042, "step": 1069 }, { "epoch": 0.3690927906174543, "grad_norm": 1.2334239482879639, "learning_rate": 7.318071503480639e-05, "loss": 9.5492, "step": 1070 }, { "epoch": 0.36943773715074163, "grad_norm": 1.3581160306930542, "learning_rate": 7.31309761190205e-05, "loss": 9.5154, "step": 1071 }, { "epoch": 0.369782683684029, "grad_norm": 1.441379427909851, "learning_rate": 7.308120806334761e-05, "loss": 9.4175, "step": 1072 }, { "epoch": 0.3701276302173163, "grad_norm": 1.3497183322906494, "learning_rate": 7.303141093048445e-05, "loss": 9.5024, "step": 1073 }, { "epoch": 0.37047257675060363, "grad_norm": 1.3548775911331177, "learning_rate": 7.298158478316429e-05, "loss": 9.5069, "step": 1074 }, { "epoch": 0.370817523283891, "grad_norm": 1.3406723737716675, "learning_rate": 7.2931729684157e-05, "loss": 9.5365, "step": 1075 }, { "epoch": 0.3711624698171783, "grad_norm": 1.3262616395950317, "learning_rate": 7.288184569626895e-05, "loss": 9.4802, "step": 1076 }, { "epoch": 0.3715074163504657, "grad_norm": 1.2792712450027466, "learning_rate": 7.283193288234286e-05, "loss": 9.4721, "step": 1077 }, { "epoch": 0.371852362883753, "grad_norm": 1.2492791414260864, "learning_rate": 7.278199130525776e-05, "loss": 9.5401, "step": 1078 }, { "epoch": 0.3721973094170404, "grad_norm": 1.3007467985153198, "learning_rate": 7.273202102792894e-05, "loss": 9.4928, "step": 1079 }, { "epoch": 0.3725422559503277, "grad_norm": 1.2172558307647705, "learning_rate": 7.268202211330787e-05, "loss": 9.4789, "step": 1080 }, { "epoch": 0.37288720248361507, "grad_norm": 1.348846673965454, "learning_rate": 7.263199462438203e-05, "loss": 9.5034, "step": 1081 }, { "epoch": 0.3732321490169024, "grad_norm": 1.4215086698532104, "learning_rate": 7.258193862417494e-05, "loss": 9.4715, "step": 1082 }, { "epoch": 0.3735770955501897, "grad_norm": 1.390318751335144, "learning_rate": 7.253185417574604e-05, "loss": 9.5005, "step": 1083 }, { "epoch": 0.37392204208347707, "grad_norm": 1.3486655950546265, "learning_rate": 7.248174134219058e-05, "loss": 9.4772, "step": 1084 }, { "epoch": 0.3742669886167644, "grad_norm": 1.4781957864761353, "learning_rate": 7.243160018663962e-05, "loss": 9.4457, "step": 1085 }, { "epoch": 0.37461193515005176, "grad_norm": 1.3520734310150146, "learning_rate": 7.238143077225983e-05, "loss": 9.5074, "step": 1086 }, { "epoch": 0.37495688168333907, "grad_norm": 1.4090464115142822, "learning_rate": 7.233123316225357e-05, "loss": 9.425, "step": 1087 }, { "epoch": 0.37530182821662644, "grad_norm": 1.2668166160583496, "learning_rate": 7.228100741985861e-05, "loss": 9.4516, "step": 1088 }, { "epoch": 0.37564677474991376, "grad_norm": 1.4521373510360718, "learning_rate": 7.223075360834827e-05, "loss": 9.4834, "step": 1089 }, { "epoch": 0.37599172128320113, "grad_norm": 1.2660518884658813, "learning_rate": 7.218047179103112e-05, "loss": 9.5091, "step": 1090 }, { "epoch": 0.37633666781648845, "grad_norm": 1.4445570707321167, "learning_rate": 7.213016203125113e-05, "loss": 9.5, "step": 1091 }, { "epoch": 0.37668161434977576, "grad_norm": 1.3990899324417114, "learning_rate": 7.207982439238739e-05, "loss": 9.4546, "step": 1092 }, { "epoch": 0.37702656088306313, "grad_norm": 1.2153910398483276, "learning_rate": 7.20294589378541e-05, "loss": 9.5494, "step": 1093 }, { "epoch": 0.37737150741635045, "grad_norm": 1.382103681564331, "learning_rate": 7.197906573110059e-05, "loss": 9.419, "step": 1094 }, { "epoch": 0.3777164539496378, "grad_norm": 1.4809362888336182, "learning_rate": 7.192864483561103e-05, "loss": 9.4376, "step": 1095 }, { "epoch": 0.37806140048292514, "grad_norm": 1.5676347017288208, "learning_rate": 7.18781963149046e-05, "loss": 9.4146, "step": 1096 }, { "epoch": 0.3784063470162125, "grad_norm": 1.551276683807373, "learning_rate": 7.18277202325352e-05, "loss": 9.4692, "step": 1097 }, { "epoch": 0.3787512935494998, "grad_norm": 1.5128159523010254, "learning_rate": 7.177721665209146e-05, "loss": 9.4816, "step": 1098 }, { "epoch": 0.3790962400827872, "grad_norm": 1.534871220588684, "learning_rate": 7.172668563719667e-05, "loss": 9.4842, "step": 1099 }, { "epoch": 0.3794411866160745, "grad_norm": 1.7613797187805176, "learning_rate": 7.167612725150867e-05, "loss": 9.4056, "step": 1100 }, { "epoch": 0.3797861331493618, "grad_norm": 0.8986926674842834, "learning_rate": 7.162554155871979e-05, "loss": 9.6347, "step": 1101 }, { "epoch": 0.3801310796826492, "grad_norm": 1.0825634002685547, "learning_rate": 7.157492862255676e-05, "loss": 9.5473, "step": 1102 }, { "epoch": 0.3804760262159365, "grad_norm": 1.0305083990097046, "learning_rate": 7.152428850678061e-05, "loss": 9.5789, "step": 1103 }, { "epoch": 0.3808209727492239, "grad_norm": 1.0649081468582153, "learning_rate": 7.147362127518664e-05, "loss": 9.5759, "step": 1104 }, { "epoch": 0.3811659192825112, "grad_norm": 1.0573383569717407, "learning_rate": 7.14229269916043e-05, "loss": 9.5686, "step": 1105 }, { "epoch": 0.38151086581579857, "grad_norm": 1.024278998374939, "learning_rate": 7.137220571989708e-05, "loss": 9.5416, "step": 1106 }, { "epoch": 0.3818558123490859, "grad_norm": 1.1951700448989868, "learning_rate": 7.132145752396257e-05, "loss": 9.5353, "step": 1107 }, { "epoch": 0.38220075888237326, "grad_norm": 1.1564604043960571, "learning_rate": 7.127068246773215e-05, "loss": 9.5338, "step": 1108 }, { "epoch": 0.3825457054156606, "grad_norm": 1.0238999128341675, "learning_rate": 7.121988061517115e-05, "loss": 9.6087, "step": 1109 }, { "epoch": 0.3828906519489479, "grad_norm": 1.205392599105835, "learning_rate": 7.116905203027859e-05, "loss": 9.4998, "step": 1110 }, { "epoch": 0.38323559848223526, "grad_norm": 1.1162025928497314, "learning_rate": 7.111819677708716e-05, "loss": 9.5259, "step": 1111 }, { "epoch": 0.3835805450155226, "grad_norm": 1.2404072284698486, "learning_rate": 7.106731491966321e-05, "loss": 9.525, "step": 1112 }, { "epoch": 0.38392549154880995, "grad_norm": 1.1695785522460938, "learning_rate": 7.101640652210654e-05, "loss": 9.4944, "step": 1113 }, { "epoch": 0.38427043808209727, "grad_norm": 1.082716703414917, "learning_rate": 7.096547164855044e-05, "loss": 9.5486, "step": 1114 }, { "epoch": 0.38461538461538464, "grad_norm": 1.2770994901657104, "learning_rate": 7.09145103631615e-05, "loss": 9.4949, "step": 1115 }, { "epoch": 0.38496033114867195, "grad_norm": 1.373692512512207, "learning_rate": 7.086352273013963e-05, "loss": 9.4809, "step": 1116 }, { "epoch": 0.3853052776819593, "grad_norm": 1.1916162967681885, "learning_rate": 7.08125088137179e-05, "loss": 9.5082, "step": 1117 }, { "epoch": 0.38565022421524664, "grad_norm": 1.142784833908081, "learning_rate": 7.076146867816249e-05, "loss": 9.5339, "step": 1118 }, { "epoch": 0.38599517074853396, "grad_norm": 1.2644896507263184, "learning_rate": 7.071040238777264e-05, "loss": 9.5302, "step": 1119 }, { "epoch": 0.3863401172818213, "grad_norm": 1.3028452396392822, "learning_rate": 7.065931000688053e-05, "loss": 9.4775, "step": 1120 }, { "epoch": 0.38668506381510864, "grad_norm": 1.2342731952667236, "learning_rate": 7.060819159985116e-05, "loss": 9.5798, "step": 1121 }, { "epoch": 0.387030010348396, "grad_norm": 1.328894019126892, "learning_rate": 7.05570472310824e-05, "loss": 9.5172, "step": 1122 }, { "epoch": 0.38737495688168333, "grad_norm": 1.1947658061981201, "learning_rate": 7.05058769650048e-05, "loss": 9.4893, "step": 1123 }, { "epoch": 0.3877199034149707, "grad_norm": 1.2407337427139282, "learning_rate": 7.045468086608145e-05, "loss": 9.506, "step": 1124 }, { "epoch": 0.388064849948258, "grad_norm": 1.3401503562927246, "learning_rate": 7.040345899880809e-05, "loss": 9.4722, "step": 1125 }, { "epoch": 0.38840979648154533, "grad_norm": 1.6716581583023071, "learning_rate": 7.035221142771288e-05, "loss": 9.3779, "step": 1126 }, { "epoch": 0.3887547430148327, "grad_norm": 1.2554329633712769, "learning_rate": 7.030093821735639e-05, "loss": 9.5142, "step": 1127 }, { "epoch": 0.38909968954812, "grad_norm": 1.3145281076431274, "learning_rate": 7.024963943233141e-05, "loss": 9.4608, "step": 1128 }, { "epoch": 0.3894446360814074, "grad_norm": 1.3281456232070923, "learning_rate": 7.019831513726305e-05, "loss": 9.4716, "step": 1129 }, { "epoch": 0.3897895826146947, "grad_norm": 1.3885881900787354, "learning_rate": 7.014696539680849e-05, "loss": 9.5253, "step": 1130 }, { "epoch": 0.3901345291479821, "grad_norm": 1.44611394405365, "learning_rate": 7.009559027565699e-05, "loss": 9.508, "step": 1131 }, { "epoch": 0.3904794756812694, "grad_norm": 1.4117335081100464, "learning_rate": 7.004418983852979e-05, "loss": 9.4963, "step": 1132 }, { "epoch": 0.39082442221455677, "grad_norm": 1.3524430990219116, "learning_rate": 6.999276415018001e-05, "loss": 9.5305, "step": 1133 }, { "epoch": 0.3911693687478441, "grad_norm": 1.4730901718139648, "learning_rate": 6.994131327539258e-05, "loss": 9.4665, "step": 1134 }, { "epoch": 0.3915143152811314, "grad_norm": 1.216262698173523, "learning_rate": 6.988983727898414e-05, "loss": 9.5145, "step": 1135 }, { "epoch": 0.39185926181441877, "grad_norm": 1.3632173538208008, "learning_rate": 6.983833622580304e-05, "loss": 9.4898, "step": 1136 }, { "epoch": 0.3922042083477061, "grad_norm": 1.3551784753799438, "learning_rate": 6.978681018072912e-05, "loss": 9.5128, "step": 1137 }, { "epoch": 0.39254915488099346, "grad_norm": 1.3956425189971924, "learning_rate": 6.973525920867378e-05, "loss": 9.4895, "step": 1138 }, { "epoch": 0.39289410141428077, "grad_norm": 1.3603055477142334, "learning_rate": 6.968368337457973e-05, "loss": 9.4716, "step": 1139 }, { "epoch": 0.39323904794756814, "grad_norm": 1.5052947998046875, "learning_rate": 6.96320827434211e-05, "loss": 9.4741, "step": 1140 }, { "epoch": 0.39358399448085546, "grad_norm": 1.5152782201766968, "learning_rate": 6.95804573802032e-05, "loss": 9.441, "step": 1141 }, { "epoch": 0.39392894101414283, "grad_norm": 1.5431822538375854, "learning_rate": 6.95288073499625e-05, "loss": 9.4526, "step": 1142 }, { "epoch": 0.39427388754743015, "grad_norm": 1.4436509609222412, "learning_rate": 6.947713271776656e-05, "loss": 9.4532, "step": 1143 }, { "epoch": 0.39461883408071746, "grad_norm": 1.528663158416748, "learning_rate": 6.942543354871393e-05, "loss": 9.4697, "step": 1144 }, { "epoch": 0.39496378061400483, "grad_norm": 1.472640037536621, "learning_rate": 6.937370990793407e-05, "loss": 9.4878, "step": 1145 }, { "epoch": 0.39530872714729215, "grad_norm": 1.5636851787567139, "learning_rate": 6.932196186058725e-05, "loss": 9.4147, "step": 1146 }, { "epoch": 0.3956536736805795, "grad_norm": 1.5114054679870605, "learning_rate": 6.927018947186453e-05, "loss": 9.4385, "step": 1147 }, { "epoch": 0.39599862021386684, "grad_norm": 1.6888611316680908, "learning_rate": 6.921839280698758e-05, "loss": 9.3557, "step": 1148 }, { "epoch": 0.3963435667471542, "grad_norm": 1.7319715023040771, "learning_rate": 6.916657193120871e-05, "loss": 9.3959, "step": 1149 }, { "epoch": 0.3966885132804415, "grad_norm": 1.6487864255905151, "learning_rate": 6.91147269098107e-05, "loss": 9.3655, "step": 1150 }, { "epoch": 0.3970334598137289, "grad_norm": 0.8318474292755127, "learning_rate": 6.90628578081067e-05, "loss": 9.5905, "step": 1151 }, { "epoch": 0.3973784063470162, "grad_norm": 0.986283540725708, "learning_rate": 6.901096469144032e-05, "loss": 9.5246, "step": 1152 }, { "epoch": 0.3977233528803035, "grad_norm": 1.0239535570144653, "learning_rate": 6.895904762518528e-05, "loss": 9.5762, "step": 1153 }, { "epoch": 0.3980682994135909, "grad_norm": 1.1255637407302856, "learning_rate": 6.89071066747456e-05, "loss": 9.4697, "step": 1154 }, { "epoch": 0.3984132459468782, "grad_norm": 1.1051015853881836, "learning_rate": 6.88551419055553e-05, "loss": 9.5427, "step": 1155 }, { "epoch": 0.3987581924801656, "grad_norm": 1.1070098876953125, "learning_rate": 6.880315338307843e-05, "loss": 9.5184, "step": 1156 }, { "epoch": 0.3991031390134529, "grad_norm": 1.0810296535491943, "learning_rate": 6.875114117280898e-05, "loss": 9.5697, "step": 1157 }, { "epoch": 0.3994480855467403, "grad_norm": 1.0810821056365967, "learning_rate": 6.869910534027077e-05, "loss": 9.5222, "step": 1158 }, { "epoch": 0.3997930320800276, "grad_norm": 1.1185795068740845, "learning_rate": 6.864704595101737e-05, "loss": 9.5386, "step": 1159 }, { "epoch": 0.40013797861331496, "grad_norm": 1.0730277299880981, "learning_rate": 6.859496307063205e-05, "loss": 9.5331, "step": 1160 }, { "epoch": 0.4004829251466023, "grad_norm": 1.2169557809829712, "learning_rate": 6.854285676472764e-05, "loss": 9.5272, "step": 1161 }, { "epoch": 0.4008278716798896, "grad_norm": 1.1956943273544312, "learning_rate": 6.849072709894651e-05, "loss": 9.4893, "step": 1162 }, { "epoch": 0.40117281821317696, "grad_norm": 1.1670726537704468, "learning_rate": 6.843857413896047e-05, "loss": 9.5798, "step": 1163 }, { "epoch": 0.4015177647464643, "grad_norm": 1.2046582698822021, "learning_rate": 6.838639795047061e-05, "loss": 9.5015, "step": 1164 }, { "epoch": 0.40186271127975165, "grad_norm": 1.232411503791809, "learning_rate": 6.833419859920736e-05, "loss": 9.5492, "step": 1165 }, { "epoch": 0.40220765781303897, "grad_norm": 1.1485507488250732, "learning_rate": 6.82819761509303e-05, "loss": 9.5267, "step": 1166 }, { "epoch": 0.40255260434632634, "grad_norm": 1.2545028924942017, "learning_rate": 6.822973067142808e-05, "loss": 9.5099, "step": 1167 }, { "epoch": 0.40289755087961365, "grad_norm": 1.3489080667495728, "learning_rate": 6.81774622265184e-05, "loss": 9.4798, "step": 1168 }, { "epoch": 0.403242497412901, "grad_norm": 1.2620747089385986, "learning_rate": 6.812517088204788e-05, "loss": 9.4896, "step": 1169 }, { "epoch": 0.40358744394618834, "grad_norm": 1.1811059713363647, "learning_rate": 6.807285670389199e-05, "loss": 9.5281, "step": 1170 }, { "epoch": 0.40393239047947566, "grad_norm": 1.2100896835327148, "learning_rate": 6.802051975795495e-05, "loss": 9.5105, "step": 1171 }, { "epoch": 0.404277337012763, "grad_norm": 1.364280104637146, "learning_rate": 6.79681601101697e-05, "loss": 9.4185, "step": 1172 }, { "epoch": 0.40462228354605034, "grad_norm": 1.2773628234863281, "learning_rate": 6.791577782649771e-05, "loss": 9.5374, "step": 1173 }, { "epoch": 0.4049672300793377, "grad_norm": 1.2328367233276367, "learning_rate": 6.786337297292907e-05, "loss": 9.5104, "step": 1174 }, { "epoch": 0.40531217661262503, "grad_norm": 1.275637149810791, "learning_rate": 6.781094561548219e-05, "loss": 9.4765, "step": 1175 }, { "epoch": 0.4056571231459124, "grad_norm": 1.2586758136749268, "learning_rate": 6.775849582020393e-05, "loss": 9.5087, "step": 1176 }, { "epoch": 0.4060020696791997, "grad_norm": 1.2569774389266968, "learning_rate": 6.770602365316931e-05, "loss": 9.5354, "step": 1177 }, { "epoch": 0.4063470162124871, "grad_norm": 1.2480967044830322, "learning_rate": 6.765352918048167e-05, "loss": 9.4769, "step": 1178 }, { "epoch": 0.4066919627457744, "grad_norm": 1.3023992776870728, "learning_rate": 6.760101246827232e-05, "loss": 9.494, "step": 1179 }, { "epoch": 0.4070369092790617, "grad_norm": 1.3120083808898926, "learning_rate": 6.754847358270067e-05, "loss": 9.4736, "step": 1180 }, { "epoch": 0.4073818558123491, "grad_norm": 1.1869686841964722, "learning_rate": 6.749591258995401e-05, "loss": 9.4962, "step": 1181 }, { "epoch": 0.4077268023456364, "grad_norm": 1.373629093170166, "learning_rate": 6.744332955624751e-05, "loss": 9.4913, "step": 1182 }, { "epoch": 0.4080717488789238, "grad_norm": 1.3023265600204468, "learning_rate": 6.739072454782411e-05, "loss": 9.5136, "step": 1183 }, { "epoch": 0.4084166954122111, "grad_norm": 1.282807469367981, "learning_rate": 6.733809763095443e-05, "loss": 9.5072, "step": 1184 }, { "epoch": 0.40876164194549847, "grad_norm": 1.3017750978469849, "learning_rate": 6.728544887193667e-05, "loss": 9.5657, "step": 1185 }, { "epoch": 0.4091065884787858, "grad_norm": 1.6445789337158203, "learning_rate": 6.723277833709655e-05, "loss": 9.435, "step": 1186 }, { "epoch": 0.40945153501207315, "grad_norm": 1.4415967464447021, "learning_rate": 6.718008609278727e-05, "loss": 9.4222, "step": 1187 }, { "epoch": 0.40979648154536047, "grad_norm": 1.3269176483154297, "learning_rate": 6.71273722053893e-05, "loss": 9.471, "step": 1188 }, { "epoch": 0.4101414280786478, "grad_norm": 1.4464130401611328, "learning_rate": 6.707463674131045e-05, "loss": 9.4682, "step": 1189 }, { "epoch": 0.41048637461193516, "grad_norm": 1.451816201210022, "learning_rate": 6.702187976698567e-05, "loss": 9.4768, "step": 1190 }, { "epoch": 0.4108313211452225, "grad_norm": 1.4212086200714111, "learning_rate": 6.696910134887702e-05, "loss": 9.4485, "step": 1191 }, { "epoch": 0.41117626767850984, "grad_norm": 1.429534912109375, "learning_rate": 6.691630155347357e-05, "loss": 9.4584, "step": 1192 }, { "epoch": 0.41152121421179716, "grad_norm": 1.5786341428756714, "learning_rate": 6.686348044729131e-05, "loss": 9.455, "step": 1193 }, { "epoch": 0.41186616074508453, "grad_norm": 1.4174039363861084, "learning_rate": 6.681063809687312e-05, "loss": 9.4575, "step": 1194 }, { "epoch": 0.41221110727837185, "grad_norm": 1.4542423486709595, "learning_rate": 6.675777456878856e-05, "loss": 9.4186, "step": 1195 }, { "epoch": 0.4125560538116592, "grad_norm": 1.4207855463027954, "learning_rate": 6.670488992963396e-05, "loss": 9.4742, "step": 1196 }, { "epoch": 0.41290100034494653, "grad_norm": 1.489596962928772, "learning_rate": 6.665198424603223e-05, "loss": 9.4153, "step": 1197 }, { "epoch": 0.41324594687823385, "grad_norm": 1.5289177894592285, "learning_rate": 6.65990575846327e-05, "loss": 9.4263, "step": 1198 }, { "epoch": 0.4135908934115212, "grad_norm": 1.4849228858947754, "learning_rate": 6.654611001211127e-05, "loss": 9.4248, "step": 1199 }, { "epoch": 0.41393583994480854, "grad_norm": 1.7787175178527832, "learning_rate": 6.649314159517007e-05, "loss": 9.336, "step": 1200 }, { "epoch": 0.4142807864780959, "grad_norm": 0.8549869656562805, "learning_rate": 6.644015240053754e-05, "loss": 9.5802, "step": 1201 }, { "epoch": 0.4146257330113832, "grad_norm": 1.0080626010894775, "learning_rate": 6.638714249496824e-05, "loss": 9.5279, "step": 1202 }, { "epoch": 0.4149706795446706, "grad_norm": 0.9619107842445374, "learning_rate": 6.633411194524294e-05, "loss": 9.5521, "step": 1203 }, { "epoch": 0.4153156260779579, "grad_norm": 1.1162559986114502, "learning_rate": 6.628106081816829e-05, "loss": 9.5364, "step": 1204 }, { "epoch": 0.4156605726112453, "grad_norm": 1.0343070030212402, "learning_rate": 6.622798918057693e-05, "loss": 9.5726, "step": 1205 }, { "epoch": 0.4160055191445326, "grad_norm": 1.1603822708129883, "learning_rate": 6.617489709932733e-05, "loss": 9.4792, "step": 1206 }, { "epoch": 0.4163504656778199, "grad_norm": 1.2021888494491577, "learning_rate": 6.612178464130367e-05, "loss": 9.5337, "step": 1207 }, { "epoch": 0.4166954122111073, "grad_norm": 1.0984165668487549, "learning_rate": 6.60686518734159e-05, "loss": 9.527, "step": 1208 }, { "epoch": 0.4170403587443946, "grad_norm": 1.1222621202468872, "learning_rate": 6.601549886259945e-05, "loss": 9.4886, "step": 1209 }, { "epoch": 0.417385305277682, "grad_norm": 1.0295212268829346, "learning_rate": 6.596232567581531e-05, "loss": 9.6241, "step": 1210 }, { "epoch": 0.4177302518109693, "grad_norm": 1.2314414978027344, "learning_rate": 6.590913238004987e-05, "loss": 9.4848, "step": 1211 }, { "epoch": 0.41807519834425666, "grad_norm": 1.137915015220642, "learning_rate": 6.585591904231484e-05, "loss": 9.5238, "step": 1212 }, { "epoch": 0.418420144877544, "grad_norm": 1.2005376815795898, "learning_rate": 6.580268572964722e-05, "loss": 9.553, "step": 1213 }, { "epoch": 0.41876509141083135, "grad_norm": 1.156784176826477, "learning_rate": 6.574943250910916e-05, "loss": 9.5773, "step": 1214 }, { "epoch": 0.41911003794411866, "grad_norm": 1.1334538459777832, "learning_rate": 6.569615944778785e-05, "loss": 9.5779, "step": 1215 }, { "epoch": 0.419454984477406, "grad_norm": 1.3424021005630493, "learning_rate": 6.564286661279549e-05, "loss": 9.5571, "step": 1216 }, { "epoch": 0.41979993101069335, "grad_norm": 1.2306984663009644, "learning_rate": 6.558955407126922e-05, "loss": 9.4602, "step": 1217 }, { "epoch": 0.42014487754398067, "grad_norm": 1.1734577417373657, "learning_rate": 6.553622189037099e-05, "loss": 9.516, "step": 1218 }, { "epoch": 0.42048982407726804, "grad_norm": 1.301382064819336, "learning_rate": 6.548287013728751e-05, "loss": 9.5368, "step": 1219 }, { "epoch": 0.42083477061055535, "grad_norm": 1.2797003984451294, "learning_rate": 6.54294988792301e-05, "loss": 9.5108, "step": 1220 }, { "epoch": 0.4211797171438427, "grad_norm": 1.4130553007125854, "learning_rate": 6.537610818343467e-05, "loss": 9.4416, "step": 1221 }, { "epoch": 0.42152466367713004, "grad_norm": 1.3803390264511108, "learning_rate": 6.532269811716165e-05, "loss": 9.4939, "step": 1222 }, { "epoch": 0.4218696102104174, "grad_norm": 1.276152491569519, "learning_rate": 6.526926874769584e-05, "loss": 9.5169, "step": 1223 }, { "epoch": 0.4222145567437047, "grad_norm": 1.2787901163101196, "learning_rate": 6.521582014234638e-05, "loss": 9.5251, "step": 1224 }, { "epoch": 0.42255950327699204, "grad_norm": 1.3120802640914917, "learning_rate": 6.51623523684466e-05, "loss": 9.4306, "step": 1225 }, { "epoch": 0.4229044498102794, "grad_norm": 1.2330889701843262, "learning_rate": 6.510886549335402e-05, "loss": 9.4904, "step": 1226 }, { "epoch": 0.42324939634356673, "grad_norm": 1.3258132934570312, "learning_rate": 6.505535958445023e-05, "loss": 9.4623, "step": 1227 }, { "epoch": 0.4235943428768541, "grad_norm": 1.3963433504104614, "learning_rate": 6.500183470914075e-05, "loss": 9.4231, "step": 1228 }, { "epoch": 0.4239392894101414, "grad_norm": 1.3144148588180542, "learning_rate": 6.494829093485504e-05, "loss": 9.4466, "step": 1229 }, { "epoch": 0.4242842359434288, "grad_norm": 1.3022875785827637, "learning_rate": 6.489472832904634e-05, "loss": 9.5031, "step": 1230 }, { "epoch": 0.4246291824767161, "grad_norm": 1.3690828084945679, "learning_rate": 6.484114695919161e-05, "loss": 9.5058, "step": 1231 }, { "epoch": 0.4249741290100035, "grad_norm": 1.3271464109420776, "learning_rate": 6.478754689279151e-05, "loss": 9.4568, "step": 1232 }, { "epoch": 0.4253190755432908, "grad_norm": 1.3607510328292847, "learning_rate": 6.473392819737017e-05, "loss": 9.5171, "step": 1233 }, { "epoch": 0.4256640220765781, "grad_norm": 1.3704109191894531, "learning_rate": 6.468029094047521e-05, "loss": 9.5053, "step": 1234 }, { "epoch": 0.4260089686098655, "grad_norm": 1.5395640134811401, "learning_rate": 6.46266351896777e-05, "loss": 9.4434, "step": 1235 }, { "epoch": 0.4263539151431528, "grad_norm": 1.3000792264938354, "learning_rate": 6.457296101257192e-05, "loss": 9.4975, "step": 1236 }, { "epoch": 0.42669886167644017, "grad_norm": 1.3487573862075806, "learning_rate": 6.45192684767754e-05, "loss": 9.5217, "step": 1237 }, { "epoch": 0.4270438082097275, "grad_norm": 1.372479796409607, "learning_rate": 6.44655576499288e-05, "loss": 9.4771, "step": 1238 }, { "epoch": 0.42738875474301485, "grad_norm": 1.3763235807418823, "learning_rate": 6.441182859969583e-05, "loss": 9.434, "step": 1239 }, { "epoch": 0.42773370127630217, "grad_norm": 1.475024938583374, "learning_rate": 6.435808139376313e-05, "loss": 9.3964, "step": 1240 }, { "epoch": 0.42807864780958954, "grad_norm": 1.5142197608947754, "learning_rate": 6.430431609984025e-05, "loss": 9.4465, "step": 1241 }, { "epoch": 0.42842359434287686, "grad_norm": 1.2984328269958496, "learning_rate": 6.425053278565949e-05, "loss": 9.4826, "step": 1242 }, { "epoch": 0.4287685408761642, "grad_norm": 1.619287371635437, "learning_rate": 6.419673151897587e-05, "loss": 9.4733, "step": 1243 }, { "epoch": 0.42911348740945154, "grad_norm": 1.4279025793075562, "learning_rate": 6.414291236756702e-05, "loss": 9.4634, "step": 1244 }, { "epoch": 0.42945843394273886, "grad_norm": 1.4935935735702515, "learning_rate": 6.408907539923314e-05, "loss": 9.4086, "step": 1245 }, { "epoch": 0.42980338047602623, "grad_norm": 1.606303095817566, "learning_rate": 6.403522068179679e-05, "loss": 9.4784, "step": 1246 }, { "epoch": 0.43014832700931355, "grad_norm": 1.5438969135284424, "learning_rate": 6.398134828310296e-05, "loss": 9.4186, "step": 1247 }, { "epoch": 0.4304932735426009, "grad_norm": 1.450445294380188, "learning_rate": 6.39274582710189e-05, "loss": 9.4346, "step": 1248 }, { "epoch": 0.43083822007588823, "grad_norm": 1.7464522123336792, "learning_rate": 6.387355071343405e-05, "loss": 9.3591, "step": 1249 }, { "epoch": 0.4311831666091756, "grad_norm": 1.8599432706832886, "learning_rate": 6.381962567825991e-05, "loss": 9.3323, "step": 1250 }, { "epoch": 0.4315281131424629, "grad_norm": 0.9142783284187317, "learning_rate": 6.37656832334301e-05, "loss": 9.5449, "step": 1251 }, { "epoch": 0.43187305967575024, "grad_norm": 1.044974446296692, "learning_rate": 6.371172344690004e-05, "loss": 9.5463, "step": 1252 }, { "epoch": 0.4322180062090376, "grad_norm": 1.0773589611053467, "learning_rate": 6.36577463866471e-05, "loss": 9.5608, "step": 1253 }, { "epoch": 0.4325629527423249, "grad_norm": 1.068940281867981, "learning_rate": 6.360375212067036e-05, "loss": 9.5209, "step": 1254 }, { "epoch": 0.4329078992756123, "grad_norm": 1.03683602809906, "learning_rate": 6.35497407169906e-05, "loss": 9.5599, "step": 1255 }, { "epoch": 0.4332528458088996, "grad_norm": 1.0739017724990845, "learning_rate": 6.349571224365018e-05, "loss": 9.5328, "step": 1256 }, { "epoch": 0.433597792342187, "grad_norm": 0.9906735420227051, "learning_rate": 6.344166676871295e-05, "loss": 9.5635, "step": 1257 }, { "epoch": 0.4339427388754743, "grad_norm": 1.1994469165802002, "learning_rate": 6.338760436026422e-05, "loss": 9.5827, "step": 1258 }, { "epoch": 0.4342876854087616, "grad_norm": 1.1381406784057617, "learning_rate": 6.333352508641058e-05, "loss": 9.5336, "step": 1259 }, { "epoch": 0.434632631942049, "grad_norm": 1.1975433826446533, "learning_rate": 6.327942901527989e-05, "loss": 9.5102, "step": 1260 }, { "epoch": 0.4349775784753363, "grad_norm": 1.1926690340042114, "learning_rate": 6.322531621502117e-05, "loss": 9.5124, "step": 1261 }, { "epoch": 0.4353225250086237, "grad_norm": 1.0483787059783936, "learning_rate": 6.317118675380455e-05, "loss": 9.5291, "step": 1262 }, { "epoch": 0.435667471541911, "grad_norm": 1.3115662336349487, "learning_rate": 6.311704069982108e-05, "loss": 9.5245, "step": 1263 }, { "epoch": 0.43601241807519836, "grad_norm": 1.1819570064544678, "learning_rate": 6.306287812128276e-05, "loss": 9.4733, "step": 1264 }, { "epoch": 0.4363573646084857, "grad_norm": 1.2581042051315308, "learning_rate": 6.30086990864224e-05, "loss": 9.5113, "step": 1265 }, { "epoch": 0.43670231114177305, "grad_norm": 1.2800958156585693, "learning_rate": 6.295450366349354e-05, "loss": 9.5554, "step": 1266 }, { "epoch": 0.43704725767506036, "grad_norm": 1.0594321489334106, "learning_rate": 6.290029192077035e-05, "loss": 9.5154, "step": 1267 }, { "epoch": 0.4373922042083477, "grad_norm": 1.2820416688919067, "learning_rate": 6.284606392654758e-05, "loss": 9.4685, "step": 1268 }, { "epoch": 0.43773715074163505, "grad_norm": 1.1895943880081177, "learning_rate": 6.279181974914045e-05, "loss": 9.4791, "step": 1269 }, { "epoch": 0.43808209727492237, "grad_norm": 1.3060154914855957, "learning_rate": 6.273755945688458e-05, "loss": 9.5066, "step": 1270 }, { "epoch": 0.43842704380820974, "grad_norm": 1.3100395202636719, "learning_rate": 6.268328311813582e-05, "loss": 9.4843, "step": 1271 }, { "epoch": 0.43877199034149705, "grad_norm": 1.3740859031677246, "learning_rate": 6.262899080127034e-05, "loss": 9.4552, "step": 1272 }, { "epoch": 0.4391169368747844, "grad_norm": 1.3276293277740479, "learning_rate": 6.257468257468438e-05, "loss": 9.5196, "step": 1273 }, { "epoch": 0.43946188340807174, "grad_norm": 1.3813064098358154, "learning_rate": 6.25203585067942e-05, "loss": 9.4719, "step": 1274 }, { "epoch": 0.4398068299413591, "grad_norm": 1.360579252243042, "learning_rate": 6.246601866603609e-05, "loss": 9.4658, "step": 1275 }, { "epoch": 0.44015177647464643, "grad_norm": 1.1985294818878174, "learning_rate": 6.241166312086614e-05, "loss": 9.4809, "step": 1276 }, { "epoch": 0.44049672300793374, "grad_norm": 1.4840415716171265, "learning_rate": 6.235729193976026e-05, "loss": 9.4197, "step": 1277 }, { "epoch": 0.4408416695412211, "grad_norm": 1.2561899423599243, "learning_rate": 6.230290519121405e-05, "loss": 9.5185, "step": 1278 }, { "epoch": 0.44118661607450843, "grad_norm": 1.4068700075149536, "learning_rate": 6.22485029437427e-05, "loss": 9.4594, "step": 1279 }, { "epoch": 0.4415315626077958, "grad_norm": 1.3950265645980835, "learning_rate": 6.219408526588098e-05, "loss": 9.4549, "step": 1280 }, { "epoch": 0.4418765091410831, "grad_norm": 1.3500123023986816, "learning_rate": 6.213965222618305e-05, "loss": 9.4758, "step": 1281 }, { "epoch": 0.4422214556743705, "grad_norm": 1.347732424736023, "learning_rate": 6.208520389322243e-05, "loss": 9.492, "step": 1282 }, { "epoch": 0.4425664022076578, "grad_norm": 1.3567559719085693, "learning_rate": 6.203074033559193e-05, "loss": 9.5828, "step": 1283 }, { "epoch": 0.4429113487409452, "grad_norm": 1.2305982112884521, "learning_rate": 6.197626162190351e-05, "loss": 9.5299, "step": 1284 }, { "epoch": 0.4432562952742325, "grad_norm": 1.3164910078048706, "learning_rate": 6.192176782078822e-05, "loss": 9.4837, "step": 1285 }, { "epoch": 0.4436012418075198, "grad_norm": 1.2801580429077148, "learning_rate": 6.186725900089617e-05, "loss": 9.4811, "step": 1286 }, { "epoch": 0.4439461883408072, "grad_norm": 1.5440186262130737, "learning_rate": 6.181273523089632e-05, "loss": 9.4587, "step": 1287 }, { "epoch": 0.4442911348740945, "grad_norm": 1.3695796728134155, "learning_rate": 6.175819657947655e-05, "loss": 9.5089, "step": 1288 }, { "epoch": 0.44463608140738187, "grad_norm": 1.256692886352539, "learning_rate": 6.170364311534336e-05, "loss": 9.4668, "step": 1289 }, { "epoch": 0.4449810279406692, "grad_norm": 1.450398564338684, "learning_rate": 6.164907490722205e-05, "loss": 9.4356, "step": 1290 }, { "epoch": 0.44532597447395655, "grad_norm": 1.5067511796951294, "learning_rate": 6.15944920238564e-05, "loss": 9.4279, "step": 1291 }, { "epoch": 0.44567092100724387, "grad_norm": 1.4862086772918701, "learning_rate": 6.15398945340087e-05, "loss": 9.4324, "step": 1292 }, { "epoch": 0.44601586754053124, "grad_norm": 1.4513452053070068, "learning_rate": 6.148528250645966e-05, "loss": 9.4995, "step": 1293 }, { "epoch": 0.44636081407381856, "grad_norm": 1.5199989080429077, "learning_rate": 6.14306560100083e-05, "loss": 9.44, "step": 1294 }, { "epoch": 0.4467057606071059, "grad_norm": 1.5350712537765503, "learning_rate": 6.137601511347185e-05, "loss": 9.4288, "step": 1295 }, { "epoch": 0.44705070714039324, "grad_norm": 1.4825901985168457, "learning_rate": 6.132135988568568e-05, "loss": 9.4414, "step": 1296 }, { "epoch": 0.44739565367368056, "grad_norm": 1.6072896718978882, "learning_rate": 6.126669039550325e-05, "loss": 9.4124, "step": 1297 }, { "epoch": 0.44774060020696793, "grad_norm": 1.5674636363983154, "learning_rate": 6.121200671179595e-05, "loss": 9.3757, "step": 1298 }, { "epoch": 0.44808554674025525, "grad_norm": 1.6910454034805298, "learning_rate": 6.115730890345305e-05, "loss": 9.3629, "step": 1299 }, { "epoch": 0.4484304932735426, "grad_norm": 1.6481505632400513, "learning_rate": 6.110259703938165e-05, "loss": 9.387, "step": 1300 }, { "epoch": 0.44877543980682993, "grad_norm": 0.8783053755760193, "learning_rate": 6.104787118850652e-05, "loss": 9.5823, "step": 1301 }, { "epoch": 0.4491203863401173, "grad_norm": 1.075404167175293, "learning_rate": 6.0993131419770076e-05, "loss": 9.5247, "step": 1302 }, { "epoch": 0.4494653328734046, "grad_norm": 1.0360864400863647, "learning_rate": 6.093837780213224e-05, "loss": 9.5537, "step": 1303 }, { "epoch": 0.44981027940669194, "grad_norm": 1.0971293449401855, "learning_rate": 6.08836104045704e-05, "loss": 9.5563, "step": 1304 }, { "epoch": 0.4501552259399793, "grad_norm": 1.074342966079712, "learning_rate": 6.082882929607931e-05, "loss": 9.5333, "step": 1305 }, { "epoch": 0.4505001724732666, "grad_norm": 1.1947624683380127, "learning_rate": 6.077403454567099e-05, "loss": 9.5231, "step": 1306 }, { "epoch": 0.450845119006554, "grad_norm": 1.0972341299057007, "learning_rate": 6.071922622237463e-05, "loss": 9.4951, "step": 1307 }, { "epoch": 0.4511900655398413, "grad_norm": 1.1745643615722656, "learning_rate": 6.066440439523653e-05, "loss": 9.4876, "step": 1308 }, { "epoch": 0.4515350120731287, "grad_norm": 1.2012512683868408, "learning_rate": 6.0609569133319996e-05, "loss": 9.5364, "step": 1309 }, { "epoch": 0.451879958606416, "grad_norm": 1.1683628559112549, "learning_rate": 6.0554720505705286e-05, "loss": 9.5097, "step": 1310 }, { "epoch": 0.45222490513970337, "grad_norm": 1.1659014225006104, "learning_rate": 6.049985858148948e-05, "loss": 9.5134, "step": 1311 }, { "epoch": 0.4525698516729907, "grad_norm": 1.2198443412780762, "learning_rate": 6.044498342978638e-05, "loss": 9.5175, "step": 1312 }, { "epoch": 0.452914798206278, "grad_norm": 1.1924607753753662, "learning_rate": 6.0390095119726484e-05, "loss": 9.5343, "step": 1313 }, { "epoch": 0.4532597447395654, "grad_norm": 1.3247007131576538, "learning_rate": 6.0335193720456876e-05, "loss": 9.5194, "step": 1314 }, { "epoch": 0.4536046912728527, "grad_norm": 1.2382389307022095, "learning_rate": 6.02802793011411e-05, "loss": 9.5108, "step": 1315 }, { "epoch": 0.45394963780614006, "grad_norm": 1.2551473379135132, "learning_rate": 6.0225351930959107e-05, "loss": 9.5181, "step": 1316 }, { "epoch": 0.4542945843394274, "grad_norm": 1.1353877782821655, "learning_rate": 6.0170411679107174e-05, "loss": 9.5048, "step": 1317 }, { "epoch": 0.45463953087271475, "grad_norm": 1.1790921688079834, "learning_rate": 6.011545861479783e-05, "loss": 9.5132, "step": 1318 }, { "epoch": 0.45498447740600206, "grad_norm": 1.091915249824524, "learning_rate": 6.006049280725966e-05, "loss": 9.5422, "step": 1319 }, { "epoch": 0.45532942393928943, "grad_norm": 1.1751124858856201, "learning_rate": 6.000551432573742e-05, "loss": 9.5443, "step": 1320 }, { "epoch": 0.45567437047257675, "grad_norm": 1.18860924243927, "learning_rate": 5.995052323949174e-05, "loss": 9.4933, "step": 1321 }, { "epoch": 0.45601931700586407, "grad_norm": 1.313167691230774, "learning_rate": 5.9895519617799175e-05, "loss": 9.5229, "step": 1322 }, { "epoch": 0.45636426353915144, "grad_norm": 1.3344849348068237, "learning_rate": 5.984050352995205e-05, "loss": 9.502, "step": 1323 }, { "epoch": 0.45670921007243875, "grad_norm": 1.2585493326187134, "learning_rate": 5.978547504525841e-05, "loss": 9.452, "step": 1324 }, { "epoch": 0.4570541566057261, "grad_norm": 1.2542619705200195, "learning_rate": 5.973043423304191e-05, "loss": 9.5105, "step": 1325 }, { "epoch": 0.45739910313901344, "grad_norm": 1.2945995330810547, "learning_rate": 5.967538116264174e-05, "loss": 9.4899, "step": 1326 }, { "epoch": 0.4577440496723008, "grad_norm": 1.3108856678009033, "learning_rate": 5.962031590341251e-05, "loss": 9.4645, "step": 1327 }, { "epoch": 0.45808899620558813, "grad_norm": 1.3480335474014282, "learning_rate": 5.956523852472422e-05, "loss": 9.5071, "step": 1328 }, { "epoch": 0.4584339427388755, "grad_norm": 1.3959459066390991, "learning_rate": 5.951014909596212e-05, "loss": 9.4515, "step": 1329 }, { "epoch": 0.4587788892721628, "grad_norm": 1.3636053800582886, "learning_rate": 5.9455047686526635e-05, "loss": 9.4722, "step": 1330 }, { "epoch": 0.45912383580545013, "grad_norm": 1.3342995643615723, "learning_rate": 5.93999343658333e-05, "loss": 9.4706, "step": 1331 }, { "epoch": 0.4594687823387375, "grad_norm": 1.3717411756515503, "learning_rate": 5.9344809203312626e-05, "loss": 9.4213, "step": 1332 }, { "epoch": 0.4598137288720248, "grad_norm": 1.3053313493728638, "learning_rate": 5.9289672268410076e-05, "loss": 9.4657, "step": 1333 }, { "epoch": 0.4601586754053122, "grad_norm": 1.3955119848251343, "learning_rate": 5.9234523630585936e-05, "loss": 9.5157, "step": 1334 }, { "epoch": 0.4605036219385995, "grad_norm": 1.3672175407409668, "learning_rate": 5.917936335931521e-05, "loss": 9.5097, "step": 1335 }, { "epoch": 0.4608485684718869, "grad_norm": 1.417806625366211, "learning_rate": 5.91241915240876e-05, "loss": 9.4413, "step": 1336 }, { "epoch": 0.4611935150051742, "grad_norm": 1.343322992324829, "learning_rate": 5.9069008194407324e-05, "loss": 9.48, "step": 1337 }, { "epoch": 0.46153846153846156, "grad_norm": 1.343604326248169, "learning_rate": 5.901381343979312e-05, "loss": 9.4752, "step": 1338 }, { "epoch": 0.4618834080717489, "grad_norm": 1.279022216796875, "learning_rate": 5.89586073297781e-05, "loss": 9.5199, "step": 1339 }, { "epoch": 0.4622283546050362, "grad_norm": 1.3533319234848022, "learning_rate": 5.8903389933909714e-05, "loss": 9.4999, "step": 1340 }, { "epoch": 0.46257330113832357, "grad_norm": 1.4877336025238037, "learning_rate": 5.884816132174956e-05, "loss": 9.4258, "step": 1341 }, { "epoch": 0.4629182476716109, "grad_norm": 1.4366827011108398, "learning_rate": 5.879292156287346e-05, "loss": 9.4265, "step": 1342 }, { "epoch": 0.46326319420489825, "grad_norm": 1.5778945684432983, "learning_rate": 5.8737670726871176e-05, "loss": 9.4417, "step": 1343 }, { "epoch": 0.46360814073818557, "grad_norm": 1.4845643043518066, "learning_rate": 5.868240888334653e-05, "loss": 9.4152, "step": 1344 }, { "epoch": 0.46395308727147294, "grad_norm": 1.4498002529144287, "learning_rate": 5.86271361019171e-05, "loss": 9.3937, "step": 1345 }, { "epoch": 0.46429803380476026, "grad_norm": 1.688463807106018, "learning_rate": 5.857185245221436e-05, "loss": 9.3969, "step": 1346 }, { "epoch": 0.46464298033804763, "grad_norm": 1.6296204328536987, "learning_rate": 5.851655800388338e-05, "loss": 9.3736, "step": 1347 }, { "epoch": 0.46498792687133494, "grad_norm": 1.6956126689910889, "learning_rate": 5.846125282658288e-05, "loss": 9.3915, "step": 1348 }, { "epoch": 0.46533287340462226, "grad_norm": 1.7350283861160278, "learning_rate": 5.8405936989985086e-05, "loss": 9.3848, "step": 1349 }, { "epoch": 0.46567781993790963, "grad_norm": 1.6955797672271729, "learning_rate": 5.835061056377567e-05, "loss": 9.401, "step": 1350 }, { "epoch": 0.46602276647119695, "grad_norm": 0.9491857290267944, "learning_rate": 5.82952736176536e-05, "loss": 9.5889, "step": 1351 }, { "epoch": 0.4663677130044843, "grad_norm": 1.0396230220794678, "learning_rate": 5.8239926221331134e-05, "loss": 9.5876, "step": 1352 }, { "epoch": 0.46671265953777163, "grad_norm": 0.9977867603302002, "learning_rate": 5.8184568444533697e-05, "loss": 9.5805, "step": 1353 }, { "epoch": 0.467057606071059, "grad_norm": 1.1935267448425293, "learning_rate": 5.8129200356999755e-05, "loss": 9.5101, "step": 1354 }, { "epoch": 0.4674025526043463, "grad_norm": 1.0658153295516968, "learning_rate": 5.8073822028480805e-05, "loss": 9.497, "step": 1355 }, { "epoch": 0.4677474991376337, "grad_norm": 1.0787256956100464, "learning_rate": 5.801843352874123e-05, "loss": 9.6134, "step": 1356 }, { "epoch": 0.468092445670921, "grad_norm": 1.1341609954833984, "learning_rate": 5.7963034927558235e-05, "loss": 9.5077, "step": 1357 }, { "epoch": 0.4684373922042083, "grad_norm": 1.1322368383407593, "learning_rate": 5.790762629472173e-05, "loss": 9.5009, "step": 1358 }, { "epoch": 0.4687823387374957, "grad_norm": 1.1739046573638916, "learning_rate": 5.785220770003424e-05, "loss": 9.5338, "step": 1359 }, { "epoch": 0.469127285270783, "grad_norm": 1.1165406703948975, "learning_rate": 5.7796779213310934e-05, "loss": 9.5175, "step": 1360 }, { "epoch": 0.4694722318040704, "grad_norm": 1.1558291912078857, "learning_rate": 5.774134090437934e-05, "loss": 9.5053, "step": 1361 }, { "epoch": 0.4698171783373577, "grad_norm": 1.215736746788025, "learning_rate": 5.768589284307939e-05, "loss": 9.4972, "step": 1362 }, { "epoch": 0.47016212487064507, "grad_norm": 1.3127442598342896, "learning_rate": 5.7630435099263356e-05, "loss": 9.514, "step": 1363 }, { "epoch": 0.4705070714039324, "grad_norm": 1.2573809623718262, "learning_rate": 5.757496774279565e-05, "loss": 9.5645, "step": 1364 }, { "epoch": 0.47085201793721976, "grad_norm": 1.3243647813796997, "learning_rate": 5.751949084355277e-05, "loss": 9.4822, "step": 1365 }, { "epoch": 0.4711969644705071, "grad_norm": 1.22898268699646, "learning_rate": 5.746400447142332e-05, "loss": 9.5129, "step": 1366 }, { "epoch": 0.4715419110037944, "grad_norm": 1.2787483930587769, "learning_rate": 5.7408508696307784e-05, "loss": 9.5467, "step": 1367 }, { "epoch": 0.47188685753708176, "grad_norm": 1.3485304117202759, "learning_rate": 5.7353003588118495e-05, "loss": 9.4727, "step": 1368 }, { "epoch": 0.4722318040703691, "grad_norm": 1.2806965112686157, "learning_rate": 5.729748921677953e-05, "loss": 9.5296, "step": 1369 }, { "epoch": 0.47257675060365645, "grad_norm": 1.2300639152526855, "learning_rate": 5.724196565222668e-05, "loss": 9.5279, "step": 1370 }, { "epoch": 0.47292169713694376, "grad_norm": 1.451492428779602, "learning_rate": 5.718643296440729e-05, "loss": 9.4898, "step": 1371 }, { "epoch": 0.47326664367023114, "grad_norm": 1.3249592781066895, "learning_rate": 5.713089122328019e-05, "loss": 9.5028, "step": 1372 }, { "epoch": 0.47361159020351845, "grad_norm": 1.3282641172409058, "learning_rate": 5.707534049881561e-05, "loss": 9.5387, "step": 1373 }, { "epoch": 0.4739565367368058, "grad_norm": 1.2661629915237427, "learning_rate": 5.7019780860995145e-05, "loss": 9.5194, "step": 1374 }, { "epoch": 0.47430148327009314, "grad_norm": 1.3013981580734253, "learning_rate": 5.696421237981156e-05, "loss": 9.4936, "step": 1375 }, { "epoch": 0.47464642980338045, "grad_norm": 1.4271470308303833, "learning_rate": 5.690863512526878e-05, "loss": 9.525, "step": 1376 }, { "epoch": 0.4749913763366678, "grad_norm": 1.246364712715149, "learning_rate": 5.685304916738182e-05, "loss": 9.4834, "step": 1377 }, { "epoch": 0.47533632286995514, "grad_norm": 1.4177308082580566, "learning_rate": 5.679745457617661e-05, "loss": 9.46, "step": 1378 }, { "epoch": 0.4756812694032425, "grad_norm": 1.1870930194854736, "learning_rate": 5.674185142168997e-05, "loss": 9.5407, "step": 1379 }, { "epoch": 0.47602621593652983, "grad_norm": 1.2858004570007324, "learning_rate": 5.668623977396952e-05, "loss": 9.5336, "step": 1380 }, { "epoch": 0.4763711624698172, "grad_norm": 1.3497929573059082, "learning_rate": 5.663061970307357e-05, "loss": 9.4715, "step": 1381 }, { "epoch": 0.4767161090031045, "grad_norm": 1.3225263357162476, "learning_rate": 5.6574991279071046e-05, "loss": 9.4959, "step": 1382 }, { "epoch": 0.4770610555363919, "grad_norm": 1.3493469953536987, "learning_rate": 5.6519354572041384e-05, "loss": 9.4836, "step": 1383 }, { "epoch": 0.4774060020696792, "grad_norm": 1.3390860557556152, "learning_rate": 5.646370965207447e-05, "loss": 9.5075, "step": 1384 }, { "epoch": 0.4777509486029665, "grad_norm": 1.4489787817001343, "learning_rate": 5.640805658927053e-05, "loss": 9.4673, "step": 1385 }, { "epoch": 0.4780958951362539, "grad_norm": 1.428164005279541, "learning_rate": 5.6352395453740045e-05, "loss": 9.4675, "step": 1386 }, { "epoch": 0.4784408416695412, "grad_norm": 1.3007261753082275, "learning_rate": 5.629672631560369e-05, "loss": 9.4735, "step": 1387 }, { "epoch": 0.4787857882028286, "grad_norm": 1.4828238487243652, "learning_rate": 5.6241049244992164e-05, "loss": 9.4761, "step": 1388 }, { "epoch": 0.4791307347361159, "grad_norm": 1.3790254592895508, "learning_rate": 5.618536431204624e-05, "loss": 9.5257, "step": 1389 }, { "epoch": 0.47947568126940326, "grad_norm": 1.4181640148162842, "learning_rate": 5.612967158691652e-05, "loss": 9.5113, "step": 1390 }, { "epoch": 0.4798206278026906, "grad_norm": 1.5841997861862183, "learning_rate": 5.607397113976347e-05, "loss": 9.4038, "step": 1391 }, { "epoch": 0.4801655743359779, "grad_norm": 1.487927794456482, "learning_rate": 5.601826304075728e-05, "loss": 9.4257, "step": 1392 }, { "epoch": 0.48051052086926527, "grad_norm": 1.5204216241836548, "learning_rate": 5.5962547360077745e-05, "loss": 9.3959, "step": 1393 }, { "epoch": 0.4808554674025526, "grad_norm": 1.4272792339324951, "learning_rate": 5.590682416791424e-05, "loss": 9.4667, "step": 1394 }, { "epoch": 0.48120041393583995, "grad_norm": 1.5055564641952515, "learning_rate": 5.5851093534465605e-05, "loss": 9.3947, "step": 1395 }, { "epoch": 0.48154536046912727, "grad_norm": 1.5691437721252441, "learning_rate": 5.579535552994005e-05, "loss": 9.4853, "step": 1396 }, { "epoch": 0.48189030700241464, "grad_norm": 1.537042260169983, "learning_rate": 5.573961022455505e-05, "loss": 9.421, "step": 1397 }, { "epoch": 0.48223525353570196, "grad_norm": 1.6870508193969727, "learning_rate": 5.568385768853732e-05, "loss": 9.4025, "step": 1398 }, { "epoch": 0.48258020006898933, "grad_norm": 1.7952243089675903, "learning_rate": 5.562809799212264e-05, "loss": 9.3092, "step": 1399 }, { "epoch": 0.48292514660227664, "grad_norm": 1.8603969812393188, "learning_rate": 5.5572331205555836e-05, "loss": 9.3861, "step": 1400 }, { "epoch": 0.48327009313556396, "grad_norm": 0.943221926689148, "learning_rate": 5.5516557399090665e-05, "loss": 9.5419, "step": 1401 }, { "epoch": 0.48361503966885133, "grad_norm": 0.949111819267273, "learning_rate": 5.546077664298972e-05, "loss": 9.545, "step": 1402 }, { "epoch": 0.48395998620213865, "grad_norm": 1.0729368925094604, "learning_rate": 5.5404989007524355e-05, "loss": 9.5456, "step": 1403 }, { "epoch": 0.484304932735426, "grad_norm": 1.070397138595581, "learning_rate": 5.534919456297457e-05, "loss": 9.5503, "step": 1404 }, { "epoch": 0.48464987926871334, "grad_norm": 1.0518831014633179, "learning_rate": 5.5293393379628975e-05, "loss": 9.5389, "step": 1405 }, { "epoch": 0.4849948258020007, "grad_norm": 1.099482536315918, "learning_rate": 5.5237585527784664e-05, "loss": 9.5556, "step": 1406 }, { "epoch": 0.485339772335288, "grad_norm": 1.1868844032287598, "learning_rate": 5.51817710777471e-05, "loss": 9.4733, "step": 1407 }, { "epoch": 0.4856847188685754, "grad_norm": 1.234670877456665, "learning_rate": 5.512595009983008e-05, "loss": 9.5143, "step": 1408 }, { "epoch": 0.4860296654018627, "grad_norm": 1.1355990171432495, "learning_rate": 5.507012266435564e-05, "loss": 9.5251, "step": 1409 }, { "epoch": 0.48637461193515, "grad_norm": 1.1389458179473877, "learning_rate": 5.501428884165393e-05, "loss": 9.5322, "step": 1410 }, { "epoch": 0.4867195584684374, "grad_norm": 1.123797059059143, "learning_rate": 5.495844870206315e-05, "loss": 9.5602, "step": 1411 }, { "epoch": 0.4870645050017247, "grad_norm": 1.1804533004760742, "learning_rate": 5.490260231592946e-05, "loss": 9.5132, "step": 1412 }, { "epoch": 0.4874094515350121, "grad_norm": 1.3132667541503906, "learning_rate": 5.4846749753606906e-05, "loss": 9.5615, "step": 1413 }, { "epoch": 0.4877543980682994, "grad_norm": 1.2434678077697754, "learning_rate": 5.4790891085457276e-05, "loss": 9.538, "step": 1414 }, { "epoch": 0.48809934460158677, "grad_norm": 1.2329378128051758, "learning_rate": 5.47350263818501e-05, "loss": 9.5578, "step": 1415 }, { "epoch": 0.4884442911348741, "grad_norm": 1.2569046020507812, "learning_rate": 5.4679155713162446e-05, "loss": 9.4817, "step": 1416 }, { "epoch": 0.48878923766816146, "grad_norm": 1.2540076971054077, "learning_rate": 5.462327914977896e-05, "loss": 9.605, "step": 1417 }, { "epoch": 0.4891341842014488, "grad_norm": 1.140594482421875, "learning_rate": 5.456739676209168e-05, "loss": 9.5552, "step": 1418 }, { "epoch": 0.4894791307347361, "grad_norm": 1.2399282455444336, "learning_rate": 5.451150862049999e-05, "loss": 9.5012, "step": 1419 }, { "epoch": 0.48982407726802346, "grad_norm": 1.388474941253662, "learning_rate": 5.445561479541054e-05, "loss": 9.4309, "step": 1420 }, { "epoch": 0.4901690238013108, "grad_norm": 1.2384589910507202, "learning_rate": 5.439971535723708e-05, "loss": 9.5436, "step": 1421 }, { "epoch": 0.49051397033459815, "grad_norm": 1.2547194957733154, "learning_rate": 5.4343810376400503e-05, "loss": 9.5297, "step": 1422 }, { "epoch": 0.49085891686788546, "grad_norm": 1.33177649974823, "learning_rate": 5.4287899923328664e-05, "loss": 9.4787, "step": 1423 }, { "epoch": 0.49120386340117284, "grad_norm": 1.2320342063903809, "learning_rate": 5.423198406845629e-05, "loss": 9.5077, "step": 1424 }, { "epoch": 0.49154880993446015, "grad_norm": 1.1713165044784546, "learning_rate": 5.417606288222491e-05, "loss": 9.5643, "step": 1425 }, { "epoch": 0.4918937564677475, "grad_norm": 1.2229870557785034, "learning_rate": 5.4120136435082826e-05, "loss": 9.516, "step": 1426 }, { "epoch": 0.49223870300103484, "grad_norm": 1.3608933687210083, "learning_rate": 5.406420479748489e-05, "loss": 9.4398, "step": 1427 }, { "epoch": 0.49258364953432215, "grad_norm": 1.3051090240478516, "learning_rate": 5.400826803989253e-05, "loss": 9.5392, "step": 1428 }, { "epoch": 0.4929285960676095, "grad_norm": 1.2316997051239014, "learning_rate": 5.3952326232773646e-05, "loss": 9.4952, "step": 1429 }, { "epoch": 0.49327354260089684, "grad_norm": 1.303128719329834, "learning_rate": 5.3896379446602476e-05, "loss": 9.4482, "step": 1430 }, { "epoch": 0.4936184891341842, "grad_norm": 1.3697431087493896, "learning_rate": 5.384042775185951e-05, "loss": 9.475, "step": 1431 }, { "epoch": 0.49396343566747153, "grad_norm": 1.3772562742233276, "learning_rate": 5.378447121903145e-05, "loss": 9.5066, "step": 1432 }, { "epoch": 0.4943083822007589, "grad_norm": 1.229060173034668, "learning_rate": 5.372850991861109e-05, "loss": 9.5655, "step": 1433 }, { "epoch": 0.4946533287340462, "grad_norm": 1.2789677381515503, "learning_rate": 5.3672543921097226e-05, "loss": 9.4798, "step": 1434 }, { "epoch": 0.4949982752673336, "grad_norm": 1.3277839422225952, "learning_rate": 5.3616573296994577e-05, "loss": 9.527, "step": 1435 }, { "epoch": 0.4953432218006209, "grad_norm": 1.2170019149780273, "learning_rate": 5.356059811681369e-05, "loss": 9.5078, "step": 1436 }, { "epoch": 0.4956881683339082, "grad_norm": 1.353895902633667, "learning_rate": 5.3504618451070833e-05, "loss": 9.4486, "step": 1437 }, { "epoch": 0.4960331148671956, "grad_norm": 1.350578784942627, "learning_rate": 5.344863437028794e-05, "loss": 9.4945, "step": 1438 }, { "epoch": 0.4963780614004829, "grad_norm": 1.6105198860168457, "learning_rate": 5.3392645944992495e-05, "loss": 9.4206, "step": 1439 }, { "epoch": 0.4967230079337703, "grad_norm": 1.5112483501434326, "learning_rate": 5.3336653245717494e-05, "loss": 9.3976, "step": 1440 }, { "epoch": 0.4970679544670576, "grad_norm": 1.5440597534179688, "learning_rate": 5.328065634300127e-05, "loss": 9.4787, "step": 1441 }, { "epoch": 0.49741290100034496, "grad_norm": 1.568060040473938, "learning_rate": 5.322465530738746e-05, "loss": 9.3947, "step": 1442 }, { "epoch": 0.4977578475336323, "grad_norm": 1.365394949913025, "learning_rate": 5.316865020942494e-05, "loss": 9.4426, "step": 1443 }, { "epoch": 0.49810279406691965, "grad_norm": 1.5042157173156738, "learning_rate": 5.311264111966767e-05, "loss": 9.4702, "step": 1444 }, { "epoch": 0.49844774060020697, "grad_norm": 1.4479701519012451, "learning_rate": 5.305662810867466e-05, "loss": 9.374, "step": 1445 }, { "epoch": 0.4987926871334943, "grad_norm": 1.5126973390579224, "learning_rate": 5.300061124700982e-05, "loss": 9.3831, "step": 1446 }, { "epoch": 0.49913763366678165, "grad_norm": 1.536891222000122, "learning_rate": 5.2944590605241986e-05, "loss": 9.4704, "step": 1447 }, { "epoch": 0.49948258020006897, "grad_norm": 1.661973476409912, "learning_rate": 5.288856625394467e-05, "loss": 9.3696, "step": 1448 }, { "epoch": 0.49982752673335634, "grad_norm": 1.7247661352157593, "learning_rate": 5.283253826369612e-05, "loss": 9.3604, "step": 1449 }, { "epoch": 0.5001724732666437, "grad_norm": 1.7713525295257568, "learning_rate": 5.277650670507915e-05, "loss": 9.3608, "step": 1450 }, { "epoch": 0.500517419799931, "grad_norm": 0.9689469337463379, "learning_rate": 5.272047164868106e-05, "loss": 9.5352, "step": 1451 }, { "epoch": 0.5008623663332183, "grad_norm": 1.0381879806518555, "learning_rate": 5.266443316509355e-05, "loss": 9.5648, "step": 1452 }, { "epoch": 0.5012073128665057, "grad_norm": 1.0764495134353638, "learning_rate": 5.260839132491266e-05, "loss": 9.5591, "step": 1453 }, { "epoch": 0.501552259399793, "grad_norm": 1.1121872663497925, "learning_rate": 5.255234619873866e-05, "loss": 9.5613, "step": 1454 }, { "epoch": 0.5018972059330804, "grad_norm": 1.153185486793518, "learning_rate": 5.249629785717592e-05, "loss": 9.5407, "step": 1455 }, { "epoch": 0.5022421524663677, "grad_norm": 1.0321553945541382, "learning_rate": 5.2440246370832914e-05, "loss": 9.5957, "step": 1456 }, { "epoch": 0.502587098999655, "grad_norm": 1.1872321367263794, "learning_rate": 5.238419181032204e-05, "loss": 9.4708, "step": 1457 }, { "epoch": 0.5029320455329424, "grad_norm": 1.1325219869613647, "learning_rate": 5.2328134246259594e-05, "loss": 9.5841, "step": 1458 }, { "epoch": 0.5032769920662298, "grad_norm": 1.1462233066558838, "learning_rate": 5.227207374926563e-05, "loss": 9.5935, "step": 1459 }, { "epoch": 0.5036219385995171, "grad_norm": 1.1690250635147095, "learning_rate": 5.2216010389963896e-05, "loss": 9.4704, "step": 1460 }, { "epoch": 0.5039668851328044, "grad_norm": 1.1512292623519897, "learning_rate": 5.2159944238981803e-05, "loss": 9.5424, "step": 1461 }, { "epoch": 0.5043118316660917, "grad_norm": 1.2307171821594238, "learning_rate": 5.2103875366950196e-05, "loss": 9.5497, "step": 1462 }, { "epoch": 0.504656778199379, "grad_norm": 1.2325085401535034, "learning_rate": 5.20478038445034e-05, "loss": 9.5028, "step": 1463 }, { "epoch": 0.5050017247326665, "grad_norm": 1.056069016456604, "learning_rate": 5.199172974227907e-05, "loss": 9.57, "step": 1464 }, { "epoch": 0.5053466712659538, "grad_norm": 1.256806492805481, "learning_rate": 5.1935653130918106e-05, "loss": 9.4998, "step": 1465 }, { "epoch": 0.5056916177992411, "grad_norm": 1.1562812328338623, "learning_rate": 5.187957408106456e-05, "loss": 9.4816, "step": 1466 }, { "epoch": 0.5060365643325284, "grad_norm": 1.1788362264633179, "learning_rate": 5.182349266336558e-05, "loss": 9.5193, "step": 1467 }, { "epoch": 0.5063815108658158, "grad_norm": 1.1777877807617188, "learning_rate": 5.1767408948471286e-05, "loss": 9.5475, "step": 1468 }, { "epoch": 0.5067264573991032, "grad_norm": 1.248210072517395, "learning_rate": 5.171132300703467e-05, "loss": 9.4653, "step": 1469 }, { "epoch": 0.5070714039323905, "grad_norm": 1.3637944459915161, "learning_rate": 5.1655234909711555e-05, "loss": 9.5111, "step": 1470 }, { "epoch": 0.5074163504656778, "grad_norm": 1.1688549518585205, "learning_rate": 5.15991447271605e-05, "loss": 9.5297, "step": 1471 }, { "epoch": 0.5077612969989651, "grad_norm": 1.4187387228012085, "learning_rate": 5.154305253004264e-05, "loss": 9.4879, "step": 1472 }, { "epoch": 0.5081062435322525, "grad_norm": 1.2831348180770874, "learning_rate": 5.1486958389021655e-05, "loss": 9.5302, "step": 1473 }, { "epoch": 0.5084511900655398, "grad_norm": 1.333694577217102, "learning_rate": 5.143086237476371e-05, "loss": 9.4627, "step": 1474 }, { "epoch": 0.5087961365988272, "grad_norm": 1.2403720617294312, "learning_rate": 5.137476455793732e-05, "loss": 9.5265, "step": 1475 }, { "epoch": 0.5091410831321145, "grad_norm": 1.301084041595459, "learning_rate": 5.131866500921325e-05, "loss": 9.5289, "step": 1476 }, { "epoch": 0.5094860296654019, "grad_norm": 1.443691372871399, "learning_rate": 5.1262563799264455e-05, "loss": 9.4393, "step": 1477 }, { "epoch": 0.5098309761986892, "grad_norm": 1.3539116382598877, "learning_rate": 5.120646099876598e-05, "loss": 9.4564, "step": 1478 }, { "epoch": 0.5101759227319765, "grad_norm": 1.3923438787460327, "learning_rate": 5.1150356678394905e-05, "loss": 9.4671, "step": 1479 }, { "epoch": 0.5105208692652639, "grad_norm": 1.301438331604004, "learning_rate": 5.109425090883019e-05, "loss": 9.493, "step": 1480 }, { "epoch": 0.5108658157985512, "grad_norm": 1.3289682865142822, "learning_rate": 5.10381437607526e-05, "loss": 9.5038, "step": 1481 }, { "epoch": 0.5112107623318386, "grad_norm": 1.3028764724731445, "learning_rate": 5.098203530484471e-05, "loss": 9.5781, "step": 1482 }, { "epoch": 0.5115557088651259, "grad_norm": 1.3720993995666504, "learning_rate": 5.092592561179069e-05, "loss": 9.4409, "step": 1483 }, { "epoch": 0.5119006553984132, "grad_norm": 1.3018594980239868, "learning_rate": 5.086981475227624e-05, "loss": 9.5002, "step": 1484 }, { "epoch": 0.5122456019317005, "grad_norm": 1.3004437685012817, "learning_rate": 5.0813702796988624e-05, "loss": 9.4889, "step": 1485 }, { "epoch": 0.512590548464988, "grad_norm": 1.32923424243927, "learning_rate": 5.07575898166164e-05, "loss": 9.4854, "step": 1486 }, { "epoch": 0.5129354949982753, "grad_norm": 1.6009782552719116, "learning_rate": 5.0701475881849444e-05, "loss": 9.4702, "step": 1487 }, { "epoch": 0.5132804415315626, "grad_norm": 1.4030383825302124, "learning_rate": 5.064536106337883e-05, "loss": 9.4459, "step": 1488 }, { "epoch": 0.5136253880648499, "grad_norm": 1.4751638174057007, "learning_rate": 5.058924543189677e-05, "loss": 9.3985, "step": 1489 }, { "epoch": 0.5139703345981372, "grad_norm": 1.4806643724441528, "learning_rate": 5.0533129058096484e-05, "loss": 9.4081, "step": 1490 }, { "epoch": 0.5143152811314247, "grad_norm": 1.6850110292434692, "learning_rate": 5.047701201267211e-05, "loss": 9.3762, "step": 1491 }, { "epoch": 0.514660227664712, "grad_norm": 1.448501467704773, "learning_rate": 5.042089436631867e-05, "loss": 9.3982, "step": 1492 }, { "epoch": 0.5150051741979993, "grad_norm": 1.3358256816864014, "learning_rate": 5.036477618973191e-05, "loss": 9.4275, "step": 1493 }, { "epoch": 0.5153501207312866, "grad_norm": 1.3124507665634155, "learning_rate": 5.030865755360826e-05, "loss": 9.4639, "step": 1494 }, { "epoch": 0.515695067264574, "grad_norm": 1.502692461013794, "learning_rate": 5.025253852864471e-05, "loss": 9.4533, "step": 1495 }, { "epoch": 0.5160400137978614, "grad_norm": 1.5912710428237915, "learning_rate": 5.019641918553877e-05, "loss": 9.3773, "step": 1496 }, { "epoch": 0.5163849603311487, "grad_norm": 1.5063811540603638, "learning_rate": 5.0140299594988315e-05, "loss": 9.4607, "step": 1497 }, { "epoch": 0.516729906864436, "grad_norm": 1.5693217515945435, "learning_rate": 5.0084179827691566e-05, "loss": 9.4344, "step": 1498 }, { "epoch": 0.5170748533977233, "grad_norm": 1.6469818353652954, "learning_rate": 5.0028059954346964e-05, "loss": 9.3941, "step": 1499 }, { "epoch": 0.5174197999310107, "grad_norm": 1.855391025543213, "learning_rate": 4.997194004565304e-05, "loss": 9.3555, "step": 1500 }, { "epoch": 0.517764746464298, "grad_norm": 0.9066408276557922, "learning_rate": 4.991582017230843e-05, "loss": 9.608, "step": 1501 }, { "epoch": 0.5181096929975854, "grad_norm": 0.9518599510192871, "learning_rate": 4.985970040501169e-05, "loss": 9.5632, "step": 1502 }, { "epoch": 0.5184546395308727, "grad_norm": 1.1453851461410522, "learning_rate": 4.980358081446125e-05, "loss": 9.5027, "step": 1503 }, { "epoch": 0.5187995860641601, "grad_norm": 1.0721421241760254, "learning_rate": 4.9747461471355306e-05, "loss": 9.5719, "step": 1504 }, { "epoch": 0.5191445325974474, "grad_norm": 1.0002890825271606, "learning_rate": 4.969134244639176e-05, "loss": 9.5454, "step": 1505 }, { "epoch": 0.5194894791307347, "grad_norm": 1.0482701063156128, "learning_rate": 4.9635223810268104e-05, "loss": 9.4886, "step": 1506 }, { "epoch": 0.519834425664022, "grad_norm": 1.034656286239624, "learning_rate": 4.957910563368133e-05, "loss": 9.6031, "step": 1507 }, { "epoch": 0.5201793721973094, "grad_norm": 1.1927002668380737, "learning_rate": 4.9522987987327886e-05, "loss": 9.5221, "step": 1508 }, { "epoch": 0.5205243187305968, "grad_norm": 1.0671268701553345, "learning_rate": 4.946687094190353e-05, "loss": 9.5753, "step": 1509 }, { "epoch": 0.5208692652638841, "grad_norm": 1.1840550899505615, "learning_rate": 4.941075456810324e-05, "loss": 9.5308, "step": 1510 }, { "epoch": 0.5212142117971714, "grad_norm": 1.0830540657043457, "learning_rate": 4.9354638936621177e-05, "loss": 9.5537, "step": 1511 }, { "epoch": 0.5215591583304587, "grad_norm": 1.1524994373321533, "learning_rate": 4.929852411815058e-05, "loss": 9.5055, "step": 1512 }, { "epoch": 0.5219041048637462, "grad_norm": 1.2321434020996094, "learning_rate": 4.9242410183383624e-05, "loss": 9.478, "step": 1513 }, { "epoch": 0.5222490513970335, "grad_norm": 1.2240982055664062, "learning_rate": 4.9186297203011374e-05, "loss": 9.4934, "step": 1514 }, { "epoch": 0.5225939979303208, "grad_norm": 1.1383882761001587, "learning_rate": 4.913018524772375e-05, "loss": 9.5314, "step": 1515 }, { "epoch": 0.5229389444636081, "grad_norm": 1.1765910387039185, "learning_rate": 4.907407438820932e-05, "loss": 9.5152, "step": 1516 }, { "epoch": 0.5232838909968954, "grad_norm": 1.3054953813552856, "learning_rate": 4.90179646951553e-05, "loss": 9.4555, "step": 1517 }, { "epoch": 0.5236288375301829, "grad_norm": 1.2543056011199951, "learning_rate": 4.8961856239247406e-05, "loss": 9.5529, "step": 1518 }, { "epoch": 0.5239737840634702, "grad_norm": 1.239845633506775, "learning_rate": 4.890574909116984e-05, "loss": 9.5519, "step": 1519 }, { "epoch": 0.5243187305967575, "grad_norm": 1.1929110288619995, "learning_rate": 4.8849643321605107e-05, "loss": 9.526, "step": 1520 }, { "epoch": 0.5246636771300448, "grad_norm": 1.391808032989502, "learning_rate": 4.8793539001234025e-05, "loss": 9.4912, "step": 1521 }, { "epoch": 0.5250086236633322, "grad_norm": 1.3413889408111572, "learning_rate": 4.8737436200735564e-05, "loss": 9.4721, "step": 1522 }, { "epoch": 0.5253535701966195, "grad_norm": 1.3500343561172485, "learning_rate": 4.868133499078676e-05, "loss": 9.4887, "step": 1523 }, { "epoch": 0.5256985167299069, "grad_norm": 1.2600687742233276, "learning_rate": 4.8625235442062686e-05, "loss": 9.551, "step": 1524 }, { "epoch": 0.5260434632631942, "grad_norm": 1.4446165561676025, "learning_rate": 4.85691376252363e-05, "loss": 9.4995, "step": 1525 }, { "epoch": 0.5263884097964815, "grad_norm": 1.2388883829116821, "learning_rate": 4.851304161097836e-05, "loss": 9.5027, "step": 1526 }, { "epoch": 0.5267333563297689, "grad_norm": 1.4213436841964722, "learning_rate": 4.8456947469957395e-05, "loss": 9.4535, "step": 1527 }, { "epoch": 0.5270783028630562, "grad_norm": 1.3508599996566772, "learning_rate": 4.84008552728395e-05, "loss": 9.4191, "step": 1528 }, { "epoch": 0.5274232493963436, "grad_norm": 1.305248737335205, "learning_rate": 4.8344765090288436e-05, "loss": 9.4776, "step": 1529 }, { "epoch": 0.5277681959296309, "grad_norm": 1.4581810235977173, "learning_rate": 4.828867699296534e-05, "loss": 9.4615, "step": 1530 }, { "epoch": 0.5281131424629183, "grad_norm": 1.2803398370742798, "learning_rate": 4.823259105152873e-05, "loss": 9.5474, "step": 1531 }, { "epoch": 0.5284580889962056, "grad_norm": 1.3474336862564087, "learning_rate": 4.8176507336634435e-05, "loss": 9.4794, "step": 1532 }, { "epoch": 0.5288030355294929, "grad_norm": 1.5129001140594482, "learning_rate": 4.812042591893545e-05, "loss": 9.3918, "step": 1533 }, { "epoch": 0.5291479820627802, "grad_norm": 1.2152923345565796, "learning_rate": 4.806434686908191e-05, "loss": 9.5461, "step": 1534 }, { "epoch": 0.5294929285960676, "grad_norm": 1.4519068002700806, "learning_rate": 4.800827025772094e-05, "loss": 9.4692, "step": 1535 }, { "epoch": 0.529837875129355, "grad_norm": 1.441679835319519, "learning_rate": 4.79521961554966e-05, "loss": 9.4283, "step": 1536 }, { "epoch": 0.5301828216626423, "grad_norm": 1.451649785041809, "learning_rate": 4.789612463304981e-05, "loss": 9.3841, "step": 1537 }, { "epoch": 0.5305277681959296, "grad_norm": 1.3632525205612183, "learning_rate": 4.784005576101821e-05, "loss": 9.5185, "step": 1538 }, { "epoch": 0.5308727147292169, "grad_norm": 1.319543719291687, "learning_rate": 4.778398961003611e-05, "loss": 9.5255, "step": 1539 }, { "epoch": 0.5312176612625044, "grad_norm": 1.4514024257659912, "learning_rate": 4.7727926250734396e-05, "loss": 9.5116, "step": 1540 }, { "epoch": 0.5315626077957917, "grad_norm": 1.3405132293701172, "learning_rate": 4.767186575374043e-05, "loss": 9.4577, "step": 1541 }, { "epoch": 0.531907554329079, "grad_norm": 1.5354069471359253, "learning_rate": 4.761580818967796e-05, "loss": 9.4914, "step": 1542 }, { "epoch": 0.5322525008623663, "grad_norm": 1.3511700630187988, "learning_rate": 4.755975362916709e-05, "loss": 9.4602, "step": 1543 }, { "epoch": 0.5325974473956536, "grad_norm": 1.4528660774230957, "learning_rate": 4.750370214282409e-05, "loss": 9.403, "step": 1544 }, { "epoch": 0.532942393928941, "grad_norm": 1.4760255813598633, "learning_rate": 4.7447653801261355e-05, "loss": 9.402, "step": 1545 }, { "epoch": 0.5332873404622284, "grad_norm": 1.5060619115829468, "learning_rate": 4.739160867508735e-05, "loss": 9.3647, "step": 1546 }, { "epoch": 0.5336322869955157, "grad_norm": 1.6048104763031006, "learning_rate": 4.7335566834906466e-05, "loss": 9.3865, "step": 1547 }, { "epoch": 0.533977233528803, "grad_norm": 1.4714221954345703, "learning_rate": 4.727952835131897e-05, "loss": 9.455, "step": 1548 }, { "epoch": 0.5343221800620904, "grad_norm": 1.62465500831604, "learning_rate": 4.7223493294920853e-05, "loss": 9.3758, "step": 1549 }, { "epoch": 0.5346671265953777, "grad_norm": 1.764082908630371, "learning_rate": 4.716746173630388e-05, "loss": 9.3348, "step": 1550 }, { "epoch": 0.535012073128665, "grad_norm": 0.9597568511962891, "learning_rate": 4.711143374605534e-05, "loss": 9.5354, "step": 1551 }, { "epoch": 0.5353570196619524, "grad_norm": 1.048038125038147, "learning_rate": 4.705540939475803e-05, "loss": 9.5423, "step": 1552 }, { "epoch": 0.5357019661952397, "grad_norm": 1.1174081563949585, "learning_rate": 4.699938875299019e-05, "loss": 9.4938, "step": 1553 }, { "epoch": 0.5360469127285271, "grad_norm": 1.1701761484146118, "learning_rate": 4.694337189132537e-05, "loss": 9.5567, "step": 1554 }, { "epoch": 0.5363918592618144, "grad_norm": 1.0438191890716553, "learning_rate": 4.6887358880332346e-05, "loss": 9.5207, "step": 1555 }, { "epoch": 0.5367368057951017, "grad_norm": 1.1834765672683716, "learning_rate": 4.683134979057507e-05, "loss": 9.4953, "step": 1556 }, { "epoch": 0.5370817523283891, "grad_norm": 1.191499948501587, "learning_rate": 4.677534469261254e-05, "loss": 9.5511, "step": 1557 }, { "epoch": 0.5374266988616765, "grad_norm": 1.1501351594924927, "learning_rate": 4.671934365699874e-05, "loss": 9.5603, "step": 1558 }, { "epoch": 0.5377716453949638, "grad_norm": 1.171834111213684, "learning_rate": 4.666334675428252e-05, "loss": 9.5079, "step": 1559 }, { "epoch": 0.5381165919282511, "grad_norm": 1.2038573026657104, "learning_rate": 4.660735405500751e-05, "loss": 9.5096, "step": 1560 }, { "epoch": 0.5384615384615384, "grad_norm": 1.138796091079712, "learning_rate": 4.655136562971208e-05, "loss": 9.5425, "step": 1561 }, { "epoch": 0.5388064849948258, "grad_norm": 1.2308675050735474, "learning_rate": 4.649538154892919e-05, "loss": 9.5467, "step": 1562 }, { "epoch": 0.5391514315281132, "grad_norm": 1.1142117977142334, "learning_rate": 4.643940188318631e-05, "loss": 9.5166, "step": 1563 }, { "epoch": 0.5394963780614005, "grad_norm": 1.2303515672683716, "learning_rate": 4.638342670300542e-05, "loss": 9.5361, "step": 1564 }, { "epoch": 0.5398413245946878, "grad_norm": 1.2311829328536987, "learning_rate": 4.632745607890278e-05, "loss": 9.4819, "step": 1565 }, { "epoch": 0.5401862711279751, "grad_norm": 1.3217933177947998, "learning_rate": 4.627149008138892e-05, "loss": 9.5043, "step": 1566 }, { "epoch": 0.5405312176612626, "grad_norm": 1.1946293115615845, "learning_rate": 4.621552878096857e-05, "loss": 9.5427, "step": 1567 }, { "epoch": 0.5408761641945499, "grad_norm": 1.326340675354004, "learning_rate": 4.615957224814051e-05, "loss": 9.4889, "step": 1568 }, { "epoch": 0.5412211107278372, "grad_norm": 1.1464662551879883, "learning_rate": 4.6103620553397556e-05, "loss": 9.6259, "step": 1569 }, { "epoch": 0.5415660572611245, "grad_norm": 1.2796909809112549, "learning_rate": 4.604767376722635e-05, "loss": 9.4796, "step": 1570 }, { "epoch": 0.5419110037944118, "grad_norm": 1.395712971687317, "learning_rate": 4.599173196010747e-05, "loss": 9.507, "step": 1571 }, { "epoch": 0.5422559503276992, "grad_norm": 1.3465749025344849, "learning_rate": 4.593579520251512e-05, "loss": 9.556, "step": 1572 }, { "epoch": 0.5426008968609866, "grad_norm": 1.215400218963623, "learning_rate": 4.587986356491719e-05, "loss": 9.5338, "step": 1573 }, { "epoch": 0.5429458433942739, "grad_norm": 1.1242035627365112, "learning_rate": 4.58239371177751e-05, "loss": 9.564, "step": 1574 }, { "epoch": 0.5432907899275612, "grad_norm": 1.254112720489502, "learning_rate": 4.5768015931543736e-05, "loss": 9.5347, "step": 1575 }, { "epoch": 0.5436357364608486, "grad_norm": 1.2248470783233643, "learning_rate": 4.571210007667135e-05, "loss": 9.4812, "step": 1576 }, { "epoch": 0.5439806829941359, "grad_norm": 1.480251669883728, "learning_rate": 4.56561896235995e-05, "loss": 9.4632, "step": 1577 }, { "epoch": 0.5443256295274232, "grad_norm": 1.379051685333252, "learning_rate": 4.560028464276293e-05, "loss": 9.4717, "step": 1578 }, { "epoch": 0.5446705760607106, "grad_norm": 1.3023099899291992, "learning_rate": 4.554438520458948e-05, "loss": 9.5061, "step": 1579 }, { "epoch": 0.5450155225939979, "grad_norm": 1.3963764905929565, "learning_rate": 4.548849137950001e-05, "loss": 9.5005, "step": 1580 }, { "epoch": 0.5453604691272853, "grad_norm": 1.3574894666671753, "learning_rate": 4.543260323790833e-05, "loss": 9.4728, "step": 1581 }, { "epoch": 0.5457054156605726, "grad_norm": 1.3623203039169312, "learning_rate": 4.5376720850221054e-05, "loss": 9.451, "step": 1582 }, { "epoch": 0.5460503621938599, "grad_norm": 1.5018421411514282, "learning_rate": 4.532084428683757e-05, "loss": 9.4535, "step": 1583 }, { "epoch": 0.5463953087271473, "grad_norm": 1.3744196891784668, "learning_rate": 4.526497361814991e-05, "loss": 9.4854, "step": 1584 }, { "epoch": 0.5467402552604347, "grad_norm": 1.177968144416809, "learning_rate": 4.520910891454272e-05, "loss": 9.5429, "step": 1585 }, { "epoch": 0.547085201793722, "grad_norm": 1.4899221658706665, "learning_rate": 4.51532502463931e-05, "loss": 9.4118, "step": 1586 }, { "epoch": 0.5474301483270093, "grad_norm": 1.3041064739227295, "learning_rate": 4.509739768407054e-05, "loss": 9.484, "step": 1587 }, { "epoch": 0.5477750948602966, "grad_norm": 1.3999149799346924, "learning_rate": 4.5041551297936865e-05, "loss": 9.4015, "step": 1588 }, { "epoch": 0.548120041393584, "grad_norm": 1.323775053024292, "learning_rate": 4.498571115834608e-05, "loss": 9.4503, "step": 1589 }, { "epoch": 0.5484649879268714, "grad_norm": 1.481468915939331, "learning_rate": 4.4929877335644376e-05, "loss": 9.4589, "step": 1590 }, { "epoch": 0.5488099344601587, "grad_norm": 1.4510349035263062, "learning_rate": 4.487404990016993e-05, "loss": 9.4337, "step": 1591 }, { "epoch": 0.549154880993446, "grad_norm": 1.4892961978912354, "learning_rate": 4.4818228922252916e-05, "loss": 9.4147, "step": 1592 }, { "epoch": 0.5494998275267333, "grad_norm": 1.451235055923462, "learning_rate": 4.4762414472215354e-05, "loss": 9.4324, "step": 1593 }, { "epoch": 0.5498447740600207, "grad_norm": 1.5764260292053223, "learning_rate": 4.4706606620371037e-05, "loss": 9.4358, "step": 1594 }, { "epoch": 0.5501897205933081, "grad_norm": 1.3623017072677612, "learning_rate": 4.4650805437025446e-05, "loss": 9.4655, "step": 1595 }, { "epoch": 0.5505346671265954, "grad_norm": 1.5152086019515991, "learning_rate": 4.459501099247567e-05, "loss": 9.4347, "step": 1596 }, { "epoch": 0.5508796136598827, "grad_norm": 1.5371122360229492, "learning_rate": 4.4539223357010304e-05, "loss": 9.3836, "step": 1597 }, { "epoch": 0.55122456019317, "grad_norm": 1.666603922843933, "learning_rate": 4.4483442600909333e-05, "loss": 9.3984, "step": 1598 }, { "epoch": 0.5515695067264574, "grad_norm": 1.5026121139526367, "learning_rate": 4.442766879444417e-05, "loss": 9.4533, "step": 1599 }, { "epoch": 0.5519144532597448, "grad_norm": 1.6777715682983398, "learning_rate": 4.437190200787737e-05, "loss": 9.3604, "step": 1600 }, { "epoch": 0.5522593997930321, "grad_norm": 1.0869591236114502, "learning_rate": 4.431614231146269e-05, "loss": 9.5396, "step": 1601 }, { "epoch": 0.5526043463263194, "grad_norm": 1.0521719455718994, "learning_rate": 4.426038977544496e-05, "loss": 9.5658, "step": 1602 }, { "epoch": 0.5529492928596068, "grad_norm": 1.0579216480255127, "learning_rate": 4.420464447005997e-05, "loss": 9.5042, "step": 1603 }, { "epoch": 0.5532942393928941, "grad_norm": 1.0266507863998413, "learning_rate": 4.414890646553442e-05, "loss": 9.5387, "step": 1604 }, { "epoch": 0.5536391859261814, "grad_norm": 1.0023735761642456, "learning_rate": 4.409317583208576e-05, "loss": 9.5934, "step": 1605 }, { "epoch": 0.5539841324594688, "grad_norm": 1.1149392127990723, "learning_rate": 4.403745263992227e-05, "loss": 9.563, "step": 1606 }, { "epoch": 0.5543290789927561, "grad_norm": 1.165576457977295, "learning_rate": 4.3981736959242734e-05, "loss": 9.5337, "step": 1607 }, { "epoch": 0.5546740255260435, "grad_norm": 1.1478943824768066, "learning_rate": 4.392602886023653e-05, "loss": 9.5396, "step": 1608 }, { "epoch": 0.5550189720593308, "grad_norm": 1.1021052598953247, "learning_rate": 4.38703284130835e-05, "loss": 9.5555, "step": 1609 }, { "epoch": 0.5553639185926181, "grad_norm": 1.2310773134231567, "learning_rate": 4.381463568795378e-05, "loss": 9.4717, "step": 1610 }, { "epoch": 0.5557088651259054, "grad_norm": 1.1044448614120483, "learning_rate": 4.375895075500784e-05, "loss": 9.5253, "step": 1611 }, { "epoch": 0.5560538116591929, "grad_norm": 1.3454209566116333, "learning_rate": 4.370327368439633e-05, "loss": 9.48, "step": 1612 }, { "epoch": 0.5563987581924802, "grad_norm": 1.3490056991577148, "learning_rate": 4.364760454625997e-05, "loss": 9.5315, "step": 1613 }, { "epoch": 0.5567437047257675, "grad_norm": 1.1780604124069214, "learning_rate": 4.359194341072948e-05, "loss": 9.5037, "step": 1614 }, { "epoch": 0.5570886512590548, "grad_norm": 1.2844969034194946, "learning_rate": 4.3536290347925544e-05, "loss": 9.4968, "step": 1615 }, { "epoch": 0.5574335977923421, "grad_norm": 1.2029293775558472, "learning_rate": 4.3480645427958634e-05, "loss": 9.5103, "step": 1616 }, { "epoch": 0.5577785443256296, "grad_norm": 1.1009153127670288, "learning_rate": 4.342500872092897e-05, "loss": 9.5644, "step": 1617 }, { "epoch": 0.5581234908589169, "grad_norm": 1.240302562713623, "learning_rate": 4.336938029692645e-05, "loss": 9.546, "step": 1618 }, { "epoch": 0.5584684373922042, "grad_norm": 1.3886947631835938, "learning_rate": 4.331376022603048e-05, "loss": 9.4193, "step": 1619 }, { "epoch": 0.5588133839254915, "grad_norm": 1.3278820514678955, "learning_rate": 4.3258148578310033e-05, "loss": 9.5184, "step": 1620 }, { "epoch": 0.5591583304587789, "grad_norm": 1.2009872198104858, "learning_rate": 4.3202545423823406e-05, "loss": 9.5146, "step": 1621 }, { "epoch": 0.5595032769920663, "grad_norm": 1.2812772989273071, "learning_rate": 4.31469508326182e-05, "loss": 9.5334, "step": 1622 }, { "epoch": 0.5598482235253536, "grad_norm": 1.3486807346343994, "learning_rate": 4.309136487473122e-05, "loss": 9.4973, "step": 1623 }, { "epoch": 0.5601931700586409, "grad_norm": 1.192238211631775, "learning_rate": 4.303578762018846e-05, "loss": 9.5171, "step": 1624 }, { "epoch": 0.5605381165919282, "grad_norm": 1.329584002494812, "learning_rate": 4.298021913900488e-05, "loss": 9.5528, "step": 1625 }, { "epoch": 0.5608830631252156, "grad_norm": 1.3967713117599487, "learning_rate": 4.292465950118439e-05, "loss": 9.5038, "step": 1626 }, { "epoch": 0.561228009658503, "grad_norm": 1.1745870113372803, "learning_rate": 4.286910877671982e-05, "loss": 9.5121, "step": 1627 }, { "epoch": 0.5615729561917903, "grad_norm": 1.3389042615890503, "learning_rate": 4.281356703559271e-05, "loss": 9.4378, "step": 1628 }, { "epoch": 0.5619179027250776, "grad_norm": 1.3878090381622314, "learning_rate": 4.275803434777332e-05, "loss": 9.4766, "step": 1629 }, { "epoch": 0.562262849258365, "grad_norm": 1.4438650608062744, "learning_rate": 4.270251078322048e-05, "loss": 9.3935, "step": 1630 }, { "epoch": 0.5626077957916523, "grad_norm": 1.3266236782073975, "learning_rate": 4.264699641188153e-05, "loss": 9.5028, "step": 1631 }, { "epoch": 0.5629527423249396, "grad_norm": 1.347325086593628, "learning_rate": 4.259149130369224e-05, "loss": 9.4892, "step": 1632 }, { "epoch": 0.563297688858227, "grad_norm": 1.3031338453292847, "learning_rate": 4.253599552857668e-05, "loss": 9.5652, "step": 1633 }, { "epoch": 0.5636426353915143, "grad_norm": 1.4257103204727173, "learning_rate": 4.2480509156447235e-05, "loss": 9.4456, "step": 1634 }, { "epoch": 0.5639875819248017, "grad_norm": 1.4598137140274048, "learning_rate": 4.242503225720437e-05, "loss": 9.4028, "step": 1635 }, { "epoch": 0.564332528458089, "grad_norm": 1.364773154258728, "learning_rate": 4.236956490073665e-05, "loss": 9.4691, "step": 1636 }, { "epoch": 0.5646774749913763, "grad_norm": 1.4265398979187012, "learning_rate": 4.2314107156920613e-05, "loss": 9.519, "step": 1637 }, { "epoch": 0.5650224215246636, "grad_norm": 1.3014031648635864, "learning_rate": 4.2258659095620686e-05, "loss": 9.4917, "step": 1638 }, { "epoch": 0.5653673680579511, "grad_norm": 1.33185613155365, "learning_rate": 4.220322078668909e-05, "loss": 9.4324, "step": 1639 }, { "epoch": 0.5657123145912384, "grad_norm": 1.449734091758728, "learning_rate": 4.214779229996575e-05, "loss": 9.4531, "step": 1640 }, { "epoch": 0.5660572611245257, "grad_norm": 1.4314883947372437, "learning_rate": 4.209237370527828e-05, "loss": 9.4829, "step": 1641 }, { "epoch": 0.566402207657813, "grad_norm": 1.4500685930252075, "learning_rate": 4.203696507244177e-05, "loss": 9.4823, "step": 1642 }, { "epoch": 0.5667471541911003, "grad_norm": 1.4503802061080933, "learning_rate": 4.198156647125877e-05, "loss": 9.4211, "step": 1643 }, { "epoch": 0.5670921007243878, "grad_norm": 1.5186033248901367, "learning_rate": 4.192617797151921e-05, "loss": 9.3657, "step": 1644 }, { "epoch": 0.5674370472576751, "grad_norm": 1.5139539241790771, "learning_rate": 4.187079964300026e-05, "loss": 9.4758, "step": 1645 }, { "epoch": 0.5677819937909624, "grad_norm": 1.5969244241714478, "learning_rate": 4.181543155546633e-05, "loss": 9.4532, "step": 1646 }, { "epoch": 0.5681269403242497, "grad_norm": 1.6448180675506592, "learning_rate": 4.176007377866888e-05, "loss": 9.3931, "step": 1647 }, { "epoch": 0.5684718868575371, "grad_norm": 1.6749922037124634, "learning_rate": 4.170472638234641e-05, "loss": 9.3962, "step": 1648 }, { "epoch": 0.5688168333908245, "grad_norm": 1.5745540857315063, "learning_rate": 4.164938943622434e-05, "loss": 9.4104, "step": 1649 }, { "epoch": 0.5691617799241118, "grad_norm": 1.7174612283706665, "learning_rate": 4.159406301001492e-05, "loss": 9.441, "step": 1650 }, { "epoch": 0.5695067264573991, "grad_norm": 0.7641019821166992, "learning_rate": 4.153874717341713e-05, "loss": 9.5972, "step": 1651 }, { "epoch": 0.5698516729906864, "grad_norm": 0.890247642993927, "learning_rate": 4.148344199611664e-05, "loss": 9.6203, "step": 1652 }, { "epoch": 0.5701966195239738, "grad_norm": 1.0681867599487305, "learning_rate": 4.142814754778566e-05, "loss": 9.5632, "step": 1653 }, { "epoch": 0.5705415660572611, "grad_norm": 1.0469846725463867, "learning_rate": 4.1372863898082895e-05, "loss": 9.533, "step": 1654 }, { "epoch": 0.5708865125905485, "grad_norm": 1.002163052558899, "learning_rate": 4.131759111665349e-05, "loss": 9.5788, "step": 1655 }, { "epoch": 0.5712314591238358, "grad_norm": 1.1065441370010376, "learning_rate": 4.126232927312883e-05, "loss": 9.5626, "step": 1656 }, { "epoch": 0.5715764056571232, "grad_norm": 1.079107642173767, "learning_rate": 4.120707843712656e-05, "loss": 9.4588, "step": 1657 }, { "epoch": 0.5719213521904105, "grad_norm": 1.0924365520477295, "learning_rate": 4.1151838678250443e-05, "loss": 9.5678, "step": 1658 }, { "epoch": 0.5722662987236978, "grad_norm": 1.1700769662857056, "learning_rate": 4.1096610066090304e-05, "loss": 9.5389, "step": 1659 }, { "epoch": 0.5726112452569851, "grad_norm": 1.1450729370117188, "learning_rate": 4.104139267022191e-05, "loss": 9.4952, "step": 1660 }, { "epoch": 0.5729561917902725, "grad_norm": 1.2342902421951294, "learning_rate": 4.098618656020688e-05, "loss": 9.4941, "step": 1661 }, { "epoch": 0.5733011383235599, "grad_norm": 1.2648087739944458, "learning_rate": 4.093099180559268e-05, "loss": 9.537, "step": 1662 }, { "epoch": 0.5736460848568472, "grad_norm": 1.2142956256866455, "learning_rate": 4.0875808475912415e-05, "loss": 9.5433, "step": 1663 }, { "epoch": 0.5739910313901345, "grad_norm": 1.1451528072357178, "learning_rate": 4.0820636640684797e-05, "loss": 9.5745, "step": 1664 }, { "epoch": 0.5743359779234218, "grad_norm": 1.2990100383758545, "learning_rate": 4.076547636941408e-05, "loss": 9.5221, "step": 1665 }, { "epoch": 0.5746809244567093, "grad_norm": 1.1534736156463623, "learning_rate": 4.0710327731589935e-05, "loss": 9.5126, "step": 1666 }, { "epoch": 0.5750258709899966, "grad_norm": 1.3099054098129272, "learning_rate": 4.0655190796687385e-05, "loss": 9.5155, "step": 1667 }, { "epoch": 0.5753708175232839, "grad_norm": 1.176540732383728, "learning_rate": 4.060006563416672e-05, "loss": 9.5128, "step": 1668 }, { "epoch": 0.5757157640565712, "grad_norm": 1.3951212167739868, "learning_rate": 4.0544952313473376e-05, "loss": 9.4966, "step": 1669 }, { "epoch": 0.5760607105898585, "grad_norm": 1.2419620752334595, "learning_rate": 4.048985090403788e-05, "loss": 9.4978, "step": 1670 }, { "epoch": 0.576405657123146, "grad_norm": 1.2248680591583252, "learning_rate": 4.043476147527579e-05, "loss": 9.531, "step": 1671 }, { "epoch": 0.5767506036564333, "grad_norm": 1.3265471458435059, "learning_rate": 4.0379684096587504e-05, "loss": 9.4789, "step": 1672 }, { "epoch": 0.5770955501897206, "grad_norm": 1.2454609870910645, "learning_rate": 4.0324618837358285e-05, "loss": 9.5451, "step": 1673 }, { "epoch": 0.5774404967230079, "grad_norm": 1.2326449155807495, "learning_rate": 4.026956576695811e-05, "loss": 9.563, "step": 1674 }, { "epoch": 0.5777854432562953, "grad_norm": 1.3096765279769897, "learning_rate": 4.021452495474159e-05, "loss": 9.4312, "step": 1675 }, { "epoch": 0.5781303897895826, "grad_norm": 1.3253740072250366, "learning_rate": 4.0159496470047954e-05, "loss": 9.4891, "step": 1676 }, { "epoch": 0.57847533632287, "grad_norm": 1.3036460876464844, "learning_rate": 4.0104480382200836e-05, "loss": 9.5537, "step": 1677 }, { "epoch": 0.5788202828561573, "grad_norm": 1.1840957403182983, "learning_rate": 4.004947676050828e-05, "loss": 9.5231, "step": 1678 }, { "epoch": 0.5791652293894446, "grad_norm": 1.319153904914856, "learning_rate": 3.999448567426259e-05, "loss": 9.4613, "step": 1679 }, { "epoch": 0.579510175922732, "grad_norm": 1.3345825672149658, "learning_rate": 3.993950719274035e-05, "loss": 9.4576, "step": 1680 }, { "epoch": 0.5798551224560193, "grad_norm": 1.373167634010315, "learning_rate": 3.988454138520219e-05, "loss": 9.4671, "step": 1681 }, { "epoch": 0.5802000689893066, "grad_norm": 1.2741271257400513, "learning_rate": 3.982958832089282e-05, "loss": 9.5347, "step": 1682 }, { "epoch": 0.580545015522594, "grad_norm": 1.4847259521484375, "learning_rate": 3.97746480690409e-05, "loss": 9.4661, "step": 1683 }, { "epoch": 0.5808899620558814, "grad_norm": 1.3877465724945068, "learning_rate": 3.971972069885891e-05, "loss": 9.469, "step": 1684 }, { "epoch": 0.5812349085891687, "grad_norm": 1.5072605609893799, "learning_rate": 3.9664806279543136e-05, "loss": 9.4667, "step": 1685 }, { "epoch": 0.581579855122456, "grad_norm": 1.4354532957077026, "learning_rate": 3.960990488027353e-05, "loss": 9.4154, "step": 1686 }, { "epoch": 0.5819248016557433, "grad_norm": 1.4752713441848755, "learning_rate": 3.955501657021364e-05, "loss": 9.4524, "step": 1687 }, { "epoch": 0.5822697481890307, "grad_norm": 1.3264256715774536, "learning_rate": 3.9500141418510526e-05, "loss": 9.5238, "step": 1688 }, { "epoch": 0.5826146947223181, "grad_norm": 1.3095241785049438, "learning_rate": 3.944527949429472e-05, "loss": 9.4962, "step": 1689 }, { "epoch": 0.5829596412556054, "grad_norm": 1.382723093032837, "learning_rate": 3.9390430866680016e-05, "loss": 9.4848, "step": 1690 }, { "epoch": 0.5833045877888927, "grad_norm": 1.508972406387329, "learning_rate": 3.933559560476349e-05, "loss": 9.379, "step": 1691 }, { "epoch": 0.58364953432218, "grad_norm": 1.3861052989959717, "learning_rate": 3.928077377762539e-05, "loss": 9.4771, "step": 1692 }, { "epoch": 0.5839944808554675, "grad_norm": 1.8806065320968628, "learning_rate": 3.922596545432903e-05, "loss": 9.348, "step": 1693 }, { "epoch": 0.5843394273887548, "grad_norm": 1.6042461395263672, "learning_rate": 3.91711707039207e-05, "loss": 9.4361, "step": 1694 }, { "epoch": 0.5846843739220421, "grad_norm": 1.6209865808486938, "learning_rate": 3.911638959542959e-05, "loss": 9.4388, "step": 1695 }, { "epoch": 0.5850293204553294, "grad_norm": 1.7686316967010498, "learning_rate": 3.906162219786776e-05, "loss": 9.3907, "step": 1696 }, { "epoch": 0.5853742669886167, "grad_norm": 1.5446085929870605, "learning_rate": 3.9006868580229936e-05, "loss": 9.4078, "step": 1697 }, { "epoch": 0.5857192135219041, "grad_norm": 1.5874037742614746, "learning_rate": 3.895212881149349e-05, "loss": 9.3253, "step": 1698 }, { "epoch": 0.5860641600551915, "grad_norm": 1.5663777589797974, "learning_rate": 3.889740296061836e-05, "loss": 9.4409, "step": 1699 }, { "epoch": 0.5864091065884788, "grad_norm": 1.5761209726333618, "learning_rate": 3.8842691096546965e-05, "loss": 9.4123, "step": 1700 }, { "epoch": 0.5867540531217661, "grad_norm": 0.9454447031021118, "learning_rate": 3.878799328820407e-05, "loss": 9.5656, "step": 1701 }, { "epoch": 0.5870989996550535, "grad_norm": 1.008521556854248, "learning_rate": 3.8733309604496754e-05, "loss": 9.5158, "step": 1702 }, { "epoch": 0.5874439461883408, "grad_norm": 0.9412047266960144, "learning_rate": 3.867864011431432e-05, "loss": 9.6026, "step": 1703 }, { "epoch": 0.5877888927216282, "grad_norm": 1.052085041999817, "learning_rate": 3.862398488652816e-05, "loss": 9.4797, "step": 1704 }, { "epoch": 0.5881338392549155, "grad_norm": 1.0003331899642944, "learning_rate": 3.8569343989991707e-05, "loss": 9.599, "step": 1705 }, { "epoch": 0.5884787857882028, "grad_norm": 1.1032466888427734, "learning_rate": 3.851471749354035e-05, "loss": 9.5337, "step": 1706 }, { "epoch": 0.5888237323214902, "grad_norm": 1.0652117729187012, "learning_rate": 3.8460105465991315e-05, "loss": 9.5674, "step": 1707 }, { "epoch": 0.5891686788547775, "grad_norm": 1.25304114818573, "learning_rate": 3.840550797614363e-05, "loss": 9.4834, "step": 1708 }, { "epoch": 0.5895136253880648, "grad_norm": 1.2567071914672852, "learning_rate": 3.835092509277796e-05, "loss": 9.4938, "step": 1709 }, { "epoch": 0.5898585719213522, "grad_norm": 1.0688841342926025, "learning_rate": 3.8296356884656634e-05, "loss": 9.5581, "step": 1710 }, { "epoch": 0.5902035184546396, "grad_norm": 1.3249194622039795, "learning_rate": 3.824180342052347e-05, "loss": 9.468, "step": 1711 }, { "epoch": 0.5905484649879269, "grad_norm": 1.0625578165054321, "learning_rate": 3.818726476910368e-05, "loss": 9.5566, "step": 1712 }, { "epoch": 0.5908934115212142, "grad_norm": 1.1471552848815918, "learning_rate": 3.813274099910384e-05, "loss": 9.5621, "step": 1713 }, { "epoch": 0.5912383580545015, "grad_norm": 1.0972129106521606, "learning_rate": 3.8078232179211794e-05, "loss": 9.4839, "step": 1714 }, { "epoch": 0.5915833045877888, "grad_norm": 1.187414526939392, "learning_rate": 3.802373837809652e-05, "loss": 9.5577, "step": 1715 }, { "epoch": 0.5919282511210763, "grad_norm": 1.2144163846969604, "learning_rate": 3.7969259664408074e-05, "loss": 9.4984, "step": 1716 }, { "epoch": 0.5922731976543636, "grad_norm": 1.3942620754241943, "learning_rate": 3.791479610677757e-05, "loss": 9.4702, "step": 1717 }, { "epoch": 0.5926181441876509, "grad_norm": 1.2334468364715576, "learning_rate": 3.786034777381695e-05, "loss": 9.4928, "step": 1718 }, { "epoch": 0.5929630907209382, "grad_norm": 1.1848747730255127, "learning_rate": 3.7805914734119025e-05, "loss": 9.5711, "step": 1719 }, { "epoch": 0.5933080372542255, "grad_norm": 1.3229336738586426, "learning_rate": 3.7751497056257304e-05, "loss": 9.5629, "step": 1720 }, { "epoch": 0.593652983787513, "grad_norm": 1.3299323320388794, "learning_rate": 3.769709480878597e-05, "loss": 9.4933, "step": 1721 }, { "epoch": 0.5939979303208003, "grad_norm": 1.19021737575531, "learning_rate": 3.764270806023976e-05, "loss": 9.5504, "step": 1722 }, { "epoch": 0.5943428768540876, "grad_norm": 1.2337749004364014, "learning_rate": 3.7588336879133855e-05, "loss": 9.4946, "step": 1723 }, { "epoch": 0.5946878233873749, "grad_norm": 1.1160210371017456, "learning_rate": 3.753398133396391e-05, "loss": 9.5584, "step": 1724 }, { "epoch": 0.5950327699206623, "grad_norm": 1.3497905731201172, "learning_rate": 3.7479641493205796e-05, "loss": 9.5159, "step": 1725 }, { "epoch": 0.5953777164539497, "grad_norm": 1.304688811302185, "learning_rate": 3.742531742531562e-05, "loss": 9.491, "step": 1726 }, { "epoch": 0.595722662987237, "grad_norm": 1.2296416759490967, "learning_rate": 3.7371009198729654e-05, "loss": 9.584, "step": 1727 }, { "epoch": 0.5960676095205243, "grad_norm": 1.2551003694534302, "learning_rate": 3.731671688186418e-05, "loss": 9.5121, "step": 1728 }, { "epoch": 0.5964125560538116, "grad_norm": 1.3699496984481812, "learning_rate": 3.7262440543115446e-05, "loss": 9.4163, "step": 1729 }, { "epoch": 0.596757502587099, "grad_norm": 1.4183300733566284, "learning_rate": 3.720818025085954e-05, "loss": 9.5081, "step": 1730 }, { "epoch": 0.5971024491203863, "grad_norm": 1.3562133312225342, "learning_rate": 3.715393607345242e-05, "loss": 9.4844, "step": 1731 }, { "epoch": 0.5974473956536737, "grad_norm": 1.2870327234268188, "learning_rate": 3.7099708079229654e-05, "loss": 9.5323, "step": 1732 }, { "epoch": 0.597792342186961, "grad_norm": 1.2621005773544312, "learning_rate": 3.704549633650648e-05, "loss": 9.5163, "step": 1733 }, { "epoch": 0.5981372887202484, "grad_norm": 1.3484461307525635, "learning_rate": 3.699130091357762e-05, "loss": 9.4726, "step": 1734 }, { "epoch": 0.5984822352535357, "grad_norm": 1.372406244277954, "learning_rate": 3.693712187871725e-05, "loss": 9.4711, "step": 1735 }, { "epoch": 0.598827181786823, "grad_norm": 1.4271361827850342, "learning_rate": 3.6882959300178936e-05, "loss": 9.5137, "step": 1736 }, { "epoch": 0.5991721283201104, "grad_norm": 1.3963810205459595, "learning_rate": 3.682881324619546e-05, "loss": 9.5354, "step": 1737 }, { "epoch": 0.5995170748533977, "grad_norm": 1.3229588270187378, "learning_rate": 3.6774683784978825e-05, "loss": 9.475, "step": 1738 }, { "epoch": 0.5998620213866851, "grad_norm": 1.5664173364639282, "learning_rate": 3.672057098472012e-05, "loss": 9.4822, "step": 1739 }, { "epoch": 0.6002069679199724, "grad_norm": 1.4833637475967407, "learning_rate": 3.666647491358943e-05, "loss": 9.4125, "step": 1740 }, { "epoch": 0.6005519144532597, "grad_norm": 1.571965217590332, "learning_rate": 3.66123956397358e-05, "loss": 9.4406, "step": 1741 }, { "epoch": 0.600896860986547, "grad_norm": 1.3770232200622559, "learning_rate": 3.655833323128706e-05, "loss": 9.5445, "step": 1742 }, { "epoch": 0.6012418075198345, "grad_norm": 1.4384090900421143, "learning_rate": 3.6504287756349844e-05, "loss": 9.4861, "step": 1743 }, { "epoch": 0.6015867540531218, "grad_norm": 1.48284113407135, "learning_rate": 3.6450259283009395e-05, "loss": 9.5147, "step": 1744 }, { "epoch": 0.6019317005864091, "grad_norm": 1.5759750604629517, "learning_rate": 3.639624787932964e-05, "loss": 9.4079, "step": 1745 }, { "epoch": 0.6022766471196964, "grad_norm": 1.4510525465011597, "learning_rate": 3.634225361335291e-05, "loss": 9.4536, "step": 1746 }, { "epoch": 0.6026215936529837, "grad_norm": 1.5437071323394775, "learning_rate": 3.6288276553099976e-05, "loss": 9.4269, "step": 1747 }, { "epoch": 0.6029665401862712, "grad_norm": 1.4966944456100464, "learning_rate": 3.6234316766569917e-05, "loss": 9.3802, "step": 1748 }, { "epoch": 0.6033114867195585, "grad_norm": 1.8180314302444458, "learning_rate": 3.6180374321740084e-05, "loss": 9.3866, "step": 1749 }, { "epoch": 0.6036564332528458, "grad_norm": 1.7780247926712036, "learning_rate": 3.6126449286565965e-05, "loss": 9.3133, "step": 1750 }, { "epoch": 0.6040013797861331, "grad_norm": 0.9698929786682129, "learning_rate": 3.6072541728981094e-05, "loss": 9.5623, "step": 1751 }, { "epoch": 0.6043463263194205, "grad_norm": 0.9983956813812256, "learning_rate": 3.601865171689704e-05, "loss": 9.5587, "step": 1752 }, { "epoch": 0.6046912728527079, "grad_norm": 1.0467181205749512, "learning_rate": 3.596477931820322e-05, "loss": 9.5551, "step": 1753 }, { "epoch": 0.6050362193859952, "grad_norm": 1.2166588306427002, "learning_rate": 3.591092460076688e-05, "loss": 9.5129, "step": 1754 }, { "epoch": 0.6053811659192825, "grad_norm": 0.9980295896530151, "learning_rate": 3.5857087632432986e-05, "loss": 9.5484, "step": 1755 }, { "epoch": 0.6057261124525698, "grad_norm": 1.0149705410003662, "learning_rate": 3.580326848102415e-05, "loss": 9.5623, "step": 1756 }, { "epoch": 0.6060710589858572, "grad_norm": 1.147386908531189, "learning_rate": 3.5749467214340525e-05, "loss": 9.5167, "step": 1757 }, { "epoch": 0.6064160055191445, "grad_norm": 1.0991644859313965, "learning_rate": 3.569568390015976e-05, "loss": 9.5704, "step": 1758 }, { "epoch": 0.6067609520524319, "grad_norm": 1.0665550231933594, "learning_rate": 3.564191860623688e-05, "loss": 9.535, "step": 1759 }, { "epoch": 0.6071058985857192, "grad_norm": 1.2261543273925781, "learning_rate": 3.558817140030418e-05, "loss": 9.5013, "step": 1760 }, { "epoch": 0.6074508451190066, "grad_norm": 1.1674590110778809, "learning_rate": 3.553444235007121e-05, "loss": 9.5437, "step": 1761 }, { "epoch": 0.6077957916522939, "grad_norm": 1.214137077331543, "learning_rate": 3.548073152322462e-05, "loss": 9.5122, "step": 1762 }, { "epoch": 0.6081407381855812, "grad_norm": 1.1316503286361694, "learning_rate": 3.5427038987428105e-05, "loss": 9.5394, "step": 1763 }, { "epoch": 0.6084856847188685, "grad_norm": 1.1909269094467163, "learning_rate": 3.537336481032232e-05, "loss": 9.5682, "step": 1764 }, { "epoch": 0.6088306312521559, "grad_norm": 1.2326979637145996, "learning_rate": 3.531970905952478e-05, "loss": 9.4657, "step": 1765 }, { "epoch": 0.6091755777854433, "grad_norm": 1.2010674476623535, "learning_rate": 3.526607180262984e-05, "loss": 9.5197, "step": 1766 }, { "epoch": 0.6095205243187306, "grad_norm": 1.1736875772476196, "learning_rate": 3.5212453107208506e-05, "loss": 9.4788, "step": 1767 }, { "epoch": 0.6098654708520179, "grad_norm": 1.167720079421997, "learning_rate": 3.515885304080839e-05, "loss": 9.5067, "step": 1768 }, { "epoch": 0.6102104173853052, "grad_norm": 1.3078930377960205, "learning_rate": 3.5105271670953674e-05, "loss": 9.4946, "step": 1769 }, { "epoch": 0.6105553639185927, "grad_norm": 1.2530713081359863, "learning_rate": 3.505170906514498e-05, "loss": 9.535, "step": 1770 }, { "epoch": 0.61090031045188, "grad_norm": 1.2267308235168457, "learning_rate": 3.4998165290859266e-05, "loss": 9.4742, "step": 1771 }, { "epoch": 0.6112452569851673, "grad_norm": 1.2809817790985107, "learning_rate": 3.494464041554977e-05, "loss": 9.5116, "step": 1772 }, { "epoch": 0.6115902035184546, "grad_norm": 1.1434053182601929, "learning_rate": 3.489113450664597e-05, "loss": 9.5531, "step": 1773 }, { "epoch": 0.6119351500517419, "grad_norm": 1.3463351726531982, "learning_rate": 3.4837647631553405e-05, "loss": 9.5608, "step": 1774 }, { "epoch": 0.6122800965850294, "grad_norm": 1.5135565996170044, "learning_rate": 3.478417985765363e-05, "loss": 9.4173, "step": 1775 }, { "epoch": 0.6126250431183167, "grad_norm": 1.2766389846801758, "learning_rate": 3.473073125230417e-05, "loss": 9.5591, "step": 1776 }, { "epoch": 0.612969989651604, "grad_norm": 1.29940664768219, "learning_rate": 3.467730188283836e-05, "loss": 9.5021, "step": 1777 }, { "epoch": 0.6133149361848913, "grad_norm": 1.322685956954956, "learning_rate": 3.462389181656535e-05, "loss": 9.4625, "step": 1778 }, { "epoch": 0.6136598827181787, "grad_norm": 1.272139310836792, "learning_rate": 3.457050112076992e-05, "loss": 9.4416, "step": 1779 }, { "epoch": 0.614004829251466, "grad_norm": 1.449051856994629, "learning_rate": 3.4517129862712504e-05, "loss": 9.443, "step": 1780 }, { "epoch": 0.6143497757847534, "grad_norm": 1.3246345520019531, "learning_rate": 3.446377810962902e-05, "loss": 9.4954, "step": 1781 }, { "epoch": 0.6146947223180407, "grad_norm": 1.449554443359375, "learning_rate": 3.441044592873079e-05, "loss": 9.4129, "step": 1782 }, { "epoch": 0.615039668851328, "grad_norm": 1.2014191150665283, "learning_rate": 3.435713338720453e-05, "loss": 9.5974, "step": 1783 }, { "epoch": 0.6153846153846154, "grad_norm": 1.471830129623413, "learning_rate": 3.4303840552212184e-05, "loss": 9.4773, "step": 1784 }, { "epoch": 0.6157295619179027, "grad_norm": 1.3256584405899048, "learning_rate": 3.425056749089086e-05, "loss": 9.5171, "step": 1785 }, { "epoch": 0.61607450845119, "grad_norm": 1.4362033605575562, "learning_rate": 3.419731427035277e-05, "loss": 9.4557, "step": 1786 }, { "epoch": 0.6164194549844774, "grad_norm": 1.4328278303146362, "learning_rate": 3.414408095768516e-05, "loss": 9.4515, "step": 1787 }, { "epoch": 0.6167644015177648, "grad_norm": 1.360702395439148, "learning_rate": 3.409086761995015e-05, "loss": 9.4638, "step": 1788 }, { "epoch": 0.6171093480510521, "grad_norm": 1.3730055093765259, "learning_rate": 3.4037674324184706e-05, "loss": 9.4274, "step": 1789 }, { "epoch": 0.6174542945843394, "grad_norm": 1.568692684173584, "learning_rate": 3.398450113740057e-05, "loss": 9.4313, "step": 1790 }, { "epoch": 0.6177992411176267, "grad_norm": 1.3866729736328125, "learning_rate": 3.393134812658411e-05, "loss": 9.4285, "step": 1791 }, { "epoch": 0.6181441876509141, "grad_norm": 1.4505038261413574, "learning_rate": 3.3878215358696333e-05, "loss": 9.4724, "step": 1792 }, { "epoch": 0.6184891341842015, "grad_norm": 1.435584306716919, "learning_rate": 3.382510290067269e-05, "loss": 9.4448, "step": 1793 }, { "epoch": 0.6188340807174888, "grad_norm": 1.4031693935394287, "learning_rate": 3.377201081942307e-05, "loss": 9.4456, "step": 1794 }, { "epoch": 0.6191790272507761, "grad_norm": 1.3541998863220215, "learning_rate": 3.371893918183171e-05, "loss": 9.4522, "step": 1795 }, { "epoch": 0.6195239737840634, "grad_norm": 1.6531020402908325, "learning_rate": 3.366588805475707e-05, "loss": 9.3786, "step": 1796 }, { "epoch": 0.6198689203173509, "grad_norm": 1.5427093505859375, "learning_rate": 3.361285750503176e-05, "loss": 9.4252, "step": 1797 }, { "epoch": 0.6202138668506382, "grad_norm": 1.6081955432891846, "learning_rate": 3.355984759946249e-05, "loss": 9.4544, "step": 1798 }, { "epoch": 0.6205588133839255, "grad_norm": 1.565117359161377, "learning_rate": 3.350685840482995e-05, "loss": 9.3773, "step": 1799 }, { "epoch": 0.6209037599172128, "grad_norm": 1.8025498390197754, "learning_rate": 3.3453889987888724e-05, "loss": 9.361, "step": 1800 }, { "epoch": 0.6212487064505001, "grad_norm": 0.9393291473388672, "learning_rate": 3.340094241536729e-05, "loss": 9.5986, "step": 1801 }, { "epoch": 0.6215936529837875, "grad_norm": 0.9784131050109863, "learning_rate": 3.3348015753967785e-05, "loss": 9.5321, "step": 1802 }, { "epoch": 0.6219385995170749, "grad_norm": 1.0297746658325195, "learning_rate": 3.329511007036604e-05, "loss": 9.5718, "step": 1803 }, { "epoch": 0.6222835460503622, "grad_norm": 1.0639649629592896, "learning_rate": 3.324222543121145e-05, "loss": 9.537, "step": 1804 }, { "epoch": 0.6226284925836495, "grad_norm": 1.1060718297958374, "learning_rate": 3.3189361903126916e-05, "loss": 9.5329, "step": 1805 }, { "epoch": 0.6229734391169369, "grad_norm": 1.1022459268569946, "learning_rate": 3.313651955270871e-05, "loss": 9.513, "step": 1806 }, { "epoch": 0.6233183856502242, "grad_norm": 1.0694283246994019, "learning_rate": 3.308369844652643e-05, "loss": 9.5594, "step": 1807 }, { "epoch": 0.6236633321835116, "grad_norm": 1.255054235458374, "learning_rate": 3.3030898651122985e-05, "loss": 9.4931, "step": 1808 }, { "epoch": 0.6240082787167989, "grad_norm": 1.1304607391357422, "learning_rate": 3.297812023301433e-05, "loss": 9.546, "step": 1809 }, { "epoch": 0.6243532252500862, "grad_norm": 1.214964747428894, "learning_rate": 3.2925363258689555e-05, "loss": 9.5152, "step": 1810 }, { "epoch": 0.6246981717833736, "grad_norm": 1.1067923307418823, "learning_rate": 3.287262779461071e-05, "loss": 9.5478, "step": 1811 }, { "epoch": 0.6250431183166609, "grad_norm": 1.1942600011825562, "learning_rate": 3.281991390721276e-05, "loss": 9.5681, "step": 1812 }, { "epoch": 0.6253880648499482, "grad_norm": 1.2584760189056396, "learning_rate": 3.276722166290346e-05, "loss": 9.4977, "step": 1813 }, { "epoch": 0.6257330113832356, "grad_norm": 1.0992047786712646, "learning_rate": 3.2714551128063344e-05, "loss": 9.5753, "step": 1814 }, { "epoch": 0.626077957916523, "grad_norm": 1.187728762626648, "learning_rate": 3.2661902369045586e-05, "loss": 9.5477, "step": 1815 }, { "epoch": 0.6264229044498103, "grad_norm": 1.3243029117584229, "learning_rate": 3.260927545217589e-05, "loss": 9.4887, "step": 1816 }, { "epoch": 0.6267678509830976, "grad_norm": 1.2621115446090698, "learning_rate": 3.25566704437525e-05, "loss": 9.4954, "step": 1817 }, { "epoch": 0.6271127975163849, "grad_norm": 1.326181173324585, "learning_rate": 3.2504087410046004e-05, "loss": 9.4583, "step": 1818 }, { "epoch": 0.6274577440496723, "grad_norm": 1.183527946472168, "learning_rate": 3.245152641729935e-05, "loss": 9.559, "step": 1819 }, { "epoch": 0.6278026905829597, "grad_norm": 1.1669021844863892, "learning_rate": 3.2398987531727694e-05, "loss": 9.4858, "step": 1820 }, { "epoch": 0.628147637116247, "grad_norm": 1.3547872304916382, "learning_rate": 3.2346470819518326e-05, "loss": 9.4709, "step": 1821 }, { "epoch": 0.6284925836495343, "grad_norm": 1.2788245677947998, "learning_rate": 3.229397634683068e-05, "loss": 9.4746, "step": 1822 }, { "epoch": 0.6288375301828216, "grad_norm": 1.2765026092529297, "learning_rate": 3.224150417979609e-05, "loss": 9.5076, "step": 1823 }, { "epoch": 0.629182476716109, "grad_norm": 1.3237452507019043, "learning_rate": 3.218905438451782e-05, "loss": 9.5053, "step": 1824 }, { "epoch": 0.6295274232493964, "grad_norm": 1.1867791414260864, "learning_rate": 3.213662702707094e-05, "loss": 9.4948, "step": 1825 }, { "epoch": 0.6298723697826837, "grad_norm": 1.2228574752807617, "learning_rate": 3.20842221735023e-05, "loss": 9.5036, "step": 1826 }, { "epoch": 0.630217316315971, "grad_norm": 1.3094220161437988, "learning_rate": 3.203183988983033e-05, "loss": 9.4877, "step": 1827 }, { "epoch": 0.6305622628492583, "grad_norm": 1.2245163917541504, "learning_rate": 3.1979480242045045e-05, "loss": 9.5359, "step": 1828 }, { "epoch": 0.6309072093825457, "grad_norm": 1.34917151927948, "learning_rate": 3.192714329610802e-05, "loss": 9.5315, "step": 1829 }, { "epoch": 0.6312521559158331, "grad_norm": 1.3427605628967285, "learning_rate": 3.1874829117952124e-05, "loss": 9.4754, "step": 1830 }, { "epoch": 0.6315971024491204, "grad_norm": 1.4418407678604126, "learning_rate": 3.182253777348161e-05, "loss": 9.4585, "step": 1831 }, { "epoch": 0.6319420489824077, "grad_norm": 1.387801170349121, "learning_rate": 3.177026932857193e-05, "loss": 9.4647, "step": 1832 }, { "epoch": 0.6322869955156951, "grad_norm": 1.41670823097229, "learning_rate": 3.171802384906972e-05, "loss": 9.5347, "step": 1833 }, { "epoch": 0.6326319420489824, "grad_norm": 1.417445182800293, "learning_rate": 3.166580140079265e-05, "loss": 9.5112, "step": 1834 }, { "epoch": 0.6329768885822697, "grad_norm": 1.3484708070755005, "learning_rate": 3.161360204952939e-05, "loss": 9.5273, "step": 1835 }, { "epoch": 0.6333218351155571, "grad_norm": 1.4183062314987183, "learning_rate": 3.1561425861039546e-05, "loss": 9.4721, "step": 1836 }, { "epoch": 0.6336667816488444, "grad_norm": 1.4750018119812012, "learning_rate": 3.15092729010535e-05, "loss": 9.4265, "step": 1837 }, { "epoch": 0.6340117281821318, "grad_norm": 1.3089995384216309, "learning_rate": 3.145714323527237e-05, "loss": 9.438, "step": 1838 }, { "epoch": 0.6343566747154191, "grad_norm": 1.4804081916809082, "learning_rate": 3.140503692936797e-05, "loss": 9.4808, "step": 1839 }, { "epoch": 0.6347016212487064, "grad_norm": 1.3244197368621826, "learning_rate": 3.135295404898265e-05, "loss": 9.4566, "step": 1840 }, { "epoch": 0.6350465677819938, "grad_norm": 1.364261269569397, "learning_rate": 3.130089465972926e-05, "loss": 9.4726, "step": 1841 }, { "epoch": 0.6353915143152812, "grad_norm": 1.5104689598083496, "learning_rate": 3.124885882719102e-05, "loss": 9.4577, "step": 1842 }, { "epoch": 0.6357364608485685, "grad_norm": 1.30232572555542, "learning_rate": 3.119684661692158e-05, "loss": 9.4958, "step": 1843 }, { "epoch": 0.6360814073818558, "grad_norm": 1.457231879234314, "learning_rate": 3.1144858094444715e-05, "loss": 9.5086, "step": 1844 }, { "epoch": 0.6364263539151431, "grad_norm": 1.4896906614303589, "learning_rate": 3.1092893325254413e-05, "loss": 9.4611, "step": 1845 }, { "epoch": 0.6367713004484304, "grad_norm": 1.5810197591781616, "learning_rate": 3.104095237481473e-05, "loss": 9.4457, "step": 1846 }, { "epoch": 0.6371162469817179, "grad_norm": 1.4873422384262085, "learning_rate": 3.0989035308559696e-05, "loss": 9.4359, "step": 1847 }, { "epoch": 0.6374611935150052, "grad_norm": 1.7242629528045654, "learning_rate": 3.093714219189331e-05, "loss": 9.3872, "step": 1848 }, { "epoch": 0.6378061400482925, "grad_norm": 1.7722023725509644, "learning_rate": 3.0885273090189324e-05, "loss": 9.3504, "step": 1849 }, { "epoch": 0.6381510865815798, "grad_norm": 1.7719429731369019, "learning_rate": 3.083342806879129e-05, "loss": 9.3643, "step": 1850 }, { "epoch": 0.6384960331148672, "grad_norm": 0.9851622581481934, "learning_rate": 3.078160719301242e-05, "loss": 9.5158, "step": 1851 }, { "epoch": 0.6388409796481546, "grad_norm": 1.0696388483047485, "learning_rate": 3.0729810528135484e-05, "loss": 9.4771, "step": 1852 }, { "epoch": 0.6391859261814419, "grad_norm": 1.0862210988998413, "learning_rate": 3.067803813941276e-05, "loss": 9.535, "step": 1853 }, { "epoch": 0.6395308727147292, "grad_norm": 0.9991084933280945, "learning_rate": 3.062629009206595e-05, "loss": 9.5883, "step": 1854 }, { "epoch": 0.6398758192480165, "grad_norm": 1.1714235544204712, "learning_rate": 3.0574566451286094e-05, "loss": 9.527, "step": 1855 }, { "epoch": 0.6402207657813039, "grad_norm": 1.2183157205581665, "learning_rate": 3.052286728223343e-05, "loss": 9.4489, "step": 1856 }, { "epoch": 0.6405657123145913, "grad_norm": 1.1056588888168335, "learning_rate": 3.0471192650037504e-05, "loss": 9.5884, "step": 1857 }, { "epoch": 0.6409106588478786, "grad_norm": 1.1708348989486694, "learning_rate": 3.041954261979681e-05, "loss": 9.5137, "step": 1858 }, { "epoch": 0.6412556053811659, "grad_norm": 1.127313494682312, "learning_rate": 3.0367917256578908e-05, "loss": 9.5326, "step": 1859 }, { "epoch": 0.6416005519144533, "grad_norm": 1.2033101320266724, "learning_rate": 3.0316316625420272e-05, "loss": 9.4886, "step": 1860 }, { "epoch": 0.6419454984477406, "grad_norm": 1.183809757232666, "learning_rate": 3.026474079132624e-05, "loss": 9.4902, "step": 1861 }, { "epoch": 0.6422904449810279, "grad_norm": 1.071576714515686, "learning_rate": 3.0213189819270894e-05, "loss": 9.5036, "step": 1862 }, { "epoch": 0.6426353915143153, "grad_norm": 1.2379518747329712, "learning_rate": 3.0161663774196962e-05, "loss": 9.4938, "step": 1863 }, { "epoch": 0.6429803380476026, "grad_norm": 1.231271505355835, "learning_rate": 3.0110162721015856e-05, "loss": 9.5153, "step": 1864 }, { "epoch": 0.64332528458089, "grad_norm": 1.246327519416809, "learning_rate": 3.0058686724607432e-05, "loss": 9.5067, "step": 1865 }, { "epoch": 0.6436702311141773, "grad_norm": 1.328060507774353, "learning_rate": 3.000723584982e-05, "loss": 9.452, "step": 1866 }, { "epoch": 0.6440151776474646, "grad_norm": 1.3291081190109253, "learning_rate": 2.995581016147021e-05, "loss": 9.4638, "step": 1867 }, { "epoch": 0.644360124180752, "grad_norm": 1.3064149618148804, "learning_rate": 2.990440972434302e-05, "loss": 9.5566, "step": 1868 }, { "epoch": 0.6447050707140394, "grad_norm": 1.1765854358673096, "learning_rate": 2.985303460319152e-05, "loss": 9.5289, "step": 1869 }, { "epoch": 0.6450500172473267, "grad_norm": 1.3379801511764526, "learning_rate": 2.9801684862736958e-05, "loss": 9.5201, "step": 1870 }, { "epoch": 0.645394963780614, "grad_norm": 1.2638524770736694, "learning_rate": 2.97503605676686e-05, "loss": 9.5207, "step": 1871 }, { "epoch": 0.6457399103139013, "grad_norm": 1.3365275859832764, "learning_rate": 2.9699061782643632e-05, "loss": 9.4655, "step": 1872 }, { "epoch": 0.6460848568471886, "grad_norm": 1.2336562871932983, "learning_rate": 2.9647788572287126e-05, "loss": 9.5254, "step": 1873 }, { "epoch": 0.6464298033804761, "grad_norm": 1.1859928369522095, "learning_rate": 2.9596541001191924e-05, "loss": 9.461, "step": 1874 }, { "epoch": 0.6467747499137634, "grad_norm": 1.2857304811477661, "learning_rate": 2.954531913391857e-05, "loss": 9.4632, "step": 1875 }, { "epoch": 0.6471196964470507, "grad_norm": 1.1990352869033813, "learning_rate": 2.9494123034995236e-05, "loss": 9.5014, "step": 1876 }, { "epoch": 0.647464642980338, "grad_norm": 1.325065016746521, "learning_rate": 2.9442952768917588e-05, "loss": 9.5199, "step": 1877 }, { "epoch": 0.6478095895136254, "grad_norm": 1.4335227012634277, "learning_rate": 2.9391808400148834e-05, "loss": 9.4413, "step": 1878 }, { "epoch": 0.6481545360469128, "grad_norm": 1.347676157951355, "learning_rate": 2.9340689993119485e-05, "loss": 9.5028, "step": 1879 }, { "epoch": 0.6484994825802001, "grad_norm": 1.3796898126602173, "learning_rate": 2.928959761222737e-05, "loss": 9.4798, "step": 1880 }, { "epoch": 0.6488444291134874, "grad_norm": 1.427625298500061, "learning_rate": 2.9238531321837513e-05, "loss": 9.5211, "step": 1881 }, { "epoch": 0.6491893756467747, "grad_norm": 1.3672127723693848, "learning_rate": 2.918749118628213e-05, "loss": 9.5062, "step": 1882 }, { "epoch": 0.6495343221800621, "grad_norm": 1.348329782485962, "learning_rate": 2.9136477269860386e-05, "loss": 9.4853, "step": 1883 }, { "epoch": 0.6498792687133494, "grad_norm": 1.4505221843719482, "learning_rate": 2.9085489636838504e-05, "loss": 9.4153, "step": 1884 }, { "epoch": 0.6502242152466368, "grad_norm": 1.262218713760376, "learning_rate": 2.9034528351449563e-05, "loss": 9.4583, "step": 1885 }, { "epoch": 0.6505691617799241, "grad_norm": 1.4502121210098267, "learning_rate": 2.8983593477893474e-05, "loss": 9.4859, "step": 1886 }, { "epoch": 0.6509141083132115, "grad_norm": 1.3023953437805176, "learning_rate": 2.8932685080336807e-05, "loss": 9.498, "step": 1887 }, { "epoch": 0.6512590548464988, "grad_norm": 1.2097197771072388, "learning_rate": 2.8881803222912844e-05, "loss": 9.5218, "step": 1888 }, { "epoch": 0.6516040013797861, "grad_norm": 1.571702003479004, "learning_rate": 2.8830947969721445e-05, "loss": 9.4553, "step": 1889 }, { "epoch": 0.6519489479130735, "grad_norm": 1.5287089347839355, "learning_rate": 2.878011938482886e-05, "loss": 9.3639, "step": 1890 }, { "epoch": 0.6522938944463608, "grad_norm": 1.3860400915145874, "learning_rate": 2.8729317532267846e-05, "loss": 9.5053, "step": 1891 }, { "epoch": 0.6526388409796482, "grad_norm": 1.4050540924072266, "learning_rate": 2.8678542476037427e-05, "loss": 9.4575, "step": 1892 }, { "epoch": 0.6529837875129355, "grad_norm": 1.5032589435577393, "learning_rate": 2.862779428010292e-05, "loss": 9.3963, "step": 1893 }, { "epoch": 0.6533287340462228, "grad_norm": 1.5438631772994995, "learning_rate": 2.8577073008395717e-05, "loss": 9.4169, "step": 1894 }, { "epoch": 0.6536736805795101, "grad_norm": 1.5366860628128052, "learning_rate": 2.852637872481338e-05, "loss": 9.3983, "step": 1895 }, { "epoch": 0.6540186271127976, "grad_norm": 1.5280556678771973, "learning_rate": 2.8475711493219404e-05, "loss": 9.4123, "step": 1896 }, { "epoch": 0.6543635736460849, "grad_norm": 1.4505887031555176, "learning_rate": 2.8425071377443245e-05, "loss": 9.473, "step": 1897 }, { "epoch": 0.6547085201793722, "grad_norm": 1.694767713546753, "learning_rate": 2.8374458441280204e-05, "loss": 9.3827, "step": 1898 }, { "epoch": 0.6550534667126595, "grad_norm": 1.6835027933120728, "learning_rate": 2.832387274849134e-05, "loss": 9.4103, "step": 1899 }, { "epoch": 0.6553984132459468, "grad_norm": 1.7232531309127808, "learning_rate": 2.8273314362803338e-05, "loss": 9.3244, "step": 1900 }, { "epoch": 0.6557433597792343, "grad_norm": 0.9157406091690063, "learning_rate": 2.8222783347908545e-05, "loss": 9.5555, "step": 1901 }, { "epoch": 0.6560883063125216, "grad_norm": 1.0032362937927246, "learning_rate": 2.8172279767464814e-05, "loss": 9.6001, "step": 1902 }, { "epoch": 0.6564332528458089, "grad_norm": 1.1082993745803833, "learning_rate": 2.8121803685095403e-05, "loss": 9.5102, "step": 1903 }, { "epoch": 0.6567781993790962, "grad_norm": 0.9890560507774353, "learning_rate": 2.807135516438899e-05, "loss": 9.5794, "step": 1904 }, { "epoch": 0.6571231459123836, "grad_norm": 1.0277185440063477, "learning_rate": 2.8020934268899423e-05, "loss": 9.5428, "step": 1905 }, { "epoch": 0.657468092445671, "grad_norm": 1.018535852432251, "learning_rate": 2.7970541062145918e-05, "loss": 9.5505, "step": 1906 }, { "epoch": 0.6578130389789583, "grad_norm": 1.0695736408233643, "learning_rate": 2.792017560761263e-05, "loss": 9.5001, "step": 1907 }, { "epoch": 0.6581579855122456, "grad_norm": 1.1729811429977417, "learning_rate": 2.786983796874889e-05, "loss": 9.4728, "step": 1908 }, { "epoch": 0.6585029320455329, "grad_norm": 1.1934943199157715, "learning_rate": 2.7819528208968883e-05, "loss": 9.4998, "step": 1909 }, { "epoch": 0.6588478785788203, "grad_norm": 1.0921857357025146, "learning_rate": 2.7769246391651742e-05, "loss": 9.5279, "step": 1910 }, { "epoch": 0.6591928251121076, "grad_norm": 1.2350205183029175, "learning_rate": 2.7718992580141402e-05, "loss": 9.4769, "step": 1911 }, { "epoch": 0.659537771645395, "grad_norm": 1.2308374643325806, "learning_rate": 2.7668766837746422e-05, "loss": 9.5153, "step": 1912 }, { "epoch": 0.6598827181786823, "grad_norm": 1.287536859512329, "learning_rate": 2.7618569227740165e-05, "loss": 9.4465, "step": 1913 }, { "epoch": 0.6602276647119697, "grad_norm": 1.2224645614624023, "learning_rate": 2.7568399813360378e-05, "loss": 9.5243, "step": 1914 }, { "epoch": 0.660572611245257, "grad_norm": 1.2047133445739746, "learning_rate": 2.751825865780943e-05, "loss": 9.4819, "step": 1915 }, { "epoch": 0.6609175577785443, "grad_norm": 1.182023525238037, "learning_rate": 2.7468145824253977e-05, "loss": 9.5053, "step": 1916 }, { "epoch": 0.6612625043118316, "grad_norm": 1.197792887687683, "learning_rate": 2.7418061375825087e-05, "loss": 9.556, "step": 1917 }, { "epoch": 0.661607450845119, "grad_norm": 1.1292563676834106, "learning_rate": 2.7368005375617994e-05, "loss": 9.5674, "step": 1918 }, { "epoch": 0.6619523973784064, "grad_norm": 1.3293133974075317, "learning_rate": 2.7317977886692147e-05, "loss": 9.4748, "step": 1919 }, { "epoch": 0.6622973439116937, "grad_norm": 1.1616480350494385, "learning_rate": 2.7267978972071057e-05, "loss": 9.5377, "step": 1920 }, { "epoch": 0.662642290444981, "grad_norm": 1.3261762857437134, "learning_rate": 2.721800869474226e-05, "loss": 9.5438, "step": 1921 }, { "epoch": 0.6629872369782683, "grad_norm": 1.2228319644927979, "learning_rate": 2.716806711765716e-05, "loss": 9.4966, "step": 1922 }, { "epoch": 0.6633321835115558, "grad_norm": 1.2406237125396729, "learning_rate": 2.7118154303731048e-05, "loss": 9.4417, "step": 1923 }, { "epoch": 0.6636771300448431, "grad_norm": 1.3249214887619019, "learning_rate": 2.706827031584301e-05, "loss": 9.4701, "step": 1924 }, { "epoch": 0.6640220765781304, "grad_norm": 1.22321617603302, "learning_rate": 2.7018415216835725e-05, "loss": 9.4863, "step": 1925 }, { "epoch": 0.6643670231114177, "grad_norm": 1.1897952556610107, "learning_rate": 2.6968589069515558e-05, "loss": 9.5445, "step": 1926 }, { "epoch": 0.664711969644705, "grad_norm": 1.3240059614181519, "learning_rate": 2.6918791936652376e-05, "loss": 9.4969, "step": 1927 }, { "epoch": 0.6650569161779925, "grad_norm": 1.255070447921753, "learning_rate": 2.6869023880979516e-05, "loss": 9.5387, "step": 1928 }, { "epoch": 0.6654018627112798, "grad_norm": 1.351804494857788, "learning_rate": 2.6819284965193625e-05, "loss": 9.5075, "step": 1929 }, { "epoch": 0.6657468092445671, "grad_norm": 1.2300724983215332, "learning_rate": 2.6769575251954704e-05, "loss": 9.5479, "step": 1930 }, { "epoch": 0.6660917557778544, "grad_norm": 1.231567144393921, "learning_rate": 2.671989480388592e-05, "loss": 9.4898, "step": 1931 }, { "epoch": 0.6664367023111418, "grad_norm": 1.3243263959884644, "learning_rate": 2.6670243683573598e-05, "loss": 9.4612, "step": 1932 }, { "epoch": 0.6667816488444291, "grad_norm": 1.426049828529358, "learning_rate": 2.6620621953567115e-05, "loss": 9.4235, "step": 1933 }, { "epoch": 0.6671265953777165, "grad_norm": 1.2795472145080566, "learning_rate": 2.657102967637881e-05, "loss": 9.5161, "step": 1934 }, { "epoch": 0.6674715419110038, "grad_norm": 1.3263260126113892, "learning_rate": 2.652146691448396e-05, "loss": 9.4876, "step": 1935 }, { "epoch": 0.6678164884442911, "grad_norm": 1.2887285947799683, "learning_rate": 2.6471933730320576e-05, "loss": 9.5039, "step": 1936 }, { "epoch": 0.6681614349775785, "grad_norm": 1.3971636295318604, "learning_rate": 2.64224301862895e-05, "loss": 9.4347, "step": 1937 }, { "epoch": 0.6685063815108658, "grad_norm": 1.395354151725769, "learning_rate": 2.6372956344754142e-05, "loss": 9.519, "step": 1938 }, { "epoch": 0.6688513280441531, "grad_norm": 1.3385522365570068, "learning_rate": 2.6323512268040597e-05, "loss": 9.4831, "step": 1939 }, { "epoch": 0.6691962745774405, "grad_norm": 1.4280192852020264, "learning_rate": 2.6274098018437343e-05, "loss": 9.471, "step": 1940 }, { "epoch": 0.6695412211107279, "grad_norm": 1.4592801332473755, "learning_rate": 2.622471365819542e-05, "loss": 9.4051, "step": 1941 }, { "epoch": 0.6698861676440152, "grad_norm": 1.3051594495773315, "learning_rate": 2.6175359249528088e-05, "loss": 9.497, "step": 1942 }, { "epoch": 0.6702311141773025, "grad_norm": 1.4578258991241455, "learning_rate": 2.612603485461097e-05, "loss": 9.4316, "step": 1943 }, { "epoch": 0.6705760607105898, "grad_norm": 1.4668503999710083, "learning_rate": 2.6076740535581802e-05, "loss": 9.4014, "step": 1944 }, { "epoch": 0.6709210072438772, "grad_norm": 1.718100666999817, "learning_rate": 2.602747635454047e-05, "loss": 9.3969, "step": 1945 }, { "epoch": 0.6712659537771646, "grad_norm": 1.4831199645996094, "learning_rate": 2.5978242373548915e-05, "loss": 9.4129, "step": 1946 }, { "epoch": 0.6716109003104519, "grad_norm": 1.6233813762664795, "learning_rate": 2.5929038654630954e-05, "loss": 9.4001, "step": 1947 }, { "epoch": 0.6719558468437392, "grad_norm": 1.645786166191101, "learning_rate": 2.587986525977241e-05, "loss": 9.3544, "step": 1948 }, { "epoch": 0.6723007933770265, "grad_norm": 1.6554784774780273, "learning_rate": 2.5830722250920768e-05, "loss": 9.4351, "step": 1949 }, { "epoch": 0.672645739910314, "grad_norm": 1.8901984691619873, "learning_rate": 2.578160968998532e-05, "loss": 9.2865, "step": 1950 }, { "epoch": 0.6729906864436013, "grad_norm": 1.0118002891540527, "learning_rate": 2.5732527638836957e-05, "loss": 9.5655, "step": 1951 }, { "epoch": 0.6733356329768886, "grad_norm": 0.9816641807556152, "learning_rate": 2.568347615930816e-05, "loss": 9.538, "step": 1952 }, { "epoch": 0.6736805795101759, "grad_norm": 1.0502429008483887, "learning_rate": 2.5634455313192872e-05, "loss": 9.5611, "step": 1953 }, { "epoch": 0.6740255260434632, "grad_norm": 1.091073989868164, "learning_rate": 2.558546516224648e-05, "loss": 9.501, "step": 1954 }, { "epoch": 0.6743704725767506, "grad_norm": 1.0108824968338013, "learning_rate": 2.5536505768185664e-05, "loss": 9.6199, "step": 1955 }, { "epoch": 0.674715419110038, "grad_norm": 1.1202064752578735, "learning_rate": 2.5487577192688388e-05, "loss": 9.5031, "step": 1956 }, { "epoch": 0.6750603656433253, "grad_norm": 1.0962861776351929, "learning_rate": 2.5438679497393793e-05, "loss": 9.5629, "step": 1957 }, { "epoch": 0.6754053121766126, "grad_norm": 1.1996490955352783, "learning_rate": 2.5389812743902063e-05, "loss": 9.4618, "step": 1958 }, { "epoch": 0.6757502587099, "grad_norm": 1.098293662071228, "learning_rate": 2.534097699377449e-05, "loss": 9.5814, "step": 1959 }, { "epoch": 0.6760952052431873, "grad_norm": 1.176307201385498, "learning_rate": 2.5292172308533213e-05, "loss": 9.4972, "step": 1960 }, { "epoch": 0.6764401517764747, "grad_norm": 1.2546685934066772, "learning_rate": 2.5243398749661307e-05, "loss": 9.5163, "step": 1961 }, { "epoch": 0.676785098309762, "grad_norm": 1.1058201789855957, "learning_rate": 2.519465637860261e-05, "loss": 9.5297, "step": 1962 }, { "epoch": 0.6771300448430493, "grad_norm": 1.1880666017532349, "learning_rate": 2.5145945256761693e-05, "loss": 9.5706, "step": 1963 }, { "epoch": 0.6774749913763367, "grad_norm": 1.3704041242599487, "learning_rate": 2.5097265445503703e-05, "loss": 9.4795, "step": 1964 }, { "epoch": 0.677819937909624, "grad_norm": 1.386462688446045, "learning_rate": 2.504861700615442e-05, "loss": 9.5004, "step": 1965 }, { "epoch": 0.6781648844429113, "grad_norm": 1.2360806465148926, "learning_rate": 2.500000000000001e-05, "loss": 9.5056, "step": 1966 }, { "epoch": 0.6785098309761987, "grad_norm": 1.2533848285675049, "learning_rate": 2.4951414488287123e-05, "loss": 9.4878, "step": 1967 }, { "epoch": 0.6788547775094861, "grad_norm": 1.3033040761947632, "learning_rate": 2.4902860532222692e-05, "loss": 9.4286, "step": 1968 }, { "epoch": 0.6791997240427734, "grad_norm": 1.2532498836517334, "learning_rate": 2.4854338192973913e-05, "loss": 9.4829, "step": 1969 }, { "epoch": 0.6795446705760607, "grad_norm": 1.265669345855713, "learning_rate": 2.480584753166816e-05, "loss": 9.4645, "step": 1970 }, { "epoch": 0.679889617109348, "grad_norm": 1.2609977722167969, "learning_rate": 2.475738860939285e-05, "loss": 9.4758, "step": 1971 }, { "epoch": 0.6802345636426353, "grad_norm": 1.324218511581421, "learning_rate": 2.4708961487195486e-05, "loss": 9.452, "step": 1972 }, { "epoch": 0.6805795101759228, "grad_norm": 1.5088634490966797, "learning_rate": 2.4660566226083444e-05, "loss": 9.4525, "step": 1973 }, { "epoch": 0.6809244567092101, "grad_norm": 1.3118107318878174, "learning_rate": 2.4612202887024016e-05, "loss": 9.5058, "step": 1974 }, { "epoch": 0.6812694032424974, "grad_norm": 1.4367679357528687, "learning_rate": 2.456387153094421e-05, "loss": 9.4298, "step": 1975 }, { "epoch": 0.6816143497757847, "grad_norm": 1.1999366283416748, "learning_rate": 2.4515572218730866e-05, "loss": 9.5317, "step": 1976 }, { "epoch": 0.681959296309072, "grad_norm": 1.3008366823196411, "learning_rate": 2.4467305011230318e-05, "loss": 9.498, "step": 1977 }, { "epoch": 0.6823042428423595, "grad_norm": 1.4255015850067139, "learning_rate": 2.4419069969248533e-05, "loss": 9.5161, "step": 1978 }, { "epoch": 0.6826491893756468, "grad_norm": 1.217320203781128, "learning_rate": 2.4370867153550956e-05, "loss": 9.4937, "step": 1979 }, { "epoch": 0.6829941359089341, "grad_norm": 1.2340617179870605, "learning_rate": 2.432269662486239e-05, "loss": 9.5439, "step": 1980 }, { "epoch": 0.6833390824422214, "grad_norm": 1.2029873132705688, "learning_rate": 2.4274558443867024e-05, "loss": 9.54, "step": 1981 }, { "epoch": 0.6836840289755088, "grad_norm": 1.3623865842819214, "learning_rate": 2.42264526712082e-05, "loss": 9.4325, "step": 1982 }, { "epoch": 0.6840289755087962, "grad_norm": 1.3263287544250488, "learning_rate": 2.417837936748858e-05, "loss": 9.4755, "step": 1983 }, { "epoch": 0.6843739220420835, "grad_norm": 1.3248276710510254, "learning_rate": 2.4130338593269773e-05, "loss": 9.4778, "step": 1984 }, { "epoch": 0.6847188685753708, "grad_norm": 1.3979350328445435, "learning_rate": 2.408233040907252e-05, "loss": 9.4865, "step": 1985 }, { "epoch": 0.6850638151086581, "grad_norm": 1.253639817237854, "learning_rate": 2.4034354875376414e-05, "loss": 9.5268, "step": 1986 }, { "epoch": 0.6854087616419455, "grad_norm": 1.254916787147522, "learning_rate": 2.3986412052619985e-05, "loss": 9.4905, "step": 1987 }, { "epoch": 0.6857537081752328, "grad_norm": 1.5778090953826904, "learning_rate": 2.393850200120054e-05, "loss": 9.4679, "step": 1988 }, { "epoch": 0.6860986547085202, "grad_norm": 1.5365502834320068, "learning_rate": 2.3890624781474068e-05, "loss": 9.4484, "step": 1989 }, { "epoch": 0.6864436012418075, "grad_norm": 1.519955039024353, "learning_rate": 2.384278045375523e-05, "loss": 9.4295, "step": 1990 }, { "epoch": 0.6867885477750949, "grad_norm": 1.3635841608047485, "learning_rate": 2.3794969078317243e-05, "loss": 9.4853, "step": 1991 }, { "epoch": 0.6871334943083822, "grad_norm": 1.5128055810928345, "learning_rate": 2.3747190715391824e-05, "loss": 9.4405, "step": 1992 }, { "epoch": 0.6874784408416695, "grad_norm": 1.3982264995574951, "learning_rate": 2.369944542516906e-05, "loss": 9.4502, "step": 1993 }, { "epoch": 0.6878233873749569, "grad_norm": 1.3720015287399292, "learning_rate": 2.365173326779743e-05, "loss": 9.4478, "step": 1994 }, { "epoch": 0.6881683339082442, "grad_norm": 1.5373377799987793, "learning_rate": 2.360405430338361e-05, "loss": 9.3671, "step": 1995 }, { "epoch": 0.6885132804415316, "grad_norm": 1.6050822734832764, "learning_rate": 2.355640859199251e-05, "loss": 9.4249, "step": 1996 }, { "epoch": 0.6888582269748189, "grad_norm": 1.7167375087738037, "learning_rate": 2.3508796193647138e-05, "loss": 9.4011, "step": 1997 }, { "epoch": 0.6892031735081062, "grad_norm": 1.7736046314239502, "learning_rate": 2.346121716832855e-05, "loss": 9.4303, "step": 1998 }, { "epoch": 0.6895481200413935, "grad_norm": 1.566192626953125, "learning_rate": 2.341367157597569e-05, "loss": 9.3759, "step": 1999 }, { "epoch": 0.689893066574681, "grad_norm": 1.855802059173584, "learning_rate": 2.336615947648546e-05, "loss": 9.3965, "step": 2000 }, { "epoch": 0.6902380131079683, "grad_norm": 1.0500324964523315, "learning_rate": 2.331868092971256e-05, "loss": 9.5872, "step": 2001 }, { "epoch": 0.6905829596412556, "grad_norm": 0.9782236814498901, "learning_rate": 2.3271235995469363e-05, "loss": 9.6103, "step": 2002 }, { "epoch": 0.6909279061745429, "grad_norm": 0.9627702236175537, "learning_rate": 2.322382473352596e-05, "loss": 9.498, "step": 2003 }, { "epoch": 0.6912728527078302, "grad_norm": 1.0011677742004395, "learning_rate": 2.3176447203610003e-05, "loss": 9.5546, "step": 2004 }, { "epoch": 0.6916177992411177, "grad_norm": 1.1541500091552734, "learning_rate": 2.3129103465406654e-05, "loss": 9.5231, "step": 2005 }, { "epoch": 0.691962745774405, "grad_norm": 1.135690450668335, "learning_rate": 2.3081793578558462e-05, "loss": 9.6356, "step": 2006 }, { "epoch": 0.6923076923076923, "grad_norm": 1.2155224084854126, "learning_rate": 2.3034517602665407e-05, "loss": 9.5496, "step": 2007 }, { "epoch": 0.6926526388409796, "grad_norm": 1.1832019090652466, "learning_rate": 2.298727559728467e-05, "loss": 9.4695, "step": 2008 }, { "epoch": 0.692997585374267, "grad_norm": 1.1747037172317505, "learning_rate": 2.294006762193069e-05, "loss": 9.5464, "step": 2009 }, { "epoch": 0.6933425319075543, "grad_norm": 1.2343512773513794, "learning_rate": 2.2892893736075016e-05, "loss": 9.506, "step": 2010 }, { "epoch": 0.6936874784408417, "grad_norm": 1.1944860219955444, "learning_rate": 2.2845753999146284e-05, "loss": 9.5046, "step": 2011 }, { "epoch": 0.694032424974129, "grad_norm": 1.2822582721710205, "learning_rate": 2.2798648470530038e-05, "loss": 9.5136, "step": 2012 }, { "epoch": 0.6943773715074163, "grad_norm": 1.2347842454910278, "learning_rate": 2.2751577209568793e-05, "loss": 9.5564, "step": 2013 }, { "epoch": 0.6947223180407037, "grad_norm": 1.1867071390151978, "learning_rate": 2.2704540275561887e-05, "loss": 9.4912, "step": 2014 }, { "epoch": 0.695067264573991, "grad_norm": 1.163027048110962, "learning_rate": 2.265753772776536e-05, "loss": 9.5166, "step": 2015 }, { "epoch": 0.6954122111072784, "grad_norm": 1.1979963779449463, "learning_rate": 2.261056962539201e-05, "loss": 9.603, "step": 2016 }, { "epoch": 0.6957571576405657, "grad_norm": 1.2587945461273193, "learning_rate": 2.256363602761115e-05, "loss": 9.4699, "step": 2017 }, { "epoch": 0.6961021041738531, "grad_norm": 1.2772116661071777, "learning_rate": 2.2516736993548755e-05, "loss": 9.5098, "step": 2018 }, { "epoch": 0.6964470507071404, "grad_norm": 1.1604729890823364, "learning_rate": 2.2469872582287118e-05, "loss": 9.5239, "step": 2019 }, { "epoch": 0.6967919972404277, "grad_norm": 1.2303416728973389, "learning_rate": 2.2423042852865013e-05, "loss": 9.5669, "step": 2020 }, { "epoch": 0.697136943773715, "grad_norm": 1.3527498245239258, "learning_rate": 2.237624786427746e-05, "loss": 9.4579, "step": 2021 }, { "epoch": 0.6974818903070024, "grad_norm": 1.2704267501831055, "learning_rate": 2.2329487675475753e-05, "loss": 9.4686, "step": 2022 }, { "epoch": 0.6978268368402898, "grad_norm": 1.1141988039016724, "learning_rate": 2.2282762345367346e-05, "loss": 9.5626, "step": 2023 }, { "epoch": 0.6981717833735771, "grad_norm": 1.3489370346069336, "learning_rate": 2.223607193281572e-05, "loss": 9.4736, "step": 2024 }, { "epoch": 0.6985167299068644, "grad_norm": 1.3955328464508057, "learning_rate": 2.2189416496640488e-05, "loss": 9.4638, "step": 2025 }, { "epoch": 0.6988616764401517, "grad_norm": 1.254169225692749, "learning_rate": 2.2142796095617063e-05, "loss": 9.5615, "step": 2026 }, { "epoch": 0.6992066229734392, "grad_norm": 1.3857367038726807, "learning_rate": 2.2096210788476822e-05, "loss": 9.5208, "step": 2027 }, { "epoch": 0.6995515695067265, "grad_norm": 1.374439001083374, "learning_rate": 2.2049660633906865e-05, "loss": 9.5285, "step": 2028 }, { "epoch": 0.6998965160400138, "grad_norm": 1.1612699031829834, "learning_rate": 2.200314569055007e-05, "loss": 9.5181, "step": 2029 }, { "epoch": 0.7002414625733011, "grad_norm": 1.3591151237487793, "learning_rate": 2.1956666017004867e-05, "loss": 9.4199, "step": 2030 }, { "epoch": 0.7005864091065884, "grad_norm": 1.3390651941299438, "learning_rate": 2.1910221671825388e-05, "loss": 9.4853, "step": 2031 }, { "epoch": 0.7009313556398759, "grad_norm": 1.390390396118164, "learning_rate": 2.1863812713521115e-05, "loss": 9.4814, "step": 2032 }, { "epoch": 0.7012763021731632, "grad_norm": 1.3022857904434204, "learning_rate": 2.1817439200557065e-05, "loss": 9.466, "step": 2033 }, { "epoch": 0.7016212487064505, "grad_norm": 1.3257852792739868, "learning_rate": 2.177110119135352e-05, "loss": 9.4746, "step": 2034 }, { "epoch": 0.7019661952397378, "grad_norm": 1.3743329048156738, "learning_rate": 2.1724798744286072e-05, "loss": 9.4532, "step": 2035 }, { "epoch": 0.7023111417730252, "grad_norm": 1.3872151374816895, "learning_rate": 2.1678531917685545e-05, "loss": 9.5759, "step": 2036 }, { "epoch": 0.7026560883063125, "grad_norm": 1.346116542816162, "learning_rate": 2.163230076983779e-05, "loss": 9.4619, "step": 2037 }, { "epoch": 0.7030010348395999, "grad_norm": 1.4963953495025635, "learning_rate": 2.158610535898384e-05, "loss": 9.4303, "step": 2038 }, { "epoch": 0.7033459813728872, "grad_norm": 1.4644328355789185, "learning_rate": 2.15399457433196e-05, "loss": 9.4146, "step": 2039 }, { "epoch": 0.7036909279061745, "grad_norm": 1.5116710662841797, "learning_rate": 2.1493821980995953e-05, "loss": 9.4338, "step": 2040 }, { "epoch": 0.7040358744394619, "grad_norm": 1.473137378692627, "learning_rate": 2.1447734130118546e-05, "loss": 9.4159, "step": 2041 }, { "epoch": 0.7043808209727492, "grad_norm": 1.3014733791351318, "learning_rate": 2.1401682248747867e-05, "loss": 9.4929, "step": 2042 }, { "epoch": 0.7047257675060365, "grad_norm": 1.5376137495040894, "learning_rate": 2.135566639489901e-05, "loss": 9.4507, "step": 2043 }, { "epoch": 0.7050707140393239, "grad_norm": 1.567149043083191, "learning_rate": 2.130968662654173e-05, "loss": 9.4694, "step": 2044 }, { "epoch": 0.7054156605726113, "grad_norm": 1.4803193807601929, "learning_rate": 2.126374300160032e-05, "loss": 9.388, "step": 2045 }, { "epoch": 0.7057606071058986, "grad_norm": 1.4357073307037354, "learning_rate": 2.1217835577953525e-05, "loss": 9.4049, "step": 2046 }, { "epoch": 0.7061055536391859, "grad_norm": 1.5136798620224, "learning_rate": 2.117196441343452e-05, "loss": 9.4743, "step": 2047 }, { "epoch": 0.7064505001724732, "grad_norm": 1.4890515804290771, "learning_rate": 2.1126129565830728e-05, "loss": 9.4143, "step": 2048 }, { "epoch": 0.7067954467057606, "grad_norm": 1.567327618598938, "learning_rate": 2.1080331092883905e-05, "loss": 9.4024, "step": 2049 }, { "epoch": 0.707140393239048, "grad_norm": 1.809950590133667, "learning_rate": 2.103456905228991e-05, "loss": 9.4008, "step": 2050 }, { "epoch": 0.7074853397723353, "grad_norm": 0.894708514213562, "learning_rate": 2.098884350169876e-05, "loss": 9.5418, "step": 2051 }, { "epoch": 0.7078302863056226, "grad_norm": 1.0703632831573486, "learning_rate": 2.0943154498714467e-05, "loss": 9.5756, "step": 2052 }, { "epoch": 0.7081752328389099, "grad_norm": 1.0689934492111206, "learning_rate": 2.0897502100895054e-05, "loss": 9.5163, "step": 2053 }, { "epoch": 0.7085201793721974, "grad_norm": 1.0289816856384277, "learning_rate": 2.0851886365752348e-05, "loss": 9.5832, "step": 2054 }, { "epoch": 0.7088651259054847, "grad_norm": 1.028753638267517, "learning_rate": 2.0806307350752068e-05, "loss": 9.5606, "step": 2055 }, { "epoch": 0.709210072438772, "grad_norm": 1.2290129661560059, "learning_rate": 2.07607651133136e-05, "loss": 9.5328, "step": 2056 }, { "epoch": 0.7095550189720593, "grad_norm": 1.2330552339553833, "learning_rate": 2.0715259710810065e-05, "loss": 9.4955, "step": 2057 }, { "epoch": 0.7098999655053466, "grad_norm": 1.059729814529419, "learning_rate": 2.0669791200568144e-05, "loss": 9.5306, "step": 2058 }, { "epoch": 0.710244912038634, "grad_norm": 1.1995996236801147, "learning_rate": 2.062435963986805e-05, "loss": 9.5008, "step": 2059 }, { "epoch": 0.7105898585719214, "grad_norm": 1.1978609561920166, "learning_rate": 2.0578965085943475e-05, "loss": 9.5495, "step": 2060 }, { "epoch": 0.7109348051052087, "grad_norm": 1.2156633138656616, "learning_rate": 2.053360759598142e-05, "loss": 9.4956, "step": 2061 }, { "epoch": 0.711279751638496, "grad_norm": 1.2758194208145142, "learning_rate": 2.0488287227122272e-05, "loss": 9.5299, "step": 2062 }, { "epoch": 0.7116246981717834, "grad_norm": 1.1374874114990234, "learning_rate": 2.044300403645959e-05, "loss": 9.5721, "step": 2063 }, { "epoch": 0.7119696447050707, "grad_norm": 1.1768476963043213, "learning_rate": 2.0397758081040157e-05, "loss": 9.4924, "step": 2064 }, { "epoch": 0.712314591238358, "grad_norm": 1.3491202592849731, "learning_rate": 2.035254941786377e-05, "loss": 9.4696, "step": 2065 }, { "epoch": 0.7126595377716454, "grad_norm": 1.257044792175293, "learning_rate": 2.0307378103883358e-05, "loss": 9.5058, "step": 2066 }, { "epoch": 0.7130044843049327, "grad_norm": 1.2255409955978394, "learning_rate": 2.0262244196004675e-05, "loss": 9.4967, "step": 2067 }, { "epoch": 0.7133494308382201, "grad_norm": 1.139023780822754, "learning_rate": 2.021714775108644e-05, "loss": 9.6305, "step": 2068 }, { "epoch": 0.7136943773715074, "grad_norm": 1.3005746603012085, "learning_rate": 2.0172088825940157e-05, "loss": 9.4542, "step": 2069 }, { "epoch": 0.7140393239047947, "grad_norm": 1.28018319606781, "learning_rate": 2.012706747733002e-05, "loss": 9.4719, "step": 2070 }, { "epoch": 0.7143842704380821, "grad_norm": 1.32435941696167, "learning_rate": 2.0082083761972952e-05, "loss": 9.4833, "step": 2071 }, { "epoch": 0.7147292169713695, "grad_norm": 1.2384377717971802, "learning_rate": 2.003713773653838e-05, "loss": 9.5325, "step": 2072 }, { "epoch": 0.7150741635046568, "grad_norm": 1.3312957286834717, "learning_rate": 1.9992229457648375e-05, "loss": 9.4717, "step": 2073 }, { "epoch": 0.7154191100379441, "grad_norm": 1.3630597591400146, "learning_rate": 1.994735898187733e-05, "loss": 9.4864, "step": 2074 }, { "epoch": 0.7157640565712314, "grad_norm": 1.2554008960723877, "learning_rate": 1.9902526365752105e-05, "loss": 9.4386, "step": 2075 }, { "epoch": 0.7161090031045187, "grad_norm": 1.365967035293579, "learning_rate": 1.985773166575179e-05, "loss": 9.5193, "step": 2076 }, { "epoch": 0.7164539496378062, "grad_norm": 1.196242332458496, "learning_rate": 1.981297493830778e-05, "loss": 9.5456, "step": 2077 }, { "epoch": 0.7167988961710935, "grad_norm": 1.3467918634414673, "learning_rate": 1.9768256239803574e-05, "loss": 9.4929, "step": 2078 }, { "epoch": 0.7171438427043808, "grad_norm": 1.429176688194275, "learning_rate": 1.97235756265748e-05, "loss": 9.478, "step": 2079 }, { "epoch": 0.7174887892376681, "grad_norm": 1.2556706666946411, "learning_rate": 1.9678933154909095e-05, "loss": 9.5675, "step": 2080 }, { "epoch": 0.7178337357709555, "grad_norm": 1.2309414148330688, "learning_rate": 1.963432888104606e-05, "loss": 9.5355, "step": 2081 }, { "epoch": 0.7181786823042429, "grad_norm": 1.325849175453186, "learning_rate": 1.9589762861177164e-05, "loss": 9.5337, "step": 2082 }, { "epoch": 0.7185236288375302, "grad_norm": 1.3904569149017334, "learning_rate": 1.9545235151445655e-05, "loss": 9.4365, "step": 2083 }, { "epoch": 0.7188685753708175, "grad_norm": 1.3091845512390137, "learning_rate": 1.9500745807946586e-05, "loss": 9.4856, "step": 2084 }, { "epoch": 0.7192135219041048, "grad_norm": 1.2799763679504395, "learning_rate": 1.9456294886726594e-05, "loss": 9.4987, "step": 2085 }, { "epoch": 0.7195584684373922, "grad_norm": 1.484225869178772, "learning_rate": 1.9411882443783984e-05, "loss": 9.4233, "step": 2086 }, { "epoch": 0.7199034149706796, "grad_norm": 1.3886871337890625, "learning_rate": 1.936750853506855e-05, "loss": 9.5053, "step": 2087 }, { "epoch": 0.7202483615039669, "grad_norm": 1.4728593826293945, "learning_rate": 1.932317321648158e-05, "loss": 9.5088, "step": 2088 }, { "epoch": 0.7205933080372542, "grad_norm": 1.5038708448410034, "learning_rate": 1.9278876543875675e-05, "loss": 9.4059, "step": 2089 }, { "epoch": 0.7209382545705416, "grad_norm": 1.4496952295303345, "learning_rate": 1.9234618573054807e-05, "loss": 9.44, "step": 2090 }, { "epoch": 0.7212832011038289, "grad_norm": 1.4279743432998657, "learning_rate": 1.9190399359774208e-05, "loss": 9.5069, "step": 2091 }, { "epoch": 0.7216281476371162, "grad_norm": 1.6038273572921753, "learning_rate": 1.9146218959740214e-05, "loss": 9.5013, "step": 2092 }, { "epoch": 0.7219730941704036, "grad_norm": 1.551633358001709, "learning_rate": 1.910207742861032e-05, "loss": 9.3824, "step": 2093 }, { "epoch": 0.7223180407036909, "grad_norm": 1.510208249092102, "learning_rate": 1.9057974821993046e-05, "loss": 9.375, "step": 2094 }, { "epoch": 0.7226629872369783, "grad_norm": 1.4527089595794678, "learning_rate": 1.9013911195447887e-05, "loss": 9.4562, "step": 2095 }, { "epoch": 0.7230079337702656, "grad_norm": 1.4268805980682373, "learning_rate": 1.8969886604485176e-05, "loss": 9.447, "step": 2096 }, { "epoch": 0.7233528803035529, "grad_norm": 1.4859588146209717, "learning_rate": 1.892590110456615e-05, "loss": 9.3738, "step": 2097 }, { "epoch": 0.7236978268368403, "grad_norm": 1.5741686820983887, "learning_rate": 1.888195475110272e-05, "loss": 9.4351, "step": 2098 }, { "epoch": 0.7240427733701277, "grad_norm": 1.651714563369751, "learning_rate": 1.8838047599457563e-05, "loss": 9.3773, "step": 2099 }, { "epoch": 0.724387719903415, "grad_norm": 1.6480320692062378, "learning_rate": 1.8794179704943865e-05, "loss": 9.3666, "step": 2100 }, { "epoch": 0.7247326664367023, "grad_norm": 0.9026673436164856, "learning_rate": 1.87503511228255e-05, "loss": 9.6036, "step": 2101 }, { "epoch": 0.7250776129699896, "grad_norm": 0.9937422275543213, "learning_rate": 1.8706561908316673e-05, "loss": 9.5324, "step": 2102 }, { "epoch": 0.7254225595032769, "grad_norm": 0.9917021989822388, "learning_rate": 1.8662812116582077e-05, "loss": 9.5648, "step": 2103 }, { "epoch": 0.7257675060365644, "grad_norm": 1.0386805534362793, "learning_rate": 1.8619101802736755e-05, "loss": 9.5532, "step": 2104 }, { "epoch": 0.7261124525698517, "grad_norm": 1.044515609741211, "learning_rate": 1.8575431021845928e-05, "loss": 9.5103, "step": 2105 }, { "epoch": 0.726457399103139, "grad_norm": 1.216599941253662, "learning_rate": 1.8531799828925123e-05, "loss": 9.466, "step": 2106 }, { "epoch": 0.7268023456364263, "grad_norm": 1.0359028577804565, "learning_rate": 1.8488208278939878e-05, "loss": 9.5026, "step": 2107 }, { "epoch": 0.7271472921697137, "grad_norm": 1.160091519355774, "learning_rate": 1.844465642680593e-05, "loss": 9.512, "step": 2108 }, { "epoch": 0.7274922387030011, "grad_norm": 1.1458526849746704, "learning_rate": 1.840114432738887e-05, "loss": 9.4816, "step": 2109 }, { "epoch": 0.7278371852362884, "grad_norm": 1.1700782775878906, "learning_rate": 1.8357672035504313e-05, "loss": 9.5054, "step": 2110 }, { "epoch": 0.7281821317695757, "grad_norm": 1.1910396814346313, "learning_rate": 1.8314239605917638e-05, "loss": 9.533, "step": 2111 }, { "epoch": 0.728527078302863, "grad_norm": 1.1200579404830933, "learning_rate": 1.8270847093344074e-05, "loss": 9.5353, "step": 2112 }, { "epoch": 0.7288720248361504, "grad_norm": 1.1751587390899658, "learning_rate": 1.8227494552448547e-05, "loss": 9.5049, "step": 2113 }, { "epoch": 0.7292169713694377, "grad_norm": 1.2288830280303955, "learning_rate": 1.81841820378456e-05, "loss": 9.4904, "step": 2114 }, { "epoch": 0.7295619179027251, "grad_norm": 1.1687408685684204, "learning_rate": 1.814090960409937e-05, "loss": 9.4727, "step": 2115 }, { "epoch": 0.7299068644360124, "grad_norm": 1.3285386562347412, "learning_rate": 1.8097677305723517e-05, "loss": 9.4991, "step": 2116 }, { "epoch": 0.7302518109692998, "grad_norm": 1.1721177101135254, "learning_rate": 1.8054485197181136e-05, "loss": 9.4992, "step": 2117 }, { "epoch": 0.7305967575025871, "grad_norm": 1.2847938537597656, "learning_rate": 1.8011333332884645e-05, "loss": 9.4851, "step": 2118 }, { "epoch": 0.7309417040358744, "grad_norm": 1.2144776582717896, "learning_rate": 1.7968221767195836e-05, "loss": 9.5051, "step": 2119 }, { "epoch": 0.7312866505691618, "grad_norm": 1.2324657440185547, "learning_rate": 1.7925150554425662e-05, "loss": 9.4642, "step": 2120 }, { "epoch": 0.7316315971024491, "grad_norm": 1.3017010688781738, "learning_rate": 1.788211974883429e-05, "loss": 9.5029, "step": 2121 }, { "epoch": 0.7319765436357365, "grad_norm": 1.2795159816741943, "learning_rate": 1.7839129404630965e-05, "loss": 9.5278, "step": 2122 }, { "epoch": 0.7323214901690238, "grad_norm": 1.1670211553573608, "learning_rate": 1.7796179575973988e-05, "loss": 9.5435, "step": 2123 }, { "epoch": 0.7326664367023111, "grad_norm": 1.2223438024520874, "learning_rate": 1.775327031697055e-05, "loss": 9.5277, "step": 2124 }, { "epoch": 0.7330113832355984, "grad_norm": 1.2785682678222656, "learning_rate": 1.7710401681676803e-05, "loss": 9.5291, "step": 2125 }, { "epoch": 0.7333563297688859, "grad_norm": 1.332833170890808, "learning_rate": 1.7667573724097702e-05, "loss": 9.5411, "step": 2126 }, { "epoch": 0.7337012763021732, "grad_norm": 1.4342169761657715, "learning_rate": 1.762478649818693e-05, "loss": 9.4796, "step": 2127 }, { "epoch": 0.7340462228354605, "grad_norm": 1.2753576040267944, "learning_rate": 1.7582040057846883e-05, "loss": 9.5234, "step": 2128 }, { "epoch": 0.7343911693687478, "grad_norm": 1.2632288932800293, "learning_rate": 1.753933445692858e-05, "loss": 9.4742, "step": 2129 }, { "epoch": 0.7347361159020351, "grad_norm": 1.3039580583572388, "learning_rate": 1.74966697492316e-05, "loss": 9.4876, "step": 2130 }, { "epoch": 0.7350810624353226, "grad_norm": 1.1701034307479858, "learning_rate": 1.745404598850395e-05, "loss": 9.5418, "step": 2131 }, { "epoch": 0.7354260089686099, "grad_norm": 1.2938871383666992, "learning_rate": 1.7411463228442122e-05, "loss": 9.4901, "step": 2132 }, { "epoch": 0.7357709555018972, "grad_norm": 1.3119385242462158, "learning_rate": 1.73689215226909e-05, "loss": 9.5219, "step": 2133 }, { "epoch": 0.7361159020351845, "grad_norm": 1.5491797924041748, "learning_rate": 1.7326420924843395e-05, "loss": 9.4397, "step": 2134 }, { "epoch": 0.7364608485684719, "grad_norm": 1.3859881162643433, "learning_rate": 1.7283961488440904e-05, "loss": 9.4493, "step": 2135 }, { "epoch": 0.7368057951017593, "grad_norm": 1.3091398477554321, "learning_rate": 1.7241543266972888e-05, "loss": 9.4656, "step": 2136 }, { "epoch": 0.7371507416350466, "grad_norm": 1.3951692581176758, "learning_rate": 1.719916631387685e-05, "loss": 9.534, "step": 2137 }, { "epoch": 0.7374956881683339, "grad_norm": 1.4812074899673462, "learning_rate": 1.7156830682538343e-05, "loss": 9.4484, "step": 2138 }, { "epoch": 0.7378406347016212, "grad_norm": 1.377474069595337, "learning_rate": 1.7114536426290868e-05, "loss": 9.4458, "step": 2139 }, { "epoch": 0.7381855812349086, "grad_norm": 1.3391168117523193, "learning_rate": 1.707228359841575e-05, "loss": 9.5431, "step": 2140 }, { "epoch": 0.7385305277681959, "grad_norm": 1.4583009481430054, "learning_rate": 1.7030072252142188e-05, "loss": 9.4805, "step": 2141 }, { "epoch": 0.7388754743014833, "grad_norm": 1.4434447288513184, "learning_rate": 1.6987902440647047e-05, "loss": 9.4072, "step": 2142 }, { "epoch": 0.7392204208347706, "grad_norm": 1.5743590593338013, "learning_rate": 1.6945774217054973e-05, "loss": 9.4339, "step": 2143 }, { "epoch": 0.739565367368058, "grad_norm": 1.3587831258773804, "learning_rate": 1.690368763443812e-05, "loss": 9.4421, "step": 2144 }, { "epoch": 0.7399103139013453, "grad_norm": 1.3895113468170166, "learning_rate": 1.6861642745816253e-05, "loss": 9.3958, "step": 2145 }, { "epoch": 0.7402552604346326, "grad_norm": 1.43183434009552, "learning_rate": 1.6819639604156557e-05, "loss": 9.4746, "step": 2146 }, { "epoch": 0.74060020696792, "grad_norm": 1.5698809623718262, "learning_rate": 1.6777678262373653e-05, "loss": 9.3977, "step": 2147 }, { "epoch": 0.7409451535012073, "grad_norm": 1.4829764366149902, "learning_rate": 1.6735758773329536e-05, "loss": 9.4524, "step": 2148 }, { "epoch": 0.7412901000344947, "grad_norm": 1.6046053171157837, "learning_rate": 1.6693881189833376e-05, "loss": 9.4178, "step": 2149 }, { "epoch": 0.741635046567782, "grad_norm": 1.7703441381454468, "learning_rate": 1.66520455646417e-05, "loss": 9.357, "step": 2150 }, { "epoch": 0.7419799931010693, "grad_norm": 0.9205633401870728, "learning_rate": 1.6610251950458033e-05, "loss": 9.5522, "step": 2151 }, { "epoch": 0.7423249396343566, "grad_norm": 1.0296859741210938, "learning_rate": 1.6568500399933073e-05, "loss": 9.5601, "step": 2152 }, { "epoch": 0.7426698861676441, "grad_norm": 1.0968191623687744, "learning_rate": 1.6526790965664457e-05, "loss": 9.5359, "step": 2153 }, { "epoch": 0.7430148327009314, "grad_norm": 1.110319972038269, "learning_rate": 1.648512370019683e-05, "loss": 9.5107, "step": 2154 }, { "epoch": 0.7433597792342187, "grad_norm": 1.0326136350631714, "learning_rate": 1.644349865602165e-05, "loss": 9.5391, "step": 2155 }, { "epoch": 0.743704725767506, "grad_norm": 1.1610885858535767, "learning_rate": 1.640191588557724e-05, "loss": 9.5459, "step": 2156 }, { "epoch": 0.7440496723007933, "grad_norm": 1.0827651023864746, "learning_rate": 1.636037544124864e-05, "loss": 9.5505, "step": 2157 }, { "epoch": 0.7443946188340808, "grad_norm": 1.101487398147583, "learning_rate": 1.6318877375367568e-05, "loss": 9.5195, "step": 2158 }, { "epoch": 0.7447395653673681, "grad_norm": 1.18477201461792, "learning_rate": 1.627742174021239e-05, "loss": 9.5322, "step": 2159 }, { "epoch": 0.7450845119006554, "grad_norm": 1.223777174949646, "learning_rate": 1.6236008588007945e-05, "loss": 9.5771, "step": 2160 }, { "epoch": 0.7454294584339427, "grad_norm": 1.2236958742141724, "learning_rate": 1.6194637970925634e-05, "loss": 9.5204, "step": 2161 }, { "epoch": 0.7457744049672301, "grad_norm": 1.1944174766540527, "learning_rate": 1.6153309941083204e-05, "loss": 9.5285, "step": 2162 }, { "epoch": 0.7461193515005174, "grad_norm": 1.1148663759231567, "learning_rate": 1.611202455054481e-05, "loss": 9.5056, "step": 2163 }, { "epoch": 0.7464642980338048, "grad_norm": 1.2774313688278198, "learning_rate": 1.607078185132084e-05, "loss": 9.5108, "step": 2164 }, { "epoch": 0.7468092445670921, "grad_norm": 1.3021870851516724, "learning_rate": 1.6029581895367962e-05, "loss": 9.5348, "step": 2165 }, { "epoch": 0.7471541911003794, "grad_norm": 1.3037549257278442, "learning_rate": 1.598842473458892e-05, "loss": 9.4827, "step": 2166 }, { "epoch": 0.7474991376336668, "grad_norm": 1.0977771282196045, "learning_rate": 1.5947310420832622e-05, "loss": 9.5269, "step": 2167 }, { "epoch": 0.7478440841669541, "grad_norm": 1.2924681901931763, "learning_rate": 1.5906239005893923e-05, "loss": 9.4714, "step": 2168 }, { "epoch": 0.7481890307002415, "grad_norm": 1.2541073560714722, "learning_rate": 1.58652105415137e-05, "loss": 9.5176, "step": 2169 }, { "epoch": 0.7485339772335288, "grad_norm": 1.374686360359192, "learning_rate": 1.5824225079378685e-05, "loss": 9.5176, "step": 2170 }, { "epoch": 0.7488789237668162, "grad_norm": 1.0905579328536987, "learning_rate": 1.578328267112146e-05, "loss": 9.5887, "step": 2171 }, { "epoch": 0.7492238703001035, "grad_norm": 1.2786760330200195, "learning_rate": 1.5742383368320358e-05, "loss": 9.5006, "step": 2172 }, { "epoch": 0.7495688168333908, "grad_norm": 1.3505619764328003, "learning_rate": 1.5701527222499386e-05, "loss": 9.4938, "step": 2173 }, { "epoch": 0.7499137633666781, "grad_norm": 1.2414374351501465, "learning_rate": 1.566071428512823e-05, "loss": 9.5063, "step": 2174 }, { "epoch": 0.7502587098999655, "grad_norm": 1.2529624700546265, "learning_rate": 1.561994460762209e-05, "loss": 9.5985, "step": 2175 }, { "epoch": 0.7506036564332529, "grad_norm": 1.2772570848464966, "learning_rate": 1.557921824134172e-05, "loss": 9.5573, "step": 2176 }, { "epoch": 0.7509486029665402, "grad_norm": 1.3001856803894043, "learning_rate": 1.5538535237593243e-05, "loss": 9.4594, "step": 2177 }, { "epoch": 0.7512935494998275, "grad_norm": 1.2496637105941772, "learning_rate": 1.5497895647628265e-05, "loss": 9.4983, "step": 2178 }, { "epoch": 0.7516384960331148, "grad_norm": 1.2094224691390991, "learning_rate": 1.545729952264358e-05, "loss": 9.4784, "step": 2179 }, { "epoch": 0.7519834425664023, "grad_norm": 1.3114935159683228, "learning_rate": 1.54167469137813e-05, "loss": 9.5164, "step": 2180 }, { "epoch": 0.7523283890996896, "grad_norm": 1.2864598035812378, "learning_rate": 1.5376237872128705e-05, "loss": 9.5024, "step": 2181 }, { "epoch": 0.7526733356329769, "grad_norm": 1.303964376449585, "learning_rate": 1.5335772448718157e-05, "loss": 9.5344, "step": 2182 }, { "epoch": 0.7530182821662642, "grad_norm": 1.4014822244644165, "learning_rate": 1.5295350694527117e-05, "loss": 9.4536, "step": 2183 }, { "epoch": 0.7533632286995515, "grad_norm": 1.2853933572769165, "learning_rate": 1.5254972660477961e-05, "loss": 9.5274, "step": 2184 }, { "epoch": 0.753708175232839, "grad_norm": 1.4304496049880981, "learning_rate": 1.5214638397438109e-05, "loss": 9.4957, "step": 2185 }, { "epoch": 0.7540531217661263, "grad_norm": 1.3712221384048462, "learning_rate": 1.5174347956219697e-05, "loss": 9.4559, "step": 2186 }, { "epoch": 0.7543980682994136, "grad_norm": 1.3548405170440674, "learning_rate": 1.5134101387579775e-05, "loss": 9.4248, "step": 2187 }, { "epoch": 0.7547430148327009, "grad_norm": 1.3276457786560059, "learning_rate": 1.5093898742220026e-05, "loss": 9.4374, "step": 2188 }, { "epoch": 0.7550879613659883, "grad_norm": 1.4717036485671997, "learning_rate": 1.5053740070786881e-05, "loss": 9.4696, "step": 2189 }, { "epoch": 0.7554329078992756, "grad_norm": 1.605868935585022, "learning_rate": 1.5013625423871307e-05, "loss": 9.3633, "step": 2190 }, { "epoch": 0.755777854432563, "grad_norm": 1.4265213012695312, "learning_rate": 1.4973554852008853e-05, "loss": 9.4659, "step": 2191 }, { "epoch": 0.7561228009658503, "grad_norm": 1.370707631111145, "learning_rate": 1.493352840567953e-05, "loss": 9.4668, "step": 2192 }, { "epoch": 0.7564677474991376, "grad_norm": 1.4738413095474243, "learning_rate": 1.4893546135307762e-05, "loss": 9.4277, "step": 2193 }, { "epoch": 0.756812694032425, "grad_norm": 1.393288016319275, "learning_rate": 1.4853608091262333e-05, "loss": 9.4523, "step": 2194 }, { "epoch": 0.7571576405657123, "grad_norm": 1.4397128820419312, "learning_rate": 1.4813714323856276e-05, "loss": 9.497, "step": 2195 }, { "epoch": 0.7575025870989996, "grad_norm": 1.4494057893753052, "learning_rate": 1.4773864883346889e-05, "loss": 9.4915, "step": 2196 }, { "epoch": 0.757847533632287, "grad_norm": 1.5768532752990723, "learning_rate": 1.473405981993558e-05, "loss": 9.4324, "step": 2197 }, { "epoch": 0.7581924801655744, "grad_norm": 1.572800874710083, "learning_rate": 1.4694299183767896e-05, "loss": 9.3683, "step": 2198 }, { "epoch": 0.7585374266988617, "grad_norm": 1.6129246950149536, "learning_rate": 1.46545830249334e-05, "loss": 9.4134, "step": 2199 }, { "epoch": 0.758882373232149, "grad_norm": 1.646519422531128, "learning_rate": 1.461491139346563e-05, "loss": 9.3784, "step": 2200 }, { "epoch": 0.7592273197654363, "grad_norm": 0.8638380765914917, "learning_rate": 1.4575284339341994e-05, "loss": 9.5856, "step": 2201 }, { "epoch": 0.7595722662987237, "grad_norm": 0.9636580348014832, "learning_rate": 1.4535701912483784e-05, "loss": 9.5805, "step": 2202 }, { "epoch": 0.7599172128320111, "grad_norm": 0.9889575839042664, "learning_rate": 1.4496164162756071e-05, "loss": 9.5958, "step": 2203 }, { "epoch": 0.7602621593652984, "grad_norm": 1.0838682651519775, "learning_rate": 1.4456671139967604e-05, "loss": 9.5557, "step": 2204 }, { "epoch": 0.7606071058985857, "grad_norm": 1.0837624073028564, "learning_rate": 1.4417222893870825e-05, "loss": 9.5078, "step": 2205 }, { "epoch": 0.760952052431873, "grad_norm": 1.0023666620254517, "learning_rate": 1.4377819474161746e-05, "loss": 9.5269, "step": 2206 }, { "epoch": 0.7612969989651605, "grad_norm": 1.0206482410430908, "learning_rate": 1.4338460930479935e-05, "loss": 9.5391, "step": 2207 }, { "epoch": 0.7616419454984478, "grad_norm": 1.030287265777588, "learning_rate": 1.4299147312408379e-05, "loss": 9.5096, "step": 2208 }, { "epoch": 0.7619868920317351, "grad_norm": 1.1585311889648438, "learning_rate": 1.4259878669473525e-05, "loss": 9.5347, "step": 2209 }, { "epoch": 0.7623318385650224, "grad_norm": 1.1257652044296265, "learning_rate": 1.4220655051145094e-05, "loss": 9.539, "step": 2210 }, { "epoch": 0.7626767850983097, "grad_norm": 1.0882645845413208, "learning_rate": 1.4181476506836161e-05, "loss": 9.5286, "step": 2211 }, { "epoch": 0.7630217316315971, "grad_norm": 1.1985927820205688, "learning_rate": 1.4142343085902937e-05, "loss": 9.5612, "step": 2212 }, { "epoch": 0.7633666781648845, "grad_norm": 1.2014905214309692, "learning_rate": 1.4103254837644892e-05, "loss": 9.4562, "step": 2213 }, { "epoch": 0.7637116246981718, "grad_norm": 1.1860039234161377, "learning_rate": 1.4064211811304472e-05, "loss": 9.5269, "step": 2214 }, { "epoch": 0.7640565712314591, "grad_norm": 1.2806318998336792, "learning_rate": 1.4025214056067238e-05, "loss": 9.4719, "step": 2215 }, { "epoch": 0.7644015177647465, "grad_norm": 1.3235222101211548, "learning_rate": 1.3986261621061703e-05, "loss": 9.523, "step": 2216 }, { "epoch": 0.7647464642980338, "grad_norm": 1.3476524353027344, "learning_rate": 1.3947354555359238e-05, "loss": 9.4753, "step": 2217 }, { "epoch": 0.7650914108313212, "grad_norm": 1.1621500253677368, "learning_rate": 1.390849290797413e-05, "loss": 9.5493, "step": 2218 }, { "epoch": 0.7654363573646085, "grad_norm": 1.3728376626968384, "learning_rate": 1.3869676727863368e-05, "loss": 9.4585, "step": 2219 }, { "epoch": 0.7657813038978958, "grad_norm": 1.260619878768921, "learning_rate": 1.3830906063926769e-05, "loss": 9.495, "step": 2220 }, { "epoch": 0.7661262504311832, "grad_norm": 1.451232671737671, "learning_rate": 1.3792180965006707e-05, "loss": 9.4501, "step": 2221 }, { "epoch": 0.7664711969644705, "grad_norm": 1.2228461503982544, "learning_rate": 1.375350147988822e-05, "loss": 9.4814, "step": 2222 }, { "epoch": 0.7668161434977578, "grad_norm": 1.3036428689956665, "learning_rate": 1.3714867657298835e-05, "loss": 9.4188, "step": 2223 }, { "epoch": 0.7671610900310452, "grad_norm": 1.3564863204956055, "learning_rate": 1.3676279545908593e-05, "loss": 9.4896, "step": 2224 }, { "epoch": 0.7675060365643326, "grad_norm": 1.2337634563446045, "learning_rate": 1.3637737194329947e-05, "loss": 9.5008, "step": 2225 }, { "epoch": 0.7678509830976199, "grad_norm": 1.2691717147827148, "learning_rate": 1.3599240651117662e-05, "loss": 9.5147, "step": 2226 }, { "epoch": 0.7681959296309072, "grad_norm": 1.3650051355361938, "learning_rate": 1.3560789964768839e-05, "loss": 9.4761, "step": 2227 }, { "epoch": 0.7685408761641945, "grad_norm": 1.2378038167953491, "learning_rate": 1.3522385183722791e-05, "loss": 9.5646, "step": 2228 }, { "epoch": 0.7688858226974818, "grad_norm": 1.2806237936019897, "learning_rate": 1.3484026356361018e-05, "loss": 9.4591, "step": 2229 }, { "epoch": 0.7692307692307693, "grad_norm": 1.3981233835220337, "learning_rate": 1.3445713531007092e-05, "loss": 9.5678, "step": 2230 }, { "epoch": 0.7695757157640566, "grad_norm": 1.347752571105957, "learning_rate": 1.3407446755926672e-05, "loss": 9.4931, "step": 2231 }, { "epoch": 0.7699206622973439, "grad_norm": 1.3737890720367432, "learning_rate": 1.3369226079327368e-05, "loss": 9.5142, "step": 2232 }, { "epoch": 0.7702656088306312, "grad_norm": 1.4244921207427979, "learning_rate": 1.3331051549358742e-05, "loss": 9.5011, "step": 2233 }, { "epoch": 0.7706105553639186, "grad_norm": 1.2368680238723755, "learning_rate": 1.3292923214112218e-05, "loss": 9.4674, "step": 2234 }, { "epoch": 0.770955501897206, "grad_norm": 1.3025181293487549, "learning_rate": 1.3254841121621037e-05, "loss": 9.5319, "step": 2235 }, { "epoch": 0.7713004484304933, "grad_norm": 1.3030591011047363, "learning_rate": 1.3216805319860131e-05, "loss": 9.484, "step": 2236 }, { "epoch": 0.7716453949637806, "grad_norm": 1.5651847124099731, "learning_rate": 1.3178815856746173e-05, "loss": 9.4354, "step": 2237 }, { "epoch": 0.7719903414970679, "grad_norm": 1.34073007106781, "learning_rate": 1.3140872780137458e-05, "loss": 9.4937, "step": 2238 }, { "epoch": 0.7723352880303553, "grad_norm": 1.3797394037246704, "learning_rate": 1.310297613783379e-05, "loss": 9.4472, "step": 2239 }, { "epoch": 0.7726802345636427, "grad_norm": 1.5019760131835938, "learning_rate": 1.3065125977576531e-05, "loss": 9.4539, "step": 2240 }, { "epoch": 0.77302518109693, "grad_norm": 1.3732333183288574, "learning_rate": 1.302732234704847e-05, "loss": 9.486, "step": 2241 }, { "epoch": 0.7733701276302173, "grad_norm": 1.3739562034606934, "learning_rate": 1.298956529387379e-05, "loss": 9.4486, "step": 2242 }, { "epoch": 0.7737150741635046, "grad_norm": 1.497048020362854, "learning_rate": 1.2951854865617946e-05, "loss": 9.4217, "step": 2243 }, { "epoch": 0.774060020696792, "grad_norm": 1.4363867044448853, "learning_rate": 1.2914191109787733e-05, "loss": 9.436, "step": 2244 }, { "epoch": 0.7744049672300793, "grad_norm": 1.5675686597824097, "learning_rate": 1.2876574073831071e-05, "loss": 9.4108, "step": 2245 }, { "epoch": 0.7747499137633667, "grad_norm": 1.644961953163147, "learning_rate": 1.2839003805137085e-05, "loss": 9.343, "step": 2246 }, { "epoch": 0.775094860296654, "grad_norm": 1.659659743309021, "learning_rate": 1.2801480351035955e-05, "loss": 9.3956, "step": 2247 }, { "epoch": 0.7754398068299414, "grad_norm": 1.6075783967971802, "learning_rate": 1.2764003758798904e-05, "loss": 9.3805, "step": 2248 }, { "epoch": 0.7757847533632287, "grad_norm": 1.7635935544967651, "learning_rate": 1.2726574075638075e-05, "loss": 9.3577, "step": 2249 }, { "epoch": 0.776129699896516, "grad_norm": 1.8175138235092163, "learning_rate": 1.268919134870657e-05, "loss": 9.3385, "step": 2250 }, { "epoch": 0.7764746464298034, "grad_norm": 0.8989494442939758, "learning_rate": 1.265185562509832e-05, "loss": 9.5623, "step": 2251 }, { "epoch": 0.7768195929630907, "grad_norm": 1.050932765007019, "learning_rate": 1.2614566951848006e-05, "loss": 9.4873, "step": 2252 }, { "epoch": 0.7771645394963781, "grad_norm": 1.1079734563827515, "learning_rate": 1.2577325375931099e-05, "loss": 9.5309, "step": 2253 }, { "epoch": 0.7775094860296654, "grad_norm": 1.1610482931137085, "learning_rate": 1.2540130944263657e-05, "loss": 9.4721, "step": 2254 }, { "epoch": 0.7778544325629527, "grad_norm": 1.0923736095428467, "learning_rate": 1.2502983703702454e-05, "loss": 9.4681, "step": 2255 }, { "epoch": 0.77819937909624, "grad_norm": 1.096974492073059, "learning_rate": 1.2465883701044712e-05, "loss": 9.5229, "step": 2256 }, { "epoch": 0.7785443256295275, "grad_norm": 1.0986400842666626, "learning_rate": 1.2428830983028211e-05, "loss": 9.5748, "step": 2257 }, { "epoch": 0.7788892721628148, "grad_norm": 1.1701014041900635, "learning_rate": 1.2391825596331114e-05, "loss": 9.5348, "step": 2258 }, { "epoch": 0.7792342186961021, "grad_norm": 1.0841007232666016, "learning_rate": 1.2354867587571989e-05, "loss": 9.5687, "step": 2259 }, { "epoch": 0.7795791652293894, "grad_norm": 1.0948131084442139, "learning_rate": 1.2317957003309727e-05, "loss": 9.5516, "step": 2260 }, { "epoch": 0.7799241117626767, "grad_norm": 1.104779601097107, "learning_rate": 1.2281093890043416e-05, "loss": 9.5223, "step": 2261 }, { "epoch": 0.7802690582959642, "grad_norm": 1.276573896408081, "learning_rate": 1.2244278294212436e-05, "loss": 9.5379, "step": 2262 }, { "epoch": 0.7806140048292515, "grad_norm": 1.25575590133667, "learning_rate": 1.2207510262196215e-05, "loss": 9.5144, "step": 2263 }, { "epoch": 0.7809589513625388, "grad_norm": 1.231367826461792, "learning_rate": 1.2170789840314318e-05, "loss": 9.5943, "step": 2264 }, { "epoch": 0.7813038978958261, "grad_norm": 1.329979419708252, "learning_rate": 1.213411707482629e-05, "loss": 9.458, "step": 2265 }, { "epoch": 0.7816488444291135, "grad_norm": 1.1375129222869873, "learning_rate": 1.2097492011931688e-05, "loss": 9.515, "step": 2266 }, { "epoch": 0.7819937909624008, "grad_norm": 1.3155901432037354, "learning_rate": 1.2060914697769926e-05, "loss": 9.4361, "step": 2267 }, { "epoch": 0.7823387374956882, "grad_norm": 1.2770370244979858, "learning_rate": 1.2024385178420289e-05, "loss": 9.5143, "step": 2268 }, { "epoch": 0.7826836840289755, "grad_norm": 1.18695068359375, "learning_rate": 1.198790349990186e-05, "loss": 9.494, "step": 2269 }, { "epoch": 0.7830286305622628, "grad_norm": 1.2336689233779907, "learning_rate": 1.1951469708173457e-05, "loss": 9.5422, "step": 2270 }, { "epoch": 0.7833735770955502, "grad_norm": 1.1328778266906738, "learning_rate": 1.1915083849133518e-05, "loss": 9.5457, "step": 2271 }, { "epoch": 0.7837185236288375, "grad_norm": 1.2695673704147339, "learning_rate": 1.1878745968620158e-05, "loss": 9.4853, "step": 2272 }, { "epoch": 0.7840634701621249, "grad_norm": 1.348298192024231, "learning_rate": 1.1842456112411044e-05, "loss": 9.445, "step": 2273 }, { "epoch": 0.7844084166954122, "grad_norm": 1.4354599714279175, "learning_rate": 1.1806214326223292e-05, "loss": 9.4896, "step": 2274 }, { "epoch": 0.7847533632286996, "grad_norm": 1.4190089702606201, "learning_rate": 1.177002065571351e-05, "loss": 9.4625, "step": 2275 }, { "epoch": 0.7850983097619869, "grad_norm": 1.3022481203079224, "learning_rate": 1.1733875146477675e-05, "loss": 9.568, "step": 2276 }, { "epoch": 0.7854432562952742, "grad_norm": 1.257018804550171, "learning_rate": 1.1697777844051105e-05, "loss": 9.5252, "step": 2277 }, { "epoch": 0.7857882028285615, "grad_norm": 1.2993141412734985, "learning_rate": 1.1661728793908349e-05, "loss": 9.4571, "step": 2278 }, { "epoch": 0.7861331493618489, "grad_norm": 1.1839405298233032, "learning_rate": 1.162572804146323e-05, "loss": 9.5178, "step": 2279 }, { "epoch": 0.7864780958951363, "grad_norm": 1.3165513277053833, "learning_rate": 1.158977563206865e-05, "loss": 9.4607, "step": 2280 }, { "epoch": 0.7868230424284236, "grad_norm": 1.5338356494903564, "learning_rate": 1.1553871611016675e-05, "loss": 9.3831, "step": 2281 }, { "epoch": 0.7871679889617109, "grad_norm": 1.392738938331604, "learning_rate": 1.1518016023538386e-05, "loss": 9.4592, "step": 2282 }, { "epoch": 0.7875129354949982, "grad_norm": 1.4205015897750854, "learning_rate": 1.1482208914803849e-05, "loss": 9.4043, "step": 2283 }, { "epoch": 0.7878578820282857, "grad_norm": 1.3894630670547485, "learning_rate": 1.1446450329922077e-05, "loss": 9.4858, "step": 2284 }, { "epoch": 0.788202828561573, "grad_norm": 1.3243451118469238, "learning_rate": 1.141074031394091e-05, "loss": 9.5666, "step": 2285 }, { "epoch": 0.7885477750948603, "grad_norm": 1.3860193490982056, "learning_rate": 1.1375078911847048e-05, "loss": 9.4606, "step": 2286 }, { "epoch": 0.7888927216281476, "grad_norm": 1.3805482387542725, "learning_rate": 1.133946616856591e-05, "loss": 9.5212, "step": 2287 }, { "epoch": 0.7892376681614349, "grad_norm": 1.4170877933502197, "learning_rate": 1.1303902128961658e-05, "loss": 9.4076, "step": 2288 }, { "epoch": 0.7895826146947224, "grad_norm": 1.3475642204284668, "learning_rate": 1.1268386837837031e-05, "loss": 9.4137, "step": 2289 }, { "epoch": 0.7899275612280097, "grad_norm": 1.3942818641662598, "learning_rate": 1.1232920339933461e-05, "loss": 9.4543, "step": 2290 }, { "epoch": 0.790272507761297, "grad_norm": 1.3341909646987915, "learning_rate": 1.1197502679930799e-05, "loss": 9.4859, "step": 2291 }, { "epoch": 0.7906174542945843, "grad_norm": 1.537868618965149, "learning_rate": 1.1162133902447447e-05, "loss": 9.4129, "step": 2292 }, { "epoch": 0.7909624008278717, "grad_norm": 1.4403679370880127, "learning_rate": 1.1126814052040208e-05, "loss": 9.4867, "step": 2293 }, { "epoch": 0.791307347361159, "grad_norm": 1.6439192295074463, "learning_rate": 1.109154317320421e-05, "loss": 9.4521, "step": 2294 }, { "epoch": 0.7916522938944464, "grad_norm": 1.4819450378417969, "learning_rate": 1.1056321310372952e-05, "loss": 9.3779, "step": 2295 }, { "epoch": 0.7919972404277337, "grad_norm": 1.452518105506897, "learning_rate": 1.1021148507918106e-05, "loss": 9.4669, "step": 2296 }, { "epoch": 0.792342186961021, "grad_norm": 1.6845139265060425, "learning_rate": 1.0986024810149636e-05, "loss": 9.4524, "step": 2297 }, { "epoch": 0.7926871334943084, "grad_norm": 1.6107673645019531, "learning_rate": 1.0950950261315557e-05, "loss": 9.4202, "step": 2298 }, { "epoch": 0.7930320800275957, "grad_norm": 1.7000176906585693, "learning_rate": 1.091592490560202e-05, "loss": 9.4115, "step": 2299 }, { "epoch": 0.793377026560883, "grad_norm": 1.7845960855484009, "learning_rate": 1.0880948787133167e-05, "loss": 9.3256, "step": 2300 }, { "epoch": 0.7937219730941704, "grad_norm": 0.9648402333259583, "learning_rate": 1.0846021949971152e-05, "loss": 9.5441, "step": 2301 }, { "epoch": 0.7940669196274578, "grad_norm": 1.0515005588531494, "learning_rate": 1.0811144438116e-05, "loss": 9.5474, "step": 2302 }, { "epoch": 0.7944118661607451, "grad_norm": 0.9792898893356323, "learning_rate": 1.0776316295505634e-05, "loss": 9.535, "step": 2303 }, { "epoch": 0.7947568126940324, "grad_norm": 1.0286474227905273, "learning_rate": 1.0741537566015769e-05, "loss": 9.5707, "step": 2304 }, { "epoch": 0.7951017592273197, "grad_norm": 1.0761648416519165, "learning_rate": 1.0706808293459875e-05, "loss": 9.5962, "step": 2305 }, { "epoch": 0.795446705760607, "grad_norm": 1.0304803848266602, "learning_rate": 1.0672128521589125e-05, "loss": 9.5236, "step": 2306 }, { "epoch": 0.7957916522938945, "grad_norm": 1.0607376098632812, "learning_rate": 1.0637498294092291e-05, "loss": 9.5741, "step": 2307 }, { "epoch": 0.7961365988271818, "grad_norm": 1.1150785684585571, "learning_rate": 1.0602917654595796e-05, "loss": 9.493, "step": 2308 }, { "epoch": 0.7964815453604691, "grad_norm": 1.2552889585494995, "learning_rate": 1.056838664666352e-05, "loss": 9.5416, "step": 2309 }, { "epoch": 0.7968264918937564, "grad_norm": 1.281883955001831, "learning_rate": 1.0533905313796872e-05, "loss": 9.4997, "step": 2310 }, { "epoch": 0.7971714384270439, "grad_norm": 1.1472063064575195, "learning_rate": 1.0499473699434664e-05, "loss": 9.5945, "step": 2311 }, { "epoch": 0.7975163849603312, "grad_norm": 1.0823895931243896, "learning_rate": 1.0465091846953095e-05, "loss": 9.5251, "step": 2312 }, { "epoch": 0.7978613314936185, "grad_norm": 1.2799855470657349, "learning_rate": 1.0430759799665618e-05, "loss": 9.5017, "step": 2313 }, { "epoch": 0.7982062780269058, "grad_norm": 1.2302875518798828, "learning_rate": 1.0396477600823002e-05, "loss": 9.4611, "step": 2314 }, { "epoch": 0.7985512245601931, "grad_norm": 1.2777308225631714, "learning_rate": 1.0362245293613199e-05, "loss": 9.4718, "step": 2315 }, { "epoch": 0.7988961710934805, "grad_norm": 1.1929059028625488, "learning_rate": 1.0328062921161286e-05, "loss": 9.5054, "step": 2316 }, { "epoch": 0.7992411176267679, "grad_norm": 1.2410634756088257, "learning_rate": 1.029393052652946e-05, "loss": 9.4943, "step": 2317 }, { "epoch": 0.7995860641600552, "grad_norm": 1.2318724393844604, "learning_rate": 1.0259848152716955e-05, "loss": 9.4966, "step": 2318 }, { "epoch": 0.7999310106933425, "grad_norm": 1.230173110961914, "learning_rate": 1.0225815842659996e-05, "loss": 9.3985, "step": 2319 }, { "epoch": 0.8002759572266299, "grad_norm": 1.107927680015564, "learning_rate": 1.0191833639231696e-05, "loss": 9.4939, "step": 2320 }, { "epoch": 0.8006209037599172, "grad_norm": 1.3488962650299072, "learning_rate": 1.0157901585242101e-05, "loss": 9.464, "step": 2321 }, { "epoch": 0.8009658502932046, "grad_norm": 1.299417495727539, "learning_rate": 1.0124019723438038e-05, "loss": 9.4867, "step": 2322 }, { "epoch": 0.8013107968264919, "grad_norm": 1.3014909029006958, "learning_rate": 1.0090188096503139e-05, "loss": 9.5483, "step": 2323 }, { "epoch": 0.8016557433597792, "grad_norm": 1.4022812843322754, "learning_rate": 1.0056406747057695e-05, "loss": 9.4322, "step": 2324 }, { "epoch": 0.8020006898930666, "grad_norm": 1.2554450035095215, "learning_rate": 1.0022675717658742e-05, "loss": 9.6187, "step": 2325 }, { "epoch": 0.8023456364263539, "grad_norm": 1.3391486406326294, "learning_rate": 9.988995050799844e-06, "loss": 9.5109, "step": 2326 }, { "epoch": 0.8026905829596412, "grad_norm": 1.3482129573822021, "learning_rate": 9.955364788911164e-06, "loss": 9.5367, "step": 2327 }, { "epoch": 0.8030355294929286, "grad_norm": 1.2753441333770752, "learning_rate": 9.921784974359372e-06, "loss": 9.4852, "step": 2328 }, { "epoch": 0.803380476026216, "grad_norm": 1.3399298191070557, "learning_rate": 9.88825564944753e-06, "loss": 9.5077, "step": 2329 }, { "epoch": 0.8037254225595033, "grad_norm": 1.2231972217559814, "learning_rate": 9.85477685641517e-06, "loss": 9.4774, "step": 2330 }, { "epoch": 0.8040703690927906, "grad_norm": 1.4416093826293945, "learning_rate": 9.821348637438088e-06, "loss": 9.4557, "step": 2331 }, { "epoch": 0.8044153156260779, "grad_norm": 1.282536268234253, "learning_rate": 9.787971034628458e-06, "loss": 9.5142, "step": 2332 }, { "epoch": 0.8047602621593652, "grad_norm": 1.3632969856262207, "learning_rate": 9.754644090034593e-06, "loss": 9.4723, "step": 2333 }, { "epoch": 0.8051052086926527, "grad_norm": 1.2392255067825317, "learning_rate": 9.721367845641067e-06, "loss": 9.478, "step": 2334 }, { "epoch": 0.80545015522594, "grad_norm": 1.2770322561264038, "learning_rate": 9.688142343368517e-06, "loss": 9.437, "step": 2335 }, { "epoch": 0.8057951017592273, "grad_norm": 1.3481700420379639, "learning_rate": 9.654967625073708e-06, "loss": 9.4381, "step": 2336 }, { "epoch": 0.8061400482925146, "grad_norm": 1.5370646715164185, "learning_rate": 9.621843732549418e-06, "loss": 9.4208, "step": 2337 }, { "epoch": 0.806484994825802, "grad_norm": 1.4297363758087158, "learning_rate": 9.588770707524364e-06, "loss": 9.5563, "step": 2338 }, { "epoch": 0.8068299413590894, "grad_norm": 1.436538577079773, "learning_rate": 9.555748591663215e-06, "loss": 9.3967, "step": 2339 }, { "epoch": 0.8071748878923767, "grad_norm": 1.4317678213119507, "learning_rate": 9.522777426566492e-06, "loss": 9.5017, "step": 2340 }, { "epoch": 0.807519834425664, "grad_norm": 1.4889994859695435, "learning_rate": 9.48985725377055e-06, "loss": 9.5296, "step": 2341 }, { "epoch": 0.8078647809589513, "grad_norm": 1.5667929649353027, "learning_rate": 9.456988114747462e-06, "loss": 9.4448, "step": 2342 }, { "epoch": 0.8082097274922387, "grad_norm": 1.4982324838638306, "learning_rate": 9.424170050905057e-06, "loss": 9.4171, "step": 2343 }, { "epoch": 0.808554674025526, "grad_norm": 1.699577808380127, "learning_rate": 9.39140310358677e-06, "loss": 9.3494, "step": 2344 }, { "epoch": 0.8088996205588134, "grad_norm": 1.545198917388916, "learning_rate": 9.358687314071696e-06, "loss": 9.4545, "step": 2345 }, { "epoch": 0.8092445670921007, "grad_norm": 1.724341869354248, "learning_rate": 9.32602272357444e-06, "loss": 9.3616, "step": 2346 }, { "epoch": 0.8095895136253881, "grad_norm": 1.5193202495574951, "learning_rate": 9.293409373245144e-06, "loss": 9.4388, "step": 2347 }, { "epoch": 0.8099344601586754, "grad_norm": 1.5527263879776, "learning_rate": 9.260847304169345e-06, "loss": 9.3641, "step": 2348 }, { "epoch": 0.8102794066919627, "grad_norm": 1.701203465461731, "learning_rate": 9.22833655736804e-06, "loss": 9.3475, "step": 2349 }, { "epoch": 0.8106243532252501, "grad_norm": 1.9347988367080688, "learning_rate": 9.195877173797535e-06, "loss": 9.3568, "step": 2350 }, { "epoch": 0.8109692997585374, "grad_norm": 0.9528728127479553, "learning_rate": 9.163469194349423e-06, "loss": 9.5367, "step": 2351 }, { "epoch": 0.8113142462918248, "grad_norm": 0.9608731269836426, "learning_rate": 9.13111265985056e-06, "loss": 9.5382, "step": 2352 }, { "epoch": 0.8116591928251121, "grad_norm": 1.016716480255127, "learning_rate": 9.098807611062988e-06, "loss": 9.5135, "step": 2353 }, { "epoch": 0.8120041393583994, "grad_norm": 0.9165623784065247, "learning_rate": 9.066554088683894e-06, "loss": 9.6206, "step": 2354 }, { "epoch": 0.8123490858916868, "grad_norm": 1.1385338306427002, "learning_rate": 9.034352133345525e-06, "loss": 9.4904, "step": 2355 }, { "epoch": 0.8126940324249742, "grad_norm": 1.1017615795135498, "learning_rate": 9.002201785615211e-06, "loss": 9.5393, "step": 2356 }, { "epoch": 0.8130389789582615, "grad_norm": 1.0922075510025024, "learning_rate": 8.970103085995207e-06, "loss": 9.4825, "step": 2357 }, { "epoch": 0.8133839254915488, "grad_norm": 1.123779535293579, "learning_rate": 8.938056074922762e-06, "loss": 9.5213, "step": 2358 }, { "epoch": 0.8137288720248361, "grad_norm": 1.0766289234161377, "learning_rate": 8.906060792769982e-06, "loss": 9.5497, "step": 2359 }, { "epoch": 0.8140738185581234, "grad_norm": 1.142089605331421, "learning_rate": 8.874117279843813e-06, "loss": 9.5189, "step": 2360 }, { "epoch": 0.8144187650914109, "grad_norm": 1.2554246187210083, "learning_rate": 8.842225576385964e-06, "loss": 9.5115, "step": 2361 }, { "epoch": 0.8147637116246982, "grad_norm": 1.347861886024475, "learning_rate": 8.810385722572901e-06, "loss": 9.5216, "step": 2362 }, { "epoch": 0.8151086581579855, "grad_norm": 1.1705132722854614, "learning_rate": 8.778597758515766e-06, "loss": 9.4948, "step": 2363 }, { "epoch": 0.8154536046912728, "grad_norm": 1.1871190071105957, "learning_rate": 8.746861724260308e-06, "loss": 9.5249, "step": 2364 }, { "epoch": 0.8157985512245602, "grad_norm": 1.1689561605453491, "learning_rate": 8.7151776597869e-06, "loss": 9.5313, "step": 2365 }, { "epoch": 0.8161434977578476, "grad_norm": 1.2277796268463135, "learning_rate": 8.683545605010373e-06, "loss": 9.5339, "step": 2366 }, { "epoch": 0.8164884442911349, "grad_norm": 1.2542256116867065, "learning_rate": 8.651965599780143e-06, "loss": 9.493, "step": 2367 }, { "epoch": 0.8168333908244222, "grad_norm": 1.2161953449249268, "learning_rate": 8.62043768387995e-06, "loss": 9.5418, "step": 2368 }, { "epoch": 0.8171783373577095, "grad_norm": 1.3967869281768799, "learning_rate": 8.588961897027986e-06, "loss": 9.5238, "step": 2369 }, { "epoch": 0.8175232838909969, "grad_norm": 1.2551013231277466, "learning_rate": 8.55753827887672e-06, "loss": 9.4706, "step": 2370 }, { "epoch": 0.8178682304242842, "grad_norm": 1.3102620840072632, "learning_rate": 8.526166869012936e-06, "loss": 9.4533, "step": 2371 }, { "epoch": 0.8182131769575716, "grad_norm": 1.2320739030838013, "learning_rate": 8.494847706957642e-06, "loss": 9.4959, "step": 2372 }, { "epoch": 0.8185581234908589, "grad_norm": 1.4043902158737183, "learning_rate": 8.463580832165996e-06, "loss": 9.5451, "step": 2373 }, { "epoch": 0.8189030700241463, "grad_norm": 1.2317163944244385, "learning_rate": 8.432366284027343e-06, "loss": 9.4723, "step": 2374 }, { "epoch": 0.8192480165574336, "grad_norm": 1.389200210571289, "learning_rate": 8.401204101865046e-06, "loss": 9.5072, "step": 2375 }, { "epoch": 0.8195929630907209, "grad_norm": 1.277854561805725, "learning_rate": 8.370094324936545e-06, "loss": 9.4576, "step": 2376 }, { "epoch": 0.8199379096240083, "grad_norm": 1.2297642230987549, "learning_rate": 8.33903699243322e-06, "loss": 9.5131, "step": 2377 }, { "epoch": 0.8202828561572956, "grad_norm": 1.2601749897003174, "learning_rate": 8.308032143480427e-06, "loss": 9.46, "step": 2378 }, { "epoch": 0.820627802690583, "grad_norm": 1.3500021696090698, "learning_rate": 8.277079817137345e-06, "loss": 9.4438, "step": 2379 }, { "epoch": 0.8209727492238703, "grad_norm": 1.278239130973816, "learning_rate": 8.24618005239708e-06, "loss": 9.5169, "step": 2380 }, { "epoch": 0.8213176957571576, "grad_norm": 1.4504845142364502, "learning_rate": 8.215332888186417e-06, "loss": 9.4441, "step": 2381 }, { "epoch": 0.821662642290445, "grad_norm": 1.3500051498413086, "learning_rate": 8.184538363365957e-06, "loss": 9.4842, "step": 2382 }, { "epoch": 0.8220075888237324, "grad_norm": 1.5992237329483032, "learning_rate": 8.153796516729928e-06, "loss": 9.4223, "step": 2383 }, { "epoch": 0.8223525353570197, "grad_norm": 1.363759994506836, "learning_rate": 8.123107387006229e-06, "loss": 9.4813, "step": 2384 }, { "epoch": 0.822697481890307, "grad_norm": 1.442180871963501, "learning_rate": 8.092471012856362e-06, "loss": 9.4769, "step": 2385 }, { "epoch": 0.8230424284235943, "grad_norm": 1.4553319215774536, "learning_rate": 8.061887432875304e-06, "loss": 9.4806, "step": 2386 }, { "epoch": 0.8233873749568816, "grad_norm": 1.4236608743667603, "learning_rate": 8.031356685591612e-06, "loss": 9.4485, "step": 2387 }, { "epoch": 0.8237323214901691, "grad_norm": 1.4737285375595093, "learning_rate": 8.00087880946721e-06, "loss": 9.4346, "step": 2388 }, { "epoch": 0.8240772680234564, "grad_norm": 1.4963711500167847, "learning_rate": 7.970453842897462e-06, "loss": 9.4678, "step": 2389 }, { "epoch": 0.8244222145567437, "grad_norm": 1.4410032033920288, "learning_rate": 7.940081824211049e-06, "loss": 9.4932, "step": 2390 }, { "epoch": 0.824767161090031, "grad_norm": 1.51363205909729, "learning_rate": 7.909762791669972e-06, "loss": 9.4573, "step": 2391 }, { "epoch": 0.8251121076233184, "grad_norm": 1.5144532918930054, "learning_rate": 7.879496783469454e-06, "loss": 9.3881, "step": 2392 }, { "epoch": 0.8254570541566058, "grad_norm": 1.5119389295578003, "learning_rate": 7.849283837737941e-06, "loss": 9.4617, "step": 2393 }, { "epoch": 0.8258020006898931, "grad_norm": 1.6774712800979614, "learning_rate": 7.819123992537042e-06, "loss": 9.4475, "step": 2394 }, { "epoch": 0.8261469472231804, "grad_norm": 1.540063500404358, "learning_rate": 7.789017285861439e-06, "loss": 9.4618, "step": 2395 }, { "epoch": 0.8264918937564677, "grad_norm": 1.6762762069702148, "learning_rate": 7.758963755638916e-06, "loss": 9.436, "step": 2396 }, { "epoch": 0.8268368402897551, "grad_norm": 1.5456700325012207, "learning_rate": 7.728963439730203e-06, "loss": 9.3776, "step": 2397 }, { "epoch": 0.8271817868230424, "grad_norm": 1.513611912727356, "learning_rate": 7.699016375929057e-06, "loss": 9.4647, "step": 2398 }, { "epoch": 0.8275267333563298, "grad_norm": 1.805281400680542, "learning_rate": 7.669122601962098e-06, "loss": 9.3213, "step": 2399 }, { "epoch": 0.8278716798896171, "grad_norm": 1.686733603477478, "learning_rate": 7.639282155488836e-06, "loss": 9.3407, "step": 2400 }, { "epoch": 0.8282166264229045, "grad_norm": 0.8331398963928223, "learning_rate": 7.609495074101614e-06, "loss": 9.5749, "step": 2401 }, { "epoch": 0.8285615729561918, "grad_norm": 0.9920496940612793, "learning_rate": 7.579761395325536e-06, "loss": 9.5205, "step": 2402 }, { "epoch": 0.8289065194894791, "grad_norm": 1.0967357158660889, "learning_rate": 7.550081156618399e-06, "loss": 9.4806, "step": 2403 }, { "epoch": 0.8292514660227664, "grad_norm": 1.0771796703338623, "learning_rate": 7.520454395370724e-06, "loss": 9.5243, "step": 2404 }, { "epoch": 0.8295964125560538, "grad_norm": 1.102107286453247, "learning_rate": 7.490881148905654e-06, "loss": 9.4947, "step": 2405 }, { "epoch": 0.8299413590893412, "grad_norm": 0.9728934168815613, "learning_rate": 7.461361454478871e-06, "loss": 9.5951, "step": 2406 }, { "epoch": 0.8302863056226285, "grad_norm": 1.1681954860687256, "learning_rate": 7.43189534927865e-06, "loss": 9.5276, "step": 2407 }, { "epoch": 0.8306312521559158, "grad_norm": 1.1862436532974243, "learning_rate": 7.402482870425725e-06, "loss": 9.524, "step": 2408 }, { "epoch": 0.8309761986892031, "grad_norm": 1.2328039407730103, "learning_rate": 7.373124054973296e-06, "loss": 9.5313, "step": 2409 }, { "epoch": 0.8313211452224906, "grad_norm": 1.20083749294281, "learning_rate": 7.343818939906916e-06, "loss": 9.5427, "step": 2410 }, { "epoch": 0.8316660917557779, "grad_norm": 1.2234563827514648, "learning_rate": 7.314567562144542e-06, "loss": 9.52, "step": 2411 }, { "epoch": 0.8320110382890652, "grad_norm": 1.3493642807006836, "learning_rate": 7.285369958536375e-06, "loss": 9.4352, "step": 2412 }, { "epoch": 0.8323559848223525, "grad_norm": 1.0981404781341553, "learning_rate": 7.2562261658649325e-06, "loss": 9.5559, "step": 2413 }, { "epoch": 0.8327009313556398, "grad_norm": 1.130433201789856, "learning_rate": 7.227136220844883e-06, "loss": 9.5719, "step": 2414 }, { "epoch": 0.8330458778889273, "grad_norm": 1.3468449115753174, "learning_rate": 7.1981001601231276e-06, "loss": 9.4805, "step": 2415 }, { "epoch": 0.8333908244222146, "grad_norm": 1.3730062246322632, "learning_rate": 7.169118020278615e-06, "loss": 9.4373, "step": 2416 }, { "epoch": 0.8337357709555019, "grad_norm": 1.30331289768219, "learning_rate": 7.140189837822409e-06, "loss": 9.4876, "step": 2417 }, { "epoch": 0.8340807174887892, "grad_norm": 1.2246065139770508, "learning_rate": 7.111315649197603e-06, "loss": 9.523, "step": 2418 }, { "epoch": 0.8344256640220766, "grad_norm": 1.301967740058899, "learning_rate": 7.082495490779229e-06, "loss": 9.4755, "step": 2419 }, { "epoch": 0.834770610555364, "grad_norm": 1.2529209852218628, "learning_rate": 7.053729398874298e-06, "loss": 9.5024, "step": 2420 }, { "epoch": 0.8351155570886513, "grad_norm": 1.276577115058899, "learning_rate": 7.025017409721657e-06, "loss": 9.6069, "step": 2421 }, { "epoch": 0.8354605036219386, "grad_norm": 1.323262095451355, "learning_rate": 6.996359559492083e-06, "loss": 9.4286, "step": 2422 }, { "epoch": 0.8358054501552259, "grad_norm": 1.3153644800186157, "learning_rate": 6.967755884288046e-06, "loss": 9.5094, "step": 2423 }, { "epoch": 0.8361503966885133, "grad_norm": 1.23281991481781, "learning_rate": 6.939206420143857e-06, "loss": 9.4647, "step": 2424 }, { "epoch": 0.8364953432218006, "grad_norm": 1.269282341003418, "learning_rate": 6.910711203025455e-06, "loss": 9.5021, "step": 2425 }, { "epoch": 0.836840289755088, "grad_norm": 1.2297919988632202, "learning_rate": 6.882270268830498e-06, "loss": 9.4929, "step": 2426 }, { "epoch": 0.8371852362883753, "grad_norm": 1.2677528858184814, "learning_rate": 6.853883653388249e-06, "loss": 9.4232, "step": 2427 }, { "epoch": 0.8375301828216627, "grad_norm": 1.2528631687164307, "learning_rate": 6.825551392459511e-06, "loss": 9.4827, "step": 2428 }, { "epoch": 0.83787512935495, "grad_norm": 1.3022857904434204, "learning_rate": 6.797273521736641e-06, "loss": 9.4533, "step": 2429 }, { "epoch": 0.8382200758882373, "grad_norm": 1.4493368864059448, "learning_rate": 6.769050076843469e-06, "loss": 9.4251, "step": 2430 }, { "epoch": 0.8385650224215246, "grad_norm": 1.4479137659072876, "learning_rate": 6.740881093335278e-06, "loss": 9.4296, "step": 2431 }, { "epoch": 0.838909968954812, "grad_norm": 1.3461737632751465, "learning_rate": 6.712766606698689e-06, "loss": 9.5194, "step": 2432 }, { "epoch": 0.8392549154880994, "grad_norm": 1.3478329181671143, "learning_rate": 6.684706652351741e-06, "loss": 9.4739, "step": 2433 }, { "epoch": 0.8395998620213867, "grad_norm": 1.5345542430877686, "learning_rate": 6.656701265643711e-06, "loss": 9.4211, "step": 2434 }, { "epoch": 0.839944808554674, "grad_norm": 1.2493342161178589, "learning_rate": 6.628750481855167e-06, "loss": 9.4707, "step": 2435 }, { "epoch": 0.8402897550879613, "grad_norm": 1.3178075551986694, "learning_rate": 6.6008543361978814e-06, "loss": 9.4852, "step": 2436 }, { "epoch": 0.8406347016212488, "grad_norm": 1.4194328784942627, "learning_rate": 6.5730128638148095e-06, "loss": 9.4392, "step": 2437 }, { "epoch": 0.8409796481545361, "grad_norm": 1.4278353452682495, "learning_rate": 6.545226099779994e-06, "loss": 9.4369, "step": 2438 }, { "epoch": 0.8413245946878234, "grad_norm": 1.4180926084518433, "learning_rate": 6.517494079098585e-06, "loss": 9.4777, "step": 2439 }, { "epoch": 0.8416695412211107, "grad_norm": 1.3950679302215576, "learning_rate": 6.489816836706786e-06, "loss": 9.4472, "step": 2440 }, { "epoch": 0.842014487754398, "grad_norm": 1.5117658376693726, "learning_rate": 6.462194407471733e-06, "loss": 9.4362, "step": 2441 }, { "epoch": 0.8423594342876854, "grad_norm": 1.452319622039795, "learning_rate": 6.43462682619157e-06, "loss": 9.443, "step": 2442 }, { "epoch": 0.8427043808209728, "grad_norm": 1.3729323148727417, "learning_rate": 6.407114127595304e-06, "loss": 9.4496, "step": 2443 }, { "epoch": 0.8430493273542601, "grad_norm": 1.5410650968551636, "learning_rate": 6.379656346342844e-06, "loss": 9.4024, "step": 2444 }, { "epoch": 0.8433942738875474, "grad_norm": 1.4286586046218872, "learning_rate": 6.352253517024859e-06, "loss": 9.4946, "step": 2445 }, { "epoch": 0.8437392204208348, "grad_norm": 1.544579267501831, "learning_rate": 6.324905674162846e-06, "loss": 9.3807, "step": 2446 }, { "epoch": 0.8440841669541221, "grad_norm": 1.5438258647918701, "learning_rate": 6.297612852208978e-06, "loss": 9.4389, "step": 2447 }, { "epoch": 0.8444291134874095, "grad_norm": 1.5225231647491455, "learning_rate": 6.2703750855461654e-06, "loss": 9.3885, "step": 2448 }, { "epoch": 0.8447740600206968, "grad_norm": 1.6549159288406372, "learning_rate": 6.24319240848793e-06, "loss": 9.4022, "step": 2449 }, { "epoch": 0.8451190065539841, "grad_norm": 1.7702971696853638, "learning_rate": 6.216064855278414e-06, "loss": 9.3768, "step": 2450 }, { "epoch": 0.8454639530872715, "grad_norm": 0.8622440695762634, "learning_rate": 6.188992460092286e-06, "loss": 9.5452, "step": 2451 }, { "epoch": 0.8458088996205588, "grad_norm": 1.0240706205368042, "learning_rate": 6.161975257034741e-06, "loss": 9.5128, "step": 2452 }, { "epoch": 0.8461538461538461, "grad_norm": 1.021040678024292, "learning_rate": 6.135013280141477e-06, "loss": 9.5117, "step": 2453 }, { "epoch": 0.8464987926871335, "grad_norm": 1.160355806350708, "learning_rate": 6.108106563378557e-06, "loss": 9.5123, "step": 2454 }, { "epoch": 0.8468437392204209, "grad_norm": 1.145014762878418, "learning_rate": 6.081255140642483e-06, "loss": 9.4774, "step": 2455 }, { "epoch": 0.8471886857537082, "grad_norm": 1.0997885465621948, "learning_rate": 6.054459045760053e-06, "loss": 9.531, "step": 2456 }, { "epoch": 0.8475336322869955, "grad_norm": 1.057318925857544, "learning_rate": 6.027718312488423e-06, "loss": 9.5878, "step": 2457 }, { "epoch": 0.8478785788202828, "grad_norm": 1.2089539766311646, "learning_rate": 6.001032974514947e-06, "loss": 9.4941, "step": 2458 }, { "epoch": 0.8482235253535702, "grad_norm": 1.2328418493270874, "learning_rate": 5.974403065457235e-06, "loss": 9.4992, "step": 2459 }, { "epoch": 0.8485684718868576, "grad_norm": 1.1309876441955566, "learning_rate": 5.947828618863027e-06, "loss": 9.544, "step": 2460 }, { "epoch": 0.8489134184201449, "grad_norm": 1.2640806436538696, "learning_rate": 5.921309668210234e-06, "loss": 9.4976, "step": 2461 }, { "epoch": 0.8492583649534322, "grad_norm": 1.1987804174423218, "learning_rate": 5.894846246906843e-06, "loss": 9.5373, "step": 2462 }, { "epoch": 0.8496033114867195, "grad_norm": 1.2217447757720947, "learning_rate": 5.868438388290854e-06, "loss": 9.4895, "step": 2463 }, { "epoch": 0.849948258020007, "grad_norm": 1.1472078561782837, "learning_rate": 5.8420861256303415e-06, "loss": 9.5564, "step": 2464 }, { "epoch": 0.8502932045532943, "grad_norm": 1.3079042434692383, "learning_rate": 5.815789492123258e-06, "loss": 9.4928, "step": 2465 }, { "epoch": 0.8506381510865816, "grad_norm": 1.388771891593933, "learning_rate": 5.7895485208975365e-06, "loss": 9.5433, "step": 2466 }, { "epoch": 0.8509830976198689, "grad_norm": 1.3080216646194458, "learning_rate": 5.76336324501095e-06, "loss": 9.4568, "step": 2467 }, { "epoch": 0.8513280441531562, "grad_norm": 1.1682881116867065, "learning_rate": 5.737233697451145e-06, "loss": 9.5277, "step": 2468 }, { "epoch": 0.8516729906864436, "grad_norm": 1.3328065872192383, "learning_rate": 5.7111599111355215e-06, "loss": 9.4417, "step": 2469 }, { "epoch": 0.852017937219731, "grad_norm": 1.3096672296524048, "learning_rate": 5.685141918911257e-06, "loss": 9.4382, "step": 2470 }, { "epoch": 0.8523628837530183, "grad_norm": 1.3736581802368164, "learning_rate": 5.659179753555244e-06, "loss": 9.4432, "step": 2471 }, { "epoch": 0.8527078302863056, "grad_norm": 1.2530180215835571, "learning_rate": 5.633273447774046e-06, "loss": 9.4777, "step": 2472 }, { "epoch": 0.853052776819593, "grad_norm": 1.279069423675537, "learning_rate": 5.607423034203829e-06, "loss": 9.538, "step": 2473 }, { "epoch": 0.8533977233528803, "grad_norm": 1.3018614053726196, "learning_rate": 5.581628545410372e-06, "loss": 9.4599, "step": 2474 }, { "epoch": 0.8537426698861676, "grad_norm": 1.3087834119796753, "learning_rate": 5.555890013889009e-06, "loss": 9.503, "step": 2475 }, { "epoch": 0.854087616419455, "grad_norm": 1.349788784980774, "learning_rate": 5.530207472064552e-06, "loss": 9.5043, "step": 2476 }, { "epoch": 0.8544325629527423, "grad_norm": 1.2353582382202148, "learning_rate": 5.504580952291294e-06, "loss": 9.5006, "step": 2477 }, { "epoch": 0.8547775094860297, "grad_norm": 1.3644788265228271, "learning_rate": 5.479010486852959e-06, "loss": 9.4729, "step": 2478 }, { "epoch": 0.855122456019317, "grad_norm": 1.2602345943450928, "learning_rate": 5.453496107962658e-06, "loss": 9.5259, "step": 2479 }, { "epoch": 0.8554674025526043, "grad_norm": 1.4060320854187012, "learning_rate": 5.428037847762813e-06, "loss": 9.5535, "step": 2480 }, { "epoch": 0.8558123490858917, "grad_norm": 1.5353844165802002, "learning_rate": 5.40263573832519e-06, "loss": 9.4855, "step": 2481 }, { "epoch": 0.8561572956191791, "grad_norm": 1.502706527709961, "learning_rate": 5.377289811650782e-06, "loss": 9.3855, "step": 2482 }, { "epoch": 0.8565022421524664, "grad_norm": 1.2772150039672852, "learning_rate": 5.35200009966983e-06, "loss": 9.5104, "step": 2483 }, { "epoch": 0.8568471886857537, "grad_norm": 1.388257622718811, "learning_rate": 5.326766634241748e-06, "loss": 9.4841, "step": 2484 }, { "epoch": 0.857192135219041, "grad_norm": 1.3631497621536255, "learning_rate": 5.301589447155092e-06, "loss": 9.4448, "step": 2485 }, { "epoch": 0.8575370817523283, "grad_norm": 1.4563467502593994, "learning_rate": 5.27646857012753e-06, "loss": 9.4469, "step": 2486 }, { "epoch": 0.8578820282856158, "grad_norm": 1.3632886409759521, "learning_rate": 5.251404034805768e-06, "loss": 9.4865, "step": 2487 }, { "epoch": 0.8582269748189031, "grad_norm": 1.3538519144058228, "learning_rate": 5.226395872765555e-06, "loss": 9.4439, "step": 2488 }, { "epoch": 0.8585719213521904, "grad_norm": 1.3481531143188477, "learning_rate": 5.201444115511605e-06, "loss": 9.52, "step": 2489 }, { "epoch": 0.8589168678854777, "grad_norm": 1.370755910873413, "learning_rate": 5.176548794477598e-06, "loss": 9.4518, "step": 2490 }, { "epoch": 0.8592618144187651, "grad_norm": 1.4279519319534302, "learning_rate": 5.151709941026078e-06, "loss": 9.4622, "step": 2491 }, { "epoch": 0.8596067609520525, "grad_norm": 1.441394329071045, "learning_rate": 5.126927586448516e-06, "loss": 9.4812, "step": 2492 }, { "epoch": 0.8599517074853398, "grad_norm": 1.4180115461349487, "learning_rate": 5.1022017619651415e-06, "loss": 9.4861, "step": 2493 }, { "epoch": 0.8602966540186271, "grad_norm": 1.5045257806777954, "learning_rate": 5.077532498725013e-06, "loss": 9.4093, "step": 2494 }, { "epoch": 0.8606416005519144, "grad_norm": 1.4450771808624268, "learning_rate": 5.052919827805891e-06, "loss": 9.4047, "step": 2495 }, { "epoch": 0.8609865470852018, "grad_norm": 1.6171228885650635, "learning_rate": 5.02836378021429e-06, "loss": 9.3855, "step": 2496 }, { "epoch": 0.8613314936184892, "grad_norm": 1.4856090545654297, "learning_rate": 5.003864386885376e-06, "loss": 9.3877, "step": 2497 }, { "epoch": 0.8616764401517765, "grad_norm": 1.6896843910217285, "learning_rate": 4.979421678682905e-06, "loss": 9.4596, "step": 2498 }, { "epoch": 0.8620213866850638, "grad_norm": 1.7836569547653198, "learning_rate": 4.95503568639929e-06, "loss": 9.325, "step": 2499 }, { "epoch": 0.8623663332183512, "grad_norm": 1.8124544620513916, "learning_rate": 4.930706440755445e-06, "loss": 9.3828, "step": 2500 }, { "epoch": 0.8627112797516385, "grad_norm": 0.9383218884468079, "learning_rate": 4.9064339724008144e-06, "loss": 9.5376, "step": 2501 }, { "epoch": 0.8630562262849258, "grad_norm": 1.0528539419174194, "learning_rate": 4.8822183119133e-06, "loss": 9.5191, "step": 2502 }, { "epoch": 0.8634011728182132, "grad_norm": 1.0615488290786743, "learning_rate": 4.858059489799266e-06, "loss": 9.5424, "step": 2503 }, { "epoch": 0.8637461193515005, "grad_norm": 1.0702091455459595, "learning_rate": 4.833957536493439e-06, "loss": 9.5417, "step": 2504 }, { "epoch": 0.8640910658847879, "grad_norm": 1.1862672567367554, "learning_rate": 4.809912482358936e-06, "loss": 9.5302, "step": 2505 }, { "epoch": 0.8644360124180752, "grad_norm": 1.08174729347229, "learning_rate": 4.785924357687166e-06, "loss": 9.5423, "step": 2506 }, { "epoch": 0.8647809589513625, "grad_norm": 1.0673178434371948, "learning_rate": 4.761993192697844e-06, "loss": 9.5344, "step": 2507 }, { "epoch": 0.8651259054846498, "grad_norm": 1.2792860269546509, "learning_rate": 4.7381190175389275e-06, "loss": 9.5388, "step": 2508 }, { "epoch": 0.8654708520179372, "grad_norm": 1.148232340812683, "learning_rate": 4.714301862286541e-06, "loss": 9.46, "step": 2509 }, { "epoch": 0.8658157985512246, "grad_norm": 1.2341820001602173, "learning_rate": 4.6905417569450275e-06, "loss": 9.5178, "step": 2510 }, { "epoch": 0.8661607450845119, "grad_norm": 1.114518642425537, "learning_rate": 4.666838731446821e-06, "loss": 9.5216, "step": 2511 }, { "epoch": 0.8665056916177992, "grad_norm": 1.1133232116699219, "learning_rate": 4.643192815652469e-06, "loss": 9.49, "step": 2512 }, { "epoch": 0.8668506381510865, "grad_norm": 1.1808667182922363, "learning_rate": 4.619604039350572e-06, "loss": 9.5987, "step": 2513 }, { "epoch": 0.867195584684374, "grad_norm": 1.2320611476898193, "learning_rate": 4.596072432257748e-06, "loss": 9.4902, "step": 2514 }, { "epoch": 0.8675405312176613, "grad_norm": 1.1610668897628784, "learning_rate": 4.572598024018571e-06, "loss": 9.5405, "step": 2515 }, { "epoch": 0.8678854777509486, "grad_norm": 1.1383475065231323, "learning_rate": 4.549180844205603e-06, "loss": 9.5299, "step": 2516 }, { "epoch": 0.8682304242842359, "grad_norm": 1.1785327196121216, "learning_rate": 4.525820922319257e-06, "loss": 9.5377, "step": 2517 }, { "epoch": 0.8685753708175232, "grad_norm": 1.3649024963378906, "learning_rate": 4.502518287787855e-06, "loss": 9.4778, "step": 2518 }, { "epoch": 0.8689203173508107, "grad_norm": 1.3480148315429688, "learning_rate": 4.4792729699675294e-06, "loss": 9.4929, "step": 2519 }, { "epoch": 0.869265263884098, "grad_norm": 1.2999801635742188, "learning_rate": 4.456084998142224e-06, "loss": 9.5344, "step": 2520 }, { "epoch": 0.8696102104173853, "grad_norm": 1.2308170795440674, "learning_rate": 4.43295440152362e-06, "loss": 9.478, "step": 2521 }, { "epoch": 0.8699551569506726, "grad_norm": 1.3496099710464478, "learning_rate": 4.409881209251121e-06, "loss": 9.49, "step": 2522 }, { "epoch": 0.87030010348396, "grad_norm": 1.218358039855957, "learning_rate": 4.386865450391836e-06, "loss": 9.5208, "step": 2523 }, { "epoch": 0.8706450500172473, "grad_norm": 1.460325002670288, "learning_rate": 4.3639071539404775e-06, "loss": 9.4209, "step": 2524 }, { "epoch": 0.8709899965505347, "grad_norm": 1.4746004343032837, "learning_rate": 4.341006348819421e-06, "loss": 9.5211, "step": 2525 }, { "epoch": 0.871334943083822, "grad_norm": 1.2816509008407593, "learning_rate": 4.318163063878561e-06, "loss": 9.4756, "step": 2526 }, { "epoch": 0.8716798896171093, "grad_norm": 1.3259309530258179, "learning_rate": 4.295377327895389e-06, "loss": 9.5018, "step": 2527 }, { "epoch": 0.8720248361503967, "grad_norm": 1.4273511171340942, "learning_rate": 4.272649169574849e-06, "loss": 9.4362, "step": 2528 }, { "epoch": 0.872369782683684, "grad_norm": 1.3017966747283936, "learning_rate": 4.249978617549361e-06, "loss": 9.4516, "step": 2529 }, { "epoch": 0.8727147292169714, "grad_norm": 1.2315176725387573, "learning_rate": 4.227365700378799e-06, "loss": 9.5207, "step": 2530 }, { "epoch": 0.8730596757502587, "grad_norm": 1.395295262336731, "learning_rate": 4.204810446550394e-06, "loss": 9.4892, "step": 2531 }, { "epoch": 0.8734046222835461, "grad_norm": 1.3410911560058594, "learning_rate": 4.182312884478767e-06, "loss": 9.5657, "step": 2532 }, { "epoch": 0.8737495688168334, "grad_norm": 1.4816246032714844, "learning_rate": 4.159873042505813e-06, "loss": 9.4206, "step": 2533 }, { "epoch": 0.8740945153501207, "grad_norm": 1.3325238227844238, "learning_rate": 4.137490948900785e-06, "loss": 9.484, "step": 2534 }, { "epoch": 0.874439461883408, "grad_norm": 1.395811676979065, "learning_rate": 4.115166631860113e-06, "loss": 9.5019, "step": 2535 }, { "epoch": 0.8747844084166954, "grad_norm": 1.3411831855773926, "learning_rate": 4.092900119507498e-06, "loss": 9.5089, "step": 2536 }, { "epoch": 0.8751293549499828, "grad_norm": 1.315796971321106, "learning_rate": 4.07069143989377e-06, "loss": 9.471, "step": 2537 }, { "epoch": 0.8754743014832701, "grad_norm": 1.3734862804412842, "learning_rate": 4.048540620996932e-06, "loss": 9.545, "step": 2538 }, { "epoch": 0.8758192480165574, "grad_norm": 1.5111862421035767, "learning_rate": 4.026447690722113e-06, "loss": 9.402, "step": 2539 }, { "epoch": 0.8761641945498447, "grad_norm": 1.3808101415634155, "learning_rate": 4.00441267690147e-06, "loss": 9.4543, "step": 2540 }, { "epoch": 0.8765091410831322, "grad_norm": 1.574131727218628, "learning_rate": 3.982435607294227e-06, "loss": 9.436, "step": 2541 }, { "epoch": 0.8768540876164195, "grad_norm": 1.2923246622085571, "learning_rate": 3.9605165095866034e-06, "loss": 9.5278, "step": 2542 }, { "epoch": 0.8771990341497068, "grad_norm": 1.4172004461288452, "learning_rate": 3.938655411391806e-06, "loss": 9.4752, "step": 2543 }, { "epoch": 0.8775439806829941, "grad_norm": 1.5436758995056152, "learning_rate": 3.916852340249932e-06, "loss": 9.4072, "step": 2544 }, { "epoch": 0.8778889272162814, "grad_norm": 1.5451159477233887, "learning_rate": 3.895107323628022e-06, "loss": 9.4346, "step": 2545 }, { "epoch": 0.8782338737495688, "grad_norm": 1.6603964567184448, "learning_rate": 3.873420388919951e-06, "loss": 9.3515, "step": 2546 }, { "epoch": 0.8785788202828562, "grad_norm": 1.6220159530639648, "learning_rate": 3.851791563446444e-06, "loss": 9.354, "step": 2547 }, { "epoch": 0.8789237668161435, "grad_norm": 1.769572138786316, "learning_rate": 3.8302208744549985e-06, "loss": 9.3154, "step": 2548 }, { "epoch": 0.8792687133494308, "grad_norm": 1.58255136013031, "learning_rate": 3.8087083491199104e-06, "loss": 9.4663, "step": 2549 }, { "epoch": 0.8796136598827182, "grad_norm": 1.7701119184494019, "learning_rate": 3.7872540145421574e-06, "loss": 9.3808, "step": 2550 }, { "epoch": 0.8799586064160055, "grad_norm": 0.9108375310897827, "learning_rate": 3.765857897749431e-06, "loss": 9.5752, "step": 2551 }, { "epoch": 0.8803035529492929, "grad_norm": 1.0239676237106323, "learning_rate": 3.7445200256961023e-06, "loss": 9.6122, "step": 2552 }, { "epoch": 0.8806484994825802, "grad_norm": 1.0186281204223633, "learning_rate": 3.723240425263119e-06, "loss": 9.5595, "step": 2553 }, { "epoch": 0.8809934460158675, "grad_norm": 1.052709698677063, "learning_rate": 3.7020191232580603e-06, "loss": 9.5816, "step": 2554 }, { "epoch": 0.8813383925491549, "grad_norm": 1.0933276414871216, "learning_rate": 3.680856146415046e-06, "loss": 9.538, "step": 2555 }, { "epoch": 0.8816833390824422, "grad_norm": 1.128589153289795, "learning_rate": 3.659751521394733e-06, "loss": 9.5611, "step": 2556 }, { "epoch": 0.8820282856157295, "grad_norm": 1.0991203784942627, "learning_rate": 3.6387052747842376e-06, "loss": 9.601, "step": 2557 }, { "epoch": 0.8823732321490169, "grad_norm": 1.1087894439697266, "learning_rate": 3.6177174330971675e-06, "loss": 9.4994, "step": 2558 }, { "epoch": 0.8827181786823043, "grad_norm": 1.183724045753479, "learning_rate": 3.5967880227735172e-06, "loss": 9.553, "step": 2559 }, { "epoch": 0.8830631252155916, "grad_norm": 1.090376615524292, "learning_rate": 3.5759170701797017e-06, "loss": 9.5626, "step": 2560 }, { "epoch": 0.8834080717488789, "grad_norm": 1.3279502391815186, "learning_rate": 3.555104601608483e-06, "loss": 9.5579, "step": 2561 }, { "epoch": 0.8837530182821662, "grad_norm": 1.1904933452606201, "learning_rate": 3.5343506432789498e-06, "loss": 9.5454, "step": 2562 }, { "epoch": 0.8840979648154536, "grad_norm": 1.2588059902191162, "learning_rate": 3.5136552213364593e-06, "loss": 9.5167, "step": 2563 }, { "epoch": 0.884442911348741, "grad_norm": 1.151564598083496, "learning_rate": 3.493018361852651e-06, "loss": 9.5044, "step": 2564 }, { "epoch": 0.8847878578820283, "grad_norm": 1.164340853691101, "learning_rate": 3.4724400908253853e-06, "loss": 9.5034, "step": 2565 }, { "epoch": 0.8851328044153156, "grad_norm": 1.254442572593689, "learning_rate": 3.4519204341786902e-06, "loss": 9.558, "step": 2566 }, { "epoch": 0.8854777509486029, "grad_norm": 1.2330467700958252, "learning_rate": 3.4314594177627944e-06, "loss": 9.4793, "step": 2567 }, { "epoch": 0.8858226974818904, "grad_norm": 1.302596092224121, "learning_rate": 3.4110570673539955e-06, "loss": 9.5275, "step": 2568 }, { "epoch": 0.8861676440151777, "grad_norm": 1.3285911083221436, "learning_rate": 3.390713408654761e-06, "loss": 9.5662, "step": 2569 }, { "epoch": 0.886512590548465, "grad_norm": 1.2535372972488403, "learning_rate": 3.370428467293546e-06, "loss": 9.5002, "step": 2570 }, { "epoch": 0.8868575370817523, "grad_norm": 1.2314890623092651, "learning_rate": 3.3502022688248867e-06, "loss": 9.5106, "step": 2571 }, { "epoch": 0.8872024836150396, "grad_norm": 1.347776174545288, "learning_rate": 3.330034838729279e-06, "loss": 9.4378, "step": 2572 }, { "epoch": 0.887547430148327, "grad_norm": 1.3018256425857544, "learning_rate": 3.309926202413205e-06, "loss": 9.5894, "step": 2573 }, { "epoch": 0.8878923766816144, "grad_norm": 1.2346081733703613, "learning_rate": 3.2898763852090895e-06, "loss": 9.498, "step": 2574 }, { "epoch": 0.8882373232149017, "grad_norm": 1.3945900201797485, "learning_rate": 3.269885412375223e-06, "loss": 9.46, "step": 2575 }, { "epoch": 0.888582269748189, "grad_norm": 1.4205291271209717, "learning_rate": 3.249953309095799e-06, "loss": 9.5027, "step": 2576 }, { "epoch": 0.8889272162814764, "grad_norm": 1.2638293504714966, "learning_rate": 3.2300801004808314e-06, "loss": 9.491, "step": 2577 }, { "epoch": 0.8892721628147637, "grad_norm": 1.215364694595337, "learning_rate": 3.21026581156616e-06, "loss": 9.5241, "step": 2578 }, { "epoch": 0.889617109348051, "grad_norm": 1.4178768396377563, "learning_rate": 3.1905104673133625e-06, "loss": 9.5186, "step": 2579 }, { "epoch": 0.8899620558813384, "grad_norm": 1.3347620964050293, "learning_rate": 3.170814092609792e-06, "loss": 9.4986, "step": 2580 }, { "epoch": 0.8903070024146257, "grad_norm": 1.4867733716964722, "learning_rate": 3.151176712268489e-06, "loss": 9.497, "step": 2581 }, { "epoch": 0.8906519489479131, "grad_norm": 1.451385259628296, "learning_rate": 3.1315983510281976e-06, "loss": 9.4539, "step": 2582 }, { "epoch": 0.8909968954812004, "grad_norm": 1.2720712423324585, "learning_rate": 3.1120790335533e-06, "loss": 9.5447, "step": 2583 }, { "epoch": 0.8913418420144877, "grad_norm": 1.3472886085510254, "learning_rate": 3.0926187844337984e-06, "loss": 9.4754, "step": 2584 }, { "epoch": 0.891686788547775, "grad_norm": 1.3972887992858887, "learning_rate": 3.0732176281852652e-06, "loss": 9.4664, "step": 2585 }, { "epoch": 0.8920317350810625, "grad_norm": 1.3255772590637207, "learning_rate": 3.053875589248861e-06, "loss": 9.5324, "step": 2586 }, { "epoch": 0.8923766816143498, "grad_norm": 1.3736553192138672, "learning_rate": 3.0345926919912505e-06, "loss": 9.4713, "step": 2587 }, { "epoch": 0.8927216281476371, "grad_norm": 1.5358455181121826, "learning_rate": 3.0153689607045845e-06, "loss": 9.3817, "step": 2588 }, { "epoch": 0.8930665746809244, "grad_norm": 1.3966903686523438, "learning_rate": 2.996204419606502e-06, "loss": 9.4691, "step": 2589 }, { "epoch": 0.8934115212142117, "grad_norm": 1.3497977256774902, "learning_rate": 2.9770990928400576e-06, "loss": 9.5051, "step": 2590 }, { "epoch": 0.8937564677474992, "grad_norm": 1.4420788288116455, "learning_rate": 2.9580530044737263e-06, "loss": 9.5041, "step": 2591 }, { "epoch": 0.8941014142807865, "grad_norm": 1.465009331703186, "learning_rate": 2.939066178501332e-06, "loss": 9.4508, "step": 2592 }, { "epoch": 0.8944463608140738, "grad_norm": 1.221989631652832, "learning_rate": 2.920138638842068e-06, "loss": 9.469, "step": 2593 }, { "epoch": 0.8947913073473611, "grad_norm": 1.483028769493103, "learning_rate": 2.9012704093404062e-06, "loss": 9.4044, "step": 2594 }, { "epoch": 0.8951362538806485, "grad_norm": 1.5430452823638916, "learning_rate": 2.882461513766133e-06, "loss": 9.4083, "step": 2595 }, { "epoch": 0.8954812004139359, "grad_norm": 1.4477607011795044, "learning_rate": 2.8637119758142707e-06, "loss": 9.4623, "step": 2596 }, { "epoch": 0.8958261469472232, "grad_norm": 1.4406673908233643, "learning_rate": 2.8450218191050705e-06, "loss": 9.4275, "step": 2597 }, { "epoch": 0.8961710934805105, "grad_norm": 1.5870102643966675, "learning_rate": 2.8263910671839866e-06, "loss": 9.4317, "step": 2598 }, { "epoch": 0.8965160400137978, "grad_norm": 1.490122675895691, "learning_rate": 2.8078197435216e-06, "loss": 9.4802, "step": 2599 }, { "epoch": 0.8968609865470852, "grad_norm": 1.7018402814865112, "learning_rate": 2.7893078715136687e-06, "loss": 9.3888, "step": 2600 }, { "epoch": 0.8972059330803726, "grad_norm": 0.8538911938667297, "learning_rate": 2.770855474481021e-06, "loss": 9.6266, "step": 2601 }, { "epoch": 0.8975508796136599, "grad_norm": 1.0329569578170776, "learning_rate": 2.7524625756695954e-06, "loss": 9.5183, "step": 2602 }, { "epoch": 0.8978958261469472, "grad_norm": 1.030372142791748, "learning_rate": 2.734129198250318e-06, "loss": 9.5589, "step": 2603 }, { "epoch": 0.8982407726802346, "grad_norm": 1.0216647386550903, "learning_rate": 2.7158553653192144e-06, "loss": 9.5562, "step": 2604 }, { "epoch": 0.8985857192135219, "grad_norm": 1.0627166032791138, "learning_rate": 2.6976410998972136e-06, "loss": 9.5075, "step": 2605 }, { "epoch": 0.8989306657468092, "grad_norm": 1.1064709424972534, "learning_rate": 2.6794864249302664e-06, "loss": 9.5467, "step": 2606 }, { "epoch": 0.8992756122800966, "grad_norm": 1.0749237537384033, "learning_rate": 2.6613913632892064e-06, "loss": 9.5038, "step": 2607 }, { "epoch": 0.8996205588133839, "grad_norm": 1.1983023881912231, "learning_rate": 2.6433559377697925e-06, "loss": 9.4699, "step": 2608 }, { "epoch": 0.8999655053466713, "grad_norm": 1.2673431634902954, "learning_rate": 2.625380171092667e-06, "loss": 9.482, "step": 2609 }, { "epoch": 0.9003104518799586, "grad_norm": 1.168282389640808, "learning_rate": 2.6074640859032718e-06, "loss": 9.5551, "step": 2610 }, { "epoch": 0.9006553984132459, "grad_norm": 1.100938320159912, "learning_rate": 2.5896077047719237e-06, "loss": 9.6096, "step": 2611 }, { "epoch": 0.9010003449465332, "grad_norm": 1.2417744398117065, "learning_rate": 2.5718110501936675e-06, "loss": 9.5024, "step": 2612 }, { "epoch": 0.9013452914798207, "grad_norm": 1.1761174201965332, "learning_rate": 2.554074144588342e-06, "loss": 9.5431, "step": 2613 }, { "epoch": 0.901690238013108, "grad_norm": 1.2780194282531738, "learning_rate": 2.5363970103004955e-06, "loss": 9.5324, "step": 2614 }, { "epoch": 0.9020351845463953, "grad_norm": 1.1857703924179077, "learning_rate": 2.5187796695994026e-06, "loss": 9.4997, "step": 2615 }, { "epoch": 0.9023801310796826, "grad_norm": 1.2250052690505981, "learning_rate": 2.5012221446789775e-06, "loss": 9.4995, "step": 2616 }, { "epoch": 0.9027250776129699, "grad_norm": 1.1668322086334229, "learning_rate": 2.4837244576578047e-06, "loss": 9.4859, "step": 2617 }, { "epoch": 0.9030700241462574, "grad_norm": 1.3255215883255005, "learning_rate": 2.4662866305790842e-06, "loss": 9.4968, "step": 2618 }, { "epoch": 0.9034149706795447, "grad_norm": 1.371646761894226, "learning_rate": 2.4489086854105946e-06, "loss": 9.5009, "step": 2619 }, { "epoch": 0.903759917212832, "grad_norm": 1.182887315750122, "learning_rate": 2.4315906440446955e-06, "loss": 9.5169, "step": 2620 }, { "epoch": 0.9041048637461193, "grad_norm": 1.215737223625183, "learning_rate": 2.414332528298252e-06, "loss": 9.5355, "step": 2621 }, { "epoch": 0.9044498102794067, "grad_norm": 1.2013062238693237, "learning_rate": 2.397134359912667e-06, "loss": 9.4352, "step": 2622 }, { "epoch": 0.9047947568126941, "grad_norm": 1.2379329204559326, "learning_rate": 2.3799961605537937e-06, "loss": 9.4499, "step": 2623 }, { "epoch": 0.9051397033459814, "grad_norm": 1.1866499185562134, "learning_rate": 2.3629179518119606e-06, "loss": 9.5595, "step": 2624 }, { "epoch": 0.9054846498792687, "grad_norm": 1.3945666551589966, "learning_rate": 2.345899755201919e-06, "loss": 9.4705, "step": 2625 }, { "epoch": 0.905829596412556, "grad_norm": 1.3572181463241577, "learning_rate": 2.328941592162809e-06, "loss": 9.4464, "step": 2626 }, { "epoch": 0.9061745429458434, "grad_norm": 1.3635412454605103, "learning_rate": 2.3120434840581406e-06, "loss": 9.5151, "step": 2627 }, { "epoch": 0.9065194894791307, "grad_norm": 1.2851320505142212, "learning_rate": 2.2952054521757804e-06, "loss": 9.5445, "step": 2628 }, { "epoch": 0.9068644360124181, "grad_norm": 1.4047993421554565, "learning_rate": 2.2784275177278934e-06, "loss": 9.3997, "step": 2629 }, { "epoch": 0.9072093825457054, "grad_norm": 1.3021107912063599, "learning_rate": 2.2617097018509613e-06, "loss": 9.5147, "step": 2630 }, { "epoch": 0.9075543290789928, "grad_norm": 1.280267357826233, "learning_rate": 2.2450520256057038e-06, "loss": 9.5056, "step": 2631 }, { "epoch": 0.9078992756122801, "grad_norm": 1.3371871709823608, "learning_rate": 2.22845450997709e-06, "loss": 9.521, "step": 2632 }, { "epoch": 0.9082442221455674, "grad_norm": 1.3242119550704956, "learning_rate": 2.2119171758743117e-06, "loss": 9.4992, "step": 2633 }, { "epoch": 0.9085891686788548, "grad_norm": 1.357699990272522, "learning_rate": 2.19544004413072e-06, "loss": 9.4417, "step": 2634 }, { "epoch": 0.9089341152121421, "grad_norm": 1.3349027633666992, "learning_rate": 2.1790231355038495e-06, "loss": 9.462, "step": 2635 }, { "epoch": 0.9092790617454295, "grad_norm": 1.4189931154251099, "learning_rate": 2.162666470675334e-06, "loss": 9.4083, "step": 2636 }, { "epoch": 0.9096240082787168, "grad_norm": 1.5067139863967896, "learning_rate": 2.146370070250958e-06, "loss": 9.4424, "step": 2637 }, { "epoch": 0.9099689548120041, "grad_norm": 1.5032390356063843, "learning_rate": 2.130133954760538e-06, "loss": 9.4019, "step": 2638 }, { "epoch": 0.9103139013452914, "grad_norm": 1.3871209621429443, "learning_rate": 2.1139581446580017e-06, "loss": 9.5032, "step": 2639 }, { "epoch": 0.9106588478785789, "grad_norm": 1.5128953456878662, "learning_rate": 2.097842660321242e-06, "loss": 9.3989, "step": 2640 }, { "epoch": 0.9110037944118662, "grad_norm": 1.3251484632492065, "learning_rate": 2.081787522052203e-06, "loss": 9.5056, "step": 2641 }, { "epoch": 0.9113487409451535, "grad_norm": 1.3176518678665161, "learning_rate": 2.0657927500767894e-06, "loss": 9.4636, "step": 2642 }, { "epoch": 0.9116936874784408, "grad_norm": 1.5699663162231445, "learning_rate": 2.0498583645448487e-06, "loss": 9.4639, "step": 2643 }, { "epoch": 0.9120386340117281, "grad_norm": 1.4477311372756958, "learning_rate": 2.0339843855301744e-06, "loss": 9.4008, "step": 2644 }, { "epoch": 0.9123835805450156, "grad_norm": 1.6083184480667114, "learning_rate": 2.018170833030436e-06, "loss": 9.4396, "step": 2645 }, { "epoch": 0.9127285270783029, "grad_norm": 1.6080248355865479, "learning_rate": 2.0024177269672094e-06, "loss": 9.4432, "step": 2646 }, { "epoch": 0.9130734736115902, "grad_norm": 1.4727885723114014, "learning_rate": 1.986725087185898e-06, "loss": 9.4662, "step": 2647 }, { "epoch": 0.9134184201448775, "grad_norm": 1.6938202381134033, "learning_rate": 1.9710929334557484e-06, "loss": 9.332, "step": 2648 }, { "epoch": 0.9137633666781649, "grad_norm": 1.6540645360946655, "learning_rate": 1.9555212854697803e-06, "loss": 9.3841, "step": 2649 }, { "epoch": 0.9141083132114523, "grad_norm": 1.7224019765853882, "learning_rate": 1.940010162844824e-06, "loss": 9.3694, "step": 2650 }, { "epoch": 0.9144532597447396, "grad_norm": 0.7851664423942566, "learning_rate": 1.9245595851214328e-06, "loss": 9.6529, "step": 2651 }, { "epoch": 0.9147982062780269, "grad_norm": 0.9760227203369141, "learning_rate": 1.909169571763908e-06, "loss": 9.5619, "step": 2652 }, { "epoch": 0.9151431528113142, "grad_norm": 1.0906972885131836, "learning_rate": 1.8938401421602359e-06, "loss": 9.4804, "step": 2653 }, { "epoch": 0.9154880993446016, "grad_norm": 1.1999458074569702, "learning_rate": 1.8785713156221018e-06, "loss": 9.527, "step": 2654 }, { "epoch": 0.9158330458778889, "grad_norm": 1.1160448789596558, "learning_rate": 1.863363111384836e-06, "loss": 9.5069, "step": 2655 }, { "epoch": 0.9161779924111763, "grad_norm": 1.0647850036621094, "learning_rate": 1.8482155486073739e-06, "loss": 9.5693, "step": 2656 }, { "epoch": 0.9165229389444636, "grad_norm": 1.2221078872680664, "learning_rate": 1.8331286463722951e-06, "loss": 9.4765, "step": 2657 }, { "epoch": 0.916867885477751, "grad_norm": 1.1700892448425293, "learning_rate": 1.8181024236857246e-06, "loss": 9.5458, "step": 2658 }, { "epoch": 0.9172128320110383, "grad_norm": 1.2416577339172363, "learning_rate": 1.8031368994773756e-06, "loss": 9.5379, "step": 2659 }, { "epoch": 0.9175577785443256, "grad_norm": 1.1829198598861694, "learning_rate": 1.788232092600478e-06, "loss": 9.5421, "step": 2660 }, { "epoch": 0.917902725077613, "grad_norm": 1.1688567399978638, "learning_rate": 1.7733880218317788e-06, "loss": 9.4909, "step": 2661 }, { "epoch": 0.9182476716109003, "grad_norm": 1.1145144701004028, "learning_rate": 1.7586047058714972e-06, "loss": 9.5701, "step": 2662 }, { "epoch": 0.9185926181441877, "grad_norm": 1.1775919198989868, "learning_rate": 1.74388216334333e-06, "loss": 9.5376, "step": 2663 }, { "epoch": 0.918937564677475, "grad_norm": 1.2331610918045044, "learning_rate": 1.7292204127944134e-06, "loss": 9.4995, "step": 2664 }, { "epoch": 0.9192825112107623, "grad_norm": 1.2776384353637695, "learning_rate": 1.714619472695278e-06, "loss": 9.5589, "step": 2665 }, { "epoch": 0.9196274577440496, "grad_norm": 1.1205222606658936, "learning_rate": 1.7000793614398714e-06, "loss": 9.586, "step": 2666 }, { "epoch": 0.9199724042773371, "grad_norm": 1.172733187675476, "learning_rate": 1.6856000973455022e-06, "loss": 9.5626, "step": 2667 }, { "epoch": 0.9203173508106244, "grad_norm": 1.1725226640701294, "learning_rate": 1.6711816986528238e-06, "loss": 9.5409, "step": 2668 }, { "epoch": 0.9206622973439117, "grad_norm": 1.3246572017669678, "learning_rate": 1.6568241835258068e-06, "loss": 9.5028, "step": 2669 }, { "epoch": 0.921007243877199, "grad_norm": 1.4405431747436523, "learning_rate": 1.6425275700517385e-06, "loss": 9.4395, "step": 2670 }, { "epoch": 0.9213521904104863, "grad_norm": 1.2822954654693604, "learning_rate": 1.6282918762411614e-06, "loss": 9.5234, "step": 2671 }, { "epoch": 0.9216971369437738, "grad_norm": 1.3265615701675415, "learning_rate": 1.614117120027886e-06, "loss": 9.4913, "step": 2672 }, { "epoch": 0.9220420834770611, "grad_norm": 1.2790005207061768, "learning_rate": 1.6000033192689611e-06, "loss": 9.4907, "step": 2673 }, { "epoch": 0.9223870300103484, "grad_norm": 1.3700817823410034, "learning_rate": 1.5859504917446366e-06, "loss": 9.48, "step": 2674 }, { "epoch": 0.9227319765436357, "grad_norm": 1.3636354207992554, "learning_rate": 1.5719586551583454e-06, "loss": 9.4867, "step": 2675 }, { "epoch": 0.9230769230769231, "grad_norm": 1.2950481176376343, "learning_rate": 1.5580278271366878e-06, "loss": 9.4936, "step": 2676 }, { "epoch": 0.9234218696102104, "grad_norm": 1.3804364204406738, "learning_rate": 1.5441580252294253e-06, "loss": 9.4613, "step": 2677 }, { "epoch": 0.9237668161434978, "grad_norm": 1.369942307472229, "learning_rate": 1.5303492669094089e-06, "loss": 9.5052, "step": 2678 }, { "epoch": 0.9241117626767851, "grad_norm": 1.2750253677368164, "learning_rate": 1.5166015695726122e-06, "loss": 9.4562, "step": 2679 }, { "epoch": 0.9244567092100724, "grad_norm": 1.348370909690857, "learning_rate": 1.5029149505380646e-06, "loss": 9.4602, "step": 2680 }, { "epoch": 0.9248016557433598, "grad_norm": 1.2539187669754028, "learning_rate": 1.4892894270478853e-06, "loss": 9.525, "step": 2681 }, { "epoch": 0.9251466022766471, "grad_norm": 1.474637508392334, "learning_rate": 1.4757250162671822e-06, "loss": 9.449, "step": 2682 }, { "epoch": 0.9254915488099345, "grad_norm": 1.484521508216858, "learning_rate": 1.4622217352841138e-06, "loss": 9.3875, "step": 2683 }, { "epoch": 0.9258364953432218, "grad_norm": 1.2464600801467896, "learning_rate": 1.448779601109801e-06, "loss": 9.4846, "step": 2684 }, { "epoch": 0.9261814418765092, "grad_norm": 1.404441237449646, "learning_rate": 1.4353986306783418e-06, "loss": 9.516, "step": 2685 }, { "epoch": 0.9265263884097965, "grad_norm": 1.3945449590682983, "learning_rate": 1.4220788408468021e-06, "loss": 9.4727, "step": 2686 }, { "epoch": 0.9268713349430838, "grad_norm": 1.3472886085510254, "learning_rate": 1.4088202483951374e-06, "loss": 9.4367, "step": 2687 }, { "epoch": 0.9272162814763711, "grad_norm": 1.377700686454773, "learning_rate": 1.3956228700262252e-06, "loss": 9.5139, "step": 2688 }, { "epoch": 0.9275612280096585, "grad_norm": 1.389609456062317, "learning_rate": 1.3824867223658388e-06, "loss": 9.4322, "step": 2689 }, { "epoch": 0.9279061745429459, "grad_norm": 1.4232558012008667, "learning_rate": 1.3694118219626074e-06, "loss": 9.5109, "step": 2690 }, { "epoch": 0.9282511210762332, "grad_norm": 1.3743571043014526, "learning_rate": 1.3563981852879827e-06, "loss": 9.4921, "step": 2691 }, { "epoch": 0.9285960676095205, "grad_norm": 1.4875822067260742, "learning_rate": 1.3434458287362672e-06, "loss": 9.463, "step": 2692 }, { "epoch": 0.9289410141428078, "grad_norm": 1.3445839881896973, "learning_rate": 1.3305547686245422e-06, "loss": 9.4212, "step": 2693 }, { "epoch": 0.9292859606760953, "grad_norm": 1.6044926643371582, "learning_rate": 1.3177250211926728e-06, "loss": 9.4379, "step": 2694 }, { "epoch": 0.9296309072093826, "grad_norm": 1.5688241720199585, "learning_rate": 1.3049566026033023e-06, "loss": 9.4778, "step": 2695 }, { "epoch": 0.9299758537426699, "grad_norm": 1.6058199405670166, "learning_rate": 1.2922495289417913e-06, "loss": 9.421, "step": 2696 }, { "epoch": 0.9303208002759572, "grad_norm": 1.693510890007019, "learning_rate": 1.2796038162162239e-06, "loss": 9.354, "step": 2697 }, { "epoch": 0.9306657468092445, "grad_norm": 1.5772984027862549, "learning_rate": 1.2670194803573954e-06, "loss": 9.3735, "step": 2698 }, { "epoch": 0.931010693342532, "grad_norm": 1.614292860031128, "learning_rate": 1.2544965372187635e-06, "loss": 9.3702, "step": 2699 }, { "epoch": 0.9313556398758193, "grad_norm": 1.9061168432235718, "learning_rate": 1.2420350025764528e-06, "loss": 9.2773, "step": 2700 }, { "epoch": 0.9317005864091066, "grad_norm": 0.8620436787605286, "learning_rate": 1.2296348921292333e-06, "loss": 9.6332, "step": 2701 }, { "epoch": 0.9320455329423939, "grad_norm": 0.8850879073143005, "learning_rate": 1.2172962214984763e-06, "loss": 9.6099, "step": 2702 }, { "epoch": 0.9323904794756813, "grad_norm": 1.0009554624557495, "learning_rate": 1.2050190062281752e-06, "loss": 9.5471, "step": 2703 }, { "epoch": 0.9327354260089686, "grad_norm": 1.0762373208999634, "learning_rate": 1.1928032617848805e-06, "loss": 9.5482, "step": 2704 }, { "epoch": 0.933080372542256, "grad_norm": 1.0936650037765503, "learning_rate": 1.1806490035577267e-06, "loss": 9.5399, "step": 2705 }, { "epoch": 0.9334253190755433, "grad_norm": 1.043621301651001, "learning_rate": 1.16855624685836e-06, "loss": 9.5344, "step": 2706 }, { "epoch": 0.9337702656088306, "grad_norm": 1.2308648824691772, "learning_rate": 1.1565250069209776e-06, "loss": 9.5064, "step": 2707 }, { "epoch": 0.934115212142118, "grad_norm": 1.162726640701294, "learning_rate": 1.1445552989022668e-06, "loss": 9.5275, "step": 2708 }, { "epoch": 0.9344601586754053, "grad_norm": 1.1094176769256592, "learning_rate": 1.132647137881393e-06, "loss": 9.5538, "step": 2709 }, { "epoch": 0.9348051052086926, "grad_norm": 1.255376935005188, "learning_rate": 1.120800538859995e-06, "loss": 9.463, "step": 2710 }, { "epoch": 0.93515005174198, "grad_norm": 1.1070146560668945, "learning_rate": 1.1090155167621518e-06, "loss": 9.5253, "step": 2711 }, { "epoch": 0.9354949982752674, "grad_norm": 1.3558119535446167, "learning_rate": 1.0972920864343705e-06, "loss": 9.4959, "step": 2712 }, { "epoch": 0.9358399448085547, "grad_norm": 1.2252329587936401, "learning_rate": 1.085630262645565e-06, "loss": 9.5362, "step": 2713 }, { "epoch": 0.936184891341842, "grad_norm": 1.1986970901489258, "learning_rate": 1.07403006008705e-06, "loss": 9.4938, "step": 2714 }, { "epoch": 0.9365298378751293, "grad_norm": 1.1853275299072266, "learning_rate": 1.062491493372486e-06, "loss": 9.5321, "step": 2715 }, { "epoch": 0.9368747844084166, "grad_norm": 1.169337511062622, "learning_rate": 1.0510145770379177e-06, "loss": 9.5066, "step": 2716 }, { "epoch": 0.9372197309417041, "grad_norm": 1.2148215770721436, "learning_rate": 1.0395993255416957e-06, "loss": 9.5333, "step": 2717 }, { "epoch": 0.9375646774749914, "grad_norm": 1.3632956743240356, "learning_rate": 1.0282457532645119e-06, "loss": 9.4924, "step": 2718 }, { "epoch": 0.9379096240082787, "grad_norm": 1.1406636238098145, "learning_rate": 1.0169538745093242e-06, "loss": 9.5274, "step": 2719 }, { "epoch": 0.938254570541566, "grad_norm": 1.1914732456207275, "learning_rate": 1.0057237035014044e-06, "loss": 9.4695, "step": 2720 }, { "epoch": 0.9385995170748535, "grad_norm": 1.4516353607177734, "learning_rate": 9.945552543882685e-07, "loss": 9.4253, "step": 2721 }, { "epoch": 0.9389444636081408, "grad_norm": 1.3653576374053955, "learning_rate": 9.834485412396677e-07, "loss": 9.4781, "step": 2722 }, { "epoch": 0.9392894101414281, "grad_norm": 1.1982650756835938, "learning_rate": 9.724035780476092e-07, "loss": 9.599, "step": 2723 }, { "epoch": 0.9396343566747154, "grad_norm": 1.244680643081665, "learning_rate": 9.6142037872628e-07, "loss": 9.5415, "step": 2724 }, { "epoch": 0.9399793032080027, "grad_norm": 1.288313388824463, "learning_rate": 9.504989571120726e-07, "loss": 9.5385, "step": 2725 }, { "epoch": 0.9403242497412901, "grad_norm": 1.185012698173523, "learning_rate": 9.396393269635484e-07, "loss": 9.5646, "step": 2726 }, { "epoch": 0.9406691962745775, "grad_norm": 1.2785496711730957, "learning_rate": 9.28841501961425e-07, "loss": 9.496, "step": 2727 }, { "epoch": 0.9410141428078648, "grad_norm": 1.2989834547042847, "learning_rate": 9.1810549570856e-07, "loss": 9.5301, "step": 2728 }, { "epoch": 0.9413590893411521, "grad_norm": 1.2136149406433105, "learning_rate": 9.074313217299457e-07, "loss": 9.5197, "step": 2729 }, { "epoch": 0.9417040358744395, "grad_norm": 1.4056475162506104, "learning_rate": 8.968189934726534e-07, "loss": 9.5294, "step": 2730 }, { "epoch": 0.9420489824077268, "grad_norm": 1.2778812646865845, "learning_rate": 8.862685243058666e-07, "loss": 9.455, "step": 2731 }, { "epoch": 0.9423939289410141, "grad_norm": 1.3463318347930908, "learning_rate": 8.757799275208311e-07, "loss": 9.5346, "step": 2732 }, { "epoch": 0.9427388754743015, "grad_norm": 1.2876229286193848, "learning_rate": 8.653532163308387e-07, "loss": 9.5036, "step": 2733 }, { "epoch": 0.9430838220075888, "grad_norm": 1.3723164796829224, "learning_rate": 8.549884038712375e-07, "loss": 9.5081, "step": 2734 }, { "epoch": 0.9434287685408762, "grad_norm": 1.4172887802124023, "learning_rate": 8.446855031993717e-07, "loss": 9.4852, "step": 2735 }, { "epoch": 0.9437737150741635, "grad_norm": 1.4235061407089233, "learning_rate": 8.344445272946199e-07, "loss": 9.486, "step": 2736 }, { "epoch": 0.9441186616074508, "grad_norm": 1.449661374092102, "learning_rate": 8.24265489058329e-07, "loss": 9.4349, "step": 2737 }, { "epoch": 0.9444636081407382, "grad_norm": 1.4169197082519531, "learning_rate": 8.1414840131383e-07, "loss": 9.4753, "step": 2738 }, { "epoch": 0.9448085546740256, "grad_norm": 1.4273600578308105, "learning_rate": 8.040932768063947e-07, "loss": 9.4446, "step": 2739 }, { "epoch": 0.9451535012073129, "grad_norm": 1.4585797786712646, "learning_rate": 7.941001282032512e-07, "loss": 9.4683, "step": 2740 }, { "epoch": 0.9454984477406002, "grad_norm": 1.433382272720337, "learning_rate": 7.841689680935349e-07, "loss": 9.4656, "step": 2741 }, { "epoch": 0.9458433942738875, "grad_norm": 1.298423409461975, "learning_rate": 7.742998089883102e-07, "loss": 9.4483, "step": 2742 }, { "epoch": 0.9461883408071748, "grad_norm": 1.4828376770019531, "learning_rate": 7.644926633205208e-07, "loss": 9.4068, "step": 2743 }, { "epoch": 0.9465332873404623, "grad_norm": 1.4661272764205933, "learning_rate": 7.547475434449835e-07, "loss": 9.4218, "step": 2744 }, { "epoch": 0.9468782338737496, "grad_norm": 1.4254372119903564, "learning_rate": 7.450644616383951e-07, "loss": 9.4599, "step": 2745 }, { "epoch": 0.9472231804070369, "grad_norm": 1.6096229553222656, "learning_rate": 7.354434300992752e-07, "loss": 9.4529, "step": 2746 }, { "epoch": 0.9475681269403242, "grad_norm": 1.6057524681091309, "learning_rate": 7.258844609479953e-07, "loss": 9.3971, "step": 2747 }, { "epoch": 0.9479130734736116, "grad_norm": 1.5125082731246948, "learning_rate": 7.163875662267117e-07, "loss": 9.417, "step": 2748 }, { "epoch": 0.948258020006899, "grad_norm": 1.7600480318069458, "learning_rate": 7.069527578994151e-07, "loss": 9.2774, "step": 2749 }, { "epoch": 0.9486029665401863, "grad_norm": 1.6905750036239624, "learning_rate": 6.975800478518646e-07, "loss": 9.3521, "step": 2750 }, { "epoch": 0.9489479130734736, "grad_norm": 0.99737948179245, "learning_rate": 6.88269447891593e-07, "loss": 9.533, "step": 2751 }, { "epoch": 0.9492928596067609, "grad_norm": 0.9953950643539429, "learning_rate": 6.790209697478789e-07, "loss": 9.5993, "step": 2752 }, { "epoch": 0.9496378061400483, "grad_norm": 1.0281240940093994, "learning_rate": 6.698346250717524e-07, "loss": 9.5267, "step": 2753 }, { "epoch": 0.9499827526733357, "grad_norm": 1.020925760269165, "learning_rate": 6.607104254359675e-07, "loss": 9.5643, "step": 2754 }, { "epoch": 0.950327699206623, "grad_norm": 1.0711194276809692, "learning_rate": 6.516483823349795e-07, "loss": 9.5639, "step": 2755 }, { "epoch": 0.9506726457399103, "grad_norm": 1.0857421159744263, "learning_rate": 6.426485071849564e-07, "loss": 9.4995, "step": 2756 }, { "epoch": 0.9510175922731977, "grad_norm": 1.262204885482788, "learning_rate": 6.337108113237344e-07, "loss": 9.4864, "step": 2757 }, { "epoch": 0.951362538806485, "grad_norm": 1.0766891241073608, "learning_rate": 6.248353060108292e-07, "loss": 9.5252, "step": 2758 }, { "epoch": 0.9517074853397723, "grad_norm": 1.153833031654358, "learning_rate": 6.160220024273966e-07, "loss": 9.5178, "step": 2759 }, { "epoch": 0.9520524318730597, "grad_norm": 1.1806145906448364, "learning_rate": 6.072709116762442e-07, "loss": 9.538, "step": 2760 }, { "epoch": 0.952397378406347, "grad_norm": 1.146377682685852, "learning_rate": 5.98582044781798e-07, "loss": 9.534, "step": 2761 }, { "epoch": 0.9527423249396344, "grad_norm": 1.1945569515228271, "learning_rate": 5.899554126901075e-07, "loss": 9.5208, "step": 2762 }, { "epoch": 0.9530872714729217, "grad_norm": 1.286086916923523, "learning_rate": 5.813910262687905e-07, "loss": 9.4976, "step": 2763 }, { "epoch": 0.953432218006209, "grad_norm": 1.2750720977783203, "learning_rate": 5.728888963070945e-07, "loss": 9.5186, "step": 2764 }, { "epoch": 0.9537771645394963, "grad_norm": 1.2140814065933228, "learning_rate": 5.644490335157959e-07, "loss": 9.4896, "step": 2765 }, { "epoch": 0.9541221110727838, "grad_norm": 1.1700353622436523, "learning_rate": 5.560714485272512e-07, "loss": 9.6212, "step": 2766 }, { "epoch": 0.9544670576060711, "grad_norm": 1.3271950483322144, "learning_rate": 5.477561518953566e-07, "loss": 9.4967, "step": 2767 }, { "epoch": 0.9548120041393584, "grad_norm": 1.1459087133407593, "learning_rate": 5.395031540955275e-07, "loss": 9.5539, "step": 2768 }, { "epoch": 0.9551569506726457, "grad_norm": 1.122616171836853, "learning_rate": 5.313124655247192e-07, "loss": 9.5894, "step": 2769 }, { "epoch": 0.955501897205933, "grad_norm": 1.3789931535720825, "learning_rate": 5.231840965013668e-07, "loss": 9.4521, "step": 2770 }, { "epoch": 0.9558468437392205, "grad_norm": 1.2291425466537476, "learning_rate": 5.151180572654235e-07, "loss": 9.5169, "step": 2771 }, { "epoch": 0.9561917902725078, "grad_norm": 1.1856980323791504, "learning_rate": 5.071143579782889e-07, "loss": 9.4942, "step": 2772 }, { "epoch": 0.9565367368057951, "grad_norm": 1.2218326330184937, "learning_rate": 4.99173008722853e-07, "loss": 9.55, "step": 2773 }, { "epoch": 0.9568816833390824, "grad_norm": 1.2583118677139282, "learning_rate": 4.91294019503441e-07, "loss": 9.538, "step": 2774 }, { "epoch": 0.9572266298723697, "grad_norm": 1.3726310729980469, "learning_rate": 4.834774002458409e-07, "loss": 9.4831, "step": 2775 }, { "epoch": 0.9575715764056572, "grad_norm": 1.3480925559997559, "learning_rate": 4.757231607972534e-07, "loss": 9.4674, "step": 2776 }, { "epoch": 0.9579165229389445, "grad_norm": 1.2078763246536255, "learning_rate": 4.680313109262813e-07, "loss": 9.4641, "step": 2777 }, { "epoch": 0.9582614694722318, "grad_norm": 1.2556449174880981, "learning_rate": 4.6040186032296206e-07, "loss": 9.4801, "step": 2778 }, { "epoch": 0.9586064160055191, "grad_norm": 1.2832894325256348, "learning_rate": 4.5283481859869635e-07, "loss": 9.5116, "step": 2779 }, { "epoch": 0.9589513625388065, "grad_norm": 1.3231313228607178, "learning_rate": 4.4533019528628093e-07, "loss": 9.4913, "step": 2780 }, { "epoch": 0.9592963090720938, "grad_norm": 1.2611724138259888, "learning_rate": 4.3788799983986997e-07, "loss": 9.521, "step": 2781 }, { "epoch": 0.9596412556053812, "grad_norm": 1.2805002927780151, "learning_rate": 4.305082416349804e-07, "loss": 9.502, "step": 2782 }, { "epoch": 0.9599862021386685, "grad_norm": 1.3639451265335083, "learning_rate": 4.231909299684533e-07, "loss": 9.4736, "step": 2783 }, { "epoch": 0.9603311486719558, "grad_norm": 1.4248766899108887, "learning_rate": 4.159360740584817e-07, "loss": 9.5154, "step": 2784 }, { "epoch": 0.9606760952052432, "grad_norm": 1.5050346851348877, "learning_rate": 4.0874368304457676e-07, "loss": 9.4806, "step": 2785 }, { "epoch": 0.9610210417385305, "grad_norm": 1.28929603099823, "learning_rate": 4.016137659875463e-07, "loss": 9.5005, "step": 2786 }, { "epoch": 0.9613659882718179, "grad_norm": 1.1755974292755127, "learning_rate": 3.945463318695053e-07, "loss": 9.4845, "step": 2787 }, { "epoch": 0.9617109348051052, "grad_norm": 1.4888191223144531, "learning_rate": 3.8754138959383733e-07, "loss": 9.4333, "step": 2788 }, { "epoch": 0.9620558813383926, "grad_norm": 1.3838120698928833, "learning_rate": 3.805989479852279e-07, "loss": 9.4622, "step": 2789 }, { "epoch": 0.9624008278716799, "grad_norm": 1.3965057134628296, "learning_rate": 3.7371901578959756e-07, "loss": 9.4135, "step": 2790 }, { "epoch": 0.9627457744049672, "grad_norm": 1.3354548215866089, "learning_rate": 3.6690160167413554e-07, "loss": 9.4748, "step": 2791 }, { "epoch": 0.9630907209382545, "grad_norm": 1.4212675094604492, "learning_rate": 3.6014671422727185e-07, "loss": 9.4848, "step": 2792 }, { "epoch": 0.9634356674715419, "grad_norm": 1.4828577041625977, "learning_rate": 3.5345436195866053e-07, "loss": 9.4245, "step": 2793 }, { "epoch": 0.9637806140048293, "grad_norm": 1.4509541988372803, "learning_rate": 3.468245532991743e-07, "loss": 9.4091, "step": 2794 }, { "epoch": 0.9641255605381166, "grad_norm": 1.5361698865890503, "learning_rate": 3.4025729660089877e-07, "loss": 9.4698, "step": 2795 }, { "epoch": 0.9644705070714039, "grad_norm": 1.5707025527954102, "learning_rate": 3.3375260013711604e-07, "loss": 9.4206, "step": 2796 }, { "epoch": 0.9648154536046912, "grad_norm": 1.47184157371521, "learning_rate": 3.273104721023046e-07, "loss": 9.3935, "step": 2797 }, { "epoch": 0.9651604001379787, "grad_norm": 1.5686029195785522, "learning_rate": 3.209309206121058e-07, "loss": 9.3636, "step": 2798 }, { "epoch": 0.965505346671266, "grad_norm": 1.5438534021377563, "learning_rate": 3.1461395370334104e-07, "loss": 9.3861, "step": 2799 }, { "epoch": 0.9658502932045533, "grad_norm": 1.6539479494094849, "learning_rate": 3.0835957933397773e-07, "loss": 9.398, "step": 2800 }, { "epoch": 0.9661952397378406, "grad_norm": 0.774260401725769, "learning_rate": 3.0216780538314116e-07, "loss": 9.6004, "step": 2801 }, { "epoch": 0.9665401862711279, "grad_norm": 0.9886148571968079, "learning_rate": 2.960386396510972e-07, "loss": 9.5417, "step": 2802 }, { "epoch": 0.9668851328044153, "grad_norm": 1.0178803205490112, "learning_rate": 2.8997208985921953e-07, "loss": 9.5666, "step": 2803 }, { "epoch": 0.9672300793377027, "grad_norm": 1.0676816701889038, "learning_rate": 2.8396816365001687e-07, "loss": 9.499, "step": 2804 }, { "epoch": 0.96757502587099, "grad_norm": 1.0291966199874878, "learning_rate": 2.7802686858710016e-07, "loss": 9.5364, "step": 2805 }, { "epoch": 0.9679199724042773, "grad_norm": 1.100340723991394, "learning_rate": 2.7214821215518214e-07, "loss": 9.5105, "step": 2806 }, { "epoch": 0.9682649189375647, "grad_norm": 1.1685614585876465, "learning_rate": 2.6633220176006667e-07, "loss": 9.5038, "step": 2807 }, { "epoch": 0.968609865470852, "grad_norm": 1.1775288581848145, "learning_rate": 2.6057884472862617e-07, "loss": 9.5383, "step": 2808 }, { "epoch": 0.9689548120041394, "grad_norm": 1.2399282455444336, "learning_rate": 2.548881483088128e-07, "loss": 9.4643, "step": 2809 }, { "epoch": 0.9692997585374267, "grad_norm": 1.2756638526916504, "learning_rate": 2.49260119669642e-07, "loss": 9.4916, "step": 2810 }, { "epoch": 0.969644705070714, "grad_norm": 1.3724730014801025, "learning_rate": 2.4369476590118123e-07, "loss": 9.4872, "step": 2811 }, { "epoch": 0.9699896516040014, "grad_norm": 1.1623852252960205, "learning_rate": 2.381920940145277e-07, "loss": 9.5659, "step": 2812 }, { "epoch": 0.9703345981372887, "grad_norm": 1.3232759237289429, "learning_rate": 2.3275211094183623e-07, "loss": 9.4726, "step": 2813 }, { "epoch": 0.970679544670576, "grad_norm": 1.105460286140442, "learning_rate": 2.2737482353626937e-07, "loss": 9.5364, "step": 2814 }, { "epoch": 0.9710244912038634, "grad_norm": 1.130436897277832, "learning_rate": 2.2206023857201385e-07, "loss": 9.5565, "step": 2815 }, { "epoch": 0.9713694377371508, "grad_norm": 1.2147849798202515, "learning_rate": 2.1680836274426962e-07, "loss": 9.5194, "step": 2816 }, { "epoch": 0.9717143842704381, "grad_norm": 1.2784674167633057, "learning_rate": 2.1161920266922763e-07, "loss": 9.4549, "step": 2817 }, { "epoch": 0.9720593308037254, "grad_norm": 1.3010627031326294, "learning_rate": 2.0649276488408086e-07, "loss": 9.4602, "step": 2818 }, { "epoch": 0.9724042773370127, "grad_norm": 1.2807101011276245, "learning_rate": 2.014290558469911e-07, "loss": 9.5456, "step": 2819 }, { "epoch": 0.9727492238703, "grad_norm": 1.301635503768921, "learning_rate": 1.964280819371167e-07, "loss": 9.5044, "step": 2820 }, { "epoch": 0.9730941704035875, "grad_norm": 1.1405683755874634, "learning_rate": 1.914898494545736e-07, "loss": 9.5604, "step": 2821 }, { "epoch": 0.9734391169368748, "grad_norm": 1.261732816696167, "learning_rate": 1.8661436462042437e-07, "loss": 9.4851, "step": 2822 }, { "epoch": 0.9737840634701621, "grad_norm": 1.2781423330307007, "learning_rate": 1.8180163357671143e-07, "loss": 9.5201, "step": 2823 }, { "epoch": 0.9741290100034494, "grad_norm": 1.2555526494979858, "learning_rate": 1.7705166238639047e-07, "loss": 9.4532, "step": 2824 }, { "epoch": 0.9744739565367369, "grad_norm": 1.2804704904556274, "learning_rate": 1.7236445703338044e-07, "loss": 9.5293, "step": 2825 }, { "epoch": 0.9748189030700242, "grad_norm": 1.3036941289901733, "learning_rate": 1.677400234225135e-07, "loss": 9.536, "step": 2826 }, { "epoch": 0.9751638496033115, "grad_norm": 1.2297347784042358, "learning_rate": 1.6317836737955172e-07, "loss": 9.496, "step": 2827 }, { "epoch": 0.9755087961365988, "grad_norm": 1.2707699537277222, "learning_rate": 1.586794946511594e-07, "loss": 9.4632, "step": 2828 }, { "epoch": 0.9758537426698861, "grad_norm": 1.3694789409637451, "learning_rate": 1.542434109049251e-07, "loss": 9.4874, "step": 2829 }, { "epoch": 0.9761986892031735, "grad_norm": 1.3027503490447998, "learning_rate": 1.4987012172932302e-07, "loss": 9.4417, "step": 2830 }, { "epoch": 0.9765436357364609, "grad_norm": 1.300711750984192, "learning_rate": 1.4555963263372385e-07, "loss": 9.5223, "step": 2831 }, { "epoch": 0.9768885822697482, "grad_norm": 1.29239821434021, "learning_rate": 1.413119490483894e-07, "loss": 9.4713, "step": 2832 }, { "epoch": 0.9772335288030355, "grad_norm": 1.3773186206817627, "learning_rate": 1.3712707632445032e-07, "loss": 9.4819, "step": 2833 }, { "epoch": 0.9775784753363229, "grad_norm": 1.4483076333999634, "learning_rate": 1.3300501973392277e-07, "loss": 9.4047, "step": 2834 }, { "epoch": 0.9779234218696102, "grad_norm": 1.4170414209365845, "learning_rate": 1.2894578446968065e-07, "loss": 9.5096, "step": 2835 }, { "epoch": 0.9782683684028975, "grad_norm": 1.5087085962295532, "learning_rate": 1.2494937564545562e-07, "loss": 9.4874, "step": 2836 }, { "epoch": 0.9786133149361849, "grad_norm": 1.5026702880859375, "learning_rate": 1.2101579829583154e-07, "loss": 9.4319, "step": 2837 }, { "epoch": 0.9789582614694722, "grad_norm": 1.3315801620483398, "learning_rate": 1.1714505737625004e-07, "loss": 9.4616, "step": 2838 }, { "epoch": 0.9793032080027596, "grad_norm": 1.5373655557632446, "learning_rate": 1.133371577629716e-07, "loss": 9.4924, "step": 2839 }, { "epoch": 0.9796481545360469, "grad_norm": 1.4027314186096191, "learning_rate": 1.095921042531145e-07, "loss": 9.3959, "step": 2840 }, { "epoch": 0.9799931010693342, "grad_norm": 1.4827989339828491, "learning_rate": 1.0590990156461034e-07, "loss": 9.4222, "step": 2841 }, { "epoch": 0.9803380476026216, "grad_norm": 1.365110158920288, "learning_rate": 1.022905543362096e-07, "loss": 9.4457, "step": 2842 }, { "epoch": 0.980682994135909, "grad_norm": 1.4800422191619873, "learning_rate": 9.873406712749279e-08, "loss": 9.4845, "step": 2843 }, { "epoch": 0.9810279406691963, "grad_norm": 1.482553243637085, "learning_rate": 9.524044441883706e-08, "loss": 9.4586, "step": 2844 }, { "epoch": 0.9813728872024836, "grad_norm": 1.5155751705169678, "learning_rate": 9.180969061143852e-08, "loss": 9.4402, "step": 2845 }, { "epoch": 0.9817178337357709, "grad_norm": 1.5355010032653809, "learning_rate": 8.844181002727325e-08, "loss": 9.4685, "step": 2846 }, { "epoch": 0.9820627802690582, "grad_norm": 1.5655204057693481, "learning_rate": 8.513680690913073e-08, "loss": 9.4197, "step": 2847 }, { "epoch": 0.9824077268023457, "grad_norm": 1.5435409545898438, "learning_rate": 8.189468542057488e-08, "loss": 9.4517, "step": 2848 }, { "epoch": 0.982752673335633, "grad_norm": 1.6485852003097534, "learning_rate": 7.871544964596633e-08, "loss": 9.4181, "step": 2849 }, { "epoch": 0.9830976198689203, "grad_norm": 1.6750664710998535, "learning_rate": 7.559910359042355e-08, "loss": 9.361, "step": 2850 }, { "epoch": 0.9834425664022076, "grad_norm": 0.9129282832145691, "learning_rate": 7.254565117985613e-08, "loss": 9.5804, "step": 2851 }, { "epoch": 0.983787512935495, "grad_norm": 1.0424609184265137, "learning_rate": 6.955509626093703e-08, "loss": 9.5371, "step": 2852 }, { "epoch": 0.9841324594687824, "grad_norm": 0.9472970366477966, "learning_rate": 6.662744260109155e-08, "loss": 9.6361, "step": 2853 }, { "epoch": 0.9844774060020697, "grad_norm": 1.1693047285079956, "learning_rate": 6.376269388852496e-08, "loss": 9.4819, "step": 2854 }, { "epoch": 0.984822352535357, "grad_norm": 0.99554044008255, "learning_rate": 6.096085373217264e-08, "loss": 9.5079, "step": 2855 }, { "epoch": 0.9851672990686443, "grad_norm": 0.9872909188270569, "learning_rate": 5.822192566173334e-08, "loss": 9.5455, "step": 2856 }, { "epoch": 0.9855122456019317, "grad_norm": 1.08110773563385, "learning_rate": 5.554591312765811e-08, "loss": 9.5288, "step": 2857 }, { "epoch": 0.985857192135219, "grad_norm": 1.1271926164627075, "learning_rate": 5.2932819501111395e-08, "loss": 9.5112, "step": 2858 }, { "epoch": 0.9862021386685064, "grad_norm": 1.2304562330245972, "learning_rate": 5.038264807402105e-08, "loss": 9.5027, "step": 2859 }, { "epoch": 0.9865470852017937, "grad_norm": 1.0815626382827759, "learning_rate": 4.789540205902832e-08, "loss": 9.5418, "step": 2860 }, { "epoch": 0.9868920317350811, "grad_norm": 1.279129147529602, "learning_rate": 4.547108458951566e-08, "loss": 9.5028, "step": 2861 }, { "epoch": 0.9872369782683684, "grad_norm": 1.1527676582336426, "learning_rate": 4.310969871958448e-08, "loss": 9.541, "step": 2862 }, { "epoch": 0.9875819248016557, "grad_norm": 1.260764241218567, "learning_rate": 4.0811247424049625e-08, "loss": 9.4835, "step": 2863 }, { "epoch": 0.9879268713349431, "grad_norm": 1.153011679649353, "learning_rate": 3.857573359845601e-08, "loss": 9.5011, "step": 2864 }, { "epoch": 0.9882718178682304, "grad_norm": 1.124871015548706, "learning_rate": 3.6403160059050865e-08, "loss": 9.5657, "step": 2865 }, { "epoch": 0.9886167644015178, "grad_norm": 1.1694023609161377, "learning_rate": 3.4293529542800406e-08, "loss": 9.4759, "step": 2866 }, { "epoch": 0.9889617109348051, "grad_norm": 1.1712218523025513, "learning_rate": 3.224684470735651e-08, "loss": 9.5351, "step": 2867 }, { "epoch": 0.9893066574680924, "grad_norm": 1.2313039302825928, "learning_rate": 3.0263108131095566e-08, "loss": 9.5316, "step": 2868 }, { "epoch": 0.9896516040013797, "grad_norm": 1.1785590648651123, "learning_rate": 2.8342322313085202e-08, "loss": 9.4722, "step": 2869 }, { "epoch": 0.9899965505346672, "grad_norm": 1.2561894655227661, "learning_rate": 2.6484489673084257e-08, "loss": 9.5575, "step": 2870 }, { "epoch": 0.9903414970679545, "grad_norm": 1.1631916761398315, "learning_rate": 2.4689612551553888e-08, "loss": 9.5365, "step": 2871 }, { "epoch": 0.9906864436012418, "grad_norm": 1.2572925090789795, "learning_rate": 2.2957693209635368e-08, "loss": 9.5596, "step": 2872 }, { "epoch": 0.9910313901345291, "grad_norm": 1.324566125869751, "learning_rate": 2.1288733829161188e-08, "loss": 9.4521, "step": 2873 }, { "epoch": 0.9913763366678164, "grad_norm": 1.3486089706420898, "learning_rate": 1.9682736512660617e-08, "loss": 9.4974, "step": 2874 }, { "epoch": 0.9917212832011039, "grad_norm": 1.174851417541504, "learning_rate": 1.813970328331527e-08, "loss": 9.5033, "step": 2875 }, { "epoch": 0.9920662297343912, "grad_norm": 1.2707513570785522, "learning_rate": 1.6659636085020192e-08, "loss": 9.4723, "step": 2876 }, { "epoch": 0.9924111762676785, "grad_norm": 1.474165678024292, "learning_rate": 1.5242536782317242e-08, "loss": 9.4423, "step": 2877 }, { "epoch": 0.9927561228009658, "grad_norm": 1.3797553777694702, "learning_rate": 1.388840716045059e-08, "loss": 9.4966, "step": 2878 }, { "epoch": 0.9931010693342532, "grad_norm": 1.326752781867981, "learning_rate": 1.2597248925311222e-08, "loss": 9.5313, "step": 2879 }, { "epoch": 0.9934460158675406, "grad_norm": 1.3259997367858887, "learning_rate": 1.1369063703475791e-08, "loss": 9.4992, "step": 2880 }, { "epoch": 0.9937909624008279, "grad_norm": 1.3481284379959106, "learning_rate": 1.0203853042184407e-08, "loss": 9.4431, "step": 2881 }, { "epoch": 0.9941359089341152, "grad_norm": 1.3297460079193115, "learning_rate": 9.101618409340651e-09, "loss": 9.4599, "step": 2882 }, { "epoch": 0.9944808554674025, "grad_norm": 1.3250812292099, "learning_rate": 8.06236119351711e-09, "loss": 9.4787, "step": 2883 }, { "epoch": 0.9948258020006899, "grad_norm": 1.3159713745117188, "learning_rate": 7.086082703949837e-09, "loss": 9.4653, "step": 2884 }, { "epoch": 0.9951707485339772, "grad_norm": 1.3725025653839111, "learning_rate": 6.172784170532797e-09, "loss": 9.5064, "step": 2885 }, { "epoch": 0.9955156950672646, "grad_norm": 1.3856096267700195, "learning_rate": 5.3224667438123155e-09, "loss": 9.4516, "step": 2886 }, { "epoch": 0.9958606416005519, "grad_norm": 1.391958475112915, "learning_rate": 4.53513149500373e-09, "loss": 9.4719, "step": 2887 }, { "epoch": 0.9962055881338393, "grad_norm": 1.5442167520523071, "learning_rate": 3.810779415974741e-09, "loss": 9.4641, "step": 2888 }, { "epoch": 0.9965505346671266, "grad_norm": 1.4803158044815063, "learning_rate": 3.1494114192509585e-09, "loss": 9.4186, "step": 2889 }, { "epoch": 0.9968954812004139, "grad_norm": 1.4997535943984985, "learning_rate": 2.5510283379992504e-09, "loss": 9.4915, "step": 2890 }, { "epoch": 0.9972404277337013, "grad_norm": 1.5729633569717407, "learning_rate": 2.0156309260610517e-09, "loss": 9.423, "step": 2891 }, { "epoch": 0.9975853742669886, "grad_norm": 1.605699062347412, "learning_rate": 1.5432198579079516e-09, "loss": 9.4781, "step": 2892 }, { "epoch": 0.997930320800276, "grad_norm": 1.4182220697402954, "learning_rate": 1.1337957286805534e-09, "loss": 9.453, "step": 2893 }, { "epoch": 0.9982752673335633, "grad_norm": 1.5460253953933716, "learning_rate": 7.873590541551679e-10, "loss": 9.5107, "step": 2894 }, { "epoch": 0.9986202138668506, "grad_norm": 1.456726312637329, "learning_rate": 5.039102707715682e-10, "loss": 9.3826, "step": 2895 }, { "epoch": 0.9989651604001379, "grad_norm": 1.55360746383667, "learning_rate": 2.8344973560523456e-10, "loss": 9.394, "step": 2896 }, { "epoch": 0.9993101069334254, "grad_norm": 1.6202263832092285, "learning_rate": 1.2597772639511006e-10, "loss": 9.4164, "step": 2897 }, { "epoch": 0.9996550534667127, "grad_norm": 1.6138488054275513, "learning_rate": 3.1494441515844684e-11, "loss": 9.3771, "step": 2898 }, { "epoch": 1.0, "grad_norm": 1.670608639717102, "learning_rate": 0.0, "loss": 9.3193, "step": 2899 } ], "logging_steps": 1, "max_steps": 2899, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 239, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 601758459101184.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }