{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999317359546727, "eval_steps": 500, "global_step": 7324, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001365280906546522, "grad_norm": 0.0064392657950520515, "learning_rate": 1.0000000000000002e-06, "loss": 46.0, "step": 1 }, { "epoch": 0.0002730561813093044, "grad_norm": 0.005096447188407183, "learning_rate": 2.0000000000000003e-06, "loss": 46.0, "step": 2 }, { "epoch": 0.00040958427196395656, "grad_norm": 0.00540162855759263, "learning_rate": 3e-06, "loss": 46.0, "step": 3 }, { "epoch": 0.0005461123626186088, "grad_norm": 0.004730381537228823, "learning_rate": 4.000000000000001e-06, "loss": 46.0, "step": 4 }, { "epoch": 0.0006826404532732609, "grad_norm": 0.005004895851016045, "learning_rate": 5e-06, "loss": 46.0, "step": 5 }, { "epoch": 0.0008191685439279131, "grad_norm": 0.0047608171589672565, "learning_rate": 6e-06, "loss": 46.0, "step": 6 }, { "epoch": 0.0009556966345825653, "grad_norm": 0.004943865351378918, "learning_rate": 7.000000000000001e-06, "loss": 46.0, "step": 7 }, { "epoch": 0.0010922247252372176, "grad_norm": 0.005371114704757929, "learning_rate": 8.000000000000001e-06, "loss": 46.0, "step": 8 }, { "epoch": 0.0012287528158918697, "grad_norm": 0.005523690953850746, "learning_rate": 9e-06, "loss": 46.0, "step": 9 }, { "epoch": 0.0013652809065465218, "grad_norm": 0.005615883972495794, "learning_rate": 1e-05, "loss": 46.0, "step": 10 }, { "epoch": 0.0015018089972011742, "grad_norm": 0.0048523093573749065, "learning_rate": 1.1000000000000001e-05, "loss": 46.0, "step": 11 }, { "epoch": 0.0016383370878558263, "grad_norm": 0.0064697773195803165, "learning_rate": 1.2e-05, "loss": 46.0, "step": 12 }, { "epoch": 0.0017748651785104786, "grad_norm": 0.006073013413697481, "learning_rate": 1.3000000000000001e-05, "loss": 46.0, "step": 13 }, { "epoch": 0.0019113932691651307, "grad_norm": 0.006469789892435074, "learning_rate": 1.4000000000000001e-05, "loss": 46.0, "step": 14 }, { "epoch": 0.0020479213598197828, "grad_norm": 0.006134071387350559, "learning_rate": 1.5e-05, "loss": 46.0, "step": 15 }, { "epoch": 0.0021844494504744353, "grad_norm": 0.006042553577572107, "learning_rate": 1.6000000000000003e-05, "loss": 46.0, "step": 16 }, { "epoch": 0.0023209775411290874, "grad_norm": 0.005889922846108675, "learning_rate": 1.7000000000000003e-05, "loss": 46.0, "step": 17 }, { "epoch": 0.0024575056317837395, "grad_norm": 0.007019102107733488, "learning_rate": 1.8e-05, "loss": 46.0, "step": 18 }, { "epoch": 0.0025940337224383916, "grad_norm": 0.006500387564301491, "learning_rate": 1.9e-05, "loss": 46.0, "step": 19 }, { "epoch": 0.0027305618130930437, "grad_norm": 0.006012055091559887, "learning_rate": 2e-05, "loss": 46.0, "step": 20 }, { "epoch": 0.0028670899037476962, "grad_norm": 0.00662240432575345, "learning_rate": 2.1e-05, "loss": 46.0, "step": 21 }, { "epoch": 0.0030036179944023483, "grad_norm": 0.006500392220914364, "learning_rate": 2.2000000000000003e-05, "loss": 46.0, "step": 22 }, { "epoch": 0.0031401460850570004, "grad_norm": 0.0059815868735313416, "learning_rate": 2.3000000000000003e-05, "loss": 46.0, "step": 23 }, { "epoch": 0.0032766741757116525, "grad_norm": 0.007568406872451305, "learning_rate": 2.4e-05, "loss": 46.0, "step": 24 }, { "epoch": 0.003413202266366305, "grad_norm": 0.007171654608100653, "learning_rate": 2.5e-05, "loss": 46.0, "step": 25 }, { "epoch": 0.003549730357020957, "grad_norm": 0.006286699324846268, "learning_rate": 
2.6000000000000002e-05, "loss": 46.0, "step": 26 }, { "epoch": 0.0036862584476756092, "grad_norm": 0.006256168242543936, "learning_rate": 2.7000000000000002e-05, "loss": 46.0, "step": 27 }, { "epoch": 0.0038227865383302613, "grad_norm": 0.007476857863366604, "learning_rate": 2.8000000000000003e-05, "loss": 46.0, "step": 28 }, { "epoch": 0.003959314628984914, "grad_norm": 0.005951044149696827, "learning_rate": 2.9e-05, "loss": 46.0, "step": 29 }, { "epoch": 0.0040958427196395655, "grad_norm": 0.006622393615543842, "learning_rate": 3e-05, "loss": 46.0, "step": 30 }, { "epoch": 0.004232370810294218, "grad_norm": 0.007354812230914831, "learning_rate": 3.1e-05, "loss": 46.0, "step": 31 }, { "epoch": 0.004368898900948871, "grad_norm": 0.0065308124758303165, "learning_rate": 3.2000000000000005e-05, "loss": 46.0, "step": 32 }, { "epoch": 0.004505426991603522, "grad_norm": 0.0068970052525401115, "learning_rate": 3.3e-05, "loss": 46.0, "step": 33 }, { "epoch": 0.004641955082258175, "grad_norm": 0.00781263131648302, "learning_rate": 3.4000000000000007e-05, "loss": 46.0, "step": 34 }, { "epoch": 0.0047784831729128265, "grad_norm": 0.00711080152541399, "learning_rate": 3.5e-05, "loss": 46.0, "step": 35 }, { "epoch": 0.004915011263567479, "grad_norm": 0.00689734797924757, "learning_rate": 3.6e-05, "loss": 46.0, "step": 36 }, { "epoch": 0.0050515393542221315, "grad_norm": 0.007202244829386473, "learning_rate": 3.7e-05, "loss": 46.0, "step": 37 }, { "epoch": 0.005188067444876783, "grad_norm": 0.0074768345803022385, "learning_rate": 3.8e-05, "loss": 46.0, "step": 38 }, { "epoch": 0.005324595535531436, "grad_norm": 0.007263243198394775, "learning_rate": 3.9000000000000006e-05, "loss": 46.0, "step": 39 }, { "epoch": 0.005461123626186087, "grad_norm": 0.007598937954753637, "learning_rate": 4e-05, "loss": 46.0, "step": 40 }, { "epoch": 0.00559765171684074, "grad_norm": 0.006866552401334047, "learning_rate": 4.1e-05, "loss": 46.0, "step": 41 }, { "epoch": 0.0057341798074953924, "grad_norm": 0.007080144714564085, "learning_rate": 4.2e-05, "loss": 46.0, "step": 42 }, { "epoch": 0.005870707898150044, "grad_norm": 0.007629483472555876, "learning_rate": 4.3e-05, "loss": 46.0, "step": 43 }, { "epoch": 0.006007235988804697, "grad_norm": 0.007354784291237593, "learning_rate": 4.4000000000000006e-05, "loss": 46.0, "step": 44 }, { "epoch": 0.006143764079459349, "grad_norm": 0.008544961921870708, "learning_rate": 4.5e-05, "loss": 46.0, "step": 45 }, { "epoch": 0.006280292170114001, "grad_norm": 0.008300847373902798, "learning_rate": 4.600000000000001e-05, "loss": 46.0, "step": 46 }, { "epoch": 0.006416820260768653, "grad_norm": 0.009948834776878357, "learning_rate": 4.7e-05, "loss": 46.0, "step": 47 }, { "epoch": 0.006553348351423305, "grad_norm": 0.011779936961829662, "learning_rate": 4.8e-05, "loss": 46.0, "step": 48 }, { "epoch": 0.0066898764420779576, "grad_norm": 0.012878548353910446, "learning_rate": 4.9e-05, "loss": 46.0, "step": 49 }, { "epoch": 0.00682640453273261, "grad_norm": 0.025269048288464546, "learning_rate": 5e-05, "loss": 46.0, "step": 50 }, { "epoch": 0.006962932623387262, "grad_norm": 0.0076904622837901115, "learning_rate": 5.1000000000000006e-05, "loss": 46.0, "step": 51 }, { "epoch": 0.007099460714041914, "grad_norm": 0.005523694213479757, "learning_rate": 5.2000000000000004e-05, "loss": 46.0, "step": 52 }, { "epoch": 0.007235988804696566, "grad_norm": 0.004943859297782183, "learning_rate": 5.300000000000001e-05, "loss": 46.0, "step": 53 }, { "epoch": 0.0073725168953512185, "grad_norm": 
0.005584749858826399, "learning_rate": 5.4000000000000005e-05, "loss": 46.0, "step": 54 }, { "epoch": 0.007509044986005871, "grad_norm": 0.005279551260173321, "learning_rate": 5.500000000000001e-05, "loss": 46.0, "step": 55 }, { "epoch": 0.007645573076660523, "grad_norm": 0.00512697221711278, "learning_rate": 5.6000000000000006e-05, "loss": 46.0, "step": 56 }, { "epoch": 0.007782101167315175, "grad_norm": 0.005096450448036194, "learning_rate": 5.6999999999999996e-05, "loss": 46.0, "step": 57 }, { "epoch": 0.007918629257969828, "grad_norm": 0.00512696523219347, "learning_rate": 5.8e-05, "loss": 46.0, "step": 58 }, { "epoch": 0.00805515734862448, "grad_norm": 0.005279564298689365, "learning_rate": 5.9e-05, "loss": 46.0, "step": 59 }, { "epoch": 0.008191685439279131, "grad_norm": 0.005493259057402611, "learning_rate": 6e-05, "loss": 46.0, "step": 60 }, { "epoch": 0.008328213529933784, "grad_norm": 0.005584775935858488, "learning_rate": 6.1e-05, "loss": 46.0, "step": 61 }, { "epoch": 0.008464741620588436, "grad_norm": 0.004974375944584608, "learning_rate": 6.2e-05, "loss": 46.0, "step": 62 }, { "epoch": 0.008601269711243089, "grad_norm": 0.005279638338834047, "learning_rate": 6.3e-05, "loss": 46.0, "step": 63 }, { "epoch": 0.008737797801897741, "grad_norm": 0.005645803641527891, "learning_rate": 6.400000000000001e-05, "loss": 46.0, "step": 64 }, { "epoch": 0.008874325892552392, "grad_norm": 0.0064697787165641785, "learning_rate": 6.500000000000001e-05, "loss": 46.0, "step": 65 }, { "epoch": 0.009010853983207045, "grad_norm": 0.005798387341201305, "learning_rate": 6.6e-05, "loss": 46.0, "step": 66 }, { "epoch": 0.009147382073861697, "grad_norm": 0.006103536579757929, "learning_rate": 6.7e-05, "loss": 46.0, "step": 67 }, { "epoch": 0.00928391016451635, "grad_norm": 0.007263305597007275, "learning_rate": 6.800000000000001e-05, "loss": 46.0, "step": 68 }, { "epoch": 0.009420438255171002, "grad_norm": 0.006164577789604664, "learning_rate": 6.9e-05, "loss": 46.0, "step": 69 }, { "epoch": 0.009556966345825653, "grad_norm": 0.006591958459466696, "learning_rate": 7e-05, "loss": 46.0, "step": 70 }, { "epoch": 0.009693494436480305, "grad_norm": 0.006713982205837965, "learning_rate": 7.1e-05, "loss": 46.0, "step": 71 }, { "epoch": 0.009830022527134958, "grad_norm": 0.006500311195850372, "learning_rate": 7.2e-05, "loss": 46.0, "step": 72 }, { "epoch": 0.00996655061778961, "grad_norm": 0.010498268529772758, "learning_rate": 7.3e-05, "loss": 46.0, "step": 73 }, { "epoch": 0.010103078708444263, "grad_norm": 0.006866553332656622, "learning_rate": 7.4e-05, "loss": 46.0, "step": 74 }, { "epoch": 0.010239606799098914, "grad_norm": 0.006439250893890858, "learning_rate": 7.500000000000001e-05, "loss": 46.0, "step": 75 }, { "epoch": 0.010376134889753566, "grad_norm": 0.006378244608640671, "learning_rate": 7.6e-05, "loss": 46.0, "step": 76 }, { "epoch": 0.010512662980408219, "grad_norm": 0.006500313989818096, "learning_rate": 7.7e-05, "loss": 46.0, "step": 77 }, { "epoch": 0.010649191071062871, "grad_norm": 0.006805569399148226, "learning_rate": 7.800000000000001e-05, "loss": 46.0, "step": 78 }, { "epoch": 0.010785719161717524, "grad_norm": 0.005890019237995148, "learning_rate": 7.900000000000001e-05, "loss": 46.0, "step": 79 }, { "epoch": 0.010922247252372175, "grad_norm": 0.007568598259240389, "learning_rate": 8e-05, "loss": 46.0, "step": 80 }, { "epoch": 0.011058775343026827, "grad_norm": 0.006286646705120802, "learning_rate": 8.1e-05, "loss": 46.0, "step": 81 }, { "epoch": 0.01119530343368148, "grad_norm": 
0.007446358446031809, "learning_rate": 8.2e-05, "loss": 46.0, "step": 82 }, { "epoch": 0.011331831524336132, "grad_norm": 0.00604271562770009, "learning_rate": 8.3e-05, "loss": 46.0, "step": 83 }, { "epoch": 0.011468359614990785, "grad_norm": 0.007476830389350653, "learning_rate": 8.4e-05, "loss": 46.0, "step": 84 }, { "epoch": 0.011604887705645437, "grad_norm": 0.0071412562392652035, "learning_rate": 8.5e-05, "loss": 46.0, "step": 85 }, { "epoch": 0.011741415796300088, "grad_norm": 0.007598943542689085, "learning_rate": 8.6e-05, "loss": 46.0, "step": 86 }, { "epoch": 0.01187794388695474, "grad_norm": 0.007599305361509323, "learning_rate": 8.7e-05, "loss": 46.0, "step": 87 }, { "epoch": 0.012014471977609393, "grad_norm": 0.007049628999084234, "learning_rate": 8.800000000000001e-05, "loss": 46.0, "step": 88 }, { "epoch": 0.012151000068264046, "grad_norm": 0.007233199663460255, "learning_rate": 8.900000000000001e-05, "loss": 46.0, "step": 89 }, { "epoch": 0.012287528158918698, "grad_norm": 0.007324621547013521, "learning_rate": 9e-05, "loss": 46.0, "step": 90 }, { "epoch": 0.012424056249573349, "grad_norm": 0.007782080676406622, "learning_rate": 9.1e-05, "loss": 46.0, "step": 91 }, { "epoch": 0.012560584340228002, "grad_norm": 0.007446406874805689, "learning_rate": 9.200000000000001e-05, "loss": 46.0, "step": 92 }, { "epoch": 0.012697112430882654, "grad_norm": 0.00799572840332985, "learning_rate": 9.300000000000001e-05, "loss": 46.0, "step": 93 }, { "epoch": 0.012833640521537307, "grad_norm": 0.007782103028148413, "learning_rate": 9.4e-05, "loss": 46.0, "step": 94 }, { "epoch": 0.01297016861219196, "grad_norm": 0.00921639148145914, "learning_rate": 9.5e-05, "loss": 46.0, "step": 95 }, { "epoch": 0.01310669670284661, "grad_norm": 0.00836196169257164, "learning_rate": 9.6e-05, "loss": 46.0, "step": 96 }, { "epoch": 0.013243224793501263, "grad_norm": 0.009582675993442535, "learning_rate": 9.7e-05, "loss": 46.0, "step": 97 }, { "epoch": 0.013379752884155915, "grad_norm": 0.009887848980724812, "learning_rate": 9.8e-05, "loss": 46.0, "step": 98 }, { "epoch": 0.013516280974810568, "grad_norm": 0.01336693949997425, "learning_rate": 9.900000000000001e-05, "loss": 46.0, "step": 99 }, { "epoch": 0.01365280906546522, "grad_norm": 0.025635506957769394, "learning_rate": 0.0001, "loss": 46.0, "step": 100 }, { "epoch": 0.013789337156119871, "grad_norm": 0.009216404519975185, "learning_rate": 9.999999527192591e-05, "loss": 46.0, "step": 101 }, { "epoch": 0.013925865246774524, "grad_norm": 0.005340642761439085, "learning_rate": 9.999998108770457e-05, "loss": 46.0, "step": 102 }, { "epoch": 0.014062393337429176, "grad_norm": 0.0050964620895683765, "learning_rate": 9.999995744733863e-05, "loss": 46.0, "step": 103 }, { "epoch": 0.014198921428083829, "grad_norm": 0.005096455104649067, "learning_rate": 9.999992435083259e-05, "loss": 46.0, "step": 104 }, { "epoch": 0.014335449518738481, "grad_norm": 0.005065944045782089, "learning_rate": 9.999988179819268e-05, "loss": 46.0, "step": 105 }, { "epoch": 0.014471977609393132, "grad_norm": 0.005523856729269028, "learning_rate": 9.999982978942697e-05, "loss": 46.0, "step": 106 }, { "epoch": 0.014608505700047784, "grad_norm": 0.005218552425503731, "learning_rate": 9.999976832454529e-05, "loss": 46.0, "step": 107 }, { "epoch": 0.014745033790702437, "grad_norm": 0.00527958245947957, "learning_rate": 9.999969740355926e-05, "loss": 46.0, "step": 108 }, { "epoch": 0.01488156188135709, "grad_norm": 0.004852349869906902, "learning_rate": 9.999961702648229e-05, "loss": 46.0, 
"step": 109 }, { "epoch": 0.015018089972011742, "grad_norm": 0.004943876527249813, "learning_rate": 9.999952719332959e-05, "loss": 46.0, "step": 110 }, { "epoch": 0.015154618062666393, "grad_norm": 0.005554288160055876, "learning_rate": 9.999942790411816e-05, "loss": 46.0, "step": 111 }, { "epoch": 0.015291146153321045, "grad_norm": 0.004821829032152891, "learning_rate": 9.999931915886675e-05, "loss": 46.0, "step": 112 }, { "epoch": 0.015427674243975698, "grad_norm": 0.005737371277064085, "learning_rate": 9.999920095759594e-05, "loss": 46.0, "step": 113 }, { "epoch": 0.01556420233463035, "grad_norm": 0.005706910975277424, "learning_rate": 9.999907330032809e-05, "loss": 46.0, "step": 114 }, { "epoch": 0.015700730425285, "grad_norm": 0.005920883733779192, "learning_rate": 9.999893618708734e-05, "loss": 46.0, "step": 115 }, { "epoch": 0.015837258515939655, "grad_norm": 0.006409116089344025, "learning_rate": 9.999878961789962e-05, "loss": 46.0, "step": 116 }, { "epoch": 0.015973786606594306, "grad_norm": 0.005768393166363239, "learning_rate": 9.999863359279264e-05, "loss": 46.0, "step": 117 }, { "epoch": 0.01611031469724896, "grad_norm": 0.006988877430558205, "learning_rate": 9.999846811179592e-05, "loss": 46.0, "step": 118 }, { "epoch": 0.01624684278790361, "grad_norm": 0.006043075118213892, "learning_rate": 9.999829317494075e-05, "loss": 46.0, "step": 119 }, { "epoch": 0.016383370878558262, "grad_norm": 0.006500791292637587, "learning_rate": 9.999810878226022e-05, "loss": 46.0, "step": 120 }, { "epoch": 0.016519898969212916, "grad_norm": 0.006592370104044676, "learning_rate": 9.999791493378921e-05, "loss": 46.0, "step": 121 }, { "epoch": 0.016656427059867567, "grad_norm": 0.006134727504104376, "learning_rate": 9.999771162956436e-05, "loss": 46.0, "step": 122 }, { "epoch": 0.01679295515052222, "grad_norm": 0.006105243694037199, "learning_rate": 9.999749886962413e-05, "loss": 46.0, "step": 123 }, { "epoch": 0.016929483241176872, "grad_norm": 0.006196007132530212, "learning_rate": 9.999727665400875e-05, "loss": 46.0, "step": 124 }, { "epoch": 0.017066011331831523, "grad_norm": 0.006256686523556709, "learning_rate": 9.999704498276029e-05, "loss": 46.0, "step": 125 }, { "epoch": 0.017202539422486177, "grad_norm": 0.006929056718945503, "learning_rate": 9.99968038559225e-05, "loss": 46.0, "step": 126 }, { "epoch": 0.017339067513140828, "grad_norm": 0.006196240894496441, "learning_rate": 9.999655327354102e-05, "loss": 46.0, "step": 127 }, { "epoch": 0.017475595603795482, "grad_norm": 0.006714789662510157, "learning_rate": 9.999629323566323e-05, "loss": 46.0, "step": 128 }, { "epoch": 0.017612123694450133, "grad_norm": 0.006685623899102211, "learning_rate": 9.999602374233832e-05, "loss": 46.0, "step": 129 }, { "epoch": 0.017748651785104784, "grad_norm": 0.0064108301885426044, "learning_rate": 9.999574479361724e-05, "loss": 46.0, "step": 130 }, { "epoch": 0.017885179875759438, "grad_norm": 0.00650253938511014, "learning_rate": 9.999545638955276e-05, "loss": 46.0, "step": 131 }, { "epoch": 0.01802170796641409, "grad_norm": 0.0060740187764167786, "learning_rate": 9.99951585301994e-05, "loss": 46.0, "step": 132 }, { "epoch": 0.018158236057068743, "grad_norm": 0.0071415649726986885, "learning_rate": 9.999485121561354e-05, "loss": 46.0, "step": 133 }, { "epoch": 0.018294764147723394, "grad_norm": 0.006379471626132727, "learning_rate": 9.999453444585326e-05, "loss": 46.0, "step": 134 }, { "epoch": 0.018431292238378045, "grad_norm": 0.006318703293800354, "learning_rate": 9.999420822097848e-05, "loss": 46.0, 
"step": 135 }, { "epoch": 0.0185678203290327, "grad_norm": 0.006868820637464523, "learning_rate": 9.99938725410509e-05, "loss": 46.0, "step": 136 }, { "epoch": 0.01870434841968735, "grad_norm": 0.007661610376089811, "learning_rate": 9.999352740613399e-05, "loss": 46.0, "step": 137 }, { "epoch": 0.018840876510342004, "grad_norm": 0.006655195727944374, "learning_rate": 9.999317281629304e-05, "loss": 46.0, "step": 138 }, { "epoch": 0.018977404600996655, "grad_norm": 0.007112228311598301, "learning_rate": 9.999280877159512e-05, "loss": 46.0, "step": 139 }, { "epoch": 0.019113932691651306, "grad_norm": 0.006929009687155485, "learning_rate": 9.999243527210905e-05, "loss": 46.0, "step": 140 }, { "epoch": 0.01925046078230596, "grad_norm": 0.006534802261739969, "learning_rate": 9.999205231790547e-05, "loss": 46.0, "step": 141 }, { "epoch": 0.01938698887296061, "grad_norm": 0.007388260681182146, "learning_rate": 9.999165990905683e-05, "loss": 46.0, "step": 142 }, { "epoch": 0.019523516963615265, "grad_norm": 0.008180912584066391, "learning_rate": 9.999125804563732e-05, "loss": 46.0, "step": 143 }, { "epoch": 0.019660045054269916, "grad_norm": 0.007751926779747009, "learning_rate": 9.999084672772297e-05, "loss": 46.0, "step": 144 }, { "epoch": 0.019796573144924567, "grad_norm": 0.007935077883303165, "learning_rate": 9.999042595539155e-05, "loss": 46.0, "step": 145 }, { "epoch": 0.01993310123557922, "grad_norm": 0.009522893466055393, "learning_rate": 9.998999572872261e-05, "loss": 46.0, "step": 146 }, { "epoch": 0.020069629326233872, "grad_norm": 0.010623243637382984, "learning_rate": 9.998955604779759e-05, "loss": 46.0, "step": 147 }, { "epoch": 0.020206157416888526, "grad_norm": 0.01245472114533186, "learning_rate": 9.998910691269955e-05, "loss": 46.0, "step": 148 }, { "epoch": 0.020342685507543177, "grad_norm": 0.017580725252628326, "learning_rate": 9.99886483235135e-05, "loss": 46.0, "step": 149 }, { "epoch": 0.020479213598197828, "grad_norm": 0.03349756821990013, "learning_rate": 9.998818028032617e-05, "loss": 46.0, "step": 150 }, { "epoch": 0.020615741688852482, "grad_norm": 0.008304811082780361, "learning_rate": 9.998770278322604e-05, "loss": 46.0, "step": 151 }, { "epoch": 0.020752269779507133, "grad_norm": 0.004884021822363138, "learning_rate": 9.998721583230345e-05, "loss": 46.0, "step": 152 }, { "epoch": 0.020888797870161787, "grad_norm": 0.005798705387860537, "learning_rate": 9.998671942765047e-05, "loss": 46.0, "step": 153 }, { "epoch": 0.021025325960816438, "grad_norm": 0.004822134971618652, "learning_rate": 9.998621356936098e-05, "loss": 46.0, "step": 154 }, { "epoch": 0.02116185405147109, "grad_norm": 0.0050971838645637035, "learning_rate": 9.998569825753065e-05, "loss": 46.0, "step": 155 }, { "epoch": 0.021298382142125743, "grad_norm": 0.005127623211592436, "learning_rate": 9.998517349225698e-05, "loss": 46.0, "step": 156 }, { "epoch": 0.021434910232780394, "grad_norm": 0.004974557552486658, "learning_rate": 9.998463927363915e-05, "loss": 46.0, "step": 157 }, { "epoch": 0.021571438323435048, "grad_norm": 0.004913656506687403, "learning_rate": 9.998409560177824e-05, "loss": 46.0, "step": 158 }, { "epoch": 0.0217079664140897, "grad_norm": 0.005739128682762384, "learning_rate": 9.998354247677705e-05, "loss": 46.0, "step": 159 }, { "epoch": 0.02184449450474435, "grad_norm": 0.006135366391390562, "learning_rate": 9.998297989874019e-05, "loss": 46.0, "step": 160 }, { "epoch": 0.021981022595399004, "grad_norm": 0.0056165060959756374, "learning_rate": 9.998240786777407e-05, "loss": 46.0, 
"step": 161 }, { "epoch": 0.022117550686053655, "grad_norm": 0.0049142674542963505, "learning_rate": 9.998182638398685e-05, "loss": 46.0, "step": 162 }, { "epoch": 0.02225407877670831, "grad_norm": 0.004945337772369385, "learning_rate": 9.998123544748852e-05, "loss": 46.0, "step": 163 }, { "epoch": 0.02239060686736296, "grad_norm": 0.006471709348261356, "learning_rate": 9.998063505839083e-05, "loss": 46.0, "step": 164 }, { "epoch": 0.02252713495801761, "grad_norm": 0.006015671882778406, "learning_rate": 9.998002521680734e-05, "loss": 46.0, "step": 165 }, { "epoch": 0.022663663048672265, "grad_norm": 0.0063506849110126495, "learning_rate": 9.997940592285338e-05, "loss": 46.0, "step": 166 }, { "epoch": 0.022800191139326915, "grad_norm": 0.00549672357738018, "learning_rate": 9.997877717664607e-05, "loss": 46.0, "step": 167 }, { "epoch": 0.02293671922998157, "grad_norm": 0.005861447658389807, "learning_rate": 9.997813897830433e-05, "loss": 46.0, "step": 168 }, { "epoch": 0.02307324732063622, "grad_norm": 0.005863008089363575, "learning_rate": 9.997749132794882e-05, "loss": 46.0, "step": 169 }, { "epoch": 0.023209775411290875, "grad_norm": 0.0062310416251420975, "learning_rate": 9.997683422570207e-05, "loss": 46.0, "step": 170 }, { "epoch": 0.023346303501945526, "grad_norm": 0.006629048381000757, "learning_rate": 9.997616767168836e-05, "loss": 46.0, "step": 171 }, { "epoch": 0.023482831592600176, "grad_norm": 0.00669481186196208, "learning_rate": 9.997549166603371e-05, "loss": 46.0, "step": 172 }, { "epoch": 0.02361935968325483, "grad_norm": 0.0066991448402404785, "learning_rate": 9.997480620886599e-05, "loss": 46.0, "step": 173 }, { "epoch": 0.02375588777390948, "grad_norm": 0.006779018323868513, "learning_rate": 9.997411130031482e-05, "loss": 46.0, "step": 174 }, { "epoch": 0.023892415864564136, "grad_norm": 0.005892460234463215, "learning_rate": 9.997340694051164e-05, "loss": 46.0, "step": 175 }, { "epoch": 0.024028943955218787, "grad_norm": 0.006875636056065559, "learning_rate": 9.997269312958965e-05, "loss": 46.0, "step": 176 }, { "epoch": 0.024165472045873437, "grad_norm": 0.006511778105050325, "learning_rate": 9.997196986768387e-05, "loss": 46.0, "step": 177 }, { "epoch": 0.02430200013652809, "grad_norm": 0.006331036798655987, "learning_rate": 9.997123715493106e-05, "loss": 46.0, "step": 178 }, { "epoch": 0.024438528227182742, "grad_norm": 0.007284753955900669, "learning_rate": 9.99704949914698e-05, "loss": 46.0, "step": 179 }, { "epoch": 0.024575056317837397, "grad_norm": 0.006993473507463932, "learning_rate": 9.996974337744046e-05, "loss": 46.0, "step": 180 }, { "epoch": 0.024711584408492047, "grad_norm": 0.007088113576173782, "learning_rate": 9.996898231298519e-05, "loss": 46.0, "step": 181 }, { "epoch": 0.024848112499146698, "grad_norm": 0.0066872392781078815, "learning_rate": 9.996821179824789e-05, "loss": 46.0, "step": 182 }, { "epoch": 0.024984640589801353, "grad_norm": 0.006381938699632883, "learning_rate": 9.996743183337432e-05, "loss": 46.0, "step": 183 }, { "epoch": 0.025121168680456003, "grad_norm": 0.007271216716617346, "learning_rate": 9.996664241851197e-05, "loss": 46.0, "step": 184 }, { "epoch": 0.025257696771110658, "grad_norm": 0.006416036281734705, "learning_rate": 9.996584355381016e-05, "loss": 46.0, "step": 185 }, { "epoch": 0.02539422486176531, "grad_norm": 0.006877813022583723, "learning_rate": 9.996503523941994e-05, "loss": 46.0, "step": 186 }, { "epoch": 0.02553075295241996, "grad_norm": 0.007740365341305733, "learning_rate": 9.996421747549419e-05, "loss": 46.0, 
"step": 187 }, { "epoch": 0.025667281043074613, "grad_norm": 0.007230641320347786, "learning_rate": 9.996339026218759e-05, "loss": 46.0, "step": 188 }, { "epoch": 0.025803809133729264, "grad_norm": 0.00781883206218481, "learning_rate": 9.996255359965656e-05, "loss": 46.0, "step": 189 }, { "epoch": 0.02594033722438392, "grad_norm": 0.006809736602008343, "learning_rate": 9.996170748805935e-05, "loss": 46.0, "step": 190 }, { "epoch": 0.02607686531503857, "grad_norm": 0.007462185341864824, "learning_rate": 9.996085192755596e-05, "loss": 46.0, "step": 191 }, { "epoch": 0.02621339340569322, "grad_norm": 0.008371432311832905, "learning_rate": 9.995998691830821e-05, "loss": 46.0, "step": 192 }, { "epoch": 0.026349921496347874, "grad_norm": 0.007817413657903671, "learning_rate": 9.995911246047971e-05, "loss": 46.0, "step": 193 }, { "epoch": 0.026486449587002525, "grad_norm": 0.008794574998319149, "learning_rate": 9.995822855423579e-05, "loss": 46.0, "step": 194 }, { "epoch": 0.02662297767765718, "grad_norm": 0.009037042036652565, "learning_rate": 9.995733519974366e-05, "loss": 46.0, "step": 195 }, { "epoch": 0.02675950576831183, "grad_norm": 0.009956594556570053, "learning_rate": 9.995643239717227e-05, "loss": 46.0, "step": 196 }, { "epoch": 0.02689603385896648, "grad_norm": 0.011966821737587452, "learning_rate": 9.995552014669235e-05, "loss": 46.0, "step": 197 }, { "epoch": 0.027032561949621135, "grad_norm": 0.012152734212577343, "learning_rate": 9.995459844847643e-05, "loss": 46.0, "step": 198 }, { "epoch": 0.027169090040275786, "grad_norm": 0.01459161750972271, "learning_rate": 9.995366730269881e-05, "loss": 46.0, "step": 199 }, { "epoch": 0.02730561813093044, "grad_norm": 0.03182811290025711, "learning_rate": 9.995272670953561e-05, "loss": 46.0, "step": 200 }, { "epoch": 0.02744214622158509, "grad_norm": 0.007998433895409107, "learning_rate": 9.995177666916472e-05, "loss": 46.0, "step": 201 }, { "epoch": 0.027578674312239742, "grad_norm": 0.005067504942417145, "learning_rate": 9.99508171817658e-05, "loss": 46.0, "step": 202 }, { "epoch": 0.027715202402894396, "grad_norm": 0.005280999932438135, "learning_rate": 9.994984824752032e-05, "loss": 46.0, "step": 203 }, { "epoch": 0.027851730493549047, "grad_norm": 0.004853568505495787, "learning_rate": 9.994886986661153e-05, "loss": 46.0, "step": 204 }, { "epoch": 0.0279882585842037, "grad_norm": 0.005801225081086159, "learning_rate": 9.994788203922447e-05, "loss": 46.0, "step": 205 }, { "epoch": 0.028124786674858352, "grad_norm": 0.005099669564515352, "learning_rate": 9.994688476554592e-05, "loss": 46.0, "step": 206 }, { "epoch": 0.028261314765513003, "grad_norm": 0.005158513318747282, "learning_rate": 9.994587804576453e-05, "loss": 46.0, "step": 207 }, { "epoch": 0.028397842856167657, "grad_norm": 0.005342944525182247, "learning_rate": 9.994486188007071e-05, "loss": 46.0, "step": 208 }, { "epoch": 0.028534370946822308, "grad_norm": 0.006135035306215286, "learning_rate": 9.994383626865658e-05, "loss": 46.0, "step": 209 }, { "epoch": 0.028670899037476962, "grad_norm": 0.006725645624101162, "learning_rate": 9.994280121171615e-05, "loss": 46.0, "step": 210 }, { "epoch": 0.028807427128131613, "grad_norm": 0.005914472043514252, "learning_rate": 9.994175670944517e-05, "loss": 46.0, "step": 211 }, { "epoch": 0.028943955218786264, "grad_norm": 0.005872462410479784, "learning_rate": 9.994070276204116e-05, "loss": 46.0, "step": 212 }, { "epoch": 0.029080483309440918, "grad_norm": 0.005198659375309944, "learning_rate": 9.993963936970346e-05, "loss": 46.0, 
"step": 213 }, { "epoch": 0.02921701140009557, "grad_norm": 0.006306622643023729, "learning_rate": 9.993856653263319e-05, "loss": 46.0, "step": 214 }, { "epoch": 0.029353539490750223, "grad_norm": 0.006014724727720022, "learning_rate": 9.993748425103322e-05, "loss": 46.0, "step": 215 }, { "epoch": 0.029490067581404874, "grad_norm": 0.0065292054787278175, "learning_rate": 9.993639252510824e-05, "loss": 46.0, "step": 216 }, { "epoch": 0.029626595672059525, "grad_norm": 0.006609591655433178, "learning_rate": 9.993529135506476e-05, "loss": 46.0, "step": 217 }, { "epoch": 0.02976312376271418, "grad_norm": 0.006543149705976248, "learning_rate": 9.993418074111101e-05, "loss": 46.0, "step": 218 }, { "epoch": 0.02989965185336883, "grad_norm": 0.005964506883174181, "learning_rate": 9.9933060683457e-05, "loss": 46.0, "step": 219 }, { "epoch": 0.030036179944023484, "grad_norm": 0.006460642442107201, "learning_rate": 9.993193118231462e-05, "loss": 46.0, "step": 220 }, { "epoch": 0.030172708034678135, "grad_norm": 0.005484839901328087, "learning_rate": 9.993079223789744e-05, "loss": 46.0, "step": 221 }, { "epoch": 0.030309236125332786, "grad_norm": 0.007502452004700899, "learning_rate": 9.992964385042088e-05, "loss": 46.0, "step": 222 }, { "epoch": 0.03044576421598744, "grad_norm": 0.006395469885319471, "learning_rate": 9.992848602010212e-05, "loss": 46.0, "step": 223 }, { "epoch": 0.03058229230664209, "grad_norm": 0.006767972372472286, "learning_rate": 9.992731874716013e-05, "loss": 46.0, "step": 224 }, { "epoch": 0.030718820397296745, "grad_norm": 0.006445927079766989, "learning_rate": 9.992614203181568e-05, "loss": 46.0, "step": 225 }, { "epoch": 0.030855348487951396, "grad_norm": 0.00642132293432951, "learning_rate": 9.992495587429129e-05, "loss": 46.0, "step": 226 }, { "epoch": 0.030991876578606047, "grad_norm": 0.006829009857028723, "learning_rate": 9.992376027481131e-05, "loss": 46.0, "step": 227 }, { "epoch": 0.0311284046692607, "grad_norm": 0.0070356884971261024, "learning_rate": 9.992255523360186e-05, "loss": 46.0, "step": 228 }, { "epoch": 0.031264932759915355, "grad_norm": 0.006753654219210148, "learning_rate": 9.992134075089084e-05, "loss": 46.0, "step": 229 }, { "epoch": 0.03140146085057, "grad_norm": 0.0069990940392017365, "learning_rate": 9.992011682690791e-05, "loss": 46.0, "step": 230 }, { "epoch": 0.03153798894122466, "grad_norm": 0.006793106906116009, "learning_rate": 9.991888346188456e-05, "loss": 46.0, "step": 231 }, { "epoch": 0.03167451703187931, "grad_norm": 0.0064877672120928764, "learning_rate": 9.991764065605406e-05, "loss": 46.0, "step": 232 }, { "epoch": 0.03181104512253396, "grad_norm": 0.006404773332178593, "learning_rate": 9.991638840965143e-05, "loss": 46.0, "step": 233 }, { "epoch": 0.03194757321318861, "grad_norm": 0.006637753453105688, "learning_rate": 9.991512672291352e-05, "loss": 46.0, "step": 234 }, { "epoch": 0.03208410130384327, "grad_norm": 0.007091572508215904, "learning_rate": 9.991385559607892e-05, "loss": 46.0, "step": 235 }, { "epoch": 0.03222062939449792, "grad_norm": 0.007286660838872194, "learning_rate": 9.991257502938804e-05, "loss": 46.0, "step": 236 }, { "epoch": 0.03235715748515257, "grad_norm": 0.007123738061636686, "learning_rate": 9.991128502308308e-05, "loss": 46.0, "step": 237 }, { "epoch": 0.03249368557580722, "grad_norm": 0.00724747683852911, "learning_rate": 9.990998557740801e-05, "loss": 46.0, "step": 238 }, { "epoch": 0.03263021366646188, "grad_norm": 0.006686553359031677, "learning_rate": 9.990867669260854e-05, "loss": 46.0, "step": 239 
}, { "epoch": 0.032766741757116524, "grad_norm": 0.007607371546328068, "learning_rate": 9.990735836893226e-05, "loss": 46.0, "step": 240 }, { "epoch": 0.03290326984777118, "grad_norm": 0.006825089920312166, "learning_rate": 9.990603060662848e-05, "loss": 46.0, "step": 241 }, { "epoch": 0.03303979793842583, "grad_norm": 0.0072606876492500305, "learning_rate": 9.99046934059483e-05, "loss": 46.0, "step": 242 }, { "epoch": 0.03317632602908048, "grad_norm": 0.007662308868020773, "learning_rate": 9.990334676714463e-05, "loss": 46.0, "step": 243 }, { "epoch": 0.033312854119735134, "grad_norm": 0.008321871981024742, "learning_rate": 9.990199069047214e-05, "loss": 46.0, "step": 244 }, { "epoch": 0.03344938221038979, "grad_norm": 0.009909854270517826, "learning_rate": 9.99006251761873e-05, "loss": 46.0, "step": 245 }, { "epoch": 0.03358591030104444, "grad_norm": 0.009818311780691147, "learning_rate": 9.989925022454836e-05, "loss": 46.0, "step": 246 }, { "epoch": 0.03372243839169909, "grad_norm": 0.011731724254786968, "learning_rate": 9.989786583581535e-05, "loss": 46.0, "step": 247 }, { "epoch": 0.033858966482353745, "grad_norm": 0.01246633194386959, "learning_rate": 9.989647201025009e-05, "loss": 46.0, "step": 248 }, { "epoch": 0.0339954945730084, "grad_norm": 0.013519180938601494, "learning_rate": 9.98950687481162e-05, "loss": 46.0, "step": 249 }, { "epoch": 0.034132022663663046, "grad_norm": 0.02726808749139309, "learning_rate": 9.989365604967905e-05, "loss": 46.0, "step": 250 }, { "epoch": 0.0342685507543177, "grad_norm": 0.007643743418157101, "learning_rate": 9.989223391520582e-05, "loss": 46.0, "step": 251 }, { "epoch": 0.034405078844972355, "grad_norm": 0.005342512857168913, "learning_rate": 9.989080234496547e-05, "loss": 46.0, "step": 252 }, { "epoch": 0.034541606935627, "grad_norm": 0.006152589805424213, "learning_rate": 9.988936133922875e-05, "loss": 46.0, "step": 253 }, { "epoch": 0.034678135026281656, "grad_norm": 0.005725871305912733, "learning_rate": 9.988791089826816e-05, "loss": 46.0, "step": 254 }, { "epoch": 0.03481466311693631, "grad_norm": 0.004920752719044685, "learning_rate": 9.988645102235805e-05, "loss": 46.0, "step": 255 }, { "epoch": 0.034951191207590965, "grad_norm": 0.005686153192073107, "learning_rate": 9.988498171177449e-05, "loss": 46.0, "step": 256 }, { "epoch": 0.03508771929824561, "grad_norm": 0.005416962318122387, "learning_rate": 9.988350296679536e-05, "loss": 46.0, "step": 257 }, { "epoch": 0.035224247388900266, "grad_norm": 0.005919235292822123, "learning_rate": 9.988201478770034e-05, "loss": 46.0, "step": 258 }, { "epoch": 0.03536077547955492, "grad_norm": 0.005957192275673151, "learning_rate": 9.988051717477088e-05, "loss": 46.0, "step": 259 }, { "epoch": 0.03549730357020957, "grad_norm": 0.005302075762301683, "learning_rate": 9.987901012829018e-05, "loss": 46.0, "step": 260 }, { "epoch": 0.03563383166086422, "grad_norm": 0.005039900075644255, "learning_rate": 9.98774936485433e-05, "loss": 46.0, "step": 261 }, { "epoch": 0.035770359751518876, "grad_norm": 0.005356596317142248, "learning_rate": 9.987596773581702e-05, "loss": 46.0, "step": 262 }, { "epoch": 0.035906887842173524, "grad_norm": 0.006136135198175907, "learning_rate": 9.987443239039992e-05, "loss": 46.0, "step": 263 }, { "epoch": 0.03604341593282818, "grad_norm": 0.005955490283668041, "learning_rate": 9.987288761258237e-05, "loss": 46.0, "step": 264 }, { "epoch": 0.03617994402348283, "grad_norm": 0.006400538142770529, "learning_rate": 9.987133340265654e-05, "loss": 46.0, "step": 265 }, { "epoch": 
0.03631647211413749, "grad_norm": 0.006042613182216883, "learning_rate": 9.986976976091636e-05, "loss": 46.0, "step": 266 }, { "epoch": 0.036453000204792134, "grad_norm": 0.0070048486813902855, "learning_rate": 9.986819668765755e-05, "loss": 46.0, "step": 267 }, { "epoch": 0.03658952829544679, "grad_norm": 0.006953079253435135, "learning_rate": 9.986661418317759e-05, "loss": 46.0, "step": 268 }, { "epoch": 0.03672605638610144, "grad_norm": 0.0060361698269844055, "learning_rate": 9.98650222477758e-05, "loss": 46.0, "step": 269 }, { "epoch": 0.03686258447675609, "grad_norm": 0.005728107877075672, "learning_rate": 9.986342088175324e-05, "loss": 46.0, "step": 270 }, { "epoch": 0.036999112567410744, "grad_norm": 0.006946246605366468, "learning_rate": 9.986181008541277e-05, "loss": 46.0, "step": 271 }, { "epoch": 0.0371356406580654, "grad_norm": 0.00682005425915122, "learning_rate": 9.986018985905901e-05, "loss": 46.0, "step": 272 }, { "epoch": 0.03727216874872005, "grad_norm": 0.007106370758265257, "learning_rate": 9.98585602029984e-05, "loss": 46.0, "step": 273 }, { "epoch": 0.0374086968393747, "grad_norm": 0.007159827277064323, "learning_rate": 9.985692111753915e-05, "loss": 46.0, "step": 274 }, { "epoch": 0.037545224930029354, "grad_norm": 0.008228704333305359, "learning_rate": 9.985527260299123e-05, "loss": 46.0, "step": 275 }, { "epoch": 0.03768175302068401, "grad_norm": 0.008150205947458744, "learning_rate": 9.985361465966643e-05, "loss": 46.0, "step": 276 }, { "epoch": 0.037818281111338656, "grad_norm": 0.006951555609703064, "learning_rate": 9.98519472878783e-05, "loss": 46.0, "step": 277 }, { "epoch": 0.03795480920199331, "grad_norm": 0.0092319929972291, "learning_rate": 9.985027048794217e-05, "loss": 46.0, "step": 278 }, { "epoch": 0.038091337292647964, "grad_norm": 0.007552103605121374, "learning_rate": 9.984858426017518e-05, "loss": 46.0, "step": 279 }, { "epoch": 0.03822786538330261, "grad_norm": 0.007326656021177769, "learning_rate": 9.984688860489619e-05, "loss": 46.0, "step": 280 }, { "epoch": 0.038364393473957266, "grad_norm": 0.009034374728798866, "learning_rate": 9.984518352242594e-05, "loss": 46.0, "step": 281 }, { "epoch": 0.03850092156461192, "grad_norm": 0.008336121216416359, "learning_rate": 9.984346901308687e-05, "loss": 46.0, "step": 282 }, { "epoch": 0.038637449655266574, "grad_norm": 0.008318337611854076, "learning_rate": 9.984174507720326e-05, "loss": 46.0, "step": 283 }, { "epoch": 0.03877397774592122, "grad_norm": 0.008486776612699032, "learning_rate": 9.984001171510112e-05, "loss": 46.0, "step": 284 }, { "epoch": 0.038910505836575876, "grad_norm": 0.007455648388713598, "learning_rate": 9.983826892710827e-05, "loss": 46.0, "step": 285 }, { "epoch": 0.03904703392723053, "grad_norm": 0.008934040553867817, "learning_rate": 9.983651671355432e-05, "loss": 46.0, "step": 286 }, { "epoch": 0.03918356201788518, "grad_norm": 0.01077974122017622, "learning_rate": 9.983475507477065e-05, "loss": 46.0, "step": 287 }, { "epoch": 0.03932009010853983, "grad_norm": 0.008421733975410461, "learning_rate": 9.983298401109043e-05, "loss": 46.0, "step": 288 }, { "epoch": 0.039456618199194486, "grad_norm": 0.009629035368561745, "learning_rate": 9.98312035228486e-05, "loss": 46.0, "step": 289 }, { "epoch": 0.039593146289849133, "grad_norm": 0.010118825361132622, "learning_rate": 9.982941361038189e-05, "loss": 46.0, "step": 290 }, { "epoch": 0.03972967438050379, "grad_norm": 0.00947739090770483, "learning_rate": 9.982761427402885e-05, "loss": 46.0, "step": 291 }, { "epoch": 
0.03986620247115844, "grad_norm": 0.007952556014060974, "learning_rate": 9.982580551412972e-05, "loss": 46.0, "step": 292 }, { "epoch": 0.040002730561813096, "grad_norm": 0.00814715214073658, "learning_rate": 9.982398733102662e-05, "loss": 46.0, "step": 293 }, { "epoch": 0.040139258652467744, "grad_norm": 0.008617954328656197, "learning_rate": 9.98221597250634e-05, "loss": 46.0, "step": 294 }, { "epoch": 0.0402757867431224, "grad_norm": 0.008763943798840046, "learning_rate": 9.982032269658568e-05, "loss": 46.0, "step": 295 }, { "epoch": 0.04041231483377705, "grad_norm": 0.010324403643608093, "learning_rate": 9.981847624594092e-05, "loss": 46.0, "step": 296 }, { "epoch": 0.0405488429244317, "grad_norm": 0.01507294736802578, "learning_rate": 9.98166203734783e-05, "loss": 46.0, "step": 297 }, { "epoch": 0.040685371015086354, "grad_norm": 0.013663548044860363, "learning_rate": 9.98147550795488e-05, "loss": 46.0, "step": 298 }, { "epoch": 0.04082189910574101, "grad_norm": 0.019793622195720673, "learning_rate": 9.981288036450523e-05, "loss": 46.0, "step": 299 }, { "epoch": 0.040958427196395655, "grad_norm": 0.04147307574748993, "learning_rate": 9.981099622870211e-05, "loss": 46.0, "step": 300 }, { "epoch": 0.04109495528705031, "grad_norm": 0.005967415869235992, "learning_rate": 9.980910267249577e-05, "loss": 46.0, "step": 301 }, { "epoch": 0.041231483377704964, "grad_norm": 0.006596552673727274, "learning_rate": 9.980719969624435e-05, "loss": 46.0, "step": 302 }, { "epoch": 0.04136801146835962, "grad_norm": 0.006816134322434664, "learning_rate": 9.980528730030773e-05, "loss": 46.0, "step": 303 }, { "epoch": 0.041504539559014265, "grad_norm": 0.006091169081628323, "learning_rate": 9.980336548504759e-05, "loss": 46.0, "step": 304 }, { "epoch": 0.04164106764966892, "grad_norm": 0.006342379841953516, "learning_rate": 9.980143425082737e-05, "loss": 46.0, "step": 305 }, { "epoch": 0.041777595740323574, "grad_norm": 0.006914492230862379, "learning_rate": 9.979949359801236e-05, "loss": 46.0, "step": 306 }, { "epoch": 0.04191412383097822, "grad_norm": 0.006762057542800903, "learning_rate": 9.979754352696955e-05, "loss": 46.0, "step": 307 }, { "epoch": 0.042050651921632876, "grad_norm": 0.00724062928929925, "learning_rate": 9.979558403806772e-05, "loss": 46.0, "step": 308 }, { "epoch": 0.04218718001228753, "grad_norm": 0.006303674075752497, "learning_rate": 9.979361513167751e-05, "loss": 46.0, "step": 309 }, { "epoch": 0.04232370810294218, "grad_norm": 0.0076789576560258865, "learning_rate": 9.979163680817124e-05, "loss": 46.0, "step": 310 }, { "epoch": 0.04246023619359683, "grad_norm": 0.00704782921820879, "learning_rate": 9.978964906792307e-05, "loss": 46.0, "step": 311 }, { "epoch": 0.042596764284251486, "grad_norm": 0.006469301413744688, "learning_rate": 9.978765191130894e-05, "loss": 46.0, "step": 312 }, { "epoch": 0.04273329237490614, "grad_norm": 0.010129045695066452, "learning_rate": 9.978564533870654e-05, "loss": 46.0, "step": 313 }, { "epoch": 0.04286982046556079, "grad_norm": 0.008771914057433605, "learning_rate": 9.978362935049537e-05, "loss": 46.0, "step": 314 }, { "epoch": 0.04300634855621544, "grad_norm": 0.006772540509700775, "learning_rate": 9.978160394705668e-05, "loss": 46.0, "step": 315 }, { "epoch": 0.043142876646870096, "grad_norm": 0.007306412793695927, "learning_rate": 9.977956912877356e-05, "loss": 46.0, "step": 316 }, { "epoch": 0.04327940473752474, "grad_norm": 0.007753013167530298, "learning_rate": 9.977752489603082e-05, "loss": 46.0, "step": 317 }, { "epoch": 
0.0434159328281794, "grad_norm": 0.011776250787079334, "learning_rate": 9.977547124921505e-05, "loss": 46.0, "step": 318 }, { "epoch": 0.04355246091883405, "grad_norm": 0.007803077809512615, "learning_rate": 9.977340818871466e-05, "loss": 46.0, "step": 319 }, { "epoch": 0.0436889890094887, "grad_norm": 0.00976123008877039, "learning_rate": 9.977133571491983e-05, "loss": 46.0, "step": 320 }, { "epoch": 0.04382551710014335, "grad_norm": 0.010388693772256374, "learning_rate": 9.97692538282225e-05, "loss": 46.0, "step": 321 }, { "epoch": 0.04396204519079801, "grad_norm": 0.01206995826214552, "learning_rate": 9.976716252901643e-05, "loss": 46.0, "step": 322 }, { "epoch": 0.04409857328145266, "grad_norm": 0.012360908091068268, "learning_rate": 9.97650618176971e-05, "loss": 46.0, "step": 323 }, { "epoch": 0.04423510137210731, "grad_norm": 0.013144474476575851, "learning_rate": 9.976295169466178e-05, "loss": 46.0, "step": 324 }, { "epoch": 0.04437162946276196, "grad_norm": 0.01161892805248499, "learning_rate": 9.976083216030962e-05, "loss": 46.0, "step": 325 }, { "epoch": 0.04450815755341662, "grad_norm": 0.011692160740494728, "learning_rate": 9.97587032150414e-05, "loss": 46.0, "step": 326 }, { "epoch": 0.044644685644071265, "grad_norm": 0.00933381449431181, "learning_rate": 9.975656485925979e-05, "loss": 46.0, "step": 327 }, { "epoch": 0.04478121373472592, "grad_norm": 0.01361104380339384, "learning_rate": 9.97544170933692e-05, "loss": 46.0, "step": 328 }, { "epoch": 0.044917741825380574, "grad_norm": 0.011912395246326923, "learning_rate": 9.975225991777581e-05, "loss": 46.0, "step": 329 }, { "epoch": 0.04505426991603522, "grad_norm": 0.017330307513475418, "learning_rate": 9.97500933328876e-05, "loss": 46.0, "step": 330 }, { "epoch": 0.045190798006689875, "grad_norm": 0.01198787335306406, "learning_rate": 9.97479173391143e-05, "loss": 46.0, "step": 331 }, { "epoch": 0.04532732609734453, "grad_norm": 0.018621394410729408, "learning_rate": 9.974573193686747e-05, "loss": 46.0, "step": 332 }, { "epoch": 0.045463854187999184, "grad_norm": 0.014098040759563446, "learning_rate": 9.974353712656042e-05, "loss": 46.0, "step": 333 }, { "epoch": 0.04560038227865383, "grad_norm": 0.01677376590669155, "learning_rate": 9.974133290860821e-05, "loss": 46.0, "step": 334 }, { "epoch": 0.045736910369308485, "grad_norm": 0.01417413167655468, "learning_rate": 9.973911928342771e-05, "loss": 46.0, "step": 335 }, { "epoch": 0.04587343845996314, "grad_norm": 0.012560643255710602, "learning_rate": 9.97368962514376e-05, "loss": 46.0, "step": 336 }, { "epoch": 0.04600996655061779, "grad_norm": 0.01657690852880478, "learning_rate": 9.973466381305829e-05, "loss": 46.0, "step": 337 }, { "epoch": 0.04614649464127244, "grad_norm": 0.017862094566226006, "learning_rate": 9.973242196871199e-05, "loss": 46.0, "step": 338 }, { "epoch": 0.046283022731927095, "grad_norm": 0.01787407137453556, "learning_rate": 9.973017071882267e-05, "loss": 46.0, "step": 339 }, { "epoch": 0.04641955082258175, "grad_norm": 0.012912525795400143, "learning_rate": 9.97279100638161e-05, "loss": 46.0, "step": 340 }, { "epoch": 0.0465560789132364, "grad_norm": 0.013944827020168304, "learning_rate": 9.972564000411981e-05, "loss": 46.0, "step": 341 }, { "epoch": 0.04669260700389105, "grad_norm": 0.012106145732104778, "learning_rate": 9.972336054016316e-05, "loss": 46.0, "step": 342 }, { "epoch": 0.046829135094545705, "grad_norm": 0.011543608270585537, "learning_rate": 9.972107167237721e-05, "loss": 46.0, "step": 343 }, { "epoch": 0.04696566318520035, 
"grad_norm": 0.013418659567832947, "learning_rate": 9.971877340119485e-05, "loss": 46.0, "step": 344 }, { "epoch": 0.04710219127585501, "grad_norm": 0.01523605827242136, "learning_rate": 9.971646572705073e-05, "loss": 46.0, "step": 345 }, { "epoch": 0.04723871936650966, "grad_norm": 0.012491518631577492, "learning_rate": 9.971414865038128e-05, "loss": 46.0, "step": 346 }, { "epoch": 0.04737524745716431, "grad_norm": 0.017480602487921715, "learning_rate": 9.971182217162475e-05, "loss": 46.0, "step": 347 }, { "epoch": 0.04751177554781896, "grad_norm": 0.022275693714618683, "learning_rate": 9.970948629122108e-05, "loss": 46.0, "step": 348 }, { "epoch": 0.04764830363847362, "grad_norm": 0.021590255200862885, "learning_rate": 9.970714100961207e-05, "loss": 46.0, "step": 349 }, { "epoch": 0.04778483172912827, "grad_norm": 0.04023585096001625, "learning_rate": 9.970478632724125e-05, "loss": 46.0, "step": 350 }, { "epoch": 0.04792135981978292, "grad_norm": 0.015047573484480381, "learning_rate": 9.970242224455397e-05, "loss": 46.0, "step": 351 }, { "epoch": 0.04805788791043757, "grad_norm": 0.00992714986205101, "learning_rate": 9.97000487619973e-05, "loss": 46.0, "step": 352 }, { "epoch": 0.04819441600109223, "grad_norm": 0.011110669933259487, "learning_rate": 9.969766588002014e-05, "loss": 46.0, "step": 353 }, { "epoch": 0.048330944091746875, "grad_norm": 0.00723347719758749, "learning_rate": 9.969527359907314e-05, "loss": 46.0, "step": 354 }, { "epoch": 0.04846747218240153, "grad_norm": 0.008030584082007408, "learning_rate": 9.969287191960875e-05, "loss": 46.0, "step": 355 }, { "epoch": 0.04860400027305618, "grad_norm": 0.008942479267716408, "learning_rate": 9.969046084208115e-05, "loss": 46.0, "step": 356 }, { "epoch": 0.04874052836371083, "grad_norm": 0.010328180156648159, "learning_rate": 9.968804036694636e-05, "loss": 46.0, "step": 357 }, { "epoch": 0.048877056454365485, "grad_norm": 0.014530482701957226, "learning_rate": 9.968561049466214e-05, "loss": 46.0, "step": 358 }, { "epoch": 0.04901358454502014, "grad_norm": 0.013858549296855927, "learning_rate": 9.968317122568802e-05, "loss": 46.0, "step": 359 }, { "epoch": 0.04915011263567479, "grad_norm": 0.010014830157160759, "learning_rate": 9.968072256048536e-05, "loss": 46.0, "step": 360 }, { "epoch": 0.04928664072632944, "grad_norm": 0.012682556174695492, "learning_rate": 9.967826449951721e-05, "loss": 46.0, "step": 361 }, { "epoch": 0.049423168816984095, "grad_norm": 0.019593454897403717, "learning_rate": 9.967579704324847e-05, "loss": 46.0, "step": 362 }, { "epoch": 0.04955969690763875, "grad_norm": 0.017988504841923714, "learning_rate": 9.967332019214581e-05, "loss": 46.0, "step": 363 }, { "epoch": 0.049696224998293397, "grad_norm": 0.013040930964052677, "learning_rate": 9.967083394667762e-05, "loss": 46.0, "step": 364 }, { "epoch": 0.04983275308894805, "grad_norm": 0.02687775529921055, "learning_rate": 9.966833830731414e-05, "loss": 46.0, "step": 365 }, { "epoch": 0.049969281179602705, "grad_norm": 0.012713750824332237, "learning_rate": 9.966583327452733e-05, "loss": 46.0, "step": 366 }, { "epoch": 0.05010580927025735, "grad_norm": 0.01567133329808712, "learning_rate": 9.966331884879097e-05, "loss": 46.0, "step": 367 }, { "epoch": 0.05024233736091201, "grad_norm": 0.019353868439793587, "learning_rate": 9.966079503058057e-05, "loss": 46.0, "step": 368 }, { "epoch": 0.05037886545156666, "grad_norm": 0.0210272129625082, "learning_rate": 9.965826182037347e-05, "loss": 46.0, "step": 369 }, { "epoch": 0.050515393542221315, "grad_norm": 
0.021272914484143257, "learning_rate": 9.965571921864874e-05, "loss": 46.0, "step": 370 }, { "epoch": 0.05065192163287596, "grad_norm": 0.026431970298290253, "learning_rate": 9.965316722588726e-05, "loss": 46.0, "step": 371 }, { "epoch": 0.05078844972353062, "grad_norm": 0.017381049692630768, "learning_rate": 9.965060584257164e-05, "loss": 46.0, "step": 372 }, { "epoch": 0.05092497781418527, "grad_norm": 0.025171156972646713, "learning_rate": 9.964803506918634e-05, "loss": 46.0, "step": 373 }, { "epoch": 0.05106150590483992, "grad_norm": 0.02153884805738926, "learning_rate": 9.964545490621751e-05, "loss": 46.0, "step": 374 }, { "epoch": 0.05119803399549457, "grad_norm": 0.015456629917025566, "learning_rate": 9.964286535415315e-05, "loss": 46.0, "step": 375 }, { "epoch": 0.05133456208614923, "grad_norm": 0.02152492292225361, "learning_rate": 9.964026641348298e-05, "loss": 46.0, "step": 376 }, { "epoch": 0.051471090176803874, "grad_norm": 0.021810637786984444, "learning_rate": 9.963765808469853e-05, "loss": 46.0, "step": 377 }, { "epoch": 0.05160761826745853, "grad_norm": 0.03107687458395958, "learning_rate": 9.96350403682931e-05, "loss": 46.0, "step": 378 }, { "epoch": 0.05174414635811318, "grad_norm": 0.02374485693871975, "learning_rate": 9.963241326476174e-05, "loss": 46.0, "step": 379 }, { "epoch": 0.05188067444876784, "grad_norm": 0.023210648447275162, "learning_rate": 9.962977677460132e-05, "loss": 46.0, "step": 380 }, { "epoch": 0.052017202539422484, "grad_norm": 0.02798812836408615, "learning_rate": 9.962713089831046e-05, "loss": 46.0, "step": 381 }, { "epoch": 0.05215373063007714, "grad_norm": 0.020860202610492706, "learning_rate": 9.962447563638953e-05, "loss": 46.0, "step": 382 }, { "epoch": 0.05229025872073179, "grad_norm": 0.022190634161233902, "learning_rate": 9.962181098934073e-05, "loss": 46.0, "step": 383 }, { "epoch": 0.05242678681138644, "grad_norm": 0.017248941585421562, "learning_rate": 9.961913695766801e-05, "loss": 46.0, "step": 384 }, { "epoch": 0.052563314902041094, "grad_norm": 0.027107106521725655, "learning_rate": 9.961645354187706e-05, "loss": 46.0, "step": 385 }, { "epoch": 0.05269984299269575, "grad_norm": 0.024366384372115135, "learning_rate": 9.961376074247537e-05, "loss": 46.0, "step": 386 }, { "epoch": 0.052836371083350396, "grad_norm": 0.02012712135910988, "learning_rate": 9.961105855997225e-05, "loss": 46.0, "step": 387 }, { "epoch": 0.05297289917400505, "grad_norm": 0.024378309026360512, "learning_rate": 9.960834699487873e-05, "loss": 46.0, "step": 388 }, { "epoch": 0.053109427264659705, "grad_norm": 0.021467244252562523, "learning_rate": 9.960562604770762e-05, "loss": 46.0, "step": 389 }, { "epoch": 0.05324595535531436, "grad_norm": 0.02663583494722843, "learning_rate": 9.96028957189735e-05, "loss": 46.0, "step": 390 }, { "epoch": 0.053382483445969006, "grad_norm": 0.02531626634299755, "learning_rate": 9.960015600919278e-05, "loss": 46.0, "step": 391 }, { "epoch": 0.05351901153662366, "grad_norm": 0.0320737324655056, "learning_rate": 9.959740691888357e-05, "loss": 46.0, "step": 392 }, { "epoch": 0.053655539627278315, "grad_norm": 0.02451186813414097, "learning_rate": 9.95946484485658e-05, "loss": 46.0, "step": 393 }, { "epoch": 0.05379206771793296, "grad_norm": 0.017833087593317032, "learning_rate": 9.959188059876115e-05, "loss": 46.0, "step": 394 }, { "epoch": 0.053928595808587616, "grad_norm": 0.019000815227627754, "learning_rate": 9.958910336999308e-05, "loss": 46.0, "step": 395 }, { "epoch": 0.05406512389924227, "grad_norm": 0.02422933466732502, 
"learning_rate": 9.958631676278685e-05, "loss": 46.0, "step": 396 }, { "epoch": 0.05420165198989692, "grad_norm": 0.031288012862205505, "learning_rate": 9.958352077766945e-05, "loss": 46.0, "step": 397 }, { "epoch": 0.05433818008055157, "grad_norm": 0.02622430957853794, "learning_rate": 9.958071541516967e-05, "loss": 46.0, "step": 398 }, { "epoch": 0.054474708171206226, "grad_norm": 0.02882174775004387, "learning_rate": 9.957790067581807e-05, "loss": 46.0, "step": 399 }, { "epoch": 0.05461123626186088, "grad_norm": 0.03686172515153885, "learning_rate": 9.9575076560147e-05, "loss": 46.0, "step": 400 }, { "epoch": 0.05474776435251553, "grad_norm": 0.02976585365831852, "learning_rate": 9.957224306869053e-05, "loss": 46.0, "step": 401 }, { "epoch": 0.05488429244317018, "grad_norm": 0.016752202063798904, "learning_rate": 9.956940020198456e-05, "loss": 46.0, "step": 402 }, { "epoch": 0.05502082053382484, "grad_norm": 0.02107394114136696, "learning_rate": 9.956654796056673e-05, "loss": 46.0, "step": 403 }, { "epoch": 0.055157348624479484, "grad_norm": 0.01509918924421072, "learning_rate": 9.956368634497648e-05, "loss": 46.0, "step": 404 }, { "epoch": 0.05529387671513414, "grad_norm": 0.014063271693885326, "learning_rate": 9.956081535575502e-05, "loss": 46.0, "step": 405 }, { "epoch": 0.05543040480578879, "grad_norm": 0.013264385983347893, "learning_rate": 9.955793499344525e-05, "loss": 46.0, "step": 406 }, { "epoch": 0.05556693289644345, "grad_norm": 0.019103355705738068, "learning_rate": 9.9555045258592e-05, "loss": 46.0, "step": 407 }, { "epoch": 0.055703460987098094, "grad_norm": 0.013714710250496864, "learning_rate": 9.955214615174174e-05, "loss": 46.0, "step": 408 }, { "epoch": 0.05583998907775275, "grad_norm": 0.015945089980959892, "learning_rate": 9.954923767344277e-05, "loss": 46.0, "step": 409 }, { "epoch": 0.0559765171684074, "grad_norm": 0.015214615501463413, "learning_rate": 9.954631982424513e-05, "loss": 46.0, "step": 410 }, { "epoch": 0.05611304525906205, "grad_norm": 0.021529672667384148, "learning_rate": 9.954339260470067e-05, "loss": 46.0, "step": 411 }, { "epoch": 0.056249573349716704, "grad_norm": 0.023824959993362427, "learning_rate": 9.954045601536299e-05, "loss": 46.0, "step": 412 }, { "epoch": 0.05638610144037136, "grad_norm": 0.026986010372638702, "learning_rate": 9.953751005678749e-05, "loss": 46.0, "step": 413 }, { "epoch": 0.056522629531026006, "grad_norm": 0.021499130874872208, "learning_rate": 9.953455472953126e-05, "loss": 46.0, "step": 414 }, { "epoch": 0.05665915762168066, "grad_norm": 0.030798906460404396, "learning_rate": 9.953159003415328e-05, "loss": 46.0, "step": 415 }, { "epoch": 0.056795685712335314, "grad_norm": 0.01969870924949646, "learning_rate": 9.952861597121421e-05, "loss": 46.0, "step": 416 }, { "epoch": 0.05693221380298997, "grad_norm": 0.024998739361763, "learning_rate": 9.952563254127654e-05, "loss": 46.0, "step": 417 }, { "epoch": 0.057068741893644616, "grad_norm": 0.02469976432621479, "learning_rate": 9.952263974490447e-05, "loss": 46.0, "step": 418 }, { "epoch": 0.05720526998429927, "grad_norm": 0.0176350437104702, "learning_rate": 9.951963758266403e-05, "loss": 46.0, "step": 419 }, { "epoch": 0.057341798074953924, "grad_norm": 0.02153296396136284, "learning_rate": 9.951662605512297e-05, "loss": 46.0, "step": 420 }, { "epoch": 0.05747832616560857, "grad_norm": 0.027336645871400833, "learning_rate": 9.951360516285088e-05, "loss": 46.0, "step": 421 }, { "epoch": 0.057614854256263226, "grad_norm": 0.02458326518535614, "learning_rate": 
9.951057490641906e-05, "loss": 46.0, "step": 422 }, { "epoch": 0.05775138234691788, "grad_norm": 0.021053364500403404, "learning_rate": 9.950753528640061e-05, "loss": 46.0, "step": 423 }, { "epoch": 0.05788791043757253, "grad_norm": 0.027241142466664314, "learning_rate": 9.950448630337036e-05, "loss": 46.0, "step": 424 }, { "epoch": 0.05802443852822718, "grad_norm": 0.025739336386322975, "learning_rate": 9.9501427957905e-05, "loss": 46.0, "step": 425 }, { "epoch": 0.058160966618881836, "grad_norm": 0.0272113885730505, "learning_rate": 9.949836025058288e-05, "loss": 46.0, "step": 426 }, { "epoch": 0.05829749470953649, "grad_norm": 0.024557653814554214, "learning_rate": 9.94952831819842e-05, "loss": 46.0, "step": 427 }, { "epoch": 0.05843402280019114, "grad_norm": 0.03066672757267952, "learning_rate": 9.949219675269088e-05, "loss": 46.0, "step": 428 }, { "epoch": 0.05857055089084579, "grad_norm": 0.025212090462446213, "learning_rate": 9.948910096328668e-05, "loss": 46.0, "step": 429 }, { "epoch": 0.058707078981500446, "grad_norm": 0.02717198245227337, "learning_rate": 9.948599581435704e-05, "loss": 46.0, "step": 430 }, { "epoch": 0.058843607072155094, "grad_norm": 0.0255216546356678, "learning_rate": 9.948288130648923e-05, "loss": 46.0, "step": 431 }, { "epoch": 0.05898013516280975, "grad_norm": 0.022869152948260307, "learning_rate": 9.947975744027229e-05, "loss": 46.0, "step": 432 }, { "epoch": 0.0591166632534644, "grad_norm": 0.018529551103711128, "learning_rate": 9.9476624216297e-05, "loss": 46.0, "step": 433 }, { "epoch": 0.05925319134411905, "grad_norm": 0.034000374376773834, "learning_rate": 9.947348163515591e-05, "loss": 46.0, "step": 434 }, { "epoch": 0.059389719434773704, "grad_norm": 0.02614707313477993, "learning_rate": 9.947032969744339e-05, "loss": 46.0, "step": 435 }, { "epoch": 0.05952624752542836, "grad_norm": 0.021684279665350914, "learning_rate": 9.946716840375551e-05, "loss": 46.0, "step": 436 }, { "epoch": 0.05966277561608301, "grad_norm": 0.023319482803344727, "learning_rate": 9.946399775469018e-05, "loss": 46.0, "step": 437 }, { "epoch": 0.05979930370673766, "grad_norm": 0.027700578793883324, "learning_rate": 9.946081775084699e-05, "loss": 46.0, "step": 438 }, { "epoch": 0.059935831797392314, "grad_norm": 0.02492944523692131, "learning_rate": 9.94576283928274e-05, "loss": 46.0, "step": 439 }, { "epoch": 0.06007235988804697, "grad_norm": 0.027233844622969627, "learning_rate": 9.945442968123458e-05, "loss": 46.0, "step": 440 }, { "epoch": 0.060208887978701615, "grad_norm": 0.02674233727157116, "learning_rate": 9.945122161667347e-05, "loss": 46.0, "step": 441 }, { "epoch": 0.06034541606935627, "grad_norm": 0.02005865052342415, "learning_rate": 9.944800419975078e-05, "loss": 46.0, "step": 442 }, { "epoch": 0.060481944160010924, "grad_norm": 0.029115544632077217, "learning_rate": 9.944477743107502e-05, "loss": 46.0, "step": 443 }, { "epoch": 0.06061847225066557, "grad_norm": 0.0221553985029459, "learning_rate": 9.944154131125642e-05, "loss": 46.0, "step": 444 }, { "epoch": 0.060755000341320226, "grad_norm": 0.03231995552778244, "learning_rate": 9.943829584090705e-05, "loss": 46.0, "step": 445 }, { "epoch": 0.06089152843197488, "grad_norm": 0.02758156880736351, "learning_rate": 9.943504102064065e-05, "loss": 46.0, "step": 446 }, { "epoch": 0.061028056522629534, "grad_norm": 0.03414014354348183, "learning_rate": 9.943177685107281e-05, "loss": 46.0, "step": 447 }, { "epoch": 0.06116458461328418, "grad_norm": 0.024990718811750412, "learning_rate": 9.942850333282085e-05, "loss": 
46.0, "step": 448 }, { "epoch": 0.061301112703938836, "grad_norm": 0.046782296150922775, "learning_rate": 9.942522046650386e-05, "loss": 46.0, "step": 449 }, { "epoch": 0.06143764079459349, "grad_norm": 0.061254240572452545, "learning_rate": 9.942192825274275e-05, "loss": 46.0, "step": 450 }, { "epoch": 0.06157416888524814, "grad_norm": 0.030294068157672882, "learning_rate": 9.941862669216008e-05, "loss": 46.0, "step": 451 }, { "epoch": 0.06171069697590279, "grad_norm": 0.02199397422373295, "learning_rate": 9.941531578538033e-05, "loss": 46.0, "step": 452 }, { "epoch": 0.061847225066557446, "grad_norm": 0.030181311070919037, "learning_rate": 9.94119955330296e-05, "loss": 46.0, "step": 453 }, { "epoch": 0.06198375315721209, "grad_norm": 0.02972453087568283, "learning_rate": 9.940866593573586e-05, "loss": 46.0, "step": 454 }, { "epoch": 0.06212028124786675, "grad_norm": 0.01890549436211586, "learning_rate": 9.940532699412881e-05, "loss": 46.0, "step": 455 }, { "epoch": 0.0622568093385214, "grad_norm": 0.030012821778655052, "learning_rate": 9.940197870883992e-05, "loss": 46.0, "step": 456 }, { "epoch": 0.062393337429176056, "grad_norm": 0.02222747914493084, "learning_rate": 9.939862108050243e-05, "loss": 46.0, "step": 457 }, { "epoch": 0.06252986551983071, "grad_norm": 0.02560306154191494, "learning_rate": 9.939525410975134e-05, "loss": 46.0, "step": 458 }, { "epoch": 0.06266639361048536, "grad_norm": 0.02287154458463192, "learning_rate": 9.939187779722342e-05, "loss": 46.0, "step": 459 }, { "epoch": 0.06280292170114, "grad_norm": 0.020005807280540466, "learning_rate": 9.938849214355721e-05, "loss": 46.0, "step": 460 }, { "epoch": 0.06293944979179467, "grad_norm": 0.020549889653921127, "learning_rate": 9.938509714939302e-05, "loss": 46.0, "step": 461 }, { "epoch": 0.06307597788244931, "grad_norm": 0.029612736776471138, "learning_rate": 9.93816928153729e-05, "loss": 46.0, "step": 462 }, { "epoch": 0.06321250597310396, "grad_norm": 0.019080527126789093, "learning_rate": 9.937827914214073e-05, "loss": 46.0, "step": 463 }, { "epoch": 0.06334903406375862, "grad_norm": 0.026794377714395523, "learning_rate": 9.937485613034208e-05, "loss": 46.0, "step": 464 }, { "epoch": 0.06348556215441327, "grad_norm": 0.024645088240504265, "learning_rate": 9.937142378062432e-05, "loss": 46.0, "step": 465 }, { "epoch": 0.06362209024506792, "grad_norm": 0.020400095731019974, "learning_rate": 9.93679820936366e-05, "loss": 46.0, "step": 466 }, { "epoch": 0.06375861833572258, "grad_norm": 0.026544470340013504, "learning_rate": 9.936453107002982e-05, "loss": 46.0, "step": 467 }, { "epoch": 0.06389514642637723, "grad_norm": 0.02441665343940258, "learning_rate": 9.936107071045664e-05, "loss": 46.0, "step": 468 }, { "epoch": 0.06403167451703187, "grad_norm": 0.03043745644390583, "learning_rate": 9.93576010155715e-05, "loss": 46.0, "step": 469 }, { "epoch": 0.06416820260768653, "grad_norm": 0.03153218328952789, "learning_rate": 9.93541219860306e-05, "loss": 46.0, "step": 470 }, { "epoch": 0.06430473069834118, "grad_norm": 0.018797485157847404, "learning_rate": 9.93506336224919e-05, "loss": 46.0, "step": 471 }, { "epoch": 0.06444125878899584, "grad_norm": 0.03075023554265499, "learning_rate": 9.934713592561513e-05, "loss": 46.0, "step": 472 }, { "epoch": 0.06457778687965049, "grad_norm": 0.029182186350226402, "learning_rate": 9.934362889606179e-05, "loss": 46.0, "step": 473 }, { "epoch": 0.06471431497030514, "grad_norm": 0.027218403294682503, "learning_rate": 9.934011253449513e-05, "loss": 46.0, "step": 474 }, { "epoch": 
0.0648508430609598, "grad_norm": 0.03143514692783356, "learning_rate": 9.933658684158018e-05, "loss": 46.0, "step": 475 }, { "epoch": 0.06498737115161445, "grad_norm": 0.028574995696544647, "learning_rate": 9.933305181798373e-05, "loss": 46.0, "step": 476 }, { "epoch": 0.06512389924226909, "grad_norm": 0.023349422961473465, "learning_rate": 9.932950746437435e-05, "loss": 46.0, "step": 477 }, { "epoch": 0.06526042733292375, "grad_norm": 0.02615458145737648, "learning_rate": 9.932595378142233e-05, "loss": 46.0, "step": 478 }, { "epoch": 0.0653969554235784, "grad_norm": 0.0369233600795269, "learning_rate": 9.932239076979976e-05, "loss": 46.0, "step": 479 }, { "epoch": 0.06553348351423305, "grad_norm": 0.03040166385471821, "learning_rate": 9.931881843018052e-05, "loss": 46.0, "step": 480 }, { "epoch": 0.06567001160488771, "grad_norm": 0.03446846827864647, "learning_rate": 9.931523676324016e-05, "loss": 46.0, "step": 481 }, { "epoch": 0.06580653969554236, "grad_norm": 0.036189354956150055, "learning_rate": 9.931164576965612e-05, "loss": 46.0, "step": 482 }, { "epoch": 0.065943067786197, "grad_norm": 0.034723829478025436, "learning_rate": 9.93080454501075e-05, "loss": 46.0, "step": 483 }, { "epoch": 0.06607959587685167, "grad_norm": 0.0284198559820652, "learning_rate": 9.930443580527519e-05, "loss": 46.0, "step": 484 }, { "epoch": 0.06621612396750631, "grad_norm": 0.030604802072048187, "learning_rate": 9.93008168358419e-05, "loss": 46.0, "step": 485 }, { "epoch": 0.06635265205816096, "grad_norm": 0.033503416925668716, "learning_rate": 9.929718854249204e-05, "loss": 46.0, "step": 486 }, { "epoch": 0.06648918014881562, "grad_norm": 0.022747470065951347, "learning_rate": 9.92935509259118e-05, "loss": 46.0, "step": 487 }, { "epoch": 0.06662570823947027, "grad_norm": 0.03021126799285412, "learning_rate": 9.928990398678914e-05, "loss": 46.0, "step": 488 }, { "epoch": 0.06676223633012493, "grad_norm": 0.035819657146930695, "learning_rate": 9.928624772581379e-05, "loss": 46.0, "step": 489 }, { "epoch": 0.06689876442077958, "grad_norm": 0.022918762639164925, "learning_rate": 9.92825821436772e-05, "loss": 46.0, "step": 490 }, { "epoch": 0.06703529251143422, "grad_norm": 0.03715512156486511, "learning_rate": 9.927890724107265e-05, "loss": 46.0, "step": 491 }, { "epoch": 0.06717182060208889, "grad_norm": 0.030694128945469856, "learning_rate": 9.927522301869515e-05, "loss": 46.0, "step": 492 }, { "epoch": 0.06730834869274353, "grad_norm": 0.02562526799738407, "learning_rate": 9.927152947724143e-05, "loss": 46.0, "step": 493 }, { "epoch": 0.06744487678339818, "grad_norm": 0.04457505792379379, "learning_rate": 9.926782661741007e-05, "loss": 46.0, "step": 494 }, { "epoch": 0.06758140487405284, "grad_norm": 0.04124419391155243, "learning_rate": 9.926411443990135e-05, "loss": 46.0, "step": 495 }, { "epoch": 0.06771793296470749, "grad_norm": 0.049940332770347595, "learning_rate": 9.926039294541733e-05, "loss": 46.0, "step": 496 }, { "epoch": 0.06785446105536214, "grad_norm": 0.05609569698572159, "learning_rate": 9.925666213466181e-05, "loss": 46.0, "step": 497 }, { "epoch": 0.0679909891460168, "grad_norm": 0.04243920370936394, "learning_rate": 9.92529220083404e-05, "loss": 46.0, "step": 498 }, { "epoch": 0.06812751723667144, "grad_norm": 0.053366899490356445, "learning_rate": 9.924917256716042e-05, "loss": 46.0, "step": 499 }, { "epoch": 0.06826404532732609, "grad_norm": 0.08013661950826645, "learning_rate": 9.9245413811831e-05, "loss": 46.0, "step": 500 }, { "epoch": 0.06840057341798075, "grad_norm": 
0.02672446146607399, "learning_rate": 9.924164574306298e-05, "loss": 46.0, "step": 501 }, { "epoch": 0.0685371015086354, "grad_norm": 0.012860066257417202, "learning_rate": 9.923786836156901e-05, "loss": 46.0, "step": 502 }, { "epoch": 0.06867362959929005, "grad_norm": 0.040421728044748306, "learning_rate": 9.923408166806347e-05, "loss": 46.0, "step": 503 }, { "epoch": 0.06881015768994471, "grad_norm": 0.022842317819595337, "learning_rate": 9.923028566326252e-05, "loss": 46.0, "step": 504 }, { "epoch": 0.06894668578059936, "grad_norm": 0.02556697465479374, "learning_rate": 9.922648034788404e-05, "loss": 46.0, "step": 505 }, { "epoch": 0.069083213871254, "grad_norm": 0.03154052793979645, "learning_rate": 9.922266572264775e-05, "loss": 46.0, "step": 506 }, { "epoch": 0.06921974196190867, "grad_norm": 0.0380636565387249, "learning_rate": 9.921884178827506e-05, "loss": 46.0, "step": 507 }, { "epoch": 0.06935627005256331, "grad_norm": 0.027142710983753204, "learning_rate": 9.921500854548915e-05, "loss": 46.0, "step": 508 }, { "epoch": 0.06949279814321797, "grad_norm": 0.03341107815504074, "learning_rate": 9.921116599501498e-05, "loss": 46.0, "step": 509 }, { "epoch": 0.06962932623387262, "grad_norm": 0.018783489242196083, "learning_rate": 9.920731413757929e-05, "loss": 46.0, "step": 510 }, { "epoch": 0.06976585432452727, "grad_norm": 0.02644832246005535, "learning_rate": 9.920345297391054e-05, "loss": 46.0, "step": 511 }, { "epoch": 0.06990238241518193, "grad_norm": 0.02421944960951805, "learning_rate": 9.919958250473895e-05, "loss": 46.0, "step": 512 }, { "epoch": 0.07003891050583658, "grad_norm": 0.02774537168443203, "learning_rate": 9.919570273079653e-05, "loss": 46.0, "step": 513 }, { "epoch": 0.07017543859649122, "grad_norm": 0.028812702745199203, "learning_rate": 9.919181365281703e-05, "loss": 46.0, "step": 514 }, { "epoch": 0.07031196668714589, "grad_norm": 0.03555852174758911, "learning_rate": 9.918791527153597e-05, "loss": 46.0, "step": 515 }, { "epoch": 0.07044849477780053, "grad_norm": 0.028417788445949554, "learning_rate": 9.918400758769063e-05, "loss": 46.0, "step": 516 }, { "epoch": 0.07058502286845518, "grad_norm": 0.029636088758707047, "learning_rate": 9.918009060202001e-05, "loss": 46.0, "step": 517 }, { "epoch": 0.07072155095910984, "grad_norm": 0.024642419070005417, "learning_rate": 9.917616431526491e-05, "loss": 46.0, "step": 518 }, { "epoch": 0.07085807904976449, "grad_norm": 0.037331245839595795, "learning_rate": 9.917222872816792e-05, "loss": 46.0, "step": 519 }, { "epoch": 0.07099460714041914, "grad_norm": 0.025924591347575188, "learning_rate": 9.916828384147331e-05, "loss": 46.0, "step": 520 }, { "epoch": 0.0711311352310738, "grad_norm": 0.03282320126891136, "learning_rate": 9.916432965592716e-05, "loss": 46.0, "step": 521 }, { "epoch": 0.07126766332172844, "grad_norm": 0.023265549913048744, "learning_rate": 9.91603661722773e-05, "loss": 46.0, "step": 522 }, { "epoch": 0.07140419141238309, "grad_norm": 0.029068903997540474, "learning_rate": 9.915639339127334e-05, "loss": 46.0, "step": 523 }, { "epoch": 0.07154071950303775, "grad_norm": 0.03129446133971214, "learning_rate": 9.915241131366657e-05, "loss": 46.0, "step": 524 }, { "epoch": 0.0716772475936924, "grad_norm": 0.03483783081173897, "learning_rate": 9.914841994021014e-05, "loss": 46.0, "step": 525 }, { "epoch": 0.07181377568434705, "grad_norm": 0.03457973897457123, "learning_rate": 9.914441927165888e-05, "loss": 46.0, "step": 526 }, { "epoch": 0.07195030377500171, "grad_norm": 0.03264329582452774, "learning_rate": 
9.914040930876942e-05, "loss": 46.0, "step": 527 }, { "epoch": 0.07208683186565636, "grad_norm": 0.022817546501755714, "learning_rate": 9.913639005230016e-05, "loss": 46.0, "step": 528 }, { "epoch": 0.07222335995631102, "grad_norm": 0.033075399696826935, "learning_rate": 9.913236150301119e-05, "loss": 46.0, "step": 529 }, { "epoch": 0.07235988804696566, "grad_norm": 0.030642932280898094, "learning_rate": 9.912832366166442e-05, "loss": 46.0, "step": 530 }, { "epoch": 0.07249641613762031, "grad_norm": 0.029940955340862274, "learning_rate": 9.91242765290235e-05, "loss": 46.0, "step": 531 }, { "epoch": 0.07263294422827497, "grad_norm": 0.026094570755958557, "learning_rate": 9.912022010585384e-05, "loss": 46.0, "step": 532 }, { "epoch": 0.07276947231892962, "grad_norm": 0.036322589963674545, "learning_rate": 9.911615439292259e-05, "loss": 46.0, "step": 533 }, { "epoch": 0.07290600040958427, "grad_norm": 0.033507090061903, "learning_rate": 9.91120793909987e-05, "loss": 46.0, "step": 534 }, { "epoch": 0.07304252850023893, "grad_norm": 0.03490331768989563, "learning_rate": 9.91079951008528e-05, "loss": 46.0, "step": 535 }, { "epoch": 0.07317905659089358, "grad_norm": 0.03710697591304779, "learning_rate": 9.910390152325736e-05, "loss": 46.0, "step": 536 }, { "epoch": 0.07331558468154822, "grad_norm": 0.02814531698822975, "learning_rate": 9.909979865898655e-05, "loss": 46.0, "step": 537 }, { "epoch": 0.07345211277220288, "grad_norm": 0.024011477828025818, "learning_rate": 9.909568650881633e-05, "loss": 46.0, "step": 538 }, { "epoch": 0.07358864086285753, "grad_norm": 0.04085537791252136, "learning_rate": 9.90915650735244e-05, "loss": 46.0, "step": 539 }, { "epoch": 0.07372516895351218, "grad_norm": 0.027113674208521843, "learning_rate": 9.90874343538902e-05, "loss": 46.0, "step": 540 }, { "epoch": 0.07386169704416684, "grad_norm": 0.02620992809534073, "learning_rate": 9.908329435069495e-05, "loss": 46.0, "step": 541 }, { "epoch": 0.07399822513482149, "grad_norm": 0.023083044216036797, "learning_rate": 9.907914506472165e-05, "loss": 46.0, "step": 542 }, { "epoch": 0.07413475322547614, "grad_norm": 0.03674302622675896, "learning_rate": 9.907498649675498e-05, "loss": 46.0, "step": 543 }, { "epoch": 0.0742712813161308, "grad_norm": 0.039842259138822556, "learning_rate": 9.907081864758146e-05, "loss": 46.0, "step": 544 }, { "epoch": 0.07440780940678544, "grad_norm": 0.03394661471247673, "learning_rate": 9.90666415179893e-05, "loss": 46.0, "step": 545 }, { "epoch": 0.0745443374974401, "grad_norm": 0.03382465988397598, "learning_rate": 9.906245510876851e-05, "loss": 46.0, "step": 546 }, { "epoch": 0.07468086558809475, "grad_norm": 0.06611469388008118, "learning_rate": 9.905825942071081e-05, "loss": 46.0, "step": 547 }, { "epoch": 0.0748173936787494, "grad_norm": 0.043424852192401886, "learning_rate": 9.905405445460972e-05, "loss": 46.0, "step": 548 }, { "epoch": 0.07495392176940406, "grad_norm": 0.09268485009670258, "learning_rate": 9.904984021126049e-05, "loss": 46.0, "step": 549 }, { "epoch": 0.07509044986005871, "grad_norm": 0.06914953887462616, "learning_rate": 9.904561669146014e-05, "loss": 46.0, "step": 550 }, { "epoch": 0.07522697795071336, "grad_norm": 0.04720338433980942, "learning_rate": 9.904138389600743e-05, "loss": 46.0, "step": 551 }, { "epoch": 0.07536350604136802, "grad_norm": 0.04322914034128189, "learning_rate": 9.903714182570287e-05, "loss": 46.0, "step": 552 }, { "epoch": 0.07550003413202266, "grad_norm": 0.023460084572434425, "learning_rate": 9.903289048134874e-05, "loss": 46.0, 
"step": 553 }, { "epoch": 0.07563656222267731, "grad_norm": 0.033462002873420715, "learning_rate": 9.902862986374905e-05, "loss": 46.0, "step": 554 }, { "epoch": 0.07577309031333197, "grad_norm": 0.030122894793748856, "learning_rate": 9.902435997370962e-05, "loss": 46.0, "step": 555 }, { "epoch": 0.07590961840398662, "grad_norm": 0.05705294758081436, "learning_rate": 9.902008081203795e-05, "loss": 46.0, "step": 556 }, { "epoch": 0.07604614649464127, "grad_norm": 0.05319324508309364, "learning_rate": 9.901579237954335e-05, "loss": 46.0, "step": 557 }, { "epoch": 0.07618267458529593, "grad_norm": 0.0319780670106411, "learning_rate": 9.901149467703685e-05, "loss": 46.0, "step": 558 }, { "epoch": 0.07631920267595058, "grad_norm": 0.02941146306693554, "learning_rate": 9.900718770533121e-05, "loss": 46.0, "step": 559 }, { "epoch": 0.07645573076660522, "grad_norm": 0.029848946258425713, "learning_rate": 9.900287146524106e-05, "loss": 46.0, "step": 560 }, { "epoch": 0.07659225885725988, "grad_norm": 0.03975166380405426, "learning_rate": 9.899854595758263e-05, "loss": 46.0, "step": 561 }, { "epoch": 0.07672878694791453, "grad_norm": 0.031236477196216583, "learning_rate": 9.899421118317398e-05, "loss": 46.0, "step": 562 }, { "epoch": 0.07686531503856918, "grad_norm": 0.02150660753250122, "learning_rate": 9.898986714283494e-05, "loss": 46.0, "step": 563 }, { "epoch": 0.07700184312922384, "grad_norm": 0.04006044566631317, "learning_rate": 9.898551383738706e-05, "loss": 46.0, "step": 564 }, { "epoch": 0.07713837121987849, "grad_norm": 0.020833930000662804, "learning_rate": 9.898115126765364e-05, "loss": 46.0, "step": 565 }, { "epoch": 0.07727489931053315, "grad_norm": 0.028323480859398842, "learning_rate": 9.897677943445975e-05, "loss": 46.0, "step": 566 }, { "epoch": 0.0774114274011878, "grad_norm": 0.02369450032711029, "learning_rate": 9.89723983386322e-05, "loss": 46.0, "step": 567 }, { "epoch": 0.07754795549184244, "grad_norm": 0.030176835134625435, "learning_rate": 9.896800798099956e-05, "loss": 46.0, "step": 568 }, { "epoch": 0.0776844835824971, "grad_norm": 0.03284559026360512, "learning_rate": 9.896360836239215e-05, "loss": 46.0, "step": 569 }, { "epoch": 0.07782101167315175, "grad_norm": 0.04267904907464981, "learning_rate": 9.895919948364203e-05, "loss": 46.0, "step": 570 }, { "epoch": 0.0779575397638064, "grad_norm": 0.026012932881712914, "learning_rate": 9.895478134558304e-05, "loss": 46.0, "step": 571 }, { "epoch": 0.07809406785446106, "grad_norm": 0.02736637555062771, "learning_rate": 9.895035394905073e-05, "loss": 46.0, "step": 572 }, { "epoch": 0.07823059594511571, "grad_norm": 0.027951430529356003, "learning_rate": 9.894591729488242e-05, "loss": 46.0, "step": 573 }, { "epoch": 0.07836712403577036, "grad_norm": 0.04004766792058945, "learning_rate": 9.894147138391721e-05, "loss": 46.0, "step": 574 }, { "epoch": 0.07850365212642502, "grad_norm": 0.02744406647980213, "learning_rate": 9.893701621699589e-05, "loss": 46.0, "step": 575 }, { "epoch": 0.07864018021707966, "grad_norm": 0.05032196268439293, "learning_rate": 9.893255179496106e-05, "loss": 46.0, "step": 576 }, { "epoch": 0.07877670830773431, "grad_norm": 0.03171202167868614, "learning_rate": 9.892807811865704e-05, "loss": 46.0, "step": 577 }, { "epoch": 0.07891323639838897, "grad_norm": 0.03299544379115105, "learning_rate": 9.892359518892988e-05, "loss": 46.0, "step": 578 }, { "epoch": 0.07904976448904362, "grad_norm": 0.04001383110880852, "learning_rate": 9.891910300662744e-05, "loss": 46.0001, "step": 579 }, { "epoch": 
0.07918629257969827, "grad_norm": 0.02912035398185253, "learning_rate": 9.891460157259928e-05, "loss": 46.0, "step": 580 }, { "epoch": 0.07932282067035293, "grad_norm": 0.031307581812143326, "learning_rate": 9.891009088769673e-05, "loss": 46.0, "step": 581 }, { "epoch": 0.07945934876100758, "grad_norm": 0.024721374735236168, "learning_rate": 9.890557095277284e-05, "loss": 46.0002, "step": 582 }, { "epoch": 0.07959587685166222, "grad_norm": 0.035373374819755554, "learning_rate": 9.890104176868247e-05, "loss": 46.0, "step": 583 }, { "epoch": 0.07973240494231688, "grad_norm": 0.038460977375507355, "learning_rate": 9.889650333628218e-05, "loss": 46.0, "step": 584 }, { "epoch": 0.07986893303297153, "grad_norm": 0.037929717451334, "learning_rate": 9.889195565643025e-05, "loss": 46.0007, "step": 585 }, { "epoch": 0.08000546112362619, "grad_norm": 0.03317030519247055, "learning_rate": 9.888739872998683e-05, "loss": 46.0, "step": 586 }, { "epoch": 0.08014198921428084, "grad_norm": 0.021303853020071983, "learning_rate": 9.888283255781367e-05, "loss": 46.0004, "step": 587 }, { "epoch": 0.08027851730493549, "grad_norm": 0.0321236252784729, "learning_rate": 9.887825714077438e-05, "loss": 46.0, "step": 588 }, { "epoch": 0.08041504539559015, "grad_norm": 0.03924312815070152, "learning_rate": 9.887367247973425e-05, "loss": 46.0, "step": 589 }, { "epoch": 0.0805515734862448, "grad_norm": 0.04108710587024689, "learning_rate": 9.886907857556037e-05, "loss": 46.0004, "step": 590 }, { "epoch": 0.08068810157689944, "grad_norm": 0.03146194666624069, "learning_rate": 9.886447542912153e-05, "loss": 46.0, "step": 591 }, { "epoch": 0.0808246296675541, "grad_norm": 0.03142925351858139, "learning_rate": 9.885986304128829e-05, "loss": 46.0009, "step": 592 }, { "epoch": 0.08096115775820875, "grad_norm": 0.03831726685166359, "learning_rate": 9.885524141293298e-05, "loss": 46.0007, "step": 593 }, { "epoch": 0.0810976858488634, "grad_norm": 0.06970760226249695, "learning_rate": 9.885061054492965e-05, "loss": 46.0026, "step": 594 }, { "epoch": 0.08123421393951806, "grad_norm": 0.057924021035432816, "learning_rate": 9.884597043815409e-05, "loss": 46.002, "step": 595 }, { "epoch": 0.08137074203017271, "grad_norm": 0.06073028966784477, "learning_rate": 9.884132109348386e-05, "loss": 46.0008, "step": 596 }, { "epoch": 0.08150727012082735, "grad_norm": 0.04543042927980423, "learning_rate": 9.883666251179826e-05, "loss": 46.0014, "step": 597 }, { "epoch": 0.08164379821148202, "grad_norm": 0.07863510400056839, "learning_rate": 9.883199469397831e-05, "loss": 46.0047, "step": 598 }, { "epoch": 0.08178032630213666, "grad_norm": 0.084328792989254, "learning_rate": 9.882731764090684e-05, "loss": 46.0037, "step": 599 }, { "epoch": 0.08191685439279131, "grad_norm": 0.06102737411856651, "learning_rate": 9.882263135346836e-05, "loss": 46.0, "step": 600 }, { "epoch": 0.08205338248344597, "grad_norm": 0.02868320234119892, "learning_rate": 9.881793583254918e-05, "loss": 46.0006, "step": 601 }, { "epoch": 0.08218991057410062, "grad_norm": 0.04926200583577156, "learning_rate": 9.881323107903731e-05, "loss": 46.0004, "step": 602 }, { "epoch": 0.08232643866475527, "grad_norm": 0.0378955714404583, "learning_rate": 9.880851709382253e-05, "loss": 46.0017, "step": 603 }, { "epoch": 0.08246296675540993, "grad_norm": 0.05592959374189377, "learning_rate": 9.880379387779637e-05, "loss": 46.0002, "step": 604 }, { "epoch": 0.08259949484606458, "grad_norm": 0.04601922631263733, "learning_rate": 9.879906143185209e-05, "loss": 46.0017, "step": 605 }, { 
"epoch": 0.08273602293671924, "grad_norm": 0.029013510793447495, "learning_rate": 9.87943197568847e-05, "loss": 46.0012, "step": 606 }, { "epoch": 0.08287255102737388, "grad_norm": 0.03496301919221878, "learning_rate": 9.878956885379098e-05, "loss": 46.0009, "step": 607 }, { "epoch": 0.08300907911802853, "grad_norm": 0.04794227331876755, "learning_rate": 9.878480872346943e-05, "loss": 46.0015, "step": 608 }, { "epoch": 0.08314560720868319, "grad_norm": 0.0389663390815258, "learning_rate": 9.878003936682028e-05, "loss": 46.0007, "step": 609 }, { "epoch": 0.08328213529933784, "grad_norm": 0.03340943530201912, "learning_rate": 9.877526078474553e-05, "loss": 46.0, "step": 610 }, { "epoch": 0.08341866338999249, "grad_norm": 0.050008054822683334, "learning_rate": 9.877047297814895e-05, "loss": 46.0001, "step": 611 }, { "epoch": 0.08355519148064715, "grad_norm": 0.0597776360809803, "learning_rate": 9.876567594793598e-05, "loss": 46.0005, "step": 612 }, { "epoch": 0.0836917195713018, "grad_norm": 0.027893688529729843, "learning_rate": 9.876086969501388e-05, "loss": 46.0006, "step": 613 }, { "epoch": 0.08382824766195644, "grad_norm": 0.04583516716957092, "learning_rate": 9.875605422029161e-05, "loss": 46.001, "step": 614 }, { "epoch": 0.0839647757526111, "grad_norm": 0.021992016583681107, "learning_rate": 9.87512295246799e-05, "loss": 46.0016, "step": 615 }, { "epoch": 0.08410130384326575, "grad_norm": 0.03530313819646835, "learning_rate": 9.874639560909117e-05, "loss": 46.0003, "step": 616 }, { "epoch": 0.0842378319339204, "grad_norm": 0.019832070916891098, "learning_rate": 9.874155247443969e-05, "loss": 46.0031, "step": 617 }, { "epoch": 0.08437436002457506, "grad_norm": 0.023453380912542343, "learning_rate": 9.873670012164135e-05, "loss": 46.0009, "step": 618 }, { "epoch": 0.08451088811522971, "grad_norm": 0.0323169007897377, "learning_rate": 9.873183855161389e-05, "loss": 46.0002, "step": 619 }, { "epoch": 0.08464741620588435, "grad_norm": 0.03367290645837784, "learning_rate": 9.87269677652767e-05, "loss": 46.0011, "step": 620 }, { "epoch": 0.08478394429653902, "grad_norm": 0.029752835631370544, "learning_rate": 9.872208776355099e-05, "loss": 46.0004, "step": 621 }, { "epoch": 0.08492047238719366, "grad_norm": 0.03498440235853195, "learning_rate": 9.871719854735965e-05, "loss": 46.0003, "step": 622 }, { "epoch": 0.08505700047784832, "grad_norm": 0.03832085430622101, "learning_rate": 9.871230011762735e-05, "loss": 46.0, "step": 623 }, { "epoch": 0.08519352856850297, "grad_norm": 0.029333464801311493, "learning_rate": 9.870739247528054e-05, "loss": 46.0009, "step": 624 }, { "epoch": 0.08533005665915762, "grad_norm": 0.03186045587062836, "learning_rate": 9.87024756212473e-05, "loss": 46.0002, "step": 625 }, { "epoch": 0.08546658474981228, "grad_norm": 0.04231792315840721, "learning_rate": 9.869754955645759e-05, "loss": 46.0015, "step": 626 }, { "epoch": 0.08560311284046693, "grad_norm": 0.029023781418800354, "learning_rate": 9.869261428184297e-05, "loss": 46.0009, "step": 627 }, { "epoch": 0.08573964093112157, "grad_norm": 0.02962622418999672, "learning_rate": 9.868766979833686e-05, "loss": 46.0014, "step": 628 }, { "epoch": 0.08587616902177624, "grad_norm": 0.027645183727145195, "learning_rate": 9.868271610687436e-05, "loss": 46.0021, "step": 629 }, { "epoch": 0.08601269711243088, "grad_norm": 0.03420056775212288, "learning_rate": 9.867775320839235e-05, "loss": 46.0012, "step": 630 }, { "epoch": 0.08614922520308553, "grad_norm": 0.032719384878873825, "learning_rate": 9.867278110382938e-05, 
"loss": 46.0002, "step": 631 }, { "epoch": 0.08628575329374019, "grad_norm": 0.028429344296455383, "learning_rate": 9.866779979412583e-05, "loss": 46.0004, "step": 632 }, { "epoch": 0.08642228138439484, "grad_norm": 0.03253468498587608, "learning_rate": 9.866280928022378e-05, "loss": 46.0002, "step": 633 }, { "epoch": 0.08655880947504949, "grad_norm": 0.026662984862923622, "learning_rate": 9.865780956306703e-05, "loss": 46.0005, "step": 634 }, { "epoch": 0.08669533756570415, "grad_norm": 0.033064741641283035, "learning_rate": 9.865280064360116e-05, "loss": 46.0007, "step": 635 }, { "epoch": 0.0868318656563588, "grad_norm": 0.02249247021973133, "learning_rate": 9.864778252277345e-05, "loss": 46.003, "step": 636 }, { "epoch": 0.08696839374701344, "grad_norm": 0.03309246152639389, "learning_rate": 9.864275520153296e-05, "loss": 46.0016, "step": 637 }, { "epoch": 0.0871049218376681, "grad_norm": 0.040317103266716, "learning_rate": 9.863771868083048e-05, "loss": 46.0003, "step": 638 }, { "epoch": 0.08724144992832275, "grad_norm": 0.0358666330575943, "learning_rate": 9.863267296161849e-05, "loss": 46.0008, "step": 639 }, { "epoch": 0.0873779780189774, "grad_norm": 0.03177111968398094, "learning_rate": 9.862761804485128e-05, "loss": 46.001, "step": 640 }, { "epoch": 0.08751450610963206, "grad_norm": 0.03193167969584465, "learning_rate": 9.862255393148487e-05, "loss": 46.0009, "step": 641 }, { "epoch": 0.0876510342002867, "grad_norm": 0.03407769277691841, "learning_rate": 9.861748062247697e-05, "loss": 46.0013, "step": 642 }, { "epoch": 0.08778756229094137, "grad_norm": 0.08346112072467804, "learning_rate": 9.861239811878707e-05, "loss": 46.0024, "step": 643 }, { "epoch": 0.08792409038159602, "grad_norm": 0.09646159410476685, "learning_rate": 9.860730642137639e-05, "loss": 46.0021, "step": 644 }, { "epoch": 0.08806061847225066, "grad_norm": 0.03354816883802414, "learning_rate": 9.860220553120787e-05, "loss": 46.0016, "step": 645 }, { "epoch": 0.08819714656290532, "grad_norm": 0.08075177669525146, "learning_rate": 9.859709544924624e-05, "loss": 46.0025, "step": 646 }, { "epoch": 0.08833367465355997, "grad_norm": 0.07125189900398254, "learning_rate": 9.85919761764579e-05, "loss": 46.0044, "step": 647 }, { "epoch": 0.08847020274421462, "grad_norm": 0.07120703905820847, "learning_rate": 9.858684771381103e-05, "loss": 46.0054, "step": 648 }, { "epoch": 0.08860673083486928, "grad_norm": 0.04338957369327545, "learning_rate": 9.858171006227556e-05, "loss": 46.0042, "step": 649 }, { "epoch": 0.08874325892552393, "grad_norm": 0.08091072738170624, "learning_rate": 9.857656322282313e-05, "loss": 46.0, "step": 650 }, { "epoch": 0.08887978701617857, "grad_norm": 0.03413210064172745, "learning_rate": 9.85714071964271e-05, "loss": 46.0021, "step": 651 }, { "epoch": 0.08901631510683324, "grad_norm": 0.02347501739859581, "learning_rate": 9.856624198406262e-05, "loss": 46.0015, "step": 652 }, { "epoch": 0.08915284319748788, "grad_norm": 0.047690894454717636, "learning_rate": 9.856106758670654e-05, "loss": 46.0015, "step": 653 }, { "epoch": 0.08928937128814253, "grad_norm": 0.054183200001716614, "learning_rate": 9.855588400533746e-05, "loss": 46.0022, "step": 654 }, { "epoch": 0.08942589937879719, "grad_norm": 0.049700066447257996, "learning_rate": 9.855069124093572e-05, "loss": 46.0023, "step": 655 }, { "epoch": 0.08956242746945184, "grad_norm": 0.02983039803802967, "learning_rate": 9.854548929448339e-05, "loss": 46.005, "step": 656 }, { "epoch": 0.08969895556010649, "grad_norm": 0.054335612803697586, 
"learning_rate": 9.854027816696425e-05, "loss": 46.0019, "step": 657 }, { "epoch": 0.08983548365076115, "grad_norm": 0.03426862508058548, "learning_rate": 9.853505785936388e-05, "loss": 46.0021, "step": 658 }, { "epoch": 0.0899720117414158, "grad_norm": 0.05844447761774063, "learning_rate": 9.852982837266955e-05, "loss": 46.001, "step": 659 }, { "epoch": 0.09010853983207044, "grad_norm": 0.0295439250767231, "learning_rate": 9.852458970787026e-05, "loss": 46.0019, "step": 660 }, { "epoch": 0.0902450679227251, "grad_norm": 0.03558456897735596, "learning_rate": 9.851934186595679e-05, "loss": 46.0007, "step": 661 }, { "epoch": 0.09038159601337975, "grad_norm": 0.03447722643613815, "learning_rate": 9.85140848479216e-05, "loss": 46.0007, "step": 662 }, { "epoch": 0.09051812410403441, "grad_norm": 0.05700782313942909, "learning_rate": 9.850881865475895e-05, "loss": 46.0003, "step": 663 }, { "epoch": 0.09065465219468906, "grad_norm": 0.03729323670268059, "learning_rate": 9.850354328746474e-05, "loss": 46.0015, "step": 664 }, { "epoch": 0.0907911802853437, "grad_norm": 0.059311412274837494, "learning_rate": 9.849825874703671e-05, "loss": 46.0021, "step": 665 }, { "epoch": 0.09092770837599837, "grad_norm": 0.048485077917575836, "learning_rate": 9.849296503447427e-05, "loss": 46.0006, "step": 666 }, { "epoch": 0.09106423646665301, "grad_norm": 0.040473971515893936, "learning_rate": 9.848766215077858e-05, "loss": 46.0002, "step": 667 }, { "epoch": 0.09120076455730766, "grad_norm": 0.0361190140247345, "learning_rate": 9.848235009695255e-05, "loss": 46.001, "step": 668 }, { "epoch": 0.09133729264796232, "grad_norm": 0.041401371359825134, "learning_rate": 9.84770288740008e-05, "loss": 46.0017, "step": 669 }, { "epoch": 0.09147382073861697, "grad_norm": 0.035540927201509476, "learning_rate": 9.847169848292971e-05, "loss": 46.0011, "step": 670 }, { "epoch": 0.09161034882927162, "grad_norm": 0.0288468636572361, "learning_rate": 9.846635892474736e-05, "loss": 46.0007, "step": 671 }, { "epoch": 0.09174687691992628, "grad_norm": 0.04629570245742798, "learning_rate": 9.84610102004636e-05, "loss": 46.0014, "step": 672 }, { "epoch": 0.09188340501058093, "grad_norm": 0.0670638307929039, "learning_rate": 9.845565231108998e-05, "loss": 46.001, "step": 673 }, { "epoch": 0.09201993310123557, "grad_norm": 0.04425930231809616, "learning_rate": 9.845028525763982e-05, "loss": 46.0008, "step": 674 }, { "epoch": 0.09215646119189023, "grad_norm": 0.040495071560144424, "learning_rate": 9.844490904112813e-05, "loss": 46.0011, "step": 675 }, { "epoch": 0.09229298928254488, "grad_norm": 0.05850289389491081, "learning_rate": 9.84395236625717e-05, "loss": 46.0006, "step": 676 }, { "epoch": 0.09242951737319953, "grad_norm": 0.06842999160289764, "learning_rate": 9.843412912298902e-05, "loss": 46.0005, "step": 677 }, { "epoch": 0.09256604546385419, "grad_norm": 0.07399780303239822, "learning_rate": 9.84287254234003e-05, "loss": 46.0018, "step": 678 }, { "epoch": 0.09270257355450884, "grad_norm": 0.06883575767278671, "learning_rate": 9.842331256482753e-05, "loss": 46.0027, "step": 679 }, { "epoch": 0.0928391016451635, "grad_norm": 0.02609880268573761, "learning_rate": 9.84178905482944e-05, "loss": 46.0007, "step": 680 }, { "epoch": 0.09297562973581815, "grad_norm": 0.03209716081619263, "learning_rate": 9.841245937482632e-05, "loss": 46.0007, "step": 681 }, { "epoch": 0.0931121578264728, "grad_norm": 0.03685125708580017, "learning_rate": 9.840701904545049e-05, "loss": 46.0013, "step": 682 }, { "epoch": 0.09324868591712746, "grad_norm": 
0.06795309484004974, "learning_rate": 9.840156956119577e-05, "loss": 46.002, "step": 683 }, { "epoch": 0.0933852140077821, "grad_norm": 0.03866392374038696, "learning_rate": 9.839611092309277e-05, "loss": 46.0001, "step": 684 }, { "epoch": 0.09352174209843675, "grad_norm": 0.03536488488316536, "learning_rate": 9.839064313217388e-05, "loss": 46.0017, "step": 685 }, { "epoch": 0.09365827018909141, "grad_norm": 0.05660153180360794, "learning_rate": 9.838516618947318e-05, "loss": 46.0012, "step": 686 }, { "epoch": 0.09379479827974606, "grad_norm": 0.04624690115451813, "learning_rate": 9.837968009602645e-05, "loss": 46.0003, "step": 687 }, { "epoch": 0.0939313263704007, "grad_norm": 0.03933669254183769, "learning_rate": 9.837418485287127e-05, "loss": 46.0, "step": 688 }, { "epoch": 0.09406785446105537, "grad_norm": 0.03808354213833809, "learning_rate": 9.83686804610469e-05, "loss": 46.0003, "step": 689 }, { "epoch": 0.09420438255171001, "grad_norm": 0.0350818932056427, "learning_rate": 9.836316692159435e-05, "loss": 46.0033, "step": 690 }, { "epoch": 0.09434091064236466, "grad_norm": 0.05906994640827179, "learning_rate": 9.835764423555637e-05, "loss": 46.0016, "step": 691 }, { "epoch": 0.09447743873301932, "grad_norm": 0.039931830018758774, "learning_rate": 9.835211240397741e-05, "loss": 46.002, "step": 692 }, { "epoch": 0.09461396682367397, "grad_norm": 0.03922465443611145, "learning_rate": 9.834657142790368e-05, "loss": 46.0009, "step": 693 }, { "epoch": 0.09475049491432862, "grad_norm": 0.03921540081501007, "learning_rate": 9.834102130838309e-05, "loss": 46.0013, "step": 694 }, { "epoch": 0.09488702300498328, "grad_norm": 0.05121154338121414, "learning_rate": 9.833546204646531e-05, "loss": 46.0016, "step": 695 }, { "epoch": 0.09502355109563793, "grad_norm": 0.09077654778957367, "learning_rate": 9.832989364320172e-05, "loss": 46.004, "step": 696 }, { "epoch": 0.09516007918629257, "grad_norm": 0.0587497241795063, "learning_rate": 9.832431609964543e-05, "loss": 46.0052, "step": 697 }, { "epoch": 0.09529660727694723, "grad_norm": 0.04381035268306732, "learning_rate": 9.831872941685128e-05, "loss": 46.0052, "step": 698 }, { "epoch": 0.09543313536760188, "grad_norm": 0.12686116993427277, "learning_rate": 9.831313359587584e-05, "loss": 46.0054, "step": 699 }, { "epoch": 0.09556966345825654, "grad_norm": 0.19086812436580658, "learning_rate": 9.830752863777741e-05, "loss": 46.0016, "step": 700 }, { "epoch": 0.09570619154891119, "grad_norm": 0.10847432166337967, "learning_rate": 9.830191454361601e-05, "loss": 46.0006, "step": 701 }, { "epoch": 0.09584271963956584, "grad_norm": 0.030252549797296524, "learning_rate": 9.829629131445342e-05, "loss": 46.0041, "step": 702 }, { "epoch": 0.0959792477302205, "grad_norm": 0.03715343773365021, "learning_rate": 9.82906589513531e-05, "loss": 46.0008, "step": 703 }, { "epoch": 0.09611577582087515, "grad_norm": 0.043439123779535294, "learning_rate": 9.828501745538025e-05, "loss": 46.0017, "step": 704 }, { "epoch": 0.0962523039115298, "grad_norm": 0.029862580820918083, "learning_rate": 9.827936682760182e-05, "loss": 46.0016, "step": 705 }, { "epoch": 0.09638883200218445, "grad_norm": 0.06478384137153625, "learning_rate": 9.827370706908648e-05, "loss": 46.0041, "step": 706 }, { "epoch": 0.0965253600928391, "grad_norm": 0.039531394839286804, "learning_rate": 9.82680381809046e-05, "loss": 46.0018, "step": 707 }, { "epoch": 0.09666188818349375, "grad_norm": 0.03632217273116112, "learning_rate": 9.826236016412833e-05, "loss": 46.0, "step": 708 }, { "epoch": 
0.09679841627414841, "grad_norm": 0.03289657086133957, "learning_rate": 9.825667301983148e-05, "loss": 46.0025, "step": 709 }, { "epoch": 0.09693494436480306, "grad_norm": 0.041733358055353165, "learning_rate": 9.825097674908963e-05, "loss": 46.0025, "step": 710 }, { "epoch": 0.0970714724554577, "grad_norm": 0.0290440134704113, "learning_rate": 9.824527135298008e-05, "loss": 46.0029, "step": 711 }, { "epoch": 0.09720800054611237, "grad_norm": 0.04194749891757965, "learning_rate": 9.823955683258185e-05, "loss": 46.0014, "step": 712 }, { "epoch": 0.09734452863676701, "grad_norm": 0.05331726744771004, "learning_rate": 9.823383318897568e-05, "loss": 46.0001, "step": 713 }, { "epoch": 0.09748105672742166, "grad_norm": 0.06871955096721649, "learning_rate": 9.822810042324406e-05, "loss": 46.0029, "step": 714 }, { "epoch": 0.09761758481807632, "grad_norm": 0.05735849589109421, "learning_rate": 9.822235853647116e-05, "loss": 46.0028, "step": 715 }, { "epoch": 0.09775411290873097, "grad_norm": 0.06272710114717484, "learning_rate": 9.821660752974293e-05, "loss": 46.0006, "step": 716 }, { "epoch": 0.09789064099938562, "grad_norm": 0.027135305106639862, "learning_rate": 9.821084740414701e-05, "loss": 46.004, "step": 717 }, { "epoch": 0.09802716909004028, "grad_norm": 0.053261179476976395, "learning_rate": 9.820507816077276e-05, "loss": 46.001, "step": 718 }, { "epoch": 0.09816369718069493, "grad_norm": 0.030309267342090607, "learning_rate": 9.81992998007113e-05, "loss": 46.002, "step": 719 }, { "epoch": 0.09830022527134959, "grad_norm": 0.0516032949090004, "learning_rate": 9.819351232505541e-05, "loss": 46.0001, "step": 720 }, { "epoch": 0.09843675336200423, "grad_norm": 0.040273893624544144, "learning_rate": 9.818771573489969e-05, "loss": 46.0005, "step": 721 }, { "epoch": 0.09857328145265888, "grad_norm": 0.04051680490374565, "learning_rate": 9.818191003134035e-05, "loss": 46.0001, "step": 722 }, { "epoch": 0.09870980954331354, "grad_norm": 0.026247017085552216, "learning_rate": 9.817609521547543e-05, "loss": 46.0023, "step": 723 }, { "epoch": 0.09884633763396819, "grad_norm": 0.036516301333904266, "learning_rate": 9.817027128840461e-05, "loss": 46.0002, "step": 724 }, { "epoch": 0.09898286572462284, "grad_norm": 0.04042641445994377, "learning_rate": 9.816443825122935e-05, "loss": 46.0002, "step": 725 }, { "epoch": 0.0991193938152775, "grad_norm": 0.03488520532846451, "learning_rate": 9.81585961050528e-05, "loss": 46.001, "step": 726 }, { "epoch": 0.09925592190593215, "grad_norm": 0.05405500531196594, "learning_rate": 9.815274485097988e-05, "loss": 46.0017, "step": 727 }, { "epoch": 0.09939244999658679, "grad_norm": 0.06232611835002899, "learning_rate": 9.814688449011713e-05, "loss": 46.0007, "step": 728 }, { "epoch": 0.09952897808724145, "grad_norm": 0.025829503312706947, "learning_rate": 9.814101502357292e-05, "loss": 46.0052, "step": 729 }, { "epoch": 0.0996655061778961, "grad_norm": 0.04045548290014267, "learning_rate": 9.813513645245729e-05, "loss": 46.0016, "step": 730 }, { "epoch": 0.09980203426855075, "grad_norm": 0.03953877463936806, "learning_rate": 9.812924877788204e-05, "loss": 46.0027, "step": 731 }, { "epoch": 0.09993856235920541, "grad_norm": 0.041129253804683685, "learning_rate": 9.812335200096063e-05, "loss": 46.0003, "step": 732 }, { "epoch": 0.10007509044986006, "grad_norm": 0.08587277680635452, "learning_rate": 9.811744612280829e-05, "loss": 46.0011, "step": 733 }, { "epoch": 0.1002116185405147, "grad_norm": 0.03947478532791138, "learning_rate": 9.811153114454195e-05, "loss": 
46.0035, "step": 734 }, { "epoch": 0.10034814663116937, "grad_norm": 0.024990912526845932, "learning_rate": 9.810560706728027e-05, "loss": 46.0053, "step": 735 }, { "epoch": 0.10048467472182401, "grad_norm": 0.03473329171538353, "learning_rate": 9.809967389214365e-05, "loss": 46.0011, "step": 736 }, { "epoch": 0.10062120281247866, "grad_norm": 0.0345025435090065, "learning_rate": 9.809373162025416e-05, "loss": 46.0017, "step": 737 }, { "epoch": 0.10075773090313332, "grad_norm": 0.040182147175073624, "learning_rate": 9.808778025273564e-05, "loss": 46.0005, "step": 738 }, { "epoch": 0.10089425899378797, "grad_norm": 0.06263817101716995, "learning_rate": 9.808181979071361e-05, "loss": 46.0014, "step": 739 }, { "epoch": 0.10103078708444263, "grad_norm": 0.026548366993665695, "learning_rate": 9.807585023531535e-05, "loss": 46.0014, "step": 740 }, { "epoch": 0.10116731517509728, "grad_norm": 0.03933629021048546, "learning_rate": 9.806987158766983e-05, "loss": 46.0029, "step": 741 }, { "epoch": 0.10130384326575192, "grad_norm": 0.05970722436904907, "learning_rate": 9.806388384890777e-05, "loss": 46.0015, "step": 742 }, { "epoch": 0.10144037135640659, "grad_norm": 0.042312949895858765, "learning_rate": 9.805788702016156e-05, "loss": 46.0044, "step": 743 }, { "epoch": 0.10157689944706123, "grad_norm": 0.04626930505037308, "learning_rate": 9.805188110256534e-05, "loss": 46.0061, "step": 744 }, { "epoch": 0.10171342753771588, "grad_norm": 0.04759659245610237, "learning_rate": 9.804586609725499e-05, "loss": 46.003, "step": 745 }, { "epoch": 0.10184995562837054, "grad_norm": 0.0783025249838829, "learning_rate": 9.803984200536807e-05, "loss": 46.0034, "step": 746 }, { "epoch": 0.10198648371902519, "grad_norm": 0.07158616185188293, "learning_rate": 9.803380882804386e-05, "loss": 46.0077, "step": 747 }, { "epoch": 0.10212301180967984, "grad_norm": 0.16745953261852264, "learning_rate": 9.80277665664234e-05, "loss": 46.0075, "step": 748 }, { "epoch": 0.1022595399003345, "grad_norm": 0.059001702815294266, "learning_rate": 9.80217152216494e-05, "loss": 46.0075, "step": 749 }, { "epoch": 0.10239606799098915, "grad_norm": 0.05525839328765869, "learning_rate": 9.801565479486633e-05, "loss": 46.0098, "step": 750 }, { "epoch": 0.10253259608164379, "grad_norm": 0.0822095051407814, "learning_rate": 9.800958528722036e-05, "loss": 46.0033, "step": 751 }, { "epoch": 0.10266912417229845, "grad_norm": 0.12800097465515137, "learning_rate": 9.800350669985932e-05, "loss": 46.0016, "step": 752 }, { "epoch": 0.1028056522629531, "grad_norm": 0.05125027149915695, "learning_rate": 9.799741903393287e-05, "loss": 46.0024, "step": 753 }, { "epoch": 0.10294218035360775, "grad_norm": 0.04559844732284546, "learning_rate": 9.799132229059229e-05, "loss": 46.0023, "step": 754 }, { "epoch": 0.10307870844426241, "grad_norm": 0.03107638657093048, "learning_rate": 9.798521647099063e-05, "loss": 46.0033, "step": 755 }, { "epoch": 0.10321523653491706, "grad_norm": 0.04498928040266037, "learning_rate": 9.797910157628265e-05, "loss": 46.0057, "step": 756 }, { "epoch": 0.10335176462557172, "grad_norm": 0.0474763959646225, "learning_rate": 9.79729776076248e-05, "loss": 46.0069, "step": 757 }, { "epoch": 0.10348829271622637, "grad_norm": 0.046097442507743835, "learning_rate": 9.796684456617526e-05, "loss": 46.0018, "step": 758 }, { "epoch": 0.10362482080688101, "grad_norm": 0.05998275429010391, "learning_rate": 9.796070245309396e-05, "loss": 46.0021, "step": 759 }, { "epoch": 0.10376134889753567, "grad_norm": 0.08459696173667908, "learning_rate": 
9.795455126954247e-05, "loss": 46.0026, "step": 760 }, { "epoch": 0.10389787698819032, "grad_norm": 0.05848350003361702, "learning_rate": 9.794839101668416e-05, "loss": 46.0026, "step": 761 }, { "epoch": 0.10403440507884497, "grad_norm": 0.06621556729078293, "learning_rate": 9.794222169568406e-05, "loss": 46.0036, "step": 762 }, { "epoch": 0.10417093316949963, "grad_norm": 0.05873598903417587, "learning_rate": 9.793604330770892e-05, "loss": 46.0018, "step": 763 }, { "epoch": 0.10430746126015428, "grad_norm": 0.05366057902574539, "learning_rate": 9.792985585392722e-05, "loss": 46.0025, "step": 764 }, { "epoch": 0.10444398935080892, "grad_norm": 0.04832799360156059, "learning_rate": 9.792365933550915e-05, "loss": 46.0001, "step": 765 }, { "epoch": 0.10458051744146359, "grad_norm": 0.06615574657917023, "learning_rate": 9.791745375362662e-05, "loss": 46.001, "step": 766 }, { "epoch": 0.10471704553211823, "grad_norm": 0.05558893829584122, "learning_rate": 9.791123910945324e-05, "loss": 46.0034, "step": 767 }, { "epoch": 0.10485357362277288, "grad_norm": 0.037954144179821014, "learning_rate": 9.790501540416437e-05, "loss": 46.0015, "step": 768 }, { "epoch": 0.10499010171342754, "grad_norm": 0.035602767020463943, "learning_rate": 9.789878263893702e-05, "loss": 46.0075, "step": 769 }, { "epoch": 0.10512662980408219, "grad_norm": 0.02851206250488758, "learning_rate": 9.789254081494994e-05, "loss": 46.0031, "step": 770 }, { "epoch": 0.10526315789473684, "grad_norm": 0.03392300009727478, "learning_rate": 9.788628993338365e-05, "loss": 46.002, "step": 771 }, { "epoch": 0.1053996859853915, "grad_norm": 0.04427320510149002, "learning_rate": 9.78800299954203e-05, "loss": 46.0011, "step": 772 }, { "epoch": 0.10553621407604614, "grad_norm": 0.0984644889831543, "learning_rate": 9.787376100224381e-05, "loss": 46.0009, "step": 773 }, { "epoch": 0.10567274216670079, "grad_norm": 0.0558011457324028, "learning_rate": 9.786748295503976e-05, "loss": 46.0026, "step": 774 }, { "epoch": 0.10580927025735545, "grad_norm": 0.05963090807199478, "learning_rate": 9.786119585499549e-05, "loss": 46.0014, "step": 775 }, { "epoch": 0.1059457983480101, "grad_norm": 0.045638538897037506, "learning_rate": 9.785489970330004e-05, "loss": 46.0026, "step": 776 }, { "epoch": 0.10608232643866476, "grad_norm": 0.03876648470759392, "learning_rate": 9.784859450114417e-05, "loss": 46.0029, "step": 777 }, { "epoch": 0.10621885452931941, "grad_norm": 0.03528429567813873, "learning_rate": 9.78422802497203e-05, "loss": 46.0036, "step": 778 }, { "epoch": 0.10635538261997406, "grad_norm": 0.05152333527803421, "learning_rate": 9.783595695022262e-05, "loss": 46.0011, "step": 779 }, { "epoch": 0.10649191071062872, "grad_norm": 0.046583130955696106, "learning_rate": 9.782962460384701e-05, "loss": 46.0001, "step": 780 }, { "epoch": 0.10662843880128337, "grad_norm": 0.05636008828878403, "learning_rate": 9.782328321179108e-05, "loss": 46.0017, "step": 781 }, { "epoch": 0.10676496689193801, "grad_norm": 0.04163141921162605, "learning_rate": 9.78169327752541e-05, "loss": 46.0017, "step": 782 }, { "epoch": 0.10690149498259267, "grad_norm": 0.03872488811612129, "learning_rate": 9.781057329543712e-05, "loss": 46.0041, "step": 783 }, { "epoch": 0.10703802307324732, "grad_norm": 0.04920263588428497, "learning_rate": 9.780420477354282e-05, "loss": 46.0036, "step": 784 }, { "epoch": 0.10717455116390197, "grad_norm": 0.04373733326792717, "learning_rate": 9.779782721077568e-05, "loss": 46.0014, "step": 785 }, { "epoch": 0.10731107925455663, "grad_norm": 
0.030751509591937065, "learning_rate": 9.779144060834182e-05, "loss": 46.0042, "step": 786 }, { "epoch": 0.10744760734521128, "grad_norm": 0.0397043414413929, "learning_rate": 9.778504496744908e-05, "loss": 46.0019, "step": 787 }, { "epoch": 0.10758413543586592, "grad_norm": 0.04384777322411537, "learning_rate": 9.777864028930705e-05, "loss": 46.0034, "step": 788 }, { "epoch": 0.10772066352652059, "grad_norm": 0.040858082473278046, "learning_rate": 9.777222657512697e-05, "loss": 46.0026, "step": 789 }, { "epoch": 0.10785719161717523, "grad_norm": 0.03438681364059448, "learning_rate": 9.776580382612186e-05, "loss": 46.0008, "step": 790 }, { "epoch": 0.10799371970782988, "grad_norm": 0.05219962075352669, "learning_rate": 9.775937204350638e-05, "loss": 46.0036, "step": 791 }, { "epoch": 0.10813024779848454, "grad_norm": 0.05389074608683586, "learning_rate": 9.775293122849694e-05, "loss": 46.006, "step": 792 }, { "epoch": 0.10826677588913919, "grad_norm": 0.06585431098937988, "learning_rate": 9.774648138231163e-05, "loss": 46.0023, "step": 793 }, { "epoch": 0.10840330397979384, "grad_norm": 0.04325488582253456, "learning_rate": 9.774002250617028e-05, "loss": 46.0023, "step": 794 }, { "epoch": 0.1085398320704485, "grad_norm": 0.09958802908658981, "learning_rate": 9.773355460129442e-05, "loss": 46.002, "step": 795 }, { "epoch": 0.10867636016110314, "grad_norm": 0.1281256079673767, "learning_rate": 9.772707766890726e-05, "loss": 46.0049, "step": 796 }, { "epoch": 0.1088128882517578, "grad_norm": 0.04173184558749199, "learning_rate": 9.772059171023374e-05, "loss": 46.0059, "step": 797 }, { "epoch": 0.10894941634241245, "grad_norm": 0.08460516482591629, "learning_rate": 9.771409672650051e-05, "loss": 46.0059, "step": 798 }, { "epoch": 0.1090859444330671, "grad_norm": 0.14542537927627563, "learning_rate": 9.770759271893592e-05, "loss": 46.007, "step": 799 }, { "epoch": 0.10922247252372176, "grad_norm": 0.37057337164878845, "learning_rate": 9.770107968877003e-05, "loss": 46.0037, "step": 800 }, { "epoch": 0.10935900061437641, "grad_norm": 0.046123214066028595, "learning_rate": 9.769455763723461e-05, "loss": 46.0035, "step": 801 }, { "epoch": 0.10949552870503106, "grad_norm": 0.04648647457361221, "learning_rate": 9.76880265655631e-05, "loss": 46.0023, "step": 802 }, { "epoch": 0.10963205679568572, "grad_norm": 0.06727230548858643, "learning_rate": 9.768148647499069e-05, "loss": 46.0049, "step": 803 }, { "epoch": 0.10976858488634036, "grad_norm": 0.05876913666725159, "learning_rate": 9.767493736675429e-05, "loss": 46.0012, "step": 804 }, { "epoch": 0.10990511297699501, "grad_norm": 0.04849866032600403, "learning_rate": 9.766837924209244e-05, "loss": 46.0009, "step": 805 }, { "epoch": 0.11004164106764967, "grad_norm": 0.06767833232879639, "learning_rate": 9.766181210224546e-05, "loss": 46.003, "step": 806 }, { "epoch": 0.11017816915830432, "grad_norm": 0.0684816911816597, "learning_rate": 9.765523594845535e-05, "loss": 46.0064, "step": 807 }, { "epoch": 0.11031469724895897, "grad_norm": 0.040182385593652725, "learning_rate": 9.76486507819658e-05, "loss": 46.0003, "step": 808 }, { "epoch": 0.11045122533961363, "grad_norm": 0.10716207325458527, "learning_rate": 9.764205660402221e-05, "loss": 46.0052, "step": 809 }, { "epoch": 0.11058775343026828, "grad_norm": 0.024547114968299866, "learning_rate": 9.763545341587171e-05, "loss": 46.0074, "step": 810 }, { "epoch": 0.11072428152092292, "grad_norm": 0.03348519280552864, "learning_rate": 9.76288412187631e-05, "loss": 46.0023, "step": 811 }, { "epoch": 
0.11086080961157758, "grad_norm": 0.03896411135792732, "learning_rate": 9.762222001394692e-05, "loss": 46.006, "step": 812 }, { "epoch": 0.11099733770223223, "grad_norm": 0.03791692107915878, "learning_rate": 9.761558980267536e-05, "loss": 46.0022, "step": 813 }, { "epoch": 0.1111338657928869, "grad_norm": 0.0395740270614624, "learning_rate": 9.760895058620235e-05, "loss": 46.0067, "step": 814 }, { "epoch": 0.11127039388354154, "grad_norm": 0.02890084870159626, "learning_rate": 9.760230236578354e-05, "loss": 46.0048, "step": 815 }, { "epoch": 0.11140692197419619, "grad_norm": 0.056968096643686295, "learning_rate": 9.759564514267625e-05, "loss": 46.0023, "step": 816 }, { "epoch": 0.11154345006485085, "grad_norm": 0.05344318598508835, "learning_rate": 9.75889789181395e-05, "loss": 46.0085, "step": 817 }, { "epoch": 0.1116799781555055, "grad_norm": 0.05645618587732315, "learning_rate": 9.758230369343406e-05, "loss": 46.0074, "step": 818 }, { "epoch": 0.11181650624616014, "grad_norm": 0.04705626145005226, "learning_rate": 9.757561946982234e-05, "loss": 46.0026, "step": 819 }, { "epoch": 0.1119530343368148, "grad_norm": 0.05368528515100479, "learning_rate": 9.756892624856848e-05, "loss": 46.0088, "step": 820 }, { "epoch": 0.11208956242746945, "grad_norm": 0.03863050043582916, "learning_rate": 9.756222403093833e-05, "loss": 46.0033, "step": 821 }, { "epoch": 0.1122260905181241, "grad_norm": 0.0476750023663044, "learning_rate": 9.755551281819943e-05, "loss": 46.0021, "step": 822 }, { "epoch": 0.11236261860877876, "grad_norm": 0.0584893524646759, "learning_rate": 9.754879261162104e-05, "loss": 46.0016, "step": 823 }, { "epoch": 0.11249914669943341, "grad_norm": 0.0614020936191082, "learning_rate": 9.754206341247409e-05, "loss": 46.0006, "step": 824 }, { "epoch": 0.11263567479008806, "grad_norm": 0.09581192582845688, "learning_rate": 9.753532522203121e-05, "loss": 46.0015, "step": 825 }, { "epoch": 0.11277220288074272, "grad_norm": 0.10595183074474335, "learning_rate": 9.752857804156679e-05, "loss": 46.0045, "step": 826 }, { "epoch": 0.11290873097139736, "grad_norm": 0.037210091948509216, "learning_rate": 9.752182187235685e-05, "loss": 46.0001, "step": 827 }, { "epoch": 0.11304525906205201, "grad_norm": 0.0642985850572586, "learning_rate": 9.751505671567913e-05, "loss": 46.0047, "step": 828 }, { "epoch": 0.11318178715270667, "grad_norm": 0.038948915898799896, "learning_rate": 9.750828257281308e-05, "loss": 46.0041, "step": 829 }, { "epoch": 0.11331831524336132, "grad_norm": 0.06999576091766357, "learning_rate": 9.750149944503985e-05, "loss": 46.0024, "step": 830 }, { "epoch": 0.11345484333401597, "grad_norm": 0.04555666446685791, "learning_rate": 9.74947073336423e-05, "loss": 46.0029, "step": 831 }, { "epoch": 0.11359137142467063, "grad_norm": 0.04345620051026344, "learning_rate": 9.748790623990496e-05, "loss": 46.0025, "step": 832 }, { "epoch": 0.11372789951532528, "grad_norm": 0.026461204513907433, "learning_rate": 9.748109616511408e-05, "loss": 46.0027, "step": 833 }, { "epoch": 0.11386442760597994, "grad_norm": 0.0698833018541336, "learning_rate": 9.747427711055756e-05, "loss": 46.0018, "step": 834 }, { "epoch": 0.11400095569663458, "grad_norm": 0.056212954223155975, "learning_rate": 9.746744907752509e-05, "loss": 46.0043, "step": 835 }, { "epoch": 0.11413748378728923, "grad_norm": 0.03530261665582657, "learning_rate": 9.746061206730802e-05, "loss": 46.0033, "step": 836 }, { "epoch": 0.11427401187794389, "grad_norm": 0.11938730627298355, "learning_rate": 9.745376608119934e-05, "loss": 46.0044, 
"step": 837 }, { "epoch": 0.11441053996859854, "grad_norm": 0.03523730859160423, "learning_rate": 9.74469111204938e-05, "loss": 46.0041, "step": 838 }, { "epoch": 0.11454706805925319, "grad_norm": 0.04193026199936867, "learning_rate": 9.744004718648783e-05, "loss": 46.0034, "step": 839 }, { "epoch": 0.11468359614990785, "grad_norm": 0.10958817601203918, "learning_rate": 9.743317428047956e-05, "loss": 46.001, "step": 840 }, { "epoch": 0.1148201242405625, "grad_norm": 0.05277593806385994, "learning_rate": 9.742629240376881e-05, "loss": 46.0057, "step": 841 }, { "epoch": 0.11495665233121714, "grad_norm": 0.058790598064661026, "learning_rate": 9.741940155765712e-05, "loss": 46.0013, "step": 842 }, { "epoch": 0.1150931804218718, "grad_norm": 0.04378712177276611, "learning_rate": 9.741250174344769e-05, "loss": 46.0034, "step": 843 }, { "epoch": 0.11522970851252645, "grad_norm": 0.07715120166540146, "learning_rate": 9.740559296244543e-05, "loss": 46.0033, "step": 844 }, { "epoch": 0.1153662366031811, "grad_norm": 0.04369937255978584, "learning_rate": 9.739867521595694e-05, "loss": 46.0045, "step": 845 }, { "epoch": 0.11550276469383576, "grad_norm": 0.10291318595409393, "learning_rate": 9.739174850529056e-05, "loss": 46.002, "step": 846 }, { "epoch": 0.11563929278449041, "grad_norm": 0.07012007385492325, "learning_rate": 9.738481283175625e-05, "loss": 46.0043, "step": 847 }, { "epoch": 0.11577582087514506, "grad_norm": 0.09539791196584702, "learning_rate": 9.737786819666575e-05, "loss": 46.0156, "step": 848 }, { "epoch": 0.11591234896579972, "grad_norm": 0.045198772102594376, "learning_rate": 9.737091460133241e-05, "loss": 46.0043, "step": 849 }, { "epoch": 0.11604887705645436, "grad_norm": 0.10079105198383331, "learning_rate": 9.736395204707133e-05, "loss": 46.0057, "step": 850 }, { "epoch": 0.11618540514710901, "grad_norm": 0.09403451532125473, "learning_rate": 9.735698053519928e-05, "loss": 46.0102, "step": 851 }, { "epoch": 0.11632193323776367, "grad_norm": 0.045815978199243546, "learning_rate": 9.735000006703475e-05, "loss": 46.0061, "step": 852 }, { "epoch": 0.11645846132841832, "grad_norm": 0.042027268558740616, "learning_rate": 9.73430106438979e-05, "loss": 46.0008, "step": 853 }, { "epoch": 0.11659498941907298, "grad_norm": 0.040001414716243744, "learning_rate": 9.733601226711058e-05, "loss": 46.0021, "step": 854 }, { "epoch": 0.11673151750972763, "grad_norm": 0.06370840221643448, "learning_rate": 9.732900493799636e-05, "loss": 46.0048, "step": 855 }, { "epoch": 0.11686804560038228, "grad_norm": 0.041940417140722275, "learning_rate": 9.732198865788047e-05, "loss": 46.0034, "step": 856 }, { "epoch": 0.11700457369103694, "grad_norm": 0.055001724511384964, "learning_rate": 9.731496342808987e-05, "loss": 46.0022, "step": 857 }, { "epoch": 0.11714110178169158, "grad_norm": 0.048636604100465775, "learning_rate": 9.730792924995317e-05, "loss": 46.0042, "step": 858 }, { "epoch": 0.11727762987234623, "grad_norm": 0.04533183574676514, "learning_rate": 9.73008861248007e-05, "loss": 46.0072, "step": 859 }, { "epoch": 0.11741415796300089, "grad_norm": 0.08404522389173508, "learning_rate": 9.72938340539645e-05, "loss": 46.005, "step": 860 }, { "epoch": 0.11755068605365554, "grad_norm": 0.04576120525598526, "learning_rate": 9.728677303877824e-05, "loss": 46.0029, "step": 861 }, { "epoch": 0.11768721414431019, "grad_norm": 0.04307778924703598, "learning_rate": 9.727970308057736e-05, "loss": 46.0, "step": 862 }, { "epoch": 0.11782374223496485, "grad_norm": 0.032827604562044144, "learning_rate": 
9.727262418069893e-05, "loss": 46.0036, "step": 863 }, { "epoch": 0.1179602703256195, "grad_norm": 0.034264929592609406, "learning_rate": 9.726553634048172e-05, "loss": 46.0015, "step": 864 }, { "epoch": 0.11809679841627414, "grad_norm": 0.03789762035012245, "learning_rate": 9.725843956126623e-05, "loss": 46.003, "step": 865 }, { "epoch": 0.1182333265069288, "grad_norm": 0.05271655693650246, "learning_rate": 9.725133384439461e-05, "loss": 46.002, "step": 866 }, { "epoch": 0.11836985459758345, "grad_norm": 0.0398254469037056, "learning_rate": 9.724421919121072e-05, "loss": 46.007, "step": 867 }, { "epoch": 0.1185063826882381, "grad_norm": 0.059620801359415054, "learning_rate": 9.723709560306009e-05, "loss": 46.0021, "step": 868 }, { "epoch": 0.11864291077889276, "grad_norm": 0.07665008306503296, "learning_rate": 9.722996308128997e-05, "loss": 46.0038, "step": 869 }, { "epoch": 0.11877943886954741, "grad_norm": 0.04196556285023689, "learning_rate": 9.722282162724927e-05, "loss": 46.0033, "step": 870 }, { "epoch": 0.11891596696020205, "grad_norm": 0.043959394097328186, "learning_rate": 9.721567124228864e-05, "loss": 46.0041, "step": 871 }, { "epoch": 0.11905249505085672, "grad_norm": 0.025985782966017723, "learning_rate": 9.720851192776032e-05, "loss": 46.0066, "step": 872 }, { "epoch": 0.11918902314151136, "grad_norm": 0.02769347093999386, "learning_rate": 9.720134368501834e-05, "loss": 46.0069, "step": 873 }, { "epoch": 0.11932555123216602, "grad_norm": 0.04030538722872734, "learning_rate": 9.719416651541839e-05, "loss": 46.0048, "step": 874 }, { "epoch": 0.11946207932282067, "grad_norm": 0.04101410135626793, "learning_rate": 9.71869804203178e-05, "loss": 46.0012, "step": 875 }, { "epoch": 0.11959860741347532, "grad_norm": 0.05188958719372749, "learning_rate": 9.717978540107566e-05, "loss": 46.0029, "step": 876 }, { "epoch": 0.11973513550412998, "grad_norm": 0.05008949711918831, "learning_rate": 9.71725814590527e-05, "loss": 46.0047, "step": 877 }, { "epoch": 0.11987166359478463, "grad_norm": 0.04504159465432167, "learning_rate": 9.716536859561134e-05, "loss": 46.005, "step": 878 }, { "epoch": 0.12000819168543927, "grad_norm": 0.05220263823866844, "learning_rate": 9.715814681211571e-05, "loss": 46.0058, "step": 879 }, { "epoch": 0.12014471977609394, "grad_norm": 0.09104208648204803, "learning_rate": 9.715091610993162e-05, "loss": 46.0015, "step": 880 }, { "epoch": 0.12028124786674858, "grad_norm": 0.04231470823287964, "learning_rate": 9.714367649042654e-05, "loss": 46.0011, "step": 881 }, { "epoch": 0.12041777595740323, "grad_norm": 0.07370532304048538, "learning_rate": 9.713642795496969e-05, "loss": 46.0009, "step": 882 }, { "epoch": 0.12055430404805789, "grad_norm": 0.03533048927783966, "learning_rate": 9.71291705049319e-05, "loss": 46.0007, "step": 883 }, { "epoch": 0.12069083213871254, "grad_norm": 0.04976946488022804, "learning_rate": 9.712190414168572e-05, "loss": 46.0058, "step": 884 }, { "epoch": 0.12082736022936719, "grad_norm": 0.031232262030243874, "learning_rate": 9.71146288666054e-05, "loss": 46.0027, "step": 885 }, { "epoch": 0.12096388832002185, "grad_norm": 0.02718084305524826, "learning_rate": 9.710734468106685e-05, "loss": 46.0034, "step": 886 }, { "epoch": 0.1211004164106765, "grad_norm": 0.02644386515021324, "learning_rate": 9.710005158644771e-05, "loss": 46.0051, "step": 887 }, { "epoch": 0.12123694450133114, "grad_norm": 0.06574355810880661, "learning_rate": 9.709274958412722e-05, "loss": 46.0052, "step": 888 }, { "epoch": 0.1213734725919858, "grad_norm": 
0.05334949865937233, "learning_rate": 9.708543867548638e-05, "loss": 46.0033, "step": 889 }, { "epoch": 0.12151000068264045, "grad_norm": 0.04962702468037605, "learning_rate": 9.707811886190786e-05, "loss": 46.003, "step": 890 }, { "epoch": 0.12164652877329511, "grad_norm": 0.04016447067260742, "learning_rate": 9.7070790144776e-05, "loss": 46.0033, "step": 891 }, { "epoch": 0.12178305686394976, "grad_norm": 0.0959213376045227, "learning_rate": 9.706345252547682e-05, "loss": 46.0025, "step": 892 }, { "epoch": 0.1219195849546044, "grad_norm": 0.059750109910964966, "learning_rate": 9.705610600539802e-05, "loss": 46.0035, "step": 893 }, { "epoch": 0.12205611304525907, "grad_norm": 0.10696861147880554, "learning_rate": 9.704875058592905e-05, "loss": 46.0035, "step": 894 }, { "epoch": 0.12219264113591372, "grad_norm": 0.059534259140491486, "learning_rate": 9.704138626846094e-05, "loss": 46.0051, "step": 895 }, { "epoch": 0.12232916922656836, "grad_norm": 0.08442714810371399, "learning_rate": 9.703401305438645e-05, "loss": 46.0055, "step": 896 }, { "epoch": 0.12246569731722302, "grad_norm": 0.21099558472633362, "learning_rate": 9.702663094510006e-05, "loss": 46.0076, "step": 897 }, { "epoch": 0.12260222540787767, "grad_norm": 0.07288625091314316, "learning_rate": 9.701923994199784e-05, "loss": 46.0114, "step": 898 }, { "epoch": 0.12273875349853232, "grad_norm": 0.13815559446811676, "learning_rate": 9.701184004647765e-05, "loss": 46.005, "step": 899 }, { "epoch": 0.12287528158918698, "grad_norm": 0.08396711200475693, "learning_rate": 9.700443125993897e-05, "loss": 46.005, "step": 900 }, { "epoch": 0.12301180967984163, "grad_norm": 0.07331496477127075, "learning_rate": 9.699701358378296e-05, "loss": 46.0034, "step": 901 }, { "epoch": 0.12314833777049627, "grad_norm": 0.0777362510561943, "learning_rate": 9.698958701941248e-05, "loss": 46.0017, "step": 902 }, { "epoch": 0.12328486586115094, "grad_norm": 0.04128085449337959, "learning_rate": 9.698215156823206e-05, "loss": 46.0011, "step": 903 }, { "epoch": 0.12342139395180558, "grad_norm": 0.03372270241379738, "learning_rate": 9.697470723164792e-05, "loss": 46.0032, "step": 904 }, { "epoch": 0.12355792204246023, "grad_norm": 0.03626884147524834, "learning_rate": 9.696725401106794e-05, "loss": 46.0042, "step": 905 }, { "epoch": 0.12369445013311489, "grad_norm": 0.053507931530475616, "learning_rate": 9.695979190790171e-05, "loss": 46.004, "step": 906 }, { "epoch": 0.12383097822376954, "grad_norm": 0.030730433762073517, "learning_rate": 9.695232092356047e-05, "loss": 46.0029, "step": 907 }, { "epoch": 0.12396750631442419, "grad_norm": 0.08149008452892303, "learning_rate": 9.694484105945719e-05, "loss": 46.003, "step": 908 }, { "epoch": 0.12410403440507885, "grad_norm": 0.024417391046881676, "learning_rate": 9.693735231700644e-05, "loss": 46.0035, "step": 909 }, { "epoch": 0.1242405624957335, "grad_norm": 0.06721650063991547, "learning_rate": 9.692985469762454e-05, "loss": 46.0007, "step": 910 }, { "epoch": 0.12437709058638816, "grad_norm": 0.07872725278139114, "learning_rate": 9.692234820272946e-05, "loss": 46.005, "step": 911 }, { "epoch": 0.1245136186770428, "grad_norm": 0.09382612258195877, "learning_rate": 9.691483283374084e-05, "loss": 46.0024, "step": 912 }, { "epoch": 0.12465014676769745, "grad_norm": 0.06288377940654755, "learning_rate": 9.690730859208002e-05, "loss": 46.0074, "step": 913 }, { "epoch": 0.12478667485835211, "grad_norm": 0.08234284818172455, "learning_rate": 9.689977547917e-05, "loss": 46.0003, "step": 914 }, { "epoch": 
0.12492320294900676, "grad_norm": 0.046307675540447235, "learning_rate": 9.689223349643546e-05, "loss": 46.0032, "step": 915 }, { "epoch": 0.12505973103966142, "grad_norm": 0.03981054201722145, "learning_rate": 9.688468264530277e-05, "loss": 46.0048, "step": 916 }, { "epoch": 0.12519625913031607, "grad_norm": 0.08676145225763321, "learning_rate": 9.687712292719997e-05, "loss": 46.0046, "step": 917 }, { "epoch": 0.12533278722097072, "grad_norm": 0.06843296438455582, "learning_rate": 9.686955434355677e-05, "loss": 46.0047, "step": 918 }, { "epoch": 0.12546931531162536, "grad_norm": 0.033135831356048584, "learning_rate": 9.686197689580456e-05, "loss": 46.006, "step": 919 }, { "epoch": 0.12560584340228, "grad_norm": 0.03578069806098938, "learning_rate": 9.685439058537642e-05, "loss": 46.0033, "step": 920 }, { "epoch": 0.12574237149293468, "grad_norm": 0.0697474330663681, "learning_rate": 9.684679541370711e-05, "loss": 46.0027, "step": 921 }, { "epoch": 0.12587889958358933, "grad_norm": 0.0875772088766098, "learning_rate": 9.6839191382233e-05, "loss": 46.0059, "step": 922 }, { "epoch": 0.12601542767424398, "grad_norm": 0.06745638698339462, "learning_rate": 9.683157849239225e-05, "loss": 46.0047, "step": 923 }, { "epoch": 0.12615195576489863, "grad_norm": 0.11721964180469513, "learning_rate": 9.682395674562458e-05, "loss": 46.0016, "step": 924 }, { "epoch": 0.12628848385555327, "grad_norm": 0.03514353930950165, "learning_rate": 9.681632614337148e-05, "loss": 46.0013, "step": 925 }, { "epoch": 0.12642501194620792, "grad_norm": 0.0709599182009697, "learning_rate": 9.680868668707603e-05, "loss": 46.0045, "step": 926 }, { "epoch": 0.1265615400368626, "grad_norm": 0.05006397143006325, "learning_rate": 9.680103837818306e-05, "loss": 46.0018, "step": 927 }, { "epoch": 0.12669806812751724, "grad_norm": 0.06728421151638031, "learning_rate": 9.679338121813904e-05, "loss": 46.001, "step": 928 }, { "epoch": 0.1268345962181719, "grad_norm": 0.048774946480989456, "learning_rate": 9.678571520839208e-05, "loss": 46.0042, "step": 929 }, { "epoch": 0.12697112430882654, "grad_norm": 0.03572423383593559, "learning_rate": 9.677804035039204e-05, "loss": 46.0046, "step": 930 }, { "epoch": 0.12710765239948119, "grad_norm": 0.03603662922978401, "learning_rate": 9.67703566455904e-05, "loss": 46.0062, "step": 931 }, { "epoch": 0.12724418049013583, "grad_norm": 0.03725367411971092, "learning_rate": 9.676266409544031e-05, "loss": 46.0036, "step": 932 }, { "epoch": 0.1273807085807905, "grad_norm": 0.03632837161421776, "learning_rate": 9.67549627013966e-05, "loss": 46.0028, "step": 933 }, { "epoch": 0.12751723667144516, "grad_norm": 0.05126827210187912, "learning_rate": 9.674725246491582e-05, "loss": 46.0061, "step": 934 }, { "epoch": 0.1276537647620998, "grad_norm": 0.06471189111471176, "learning_rate": 9.673953338745612e-05, "loss": 46.0019, "step": 935 }, { "epoch": 0.12779029285275445, "grad_norm": 0.04153895750641823, "learning_rate": 9.673180547047736e-05, "loss": 46.0003, "step": 936 }, { "epoch": 0.1279268209434091, "grad_norm": 0.10111337900161743, "learning_rate": 9.672406871544108e-05, "loss": 46.0025, "step": 937 }, { "epoch": 0.12806334903406374, "grad_norm": 0.05235195532441139, "learning_rate": 9.671632312381046e-05, "loss": 46.0041, "step": 938 }, { "epoch": 0.12819987712471842, "grad_norm": 0.038805484771728516, "learning_rate": 9.67085686970504e-05, "loss": 46.0017, "step": 939 }, { "epoch": 0.12833640521537307, "grad_norm": 0.04370106756687164, "learning_rate": 9.67008054366274e-05, "loss": 46.0, "step": 
940 }, { "epoch": 0.12847293330602771, "grad_norm": 0.056308455765247345, "learning_rate": 9.66930333440097e-05, "loss": 46.0026, "step": 941 }, { "epoch": 0.12860946139668236, "grad_norm": 0.038316335529088974, "learning_rate": 9.668525242066717e-05, "loss": 46.0001, "step": 942 }, { "epoch": 0.128745989487337, "grad_norm": 0.05051232874393463, "learning_rate": 9.667746266807136e-05, "loss": 46.0059, "step": 943 }, { "epoch": 0.12888251757799168, "grad_norm": 0.05074362829327583, "learning_rate": 9.666966408769548e-05, "loss": 46.0052, "step": 944 }, { "epoch": 0.12901904566864633, "grad_norm": 0.12170279026031494, "learning_rate": 9.666185668101446e-05, "loss": 46.0066, "step": 945 }, { "epoch": 0.12915557375930098, "grad_norm": 0.07977000623941422, "learning_rate": 9.665404044950482e-05, "loss": 46.0101, "step": 946 }, { "epoch": 0.12929210184995563, "grad_norm": 0.04740988835692406, "learning_rate": 9.66462153946448e-05, "loss": 46.0052, "step": 947 }, { "epoch": 0.12942862994061027, "grad_norm": 0.05185500904917717, "learning_rate": 9.663838151791431e-05, "loss": 46.0103, "step": 948 }, { "epoch": 0.12956515803126492, "grad_norm": 0.06090673431754112, "learning_rate": 9.663053882079491e-05, "loss": 46.0122, "step": 949 }, { "epoch": 0.1297016861219196, "grad_norm": 0.06926035135984421, "learning_rate": 9.66226873047698e-05, "loss": 46.0, "step": 950 }, { "epoch": 0.12983821421257424, "grad_norm": 0.03058060258626938, "learning_rate": 9.661482697132395e-05, "loss": 46.0052, "step": 951 }, { "epoch": 0.1299747423032289, "grad_norm": 0.041967108845710754, "learning_rate": 9.660695782194387e-05, "loss": 46.0055, "step": 952 }, { "epoch": 0.13011127039388354, "grad_norm": 0.055860403925180435, "learning_rate": 9.659907985811783e-05, "loss": 46.0016, "step": 953 }, { "epoch": 0.13024779848453819, "grad_norm": 0.04529847577214241, "learning_rate": 9.659119308133571e-05, "loss": 46.0044, "step": 954 }, { "epoch": 0.13038432657519283, "grad_norm": 0.04365735128521919, "learning_rate": 9.65832974930891e-05, "loss": 46.0036, "step": 955 }, { "epoch": 0.1305208546658475, "grad_norm": 0.07392144948244095, "learning_rate": 9.657539309487123e-05, "loss": 46.0005, "step": 956 }, { "epoch": 0.13065738275650216, "grad_norm": 0.04029381647706032, "learning_rate": 9.6567479888177e-05, "loss": 46.0012, "step": 957 }, { "epoch": 0.1307939108471568, "grad_norm": 0.06639609485864639, "learning_rate": 9.655955787450299e-05, "loss": 46.002, "step": 958 }, { "epoch": 0.13093043893781145, "grad_norm": 0.04604870826005936, "learning_rate": 9.655162705534744e-05, "loss": 46.0035, "step": 959 }, { "epoch": 0.1310669670284661, "grad_norm": 0.03753645345568657, "learning_rate": 9.654368743221022e-05, "loss": 46.0042, "step": 960 }, { "epoch": 0.13120349511912077, "grad_norm": 0.05301903188228607, "learning_rate": 9.653573900659292e-05, "loss": 46.001, "step": 961 }, { "epoch": 0.13134002320977542, "grad_norm": 0.11774907261133194, "learning_rate": 9.652778177999875e-05, "loss": 46.0034, "step": 962 }, { "epoch": 0.13147655130043007, "grad_norm": 0.08642661571502686, "learning_rate": 9.651981575393263e-05, "loss": 46.001, "step": 963 }, { "epoch": 0.13161307939108471, "grad_norm": 0.04304247349500656, "learning_rate": 9.651184092990108e-05, "loss": 46.0041, "step": 964 }, { "epoch": 0.13174960748173936, "grad_norm": 0.047795798629522324, "learning_rate": 9.650385730941238e-05, "loss": 46.0011, "step": 965 }, { "epoch": 0.131886135572394, "grad_norm": 0.11916861683130264, "learning_rate": 9.649586489397637e-05, "loss": 
46.0037, "step": 966 }, { "epoch": 0.13202266366304868, "grad_norm": 0.04457377642393112, "learning_rate": 9.648786368510461e-05, "loss": 46.0067, "step": 967 }, { "epoch": 0.13215919175370333, "grad_norm": 0.04350854083895683, "learning_rate": 9.647985368431032e-05, "loss": 46.0054, "step": 968 }, { "epoch": 0.13229571984435798, "grad_norm": 0.04022478684782982, "learning_rate": 9.647183489310836e-05, "loss": 46.0092, "step": 969 }, { "epoch": 0.13243224793501263, "grad_norm": 0.08951615542173386, "learning_rate": 9.646380731301528e-05, "loss": 46.0035, "step": 970 }, { "epoch": 0.13256877602566727, "grad_norm": 0.05615146458148956, "learning_rate": 9.64557709455493e-05, "loss": 46.0001, "step": 971 }, { "epoch": 0.13270530411632192, "grad_norm": 0.06977701187133789, "learning_rate": 9.644772579223022e-05, "loss": 46.0054, "step": 972 }, { "epoch": 0.1328418322069766, "grad_norm": 0.035919468849897385, "learning_rate": 9.643967185457962e-05, "loss": 46.0045, "step": 973 }, { "epoch": 0.13297836029763124, "grad_norm": 0.07111049443483353, "learning_rate": 9.643160913412068e-05, "loss": 46.0011, "step": 974 }, { "epoch": 0.1331148883882859, "grad_norm": 0.03148115798830986, "learning_rate": 9.642353763237824e-05, "loss": 46.0121, "step": 975 }, { "epoch": 0.13325141647894054, "grad_norm": 0.10738994926214218, "learning_rate": 9.641545735087877e-05, "loss": 46.003, "step": 976 }, { "epoch": 0.13338794456959518, "grad_norm": 0.07435135543346405, "learning_rate": 9.640736829115047e-05, "loss": 46.0001, "step": 977 }, { "epoch": 0.13352447266024986, "grad_norm": 0.04236827418208122, "learning_rate": 9.639927045472316e-05, "loss": 46.0016, "step": 978 }, { "epoch": 0.1336610007509045, "grad_norm": 0.06364499032497406, "learning_rate": 9.639116384312835e-05, "loss": 46.0062, "step": 979 }, { "epoch": 0.13379752884155915, "grad_norm": 0.06840364634990692, "learning_rate": 9.638304845789917e-05, "loss": 46.0047, "step": 980 }, { "epoch": 0.1339340569322138, "grad_norm": 0.057626884430646896, "learning_rate": 9.637492430057039e-05, "loss": 46.0033, "step": 981 }, { "epoch": 0.13407058502286845, "grad_norm": 0.028512686491012573, "learning_rate": 9.636679137267852e-05, "loss": 46.0051, "step": 982 }, { "epoch": 0.1342071131135231, "grad_norm": 0.03788848593831062, "learning_rate": 9.635864967576168e-05, "loss": 46.0038, "step": 983 }, { "epoch": 0.13434364120417777, "grad_norm": 0.057888343930244446, "learning_rate": 9.635049921135963e-05, "loss": 46.0029, "step": 984 }, { "epoch": 0.13448016929483242, "grad_norm": 0.056508779525756836, "learning_rate": 9.634233998101381e-05, "loss": 46.0013, "step": 985 }, { "epoch": 0.13461669738548707, "grad_norm": 0.049630455672740936, "learning_rate": 9.633417198626735e-05, "loss": 46.0056, "step": 986 }, { "epoch": 0.1347532254761417, "grad_norm": 0.032446060329675674, "learning_rate": 9.632599522866497e-05, "loss": 46.0087, "step": 987 }, { "epoch": 0.13488975356679636, "grad_norm": 0.02808917686343193, "learning_rate": 9.631780970975311e-05, "loss": 46.0065, "step": 988 }, { "epoch": 0.135026281657451, "grad_norm": 0.04135937988758087, "learning_rate": 9.630961543107981e-05, "loss": 46.0028, "step": 989 }, { "epoch": 0.13516280974810568, "grad_norm": 0.04066353291273117, "learning_rate": 9.630141239419483e-05, "loss": 46.003, "step": 990 }, { "epoch": 0.13529933783876033, "grad_norm": 0.04382762312889099, "learning_rate": 9.629320060064953e-05, "loss": 46.0056, "step": 991 }, { "epoch": 0.13543586592941498, "grad_norm": 0.03829548507928848, "learning_rate": 
9.628498005199696e-05, "loss": 46.0032, "step": 992 }, { "epoch": 0.13557239402006963, "grad_norm": 0.041225217282772064, "learning_rate": 9.627675074979179e-05, "loss": 46.008, "step": 993 }, { "epoch": 0.13570892211072427, "grad_norm": 0.06963073462247849, "learning_rate": 9.62685126955904e-05, "loss": 46.0033, "step": 994 }, { "epoch": 0.13584545020137892, "grad_norm": 0.09863008558750153, "learning_rate": 9.626026589095078e-05, "loss": 46.0084, "step": 995 }, { "epoch": 0.1359819782920336, "grad_norm": 0.15818136930465698, "learning_rate": 9.625201033743261e-05, "loss": 46.0065, "step": 996 }, { "epoch": 0.13611850638268824, "grad_norm": 0.08740869909524918, "learning_rate": 9.624374603659718e-05, "loss": 46.0059, "step": 997 }, { "epoch": 0.1362550344733429, "grad_norm": 0.07528246194124222, "learning_rate": 9.623547299000746e-05, "loss": 46.0112, "step": 998 }, { "epoch": 0.13639156256399754, "grad_norm": 0.049360450357198715, "learning_rate": 9.62271911992281e-05, "loss": 46.0087, "step": 999 }, { "epoch": 0.13652809065465218, "grad_norm": 0.08507868647575378, "learning_rate": 9.621890066582533e-05, "loss": 46.0079, "step": 1000 }, { "epoch": 0.13666461874530686, "grad_norm": 0.07158122956752777, "learning_rate": 9.621060139136713e-05, "loss": 46.0065, "step": 1001 }, { "epoch": 0.1368011468359615, "grad_norm": 0.0620691180229187, "learning_rate": 9.620229337742306e-05, "loss": 46.0034, "step": 1002 }, { "epoch": 0.13693767492661615, "grad_norm": 0.07214509695768356, "learning_rate": 9.619397662556435e-05, "loss": 46.0063, "step": 1003 }, { "epoch": 0.1370742030172708, "grad_norm": 0.07948952913284302, "learning_rate": 9.618565113736389e-05, "loss": 46.0013, "step": 1004 }, { "epoch": 0.13721073110792545, "grad_norm": 0.10923119634389877, "learning_rate": 9.617731691439624e-05, "loss": 46.0027, "step": 1005 }, { "epoch": 0.1373472591985801, "grad_norm": 0.08042927086353302, "learning_rate": 9.616897395823758e-05, "loss": 46.0037, "step": 1006 }, { "epoch": 0.13748378728923477, "grad_norm": 0.05807102099061012, "learning_rate": 9.616062227046573e-05, "loss": 46.0043, "step": 1007 }, { "epoch": 0.13762031537988942, "grad_norm": 0.05810738727450371, "learning_rate": 9.615226185266024e-05, "loss": 46.0014, "step": 1008 }, { "epoch": 0.13775684347054407, "grad_norm": 0.050710223615169525, "learning_rate": 9.614389270640223e-05, "loss": 46.0016, "step": 1009 }, { "epoch": 0.1378933715611987, "grad_norm": 0.047813307493925095, "learning_rate": 9.613551483327448e-05, "loss": 46.0051, "step": 1010 }, { "epoch": 0.13802989965185336, "grad_norm": 0.05913747847080231, "learning_rate": 9.612712823486147e-05, "loss": 46.0044, "step": 1011 }, { "epoch": 0.138166427742508, "grad_norm": 0.07266601920127869, "learning_rate": 9.611873291274927e-05, "loss": 46.006, "step": 1012 }, { "epoch": 0.13830295583316268, "grad_norm": 0.04592336714267731, "learning_rate": 9.611032886852565e-05, "loss": 46.0004, "step": 1013 }, { "epoch": 0.13843948392381733, "grad_norm": 0.07942342013120651, "learning_rate": 9.610191610377999e-05, "loss": 46.0066, "step": 1014 }, { "epoch": 0.13857601201447198, "grad_norm": 0.03488558158278465, "learning_rate": 9.609349462010334e-05, "loss": 46.0021, "step": 1015 }, { "epoch": 0.13871254010512662, "grad_norm": 0.03294004499912262, "learning_rate": 9.60850644190884e-05, "loss": 46.0053, "step": 1016 }, { "epoch": 0.13884906819578127, "grad_norm": 0.044882286339998245, "learning_rate": 9.607662550232953e-05, "loss": 46.0051, "step": 1017 }, { "epoch": 0.13898559628643595, 
"grad_norm": 0.05368762090802193, "learning_rate": 9.60681778714227e-05, "loss": 46.0, "step": 1018 }, { "epoch": 0.1391221243770906, "grad_norm": 0.09411368519067764, "learning_rate": 9.605972152796556e-05, "loss": 46.0071, "step": 1019 }, { "epoch": 0.13925865246774524, "grad_norm": 0.03581425920128822, "learning_rate": 9.60512564735574e-05, "loss": 46.0046, "step": 1020 }, { "epoch": 0.1393951805583999, "grad_norm": 0.05393175035715103, "learning_rate": 9.604278270979917e-05, "loss": 46.0014, "step": 1021 }, { "epoch": 0.13953170864905454, "grad_norm": 0.038308240473270416, "learning_rate": 9.603430023829341e-05, "loss": 46.0076, "step": 1022 }, { "epoch": 0.13966823673970918, "grad_norm": 0.07727690041065216, "learning_rate": 9.602580906064438e-05, "loss": 46.006, "step": 1023 }, { "epoch": 0.13980476483036386, "grad_norm": 0.06866724044084549, "learning_rate": 9.601730917845797e-05, "loss": 46.0045, "step": 1024 }, { "epoch": 0.1399412929210185, "grad_norm": 0.045124899595975876, "learning_rate": 9.60088005933417e-05, "loss": 46.0048, "step": 1025 }, { "epoch": 0.14007782101167315, "grad_norm": 0.08196580410003662, "learning_rate": 9.60002833069047e-05, "loss": 46.0026, "step": 1026 }, { "epoch": 0.1402143491023278, "grad_norm": 0.03134594112634659, "learning_rate": 9.599175732075782e-05, "loss": 46.0038, "step": 1027 }, { "epoch": 0.14035087719298245, "grad_norm": 0.10086174309253693, "learning_rate": 9.598322263651352e-05, "loss": 46.0023, "step": 1028 }, { "epoch": 0.1404874052836371, "grad_norm": 0.09338849782943726, "learning_rate": 9.597467925578587e-05, "loss": 46.0021, "step": 1029 }, { "epoch": 0.14062393337429177, "grad_norm": 0.09058735519647598, "learning_rate": 9.596612718019066e-05, "loss": 46.0026, "step": 1030 }, { "epoch": 0.14076046146494642, "grad_norm": 0.080014668405056, "learning_rate": 9.595756641134528e-05, "loss": 46.0013, "step": 1031 }, { "epoch": 0.14089698955560107, "grad_norm": 0.03662969544529915, "learning_rate": 9.594899695086874e-05, "loss": 46.0062, "step": 1032 }, { "epoch": 0.1410335176462557, "grad_norm": 0.0701504722237587, "learning_rate": 9.594041880038175e-05, "loss": 46.0008, "step": 1033 }, { "epoch": 0.14117004573691036, "grad_norm": 0.06847663223743439, "learning_rate": 9.593183196150662e-05, "loss": 46.0051, "step": 1034 }, { "epoch": 0.14130657382756504, "grad_norm": 0.03452630713582039, "learning_rate": 9.592323643586731e-05, "loss": 46.0023, "step": 1035 }, { "epoch": 0.14144310191821968, "grad_norm": 0.05782969295978546, "learning_rate": 9.591463222508946e-05, "loss": 46.0018, "step": 1036 }, { "epoch": 0.14157963000887433, "grad_norm": 0.033170003443956375, "learning_rate": 9.59060193308003e-05, "loss": 46.0014, "step": 1037 }, { "epoch": 0.14171615809952898, "grad_norm": 0.04278853163123131, "learning_rate": 9.589739775462873e-05, "loss": 46.0017, "step": 1038 }, { "epoch": 0.14185268619018362, "grad_norm": 0.03286951780319214, "learning_rate": 9.58887674982053e-05, "loss": 46.0042, "step": 1039 }, { "epoch": 0.14198921428083827, "grad_norm": 0.03890041634440422, "learning_rate": 9.588012856316219e-05, "loss": 46.0062, "step": 1040 }, { "epoch": 0.14212574237149295, "grad_norm": 0.04147426411509514, "learning_rate": 9.58714809511332e-05, "loss": 46.0019, "step": 1041 }, { "epoch": 0.1422622704621476, "grad_norm": 0.05893467739224434, "learning_rate": 9.58628246637538e-05, "loss": 46.0047, "step": 1042 }, { "epoch": 0.14239879855280224, "grad_norm": 0.051441740244627, "learning_rate": 9.58541597026611e-05, "loss": 46.0076, "step": 
1043 }, { "epoch": 0.1425353266434569, "grad_norm": 0.06807120889425278, "learning_rate": 9.584548606949383e-05, "loss": 46.0034, "step": 1044 }, { "epoch": 0.14267185473411154, "grad_norm": 0.07531283050775528, "learning_rate": 9.583680376589241e-05, "loss": 46.0041, "step": 1045 }, { "epoch": 0.14280838282476618, "grad_norm": 0.13253933191299438, "learning_rate": 9.582811279349882e-05, "loss": 46.0059, "step": 1046 }, { "epoch": 0.14294491091542086, "grad_norm": 0.06514899432659149, "learning_rate": 9.581941315395672e-05, "loss": 46.0099, "step": 1047 }, { "epoch": 0.1430814390060755, "grad_norm": 0.07592852413654327, "learning_rate": 9.581070484891146e-05, "loss": 46.0088, "step": 1048 }, { "epoch": 0.14321796709673015, "grad_norm": 0.09079253673553467, "learning_rate": 9.580198788000993e-05, "loss": 46.0075, "step": 1049 }, { "epoch": 0.1433544951873848, "grad_norm": 0.10257468372583389, "learning_rate": 9.579326224890075e-05, "loss": 46.0069, "step": 1050 }, { "epoch": 0.14349102327803945, "grad_norm": 0.06948917359113693, "learning_rate": 9.57845279572341e-05, "loss": 46.0071, "step": 1051 }, { "epoch": 0.1436275513686941, "grad_norm": 0.08531492203474045, "learning_rate": 9.577578500666187e-05, "loss": 46.0081, "step": 1052 }, { "epoch": 0.14376407945934877, "grad_norm": 0.04098331183195114, "learning_rate": 9.576703339883751e-05, "loss": 46.0094, "step": 1053 }, { "epoch": 0.14390060755000342, "grad_norm": 0.07196906208992004, "learning_rate": 9.575827313541618e-05, "loss": 46.0048, "step": 1054 }, { "epoch": 0.14403713564065806, "grad_norm": 0.08923915773630142, "learning_rate": 9.574950421805466e-05, "loss": 46.0044, "step": 1055 }, { "epoch": 0.1441736637313127, "grad_norm": 0.05379122123122215, "learning_rate": 9.574072664841132e-05, "loss": 46.0055, "step": 1056 }, { "epoch": 0.14431019182196736, "grad_norm": 0.051071200519800186, "learning_rate": 9.573194042814623e-05, "loss": 46.0019, "step": 1057 }, { "epoch": 0.14444671991262203, "grad_norm": 0.09496951103210449, "learning_rate": 9.572314555892104e-05, "loss": 46.0106, "step": 1058 }, { "epoch": 0.14458324800327668, "grad_norm": 0.03693302720785141, "learning_rate": 9.571434204239907e-05, "loss": 46.0015, "step": 1059 }, { "epoch": 0.14471977609393133, "grad_norm": 0.08030872792005539, "learning_rate": 9.570552988024526e-05, "loss": 46.0011, "step": 1060 }, { "epoch": 0.14485630418458598, "grad_norm": 0.03652976080775261, "learning_rate": 9.569670907412622e-05, "loss": 46.0053, "step": 1061 }, { "epoch": 0.14499283227524062, "grad_norm": 0.07603907585144043, "learning_rate": 9.568787962571016e-05, "loss": 46.0076, "step": 1062 }, { "epoch": 0.14512936036589527, "grad_norm": 0.052123162895441055, "learning_rate": 9.56790415366669e-05, "loss": 46.0031, "step": 1063 }, { "epoch": 0.14526588845654995, "grad_norm": 0.09682771563529968, "learning_rate": 9.567019480866795e-05, "loss": 46.0028, "step": 1064 }, { "epoch": 0.1454024165472046, "grad_norm": 0.09613823145627975, "learning_rate": 9.566133944338644e-05, "loss": 46.0046, "step": 1065 }, { "epoch": 0.14553894463785924, "grad_norm": 0.05020280182361603, "learning_rate": 9.56524754424971e-05, "loss": 46.0013, "step": 1066 }, { "epoch": 0.1456754727285139, "grad_norm": 0.052428584545850754, "learning_rate": 9.56436028076763e-05, "loss": 46.0088, "step": 1067 }, { "epoch": 0.14581200081916854, "grad_norm": 0.09420745074748993, "learning_rate": 9.563472154060211e-05, "loss": 46.0032, "step": 1068 }, { "epoch": 0.14594852890982318, "grad_norm": 0.10344218462705612, 
"learning_rate": 9.562583164295416e-05, "loss": 46.0018, "step": 1069 }, { "epoch": 0.14608505700047786, "grad_norm": 0.03792449086904526, "learning_rate": 9.561693311641371e-05, "loss": 46.0114, "step": 1070 }, { "epoch": 0.1462215850911325, "grad_norm": 0.07102537900209427, "learning_rate": 9.560802596266371e-05, "loss": 46.003, "step": 1071 }, { "epoch": 0.14635811318178715, "grad_norm": 0.23002368211746216, "learning_rate": 9.559911018338869e-05, "loss": 46.005, "step": 1072 }, { "epoch": 0.1464946412724418, "grad_norm": 0.07016875594854355, "learning_rate": 9.559018578027483e-05, "loss": 46.0044, "step": 1073 }, { "epoch": 0.14663116936309645, "grad_norm": 0.05370096489787102, "learning_rate": 9.558125275500993e-05, "loss": 46.0072, "step": 1074 }, { "epoch": 0.14676769745375112, "grad_norm": 0.06562657654285431, "learning_rate": 9.557231110928346e-05, "loss": 46.0027, "step": 1075 }, { "epoch": 0.14690422554440577, "grad_norm": 0.08427286148071289, "learning_rate": 9.556336084478646e-05, "loss": 46.0051, "step": 1076 }, { "epoch": 0.14704075363506042, "grad_norm": 0.0571058988571167, "learning_rate": 9.555440196321163e-05, "loss": 46.0034, "step": 1077 }, { "epoch": 0.14717728172571506, "grad_norm": 0.07639696449041367, "learning_rate": 9.554543446625332e-05, "loss": 46.0086, "step": 1078 }, { "epoch": 0.1473138098163697, "grad_norm": 0.03286051005125046, "learning_rate": 9.55364583556075e-05, "loss": 46.0077, "step": 1079 }, { "epoch": 0.14745033790702436, "grad_norm": 0.05243142321705818, "learning_rate": 9.552747363297172e-05, "loss": 46.0085, "step": 1080 }, { "epoch": 0.14758686599767903, "grad_norm": 0.0404677651822567, "learning_rate": 9.551848030004522e-05, "loss": 46.0075, "step": 1081 }, { "epoch": 0.14772339408833368, "grad_norm": 0.05352947488427162, "learning_rate": 9.550947835852886e-05, "loss": 46.0059, "step": 1082 }, { "epoch": 0.14785992217898833, "grad_norm": 0.043886035680770874, "learning_rate": 9.550046781012507e-05, "loss": 46.0005, "step": 1083 }, { "epoch": 0.14799645026964298, "grad_norm": 0.05935193598270416, "learning_rate": 9.5491448656538e-05, "loss": 46.0051, "step": 1084 }, { "epoch": 0.14813297836029762, "grad_norm": 0.10466616600751877, "learning_rate": 9.548242089947335e-05, "loss": 46.0055, "step": 1085 }, { "epoch": 0.14826950645095227, "grad_norm": 0.040502626448869705, "learning_rate": 9.547338454063848e-05, "loss": 46.0068, "step": 1086 }, { "epoch": 0.14840603454160695, "grad_norm": 0.05848405882716179, "learning_rate": 9.546433958174238e-05, "loss": 46.0008, "step": 1087 }, { "epoch": 0.1485425626322616, "grad_norm": 0.04107705131173134, "learning_rate": 9.545528602449566e-05, "loss": 46.0055, "step": 1088 }, { "epoch": 0.14867909072291624, "grad_norm": 0.03532888740301132, "learning_rate": 9.544622387061055e-05, "loss": 46.0037, "step": 1089 }, { "epoch": 0.1488156188135709, "grad_norm": 0.041227225214242935, "learning_rate": 9.54371531218009e-05, "loss": 46.0034, "step": 1090 }, { "epoch": 0.14895214690422554, "grad_norm": 0.04631313309073448, "learning_rate": 9.542807377978222e-05, "loss": 46.0027, "step": 1091 }, { "epoch": 0.1490886749948802, "grad_norm": 0.05095398798584938, "learning_rate": 9.541898584627163e-05, "loss": 46.0098, "step": 1092 }, { "epoch": 0.14922520308553486, "grad_norm": 0.04225940257310867, "learning_rate": 9.540988932298782e-05, "loss": 46.0033, "step": 1093 }, { "epoch": 0.1493617311761895, "grad_norm": 0.04354506731033325, "learning_rate": 9.540078421165121e-05, "loss": 46.0039, "step": 1094 }, { "epoch": 
0.14949825926684415, "grad_norm": 0.05046950653195381, "learning_rate": 9.539167051398373e-05, "loss": 46.0028, "step": 1095 }, { "epoch": 0.1496347873574988, "grad_norm": 0.2189709097146988, "learning_rate": 9.538254823170903e-05, "loss": 46.0134, "step": 1096 }, { "epoch": 0.14977131544815345, "grad_norm": 0.12210095673799515, "learning_rate": 9.537341736655233e-05, "loss": 46.0093, "step": 1097 }, { "epoch": 0.14990784353880812, "grad_norm": 0.10862187296152115, "learning_rate": 9.536427792024047e-05, "loss": 46.01, "step": 1098 }, { "epoch": 0.15004437162946277, "grad_norm": 0.09195853769779205, "learning_rate": 9.535512989450196e-05, "loss": 46.0038, "step": 1099 }, { "epoch": 0.15018089972011742, "grad_norm": 0.20868568122386932, "learning_rate": 9.534597329106688e-05, "loss": 46.0064, "step": 1100 }, { "epoch": 0.15031742781077206, "grad_norm": 0.11228267103433609, "learning_rate": 9.533680811166696e-05, "loss": 46.0096, "step": 1101 }, { "epoch": 0.1504539559014267, "grad_norm": 0.03305850178003311, "learning_rate": 9.532763435803556e-05, "loss": 46.0012, "step": 1102 }, { "epoch": 0.15059048399208136, "grad_norm": 0.03495301678776741, "learning_rate": 9.531845203190762e-05, "loss": 46.0076, "step": 1103 }, { "epoch": 0.15072701208273603, "grad_norm": 0.07040043920278549, "learning_rate": 9.530926113501973e-05, "loss": 46.0037, "step": 1104 }, { "epoch": 0.15086354017339068, "grad_norm": 0.03761465847492218, "learning_rate": 9.530006166911013e-05, "loss": 46.0046, "step": 1105 }, { "epoch": 0.15100006826404533, "grad_norm": 0.09145580977201462, "learning_rate": 9.529085363591862e-05, "loss": 46.0082, "step": 1106 }, { "epoch": 0.15113659635469998, "grad_norm": 0.04139364883303642, "learning_rate": 9.528163703718667e-05, "loss": 46.0028, "step": 1107 }, { "epoch": 0.15127312444535462, "grad_norm": 0.05348122492432594, "learning_rate": 9.527241187465734e-05, "loss": 46.0019, "step": 1108 }, { "epoch": 0.15140965253600927, "grad_norm": 0.03554386645555496, "learning_rate": 9.526317815007533e-05, "loss": 46.0062, "step": 1109 }, { "epoch": 0.15154618062666395, "grad_norm": 0.07018011063337326, "learning_rate": 9.525393586518693e-05, "loss": 46.0044, "step": 1110 }, { "epoch": 0.1516827087173186, "grad_norm": 0.06616585701704025, "learning_rate": 9.524468502174008e-05, "loss": 46.0099, "step": 1111 }, { "epoch": 0.15181923680797324, "grad_norm": 0.044512324035167694, "learning_rate": 9.523542562148432e-05, "loss": 46.002, "step": 1112 }, { "epoch": 0.1519557648986279, "grad_norm": 0.07083824276924133, "learning_rate": 9.522615766617085e-05, "loss": 46.0033, "step": 1113 }, { "epoch": 0.15209229298928253, "grad_norm": 0.035782668739557266, "learning_rate": 9.521688115755242e-05, "loss": 46.003, "step": 1114 }, { "epoch": 0.1522288210799372, "grad_norm": 0.10829572379589081, "learning_rate": 9.520759609738343e-05, "loss": 46.0064, "step": 1115 }, { "epoch": 0.15236534917059186, "grad_norm": 0.04117819666862488, "learning_rate": 9.51983024874199e-05, "loss": 46.0085, "step": 1116 }, { "epoch": 0.1525018772612465, "grad_norm": 0.029585180804133415, "learning_rate": 9.518900032941947e-05, "loss": 46.0089, "step": 1117 }, { "epoch": 0.15263840535190115, "grad_norm": 0.04569714888930321, "learning_rate": 9.517968962514141e-05, "loss": 46.0033, "step": 1118 }, { "epoch": 0.1527749334425558, "grad_norm": 0.03192995861172676, "learning_rate": 9.517037037634655e-05, "loss": 46.0083, "step": 1119 }, { "epoch": 0.15291146153321045, "grad_norm": 0.031456708908081055, "learning_rate": 
9.51610425847974e-05, "loss": 46.0105, "step": 1120 }, { "epoch": 0.15304798962386512, "grad_norm": 0.033218834549188614, "learning_rate": 9.515170625225805e-05, "loss": 46.006, "step": 1121 }, { "epoch": 0.15318451771451977, "grad_norm": 0.06490657478570938, "learning_rate": 9.514236138049422e-05, "loss": 46.0078, "step": 1122 }, { "epoch": 0.15332104580517442, "grad_norm": 0.048283979296684265, "learning_rate": 9.513300797127324e-05, "loss": 46.0043, "step": 1123 }, { "epoch": 0.15345757389582906, "grad_norm": 0.0797492191195488, "learning_rate": 9.512364602636405e-05, "loss": 46.0003, "step": 1124 }, { "epoch": 0.1535941019864837, "grad_norm": 0.05579257756471634, "learning_rate": 9.511427554753722e-05, "loss": 46.0017, "step": 1125 }, { "epoch": 0.15373063007713836, "grad_norm": 0.06035767123103142, "learning_rate": 9.51048965365649e-05, "loss": 46.001, "step": 1126 }, { "epoch": 0.15386715816779303, "grad_norm": 0.07266231626272202, "learning_rate": 9.50955089952209e-05, "loss": 46.0012, "step": 1127 }, { "epoch": 0.15400368625844768, "grad_norm": 0.02949325554072857, "learning_rate": 9.508611292528061e-05, "loss": 46.0029, "step": 1128 }, { "epoch": 0.15414021434910233, "grad_norm": 0.03317132964730263, "learning_rate": 9.507670832852102e-05, "loss": 46.0075, "step": 1129 }, { "epoch": 0.15427674243975698, "grad_norm": 0.07225502282381058, "learning_rate": 9.50672952067208e-05, "loss": 46.0064, "step": 1130 }, { "epoch": 0.15441327053041162, "grad_norm": 0.05574885755777359, "learning_rate": 9.505787356166016e-05, "loss": 46.0014, "step": 1131 }, { "epoch": 0.1545497986210663, "grad_norm": 0.026512276381254196, "learning_rate": 9.504844339512095e-05, "loss": 46.0074, "step": 1132 }, { "epoch": 0.15468632671172095, "grad_norm": 0.05689043179154396, "learning_rate": 9.503900470888665e-05, "loss": 46.0052, "step": 1133 }, { "epoch": 0.1548228548023756, "grad_norm": 0.03514964506030083, "learning_rate": 9.502955750474232e-05, "loss": 46.0045, "step": 1134 }, { "epoch": 0.15495938289303024, "grad_norm": 0.09339026361703873, "learning_rate": 9.502010178447463e-05, "loss": 46.0004, "step": 1135 }, { "epoch": 0.1550959109836849, "grad_norm": 0.06396578997373581, "learning_rate": 9.501063754987188e-05, "loss": 46.0005, "step": 1136 }, { "epoch": 0.15523243907433953, "grad_norm": 0.02555418200790882, "learning_rate": 9.500116480272398e-05, "loss": 46.0049, "step": 1137 }, { "epoch": 0.1553689671649942, "grad_norm": 0.03979639708995819, "learning_rate": 9.499168354482244e-05, "loss": 46.0001, "step": 1138 }, { "epoch": 0.15550549525564886, "grad_norm": 0.04433701932430267, "learning_rate": 9.49821937779604e-05, "loss": 46.0038, "step": 1139 }, { "epoch": 0.1556420233463035, "grad_norm": 0.06406650692224503, "learning_rate": 9.497269550393257e-05, "loss": 46.0026, "step": 1140 }, { "epoch": 0.15577855143695815, "grad_norm": 0.03584996238350868, "learning_rate": 9.496318872453531e-05, "loss": 46.0036, "step": 1141 }, { "epoch": 0.1559150795276128, "grad_norm": 0.06977511942386627, "learning_rate": 9.495367344156655e-05, "loss": 46.0022, "step": 1142 }, { "epoch": 0.15605160761826745, "grad_norm": 0.05156489461660385, "learning_rate": 9.494414965682586e-05, "loss": 46.0058, "step": 1143 }, { "epoch": 0.15618813570892212, "grad_norm": 0.03158146142959595, "learning_rate": 9.493461737211442e-05, "loss": 46.0051, "step": 1144 }, { "epoch": 0.15632466379957677, "grad_norm": 0.10384754836559296, "learning_rate": 9.492507658923497e-05, "loss": 46.0059, "step": 1145 }, { "epoch": 0.15646119189023142, 
"grad_norm": 0.07649780064821243, "learning_rate": 9.491552730999193e-05, "loss": 46.012, "step": 1146 }, { "epoch": 0.15659771998088606, "grad_norm": 0.09884501993656158, "learning_rate": 9.490596953619127e-05, "loss": 46.0111, "step": 1147 }, { "epoch": 0.1567342480715407, "grad_norm": 0.11522934585809708, "learning_rate": 9.489640326964058e-05, "loss": 46.0109, "step": 1148 }, { "epoch": 0.15687077616219539, "grad_norm": 0.1288016438484192, "learning_rate": 9.488682851214905e-05, "loss": 46.0131, "step": 1149 }, { "epoch": 0.15700730425285003, "grad_norm": 0.09086047857999802, "learning_rate": 9.487724526552753e-05, "loss": 46.0037, "step": 1150 }, { "epoch": 0.15714383234350468, "grad_norm": 0.07888873666524887, "learning_rate": 9.486765353158837e-05, "loss": 46.0086, "step": 1151 }, { "epoch": 0.15728036043415933, "grad_norm": 0.07214833050966263, "learning_rate": 9.485805331214564e-05, "loss": 46.0066, "step": 1152 }, { "epoch": 0.15741688852481397, "grad_norm": 0.04857967048883438, "learning_rate": 9.484844460901494e-05, "loss": 46.0045, "step": 1153 }, { "epoch": 0.15755341661546862, "grad_norm": 0.025982355698943138, "learning_rate": 9.483882742401349e-05, "loss": 46.0041, "step": 1154 }, { "epoch": 0.1576899447061233, "grad_norm": 0.06927240639925003, "learning_rate": 9.482920175896013e-05, "loss": 46.0053, "step": 1155 }, { "epoch": 0.15782647279677794, "grad_norm": 0.10595306009054184, "learning_rate": 9.48195676156753e-05, "loss": 46.0122, "step": 1156 }, { "epoch": 0.1579630008874326, "grad_norm": 0.06308285146951675, "learning_rate": 9.480992499598105e-05, "loss": 46.004, "step": 1157 }, { "epoch": 0.15809952897808724, "grad_norm": 0.06899264454841614, "learning_rate": 9.480027390170098e-05, "loss": 46.0024, "step": 1158 }, { "epoch": 0.1582360570687419, "grad_norm": 0.038066890090703964, "learning_rate": 9.479061433466034e-05, "loss": 46.0008, "step": 1159 }, { "epoch": 0.15837258515939653, "grad_norm": 0.042379919439554214, "learning_rate": 9.478094629668603e-05, "loss": 46.0045, "step": 1160 }, { "epoch": 0.1585091132500512, "grad_norm": 0.06670461595058441, "learning_rate": 9.477126978960643e-05, "loss": 46.0023, "step": 1161 }, { "epoch": 0.15864564134070586, "grad_norm": 0.049916066229343414, "learning_rate": 9.476158481525164e-05, "loss": 46.0043, "step": 1162 }, { "epoch": 0.1587821694313605, "grad_norm": 0.0892569050192833, "learning_rate": 9.475189137545325e-05, "loss": 46.007, "step": 1163 }, { "epoch": 0.15891869752201515, "grad_norm": 0.03525412827730179, "learning_rate": 9.474218947204459e-05, "loss": 46.0023, "step": 1164 }, { "epoch": 0.1590552256126698, "grad_norm": 0.08010265976190567, "learning_rate": 9.473247910686047e-05, "loss": 46.0001, "step": 1165 }, { "epoch": 0.15919175370332445, "grad_norm": 0.05976048856973648, "learning_rate": 9.472276028173734e-05, "loss": 46.0083, "step": 1166 }, { "epoch": 0.15932828179397912, "grad_norm": 0.05307581648230553, "learning_rate": 9.471303299851328e-05, "loss": 46.0073, "step": 1167 }, { "epoch": 0.15946480988463377, "grad_norm": 0.039263058453798294, "learning_rate": 9.47032972590279e-05, "loss": 46.0065, "step": 1168 }, { "epoch": 0.15960133797528842, "grad_norm": 0.0601196363568306, "learning_rate": 9.469355306512249e-05, "loss": 46.0023, "step": 1169 }, { "epoch": 0.15973786606594306, "grad_norm": 0.039549462497234344, "learning_rate": 9.468380041863987e-05, "loss": 46.0031, "step": 1170 }, { "epoch": 0.1598743941565977, "grad_norm": 0.09689195454120636, "learning_rate": 9.467403932142452e-05, "loss": 46.003, 
"step": 1171 }, { "epoch": 0.16001092224725239, "grad_norm": 0.04266255348920822, "learning_rate": 9.466426977532247e-05, "loss": 46.0079, "step": 1172 }, { "epoch": 0.16014745033790703, "grad_norm": 0.09745386987924576, "learning_rate": 9.465449178218136e-05, "loss": 46.004, "step": 1173 }, { "epoch": 0.16028397842856168, "grad_norm": 0.08361425250768661, "learning_rate": 9.464470534385046e-05, "loss": 46.0015, "step": 1174 }, { "epoch": 0.16042050651921633, "grad_norm": 0.10898271948099136, "learning_rate": 9.463491046218058e-05, "loss": 46.0025, "step": 1175 }, { "epoch": 0.16055703460987097, "grad_norm": 0.0588994026184082, "learning_rate": 9.462510713902417e-05, "loss": 46.0089, "step": 1176 }, { "epoch": 0.16069356270052562, "grad_norm": 0.044915251433849335, "learning_rate": 9.461529537623527e-05, "loss": 46.0017, "step": 1177 }, { "epoch": 0.1608300907911803, "grad_norm": 0.030732598155736923, "learning_rate": 9.460547517566949e-05, "loss": 46.0066, "step": 1178 }, { "epoch": 0.16096661888183494, "grad_norm": 0.11266729235649109, "learning_rate": 9.459564653918409e-05, "loss": 46.0024, "step": 1179 }, { "epoch": 0.1611031469724896, "grad_norm": 0.05787282437086105, "learning_rate": 9.458580946863785e-05, "loss": 46.002, "step": 1180 }, { "epoch": 0.16123967506314424, "grad_norm": 0.08448874205350876, "learning_rate": 9.45759639658912e-05, "loss": 46.004, "step": 1181 }, { "epoch": 0.1613762031537989, "grad_norm": 0.025996146723628044, "learning_rate": 9.456611003280617e-05, "loss": 46.0085, "step": 1182 }, { "epoch": 0.16151273124445353, "grad_norm": 0.07552418857812881, "learning_rate": 9.455624767124634e-05, "loss": 46.0016, "step": 1183 }, { "epoch": 0.1616492593351082, "grad_norm": 0.036268964409828186, "learning_rate": 9.454637688307692e-05, "loss": 46.0009, "step": 1184 }, { "epoch": 0.16178578742576286, "grad_norm": 0.03546437248587608, "learning_rate": 9.45364976701647e-05, "loss": 46.0084, "step": 1185 }, { "epoch": 0.1619223155164175, "grad_norm": 0.04400569945573807, "learning_rate": 9.452661003437808e-05, "loss": 46.0047, "step": 1186 }, { "epoch": 0.16205884360707215, "grad_norm": 0.10435964167118073, "learning_rate": 9.451671397758701e-05, "loss": 46.006, "step": 1187 }, { "epoch": 0.1621953716977268, "grad_norm": 0.0960523784160614, "learning_rate": 9.45068095016631e-05, "loss": 46.003, "step": 1188 }, { "epoch": 0.16233189978838147, "grad_norm": 0.045318808406591415, "learning_rate": 9.449689660847948e-05, "loss": 46.0049, "step": 1189 }, { "epoch": 0.16246842787903612, "grad_norm": 0.06377995014190674, "learning_rate": 9.448697529991092e-05, "loss": 46.0025, "step": 1190 }, { "epoch": 0.16260495596969077, "grad_norm": 0.055316515266895294, "learning_rate": 9.447704557783377e-05, "loss": 46.0096, "step": 1191 }, { "epoch": 0.16274148406034541, "grad_norm": 0.036629460752010345, "learning_rate": 9.446710744412595e-05, "loss": 46.0066, "step": 1192 }, { "epoch": 0.16287801215100006, "grad_norm": 0.041809890419244766, "learning_rate": 9.445716090066701e-05, "loss": 46.0032, "step": 1193 }, { "epoch": 0.1630145402416547, "grad_norm": 0.034480322152376175, "learning_rate": 9.444720594933807e-05, "loss": 46.0061, "step": 1194 }, { "epoch": 0.16315106833230938, "grad_norm": 0.09660469740629196, "learning_rate": 9.443724259202182e-05, "loss": 46.004, "step": 1195 }, { "epoch": 0.16328759642296403, "grad_norm": 0.09552944451570511, "learning_rate": 9.442727083060258e-05, "loss": 46.01, "step": 1196 }, { "epoch": 0.16342412451361868, "grad_norm": 0.13854628801345825, 
"learning_rate": 9.441729066696625e-05, "loss": 46.0098, "step": 1197 }, { "epoch": 0.16356065260427333, "grad_norm": 0.09629393368959427, "learning_rate": 9.440730210300026e-05, "loss": 46.0176, "step": 1198 }, { "epoch": 0.16369718069492797, "grad_norm": 0.056496817618608475, "learning_rate": 9.439730514059371e-05, "loss": 46.0037, "step": 1199 }, { "epoch": 0.16383370878558262, "grad_norm": 0.28363895416259766, "learning_rate": 9.438729978163727e-05, "loss": 46.0024, "step": 1200 }, { "epoch": 0.1639702368762373, "grad_norm": 0.058542072772979736, "learning_rate": 9.437728602802316e-05, "loss": 46.004, "step": 1201 }, { "epoch": 0.16410676496689194, "grad_norm": 0.05516689270734787, "learning_rate": 9.436726388164521e-05, "loss": 46.0076, "step": 1202 }, { "epoch": 0.1642432930575466, "grad_norm": 0.057665567845106125, "learning_rate": 9.435723334439883e-05, "loss": 46.0051, "step": 1203 }, { "epoch": 0.16437982114820124, "grad_norm": 0.03515243902802467, "learning_rate": 9.434719441818107e-05, "loss": 46.002, "step": 1204 }, { "epoch": 0.16451634923885589, "grad_norm": 0.04687934368848801, "learning_rate": 9.433714710489046e-05, "loss": 46.0141, "step": 1205 }, { "epoch": 0.16465287732951053, "grad_norm": 0.10925057530403137, "learning_rate": 9.432709140642722e-05, "loss": 46.0023, "step": 1206 }, { "epoch": 0.1647894054201652, "grad_norm": 0.04592084512114525, "learning_rate": 9.431702732469311e-05, "loss": 46.0075, "step": 1207 }, { "epoch": 0.16492593351081986, "grad_norm": 0.05803637579083443, "learning_rate": 9.430695486159146e-05, "loss": 46.0017, "step": 1208 }, { "epoch": 0.1650624616014745, "grad_norm": 0.07992859929800034, "learning_rate": 9.429687401902722e-05, "loss": 46.0059, "step": 1209 }, { "epoch": 0.16519898969212915, "grad_norm": 0.06848010420799255, "learning_rate": 9.428678479890689e-05, "loss": 46.0083, "step": 1210 }, { "epoch": 0.1653355177827838, "grad_norm": 0.06991204619407654, "learning_rate": 9.427668720313861e-05, "loss": 46.0103, "step": 1211 }, { "epoch": 0.16547204587343847, "grad_norm": 0.030821314081549644, "learning_rate": 9.4266581233632e-05, "loss": 46.0085, "step": 1212 }, { "epoch": 0.16560857396409312, "grad_norm": 0.05153965950012207, "learning_rate": 9.425646689229842e-05, "loss": 46.0008, "step": 1213 }, { "epoch": 0.16574510205474777, "grad_norm": 0.05146633833646774, "learning_rate": 9.424634418105066e-05, "loss": 46.0152, "step": 1214 }, { "epoch": 0.16588163014540241, "grad_norm": 0.06431291997432709, "learning_rate": 9.423621310180317e-05, "loss": 46.0017, "step": 1215 }, { "epoch": 0.16601815823605706, "grad_norm": 0.10083091259002686, "learning_rate": 9.4226073656472e-05, "loss": 46.0055, "step": 1216 }, { "epoch": 0.1661546863267117, "grad_norm": 0.04163992777466774, "learning_rate": 9.42159258469747e-05, "loss": 46.0059, "step": 1217 }, { "epoch": 0.16629121441736638, "grad_norm": 0.03956631198525429, "learning_rate": 9.420576967523049e-05, "loss": 46.0169, "step": 1218 }, { "epoch": 0.16642774250802103, "grad_norm": 0.09828846156597137, "learning_rate": 9.419560514316013e-05, "loss": 46.0031, "step": 1219 }, { "epoch": 0.16656427059867568, "grad_norm": 0.051124926656484604, "learning_rate": 9.418543225268596e-05, "loss": 46.0122, "step": 1220 }, { "epoch": 0.16670079868933033, "grad_norm": 0.04843780770897865, "learning_rate": 9.417525100573192e-05, "loss": 46.016, "step": 1221 }, { "epoch": 0.16683732677998497, "grad_norm": 0.0411166250705719, "learning_rate": 9.416506140422349e-05, "loss": 46.0019, "step": 1222 }, { "epoch": 
0.16697385487063962, "grad_norm": 0.07768721878528595, "learning_rate": 9.415486345008779e-05, "loss": 46.0013, "step": 1223 }, { "epoch": 0.1671103829612943, "grad_norm": 0.04945991560816765, "learning_rate": 9.414465714525347e-05, "loss": 46.0048, "step": 1224 }, { "epoch": 0.16724691105194894, "grad_norm": 0.07257694751024246, "learning_rate": 9.413444249165077e-05, "loss": 46.0021, "step": 1225 }, { "epoch": 0.1673834391426036, "grad_norm": 0.08655298501253128, "learning_rate": 9.412421949121152e-05, "loss": 46.0041, "step": 1226 }, { "epoch": 0.16751996723325824, "grad_norm": 0.09693397581577301, "learning_rate": 9.411398814586916e-05, "loss": 46.0061, "step": 1227 }, { "epoch": 0.16765649532391289, "grad_norm": 0.06431207060813904, "learning_rate": 9.41037484575586e-05, "loss": 46.0052, "step": 1228 }, { "epoch": 0.16779302341456756, "grad_norm": 0.05113862827420235, "learning_rate": 9.409350042821648e-05, "loss": 46.0044, "step": 1229 }, { "epoch": 0.1679295515052222, "grad_norm": 0.056593310087919235, "learning_rate": 9.408324405978089e-05, "loss": 46.0012, "step": 1230 }, { "epoch": 0.16806607959587685, "grad_norm": 0.06000594422221184, "learning_rate": 9.407297935419152e-05, "loss": 46.005, "step": 1231 }, { "epoch": 0.1682026076865315, "grad_norm": 0.045497018843889236, "learning_rate": 9.406270631338974e-05, "loss": 46.0005, "step": 1232 }, { "epoch": 0.16833913577718615, "grad_norm": 0.07157689332962036, "learning_rate": 9.405242493931834e-05, "loss": 46.009, "step": 1233 }, { "epoch": 0.1684756638678408, "grad_norm": 0.06327848881483078, "learning_rate": 9.404213523392183e-05, "loss": 46.0047, "step": 1234 }, { "epoch": 0.16861219195849547, "grad_norm": 0.08419623225927353, "learning_rate": 9.403183719914617e-05, "loss": 46.0058, "step": 1235 }, { "epoch": 0.16874872004915012, "grad_norm": 0.0605023056268692, "learning_rate": 9.402153083693898e-05, "loss": 46.0056, "step": 1236 }, { "epoch": 0.16888524813980477, "grad_norm": 0.03864269703626633, "learning_rate": 9.401121614924946e-05, "loss": 46.0038, "step": 1237 }, { "epoch": 0.16902177623045941, "grad_norm": 0.04686254262924194, "learning_rate": 9.40008931380283e-05, "loss": 46.0088, "step": 1238 }, { "epoch": 0.16915830432111406, "grad_norm": 0.059192076325416565, "learning_rate": 9.399056180522785e-05, "loss": 46.0041, "step": 1239 }, { "epoch": 0.1692948324117687, "grad_norm": 0.0416235588490963, "learning_rate": 9.3980222152802e-05, "loss": 46.0016, "step": 1240 }, { "epoch": 0.16943136050242338, "grad_norm": 0.04178151488304138, "learning_rate": 9.396987418270622e-05, "loss": 46.0044, "step": 1241 }, { "epoch": 0.16956788859307803, "grad_norm": 0.03247765079140663, "learning_rate": 9.395951789689753e-05, "loss": 46.008, "step": 1242 }, { "epoch": 0.16970441668373268, "grad_norm": 0.03101865015923977, "learning_rate": 9.394915329733456e-05, "loss": 46.0028, "step": 1243 }, { "epoch": 0.16984094477438733, "grad_norm": 0.1476668268442154, "learning_rate": 9.393878038597747e-05, "loss": 46.0068, "step": 1244 }, { "epoch": 0.16997747286504197, "grad_norm": 0.1258232593536377, "learning_rate": 9.392839916478804e-05, "loss": 46.0069, "step": 1245 }, { "epoch": 0.17011400095569665, "grad_norm": 0.13220500946044922, "learning_rate": 9.39180096357296e-05, "loss": 46.0061, "step": 1246 }, { "epoch": 0.1702505290463513, "grad_norm": 0.12163622677326202, "learning_rate": 9.390761180076702e-05, "loss": 46.0084, "step": 1247 }, { "epoch": 0.17038705713700594, "grad_norm": 0.0987609401345253, "learning_rate": 9.389720566186681e-05, 
"loss": 46.0092, "step": 1248 }, { "epoch": 0.1705235852276606, "grad_norm": 0.08239259570837021, "learning_rate": 9.388679122099697e-05, "loss": 46.0078, "step": 1249 }, { "epoch": 0.17066011331831524, "grad_norm": 0.175066739320755, "learning_rate": 9.387636848012713e-05, "loss": 46.0034, "step": 1250 }, { "epoch": 0.17079664140896988, "grad_norm": 0.045509204268455505, "learning_rate": 9.386593744122847e-05, "loss": 46.0059, "step": 1251 }, { "epoch": 0.17093316949962456, "grad_norm": 0.08370327949523926, "learning_rate": 9.385549810627373e-05, "loss": 46.0073, "step": 1252 }, { "epoch": 0.1710696975902792, "grad_norm": 0.07448790222406387, "learning_rate": 9.384505047723724e-05, "loss": 46.0079, "step": 1253 }, { "epoch": 0.17120622568093385, "grad_norm": 0.07004041969776154, "learning_rate": 9.383459455609488e-05, "loss": 46.0023, "step": 1254 }, { "epoch": 0.1713427537715885, "grad_norm": 0.10463694483041763, "learning_rate": 9.38241303448241e-05, "loss": 46.0059, "step": 1255 }, { "epoch": 0.17147928186224315, "grad_norm": 0.0698322281241417, "learning_rate": 9.381365784540393e-05, "loss": 46.008, "step": 1256 }, { "epoch": 0.1716158099528978, "grad_norm": 0.09714088588953018, "learning_rate": 9.380317705981497e-05, "loss": 46.0046, "step": 1257 }, { "epoch": 0.17175233804355247, "grad_norm": 0.051492366939783096, "learning_rate": 9.379268799003934e-05, "loss": 46.0012, "step": 1258 }, { "epoch": 0.17188886613420712, "grad_norm": 0.0807804986834526, "learning_rate": 9.37821906380608e-05, "loss": 46.0115, "step": 1259 }, { "epoch": 0.17202539422486177, "grad_norm": 0.08413977921009064, "learning_rate": 9.377168500586463e-05, "loss": 46.0023, "step": 1260 }, { "epoch": 0.1721619223155164, "grad_norm": 0.040352486073970795, "learning_rate": 9.376117109543769e-05, "loss": 46.0047, "step": 1261 }, { "epoch": 0.17229845040617106, "grad_norm": 0.07688635587692261, "learning_rate": 9.37506489087684e-05, "loss": 46.0038, "step": 1262 }, { "epoch": 0.1724349784968257, "grad_norm": 0.03639710322022438, "learning_rate": 9.374011844784674e-05, "loss": 46.0093, "step": 1263 }, { "epoch": 0.17257150658748038, "grad_norm": 0.07958271354436874, "learning_rate": 9.372957971466425e-05, "loss": 46.0006, "step": 1264 }, { "epoch": 0.17270803467813503, "grad_norm": 0.08671557903289795, "learning_rate": 9.371903271121408e-05, "loss": 46.0025, "step": 1265 }, { "epoch": 0.17284456276878968, "grad_norm": 0.03351590782403946, "learning_rate": 9.370847743949089e-05, "loss": 46.0023, "step": 1266 }, { "epoch": 0.17298109085944433, "grad_norm": 0.026574067771434784, "learning_rate": 9.369791390149094e-05, "loss": 46.0092, "step": 1267 }, { "epoch": 0.17311761895009897, "grad_norm": 0.08447230607271194, "learning_rate": 9.368734209921201e-05, "loss": 46.0098, "step": 1268 }, { "epoch": 0.17325414704075365, "grad_norm": 0.05628855898976326, "learning_rate": 9.36767620346535e-05, "loss": 46.0047, "step": 1269 }, { "epoch": 0.1733906751314083, "grad_norm": 0.06046047434210777, "learning_rate": 9.36661737098163e-05, "loss": 46.0098, "step": 1270 }, { "epoch": 0.17352720322206294, "grad_norm": 0.045841485261917114, "learning_rate": 9.365557712670296e-05, "loss": 46.0038, "step": 1271 }, { "epoch": 0.1736637313127176, "grad_norm": 0.05528651177883148, "learning_rate": 9.36449722873175e-05, "loss": 46.002, "step": 1272 }, { "epoch": 0.17380025940337224, "grad_norm": 0.05152858793735504, "learning_rate": 9.363435919366557e-05, "loss": 46.0071, "step": 1273 }, { "epoch": 0.17393678749402688, "grad_norm": 
0.024345725774765015, "learning_rate": 9.362373784775431e-05, "loss": 46.0107, "step": 1274 }, { "epoch": 0.17407331558468156, "grad_norm": 0.030155766755342484, "learning_rate": 9.361310825159249e-05, "loss": 46.0067, "step": 1275 }, { "epoch": 0.1742098436753362, "grad_norm": 0.05139519274234772, "learning_rate": 9.360247040719039e-05, "loss": 46.0091, "step": 1276 }, { "epoch": 0.17434637176599085, "grad_norm": 0.04077908769249916, "learning_rate": 9.359182431655988e-05, "loss": 46.0067, "step": 1277 }, { "epoch": 0.1744828998566455, "grad_norm": 0.03941740840673447, "learning_rate": 9.35811699817144e-05, "loss": 46.0004, "step": 1278 }, { "epoch": 0.17461942794730015, "grad_norm": 0.03987620398402214, "learning_rate": 9.35705074046689e-05, "loss": 46.0045, "step": 1279 }, { "epoch": 0.1747559560379548, "grad_norm": 0.05629584565758705, "learning_rate": 9.355983658743992e-05, "loss": 46.0035, "step": 1280 }, { "epoch": 0.17489248412860947, "grad_norm": 0.10254652053117752, "learning_rate": 9.354915753204559e-05, "loss": 46.0049, "step": 1281 }, { "epoch": 0.17502901221926412, "grad_norm": 0.06664399802684784, "learning_rate": 9.353847024050552e-05, "loss": 46.0055, "step": 1282 }, { "epoch": 0.17516554030991877, "grad_norm": 0.048967353999614716, "learning_rate": 9.352777471484096e-05, "loss": 46.0008, "step": 1283 }, { "epoch": 0.1753020684005734, "grad_norm": 0.1283901184797287, "learning_rate": 9.351707095707465e-05, "loss": 46.0033, "step": 1284 }, { "epoch": 0.17543859649122806, "grad_norm": 0.05314289405941963, "learning_rate": 9.350635896923092e-05, "loss": 46.0069, "step": 1285 }, { "epoch": 0.17557512458188274, "grad_norm": 0.10164634138345718, "learning_rate": 9.349563875333569e-05, "loss": 46.0021, "step": 1286 }, { "epoch": 0.17571165267253738, "grad_norm": 0.03837037459015846, "learning_rate": 9.348491031141635e-05, "loss": 46.0068, "step": 1287 }, { "epoch": 0.17584818076319203, "grad_norm": 0.03646520525217056, "learning_rate": 9.347417364550193e-05, "loss": 46.0039, "step": 1288 }, { "epoch": 0.17598470885384668, "grad_norm": 0.039944957941770554, "learning_rate": 9.346342875762295e-05, "loss": 46.0035, "step": 1289 }, { "epoch": 0.17612123694450132, "grad_norm": 0.0967688262462616, "learning_rate": 9.345267564981153e-05, "loss": 46.0055, "step": 1290 }, { "epoch": 0.17625776503515597, "grad_norm": 0.04587594419717789, "learning_rate": 9.344191432410134e-05, "loss": 46.0041, "step": 1291 }, { "epoch": 0.17639429312581065, "grad_norm": 0.054896287620067596, "learning_rate": 9.343114478252758e-05, "loss": 46.0058, "step": 1292 }, { "epoch": 0.1765308212164653, "grad_norm": 0.047464072704315186, "learning_rate": 9.342036702712702e-05, "loss": 46.0036, "step": 1293 }, { "epoch": 0.17666734930711994, "grad_norm": 0.038838956505060196, "learning_rate": 9.340958105993798e-05, "loss": 46.0075, "step": 1294 }, { "epoch": 0.1768038773977746, "grad_norm": 0.1072995513677597, "learning_rate": 9.339878688300034e-05, "loss": 46.0121, "step": 1295 }, { "epoch": 0.17694040548842924, "grad_norm": 0.1496000587940216, "learning_rate": 9.338798449835552e-05, "loss": 46.0074, "step": 1296 }, { "epoch": 0.17707693357908388, "grad_norm": 0.1807657778263092, "learning_rate": 9.337717390804652e-05, "loss": 46.0121, "step": 1297 }, { "epoch": 0.17721346166973856, "grad_norm": 0.08297491818666458, "learning_rate": 9.336635511411784e-05, "loss": 46.0029, "step": 1298 }, { "epoch": 0.1773499897603932, "grad_norm": 0.05860929563641548, "learning_rate": 9.335552811861559e-05, "loss": 46.0076, "step": 
1299 }, { "epoch": 0.17748651785104785, "grad_norm": 0.11058010160923004, "learning_rate": 9.334469292358736e-05, "loss": 46.0144, "step": 1300 }, { "epoch": 0.1776230459417025, "grad_norm": 0.046979062259197235, "learning_rate": 9.333384953108239e-05, "loss": 46.0065, "step": 1301 }, { "epoch": 0.17775957403235715, "grad_norm": 0.16819332540035248, "learning_rate": 9.332299794315139e-05, "loss": 46.0024, "step": 1302 }, { "epoch": 0.17789610212301182, "grad_norm": 0.08157929033041, "learning_rate": 9.33121381618466e-05, "loss": 46.0015, "step": 1303 }, { "epoch": 0.17803263021366647, "grad_norm": 0.09279023855924606, "learning_rate": 9.330127018922194e-05, "loss": 46.0037, "step": 1304 }, { "epoch": 0.17816915830432112, "grad_norm": 0.09612240642309189, "learning_rate": 9.329039402733273e-05, "loss": 46.008, "step": 1305 }, { "epoch": 0.17830568639497577, "grad_norm": 0.13235169649124146, "learning_rate": 9.327950967823594e-05, "loss": 46.0077, "step": 1306 }, { "epoch": 0.1784422144856304, "grad_norm": 0.043584998697042465, "learning_rate": 9.326861714399e-05, "loss": 46.0048, "step": 1307 }, { "epoch": 0.17857874257628506, "grad_norm": 0.03888232633471489, "learning_rate": 9.3257716426655e-05, "loss": 46.0009, "step": 1308 }, { "epoch": 0.17871527066693974, "grad_norm": 0.03400126099586487, "learning_rate": 9.324680752829247e-05, "loss": 46.0013, "step": 1309 }, { "epoch": 0.17885179875759438, "grad_norm": 0.10692491382360458, "learning_rate": 9.323589045096555e-05, "loss": 46.0076, "step": 1310 }, { "epoch": 0.17898832684824903, "grad_norm": 0.041244979947805405, "learning_rate": 9.322496519673893e-05, "loss": 46.002, "step": 1311 }, { "epoch": 0.17912485493890368, "grad_norm": 0.06601157039403915, "learning_rate": 9.321403176767877e-05, "loss": 46.0048, "step": 1312 }, { "epoch": 0.17926138302955832, "grad_norm": 0.034743886440992355, "learning_rate": 9.320309016585289e-05, "loss": 46.0013, "step": 1313 }, { "epoch": 0.17939791112021297, "grad_norm": 0.04734872281551361, "learning_rate": 9.319214039333057e-05, "loss": 46.0136, "step": 1314 }, { "epoch": 0.17953443921086765, "grad_norm": 0.021847747266292572, "learning_rate": 9.318118245218268e-05, "loss": 46.0123, "step": 1315 }, { "epoch": 0.1796709673015223, "grad_norm": 0.0544564351439476, "learning_rate": 9.317021634448161e-05, "loss": 46.0031, "step": 1316 }, { "epoch": 0.17980749539217694, "grad_norm": 0.07262349128723145, "learning_rate": 9.315924207230128e-05, "loss": 46.0096, "step": 1317 }, { "epoch": 0.1799440234828316, "grad_norm": 0.0652289092540741, "learning_rate": 9.314825963771723e-05, "loss": 46.0087, "step": 1318 }, { "epoch": 0.18008055157348624, "grad_norm": 0.04669729247689247, "learning_rate": 9.313726904280644e-05, "loss": 46.0026, "step": 1319 }, { "epoch": 0.18021707966414088, "grad_norm": 0.0874590054154396, "learning_rate": 9.31262702896475e-05, "loss": 46.0069, "step": 1320 }, { "epoch": 0.18035360775479556, "grad_norm": 0.06905972212553024, "learning_rate": 9.311526338032053e-05, "loss": 46.0085, "step": 1321 }, { "epoch": 0.1804901358454502, "grad_norm": 0.04445880278944969, "learning_rate": 9.310424831690718e-05, "loss": 46.011, "step": 1322 }, { "epoch": 0.18062666393610485, "grad_norm": 0.04458507150411606, "learning_rate": 9.309322510149068e-05, "loss": 46.0125, "step": 1323 }, { "epoch": 0.1807631920267595, "grad_norm": 0.08413175493478775, "learning_rate": 9.308219373615574e-05, "loss": 46.0055, "step": 1324 }, { "epoch": 0.18089972011741415, "grad_norm": 0.049977343529462814, "learning_rate": 
9.307115422298867e-05, "loss": 46.0062, "step": 1325 }, { "epoch": 0.18103624820806882, "grad_norm": 0.05676538124680519, "learning_rate": 9.306010656407727e-05, "loss": 46.0015, "step": 1326 }, { "epoch": 0.18117277629872347, "grad_norm": 0.04245628044009209, "learning_rate": 9.304905076151092e-05, "loss": 46.0128, "step": 1327 }, { "epoch": 0.18130930438937812, "grad_norm": 0.03832421824336052, "learning_rate": 9.303798681738052e-05, "loss": 46.0023, "step": 1328 }, { "epoch": 0.18144583248003276, "grad_norm": 0.04014146700501442, "learning_rate": 9.302691473377854e-05, "loss": 46.0016, "step": 1329 }, { "epoch": 0.1815823605706874, "grad_norm": 0.07618001103401184, "learning_rate": 9.301583451279891e-05, "loss": 46.003, "step": 1330 }, { "epoch": 0.18171888866134206, "grad_norm": 0.07687030732631683, "learning_rate": 9.300474615653722e-05, "loss": 46.0049, "step": 1331 }, { "epoch": 0.18185541675199673, "grad_norm": 0.08137104660272598, "learning_rate": 9.29936496670905e-05, "loss": 46.0156, "step": 1332 }, { "epoch": 0.18199194484265138, "grad_norm": 0.03406977280974388, "learning_rate": 9.298254504655737e-05, "loss": 46.0103, "step": 1333 }, { "epoch": 0.18212847293330603, "grad_norm": 0.15025188028812408, "learning_rate": 9.297143229703793e-05, "loss": 46.0023, "step": 1334 }, { "epoch": 0.18226500102396068, "grad_norm": 0.0912509486079216, "learning_rate": 9.296031142063389e-05, "loss": 46.0058, "step": 1335 }, { "epoch": 0.18240152911461532, "grad_norm": 0.03387906774878502, "learning_rate": 9.294918241944844e-05, "loss": 46.0063, "step": 1336 }, { "epoch": 0.18253805720526997, "grad_norm": 0.04867706447839737, "learning_rate": 9.293804529558634e-05, "loss": 46.0095, "step": 1337 }, { "epoch": 0.18267458529592465, "grad_norm": 0.059201426804065704, "learning_rate": 9.29269000511539e-05, "loss": 46.0024, "step": 1338 }, { "epoch": 0.1828111133865793, "grad_norm": 0.06076626852154732, "learning_rate": 9.29157466882589e-05, "loss": 46.0125, "step": 1339 }, { "epoch": 0.18294764147723394, "grad_norm": 0.030774084851145744, "learning_rate": 9.290458520901072e-05, "loss": 46.008, "step": 1340 }, { "epoch": 0.1830841695678886, "grad_norm": 0.026633813977241516, "learning_rate": 9.289341561552025e-05, "loss": 46.0125, "step": 1341 }, { "epoch": 0.18322069765854324, "grad_norm": 0.14001452922821045, "learning_rate": 9.288223790989992e-05, "loss": 46.0051, "step": 1342 }, { "epoch": 0.1833572257491979, "grad_norm": 0.06345508247613907, "learning_rate": 9.287105209426368e-05, "loss": 46.0082, "step": 1343 }, { "epoch": 0.18349375383985256, "grad_norm": 0.05462528020143509, "learning_rate": 9.285985817072703e-05, "loss": 46.0008, "step": 1344 }, { "epoch": 0.1836302819305072, "grad_norm": 0.10172836482524872, "learning_rate": 9.284865614140701e-05, "loss": 46.0045, "step": 1345 }, { "epoch": 0.18376681002116185, "grad_norm": 0.04498520866036415, "learning_rate": 9.283744600842215e-05, "loss": 46.0022, "step": 1346 }, { "epoch": 0.1839033381118165, "grad_norm": 0.08669564872980118, "learning_rate": 9.282622777389258e-05, "loss": 46.007, "step": 1347 }, { "epoch": 0.18403986620247115, "grad_norm": 0.21595695614814758, "learning_rate": 9.28150014399399e-05, "loss": 46.0114, "step": 1348 }, { "epoch": 0.18417639429312582, "grad_norm": 0.06384280323982239, "learning_rate": 9.280376700868728e-05, "loss": 46.0076, "step": 1349 }, { "epoch": 0.18431292238378047, "grad_norm": 0.058130472898483276, "learning_rate": 9.27925244822594e-05, "loss": 46.0045, "step": 1350 }, { "epoch": 0.18444945047443512, 
"grad_norm": 0.048540811985731125, "learning_rate": 9.278127386278249e-05, "loss": 46.0114, "step": 1351 }, { "epoch": 0.18458597856508976, "grad_norm": 0.0751015916466713, "learning_rate": 9.277001515238429e-05, "loss": 46.0078, "step": 1352 }, { "epoch": 0.1847225066557444, "grad_norm": 0.03494304418563843, "learning_rate": 9.275874835319409e-05, "loss": 46.0036, "step": 1353 }, { "epoch": 0.18485903474639906, "grad_norm": 0.083470918238163, "learning_rate": 9.27474734673427e-05, "loss": 46.0044, "step": 1354 }, { "epoch": 0.18499556283705373, "grad_norm": 0.046889688819646835, "learning_rate": 9.273619049696244e-05, "loss": 46.0061, "step": 1355 }, { "epoch": 0.18513209092770838, "grad_norm": 0.06015819311141968, "learning_rate": 9.272489944418724e-05, "loss": 46.0014, "step": 1356 }, { "epoch": 0.18526861901836303, "grad_norm": 0.053556349128484726, "learning_rate": 9.271360031115241e-05, "loss": 46.0035, "step": 1357 }, { "epoch": 0.18540514710901768, "grad_norm": 0.04540860280394554, "learning_rate": 9.270229309999493e-05, "loss": 46.0016, "step": 1358 }, { "epoch": 0.18554167519967232, "grad_norm": 0.05363784730434418, "learning_rate": 9.269097781285324e-05, "loss": 46.0093, "step": 1359 }, { "epoch": 0.185678203290327, "grad_norm": 0.03197086229920387, "learning_rate": 9.267965445186733e-05, "loss": 46.013, "step": 1360 }, { "epoch": 0.18581473138098165, "grad_norm": 0.061332330107688904, "learning_rate": 9.26683230191787e-05, "loss": 46.0084, "step": 1361 }, { "epoch": 0.1859512594716363, "grad_norm": 0.11652117967605591, "learning_rate": 9.265698351693037e-05, "loss": 46.0079, "step": 1362 }, { "epoch": 0.18608778756229094, "grad_norm": 0.03930535539984703, "learning_rate": 9.264563594726691e-05, "loss": 46.0034, "step": 1363 }, { "epoch": 0.1862243156529456, "grad_norm": 0.03859221935272217, "learning_rate": 9.263428031233443e-05, "loss": 46.0047, "step": 1364 }, { "epoch": 0.18636084374360024, "grad_norm": 0.09399253875017166, "learning_rate": 9.262291661428053e-05, "loss": 46.0076, "step": 1365 }, { "epoch": 0.1864973718342549, "grad_norm": 0.0457252562046051, "learning_rate": 9.261154485525432e-05, "loss": 46.0027, "step": 1366 }, { "epoch": 0.18663389992490956, "grad_norm": 0.12306556850671768, "learning_rate": 9.260016503740647e-05, "loss": 46.0032, "step": 1367 }, { "epoch": 0.1867704280155642, "grad_norm": 0.07713311910629272, "learning_rate": 9.258877716288918e-05, "loss": 46.0075, "step": 1368 }, { "epoch": 0.18690695610621885, "grad_norm": 0.07986722141504288, "learning_rate": 9.257738123385616e-05, "loss": 46.0075, "step": 1369 }, { "epoch": 0.1870434841968735, "grad_norm": 0.05640026554465294, "learning_rate": 9.256597725246263e-05, "loss": 46.0025, "step": 1370 }, { "epoch": 0.18718001228752815, "grad_norm": 0.035740986466407776, "learning_rate": 9.255456522086535e-05, "loss": 46.0063, "step": 1371 }, { "epoch": 0.18731654037818282, "grad_norm": 0.05061297491192818, "learning_rate": 9.254314514122259e-05, "loss": 46.0048, "step": 1372 }, { "epoch": 0.18745306846883747, "grad_norm": 0.05584397912025452, "learning_rate": 9.253171701569416e-05, "loss": 46.0016, "step": 1373 }, { "epoch": 0.18758959655949212, "grad_norm": 0.0862777903676033, "learning_rate": 9.252028084644137e-05, "loss": 46.0045, "step": 1374 }, { "epoch": 0.18772612465014676, "grad_norm": 0.09182412922382355, "learning_rate": 9.250883663562709e-05, "loss": 46.0039, "step": 1375 }, { "epoch": 0.1878626527408014, "grad_norm": 0.08696755766868591, "learning_rate": 9.249738438541565e-05, "loss": 46.0083, 
"step": 1376 }, { "epoch": 0.18799918083145606, "grad_norm": 0.049136240035295486, "learning_rate": 9.248592409797293e-05, "loss": 46.0054, "step": 1377 }, { "epoch": 0.18813570892211073, "grad_norm": 0.04071767255663872, "learning_rate": 9.247445577546637e-05, "loss": 46.009, "step": 1378 }, { "epoch": 0.18827223701276538, "grad_norm": 0.059692032635211945, "learning_rate": 9.246297942006485e-05, "loss": 46.0032, "step": 1379 }, { "epoch": 0.18840876510342003, "grad_norm": 0.04639244079589844, "learning_rate": 9.245149503393884e-05, "loss": 46.0096, "step": 1380 }, { "epoch": 0.18854529319407468, "grad_norm": 0.05955829471349716, "learning_rate": 9.24400026192603e-05, "loss": 46.0039, "step": 1381 }, { "epoch": 0.18868182128472932, "grad_norm": 0.0399048775434494, "learning_rate": 9.24285021782027e-05, "loss": 46.0103, "step": 1382 }, { "epoch": 0.188818349375384, "grad_norm": 0.0959966704249382, "learning_rate": 9.241699371294105e-05, "loss": 46.0026, "step": 1383 }, { "epoch": 0.18895487746603865, "grad_norm": 0.09754492342472076, "learning_rate": 9.240547722565185e-05, "loss": 46.0003, "step": 1384 }, { "epoch": 0.1890914055566933, "grad_norm": 0.0705810934305191, "learning_rate": 9.239395271851314e-05, "loss": 46.0046, "step": 1385 }, { "epoch": 0.18922793364734794, "grad_norm": 0.05053739249706268, "learning_rate": 9.238242019370447e-05, "loss": 46.0036, "step": 1386 }, { "epoch": 0.1893644617380026, "grad_norm": 0.05409899353981018, "learning_rate": 9.23708796534069e-05, "loss": 46.0027, "step": 1387 }, { "epoch": 0.18950098982865723, "grad_norm": 0.08978836983442307, "learning_rate": 9.235933109980301e-05, "loss": 46.0048, "step": 1388 }, { "epoch": 0.1896375179193119, "grad_norm": 0.03835688531398773, "learning_rate": 9.234777453507692e-05, "loss": 46.0, "step": 1389 }, { "epoch": 0.18977404600996656, "grad_norm": 0.046984877437353134, "learning_rate": 9.233620996141421e-05, "loss": 46.0138, "step": 1390 }, { "epoch": 0.1899105741006212, "grad_norm": 0.030541757121682167, "learning_rate": 9.232463738100203e-05, "loss": 46.005, "step": 1391 }, { "epoch": 0.19004710219127585, "grad_norm": 0.07132083922624588, "learning_rate": 9.2313056796029e-05, "loss": 46.0007, "step": 1392 }, { "epoch": 0.1901836302819305, "grad_norm": 0.08626524358987808, "learning_rate": 9.230146820868528e-05, "loss": 46.0076, "step": 1393 }, { "epoch": 0.19032015837258515, "grad_norm": 0.09188501536846161, "learning_rate": 9.228987162116256e-05, "loss": 46.0054, "step": 1394 }, { "epoch": 0.19045668646323982, "grad_norm": 0.11346902698278427, "learning_rate": 9.227826703565399e-05, "loss": 46.0082, "step": 1395 }, { "epoch": 0.19059321455389447, "grad_norm": 0.04726196452975273, "learning_rate": 9.226665445435428e-05, "loss": 46.0029, "step": 1396 }, { "epoch": 0.19072974264454912, "grad_norm": 0.11630396544933319, "learning_rate": 9.225503387945964e-05, "loss": 46.0087, "step": 1397 }, { "epoch": 0.19086627073520376, "grad_norm": 0.05987127125263214, "learning_rate": 9.224340531316777e-05, "loss": 46.0113, "step": 1398 }, { "epoch": 0.1910027988258584, "grad_norm": 0.14501255750656128, "learning_rate": 9.223176875767792e-05, "loss": 46.0222, "step": 1399 }, { "epoch": 0.19113932691651309, "grad_norm": 0.07520521432161331, "learning_rate": 9.222012421519081e-05, "loss": 46.0, "step": 1400 }, { "epoch": 0.19127585500716773, "grad_norm": 0.024257859215140343, "learning_rate": 9.22084716879087e-05, "loss": 46.0097, "step": 1401 }, { "epoch": 0.19141238309782238, "grad_norm": 0.031596165150403976, "learning_rate": 
9.219681117803536e-05, "loss": 46.0033, "step": 1402 }, { "epoch": 0.19154891118847703, "grad_norm": 0.07211479544639587, "learning_rate": 9.218514268777604e-05, "loss": 46.0062, "step": 1403 }, { "epoch": 0.19168543927913168, "grad_norm": 0.028090810403227806, "learning_rate": 9.217346621933754e-05, "loss": 46.0051, "step": 1404 }, { "epoch": 0.19182196736978632, "grad_norm": 0.06306546181440353, "learning_rate": 9.216178177492812e-05, "loss": 46.005, "step": 1405 }, { "epoch": 0.191958495460441, "grad_norm": 0.048922598361968994, "learning_rate": 9.215008935675762e-05, "loss": 46.0062, "step": 1406 }, { "epoch": 0.19209502355109564, "grad_norm": 0.10970179736614227, "learning_rate": 9.213838896703731e-05, "loss": 46.0018, "step": 1407 }, { "epoch": 0.1922315516417503, "grad_norm": 0.03560686111450195, "learning_rate": 9.212668060798e-05, "loss": 46.0001, "step": 1408 }, { "epoch": 0.19236807973240494, "grad_norm": 0.06926270574331284, "learning_rate": 9.211496428180003e-05, "loss": 46.001, "step": 1409 }, { "epoch": 0.1925046078230596, "grad_norm": 0.08467784523963928, "learning_rate": 9.210323999071322e-05, "loss": 46.0076, "step": 1410 }, { "epoch": 0.19264113591371423, "grad_norm": 0.13914842903614044, "learning_rate": 9.20915077369369e-05, "loss": 46.0116, "step": 1411 }, { "epoch": 0.1927776640043689, "grad_norm": 0.0906820297241211, "learning_rate": 9.20797675226899e-05, "loss": 46.0061, "step": 1412 }, { "epoch": 0.19291419209502356, "grad_norm": 0.017932334914803505, "learning_rate": 9.206801935019259e-05, "loss": 46.0145, "step": 1413 }, { "epoch": 0.1930507201856782, "grad_norm": 0.05047451704740524, "learning_rate": 9.205626322166681e-05, "loss": 46.0062, "step": 1414 }, { "epoch": 0.19318724827633285, "grad_norm": 0.05547706410288811, "learning_rate": 9.204449913933588e-05, "loss": 46.0032, "step": 1415 }, { "epoch": 0.1933237763669875, "grad_norm": 0.02381858602166176, "learning_rate": 9.20327271054247e-05, "loss": 46.0122, "step": 1416 }, { "epoch": 0.19346030445764215, "grad_norm": 0.0809660255908966, "learning_rate": 9.202094712215961e-05, "loss": 46.0052, "step": 1417 }, { "epoch": 0.19359683254829682, "grad_norm": 0.05814438685774803, "learning_rate": 9.200915919176848e-05, "loss": 46.0075, "step": 1418 }, { "epoch": 0.19373336063895147, "grad_norm": 0.10646533221006393, "learning_rate": 9.19973633164807e-05, "loss": 46.0112, "step": 1419 }, { "epoch": 0.19386988872960612, "grad_norm": 0.1389753818511963, "learning_rate": 9.198555949852709e-05, "loss": 46.0059, "step": 1420 }, { "epoch": 0.19400641682026076, "grad_norm": 0.09437958151102066, "learning_rate": 9.197374774014008e-05, "loss": 46.0048, "step": 1421 }, { "epoch": 0.1941429449109154, "grad_norm": 0.06534861773252487, "learning_rate": 9.19619280435535e-05, "loss": 46.0119, "step": 1422 }, { "epoch": 0.19427947300157009, "grad_norm": 0.04418373480439186, "learning_rate": 9.195010041100275e-05, "loss": 46.004, "step": 1423 }, { "epoch": 0.19441600109222473, "grad_norm": 0.07789772003889084, "learning_rate": 9.193826484472472e-05, "loss": 46.0072, "step": 1424 }, { "epoch": 0.19455252918287938, "grad_norm": 0.052767518907785416, "learning_rate": 9.192642134695775e-05, "loss": 46.0037, "step": 1425 }, { "epoch": 0.19468905727353403, "grad_norm": 0.05161157622933388, "learning_rate": 9.191456991994174e-05, "loss": 46.0148, "step": 1426 }, { "epoch": 0.19482558536418867, "grad_norm": 0.03661066293716431, "learning_rate": 9.190271056591808e-05, "loss": 46.0078, "step": 1427 }, { "epoch": 0.19496211345484332, 
"grad_norm": 0.08310777693986893, "learning_rate": 9.18908432871296e-05, "loss": 46.0016, "step": 1428 }, { "epoch": 0.195098641545498, "grad_norm": 0.1254054605960846, "learning_rate": 9.187896808582073e-05, "loss": 46.0094, "step": 1429 }, { "epoch": 0.19523516963615264, "grad_norm": 0.07407595962285995, "learning_rate": 9.18670849642373e-05, "loss": 46.0008, "step": 1430 }, { "epoch": 0.1953716977268073, "grad_norm": 0.06297612935304642, "learning_rate": 9.185519392462672e-05, "loss": 46.0064, "step": 1431 }, { "epoch": 0.19550822581746194, "grad_norm": 0.04970478266477585, "learning_rate": 9.184329496923783e-05, "loss": 46.0019, "step": 1432 }, { "epoch": 0.1956447539081166, "grad_norm": 0.041346386075019836, "learning_rate": 9.183138810032099e-05, "loss": 46.0029, "step": 1433 }, { "epoch": 0.19578128199877123, "grad_norm": 0.04702509194612503, "learning_rate": 9.181947332012808e-05, "loss": 46.0048, "step": 1434 }, { "epoch": 0.1959178100894259, "grad_norm": 0.0520223006606102, "learning_rate": 9.180755063091246e-05, "loss": 46.0062, "step": 1435 }, { "epoch": 0.19605433818008056, "grad_norm": 0.10606356710195541, "learning_rate": 9.179562003492898e-05, "loss": 46.0042, "step": 1436 }, { "epoch": 0.1961908662707352, "grad_norm": 0.08171603083610535, "learning_rate": 9.178368153443399e-05, "loss": 46.0025, "step": 1437 }, { "epoch": 0.19632739436138985, "grad_norm": 0.09865172207355499, "learning_rate": 9.177173513168533e-05, "loss": 46.0008, "step": 1438 }, { "epoch": 0.1964639224520445, "grad_norm": 0.12435268610715866, "learning_rate": 9.175978082894234e-05, "loss": 46.0027, "step": 1439 }, { "epoch": 0.19660045054269917, "grad_norm": 0.04531469568610191, "learning_rate": 9.174781862846585e-05, "loss": 46.0074, "step": 1440 }, { "epoch": 0.19673697863335382, "grad_norm": 0.04259936884045601, "learning_rate": 9.17358485325182e-05, "loss": 46.0093, "step": 1441 }, { "epoch": 0.19687350672400847, "grad_norm": 0.07773260772228241, "learning_rate": 9.17238705433632e-05, "loss": 46.0037, "step": 1442 }, { "epoch": 0.19701003481466312, "grad_norm": 0.07480160892009735, "learning_rate": 9.171188466326616e-05, "loss": 46.0006, "step": 1443 }, { "epoch": 0.19714656290531776, "grad_norm": 0.08840031921863556, "learning_rate": 9.16998908944939e-05, "loss": 46.0041, "step": 1444 }, { "epoch": 0.1972830909959724, "grad_norm": 0.08413294702768326, "learning_rate": 9.16878892393147e-05, "loss": 46.0072, "step": 1445 }, { "epoch": 0.19741961908662709, "grad_norm": 0.1972070187330246, "learning_rate": 9.167587969999836e-05, "loss": 46.0135, "step": 1446 }, { "epoch": 0.19755614717728173, "grad_norm": 0.13185521960258484, "learning_rate": 9.166386227881616e-05, "loss": 46.0044, "step": 1447 }, { "epoch": 0.19769267526793638, "grad_norm": 0.10518690943717957, "learning_rate": 9.165183697804085e-05, "loss": 46.0098, "step": 1448 }, { "epoch": 0.19782920335859103, "grad_norm": 0.053810153156518936, "learning_rate": 9.163980379994672e-05, "loss": 46.0158, "step": 1449 }, { "epoch": 0.19796573144924567, "grad_norm": 0.14938730001449585, "learning_rate": 9.16277627468095e-05, "loss": 46.0, "step": 1450 }, { "epoch": 0.19810225953990032, "grad_norm": 0.08877211809158325, "learning_rate": 9.161571382090643e-05, "loss": 46.0073, "step": 1451 }, { "epoch": 0.198238787630555, "grad_norm": 0.10758589953184128, "learning_rate": 9.160365702451625e-05, "loss": 46.0015, "step": 1452 }, { "epoch": 0.19837531572120964, "grad_norm": 0.059726789593696594, "learning_rate": 9.159159235991918e-05, "loss": 46.001, "step": 
1453 }, { "epoch": 0.1985118438118643, "grad_norm": 0.08406286686658859, "learning_rate": 9.15795198293969e-05, "loss": 46.002, "step": 1454 }, { "epoch": 0.19864837190251894, "grad_norm": 0.050748709589242935, "learning_rate": 9.156743943523264e-05, "loss": 46.005, "step": 1455 }, { "epoch": 0.19878489999317359, "grad_norm": 0.04197966307401657, "learning_rate": 9.155535117971104e-05, "loss": 46.0056, "step": 1456 }, { "epoch": 0.19892142808382826, "grad_norm": 0.06211195886135101, "learning_rate": 9.15432550651183e-05, "loss": 46.0058, "step": 1457 }, { "epoch": 0.1990579561744829, "grad_norm": 0.028299568220973015, "learning_rate": 9.153115109374207e-05, "loss": 46.0064, "step": 1458 }, { "epoch": 0.19919448426513756, "grad_norm": 0.04828493669629097, "learning_rate": 9.151903926787145e-05, "loss": 46.0019, "step": 1459 }, { "epoch": 0.1993310123557922, "grad_norm": 0.08643963932991028, "learning_rate": 9.15069195897971e-05, "loss": 46.0061, "step": 1460 }, { "epoch": 0.19946754044644685, "grad_norm": 0.039026591926813126, "learning_rate": 9.149479206181113e-05, "loss": 46.0072, "step": 1461 }, { "epoch": 0.1996040685371015, "grad_norm": 0.04176577180624008, "learning_rate": 9.148265668620712e-05, "loss": 46.0062, "step": 1462 }, { "epoch": 0.19974059662775617, "grad_norm": 0.028783755376935005, "learning_rate": 9.147051346528017e-05, "loss": 46.0075, "step": 1463 }, { "epoch": 0.19987712471841082, "grad_norm": 0.02835187129676342, "learning_rate": 9.145836240132682e-05, "loss": 46.0064, "step": 1464 }, { "epoch": 0.20001365280906547, "grad_norm": 0.04845873638987541, "learning_rate": 9.14462034966451e-05, "loss": 46.0076, "step": 1465 }, { "epoch": 0.20015018089972011, "grad_norm": 0.05122579261660576, "learning_rate": 9.143403675353457e-05, "loss": 46.0076, "step": 1466 }, { "epoch": 0.20028670899037476, "grad_norm": 0.03828597441315651, "learning_rate": 9.142186217429625e-05, "loss": 46.0051, "step": 1467 }, { "epoch": 0.2004232370810294, "grad_norm": 0.04791915789246559, "learning_rate": 9.140967976123259e-05, "loss": 46.0078, "step": 1468 }, { "epoch": 0.20055976517168408, "grad_norm": 0.10306814312934875, "learning_rate": 9.13974895166476e-05, "loss": 46.01, "step": 1469 }, { "epoch": 0.20069629326233873, "grad_norm": 0.09662801027297974, "learning_rate": 9.138529144284671e-05, "loss": 46.0044, "step": 1470 }, { "epoch": 0.20083282135299338, "grad_norm": 0.0318765789270401, "learning_rate": 9.137308554213687e-05, "loss": 46.0078, "step": 1471 }, { "epoch": 0.20096934944364803, "grad_norm": 0.06996145099401474, "learning_rate": 9.13608718168265e-05, "loss": 46.0016, "step": 1472 }, { "epoch": 0.20110587753430267, "grad_norm": 0.059220414608716965, "learning_rate": 9.134865026922549e-05, "loss": 46.0074, "step": 1473 }, { "epoch": 0.20124240562495732, "grad_norm": 0.044967472553253174, "learning_rate": 9.13364209016452e-05, "loss": 46.0099, "step": 1474 }, { "epoch": 0.201378933715612, "grad_norm": 0.06767603009939194, "learning_rate": 9.132418371639852e-05, "loss": 46.0077, "step": 1475 }, { "epoch": 0.20151546180626664, "grad_norm": 0.058048002421855927, "learning_rate": 9.131193871579975e-05, "loss": 46.008, "step": 1476 }, { "epoch": 0.2016519898969213, "grad_norm": 0.10835488885641098, "learning_rate": 9.129968590216472e-05, "loss": 46.0087, "step": 1477 }, { "epoch": 0.20178851798757594, "grad_norm": 0.073755644261837, "learning_rate": 9.12874252778107e-05, "loss": 46.0035, "step": 1478 }, { "epoch": 0.20192504607823059, "grad_norm": 0.06154082342982292, "learning_rate": 
9.127515684505647e-05, "loss": 46.0035, "step": 1479 }, { "epoch": 0.20206157416888526, "grad_norm": 0.061806339770555496, "learning_rate": 9.126288060622228e-05, "loss": 46.0026, "step": 1480 }, { "epoch": 0.2021981022595399, "grad_norm": 0.04984788969159126, "learning_rate": 9.125059656362982e-05, "loss": 46.0042, "step": 1481 }, { "epoch": 0.20233463035019456, "grad_norm": 0.047489505261182785, "learning_rate": 9.12383047196023e-05, "loss": 46.0069, "step": 1482 }, { "epoch": 0.2024711584408492, "grad_norm": 0.03643013909459114, "learning_rate": 9.122600507646442e-05, "loss": 46.0025, "step": 1483 }, { "epoch": 0.20260768653150385, "grad_norm": 0.07481986284255981, "learning_rate": 9.121369763654228e-05, "loss": 46.0041, "step": 1484 }, { "epoch": 0.2027442146221585, "grad_norm": 0.11695441603660583, "learning_rate": 9.120138240216352e-05, "loss": 46.0065, "step": 1485 }, { "epoch": 0.20288074271281317, "grad_norm": 0.14224562048912048, "learning_rate": 9.118905937565722e-05, "loss": 46.0029, "step": 1486 }, { "epoch": 0.20301727080346782, "grad_norm": 0.07142463326454163, "learning_rate": 9.117672855935397e-05, "loss": 46.0038, "step": 1487 }, { "epoch": 0.20315379889412247, "grad_norm": 0.06257296353578568, "learning_rate": 9.116438995558579e-05, "loss": 46.003, "step": 1488 }, { "epoch": 0.20329032698477711, "grad_norm": 0.0731351375579834, "learning_rate": 9.115204356668621e-05, "loss": 46.0087, "step": 1489 }, { "epoch": 0.20342685507543176, "grad_norm": 0.040294013917446136, "learning_rate": 9.11396893949902e-05, "loss": 46.0039, "step": 1490 }, { "epoch": 0.2035633831660864, "grad_norm": 0.029354481026530266, "learning_rate": 9.112732744283424e-05, "loss": 46.006, "step": 1491 }, { "epoch": 0.20369991125674108, "grad_norm": 0.09384988993406296, "learning_rate": 9.111495771255623e-05, "loss": 46.0026, "step": 1492 }, { "epoch": 0.20383643934739573, "grad_norm": 0.15620771050453186, "learning_rate": 9.110258020649558e-05, "loss": 46.0021, "step": 1493 }, { "epoch": 0.20397296743805038, "grad_norm": 0.08743107318878174, "learning_rate": 9.109019492699315e-05, "loss": 46.0006, "step": 1494 }, { "epoch": 0.20410949552870503, "grad_norm": 0.048310600221157074, "learning_rate": 9.107780187639132e-05, "loss": 46.0076, "step": 1495 }, { "epoch": 0.20424602361935967, "grad_norm": 0.07549877464771271, "learning_rate": 9.106540105703387e-05, "loss": 46.0009, "step": 1496 }, { "epoch": 0.20438255171001435, "grad_norm": 0.2132052481174469, "learning_rate": 9.105299247126608e-05, "loss": 46.0061, "step": 1497 }, { "epoch": 0.204519079800669, "grad_norm": 0.16092480719089508, "learning_rate": 9.104057612143473e-05, "loss": 46.0066, "step": 1498 }, { "epoch": 0.20465560789132364, "grad_norm": 0.07214135676622391, "learning_rate": 9.102815200988798e-05, "loss": 46.0037, "step": 1499 }, { "epoch": 0.2047921359819783, "grad_norm": 0.13100090622901917, "learning_rate": 9.101572013897555e-05, "loss": 46.0074, "step": 1500 }, { "epoch": 0.20492866407263294, "grad_norm": 0.06803402304649353, "learning_rate": 9.100328051104859e-05, "loss": 46.0098, "step": 1501 }, { "epoch": 0.20506519216328759, "grad_norm": 0.04870695248246193, "learning_rate": 9.099083312845973e-05, "loss": 46.0056, "step": 1502 }, { "epoch": 0.20520172025394226, "grad_norm": 0.0696379542350769, "learning_rate": 9.097837799356303e-05, "loss": 46.0033, "step": 1503 }, { "epoch": 0.2053382483445969, "grad_norm": 0.056966934353113174, "learning_rate": 9.096591510871405e-05, "loss": 46.0048, "step": 1504 }, { "epoch": 0.20547477643525155, 
"grad_norm": 0.1018475815653801, "learning_rate": 9.095344447626982e-05, "loss": 46.0028, "step": 1505 }, { "epoch": 0.2056113045259062, "grad_norm": 0.07463174313306808, "learning_rate": 9.094096609858882e-05, "loss": 46.0056, "step": 1506 }, { "epoch": 0.20574783261656085, "grad_norm": 0.07083996385335922, "learning_rate": 9.092847997803097e-05, "loss": 46.0034, "step": 1507 }, { "epoch": 0.2058843607072155, "grad_norm": 0.030400194227695465, "learning_rate": 9.091598611695772e-05, "loss": 46.0104, "step": 1508 }, { "epoch": 0.20602088879787017, "grad_norm": 0.05467965453863144, "learning_rate": 9.090348451773195e-05, "loss": 46.005, "step": 1509 }, { "epoch": 0.20615741688852482, "grad_norm": 0.04095850884914398, "learning_rate": 9.089097518271796e-05, "loss": 46.0079, "step": 1510 }, { "epoch": 0.20629394497917947, "grad_norm": 0.03612156957387924, "learning_rate": 9.08784581142816e-05, "loss": 46.0122, "step": 1511 }, { "epoch": 0.20643047306983411, "grad_norm": 0.08893720805644989, "learning_rate": 9.086593331479008e-05, "loss": 46.0049, "step": 1512 }, { "epoch": 0.20656700116048876, "grad_norm": 0.08251229673624039, "learning_rate": 9.085340078661217e-05, "loss": 46.0067, "step": 1513 }, { "epoch": 0.20670352925114344, "grad_norm": 0.07215525209903717, "learning_rate": 9.084086053211803e-05, "loss": 46.0145, "step": 1514 }, { "epoch": 0.20684005734179808, "grad_norm": 0.0770954042673111, "learning_rate": 9.082831255367934e-05, "loss": 46.0084, "step": 1515 }, { "epoch": 0.20697658543245273, "grad_norm": 0.0870961919426918, "learning_rate": 9.081575685366919e-05, "loss": 46.0069, "step": 1516 }, { "epoch": 0.20711311352310738, "grad_norm": 0.03395692631602287, "learning_rate": 9.080319343446215e-05, "loss": 46.0082, "step": 1517 }, { "epoch": 0.20724964161376203, "grad_norm": 0.12485601007938385, "learning_rate": 9.079062229843426e-05, "loss": 46.0116, "step": 1518 }, { "epoch": 0.20738616970441667, "grad_norm": 0.10456576943397522, "learning_rate": 9.077804344796302e-05, "loss": 46.0016, "step": 1519 }, { "epoch": 0.20752269779507135, "grad_norm": 0.09492659568786621, "learning_rate": 9.076545688542735e-05, "loss": 46.0033, "step": 1520 }, { "epoch": 0.207659225885726, "grad_norm": 0.1533442586660385, "learning_rate": 9.075286261320769e-05, "loss": 46.0066, "step": 1521 }, { "epoch": 0.20779575397638064, "grad_norm": 0.06753119081258774, "learning_rate": 9.074026063368589e-05, "loss": 46.0042, "step": 1522 }, { "epoch": 0.2079322820670353, "grad_norm": 0.038518182933330536, "learning_rate": 9.072765094924527e-05, "loss": 46.0048, "step": 1523 }, { "epoch": 0.20806881015768994, "grad_norm": 0.07437936961650848, "learning_rate": 9.071503356227063e-05, "loss": 46.0034, "step": 1524 }, { "epoch": 0.20820533824834458, "grad_norm": 0.07282775640487671, "learning_rate": 9.070240847514817e-05, "loss": 46.0067, "step": 1525 }, { "epoch": 0.20834186633899926, "grad_norm": 0.03311668708920479, "learning_rate": 9.068977569026563e-05, "loss": 46.0083, "step": 1526 }, { "epoch": 0.2084783944296539, "grad_norm": 0.044919855892658234, "learning_rate": 9.067713521001212e-05, "loss": 46.0063, "step": 1527 }, { "epoch": 0.20861492252030855, "grad_norm": 0.021898744627833366, "learning_rate": 9.066448703677828e-05, "loss": 46.0128, "step": 1528 }, { "epoch": 0.2087514506109632, "grad_norm": 0.052995290607213974, "learning_rate": 9.065183117295614e-05, "loss": 46.0033, "step": 1529 }, { "epoch": 0.20888797870161785, "grad_norm": 0.09687753766775131, "learning_rate": 9.063916762093922e-05, "loss": 
46.0042, "step": 1530 }, { "epoch": 0.2090245067922725, "grad_norm": 0.08305657655000687, "learning_rate": 9.062649638312252e-05, "loss": 46.0088, "step": 1531 }, { "epoch": 0.20916103488292717, "grad_norm": 0.12205012887716293, "learning_rate": 9.061381746190243e-05, "loss": 46.002, "step": 1532 }, { "epoch": 0.20929756297358182, "grad_norm": 0.05995684117078781, "learning_rate": 9.060113085967682e-05, "loss": 46.0048, "step": 1533 }, { "epoch": 0.20943409106423647, "grad_norm": 0.04063927009701729, "learning_rate": 9.058843657884504e-05, "loss": 46.0075, "step": 1534 }, { "epoch": 0.2095706191548911, "grad_norm": 0.035346873104572296, "learning_rate": 9.057573462180786e-05, "loss": 46.0051, "step": 1535 }, { "epoch": 0.20970714724554576, "grad_norm": 0.11213374137878418, "learning_rate": 9.056302499096751e-05, "loss": 46.0058, "step": 1536 }, { "epoch": 0.20984367533620044, "grad_norm": 0.062055084854364395, "learning_rate": 9.055030768872768e-05, "loss": 46.0071, "step": 1537 }, { "epoch": 0.20998020342685508, "grad_norm": 0.09919865429401398, "learning_rate": 9.053758271749348e-05, "loss": 46.0063, "step": 1538 }, { "epoch": 0.21011673151750973, "grad_norm": 0.061194270849227905, "learning_rate": 9.052485007967155e-05, "loss": 46.0057, "step": 1539 }, { "epoch": 0.21025325960816438, "grad_norm": 0.05801825970411301, "learning_rate": 9.051210977766987e-05, "loss": 46.005, "step": 1540 }, { "epoch": 0.21038978769881903, "grad_norm": 0.046560462564229965, "learning_rate": 9.049936181389794e-05, "loss": 46.0079, "step": 1541 }, { "epoch": 0.21052631578947367, "grad_norm": 0.11721022427082062, "learning_rate": 9.048660619076669e-05, "loss": 46.0041, "step": 1542 }, { "epoch": 0.21066284388012835, "grad_norm": 0.0633828267455101, "learning_rate": 9.04738429106885e-05, "loss": 46.0034, "step": 1543 }, { "epoch": 0.210799371970783, "grad_norm": 0.08779153227806091, "learning_rate": 9.046107197607723e-05, "loss": 46.0052, "step": 1544 }, { "epoch": 0.21093590006143764, "grad_norm": 0.1228007972240448, "learning_rate": 9.04482933893481e-05, "loss": 46.007, "step": 1545 }, { "epoch": 0.2110724281520923, "grad_norm": 0.14286258816719055, "learning_rate": 9.043550715291788e-05, "loss": 46.0066, "step": 1546 }, { "epoch": 0.21120895624274694, "grad_norm": 0.0654035359621048, "learning_rate": 9.042271326920474e-05, "loss": 46.0054, "step": 1547 }, { "epoch": 0.21134548433340158, "grad_norm": 0.11816471070051193, "learning_rate": 9.040991174062828e-05, "loss": 46.0089, "step": 1548 }, { "epoch": 0.21148201242405626, "grad_norm": 0.11046800017356873, "learning_rate": 9.039710256960957e-05, "loss": 46.0116, "step": 1549 }, { "epoch": 0.2116185405147109, "grad_norm": 0.10258238017559052, "learning_rate": 9.03842857585711e-05, "loss": 46.0098, "step": 1550 }, { "epoch": 0.21175506860536555, "grad_norm": 0.04302448034286499, "learning_rate": 9.037146130993683e-05, "loss": 46.0104, "step": 1551 }, { "epoch": 0.2118915966960202, "grad_norm": 0.06374172866344452, "learning_rate": 9.035862922613218e-05, "loss": 46.0094, "step": 1552 }, { "epoch": 0.21202812478667485, "grad_norm": 0.0352407768368721, "learning_rate": 9.034578950958399e-05, "loss": 46.004, "step": 1553 }, { "epoch": 0.21216465287732952, "grad_norm": 0.1806410551071167, "learning_rate": 9.033294216272052e-05, "loss": 46.0022, "step": 1554 }, { "epoch": 0.21230118096798417, "grad_norm": 0.04575693979859352, "learning_rate": 9.032008718797151e-05, "loss": 46.0051, "step": 1555 }, { "epoch": 0.21243770905863882, "grad_norm": 0.035396672785282135, 
"learning_rate": 9.030722458776814e-05, "loss": 46.007, "step": 1556 }, { "epoch": 0.21257423714929347, "grad_norm": 0.04047870263457298, "learning_rate": 9.029435436454301e-05, "loss": 46.0067, "step": 1557 }, { "epoch": 0.2127107652399481, "grad_norm": 0.056147802621126175, "learning_rate": 9.02814765207302e-05, "loss": 46.004, "step": 1558 }, { "epoch": 0.21284729333060276, "grad_norm": 0.08090036362409592, "learning_rate": 9.026859105876516e-05, "loss": 46.0068, "step": 1559 }, { "epoch": 0.21298382142125744, "grad_norm": 0.06328801065683365, "learning_rate": 9.025569798108486e-05, "loss": 46.0083, "step": 1560 }, { "epoch": 0.21312034951191208, "grad_norm": 0.03966137766838074, "learning_rate": 9.024279729012768e-05, "loss": 46.0061, "step": 1561 }, { "epoch": 0.21325687760256673, "grad_norm": 0.0336940623819828, "learning_rate": 9.022988898833342e-05, "loss": 46.0042, "step": 1562 }, { "epoch": 0.21339340569322138, "grad_norm": 0.03750106692314148, "learning_rate": 9.021697307814334e-05, "loss": 46.0066, "step": 1563 }, { "epoch": 0.21352993378387602, "grad_norm": 0.029017198830842972, "learning_rate": 9.020404956200015e-05, "loss": 46.0091, "step": 1564 }, { "epoch": 0.21366646187453067, "grad_norm": 0.029139243066310883, "learning_rate": 9.019111844234796e-05, "loss": 46.0078, "step": 1565 }, { "epoch": 0.21380298996518535, "grad_norm": 0.04558883234858513, "learning_rate": 9.017817972163235e-05, "loss": 46.0033, "step": 1566 }, { "epoch": 0.21393951805584, "grad_norm": 0.03995855897665024, "learning_rate": 9.016523340230036e-05, "loss": 46.0098, "step": 1567 }, { "epoch": 0.21407604614649464, "grad_norm": 0.05904058739542961, "learning_rate": 9.015227948680039e-05, "loss": 46.01, "step": 1568 }, { "epoch": 0.2142125742371493, "grad_norm": 0.034609001129865646, "learning_rate": 9.013931797758235e-05, "loss": 46.0006, "step": 1569 }, { "epoch": 0.21434910232780394, "grad_norm": 0.056017208844423294, "learning_rate": 9.012634887709754e-05, "loss": 46.0046, "step": 1570 }, { "epoch": 0.2144856304184586, "grad_norm": 0.08704742044210434, "learning_rate": 9.011337218779874e-05, "loss": 46.0058, "step": 1571 }, { "epoch": 0.21462215850911326, "grad_norm": 0.09679454565048218, "learning_rate": 9.010038791214012e-05, "loss": 46.007, "step": 1572 }, { "epoch": 0.2147586865997679, "grad_norm": 0.05318604037165642, "learning_rate": 9.008739605257731e-05, "loss": 46.0048, "step": 1573 }, { "epoch": 0.21489521469042255, "grad_norm": 0.1014774739742279, "learning_rate": 9.007439661156735e-05, "loss": 46.0045, "step": 1574 }, { "epoch": 0.2150317427810772, "grad_norm": 0.047581277787685394, "learning_rate": 9.00613895915688e-05, "loss": 46.0028, "step": 1575 }, { "epoch": 0.21516827087173185, "grad_norm": 0.07131008803844452, "learning_rate": 9.00483749950415e-05, "loss": 46.0016, "step": 1576 }, { "epoch": 0.21530479896238652, "grad_norm": 0.07475249469280243, "learning_rate": 9.003535282444687e-05, "loss": 46.0014, "step": 1577 }, { "epoch": 0.21544132705304117, "grad_norm": 0.042241234332323074, "learning_rate": 9.002232308224765e-05, "loss": 46.003, "step": 1578 }, { "epoch": 0.21557785514369582, "grad_norm": 0.028971578925848007, "learning_rate": 9.000928577090813e-05, "loss": 46.0083, "step": 1579 }, { "epoch": 0.21571438323435047, "grad_norm": 0.06977228820323944, "learning_rate": 8.999624089289389e-05, "loss": 46.0029, "step": 1580 }, { "epoch": 0.2158509113250051, "grad_norm": 0.06966626644134521, "learning_rate": 8.998318845067208e-05, "loss": 46.005, "step": 1581 }, { "epoch": 
0.21598743941565976, "grad_norm": 0.0745711699128151, "learning_rate": 8.997012844671117e-05, "loss": 46.01, "step": 1582 }, { "epoch": 0.21612396750631443, "grad_norm": 0.05826292186975479, "learning_rate": 8.995706088348115e-05, "loss": 46.0049, "step": 1583 }, { "epoch": 0.21626049559696908, "grad_norm": 0.02755294181406498, "learning_rate": 8.994398576345336e-05, "loss": 46.011, "step": 1584 }, { "epoch": 0.21639702368762373, "grad_norm": 0.05545426532626152, "learning_rate": 8.993090308910061e-05, "loss": 46.0047, "step": 1585 }, { "epoch": 0.21653355177827838, "grad_norm": 0.10013281553983688, "learning_rate": 8.991781286289716e-05, "loss": 46.0048, "step": 1586 }, { "epoch": 0.21667007986893302, "grad_norm": 0.05210503563284874, "learning_rate": 8.990471508731866e-05, "loss": 46.0084, "step": 1587 }, { "epoch": 0.21680660795958767, "grad_norm": 0.03269532322883606, "learning_rate": 8.989160976484217e-05, "loss": 46.0077, "step": 1588 }, { "epoch": 0.21694313605024235, "grad_norm": 0.044314756989479065, "learning_rate": 8.987849689794625e-05, "loss": 46.0015, "step": 1589 }, { "epoch": 0.217079664140897, "grad_norm": 0.10195699334144592, "learning_rate": 8.986537648911082e-05, "loss": 46.0024, "step": 1590 }, { "epoch": 0.21721619223155164, "grad_norm": 0.19894830882549286, "learning_rate": 8.985224854081726e-05, "loss": 46.0071, "step": 1591 }, { "epoch": 0.2173527203222063, "grad_norm": 0.04507644474506378, "learning_rate": 8.983911305554838e-05, "loss": 46.0016, "step": 1592 }, { "epoch": 0.21748924841286094, "grad_norm": 0.06948091834783554, "learning_rate": 8.982597003578836e-05, "loss": 46.0018, "step": 1593 }, { "epoch": 0.2176257765035156, "grad_norm": 0.04610830545425415, "learning_rate": 8.981281948402289e-05, "loss": 46.0069, "step": 1594 }, { "epoch": 0.21776230459417026, "grad_norm": 0.06257401406764984, "learning_rate": 8.979966140273901e-05, "loss": 46.0023, "step": 1595 }, { "epoch": 0.2178988326848249, "grad_norm": 0.06266569346189499, "learning_rate": 8.978649579442525e-05, "loss": 46.0058, "step": 1596 }, { "epoch": 0.21803536077547955, "grad_norm": 0.14726963639259338, "learning_rate": 8.977332266157148e-05, "loss": 46.0065, "step": 1597 }, { "epoch": 0.2181718888661342, "grad_norm": 0.057241130620241165, "learning_rate": 8.976014200666909e-05, "loss": 46.0046, "step": 1598 }, { "epoch": 0.21830841695678885, "grad_norm": 0.08520026504993439, "learning_rate": 8.974695383221082e-05, "loss": 46.0066, "step": 1599 }, { "epoch": 0.21844494504744352, "grad_norm": 0.22325581312179565, "learning_rate": 8.973375814069087e-05, "loss": 46.0074, "step": 1600 }, { "epoch": 0.21858147313809817, "grad_norm": 0.03908319026231766, "learning_rate": 8.972055493460483e-05, "loss": 46.0097, "step": 1601 }, { "epoch": 0.21871800122875282, "grad_norm": 0.09156929701566696, "learning_rate": 8.970734421644974e-05, "loss": 46.0028, "step": 1602 }, { "epoch": 0.21885452931940746, "grad_norm": 0.04087188467383385, "learning_rate": 8.969412598872404e-05, "loss": 46.0062, "step": 1603 }, { "epoch": 0.2189910574100621, "grad_norm": 0.12215627729892731, "learning_rate": 8.968090025392762e-05, "loss": 46.0009, "step": 1604 }, { "epoch": 0.21912758550071676, "grad_norm": 0.11604364216327667, "learning_rate": 8.966766701456177e-05, "loss": 46.0039, "step": 1605 }, { "epoch": 0.21926411359137143, "grad_norm": 0.046828676015138626, "learning_rate": 8.965442627312916e-05, "loss": 46.0058, "step": 1606 }, { "epoch": 0.21940064168202608, "grad_norm": 0.1395268589258194, "learning_rate": 
8.964117803213398e-05, "loss": 46.0038, "step": 1607 }, { "epoch": 0.21953716977268073, "grad_norm": 0.05131154507398605, "learning_rate": 8.962792229408172e-05, "loss": 46.0015, "step": 1608 }, { "epoch": 0.21967369786333538, "grad_norm": 0.03785865008831024, "learning_rate": 8.96146590614794e-05, "loss": 46.0101, "step": 1609 }, { "epoch": 0.21981022595399002, "grad_norm": 0.049474086612463, "learning_rate": 8.960138833683533e-05, "loss": 46.0019, "step": 1610 }, { "epoch": 0.2199467540446447, "grad_norm": 0.050491590052843094, "learning_rate": 8.958811012265937e-05, "loss": 46.0099, "step": 1611 }, { "epoch": 0.22008328213529935, "grad_norm": 0.058515384793281555, "learning_rate": 8.957482442146272e-05, "loss": 46.009, "step": 1612 }, { "epoch": 0.220219810225954, "grad_norm": 0.04179259389638901, "learning_rate": 8.956153123575798e-05, "loss": 46.0062, "step": 1613 }, { "epoch": 0.22035633831660864, "grad_norm": 0.02568054012954235, "learning_rate": 8.954823056805924e-05, "loss": 46.0093, "step": 1614 }, { "epoch": 0.2204928664072633, "grad_norm": 0.09199140220880508, "learning_rate": 8.953492242088195e-05, "loss": 46.0111, "step": 1615 }, { "epoch": 0.22062939449791794, "grad_norm": 0.05577234923839569, "learning_rate": 8.952160679674298e-05, "loss": 46.0115, "step": 1616 }, { "epoch": 0.2207659225885726, "grad_norm": 0.03626580163836479, "learning_rate": 8.95082836981606e-05, "loss": 46.0027, "step": 1617 }, { "epoch": 0.22090245067922726, "grad_norm": 0.11353987455368042, "learning_rate": 8.949495312765455e-05, "loss": 46.0058, "step": 1618 }, { "epoch": 0.2210389787698819, "grad_norm": 0.0579768642783165, "learning_rate": 8.948161508774593e-05, "loss": 46.0031, "step": 1619 }, { "epoch": 0.22117550686053655, "grad_norm": 0.040593717247247696, "learning_rate": 8.946826958095727e-05, "loss": 46.0049, "step": 1620 }, { "epoch": 0.2213120349511912, "grad_norm": 0.06674161553382874, "learning_rate": 8.945491660981251e-05, "loss": 46.0089, "step": 1621 }, { "epoch": 0.22144856304184585, "grad_norm": 0.06030568107962608, "learning_rate": 8.944155617683702e-05, "loss": 46.0058, "step": 1622 }, { "epoch": 0.22158509113250052, "grad_norm": 0.06746244430541992, "learning_rate": 8.942818828455754e-05, "loss": 46.0103, "step": 1623 }, { "epoch": 0.22172161922315517, "grad_norm": 0.037692904472351074, "learning_rate": 8.941481293550225e-05, "loss": 46.005, "step": 1624 }, { "epoch": 0.22185814731380982, "grad_norm": 0.08732948452234268, "learning_rate": 8.940143013220075e-05, "loss": 46.0075, "step": 1625 }, { "epoch": 0.22199467540446446, "grad_norm": 0.06631074100732803, "learning_rate": 8.938803987718403e-05, "loss": 46.0001, "step": 1626 }, { "epoch": 0.2221312034951191, "grad_norm": 0.03751387819647789, "learning_rate": 8.937464217298448e-05, "loss": 46.0059, "step": 1627 }, { "epoch": 0.2222677315857738, "grad_norm": 0.027690699324011803, "learning_rate": 8.936123702213594e-05, "loss": 46.0058, "step": 1628 }, { "epoch": 0.22240425967642843, "grad_norm": 0.04070919752120972, "learning_rate": 8.93478244271736e-05, "loss": 46.0046, "step": 1629 }, { "epoch": 0.22254078776708308, "grad_norm": 0.059110578149557114, "learning_rate": 8.933440439063412e-05, "loss": 46.0055, "step": 1630 }, { "epoch": 0.22267731585773773, "grad_norm": 0.12852534651756287, "learning_rate": 8.932097691505552e-05, "loss": 46.0085, "step": 1631 }, { "epoch": 0.22281384394839238, "grad_norm": 0.1421593874692917, "learning_rate": 8.930754200297725e-05, "loss": 46.0142, "step": 1632 }, { "epoch": 0.22295037203904702, 
"grad_norm": 0.037453558295965195, "learning_rate": 8.929409965694016e-05, "loss": 46.0078, "step": 1633 }, { "epoch": 0.2230869001297017, "grad_norm": 0.04556925594806671, "learning_rate": 8.92806498794865e-05, "loss": 46.0039, "step": 1634 }, { "epoch": 0.22322342822035635, "grad_norm": 0.04774145036935806, "learning_rate": 8.926719267315995e-05, "loss": 46.0071, "step": 1635 }, { "epoch": 0.223359956311011, "grad_norm": 0.12487052381038666, "learning_rate": 8.925372804050555e-05, "loss": 46.0076, "step": 1636 }, { "epoch": 0.22349648440166564, "grad_norm": 0.05150671675801277, "learning_rate": 8.924025598406978e-05, "loss": 46.0009, "step": 1637 }, { "epoch": 0.2236330124923203, "grad_norm": 0.07781659811735153, "learning_rate": 8.922677650640054e-05, "loss": 46.0007, "step": 1638 }, { "epoch": 0.22376954058297494, "grad_norm": 0.11946213245391846, "learning_rate": 8.921328961004707e-05, "loss": 46.0027, "step": 1639 }, { "epoch": 0.2239060686736296, "grad_norm": 0.05591997504234314, "learning_rate": 8.919979529756008e-05, "loss": 46.0008, "step": 1640 }, { "epoch": 0.22404259676428426, "grad_norm": 0.03308802470564842, "learning_rate": 8.918629357149164e-05, "loss": 46.0041, "step": 1641 }, { "epoch": 0.2241791248549389, "grad_norm": 0.08729170262813568, "learning_rate": 8.917278443439523e-05, "loss": 46.0053, "step": 1642 }, { "epoch": 0.22431565294559355, "grad_norm": 0.085662841796875, "learning_rate": 8.915926788882576e-05, "loss": 46.007, "step": 1643 }, { "epoch": 0.2244521810362482, "grad_norm": 0.041338928043842316, "learning_rate": 8.914574393733952e-05, "loss": 46.0052, "step": 1644 }, { "epoch": 0.22458870912690285, "grad_norm": 0.1374015361070633, "learning_rate": 8.913221258249418e-05, "loss": 46.0095, "step": 1645 }, { "epoch": 0.22472523721755752, "grad_norm": 0.038746364414691925, "learning_rate": 8.911867382684883e-05, "loss": 46.0034, "step": 1646 }, { "epoch": 0.22486176530821217, "grad_norm": 0.1670989841222763, "learning_rate": 8.910512767296398e-05, "loss": 46.0037, "step": 1647 }, { "epoch": 0.22499829339886682, "grad_norm": 0.09270493686199188, "learning_rate": 8.90915741234015e-05, "loss": 46.0053, "step": 1648 }, { "epoch": 0.22513482148952146, "grad_norm": 0.20192816853523254, "learning_rate": 8.907801318072469e-05, "loss": 46.005, "step": 1649 }, { "epoch": 0.2252713495801761, "grad_norm": 0.08479326218366623, "learning_rate": 8.90644448474982e-05, "loss": 46.0128, "step": 1650 }, { "epoch": 0.2254078776708308, "grad_norm": 0.18741504848003387, "learning_rate": 8.905086912628817e-05, "loss": 46.0091, "step": 1651 }, { "epoch": 0.22554440576148543, "grad_norm": 0.058691706508398056, "learning_rate": 8.903728601966206e-05, "loss": 46.0014, "step": 1652 }, { "epoch": 0.22568093385214008, "grad_norm": 0.04443957656621933, "learning_rate": 8.902369553018872e-05, "loss": 46.0057, "step": 1653 }, { "epoch": 0.22581746194279473, "grad_norm": 0.04465538635849953, "learning_rate": 8.901009766043847e-05, "loss": 46.0051, "step": 1654 }, { "epoch": 0.22595399003344938, "grad_norm": 0.1097107082605362, "learning_rate": 8.899649241298293e-05, "loss": 46.0042, "step": 1655 }, { "epoch": 0.22609051812410402, "grad_norm": 0.08197598904371262, "learning_rate": 8.89828797903952e-05, "loss": 46.0065, "step": 1656 }, { "epoch": 0.2262270462147587, "grad_norm": 0.042670369148254395, "learning_rate": 8.896925979524972e-05, "loss": 46.0002, "step": 1657 }, { "epoch": 0.22636357430541335, "grad_norm": 0.09123487025499344, "learning_rate": 8.895563243012237e-05, "loss": 46.0082, 
"step": 1658 }, { "epoch": 0.226500102396068, "grad_norm": 0.059811610728502274, "learning_rate": 8.894199769759037e-05, "loss": 46.003, "step": 1659 }, { "epoch": 0.22663663048672264, "grad_norm": 0.14972169697284698, "learning_rate": 8.892835560023237e-05, "loss": 46.0096, "step": 1660 }, { "epoch": 0.2267731585773773, "grad_norm": 0.07804597914218903, "learning_rate": 8.89147061406284e-05, "loss": 46.0068, "step": 1661 }, { "epoch": 0.22690968666803193, "grad_norm": 0.07539259642362595, "learning_rate": 8.89010493213599e-05, "loss": 46.0089, "step": 1662 }, { "epoch": 0.2270462147586866, "grad_norm": 0.03744538873434067, "learning_rate": 8.888738514500967e-05, "loss": 46.0141, "step": 1663 }, { "epoch": 0.22718274284934126, "grad_norm": 0.09533721953630447, "learning_rate": 8.887371361416192e-05, "loss": 46.006, "step": 1664 }, { "epoch": 0.2273192709399959, "grad_norm": 0.04732528328895569, "learning_rate": 8.886003473140227e-05, "loss": 46.0067, "step": 1665 }, { "epoch": 0.22745579903065055, "grad_norm": 0.0691031962633133, "learning_rate": 8.884634849931771e-05, "loss": 46.0116, "step": 1666 }, { "epoch": 0.2275923271213052, "grad_norm": 0.04217732697725296, "learning_rate": 8.88326549204966e-05, "loss": 46.0017, "step": 1667 }, { "epoch": 0.22772885521195987, "grad_norm": 0.04489154741168022, "learning_rate": 8.881895399752874e-05, "loss": 46.0099, "step": 1668 }, { "epoch": 0.22786538330261452, "grad_norm": 0.10013914108276367, "learning_rate": 8.880524573300523e-05, "loss": 46.003, "step": 1669 }, { "epoch": 0.22800191139326917, "grad_norm": 0.1041250079870224, "learning_rate": 8.879153012951869e-05, "loss": 46.0106, "step": 1670 }, { "epoch": 0.22813843948392382, "grad_norm": 0.12298408150672913, "learning_rate": 8.8777807189663e-05, "loss": 46.0028, "step": 1671 }, { "epoch": 0.22827496757457846, "grad_norm": 0.08660801500082016, "learning_rate": 8.876407691603353e-05, "loss": 46.0021, "step": 1672 }, { "epoch": 0.2284114956652331, "grad_norm": 0.047619517892599106, "learning_rate": 8.875033931122695e-05, "loss": 46.0005, "step": 1673 }, { "epoch": 0.22854802375588779, "grad_norm": 0.09880300611257553, "learning_rate": 8.873659437784138e-05, "loss": 46.0079, "step": 1674 }, { "epoch": 0.22868455184654243, "grad_norm": 0.09500162303447723, "learning_rate": 8.872284211847629e-05, "loss": 46.0032, "step": 1675 }, { "epoch": 0.22882107993719708, "grad_norm": 0.17600296437740326, "learning_rate": 8.870908253573256e-05, "loss": 46.0118, "step": 1676 }, { "epoch": 0.22895760802785173, "grad_norm": 0.055331818759441376, "learning_rate": 8.869531563221243e-05, "loss": 46.0038, "step": 1677 }, { "epoch": 0.22909413611850638, "grad_norm": 0.07013209164142609, "learning_rate": 8.868154141051954e-05, "loss": 46.0052, "step": 1678 }, { "epoch": 0.22923066420916102, "grad_norm": 0.18177147209644318, "learning_rate": 8.866775987325893e-05, "loss": 46.0073, "step": 1679 }, { "epoch": 0.2293671922998157, "grad_norm": 0.06932986527681351, "learning_rate": 8.865397102303698e-05, "loss": 46.008, "step": 1680 }, { "epoch": 0.22950372039047034, "grad_norm": 0.04440315067768097, "learning_rate": 8.864017486246148e-05, "loss": 46.011, "step": 1681 }, { "epoch": 0.229640248481125, "grad_norm": 0.03485152870416641, "learning_rate": 8.862637139414162e-05, "loss": 46.0042, "step": 1682 }, { "epoch": 0.22977677657177964, "grad_norm": 0.08908677846193314, "learning_rate": 8.861256062068796e-05, "loss": 46.011, "step": 1683 }, { "epoch": 0.2299133046624343, "grad_norm": 0.028371354565024376, "learning_rate": 
8.85987425447124e-05, "loss": 46.0102, "step": 1684 }, { "epoch": 0.23004983275308893, "grad_norm": 0.11603806912899017, "learning_rate": 8.858491716882826e-05, "loss": 46.0063, "step": 1685 }, { "epoch": 0.2301863608437436, "grad_norm": 0.049917303025722504, "learning_rate": 8.857108449565027e-05, "loss": 46.0027, "step": 1686 }, { "epoch": 0.23032288893439826, "grad_norm": 0.05997578054666519, "learning_rate": 8.855724452779448e-05, "loss": 46.0009, "step": 1687 }, { "epoch": 0.2304594170250529, "grad_norm": 0.050803206861019135, "learning_rate": 8.854339726787835e-05, "loss": 46.0058, "step": 1688 }, { "epoch": 0.23059594511570755, "grad_norm": 0.07068202644586563, "learning_rate": 8.85295427185207e-05, "loss": 46.0051, "step": 1689 }, { "epoch": 0.2307324732063622, "grad_norm": 0.04077614098787308, "learning_rate": 8.851568088234178e-05, "loss": 46.0056, "step": 1690 }, { "epoch": 0.23086900129701687, "grad_norm": 0.0706164762377739, "learning_rate": 8.850181176196315e-05, "loss": 46.0054, "step": 1691 }, { "epoch": 0.23100552938767152, "grad_norm": 0.041390202939510345, "learning_rate": 8.848793536000779e-05, "loss": 46.0067, "step": 1692 }, { "epoch": 0.23114205747832617, "grad_norm": 0.04200747236609459, "learning_rate": 8.847405167910004e-05, "loss": 46.0071, "step": 1693 }, { "epoch": 0.23127858556898082, "grad_norm": 0.0680479034781456, "learning_rate": 8.846016072186563e-05, "loss": 46.0045, "step": 1694 }, { "epoch": 0.23141511365963546, "grad_norm": 0.0594487264752388, "learning_rate": 8.844626249093166e-05, "loss": 46.0028, "step": 1695 }, { "epoch": 0.2315516417502901, "grad_norm": 0.08974328637123108, "learning_rate": 8.84323569889266e-05, "loss": 46.0092, "step": 1696 }, { "epoch": 0.23168816984094479, "grad_norm": 0.06920242309570312, "learning_rate": 8.841844421848031e-05, "loss": 46.0095, "step": 1697 }, { "epoch": 0.23182469793159943, "grad_norm": 0.09233519434928894, "learning_rate": 8.8404524182224e-05, "loss": 46.0058, "step": 1698 }, { "epoch": 0.23196122602225408, "grad_norm": 0.0801035687327385, "learning_rate": 8.839059688279028e-05, "loss": 46.01, "step": 1699 }, { "epoch": 0.23209775411290873, "grad_norm": 0.33776217699050903, "learning_rate": 8.837666232281313e-05, "loss": 46.008, "step": 1700 }, { "epoch": 0.23223428220356337, "grad_norm": 0.14376206696033478, "learning_rate": 8.836272050492785e-05, "loss": 46.0168, "step": 1701 }, { "epoch": 0.23237081029421802, "grad_norm": 0.07748469710350037, "learning_rate": 8.834877143177121e-05, "loss": 46.0027, "step": 1702 }, { "epoch": 0.2325073383848727, "grad_norm": 0.03136751428246498, "learning_rate": 8.833481510598127e-05, "loss": 46.0105, "step": 1703 }, { "epoch": 0.23264386647552734, "grad_norm": 0.048550743609666824, "learning_rate": 8.83208515301975e-05, "loss": 46.0024, "step": 1704 }, { "epoch": 0.232780394566182, "grad_norm": 0.06768582761287689, "learning_rate": 8.830688070706075e-05, "loss": 46.0041, "step": 1705 }, { "epoch": 0.23291692265683664, "grad_norm": 0.029650744050741196, "learning_rate": 8.82929026392132e-05, "loss": 46.0022, "step": 1706 }, { "epoch": 0.2330534507474913, "grad_norm": 0.10753413289785385, "learning_rate": 8.827891732929843e-05, "loss": 46.0009, "step": 1707 }, { "epoch": 0.23318997883814596, "grad_norm": 0.05531018227338791, "learning_rate": 8.826492477996138e-05, "loss": 46.0089, "step": 1708 }, { "epoch": 0.2333265069288006, "grad_norm": 0.05802333727478981, "learning_rate": 8.825092499384837e-05, "loss": 46.0009, "step": 1709 }, { "epoch": 0.23346303501945526, 
"grad_norm": 0.02462812513113022, "learning_rate": 8.823691797360708e-05, "loss": 46.0127, "step": 1710 }, { "epoch": 0.2335995631101099, "grad_norm": 0.031160922721028328, "learning_rate": 8.822290372188655e-05, "loss": 46.0051, "step": 1711 }, { "epoch": 0.23373609120076455, "grad_norm": 0.12732389569282532, "learning_rate": 8.820888224133722e-05, "loss": 46.0068, "step": 1712 }, { "epoch": 0.2338726192914192, "grad_norm": 0.08745445311069489, "learning_rate": 8.819485353461086e-05, "loss": 46.0023, "step": 1713 }, { "epoch": 0.23400914738207387, "grad_norm": 0.11956072598695755, "learning_rate": 8.818081760436061e-05, "loss": 46.0067, "step": 1714 }, { "epoch": 0.23414567547272852, "grad_norm": 0.039469581097364426, "learning_rate": 8.8166774453241e-05, "loss": 46.0062, "step": 1715 }, { "epoch": 0.23428220356338317, "grad_norm": 0.04733849689364433, "learning_rate": 8.815272408390788e-05, "loss": 46.0046, "step": 1716 }, { "epoch": 0.23441873165403782, "grad_norm": 0.03967846930027008, "learning_rate": 8.813866649901856e-05, "loss": 46.0033, "step": 1717 }, { "epoch": 0.23455525974469246, "grad_norm": 0.049113087356090546, "learning_rate": 8.81246017012316e-05, "loss": 46.0136, "step": 1718 }, { "epoch": 0.2346917878353471, "grad_norm": 0.06405916064977646, "learning_rate": 8.811052969320702e-05, "loss": 46.0019, "step": 1719 }, { "epoch": 0.23482831592600178, "grad_norm": 0.09281764924526215, "learning_rate": 8.80964504776061e-05, "loss": 46.0059, "step": 1720 }, { "epoch": 0.23496484401665643, "grad_norm": 0.0656527504324913, "learning_rate": 8.808236405709158e-05, "loss": 46.0026, "step": 1721 }, { "epoch": 0.23510137210731108, "grad_norm": 0.0550096221268177, "learning_rate": 8.806827043432754e-05, "loss": 46.0023, "step": 1722 }, { "epoch": 0.23523790019796573, "grad_norm": 0.10086105018854141, "learning_rate": 8.805416961197939e-05, "loss": 46.0114, "step": 1723 }, { "epoch": 0.23537442828862037, "grad_norm": 0.10868209600448608, "learning_rate": 8.80400615927139e-05, "loss": 46.0047, "step": 1724 }, { "epoch": 0.23551095637927505, "grad_norm": 0.07805322855710983, "learning_rate": 8.802594637919925e-05, "loss": 46.0019, "step": 1725 }, { "epoch": 0.2356474844699297, "grad_norm": 0.0730748325586319, "learning_rate": 8.801182397410495e-05, "loss": 46.0032, "step": 1726 }, { "epoch": 0.23578401256058434, "grad_norm": 0.16289301216602325, "learning_rate": 8.799769438010184e-05, "loss": 46.0001, "step": 1727 }, { "epoch": 0.235920540651239, "grad_norm": 0.10425674170255661, "learning_rate": 8.798355759986219e-05, "loss": 46.0067, "step": 1728 }, { "epoch": 0.23605706874189364, "grad_norm": 0.07232199609279633, "learning_rate": 8.796941363605958e-05, "loss": 46.0038, "step": 1729 }, { "epoch": 0.23619359683254829, "grad_norm": 0.07333379238843918, "learning_rate": 8.795526249136893e-05, "loss": 46.0048, "step": 1730 }, { "epoch": 0.23633012492320296, "grad_norm": 0.04663578420877457, "learning_rate": 8.794110416846657e-05, "loss": 46.0051, "step": 1731 }, { "epoch": 0.2364666530138576, "grad_norm": 0.043116677552461624, "learning_rate": 8.792693867003017e-05, "loss": 46.0031, "step": 1732 }, { "epoch": 0.23660318110451226, "grad_norm": 0.07206620275974274, "learning_rate": 8.791276599873873e-05, "loss": 46.0184, "step": 1733 }, { "epoch": 0.2367397091951669, "grad_norm": 0.07968153059482574, "learning_rate": 8.789858615727265e-05, "loss": 46.0011, "step": 1734 }, { "epoch": 0.23687623728582155, "grad_norm": 0.04350929707288742, "learning_rate": 8.788439914831366e-05, "loss": 46.0047, 
"step": 1735 }, { "epoch": 0.2370127653764762, "grad_norm": 0.03626111149787903, "learning_rate": 8.787020497454484e-05, "loss": 46.0121, "step": 1736 }, { "epoch": 0.23714929346713087, "grad_norm": 0.034210264682769775, "learning_rate": 8.785600363865061e-05, "loss": 46.0067, "step": 1737 }, { "epoch": 0.23728582155778552, "grad_norm": 0.1267501711845398, "learning_rate": 8.784179514331682e-05, "loss": 46.004, "step": 1738 }, { "epoch": 0.23742234964844017, "grad_norm": 0.1610621213912964, "learning_rate": 8.782757949123059e-05, "loss": 46.0017, "step": 1739 }, { "epoch": 0.23755887773909481, "grad_norm": 0.14214304089546204, "learning_rate": 8.781335668508043e-05, "loss": 46.0003, "step": 1740 }, { "epoch": 0.23769540582974946, "grad_norm": 0.039774633944034576, "learning_rate": 8.779912672755621e-05, "loss": 46.0086, "step": 1741 }, { "epoch": 0.2378319339204041, "grad_norm": 0.08785615861415863, "learning_rate": 8.778488962134915e-05, "loss": 46.0031, "step": 1742 }, { "epoch": 0.23796846201105878, "grad_norm": 0.07195477932691574, "learning_rate": 8.777064536915178e-05, "loss": 46.0082, "step": 1743 }, { "epoch": 0.23810499010171343, "grad_norm": 0.08673382550477982, "learning_rate": 8.775639397365804e-05, "loss": 46.0146, "step": 1744 }, { "epoch": 0.23824151819236808, "grad_norm": 0.06902418285608292, "learning_rate": 8.774213543756318e-05, "loss": 46.0061, "step": 1745 }, { "epoch": 0.23837804628302273, "grad_norm": 0.027046339586377144, "learning_rate": 8.772786976356385e-05, "loss": 46.0086, "step": 1746 }, { "epoch": 0.23851457437367737, "grad_norm": 0.06209815293550491, "learning_rate": 8.771359695435797e-05, "loss": 46.0192, "step": 1747 }, { "epoch": 0.23865110246433205, "grad_norm": 0.17595143616199493, "learning_rate": 8.769931701264488e-05, "loss": 46.0087, "step": 1748 }, { "epoch": 0.2387876305549867, "grad_norm": 0.16023744642734528, "learning_rate": 8.768502994112526e-05, "loss": 46.0026, "step": 1749 }, { "epoch": 0.23892415864564134, "grad_norm": 0.122861847281456, "learning_rate": 8.76707357425011e-05, "loss": 46.0098, "step": 1750 }, { "epoch": 0.239060686736296, "grad_norm": 0.12551911175251007, "learning_rate": 8.765643441947578e-05, "loss": 46.0075, "step": 1751 }, { "epoch": 0.23919721482695064, "grad_norm": 0.05466614291071892, "learning_rate": 8.764212597475397e-05, "loss": 46.0055, "step": 1752 }, { "epoch": 0.23933374291760529, "grad_norm": 0.1516505777835846, "learning_rate": 8.762781041104177e-05, "loss": 46.0016, "step": 1753 }, { "epoch": 0.23947027100825996, "grad_norm": 0.03741871193051338, "learning_rate": 8.761348773104657e-05, "loss": 46.0045, "step": 1754 }, { "epoch": 0.2396067990989146, "grad_norm": 0.04451262950897217, "learning_rate": 8.75991579374771e-05, "loss": 46.0021, "step": 1755 }, { "epoch": 0.23974332718956926, "grad_norm": 0.04980932176113129, "learning_rate": 8.758482103304348e-05, "loss": 46.0093, "step": 1756 }, { "epoch": 0.2398798552802239, "grad_norm": 0.1892150193452835, "learning_rate": 8.757047702045711e-05, "loss": 46.0053, "step": 1757 }, { "epoch": 0.24001638337087855, "grad_norm": 0.033540837466716766, "learning_rate": 8.75561259024308e-05, "loss": 46.0054, "step": 1758 }, { "epoch": 0.2401529114615332, "grad_norm": 0.09901927411556244, "learning_rate": 8.75417676816787e-05, "loss": 46.0056, "step": 1759 }, { "epoch": 0.24028943955218787, "grad_norm": 0.03211856260895729, "learning_rate": 8.752740236091624e-05, "loss": 46.0052, "step": 1760 }, { "epoch": 0.24042596764284252, "grad_norm": 0.11884235590696335, 
"learning_rate": 8.751302994286023e-05, "loss": 46.0041, "step": 1761 }, { "epoch": 0.24056249573349717, "grad_norm": 0.04157320037484169, "learning_rate": 8.749865043022884e-05, "loss": 46.0004, "step": 1762 }, { "epoch": 0.24069902382415181, "grad_norm": 0.055461980402469635, "learning_rate": 8.748426382574156e-05, "loss": 46.0078, "step": 1763 }, { "epoch": 0.24083555191480646, "grad_norm": 0.03211371973156929, "learning_rate": 8.746987013211924e-05, "loss": 46.0022, "step": 1764 }, { "epoch": 0.24097208000546114, "grad_norm": 0.09578761458396912, "learning_rate": 8.745546935208404e-05, "loss": 46.0039, "step": 1765 }, { "epoch": 0.24110860809611578, "grad_norm": 0.0823301449418068, "learning_rate": 8.74410614883595e-05, "loss": 46.0093, "step": 1766 }, { "epoch": 0.24124513618677043, "grad_norm": 0.05270790308713913, "learning_rate": 8.742664654367045e-05, "loss": 46.0161, "step": 1767 }, { "epoch": 0.24138166427742508, "grad_norm": 0.07813568413257599, "learning_rate": 8.741222452074308e-05, "loss": 46.0015, "step": 1768 }, { "epoch": 0.24151819236807973, "grad_norm": 0.07466394454240799, "learning_rate": 8.739779542230498e-05, "loss": 46.007, "step": 1769 }, { "epoch": 0.24165472045873437, "grad_norm": 0.043698154389858246, "learning_rate": 8.738335925108499e-05, "loss": 46.0016, "step": 1770 }, { "epoch": 0.24179124854938905, "grad_norm": 0.15281455218791962, "learning_rate": 8.736891600981332e-05, "loss": 46.0073, "step": 1771 }, { "epoch": 0.2419277766400437, "grad_norm": 0.03488371893763542, "learning_rate": 8.735446570122151e-05, "loss": 46.0115, "step": 1772 }, { "epoch": 0.24206430473069834, "grad_norm": 0.06657852232456207, "learning_rate": 8.734000832804245e-05, "loss": 46.0016, "step": 1773 }, { "epoch": 0.242200832821353, "grad_norm": 0.05580620467662811, "learning_rate": 8.73255438930104e-05, "loss": 46.0026, "step": 1774 }, { "epoch": 0.24233736091200764, "grad_norm": 0.04599941894412041, "learning_rate": 8.731107239886085e-05, "loss": 46.0033, "step": 1775 }, { "epoch": 0.24247388900266229, "grad_norm": 0.045879434794187546, "learning_rate": 8.729659384833074e-05, "loss": 46.0117, "step": 1776 }, { "epoch": 0.24261041709331696, "grad_norm": 0.046412479132413864, "learning_rate": 8.728210824415827e-05, "loss": 46.0062, "step": 1777 }, { "epoch": 0.2427469451839716, "grad_norm": 0.09105752408504486, "learning_rate": 8.726761558908303e-05, "loss": 46.0069, "step": 1778 }, { "epoch": 0.24288347327462625, "grad_norm": 0.14796888828277588, "learning_rate": 8.725311588584591e-05, "loss": 46.0041, "step": 1779 }, { "epoch": 0.2430200013652809, "grad_norm": 0.039077047258615494, "learning_rate": 8.72386091371891e-05, "loss": 46.0065, "step": 1780 }, { "epoch": 0.24315652945593555, "grad_norm": 0.055250607430934906, "learning_rate": 8.722409534585619e-05, "loss": 46.0023, "step": 1781 }, { "epoch": 0.24329305754659022, "grad_norm": 0.03439754620194435, "learning_rate": 8.720957451459207e-05, "loss": 46.0052, "step": 1782 }, { "epoch": 0.24342958563724487, "grad_norm": 0.07294188439846039, "learning_rate": 8.719504664614293e-05, "loss": 46.0009, "step": 1783 }, { "epoch": 0.24356611372789952, "grad_norm": 0.053632259368896484, "learning_rate": 8.718051174325637e-05, "loss": 46.0008, "step": 1784 }, { "epoch": 0.24370264181855417, "grad_norm": 0.03502294793725014, "learning_rate": 8.716596980868126e-05, "loss": 46.0034, "step": 1785 }, { "epoch": 0.2438391699092088, "grad_norm": 0.03531038388609886, "learning_rate": 8.71514208451678e-05, "loss": 46.0036, "step": 1786 }, { "epoch": 
0.24397569799986346, "grad_norm": 0.036508750170469284, "learning_rate": 8.713686485546754e-05, "loss": 46.0044, "step": 1787 }, { "epoch": 0.24411222609051814, "grad_norm": 0.12205744534730911, "learning_rate": 8.712230184233336e-05, "loss": 46.0025, "step": 1788 }, { "epoch": 0.24424875418117278, "grad_norm": 0.07715026289224625, "learning_rate": 8.710773180851945e-05, "loss": 46.0043, "step": 1789 }, { "epoch": 0.24438528227182743, "grad_norm": 0.04096875712275505, "learning_rate": 8.709315475678134e-05, "loss": 46.0083, "step": 1790 }, { "epoch": 0.24452181036248208, "grad_norm": 0.04008398950099945, "learning_rate": 8.707857068987589e-05, "loss": 46.0071, "step": 1791 }, { "epoch": 0.24465833845313673, "grad_norm": 0.03713328018784523, "learning_rate": 8.706397961056126e-05, "loss": 46.0082, "step": 1792 }, { "epoch": 0.24479486654379137, "grad_norm": 0.09118172526359558, "learning_rate": 8.7049381521597e-05, "loss": 46.0046, "step": 1793 }, { "epoch": 0.24493139463444605, "grad_norm": 0.12514762580394745, "learning_rate": 8.703477642574391e-05, "loss": 46.002, "step": 1794 }, { "epoch": 0.2450679227251007, "grad_norm": 0.11447775363922119, "learning_rate": 8.702016432576417e-05, "loss": 46.011, "step": 1795 }, { "epoch": 0.24520445081575534, "grad_norm": 0.0909338966012001, "learning_rate": 8.700554522442123e-05, "loss": 46.0106, "step": 1796 }, { "epoch": 0.24534097890641, "grad_norm": 0.03596460074186325, "learning_rate": 8.699091912447994e-05, "loss": 46.0067, "step": 1797 }, { "epoch": 0.24547750699706464, "grad_norm": 0.16990052163600922, "learning_rate": 8.69762860287064e-05, "loss": 46.0041, "step": 1798 }, { "epoch": 0.24561403508771928, "grad_norm": 0.15732082724571228, "learning_rate": 8.696164593986809e-05, "loss": 46.009, "step": 1799 }, { "epoch": 0.24575056317837396, "grad_norm": 0.06863962113857269, "learning_rate": 8.694699886073375e-05, "loss": 46.0102, "step": 1800 }, { "epoch": 0.2458870912690286, "grad_norm": 0.18832962214946747, "learning_rate": 8.693234479407353e-05, "loss": 46.0101, "step": 1801 }, { "epoch": 0.24602361935968325, "grad_norm": 0.03322373703122139, "learning_rate": 8.691768374265881e-05, "loss": 46.005, "step": 1802 }, { "epoch": 0.2461601474503379, "grad_norm": 0.06785766035318375, "learning_rate": 8.690301570926233e-05, "loss": 46.0086, "step": 1803 }, { "epoch": 0.24629667554099255, "grad_norm": 0.046073686331510544, "learning_rate": 8.68883406966582e-05, "loss": 46.0042, "step": 1804 }, { "epoch": 0.24643320363164722, "grad_norm": 0.07956234365701675, "learning_rate": 8.687365870762174e-05, "loss": 46.005, "step": 1805 }, { "epoch": 0.24656973172230187, "grad_norm": 0.1451939046382904, "learning_rate": 8.685896974492969e-05, "loss": 46.0104, "step": 1806 }, { "epoch": 0.24670625981295652, "grad_norm": 0.07369821518659592, "learning_rate": 8.684427381136007e-05, "loss": 46.0114, "step": 1807 }, { "epoch": 0.24684278790361117, "grad_norm": 0.03286251425743103, "learning_rate": 8.68295709096922e-05, "loss": 46.0057, "step": 1808 }, { "epoch": 0.2469793159942658, "grad_norm": 0.07479933649301529, "learning_rate": 8.681486104270672e-05, "loss": 46.0066, "step": 1809 }, { "epoch": 0.24711584408492046, "grad_norm": 0.11703833192586899, "learning_rate": 8.680014421318565e-05, "loss": 46.0007, "step": 1810 }, { "epoch": 0.24725237217557514, "grad_norm": 0.031570080667734146, "learning_rate": 8.678542042391227e-05, "loss": 46.0029, "step": 1811 }, { "epoch": 0.24738890026622978, "grad_norm": 0.08747951686382294, "learning_rate": 8.677068967767117e-05, 
"loss": 46.0041, "step": 1812 }, { "epoch": 0.24752542835688443, "grad_norm": 0.07936576753854752, "learning_rate": 8.675595197724826e-05, "loss": 46.0088, "step": 1813 }, { "epoch": 0.24766195644753908, "grad_norm": 0.08106990158557892, "learning_rate": 8.67412073254308e-05, "loss": 46.0074, "step": 1814 }, { "epoch": 0.24779848453819373, "grad_norm": 0.055016081780195236, "learning_rate": 8.672645572500736e-05, "loss": 46.0128, "step": 1815 }, { "epoch": 0.24793501262884837, "grad_norm": 0.04532529041171074, "learning_rate": 8.671169717876776e-05, "loss": 46.003, "step": 1816 }, { "epoch": 0.24807154071950305, "grad_norm": 0.05790189281105995, "learning_rate": 8.669693168950321e-05, "loss": 46.0198, "step": 1817 }, { "epoch": 0.2482080688101577, "grad_norm": 0.04769091680645943, "learning_rate": 8.66821592600062e-05, "loss": 46.003, "step": 1818 }, { "epoch": 0.24834459690081234, "grad_norm": 0.04425997659564018, "learning_rate": 8.666737989307054e-05, "loss": 46.007, "step": 1819 }, { "epoch": 0.248481124991467, "grad_norm": 0.03149816766381264, "learning_rate": 8.665259359149132e-05, "loss": 46.0062, "step": 1820 }, { "epoch": 0.24861765308212164, "grad_norm": 0.09356091171503067, "learning_rate": 8.6637800358065e-05, "loss": 46.0099, "step": 1821 }, { "epoch": 0.2487541811727763, "grad_norm": 0.06751033663749695, "learning_rate": 8.662300019558931e-05, "loss": 46.0046, "step": 1822 }, { "epoch": 0.24889070926343096, "grad_norm": 0.047068770974874496, "learning_rate": 8.66081931068633e-05, "loss": 46.0077, "step": 1823 }, { "epoch": 0.2490272373540856, "grad_norm": 0.05260690674185753, "learning_rate": 8.659337909468734e-05, "loss": 46.008, "step": 1824 }, { "epoch": 0.24916376544474025, "grad_norm": 0.040157344192266464, "learning_rate": 8.657855816186307e-05, "loss": 46.0058, "step": 1825 }, { "epoch": 0.2493002935353949, "grad_norm": 0.05856579542160034, "learning_rate": 8.656373031119351e-05, "loss": 46.0043, "step": 1826 }, { "epoch": 0.24943682162604955, "grad_norm": 0.0607539564371109, "learning_rate": 8.654889554548292e-05, "loss": 46.0114, "step": 1827 }, { "epoch": 0.24957334971670422, "grad_norm": 0.04904850944876671, "learning_rate": 8.653405386753688e-05, "loss": 46.0048, "step": 1828 }, { "epoch": 0.24970987780735887, "grad_norm": 0.06629790365695953, "learning_rate": 8.651920528016232e-05, "loss": 46.0102, "step": 1829 }, { "epoch": 0.24984640589801352, "grad_norm": 0.045363910496234894, "learning_rate": 8.650434978616748e-05, "loss": 46.0077, "step": 1830 }, { "epoch": 0.24998293398866817, "grad_norm": 0.037038031965494156, "learning_rate": 8.64894873883618e-05, "loss": 46.0049, "step": 1831 }, { "epoch": 0.25011946207932284, "grad_norm": 0.06026539206504822, "learning_rate": 8.647461808955615e-05, "loss": 46.0032, "step": 1832 }, { "epoch": 0.25025599016997746, "grad_norm": 0.08990694582462311, "learning_rate": 8.645974189256264e-05, "loss": 46.0054, "step": 1833 }, { "epoch": 0.25039251826063214, "grad_norm": 0.038050662726163864, "learning_rate": 8.644485880019471e-05, "loss": 46.0, "step": 1834 }, { "epoch": 0.25052904635128675, "grad_norm": 0.08464080095291138, "learning_rate": 8.642996881526709e-05, "loss": 46.0014, "step": 1835 }, { "epoch": 0.25066557444194143, "grad_norm": 0.03950836509466171, "learning_rate": 8.641507194059579e-05, "loss": 46.0015, "step": 1836 }, { "epoch": 0.2508021025325961, "grad_norm": 0.09211736917495728, "learning_rate": 8.64001681789982e-05, "loss": 46.0052, "step": 1837 }, { "epoch": 0.2509386306232507, "grad_norm": 
0.05781028792262077, "learning_rate": 8.638525753329296e-05, "loss": 46.0004, "step": 1838 }, { "epoch": 0.2510751587139054, "grad_norm": 0.09404385089874268, "learning_rate": 8.637034000629997e-05, "loss": 46.0031, "step": 1839 }, { "epoch": 0.25121168680456, "grad_norm": 0.06755977123975754, "learning_rate": 8.635541560084051e-05, "loss": 46.0045, "step": 1840 }, { "epoch": 0.2513482148952147, "grad_norm": 0.05884630233049393, "learning_rate": 8.634048431973712e-05, "loss": 46.0037, "step": 1841 }, { "epoch": 0.25148474298586937, "grad_norm": 0.07348772883415222, "learning_rate": 8.632554616581365e-05, "loss": 46.0014, "step": 1842 }, { "epoch": 0.251621271076524, "grad_norm": 0.09434951096773148, "learning_rate": 8.631060114189525e-05, "loss": 46.0037, "step": 1843 }, { "epoch": 0.25175779916717866, "grad_norm": 0.09010591357946396, "learning_rate": 8.629564925080838e-05, "loss": 46.0076, "step": 1844 }, { "epoch": 0.2518943272578333, "grad_norm": 0.12029779702425003, "learning_rate": 8.628069049538074e-05, "loss": 46.0011, "step": 1845 }, { "epoch": 0.25203085534848796, "grad_norm": 0.16389265656471252, "learning_rate": 8.626572487844143e-05, "loss": 46.0075, "step": 1846 }, { "epoch": 0.2521673834391426, "grad_norm": 0.10041432082653046, "learning_rate": 8.625075240282075e-05, "loss": 46.0071, "step": 1847 }, { "epoch": 0.25230391152979725, "grad_norm": 0.2057553231716156, "learning_rate": 8.623577307135036e-05, "loss": 46.0118, "step": 1848 }, { "epoch": 0.25244043962045193, "grad_norm": 0.2816252112388611, "learning_rate": 8.622078688686319e-05, "loss": 46.0146, "step": 1849 }, { "epoch": 0.25257696771110655, "grad_norm": 0.11707514524459839, "learning_rate": 8.62057938521935e-05, "loss": 46.0097, "step": 1850 }, { "epoch": 0.2527134958017612, "grad_norm": 0.058029137551784515, "learning_rate": 8.619079397017675e-05, "loss": 46.0124, "step": 1851 }, { "epoch": 0.25285002389241584, "grad_norm": 0.06569298356771469, "learning_rate": 8.617578724364983e-05, "loss": 46.0051, "step": 1852 }, { "epoch": 0.2529865519830705, "grad_norm": 0.08789964765310287, "learning_rate": 8.616077367545081e-05, "loss": 46.0051, "step": 1853 }, { "epoch": 0.2531230800737252, "grad_norm": 0.030545823276042938, "learning_rate": 8.614575326841913e-05, "loss": 46.0049, "step": 1854 }, { "epoch": 0.2532596081643798, "grad_norm": 0.050443150103092194, "learning_rate": 8.613072602539547e-05, "loss": 46.0043, "step": 1855 }, { "epoch": 0.2533961362550345, "grad_norm": 0.08743395656347275, "learning_rate": 8.611569194922186e-05, "loss": 46.0011, "step": 1856 }, { "epoch": 0.2535326643456891, "grad_norm": 0.04551408439874649, "learning_rate": 8.610065104274156e-05, "loss": 46.0017, "step": 1857 }, { "epoch": 0.2536691924363438, "grad_norm": 0.03921450302004814, "learning_rate": 8.608560330879916e-05, "loss": 46.0017, "step": 1858 }, { "epoch": 0.25380572052699846, "grad_norm": 0.06653425097465515, "learning_rate": 8.607054875024053e-05, "loss": 46.0025, "step": 1859 }, { "epoch": 0.2539422486176531, "grad_norm": 0.08306702226400375, "learning_rate": 8.605548736991283e-05, "loss": 46.0042, "step": 1860 }, { "epoch": 0.25407877670830775, "grad_norm": 0.05812665447592735, "learning_rate": 8.604041917066452e-05, "loss": 46.0045, "step": 1861 }, { "epoch": 0.25421530479896237, "grad_norm": 0.04306524991989136, "learning_rate": 8.602534415534535e-05, "loss": 46.0031, "step": 1862 }, { "epoch": 0.25435183288961705, "grad_norm": 0.04273774474859238, "learning_rate": 8.601026232680634e-05, "loss": 46.0051, "step": 1863 }, { 
"epoch": 0.25448836098027167, "grad_norm": 0.08544976264238358, "learning_rate": 8.59951736878998e-05, "loss": 46.0068, "step": 1864 }, { "epoch": 0.25462488907092634, "grad_norm": 0.03873571753501892, "learning_rate": 8.598007824147936e-05, "loss": 46.0087, "step": 1865 }, { "epoch": 0.254761417161581, "grad_norm": 0.026399245485663414, "learning_rate": 8.59649759903999e-05, "loss": 46.0139, "step": 1866 }, { "epoch": 0.25489794525223564, "grad_norm": 0.05815350264310837, "learning_rate": 8.594986693751761e-05, "loss": 46.0066, "step": 1867 }, { "epoch": 0.2550344733428903, "grad_norm": 0.03653793781995773, "learning_rate": 8.593475108568995e-05, "loss": 46.0021, "step": 1868 }, { "epoch": 0.25517100143354493, "grad_norm": 0.10042199492454529, "learning_rate": 8.591962843777568e-05, "loss": 46.0029, "step": 1869 }, { "epoch": 0.2553075295241996, "grad_norm": 0.07296951115131378, "learning_rate": 8.590449899663485e-05, "loss": 46.0108, "step": 1870 }, { "epoch": 0.2554440576148543, "grad_norm": 0.07861333340406418, "learning_rate": 8.588936276512877e-05, "loss": 46.0044, "step": 1871 }, { "epoch": 0.2555805857055089, "grad_norm": 0.04783058166503906, "learning_rate": 8.587421974612006e-05, "loss": 46.0154, "step": 1872 }, { "epoch": 0.2557171137961636, "grad_norm": 0.0830233097076416, "learning_rate": 8.585906994247261e-05, "loss": 46.0136, "step": 1873 }, { "epoch": 0.2558536418868182, "grad_norm": 0.05275800824165344, "learning_rate": 8.584391335705157e-05, "loss": 46.0052, "step": 1874 }, { "epoch": 0.25599016997747287, "grad_norm": 0.12081858515739441, "learning_rate": 8.582874999272346e-05, "loss": 46.0034, "step": 1875 }, { "epoch": 0.2561266980681275, "grad_norm": 0.06187004595994949, "learning_rate": 8.581357985235595e-05, "loss": 46.0137, "step": 1876 }, { "epoch": 0.25626322615878216, "grad_norm": 0.09780984371900558, "learning_rate": 8.579840293881812e-05, "loss": 46.005, "step": 1877 }, { "epoch": 0.25639975424943684, "grad_norm": 0.078471340239048, "learning_rate": 8.578321925498024e-05, "loss": 46.0045, "step": 1878 }, { "epoch": 0.25653628234009146, "grad_norm": 0.04202977940440178, "learning_rate": 8.576802880371389e-05, "loss": 46.0065, "step": 1879 }, { "epoch": 0.25667281043074613, "grad_norm": 0.11370931565761566, "learning_rate": 8.575283158789193e-05, "loss": 46.005, "step": 1880 }, { "epoch": 0.25680933852140075, "grad_norm": 0.08637732267379761, "learning_rate": 8.573762761038853e-05, "loss": 46.0067, "step": 1881 }, { "epoch": 0.25694586661205543, "grad_norm": 0.08066518604755402, "learning_rate": 8.57224168740791e-05, "loss": 46.0151, "step": 1882 }, { "epoch": 0.2570823947027101, "grad_norm": 0.12490334361791611, "learning_rate": 8.570719938184033e-05, "loss": 46.0029, "step": 1883 }, { "epoch": 0.2572189227933647, "grad_norm": 0.049131184816360474, "learning_rate": 8.569197513655022e-05, "loss": 46.0051, "step": 1884 }, { "epoch": 0.2573554508840194, "grad_norm": 0.06754127144813538, "learning_rate": 8.5676744141088e-05, "loss": 46.0036, "step": 1885 }, { "epoch": 0.257491978974674, "grad_norm": 0.09303124994039536, "learning_rate": 8.56615063983342e-05, "loss": 46.0023, "step": 1886 }, { "epoch": 0.2576285070653287, "grad_norm": 0.07112471014261246, "learning_rate": 8.564626191117065e-05, "loss": 46.0058, "step": 1887 }, { "epoch": 0.25776503515598337, "grad_norm": 0.05778362974524498, "learning_rate": 8.56310106824804e-05, "loss": 46.0004, "step": 1888 }, { "epoch": 0.257901563246638, "grad_norm": 0.0910431295633316, "learning_rate": 8.561575271514783e-05, 
"loss": 46.0072, "step": 1889 }, { "epoch": 0.25803809133729266, "grad_norm": 0.08846339583396912, "learning_rate": 8.560048801205857e-05, "loss": 46.0023, "step": 1890 }, { "epoch": 0.2581746194279473, "grad_norm": 0.10155307501554489, "learning_rate": 8.558521657609954e-05, "loss": 46.007, "step": 1891 }, { "epoch": 0.25831114751860196, "grad_norm": 0.06468070298433304, "learning_rate": 8.55699384101589e-05, "loss": 46.0038, "step": 1892 }, { "epoch": 0.2584476756092566, "grad_norm": 0.12660831212997437, "learning_rate": 8.55546535171261e-05, "loss": 46.007, "step": 1893 }, { "epoch": 0.25858420369991125, "grad_norm": 0.0662212073802948, "learning_rate": 8.553936189989187e-05, "loss": 46.0072, "step": 1894 }, { "epoch": 0.2587207317905659, "grad_norm": 0.17876537144184113, "learning_rate": 8.55240635613482e-05, "loss": 46.0033, "step": 1895 }, { "epoch": 0.25885725988122055, "grad_norm": 0.050914231687784195, "learning_rate": 8.550875850438836e-05, "loss": 46.0039, "step": 1896 }, { "epoch": 0.2589937879718752, "grad_norm": 0.09993785619735718, "learning_rate": 8.54934467319069e-05, "loss": 46.0161, "step": 1897 }, { "epoch": 0.25913031606252984, "grad_norm": 0.16351088881492615, "learning_rate": 8.547812824679961e-05, "loss": 46.0074, "step": 1898 }, { "epoch": 0.2592668441531845, "grad_norm": 0.055353790521621704, "learning_rate": 8.54628030519636e-05, "loss": 46.0059, "step": 1899 }, { "epoch": 0.2594033722438392, "grad_norm": 0.05881524831056595, "learning_rate": 8.544747115029716e-05, "loss": 46.0118, "step": 1900 }, { "epoch": 0.2595399003344938, "grad_norm": 0.07911784201860428, "learning_rate": 8.543213254469995e-05, "loss": 46.0128, "step": 1901 }, { "epoch": 0.2596764284251485, "grad_norm": 0.13872648775577545, "learning_rate": 8.541678723807285e-05, "loss": 46.0016, "step": 1902 }, { "epoch": 0.2598129565158031, "grad_norm": 0.03441177308559418, "learning_rate": 8.540143523331798e-05, "loss": 46.0091, "step": 1903 }, { "epoch": 0.2599494846064578, "grad_norm": 0.08173685520887375, "learning_rate": 8.538607653333879e-05, "loss": 46.0067, "step": 1904 }, { "epoch": 0.26008601269711246, "grad_norm": 0.21706141531467438, "learning_rate": 8.537071114103994e-05, "loss": 46.0067, "step": 1905 }, { "epoch": 0.2602225407877671, "grad_norm": 0.135494202375412, "learning_rate": 8.535533905932738e-05, "loss": 46.006, "step": 1906 }, { "epoch": 0.26035906887842175, "grad_norm": 0.07157078385353088, "learning_rate": 8.533996029110833e-05, "loss": 46.0025, "step": 1907 }, { "epoch": 0.26049559696907637, "grad_norm": 0.042661480605602264, "learning_rate": 8.532457483929128e-05, "loss": 46.0013, "step": 1908 }, { "epoch": 0.26063212505973105, "grad_norm": 0.056601349264383316, "learning_rate": 8.530918270678596e-05, "loss": 46.0033, "step": 1909 }, { "epoch": 0.26076865315038567, "grad_norm": 0.07002176344394684, "learning_rate": 8.529378389650337e-05, "loss": 46.0058, "step": 1910 }, { "epoch": 0.26090518124104034, "grad_norm": 0.057386696338653564, "learning_rate": 8.527837841135579e-05, "loss": 46.0025, "step": 1911 }, { "epoch": 0.261041709331695, "grad_norm": 0.035257428884506226, "learning_rate": 8.526296625425675e-05, "loss": 46.0004, "step": 1912 }, { "epoch": 0.26117823742234964, "grad_norm": 0.03010445274412632, "learning_rate": 8.524754742812104e-05, "loss": 46.0018, "step": 1913 }, { "epoch": 0.2613147655130043, "grad_norm": 0.08017271757125854, "learning_rate": 8.523212193586471e-05, "loss": 46.0088, "step": 1914 }, { "epoch": 0.26145129360365893, "grad_norm": 0.05974216014146805, 
"learning_rate": 8.52166897804051e-05, "loss": 46.0051, "step": 1915 }, { "epoch": 0.2615878216943136, "grad_norm": 0.05020933970808983, "learning_rate": 8.520125096466071e-05, "loss": 46.0118, "step": 1916 }, { "epoch": 0.2617243497849683, "grad_norm": 0.11582202464342117, "learning_rate": 8.518580549155148e-05, "loss": 46.0071, "step": 1917 }, { "epoch": 0.2618608778756229, "grad_norm": 0.034197498112916946, "learning_rate": 8.517035336399842e-05, "loss": 46.0073, "step": 1918 }, { "epoch": 0.2619974059662776, "grad_norm": 0.03850787505507469, "learning_rate": 8.515489458492392e-05, "loss": 46.0102, "step": 1919 }, { "epoch": 0.2621339340569322, "grad_norm": 0.057820405811071396, "learning_rate": 8.513942915725159e-05, "loss": 46.0048, "step": 1920 }, { "epoch": 0.26227046214758687, "grad_norm": 0.06519020348787308, "learning_rate": 8.512395708390629e-05, "loss": 46.0026, "step": 1921 }, { "epoch": 0.26240699023824154, "grad_norm": 0.05112835764884949, "learning_rate": 8.510847836781415e-05, "loss": 46.0063, "step": 1922 }, { "epoch": 0.26254351832889616, "grad_norm": 0.07979835569858551, "learning_rate": 8.509299301190253e-05, "loss": 46.0087, "step": 1923 }, { "epoch": 0.26268004641955084, "grad_norm": 0.1202448233962059, "learning_rate": 8.507750101910011e-05, "loss": 46.0112, "step": 1924 }, { "epoch": 0.26281657451020546, "grad_norm": 0.04008162394165993, "learning_rate": 8.506200239233673e-05, "loss": 46.0139, "step": 1925 }, { "epoch": 0.26295310260086013, "grad_norm": 0.030530031770467758, "learning_rate": 8.504649713454356e-05, "loss": 46.0128, "step": 1926 }, { "epoch": 0.26308963069151475, "grad_norm": 0.06194627285003662, "learning_rate": 8.503098524865301e-05, "loss": 46.0037, "step": 1927 }, { "epoch": 0.26322615878216943, "grad_norm": 0.08511101454496384, "learning_rate": 8.501546673759872e-05, "loss": 46.0103, "step": 1928 }, { "epoch": 0.2633626868728241, "grad_norm": 0.06716964393854141, "learning_rate": 8.499994160431559e-05, "loss": 46.0047, "step": 1929 }, { "epoch": 0.2634992149634787, "grad_norm": 0.13719791173934937, "learning_rate": 8.498440985173981e-05, "loss": 46.001, "step": 1930 }, { "epoch": 0.2636357430541334, "grad_norm": 0.11435616761445999, "learning_rate": 8.496887148280876e-05, "loss": 46.0052, "step": 1931 }, { "epoch": 0.263772271144788, "grad_norm": 0.050346482545137405, "learning_rate": 8.495332650046112e-05, "loss": 46.0088, "step": 1932 }, { "epoch": 0.2639087992354427, "grad_norm": 0.10387753695249557, "learning_rate": 8.493777490763679e-05, "loss": 46.005, "step": 1933 }, { "epoch": 0.26404532732609737, "grad_norm": 0.051499851047992706, "learning_rate": 8.492221670727694e-05, "loss": 46.0064, "step": 1934 }, { "epoch": 0.264181855416752, "grad_norm": 0.07326754182577133, "learning_rate": 8.490665190232401e-05, "loss": 46.0118, "step": 1935 }, { "epoch": 0.26431838350740666, "grad_norm": 0.07964305579662323, "learning_rate": 8.48910804957216e-05, "loss": 46.0059, "step": 1936 }, { "epoch": 0.2644549115980613, "grad_norm": 0.07854373008012772, "learning_rate": 8.487550249041466e-05, "loss": 46.0121, "step": 1937 }, { "epoch": 0.26459143968871596, "grad_norm": 0.03083227202296257, "learning_rate": 8.485991788934938e-05, "loss": 46.0065, "step": 1938 }, { "epoch": 0.26472796777937063, "grad_norm": 0.049574099481105804, "learning_rate": 8.484432669547309e-05, "loss": 46.003, "step": 1939 }, { "epoch": 0.26486449587002525, "grad_norm": 0.046297650784254074, "learning_rate": 8.48287289117345e-05, "loss": 46.0101, "step": 1940 }, { "epoch": 
0.2650010239606799, "grad_norm": 0.11580875515937805, "learning_rate": 8.481312454108348e-05, "loss": 46.0009, "step": 1941 }, { "epoch": 0.26513755205133455, "grad_norm": 0.04786570370197296, "learning_rate": 8.479751358647121e-05, "loss": 46.003, "step": 1942 }, { "epoch": 0.2652740801419892, "grad_norm": 0.1866709291934967, "learning_rate": 8.478189605085003e-05, "loss": 46.0042, "step": 1943 }, { "epoch": 0.26541060823264384, "grad_norm": 0.15682385861873627, "learning_rate": 8.476627193717362e-05, "loss": 46.0117, "step": 1944 }, { "epoch": 0.2655471363232985, "grad_norm": 0.11352037638425827, "learning_rate": 8.475064124839683e-05, "loss": 46.01, "step": 1945 }, { "epoch": 0.2656836644139532, "grad_norm": 0.04783349111676216, "learning_rate": 8.47350039874758e-05, "loss": 46.002, "step": 1946 }, { "epoch": 0.2658201925046078, "grad_norm": 0.07751612365245819, "learning_rate": 8.471936015736789e-05, "loss": 46.0044, "step": 1947 }, { "epoch": 0.2659567205952625, "grad_norm": 0.1367805153131485, "learning_rate": 8.47037097610317e-05, "loss": 46.0065, "step": 1948 }, { "epoch": 0.2660932486859171, "grad_norm": 0.048581045120954514, "learning_rate": 8.468805280142709e-05, "loss": 46.0121, "step": 1949 }, { "epoch": 0.2662297767765718, "grad_norm": 0.14885330200195312, "learning_rate": 8.467238928151514e-05, "loss": 46.0016, "step": 1950 }, { "epoch": 0.26636630486722646, "grad_norm": 0.11618994921445847, "learning_rate": 8.465671920425817e-05, "loss": 46.0132, "step": 1951 }, { "epoch": 0.2665028329578811, "grad_norm": 0.05186247080564499, "learning_rate": 8.464104257261979e-05, "loss": 46.0037, "step": 1952 }, { "epoch": 0.26663936104853575, "grad_norm": 0.10379702597856522, "learning_rate": 8.462535938956479e-05, "loss": 46.0065, "step": 1953 }, { "epoch": 0.26677588913919037, "grad_norm": 0.030517904087901115, "learning_rate": 8.46096696580592e-05, "loss": 46.0047, "step": 1954 }, { "epoch": 0.26691241722984504, "grad_norm": 0.06784427165985107, "learning_rate": 8.459397338107035e-05, "loss": 46.0024, "step": 1955 }, { "epoch": 0.2670489453204997, "grad_norm": 0.19956451654434204, "learning_rate": 8.457827056156672e-05, "loss": 46.005, "step": 1956 }, { "epoch": 0.26718547341115434, "grad_norm": 0.028619591146707535, "learning_rate": 8.456256120251813e-05, "loss": 46.0045, "step": 1957 }, { "epoch": 0.267322001501809, "grad_norm": 0.06246228888630867, "learning_rate": 8.454684530689551e-05, "loss": 46.0072, "step": 1958 }, { "epoch": 0.26745852959246363, "grad_norm": 0.13500910997390747, "learning_rate": 8.453112287767115e-05, "loss": 46.0056, "step": 1959 }, { "epoch": 0.2675950576831183, "grad_norm": 0.09931359440088272, "learning_rate": 8.45153939178185e-05, "loss": 46.0041, "step": 1960 }, { "epoch": 0.26773158577377293, "grad_norm": 0.02987753413617611, "learning_rate": 8.44996584303123e-05, "loss": 46.0102, "step": 1961 }, { "epoch": 0.2678681138644276, "grad_norm": 0.08066042512655258, "learning_rate": 8.448391641812845e-05, "loss": 46.0107, "step": 1962 }, { "epoch": 0.2680046419550823, "grad_norm": 0.04089735820889473, "learning_rate": 8.446816788424414e-05, "loss": 46.0036, "step": 1963 }, { "epoch": 0.2681411700457369, "grad_norm": 0.06543969362974167, "learning_rate": 8.44524128316378e-05, "loss": 46.0042, "step": 1964 }, { "epoch": 0.2682776981363916, "grad_norm": 0.057818181812763214, "learning_rate": 8.443665126328904e-05, "loss": 46.01, "step": 1965 }, { "epoch": 0.2684142262270462, "grad_norm": 0.054999612271785736, "learning_rate": 8.442088318217874e-05, "loss": 
46.006, "step": 1966 }, { "epoch": 0.26855075431770087, "grad_norm": 0.03792842477560043, "learning_rate": 8.440510859128903e-05, "loss": 46.0025, "step": 1967 }, { "epoch": 0.26868728240835554, "grad_norm": 0.05597066879272461, "learning_rate": 8.438932749360322e-05, "loss": 46.0071, "step": 1968 }, { "epoch": 0.26882381049901016, "grad_norm": 0.15907920897006989, "learning_rate": 8.43735398921059e-05, "loss": 46.0082, "step": 1969 }, { "epoch": 0.26896033858966484, "grad_norm": 0.07290897518396378, "learning_rate": 8.435774578978285e-05, "loss": 46.0107, "step": 1970 }, { "epoch": 0.26909686668031946, "grad_norm": 0.03698053956031799, "learning_rate": 8.434194518962112e-05, "loss": 46.0119, "step": 1971 }, { "epoch": 0.26923339477097413, "grad_norm": 0.06087418273091316, "learning_rate": 8.432613809460894e-05, "loss": 46.0074, "step": 1972 }, { "epoch": 0.2693699228616288, "grad_norm": 0.030266206711530685, "learning_rate": 8.431032450773581e-05, "loss": 46.0075, "step": 1973 }, { "epoch": 0.2695064509522834, "grad_norm": 0.057124219834804535, "learning_rate": 8.429450443199244e-05, "loss": 46.002, "step": 1974 }, { "epoch": 0.2696429790429381, "grad_norm": 0.04006488621234894, "learning_rate": 8.427867787037077e-05, "loss": 46.006, "step": 1975 }, { "epoch": 0.2697795071335927, "grad_norm": 0.0842823013663292, "learning_rate": 8.426284482586396e-05, "loss": 46.0083, "step": 1976 }, { "epoch": 0.2699160352242474, "grad_norm": 0.03801862522959709, "learning_rate": 8.424700530146641e-05, "loss": 46.0057, "step": 1977 }, { "epoch": 0.270052563314902, "grad_norm": 0.11611777544021606, "learning_rate": 8.423115930017373e-05, "loss": 46.0035, "step": 1978 }, { "epoch": 0.2701890914055567, "grad_norm": 0.08132970333099365, "learning_rate": 8.421530682498279e-05, "loss": 46.0102, "step": 1979 }, { "epoch": 0.27032561949621137, "grad_norm": 0.06906259059906006, "learning_rate": 8.419944787889162e-05, "loss": 46.0019, "step": 1980 }, { "epoch": 0.270462147586866, "grad_norm": 0.08429627120494843, "learning_rate": 8.418358246489952e-05, "loss": 46.0143, "step": 1981 }, { "epoch": 0.27059867567752066, "grad_norm": 0.042043305933475494, "learning_rate": 8.416771058600701e-05, "loss": 46.0027, "step": 1982 }, { "epoch": 0.2707352037681753, "grad_norm": 0.1021868959069252, "learning_rate": 8.415183224521583e-05, "loss": 46.0142, "step": 1983 }, { "epoch": 0.27087173185882996, "grad_norm": 0.03678969666361809, "learning_rate": 8.413594744552894e-05, "loss": 46.0016, "step": 1984 }, { "epoch": 0.27100825994948463, "grad_norm": 0.04405586048960686, "learning_rate": 8.412005618995051e-05, "loss": 46.0106, "step": 1985 }, { "epoch": 0.27114478804013925, "grad_norm": 0.043125733733177185, "learning_rate": 8.410415848148596e-05, "loss": 46.003, "step": 1986 }, { "epoch": 0.2712813161307939, "grad_norm": 0.07676594704389572, "learning_rate": 8.408825432314188e-05, "loss": 46.0035, "step": 1987 }, { "epoch": 0.27141784422144855, "grad_norm": 0.08001963794231415, "learning_rate": 8.407234371792614e-05, "loss": 46.0101, "step": 1988 }, { "epoch": 0.2715543723121032, "grad_norm": 0.0454275906085968, "learning_rate": 8.405642666884779e-05, "loss": 46.0076, "step": 1989 }, { "epoch": 0.27169090040275784, "grad_norm": 0.08838507533073425, "learning_rate": 8.404050317891711e-05, "loss": 46.0009, "step": 1990 }, { "epoch": 0.2718274284934125, "grad_norm": 0.13248996436595917, "learning_rate": 8.402457325114562e-05, "loss": 46.0086, "step": 1991 }, { "epoch": 0.2719639565840672, "grad_norm": 0.040655020624399185, 
"learning_rate": 8.400863688854597e-05, "loss": 46.0061, "step": 1992 }, { "epoch": 0.2721004846747218, "grad_norm": 0.041273947805166245, "learning_rate": 8.399269409413218e-05, "loss": 46.0012, "step": 1993 }, { "epoch": 0.2722370127653765, "grad_norm": 0.03537369892001152, "learning_rate": 8.397674487091931e-05, "loss": 46.0029, "step": 1994 }, { "epoch": 0.2723735408560311, "grad_norm": 0.052936892956495285, "learning_rate": 8.39607892219238e-05, "loss": 46.003, "step": 1995 }, { "epoch": 0.2725100689466858, "grad_norm": 0.09714681655168533, "learning_rate": 8.394482715016319e-05, "loss": 46.0058, "step": 1996 }, { "epoch": 0.27264659703734045, "grad_norm": 0.11350007355213165, "learning_rate": 8.392885865865628e-05, "loss": 46.0025, "step": 1997 }, { "epoch": 0.2727831251279951, "grad_norm": 0.1412503868341446, "learning_rate": 8.39128837504231e-05, "loss": 46.0056, "step": 1998 }, { "epoch": 0.27291965321864975, "grad_norm": 0.22971245646476746, "learning_rate": 8.389690242848483e-05, "loss": 46.0088, "step": 1999 }, { "epoch": 0.27305618130930437, "grad_norm": 0.09795019030570984, "learning_rate": 8.388091469586394e-05, "loss": 46.0078, "step": 2000 }, { "epoch": 0.27319270939995904, "grad_norm": 0.11063358187675476, "learning_rate": 8.386492055558405e-05, "loss": 46.0034, "step": 2001 }, { "epoch": 0.2733292374906137, "grad_norm": 0.031250689178705215, "learning_rate": 8.384892001067004e-05, "loss": 46.003, "step": 2002 }, { "epoch": 0.27346576558126834, "grad_norm": 0.06405351310968399, "learning_rate": 8.383291306414798e-05, "loss": 46.0038, "step": 2003 }, { "epoch": 0.273602293671923, "grad_norm": 0.07687491178512573, "learning_rate": 8.381689971904514e-05, "loss": 46.0013, "step": 2004 }, { "epoch": 0.27373882176257763, "grad_norm": 0.05231303721666336, "learning_rate": 8.380087997839003e-05, "loss": 46.0008, "step": 2005 }, { "epoch": 0.2738753498532323, "grad_norm": 0.0774698257446289, "learning_rate": 8.378485384521233e-05, "loss": 46.0024, "step": 2006 }, { "epoch": 0.27401187794388693, "grad_norm": 0.038125764578580856, "learning_rate": 8.376882132254295e-05, "loss": 46.0049, "step": 2007 }, { "epoch": 0.2741484060345416, "grad_norm": 0.07358758896589279, "learning_rate": 8.375278241341403e-05, "loss": 46.0036, "step": 2008 }, { "epoch": 0.2742849341251963, "grad_norm": 0.05093492940068245, "learning_rate": 8.373673712085888e-05, "loss": 46.0013, "step": 2009 }, { "epoch": 0.2744214622158509, "grad_norm": 0.11395668983459473, "learning_rate": 8.372068544791203e-05, "loss": 46.0032, "step": 2010 }, { "epoch": 0.2745579903065056, "grad_norm": 0.04949096590280533, "learning_rate": 8.370462739760923e-05, "loss": 46.0019, "step": 2011 }, { "epoch": 0.2746945183971602, "grad_norm": 0.05323164910078049, "learning_rate": 8.368856297298742e-05, "loss": 46.0023, "step": 2012 }, { "epoch": 0.27483104648781487, "grad_norm": 0.03448178246617317, "learning_rate": 8.367249217708475e-05, "loss": 46.013, "step": 2013 }, { "epoch": 0.27496757457846954, "grad_norm": 0.06274440884590149, "learning_rate": 8.36564150129406e-05, "loss": 46.013, "step": 2014 }, { "epoch": 0.27510410266912416, "grad_norm": 0.03629192337393761, "learning_rate": 8.364033148359547e-05, "loss": 46.0032, "step": 2015 }, { "epoch": 0.27524063075977884, "grad_norm": 0.053521089255809784, "learning_rate": 8.36242415920912e-05, "loss": 46.0068, "step": 2016 }, { "epoch": 0.27537715885043346, "grad_norm": 0.05934813991189003, "learning_rate": 8.360814534147071e-05, "loss": 46.0037, "step": 2017 }, { "epoch": 
0.27551368694108813, "grad_norm": 0.15048646926879883, "learning_rate": 8.359204273477818e-05, "loss": 46.0087, "step": 2018 }, { "epoch": 0.2756502150317428, "grad_norm": 0.07626891136169434, "learning_rate": 8.3575933775059e-05, "loss": 46.0077, "step": 2019 }, { "epoch": 0.2757867431223974, "grad_norm": 0.06950526684522629, "learning_rate": 8.355981846535971e-05, "loss": 46.0063, "step": 2020 }, { "epoch": 0.2759232712130521, "grad_norm": 0.11118067800998688, "learning_rate": 8.354369680872813e-05, "loss": 46.0096, "step": 2021 }, { "epoch": 0.2760597993037067, "grad_norm": 0.033292148262262344, "learning_rate": 8.352756880821319e-05, "loss": 46.0108, "step": 2022 }, { "epoch": 0.2761963273943614, "grad_norm": 0.06583798676729202, "learning_rate": 8.351143446686511e-05, "loss": 46.003, "step": 2023 }, { "epoch": 0.276332855485016, "grad_norm": 0.048245713114738464, "learning_rate": 8.349529378773521e-05, "loss": 46.0159, "step": 2024 }, { "epoch": 0.2764693835756707, "grad_norm": 0.12447294592857361, "learning_rate": 8.347914677387614e-05, "loss": 46.0083, "step": 2025 }, { "epoch": 0.27660591166632537, "grad_norm": 0.060285184532403946, "learning_rate": 8.346299342834159e-05, "loss": 46.0022, "step": 2026 }, { "epoch": 0.27674243975698, "grad_norm": 0.044471751898527145, "learning_rate": 8.344683375418655e-05, "loss": 46.0047, "step": 2027 }, { "epoch": 0.27687896784763466, "grad_norm": 0.0792950689792633, "learning_rate": 8.343066775446725e-05, "loss": 46.004, "step": 2028 }, { "epoch": 0.2770154959382893, "grad_norm": 0.09891238808631897, "learning_rate": 8.341449543224095e-05, "loss": 46.0044, "step": 2029 }, { "epoch": 0.27715202402894396, "grad_norm": 0.08615852892398834, "learning_rate": 8.339831679056629e-05, "loss": 46.002, "step": 2030 }, { "epoch": 0.27728855211959863, "grad_norm": 0.06056644767522812, "learning_rate": 8.338213183250297e-05, "loss": 46.0058, "step": 2031 }, { "epoch": 0.27742508021025325, "grad_norm": 0.09238624572753906, "learning_rate": 8.336594056111197e-05, "loss": 46.0049, "step": 2032 }, { "epoch": 0.2775616083009079, "grad_norm": 0.09380973130464554, "learning_rate": 8.334974297945543e-05, "loss": 46.0075, "step": 2033 }, { "epoch": 0.27769813639156254, "grad_norm": 0.04116404056549072, "learning_rate": 8.333353909059665e-05, "loss": 46.0067, "step": 2034 }, { "epoch": 0.2778346644822172, "grad_norm": 0.0641622394323349, "learning_rate": 8.33173288976002e-05, "loss": 46.0061, "step": 2035 }, { "epoch": 0.2779711925728719, "grad_norm": 0.07035573571920395, "learning_rate": 8.330111240353178e-05, "loss": 46.0065, "step": 2036 }, { "epoch": 0.2781077206635265, "grad_norm": 0.07186944037675858, "learning_rate": 8.328488961145831e-05, "loss": 46.0042, "step": 2037 }, { "epoch": 0.2782442487541812, "grad_norm": 0.03955550491809845, "learning_rate": 8.326866052444786e-05, "loss": 46.0037, "step": 2038 }, { "epoch": 0.2783807768448358, "grad_norm": 0.04629998281598091, "learning_rate": 8.325242514556977e-05, "loss": 46.0019, "step": 2039 }, { "epoch": 0.2785173049354905, "grad_norm": 0.043198078870773315, "learning_rate": 8.323618347789449e-05, "loss": 46.0046, "step": 2040 }, { "epoch": 0.2786538330261451, "grad_norm": 0.04526280611753464, "learning_rate": 8.32199355244937e-05, "loss": 46.0062, "step": 2041 }, { "epoch": 0.2787903611167998, "grad_norm": 0.03991240635514259, "learning_rate": 8.320368128844028e-05, "loss": 46.0041, "step": 2042 }, { "epoch": 0.27892688920745445, "grad_norm": 0.07244943082332611, "learning_rate": 8.318742077280825e-05, "loss": 
46.0089, "step": 2043 }, { "epoch": 0.2790634172981091, "grad_norm": 0.17482887208461761, "learning_rate": 8.317115398067288e-05, "loss": 46.006, "step": 2044 }, { "epoch": 0.27919994538876375, "grad_norm": 0.14064674079418182, "learning_rate": 8.315488091511056e-05, "loss": 46.0052, "step": 2045 }, { "epoch": 0.27933647347941837, "grad_norm": 0.12665516138076782, "learning_rate": 8.313860157919892e-05, "loss": 46.0094, "step": 2046 }, { "epoch": 0.27947300157007304, "grad_norm": 0.09731275588274002, "learning_rate": 8.312231597601674e-05, "loss": 46.0013, "step": 2047 }, { "epoch": 0.2796095296607277, "grad_norm": 0.07652882486581802, "learning_rate": 8.310602410864404e-05, "loss": 46.0116, "step": 2048 }, { "epoch": 0.27974605775138234, "grad_norm": 0.11362301558256149, "learning_rate": 8.308972598016194e-05, "loss": 46.0118, "step": 2049 }, { "epoch": 0.279882585842037, "grad_norm": 0.26978185772895813, "learning_rate": 8.30734215936528e-05, "loss": 46.0, "step": 2050 }, { "epoch": 0.28001911393269163, "grad_norm": 0.09602268785238266, "learning_rate": 8.305711095220017e-05, "loss": 46.0051, "step": 2051 }, { "epoch": 0.2801556420233463, "grad_norm": 0.029402440413832664, "learning_rate": 8.304079405888878e-05, "loss": 46.0077, "step": 2052 }, { "epoch": 0.280292170114001, "grad_norm": 0.16859747469425201, "learning_rate": 8.30244709168045e-05, "loss": 46.0039, "step": 2053 }, { "epoch": 0.2804286982046556, "grad_norm": 0.12326042354106903, "learning_rate": 8.30081415290344e-05, "loss": 46.0089, "step": 2054 }, { "epoch": 0.2805652262953103, "grad_norm": 0.1955948919057846, "learning_rate": 8.299180589866679e-05, "loss": 46.0057, "step": 2055 }, { "epoch": 0.2807017543859649, "grad_norm": 0.09433986246585846, "learning_rate": 8.297546402879106e-05, "loss": 46.0023, "step": 2056 }, { "epoch": 0.28083828247661957, "grad_norm": 0.07213763147592545, "learning_rate": 8.295911592249788e-05, "loss": 46.0037, "step": 2057 }, { "epoch": 0.2809748105672742, "grad_norm": 0.040236297994852066, "learning_rate": 8.294276158287903e-05, "loss": 46.0082, "step": 2058 }, { "epoch": 0.28111133865792887, "grad_norm": 0.05828127637505531, "learning_rate": 8.292640101302748e-05, "loss": 46.0041, "step": 2059 }, { "epoch": 0.28124786674858354, "grad_norm": 0.06429574638605118, "learning_rate": 8.29100342160374e-05, "loss": 46.0035, "step": 2060 }, { "epoch": 0.28138439483923816, "grad_norm": 0.1105106770992279, "learning_rate": 8.289366119500415e-05, "loss": 46.0082, "step": 2061 }, { "epoch": 0.28152092292989284, "grad_norm": 0.11525078117847443, "learning_rate": 8.28772819530242e-05, "loss": 46.0105, "step": 2062 }, { "epoch": 0.28165745102054746, "grad_norm": 0.05919964984059334, "learning_rate": 8.286089649319529e-05, "loss": 46.008, "step": 2063 }, { "epoch": 0.28179397911120213, "grad_norm": 0.030346762388944626, "learning_rate": 8.284450481861623e-05, "loss": 46.0029, "step": 2064 }, { "epoch": 0.2819305072018568, "grad_norm": 0.06520410627126694, "learning_rate": 8.282810693238711e-05, "loss": 46.012, "step": 2065 }, { "epoch": 0.2820670352925114, "grad_norm": 0.05951046198606491, "learning_rate": 8.281170283760912e-05, "loss": 46.0042, "step": 2066 }, { "epoch": 0.2822035633831661, "grad_norm": 0.055153265595436096, "learning_rate": 8.279529253738467e-05, "loss": 46.0097, "step": 2067 }, { "epoch": 0.2823400914738207, "grad_norm": 0.06182892620563507, "learning_rate": 8.27788760348173e-05, "loss": 46.0084, "step": 2068 }, { "epoch": 0.2824766195644754, "grad_norm": 0.058704908937215805, 
"learning_rate": 8.276245333301177e-05, "loss": 46.0017, "step": 2069 }, { "epoch": 0.28261314765513007, "grad_norm": 0.10955832153558731, "learning_rate": 8.274602443507398e-05, "loss": 46.008, "step": 2070 }, { "epoch": 0.2827496757457847, "grad_norm": 0.06188804283738136, "learning_rate": 8.272958934411102e-05, "loss": 46.0014, "step": 2071 }, { "epoch": 0.28288620383643936, "grad_norm": 0.09448379278182983, "learning_rate": 8.271314806323114e-05, "loss": 46.0047, "step": 2072 }, { "epoch": 0.283022731927094, "grad_norm": 0.07912758737802505, "learning_rate": 8.269670059554375e-05, "loss": 46.0155, "step": 2073 }, { "epoch": 0.28315926001774866, "grad_norm": 0.060129862278699875, "learning_rate": 8.268024694415947e-05, "loss": 46.0072, "step": 2074 }, { "epoch": 0.2832957881084033, "grad_norm": 0.06493951380252838, "learning_rate": 8.266378711219003e-05, "loss": 46.0139, "step": 2075 }, { "epoch": 0.28343231619905795, "grad_norm": 0.06751691550016403, "learning_rate": 8.26473211027484e-05, "loss": 46.0087, "step": 2076 }, { "epoch": 0.28356884428971263, "grad_norm": 0.06510376185178757, "learning_rate": 8.263084891894866e-05, "loss": 46.0042, "step": 2077 }, { "epoch": 0.28370537238036725, "grad_norm": 0.054394014179706573, "learning_rate": 8.261437056390606e-05, "loss": 46.0051, "step": 2078 }, { "epoch": 0.2838419004710219, "grad_norm": 0.06610194593667984, "learning_rate": 8.259788604073707e-05, "loss": 46.0046, "step": 2079 }, { "epoch": 0.28397842856167654, "grad_norm": 0.06227370724081993, "learning_rate": 8.258139535255927e-05, "loss": 46.007, "step": 2080 }, { "epoch": 0.2841149566523312, "grad_norm": 0.04607626423239708, "learning_rate": 8.256489850249143e-05, "loss": 46.0032, "step": 2081 }, { "epoch": 0.2842514847429859, "grad_norm": 0.050247468054294586, "learning_rate": 8.25483954936535e-05, "loss": 46.013, "step": 2082 }, { "epoch": 0.2843880128336405, "grad_norm": 0.05409115180373192, "learning_rate": 8.253188632916657e-05, "loss": 46.0033, "step": 2083 }, { "epoch": 0.2845245409242952, "grad_norm": 0.17449316382408142, "learning_rate": 8.251537101215288e-05, "loss": 46.0017, "step": 2084 }, { "epoch": 0.2846610690149498, "grad_norm": 0.039905816316604614, "learning_rate": 8.249884954573588e-05, "loss": 46.0, "step": 2085 }, { "epoch": 0.2847975971056045, "grad_norm": 0.1335863322019577, "learning_rate": 8.248232193304015e-05, "loss": 46.0008, "step": 2086 }, { "epoch": 0.2849341251962591, "grad_norm": 0.15179413557052612, "learning_rate": 8.246578817719145e-05, "loss": 46.0041, "step": 2087 }, { "epoch": 0.2850706532869138, "grad_norm": 0.0693478062748909, "learning_rate": 8.244924828131669e-05, "loss": 46.0071, "step": 2088 }, { "epoch": 0.28520718137756845, "grad_norm": 0.12254491448402405, "learning_rate": 8.243270224854392e-05, "loss": 46.0043, "step": 2089 }, { "epoch": 0.2853437094682231, "grad_norm": 0.04585923254489899, "learning_rate": 8.241615008200241e-05, "loss": 46.0078, "step": 2090 }, { "epoch": 0.28548023755887775, "grad_norm": 0.07833298295736313, "learning_rate": 8.239959178482253e-05, "loss": 46.004, "step": 2091 }, { "epoch": 0.28561676564953237, "grad_norm": 0.03095315210521221, "learning_rate": 8.238302736013586e-05, "loss": 46.0047, "step": 2092 }, { "epoch": 0.28575329374018704, "grad_norm": 0.04151926189661026, "learning_rate": 8.236645681107507e-05, "loss": 46.0059, "step": 2093 }, { "epoch": 0.2858898218308417, "grad_norm": 0.05074651166796684, "learning_rate": 8.234988014077407e-05, "loss": 46.0016, "step": 2094 }, { "epoch": 
0.28602634992149634, "grad_norm": 0.04790991172194481, "learning_rate": 8.233329735236789e-05, "loss": 46.0066, "step": 2095 }, { "epoch": 0.286162878012151, "grad_norm": 0.08195990324020386, "learning_rate": 8.231670844899268e-05, "loss": 46.004, "step": 2096 }, { "epoch": 0.28629940610280563, "grad_norm": 0.10950994491577148, "learning_rate": 8.230011343378582e-05, "loss": 46.0096, "step": 2097 }, { "epoch": 0.2864359341934603, "grad_norm": 0.1060578003525734, "learning_rate": 8.22835123098858e-05, "loss": 46.0056, "step": 2098 }, { "epoch": 0.286572462284115, "grad_norm": 0.08094188570976257, "learning_rate": 8.226690508043226e-05, "loss": 46.0081, "step": 2099 }, { "epoch": 0.2867089903747696, "grad_norm": 0.07069123536348343, "learning_rate": 8.225029174856603e-05, "loss": 46.0049, "step": 2100 }, { "epoch": 0.2868455184654243, "grad_norm": 0.06915846467018127, "learning_rate": 8.223367231742905e-05, "loss": 46.0157, "step": 2101 }, { "epoch": 0.2869820465560789, "grad_norm": 0.07951541244983673, "learning_rate": 8.221704679016444e-05, "loss": 46.0008, "step": 2102 }, { "epoch": 0.28711857464673357, "grad_norm": 0.08607452362775803, "learning_rate": 8.220041516991646e-05, "loss": 46.0026, "step": 2103 }, { "epoch": 0.2872551027373882, "grad_norm": 0.06456422060728073, "learning_rate": 8.218377745983057e-05, "loss": 46.0044, "step": 2104 }, { "epoch": 0.28739163082804287, "grad_norm": 0.06507862359285355, "learning_rate": 8.216713366305331e-05, "loss": 46.0013, "step": 2105 }, { "epoch": 0.28752815891869754, "grad_norm": 0.08143823593854904, "learning_rate": 8.215048378273243e-05, "loss": 46.0054, "step": 2106 }, { "epoch": 0.28766468700935216, "grad_norm": 0.05699825659394264, "learning_rate": 8.213382782201675e-05, "loss": 46.0039, "step": 2107 }, { "epoch": 0.28780121510000684, "grad_norm": 0.1102060005068779, "learning_rate": 8.211716578405635e-05, "loss": 46.0005, "step": 2108 }, { "epoch": 0.28793774319066145, "grad_norm": 0.06837007403373718, "learning_rate": 8.21004976720024e-05, "loss": 46.0033, "step": 2109 }, { "epoch": 0.28807427128131613, "grad_norm": 0.051499877125024796, "learning_rate": 8.208382348900719e-05, "loss": 46.0086, "step": 2110 }, { "epoch": 0.2882107993719708, "grad_norm": 0.1002233475446701, "learning_rate": 8.206714323822422e-05, "loss": 46.0078, "step": 2111 }, { "epoch": 0.2883473274626254, "grad_norm": 0.033960044384002686, "learning_rate": 8.205045692280808e-05, "loss": 46.0158, "step": 2112 }, { "epoch": 0.2884838555532801, "grad_norm": 0.1926700323820114, "learning_rate": 8.203376454591458e-05, "loss": 46.0055, "step": 2113 }, { "epoch": 0.2886203836439347, "grad_norm": 0.11289513111114502, "learning_rate": 8.20170661107006e-05, "loss": 46.0038, "step": 2114 }, { "epoch": 0.2887569117345894, "grad_norm": 0.06547566503286362, "learning_rate": 8.200036162032418e-05, "loss": 46.0009, "step": 2115 }, { "epoch": 0.28889343982524407, "grad_norm": 0.03141045570373535, "learning_rate": 8.198365107794457e-05, "loss": 46.0101, "step": 2116 }, { "epoch": 0.2890299679158987, "grad_norm": 0.03801402449607849, "learning_rate": 8.196693448672207e-05, "loss": 46.0003, "step": 2117 }, { "epoch": 0.28916649600655336, "grad_norm": 0.06029272824525833, "learning_rate": 8.195021184981822e-05, "loss": 46.0002, "step": 2118 }, { "epoch": 0.289303024097208, "grad_norm": 0.11219095438718796, "learning_rate": 8.193348317039562e-05, "loss": 46.0123, "step": 2119 }, { "epoch": 0.28943955218786266, "grad_norm": 0.10937895625829697, "learning_rate": 8.191674845161806e-05, "loss": 
46.0157, "step": 2120 }, { "epoch": 0.2895760802785173, "grad_norm": 0.07374370843172073, "learning_rate": 8.190000769665044e-05, "loss": 46.0102, "step": 2121 }, { "epoch": 0.28971260836917195, "grad_norm": 0.05752773955464363, "learning_rate": 8.188326090865884e-05, "loss": 46.0027, "step": 2122 }, { "epoch": 0.28984913645982663, "grad_norm": 0.052175454795360565, "learning_rate": 8.186650809081046e-05, "loss": 46.003, "step": 2123 }, { "epoch": 0.28998566455048125, "grad_norm": 0.07862614840269089, "learning_rate": 8.184974924627365e-05, "loss": 46.0062, "step": 2124 }, { "epoch": 0.2901221926411359, "grad_norm": 0.1104978621006012, "learning_rate": 8.183298437821788e-05, "loss": 46.005, "step": 2125 }, { "epoch": 0.29025872073179054, "grad_norm": 0.10031845420598984, "learning_rate": 8.181621348981376e-05, "loss": 46.0067, "step": 2126 }, { "epoch": 0.2903952488224452, "grad_norm": 0.06388320028781891, "learning_rate": 8.17994365842331e-05, "loss": 46.0083, "step": 2127 }, { "epoch": 0.2905317769130999, "grad_norm": 0.04841059073805809, "learning_rate": 8.178265366464875e-05, "loss": 46.0042, "step": 2128 }, { "epoch": 0.2906683050037545, "grad_norm": 0.14941242337226868, "learning_rate": 8.176586473423475e-05, "loss": 46.0069, "step": 2129 }, { "epoch": 0.2908048330944092, "grad_norm": 0.06360785663127899, "learning_rate": 8.174906979616628e-05, "loss": 46.008, "step": 2130 }, { "epoch": 0.2909413611850638, "grad_norm": 0.046419549733400345, "learning_rate": 8.173226885361965e-05, "loss": 46.0069, "step": 2131 }, { "epoch": 0.2910778892757185, "grad_norm": 0.029966367408633232, "learning_rate": 8.171546190977231e-05, "loss": 46.0071, "step": 2132 }, { "epoch": 0.29121441736637316, "grad_norm": 0.045306093990802765, "learning_rate": 8.169864896780284e-05, "loss": 46.0124, "step": 2133 }, { "epoch": 0.2913509454570278, "grad_norm": 0.07164879888296127, "learning_rate": 8.168183003089094e-05, "loss": 46.0034, "step": 2134 }, { "epoch": 0.29148747354768245, "grad_norm": 0.044803399592638016, "learning_rate": 8.166500510221745e-05, "loss": 46.0106, "step": 2135 }, { "epoch": 0.29162400163833707, "grad_norm": 0.10396397113800049, "learning_rate": 8.164817418496437e-05, "loss": 46.005, "step": 2136 }, { "epoch": 0.29176052972899175, "grad_norm": 0.08692236989736557, "learning_rate": 8.163133728231482e-05, "loss": 46.0033, "step": 2137 }, { "epoch": 0.29189705781964637, "grad_norm": 0.05046821013092995, "learning_rate": 8.161449439745303e-05, "loss": 46.0189, "step": 2138 }, { "epoch": 0.29203358591030104, "grad_norm": 0.05850978568196297, "learning_rate": 8.159764553356437e-05, "loss": 46.0085, "step": 2139 }, { "epoch": 0.2921701140009557, "grad_norm": 0.03602452203631401, "learning_rate": 8.158079069383535e-05, "loss": 46.0048, "step": 2140 }, { "epoch": 0.29230664209161034, "grad_norm": 0.0816250666975975, "learning_rate": 8.156392988145362e-05, "loss": 46.0074, "step": 2141 }, { "epoch": 0.292443170182265, "grad_norm": 0.11446735262870789, "learning_rate": 8.154706309960793e-05, "loss": 46.0015, "step": 2142 }, { "epoch": 0.29257969827291963, "grad_norm": 0.0278147142380476, "learning_rate": 8.153019035148821e-05, "loss": 46.0125, "step": 2143 }, { "epoch": 0.2927162263635743, "grad_norm": 0.16927510499954224, "learning_rate": 8.151331164028544e-05, "loss": 46.0042, "step": 2144 }, { "epoch": 0.292852754454229, "grad_norm": 0.0467304103076458, "learning_rate": 8.14964269691918e-05, "loss": 46.0043, "step": 2145 }, { "epoch": 0.2929892825448836, "grad_norm": 0.17710985243320465, 
"learning_rate": 8.147953634140054e-05, "loss": 46.0089, "step": 2146 }, { "epoch": 0.2931258106355383, "grad_norm": 0.18936793506145477, "learning_rate": 8.146263976010611e-05, "loss": 46.0094, "step": 2147 }, { "epoch": 0.2932623387261929, "grad_norm": 0.1732177436351776, "learning_rate": 8.1445737228504e-05, "loss": 46.0081, "step": 2148 }, { "epoch": 0.29339886681684757, "grad_norm": 0.24245929718017578, "learning_rate": 8.14288287497909e-05, "loss": 46.0055, "step": 2149 }, { "epoch": 0.29353539490750225, "grad_norm": 0.08451434224843979, "learning_rate": 8.141191432716456e-05, "loss": 46.0, "step": 2150 }, { "epoch": 0.29367192299815686, "grad_norm": 0.05349041521549225, "learning_rate": 8.139499396382391e-05, "loss": 46.0065, "step": 2151 }, { "epoch": 0.29380845108881154, "grad_norm": 0.059442970901727676, "learning_rate": 8.137806766296896e-05, "loss": 46.0016, "step": 2152 }, { "epoch": 0.29394497917946616, "grad_norm": 0.06850149482488632, "learning_rate": 8.136113542780089e-05, "loss": 46.0015, "step": 2153 }, { "epoch": 0.29408150727012083, "grad_norm": 0.03785879537463188, "learning_rate": 8.134419726152194e-05, "loss": 46.0031, "step": 2154 }, { "epoch": 0.29421803536077545, "grad_norm": 0.05516555532813072, "learning_rate": 8.132725316733553e-05, "loss": 46.0049, "step": 2155 }, { "epoch": 0.29435456345143013, "grad_norm": 0.13189087808132172, "learning_rate": 8.131030314844617e-05, "loss": 46.0024, "step": 2156 }, { "epoch": 0.2944910915420848, "grad_norm": 0.08680865168571472, "learning_rate": 8.12933472080595e-05, "loss": 46.0059, "step": 2157 }, { "epoch": 0.2946276196327394, "grad_norm": 0.045643966645002365, "learning_rate": 8.127638534938227e-05, "loss": 46.0002, "step": 2158 }, { "epoch": 0.2947641477233941, "grad_norm": 0.10984178632497787, "learning_rate": 8.125941757562237e-05, "loss": 46.0051, "step": 2159 }, { "epoch": 0.2949006758140487, "grad_norm": 0.05148117616772652, "learning_rate": 8.124244388998878e-05, "loss": 46.0004, "step": 2160 }, { "epoch": 0.2950372039047034, "grad_norm": 0.048430416733026505, "learning_rate": 8.122546429569164e-05, "loss": 46.0023, "step": 2161 }, { "epoch": 0.29517373199535807, "grad_norm": 0.07541736960411072, "learning_rate": 8.120847879594216e-05, "loss": 46.0042, "step": 2162 }, { "epoch": 0.2953102600860127, "grad_norm": 0.09563111513853073, "learning_rate": 8.119148739395269e-05, "loss": 46.0052, "step": 2163 }, { "epoch": 0.29544678817666736, "grad_norm": 0.03463374078273773, "learning_rate": 8.117449009293668e-05, "loss": 46.0056, "step": 2164 }, { "epoch": 0.295583316267322, "grad_norm": 0.05598647519946098, "learning_rate": 8.115748689610874e-05, "loss": 46.003, "step": 2165 }, { "epoch": 0.29571984435797666, "grad_norm": 0.06797880679368973, "learning_rate": 8.114047780668454e-05, "loss": 46.0104, "step": 2166 }, { "epoch": 0.29585637244863133, "grad_norm": 0.03879106417298317, "learning_rate": 8.112346282788092e-05, "loss": 46.0112, "step": 2167 }, { "epoch": 0.29599290053928595, "grad_norm": 0.03877091780304909, "learning_rate": 8.110644196291577e-05, "loss": 46.0024, "step": 2168 }, { "epoch": 0.2961294286299406, "grad_norm": 0.03868767246603966, "learning_rate": 8.108941521500815e-05, "loss": 46.0029, "step": 2169 }, { "epoch": 0.29626595672059525, "grad_norm": 0.02985203266143799, "learning_rate": 8.107238258737819e-05, "loss": 46.0093, "step": 2170 }, { "epoch": 0.2964024848112499, "grad_norm": 0.051738958805799484, "learning_rate": 8.105534408324715e-05, "loss": 46.0037, "step": 2171 }, { "epoch": 
0.29653901290190454, "grad_norm": 0.07167970389127731, "learning_rate": 8.103829970583742e-05, "loss": 46.0055, "step": 2172 }, { "epoch": 0.2966755409925592, "grad_norm": 0.12677684426307678, "learning_rate": 8.102124945837248e-05, "loss": 46.0011, "step": 2173 }, { "epoch": 0.2968120690832139, "grad_norm": 0.0704827532172203, "learning_rate": 8.100419334407693e-05, "loss": 46.0032, "step": 2174 }, { "epoch": 0.2969485971738685, "grad_norm": 0.04485374316573143, "learning_rate": 8.098713136617644e-05, "loss": 46.0046, "step": 2175 }, { "epoch": 0.2970851252645232, "grad_norm": 0.05172800272703171, "learning_rate": 8.097006352789786e-05, "loss": 46.0018, "step": 2176 }, { "epoch": 0.2972216533551778, "grad_norm": 0.052127230912446976, "learning_rate": 8.095298983246909e-05, "loss": 46.0068, "step": 2177 }, { "epoch": 0.2973581814458325, "grad_norm": 0.08465684205293655, "learning_rate": 8.093591028311916e-05, "loss": 46.0015, "step": 2178 }, { "epoch": 0.29749470953648716, "grad_norm": 0.05928308516740799, "learning_rate": 8.09188248830782e-05, "loss": 46.0075, "step": 2179 }, { "epoch": 0.2976312376271418, "grad_norm": 0.13689769804477692, "learning_rate": 8.090173363557748e-05, "loss": 46.0063, "step": 2180 }, { "epoch": 0.29776776571779645, "grad_norm": 0.06012807786464691, "learning_rate": 8.088463654384931e-05, "loss": 46.0015, "step": 2181 }, { "epoch": 0.29790429380845107, "grad_norm": 0.06470958888530731, "learning_rate": 8.086753361112714e-05, "loss": 46.0059, "step": 2182 }, { "epoch": 0.29804082189910575, "grad_norm": 0.10693283379077911, "learning_rate": 8.085042484064557e-05, "loss": 46.0043, "step": 2183 }, { "epoch": 0.2981773499897604, "grad_norm": 0.062035899609327316, "learning_rate": 8.083331023564023e-05, "loss": 46.007, "step": 2184 }, { "epoch": 0.29831387808041504, "grad_norm": 0.10625892877578735, "learning_rate": 8.081618979934789e-05, "loss": 46.0104, "step": 2185 }, { "epoch": 0.2984504061710697, "grad_norm": 0.034263432025909424, "learning_rate": 8.079906353500641e-05, "loss": 46.0041, "step": 2186 }, { "epoch": 0.29858693426172433, "grad_norm": 0.04221809282898903, "learning_rate": 8.078193144585478e-05, "loss": 46.0074, "step": 2187 }, { "epoch": 0.298723462352379, "grad_norm": 0.04605955258011818, "learning_rate": 8.076479353513307e-05, "loss": 46.0048, "step": 2188 }, { "epoch": 0.29885999044303363, "grad_norm": 0.043434906750917435, "learning_rate": 8.074764980608243e-05, "loss": 46.0061, "step": 2189 }, { "epoch": 0.2989965185336883, "grad_norm": 0.0744430348277092, "learning_rate": 8.073050026194516e-05, "loss": 46.0057, "step": 2190 }, { "epoch": 0.299133046624343, "grad_norm": 0.03237936645746231, "learning_rate": 8.071334490596459e-05, "loss": 46.0061, "step": 2191 }, { "epoch": 0.2992695747149976, "grad_norm": 0.04543660953640938, "learning_rate": 8.069618374138524e-05, "loss": 46.003, "step": 2192 }, { "epoch": 0.2994061028056523, "grad_norm": 0.07370980083942413, "learning_rate": 8.067901677145265e-05, "loss": 46.0052, "step": 2193 }, { "epoch": 0.2995426308963069, "grad_norm": 0.07079674303531647, "learning_rate": 8.066184399941352e-05, "loss": 46.0018, "step": 2194 }, { "epoch": 0.29967915898696157, "grad_norm": 0.18116968870162964, "learning_rate": 8.064466542851558e-05, "loss": 46.0047, "step": 2195 }, { "epoch": 0.29981568707761624, "grad_norm": 0.06444227695465088, "learning_rate": 8.06274810620077e-05, "loss": 46.0044, "step": 2196 }, { "epoch": 0.29995221516827086, "grad_norm": 0.08678343892097473, "learning_rate": 8.061029090313985e-05, 
"loss": 46.0066, "step": 2197 }, { "epoch": 0.30008874325892554, "grad_norm": 0.13339294493198395, "learning_rate": 8.059309495516307e-05, "loss": 46.005, "step": 2198 }, { "epoch": 0.30022527134958016, "grad_norm": 0.2720363438129425, "learning_rate": 8.057589322132955e-05, "loss": 46.0139, "step": 2199 }, { "epoch": 0.30036179944023483, "grad_norm": 0.22928427159786224, "learning_rate": 8.055868570489247e-05, "loss": 46.0049, "step": 2200 }, { "epoch": 0.30049832753088945, "grad_norm": 0.14000362157821655, "learning_rate": 8.05414724091062e-05, "loss": 46.0056, "step": 2201 }, { "epoch": 0.30063485562154413, "grad_norm": 0.0856882706284523, "learning_rate": 8.052425333722617e-05, "loss": 46.0035, "step": 2202 }, { "epoch": 0.3007713837121988, "grad_norm": 0.035256605595350266, "learning_rate": 8.050702849250891e-05, "loss": 46.0065, "step": 2203 }, { "epoch": 0.3009079118028534, "grad_norm": 0.0582156628370285, "learning_rate": 8.048979787821201e-05, "loss": 46.003, "step": 2204 }, { "epoch": 0.3010444398935081, "grad_norm": 0.0309146661311388, "learning_rate": 8.047256149759417e-05, "loss": 46.0057, "step": 2205 }, { "epoch": 0.3011809679841627, "grad_norm": 0.038980595767498016, "learning_rate": 8.045531935391523e-05, "loss": 46.0061, "step": 2206 }, { "epoch": 0.3013174960748174, "grad_norm": 0.10523828119039536, "learning_rate": 8.043807145043604e-05, "loss": 46.0005, "step": 2207 }, { "epoch": 0.30145402416547207, "grad_norm": 0.07984757423400879, "learning_rate": 8.042081779041857e-05, "loss": 46.004, "step": 2208 }, { "epoch": 0.3015905522561267, "grad_norm": 0.09002459794282913, "learning_rate": 8.04035583771259e-05, "loss": 46.0043, "step": 2209 }, { "epoch": 0.30172708034678136, "grad_norm": 0.02144874446094036, "learning_rate": 8.038629321382219e-05, "loss": 46.0079, "step": 2210 }, { "epoch": 0.301863608437436, "grad_norm": 0.04512840881943703, "learning_rate": 8.036902230377266e-05, "loss": 46.009, "step": 2211 }, { "epoch": 0.30200013652809066, "grad_norm": 0.04879119247198105, "learning_rate": 8.035174565024362e-05, "loss": 46.0047, "step": 2212 }, { "epoch": 0.30213666461874533, "grad_norm": 0.06292454898357391, "learning_rate": 8.033446325650253e-05, "loss": 46.006, "step": 2213 }, { "epoch": 0.30227319270939995, "grad_norm": 0.0498957596719265, "learning_rate": 8.031717512581784e-05, "loss": 46.0095, "step": 2214 }, { "epoch": 0.3024097208000546, "grad_norm": 0.0670875832438469, "learning_rate": 8.029988126145918e-05, "loss": 46.008, "step": 2215 }, { "epoch": 0.30254624889070925, "grad_norm": 0.03615615889430046, "learning_rate": 8.028258166669715e-05, "loss": 46.0089, "step": 2216 }, { "epoch": 0.3026827769813639, "grad_norm": 0.15255798399448395, "learning_rate": 8.026527634480359e-05, "loss": 46.0072, "step": 2217 }, { "epoch": 0.30281930507201854, "grad_norm": 0.04128759354352951, "learning_rate": 8.024796529905124e-05, "loss": 46.012, "step": 2218 }, { "epoch": 0.3029558331626732, "grad_norm": 0.07099483907222748, "learning_rate": 8.023064853271409e-05, "loss": 46.0061, "step": 2219 }, { "epoch": 0.3030923612533279, "grad_norm": 0.09658737480640411, "learning_rate": 8.021332604906708e-05, "loss": 46.0043, "step": 2220 }, { "epoch": 0.3032288893439825, "grad_norm": 0.12146454304456711, "learning_rate": 8.019599785138635e-05, "loss": 46.0076, "step": 2221 }, { "epoch": 0.3033654174346372, "grad_norm": 0.08328458666801453, "learning_rate": 8.0178663942949e-05, "loss": 46.0123, "step": 2222 }, { "epoch": 0.3035019455252918, "grad_norm": 0.08267484605312347, 
"learning_rate": 8.016132432703332e-05, "loss": 46.0084, "step": 2223 }, { "epoch": 0.3036384736159465, "grad_norm": 0.033287737518548965, "learning_rate": 8.014397900691859e-05, "loss": 46.0039, "step": 2224 }, { "epoch": 0.30377500170660116, "grad_norm": 0.11120165139436722, "learning_rate": 8.012662798588524e-05, "loss": 46.0053, "step": 2225 }, { "epoch": 0.3039115297972558, "grad_norm": 0.03591031953692436, "learning_rate": 8.010927126721472e-05, "loss": 46.0044, "step": 2226 }, { "epoch": 0.30404805788791045, "grad_norm": 0.04143424332141876, "learning_rate": 8.009190885418961e-05, "loss": 46.0044, "step": 2227 }, { "epoch": 0.30418458597856507, "grad_norm": 0.0593876875936985, "learning_rate": 8.007454075009351e-05, "loss": 46.0017, "step": 2228 }, { "epoch": 0.30432111406921974, "grad_norm": 0.041939493268728256, "learning_rate": 8.005716695821117e-05, "loss": 46.0023, "step": 2229 }, { "epoch": 0.3044576421598744, "grad_norm": 0.07249029725790024, "learning_rate": 8.003978748182833e-05, "loss": 46.0015, "step": 2230 }, { "epoch": 0.30459417025052904, "grad_norm": 0.07760994881391525, "learning_rate": 8.002240232423189e-05, "loss": 46.0071, "step": 2231 }, { "epoch": 0.3047306983411837, "grad_norm": 0.06886765360832214, "learning_rate": 8.000501148870974e-05, "loss": 46.0044, "step": 2232 }, { "epoch": 0.30486722643183833, "grad_norm": 0.06274443119764328, "learning_rate": 7.998761497855089e-05, "loss": 46.0084, "step": 2233 }, { "epoch": 0.305003754522493, "grad_norm": 0.07121464610099792, "learning_rate": 7.997021279704547e-05, "loss": 46.0066, "step": 2234 }, { "epoch": 0.30514028261314763, "grad_norm": 0.03295321762561798, "learning_rate": 7.995280494748456e-05, "loss": 46.0075, "step": 2235 }, { "epoch": 0.3052768107038023, "grad_norm": 0.04818626120686531, "learning_rate": 7.993539143316045e-05, "loss": 46.0031, "step": 2236 }, { "epoch": 0.305413338794457, "grad_norm": 0.037741512060165405, "learning_rate": 7.991797225736639e-05, "loss": 46.0011, "step": 2237 }, { "epoch": 0.3055498668851116, "grad_norm": 0.03654111549258232, "learning_rate": 7.990054742339676e-05, "loss": 46.003, "step": 2238 }, { "epoch": 0.3056863949757663, "grad_norm": 0.0810028538107872, "learning_rate": 7.9883116934547e-05, "loss": 46.004, "step": 2239 }, { "epoch": 0.3058229230664209, "grad_norm": 0.07483073323965073, "learning_rate": 7.986568079411362e-05, "loss": 46.0016, "step": 2240 }, { "epoch": 0.30595945115707557, "grad_norm": 0.07007084786891937, "learning_rate": 7.98482390053942e-05, "loss": 46.0027, "step": 2241 }, { "epoch": 0.30609597924773024, "grad_norm": 0.07013051211833954, "learning_rate": 7.983079157168736e-05, "loss": 46.0006, "step": 2242 }, { "epoch": 0.30623250733838486, "grad_norm": 0.13813447952270508, "learning_rate": 7.981333849629282e-05, "loss": 46.0049, "step": 2243 }, { "epoch": 0.30636903542903954, "grad_norm": 0.15285450220108032, "learning_rate": 7.979587978251135e-05, "loss": 46.0022, "step": 2244 }, { "epoch": 0.30650556351969416, "grad_norm": 0.105680450797081, "learning_rate": 7.977841543364481e-05, "loss": 46.0056, "step": 2245 }, { "epoch": 0.30664209161034883, "grad_norm": 0.07350999861955643, "learning_rate": 7.976094545299612e-05, "loss": 46.0054, "step": 2246 }, { "epoch": 0.3067786197010035, "grad_norm": 0.16121087968349457, "learning_rate": 7.974346984386923e-05, "loss": 46.0153, "step": 2247 }, { "epoch": 0.3069151477916581, "grad_norm": 0.14443638920783997, "learning_rate": 7.972598860956917e-05, "loss": 46.0142, "step": 2248 }, { "epoch": 
0.3070516758823128, "grad_norm": 0.08114917576313019, "learning_rate": 7.970850175340208e-05, "loss": 46.0103, "step": 2249 }, { "epoch": 0.3071882039729674, "grad_norm": 0.20918354392051697, "learning_rate": 7.969100927867507e-05, "loss": 46.008, "step": 2250 }, { "epoch": 0.3073247320636221, "grad_norm": 0.0474676713347435, "learning_rate": 7.967351118869644e-05, "loss": 46.0031, "step": 2251 }, { "epoch": 0.3074612601542767, "grad_norm": 0.08985094726085663, "learning_rate": 7.965600748677544e-05, "loss": 46.0089, "step": 2252 }, { "epoch": 0.3075977882449314, "grad_norm": 0.054638300091028214, "learning_rate": 7.963849817622241e-05, "loss": 46.0007, "step": 2253 }, { "epoch": 0.30773431633558607, "grad_norm": 0.09645725786685944, "learning_rate": 7.962098326034879e-05, "loss": 46.002, "step": 2254 }, { "epoch": 0.3078708444262407, "grad_norm": 0.06458620727062225, "learning_rate": 7.960346274246704e-05, "loss": 46.0055, "step": 2255 }, { "epoch": 0.30800737251689536, "grad_norm": 0.12574337422847748, "learning_rate": 7.958593662589069e-05, "loss": 46.0053, "step": 2256 }, { "epoch": 0.30814390060755, "grad_norm": 0.04850224032998085, "learning_rate": 7.956840491393434e-05, "loss": 46.0036, "step": 2257 }, { "epoch": 0.30828042869820466, "grad_norm": 0.04960274323821068, "learning_rate": 7.955086760991361e-05, "loss": 46.0054, "step": 2258 }, { "epoch": 0.30841695678885933, "grad_norm": 0.11507552117109299, "learning_rate": 7.953332471714526e-05, "loss": 46.0078, "step": 2259 }, { "epoch": 0.30855348487951395, "grad_norm": 0.12147805839776993, "learning_rate": 7.951577623894702e-05, "loss": 46.0076, "step": 2260 }, { "epoch": 0.3086900129701686, "grad_norm": 0.07974059134721756, "learning_rate": 7.949822217863771e-05, "loss": 46.0033, "step": 2261 }, { "epoch": 0.30882654106082325, "grad_norm": 0.10387420654296875, "learning_rate": 7.948066253953721e-05, "loss": 46.0037, "step": 2262 }, { "epoch": 0.3089630691514779, "grad_norm": 0.057140979915857315, "learning_rate": 7.946309732496647e-05, "loss": 46.0106, "step": 2263 }, { "epoch": 0.3090995972421326, "grad_norm": 0.0823119655251503, "learning_rate": 7.944552653824743e-05, "loss": 46.0079, "step": 2264 }, { "epoch": 0.3092361253327872, "grad_norm": 0.08640222251415253, "learning_rate": 7.942795018270317e-05, "loss": 46.008, "step": 2265 }, { "epoch": 0.3093726534234419, "grad_norm": 0.029985710978507996, "learning_rate": 7.941036826165778e-05, "loss": 46.0102, "step": 2266 }, { "epoch": 0.3095091815140965, "grad_norm": 0.04533234238624573, "learning_rate": 7.939278077843637e-05, "loss": 46.0052, "step": 2267 }, { "epoch": 0.3096457096047512, "grad_norm": 0.03653517737984657, "learning_rate": 7.937518773636518e-05, "loss": 46.0024, "step": 2268 }, { "epoch": 0.3097822376954058, "grad_norm": 0.08275052160024643, "learning_rate": 7.935758913877142e-05, "loss": 46.0078, "step": 2269 }, { "epoch": 0.3099187657860605, "grad_norm": 0.24493104219436646, "learning_rate": 7.933998498898342e-05, "loss": 46.0129, "step": 2270 }, { "epoch": 0.31005529387671515, "grad_norm": 0.04048851877450943, "learning_rate": 7.932237529033053e-05, "loss": 46.014, "step": 2271 }, { "epoch": 0.3101918219673698, "grad_norm": 0.11917678266763687, "learning_rate": 7.930476004614313e-05, "loss": 46.005, "step": 2272 }, { "epoch": 0.31032835005802445, "grad_norm": 0.13990776240825653, "learning_rate": 7.928713925975266e-05, "loss": 46.0044, "step": 2273 }, { "epoch": 0.31046487814867907, "grad_norm": 0.048524945974349976, "learning_rate": 7.926951293449164e-05,
"loss": 46.01, "step": 2274 }, { "epoch": 0.31060140623933374, "grad_norm": 0.03877371922135353, "learning_rate": 7.92518810736936e-05, "loss": 46.0013, "step": 2275 }, { "epoch": 0.3107379343299884, "grad_norm": 0.06718602031469345, "learning_rate": 7.923424368069312e-05, "loss": 46.0014, "step": 2276 }, { "epoch": 0.31087446242064304, "grad_norm": 0.15729375183582306, "learning_rate": 7.921660075882586e-05, "loss": 46.0051, "step": 2277 }, { "epoch": 0.3110109905112977, "grad_norm": 0.1324358731508255, "learning_rate": 7.919895231142848e-05, "loss": 46.0124, "step": 2278 }, { "epoch": 0.31114751860195233, "grad_norm": 0.029627440497279167, "learning_rate": 7.918129834183873e-05, "loss": 46.0115, "step": 2279 }, { "epoch": 0.311284046692607, "grad_norm": 0.060650020837783813, "learning_rate": 7.916363885339534e-05, "loss": 46.0058, "step": 2280 }, { "epoch": 0.3114205747832617, "grad_norm": 0.11627070605754852, "learning_rate": 7.914597384943817e-05, "loss": 46.0026, "step": 2281 }, { "epoch": 0.3115571028739163, "grad_norm": 0.11698884516954422, "learning_rate": 7.912830333330806e-05, "loss": 46.0137, "step": 2282 }, { "epoch": 0.311693630964571, "grad_norm": 0.09407193958759308, "learning_rate": 7.91106273083469e-05, "loss": 46.0075, "step": 2283 }, { "epoch": 0.3118301590552256, "grad_norm": 0.03504778817296028, "learning_rate": 7.909294577789766e-05, "loss": 46.0, "step": 2284 }, { "epoch": 0.3119666871458803, "grad_norm": 0.06222125142812729, "learning_rate": 7.907525874530429e-05, "loss": 46.0014, "step": 2285 }, { "epoch": 0.3121032152365349, "grad_norm": 0.09561023116111755, "learning_rate": 7.905756621391183e-05, "loss": 46.0046, "step": 2286 }, { "epoch": 0.31223974332718957, "grad_norm": 0.039617568254470825, "learning_rate": 7.903986818706632e-05, "loss": 46.0096, "step": 2287 }, { "epoch": 0.31237627141784424, "grad_norm": 0.07044173777103424, "learning_rate": 7.902216466811492e-05, "loss": 46.0031, "step": 2288 }, { "epoch": 0.31251279950849886, "grad_norm": 0.12227807193994522, "learning_rate": 7.900445566040573e-05, "loss": 46.0004, "step": 2289 }, { "epoch": 0.31264932759915354, "grad_norm": 0.06601500511169434, "learning_rate": 7.898674116728792e-05, "loss": 46.0089, "step": 2290 }, { "epoch": 0.31278585568980816, "grad_norm": 0.035665784031152725, "learning_rate": 7.896902119211175e-05, "loss": 46.0068, "step": 2291 }, { "epoch": 0.31292238378046283, "grad_norm": 0.03733803704380989, "learning_rate": 7.895129573822843e-05, "loss": 46.0022, "step": 2292 }, { "epoch": 0.3130589118711175, "grad_norm": 0.06630972027778625, "learning_rate": 7.89335648089903e-05, "loss": 46.0038, "step": 2293 }, { "epoch": 0.3131954399617721, "grad_norm": 0.07079185545444489, "learning_rate": 7.891582840775063e-05, "loss": 46.002, "step": 2294 }, { "epoch": 0.3133319680524268, "grad_norm": 0.11252684146165848, "learning_rate": 7.889808653786382e-05, "loss": 46.0071, "step": 2295 }, { "epoch": 0.3134684961430814, "grad_norm": 0.06548863649368286, "learning_rate": 7.888033920268524e-05, "loss": 46.0075, "step": 2296 }, { "epoch": 0.3136050242337361, "grad_norm": 0.05755559355020523, "learning_rate": 7.886258640557134e-05, "loss": 46.0101, "step": 2297 }, { "epoch": 0.31374155232439077, "grad_norm": 0.15627242624759674, "learning_rate": 7.884482814987957e-05, "loss": 46.0036, "step": 2298 }, { "epoch": 0.3138780804150454, "grad_norm": 0.12887191772460938, "learning_rate": 7.882706443896841e-05, "loss": 46.0075, "step": 2299 }, { "epoch": 0.31401460850570007, "grad_norm": 0.23238515853881836, 
"learning_rate": 7.880929527619742e-05, "loss": 46.0069, "step": 2300 }, { "epoch": 0.3141511365963547, "grad_norm": 0.17320580780506134, "learning_rate": 7.879152066492715e-05, "loss": 46.0123, "step": 2301 }, { "epoch": 0.31428766468700936, "grad_norm": 0.06952478736639023, "learning_rate": 7.877374060851915e-05, "loss": 46.0003, "step": 2302 }, { "epoch": 0.314424192777664, "grad_norm": 0.0605926476418972, "learning_rate": 7.875595511033606e-05, "loss": 46.0005, "step": 2303 }, { "epoch": 0.31456072086831866, "grad_norm": 0.06830646097660065, "learning_rate": 7.873816417374154e-05, "loss": 46.0049, "step": 2304 }, { "epoch": 0.31469724895897333, "grad_norm": 0.08110372722148895, "learning_rate": 7.872036780210026e-05, "loss": 46.0044, "step": 2305 }, { "epoch": 0.31483377704962795, "grad_norm": 0.03455398231744766, "learning_rate": 7.870256599877791e-05, "loss": 46.0034, "step": 2306 }, { "epoch": 0.3149703051402826, "grad_norm": 0.06936363130807877, "learning_rate": 7.868475876714123e-05, "loss": 46.0008, "step": 2307 }, { "epoch": 0.31510683323093724, "grad_norm": 0.06509093940258026, "learning_rate": 7.866694611055796e-05, "loss": 46.0074, "step": 2308 }, { "epoch": 0.3152433613215919, "grad_norm": 0.12767794728279114, "learning_rate": 7.86491280323969e-05, "loss": 46.002, "step": 2309 }, { "epoch": 0.3153798894122466, "grad_norm": 0.04930827394127846, "learning_rate": 7.863130453602786e-05, "loss": 46.0051, "step": 2310 }, { "epoch": 0.3155164175029012, "grad_norm": 0.09081713110208511, "learning_rate": 7.861347562482165e-05, "loss": 46.0029, "step": 2311 }, { "epoch": 0.3156529455935559, "grad_norm": 0.10908962041139603, "learning_rate": 7.859564130215015e-05, "loss": 46.0132, "step": 2312 }, { "epoch": 0.3157894736842105, "grad_norm": 0.12011969089508057, "learning_rate": 7.857780157138623e-05, "loss": 46.0022, "step": 2313 }, { "epoch": 0.3159260017748652, "grad_norm": 0.08189690113067627, "learning_rate": 7.85599564359038e-05, "loss": 46.0132, "step": 2314 }, { "epoch": 0.3160625298655198, "grad_norm": 0.04245464876294136, "learning_rate": 7.854210589907778e-05, "loss": 46.0027, "step": 2315 }, { "epoch": 0.3161990579561745, "grad_norm": 0.06267411261796951, "learning_rate": 7.852424996428412e-05, "loss": 46.008, "step": 2316 }, { "epoch": 0.31633558604682915, "grad_norm": 0.058329347521066666, "learning_rate": 7.850638863489976e-05, "loss": 46.0073, "step": 2317 }, { "epoch": 0.3164721141374838, "grad_norm": 0.16938361525535583, "learning_rate": 7.848852191430272e-05, "loss": 46.0058, "step": 2318 }, { "epoch": 0.31660864222813845, "grad_norm": 0.0336732380092144, "learning_rate": 7.847064980587199e-05, "loss": 46.0079, "step": 2319 }, { "epoch": 0.31674517031879307, "grad_norm": 0.060919586569070816, "learning_rate": 7.845277231298761e-05, "loss": 46.0049, "step": 2320 }, { "epoch": 0.31688169840944774, "grad_norm": 0.05041394382715225, "learning_rate": 7.843488943903062e-05, "loss": 46.0091, "step": 2321 }, { "epoch": 0.3170182265001024, "grad_norm": 0.1137467548251152, "learning_rate": 7.841700118738308e-05, "loss": 46.0104, "step": 2322 }, { "epoch": 0.31715475459075704, "grad_norm": 0.042069945484399796, "learning_rate": 7.839910756142807e-05, "loss": 46.0026, "step": 2323 }, { "epoch": 0.3172912826814117, "grad_norm": 0.04367847740650177, "learning_rate": 7.838120856454968e-05, "loss": 46.0017, "step": 2324 }, { "epoch": 0.31742781077206633, "grad_norm": 0.10674586147069931, "learning_rate": 7.836330420013303e-05, "loss": 46.0092, "step": 2325 }, { "epoch": 
0.317564338862721, "grad_norm": 0.042446643114089966, "learning_rate": 7.834539447156424e-05, "loss": 46.0026, "step": 2326 }, { "epoch": 0.3177008669533757, "grad_norm": 0.0322008952498436, "learning_rate": 7.832747938223044e-05, "loss": 46.0099, "step": 2327 }, { "epoch": 0.3178373950440303, "grad_norm": 0.0385475717484951, "learning_rate": 7.830955893551983e-05, "loss": 46.0049, "step": 2328 }, { "epoch": 0.317973923134685, "grad_norm": 0.08432316035032272, "learning_rate": 7.829163313482151e-05, "loss": 46.0042, "step": 2329 }, { "epoch": 0.3181104512253396, "grad_norm": 0.031665217131376266, "learning_rate": 7.827370198352573e-05, "loss": 46.0046, "step": 2330 }, { "epoch": 0.31824697931599427, "grad_norm": 0.06182726100087166, "learning_rate": 7.825576548502362e-05, "loss": 46.0017, "step": 2331 }, { "epoch": 0.3183835074066489, "grad_norm": 0.11776746809482574, "learning_rate": 7.823782364270743e-05, "loss": 46.0103, "step": 2332 }, { "epoch": 0.31852003549730357, "grad_norm": 0.05878999084234238, "learning_rate": 7.821987645997036e-05, "loss": 46.0018, "step": 2333 }, { "epoch": 0.31865656358795824, "grad_norm": 0.032820601016283035, "learning_rate": 7.82019239402066e-05, "loss": 46.0033, "step": 2334 }, { "epoch": 0.31879309167861286, "grad_norm": 0.053745996206998825, "learning_rate": 7.818396608681144e-05, "loss": 46.003, "step": 2335 }, { "epoch": 0.31892961976926754, "grad_norm": 0.04527346044778824, "learning_rate": 7.81660029031811e-05, "loss": 46.0049, "step": 2336 }, { "epoch": 0.31906614785992216, "grad_norm": 0.057261619716882706, "learning_rate": 7.814803439271284e-05, "loss": 46.003, "step": 2337 }, { "epoch": 0.31920267595057683, "grad_norm": 0.03927910327911377, "learning_rate": 7.813006055880488e-05, "loss": 46.003, "step": 2338 }, { "epoch": 0.3193392040412315, "grad_norm": 0.03573683276772499, "learning_rate": 7.811208140485653e-05, "loss": 46.0099, "step": 2339 }, { "epoch": 0.3194757321318861, "grad_norm": 0.053848981857299805, "learning_rate": 7.809409693426803e-05, "loss": 46.0052, "step": 2340 }, { "epoch": 0.3196122602225408, "grad_norm": 0.13740186393260956, "learning_rate": 7.807610715044066e-05, "loss": 46.0112, "step": 2341 }, { "epoch": 0.3197487883131954, "grad_norm": 0.0335574708878994, "learning_rate": 7.805811205677673e-05, "loss": 46.0, "step": 2342 }, { "epoch": 0.3198853164038501, "grad_norm": 0.03292257711291313, "learning_rate": 7.80401116566795e-05, "loss": 46.009, "step": 2343 }, { "epoch": 0.32002184449450477, "grad_norm": 0.0895957201719284, "learning_rate": 7.802210595355326e-05, "loss": 46.0029, "step": 2344 }, { "epoch": 0.3201583725851594, "grad_norm": 0.0580277144908905, "learning_rate": 7.80040949508033e-05, "loss": 46.0013, "step": 2345 }, { "epoch": 0.32029490067581406, "grad_norm": 0.13071410357952118, "learning_rate": 7.798607865183592e-05, "loss": 46.0047, "step": 2346 }, { "epoch": 0.3204314287664687, "grad_norm": 0.04337235540151596, "learning_rate": 7.796805706005843e-05, "loss": 46.0009, "step": 2347 }, { "epoch": 0.32056795685712336, "grad_norm": 0.04333433136343956, "learning_rate": 7.79500301788791e-05, "loss": 46.0057, "step": 2348 }, { "epoch": 0.320704484947778, "grad_norm": 0.19528566300868988, "learning_rate": 7.793199801170724e-05, "loss": 46.0074, "step": 2349 }, { "epoch": 0.32084101303843265, "grad_norm": 0.4017755091190338, "learning_rate": 7.791396056195315e-05, "loss": 46.0031, "step": 2350 }, { "epoch": 0.32097754112908733, "grad_norm": 0.06867045909166336, "learning_rate": 7.789591783302813e-05, "loss": 
46.0031, "step": 2351 }, { "epoch": 0.32111406921974195, "grad_norm": 0.08327417820692062, "learning_rate": 7.787786982834444e-05, "loss": 46.0025, "step": 2352 }, { "epoch": 0.3212505973103966, "grad_norm": 0.026355475187301636, "learning_rate": 7.785981655131543e-05, "loss": 46.0076, "step": 2353 }, { "epoch": 0.32138712540105124, "grad_norm": 0.04928196966648102, "learning_rate": 7.784175800535534e-05, "loss": 46.0048, "step": 2354 }, { "epoch": 0.3215236534917059, "grad_norm": 0.05744294449687004, "learning_rate": 7.78236941938795e-05, "loss": 46.0016, "step": 2355 }, { "epoch": 0.3216601815823606, "grad_norm": 0.05622630938887596, "learning_rate": 7.780562512030413e-05, "loss": 46.0054, "step": 2356 }, { "epoch": 0.3217967096730152, "grad_norm": 0.046670977026224136, "learning_rate": 7.778755078804657e-05, "loss": 46.004, "step": 2357 }, { "epoch": 0.3219332377636699, "grad_norm": 0.07489047199487686, "learning_rate": 7.776947120052504e-05, "loss": 46.0086, "step": 2358 }, { "epoch": 0.3220697658543245, "grad_norm": 0.053108666092157364, "learning_rate": 7.775138636115884e-05, "loss": 46.0033, "step": 2359 }, { "epoch": 0.3222062939449792, "grad_norm": 0.07008488476276398, "learning_rate": 7.773329627336823e-05, "loss": 46.0027, "step": 2360 }, { "epoch": 0.32234282203563386, "grad_norm": 0.08452637493610382, "learning_rate": 7.771520094057443e-05, "loss": 46.0054, "step": 2361 }, { "epoch": 0.3224793501262885, "grad_norm": 0.04944227635860443, "learning_rate": 7.769710036619971e-05, "loss": 46.0056, "step": 2362 }, { "epoch": 0.32261587821694315, "grad_norm": 0.052133649587631226, "learning_rate": 7.76789945536673e-05, "loss": 46.0106, "step": 2363 }, { "epoch": 0.3227524063075978, "grad_norm": 0.06824846565723419, "learning_rate": 7.76608835064014e-05, "loss": 46.0031, "step": 2364 }, { "epoch": 0.32288893439825245, "grad_norm": 0.11251381784677505, "learning_rate": 7.764276722782726e-05, "loss": 46.0074, "step": 2365 }, { "epoch": 0.32302546248890707, "grad_norm": 0.05627238005399704, "learning_rate": 7.762464572137107e-05, "loss": 46.0136, "step": 2366 }, { "epoch": 0.32316199057956174, "grad_norm": 0.04113422706723213, "learning_rate": 7.760651899046003e-05, "loss": 46.0042, "step": 2367 }, { "epoch": 0.3232985186702164, "grad_norm": 0.10562483966350555, "learning_rate": 7.75883870385223e-05, "loss": 46.002, "step": 2368 }, { "epoch": 0.32343504676087104, "grad_norm": 0.052782632410526276, "learning_rate": 7.757024986898706e-05, "loss": 46.0054, "step": 2369 }, { "epoch": 0.3235715748515257, "grad_norm": 0.03351743146777153, "learning_rate": 7.755210748528449e-05, "loss": 46.0012, "step": 2370 }, { "epoch": 0.32370810294218033, "grad_norm": 0.061609722673892975, "learning_rate": 7.753395989084569e-05, "loss": 46.0028, "step": 2371 }, { "epoch": 0.323844631032835, "grad_norm": 0.12369406968355179, "learning_rate": 7.75158070891028e-05, "loss": 46.0048, "step": 2372 }, { "epoch": 0.3239811591234897, "grad_norm": 0.09524232149124146, "learning_rate": 7.749764908348895e-05, "loss": 46.0098, "step": 2373 }, { "epoch": 0.3241176872141443, "grad_norm": 0.11783839017152786, "learning_rate": 7.747948587743823e-05, "loss": 46.0081, "step": 2374 }, { "epoch": 0.324254215304799, "grad_norm": 0.05258028581738472, "learning_rate": 7.74613174743857e-05, "loss": 46.0033, "step": 2375 }, { "epoch": 0.3243907433954536, "grad_norm": 0.18624091148376465, "learning_rate": 7.744314387776743e-05, "loss": 46.0043, "step": 2376 }, { "epoch": 0.32452727148610827, "grad_norm": 0.13209089636802673, 
"learning_rate": 7.742496509102047e-05, "loss": 46.0058, "step": 2377 }, { "epoch": 0.32466379957676295, "grad_norm": 0.09499132633209229, "learning_rate": 7.740678111758284e-05, "loss": 46.0056, "step": 2378 }, { "epoch": 0.32480032766741757, "grad_norm": 0.0513133779168129, "learning_rate": 7.738859196089358e-05, "loss": 46.0048, "step": 2379 }, { "epoch": 0.32493685575807224, "grad_norm": 0.05496421828866005, "learning_rate": 7.737039762439263e-05, "loss": 46.0077, "step": 2380 }, { "epoch": 0.32507338384872686, "grad_norm": 0.03104402869939804, "learning_rate": 7.735219811152098e-05, "loss": 46.0073, "step": 2381 }, { "epoch": 0.32520991193938154, "grad_norm": 0.04970036447048187, "learning_rate": 7.733399342572056e-05, "loss": 46.0126, "step": 2382 }, { "epoch": 0.32534644003003615, "grad_norm": 0.05049006640911102, "learning_rate": 7.731578357043431e-05, "loss": 46.0056, "step": 2383 }, { "epoch": 0.32548296812069083, "grad_norm": 0.050248824059963226, "learning_rate": 7.729756854910615e-05, "loss": 46.0055, "step": 2384 }, { "epoch": 0.3256194962113455, "grad_norm": 0.1270340234041214, "learning_rate": 7.727934836518092e-05, "loss": 46.0039, "step": 2385 }, { "epoch": 0.3257560243020001, "grad_norm": 0.04722575098276138, "learning_rate": 7.726112302210448e-05, "loss": 46.0074, "step": 2386 }, { "epoch": 0.3258925523926548, "grad_norm": 0.051176879554986954, "learning_rate": 7.724289252332368e-05, "loss": 46.0065, "step": 2387 }, { "epoch": 0.3260290804833094, "grad_norm": 0.030080856755375862, "learning_rate": 7.722465687228633e-05, "loss": 46.0088, "step": 2388 }, { "epoch": 0.3261656085739641, "grad_norm": 0.041067443788051605, "learning_rate": 7.72064160724412e-05, "loss": 46.0063, "step": 2389 }, { "epoch": 0.32630213666461877, "grad_norm": 0.10000106692314148, "learning_rate": 7.718817012723803e-05, "loss": 46.0077, "step": 2390 }, { "epoch": 0.3264386647552734, "grad_norm": 0.03864217549562454, "learning_rate": 7.716991904012756e-05, "loss": 46.0007, "step": 2391 }, { "epoch": 0.32657519284592806, "grad_norm": 0.15792548656463623, "learning_rate": 7.71516628145615e-05, "loss": 46.0003, "step": 2392 }, { "epoch": 0.3267117209365827, "grad_norm": 0.12320960313081741, "learning_rate": 7.713340145399251e-05, "loss": 46.0077, "step": 2393 }, { "epoch": 0.32684824902723736, "grad_norm": 0.11252971738576889, "learning_rate": 7.711513496187422e-05, "loss": 46.007, "step": 2394 }, { "epoch": 0.32698477711789203, "grad_norm": 0.18415217101573944, "learning_rate": 7.709686334166127e-05, "loss": 46.0064, "step": 2395 }, { "epoch": 0.32712130520854665, "grad_norm": 0.3024270236492157, "learning_rate": 7.707858659680924e-05, "loss": 46.0062, "step": 2396 }, { "epoch": 0.32725783329920133, "grad_norm": 0.07900864630937576, "learning_rate": 7.706030473077466e-05, "loss": 46.0003, "step": 2397 }, { "epoch": 0.32739436138985595, "grad_norm": 0.15733203291893005, "learning_rate": 7.704201774701505e-05, "loss": 46.0018, "step": 2398 }, { "epoch": 0.3275308894805106, "grad_norm": 0.06569360941648483, "learning_rate": 7.702372564898892e-05, "loss": 46.0024, "step": 2399 }, { "epoch": 0.32766741757116524, "grad_norm": 0.25557464361190796, "learning_rate": 7.700542844015572e-05, "loss": 46.0013, "step": 2400 }, { "epoch": 0.3278039456618199, "grad_norm": 0.12120918929576874, "learning_rate": 7.698712612397589e-05, "loss": 46.0102, "step": 2401 }, { "epoch": 0.3279404737524746, "grad_norm": 0.030923593789339066, "learning_rate": 7.696881870391078e-05, "loss": 46.0038, "step": 2402 }, { "epoch": 
0.3280770018431292, "grad_norm": 0.05471885949373245, "learning_rate": 7.695050618342276e-05, "loss": 46.0056, "step": 2403 }, { "epoch": 0.3282135299337839, "grad_norm": 0.08186311274766922, "learning_rate": 7.693218856597515e-05, "loss": 46.0055, "step": 2404 }, { "epoch": 0.3283500580244385, "grad_norm": 0.03972172737121582, "learning_rate": 7.691386585503223e-05, "loss": 46.0066, "step": 2405 }, { "epoch": 0.3284865861150932, "grad_norm": 0.041294973343610764, "learning_rate": 7.689553805405925e-05, "loss": 46.0059, "step": 2406 }, { "epoch": 0.32862311420574786, "grad_norm": 0.0609920471906662, "learning_rate": 7.687720516652242e-05, "loss": 46.0035, "step": 2407 }, { "epoch": 0.3287596422964025, "grad_norm": 0.08375613391399384, "learning_rate": 7.685886719588891e-05, "loss": 46.001, "step": 2408 }, { "epoch": 0.32889617038705715, "grad_norm": 0.023319905623793602, "learning_rate": 7.684052414562683e-05, "loss": 46.0073, "step": 2409 }, { "epoch": 0.32903269847771177, "grad_norm": 0.06012373045086861, "learning_rate": 7.682217601920529e-05, "loss": 46.0032, "step": 2410 }, { "epoch": 0.32916922656836645, "grad_norm": 0.07107339054346085, "learning_rate": 7.680382282009436e-05, "loss": 46.0134, "step": 2411 }, { "epoch": 0.32930575465902107, "grad_norm": 0.07095666229724884, "learning_rate": 7.678546455176501e-05, "loss": 46.0029, "step": 2412 }, { "epoch": 0.32944228274967574, "grad_norm": 0.12113150954246521, "learning_rate": 7.676710121768924e-05, "loss": 46.0048, "step": 2413 }, { "epoch": 0.3295788108403304, "grad_norm": 0.049721136689186096, "learning_rate": 7.674873282133996e-05, "loss": 46.0062, "step": 2414 }, { "epoch": 0.32971533893098504, "grad_norm": 0.04285869374871254, "learning_rate": 7.673035936619108e-05, "loss": 46.0064, "step": 2415 }, { "epoch": 0.3298518670216397, "grad_norm": 0.06029954180121422, "learning_rate": 7.671198085571744e-05, "loss": 46.0086, "step": 2416 }, { "epoch": 0.32998839511229433, "grad_norm": 0.07143882662057877, "learning_rate": 7.66935972933948e-05, "loss": 46.0078, "step": 2417 }, { "epoch": 0.330124923202949, "grad_norm": 0.07925109565258026, "learning_rate": 7.667520868269995e-05, "loss": 46.0075, "step": 2418 }, { "epoch": 0.3302614512936037, "grad_norm": 0.04047425463795662, "learning_rate": 7.665681502711059e-05, "loss": 46.0084, "step": 2419 }, { "epoch": 0.3303979793842583, "grad_norm": 0.06191015616059303, "learning_rate": 7.663841633010538e-05, "loss": 46.0025, "step": 2420 }, { "epoch": 0.330534507474913, "grad_norm": 0.08631505817174911, "learning_rate": 7.662001259516393e-05, "loss": 46.0029, "step": 2421 }, { "epoch": 0.3306710355655676, "grad_norm": 0.05107804760336876, "learning_rate": 7.660160382576683e-05, "loss": 46.0096, "step": 2422 }, { "epoch": 0.33080756365622227, "grad_norm": 0.046317972242832184, "learning_rate": 7.658319002539557e-05, "loss": 46.0107, "step": 2423 }, { "epoch": 0.33094409174687694, "grad_norm": 0.086350217461586, "learning_rate": 7.656477119753267e-05, "loss": 46.0076, "step": 2424 }, { "epoch": 0.33108061983753156, "grad_norm": 0.05942961201071739, "learning_rate": 7.65463473456615e-05, "loss": 46.0083, "step": 2425 }, { "epoch": 0.33121714792818624, "grad_norm": 0.04219512268900871, "learning_rate": 7.652791847326647e-05, "loss": 46.0008, "step": 2426 }, { "epoch": 0.33135367601884086, "grad_norm": 0.07253344357013702, "learning_rate": 7.650948458383288e-05, "loss": 46.004, "step": 2427 }, { "epoch": 0.33149020410949553, "grad_norm": 0.1761104315519333, "learning_rate": 7.649104568084701e-05,
"loss": 46.0127, "step": 2428 }, { "epoch": 0.33162673220015015, "grad_norm": 0.08891978114843369, "learning_rate": 7.647260176779608e-05, "loss": 46.0109, "step": 2429 }, { "epoch": 0.33176326029080483, "grad_norm": 0.058492422103881836, "learning_rate": 7.645415284816828e-05, "loss": 46.0114, "step": 2430 }, { "epoch": 0.3318997883814595, "grad_norm": 0.05890268832445145, "learning_rate": 7.643569892545267e-05, "loss": 46.0098, "step": 2431 }, { "epoch": 0.3320363164721141, "grad_norm": 0.13941484689712524, "learning_rate": 7.641724000313936e-05, "loss": 46.0051, "step": 2432 }, { "epoch": 0.3321728445627688, "grad_norm": 0.12504573166370392, "learning_rate": 7.639877608471935e-05, "loss": 46.01, "step": 2433 }, { "epoch": 0.3323093726534234, "grad_norm": 0.033859118819236755, "learning_rate": 7.638030717368457e-05, "loss": 46.0028, "step": 2434 }, { "epoch": 0.3324459007440781, "grad_norm": 0.044842835515737534, "learning_rate": 7.636183327352793e-05, "loss": 46.0006, "step": 2435 }, { "epoch": 0.33258242883473277, "grad_norm": 0.11034370958805084, "learning_rate": 7.634335438774324e-05, "loss": 46.005, "step": 2436 }, { "epoch": 0.3327189569253874, "grad_norm": 0.0533994659781456, "learning_rate": 7.632487051982534e-05, "loss": 46.0044, "step": 2437 }, { "epoch": 0.33285548501604206, "grad_norm": 0.08569423109292984, "learning_rate": 7.63063816732699e-05, "loss": 46.0046, "step": 2438 }, { "epoch": 0.3329920131066967, "grad_norm": 0.17276008427143097, "learning_rate": 7.628788785157363e-05, "loss": 46.0016, "step": 2439 }, { "epoch": 0.33312854119735136, "grad_norm": 0.03524738922715187, "learning_rate": 7.626938905823409e-05, "loss": 46.0046, "step": 2440 }, { "epoch": 0.33326506928800603, "grad_norm": 0.03884146735072136, "learning_rate": 7.625088529674986e-05, "loss": 46.0112, "step": 2441 }, { "epoch": 0.33340159737866065, "grad_norm": 0.027518216520547867, "learning_rate": 7.623237657062041e-05, "loss": 46.009, "step": 2442 }, { "epoch": 0.3335381254693153, "grad_norm": 0.03921595960855484, "learning_rate": 7.621386288334618e-05, "loss": 46.0025, "step": 2443 }, { "epoch": 0.33367465355996995, "grad_norm": 0.06874629110097885, "learning_rate": 7.619534423842852e-05, "loss": 46.0038, "step": 2444 }, { "epoch": 0.3338111816506246, "grad_norm": 0.11401580274105072, "learning_rate": 7.61768206393697e-05, "loss": 46.0075, "step": 2445 }, { "epoch": 0.33394770974127924, "grad_norm": 0.0723903626203537, "learning_rate": 7.615829208967303e-05, "loss": 46.0034, "step": 2446 }, { "epoch": 0.3340842378319339, "grad_norm": 0.09895554184913635, "learning_rate": 7.613975859284263e-05, "loss": 46.0071, "step": 2447 }, { "epoch": 0.3342207659225886, "grad_norm": 0.08224520832300186, "learning_rate": 7.612122015238364e-05, "loss": 46.0128, "step": 2448 }, { "epoch": 0.3343572940132432, "grad_norm": 0.11856339871883392, "learning_rate": 7.610267677180208e-05, "loss": 46.0164, "step": 2449 }, { "epoch": 0.3344938221038979, "grad_norm": 0.06712093949317932, "learning_rate": 7.608412845460496e-05, "loss": 46.0033, "step": 2450 }, { "epoch": 0.3346303501945525, "grad_norm": 0.12051035463809967, "learning_rate": 7.606557520430015e-05, "loss": 46.0056, "step": 2451 }, { "epoch": 0.3347668782852072, "grad_norm": 0.04672810807824135, "learning_rate": 7.604701702439651e-05, "loss": 46.0071, "step": 2452 }, { "epoch": 0.33490340637586186, "grad_norm": 0.04719126969575882, "learning_rate": 7.602845391840385e-05, "loss": 46.0067, "step": 2453 }, { "epoch": 0.3350399344665165, "grad_norm": 0.05666414275765419, 
"learning_rate": 7.600988588983284e-05, "loss": 46.0015, "step": 2454 }, { "epoch": 0.33517646255717115, "grad_norm": 0.05731857195496559, "learning_rate": 7.599131294219516e-05, "loss": 46.0028, "step": 2455 }, { "epoch": 0.33531299064782577, "grad_norm": 0.03351934626698494, "learning_rate": 7.597273507900335e-05, "loss": 46.0037, "step": 2456 }, { "epoch": 0.33544951873848045, "grad_norm": 0.1454591453075409, "learning_rate": 7.59541523037709e-05, "loss": 46.0048, "step": 2457 }, { "epoch": 0.3355860468291351, "grad_norm": 0.10389339178800583, "learning_rate": 7.593556462001228e-05, "loss": 46.0054, "step": 2458 }, { "epoch": 0.33572257491978974, "grad_norm": 0.08865857124328613, "learning_rate": 7.591697203124279e-05, "loss": 46.0047, "step": 2459 }, { "epoch": 0.3358591030104444, "grad_norm": 0.061848778277635574, "learning_rate": 7.58983745409788e-05, "loss": 46.0066, "step": 2460 }, { "epoch": 0.33599563110109903, "grad_norm": 0.08364047855138779, "learning_rate": 7.587977215273744e-05, "loss": 46.0121, "step": 2461 }, { "epoch": 0.3361321591917537, "grad_norm": 0.07265309989452362, "learning_rate": 7.586116487003689e-05, "loss": 46.0022, "step": 2462 }, { "epoch": 0.33626868728240833, "grad_norm": 0.06864363700151443, "learning_rate": 7.584255269639621e-05, "loss": 46.0009, "step": 2463 }, { "epoch": 0.336405215373063, "grad_norm": 0.07863830029964447, "learning_rate": 7.58239356353354e-05, "loss": 46.0103, "step": 2464 }, { "epoch": 0.3365417434637177, "grad_norm": 0.1610105037689209, "learning_rate": 7.580531369037533e-05, "loss": 46.0099, "step": 2465 }, { "epoch": 0.3366782715543723, "grad_norm": 0.052166350185871124, "learning_rate": 7.578668686503789e-05, "loss": 46.0014, "step": 2466 }, { "epoch": 0.336814799645027, "grad_norm": 0.13888821005821228, "learning_rate": 7.57680551628458e-05, "loss": 46.0069, "step": 2467 }, { "epoch": 0.3369513277356816, "grad_norm": 0.19699837267398834, "learning_rate": 7.574941858732279e-05, "loss": 46.0022, "step": 2468 }, { "epoch": 0.33708785582633627, "grad_norm": 0.1387547105550766, "learning_rate": 7.573077714199341e-05, "loss": 46.0077, "step": 2469 }, { "epoch": 0.33722438391699094, "grad_norm": 0.06877775490283966, "learning_rate": 7.571213083038323e-05, "loss": 46.0133, "step": 2470 }, { "epoch": 0.33736091200764556, "grad_norm": 0.12322141975164413, "learning_rate": 7.569347965601865e-05, "loss": 46.0067, "step": 2471 }, { "epoch": 0.33749744009830024, "grad_norm": 0.12219001352787018, "learning_rate": 7.567482362242708e-05, "loss": 46.0017, "step": 2472 }, { "epoch": 0.33763396818895486, "grad_norm": 0.08074057102203369, "learning_rate": 7.565616273313678e-05, "loss": 46.0001, "step": 2473 }, { "epoch": 0.33777049627960953, "grad_norm": 0.05854204297065735, "learning_rate": 7.563749699167695e-05, "loss": 46.0029, "step": 2474 }, { "epoch": 0.3379070243702642, "grad_norm": 0.11666933447122574, "learning_rate": 7.561882640157771e-05, "loss": 46.0058, "step": 2475 }, { "epoch": 0.33804355246091883, "grad_norm": 0.041636597365140915, "learning_rate": 7.560015096637014e-05, "loss": 46.0052, "step": 2476 }, { "epoch": 0.3381800805515735, "grad_norm": 0.05266483873128891, "learning_rate": 7.558147068958614e-05, "loss": 46.0056, "step": 2477 }, { "epoch": 0.3383166086422281, "grad_norm": 0.06649995595216751, "learning_rate": 7.55627855747586e-05, "loss": 46.0089, "step": 2478 }, { "epoch": 0.3384531367328828, "grad_norm": 0.04397900775074959, "learning_rate": 7.554409562542131e-05, "loss": 46.0079, "step": 2479 }, { "epoch": 
0.3385896648235374, "grad_norm": 0.06896942108869553, "learning_rate": 7.552540084510896e-05, "loss": 46.0073, "step": 2480 }, { "epoch": 0.3387261929141921, "grad_norm": 0.06206410378217697, "learning_rate": 7.550670123735714e-05, "loss": 46.0132, "step": 2481 }, { "epoch": 0.33886272100484677, "grad_norm": 0.03279475122690201, "learning_rate": 7.548799680570244e-05, "loss": 46.0054, "step": 2482 }, { "epoch": 0.3389992490955014, "grad_norm": 0.10599425435066223, "learning_rate": 7.546928755368223e-05, "loss": 46.0061, "step": 2483 }, { "epoch": 0.33913577718615606, "grad_norm": 0.03954022750258446, "learning_rate": 7.54505734848349e-05, "loss": 46.0024, "step": 2484 }, { "epoch": 0.3392723052768107, "grad_norm": 0.044845789670944214, "learning_rate": 7.543185460269969e-05, "loss": 46.0025, "step": 2485 }, { "epoch": 0.33940883336746536, "grad_norm": 0.1371038854122162, "learning_rate": 7.541313091081676e-05, "loss": 46.0041, "step": 2486 }, { "epoch": 0.33954536145812003, "grad_norm": 0.03343932330608368, "learning_rate": 7.539440241272724e-05, "loss": 46.0056, "step": 2487 }, { "epoch": 0.33968188954877465, "grad_norm": 0.0428137369453907, "learning_rate": 7.537566911197307e-05, "loss": 46.0036, "step": 2488 }, { "epoch": 0.3398184176394293, "grad_norm": 0.14479891955852509, "learning_rate": 7.535693101209716e-05, "loss": 46.0029, "step": 2489 }, { "epoch": 0.33995494573008395, "grad_norm": 0.05708535760641098, "learning_rate": 7.533818811664333e-05, "loss": 46.0006, "step": 2490 }, { "epoch": 0.3400914738207386, "grad_norm": 0.03681248426437378, "learning_rate": 7.531944042915625e-05, "loss": 46.0017, "step": 2491 }, { "epoch": 0.3402280019113933, "grad_norm": 0.044063739478588104, "learning_rate": 7.530068795318159e-05, "loss": 46.0027, "step": 2492 }, { "epoch": 0.3403645300020479, "grad_norm": 0.05756054073572159, "learning_rate": 7.528193069226586e-05, "loss": 46.0112, "step": 2493 }, { "epoch": 0.3405010580927026, "grad_norm": 0.09396126866340637, "learning_rate": 7.526316864995647e-05, "loss": 46.0107, "step": 2494 }, { "epoch": 0.3406375861833572, "grad_norm": 0.08261001855134964, "learning_rate": 7.524440182980176e-05, "loss": 46.002, "step": 2495 }, { "epoch": 0.3407741142740119, "grad_norm": 0.2134465128183365, "learning_rate": 7.522563023535098e-05, "loss": 46.0022, "step": 2496 }, { "epoch": 0.3409106423646665, "grad_norm": 0.10516143590211868, "learning_rate": 7.520685387015425e-05, "loss": 46.0048, "step": 2497 }, { "epoch": 0.3410471704553212, "grad_norm": 0.17966994643211365, "learning_rate": 7.518807273776263e-05, "loss": 46.0026, "step": 2498 }, { "epoch": 0.34118369854597586, "grad_norm": 0.11630476266145706, "learning_rate": 7.516928684172806e-05, "loss": 46.014, "step": 2499 }, { "epoch": 0.3413202266366305, "grad_norm": 0.23527869582176208, "learning_rate": 7.515049618560337e-05, "loss": 46.0014, "step": 2500 }, { "epoch": 0.34145675472728515, "grad_norm": 0.03661562129855156, "learning_rate": 7.513170077294232e-05, "loss": 46.0026, "step": 2501 }, { "epoch": 0.34159328281793977, "grad_norm": 0.06880245357751846, "learning_rate": 7.511290060729955e-05, "loss": 46.004, "step": 2502 }, { "epoch": 0.34172981090859444, "grad_norm": 0.09073877334594727, "learning_rate": 7.50940956922306e-05, "loss": 46.0065, "step": 2503 }, { "epoch": 0.3418663389992491, "grad_norm": 0.04188260808587074, "learning_rate": 7.507528603129191e-05, "loss": 46.0025, "step": 2504 }, { "epoch": 0.34200286708990374, "grad_norm": 0.07319718599319458, "learning_rate": 7.505647162804083e-05,
"loss": 46.0019, "step": 2505 }, { "epoch": 0.3421393951805584, "grad_norm": 0.04289955645799637, "learning_rate": 7.503765248603559e-05, "loss": 46.0054, "step": 2506 }, { "epoch": 0.34227592327121303, "grad_norm": 0.05238354578614235, "learning_rate": 7.501882860883529e-05, "loss": 46.0047, "step": 2507 }, { "epoch": 0.3424124513618677, "grad_norm": 0.0686480849981308, "learning_rate": 7.500000000000001e-05, "loss": 46.013, "step": 2508 }, { "epoch": 0.3425489794525224, "grad_norm": 0.04991857334971428, "learning_rate": 7.498116666309064e-05, "loss": 46.0033, "step": 2509 }, { "epoch": 0.342685507543177, "grad_norm": 0.07894279062747955, "learning_rate": 7.4962328601669e-05, "loss": 46.004, "step": 2510 }, { "epoch": 0.3428220356338317, "grad_norm": 0.052177030593156815, "learning_rate": 7.494348581929783e-05, "loss": 46.0088, "step": 2511 }, { "epoch": 0.3429585637244863, "grad_norm": 0.06629771739244461, "learning_rate": 7.492463831954069e-05, "loss": 46.0089, "step": 2512 }, { "epoch": 0.343095091815141, "grad_norm": 0.06857644766569138, "learning_rate": 7.490578610596208e-05, "loss": 46.0035, "step": 2513 }, { "epoch": 0.3432316199057956, "grad_norm": 0.03178929537534714, "learning_rate": 7.488692918212743e-05, "loss": 46.0076, "step": 2514 }, { "epoch": 0.34336814799645027, "grad_norm": 0.029100922867655754, "learning_rate": 7.486806755160297e-05, "loss": 46.0059, "step": 2515 }, { "epoch": 0.34350467608710494, "grad_norm": 0.04681701958179474, "learning_rate": 7.484920121795589e-05, "loss": 46.0016, "step": 2516 }, { "epoch": 0.34364120417775956, "grad_norm": 0.046284813433885574, "learning_rate": 7.483033018475424e-05, "loss": 46.0003, "step": 2517 }, { "epoch": 0.34377773226841424, "grad_norm": 0.08544912189245224, "learning_rate": 7.481145445556697e-05, "loss": 46.007, "step": 2518 }, { "epoch": 0.34391426035906886, "grad_norm": 0.0348338820040226, "learning_rate": 7.47925740339639e-05, "loss": 46.0063, "step": 2519 }, { "epoch": 0.34405078844972353, "grad_norm": 0.031770579516887665, "learning_rate": 7.477368892351577e-05, "loss": 46.0103, "step": 2520 }, { "epoch": 0.3441873165403782, "grad_norm": 0.08082900941371918, "learning_rate": 7.475479912779418e-05, "loss": 46.0062, "step": 2521 }, { "epoch": 0.3443238446310328, "grad_norm": 0.06575899571180344, "learning_rate": 7.473590465037163e-05, "loss": 46.0031, "step": 2522 }, { "epoch": 0.3444603727216875, "grad_norm": 0.09333646297454834, "learning_rate": 7.471700549482149e-05, "loss": 46.0114, "step": 2523 }, { "epoch": 0.3445969008123421, "grad_norm": 0.08236744999885559, "learning_rate": 7.469810166471802e-05, "loss": 46.008, "step": 2524 }, { "epoch": 0.3447334289029968, "grad_norm": 0.07366789877414703, "learning_rate": 7.467919316363638e-05, "loss": 46.0045, "step": 2525 }, { "epoch": 0.3448699569936514, "grad_norm": 0.06955844163894653, "learning_rate": 7.46602799951526e-05, "loss": 46.0003, "step": 2526 }, { "epoch": 0.3450064850843061, "grad_norm": 0.09324923157691956, "learning_rate": 7.46413621628436e-05, "loss": 46.0048, "step": 2527 }, { "epoch": 0.34514301317496077, "grad_norm": 0.03874450549483299, "learning_rate": 7.462243967028716e-05, "loss": 46.008, "step": 2528 }, { "epoch": 0.3452795412656154, "grad_norm": 0.07901052385568619, "learning_rate": 7.460351252106198e-05, "loss": 46.0097, "step": 2529 }, { "epoch": 0.34541606935627006, "grad_norm": 0.12711898982524872, "learning_rate": 7.458458071874759e-05, "loss": 46.0068, "step": 2530 }, { "epoch": 0.3455525974469247, "grad_norm": 0.05074112117290497, 
"learning_rate": 7.456564426692445e-05, "loss": 46.0028, "step": 2531 }, { "epoch": 0.34568912553757936, "grad_norm": 0.07914649695158005, "learning_rate": 7.454670316917388e-05, "loss": 46.001, "step": 2532 }, { "epoch": 0.34582565362823403, "grad_norm": 0.03099147230386734, "learning_rate": 7.452775742907805e-05, "loss": 46.003, "step": 2533 }, { "epoch": 0.34596218171888865, "grad_norm": 0.059430960565805435, "learning_rate": 7.450880705022007e-05, "loss": 46.0015, "step": 2534 }, { "epoch": 0.3460987098095433, "grad_norm": 0.07428580522537231, "learning_rate": 7.448985203618387e-05, "loss": 46.0013, "step": 2535 }, { "epoch": 0.34623523790019795, "grad_norm": 0.031961847096681595, "learning_rate": 7.447089239055428e-05, "loss": 46.0022, "step": 2536 }, { "epoch": 0.3463717659908526, "grad_norm": 0.04307635501027107, "learning_rate": 7.445192811691702e-05, "loss": 46.0077, "step": 2537 }, { "epoch": 0.3465082940815073, "grad_norm": 0.0474204495549202, "learning_rate": 7.443295921885865e-05, "loss": 46.0044, "step": 2538 }, { "epoch": 0.3466448221721619, "grad_norm": 0.06332392245531082, "learning_rate": 7.441398569996663e-05, "loss": 46.0046, "step": 2539 }, { "epoch": 0.3467813502628166, "grad_norm": 0.038991570472717285, "learning_rate": 7.439500756382929e-05, "loss": 46.0055, "step": 2540 }, { "epoch": 0.3469178783534712, "grad_norm": 0.11502372473478317, "learning_rate": 7.437602481403583e-05, "loss": 46.0138, "step": 2541 }, { "epoch": 0.3470544064441259, "grad_norm": 0.05362650752067566, "learning_rate": 7.435703745417631e-05, "loss": 46.0058, "step": 2542 }, { "epoch": 0.3471909345347805, "grad_norm": 0.0737246721982956, "learning_rate": 7.433804548784173e-05, "loss": 46.0015, "step": 2543 }, { "epoch": 0.3473274626254352, "grad_norm": 0.11727206408977509, "learning_rate": 7.431904891862382e-05, "loss": 46.0054, "step": 2544 }, { "epoch": 0.34746399071608985, "grad_norm": 0.19689880311489105, "learning_rate": 7.430004775011536e-05, "loss": 46.0114, "step": 2545 }, { "epoch": 0.3476005188067445, "grad_norm": 0.1463821828365326, "learning_rate": 7.428104198590984e-05, "loss": 46.002, "step": 2546 }, { "epoch": 0.34773704689739915, "grad_norm": 0.1599053293466568, "learning_rate": 7.426203162960172e-05, "loss": 46.0051, "step": 2547 }, { "epoch": 0.34787357498805377, "grad_norm": 0.12383975833654404, "learning_rate": 7.424301668478626e-05, "loss": 46.0047, "step": 2548 }, { "epoch": 0.34801010307870844, "grad_norm": 0.1794418841600418, "learning_rate": 7.422399715505967e-05, "loss": 46.0027, "step": 2549 }, { "epoch": 0.3481466311693631, "grad_norm": 0.09106852114200592, "learning_rate": 7.420497304401894e-05, "loss": 46.0, "step": 2550 }, { "epoch": 0.34828315926001774, "grad_norm": 0.050388094037771225, "learning_rate": 7.4185944355262e-05, "loss": 46.0125, "step": 2551 }, { "epoch": 0.3484196873506724, "grad_norm": 0.06737316399812698, "learning_rate": 7.416691109238758e-05, "loss": 46.0059, "step": 2552 }, { "epoch": 0.34855621544132703, "grad_norm": 0.1261681765317917, "learning_rate": 7.414787325899533e-05, "loss": 46.0027, "step": 2553 }, { "epoch": 0.3486927435319817, "grad_norm": 0.08611474186182022, "learning_rate": 7.412883085868573e-05, "loss": 46.0019, "step": 2554 }, { "epoch": 0.3488292716226364, "grad_norm": 0.07692024856805801, "learning_rate": 7.410978389506014e-05, "loss": 46.0063, "step": 2555 }, { "epoch": 0.348965799713291, "grad_norm": 0.050584759563207626, "learning_rate": 7.409073237172077e-05, "loss": 46.0073, "step": 2556 }, { "epoch": 0.3491023278039457, 
"grad_norm": 0.06698505580425262, "learning_rate": 7.407167629227072e-05, "loss": 46.0021, "step": 2557 }, { "epoch": 0.3492388558946003, "grad_norm": 0.047339461743831635, "learning_rate": 7.405261566031391e-05, "loss": 46.0051, "step": 2558 }, { "epoch": 0.349375383985255, "grad_norm": 0.048789892345666885, "learning_rate": 7.403355047945517e-05, "loss": 46.0008, "step": 2559 }, { "epoch": 0.3495119120759096, "grad_norm": 0.13539057970046997, "learning_rate": 7.401448075330011e-05, "loss": 46.0045, "step": 2560 }, { "epoch": 0.34964844016656427, "grad_norm": 0.0332566499710083, "learning_rate": 7.399540648545532e-05, "loss": 46.0075, "step": 2561 }, { "epoch": 0.34978496825721894, "grad_norm": 0.12015832960605621, "learning_rate": 7.397632767952814e-05, "loss": 46.0097, "step": 2562 }, { "epoch": 0.34992149634787356, "grad_norm": 0.0345270000398159, "learning_rate": 7.395724433912682e-05, "loss": 46.0138, "step": 2563 }, { "epoch": 0.35005802443852824, "grad_norm": 0.09587839245796204, "learning_rate": 7.393815646786046e-05, "loss": 46.0075, "step": 2564 }, { "epoch": 0.35019455252918286, "grad_norm": 0.0510622076690197, "learning_rate": 7.391906406933902e-05, "loss": 46.0046, "step": 2565 }, { "epoch": 0.35033108061983753, "grad_norm": 0.09663897007703781, "learning_rate": 7.389996714717329e-05, "loss": 46.0089, "step": 2566 }, { "epoch": 0.3504676087104922, "grad_norm": 0.11563468724489212, "learning_rate": 7.388086570497497e-05, "loss": 46.0064, "step": 2567 }, { "epoch": 0.3506041368011468, "grad_norm": 0.03634456917643547, "learning_rate": 7.386175974635657e-05, "loss": 46.0106, "step": 2568 }, { "epoch": 0.3507406648918015, "grad_norm": 0.10218404233455658, "learning_rate": 7.384264927493143e-05, "loss": 46.009, "step": 2569 }, { "epoch": 0.3508771929824561, "grad_norm": 0.051879528909921646, "learning_rate": 7.382353429431382e-05, "loss": 46.012, "step": 2570 }, { "epoch": 0.3510137210731108, "grad_norm": 0.13064716756343842, "learning_rate": 7.380441480811882e-05, "loss": 46.005, "step": 2571 }, { "epoch": 0.35115024916376547, "grad_norm": 0.10757128149271011, "learning_rate": 7.378529081996232e-05, "loss": 46.0078, "step": 2572 }, { "epoch": 0.3512867772544201, "grad_norm": 0.09254384785890579, "learning_rate": 7.376616233346119e-05, "loss": 46.0016, "step": 2573 }, { "epoch": 0.35142330534507477, "grad_norm": 0.05731824412941933, "learning_rate": 7.374702935223298e-05, "loss": 46.0058, "step": 2574 }, { "epoch": 0.3515598334357294, "grad_norm": 0.06030096486210823, "learning_rate": 7.372789187989622e-05, "loss": 46.004, "step": 2575 }, { "epoch": 0.35169636152638406, "grad_norm": 0.09936447441577911, "learning_rate": 7.370874992007024e-05, "loss": 46.0053, "step": 2576 }, { "epoch": 0.3518328896170387, "grad_norm": 0.047900546342134476, "learning_rate": 7.368960347637521e-05, "loss": 46.0023, "step": 2577 }, { "epoch": 0.35196941770769336, "grad_norm": 0.0434957854449749, "learning_rate": 7.367045255243216e-05, "loss": 46.0053, "step": 2578 }, { "epoch": 0.35210594579834803, "grad_norm": 0.0663352757692337, "learning_rate": 7.3651297151863e-05, "loss": 46.0052, "step": 2579 }, { "epoch": 0.35224247388900265, "grad_norm": 0.03944587707519531, "learning_rate": 7.363213727829045e-05, "loss": 46.0142, "step": 2580 }, { "epoch": 0.3523790019796573, "grad_norm": 0.04969996586441994, "learning_rate": 7.361297293533805e-05, "loss": 46.003, "step": 2581 }, { "epoch": 0.35251553007031194, "grad_norm": 0.10179700702428818, "learning_rate": 7.359380412663024e-05, "loss": 46.0034, "step": 
2582 }, { "epoch": 0.3526520581609666, "grad_norm": 0.06169659644365311, "learning_rate": 7.35746308557923e-05, "loss": 46.0058, "step": 2583 }, { "epoch": 0.3527885862516213, "grad_norm": 0.04011627659201622, "learning_rate": 7.355545312645028e-05, "loss": 46.0014, "step": 2584 }, { "epoch": 0.3529251143422759, "grad_norm": 0.056108396500349045, "learning_rate": 7.35362709422312e-05, "loss": 46.0007, "step": 2585 }, { "epoch": 0.3530616424329306, "grad_norm": 0.08297691494226456, "learning_rate": 7.351708430676279e-05, "loss": 46.0063, "step": 2586 }, { "epoch": 0.3531981705235852, "grad_norm": 0.04847220331430435, "learning_rate": 7.349789322367373e-05, "loss": 46.0081, "step": 2587 }, { "epoch": 0.3533346986142399, "grad_norm": 0.08158842474222183, "learning_rate": 7.347869769659346e-05, "loss": 46.0028, "step": 2588 }, { "epoch": 0.35347122670489456, "grad_norm": 0.061446525156497955, "learning_rate": 7.345949772915232e-05, "loss": 46.0045, "step": 2589 }, { "epoch": 0.3536077547955492, "grad_norm": 0.11454685777425766, "learning_rate": 7.344029332498143e-05, "loss": 46.0009, "step": 2590 }, { "epoch": 0.35374428288620385, "grad_norm": 0.06195387244224548, "learning_rate": 7.342108448771283e-05, "loss": 46.0109, "step": 2591 }, { "epoch": 0.3538808109768585, "grad_norm": 0.1030483990907669, "learning_rate": 7.34018712209793e-05, "loss": 46.0092, "step": 2592 }, { "epoch": 0.35401733906751315, "grad_norm": 0.09282976388931274, "learning_rate": 7.338265352841457e-05, "loss": 46.0074, "step": 2593 }, { "epoch": 0.35415386715816777, "grad_norm": 0.1384211629629135, "learning_rate": 7.33634314136531e-05, "loss": 46.0094, "step": 2594 }, { "epoch": 0.35429039524882244, "grad_norm": 0.08021644502878189, "learning_rate": 7.334420488033025e-05, "loss": 46.0014, "step": 2595 }, { "epoch": 0.3544269233394771, "grad_norm": 0.1011502742767334, "learning_rate": 7.33249739320822e-05, "loss": 46.0051, "step": 2596 }, { "epoch": 0.35456345143013174, "grad_norm": 0.05961092188954353, "learning_rate": 7.330573857254598e-05, "loss": 46.0047, "step": 2597 }, { "epoch": 0.3546999795207864, "grad_norm": 0.21362051367759705, "learning_rate": 7.32864988053594e-05, "loss": 46.0032, "step": 2598 }, { "epoch": 0.35483650761144103, "grad_norm": 0.21173438429832458, "learning_rate": 7.326725463416117e-05, "loss": 46.0044, "step": 2599 }, { "epoch": 0.3549730357020957, "grad_norm": 0.1896902471780777, "learning_rate": 7.324800606259079e-05, "loss": 46.0081, "step": 2600 }, { "epoch": 0.3551095637927504, "grad_norm": 0.0659048780798912, "learning_rate": 7.322875309428863e-05, "loss": 46.0092, "step": 2601 }, { "epoch": 0.355246091883405, "grad_norm": 0.08934059739112854, "learning_rate": 7.320949573289584e-05, "loss": 46.0017, "step": 2602 }, { "epoch": 0.3553826199740597, "grad_norm": 0.05569083243608475, "learning_rate": 7.319023398205444e-05, "loss": 46.0051, "step": 2603 }, { "epoch": 0.3555191480647143, "grad_norm": 0.06217579171061516, "learning_rate": 7.317096784540729e-05, "loss": 46.0041, "step": 2604 }, { "epoch": 0.35565567615536897, "grad_norm": 0.06427934020757675, "learning_rate": 7.315169732659801e-05, "loss": 46.0064, "step": 2605 }, { "epoch": 0.35579220424602365, "grad_norm": 0.060564130544662476, "learning_rate": 7.313242242927115e-05, "loss": 46.0019, "step": 2606 }, { "epoch": 0.35592873233667827, "grad_norm": 0.032597921788692474, "learning_rate": 7.3113143157072e-05, "loss": 46.0008, "step": 2607 }, { "epoch": 0.35606526042733294, "grad_norm": 0.05207895115017891, "learning_rate": 
7.309385951364671e-05, "loss": 46.0004, "step": 2608 }, { "epoch": 0.35620178851798756, "grad_norm": 0.0324837751686573, "learning_rate": 7.307457150264231e-05, "loss": 46.0032, "step": 2609 }, { "epoch": 0.35633831660864224, "grad_norm": 0.06152229383587837, "learning_rate": 7.305527912770656e-05, "loss": 46.0027, "step": 2610 }, { "epoch": 0.35647484469929686, "grad_norm": 0.04943689703941345, "learning_rate": 7.30359823924881e-05, "loss": 46.003, "step": 2611 }, { "epoch": 0.35661137278995153, "grad_norm": 0.0743120014667511, "learning_rate": 7.30166813006364e-05, "loss": 46.0072, "step": 2612 }, { "epoch": 0.3567479008806062, "grad_norm": 0.08020827919244766, "learning_rate": 7.29973758558017e-05, "loss": 46.0084, "step": 2613 }, { "epoch": 0.3568844289712608, "grad_norm": 0.07973899692296982, "learning_rate": 7.297806606163516e-05, "loss": 46.0084, "step": 2614 }, { "epoch": 0.3570209570619155, "grad_norm": 0.047957826405763626, "learning_rate": 7.295875192178866e-05, "loss": 46.0119, "step": 2615 }, { "epoch": 0.3571574851525701, "grad_norm": 0.03887777775526047, "learning_rate": 7.293943343991497e-05, "loss": 46.0054, "step": 2616 }, { "epoch": 0.3572940132432248, "grad_norm": 0.07024010270833969, "learning_rate": 7.292011061966765e-05, "loss": 46.0143, "step": 2617 }, { "epoch": 0.35743054133387947, "grad_norm": 0.0517759844660759, "learning_rate": 7.290078346470111e-05, "loss": 46.0111, "step": 2618 }, { "epoch": 0.3575670694245341, "grad_norm": 0.0683019682765007, "learning_rate": 7.288145197867052e-05, "loss": 46.0075, "step": 2619 }, { "epoch": 0.35770359751518876, "grad_norm": 0.23568561673164368, "learning_rate": 7.286211616523193e-05, "loss": 46.0073, "step": 2620 }, { "epoch": 0.3578401256058434, "grad_norm": 0.18810023367404938, "learning_rate": 7.284277602804219e-05, "loss": 46.0084, "step": 2621 }, { "epoch": 0.35797665369649806, "grad_norm": 0.07744306325912476, "learning_rate": 7.282343157075896e-05, "loss": 46.0133, "step": 2622 }, { "epoch": 0.3581131817871527, "grad_norm": 0.0675233006477356, "learning_rate": 7.280408279704071e-05, "loss": 46.0075, "step": 2623 }, { "epoch": 0.35824970987780735, "grad_norm": 0.0728139653801918, "learning_rate": 7.278472971054676e-05, "loss": 46.0015, "step": 2624 }, { "epoch": 0.35838623796846203, "grad_norm": 0.06776165962219238, "learning_rate": 7.276537231493721e-05, "loss": 46.0035, "step": 2625 }, { "epoch": 0.35852276605911665, "grad_norm": 0.26131442189216614, "learning_rate": 7.274601061387298e-05, "loss": 46.0056, "step": 2626 }, { "epoch": 0.3586592941497713, "grad_norm": 0.046106401830911636, "learning_rate": 7.272664461101582e-05, "loss": 46.0051, "step": 2627 }, { "epoch": 0.35879582224042594, "grad_norm": 0.0497218519449234, "learning_rate": 7.270727431002831e-05, "loss": 46.0041, "step": 2628 }, { "epoch": 0.3589323503310806, "grad_norm": 0.05877493694424629, "learning_rate": 7.268789971457376e-05, "loss": 46.0063, "step": 2629 }, { "epoch": 0.3590688784217353, "grad_norm": 0.04342946037650108, "learning_rate": 7.26685208283164e-05, "loss": 46.0076, "step": 2630 }, { "epoch": 0.3592054065123899, "grad_norm": 0.13190822303295135, "learning_rate": 7.264913765492121e-05, "loss": 46.001, "step": 2631 }, { "epoch": 0.3593419346030446, "grad_norm": 0.027610696852207184, "learning_rate": 7.2629750198054e-05, "loss": 46.0066, "step": 2632 }, { "epoch": 0.3594784626936992, "grad_norm": 0.11564499139785767, "learning_rate": 7.261035846138137e-05, "loss": 46.0005, "step": 2633 }, { "epoch": 0.3596149907843539, "grad_norm": 
0.05402936041355133, "learning_rate": 7.259096244857075e-05, "loss": 46.0029, "step": 2634 }, { "epoch": 0.35975151887500856, "grad_norm": 0.056319646537303925, "learning_rate": 7.257156216329036e-05, "loss": 46.011, "step": 2635 }, { "epoch": 0.3598880469656632, "grad_norm": 0.05918372794985771, "learning_rate": 7.255215760920925e-05, "loss": 46.006, "step": 2636 }, { "epoch": 0.36002457505631785, "grad_norm": 0.0369877927005291, "learning_rate": 7.253274878999727e-05, "loss": 46.0071, "step": 2637 }, { "epoch": 0.36016110314697247, "grad_norm": 0.11715282499790192, "learning_rate": 7.251333570932507e-05, "loss": 46.0049, "step": 2638 }, { "epoch": 0.36029763123762715, "grad_norm": 0.03255638852715492, "learning_rate": 7.24939183708641e-05, "loss": 46.0074, "step": 2639 }, { "epoch": 0.36043415932828177, "grad_norm": 0.03968512639403343, "learning_rate": 7.247449677828664e-05, "loss": 46.0006, "step": 2640 }, { "epoch": 0.36057068741893644, "grad_norm": 0.07155174016952515, "learning_rate": 7.245507093526574e-05, "loss": 46.0026, "step": 2641 }, { "epoch": 0.3607072155095911, "grad_norm": 0.07613620162010193, "learning_rate": 7.243564084547531e-05, "loss": 46.0047, "step": 2642 }, { "epoch": 0.36084374360024574, "grad_norm": 0.032871320843696594, "learning_rate": 7.241620651258998e-05, "loss": 46.0078, "step": 2643 }, { "epoch": 0.3609802716909004, "grad_norm": 0.07620126754045486, "learning_rate": 7.239676794028526e-05, "loss": 46.0052, "step": 2644 }, { "epoch": 0.36111679978155503, "grad_norm": 0.08104418963193893, "learning_rate": 7.23773251322374e-05, "loss": 46.0024, "step": 2645 }, { "epoch": 0.3612533278722097, "grad_norm": 0.1853868067264557, "learning_rate": 7.235787809212352e-05, "loss": 46.0041, "step": 2646 }, { "epoch": 0.3613898559628644, "grad_norm": 0.17076504230499268, "learning_rate": 7.233842682362149e-05, "loss": 46.0063, "step": 2647 }, { "epoch": 0.361526384053519, "grad_norm": 0.05861486494541168, "learning_rate": 7.231897133040997e-05, "loss": 46.0141, "step": 2648 }, { "epoch": 0.3616629121441737, "grad_norm": 0.09442692250013351, "learning_rate": 7.229951161616846e-05, "loss": 46.0028, "step": 2649 }, { "epoch": 0.3617994402348283, "grad_norm": 0.12586447596549988, "learning_rate": 7.228004768457724e-05, "loss": 46.0068, "step": 2650 }, { "epoch": 0.36193596832548297, "grad_norm": 0.047748763114213943, "learning_rate": 7.226057953931738e-05, "loss": 46.014, "step": 2651 }, { "epoch": 0.36207249641613765, "grad_norm": 0.03841572627425194, "learning_rate": 7.224110718407074e-05, "loss": 46.0015, "step": 2652 }, { "epoch": 0.36220902450679227, "grad_norm": 0.03327563405036926, "learning_rate": 7.222163062252001e-05, "loss": 46.0094, "step": 2653 }, { "epoch": 0.36234555259744694, "grad_norm": 0.0845429077744484, "learning_rate": 7.220214985834866e-05, "loss": 46.0016, "step": 2654 }, { "epoch": 0.36248208068810156, "grad_norm": 0.05277141183614731, "learning_rate": 7.218266489524092e-05, "loss": 46.0046, "step": 2655 }, { "epoch": 0.36261860877875624, "grad_norm": 0.027610039338469505, "learning_rate": 7.216317573688188e-05, "loss": 46.002, "step": 2656 }, { "epoch": 0.36275513686941085, "grad_norm": 0.07933810353279114, "learning_rate": 7.214368238695736e-05, "loss": 46.0001, "step": 2657 }, { "epoch": 0.36289166496006553, "grad_norm": 0.04011527821421623, "learning_rate": 7.212418484915403e-05, "loss": 46.0025, "step": 2658 }, { "epoch": 0.3630281930507202, "grad_norm": 0.16100534796714783, "learning_rate": 7.210468312715926e-05, "loss": 46.0086, "step": 2659 }, 
{ "epoch": 0.3631647211413748, "grad_norm": 0.07723020762205124, "learning_rate": 7.208517722466135e-05, "loss": 46.0068, "step": 2660 }, { "epoch": 0.3633012492320295, "grad_norm": 0.05796537920832634, "learning_rate": 7.206566714534925e-05, "loss": 46.0094, "step": 2661 }, { "epoch": 0.3634377773226841, "grad_norm": 0.08736076205968857, "learning_rate": 7.204615289291283e-05, "loss": 46.0063, "step": 2662 }, { "epoch": 0.3635743054133388, "grad_norm": 0.12321504205465317, "learning_rate": 7.202663447104263e-05, "loss": 46.0066, "step": 2663 }, { "epoch": 0.36371083350399347, "grad_norm": 0.04877617955207825, "learning_rate": 7.200711188343004e-05, "loss": 46.0016, "step": 2664 }, { "epoch": 0.3638473615946481, "grad_norm": 0.05953492969274521, "learning_rate": 7.198758513376725e-05, "loss": 46.0074, "step": 2665 }, { "epoch": 0.36398388968530276, "grad_norm": 0.0979892760515213, "learning_rate": 7.19680542257472e-05, "loss": 46.0041, "step": 2666 }, { "epoch": 0.3641204177759574, "grad_norm": 0.08652079105377197, "learning_rate": 7.194851916306364e-05, "loss": 46.0074, "step": 2667 }, { "epoch": 0.36425694586661206, "grad_norm": 0.09696731716394424, "learning_rate": 7.192897994941111e-05, "loss": 46.002, "step": 2668 }, { "epoch": 0.36439347395726673, "grad_norm": 0.052554577589035034, "learning_rate": 7.190943658848488e-05, "loss": 46.0119, "step": 2669 }, { "epoch": 0.36453000204792135, "grad_norm": 0.05370837077498436, "learning_rate": 7.18898890839811e-05, "loss": 46.0035, "step": 2670 }, { "epoch": 0.36466653013857603, "grad_norm": 0.04071200266480446, "learning_rate": 7.187033743959661e-05, "loss": 46.0063, "step": 2671 }, { "epoch": 0.36480305822923065, "grad_norm": 0.09991610050201416, "learning_rate": 7.185078165902913e-05, "loss": 46.0028, "step": 2672 }, { "epoch": 0.3649395863198853, "grad_norm": 0.09013056010007858, "learning_rate": 7.183122174597705e-05, "loss": 46.0023, "step": 2673 }, { "epoch": 0.36507611441053994, "grad_norm": 0.06065017357468605, "learning_rate": 7.181165770413964e-05, "loss": 46.0071, "step": 2674 }, { "epoch": 0.3652126425011946, "grad_norm": 0.0655297189950943, "learning_rate": 7.179208953721688e-05, "loss": 46.0048, "step": 2675 }, { "epoch": 0.3653491705918493, "grad_norm": 0.06768113374710083, "learning_rate": 7.177251724890956e-05, "loss": 46.0005, "step": 2676 }, { "epoch": 0.3654856986825039, "grad_norm": 0.042774491012096405, "learning_rate": 7.175294084291928e-05, "loss": 46.0063, "step": 2677 }, { "epoch": 0.3656222267731586, "grad_norm": 0.06134216859936714, "learning_rate": 7.173336032294836e-05, "loss": 46.0058, "step": 2678 }, { "epoch": 0.3657587548638132, "grad_norm": 0.06768768280744553, "learning_rate": 7.171377569269994e-05, "loss": 46.0113, "step": 2679 }, { "epoch": 0.3658952829544679, "grad_norm": 0.17517529428005219, "learning_rate": 7.169418695587791e-05, "loss": 46.0005, "step": 2680 }, { "epoch": 0.36603181104512256, "grad_norm": 0.11045275628566742, "learning_rate": 7.167459411618697e-05, "loss": 46.0034, "step": 2681 }, { "epoch": 0.3661683391357772, "grad_norm": 0.04271414875984192, "learning_rate": 7.165499717733254e-05, "loss": 46.0052, "step": 2682 }, { "epoch": 0.36630486722643185, "grad_norm": 0.06376302987337112, "learning_rate": 7.163539614302088e-05, "loss": 46.0002, "step": 2683 }, { "epoch": 0.36644139531708647, "grad_norm": 0.08130251616239548, "learning_rate": 7.161579101695902e-05, "loss": 46.012, "step": 2684 }, { "epoch": 0.36657792340774115, "grad_norm": 0.0662224069237709, "learning_rate": 
7.159618180285468e-05, "loss": 46.0008, "step": 2685 }, { "epoch": 0.3667144514983958, "grad_norm": 0.03827563673257828, "learning_rate": 7.157656850441647e-05, "loss": 46.0038, "step": 2686 }, { "epoch": 0.36685097958905044, "grad_norm": 0.16682539880275726, "learning_rate": 7.155695112535365e-05, "loss": 46.0059, "step": 2687 }, { "epoch": 0.3669875076797051, "grad_norm": 0.0408146008849144, "learning_rate": 7.153732966937638e-05, "loss": 46.003, "step": 2688 }, { "epoch": 0.36712403577035974, "grad_norm": 0.05431431531906128, "learning_rate": 7.151770414019549e-05, "loss": 46.002, "step": 2689 }, { "epoch": 0.3672605638610144, "grad_norm": 0.08994335681200027, "learning_rate": 7.149807454152265e-05, "loss": 46.0058, "step": 2690 }, { "epoch": 0.36739709195166903, "grad_norm": 0.07742220163345337, "learning_rate": 7.147844087707024e-05, "loss": 46.0013, "step": 2691 }, { "epoch": 0.3675336200423237, "grad_norm": 0.1148194968700409, "learning_rate": 7.145880315055144e-05, "loss": 46.0076, "step": 2692 }, { "epoch": 0.3676701481329784, "grad_norm": 0.02913118526339531, "learning_rate": 7.143916136568021e-05, "loss": 46.0105, "step": 2693 }, { "epoch": 0.367806676223633, "grad_norm": 0.19818547368049622, "learning_rate": 7.141951552617125e-05, "loss": 46.0052, "step": 2694 }, { "epoch": 0.3679432043142877, "grad_norm": 0.10295163094997406, "learning_rate": 7.139986563574002e-05, "loss": 46.007, "step": 2695 }, { "epoch": 0.3680797324049423, "grad_norm": 0.16522735357284546, "learning_rate": 7.138021169810281e-05, "loss": 46.0058, "step": 2696 }, { "epoch": 0.36821626049559697, "grad_norm": 0.055018581449985504, "learning_rate": 7.13605537169766e-05, "loss": 46.0013, "step": 2697 }, { "epoch": 0.36835278858625164, "grad_norm": 0.04274178668856621, "learning_rate": 7.134089169607919e-05, "loss": 46.0138, "step": 2698 }, { "epoch": 0.36848931667690626, "grad_norm": 0.19898296892642975, "learning_rate": 7.13212256391291e-05, "loss": 46.0044, "step": 2699 }, { "epoch": 0.36862584476756094, "grad_norm": 0.3457675576210022, "learning_rate": 7.130155554984563e-05, "loss": 46.0, "step": 2700 }, { "epoch": 0.36876237285821556, "grad_norm": 0.04829472675919533, "learning_rate": 7.128188143194884e-05, "loss": 46.0034, "step": 2701 }, { "epoch": 0.36889890094887023, "grad_norm": 0.0337841659784317, "learning_rate": 7.12622032891596e-05, "loss": 46.0053, "step": 2702 }, { "epoch": 0.3690354290395249, "grad_norm": 0.08164691925048828, "learning_rate": 7.124252112519945e-05, "loss": 46.0032, "step": 2703 }, { "epoch": 0.36917195713017953, "grad_norm": 0.045846544206142426, "learning_rate": 7.122283494379076e-05, "loss": 46.0082, "step": 2704 }, { "epoch": 0.3693084852208342, "grad_norm": 0.048335202038288116, "learning_rate": 7.120314474865664e-05, "loss": 46.0099, "step": 2705 }, { "epoch": 0.3694450133114888, "grad_norm": 0.044687625020742416, "learning_rate": 7.118345054352095e-05, "loss": 46.0045, "step": 2706 }, { "epoch": 0.3695815414021435, "grad_norm": 0.08502571284770966, "learning_rate": 7.116375233210832e-05, "loss": 46.0039, "step": 2707 }, { "epoch": 0.3697180694927981, "grad_norm": 0.049820512533187866, "learning_rate": 7.114405011814414e-05, "loss": 46.0056, "step": 2708 }, { "epoch": 0.3698545975834528, "grad_norm": 0.04504777491092682, "learning_rate": 7.112434390535456e-05, "loss": 46.0012, "step": 2709 }, { "epoch": 0.36999112567410747, "grad_norm": 0.05477355420589447, "learning_rate": 7.110463369746644e-05, "loss": 46.0034, "step": 2710 }, { "epoch": 0.3701276537647621, "grad_norm": 
0.08364272862672806, "learning_rate": 7.108491949820747e-05, "loss": 46.0053, "step": 2711 }, { "epoch": 0.37026418185541676, "grad_norm": 0.1036464273929596, "learning_rate": 7.106520131130604e-05, "loss": 46.0069, "step": 2712 }, { "epoch": 0.3704007099460714, "grad_norm": 0.047879092395305634, "learning_rate": 7.104547914049132e-05, "loss": 46.0021, "step": 2713 }, { "epoch": 0.37053723803672606, "grad_norm": 0.06658387929201126, "learning_rate": 7.102575298949322e-05, "loss": 46.0046, "step": 2714 }, { "epoch": 0.37067376612738073, "grad_norm": 0.09466984868049622, "learning_rate": 7.100602286204241e-05, "loss": 46.009, "step": 2715 }, { "epoch": 0.37081029421803535, "grad_norm": 0.02285481058061123, "learning_rate": 7.09862887618703e-05, "loss": 46.0097, "step": 2716 }, { "epoch": 0.37094682230869, "grad_norm": 0.144430473446846, "learning_rate": 7.09665506927091e-05, "loss": 46.0091, "step": 2717 }, { "epoch": 0.37108335039934465, "grad_norm": 0.1069277748465538, "learning_rate": 7.094680865829169e-05, "loss": 46.0029, "step": 2718 }, { "epoch": 0.3712198784899993, "grad_norm": 0.041800953447818756, "learning_rate": 7.092706266235176e-05, "loss": 46.0114, "step": 2719 }, { "epoch": 0.371356406580654, "grad_norm": 0.07179572433233261, "learning_rate": 7.09073127086237e-05, "loss": 46.0006, "step": 2720 }, { "epoch": 0.3714929346713086, "grad_norm": 0.09592409431934357, "learning_rate": 7.088755880084274e-05, "loss": 46.0077, "step": 2721 }, { "epoch": 0.3716294627619633, "grad_norm": 0.15486924350261688, "learning_rate": 7.086780094274474e-05, "loss": 46.0102, "step": 2722 }, { "epoch": 0.3717659908526179, "grad_norm": 0.080518938601017, "learning_rate": 7.084803913806641e-05, "loss": 46.0028, "step": 2723 }, { "epoch": 0.3719025189432726, "grad_norm": 0.04608803614974022, "learning_rate": 7.082827339054513e-05, "loss": 46.0028, "step": 2724 }, { "epoch": 0.3720390470339272, "grad_norm": 0.06877920031547546, "learning_rate": 7.080850370391907e-05, "loss": 46.005, "step": 2725 }, { "epoch": 0.3721755751245819, "grad_norm": 0.086606964468956, "learning_rate": 7.078873008192712e-05, "loss": 46.0033, "step": 2726 }, { "epoch": 0.37231210321523656, "grad_norm": 0.061082419008016586, "learning_rate": 7.076895252830895e-05, "loss": 46.0062, "step": 2727 }, { "epoch": 0.3724486313058912, "grad_norm": 0.07738859951496124, "learning_rate": 7.074917104680493e-05, "loss": 46.0075, "step": 2728 }, { "epoch": 0.37258515939654585, "grad_norm": 0.11354585736989975, "learning_rate": 7.072938564115618e-05, "loss": 46.0125, "step": 2729 }, { "epoch": 0.37272168748720047, "grad_norm": 0.11330889910459518, "learning_rate": 7.070959631510463e-05, "loss": 46.0012, "step": 2730 }, { "epoch": 0.37285821557785515, "grad_norm": 0.05331442505121231, "learning_rate": 7.068980307239282e-05, "loss": 46.0073, "step": 2731 }, { "epoch": 0.3729947436685098, "grad_norm": 0.11238349229097366, "learning_rate": 7.067000591676416e-05, "loss": 46.0072, "step": 2732 }, { "epoch": 0.37313127175916444, "grad_norm": 0.03777269273996353, "learning_rate": 7.065020485196271e-05, "loss": 46.0012, "step": 2733 }, { "epoch": 0.3732677998498191, "grad_norm": 0.07292987406253815, "learning_rate": 7.063039988173335e-05, "loss": 46.0139, "step": 2734 }, { "epoch": 0.37340432794047373, "grad_norm": 0.09869389981031418, "learning_rate": 7.061059100982164e-05, "loss": 46.0041, "step": 2735 }, { "epoch": 0.3735408560311284, "grad_norm": 0.0387304350733757, "learning_rate": 7.059077823997385e-05, "loss": 46.0032, "step": 2736 }, { "epoch": 
0.37367738412178303, "grad_norm": 0.0821879506111145, "learning_rate": 7.05709615759371e-05, "loss": 46.0082, "step": 2737 }, { "epoch": 0.3738139122124377, "grad_norm": 0.06673094630241394, "learning_rate": 7.055114102145911e-05, "loss": 46.0014, "step": 2738 }, { "epoch": 0.3739504403030924, "grad_norm": 0.059320855885744095, "learning_rate": 7.053131658028845e-05, "loss": 46.0047, "step": 2739 }, { "epoch": 0.374086968393747, "grad_norm": 0.0409160740673542, "learning_rate": 7.051148825617435e-05, "loss": 46.0013, "step": 2740 }, { "epoch": 0.3742234964844017, "grad_norm": 0.028405681252479553, "learning_rate": 7.049165605286683e-05, "loss": 46.0098, "step": 2741 }, { "epoch": 0.3743600245750563, "grad_norm": 0.08434174954891205, "learning_rate": 7.047181997411657e-05, "loss": 46.0064, "step": 2742 }, { "epoch": 0.37449655266571097, "grad_norm": 0.045228540897369385, "learning_rate": 7.045198002367505e-05, "loss": 46.0095, "step": 2743 }, { "epoch": 0.37463308075636564, "grad_norm": 0.07384924590587616, "learning_rate": 7.043213620529449e-05, "loss": 46.0051, "step": 2744 }, { "epoch": 0.37476960884702026, "grad_norm": 0.1509762704372406, "learning_rate": 7.041228852272777e-05, "loss": 46.0038, "step": 2745 }, { "epoch": 0.37490613693767494, "grad_norm": 0.05867889150977135, "learning_rate": 7.039243697972857e-05, "loss": 46.0022, "step": 2746 }, { "epoch": 0.37504266502832956, "grad_norm": 0.1364651471376419, "learning_rate": 7.037258158005124e-05, "loss": 46.0035, "step": 2747 }, { "epoch": 0.37517919311898423, "grad_norm": 0.152664452791214, "learning_rate": 7.035272232745092e-05, "loss": 46.0006, "step": 2748 }, { "epoch": 0.3753157212096389, "grad_norm": 0.15711957216262817, "learning_rate": 7.033285922568346e-05, "loss": 46.0057, "step": 2749 }, { "epoch": 0.37545224930029353, "grad_norm": 0.15027759969234467, "learning_rate": 7.031299227850539e-05, "loss": 46.0069, "step": 2750 }, { "epoch": 0.3755887773909482, "grad_norm": 0.18137143552303314, "learning_rate": 7.029312148967405e-05, "loss": 46.0046, "step": 2751 }, { "epoch": 0.3757253054816028, "grad_norm": 0.05537695065140724, "learning_rate": 7.027324686294741e-05, "loss": 46.0056, "step": 2752 }, { "epoch": 0.3758618335722575, "grad_norm": 0.04597265645861626, "learning_rate": 7.025336840208428e-05, "loss": 46.0042, "step": 2753 }, { "epoch": 0.3759983616629121, "grad_norm": 0.0340721495449543, "learning_rate": 7.023348611084409e-05, "loss": 46.0069, "step": 2754 }, { "epoch": 0.3761348897535668, "grad_norm": 0.07194885611534119, "learning_rate": 7.021359999298704e-05, "loss": 46.0047, "step": 2755 }, { "epoch": 0.37627141784422147, "grad_norm": 0.03384450450539589, "learning_rate": 7.019371005227407e-05, "loss": 46.0013, "step": 2756 }, { "epoch": 0.3764079459348761, "grad_norm": 0.08080285787582397, "learning_rate": 7.017381629246682e-05, "loss": 46.0078, "step": 2757 }, { "epoch": 0.37654447402553076, "grad_norm": 0.04855979606509209, "learning_rate": 7.015391871732764e-05, "loss": 46.0029, "step": 2758 }, { "epoch": 0.3766810021161854, "grad_norm": 0.0854191929101944, "learning_rate": 7.013401733061964e-05, "loss": 46.0025, "step": 2759 }, { "epoch": 0.37681753020684006, "grad_norm": 0.0424196794629097, "learning_rate": 7.011411213610662e-05, "loss": 46.0056, "step": 2760 }, { "epoch": 0.37695405829749473, "grad_norm": 0.0272492915391922, "learning_rate": 7.009420313755311e-05, "loss": 46.0075, "step": 2761 }, { "epoch": 0.37709058638814935, "grad_norm": 0.06638514250516891, "learning_rate": 7.007429033872436e-05, 
"loss": 46.0114, "step": 2762 }, { "epoch": 0.377227114478804, "grad_norm": 0.0802583172917366, "learning_rate": 7.005437374338634e-05, "loss": 46.0039, "step": 2763 }, { "epoch": 0.37736364256945865, "grad_norm": 0.047348104417324066, "learning_rate": 7.003445335530572e-05, "loss": 46.0099, "step": 2764 }, { "epoch": 0.3775001706601133, "grad_norm": 0.07711659371852875, "learning_rate": 7.001452917824992e-05, "loss": 46.0132, "step": 2765 }, { "epoch": 0.377636698750768, "grad_norm": 0.049445025622844696, "learning_rate": 6.999460121598704e-05, "loss": 46.0, "step": 2766 }, { "epoch": 0.3777732268414226, "grad_norm": 0.03517887741327286, "learning_rate": 6.997466947228596e-05, "loss": 46.004, "step": 2767 }, { "epoch": 0.3779097549320773, "grad_norm": 0.11581943184137344, "learning_rate": 6.995473395091617e-05, "loss": 46.0059, "step": 2768 }, { "epoch": 0.3780462830227319, "grad_norm": 0.13305966556072235, "learning_rate": 6.993479465564799e-05, "loss": 46.0132, "step": 2769 }, { "epoch": 0.3781828111133866, "grad_norm": 0.07825091481208801, "learning_rate": 6.991485159025234e-05, "loss": 46.0063, "step": 2770 }, { "epoch": 0.3783193392040412, "grad_norm": 0.12916211783885956, "learning_rate": 6.9894904758501e-05, "loss": 46.0112, "step": 2771 }, { "epoch": 0.3784558672946959, "grad_norm": 0.1338757425546646, "learning_rate": 6.987495416416626e-05, "loss": 46.001, "step": 2772 }, { "epoch": 0.37859239538535056, "grad_norm": 0.0646224245429039, "learning_rate": 6.985499981102133e-05, "loss": 46.015, "step": 2773 }, { "epoch": 0.3787289234760052, "grad_norm": 0.20051167905330658, "learning_rate": 6.983504170283999e-05, "loss": 46.0032, "step": 2774 }, { "epoch": 0.37886545156665985, "grad_norm": 0.053664155304431915, "learning_rate": 6.981507984339678e-05, "loss": 46.0088, "step": 2775 }, { "epoch": 0.37900197965731447, "grad_norm": 0.08480936288833618, "learning_rate": 6.979511423646696e-05, "loss": 46.0016, "step": 2776 }, { "epoch": 0.37913850774796914, "grad_norm": 0.08391254395246506, "learning_rate": 6.977514488582647e-05, "loss": 46.0073, "step": 2777 }, { "epoch": 0.3792750358386238, "grad_norm": 0.07340699434280396, "learning_rate": 6.9755171795252e-05, "loss": 46.0084, "step": 2778 }, { "epoch": 0.37941156392927844, "grad_norm": 0.038414232432842255, "learning_rate": 6.973519496852088e-05, "loss": 46.0118, "step": 2779 }, { "epoch": 0.3795480920199331, "grad_norm": 0.07899674773216248, "learning_rate": 6.97152144094112e-05, "loss": 46.0075, "step": 2780 }, { "epoch": 0.37968462011058773, "grad_norm": 0.15494370460510254, "learning_rate": 6.969523012170178e-05, "loss": 46.0034, "step": 2781 }, { "epoch": 0.3798211482012424, "grad_norm": 0.03460179641842842, "learning_rate": 6.967524210917205e-05, "loss": 46.0094, "step": 2782 }, { "epoch": 0.3799576762918971, "grad_norm": 0.05212077870965004, "learning_rate": 6.965525037560226e-05, "loss": 46.0008, "step": 2783 }, { "epoch": 0.3800942043825517, "grad_norm": 0.04970061033964157, "learning_rate": 6.963525492477325e-05, "loss": 46.0028, "step": 2784 }, { "epoch": 0.3802307324732064, "grad_norm": 0.03315117210149765, "learning_rate": 6.961525576046666e-05, "loss": 46.0045, "step": 2785 }, { "epoch": 0.380367260563861, "grad_norm": 0.06768501549959183, "learning_rate": 6.959525288646475e-05, "loss": 46.0036, "step": 2786 }, { "epoch": 0.3805037886545157, "grad_norm": 0.11494909226894379, "learning_rate": 6.957524630655056e-05, "loss": 46.0033, "step": 2787 }, { "epoch": 0.3806403167451703, "grad_norm": 0.05197020247578621, 
"learning_rate": 6.95552360245078e-05, "loss": 46.0077, "step": 2788 }, { "epoch": 0.38077684483582497, "grad_norm": 0.036914147436618805, "learning_rate": 6.953522204412085e-05, "loss": 46.0028, "step": 2789 }, { "epoch": 0.38091337292647964, "grad_norm": 0.04878842085599899, "learning_rate": 6.951520436917481e-05, "loss": 46.0092, "step": 2790 }, { "epoch": 0.38104990101713426, "grad_norm": 0.03958869352936745, "learning_rate": 6.94951830034555e-05, "loss": 46.0009, "step": 2791 }, { "epoch": 0.38118642910778894, "grad_norm": 0.11141853779554367, "learning_rate": 6.947515795074941e-05, "loss": 46.0026, "step": 2792 }, { "epoch": 0.38132295719844356, "grad_norm": 0.18885193765163422, "learning_rate": 6.945512921484375e-05, "loss": 46.0069, "step": 2793 }, { "epoch": 0.38145948528909823, "grad_norm": 0.10230422019958496, "learning_rate": 6.943509679952638e-05, "loss": 46.0074, "step": 2794 }, { "epoch": 0.3815960133797529, "grad_norm": 0.23744626343250275, "learning_rate": 6.941506070858592e-05, "loss": 46.0052, "step": 2795 }, { "epoch": 0.3817325414704075, "grad_norm": 0.18499210476875305, "learning_rate": 6.939502094581164e-05, "loss": 46.0041, "step": 2796 }, { "epoch": 0.3818690695610622, "grad_norm": 0.32202956080436707, "learning_rate": 6.937497751499353e-05, "loss": 46.0032, "step": 2797 }, { "epoch": 0.3820055976517168, "grad_norm": 0.07785579562187195, "learning_rate": 6.935493041992226e-05, "loss": 46.0021, "step": 2798 }, { "epoch": 0.3821421257423715, "grad_norm": 0.1281740665435791, "learning_rate": 6.93348796643892e-05, "loss": 46.0, "step": 2799 }, { "epoch": 0.38227865383302617, "grad_norm": 0.17225177586078644, "learning_rate": 6.93148252521864e-05, "loss": 46.0015, "step": 2800 }, { "epoch": 0.3824151819236808, "grad_norm": 0.0467715784907341, "learning_rate": 6.929476718710664e-05, "loss": 46.015, "step": 2801 }, { "epoch": 0.38255171001433547, "grad_norm": 0.07169802486896515, "learning_rate": 6.92747054729433e-05, "loss": 46.0042, "step": 2802 }, { "epoch": 0.3826882381049901, "grad_norm": 0.04571770876646042, "learning_rate": 6.925464011349056e-05, "loss": 46.0007, "step": 2803 }, { "epoch": 0.38282476619564476, "grad_norm": 0.09017938375473022, "learning_rate": 6.923457111254322e-05, "loss": 46.0072, "step": 2804 }, { "epoch": 0.3829612942862994, "grad_norm": 0.0945509746670723, "learning_rate": 6.92144984738968e-05, "loss": 46.0014, "step": 2805 }, { "epoch": 0.38309782237695406, "grad_norm": 0.045747119933366776, "learning_rate": 6.919442220134748e-05, "loss": 46.0021, "step": 2806 }, { "epoch": 0.38323435046760873, "grad_norm": 0.03573809564113617, "learning_rate": 6.917434229869217e-05, "loss": 46.0012, "step": 2807 }, { "epoch": 0.38337087855826335, "grad_norm": 0.08558043092489243, "learning_rate": 6.915425876972842e-05, "loss": 46.0023, "step": 2808 }, { "epoch": 0.383507406648918, "grad_norm": 0.08518315851688385, "learning_rate": 6.91341716182545e-05, "loss": 46.0027, "step": 2809 }, { "epoch": 0.38364393473957265, "grad_norm": 0.04048817232251167, "learning_rate": 6.911408084806933e-05, "loss": 46.0057, "step": 2810 }, { "epoch": 0.3837804628302273, "grad_norm": 0.07092530280351639, "learning_rate": 6.909398646297256e-05, "loss": 46.0109, "step": 2811 }, { "epoch": 0.383916990920882, "grad_norm": 0.15319107472896576, "learning_rate": 6.90738884667645e-05, "loss": 46.004, "step": 2812 }, { "epoch": 0.3840535190115366, "grad_norm": 0.08175390213727951, "learning_rate": 6.90537868632461e-05, "loss": 46.0057, "step": 2813 }, { "epoch": 0.3841900471021913, 
"grad_norm": 0.09150116890668869, "learning_rate": 6.90336816562191e-05, "loss": 46.0092, "step": 2814 }, { "epoch": 0.3843265751928459, "grad_norm": 0.04271779581904411, "learning_rate": 6.901357284948581e-05, "loss": 46.0029, "step": 2815 }, { "epoch": 0.3844631032835006, "grad_norm": 0.06234373524785042, "learning_rate": 6.899346044684928e-05, "loss": 46.0057, "step": 2816 }, { "epoch": 0.38459963137415526, "grad_norm": 0.11965598911046982, "learning_rate": 6.897334445211321e-05, "loss": 46.0049, "step": 2817 }, { "epoch": 0.3847361594648099, "grad_norm": 0.13023754954338074, "learning_rate": 6.895322486908206e-05, "loss": 46.0088, "step": 2818 }, { "epoch": 0.38487268755546455, "grad_norm": 0.12862105667591095, "learning_rate": 6.893310170156081e-05, "loss": 46.001, "step": 2819 }, { "epoch": 0.3850092156461192, "grad_norm": 0.0952359139919281, "learning_rate": 6.891297495335532e-05, "loss": 46.0087, "step": 2820 }, { "epoch": 0.38514574373677385, "grad_norm": 0.1149710863828659, "learning_rate": 6.889284462827191e-05, "loss": 46.0068, "step": 2821 }, { "epoch": 0.38528227182742847, "grad_norm": 0.09459277987480164, "learning_rate": 6.887271073011776e-05, "loss": 46.0084, "step": 2822 }, { "epoch": 0.38541879991808314, "grad_norm": 0.03697892650961876, "learning_rate": 6.885257326270063e-05, "loss": 46.0075, "step": 2823 }, { "epoch": 0.3855553280087378, "grad_norm": 0.09529288113117218, "learning_rate": 6.883243222982898e-05, "loss": 46.0073, "step": 2824 }, { "epoch": 0.38569185609939244, "grad_norm": 0.09184238314628601, "learning_rate": 6.881228763531195e-05, "loss": 46.0006, "step": 2825 }, { "epoch": 0.3858283841900471, "grad_norm": 0.17466557025909424, "learning_rate": 6.879213948295932e-05, "loss": 46.0017, "step": 2826 }, { "epoch": 0.38596491228070173, "grad_norm": 0.14267247915267944, "learning_rate": 6.877198777658161e-05, "loss": 46.0123, "step": 2827 }, { "epoch": 0.3861014403713564, "grad_norm": 0.08852794021368027, "learning_rate": 6.875183251998994e-05, "loss": 46.012, "step": 2828 }, { "epoch": 0.3862379684620111, "grad_norm": 0.07829584926366806, "learning_rate": 6.873167371699611e-05, "loss": 46.0024, "step": 2829 }, { "epoch": 0.3863744965526657, "grad_norm": 0.12148236483335495, "learning_rate": 6.871151137141266e-05, "loss": 46.0104, "step": 2830 }, { "epoch": 0.3865110246433204, "grad_norm": 0.05246533453464508, "learning_rate": 6.869134548705274e-05, "loss": 46.0039, "step": 2831 }, { "epoch": 0.386647552733975, "grad_norm": 0.1215442344546318, "learning_rate": 6.867117606773017e-05, "loss": 46.0045, "step": 2832 }, { "epoch": 0.3867840808246297, "grad_norm": 0.06236011162400246, "learning_rate": 6.865100311725944e-05, "loss": 46.0042, "step": 2833 }, { "epoch": 0.3869206089152843, "grad_norm": 0.06573495268821716, "learning_rate": 6.863082663945575e-05, "loss": 46.0024, "step": 2834 }, { "epoch": 0.38705713700593897, "grad_norm": 0.06135331839323044, "learning_rate": 6.86106466381349e-05, "loss": 46.0053, "step": 2835 }, { "epoch": 0.38719366509659364, "grad_norm": 0.11152108013629913, "learning_rate": 6.859046311711343e-05, "loss": 46.0063, "step": 2836 }, { "epoch": 0.38733019318724826, "grad_norm": 0.047375719994306564, "learning_rate": 6.857027608020847e-05, "loss": 46.0042, "step": 2837 }, { "epoch": 0.38746672127790294, "grad_norm": 0.08904846012592316, "learning_rate": 6.85500855312379e-05, "loss": 46.0064, "step": 2838 }, { "epoch": 0.38760324936855756, "grad_norm": 0.12209279090166092, "learning_rate": 6.852989147402014e-05, "loss": 46.0054, "step": 
2839 }, { "epoch": 0.38773977745921223, "grad_norm": 0.09408655762672424, "learning_rate": 6.850969391237442e-05, "loss": 46.002, "step": 2840 }, { "epoch": 0.3878763055498669, "grad_norm": 0.1432700902223587, "learning_rate": 6.848949285012055e-05, "loss": 46.0074, "step": 2841 }, { "epoch": 0.3880128336405215, "grad_norm": 0.03999871015548706, "learning_rate": 6.8469288291079e-05, "loss": 46.0021, "step": 2842 }, { "epoch": 0.3881493617311762, "grad_norm": 0.03572747856378555, "learning_rate": 6.84490802390709e-05, "loss": 46.0016, "step": 2843 }, { "epoch": 0.3882858898218308, "grad_norm": 0.1813347041606903, "learning_rate": 6.842886869791809e-05, "loss": 46.0024, "step": 2844 }, { "epoch": 0.3884224179124855, "grad_norm": 0.10942332446575165, "learning_rate": 6.840865367144302e-05, "loss": 46.006, "step": 2845 }, { "epoch": 0.38855894600314017, "grad_norm": 0.1667984277009964, "learning_rate": 6.838843516346883e-05, "loss": 46.005, "step": 2846 }, { "epoch": 0.3886954740937948, "grad_norm": 0.13985666632652283, "learning_rate": 6.836821317781926e-05, "loss": 46.0058, "step": 2847 }, { "epoch": 0.38883200218444947, "grad_norm": 0.04211229458451271, "learning_rate": 6.834798771831881e-05, "loss": 46.0072, "step": 2848 }, { "epoch": 0.3889685302751041, "grad_norm": 0.16389942169189453, "learning_rate": 6.832775878879255e-05, "loss": 46.0049, "step": 2849 }, { "epoch": 0.38910505836575876, "grad_norm": 0.17829033732414246, "learning_rate": 6.830752639306624e-05, "loss": 46.0091, "step": 2850 }, { "epoch": 0.3892415864564134, "grad_norm": 0.03750026598572731, "learning_rate": 6.828729053496629e-05, "loss": 46.006, "step": 2851 }, { "epoch": 0.38937811454706805, "grad_norm": 0.04455247521400452, "learning_rate": 6.826705121831976e-05, "loss": 46.0042, "step": 2852 }, { "epoch": 0.38951464263772273, "grad_norm": 0.07708753645420074, "learning_rate": 6.824680844695437e-05, "loss": 46.0072, "step": 2853 }, { "epoch": 0.38965117072837735, "grad_norm": 0.06045396625995636, "learning_rate": 6.82265622246985e-05, "loss": 46.002, "step": 2854 }, { "epoch": 0.389787698819032, "grad_norm": 0.06399362534284592, "learning_rate": 6.820631255538117e-05, "loss": 46.0048, "step": 2855 }, { "epoch": 0.38992422690968664, "grad_norm": 0.07073832303285599, "learning_rate": 6.818605944283206e-05, "loss": 46.0004, "step": 2856 }, { "epoch": 0.3900607550003413, "grad_norm": 0.09435048699378967, "learning_rate": 6.81658028908815e-05, "loss": 46.0058, "step": 2857 }, { "epoch": 0.390197283090996, "grad_norm": 0.10140799731016159, "learning_rate": 6.814554290336047e-05, "loss": 46.0044, "step": 2858 }, { "epoch": 0.3903338111816506, "grad_norm": 0.07209686934947968, "learning_rate": 6.81252794841006e-05, "loss": 46.0045, "step": 2859 }, { "epoch": 0.3904703392723053, "grad_norm": 0.036053579300642014, "learning_rate": 6.810501263693417e-05, "loss": 46.0009, "step": 2860 }, { "epoch": 0.3906068673629599, "grad_norm": 0.07174188643693924, "learning_rate": 6.808474236569407e-05, "loss": 46.0025, "step": 2861 }, { "epoch": 0.3907433954536146, "grad_norm": 0.04467269033193588, "learning_rate": 6.806446867421393e-05, "loss": 46.0103, "step": 2862 }, { "epoch": 0.39087992354426926, "grad_norm": 0.07637561857700348, "learning_rate": 6.804419156632793e-05, "loss": 46.0105, "step": 2863 }, { "epoch": 0.3910164516349239, "grad_norm": 0.15134026110172272, "learning_rate": 6.802391104587098e-05, "loss": 46.0074, "step": 2864 }, { "epoch": 0.39115297972557855, "grad_norm": 0.036502987146377563, "learning_rate": 
6.800362711667853e-05, "loss": 46.003, "step": 2865 }, { "epoch": 0.3912895078162332, "grad_norm": 0.09317944943904877, "learning_rate": 6.79833397825868e-05, "loss": 46.0044, "step": 2866 }, { "epoch": 0.39142603590688785, "grad_norm": 0.0515737347304821, "learning_rate": 6.796304904743254e-05, "loss": 46.0022, "step": 2867 }, { "epoch": 0.39156256399754247, "grad_norm": 0.10820328444242477, "learning_rate": 6.794275491505322e-05, "loss": 46.0069, "step": 2868 }, { "epoch": 0.39169909208819714, "grad_norm": 0.058926474303007126, "learning_rate": 6.792245738928692e-05, "loss": 46.0055, "step": 2869 }, { "epoch": 0.3918356201788518, "grad_norm": 0.13512329757213593, "learning_rate": 6.790215647397237e-05, "loss": 46.0022, "step": 2870 }, { "epoch": 0.39197214826950644, "grad_norm": 0.08140117675065994, "learning_rate": 6.788185217294894e-05, "loss": 46.0045, "step": 2871 }, { "epoch": 0.3921086763601611, "grad_norm": 0.06915444135665894, "learning_rate": 6.786154449005665e-05, "loss": 46.007, "step": 2872 }, { "epoch": 0.39224520445081573, "grad_norm": 0.05448833480477333, "learning_rate": 6.784123342913612e-05, "loss": 46.0008, "step": 2873 }, { "epoch": 0.3923817325414704, "grad_norm": 0.059616293758153915, "learning_rate": 6.782091899402867e-05, "loss": 46.0085, "step": 2874 }, { "epoch": 0.3925182606321251, "grad_norm": 0.08596896380186081, "learning_rate": 6.780060118857621e-05, "loss": 46.0042, "step": 2875 }, { "epoch": 0.3926547887227797, "grad_norm": 0.11139814555644989, "learning_rate": 6.77802800166213e-05, "loss": 46.0064, "step": 2876 }, { "epoch": 0.3927913168134344, "grad_norm": 0.13377784192562103, "learning_rate": 6.775995548200713e-05, "loss": 46.0041, "step": 2877 }, { "epoch": 0.392927844904089, "grad_norm": 0.059272099286317825, "learning_rate": 6.773962758857759e-05, "loss": 46.0008, "step": 2878 }, { "epoch": 0.39306437299474367, "grad_norm": 0.043817080557346344, "learning_rate": 6.771929634017708e-05, "loss": 46.0063, "step": 2879 }, { "epoch": 0.39320090108539835, "grad_norm": 0.2732653021812439, "learning_rate": 6.769896174065077e-05, "loss": 46.0007, "step": 2880 }, { "epoch": 0.39333742917605297, "grad_norm": 0.06373842805624008, "learning_rate": 6.767862379384434e-05, "loss": 46.0076, "step": 2881 }, { "epoch": 0.39347395726670764, "grad_norm": 0.040040288120508194, "learning_rate": 6.765828250360422e-05, "loss": 46.0052, "step": 2882 }, { "epoch": 0.39361048535736226, "grad_norm": 0.07913838326931, "learning_rate": 6.763793787377735e-05, "loss": 46.0017, "step": 2883 }, { "epoch": 0.39374701344801694, "grad_norm": 0.05716557800769806, "learning_rate": 6.761758990821143e-05, "loss": 46.0005, "step": 2884 }, { "epoch": 0.39388354153867156, "grad_norm": 0.03803883120417595, "learning_rate": 6.75972386107547e-05, "loss": 46.0089, "step": 2885 }, { "epoch": 0.39402006962932623, "grad_norm": 0.03612419217824936, "learning_rate": 6.757688398525605e-05, "loss": 46.0001, "step": 2886 }, { "epoch": 0.3941565977199809, "grad_norm": 0.07839575409889221, "learning_rate": 6.755652603556504e-05, "loss": 46.0085, "step": 2887 }, { "epoch": 0.3942931258106355, "grad_norm": 0.050731219351291656, "learning_rate": 6.753616476553177e-05, "loss": 46.009, "step": 2888 }, { "epoch": 0.3944296539012902, "grad_norm": 0.04125159606337547, "learning_rate": 6.751580017900707e-05, "loss": 46.0027, "step": 2889 }, { "epoch": 0.3945661819919448, "grad_norm": 0.12569355964660645, "learning_rate": 6.749543227984234e-05, "loss": 46.0055, "step": 2890 }, { "epoch": 0.3947027100825995, 
"grad_norm": 0.09077274054288864, "learning_rate": 6.747506107188961e-05, "loss": 46.0023, "step": 2891 }, { "epoch": 0.39483923817325417, "grad_norm": 0.09225741773843765, "learning_rate": 6.745468655900155e-05, "loss": 46.0086, "step": 2892 }, { "epoch": 0.3949757662639088, "grad_norm": 0.2656152844429016, "learning_rate": 6.743430874503143e-05, "loss": 46.0081, "step": 2893 }, { "epoch": 0.39511229435456346, "grad_norm": 0.09339848160743713, "learning_rate": 6.74139276338332e-05, "loss": 46.002, "step": 2894 }, { "epoch": 0.3952488224452181, "grad_norm": 0.14308400452136993, "learning_rate": 6.739354322926136e-05, "loss": 46.0012, "step": 2895 }, { "epoch": 0.39538535053587276, "grad_norm": 0.21904270350933075, "learning_rate": 6.73731555351711e-05, "loss": 46.0061, "step": 2896 }, { "epoch": 0.39552187862652743, "grad_norm": 0.11437327414751053, "learning_rate": 6.735276455541816e-05, "loss": 46.0109, "step": 2897 }, { "epoch": 0.39565840671718205, "grad_norm": 0.2219550758600235, "learning_rate": 6.7332370293859e-05, "loss": 46.0048, "step": 2898 }, { "epoch": 0.39579493480783673, "grad_norm": 0.1298856884241104, "learning_rate": 6.731197275435057e-05, "loss": 46.0048, "step": 2899 }, { "epoch": 0.39593146289849135, "grad_norm": 0.10205799341201782, "learning_rate": 6.729157194075057e-05, "loss": 46.0035, "step": 2900 }, { "epoch": 0.396067990989146, "grad_norm": 0.11407556384801865, "learning_rate": 6.727116785691725e-05, "loss": 46.0036, "step": 2901 }, { "epoch": 0.39620451907980064, "grad_norm": 0.06781189888715744, "learning_rate": 6.725076050670948e-05, "loss": 46.0087, "step": 2902 }, { "epoch": 0.3963410471704553, "grad_norm": 0.038440026342868805, "learning_rate": 6.723034989398678e-05, "loss": 46.0032, "step": 2903 }, { "epoch": 0.39647757526111, "grad_norm": 0.19473013281822205, "learning_rate": 6.720993602260923e-05, "loss": 46.0036, "step": 2904 }, { "epoch": 0.3966141033517646, "grad_norm": 0.03288116306066513, "learning_rate": 6.71895188964376e-05, "loss": 46.0082, "step": 2905 }, { "epoch": 0.3967506314424193, "grad_norm": 0.04958664998412132, "learning_rate": 6.71690985193332e-05, "loss": 46.0049, "step": 2906 }, { "epoch": 0.3968871595330739, "grad_norm": 0.05639585107564926, "learning_rate": 6.714867489515803e-05, "loss": 46.0036, "step": 2907 }, { "epoch": 0.3970236876237286, "grad_norm": 0.0376536101102829, "learning_rate": 6.712824802777464e-05, "loss": 46.0025, "step": 2908 }, { "epoch": 0.39716021571438326, "grad_norm": 0.10764352977275848, "learning_rate": 6.710781792104623e-05, "loss": 46.0067, "step": 2909 }, { "epoch": 0.3972967438050379, "grad_norm": 0.23326475918293, "learning_rate": 6.70873845788366e-05, "loss": 46.0045, "step": 2910 }, { "epoch": 0.39743327189569255, "grad_norm": 0.23535679280757904, "learning_rate": 6.706694800501017e-05, "loss": 46.0075, "step": 2911 }, { "epoch": 0.39756979998634717, "grad_norm": 0.07222997397184372, "learning_rate": 6.704650820343194e-05, "loss": 46.0053, "step": 2912 }, { "epoch": 0.39770632807700185, "grad_norm": 0.0675157904624939, "learning_rate": 6.702606517796759e-05, "loss": 46.0084, "step": 2913 }, { "epoch": 0.3978428561676565, "grad_norm": 0.13119302690029144, "learning_rate": 6.700561893248332e-05, "loss": 46.0127, "step": 2914 }, { "epoch": 0.39797938425831114, "grad_norm": 0.0686432495713234, "learning_rate": 6.698516947084602e-05, "loss": 46.0064, "step": 2915 }, { "epoch": 0.3981159123489658, "grad_norm": 0.0782148614525795, "learning_rate": 6.696471679692312e-05, "loss": 46.01, "step": 2916 }, { 
"epoch": 0.39825244043962044, "grad_norm": 0.0840383991599083, "learning_rate": 6.694426091458272e-05, "loss": 46.0057, "step": 2917 }, { "epoch": 0.3983889685302751, "grad_norm": 0.06982682645320892, "learning_rate": 6.692380182769348e-05, "loss": 46.0032, "step": 2918 }, { "epoch": 0.39852549662092973, "grad_norm": 0.09727339446544647, "learning_rate": 6.690333954012471e-05, "loss": 46.0075, "step": 2919 }, { "epoch": 0.3986620247115844, "grad_norm": 0.13205093145370483, "learning_rate": 6.688287405574627e-05, "loss": 46.0107, "step": 2920 }, { "epoch": 0.3987985528022391, "grad_norm": 0.04405706748366356, "learning_rate": 6.686240537842864e-05, "loss": 46.0104, "step": 2921 }, { "epoch": 0.3989350808928937, "grad_norm": 0.10336392372846603, "learning_rate": 6.684193351204297e-05, "loss": 46.0181, "step": 2922 }, { "epoch": 0.3990716089835484, "grad_norm": 0.06936386227607727, "learning_rate": 6.682145846046091e-05, "loss": 46.0147, "step": 2923 }, { "epoch": 0.399208137074203, "grad_norm": 0.12471471726894379, "learning_rate": 6.680098022755478e-05, "loss": 46.0081, "step": 2924 }, { "epoch": 0.39934466516485767, "grad_norm": 0.08753729611635208, "learning_rate": 6.678049881719748e-05, "loss": 46.006, "step": 2925 }, { "epoch": 0.39948119325551235, "grad_norm": 0.10034804046154022, "learning_rate": 6.676001423326252e-05, "loss": 46.0146, "step": 2926 }, { "epoch": 0.39961772134616697, "grad_norm": 0.08269649744033813, "learning_rate": 6.6739526479624e-05, "loss": 46.0025, "step": 2927 }, { "epoch": 0.39975424943682164, "grad_norm": 0.09845083951950073, "learning_rate": 6.671903556015664e-05, "loss": 46.0077, "step": 2928 }, { "epoch": 0.39989077752747626, "grad_norm": 0.10461684316396713, "learning_rate": 6.669854147873573e-05, "loss": 46.0052, "step": 2929 }, { "epoch": 0.40002730561813094, "grad_norm": 0.1029122918844223, "learning_rate": 6.667804423923715e-05, "loss": 46.0065, "step": 2930 }, { "epoch": 0.4001638337087856, "grad_norm": 0.0682147964835167, "learning_rate": 6.665754384553746e-05, "loss": 46.0055, "step": 2931 }, { "epoch": 0.40030036179944023, "grad_norm": 0.19040082395076752, "learning_rate": 6.663704030151369e-05, "loss": 46.0064, "step": 2932 }, { "epoch": 0.4004368898900949, "grad_norm": 0.09426770359277725, "learning_rate": 6.661653361104359e-05, "loss": 46.0063, "step": 2933 }, { "epoch": 0.4005734179807495, "grad_norm": 0.06220243498682976, "learning_rate": 6.659602377800539e-05, "loss": 46.0084, "step": 2934 }, { "epoch": 0.4007099460714042, "grad_norm": 0.052854426205158234, "learning_rate": 6.6575510806278e-05, "loss": 46.0106, "step": 2935 }, { "epoch": 0.4008464741620588, "grad_norm": 0.05812637880444527, "learning_rate": 6.65549946997409e-05, "loss": 46.0093, "step": 2936 }, { "epoch": 0.4009830022527135, "grad_norm": 0.05819082632660866, "learning_rate": 6.653447546227411e-05, "loss": 46.0038, "step": 2937 }, { "epoch": 0.40111953034336817, "grad_norm": 0.17536793649196625, "learning_rate": 6.651395309775837e-05, "loss": 46.012, "step": 2938 }, { "epoch": 0.4012560584340228, "grad_norm": 0.02911416068673134, "learning_rate": 6.649342761007486e-05, "loss": 46.002, "step": 2939 }, { "epoch": 0.40139258652467746, "grad_norm": 0.15855257213115692, "learning_rate": 6.647289900310545e-05, "loss": 46.0035, "step": 2940 }, { "epoch": 0.4015291146153321, "grad_norm": 0.08625531941652298, "learning_rate": 6.645236728073257e-05, "loss": 46.0008, "step": 2941 }, { "epoch": 0.40166564270598676, "grad_norm": 0.05802028998732567, "learning_rate": 6.643183244683923e-05, 
"loss": 46.0014, "step": 2942 }, { "epoch": 0.40180217079664143, "grad_norm": 0.1068258136510849, "learning_rate": 6.641129450530905e-05, "loss": 46.0028, "step": 2943 }, { "epoch": 0.40193869888729605, "grad_norm": 0.16526785492897034, "learning_rate": 6.639075346002622e-05, "loss": 46.0097, "step": 2944 }, { "epoch": 0.40207522697795073, "grad_norm": 0.07433614134788513, "learning_rate": 6.637020931487555e-05, "loss": 46.0044, "step": 2945 }, { "epoch": 0.40221175506860535, "grad_norm": 0.11289048939943314, "learning_rate": 6.634966207374234e-05, "loss": 46.0019, "step": 2946 }, { "epoch": 0.40234828315926, "grad_norm": 0.1473781168460846, "learning_rate": 6.632911174051263e-05, "loss": 46.0118, "step": 2947 }, { "epoch": 0.40248481124991464, "grad_norm": 0.07173411548137665, "learning_rate": 6.63085583190729e-05, "loss": 46.0013, "step": 2948 }, { "epoch": 0.4026213393405693, "grad_norm": 0.1842130422592163, "learning_rate": 6.628800181331031e-05, "loss": 46.004, "step": 2949 }, { "epoch": 0.402757867431224, "grad_norm": 0.17641174793243408, "learning_rate": 6.626744222711254e-05, "loss": 46.0052, "step": 2950 }, { "epoch": 0.4028943955218786, "grad_norm": 0.1055992990732193, "learning_rate": 6.624687956436787e-05, "loss": 46.0105, "step": 2951 }, { "epoch": 0.4030309236125333, "grad_norm": 0.05551333352923393, "learning_rate": 6.622631382896524e-05, "loss": 46.0025, "step": 2952 }, { "epoch": 0.4031674517031879, "grad_norm": 0.10408655554056168, "learning_rate": 6.620574502479404e-05, "loss": 46.0041, "step": 2953 }, { "epoch": 0.4033039797938426, "grad_norm": 0.06666222959756851, "learning_rate": 6.61851731557443e-05, "loss": 46.0053, "step": 2954 }, { "epoch": 0.40344050788449726, "grad_norm": 0.1510469913482666, "learning_rate": 6.616459822570667e-05, "loss": 46.0056, "step": 2955 }, { "epoch": 0.4035770359751519, "grad_norm": 0.045026302337646484, "learning_rate": 6.614402023857232e-05, "loss": 46.0038, "step": 2956 }, { "epoch": 0.40371356406580655, "grad_norm": 0.1681818962097168, "learning_rate": 6.612343919823302e-05, "loss": 46.0016, "step": 2957 }, { "epoch": 0.40385009215646117, "grad_norm": 0.1299772411584854, "learning_rate": 6.610285510858112e-05, "loss": 46.003, "step": 2958 }, { "epoch": 0.40398662024711585, "grad_norm": 0.03964947536587715, "learning_rate": 6.608226797350956e-05, "loss": 46.0022, "step": 2959 }, { "epoch": 0.4041231483377705, "grad_norm": 0.0679377093911171, "learning_rate": 6.606167779691181e-05, "loss": 46.0067, "step": 2960 }, { "epoch": 0.40425967642842514, "grad_norm": 0.059781357645988464, "learning_rate": 6.604108458268196e-05, "loss": 46.0032, "step": 2961 }, { "epoch": 0.4043962045190798, "grad_norm": 0.057675354182720184, "learning_rate": 6.602048833471466e-05, "loss": 46.003, "step": 2962 }, { "epoch": 0.40453273260973444, "grad_norm": 0.041330404579639435, "learning_rate": 6.599988905690515e-05, "loss": 46.0088, "step": 2963 }, { "epoch": 0.4046692607003891, "grad_norm": 0.0344633050262928, "learning_rate": 6.597928675314918e-05, "loss": 46.004, "step": 2964 }, { "epoch": 0.40480578879104373, "grad_norm": 0.14867693185806274, "learning_rate": 6.595868142734319e-05, "loss": 46.0093, "step": 2965 }, { "epoch": 0.4049423168816984, "grad_norm": 0.03542421758174896, "learning_rate": 6.593807308338405e-05, "loss": 46.0134, "step": 2966 }, { "epoch": 0.4050788449723531, "grad_norm": 0.07698102295398712, "learning_rate": 6.591746172516929e-05, "loss": 46.0056, "step": 2967 }, { "epoch": 0.4052153730630077, "grad_norm": 0.05118850991129875, 
"learning_rate": 6.589684735659702e-05, "loss": 46.0126, "step": 2968 }, { "epoch": 0.4053519011536624, "grad_norm": 0.04234418272972107, "learning_rate": 6.587622998156587e-05, "loss": 46.0037, "step": 2969 }, { "epoch": 0.405488429244317, "grad_norm": 0.028087357059121132, "learning_rate": 6.585560960397507e-05, "loss": 46.014, "step": 2970 }, { "epoch": 0.40562495733497167, "grad_norm": 0.08146511018276215, "learning_rate": 6.583498622772439e-05, "loss": 46.0016, "step": 2971 }, { "epoch": 0.40576148542562634, "grad_norm": 0.08589431643486023, "learning_rate": 6.581435985671419e-05, "loss": 46.0108, "step": 2972 }, { "epoch": 0.40589801351628096, "grad_norm": 0.0864008367061615, "learning_rate": 6.579373049484538e-05, "loss": 46.0112, "step": 2973 }, { "epoch": 0.40603454160693564, "grad_norm": 0.05074608325958252, "learning_rate": 6.577309814601946e-05, "loss": 46.0011, "step": 2974 }, { "epoch": 0.40617106969759026, "grad_norm": 0.15372882783412933, "learning_rate": 6.575246281413851e-05, "loss": 46.0041, "step": 2975 }, { "epoch": 0.40630759778824493, "grad_norm": 0.13325732946395874, "learning_rate": 6.573182450310507e-05, "loss": 46.0149, "step": 2976 }, { "epoch": 0.4064441258788996, "grad_norm": 0.1167139932513237, "learning_rate": 6.57111832168224e-05, "loss": 46.001, "step": 2977 }, { "epoch": 0.40658065396955423, "grad_norm": 0.06919942051172256, "learning_rate": 6.569053895919419e-05, "loss": 46.0071, "step": 2978 }, { "epoch": 0.4067171820602089, "grad_norm": 0.0987137034535408, "learning_rate": 6.566989173412476e-05, "loss": 46.0044, "step": 2979 }, { "epoch": 0.4068537101508635, "grad_norm": 0.0734979584813118, "learning_rate": 6.564924154551895e-05, "loss": 46.0069, "step": 2980 }, { "epoch": 0.4069902382415182, "grad_norm": 0.08994965255260468, "learning_rate": 6.562858839728223e-05, "loss": 46.0024, "step": 2981 }, { "epoch": 0.4071267663321728, "grad_norm": 0.07536820322275162, "learning_rate": 6.560793229332056e-05, "loss": 46.0001, "step": 2982 }, { "epoch": 0.4072632944228275, "grad_norm": 0.1885458528995514, "learning_rate": 6.558727323754047e-05, "loss": 46.0025, "step": 2983 }, { "epoch": 0.40739982251348217, "grad_norm": 0.03753744438290596, "learning_rate": 6.556661123384909e-05, "loss": 46.0063, "step": 2984 }, { "epoch": 0.4075363506041368, "grad_norm": 0.12112201005220413, "learning_rate": 6.554594628615406e-05, "loss": 46.0084, "step": 2985 }, { "epoch": 0.40767287869479146, "grad_norm": 0.05179598927497864, "learning_rate": 6.55252783983636e-05, "loss": 46.0078, "step": 2986 }, { "epoch": 0.4078094067854461, "grad_norm": 0.07926946878433228, "learning_rate": 6.550460757438649e-05, "loss": 46.0051, "step": 2987 }, { "epoch": 0.40794593487610076, "grad_norm": 0.10655926167964935, "learning_rate": 6.548393381813204e-05, "loss": 46.0052, "step": 2988 }, { "epoch": 0.40808246296675543, "grad_norm": 0.10628828406333923, "learning_rate": 6.546325713351017e-05, "loss": 46.0004, "step": 2989 }, { "epoch": 0.40821899105741005, "grad_norm": 0.08854484558105469, "learning_rate": 6.544257752443126e-05, "loss": 46.0042, "step": 2990 }, { "epoch": 0.4083555191480647, "grad_norm": 0.12204300612211227, "learning_rate": 6.542189499480636e-05, "loss": 46.0, "step": 2991 }, { "epoch": 0.40849204723871935, "grad_norm": 0.10037143528461456, "learning_rate": 6.540120954854694e-05, "loss": 46.003, "step": 2992 }, { "epoch": 0.408628575329374, "grad_norm": 0.07198242843151093, "learning_rate": 6.538052118956517e-05, "loss": 46.0038, "step": 2993 }, { "epoch": 0.4087651034200287, 
"grad_norm": 0.11555296182632446, "learning_rate": 6.535982992177363e-05, "loss": 46.0047, "step": 2994 }, { "epoch": 0.4089016315106833, "grad_norm": 0.30255138874053955, "learning_rate": 6.533913574908556e-05, "loss": 46.0078, "step": 2995 }, { "epoch": 0.409038159601338, "grad_norm": 0.12400604784488678, "learning_rate": 6.531843867541467e-05, "loss": 46.0047, "step": 2996 }, { "epoch": 0.4091746876919926, "grad_norm": 0.22306384146213531, "learning_rate": 6.529773870467526e-05, "loss": 46.0015, "step": 2997 }, { "epoch": 0.4093112157826473, "grad_norm": 0.21406985819339752, "learning_rate": 6.527703584078218e-05, "loss": 46.0025, "step": 2998 }, { "epoch": 0.4094477438733019, "grad_norm": 0.2059585601091385, "learning_rate": 6.525633008765082e-05, "loss": 46.0049, "step": 2999 }, { "epoch": 0.4095842719639566, "grad_norm": 0.388536274433136, "learning_rate": 6.523562144919711e-05, "loss": 46.0074, "step": 3000 }, { "epoch": 0.40972080005461126, "grad_norm": 0.032229792326688766, "learning_rate": 6.521490992933752e-05, "loss": 46.0095, "step": 3001 }, { "epoch": 0.4098573281452659, "grad_norm": 0.0773838609457016, "learning_rate": 6.519419553198906e-05, "loss": 46.0049, "step": 3002 }, { "epoch": 0.40999385623592055, "grad_norm": 0.10458146780729294, "learning_rate": 6.517347826106935e-05, "loss": 46.004, "step": 3003 }, { "epoch": 0.41013038432657517, "grad_norm": 0.12347229570150375, "learning_rate": 6.515275812049644e-05, "loss": 46.0055, "step": 3004 }, { "epoch": 0.41026691241722985, "grad_norm": 0.16248582303524017, "learning_rate": 6.513203511418902e-05, "loss": 46.0025, "step": 3005 }, { "epoch": 0.4104034405078845, "grad_norm": 0.056949831545352936, "learning_rate": 6.511130924606629e-05, "loss": 46.0004, "step": 3006 }, { "epoch": 0.41053996859853914, "grad_norm": 0.09475484490394592, "learning_rate": 6.509058052004796e-05, "loss": 46.0065, "step": 3007 }, { "epoch": 0.4106764966891938, "grad_norm": 0.08122842013835907, "learning_rate": 6.506984894005435e-05, "loss": 46.0031, "step": 3008 }, { "epoch": 0.41081302477984843, "grad_norm": 0.10592480003833771, "learning_rate": 6.504911451000622e-05, "loss": 46.0025, "step": 3009 }, { "epoch": 0.4109495528705031, "grad_norm": 0.06374751776456833, "learning_rate": 6.502837723382499e-05, "loss": 46.0008, "step": 3010 }, { "epoch": 0.4110860809611578, "grad_norm": 0.07758045196533203, "learning_rate": 6.50076371154325e-05, "loss": 46.0113, "step": 3011 }, { "epoch": 0.4112226090518124, "grad_norm": 0.06114276498556137, "learning_rate": 6.498689415875121e-05, "loss": 46.0112, "step": 3012 }, { "epoch": 0.4113591371424671, "grad_norm": 0.10836328566074371, "learning_rate": 6.496614836770411e-05, "loss": 46.0036, "step": 3013 }, { "epoch": 0.4114956652331217, "grad_norm": 0.16181589663028717, "learning_rate": 6.494539974621467e-05, "loss": 46.0067, "step": 3014 }, { "epoch": 0.4116321933237764, "grad_norm": 0.08463869988918304, "learning_rate": 6.492464829820694e-05, "loss": 46.0047, "step": 3015 }, { "epoch": 0.411768721414431, "grad_norm": 0.05180621147155762, "learning_rate": 6.49038940276055e-05, "loss": 46.0048, "step": 3016 }, { "epoch": 0.41190524950508567, "grad_norm": 0.14443132281303406, "learning_rate": 6.488313693833545e-05, "loss": 46.0189, "step": 3017 }, { "epoch": 0.41204177759574034, "grad_norm": 0.05938568711280823, "learning_rate": 6.486237703432245e-05, "loss": 46.0036, "step": 3018 }, { "epoch": 0.41217830568639496, "grad_norm": 0.2428516000509262, "learning_rate": 6.484161431949267e-05, "loss": 46.0028, "step": 3019 
}, { "epoch": 0.41231483377704964, "grad_norm": 0.09055539220571518, "learning_rate": 6.48208487977728e-05, "loss": 46.0078, "step": 3020 }, { "epoch": 0.41245136186770426, "grad_norm": 0.10341428220272064, "learning_rate": 6.480008047309008e-05, "loss": 46.0048, "step": 3021 }, { "epoch": 0.41258788995835893, "grad_norm": 0.036727167665958405, "learning_rate": 6.47793093493723e-05, "loss": 46.0026, "step": 3022 }, { "epoch": 0.4127244180490136, "grad_norm": 0.06123412400484085, "learning_rate": 6.475853543054774e-05, "loss": 46.0098, "step": 3023 }, { "epoch": 0.41286094613966823, "grad_norm": 0.05888576805591583, "learning_rate": 6.473775872054521e-05, "loss": 46.0028, "step": 3024 }, { "epoch": 0.4129974742303229, "grad_norm": 0.1471269130706787, "learning_rate": 6.471697922329411e-05, "loss": 46.009, "step": 3025 }, { "epoch": 0.4131340023209775, "grad_norm": 0.042687803506851196, "learning_rate": 6.469619694272426e-05, "loss": 46.0076, "step": 3026 }, { "epoch": 0.4132705304116322, "grad_norm": 0.17965491116046906, "learning_rate": 6.467541188276611e-05, "loss": 46.0051, "step": 3027 }, { "epoch": 0.4134070585022869, "grad_norm": 0.0807965099811554, "learning_rate": 6.465462404735057e-05, "loss": 46.0079, "step": 3028 }, { "epoch": 0.4135435865929415, "grad_norm": 0.0951194167137146, "learning_rate": 6.46338334404091e-05, "loss": 46.0067, "step": 3029 }, { "epoch": 0.41368011468359617, "grad_norm": 0.11773881316184998, "learning_rate": 6.461304006587371e-05, "loss": 46.0059, "step": 3030 }, { "epoch": 0.4138166427742508, "grad_norm": 0.07785610854625702, "learning_rate": 6.459224392767685e-05, "loss": 46.0131, "step": 3031 }, { "epoch": 0.41395317086490546, "grad_norm": 0.09976619482040405, "learning_rate": 6.45714450297516e-05, "loss": 46.0063, "step": 3032 }, { "epoch": 0.4140896989555601, "grad_norm": 0.12906283140182495, "learning_rate": 6.455064337603148e-05, "loss": 46.013, "step": 3033 }, { "epoch": 0.41422622704621476, "grad_norm": 0.038255833089351654, "learning_rate": 6.452983897045057e-05, "loss": 46.0075, "step": 3034 }, { "epoch": 0.41436275513686943, "grad_norm": 0.0717085748910904, "learning_rate": 6.450903181694345e-05, "loss": 46.0056, "step": 3035 }, { "epoch": 0.41449928322752405, "grad_norm": 0.06787427514791489, "learning_rate": 6.448822191944526e-05, "loss": 46.0009, "step": 3036 }, { "epoch": 0.4146358113181787, "grad_norm": 0.08207577466964722, "learning_rate": 6.446740928189159e-05, "loss": 46.015, "step": 3037 }, { "epoch": 0.41477233940883335, "grad_norm": 0.04329405725002289, "learning_rate": 6.444659390821861e-05, "loss": 46.0064, "step": 3038 }, { "epoch": 0.414908867499488, "grad_norm": 0.08540213108062744, "learning_rate": 6.442577580236298e-05, "loss": 46.0054, "step": 3039 }, { "epoch": 0.4150453955901427, "grad_norm": 0.04842311888933182, "learning_rate": 6.440495496826189e-05, "loss": 46.0023, "step": 3040 }, { "epoch": 0.4151819236807973, "grad_norm": 0.13656380772590637, "learning_rate": 6.438413140985301e-05, "loss": 46.0062, "step": 3041 }, { "epoch": 0.415318451771452, "grad_norm": 0.07023292034864426, "learning_rate": 6.43633051310746e-05, "loss": 46.0079, "step": 3042 }, { "epoch": 0.4154549798621066, "grad_norm": 0.03958439454436302, "learning_rate": 6.434247613586534e-05, "loss": 46.0001, "step": 3043 }, { "epoch": 0.4155915079527613, "grad_norm": 0.06279753148555756, "learning_rate": 6.432164442816451e-05, "loss": 46.0045, "step": 3044 }, { "epoch": 0.41572803604341596, "grad_norm": 0.2406947761774063, "learning_rate": 
6.430081001191184e-05, "loss": 46.0027, "step": 3045 }, { "epoch": 0.4158645641340706, "grad_norm": 0.18376927077770233, "learning_rate": 6.427997289104763e-05, "loss": 46.0036, "step": 3046 }, { "epoch": 0.41600109222472526, "grad_norm": 0.04252389818429947, "learning_rate": 6.425913306951259e-05, "loss": 46.0021, "step": 3047 }, { "epoch": 0.4161376203153799, "grad_norm": 0.20044878125190735, "learning_rate": 6.423829055124806e-05, "loss": 46.0073, "step": 3048 }, { "epoch": 0.41627414840603455, "grad_norm": 0.3393474817276001, "learning_rate": 6.421744534019584e-05, "loss": 46.0063, "step": 3049 }, { "epoch": 0.41641067649668917, "grad_norm": 0.16257227957248688, "learning_rate": 6.419659744029822e-05, "loss": 46.0101, "step": 3050 }, { "epoch": 0.41654720458734384, "grad_norm": 0.07500612735748291, "learning_rate": 6.417574685549805e-05, "loss": 46.0101, "step": 3051 }, { "epoch": 0.4166837326779985, "grad_norm": 0.07293863594532013, "learning_rate": 6.415489358973859e-05, "loss": 46.0035, "step": 3052 }, { "epoch": 0.41682026076865314, "grad_norm": 0.05854949355125427, "learning_rate": 6.413403764696373e-05, "loss": 46.0032, "step": 3053 }, { "epoch": 0.4169567888593078, "grad_norm": 0.04135798662900925, "learning_rate": 6.411317903111779e-05, "loss": 46.0046, "step": 3054 }, { "epoch": 0.41709331694996243, "grad_norm": 0.0669059008359909, "learning_rate": 6.409231774614561e-05, "loss": 46.0029, "step": 3055 }, { "epoch": 0.4172298450406171, "grad_norm": 0.03054913505911827, "learning_rate": 6.407145379599253e-05, "loss": 46.0025, "step": 3056 }, { "epoch": 0.4173663731312718, "grad_norm": 0.031107984483242035, "learning_rate": 6.40505871846044e-05, "loss": 46.0007, "step": 3057 }, { "epoch": 0.4175029012219264, "grad_norm": 0.029462145641446114, "learning_rate": 6.402971791592763e-05, "loss": 46.0132, "step": 3058 }, { "epoch": 0.4176394293125811, "grad_norm": 0.03708043321967125, "learning_rate": 6.400884599390899e-05, "loss": 46.0031, "step": 3059 }, { "epoch": 0.4177759574032357, "grad_norm": 0.15843965113162994, "learning_rate": 6.39879714224959e-05, "loss": 46.0026, "step": 3060 }, { "epoch": 0.4179124854938904, "grad_norm": 0.06722952425479889, "learning_rate": 6.39670942056362e-05, "loss": 46.013, "step": 3061 }, { "epoch": 0.418049013584545, "grad_norm": 0.05922914296388626, "learning_rate": 6.394621434727828e-05, "loss": 46.006, "step": 3062 }, { "epoch": 0.41818554167519967, "grad_norm": 0.05466350540518761, "learning_rate": 6.392533185137094e-05, "loss": 46.0057, "step": 3063 }, { "epoch": 0.41832206976585434, "grad_norm": 0.1463405340909958, "learning_rate": 6.390444672186359e-05, "loss": 46.0041, "step": 3064 }, { "epoch": 0.41845859785650896, "grad_norm": 0.057675376534461975, "learning_rate": 6.388355896270608e-05, "loss": 46.0045, "step": 3065 }, { "epoch": 0.41859512594716364, "grad_norm": 0.08256033807992935, "learning_rate": 6.386266857784876e-05, "loss": 46.0061, "step": 3066 }, { "epoch": 0.41873165403781826, "grad_norm": 0.13502010703086853, "learning_rate": 6.384177557124247e-05, "loss": 46.0094, "step": 3067 }, { "epoch": 0.41886818212847293, "grad_norm": 0.13152672350406647, "learning_rate": 6.382087994683859e-05, "loss": 46.0082, "step": 3068 }, { "epoch": 0.4190047102191276, "grad_norm": 0.10914742201566696, "learning_rate": 6.379998170858892e-05, "loss": 46.0034, "step": 3069 }, { "epoch": 0.4191412383097822, "grad_norm": 0.10139483958482742, "learning_rate": 6.377908086044582e-05, "loss": 46.0062, "step": 3070 }, { "epoch": 0.4192777664004369, 
"grad_norm": 0.08345407992601395, "learning_rate": 6.37581774063621e-05, "loss": 46.004, "step": 3071 }, { "epoch": 0.4194142944910915, "grad_norm": 0.05705133080482483, "learning_rate": 6.373727135029113e-05, "loss": 46.0079, "step": 3072 }, { "epoch": 0.4195508225817462, "grad_norm": 0.08439817279577255, "learning_rate": 6.371636269618667e-05, "loss": 46.0058, "step": 3073 }, { "epoch": 0.41968735067240087, "grad_norm": 0.10786799341440201, "learning_rate": 6.369545144800308e-05, "loss": 46.0027, "step": 3074 }, { "epoch": 0.4198238787630555, "grad_norm": 0.10755147784948349, "learning_rate": 6.36745376096951e-05, "loss": 46.0002, "step": 3075 }, { "epoch": 0.41996040685371017, "grad_norm": 0.057623300701379776, "learning_rate": 6.365362118521807e-05, "loss": 46.0045, "step": 3076 }, { "epoch": 0.4200969349443648, "grad_norm": 0.043174561113119125, "learning_rate": 6.363270217852771e-05, "loss": 46.0028, "step": 3077 }, { "epoch": 0.42023346303501946, "grad_norm": 0.05158182978630066, "learning_rate": 6.361178059358035e-05, "loss": 46.0074, "step": 3078 }, { "epoch": 0.4203699911256741, "grad_norm": 0.0455855093896389, "learning_rate": 6.359085643433269e-05, "loss": 46.0007, "step": 3079 }, { "epoch": 0.42050651921632876, "grad_norm": 0.10318309813737869, "learning_rate": 6.3569929704742e-05, "loss": 46.009, "step": 3080 }, { "epoch": 0.42064304730698343, "grad_norm": 0.04826938733458519, "learning_rate": 6.3549000408766e-05, "loss": 46.0106, "step": 3081 }, { "epoch": 0.42077957539763805, "grad_norm": 0.06663958728313446, "learning_rate": 6.352806855036287e-05, "loss": 46.0019, "step": 3082 }, { "epoch": 0.4209161034882927, "grad_norm": 0.09227830916643143, "learning_rate": 6.350713413349134e-05, "loss": 46.0059, "step": 3083 }, { "epoch": 0.42105263157894735, "grad_norm": 0.09250567108392715, "learning_rate": 6.348619716211058e-05, "loss": 46.0095, "step": 3084 }, { "epoch": 0.421189159669602, "grad_norm": 0.1283983290195465, "learning_rate": 6.346525764018026e-05, "loss": 46.0054, "step": 3085 }, { "epoch": 0.4213256877602567, "grad_norm": 0.09813642501831055, "learning_rate": 6.344431557166049e-05, "loss": 46.0036, "step": 3086 }, { "epoch": 0.4214622158509113, "grad_norm": 0.09345485270023346, "learning_rate": 6.342337096051193e-05, "loss": 46.0013, "step": 3087 }, { "epoch": 0.421598743941566, "grad_norm": 0.18421253561973572, "learning_rate": 6.34024238106957e-05, "loss": 46.0064, "step": 3088 }, { "epoch": 0.4217352720322206, "grad_norm": 0.03930913656949997, "learning_rate": 6.338147412617332e-05, "loss": 46.0001, "step": 3089 }, { "epoch": 0.4218718001228753, "grad_norm": 0.0927978903055191, "learning_rate": 6.336052191090695e-05, "loss": 46.0069, "step": 3090 }, { "epoch": 0.42200832821352996, "grad_norm": 0.04483458772301674, "learning_rate": 6.333956716885905e-05, "loss": 46.0029, "step": 3091 }, { "epoch": 0.4221448563041846, "grad_norm": 0.07580871880054474, "learning_rate": 6.33186099039927e-05, "loss": 46.0032, "step": 3092 }, { "epoch": 0.42228138439483925, "grad_norm": 0.05554001033306122, "learning_rate": 6.329765012027135e-05, "loss": 46.0022, "step": 3093 }, { "epoch": 0.4224179124854939, "grad_norm": 0.08425886183977127, "learning_rate": 6.327668782165903e-05, "loss": 46.0017, "step": 3094 }, { "epoch": 0.42255444057614855, "grad_norm": 0.17459827661514282, "learning_rate": 6.325572301212016e-05, "loss": 46.0038, "step": 3095 }, { "epoch": 0.42269096866680317, "grad_norm": 0.14648029208183289, "learning_rate": 6.323475569561967e-05, "loss": 46.0004, "step": 3096 }, 
{ "epoch": 0.42282749675745784, "grad_norm": 0.04045286774635315, "learning_rate": 6.321378587612297e-05, "loss": 46.0084, "step": 3097 }, { "epoch": 0.4229640248481125, "grad_norm": 0.10925066471099854, "learning_rate": 6.319281355759594e-05, "loss": 46.0038, "step": 3098 }, { "epoch": 0.42310055293876714, "grad_norm": 0.20921269059181213, "learning_rate": 6.317183874400491e-05, "loss": 46.0033, "step": 3099 }, { "epoch": 0.4232370810294218, "grad_norm": 0.13181813061237335, "learning_rate": 6.31508614393167e-05, "loss": 46.0, "step": 3100 }, { "epoch": 0.42337360912007643, "grad_norm": 0.04670752212405205, "learning_rate": 6.31298816474986e-05, "loss": 46.0103, "step": 3101 }, { "epoch": 0.4235101372107311, "grad_norm": 0.0360301174223423, "learning_rate": 6.310889937251839e-05, "loss": 46.003, "step": 3102 }, { "epoch": 0.4236466653013858, "grad_norm": 0.038068387657403946, "learning_rate": 6.308791461834426e-05, "loss": 46.001, "step": 3103 }, { "epoch": 0.4237831933920404, "grad_norm": 0.06321882456541061, "learning_rate": 6.306692738894496e-05, "loss": 46.0024, "step": 3104 }, { "epoch": 0.4239197214826951, "grad_norm": 0.05484837293624878, "learning_rate": 6.304593768828964e-05, "loss": 46.0127, "step": 3105 }, { "epoch": 0.4240562495733497, "grad_norm": 0.13911764323711395, "learning_rate": 6.30249455203479e-05, "loss": 46.0, "step": 3106 }, { "epoch": 0.4241927776640044, "grad_norm": 0.10663256794214249, "learning_rate": 6.300395088908987e-05, "loss": 46.0019, "step": 3107 }, { "epoch": 0.42432930575465905, "grad_norm": 0.09675727784633636, "learning_rate": 6.298295379848612e-05, "loss": 46.0039, "step": 3108 }, { "epoch": 0.42446583384531367, "grad_norm": 0.09626434743404388, "learning_rate": 6.296195425250768e-05, "loss": 46.0026, "step": 3109 }, { "epoch": 0.42460236193596834, "grad_norm": 0.07951164245605469, "learning_rate": 6.294095225512603e-05, "loss": 46.0041, "step": 3110 }, { "epoch": 0.42473889002662296, "grad_norm": 0.03175652027130127, "learning_rate": 6.291994781031316e-05, "loss": 46.0027, "step": 3111 }, { "epoch": 0.42487541811727764, "grad_norm": 0.07827149331569672, "learning_rate": 6.289894092204147e-05, "loss": 46.0093, "step": 3112 }, { "epoch": 0.42501194620793226, "grad_norm": 0.07771078497171402, "learning_rate": 6.287793159428384e-05, "loss": 46.0036, "step": 3113 }, { "epoch": 0.42514847429858693, "grad_norm": 0.06853289157152176, "learning_rate": 6.285691983101364e-05, "loss": 46.004, "step": 3114 }, { "epoch": 0.4252850023892416, "grad_norm": 0.04411051422357559, "learning_rate": 6.283590563620466e-05, "loss": 46.0061, "step": 3115 }, { "epoch": 0.4254215304798962, "grad_norm": 0.06752839684486389, "learning_rate": 6.281488901383116e-05, "loss": 46.0076, "step": 3116 }, { "epoch": 0.4255580585705509, "grad_norm": 0.08104309439659119, "learning_rate": 6.279386996786789e-05, "loss": 46.0059, "step": 3117 }, { "epoch": 0.4256945866612055, "grad_norm": 0.11394506692886353, "learning_rate": 6.277284850229001e-05, "loss": 46.0008, "step": 3118 }, { "epoch": 0.4258311147518602, "grad_norm": 0.08206288516521454, "learning_rate": 6.275182462107319e-05, "loss": 46.0079, "step": 3119 }, { "epoch": 0.42596764284251487, "grad_norm": 0.05211225524544716, "learning_rate": 6.273079832819349e-05, "loss": 46.0043, "step": 3120 }, { "epoch": 0.4261041709331695, "grad_norm": 0.09561647474765778, "learning_rate": 6.27097696276275e-05, "loss": 46.0036, "step": 3121 }, { "epoch": 0.42624069902382417, "grad_norm": 0.0986897274851799, "learning_rate": 6.26887385233522e-05, 
"loss": 46.0038, "step": 3122 }, { "epoch": 0.4263772271144788, "grad_norm": 0.07617656141519547, "learning_rate": 6.266770501934508e-05, "loss": 46.0117, "step": 3123 }, { "epoch": 0.42651375520513346, "grad_norm": 0.06046047806739807, "learning_rate": 6.264666911958404e-05, "loss": 46.0057, "step": 3124 }, { "epoch": 0.42665028329578814, "grad_norm": 0.16818149387836456, "learning_rate": 6.262563082804748e-05, "loss": 46.0061, "step": 3125 }, { "epoch": 0.42678681138644275, "grad_norm": 0.05400524288415909, "learning_rate": 6.260459014871418e-05, "loss": 46.0086, "step": 3126 }, { "epoch": 0.42692333947709743, "grad_norm": 0.08694793283939362, "learning_rate": 6.258354708556346e-05, "loss": 46.0048, "step": 3127 }, { "epoch": 0.42705986756775205, "grad_norm": 0.048912450671195984, "learning_rate": 6.256250164257503e-05, "loss": 46.0069, "step": 3128 }, { "epoch": 0.4271963956584067, "grad_norm": 0.10415376722812653, "learning_rate": 6.254145382372905e-05, "loss": 46.0075, "step": 3129 }, { "epoch": 0.42733292374906134, "grad_norm": 0.05751701444387436, "learning_rate": 6.252040363300616e-05, "loss": 46.0109, "step": 3130 }, { "epoch": 0.427469451839716, "grad_norm": 0.12857235968112946, "learning_rate": 6.249935107438743e-05, "loss": 46.0054, "step": 3131 }, { "epoch": 0.4276059799303707, "grad_norm": 0.08819173276424408, "learning_rate": 6.24782961518544e-05, "loss": 46.0007, "step": 3132 }, { "epoch": 0.4277425080210253, "grad_norm": 0.08147698640823364, "learning_rate": 6.245723886938902e-05, "loss": 46.0038, "step": 3133 }, { "epoch": 0.42787903611168, "grad_norm": 0.09769059717655182, "learning_rate": 6.243617923097371e-05, "loss": 46.014, "step": 3134 }, { "epoch": 0.4280155642023346, "grad_norm": 0.03360109031200409, "learning_rate": 6.241511724059133e-05, "loss": 46.0053, "step": 3135 }, { "epoch": 0.4281520922929893, "grad_norm": 0.16110147535800934, "learning_rate": 6.23940529022252e-05, "loss": 46.0029, "step": 3136 }, { "epoch": 0.42828862038364396, "grad_norm": 0.07278283685445786, "learning_rate": 6.237298621985905e-05, "loss": 46.0041, "step": 3137 }, { "epoch": 0.4284251484742986, "grad_norm": 0.038830436766147614, "learning_rate": 6.235191719747708e-05, "loss": 46.0, "step": 3138 }, { "epoch": 0.42856167656495325, "grad_norm": 0.09360076487064362, "learning_rate": 6.233084583906395e-05, "loss": 46.0009, "step": 3139 }, { "epoch": 0.4286982046556079, "grad_norm": 0.09371034055948257, "learning_rate": 6.230977214860467e-05, "loss": 46.0017, "step": 3140 }, { "epoch": 0.42883473274626255, "grad_norm": 0.06570795923471451, "learning_rate": 6.228869613008485e-05, "loss": 46.0053, "step": 3141 }, { "epoch": 0.4289712608369172, "grad_norm": 0.06049337610602379, "learning_rate": 6.226761778749037e-05, "loss": 46.0034, "step": 3142 }, { "epoch": 0.42910778892757184, "grad_norm": 0.0999312698841095, "learning_rate": 6.224653712480768e-05, "loss": 46.0027, "step": 3143 }, { "epoch": 0.4292443170182265, "grad_norm": 0.050478339195251465, "learning_rate": 6.222545414602358e-05, "loss": 46.0008, "step": 3144 }, { "epoch": 0.42938084510888114, "grad_norm": 0.0534159280359745, "learning_rate": 6.22043688551254e-05, "loss": 46.0035, "step": 3145 }, { "epoch": 0.4295173731995358, "grad_norm": 0.08422348648309708, "learning_rate": 6.218328125610079e-05, "loss": 46.0, "step": 3146 }, { "epoch": 0.42965390129019043, "grad_norm": 0.2510489225387573, "learning_rate": 6.216219135293794e-05, "loss": 46.0021, "step": 3147 }, { "epoch": 0.4297904293808451, "grad_norm": 0.3223000764846802, 
"learning_rate": 6.214109914962541e-05, "loss": 46.0014, "step": 3148 }, { "epoch": 0.4299269574714998, "grad_norm": 0.24452504515647888, "learning_rate": 6.212000465015225e-05, "loss": 46.0062, "step": 3149 }, { "epoch": 0.4300634855621544, "grad_norm": 0.15141169726848602, "learning_rate": 6.209890785850788e-05, "loss": 46.0, "step": 3150 }, { "epoch": 0.4302000136528091, "grad_norm": 0.06830636411905289, "learning_rate": 6.20778087786822e-05, "loss": 46.0032, "step": 3151 }, { "epoch": 0.4303365417434637, "grad_norm": 0.04586782678961754, "learning_rate": 6.205670741466555e-05, "loss": 46.0034, "step": 3152 }, { "epoch": 0.43047306983411837, "grad_norm": 0.04550708830356598, "learning_rate": 6.203560377044866e-05, "loss": 46.0001, "step": 3153 }, { "epoch": 0.43060959792477305, "grad_norm": 0.2034296840429306, "learning_rate": 6.201449785002272e-05, "loss": 46.0071, "step": 3154 }, { "epoch": 0.43074612601542767, "grad_norm": 0.1553216576576233, "learning_rate": 6.199338965737935e-05, "loss": 46.0067, "step": 3155 }, { "epoch": 0.43088265410608234, "grad_norm": 0.09652433544397354, "learning_rate": 6.197227919651057e-05, "loss": 46.0032, "step": 3156 }, { "epoch": 0.43101918219673696, "grad_norm": 0.12523505091667175, "learning_rate": 6.19511664714089e-05, "loss": 46.0027, "step": 3157 }, { "epoch": 0.43115571028739164, "grad_norm": 0.03402751684188843, "learning_rate": 6.193005148606717e-05, "loss": 46.0028, "step": 3158 }, { "epoch": 0.43129223837804626, "grad_norm": 0.16455547511577606, "learning_rate": 6.19089342444788e-05, "loss": 46.0031, "step": 3159 }, { "epoch": 0.43142876646870093, "grad_norm": 0.07008813321590424, "learning_rate": 6.188781475063747e-05, "loss": 46.0096, "step": 3160 }, { "epoch": 0.4315652945593556, "grad_norm": 0.09810718148946762, "learning_rate": 6.186669300853739e-05, "loss": 46.0104, "step": 3161 }, { "epoch": 0.4317018226500102, "grad_norm": 0.03928745165467262, "learning_rate": 6.184556902217317e-05, "loss": 46.01, "step": 3162 }, { "epoch": 0.4318383507406649, "grad_norm": 0.10421772301197052, "learning_rate": 6.182444279553983e-05, "loss": 46.0043, "step": 3163 }, { "epoch": 0.4319748788313195, "grad_norm": 0.07950572669506073, "learning_rate": 6.180331433263283e-05, "loss": 46.0038, "step": 3164 }, { "epoch": 0.4321114069219742, "grad_norm": 0.06618155539035797, "learning_rate": 6.178218363744806e-05, "loss": 46.0063, "step": 3165 }, { "epoch": 0.43224793501262887, "grad_norm": 0.04069308564066887, "learning_rate": 6.17610507139818e-05, "loss": 46.0037, "step": 3166 }, { "epoch": 0.4323844631032835, "grad_norm": 0.19080381095409393, "learning_rate": 6.173991556623077e-05, "loss": 46.0071, "step": 3167 }, { "epoch": 0.43252099119393816, "grad_norm": 0.1092572882771492, "learning_rate": 6.17187781981921e-05, "loss": 46.0028, "step": 3168 }, { "epoch": 0.4326575192845928, "grad_norm": 0.07284687459468842, "learning_rate": 6.169763861386341e-05, "loss": 46.0077, "step": 3169 }, { "epoch": 0.43279404737524746, "grad_norm": 0.1310311108827591, "learning_rate": 6.167649681724263e-05, "loss": 46.0117, "step": 3170 }, { "epoch": 0.43293057546590213, "grad_norm": 0.10845159739255905, "learning_rate": 6.165535281232817e-05, "loss": 46.0058, "step": 3171 }, { "epoch": 0.43306710355655675, "grad_norm": 0.0521695613861084, "learning_rate": 6.163420660311884e-05, "loss": 46.0039, "step": 3172 }, { "epoch": 0.43320363164721143, "grad_norm": 0.13437247276306152, "learning_rate": 6.16130581936139e-05, "loss": 46.0079, "step": 3173 }, { "epoch": 0.43334015973786605, 
"grad_norm": 0.033553533256053925, "learning_rate": 6.159190758781297e-05, "loss": 46.011, "step": 3174 }, { "epoch": 0.4334766878285207, "grad_norm": 0.0923759788274765, "learning_rate": 6.157075478971613e-05, "loss": 46.0006, "step": 3175 }, { "epoch": 0.43361321591917534, "grad_norm": 0.0593784861266613, "learning_rate": 6.154959980332386e-05, "loss": 46.0128, "step": 3176 }, { "epoch": 0.43374974400983, "grad_norm": 0.0765688568353653, "learning_rate": 6.152844263263704e-05, "loss": 46.0064, "step": 3177 }, { "epoch": 0.4338862721004847, "grad_norm": 0.03967902064323425, "learning_rate": 6.150728328165701e-05, "loss": 46.0013, "step": 3178 }, { "epoch": 0.4340228001911393, "grad_norm": 0.10994844138622284, "learning_rate": 6.148612175438544e-05, "loss": 46.0042, "step": 3179 }, { "epoch": 0.434159328281794, "grad_norm": 0.07339121401309967, "learning_rate": 6.146495805482451e-05, "loss": 46.002, "step": 3180 }, { "epoch": 0.4342958563724486, "grad_norm": 0.2033199518918991, "learning_rate": 6.144379218697672e-05, "loss": 46.0025, "step": 3181 }, { "epoch": 0.4344323844631033, "grad_norm": 0.07573425024747849, "learning_rate": 6.142262415484506e-05, "loss": 46.0032, "step": 3182 }, { "epoch": 0.43456891255375796, "grad_norm": 0.061398617923259735, "learning_rate": 6.140145396243286e-05, "loss": 46.0065, "step": 3183 }, { "epoch": 0.4347054406444126, "grad_norm": 0.031197747215628624, "learning_rate": 6.138028161374389e-05, "loss": 46.0099, "step": 3184 }, { "epoch": 0.43484196873506725, "grad_norm": 0.09142500907182693, "learning_rate": 6.135910711278237e-05, "loss": 46.0007, "step": 3185 }, { "epoch": 0.43497849682572187, "grad_norm": 0.1840081810951233, "learning_rate": 6.133793046355282e-05, "loss": 46.0051, "step": 3186 }, { "epoch": 0.43511502491637655, "grad_norm": 0.06781520694494247, "learning_rate": 6.13167516700603e-05, "loss": 46.005, "step": 3187 }, { "epoch": 0.4352515530070312, "grad_norm": 0.15074709057807922, "learning_rate": 6.129557073631013e-05, "loss": 46.0011, "step": 3188 }, { "epoch": 0.43538808109768584, "grad_norm": 0.08946449309587479, "learning_rate": 6.127438766630818e-05, "loss": 46.0003, "step": 3189 }, { "epoch": 0.4355246091883405, "grad_norm": 0.14158055186271667, "learning_rate": 6.12532024640606e-05, "loss": 46.0016, "step": 3190 }, { "epoch": 0.43566113727899514, "grad_norm": 0.13696853816509247, "learning_rate": 6.123201513357405e-05, "loss": 46.0067, "step": 3191 }, { "epoch": 0.4357976653696498, "grad_norm": 0.03912540525197983, "learning_rate": 6.121082567885549e-05, "loss": 46.0032, "step": 3192 }, { "epoch": 0.43593419346030443, "grad_norm": 0.06694703549146652, "learning_rate": 6.118963410391239e-05, "loss": 46.0044, "step": 3193 }, { "epoch": 0.4360707215509591, "grad_norm": 0.08239177614450455, "learning_rate": 6.11684404127525e-05, "loss": 46.0047, "step": 3194 }, { "epoch": 0.4362072496416138, "grad_norm": 0.05849479138851166, "learning_rate": 6.114724460938407e-05, "loss": 46.0014, "step": 3195 }, { "epoch": 0.4363437777322684, "grad_norm": 0.35454389452934265, "learning_rate": 6.112604669781572e-05, "loss": 46.0035, "step": 3196 }, { "epoch": 0.4364803058229231, "grad_norm": 0.12231047451496124, "learning_rate": 6.110484668205645e-05, "loss": 46.0029, "step": 3197 }, { "epoch": 0.4366168339135777, "grad_norm": 0.23862168192863464, "learning_rate": 6.108364456611566e-05, "loss": 46.0081, "step": 3198 }, { "epoch": 0.43675336200423237, "grad_norm": 0.19826661050319672, "learning_rate": 6.106244035400317e-05, "loss": 46.0031, "step": 3199 }, 
{ "epoch": 0.43688989009488705, "grad_norm": 0.1390678435564041, "learning_rate": 6.104123404972918e-05, "loss": 46.0047, "step": 3200 }, { "epoch": 0.43702641818554167, "grad_norm": 0.05851922929286957, "learning_rate": 6.102002565730429e-05, "loss": 46.0067, "step": 3201 }, { "epoch": 0.43716294627619634, "grad_norm": 0.0998137816786766, "learning_rate": 6.0998815180739474e-05, "loss": 46.0046, "step": 3202 }, { "epoch": 0.43729947436685096, "grad_norm": 0.0628010705113411, "learning_rate": 6.097760262404617e-05, "loss": 46.0028, "step": 3203 }, { "epoch": 0.43743600245750563, "grad_norm": 0.030384881421923637, "learning_rate": 6.0956387991236094e-05, "loss": 46.0053, "step": 3204 }, { "epoch": 0.4375725305481603, "grad_norm": 0.037958431988954544, "learning_rate": 6.093517128632148e-05, "loss": 46.0009, "step": 3205 }, { "epoch": 0.43770905863881493, "grad_norm": 0.15799258649349213, "learning_rate": 6.091395251331486e-05, "loss": 46.0019, "step": 3206 }, { "epoch": 0.4378455867294696, "grad_norm": 0.07213957607746124, "learning_rate": 6.08927316762292e-05, "loss": 46.0025, "step": 3207 }, { "epoch": 0.4379821148201242, "grad_norm": 0.05441324785351753, "learning_rate": 6.0871508779077856e-05, "loss": 46.0029, "step": 3208 }, { "epoch": 0.4381186429107789, "grad_norm": 0.06127326190471649, "learning_rate": 6.085028382587454e-05, "loss": 46.0032, "step": 3209 }, { "epoch": 0.4382551710014335, "grad_norm": 0.2748855650424957, "learning_rate": 6.0829056820633413e-05, "loss": 46.0015, "step": 3210 }, { "epoch": 0.4383916990920882, "grad_norm": 0.050557781010866165, "learning_rate": 6.080782776736896e-05, "loss": 46.0049, "step": 3211 }, { "epoch": 0.43852822718274287, "grad_norm": 0.07262804359197617, "learning_rate": 6.0786596670096093e-05, "loss": 46.0027, "step": 3212 }, { "epoch": 0.4386647552733975, "grad_norm": 0.0333293192088604, "learning_rate": 6.076536353283011e-05, "loss": 46.0023, "step": 3213 }, { "epoch": 0.43880128336405216, "grad_norm": 0.0844803974032402, "learning_rate": 6.074412835958667e-05, "loss": 46.0056, "step": 3214 }, { "epoch": 0.4389378114547068, "grad_norm": 0.20421677827835083, "learning_rate": 6.0722891154381834e-05, "loss": 46.0153, "step": 3215 }, { "epoch": 0.43907433954536146, "grad_norm": 0.16153252124786377, "learning_rate": 6.070165192123205e-05, "loss": 46.0013, "step": 3216 }, { "epoch": 0.43921086763601613, "grad_norm": 0.0705636665225029, "learning_rate": 6.068041066415414e-05, "loss": 46.0011, "step": 3217 }, { "epoch": 0.43934739572667075, "grad_norm": 0.15467534959316254, "learning_rate": 6.065916738716533e-05, "loss": 46.0094, "step": 3218 }, { "epoch": 0.43948392381732543, "grad_norm": 0.07918302714824677, "learning_rate": 6.063792209428318e-05, "loss": 46.0037, "step": 3219 }, { "epoch": 0.43962045190798005, "grad_norm": 0.04068339243531227, "learning_rate": 6.06166747895257e-05, "loss": 46.0043, "step": 3220 }, { "epoch": 0.4397569799986347, "grad_norm": 0.05974968522787094, "learning_rate": 6.0595425476911195e-05, "loss": 46.0043, "step": 3221 }, { "epoch": 0.4398935080892894, "grad_norm": 0.06394850462675095, "learning_rate": 6.0574174160458455e-05, "loss": 46.0048, "step": 3222 }, { "epoch": 0.440030036179944, "grad_norm": 0.11815439909696579, "learning_rate": 6.0552920844186546e-05, "loss": 46.0052, "step": 3223 }, { "epoch": 0.4401665642705987, "grad_norm": 0.038392163813114166, "learning_rate": 6.053166553211499e-05, "loss": 46.0046, "step": 3224 }, { "epoch": 0.4403030923612533, "grad_norm": 0.13214096426963806, "learning_rate": 
6.051040822826363e-05, "loss": 46.006, "step": 3225 }, { "epoch": 0.440439620451908, "grad_norm": 0.0326433964073658, "learning_rate": 6.048914893665272e-05, "loss": 46.0066, "step": 3226 }, { "epoch": 0.4405761485425626, "grad_norm": 0.08121295273303986, "learning_rate": 6.046788766130289e-05, "loss": 46.0051, "step": 3227 }, { "epoch": 0.4407126766332173, "grad_norm": 0.11382203549146652, "learning_rate": 6.044662440623512e-05, "loss": 46.004, "step": 3228 }, { "epoch": 0.44084920472387196, "grad_norm": 0.14298652112483978, "learning_rate": 6.04253591754708e-05, "loss": 46.0085, "step": 3229 }, { "epoch": 0.4409857328145266, "grad_norm": 0.0546637661755085, "learning_rate": 6.040409197303164e-05, "loss": 46.0017, "step": 3230 }, { "epoch": 0.44112226090518125, "grad_norm": 0.07963018864393234, "learning_rate": 6.038282280293979e-05, "loss": 46.0, "step": 3231 }, { "epoch": 0.44125878899583587, "grad_norm": 0.11989983916282654, "learning_rate": 6.036155166921772e-05, "loss": 46.0044, "step": 3232 }, { "epoch": 0.44139531708649055, "grad_norm": 0.07779297977685928, "learning_rate": 6.0340278575888284e-05, "loss": 46.0095, "step": 3233 }, { "epoch": 0.4415318451771452, "grad_norm": 0.05337800458073616, "learning_rate": 6.031900352697475e-05, "loss": 46.0033, "step": 3234 }, { "epoch": 0.44166837326779984, "grad_norm": 0.054361265152692795, "learning_rate": 6.029772652650065e-05, "loss": 46.0022, "step": 3235 }, { "epoch": 0.4418049013584545, "grad_norm": 0.14803533256053925, "learning_rate": 6.0276447578490035e-05, "loss": 46.0044, "step": 3236 }, { "epoch": 0.44194142944910914, "grad_norm": 0.07336097955703735, "learning_rate": 6.025516668696718e-05, "loss": 46.0079, "step": 3237 }, { "epoch": 0.4420779575397638, "grad_norm": 0.047431834042072296, "learning_rate": 6.023388385595683e-05, "loss": 46.0129, "step": 3238 }, { "epoch": 0.4422144856304185, "grad_norm": 0.10808273404836655, "learning_rate": 6.021259908948402e-05, "loss": 46.0052, "step": 3239 }, { "epoch": 0.4423510137210731, "grad_norm": 0.0437563993036747, "learning_rate": 6.019131239157423e-05, "loss": 46.0052, "step": 3240 }, { "epoch": 0.4424875418117278, "grad_norm": 0.15858805179595947, "learning_rate": 6.0170023766253215e-05, "loss": 46.0015, "step": 3241 }, { "epoch": 0.4426240699023824, "grad_norm": 0.143216073513031, "learning_rate": 6.0148733217547184e-05, "loss": 46.0038, "step": 3242 }, { "epoch": 0.4427605979930371, "grad_norm": 0.15097832679748535, "learning_rate": 6.012744074948264e-05, "loss": 46.0039, "step": 3243 }, { "epoch": 0.4428971260836917, "grad_norm": 0.06578706204891205, "learning_rate": 6.010614636608651e-05, "loss": 46.0036, "step": 3244 }, { "epoch": 0.44303365417434637, "grad_norm": 0.2825259864330292, "learning_rate": 6.008485007138602e-05, "loss": 46.0048, "step": 3245 }, { "epoch": 0.44317018226500104, "grad_norm": 0.04364576190710068, "learning_rate": 6.006355186940882e-05, "loss": 46.0042, "step": 3246 }, { "epoch": 0.44330671035565566, "grad_norm": 0.06883291900157928, "learning_rate": 6.004225176418286e-05, "loss": 46.0, "step": 3247 }, { "epoch": 0.44344323844631034, "grad_norm": 0.08209047466516495, "learning_rate": 6.002094975973648e-05, "loss": 46.0015, "step": 3248 }, { "epoch": 0.44357976653696496, "grad_norm": 0.32461410760879517, "learning_rate": 5.999964586009839e-05, "loss": 46.0082, "step": 3249 }, { "epoch": 0.44371629462761963, "grad_norm": 0.27143633365631104, "learning_rate": 5.9978340069297647e-05, "loss": 46.0013, "step": 3250 }, { "epoch": 0.4438528227182743, "grad_norm": 
0.15770061314105988, "learning_rate": 5.995703239136364e-05, "loss": 46.0052, "step": 3251 }, { "epoch": 0.44398935080892893, "grad_norm": 0.11876748502254486, "learning_rate": 5.99357228303262e-05, "loss": 46.007, "step": 3252 }, { "epoch": 0.4441258788995836, "grad_norm": 0.040678370743989944, "learning_rate": 5.991441139021539e-05, "loss": 46.0014, "step": 3253 }, { "epoch": 0.4442624069902382, "grad_norm": 0.031607311218976974, "learning_rate": 5.989309807506174e-05, "loss": 46.0, "step": 3254 }, { "epoch": 0.4443989350808929, "grad_norm": 0.032480254769325256, "learning_rate": 5.9871782888896035e-05, "loss": 46.0016, "step": 3255 }, { "epoch": 0.4445354631715476, "grad_norm": 0.04399074241518974, "learning_rate": 5.985046583574952e-05, "loss": 46.0013, "step": 3256 }, { "epoch": 0.4446719912622022, "grad_norm": 0.0771164745092392, "learning_rate": 5.982914691965371e-05, "loss": 46.0054, "step": 3257 }, { "epoch": 0.44480851935285687, "grad_norm": 0.035902682691812515, "learning_rate": 5.980782614464051e-05, "loss": 46.0054, "step": 3258 }, { "epoch": 0.4449450474435115, "grad_norm": 0.12372922152280807, "learning_rate": 5.9786503514742175e-05, "loss": 46.0063, "step": 3259 }, { "epoch": 0.44508157553416616, "grad_norm": 0.13666968047618866, "learning_rate": 5.976517903399128e-05, "loss": 46.0005, "step": 3260 }, { "epoch": 0.4452181036248208, "grad_norm": 0.163943350315094, "learning_rate": 5.97438527064208e-05, "loss": 46.0099, "step": 3261 }, { "epoch": 0.44535463171547546, "grad_norm": 0.04138359799981117, "learning_rate": 5.972252453606402e-05, "loss": 46.0141, "step": 3262 }, { "epoch": 0.44549115980613013, "grad_norm": 0.05028345063328743, "learning_rate": 5.970119452695459e-05, "loss": 46.006, "step": 3263 }, { "epoch": 0.44562768789678475, "grad_norm": 0.11450722813606262, "learning_rate": 5.967986268312651e-05, "loss": 46.0002, "step": 3264 }, { "epoch": 0.4457642159874394, "grad_norm": 0.07472186535596848, "learning_rate": 5.965852900861409e-05, "loss": 46.0154, "step": 3265 }, { "epoch": 0.44590074407809405, "grad_norm": 0.0813026875257492, "learning_rate": 5.9637193507452085e-05, "loss": 46.0106, "step": 3266 }, { "epoch": 0.4460372721687487, "grad_norm": 0.05395880341529846, "learning_rate": 5.961585618367545e-05, "loss": 46.0014, "step": 3267 }, { "epoch": 0.4461738002594034, "grad_norm": 0.05749817192554474, "learning_rate": 5.959451704131962e-05, "loss": 46.0098, "step": 3268 }, { "epoch": 0.446310328350058, "grad_norm": 0.058427028357982635, "learning_rate": 5.957317608442028e-05, "loss": 46.0061, "step": 3269 }, { "epoch": 0.4464468564407127, "grad_norm": 0.13032928109169006, "learning_rate": 5.955183331701354e-05, "loss": 46.0011, "step": 3270 }, { "epoch": 0.4465833845313673, "grad_norm": 0.1015309989452362, "learning_rate": 5.9530488743135746e-05, "loss": 46.0091, "step": 3271 }, { "epoch": 0.446719912622022, "grad_norm": 0.04668082296848297, "learning_rate": 5.950914236682369e-05, "loss": 46.0109, "step": 3272 }, { "epoch": 0.4468564407126766, "grad_norm": 0.07465145736932755, "learning_rate": 5.948779419211445e-05, "loss": 46.0045, "step": 3273 }, { "epoch": 0.4469929688033313, "grad_norm": 0.07152850925922394, "learning_rate": 5.946644422304547e-05, "loss": 46.016, "step": 3274 }, { "epoch": 0.44712949689398596, "grad_norm": 0.13386327028274536, "learning_rate": 5.944509246365451e-05, "loss": 46.0067, "step": 3275 }, { "epoch": 0.4472660249846406, "grad_norm": 0.11008156836032867, "learning_rate": 5.942373891797966e-05, "loss": 46.0071, "step": 3276 }, { 
"epoch": 0.44740255307529525, "grad_norm": 0.13668060302734375, "learning_rate": 5.9402383590059386e-05, "loss": 46.0043, "step": 3277 }, { "epoch": 0.44753908116594987, "grad_norm": 0.09213408082723618, "learning_rate": 5.9381026483932465e-05, "loss": 46.0011, "step": 3278 }, { "epoch": 0.44767560925660455, "grad_norm": 0.10921097546815872, "learning_rate": 5.935966760363801e-05, "loss": 46.0065, "step": 3279 }, { "epoch": 0.4478121373472592, "grad_norm": 0.07381380349397659, "learning_rate": 5.93383069532155e-05, "loss": 46.0042, "step": 3280 }, { "epoch": 0.44794866543791384, "grad_norm": 0.1419496089220047, "learning_rate": 5.931694453670469e-05, "loss": 46.0081, "step": 3281 }, { "epoch": 0.4480851935285685, "grad_norm": 0.07644156366586685, "learning_rate": 5.9295580358145744e-05, "loss": 46.0042, "step": 3282 }, { "epoch": 0.44822172161922313, "grad_norm": 0.12819160521030426, "learning_rate": 5.9274214421579066e-05, "loss": 46.0102, "step": 3283 }, { "epoch": 0.4483582497098778, "grad_norm": 0.11327636986970901, "learning_rate": 5.9252846731045496e-05, "loss": 46.0142, "step": 3284 }, { "epoch": 0.4484947778005325, "grad_norm": 0.0905163362622261, "learning_rate": 5.9231477290586126e-05, "loss": 46.0062, "step": 3285 }, { "epoch": 0.4486313058911871, "grad_norm": 0.09323874115943909, "learning_rate": 5.921010610424243e-05, "loss": 46.0052, "step": 3286 }, { "epoch": 0.4487678339818418, "grad_norm": 0.068678118288517, "learning_rate": 5.9188733176056145e-05, "loss": 46.0018, "step": 3287 }, { "epoch": 0.4489043620724964, "grad_norm": 0.051968034356832504, "learning_rate": 5.916735851006943e-05, "loss": 46.0068, "step": 3288 }, { "epoch": 0.4490408901631511, "grad_norm": 0.04144961014389992, "learning_rate": 5.9145982110324717e-05, "loss": 46.0076, "step": 3289 }, { "epoch": 0.4491774182538057, "grad_norm": 0.07388057559728622, "learning_rate": 5.912460398086476e-05, "loss": 46.0084, "step": 3290 }, { "epoch": 0.44931394634446037, "grad_norm": 0.05416680872440338, "learning_rate": 5.910322412573265e-05, "loss": 46.0042, "step": 3291 }, { "epoch": 0.44945047443511504, "grad_norm": 0.04057887941598892, "learning_rate": 5.908184254897182e-05, "loss": 46.0089, "step": 3292 }, { "epoch": 0.44958700252576966, "grad_norm": 0.08227428793907166, "learning_rate": 5.9060459254626035e-05, "loss": 46.0042, "step": 3293 }, { "epoch": 0.44972353061642434, "grad_norm": 0.12047554552555084, "learning_rate": 5.9039074246739326e-05, "loss": 46.0032, "step": 3294 }, { "epoch": 0.44986005870707896, "grad_norm": 0.07840359210968018, "learning_rate": 5.901768752935613e-05, "loss": 46.0095, "step": 3295 }, { "epoch": 0.44999658679773363, "grad_norm": 0.1567053198814392, "learning_rate": 5.899629910652112e-05, "loss": 46.0048, "step": 3296 }, { "epoch": 0.4501331148883883, "grad_norm": 0.11564769595861435, "learning_rate": 5.8974908982279374e-05, "loss": 46.0029, "step": 3297 }, { "epoch": 0.45026964297904293, "grad_norm": 0.07763446122407913, "learning_rate": 5.8953517160676265e-05, "loss": 46.0098, "step": 3298 }, { "epoch": 0.4504061710696976, "grad_norm": 0.1639242172241211, "learning_rate": 5.893212364575743e-05, "loss": 46.0122, "step": 3299 }, { "epoch": 0.4505426991603522, "grad_norm": 0.2077317237854004, "learning_rate": 5.891072844156894e-05, "loss": 46.0049, "step": 3300 }, { "epoch": 0.4506792272510069, "grad_norm": 0.21527737379074097, "learning_rate": 5.8889331552157045e-05, "loss": 46.0146, "step": 3301 }, { "epoch": 0.4508157553416616, "grad_norm": 0.09228160232305527, "learning_rate": 
5.8867932981568444e-05, "loss": 46.0009, "step": 3302 }, { "epoch": 0.4509522834323162, "grad_norm": 0.07258673012256622, "learning_rate": 5.884653273385008e-05, "loss": 46.0018, "step": 3303 }, { "epoch": 0.45108881152297087, "grad_norm": 0.1341945081949234, "learning_rate": 5.8825130813049236e-05, "loss": 46.0007, "step": 3304 }, { "epoch": 0.4512253396136255, "grad_norm": 0.18081152439117432, "learning_rate": 5.880372722321349e-05, "loss": 46.0071, "step": 3305 }, { "epoch": 0.45136186770428016, "grad_norm": 0.09528390318155289, "learning_rate": 5.8782321968390765e-05, "loss": 46.0016, "step": 3306 }, { "epoch": 0.4514983957949348, "grad_norm": 0.05644824728369713, "learning_rate": 5.87609150526293e-05, "loss": 46.0017, "step": 3307 }, { "epoch": 0.45163492388558946, "grad_norm": 0.0670633614063263, "learning_rate": 5.87395064799776e-05, "loss": 46.0051, "step": 3308 }, { "epoch": 0.45177145197624413, "grad_norm": 0.05378558486700058, "learning_rate": 5.871809625448454e-05, "loss": 46.0045, "step": 3309 }, { "epoch": 0.45190798006689875, "grad_norm": 0.07654604315757751, "learning_rate": 5.8696684380199296e-05, "loss": 46.0074, "step": 3310 }, { "epoch": 0.4520445081575534, "grad_norm": 0.11115776747465134, "learning_rate": 5.8675270861171325e-05, "loss": 46.002, "step": 3311 }, { "epoch": 0.45218103624820805, "grad_norm": 0.34480664134025574, "learning_rate": 5.865385570145042e-05, "loss": 46.0032, "step": 3312 }, { "epoch": 0.4523175643388627, "grad_norm": 0.055602651089429855, "learning_rate": 5.863243890508668e-05, "loss": 46.0084, "step": 3313 }, { "epoch": 0.4524540924295174, "grad_norm": 0.12936097383499146, "learning_rate": 5.861102047613052e-05, "loss": 46.0027, "step": 3314 }, { "epoch": 0.452590620520172, "grad_norm": 0.04736260697245598, "learning_rate": 5.8589600418632636e-05, "loss": 46.0059, "step": 3315 }, { "epoch": 0.4527271486108267, "grad_norm": 0.0468810610473156, "learning_rate": 5.856817873664409e-05, "loss": 46.0097, "step": 3316 }, { "epoch": 0.4528636767014813, "grad_norm": 0.13045214116573334, "learning_rate": 5.854675543421617e-05, "loss": 46.0046, "step": 3317 }, { "epoch": 0.453000204792136, "grad_norm": 0.11823493987321854, "learning_rate": 5.852533051540054e-05, "loss": 46.0074, "step": 3318 }, { "epoch": 0.45313673288279066, "grad_norm": 0.08374939858913422, "learning_rate": 5.850390398424916e-05, "loss": 46.0037, "step": 3319 }, { "epoch": 0.4532732609734453, "grad_norm": 0.07006404548883438, "learning_rate": 5.8482475844814235e-05, "loss": 46.0061, "step": 3320 }, { "epoch": 0.45340978906409996, "grad_norm": 0.11420948803424835, "learning_rate": 5.846104610114835e-05, "loss": 46.0053, "step": 3321 }, { "epoch": 0.4535463171547546, "grad_norm": 0.09238722175359726, "learning_rate": 5.843961475730436e-05, "loss": 46.0068, "step": 3322 }, { "epoch": 0.45368284524540925, "grad_norm": 0.06880855560302734, "learning_rate": 5.841818181733543e-05, "loss": 46.0067, "step": 3323 }, { "epoch": 0.45381937333606387, "grad_norm": 0.04617644101381302, "learning_rate": 5.8396747285294986e-05, "loss": 46.0047, "step": 3324 }, { "epoch": 0.45395590142671854, "grad_norm": 0.11385727673768997, "learning_rate": 5.837531116523682e-05, "loss": 46.0045, "step": 3325 }, { "epoch": 0.4540924295173732, "grad_norm": 0.08321409672498703, "learning_rate": 5.8353873461215e-05, "loss": 46.0063, "step": 3326 }, { "epoch": 0.45422895760802784, "grad_norm": 0.10459613800048828, "learning_rate": 5.833243417728387e-05, "loss": 46.0018, "step": 3327 }, { "epoch": 0.4543654856986825, 
"grad_norm": 0.06356633454561234, "learning_rate": 5.8310993317498095e-05, "loss": 46.006, "step": 3328 }, { "epoch": 0.45450201378933713, "grad_norm": 0.0779912993311882, "learning_rate": 5.8289550885912635e-05, "loss": 46.0074, "step": 3329 }, { "epoch": 0.4546385418799918, "grad_norm": 0.07653915137052536, "learning_rate": 5.8268106886582754e-05, "loss": 46.0021, "step": 3330 }, { "epoch": 0.4547750699706465, "grad_norm": 0.14351485669612885, "learning_rate": 5.824666132356399e-05, "loss": 46.0039, "step": 3331 }, { "epoch": 0.4549115980613011, "grad_norm": 0.03992318734526634, "learning_rate": 5.8225214200912195e-05, "loss": 46.0, "step": 3332 }, { "epoch": 0.4550481261519558, "grad_norm": 0.03906750679016113, "learning_rate": 5.820376552268354e-05, "loss": 46.004, "step": 3333 }, { "epoch": 0.4551846542426104, "grad_norm": 0.044312991201877594, "learning_rate": 5.8182315292934406e-05, "loss": 46.0012, "step": 3334 }, { "epoch": 0.4553211823332651, "grad_norm": 0.042077213525772095, "learning_rate": 5.8160863515721586e-05, "loss": 46.012, "step": 3335 }, { "epoch": 0.45545771042391975, "grad_norm": 0.046846937388181686, "learning_rate": 5.813941019510206e-05, "loss": 46.0053, "step": 3336 }, { "epoch": 0.45559423851457437, "grad_norm": 0.16154907643795013, "learning_rate": 5.811795533513318e-05, "loss": 46.0016, "step": 3337 }, { "epoch": 0.45573076660522904, "grad_norm": 0.06997659057378769, "learning_rate": 5.80964989398725e-05, "loss": 46.0017, "step": 3338 }, { "epoch": 0.45586729469588366, "grad_norm": 0.051572535187006, "learning_rate": 5.807504101337797e-05, "loss": 46.0046, "step": 3339 }, { "epoch": 0.45600382278653834, "grad_norm": 0.049550507217645645, "learning_rate": 5.805358155970775e-05, "loss": 46.0008, "step": 3340 }, { "epoch": 0.45614035087719296, "grad_norm": 0.17579501867294312, "learning_rate": 5.803212058292033e-05, "loss": 46.0012, "step": 3341 }, { "epoch": 0.45627687896784763, "grad_norm": 0.059525564312934875, "learning_rate": 5.801065808707444e-05, "loss": 46.005, "step": 3342 }, { "epoch": 0.4564134070585023, "grad_norm": 0.05696041136980057, "learning_rate": 5.798919407622918e-05, "loss": 46.0064, "step": 3343 }, { "epoch": 0.4565499351491569, "grad_norm": 0.045897193253040314, "learning_rate": 5.796772855444385e-05, "loss": 46.0042, "step": 3344 }, { "epoch": 0.4566864632398116, "grad_norm": 0.18403272330760956, "learning_rate": 5.794626152577809e-05, "loss": 46.0056, "step": 3345 }, { "epoch": 0.4568229913304662, "grad_norm": 0.28788232803344727, "learning_rate": 5.7924792994291796e-05, "loss": 46.008, "step": 3346 }, { "epoch": 0.4569595194211209, "grad_norm": 0.21291011571884155, "learning_rate": 5.790332296404517e-05, "loss": 46.0068, "step": 3347 }, { "epoch": 0.45709604751177557, "grad_norm": 0.059914883226156235, "learning_rate": 5.788185143909868e-05, "loss": 46.0085, "step": 3348 }, { "epoch": 0.4572325756024302, "grad_norm": 0.3102721869945526, "learning_rate": 5.786037842351312e-05, "loss": 46.0104, "step": 3349 }, { "epoch": 0.45736910369308487, "grad_norm": 0.11323566734790802, "learning_rate": 5.7838903921349464e-05, "loss": 46.0036, "step": 3350 }, { "epoch": 0.4575056317837395, "grad_norm": 0.06036747246980667, "learning_rate": 5.781742793666909e-05, "loss": 46.0051, "step": 3351 }, { "epoch": 0.45764215987439416, "grad_norm": 0.04581493139266968, "learning_rate": 5.779595047353357e-05, "loss": 46.0014, "step": 3352 }, { "epoch": 0.45777868796504884, "grad_norm": 0.22289352118968964, "learning_rate": 5.77744715360048e-05, "loss": 46.0062, 
"step": 3353 }, { "epoch": 0.45791521605570346, "grad_norm": 0.07708962261676788, "learning_rate": 5.775299112814493e-05, "loss": 46.0019, "step": 3354 }, { "epoch": 0.45805174414635813, "grad_norm": 0.07710837572813034, "learning_rate": 5.773150925401641e-05, "loss": 46.0017, "step": 3355 }, { "epoch": 0.45818827223701275, "grad_norm": 0.034776702523231506, "learning_rate": 5.7710025917681956e-05, "loss": 46.002, "step": 3356 }, { "epoch": 0.4583248003276674, "grad_norm": 0.07020708918571472, "learning_rate": 5.768854112320454e-05, "loss": 46.0006, "step": 3357 }, { "epoch": 0.45846132841832204, "grad_norm": 0.10019591450691223, "learning_rate": 5.766705487464745e-05, "loss": 46.0002, "step": 3358 }, { "epoch": 0.4585978565089767, "grad_norm": 0.08326965570449829, "learning_rate": 5.764556717607422e-05, "loss": 46.0053, "step": 3359 }, { "epoch": 0.4587343845996314, "grad_norm": 0.05549149215221405, "learning_rate": 5.762407803154868e-05, "loss": 46.0076, "step": 3360 }, { "epoch": 0.458870912690286, "grad_norm": 0.06828659772872925, "learning_rate": 5.76025874451349e-05, "loss": 46.0041, "step": 3361 }, { "epoch": 0.4590074407809407, "grad_norm": 0.08929409831762314, "learning_rate": 5.758109542089725e-05, "loss": 46.0087, "step": 3362 }, { "epoch": 0.4591439688715953, "grad_norm": 0.14556454122066498, "learning_rate": 5.7559601962900386e-05, "loss": 46.0015, "step": 3363 }, { "epoch": 0.45928049696225, "grad_norm": 0.12194705009460449, "learning_rate": 5.753810707520918e-05, "loss": 46.0075, "step": 3364 }, { "epoch": 0.45941702505290466, "grad_norm": 0.08698486536741257, "learning_rate": 5.751661076188885e-05, "loss": 46.0073, "step": 3365 }, { "epoch": 0.4595535531435593, "grad_norm": 0.09474481642246246, "learning_rate": 5.7495113027004786e-05, "loss": 46.0105, "step": 3366 }, { "epoch": 0.45969008123421395, "grad_norm": 0.06687586009502411, "learning_rate": 5.747361387462277e-05, "loss": 46.0064, "step": 3367 }, { "epoch": 0.4598266093248686, "grad_norm": 0.10021716356277466, "learning_rate": 5.745211330880872e-05, "loss": 46.0097, "step": 3368 }, { "epoch": 0.45996313741552325, "grad_norm": 0.0457107275724411, "learning_rate": 5.743061133362894e-05, "loss": 46.0036, "step": 3369 }, { "epoch": 0.46009966550617787, "grad_norm": 0.03703836351633072, "learning_rate": 5.7409107953149924e-05, "loss": 46.0095, "step": 3370 }, { "epoch": 0.46023619359683254, "grad_norm": 0.11224452406167984, "learning_rate": 5.738760317143845e-05, "loss": 46.0063, "step": 3371 }, { "epoch": 0.4603727216874872, "grad_norm": 0.20429962873458862, "learning_rate": 5.736609699256158e-05, "loss": 46.0043, "step": 3372 }, { "epoch": 0.46050924977814184, "grad_norm": 0.0694582387804985, "learning_rate": 5.734458942058662e-05, "loss": 46.001, "step": 3373 }, { "epoch": 0.4606457778687965, "grad_norm": 0.04724152013659477, "learning_rate": 5.7323080459581136e-05, "loss": 46.0096, "step": 3374 }, { "epoch": 0.46078230595945113, "grad_norm": 0.03322480618953705, "learning_rate": 5.730157011361298e-05, "loss": 46.0089, "step": 3375 }, { "epoch": 0.4609188340501058, "grad_norm": 0.10225646942853928, "learning_rate": 5.7280058386750255e-05, "loss": 46.0033, "step": 3376 }, { "epoch": 0.4610553621407605, "grad_norm": 0.06530465185642242, "learning_rate": 5.7258545283061305e-05, "loss": 46.0028, "step": 3377 }, { "epoch": 0.4611918902314151, "grad_norm": 0.05753616243600845, "learning_rate": 5.7237030806614754e-05, "loss": 46.0039, "step": 3378 }, { "epoch": 0.4613284183220698, "grad_norm": 0.06254931539297104, 
"learning_rate": 5.721551496147952e-05, "loss": 46.0087, "step": 3379 }, { "epoch": 0.4614649464127244, "grad_norm": 0.05898243933916092, "learning_rate": 5.7193997751724685e-05, "loss": 46.0037, "step": 3380 }, { "epoch": 0.4616014745033791, "grad_norm": 0.044909991323947906, "learning_rate": 5.71724791814197e-05, "loss": 46.0014, "step": 3381 }, { "epoch": 0.46173800259403375, "grad_norm": 0.048481374979019165, "learning_rate": 5.715095925463419e-05, "loss": 46.0067, "step": 3382 }, { "epoch": 0.46187453068468837, "grad_norm": 0.06316757202148438, "learning_rate": 5.7129437975438084e-05, "loss": 46.0119, "step": 3383 }, { "epoch": 0.46201105877534304, "grad_norm": 0.11995294690132141, "learning_rate": 5.710791534790152e-05, "loss": 46.0058, "step": 3384 }, { "epoch": 0.46214758686599766, "grad_norm": 0.1693667471408844, "learning_rate": 5.708639137609495e-05, "loss": 46.0058, "step": 3385 }, { "epoch": 0.46228411495665234, "grad_norm": 0.17885972559452057, "learning_rate": 5.706486606408905e-05, "loss": 46.0144, "step": 3386 }, { "epoch": 0.46242064304730696, "grad_norm": 0.042178910225629807, "learning_rate": 5.704333941595474e-05, "loss": 46.0038, "step": 3387 }, { "epoch": 0.46255717113796163, "grad_norm": 0.04877404496073723, "learning_rate": 5.702181143576323e-05, "loss": 46.0043, "step": 3388 }, { "epoch": 0.4626936992286163, "grad_norm": 0.07124482095241547, "learning_rate": 5.700028212758591e-05, "loss": 46.0063, "step": 3389 }, { "epoch": 0.4628302273192709, "grad_norm": 0.039469778537750244, "learning_rate": 5.69787514954945e-05, "loss": 46.0069, "step": 3390 }, { "epoch": 0.4629667554099256, "grad_norm": 0.044495053589344025, "learning_rate": 5.6957219543560927e-05, "loss": 46.0026, "step": 3391 }, { "epoch": 0.4631032835005802, "grad_norm": 0.03439701348543167, "learning_rate": 5.6935686275857366e-05, "loss": 46.0001, "step": 3392 }, { "epoch": 0.4632398115912349, "grad_norm": 0.08465097844600677, "learning_rate": 5.6914151696456284e-05, "loss": 46.0089, "step": 3393 }, { "epoch": 0.46337633968188957, "grad_norm": 0.09672936052083969, "learning_rate": 5.689261580943033e-05, "loss": 46.0015, "step": 3394 }, { "epoch": 0.4635128677725442, "grad_norm": 0.07851473987102509, "learning_rate": 5.687107861885247e-05, "loss": 46.0034, "step": 3395 }, { "epoch": 0.46364939586319887, "grad_norm": 0.037039730697870255, "learning_rate": 5.684954012879583e-05, "loss": 46.0049, "step": 3396 }, { "epoch": 0.4637859239538535, "grad_norm": 0.07489261031150818, "learning_rate": 5.68280003433339e-05, "loss": 46.0021, "step": 3397 }, { "epoch": 0.46392245204450816, "grad_norm": 0.16583040356636047, "learning_rate": 5.680645926654028e-05, "loss": 46.0064, "step": 3398 }, { "epoch": 0.46405898013516284, "grad_norm": 0.14989735186100006, "learning_rate": 5.678491690248893e-05, "loss": 46.0, "step": 3399 }, { "epoch": 0.46419550822581745, "grad_norm": 0.19811350107192993, "learning_rate": 5.676337325525398e-05, "loss": 46.0, "step": 3400 }, { "epoch": 0.46433203631647213, "grad_norm": 0.12381062656641006, "learning_rate": 5.674182832890984e-05, "loss": 46.001, "step": 3401 }, { "epoch": 0.46446856440712675, "grad_norm": 0.0415620282292366, "learning_rate": 5.6720282127531155e-05, "loss": 46.0023, "step": 3402 }, { "epoch": 0.4646050924977814, "grad_norm": 0.07577162235975266, "learning_rate": 5.669873465519281e-05, "loss": 46.0004, "step": 3403 }, { "epoch": 0.46474162058843604, "grad_norm": 0.050749942660331726, "learning_rate": 5.66771859159699e-05, "loss": 46.0013, "step": 3404 }, { "epoch": 
0.4648781486790907, "grad_norm": 0.08133310824632645, "learning_rate": 5.665563591393781e-05, "loss": 46.003, "step": 3405 }, { "epoch": 0.4650146767697454, "grad_norm": 0.04965519532561302, "learning_rate": 5.6634084653172134e-05, "loss": 46.0059, "step": 3406 }, { "epoch": 0.4651512048604, "grad_norm": 0.05294590815901756, "learning_rate": 5.661253213774871e-05, "loss": 46.0002, "step": 3407 }, { "epoch": 0.4652877329510547, "grad_norm": 0.12944480776786804, "learning_rate": 5.659097837174362e-05, "loss": 46.0019, "step": 3408 }, { "epoch": 0.4654242610417093, "grad_norm": 0.06467145681381226, "learning_rate": 5.6569423359233155e-05, "loss": 46.0033, "step": 3409 }, { "epoch": 0.465560789132364, "grad_norm": 0.06355395168066025, "learning_rate": 5.6547867104293884e-05, "loss": 46.0057, "step": 3410 }, { "epoch": 0.46569731722301866, "grad_norm": 0.060215819627046585, "learning_rate": 5.6526309611002594e-05, "loss": 46.0047, "step": 3411 }, { "epoch": 0.4658338453136733, "grad_norm": 0.053166463971138, "learning_rate": 5.650475088343627e-05, "loss": 46.0064, "step": 3412 }, { "epoch": 0.46597037340432795, "grad_norm": 0.10819060355424881, "learning_rate": 5.6483190925672205e-05, "loss": 46.0035, "step": 3413 }, { "epoch": 0.4661069014949826, "grad_norm": 0.1827937513589859, "learning_rate": 5.6461629741787844e-05, "loss": 46.0095, "step": 3414 }, { "epoch": 0.46624342958563725, "grad_norm": 0.06127537786960602, "learning_rate": 5.644006733586092e-05, "loss": 46.0063, "step": 3415 }, { "epoch": 0.4663799576762919, "grad_norm": 0.0647561103105545, "learning_rate": 5.641850371196939e-05, "loss": 46.002, "step": 3416 }, { "epoch": 0.46651648576694654, "grad_norm": 0.07403590530157089, "learning_rate": 5.6396938874191405e-05, "loss": 46.0018, "step": 3417 }, { "epoch": 0.4666530138576012, "grad_norm": 0.06166885793209076, "learning_rate": 5.63753728266054e-05, "loss": 46.011, "step": 3418 }, { "epoch": 0.46678954194825584, "grad_norm": 0.1293756365776062, "learning_rate": 5.635380557328999e-05, "loss": 46.0118, "step": 3419 }, { "epoch": 0.4669260700389105, "grad_norm": 0.11807050555944443, "learning_rate": 5.633223711832403e-05, "loss": 46.0039, "step": 3420 }, { "epoch": 0.46706259812956513, "grad_norm": 0.12815074622631073, "learning_rate": 5.6310667465786636e-05, "loss": 46.0129, "step": 3421 }, { "epoch": 0.4671991262202198, "grad_norm": 0.062197133898735046, "learning_rate": 5.62890966197571e-05, "loss": 46.0082, "step": 3422 }, { "epoch": 0.4673356543108745, "grad_norm": 0.08948805928230286, "learning_rate": 5.626752458431498e-05, "loss": 46.0032, "step": 3423 }, { "epoch": 0.4674721824015291, "grad_norm": 0.05417512729763985, "learning_rate": 5.624595136354004e-05, "loss": 46.0064, "step": 3424 }, { "epoch": 0.4676087104921838, "grad_norm": 0.1392868608236313, "learning_rate": 5.622437696151227e-05, "loss": 46.0059, "step": 3425 }, { "epoch": 0.4677452385828384, "grad_norm": 0.11200509965419769, "learning_rate": 5.6202801382311884e-05, "loss": 46.0053, "step": 3426 }, { "epoch": 0.46788176667349307, "grad_norm": 0.14722104370594025, "learning_rate": 5.618122463001933e-05, "loss": 46.0093, "step": 3427 }, { "epoch": 0.46801829476414775, "grad_norm": 0.07754315435886383, "learning_rate": 5.615964670871524e-05, "loss": 46.0046, "step": 3428 }, { "epoch": 0.46815482285480237, "grad_norm": 0.06673520058393478, "learning_rate": 5.613806762248051e-05, "loss": 46.0014, "step": 3429 }, { "epoch": 0.46829135094545704, "grad_norm": 0.09075331687927246, "learning_rate": 5.6116487375396256e-05, 
"loss": 46.007, "step": 3430 }, { "epoch": 0.46842787903611166, "grad_norm": 0.06512121111154556, "learning_rate": 5.609490597154378e-05, "loss": 46.0022, "step": 3431 }, { "epoch": 0.46856440712676634, "grad_norm": 0.22880560159683228, "learning_rate": 5.607332341500463e-05, "loss": 46.0117, "step": 3432 }, { "epoch": 0.468700935217421, "grad_norm": 0.08297006040811539, "learning_rate": 5.605173970986054e-05, "loss": 46.0046, "step": 3433 }, { "epoch": 0.46883746330807563, "grad_norm": 0.2474154233932495, "learning_rate": 5.603015486019354e-05, "loss": 46.0041, "step": 3434 }, { "epoch": 0.4689739913987303, "grad_norm": 0.12391345947980881, "learning_rate": 5.6008568870085745e-05, "loss": 46.0026, "step": 3435 }, { "epoch": 0.4691105194893849, "grad_norm": 0.10368985682725906, "learning_rate": 5.5986981743619615e-05, "loss": 46.0086, "step": 3436 }, { "epoch": 0.4692470475800396, "grad_norm": 0.08877207338809967, "learning_rate": 5.596539348487777e-05, "loss": 46.009, "step": 3437 }, { "epoch": 0.4693835756706942, "grad_norm": 0.10317400842905045, "learning_rate": 5.594380409794302e-05, "loss": 46.0049, "step": 3438 }, { "epoch": 0.4695201037613489, "grad_norm": 0.11079995334148407, "learning_rate": 5.592221358689843e-05, "loss": 46.0039, "step": 3439 }, { "epoch": 0.46965663185200357, "grad_norm": 0.07853364199399948, "learning_rate": 5.590062195582725e-05, "loss": 46.0006, "step": 3440 }, { "epoch": 0.4697931599426582, "grad_norm": 0.08707218617200851, "learning_rate": 5.587902920881297e-05, "loss": 46.0047, "step": 3441 }, { "epoch": 0.46992968803331286, "grad_norm": 0.07376271486282349, "learning_rate": 5.5857435349939255e-05, "loss": 46.0018, "step": 3442 }, { "epoch": 0.4700662161239675, "grad_norm": 0.04806479066610336, "learning_rate": 5.583584038329002e-05, "loss": 46.0088, "step": 3443 }, { "epoch": 0.47020274421462216, "grad_norm": 0.15495692193508148, "learning_rate": 5.581424431294936e-05, "loss": 46.0067, "step": 3444 }, { "epoch": 0.47033927230527683, "grad_norm": 0.11935877054929733, "learning_rate": 5.5792647143001564e-05, "loss": 46.0036, "step": 3445 }, { "epoch": 0.47047580039593145, "grad_norm": 0.16231299936771393, "learning_rate": 5.5771048877531205e-05, "loss": 46.0002, "step": 3446 }, { "epoch": 0.47061232848658613, "grad_norm": 0.2458767145872116, "learning_rate": 5.5749449520622966e-05, "loss": 46.0078, "step": 3447 }, { "epoch": 0.47074885657724075, "grad_norm": 0.11026644706726074, "learning_rate": 5.57278490763618e-05, "loss": 46.0078, "step": 3448 }, { "epoch": 0.4708853846678954, "grad_norm": 0.18529583513736725, "learning_rate": 5.570624754883284e-05, "loss": 46.0027, "step": 3449 }, { "epoch": 0.4710219127585501, "grad_norm": 0.3792688548564911, "learning_rate": 5.568464494212145e-05, "loss": 46.0063, "step": 3450 }, { "epoch": 0.4711584408492047, "grad_norm": 0.08244206756353378, "learning_rate": 5.5663041260313145e-05, "loss": 46.0028, "step": 3451 }, { "epoch": 0.4712949689398594, "grad_norm": 0.11265075206756592, "learning_rate": 5.5641436507493696e-05, "loss": 46.0022, "step": 3452 }, { "epoch": 0.471431497030514, "grad_norm": 0.03704860806465149, "learning_rate": 5.561983068774907e-05, "loss": 46.0039, "step": 3453 }, { "epoch": 0.4715680251211687, "grad_norm": 0.09973981976509094, "learning_rate": 5.559822380516539e-05, "loss": 46.0013, "step": 3454 }, { "epoch": 0.4717045532118233, "grad_norm": 0.06526769697666168, "learning_rate": 5.557661586382906e-05, "loss": 46.0028, "step": 3455 }, { "epoch": 0.471841081302478, "grad_norm": 
0.10562141239643097, "learning_rate": 5.55550068678266e-05, "loss": 46.0028, "step": 3456 }, { "epoch": 0.47197760939313266, "grad_norm": 0.10647348314523697, "learning_rate": 5.553339682124479e-05, "loss": 46.0004, "step": 3457 }, { "epoch": 0.4721141374837873, "grad_norm": 0.04144544154405594, "learning_rate": 5.551178572817056e-05, "loss": 46.0065, "step": 3458 }, { "epoch": 0.47225066557444195, "grad_norm": 0.06172046437859535, "learning_rate": 5.549017359269108e-05, "loss": 46.0024, "step": 3459 }, { "epoch": 0.47238719366509657, "grad_norm": 0.08402599394321442, "learning_rate": 5.546856041889373e-05, "loss": 46.0047, "step": 3460 }, { "epoch": 0.47252372175575125, "grad_norm": 0.07843755185604095, "learning_rate": 5.5446946210866005e-05, "loss": 46.0104, "step": 3461 }, { "epoch": 0.4726602498464059, "grad_norm": 0.0483468659222126, "learning_rate": 5.542533097269569e-05, "loss": 46.0036, "step": 3462 }, { "epoch": 0.47279677793706054, "grad_norm": 0.04057765752077103, "learning_rate": 5.540371470847068e-05, "loss": 46.0008, "step": 3463 }, { "epoch": 0.4729333060277152, "grad_norm": 0.1106705516576767, "learning_rate": 5.538209742227915e-05, "loss": 46.0059, "step": 3464 }, { "epoch": 0.47306983411836984, "grad_norm": 0.15035894513130188, "learning_rate": 5.5360479118209386e-05, "loss": 46.0057, "step": 3465 }, { "epoch": 0.4732063622090245, "grad_norm": 0.05420340597629547, "learning_rate": 5.5338859800349954e-05, "loss": 46.011, "step": 3466 }, { "epoch": 0.4733428902996792, "grad_norm": 0.12177227437496185, "learning_rate": 5.531723947278952e-05, "loss": 46.0022, "step": 3467 }, { "epoch": 0.4734794183903338, "grad_norm": 0.038663361221551895, "learning_rate": 5.5295618139617e-05, "loss": 46.0017, "step": 3468 }, { "epoch": 0.4736159464809885, "grad_norm": 0.11599582433700562, "learning_rate": 5.52739958049215e-05, "loss": 46.0105, "step": 3469 }, { "epoch": 0.4737524745716431, "grad_norm": 0.05642065778374672, "learning_rate": 5.5252372472792266e-05, "loss": 46.0055, "step": 3470 }, { "epoch": 0.4738890026622978, "grad_norm": 0.03084094449877739, "learning_rate": 5.52307481473188e-05, "loss": 46.0052, "step": 3471 }, { "epoch": 0.4740255307529524, "grad_norm": 0.03951660916209221, "learning_rate": 5.520912283259073e-05, "loss": 46.0025, "step": 3472 }, { "epoch": 0.47416205884360707, "grad_norm": 0.06974965333938599, "learning_rate": 5.518749653269794e-05, "loss": 46.007, "step": 3473 }, { "epoch": 0.47429858693426175, "grad_norm": 0.08274827152490616, "learning_rate": 5.516586925173041e-05, "loss": 46.0025, "step": 3474 }, { "epoch": 0.47443511502491637, "grad_norm": 0.07353687286376953, "learning_rate": 5.514424099377837e-05, "loss": 46.0103, "step": 3475 }, { "epoch": 0.47457164311557104, "grad_norm": 0.1796838790178299, "learning_rate": 5.512261176293226e-05, "loss": 46.0034, "step": 3476 }, { "epoch": 0.47470817120622566, "grad_norm": 0.07547491788864136, "learning_rate": 5.510098156328262e-05, "loss": 46.0016, "step": 3477 }, { "epoch": 0.47484469929688033, "grad_norm": 0.055620331317186356, "learning_rate": 5.507935039892024e-05, "loss": 46.0102, "step": 3478 }, { "epoch": 0.474981227387535, "grad_norm": 0.0339420884847641, "learning_rate": 5.5057718273936034e-05, "loss": 46.0038, "step": 3479 }, { "epoch": 0.47511775547818963, "grad_norm": 0.05348673462867737, "learning_rate": 5.503608519242119e-05, "loss": 46.0059, "step": 3480 }, { "epoch": 0.4752542835688443, "grad_norm": 0.10053804516792297, "learning_rate": 5.5014451158466975e-05, "loss": 46.0076, "step": 3481 }, { 
"epoch": 0.4753908116594989, "grad_norm": 0.045698560774326324, "learning_rate": 5.49928161761649e-05, "loss": 46.0006, "step": 3482 }, { "epoch": 0.4755273397501536, "grad_norm": 0.06016741693019867, "learning_rate": 5.4971180249606635e-05, "loss": 46.0054, "step": 3483 }, { "epoch": 0.4756638678408082, "grad_norm": 0.08454906195402145, "learning_rate": 5.4949543382884036e-05, "loss": 46.0076, "step": 3484 }, { "epoch": 0.4758003959314629, "grad_norm": 0.05342252179980278, "learning_rate": 5.4927905580089125e-05, "loss": 46.0024, "step": 3485 }, { "epoch": 0.47593692402211757, "grad_norm": 0.04546725004911423, "learning_rate": 5.4906266845314114e-05, "loss": 46.0043, "step": 3486 }, { "epoch": 0.4760734521127722, "grad_norm": 0.06786619871854782, "learning_rate": 5.488462718265137e-05, "loss": 46.0058, "step": 3487 }, { "epoch": 0.47620998020342686, "grad_norm": 0.059110481292009354, "learning_rate": 5.486298659619345e-05, "loss": 46.0043, "step": 3488 }, { "epoch": 0.4763465082940815, "grad_norm": 0.056047044694423676, "learning_rate": 5.48413450900331e-05, "loss": 46.0049, "step": 3489 }, { "epoch": 0.47648303638473616, "grad_norm": 0.07039384543895721, "learning_rate": 5.481970266826324e-05, "loss": 46.0081, "step": 3490 }, { "epoch": 0.47661956447539083, "grad_norm": 0.10172511637210846, "learning_rate": 5.4798059334976907e-05, "loss": 46.001, "step": 3491 }, { "epoch": 0.47675609256604545, "grad_norm": 0.09674578905105591, "learning_rate": 5.477641509426739e-05, "loss": 46.0041, "step": 3492 }, { "epoch": 0.47689262065670013, "grad_norm": 0.11460185050964355, "learning_rate": 5.475476995022808e-05, "loss": 46.0092, "step": 3493 }, { "epoch": 0.47702914874735475, "grad_norm": 0.08499687910079956, "learning_rate": 5.473312390695261e-05, "loss": 46.0003, "step": 3494 }, { "epoch": 0.4771656768380094, "grad_norm": 0.12212444096803665, "learning_rate": 5.4711476968534694e-05, "loss": 46.0013, "step": 3495 }, { "epoch": 0.4773022049286641, "grad_norm": 0.12452375888824463, "learning_rate": 5.468982913906831e-05, "loss": 46.0092, "step": 3496 }, { "epoch": 0.4774387330193187, "grad_norm": 0.2809508740901947, "learning_rate": 5.466818042264753e-05, "loss": 46.0045, "step": 3497 }, { "epoch": 0.4775752611099734, "grad_norm": 0.1229642704129219, "learning_rate": 5.464653082336665e-05, "loss": 46.0065, "step": 3498 }, { "epoch": 0.477711789200628, "grad_norm": 0.29394781589508057, "learning_rate": 5.4624880345320084e-05, "loss": 46.0025, "step": 3499 }, { "epoch": 0.4778483172912827, "grad_norm": 0.13535772264003754, "learning_rate": 5.460322899260245e-05, "loss": 46.0, "step": 3500 }, { "epoch": 0.4779848453819373, "grad_norm": 0.0758185088634491, "learning_rate": 5.458157676930849e-05, "loss": 46.0042, "step": 3501 }, { "epoch": 0.478121373472592, "grad_norm": 0.04973942041397095, "learning_rate": 5.4559923679533173e-05, "loss": 46.0013, "step": 3502 }, { "epoch": 0.47825790156324666, "grad_norm": 0.13067305088043213, "learning_rate": 5.453826972737158e-05, "loss": 46.0081, "step": 3503 }, { "epoch": 0.4783944296539013, "grad_norm": 0.13257314264774323, "learning_rate": 5.451661491691896e-05, "loss": 46.006, "step": 3504 }, { "epoch": 0.47853095774455595, "grad_norm": 0.04957200214266777, "learning_rate": 5.449495925227074e-05, "loss": 46.0009, "step": 3505 }, { "epoch": 0.47866748583521057, "grad_norm": 0.055356379598379135, "learning_rate": 5.4473302737522515e-05, "loss": 46.0078, "step": 3506 }, { "epoch": 0.47880401392586525, "grad_norm": 0.09314689040184021, "learning_rate": 
5.445164537677001e-05, "loss": 46.0029, "step": 3507 }, { "epoch": 0.4789405420165199, "grad_norm": 0.12960022687911987, "learning_rate": 5.442998717410916e-05, "loss": 46.0007, "step": 3508 }, { "epoch": 0.47907707010717454, "grad_norm": 0.12836357951164246, "learning_rate": 5.440832813363599e-05, "loss": 46.0032, "step": 3509 }, { "epoch": 0.4792135981978292, "grad_norm": 0.07320880144834518, "learning_rate": 5.438666825944674e-05, "loss": 46.0046, "step": 3510 }, { "epoch": 0.47935012628848384, "grad_norm": 0.04292183741927147, "learning_rate": 5.436500755563778e-05, "loss": 46.006, "step": 3511 }, { "epoch": 0.4794866543791385, "grad_norm": 0.031541239470243454, "learning_rate": 5.4343346026305677e-05, "loss": 46.0061, "step": 3512 }, { "epoch": 0.4796231824697932, "grad_norm": 0.1751173883676529, "learning_rate": 5.4321683675547094e-05, "loss": 46.0041, "step": 3513 }, { "epoch": 0.4797597105604478, "grad_norm": 0.06905974447727203, "learning_rate": 5.4300020507458895e-05, "loss": 46.0073, "step": 3514 }, { "epoch": 0.4798962386511025, "grad_norm": 0.056492190808057785, "learning_rate": 5.427835652613808e-05, "loss": 46.0085, "step": 3515 }, { "epoch": 0.4800327667417571, "grad_norm": 0.13005191087722778, "learning_rate": 5.4256691735681786e-05, "loss": 46.0013, "step": 3516 }, { "epoch": 0.4801692948324118, "grad_norm": 0.07129677385091782, "learning_rate": 5.423502614018735e-05, "loss": 46.0033, "step": 3517 }, { "epoch": 0.4803058229230664, "grad_norm": 0.07490525394678116, "learning_rate": 5.4213359743752214e-05, "loss": 46.0003, "step": 3518 }, { "epoch": 0.48044235101372107, "grad_norm": 0.07456731796264648, "learning_rate": 5.4191692550473996e-05, "loss": 46.0088, "step": 3519 }, { "epoch": 0.48057887910437574, "grad_norm": 0.19726069271564484, "learning_rate": 5.417002456445046e-05, "loss": 46.0052, "step": 3520 }, { "epoch": 0.48071540719503036, "grad_norm": 0.07515542209148407, "learning_rate": 5.414835578977954e-05, "loss": 46.0081, "step": 3521 }, { "epoch": 0.48085193528568504, "grad_norm": 0.1313927322626114, "learning_rate": 5.4126686230559264e-05, "loss": 46.0038, "step": 3522 }, { "epoch": 0.48098846337633966, "grad_norm": 0.05808495357632637, "learning_rate": 5.410501589088785e-05, "loss": 46.0105, "step": 3523 }, { "epoch": 0.48112499146699433, "grad_norm": 0.12861758470535278, "learning_rate": 5.4083344774863685e-05, "loss": 46.0053, "step": 3524 }, { "epoch": 0.481261519557649, "grad_norm": 0.21699756383895874, "learning_rate": 5.406167288658526e-05, "loss": 46.0104, "step": 3525 }, { "epoch": 0.48139804764830363, "grad_norm": 0.1516313999891281, "learning_rate": 5.40400002301512e-05, "loss": 46.0021, "step": 3526 }, { "epoch": 0.4815345757389583, "grad_norm": 0.05236617848277092, "learning_rate": 5.4018326809660344e-05, "loss": 46.0069, "step": 3527 }, { "epoch": 0.4816711038296129, "grad_norm": 0.08200529217720032, "learning_rate": 5.399665262921161e-05, "loss": 46.0016, "step": 3528 }, { "epoch": 0.4818076319202676, "grad_norm": 0.1298283040523529, "learning_rate": 5.3974977692904096e-05, "loss": 46.0028, "step": 3529 }, { "epoch": 0.4819441600109223, "grad_norm": 0.06210097670555115, "learning_rate": 5.3953302004837e-05, "loss": 46.0076, "step": 3530 }, { "epoch": 0.4820806881015769, "grad_norm": 0.17126965522766113, "learning_rate": 5.3931625569109733e-05, "loss": 46.004, "step": 3531 }, { "epoch": 0.48221721619223157, "grad_norm": 0.06783267110586166, "learning_rate": 5.3909948389821776e-05, "loss": 46.0048, "step": 3532 }, { "epoch": 0.4823537442828862, 
"grad_norm": 0.03436407074332237, "learning_rate": 5.38882704710728e-05, "loss": 46.0028, "step": 3533 }, { "epoch": 0.48249027237354086, "grad_norm": 0.0684400349855423, "learning_rate": 5.3866591816962585e-05, "loss": 46.0059, "step": 3534 }, { "epoch": 0.4826268004641955, "grad_norm": 0.04203067347407341, "learning_rate": 5.384491243159108e-05, "loss": 46.0013, "step": 3535 }, { "epoch": 0.48276332855485016, "grad_norm": 0.0922812819480896, "learning_rate": 5.3823232319058335e-05, "loss": 46.003, "step": 3536 }, { "epoch": 0.48289985664550483, "grad_norm": 0.10877163708209991, "learning_rate": 5.380155148346456e-05, "loss": 46.0117, "step": 3537 }, { "epoch": 0.48303638473615945, "grad_norm": 0.20423027873039246, "learning_rate": 5.377986992891011e-05, "loss": 46.0009, "step": 3538 }, { "epoch": 0.4831729128268141, "grad_norm": 0.10351524502038956, "learning_rate": 5.375818765949546e-05, "loss": 46.0052, "step": 3539 }, { "epoch": 0.48330944091746875, "grad_norm": 0.0535518117249012, "learning_rate": 5.373650467932122e-05, "loss": 46.0018, "step": 3540 }, { "epoch": 0.4834459690081234, "grad_norm": 0.08592096716165543, "learning_rate": 5.3714820992488146e-05, "loss": 46.0038, "step": 3541 }, { "epoch": 0.4835824970987781, "grad_norm": 0.09139885753393173, "learning_rate": 5.36931366030971e-05, "loss": 46.0095, "step": 3542 }, { "epoch": 0.4837190251894327, "grad_norm": 0.13241876661777496, "learning_rate": 5.367145151524915e-05, "loss": 46.0003, "step": 3543 }, { "epoch": 0.4838555532800874, "grad_norm": 0.07193689048290253, "learning_rate": 5.364976573304538e-05, "loss": 46.0045, "step": 3544 }, { "epoch": 0.483992081370742, "grad_norm": 0.1909732073545456, "learning_rate": 5.3628079260587116e-05, "loss": 46.0064, "step": 3545 }, { "epoch": 0.4841286094613967, "grad_norm": 0.16899266839027405, "learning_rate": 5.3606392101975733e-05, "loss": 46.0089, "step": 3546 }, { "epoch": 0.48426513755205136, "grad_norm": 0.1323392540216446, "learning_rate": 5.358470426131282e-05, "loss": 46.0021, "step": 3547 }, { "epoch": 0.484401665642706, "grad_norm": 0.24131232500076294, "learning_rate": 5.3563015742699986e-05, "loss": 46.0012, "step": 3548 }, { "epoch": 0.48453819373336066, "grad_norm": 0.2516774535179138, "learning_rate": 5.354132655023907e-05, "loss": 46.0046, "step": 3549 }, { "epoch": 0.4846747218240153, "grad_norm": 0.1629866510629654, "learning_rate": 5.351963668803198e-05, "loss": 46.0017, "step": 3550 }, { "epoch": 0.48481124991466995, "grad_norm": 0.14269879460334778, "learning_rate": 5.349794616018077e-05, "loss": 46.007, "step": 3551 }, { "epoch": 0.48494777800532457, "grad_norm": 0.04561243951320648, "learning_rate": 5.347625497078762e-05, "loss": 46.0017, "step": 3552 }, { "epoch": 0.48508430609597925, "grad_norm": 0.06718702614307404, "learning_rate": 5.345456312395483e-05, "loss": 46.0035, "step": 3553 }, { "epoch": 0.4852208341866339, "grad_norm": 0.052944742143154144, "learning_rate": 5.3432870623784816e-05, "loss": 46.0039, "step": 3554 }, { "epoch": 0.48535736227728854, "grad_norm": 0.056514982134103775, "learning_rate": 5.3411177474380136e-05, "loss": 46.0019, "step": 3555 }, { "epoch": 0.4854938903679432, "grad_norm": 0.24464833736419678, "learning_rate": 5.3389483679843464e-05, "loss": 46.0055, "step": 3556 }, { "epoch": 0.48563041845859783, "grad_norm": 0.044937603175640106, "learning_rate": 5.336778924427761e-05, "loss": 46.0012, "step": 3557 }, { "epoch": 0.4857669465492525, "grad_norm": 0.06341525912284851, "learning_rate": 5.3346094171785455e-05, "loss": 46.0011, 
"step": 3558 }, { "epoch": 0.4859034746399072, "grad_norm": 0.09076950699090958, "learning_rate": 5.3324398466470085e-05, "loss": 46.0078, "step": 3559 }, { "epoch": 0.4860400027305618, "grad_norm": 0.12276820838451385, "learning_rate": 5.330270213243459e-05, "loss": 46.0015, "step": 3560 }, { "epoch": 0.4861765308212165, "grad_norm": 0.0631285235285759, "learning_rate": 5.3281005173782315e-05, "loss": 46.0065, "step": 3561 }, { "epoch": 0.4863130589118711, "grad_norm": 0.06628759950399399, "learning_rate": 5.325930759461659e-05, "loss": 46.0073, "step": 3562 }, { "epoch": 0.4864495870025258, "grad_norm": 0.08841874450445175, "learning_rate": 5.323760939904097e-05, "loss": 46.0027, "step": 3563 }, { "epoch": 0.48658611509318045, "grad_norm": 0.03499993681907654, "learning_rate": 5.321591059115906e-05, "loss": 46.0111, "step": 3564 }, { "epoch": 0.48672264318383507, "grad_norm": 0.11569716036319733, "learning_rate": 5.319421117507462e-05, "loss": 46.008, "step": 3565 }, { "epoch": 0.48685917127448974, "grad_norm": 0.10003436356782913, "learning_rate": 5.317251115489148e-05, "loss": 46.0099, "step": 3566 }, { "epoch": 0.48699569936514436, "grad_norm": 0.09441227465867996, "learning_rate": 5.3150810534713644e-05, "loss": 46.0072, "step": 3567 }, { "epoch": 0.48713222745579904, "grad_norm": 0.06614168733358383, "learning_rate": 5.312910931864518e-05, "loss": 46.0007, "step": 3568 }, { "epoch": 0.48726875554645366, "grad_norm": 0.10540804266929626, "learning_rate": 5.310740751079029e-05, "loss": 46.0096, "step": 3569 }, { "epoch": 0.48740528363710833, "grad_norm": 0.08330143243074417, "learning_rate": 5.308570511525327e-05, "loss": 46.0045, "step": 3570 }, { "epoch": 0.487541811727763, "grad_norm": 0.06861492246389389, "learning_rate": 5.306400213613857e-05, "loss": 46.0101, "step": 3571 }, { "epoch": 0.4876783398184176, "grad_norm": 0.1035468727350235, "learning_rate": 5.3042298577550696e-05, "loss": 46.007, "step": 3572 }, { "epoch": 0.4878148679090723, "grad_norm": 0.04352625831961632, "learning_rate": 5.302059444359432e-05, "loss": 46.009, "step": 3573 }, { "epoch": 0.4879513959997269, "grad_norm": 0.043714653700590134, "learning_rate": 5.2998889738374146e-05, "loss": 46.0102, "step": 3574 }, { "epoch": 0.4880879240903816, "grad_norm": 0.2240709364414215, "learning_rate": 5.2977184465995075e-05, "loss": 46.0042, "step": 3575 }, { "epoch": 0.4882244521810363, "grad_norm": 0.04049823433160782, "learning_rate": 5.295547863056205e-05, "loss": 46.0034, "step": 3576 }, { "epoch": 0.4883609802716909, "grad_norm": 0.05111071094870567, "learning_rate": 5.2933772236180156e-05, "loss": 46.0064, "step": 3577 }, { "epoch": 0.48849750836234557, "grad_norm": 0.05824568122625351, "learning_rate": 5.291206528695455e-05, "loss": 46.0071, "step": 3578 }, { "epoch": 0.4886340364530002, "grad_norm": 0.1414150893688202, "learning_rate": 5.289035778699053e-05, "loss": 46.0042, "step": 3579 }, { "epoch": 0.48877056454365486, "grad_norm": 0.06070927157998085, "learning_rate": 5.2868649740393496e-05, "loss": 46.0123, "step": 3580 }, { "epoch": 0.48890709263430954, "grad_norm": 0.08039873093366623, "learning_rate": 5.28469411512689e-05, "loss": 46.0032, "step": 3581 }, { "epoch": 0.48904362072496416, "grad_norm": 0.0741550624370575, "learning_rate": 5.282523202372238e-05, "loss": 46.0032, "step": 3582 }, { "epoch": 0.48918014881561883, "grad_norm": 0.1459955871105194, "learning_rate": 5.2803522361859594e-05, "loss": 46.0016, "step": 3583 }, { "epoch": 0.48931667690627345, "grad_norm": 0.114018514752388, 
"learning_rate": 5.278181216978636e-05, "loss": 46.0012, "step": 3584 }, { "epoch": 0.4894532049969281, "grad_norm": 0.04026031494140625, "learning_rate": 5.276010145160856e-05, "loss": 46.0077, "step": 3585 }, { "epoch": 0.48958973308758275, "grad_norm": 0.09525207430124283, "learning_rate": 5.273839021143218e-05, "loss": 46.0007, "step": 3586 }, { "epoch": 0.4897262611782374, "grad_norm": 0.1343512386083603, "learning_rate": 5.2716678453363356e-05, "loss": 46.0014, "step": 3587 }, { "epoch": 0.4898627892688921, "grad_norm": 0.04020600765943527, "learning_rate": 5.2694966181508234e-05, "loss": 46.0128, "step": 3588 }, { "epoch": 0.4899993173595467, "grad_norm": 0.04094218835234642, "learning_rate": 5.267325339997313e-05, "loss": 46.0045, "step": 3589 }, { "epoch": 0.4901358454502014, "grad_norm": 0.0719633623957634, "learning_rate": 5.2651540112864415e-05, "loss": 46.0079, "step": 3590 }, { "epoch": 0.490272373540856, "grad_norm": 0.030265411362051964, "learning_rate": 5.262982632428859e-05, "loss": 46.0018, "step": 3591 }, { "epoch": 0.4904089016315107, "grad_norm": 0.06513111293315887, "learning_rate": 5.2608112038352196e-05, "loss": 46.0017, "step": 3592 }, { "epoch": 0.49054542972216536, "grad_norm": 0.07720500975847244, "learning_rate": 5.258639725916195e-05, "loss": 46.0023, "step": 3593 }, { "epoch": 0.49068195781282, "grad_norm": 0.05949806794524193, "learning_rate": 5.256468199082457e-05, "loss": 46.0064, "step": 3594 }, { "epoch": 0.49081848590347465, "grad_norm": 0.26642534136772156, "learning_rate": 5.2542966237446946e-05, "loss": 46.0021, "step": 3595 }, { "epoch": 0.4909550139941293, "grad_norm": 0.20386172831058502, "learning_rate": 5.2521250003136005e-05, "loss": 46.008, "step": 3596 }, { "epoch": 0.49109154208478395, "grad_norm": 0.14512427151203156, "learning_rate": 5.249953329199879e-05, "loss": 46.0086, "step": 3597 }, { "epoch": 0.49122807017543857, "grad_norm": 0.12544819712638855, "learning_rate": 5.247781610814243e-05, "loss": 46.0017, "step": 3598 }, { "epoch": 0.49136459826609324, "grad_norm": 0.25164473056793213, "learning_rate": 5.245609845567415e-05, "loss": 46.0022, "step": 3599 }, { "epoch": 0.4915011263567479, "grad_norm": 0.21715201437473297, "learning_rate": 5.243438033870126e-05, "loss": 46.0038, "step": 3600 }, { "epoch": 0.49163765444740254, "grad_norm": 0.11968432366847992, "learning_rate": 5.241266176133114e-05, "loss": 46.0082, "step": 3601 }, { "epoch": 0.4917741825380572, "grad_norm": 0.04493282735347748, "learning_rate": 5.2390942727671275e-05, "loss": 46.001, "step": 3602 }, { "epoch": 0.49191071062871183, "grad_norm": 0.039406634867191315, "learning_rate": 5.236922324182924e-05, "loss": 46.0061, "step": 3603 }, { "epoch": 0.4920472387193665, "grad_norm": 0.09218931198120117, "learning_rate": 5.234750330791268e-05, "loss": 46.0046, "step": 3604 }, { "epoch": 0.4921837668100212, "grad_norm": 0.06850703060626984, "learning_rate": 5.2325782930029346e-05, "loss": 46.0047, "step": 3605 }, { "epoch": 0.4923202949006758, "grad_norm": 0.08794685453176498, "learning_rate": 5.230406211228703e-05, "loss": 46.0052, "step": 3606 }, { "epoch": 0.4924568229913305, "grad_norm": 0.06021621823310852, "learning_rate": 5.22823408587937e-05, "loss": 46.0051, "step": 3607 }, { "epoch": 0.4925933510819851, "grad_norm": 0.0990615114569664, "learning_rate": 5.226061917365726e-05, "loss": 46.0025, "step": 3608 }, { "epoch": 0.4927298791726398, "grad_norm": 0.0501224547624588, "learning_rate": 5.223889706098586e-05, "loss": 46.0035, "step": 3609 }, { "epoch": 
0.49286640726329445, "grad_norm": 0.0523495227098465, "learning_rate": 5.221717452488759e-05, "loss": 46.0032, "step": 3610 }, { "epoch": 0.49300293535394907, "grad_norm": 0.03997698798775673, "learning_rate": 5.2195451569470724e-05, "loss": 46.0055, "step": 3611 }, { "epoch": 0.49313946344460374, "grad_norm": 0.09860397130250931, "learning_rate": 5.217372819884353e-05, "loss": 46.0039, "step": 3612 }, { "epoch": 0.49327599153525836, "grad_norm": 0.07921923696994781, "learning_rate": 5.215200441711443e-05, "loss": 46.0037, "step": 3613 }, { "epoch": 0.49341251962591304, "grad_norm": 0.053434304893016815, "learning_rate": 5.213028022839188e-05, "loss": 46.0076, "step": 3614 }, { "epoch": 0.49354904771656766, "grad_norm": 0.03921204060316086, "learning_rate": 5.2108555636784416e-05, "loss": 46.0083, "step": 3615 }, { "epoch": 0.49368557580722233, "grad_norm": 0.14185796678066254, "learning_rate": 5.2086830646400664e-05, "loss": 46.0084, "step": 3616 }, { "epoch": 0.493822103897877, "grad_norm": 0.0708821713924408, "learning_rate": 5.206510526134931e-05, "loss": 46.0115, "step": 3617 }, { "epoch": 0.4939586319885316, "grad_norm": 0.06737960875034332, "learning_rate": 5.204337948573913e-05, "loss": 46.0045, "step": 3618 }, { "epoch": 0.4940951600791863, "grad_norm": 0.037902384996414185, "learning_rate": 5.202165332367898e-05, "loss": 46.0026, "step": 3619 }, { "epoch": 0.4942316881698409, "grad_norm": 0.09915512055158615, "learning_rate": 5.199992677927775e-05, "loss": 46.0152, "step": 3620 }, { "epoch": 0.4943682162604956, "grad_norm": 0.03299998492002487, "learning_rate": 5.197819985664444e-05, "loss": 46.0045, "step": 3621 }, { "epoch": 0.49450474435115027, "grad_norm": 0.12359783798456192, "learning_rate": 5.1956472559888114e-05, "loss": 46.0031, "step": 3622 }, { "epoch": 0.4946412724418049, "grad_norm": 0.03179977089166641, "learning_rate": 5.193474489311789e-05, "loss": 46.0031, "step": 3623 }, { "epoch": 0.49477780053245957, "grad_norm": 0.14937672019004822, "learning_rate": 5.191301686044301e-05, "loss": 46.0016, "step": 3624 }, { "epoch": 0.4949143286231142, "grad_norm": 0.10305608063936234, "learning_rate": 5.1891288465972675e-05, "loss": 46.0091, "step": 3625 }, { "epoch": 0.49505085671376886, "grad_norm": 0.12923555076122284, "learning_rate": 5.18695597138163e-05, "loss": 46.0049, "step": 3626 }, { "epoch": 0.49518738480442354, "grad_norm": 0.035760920494794846, "learning_rate": 5.1847830608083215e-05, "loss": 46.0049, "step": 3627 }, { "epoch": 0.49532391289507816, "grad_norm": 0.14157888293266296, "learning_rate": 5.182610115288295e-05, "loss": 46.0029, "step": 3628 }, { "epoch": 0.49546044098573283, "grad_norm": 0.11995163559913635, "learning_rate": 5.180437135232501e-05, "loss": 46.0031, "step": 3629 }, { "epoch": 0.49559696907638745, "grad_norm": 0.09753424674272537, "learning_rate": 5.178264121051902e-05, "loss": 46.0037, "step": 3630 }, { "epoch": 0.4957334971670421, "grad_norm": 0.10328710824251175, "learning_rate": 5.1760910731574654e-05, "loss": 46.0086, "step": 3631 }, { "epoch": 0.49587002525769674, "grad_norm": 0.048373326659202576, "learning_rate": 5.1739179919601624e-05, "loss": 46.0008, "step": 3632 }, { "epoch": 0.4960065533483514, "grad_norm": 0.0665678083896637, "learning_rate": 5.171744877870973e-05, "loss": 46.0047, "step": 3633 }, { "epoch": 0.4961430814390061, "grad_norm": 0.035323966294527054, "learning_rate": 5.169571731300884e-05, "loss": 46.0079, "step": 3634 }, { "epoch": 0.4962796095296607, "grad_norm": 0.060418158769607544, "learning_rate": 
5.167398552660886e-05, "loss": 46.0103, "step": 3635 }, { "epoch": 0.4964161376203154, "grad_norm": 0.0762789249420166, "learning_rate": 5.1652253423619776e-05, "loss": 46.0007, "step": 3636 }, { "epoch": 0.49655266571097, "grad_norm": 0.06681987643241882, "learning_rate": 5.163052100815163e-05, "loss": 46.0021, "step": 3637 }, { "epoch": 0.4966891938016247, "grad_norm": 0.04651793837547302, "learning_rate": 5.160878828431454e-05, "loss": 46.0025, "step": 3638 }, { "epoch": 0.49682572189227936, "grad_norm": 0.04734274744987488, "learning_rate": 5.1587055256218606e-05, "loss": 46.0026, "step": 3639 }, { "epoch": 0.496962249982934, "grad_norm": 0.06592410802841187, "learning_rate": 5.156532192797411e-05, "loss": 46.0042, "step": 3640 }, { "epoch": 0.49709877807358865, "grad_norm": 0.11769154667854309, "learning_rate": 5.154358830369127e-05, "loss": 46.0127, "step": 3641 }, { "epoch": 0.4972353061642433, "grad_norm": 0.04620181769132614, "learning_rate": 5.152185438748045e-05, "loss": 46.0037, "step": 3642 }, { "epoch": 0.49737183425489795, "grad_norm": 0.12203779816627502, "learning_rate": 5.150012018345201e-05, "loss": 46.0022, "step": 3643 }, { "epoch": 0.4975083623455526, "grad_norm": 0.12124486267566681, "learning_rate": 5.147838569571641e-05, "loss": 46.0029, "step": 3644 }, { "epoch": 0.49764489043620724, "grad_norm": 0.13150478899478912, "learning_rate": 5.145665092838411e-05, "loss": 46.0002, "step": 3645 }, { "epoch": 0.4977814185268619, "grad_norm": 0.067418672144413, "learning_rate": 5.1434915885565674e-05, "loss": 46.0042, "step": 3646 }, { "epoch": 0.49791794661751654, "grad_norm": 0.19239819049835205, "learning_rate": 5.14131805713717e-05, "loss": 46.0003, "step": 3647 }, { "epoch": 0.4980544747081712, "grad_norm": 0.15669293701648712, "learning_rate": 5.139144498991282e-05, "loss": 46.0, "step": 3648 }, { "epoch": 0.49819100279882583, "grad_norm": 0.2672114968299866, "learning_rate": 5.136970914529975e-05, "loss": 46.0018, "step": 3649 }, { "epoch": 0.4983275308894805, "grad_norm": 0.28681567311286926, "learning_rate": 5.134797304164321e-05, "loss": 46.0, "step": 3650 }, { "epoch": 0.4984640589801352, "grad_norm": 0.06476181000471115, "learning_rate": 5.132623668305404e-05, "loss": 46.0078, "step": 3651 }, { "epoch": 0.4986005870707898, "grad_norm": 0.12151657789945602, "learning_rate": 5.130450007364305e-05, "loss": 46.006, "step": 3652 }, { "epoch": 0.4987371151614445, "grad_norm": 0.14722934365272522, "learning_rate": 5.1282763217521126e-05, "loss": 46.0028, "step": 3653 }, { "epoch": 0.4988736432520991, "grad_norm": 0.040940482169389725, "learning_rate": 5.126102611879924e-05, "loss": 46.0009, "step": 3654 }, { "epoch": 0.49901017134275377, "grad_norm": 0.05079694092273712, "learning_rate": 5.123928878158833e-05, "loss": 46.0038, "step": 3655 }, { "epoch": 0.49914669943340845, "grad_norm": 0.04259675741195679, "learning_rate": 5.121755120999949e-05, "loss": 46.0, "step": 3656 }, { "epoch": 0.49928322752406307, "grad_norm": 0.14459188282489777, "learning_rate": 5.1195813408143724e-05, "loss": 46.0019, "step": 3657 }, { "epoch": 0.49941975561471774, "grad_norm": 0.10318051278591156, "learning_rate": 5.117407538013221e-05, "loss": 46.0024, "step": 3658 }, { "epoch": 0.49955628370537236, "grad_norm": 0.11508560180664062, "learning_rate": 5.115233713007607e-05, "loss": 46.0007, "step": 3659 }, { "epoch": 0.49969281179602704, "grad_norm": 0.0846799910068512, "learning_rate": 5.113059866208652e-05, "loss": 46.0041, "step": 3660 }, { "epoch": 0.4998293398866817, "grad_norm": 
0.14619475603103638, "learning_rate": 5.110885998027479e-05, "loss": 46.0033, "step": 3661 }, { "epoch": 0.49996586797733633, "grad_norm": 0.07055540382862091, "learning_rate": 5.108712108875218e-05, "loss": 46.0013, "step": 3662 }, { "epoch": 0.500102396067991, "grad_norm": 0.15471559762954712, "learning_rate": 5.106538199163001e-05, "loss": 46.0093, "step": 3663 }, { "epoch": 0.5002389241586457, "grad_norm": 0.05544060096144676, "learning_rate": 5.1043642693019645e-05, "loss": 46.005, "step": 3664 }, { "epoch": 0.5003754522493002, "grad_norm": 0.08635144680738449, "learning_rate": 5.102190319703247e-05, "loss": 46.008, "step": 3665 }, { "epoch": 0.5005119803399549, "grad_norm": 0.06720244139432907, "learning_rate": 5.1000163507779954e-05, "loss": 46.0061, "step": 3666 }, { "epoch": 0.5006485084306096, "grad_norm": 0.09719010442495346, "learning_rate": 5.097842362937353e-05, "loss": 46.0049, "step": 3667 }, { "epoch": 0.5007850365212643, "grad_norm": 0.04524730518460274, "learning_rate": 5.095668356592475e-05, "loss": 46.0022, "step": 3668 }, { "epoch": 0.500921564611919, "grad_norm": 0.15217161178588867, "learning_rate": 5.0934943321545115e-05, "loss": 46.0021, "step": 3669 }, { "epoch": 0.5010580927025735, "grad_norm": 0.08644964545965195, "learning_rate": 5.091320290034625e-05, "loss": 46.0024, "step": 3670 }, { "epoch": 0.5011946207932282, "grad_norm": 0.13262514770030975, "learning_rate": 5.089146230643973e-05, "loss": 46.0054, "step": 3671 }, { "epoch": 0.5013311488838829, "grad_norm": 0.05330755189061165, "learning_rate": 5.086972154393723e-05, "loss": 46.0063, "step": 3672 }, { "epoch": 0.5014676769745375, "grad_norm": 0.08629577606916428, "learning_rate": 5.084798061695041e-05, "loss": 46.0074, "step": 3673 }, { "epoch": 0.5016042050651922, "grad_norm": 0.06772720813751221, "learning_rate": 5.082623952959099e-05, "loss": 46.0041, "step": 3674 }, { "epoch": 0.5017407331558468, "grad_norm": 0.15478529036045074, "learning_rate": 5.080449828597068e-05, "loss": 46.0059, "step": 3675 }, { "epoch": 0.5018772612465014, "grad_norm": 0.14051242172718048, "learning_rate": 5.078275689020129e-05, "loss": 46.0026, "step": 3676 }, { "epoch": 0.5020137893371561, "grad_norm": 0.11585818231105804, "learning_rate": 5.076101534639459e-05, "loss": 46.0079, "step": 3677 }, { "epoch": 0.5021503174278108, "grad_norm": 0.05447547510266304, "learning_rate": 5.073927365866241e-05, "loss": 46.0082, "step": 3678 }, { "epoch": 0.5022868455184655, "grad_norm": 0.03876263275742531, "learning_rate": 5.071753183111662e-05, "loss": 46.0036, "step": 3679 }, { "epoch": 0.50242337360912, "grad_norm": 0.04659855738282204, "learning_rate": 5.069578986786907e-05, "loss": 46.0078, "step": 3680 }, { "epoch": 0.5025599016997747, "grad_norm": 0.06535707414150238, "learning_rate": 5.067404777303169e-05, "loss": 46.0001, "step": 3681 }, { "epoch": 0.5026964297904294, "grad_norm": 0.1265409290790558, "learning_rate": 5.0652305550716384e-05, "loss": 46.0057, "step": 3682 }, { "epoch": 0.5028329578810841, "grad_norm": 0.037594474852085114, "learning_rate": 5.063056320503511e-05, "loss": 46.0032, "step": 3683 }, { "epoch": 0.5029694859717387, "grad_norm": 0.16348282992839813, "learning_rate": 5.060882074009988e-05, "loss": 46.0029, "step": 3684 }, { "epoch": 0.5031060140623933, "grad_norm": 0.14721862971782684, "learning_rate": 5.058707816002265e-05, "loss": 46.0051, "step": 3685 }, { "epoch": 0.503242542153048, "grad_norm": 0.034867506474256516, "learning_rate": 5.056533546891548e-05, "loss": 46.006, "step": 3686 }, { "epoch": 
0.5033790702437027, "grad_norm": 0.07183868438005447, "learning_rate": 5.054359267089036e-05, "loss": 46.002, "step": 3687 }, { "epoch": 0.5035155983343573, "grad_norm": 0.13470180332660675, "learning_rate": 5.052184977005941e-05, "loss": 46.002, "step": 3688 }, { "epoch": 0.5036521264250119, "grad_norm": 0.09768860042095184, "learning_rate": 5.0500106770534675e-05, "loss": 46.0022, "step": 3689 }, { "epoch": 0.5037886545156666, "grad_norm": 0.08463192731142044, "learning_rate": 5.047836367642827e-05, "loss": 46.0114, "step": 3690 }, { "epoch": 0.5039251826063212, "grad_norm": 0.06346849352121353, "learning_rate": 5.045662049185229e-05, "loss": 46.0053, "step": 3691 }, { "epoch": 0.5040617106969759, "grad_norm": 0.04599376767873764, "learning_rate": 5.043487722091891e-05, "loss": 46.0023, "step": 3692 }, { "epoch": 0.5041982387876306, "grad_norm": 0.05925873667001724, "learning_rate": 5.041313386774026e-05, "loss": 46.0034, "step": 3693 }, { "epoch": 0.5043347668782852, "grad_norm": 0.05503217130899429, "learning_rate": 5.03913904364285e-05, "loss": 46.0089, "step": 3694 }, { "epoch": 0.5044712949689398, "grad_norm": 0.10560446232557297, "learning_rate": 5.036964693109583e-05, "loss": 46.0, "step": 3695 }, { "epoch": 0.5046078230595945, "grad_norm": 0.24623654782772064, "learning_rate": 5.034790335585443e-05, "loss": 46.0031, "step": 3696 }, { "epoch": 0.5047443511502492, "grad_norm": 0.14909818768501282, "learning_rate": 5.0326159714816526e-05, "loss": 46.0032, "step": 3697 }, { "epoch": 0.5048808792409039, "grad_norm": 0.16826170682907104, "learning_rate": 5.030441601209431e-05, "loss": 46.0013, "step": 3698 }, { "epoch": 0.5050174073315584, "grad_norm": 0.23511923849582672, "learning_rate": 5.0282672251800055e-05, "loss": 46.0014, "step": 3699 }, { "epoch": 0.5051539354222131, "grad_norm": 0.11384458839893341, "learning_rate": 5.0260928438045984e-05, "loss": 46.0015, "step": 3700 }, { "epoch": 0.5052904635128678, "grad_norm": 0.098232701420784, "learning_rate": 5.023918457494433e-05, "loss": 46.0142, "step": 3701 }, { "epoch": 0.5054269916035224, "grad_norm": 0.04126046970486641, "learning_rate": 5.0217440666607415e-05, "loss": 46.0021, "step": 3702 }, { "epoch": 0.5055635196941771, "grad_norm": 0.09445569664239883, "learning_rate": 5.019569671714746e-05, "loss": 46.0019, "step": 3703 }, { "epoch": 0.5057000477848317, "grad_norm": 0.04119783639907837, "learning_rate": 5.017395273067676e-05, "loss": 46.0054, "step": 3704 }, { "epoch": 0.5058365758754864, "grad_norm": 0.051249273121356964, "learning_rate": 5.01522087113076e-05, "loss": 46.0034, "step": 3705 }, { "epoch": 0.505973103966141, "grad_norm": 0.04213196039199829, "learning_rate": 5.0130464663152285e-05, "loss": 46.0014, "step": 3706 }, { "epoch": 0.5061096320567957, "grad_norm": 0.03572355955839157, "learning_rate": 5.010872059032311e-05, "loss": 46.0028, "step": 3707 }, { "epoch": 0.5062461601474504, "grad_norm": 0.04816088452935219, "learning_rate": 5.008697649693237e-05, "loss": 46.0016, "step": 3708 }, { "epoch": 0.506382688238105, "grad_norm": 0.11648207902908325, "learning_rate": 5.0065232387092374e-05, "loss": 46.0069, "step": 3709 }, { "epoch": 0.5065192163287596, "grad_norm": 0.05503849312663078, "learning_rate": 5.004348826491545e-05, "loss": 46.0044, "step": 3710 }, { "epoch": 0.5066557444194143, "grad_norm": 0.05017152801156044, "learning_rate": 5.002174413451388e-05, "loss": 46.0068, "step": 3711 }, { "epoch": 0.506792272510069, "grad_norm": 0.07609308511018753, "learning_rate": 5e-05, "loss": 46.0042, "step": 3712 
}, { "epoch": 0.5069288006007237, "grad_norm": 0.03873499110341072, "learning_rate": 4.997825586548613e-05, "loss": 46.0056, "step": 3713 }, { "epoch": 0.5070653286913782, "grad_norm": 0.09029056876897812, "learning_rate": 4.995651173508455e-05, "loss": 46.0125, "step": 3714 }, { "epoch": 0.5072018567820329, "grad_norm": 0.0990789532661438, "learning_rate": 4.993476761290763e-05, "loss": 46.0112, "step": 3715 }, { "epoch": 0.5073383848726876, "grad_norm": 0.04144856333732605, "learning_rate": 4.9913023503067644e-05, "loss": 46.0026, "step": 3716 }, { "epoch": 0.5074749129633422, "grad_norm": 0.11823198944330215, "learning_rate": 4.98912794096769e-05, "loss": 46.0061, "step": 3717 }, { "epoch": 0.5076114410539969, "grad_norm": 0.058694206178188324, "learning_rate": 4.9869535336847713e-05, "loss": 46.0022, "step": 3718 }, { "epoch": 0.5077479691446515, "grad_norm": 0.08802681416273117, "learning_rate": 4.9847791288692406e-05, "loss": 46.0028, "step": 3719 }, { "epoch": 0.5078844972353062, "grad_norm": 0.07600174099206924, "learning_rate": 4.9826047269323255e-05, "loss": 46.0031, "step": 3720 }, { "epoch": 0.5080210253259608, "grad_norm": 0.04282359406352043, "learning_rate": 4.9804303282852546e-05, "loss": 46.0074, "step": 3721 }, { "epoch": 0.5081575534166155, "grad_norm": 0.08084993809461594, "learning_rate": 4.9782559333392596e-05, "loss": 46.0031, "step": 3722 }, { "epoch": 0.5082940815072701, "grad_norm": 0.0965765118598938, "learning_rate": 4.976081542505568e-05, "loss": 46.0074, "step": 3723 }, { "epoch": 0.5084306095979247, "grad_norm": 0.10648202151060104, "learning_rate": 4.973907156195404e-05, "loss": 46.0057, "step": 3724 }, { "epoch": 0.5085671376885794, "grad_norm": 0.08598223328590393, "learning_rate": 4.971732774819996e-05, "loss": 46.003, "step": 3725 }, { "epoch": 0.5087036657792341, "grad_norm": 0.08926679193973541, "learning_rate": 4.96955839879057e-05, "loss": 46.0086, "step": 3726 }, { "epoch": 0.5088401938698888, "grad_norm": 0.10481219738721848, "learning_rate": 4.967384028518349e-05, "loss": 46.0106, "step": 3727 }, { "epoch": 0.5089767219605433, "grad_norm": 0.12186281383037567, "learning_rate": 4.9652096644145565e-05, "loss": 46.0038, "step": 3728 }, { "epoch": 0.509113250051198, "grad_norm": 0.04719223454594612, "learning_rate": 4.963035306890418e-05, "loss": 46.0152, "step": 3729 }, { "epoch": 0.5092497781418527, "grad_norm": 0.03489864990115166, "learning_rate": 4.9608609563571504e-05, "loss": 46.0, "step": 3730 }, { "epoch": 0.5093863062325074, "grad_norm": 0.15576374530792236, "learning_rate": 4.9586866132259754e-05, "loss": 46.0001, "step": 3731 }, { "epoch": 0.509522834323162, "grad_norm": 0.05441977083683014, "learning_rate": 4.956512277908109e-05, "loss": 46.0084, "step": 3732 }, { "epoch": 0.5096593624138166, "grad_norm": 0.10401179641485214, "learning_rate": 4.9543379508147716e-05, "loss": 46.0043, "step": 3733 }, { "epoch": 0.5097958905044713, "grad_norm": 0.10340963304042816, "learning_rate": 4.952163632357175e-05, "loss": 46.0004, "step": 3734 }, { "epoch": 0.509932418595126, "grad_norm": 0.07392320781946182, "learning_rate": 4.949989322946533e-05, "loss": 46.003, "step": 3735 }, { "epoch": 0.5100689466857806, "grad_norm": 0.10232871025800705, "learning_rate": 4.94781502299406e-05, "loss": 46.0084, "step": 3736 }, { "epoch": 0.5102054747764353, "grad_norm": 0.08975447714328766, "learning_rate": 4.9456407329109646e-05, "loss": 46.0067, "step": 3737 }, { "epoch": 0.5103420028670899, "grad_norm": 0.037376437336206436, "learning_rate": 
4.9434664531084546e-05, "loss": 46.0023, "step": 3738 }, { "epoch": 0.5104785309577445, "grad_norm": 0.11168858408927917, "learning_rate": 4.941292183997735e-05, "loss": 46.0049, "step": 3739 }, { "epoch": 0.5106150590483992, "grad_norm": 0.08416692167520523, "learning_rate": 4.9391179259900125e-05, "loss": 46.0026, "step": 3740 }, { "epoch": 0.5107515871390539, "grad_norm": 0.06264391541481018, "learning_rate": 4.93694367949649e-05, "loss": 46.0085, "step": 3741 }, { "epoch": 0.5108881152297086, "grad_norm": 0.040007736533880234, "learning_rate": 4.9347694449283614e-05, "loss": 46.0046, "step": 3742 }, { "epoch": 0.5110246433203631, "grad_norm": 0.09096915274858475, "learning_rate": 4.932595222696833e-05, "loss": 46.001, "step": 3743 }, { "epoch": 0.5111611714110178, "grad_norm": 0.06033296138048172, "learning_rate": 4.930421013213094e-05, "loss": 46.0033, "step": 3744 }, { "epoch": 0.5112976995016725, "grad_norm": 0.04162440821528435, "learning_rate": 4.928246816888341e-05, "loss": 46.0, "step": 3745 }, { "epoch": 0.5114342275923272, "grad_norm": 0.08708584308624268, "learning_rate": 4.926072634133758e-05, "loss": 46.004, "step": 3746 }, { "epoch": 0.5115707556829818, "grad_norm": 0.2741992473602295, "learning_rate": 4.923898465360542e-05, "loss": 46.0051, "step": 3747 }, { "epoch": 0.5117072837736364, "grad_norm": 0.2265305519104004, "learning_rate": 4.9217243109798724e-05, "loss": 46.0035, "step": 3748 }, { "epoch": 0.5118438118642911, "grad_norm": 0.1396733522415161, "learning_rate": 4.919550171402933e-05, "loss": 46.0022, "step": 3749 }, { "epoch": 0.5119803399549457, "grad_norm": 0.15619534254074097, "learning_rate": 4.917376047040903e-05, "loss": 46.0049, "step": 3750 }, { "epoch": 0.5121168680456004, "grad_norm": 0.1562427580356598, "learning_rate": 4.915201938304961e-05, "loss": 46.0039, "step": 3751 }, { "epoch": 0.512253396136255, "grad_norm": 0.04286370053887367, "learning_rate": 4.913027845606279e-05, "loss": 46.0015, "step": 3752 }, { "epoch": 0.5123899242269097, "grad_norm": 0.07354115694761276, "learning_rate": 4.9108537693560274e-05, "loss": 46.0043, "step": 3753 }, { "epoch": 0.5125264523175643, "grad_norm": 0.09467605501413345, "learning_rate": 4.9086797099653756e-05, "loss": 46.0014, "step": 3754 }, { "epoch": 0.512662980408219, "grad_norm": 0.042374324053525925, "learning_rate": 4.9065056678454904e-05, "loss": 46.0002, "step": 3755 }, { "epoch": 0.5127995084988737, "grad_norm": 0.08183711767196655, "learning_rate": 4.904331643407528e-05, "loss": 46.0038, "step": 3756 }, { "epoch": 0.5129360365895282, "grad_norm": 0.03660031035542488, "learning_rate": 4.9021576370626475e-05, "loss": 46.0017, "step": 3757 }, { "epoch": 0.5130725646801829, "grad_norm": 0.04416121169924736, "learning_rate": 4.8999836492220065e-05, "loss": 46.0034, "step": 3758 }, { "epoch": 0.5132090927708376, "grad_norm": 0.05969323217868805, "learning_rate": 4.897809680296754e-05, "loss": 46.0047, "step": 3759 }, { "epoch": 0.5133456208614923, "grad_norm": 0.06485038995742798, "learning_rate": 4.895635730698035e-05, "loss": 46.0041, "step": 3760 }, { "epoch": 0.5134821489521469, "grad_norm": 0.1754254251718521, "learning_rate": 4.893461800837e-05, "loss": 46.0111, "step": 3761 }, { "epoch": 0.5136186770428015, "grad_norm": 0.0861864686012268, "learning_rate": 4.8912878911247827e-05, "loss": 46.0084, "step": 3762 }, { "epoch": 0.5137552051334562, "grad_norm": 0.13387073576450348, "learning_rate": 4.8891140019725235e-05, "loss": 46.0045, "step": 3763 }, { "epoch": 0.5138917332241109, "grad_norm": 
0.13656648993492126, "learning_rate": 4.886940133791349e-05, "loss": 46.0081, "step": 3764 }, { "epoch": 0.5140282613147655, "grad_norm": 0.13985441625118256, "learning_rate": 4.884766286992395e-05, "loss": 46.012, "step": 3765 }, { "epoch": 0.5141647894054202, "grad_norm": 0.051763877272605896, "learning_rate": 4.8825924619867806e-05, "loss": 46.001, "step": 3766 }, { "epoch": 0.5143013174960748, "grad_norm": 0.1323077380657196, "learning_rate": 4.8804186591856274e-05, "loss": 46.006, "step": 3767 }, { "epoch": 0.5144378455867294, "grad_norm": 0.040042538195848465, "learning_rate": 4.8782448790000525e-05, "loss": 46.0002, "step": 3768 }, { "epoch": 0.5145743736773841, "grad_norm": 0.06911583989858627, "learning_rate": 4.876071121841167e-05, "loss": 46.0046, "step": 3769 }, { "epoch": 0.5147109017680388, "grad_norm": 0.13127583265304565, "learning_rate": 4.873897388120078e-05, "loss": 46.0062, "step": 3770 }, { "epoch": 0.5148474298586935, "grad_norm": 0.13249653577804565, "learning_rate": 4.871723678247888e-05, "loss": 46.0087, "step": 3771 }, { "epoch": 0.514983957949348, "grad_norm": 0.09703817218542099, "learning_rate": 4.869549992635697e-05, "loss": 46.0014, "step": 3772 }, { "epoch": 0.5151204860400027, "grad_norm": 0.06032884120941162, "learning_rate": 4.867376331694599e-05, "loss": 46.0019, "step": 3773 }, { "epoch": 0.5152570141306574, "grad_norm": 0.05335661768913269, "learning_rate": 4.865202695835678e-05, "loss": 46.005, "step": 3774 }, { "epoch": 0.5153935422213121, "grad_norm": 0.13369935750961304, "learning_rate": 4.863029085470026e-05, "loss": 46.0072, "step": 3775 }, { "epoch": 0.5155300703119667, "grad_norm": 0.09573909640312195, "learning_rate": 4.8608555010087184e-05, "loss": 46.0037, "step": 3776 }, { "epoch": 0.5156665984026213, "grad_norm": 0.03444691374897957, "learning_rate": 4.858681942862833e-05, "loss": 46.0052, "step": 3777 }, { "epoch": 0.515803126493276, "grad_norm": 0.12492784857749939, "learning_rate": 4.856508411443433e-05, "loss": 46.0001, "step": 3778 }, { "epoch": 0.5159396545839307, "grad_norm": 0.04045983403921127, "learning_rate": 4.8543349071615904e-05, "loss": 46.008, "step": 3779 }, { "epoch": 0.5160761826745853, "grad_norm": 0.05414349213242531, "learning_rate": 4.8521614304283615e-05, "loss": 46.0022, "step": 3780 }, { "epoch": 0.51621271076524, "grad_norm": 0.03376138210296631, "learning_rate": 4.8499879816547997e-05, "loss": 46.0033, "step": 3781 }, { "epoch": 0.5163492388558946, "grad_norm": 0.10558482259511948, "learning_rate": 4.847814561251955e-05, "loss": 46.01, "step": 3782 }, { "epoch": 0.5164857669465492, "grad_norm": 0.06936874240636826, "learning_rate": 4.845641169630874e-05, "loss": 46.0038, "step": 3783 }, { "epoch": 0.5166222950372039, "grad_norm": 0.06239832192659378, "learning_rate": 4.8434678072025914e-05, "loss": 46.0029, "step": 3784 }, { "epoch": 0.5167588231278586, "grad_norm": 0.0705326721072197, "learning_rate": 4.84129447437814e-05, "loss": 46.0037, "step": 3785 }, { "epoch": 0.5168953512185132, "grad_norm": 0.13516411185264587, "learning_rate": 4.839121171568548e-05, "loss": 46.0028, "step": 3786 }, { "epoch": 0.5170318793091678, "grad_norm": 0.06571994721889496, "learning_rate": 4.836947899184838e-05, "loss": 46.0029, "step": 3787 }, { "epoch": 0.5171684073998225, "grad_norm": 0.08189158886671066, "learning_rate": 4.834774657638023e-05, "loss": 46.0061, "step": 3788 }, { "epoch": 0.5173049354904772, "grad_norm": 0.12006599456071854, "learning_rate": 4.832601447339115e-05, "loss": 46.0051, "step": 3789 }, { "epoch": 
0.5174414635811319, "grad_norm": 0.09899503737688065, "learning_rate": 4.8304282686991176e-05, "loss": 46.0009, "step": 3790 }, { "epoch": 0.5175779916717864, "grad_norm": 0.05613633245229721, "learning_rate": 4.828255122129029e-05, "loss": 46.0089, "step": 3791 }, { "epoch": 0.5177145197624411, "grad_norm": 0.19829019904136658, "learning_rate": 4.826082008039839e-05, "loss": 46.0094, "step": 3792 }, { "epoch": 0.5178510478530958, "grad_norm": 0.1463659405708313, "learning_rate": 4.823908926842536e-05, "loss": 46.0039, "step": 3793 }, { "epoch": 0.5179875759437504, "grad_norm": 0.03409542888402939, "learning_rate": 4.8217358789480984e-05, "loss": 46.007, "step": 3794 }, { "epoch": 0.5181241040344051, "grad_norm": 0.10712377727031708, "learning_rate": 4.819562864767498e-05, "loss": 46.0047, "step": 3795 }, { "epoch": 0.5182606321250597, "grad_norm": 0.08869447559118271, "learning_rate": 4.817389884711705e-05, "loss": 46.0034, "step": 3796 }, { "epoch": 0.5183971602157144, "grad_norm": 0.07067501544952393, "learning_rate": 4.8152169391916796e-05, "loss": 46.005, "step": 3797 }, { "epoch": 0.518533688306369, "grad_norm": 0.13555225729942322, "learning_rate": 4.813044028618373e-05, "loss": 46.0024, "step": 3798 }, { "epoch": 0.5186702163970237, "grad_norm": 0.11809061467647552, "learning_rate": 4.810871153402732e-05, "loss": 46.0034, "step": 3799 }, { "epoch": 0.5188067444876784, "grad_norm": 0.14248456060886383, "learning_rate": 4.808698313955701e-05, "loss": 46.0, "step": 3800 }, { "epoch": 0.518943272578333, "grad_norm": 0.1248212605714798, "learning_rate": 4.806525510688212e-05, "loss": 46.0078, "step": 3801 }, { "epoch": 0.5190798006689876, "grad_norm": 0.1444309949874878, "learning_rate": 4.804352744011189e-05, "loss": 46.0007, "step": 3802 }, { "epoch": 0.5192163287596423, "grad_norm": 0.0935368463397026, "learning_rate": 4.8021800143355564e-05, "loss": 46.0019, "step": 3803 }, { "epoch": 0.519352856850297, "grad_norm": 0.04870251193642616, "learning_rate": 4.8000073220722265e-05, "loss": 46.0048, "step": 3804 }, { "epoch": 0.5194893849409516, "grad_norm": 0.06565872579813004, "learning_rate": 4.797834667632105e-05, "loss": 46.0006, "step": 3805 }, { "epoch": 0.5196259130316062, "grad_norm": 0.0340382419526577, "learning_rate": 4.795662051426088e-05, "loss": 46.003, "step": 3806 }, { "epoch": 0.5197624411222609, "grad_norm": 0.03937444090843201, "learning_rate": 4.79348947386507e-05, "loss": 46.0031, "step": 3807 }, { "epoch": 0.5198989692129156, "grad_norm": 0.059707462787628174, "learning_rate": 4.7913169353599354e-05, "loss": 46.0038, "step": 3808 }, { "epoch": 0.5200354973035702, "grad_norm": 0.1272866576910019, "learning_rate": 4.789144436321561e-05, "loss": 46.009, "step": 3809 }, { "epoch": 0.5201720253942249, "grad_norm": 0.06578918546438217, "learning_rate": 4.786971977160813e-05, "loss": 46.002, "step": 3810 }, { "epoch": 0.5203085534848795, "grad_norm": 0.08559970557689667, "learning_rate": 4.784799558288558e-05, "loss": 46.0061, "step": 3811 }, { "epoch": 0.5204450815755342, "grad_norm": 0.08197557926177979, "learning_rate": 4.782627180115648e-05, "loss": 46.0041, "step": 3812 }, { "epoch": 0.5205816096661888, "grad_norm": 0.03554689511656761, "learning_rate": 4.780454843052928e-05, "loss": 46.0084, "step": 3813 }, { "epoch": 0.5207181377568435, "grad_norm": 0.08497384190559387, "learning_rate": 4.7782825475112416e-05, "loss": 46.0003, "step": 3814 }, { "epoch": 0.5208546658474982, "grad_norm": 0.10217847675085068, "learning_rate": 4.776110293901416e-05, "loss": 46.0004, 
"step": 3815 }, { "epoch": 0.5209911939381527, "grad_norm": 0.08687689155340195, "learning_rate": 4.773938082634274e-05, "loss": 46.0014, "step": 3816 }, { "epoch": 0.5211277220288074, "grad_norm": 0.15057432651519775, "learning_rate": 4.771765914120631e-05, "loss": 46.0042, "step": 3817 }, { "epoch": 0.5212642501194621, "grad_norm": 0.09728197008371353, "learning_rate": 4.769593788771297e-05, "loss": 46.0099, "step": 3818 }, { "epoch": 0.5214007782101168, "grad_norm": 0.08773849904537201, "learning_rate": 4.767421706997068e-05, "loss": 46.0018, "step": 3819 }, { "epoch": 0.5215373063007713, "grad_norm": 0.07475342601537704, "learning_rate": 4.7652496692087325e-05, "loss": 46.0067, "step": 3820 }, { "epoch": 0.521673834391426, "grad_norm": 0.1399032324552536, "learning_rate": 4.763077675817077e-05, "loss": 46.0035, "step": 3821 }, { "epoch": 0.5218103624820807, "grad_norm": 0.06432703882455826, "learning_rate": 4.7609057272328736e-05, "loss": 46.0051, "step": 3822 }, { "epoch": 0.5219468905727354, "grad_norm": 0.0452507883310318, "learning_rate": 4.758733823866888e-05, "loss": 46.0063, "step": 3823 }, { "epoch": 0.52208341866339, "grad_norm": 0.12198086082935333, "learning_rate": 4.756561966129876e-05, "loss": 46.0067, "step": 3824 }, { "epoch": 0.5222199467540446, "grad_norm": 0.05661104619503021, "learning_rate": 4.7543901544325856e-05, "loss": 46.0042, "step": 3825 }, { "epoch": 0.5223564748446993, "grad_norm": 0.1015692800283432, "learning_rate": 4.7522183891857576e-05, "loss": 46.0007, "step": 3826 }, { "epoch": 0.522493002935354, "grad_norm": 0.055667050182819366, "learning_rate": 4.7500466708001214e-05, "loss": 46.0017, "step": 3827 }, { "epoch": 0.5226295310260086, "grad_norm": 0.1829957813024521, "learning_rate": 4.747874999686401e-05, "loss": 46.0073, "step": 3828 }, { "epoch": 0.5227660591166633, "grad_norm": 0.08555509895086288, "learning_rate": 4.745703376255307e-05, "loss": 46.0047, "step": 3829 }, { "epoch": 0.5229025872073179, "grad_norm": 0.0899159386754036, "learning_rate": 4.7435318009175446e-05, "loss": 46.0054, "step": 3830 }, { "epoch": 0.5230391152979725, "grad_norm": 0.12161027640104294, "learning_rate": 4.741360274083806e-05, "loss": 46.0087, "step": 3831 }, { "epoch": 0.5231756433886272, "grad_norm": 0.04548361152410507, "learning_rate": 4.7391887961647815e-05, "loss": 46.0089, "step": 3832 }, { "epoch": 0.5233121714792819, "grad_norm": 0.10152045637369156, "learning_rate": 4.7370173675711435e-05, "loss": 46.001, "step": 3833 }, { "epoch": 0.5234486995699366, "grad_norm": 0.07035678625106812, "learning_rate": 4.734845988713559e-05, "loss": 46.0031, "step": 3834 }, { "epoch": 0.5235852276605911, "grad_norm": 0.11814124137163162, "learning_rate": 4.732674660002688e-05, "loss": 46.0053, "step": 3835 }, { "epoch": 0.5237217557512458, "grad_norm": 0.10511859506368637, "learning_rate": 4.7305033818491785e-05, "loss": 46.0039, "step": 3836 }, { "epoch": 0.5238582838419005, "grad_norm": 0.08202613890171051, "learning_rate": 4.728332154663667e-05, "loss": 46.0003, "step": 3837 }, { "epoch": 0.5239948119325551, "grad_norm": 0.09970813244581223, "learning_rate": 4.726160978856782e-05, "loss": 46.0079, "step": 3838 }, { "epoch": 0.5241313400232098, "grad_norm": 0.05698138847947121, "learning_rate": 4.7239898548391454e-05, "loss": 46.0044, "step": 3839 }, { "epoch": 0.5242678681138644, "grad_norm": 0.10648500919342041, "learning_rate": 4.7218187830213664e-05, "loss": 46.0047, "step": 3840 }, { "epoch": 0.5244043962045191, "grad_norm": 0.050885748118162155, "learning_rate": 
4.7196477638140404e-05, "loss": 46.0045, "step": 3841 }, { "epoch": 0.5245409242951737, "grad_norm": 0.05908678472042084, "learning_rate": 4.7174767976277636e-05, "loss": 46.0042, "step": 3842 }, { "epoch": 0.5246774523858284, "grad_norm": 0.13028298318386078, "learning_rate": 4.7153058848731105e-05, "loss": 46.0087, "step": 3843 }, { "epoch": 0.5248139804764831, "grad_norm": 0.12747034430503845, "learning_rate": 4.713135025960652e-05, "loss": 46.0027, "step": 3844 }, { "epoch": 0.5249505085671377, "grad_norm": 0.13158288598060608, "learning_rate": 4.710964221300946e-05, "loss": 46.0061, "step": 3845 }, { "epoch": 0.5250870366577923, "grad_norm": 0.11429021507501602, "learning_rate": 4.708793471304546e-05, "loss": 46.0, "step": 3846 }, { "epoch": 0.525223564748447, "grad_norm": 0.20794184505939484, "learning_rate": 4.706622776381986e-05, "loss": 46.0059, "step": 3847 }, { "epoch": 0.5253600928391017, "grad_norm": 0.13414062559604645, "learning_rate": 4.704452136943796e-05, "loss": 46.0026, "step": 3848 }, { "epoch": 0.5254966209297564, "grad_norm": 0.3647390604019165, "learning_rate": 4.702281553400493e-05, "loss": 46.0071, "step": 3849 }, { "epoch": 0.5256331490204109, "grad_norm": 0.0642671212553978, "learning_rate": 4.700111026162587e-05, "loss": 46.0021, "step": 3850 }, { "epoch": 0.5257696771110656, "grad_norm": 0.09432899951934814, "learning_rate": 4.697940555640571e-05, "loss": 46.006, "step": 3851 }, { "epoch": 0.5259062052017203, "grad_norm": 0.04655693471431732, "learning_rate": 4.695770142244931e-05, "loss": 46.0048, "step": 3852 }, { "epoch": 0.5260427332923749, "grad_norm": 0.07975205779075623, "learning_rate": 4.693599786386144e-05, "loss": 46.0069, "step": 3853 }, { "epoch": 0.5261792613830295, "grad_norm": 0.05667569488286972, "learning_rate": 4.6914294884746746e-05, "loss": 46.0017, "step": 3854 }, { "epoch": 0.5263157894736842, "grad_norm": 0.10164524614810944, "learning_rate": 4.689259248920972e-05, "loss": 46.0085, "step": 3855 }, { "epoch": 0.5264523175643389, "grad_norm": 0.07247145473957062, "learning_rate": 4.687089068135483e-05, "loss": 46.003, "step": 3856 }, { "epoch": 0.5265888456549935, "grad_norm": 0.07369309663772583, "learning_rate": 4.684918946528637e-05, "loss": 46.002, "step": 3857 }, { "epoch": 0.5267253737456482, "grad_norm": 0.09566149115562439, "learning_rate": 4.682748884510854e-05, "loss": 46.0028, "step": 3858 }, { "epoch": 0.5268619018363028, "grad_norm": 0.028315136209130287, "learning_rate": 4.6805788824925386e-05, "loss": 46.008, "step": 3859 }, { "epoch": 0.5269984299269574, "grad_norm": 0.08594685047864914, "learning_rate": 4.678408940884095e-05, "loss": 46.0101, "step": 3860 }, { "epoch": 0.5271349580176121, "grad_norm": 0.034670453518629074, "learning_rate": 4.6762390600959045e-05, "loss": 46.0121, "step": 3861 }, { "epoch": 0.5272714861082668, "grad_norm": 0.08738952875137329, "learning_rate": 4.674069240538341e-05, "loss": 46.006, "step": 3862 }, { "epoch": 0.5274080141989215, "grad_norm": 0.03899762034416199, "learning_rate": 4.67189948262177e-05, "loss": 46.0042, "step": 3863 }, { "epoch": 0.527544542289576, "grad_norm": 0.16914494335651398, "learning_rate": 4.669729786756542e-05, "loss": 46.0062, "step": 3864 }, { "epoch": 0.5276810703802307, "grad_norm": 0.19384977221488953, "learning_rate": 4.667560153352994e-05, "loss": 46.0044, "step": 3865 }, { "epoch": 0.5278175984708854, "grad_norm": 0.0381193645298481, "learning_rate": 4.6653905828214536e-05, "loss": 46.0054, "step": 3866 }, { "epoch": 0.5279541265615401, "grad_norm": 
0.04863312467932701, "learning_rate": 4.6632210755722394e-05, "loss": 46.0077, "step": 3867 }, { "epoch": 0.5280906546521947, "grad_norm": 0.06509484350681305, "learning_rate": 4.661051632015655e-05, "loss": 46.0055, "step": 3868 }, { "epoch": 0.5282271827428493, "grad_norm": 0.06096045300364494, "learning_rate": 4.6588822525619855e-05, "loss": 46.0084, "step": 3869 }, { "epoch": 0.528363710833504, "grad_norm": 0.06434444338083267, "learning_rate": 4.6567129376215195e-05, "loss": 46.0073, "step": 3870 }, { "epoch": 0.5285002389241586, "grad_norm": 0.11478033661842346, "learning_rate": 4.654543687604519e-05, "loss": 46.0106, "step": 3871 }, { "epoch": 0.5286367670148133, "grad_norm": 0.08522956818342209, "learning_rate": 4.6523745029212404e-05, "loss": 46.0052, "step": 3872 }, { "epoch": 0.528773295105468, "grad_norm": 0.06912510097026825, "learning_rate": 4.650205383981923e-05, "loss": 46.0067, "step": 3873 }, { "epoch": 0.5289098231961226, "grad_norm": 0.03796585649251938, "learning_rate": 4.648036331196804e-05, "loss": 46.0, "step": 3874 }, { "epoch": 0.5290463512867772, "grad_norm": 0.09428533911705017, "learning_rate": 4.6458673449760945e-05, "loss": 46.0025, "step": 3875 }, { "epoch": 0.5291828793774319, "grad_norm": 0.12976014614105225, "learning_rate": 4.643698425730004e-05, "loss": 46.0007, "step": 3876 }, { "epoch": 0.5293194074680866, "grad_norm": 0.08267038315534592, "learning_rate": 4.641529573868719e-05, "loss": 46.0063, "step": 3877 }, { "epoch": 0.5294559355587413, "grad_norm": 0.11666794121265411, "learning_rate": 4.639360789802427e-05, "loss": 46.0101, "step": 3878 }, { "epoch": 0.5295924636493958, "grad_norm": 0.06146417558193207, "learning_rate": 4.63719207394129e-05, "loss": 46.0108, "step": 3879 }, { "epoch": 0.5297289917400505, "grad_norm": 0.10910844802856445, "learning_rate": 4.6350234266954626e-05, "loss": 46.0089, "step": 3880 }, { "epoch": 0.5298655198307052, "grad_norm": 0.07234887778759003, "learning_rate": 4.6328548484750864e-05, "loss": 46.0051, "step": 3881 }, { "epoch": 0.5300020479213599, "grad_norm": 0.07183128595352173, "learning_rate": 4.63068633969029e-05, "loss": 46.005, "step": 3882 }, { "epoch": 0.5301385760120144, "grad_norm": 0.12924590706825256, "learning_rate": 4.628517900751187e-05, "loss": 46.0044, "step": 3883 }, { "epoch": 0.5302751041026691, "grad_norm": 0.03178626298904419, "learning_rate": 4.626349532067879e-05, "loss": 46.0043, "step": 3884 }, { "epoch": 0.5304116321933238, "grad_norm": 0.12860679626464844, "learning_rate": 4.624181234050455e-05, "loss": 46.0013, "step": 3885 }, { "epoch": 0.5305481602839784, "grad_norm": 0.06857714056968689, "learning_rate": 4.622013007108991e-05, "loss": 46.0004, "step": 3886 }, { "epoch": 0.5306846883746331, "grad_norm": 0.0652199387550354, "learning_rate": 4.619844851653545e-05, "loss": 46.0064, "step": 3887 }, { "epoch": 0.5308212164652877, "grad_norm": 0.11217772960662842, "learning_rate": 4.6176767680941676e-05, "loss": 46.0023, "step": 3888 }, { "epoch": 0.5309577445559424, "grad_norm": 0.0765429437160492, "learning_rate": 4.615508756840893e-05, "loss": 46.0077, "step": 3889 }, { "epoch": 0.531094272646597, "grad_norm": 0.158610537648201, "learning_rate": 4.613340818303743e-05, "loss": 46.003, "step": 3890 }, { "epoch": 0.5312308007372517, "grad_norm": 0.15901312232017517, "learning_rate": 4.611172952892721e-05, "loss": 46.0018, "step": 3891 }, { "epoch": 0.5313673288279064, "grad_norm": 0.04170028120279312, "learning_rate": 4.6090051610178236e-05, "loss": 46.002, "step": 3892 }, { "epoch": 
0.531503856918561, "grad_norm": 0.09148025512695312, "learning_rate": 4.6068374430890285e-05, "loss": 46.0031, "step": 3893 }, { "epoch": 0.5316403850092156, "grad_norm": 0.14522333443164825, "learning_rate": 4.6046697995163003e-05, "loss": 46.0008, "step": 3894 }, { "epoch": 0.5317769130998703, "grad_norm": 0.055086471140384674, "learning_rate": 4.6025022307095916e-05, "loss": 46.0024, "step": 3895 }, { "epoch": 0.531913441190525, "grad_norm": 0.32704418897628784, "learning_rate": 4.600334737078841e-05, "loss": 46.0078, "step": 3896 }, { "epoch": 0.5320499692811796, "grad_norm": 0.08910759538412094, "learning_rate": 4.598167319033967e-05, "loss": 46.0026, "step": 3897 }, { "epoch": 0.5321864973718342, "grad_norm": 0.18617670238018036, "learning_rate": 4.59599997698488e-05, "loss": 46.0099, "step": 3898 }, { "epoch": 0.5323230254624889, "grad_norm": 0.288745254278183, "learning_rate": 4.593832711341475e-05, "loss": 46.0066, "step": 3899 }, { "epoch": 0.5324595535531436, "grad_norm": 0.11622758209705353, "learning_rate": 4.5916655225136327e-05, "loss": 46.0056, "step": 3900 }, { "epoch": 0.5325960816437982, "grad_norm": 0.07512719929218292, "learning_rate": 4.589498410911215e-05, "loss": 46.006, "step": 3901 }, { "epoch": 0.5327326097344529, "grad_norm": 0.04328688234090805, "learning_rate": 4.5873313769440754e-05, "loss": 46.0026, "step": 3902 }, { "epoch": 0.5328691378251075, "grad_norm": 0.07224782556295395, "learning_rate": 4.585164421022048e-05, "loss": 46.0005, "step": 3903 }, { "epoch": 0.5330056659157622, "grad_norm": 0.10634280741214752, "learning_rate": 4.582997543554956e-05, "loss": 46.0031, "step": 3904 }, { "epoch": 0.5331421940064168, "grad_norm": 0.07437647134065628, "learning_rate": 4.580830744952601e-05, "loss": 46.0067, "step": 3905 }, { "epoch": 0.5332787220970715, "grad_norm": 0.08550957590341568, "learning_rate": 4.57866402562478e-05, "loss": 46.0031, "step": 3906 }, { "epoch": 0.5334152501877262, "grad_norm": 0.05360689014196396, "learning_rate": 4.576497385981267e-05, "loss": 46.0, "step": 3907 }, { "epoch": 0.5335517782783807, "grad_norm": 0.06599234789609909, "learning_rate": 4.574330826431821e-05, "loss": 46.0057, "step": 3908 }, { "epoch": 0.5336883063690354, "grad_norm": 0.09968619793653488, "learning_rate": 4.5721643473861934e-05, "loss": 46.0039, "step": 3909 }, { "epoch": 0.5338248344596901, "grad_norm": 0.08970796316862106, "learning_rate": 4.5699979492541116e-05, "loss": 46.0035, "step": 3910 }, { "epoch": 0.5339613625503448, "grad_norm": 0.12265932559967041, "learning_rate": 4.567831632445291e-05, "loss": 46.0037, "step": 3911 }, { "epoch": 0.5340978906409994, "grad_norm": 0.0874001756310463, "learning_rate": 4.565665397369432e-05, "loss": 46.0063, "step": 3912 }, { "epoch": 0.534234418731654, "grad_norm": 0.15746571123600006, "learning_rate": 4.563499244436222e-05, "loss": 46.0089, "step": 3913 }, { "epoch": 0.5343709468223087, "grad_norm": 0.08844971656799316, "learning_rate": 4.561333174055328e-05, "loss": 46.0067, "step": 3914 }, { "epoch": 0.5345074749129634, "grad_norm": 0.07914043217897415, "learning_rate": 4.559167186636403e-05, "loss": 46.0127, "step": 3915 }, { "epoch": 0.534644003003618, "grad_norm": 0.07415549457073212, "learning_rate": 4.557001282589086e-05, "loss": 46.0014, "step": 3916 }, { "epoch": 0.5347805310942726, "grad_norm": 0.19102349877357483, "learning_rate": 4.554835462323e-05, "loss": 46.0048, "step": 3917 }, { "epoch": 0.5349170591849273, "grad_norm": 0.10400357097387314, "learning_rate": 4.552669726247751e-05, "loss": 46.0068, 
"step": 3918 }, { "epoch": 0.5350535872755819, "grad_norm": 0.03831706941127777, "learning_rate": 4.5505040747729266e-05, "loss": 46.0006, "step": 3919 }, { "epoch": 0.5351901153662366, "grad_norm": 0.08260433375835419, "learning_rate": 4.5483385083081055e-05, "loss": 46.0041, "step": 3920 }, { "epoch": 0.5353266434568913, "grad_norm": 0.052553415298461914, "learning_rate": 4.546173027262843e-05, "loss": 46.0108, "step": 3921 }, { "epoch": 0.5354631715475459, "grad_norm": 0.169109508395195, "learning_rate": 4.5440076320466825e-05, "loss": 46.0032, "step": 3922 }, { "epoch": 0.5355996996382005, "grad_norm": 0.04858896881341934, "learning_rate": 4.541842323069151e-05, "loss": 46.0079, "step": 3923 }, { "epoch": 0.5357362277288552, "grad_norm": 0.08834350854158401, "learning_rate": 4.5396771007397565e-05, "loss": 46.0084, "step": 3924 }, { "epoch": 0.5358727558195099, "grad_norm": 0.04415224865078926, "learning_rate": 4.537511965467993e-05, "loss": 46.0086, "step": 3925 }, { "epoch": 0.5360092839101646, "grad_norm": 0.1369699239730835, "learning_rate": 4.535346917663335e-05, "loss": 46.0098, "step": 3926 }, { "epoch": 0.5361458120008191, "grad_norm": 0.12496551871299744, "learning_rate": 4.5331819577352474e-05, "loss": 46.0053, "step": 3927 }, { "epoch": 0.5362823400914738, "grad_norm": 0.039274998009204865, "learning_rate": 4.5310170860931704e-05, "loss": 46.0034, "step": 3928 }, { "epoch": 0.5364188681821285, "grad_norm": 0.07214152812957764, "learning_rate": 4.528852303146531e-05, "loss": 46.0041, "step": 3929 }, { "epoch": 0.5365553962727831, "grad_norm": 0.09485740214586258, "learning_rate": 4.52668760930474e-05, "loss": 46.0055, "step": 3930 }, { "epoch": 0.5366919243634378, "grad_norm": 0.18160228431224823, "learning_rate": 4.524523004977193e-05, "loss": 46.0008, "step": 3931 }, { "epoch": 0.5368284524540924, "grad_norm": 0.04829541593790054, "learning_rate": 4.522358490573263e-05, "loss": 46.0035, "step": 3932 }, { "epoch": 0.5369649805447471, "grad_norm": 0.07689401507377625, "learning_rate": 4.52019406650231e-05, "loss": 46.0077, "step": 3933 }, { "epoch": 0.5371015086354017, "grad_norm": 0.09358040243387222, "learning_rate": 4.518029733173677e-05, "loss": 46.0197, "step": 3934 }, { "epoch": 0.5372380367260564, "grad_norm": 0.09256160259246826, "learning_rate": 4.5158654909966905e-05, "loss": 46.0063, "step": 3935 }, { "epoch": 0.5373745648167111, "grad_norm": 0.03995924070477486, "learning_rate": 4.5137013403806546e-05, "loss": 46.0043, "step": 3936 }, { "epoch": 0.5375110929073657, "grad_norm": 0.07621372491121292, "learning_rate": 4.5115372817348646e-05, "loss": 46.0086, "step": 3937 }, { "epoch": 0.5376476209980203, "grad_norm": 0.17521966993808746, "learning_rate": 4.5093733154685904e-05, "loss": 46.0012, "step": 3938 }, { "epoch": 0.537784149088675, "grad_norm": 0.13213598728179932, "learning_rate": 4.507209441991088e-05, "loss": 46.0149, "step": 3939 }, { "epoch": 0.5379206771793297, "grad_norm": 0.08357955515384674, "learning_rate": 4.505045661711596e-05, "loss": 46.0064, "step": 3940 }, { "epoch": 0.5380572052699844, "grad_norm": 0.0514645129442215, "learning_rate": 4.502881975039337e-05, "loss": 46.0025, "step": 3941 }, { "epoch": 0.5381937333606389, "grad_norm": 0.05157996341586113, "learning_rate": 4.5007183823835105e-05, "loss": 46.0004, "step": 3942 }, { "epoch": 0.5383302614512936, "grad_norm": 0.0627037063241005, "learning_rate": 4.4985548841533036e-05, "loss": 46.005, "step": 3943 }, { "epoch": 0.5384667895419483, "grad_norm": 0.12766292691230774, "learning_rate": 
4.496391480757881e-05, "loss": 46.0013, "step": 3944 }, { "epoch": 0.5386033176326029, "grad_norm": 0.21507279574871063, "learning_rate": 4.494228172606397e-05, "loss": 46.0016, "step": 3945 }, { "epoch": 0.5387398457232576, "grad_norm": 0.05362633988261223, "learning_rate": 4.492064960107979e-05, "loss": 46.0067, "step": 3946 }, { "epoch": 0.5388763738139122, "grad_norm": 0.1571461707353592, "learning_rate": 4.489901843671739e-05, "loss": 46.0047, "step": 3947 }, { "epoch": 0.5390129019045669, "grad_norm": 0.25629645586013794, "learning_rate": 4.487738823706775e-05, "loss": 46.0108, "step": 3948 }, { "epoch": 0.5391494299952215, "grad_norm": 0.31904178857803345, "learning_rate": 4.485575900622164e-05, "loss": 46.003, "step": 3949 }, { "epoch": 0.5392859580858762, "grad_norm": 0.16209858655929565, "learning_rate": 4.483413074826962e-05, "loss": 46.0015, "step": 3950 }, { "epoch": 0.5394224861765308, "grad_norm": 0.1613723635673523, "learning_rate": 4.481250346730208e-05, "loss": 46.0067, "step": 3951 }, { "epoch": 0.5395590142671854, "grad_norm": 0.06977512687444687, "learning_rate": 4.4790877167409274e-05, "loss": 46.0074, "step": 3952 }, { "epoch": 0.5396955423578401, "grad_norm": 0.1373399943113327, "learning_rate": 4.4769251852681225e-05, "loss": 46.0043, "step": 3953 }, { "epoch": 0.5398320704484948, "grad_norm": 0.06592591106891632, "learning_rate": 4.474762752720773e-05, "loss": 46.0041, "step": 3954 }, { "epoch": 0.5399685985391495, "grad_norm": 0.057124942541122437, "learning_rate": 4.472600419507852e-05, "loss": 46.0007, "step": 3955 }, { "epoch": 0.540105126629804, "grad_norm": 0.05022481456398964, "learning_rate": 4.470438186038301e-05, "loss": 46.0017, "step": 3956 }, { "epoch": 0.5402416547204587, "grad_norm": 0.08001145720481873, "learning_rate": 4.468276052721051e-05, "loss": 46.0032, "step": 3957 }, { "epoch": 0.5403781828111134, "grad_norm": 0.08564513921737671, "learning_rate": 4.466114019965005e-05, "loss": 46.001, "step": 3958 }, { "epoch": 0.5405147109017681, "grad_norm": 0.10974765568971634, "learning_rate": 4.463952088179062e-05, "loss": 46.0037, "step": 3959 }, { "epoch": 0.5406512389924227, "grad_norm": 0.03855816274881363, "learning_rate": 4.461790257772087e-05, "loss": 46.0065, "step": 3960 }, { "epoch": 0.5407877670830773, "grad_norm": 0.03127044439315796, "learning_rate": 4.4596285291529324e-05, "loss": 46.0046, "step": 3961 }, { "epoch": 0.540924295173732, "grad_norm": 0.20282158255577087, "learning_rate": 4.4574669027304324e-05, "loss": 46.0064, "step": 3962 }, { "epoch": 0.5410608232643866, "grad_norm": 0.07375569641590118, "learning_rate": 4.4553053789134014e-05, "loss": 46.0114, "step": 3963 }, { "epoch": 0.5411973513550413, "grad_norm": 0.03702244907617569, "learning_rate": 4.4531439581106295e-05, "loss": 46.0067, "step": 3964 }, { "epoch": 0.541333879445696, "grad_norm": 0.13327309489250183, "learning_rate": 4.4509826407308915e-05, "loss": 46.0098, "step": 3965 }, { "epoch": 0.5414704075363506, "grad_norm": 0.12194481492042542, "learning_rate": 4.4488214271829444e-05, "loss": 46.0037, "step": 3966 }, { "epoch": 0.5416069356270052, "grad_norm": 0.044855378568172455, "learning_rate": 4.446660317875523e-05, "loss": 46.0028, "step": 3967 }, { "epoch": 0.5417434637176599, "grad_norm": 0.13959497213363647, "learning_rate": 4.44449931321734e-05, "loss": 46.0001, "step": 3968 }, { "epoch": 0.5418799918083146, "grad_norm": 0.05282937362790108, "learning_rate": 4.4423384136170956e-05, "loss": 46.0116, "step": 3969 }, { "epoch": 0.5420165198989693, "grad_norm": 
0.14267542958259583, "learning_rate": 4.4401776194834613e-05, "loss": 46.0054, "step": 3970 }, { "epoch": 0.5421530479896238, "grad_norm": 0.21523992717266083, "learning_rate": 4.438016931225096e-05, "loss": 46.0006, "step": 3971 }, { "epoch": 0.5422895760802785, "grad_norm": 0.04425100237131119, "learning_rate": 4.43585634925063e-05, "loss": 46.0048, "step": 3972 }, { "epoch": 0.5424261041709332, "grad_norm": 0.10031484812498093, "learning_rate": 4.4336958739686874e-05, "loss": 46.0188, "step": 3973 }, { "epoch": 0.5425626322615879, "grad_norm": 0.07307979464530945, "learning_rate": 4.431535505787858e-05, "loss": 46.0137, "step": 3974 }, { "epoch": 0.5426991603522425, "grad_norm": 0.17042788863182068, "learning_rate": 4.429375245116716e-05, "loss": 46.005, "step": 3975 }, { "epoch": 0.5428356884428971, "grad_norm": 0.04934168979525566, "learning_rate": 4.42721509236382e-05, "loss": 46.0155, "step": 3976 }, { "epoch": 0.5429722165335518, "grad_norm": 0.03948139026761055, "learning_rate": 4.425055047937705e-05, "loss": 46.0063, "step": 3977 }, { "epoch": 0.5431087446242064, "grad_norm": 0.07254410535097122, "learning_rate": 4.422895112246881e-05, "loss": 46.005, "step": 3978 }, { "epoch": 0.5432452727148611, "grad_norm": 0.04681352898478508, "learning_rate": 4.420735285699843e-05, "loss": 46.0098, "step": 3979 }, { "epoch": 0.5433818008055157, "grad_norm": 0.11951729655265808, "learning_rate": 4.418575568705065e-05, "loss": 46.0115, "step": 3980 }, { "epoch": 0.5435183288961704, "grad_norm": 0.15754596889019012, "learning_rate": 4.416415961671e-05, "loss": 46.0035, "step": 3981 }, { "epoch": 0.543654856986825, "grad_norm": 0.05914534628391266, "learning_rate": 4.414256465006075e-05, "loss": 46.0092, "step": 3982 }, { "epoch": 0.5437913850774797, "grad_norm": 0.03437582775950432, "learning_rate": 4.4120970791187046e-05, "loss": 46.0016, "step": 3983 }, { "epoch": 0.5439279131681344, "grad_norm": 0.05425356701016426, "learning_rate": 4.4099378044172754e-05, "loss": 46.0072, "step": 3984 }, { "epoch": 0.5440644412587889, "grad_norm": 0.029023397713899612, "learning_rate": 4.4077786413101595e-05, "loss": 46.0086, "step": 3985 }, { "epoch": 0.5442009693494436, "grad_norm": 0.04644041880965233, "learning_rate": 4.405619590205699e-05, "loss": 46.0078, "step": 3986 }, { "epoch": 0.5443374974400983, "grad_norm": 0.07745679467916489, "learning_rate": 4.403460651512224e-05, "loss": 46.0022, "step": 3987 }, { "epoch": 0.544474025530753, "grad_norm": 0.06720803678035736, "learning_rate": 4.401301825638039e-05, "loss": 46.0059, "step": 3988 }, { "epoch": 0.5446105536214076, "grad_norm": 0.07657355070114136, "learning_rate": 4.399143112991425e-05, "loss": 46.008, "step": 3989 }, { "epoch": 0.5447470817120622, "grad_norm": 0.09193466603755951, "learning_rate": 4.396984513980648e-05, "loss": 46.0083, "step": 3990 }, { "epoch": 0.5448836098027169, "grad_norm": 0.07391355186700821, "learning_rate": 4.3948260290139465e-05, "loss": 46.0001, "step": 3991 }, { "epoch": 0.5450201378933716, "grad_norm": 0.03515985980629921, "learning_rate": 4.392667658499539e-05, "loss": 46.0, "step": 3992 }, { "epoch": 0.5451566659840262, "grad_norm": 0.1253080666065216, "learning_rate": 4.3905094028456225e-05, "loss": 46.0153, "step": 3993 }, { "epoch": 0.5452931940746809, "grad_norm": 0.08017907291650772, "learning_rate": 4.388351262460375e-05, "loss": 46.0077, "step": 3994 }, { "epoch": 0.5454297221653355, "grad_norm": 0.0864248275756836, "learning_rate": 4.38619323775195e-05, "loss": 46.0023, "step": 3995 }, { "epoch": 
0.5455662502559901, "grad_norm": 0.09516637027263641, "learning_rate": 4.384035329128478e-05, "loss": 46.0, "step": 3996 }, { "epoch": 0.5457027783466448, "grad_norm": 0.1145850345492363, "learning_rate": 4.381877536998069e-05, "loss": 46.001, "step": 3997 }, { "epoch": 0.5458393064372995, "grad_norm": 0.09399574249982834, "learning_rate": 4.379719861768813e-05, "loss": 46.003, "step": 3998 }, { "epoch": 0.5459758345279542, "grad_norm": 0.10414939373731613, "learning_rate": 4.377562303848775e-05, "loss": 46.0085, "step": 3999 }, { "epoch": 0.5461123626186087, "grad_norm": 0.16911965608596802, "learning_rate": 4.375404863645997e-05, "loss": 46.0, "step": 4000 }, { "epoch": 0.5462488907092634, "grad_norm": 0.06198761984705925, "learning_rate": 4.3732475415685024e-05, "loss": 46.0067, "step": 4001 }, { "epoch": 0.5463854187999181, "grad_norm": 0.089242123067379, "learning_rate": 4.3710903380242906e-05, "loss": 46.0013, "step": 4002 }, { "epoch": 0.5465219468905728, "grad_norm": 0.09482287615537643, "learning_rate": 4.368933253421339e-05, "loss": 46.0027, "step": 4003 }, { "epoch": 0.5466584749812274, "grad_norm": 0.04586491361260414, "learning_rate": 4.3667762881675976e-05, "loss": 46.0008, "step": 4004 }, { "epoch": 0.546795003071882, "grad_norm": 0.03904891759157181, "learning_rate": 4.364619442671003e-05, "loss": 46.0033, "step": 4005 }, { "epoch": 0.5469315311625367, "grad_norm": 0.2148495465517044, "learning_rate": 4.362462717339461e-05, "loss": 46.0052, "step": 4006 }, { "epoch": 0.5470680592531914, "grad_norm": 0.02836555987596512, "learning_rate": 4.3603061125808593e-05, "loss": 46.0039, "step": 4007 }, { "epoch": 0.547204587343846, "grad_norm": 0.05618109554052353, "learning_rate": 4.3581496288030625e-05, "loss": 46.0078, "step": 4008 }, { "epoch": 0.5473411154345007, "grad_norm": 0.07705201208591461, "learning_rate": 4.355993266413909e-05, "loss": 46.0036, "step": 4009 }, { "epoch": 0.5474776435251553, "grad_norm": 0.08347824960947037, "learning_rate": 4.3538370258212174e-05, "loss": 46.0069, "step": 4010 }, { "epoch": 0.5476141716158099, "grad_norm": 0.09120592474937439, "learning_rate": 4.3516809074327806e-05, "loss": 46.0084, "step": 4011 }, { "epoch": 0.5477506997064646, "grad_norm": 0.07005074620246887, "learning_rate": 4.349524911656374e-05, "loss": 46.0092, "step": 4012 }, { "epoch": 0.5478872277971193, "grad_norm": 0.08044164627790451, "learning_rate": 4.347369038899744e-05, "loss": 46.007, "step": 4013 }, { "epoch": 0.5480237558877739, "grad_norm": 0.1585235446691513, "learning_rate": 4.345213289570612e-05, "loss": 46.0065, "step": 4014 }, { "epoch": 0.5481602839784285, "grad_norm": 0.11119988560676575, "learning_rate": 4.343057664076685e-05, "loss": 46.0024, "step": 4015 }, { "epoch": 0.5482968120690832, "grad_norm": 0.046788837760686874, "learning_rate": 4.340902162825639e-05, "loss": 46.0036, "step": 4016 }, { "epoch": 0.5484333401597379, "grad_norm": 0.05364629253745079, "learning_rate": 4.3387467862251304e-05, "loss": 46.006, "step": 4017 }, { "epoch": 0.5485698682503926, "grad_norm": 0.1138971671462059, "learning_rate": 4.336591534682787e-05, "loss": 46.0067, "step": 4018 }, { "epoch": 0.5487063963410471, "grad_norm": 0.08445706218481064, "learning_rate": 4.33443640860622e-05, "loss": 46.0024, "step": 4019 }, { "epoch": 0.5488429244317018, "grad_norm": 0.1470561921596527, "learning_rate": 4.332281408403011e-05, "loss": 46.0066, "step": 4020 }, { "epoch": 0.5489794525223565, "grad_norm": 0.13401351869106293, "learning_rate": 4.330126534480719e-05, "loss": 46.0029, 
"step": 4021 }, { "epoch": 0.5491159806130111, "grad_norm": 0.17563486099243164, "learning_rate": 4.327971787246885e-05, "loss": 46.0011, "step": 4022 }, { "epoch": 0.5492525087036658, "grad_norm": 0.15944713354110718, "learning_rate": 4.325817167109016e-05, "loss": 46.0015, "step": 4023 }, { "epoch": 0.5493890367943204, "grad_norm": 0.07961536198854446, "learning_rate": 4.323662674474603e-05, "loss": 46.0028, "step": 4024 }, { "epoch": 0.5495255648849751, "grad_norm": 0.042241644114255905, "learning_rate": 4.3215083097511076e-05, "loss": 46.0123, "step": 4025 }, { "epoch": 0.5496620929756297, "grad_norm": 0.08511419594287872, "learning_rate": 4.3193540733459736e-05, "loss": 46.0038, "step": 4026 }, { "epoch": 0.5497986210662844, "grad_norm": 0.09058299660682678, "learning_rate": 4.317199965666613e-05, "loss": 46.0029, "step": 4027 }, { "epoch": 0.5499351491569391, "grad_norm": 0.043374039232730865, "learning_rate": 4.315045987120417e-05, "loss": 46.0061, "step": 4028 }, { "epoch": 0.5500716772475936, "grad_norm": 0.08272770047187805, "learning_rate": 4.3128921381147544e-05, "loss": 46.0001, "step": 4029 }, { "epoch": 0.5502082053382483, "grad_norm": 0.08139296621084213, "learning_rate": 4.3107384190569683e-05, "loss": 46.0056, "step": 4030 }, { "epoch": 0.550344733428903, "grad_norm": 0.052537817507982254, "learning_rate": 4.3085848303543735e-05, "loss": 46.0068, "step": 4031 }, { "epoch": 0.5504812615195577, "grad_norm": 0.05293959379196167, "learning_rate": 4.306431372414264e-05, "loss": 46.002, "step": 4032 }, { "epoch": 0.5506177896102123, "grad_norm": 0.05541194602847099, "learning_rate": 4.304278045643908e-05, "loss": 46.0074, "step": 4033 }, { "epoch": 0.5507543177008669, "grad_norm": 0.09317421168088913, "learning_rate": 4.302124850450551e-05, "loss": 46.0052, "step": 4034 }, { "epoch": 0.5508908457915216, "grad_norm": 0.15853849053382874, "learning_rate": 4.299971787241409e-05, "loss": 46.0113, "step": 4035 }, { "epoch": 0.5510273738821763, "grad_norm": 0.08581767976284027, "learning_rate": 4.297818856423679e-05, "loss": 46.0027, "step": 4036 }, { "epoch": 0.5511639019728309, "grad_norm": 0.12849023938179016, "learning_rate": 4.295666058404526e-05, "loss": 46.0062, "step": 4037 }, { "epoch": 0.5513004300634856, "grad_norm": 0.06341037154197693, "learning_rate": 4.2935133935910954e-05, "loss": 46.004, "step": 4038 }, { "epoch": 0.5514369581541402, "grad_norm": 0.0405600406229496, "learning_rate": 4.2913608623905045e-05, "loss": 46.0132, "step": 4039 }, { "epoch": 0.5515734862447949, "grad_norm": 0.0582706518471241, "learning_rate": 4.28920846520985e-05, "loss": 46.0066, "step": 4040 }, { "epoch": 0.5517100143354495, "grad_norm": 0.04203653335571289, "learning_rate": 4.287056202456194e-05, "loss": 46.0072, "step": 4041 }, { "epoch": 0.5518465424261042, "grad_norm": 0.044868502765893936, "learning_rate": 4.284904074536582e-05, "loss": 46.0041, "step": 4042 }, { "epoch": 0.5519830705167589, "grad_norm": 0.1066102460026741, "learning_rate": 4.282752081858031e-05, "loss": 46.0077, "step": 4043 }, { "epoch": 0.5521195986074134, "grad_norm": 0.13053590059280396, "learning_rate": 4.280600224827532e-05, "loss": 46.001, "step": 4044 }, { "epoch": 0.5522561266980681, "grad_norm": 0.09412842243909836, "learning_rate": 4.2784485038520506e-05, "loss": 46.0045, "step": 4045 }, { "epoch": 0.5523926547887228, "grad_norm": 0.11065170168876648, "learning_rate": 4.2762969193385244e-05, "loss": 46.009, "step": 4046 }, { "epoch": 0.5525291828793775, "grad_norm": 0.18756525218486786, "learning_rate": 
4.274145471693871e-05, "loss": 46.0006, "step": 4047 }, { "epoch": 0.552665710970032, "grad_norm": 0.21536095440387726, "learning_rate": 4.271994161324977e-05, "loss": 46.0021, "step": 4048 }, { "epoch": 0.5528022390606867, "grad_norm": 0.10154073685407639, "learning_rate": 4.2698429886387017e-05, "loss": 46.0018, "step": 4049 }, { "epoch": 0.5529387671513414, "grad_norm": 0.34587588906288147, "learning_rate": 4.2676919540418875e-05, "loss": 46.0, "step": 4050 }, { "epoch": 0.5530752952419961, "grad_norm": 0.13021451234817505, "learning_rate": 4.2655410579413395e-05, "loss": 46.0049, "step": 4051 }, { "epoch": 0.5532118233326507, "grad_norm": 0.04923039674758911, "learning_rate": 4.263390300743844e-05, "loss": 46.0002, "step": 4052 }, { "epoch": 0.5533483514233053, "grad_norm": 0.11151327192783356, "learning_rate": 4.2612396828561545e-05, "loss": 46.0051, "step": 4053 }, { "epoch": 0.55348487951396, "grad_norm": 0.06284657120704651, "learning_rate": 4.259089204685009e-05, "loss": 46.0016, "step": 4054 }, { "epoch": 0.5536214076046146, "grad_norm": 0.0525670051574707, "learning_rate": 4.2569388666371065e-05, "loss": 46.0015, "step": 4055 }, { "epoch": 0.5537579356952693, "grad_norm": 0.04270012304186821, "learning_rate": 4.254788669119127e-05, "loss": 46.0021, "step": 4056 }, { "epoch": 0.553894463785924, "grad_norm": 0.04137655720114708, "learning_rate": 4.252638612537724e-05, "loss": 46.0033, "step": 4057 }, { "epoch": 0.5540309918765786, "grad_norm": 0.0655406042933464, "learning_rate": 4.2504886972995226e-05, "loss": 46.0024, "step": 4058 }, { "epoch": 0.5541675199672332, "grad_norm": 0.06541130691766739, "learning_rate": 4.248338923811118e-05, "loss": 46.0116, "step": 4059 }, { "epoch": 0.5543040480578879, "grad_norm": 0.05504436045885086, "learning_rate": 4.246189292479082e-05, "loss": 46.0044, "step": 4060 }, { "epoch": 0.5544405761485426, "grad_norm": 0.05183485522866249, "learning_rate": 4.2440398037099625e-05, "loss": 46.0038, "step": 4061 }, { "epoch": 0.5545771042391973, "grad_norm": 0.06312978267669678, "learning_rate": 4.2418904579102764e-05, "loss": 46.0072, "step": 4062 }, { "epoch": 0.5547136323298518, "grad_norm": 0.044179871678352356, "learning_rate": 4.23974125548651e-05, "loss": 46.0041, "step": 4063 }, { "epoch": 0.5548501604205065, "grad_norm": 0.2924930453300476, "learning_rate": 4.237592196845133e-05, "loss": 46.0074, "step": 4064 }, { "epoch": 0.5549866885111612, "grad_norm": 0.056571751832962036, "learning_rate": 4.235443282392579e-05, "loss": 46.0133, "step": 4065 }, { "epoch": 0.5551232166018158, "grad_norm": 0.0721200555562973, "learning_rate": 4.233294512535257e-05, "loss": 46.002, "step": 4066 }, { "epoch": 0.5552597446924705, "grad_norm": 0.10474075376987457, "learning_rate": 4.231145887679546e-05, "loss": 46.0026, "step": 4067 }, { "epoch": 0.5553962727831251, "grad_norm": 0.034559354186058044, "learning_rate": 4.228997408231806e-05, "loss": 46.0014, "step": 4068 }, { "epoch": 0.5555328008737798, "grad_norm": 0.08531183004379272, "learning_rate": 4.22684907459836e-05, "loss": 46.01, "step": 4069 }, { "epoch": 0.5556693289644344, "grad_norm": 0.09575473517179489, "learning_rate": 4.2247008871855084e-05, "loss": 46.0026, "step": 4070 }, { "epoch": 0.5558058570550891, "grad_norm": 0.09335958957672119, "learning_rate": 4.22255284639952e-05, "loss": 46.0016, "step": 4071 }, { "epoch": 0.5559423851457438, "grad_norm": 0.08056870847940445, "learning_rate": 4.220404952646644e-05, "loss": 46.0137, "step": 4072 }, { "epoch": 0.5560789132363984, "grad_norm": 
0.1481815129518509, "learning_rate": 4.218257206333093e-05, "loss": 46.0076, "step": 4073 }, { "epoch": 0.556215441327053, "grad_norm": 0.04429387301206589, "learning_rate": 4.216109607865054e-05, "loss": 46.0054, "step": 4074 }, { "epoch": 0.5563519694177077, "grad_norm": 0.08655617386102676, "learning_rate": 4.21396215764869e-05, "loss": 46.0035, "step": 4075 }, { "epoch": 0.5564884975083624, "grad_norm": 0.1556338369846344, "learning_rate": 4.2118148560901325e-05, "loss": 46.0007, "step": 4076 }, { "epoch": 0.5566250255990169, "grad_norm": 0.048218391835689545, "learning_rate": 4.2096677035954844e-05, "loss": 46.0066, "step": 4077 }, { "epoch": 0.5567615536896716, "grad_norm": 0.21837644279003143, "learning_rate": 4.207520700570821e-05, "loss": 46.0146, "step": 4078 }, { "epoch": 0.5568980817803263, "grad_norm": 0.04727736860513687, "learning_rate": 4.205373847422192e-05, "loss": 46.0063, "step": 4079 }, { "epoch": 0.557034609870981, "grad_norm": 0.16838309168815613, "learning_rate": 4.203227144555618e-05, "loss": 46.0087, "step": 4080 }, { "epoch": 0.5571711379616356, "grad_norm": 0.049227409064769745, "learning_rate": 4.2010805923770834e-05, "loss": 46.0061, "step": 4081 }, { "epoch": 0.5573076660522902, "grad_norm": 0.06729698926210403, "learning_rate": 4.1989341912925565e-05, "loss": 46.0106, "step": 4082 }, { "epoch": 0.5574441941429449, "grad_norm": 0.08476191759109497, "learning_rate": 4.196787941707969e-05, "loss": 46.0045, "step": 4083 }, { "epoch": 0.5575807222335996, "grad_norm": 0.06338997185230255, "learning_rate": 4.194641844029227e-05, "loss": 46.0067, "step": 4084 }, { "epoch": 0.5577172503242542, "grad_norm": 0.08407865464687347, "learning_rate": 4.192495898662203e-05, "loss": 46.0018, "step": 4085 }, { "epoch": 0.5578537784149089, "grad_norm": 0.07926420867443085, "learning_rate": 4.190350106012751e-05, "loss": 46.007, "step": 4086 }, { "epoch": 0.5579903065055635, "grad_norm": 0.06856502592563629, "learning_rate": 4.188204466486684e-05, "loss": 46.0012, "step": 4087 }, { "epoch": 0.5581268345962181, "grad_norm": 0.04348832741379738, "learning_rate": 4.186058980489794e-05, "loss": 46.0005, "step": 4088 }, { "epoch": 0.5582633626868728, "grad_norm": 0.04779432713985443, "learning_rate": 4.183913648427842e-05, "loss": 46.0033, "step": 4089 }, { "epoch": 0.5583998907775275, "grad_norm": 0.06987781077623367, "learning_rate": 4.1817684707065605e-05, "loss": 46.0028, "step": 4090 }, { "epoch": 0.5585364188681822, "grad_norm": 0.050050508230924606, "learning_rate": 4.179623447731649e-05, "loss": 46.0062, "step": 4091 }, { "epoch": 0.5586729469588367, "grad_norm": 0.08165524154901505, "learning_rate": 4.1774785799087804e-05, "loss": 46.0038, "step": 4092 }, { "epoch": 0.5588094750494914, "grad_norm": 0.1079489141702652, "learning_rate": 4.175333867643602e-05, "loss": 46.004, "step": 4093 }, { "epoch": 0.5589460031401461, "grad_norm": 0.12247691303491592, "learning_rate": 4.173189311341727e-05, "loss": 46.001, "step": 4094 }, { "epoch": 0.5590825312308008, "grad_norm": 0.10001842677593231, "learning_rate": 4.1710449114087376e-05, "loss": 46.0059, "step": 4095 }, { "epoch": 0.5592190593214554, "grad_norm": 0.17480109632015228, "learning_rate": 4.1689006682501917e-05, "loss": 46.0103, "step": 4096 }, { "epoch": 0.55935558741211, "grad_norm": 0.31364116072654724, "learning_rate": 4.166756582271614e-05, "loss": 46.0063, "step": 4097 }, { "epoch": 0.5594921155027647, "grad_norm": 0.10005810856819153, "learning_rate": 4.1646126538785025e-05, "loss": 46.0061, "step": 4098 }, { 
"epoch": 0.5596286435934194, "grad_norm": 0.22198715806007385, "learning_rate": 4.162468883476319e-05, "loss": 46.0058, "step": 4099 }, { "epoch": 0.559765171684074, "grad_norm": 0.09430018812417984, "learning_rate": 4.160325271470502e-05, "loss": 46.0, "step": 4100 }, { "epoch": 0.5599016997747287, "grad_norm": 0.07843402028083801, "learning_rate": 4.158181818266459e-05, "loss": 46.0042, "step": 4101 }, { "epoch": 0.5600382278653833, "grad_norm": 0.04286456108093262, "learning_rate": 4.156038524269563e-05, "loss": 46.0037, "step": 4102 }, { "epoch": 0.5601747559560379, "grad_norm": 0.04321611672639847, "learning_rate": 4.153895389885165e-05, "loss": 46.0037, "step": 4103 }, { "epoch": 0.5603112840466926, "grad_norm": 0.13361065089702606, "learning_rate": 4.151752415518577e-05, "loss": 46.002, "step": 4104 }, { "epoch": 0.5604478121373473, "grad_norm": 0.09158908575773239, "learning_rate": 4.149609601575086e-05, "loss": 46.0032, "step": 4105 }, { "epoch": 0.560584340228002, "grad_norm": 0.07521231472492218, "learning_rate": 4.1474669484599455e-05, "loss": 46.0015, "step": 4106 }, { "epoch": 0.5607208683186565, "grad_norm": 0.041728630661964417, "learning_rate": 4.1453244565783835e-05, "loss": 46.0008, "step": 4107 }, { "epoch": 0.5608573964093112, "grad_norm": 0.04411265254020691, "learning_rate": 4.1431821263355936e-05, "loss": 46.0031, "step": 4108 }, { "epoch": 0.5609939244999659, "grad_norm": 0.03887029364705086, "learning_rate": 4.141039958136737e-05, "loss": 46.0, "step": 4109 }, { "epoch": 0.5611304525906206, "grad_norm": 0.09799305349588394, "learning_rate": 4.138897952386949e-05, "loss": 46.0095, "step": 4110 }, { "epoch": 0.5612669806812751, "grad_norm": 0.06648367643356323, "learning_rate": 4.1367561094913335e-05, "loss": 46.0039, "step": 4111 }, { "epoch": 0.5614035087719298, "grad_norm": 0.06605003029108047, "learning_rate": 4.1346144298549604e-05, "loss": 46.0047, "step": 4112 }, { "epoch": 0.5615400368625845, "grad_norm": 0.050873976200819016, "learning_rate": 4.132472913882869e-05, "loss": 46.0082, "step": 4113 }, { "epoch": 0.5616765649532391, "grad_norm": 0.055341001600027084, "learning_rate": 4.1303315619800715e-05, "loss": 46.0123, "step": 4114 }, { "epoch": 0.5618130930438938, "grad_norm": 0.11422544717788696, "learning_rate": 4.128190374551546e-05, "loss": 46.0048, "step": 4115 }, { "epoch": 0.5619496211345484, "grad_norm": 0.05951273813843727, "learning_rate": 4.1260493520022397e-05, "loss": 46.0066, "step": 4116 }, { "epoch": 0.5620861492252031, "grad_norm": 0.17030808329582214, "learning_rate": 4.123908494737072e-05, "loss": 46.0049, "step": 4117 }, { "epoch": 0.5622226773158577, "grad_norm": 0.12659838795661926, "learning_rate": 4.121767803160924e-05, "loss": 46.0045, "step": 4118 }, { "epoch": 0.5623592054065124, "grad_norm": 0.10275189578533173, "learning_rate": 4.119627277678652e-05, "loss": 46.0149, "step": 4119 }, { "epoch": 0.5624957334971671, "grad_norm": 0.15153470635414124, "learning_rate": 4.117486918695077e-05, "loss": 46.0058, "step": 4120 }, { "epoch": 0.5626322615878216, "grad_norm": 0.13164539635181427, "learning_rate": 4.115346726614993e-05, "loss": 46.0041, "step": 4121 }, { "epoch": 0.5627687896784763, "grad_norm": 0.09382838010787964, "learning_rate": 4.113206701843156e-05, "loss": 46.004, "step": 4122 }, { "epoch": 0.562905317769131, "grad_norm": 0.12128282338380814, "learning_rate": 4.111066844784295e-05, "loss": 46.0012, "step": 4123 }, { "epoch": 0.5630418458597857, "grad_norm": 0.2182716727256775, "learning_rate": 4.1089271558431076e-05, 
"loss": 46.0058, "step": 4124 }, { "epoch": 0.5631783739504403, "grad_norm": 0.22989261150360107, "learning_rate": 4.1067876354242577e-05, "loss": 46.0069, "step": 4125 }, { "epoch": 0.5633149020410949, "grad_norm": 0.1527366042137146, "learning_rate": 4.104648283932376e-05, "loss": 46.0084, "step": 4126 }, { "epoch": 0.5634514301317496, "grad_norm": 0.0619303323328495, "learning_rate": 4.1025091017720624e-05, "loss": 46.0183, "step": 4127 }, { "epoch": 0.5635879582224043, "grad_norm": 0.10899220407009125, "learning_rate": 4.100370089347888e-05, "loss": 46.0018, "step": 4128 }, { "epoch": 0.5637244863130589, "grad_norm": 0.07269302010536194, "learning_rate": 4.098231247064389e-05, "loss": 46.0064, "step": 4129 }, { "epoch": 0.5638610144037136, "grad_norm": 0.10168563574552536, "learning_rate": 4.096092575326067e-05, "loss": 46.0055, "step": 4130 }, { "epoch": 0.5639975424943682, "grad_norm": 0.044407498091459274, "learning_rate": 4.0939540745373984e-05, "loss": 46.0006, "step": 4131 }, { "epoch": 0.5641340705850229, "grad_norm": 0.15285055339336395, "learning_rate": 4.0918157451028185e-05, "loss": 46.0047, "step": 4132 }, { "epoch": 0.5642705986756775, "grad_norm": 0.17183825373649597, "learning_rate": 4.0896775874267356e-05, "loss": 46.0063, "step": 4133 }, { "epoch": 0.5644071267663322, "grad_norm": 0.10922179371118546, "learning_rate": 4.087539601913525e-05, "loss": 46.0054, "step": 4134 }, { "epoch": 0.5645436548569869, "grad_norm": 0.061512541025877, "learning_rate": 4.08540178896753e-05, "loss": 46.0015, "step": 4135 }, { "epoch": 0.5646801829476414, "grad_norm": 0.0411384142935276, "learning_rate": 4.083264148993058e-05, "loss": 46.0038, "step": 4136 }, { "epoch": 0.5648167110382961, "grad_norm": 0.12862056493759155, "learning_rate": 4.0811266823943866e-05, "loss": 46.0037, "step": 4137 }, { "epoch": 0.5649532391289508, "grad_norm": 0.06511116772890091, "learning_rate": 4.078989389575759e-05, "loss": 46.0081, "step": 4138 }, { "epoch": 0.5650897672196055, "grad_norm": 0.060052428394556046, "learning_rate": 4.076852270941389e-05, "loss": 46.006, "step": 4139 }, { "epoch": 0.5652262953102601, "grad_norm": 0.03514265641570091, "learning_rate": 4.074715326895452e-05, "loss": 46.006, "step": 4140 }, { "epoch": 0.5653628234009147, "grad_norm": 0.1453302949666977, "learning_rate": 4.072578557842094e-05, "loss": 46.0024, "step": 4141 }, { "epoch": 0.5654993514915694, "grad_norm": 0.06674858182668686, "learning_rate": 4.0704419641854274e-05, "loss": 46.0031, "step": 4142 }, { "epoch": 0.565635879582224, "grad_norm": 0.07814361155033112, "learning_rate": 4.068305546329532e-05, "loss": 46.0056, "step": 4143 }, { "epoch": 0.5657724076728787, "grad_norm": 0.0870714783668518, "learning_rate": 4.066169304678452e-05, "loss": 46.0083, "step": 4144 }, { "epoch": 0.5659089357635333, "grad_norm": 0.1284550428390503, "learning_rate": 4.064033239636199e-05, "loss": 46.0045, "step": 4145 }, { "epoch": 0.566045463854188, "grad_norm": 0.18012332916259766, "learning_rate": 4.061897351606755e-05, "loss": 46.0007, "step": 4146 }, { "epoch": 0.5661819919448426, "grad_norm": 0.11048446595668793, "learning_rate": 4.059761640994064e-05, "loss": 46.0061, "step": 4147 }, { "epoch": 0.5663185200354973, "grad_norm": 0.35324349999427795, "learning_rate": 4.057626108202034e-05, "loss": 46.0098, "step": 4148 }, { "epoch": 0.566455048126152, "grad_norm": 0.04762815311551094, "learning_rate": 4.055490753634551e-05, "loss": 46.0047, "step": 4149 }, { "epoch": 0.5665915762168066, "grad_norm": 0.2622414827346802, 
"learning_rate": 4.053355577695454e-05, "loss": 46.0175, "step": 4150 }, { "epoch": 0.5667281043074612, "grad_norm": 0.2150178700685501, "learning_rate": 4.051220580788555e-05, "loss": 46.0052, "step": 4151 }, { "epoch": 0.5668646323981159, "grad_norm": 0.03773285448551178, "learning_rate": 4.04908576331763e-05, "loss": 46.0015, "step": 4152 }, { "epoch": 0.5670011604887706, "grad_norm": 0.22337068617343903, "learning_rate": 4.0469511256864265e-05, "loss": 46.014, "step": 4153 }, { "epoch": 0.5671376885794253, "grad_norm": 0.12137621641159058, "learning_rate": 4.044816668298649e-05, "loss": 46.0034, "step": 4154 }, { "epoch": 0.5672742166700798, "grad_norm": 0.1286476105451584, "learning_rate": 4.042682391557971e-05, "loss": 46.0075, "step": 4155 }, { "epoch": 0.5674107447607345, "grad_norm": 0.10643253475427628, "learning_rate": 4.040548295868038e-05, "loss": 46.0124, "step": 4156 }, { "epoch": 0.5675472728513892, "grad_norm": 0.1980726420879364, "learning_rate": 4.038414381632456e-05, "loss": 46.0061, "step": 4157 }, { "epoch": 0.5676838009420438, "grad_norm": 0.08968617022037506, "learning_rate": 4.036280649254795e-05, "loss": 46.0019, "step": 4158 }, { "epoch": 0.5678203290326985, "grad_norm": 0.1224856749176979, "learning_rate": 4.0341470991385906e-05, "loss": 46.0041, "step": 4159 }, { "epoch": 0.5679568571233531, "grad_norm": 0.06743017584085464, "learning_rate": 4.032013731687351e-05, "loss": 46.0073, "step": 4160 }, { "epoch": 0.5680933852140078, "grad_norm": 0.172834575176239, "learning_rate": 4.029880547304543e-05, "loss": 46.0008, "step": 4161 }, { "epoch": 0.5682299133046624, "grad_norm": 0.036811791360378265, "learning_rate": 4.027747546393598e-05, "loss": 46.0017, "step": 4162 }, { "epoch": 0.5683664413953171, "grad_norm": 0.08253966271877289, "learning_rate": 4.025614729357921e-05, "loss": 46.0052, "step": 4163 }, { "epoch": 0.5685029694859718, "grad_norm": 0.12697434425354004, "learning_rate": 4.023482096600873e-05, "loss": 46.0126, "step": 4164 }, { "epoch": 0.5686394975766264, "grad_norm": 0.07066084444522858, "learning_rate": 4.021349648525785e-05, "loss": 46.0076, "step": 4165 }, { "epoch": 0.568776025667281, "grad_norm": 0.07999136298894882, "learning_rate": 4.019217385535949e-05, "loss": 46.0021, "step": 4166 }, { "epoch": 0.5689125537579357, "grad_norm": 0.06257220357656479, "learning_rate": 4.0170853080346296e-05, "loss": 46.0032, "step": 4167 }, { "epoch": 0.5690490818485904, "grad_norm": 0.07309366762638092, "learning_rate": 4.014953416425049e-05, "loss": 46.004, "step": 4168 }, { "epoch": 0.569185609939245, "grad_norm": 0.07747320085763931, "learning_rate": 4.012821711110396e-05, "loss": 46.0067, "step": 4169 }, { "epoch": 0.5693221380298996, "grad_norm": 0.07461026310920715, "learning_rate": 4.010690192493828e-05, "loss": 46.0049, "step": 4170 }, { "epoch": 0.5694586661205543, "grad_norm": 0.10077689588069916, "learning_rate": 4.0085588609784624e-05, "loss": 46.0085, "step": 4171 }, { "epoch": 0.569595194211209, "grad_norm": 0.07393091171979904, "learning_rate": 4.0064277169673824e-05, "loss": 46.0016, "step": 4172 }, { "epoch": 0.5697317223018636, "grad_norm": 0.07434780150651932, "learning_rate": 4.004296760863636e-05, "loss": 46.0046, "step": 4173 }, { "epoch": 0.5698682503925182, "grad_norm": 0.07543166726827621, "learning_rate": 4.002165993070237e-05, "loss": 46.0031, "step": 4174 }, { "epoch": 0.5700047784831729, "grad_norm": 0.05544315651059151, "learning_rate": 4.000035413990164e-05, "loss": 46.0104, "step": 4175 }, { "epoch": 0.5701413065738276, 
"grad_norm": 0.14732478559017181, "learning_rate": 3.997905024026354e-05, "loss": 46.001, "step": 4176 }, { "epoch": 0.5702778346644822, "grad_norm": 0.05757248029112816, "learning_rate": 3.995774823581716e-05, "loss": 46.0043, "step": 4177 }, { "epoch": 0.5704143627551369, "grad_norm": 0.09436565637588501, "learning_rate": 3.99364481305912e-05, "loss": 46.0046, "step": 4178 }, { "epoch": 0.5705508908457915, "grad_norm": 0.06754551827907562, "learning_rate": 3.991514992861399e-05, "loss": 46.0059, "step": 4179 }, { "epoch": 0.5706874189364461, "grad_norm": 0.07996420562267303, "learning_rate": 3.989385363391349e-05, "loss": 46.0059, "step": 4180 }, { "epoch": 0.5708239470271008, "grad_norm": 0.04658552631735802, "learning_rate": 3.987255925051736e-05, "loss": 46.0022, "step": 4181 }, { "epoch": 0.5709604751177555, "grad_norm": 0.05027594789862633, "learning_rate": 3.985126678245283e-05, "loss": 46.0145, "step": 4182 }, { "epoch": 0.5710970032084102, "grad_norm": 0.21464218199253082, "learning_rate": 3.982997623374678e-05, "loss": 46.0025, "step": 4183 }, { "epoch": 0.5712335312990647, "grad_norm": 0.0689343586564064, "learning_rate": 3.980868760842579e-05, "loss": 46.0036, "step": 4184 }, { "epoch": 0.5713700593897194, "grad_norm": 0.0651131346821785, "learning_rate": 3.978740091051599e-05, "loss": 46.0001, "step": 4185 }, { "epoch": 0.5715065874803741, "grad_norm": 0.09982171654701233, "learning_rate": 3.976611614404319e-05, "loss": 46.0061, "step": 4186 }, { "epoch": 0.5716431155710288, "grad_norm": 0.14463631808757782, "learning_rate": 3.974483331303282e-05, "loss": 46.0105, "step": 4187 }, { "epoch": 0.5717796436616834, "grad_norm": 0.19314250349998474, "learning_rate": 3.972355242150998e-05, "loss": 46.0078, "step": 4188 }, { "epoch": 0.571916171752338, "grad_norm": 0.08179299533367157, "learning_rate": 3.970227347349935e-05, "loss": 46.0011, "step": 4189 }, { "epoch": 0.5720526998429927, "grad_norm": 0.03834139183163643, "learning_rate": 3.968099647302527e-05, "loss": 46.0083, "step": 4190 }, { "epoch": 0.5721892279336473, "grad_norm": 0.10589633136987686, "learning_rate": 3.965972142411172e-05, "loss": 46.008, "step": 4191 }, { "epoch": 0.572325756024302, "grad_norm": 0.12396863102912903, "learning_rate": 3.963844833078229e-05, "loss": 46.004, "step": 4192 }, { "epoch": 0.5724622841149567, "grad_norm": 0.09641066193580627, "learning_rate": 3.961717719706024e-05, "loss": 46.0013, "step": 4193 }, { "epoch": 0.5725988122056113, "grad_norm": 0.20831313729286194, "learning_rate": 3.959590802696837e-05, "loss": 46.0046, "step": 4194 }, { "epoch": 0.5727353402962659, "grad_norm": 0.09245959669351578, "learning_rate": 3.957464082452922e-05, "loss": 46.0092, "step": 4195 }, { "epoch": 0.5728718683869206, "grad_norm": 0.16495190560817719, "learning_rate": 3.955337559376488e-05, "loss": 46.0046, "step": 4196 }, { "epoch": 0.5730083964775753, "grad_norm": 0.08406770974397659, "learning_rate": 3.953211233869713e-05, "loss": 46.0057, "step": 4197 }, { "epoch": 0.57314492456823, "grad_norm": 0.10736968368291855, "learning_rate": 3.9510851063347284e-05, "loss": 46.0068, "step": 4198 }, { "epoch": 0.5732814526588845, "grad_norm": 0.16655531525611877, "learning_rate": 3.9489591771736385e-05, "loss": 46.0037, "step": 4199 }, { "epoch": 0.5734179807495392, "grad_norm": 0.12663151323795319, "learning_rate": 3.9468334467885024e-05, "loss": 46.0, "step": 4200 }, { "epoch": 0.5735545088401939, "grad_norm": 0.06263776868581772, "learning_rate": 3.944707915581345e-05, "loss": 46.001, "step": 4201 }, { 
"epoch": 0.5736910369308486, "grad_norm": 0.10458409786224365, "learning_rate": 3.942582583954155e-05, "loss": 46.0041, "step": 4202 }, { "epoch": 0.5738275650215032, "grad_norm": 0.08206160366535187, "learning_rate": 3.9404574523088816e-05, "loss": 46.0007, "step": 4203 }, { "epoch": 0.5739640931121578, "grad_norm": 0.20233093202114105, "learning_rate": 3.938332521047433e-05, "loss": 46.0022, "step": 4204 }, { "epoch": 0.5741006212028125, "grad_norm": 0.043105367571115494, "learning_rate": 3.9362077905716824e-05, "loss": 46.0001, "step": 4205 }, { "epoch": 0.5742371492934671, "grad_norm": 0.02991567552089691, "learning_rate": 3.934083261283469e-05, "loss": 46.0006, "step": 4206 }, { "epoch": 0.5743736773841218, "grad_norm": 0.16281278431415558, "learning_rate": 3.931958933584587e-05, "loss": 46.003, "step": 4207 }, { "epoch": 0.5745102054747764, "grad_norm": 0.08540689945220947, "learning_rate": 3.929834807876796e-05, "loss": 46.0011, "step": 4208 }, { "epoch": 0.5746467335654311, "grad_norm": 0.12417396157979965, "learning_rate": 3.927710884561818e-05, "loss": 46.0035, "step": 4209 }, { "epoch": 0.5747832616560857, "grad_norm": 0.05525057390332222, "learning_rate": 3.9255871640413346e-05, "loss": 46.0046, "step": 4210 }, { "epoch": 0.5749197897467404, "grad_norm": 0.03201719745993614, "learning_rate": 3.923463646716991e-05, "loss": 46.006, "step": 4211 }, { "epoch": 0.5750563178373951, "grad_norm": 0.06920528411865234, "learning_rate": 3.921340332990391e-05, "loss": 46.0048, "step": 4212 }, { "epoch": 0.5751928459280496, "grad_norm": 0.049465078860521317, "learning_rate": 3.919217223263105e-05, "loss": 46.007, "step": 4213 }, { "epoch": 0.5753293740187043, "grad_norm": 0.13075508177280426, "learning_rate": 3.91709431793666e-05, "loss": 46.0066, "step": 4214 }, { "epoch": 0.575465902109359, "grad_norm": 0.0803513303399086, "learning_rate": 3.914971617412546e-05, "loss": 46.0014, "step": 4215 }, { "epoch": 0.5756024302000137, "grad_norm": 0.05694730207324028, "learning_rate": 3.9128491220922156e-05, "loss": 46.0063, "step": 4216 }, { "epoch": 0.5757389582906683, "grad_norm": 0.26581040024757385, "learning_rate": 3.9107268323770805e-05, "loss": 46.0062, "step": 4217 }, { "epoch": 0.5758754863813229, "grad_norm": 0.11349122226238251, "learning_rate": 3.908604748668515e-05, "loss": 46.0024, "step": 4218 }, { "epoch": 0.5760120144719776, "grad_norm": 0.060634102672338486, "learning_rate": 3.906482871367852e-05, "loss": 46.0058, "step": 4219 }, { "epoch": 0.5761485425626323, "grad_norm": 0.10069743543863297, "learning_rate": 3.904361200876391e-05, "loss": 46.0004, "step": 4220 }, { "epoch": 0.5762850706532869, "grad_norm": 0.16796773672103882, "learning_rate": 3.902239737595386e-05, "loss": 46.0091, "step": 4221 }, { "epoch": 0.5764215987439416, "grad_norm": 0.08749929070472717, "learning_rate": 3.900118481926053e-05, "loss": 46.01, "step": 4222 }, { "epoch": 0.5765581268345962, "grad_norm": 0.09402868896722794, "learning_rate": 3.8979974342695726e-05, "loss": 46.0055, "step": 4223 }, { "epoch": 0.5766946549252508, "grad_norm": 0.056187234818935394, "learning_rate": 3.895876595027083e-05, "loss": 46.0057, "step": 4224 }, { "epoch": 0.5768311830159055, "grad_norm": 0.17621631920337677, "learning_rate": 3.893755964599685e-05, "loss": 46.0055, "step": 4225 }, { "epoch": 0.5769677111065602, "grad_norm": 0.09108150750398636, "learning_rate": 3.891635543388435e-05, "loss": 46.0106, "step": 4226 }, { "epoch": 0.5771042391972149, "grad_norm": 0.1562362015247345, "learning_rate": 3.889515331794356e-05, 
"loss": 46.0037, "step": 4227 }, { "epoch": 0.5772407672878694, "grad_norm": 0.05684921517968178, "learning_rate": 3.887395330218429e-05, "loss": 46.0068, "step": 4228 }, { "epoch": 0.5773772953785241, "grad_norm": 0.0439908504486084, "learning_rate": 3.885275539061592e-05, "loss": 46.0077, "step": 4229 }, { "epoch": 0.5775138234691788, "grad_norm": 0.08761084079742432, "learning_rate": 3.8831559587247506e-05, "loss": 46.0041, "step": 4230 }, { "epoch": 0.5776503515598335, "grad_norm": 0.08323706686496735, "learning_rate": 3.881036589608763e-05, "loss": 46.0055, "step": 4231 }, { "epoch": 0.5777868796504881, "grad_norm": 0.087737537920475, "learning_rate": 3.878917432114451e-05, "loss": 46.0044, "step": 4232 }, { "epoch": 0.5779234077411427, "grad_norm": 0.039592742919921875, "learning_rate": 3.876798486642596e-05, "loss": 46.018, "step": 4233 }, { "epoch": 0.5780599358317974, "grad_norm": 0.06101216375827789, "learning_rate": 3.874679753593941e-05, "loss": 46.0099, "step": 4234 }, { "epoch": 0.578196463922452, "grad_norm": 0.03501024469733238, "learning_rate": 3.872561233369184e-05, "loss": 46.002, "step": 4235 }, { "epoch": 0.5783329920131067, "grad_norm": 0.10864663124084473, "learning_rate": 3.870442926368987e-05, "loss": 46.0087, "step": 4236 }, { "epoch": 0.5784695201037614, "grad_norm": 0.16185355186462402, "learning_rate": 3.8683248329939716e-05, "loss": 46.0073, "step": 4237 }, { "epoch": 0.578606048194416, "grad_norm": 0.08467511832714081, "learning_rate": 3.866206953644719e-05, "loss": 46.0076, "step": 4238 }, { "epoch": 0.5787425762850706, "grad_norm": 0.1423330157995224, "learning_rate": 3.864089288721766e-05, "loss": 46.0082, "step": 4239 }, { "epoch": 0.5788791043757253, "grad_norm": 0.040363412350416183, "learning_rate": 3.861971838625611e-05, "loss": 46.0016, "step": 4240 }, { "epoch": 0.57901563246638, "grad_norm": 0.07136883586645126, "learning_rate": 3.859854603756715e-05, "loss": 46.0045, "step": 4241 }, { "epoch": 0.5791521605570346, "grad_norm": 0.031872157007455826, "learning_rate": 3.857737584515496e-05, "loss": 46.0043, "step": 4242 }, { "epoch": 0.5792886886476892, "grad_norm": 0.054682351648807526, "learning_rate": 3.8556207813023274e-05, "loss": 46.0041, "step": 4243 }, { "epoch": 0.5794252167383439, "grad_norm": 0.12711066007614136, "learning_rate": 3.8535041945175506e-05, "loss": 46.0018, "step": 4244 }, { "epoch": 0.5795617448289986, "grad_norm": 0.1162513867020607, "learning_rate": 3.851387824561457e-05, "loss": 46.0071, "step": 4245 }, { "epoch": 0.5796982729196533, "grad_norm": 0.1755143254995346, "learning_rate": 3.849271671834301e-05, "loss": 46.0006, "step": 4246 }, { "epoch": 0.5798348010103078, "grad_norm": 0.1865094006061554, "learning_rate": 3.8471557367362955e-05, "loss": 46.0036, "step": 4247 }, { "epoch": 0.5799713291009625, "grad_norm": 0.21232017874717712, "learning_rate": 3.845040019667616e-05, "loss": 46.0068, "step": 4248 }, { "epoch": 0.5801078571916172, "grad_norm": 0.148094043135643, "learning_rate": 3.842924521028388e-05, "loss": 46.0059, "step": 4249 }, { "epoch": 0.5802443852822718, "grad_norm": 0.08114785701036453, "learning_rate": 3.8408092412187036e-05, "loss": 46.0, "step": 4250 }, { "epoch": 0.5803809133729265, "grad_norm": 0.12156055122613907, "learning_rate": 3.838694180638611e-05, "loss": 46.0064, "step": 4251 }, { "epoch": 0.5805174414635811, "grad_norm": 0.04426558315753937, "learning_rate": 3.836579339688117e-05, "loss": 46.0038, "step": 4252 }, { "epoch": 0.5806539695542358, "grad_norm": 0.13635505735874176, 
"learning_rate": 3.8344647187671846e-05, "loss": 46.006, "step": 4253 }, { "epoch": 0.5807904976448904, "grad_norm": 0.11358733475208282, "learning_rate": 3.8323503182757375e-05, "loss": 46.0021, "step": 4254 }, { "epoch": 0.5809270257355451, "grad_norm": 0.04804825410246849, "learning_rate": 3.83023613861366e-05, "loss": 46.0064, "step": 4255 }, { "epoch": 0.5810635538261998, "grad_norm": 0.09464491903781891, "learning_rate": 3.8281221801807906e-05, "loss": 46.0052, "step": 4256 }, { "epoch": 0.5812000819168543, "grad_norm": 0.06417125463485718, "learning_rate": 3.826008443376924e-05, "loss": 46.0038, "step": 4257 }, { "epoch": 0.581336610007509, "grad_norm": 0.0603540763258934, "learning_rate": 3.823894928601822e-05, "loss": 46.0038, "step": 4258 }, { "epoch": 0.5814731380981637, "grad_norm": 0.11743415892124176, "learning_rate": 3.8217816362551964e-05, "loss": 46.0078, "step": 4259 }, { "epoch": 0.5816096661888184, "grad_norm": 0.10012999922037125, "learning_rate": 3.8196685667367195e-05, "loss": 46.0041, "step": 4260 }, { "epoch": 0.581746194279473, "grad_norm": 0.14327684044837952, "learning_rate": 3.817555720446017e-05, "loss": 46.0073, "step": 4261 }, { "epoch": 0.5818827223701276, "grad_norm": 0.2517293095588684, "learning_rate": 3.8154430977826846e-05, "loss": 46.0056, "step": 4262 }, { "epoch": 0.5820192504607823, "grad_norm": 0.13470691442489624, "learning_rate": 3.813330699146263e-05, "loss": 46.0081, "step": 4263 }, { "epoch": 0.582155778551437, "grad_norm": 0.13395443558692932, "learning_rate": 3.811218524936256e-05, "loss": 46.0067, "step": 4264 }, { "epoch": 0.5822923066420916, "grad_norm": 0.05919964611530304, "learning_rate": 3.809106575552121e-05, "loss": 46.0058, "step": 4265 }, { "epoch": 0.5824288347327463, "grad_norm": 0.17943774163722992, "learning_rate": 3.806994851393283e-05, "loss": 46.0088, "step": 4266 }, { "epoch": 0.5825653628234009, "grad_norm": 0.09051204472780228, "learning_rate": 3.8048833528591125e-05, "loss": 46.0069, "step": 4267 }, { "epoch": 0.5827018909140556, "grad_norm": 0.04491504654288292, "learning_rate": 3.802772080348943e-05, "loss": 46.0111, "step": 4268 }, { "epoch": 0.5828384190047102, "grad_norm": 0.12523025274276733, "learning_rate": 3.8006610342620663e-05, "loss": 46.0131, "step": 4269 }, { "epoch": 0.5829749470953649, "grad_norm": 0.11852729320526123, "learning_rate": 3.79855021499773e-05, "loss": 46.0127, "step": 4270 }, { "epoch": 0.5831114751860196, "grad_norm": 0.18985849618911743, "learning_rate": 3.7964396229551364e-05, "loss": 46.0087, "step": 4271 }, { "epoch": 0.5832480032766741, "grad_norm": 0.14504916965961456, "learning_rate": 3.794329258533446e-05, "loss": 46.0146, "step": 4272 }, { "epoch": 0.5833845313673288, "grad_norm": 0.07202795892953873, "learning_rate": 3.7922191221317806e-05, "loss": 46.0088, "step": 4273 }, { "epoch": 0.5835210594579835, "grad_norm": 0.09781450033187866, "learning_rate": 3.7901092141492144e-05, "loss": 46.009, "step": 4274 }, { "epoch": 0.5836575875486382, "grad_norm": 0.049807123839855194, "learning_rate": 3.787999534984776e-05, "loss": 46.0045, "step": 4275 }, { "epoch": 0.5837941156392927, "grad_norm": 0.06406521797180176, "learning_rate": 3.78589008503746e-05, "loss": 46.0042, "step": 4276 }, { "epoch": 0.5839306437299474, "grad_norm": 0.10423512756824493, "learning_rate": 3.7837808647062076e-05, "loss": 46.0048, "step": 4277 }, { "epoch": 0.5840671718206021, "grad_norm": 0.12244128435850143, "learning_rate": 3.781671874389923e-05, "loss": 46.0046, "step": 4278 }, { "epoch": 
0.5842036999112568, "grad_norm": 0.06191691383719444, "learning_rate": 3.7795631144874604e-05, "loss": 46.0, "step": 4279 }, { "epoch": 0.5843402280019114, "grad_norm": 0.13915924727916718, "learning_rate": 3.777454585397642e-05, "loss": 46.0025, "step": 4280 }, { "epoch": 0.584476756092566, "grad_norm": 0.04839993268251419, "learning_rate": 3.7753462875192334e-05, "loss": 46.0033, "step": 4281 }, { "epoch": 0.5846132841832207, "grad_norm": 0.0771898627281189, "learning_rate": 3.773238221250963e-05, "loss": 46.0043, "step": 4282 }, { "epoch": 0.5847498122738753, "grad_norm": 0.03783189132809639, "learning_rate": 3.7711303869915164e-05, "loss": 46.0031, "step": 4283 }, { "epoch": 0.58488634036453, "grad_norm": 0.03843112289905548, "learning_rate": 3.769022785139534e-05, "loss": 46.0066, "step": 4284 }, { "epoch": 0.5850228684551847, "grad_norm": 0.1580609828233719, "learning_rate": 3.766915416093608e-05, "loss": 46.0046, "step": 4285 }, { "epoch": 0.5851593965458393, "grad_norm": 0.03319886699318886, "learning_rate": 3.7648082802522925e-05, "loss": 46.0096, "step": 4286 }, { "epoch": 0.5852959246364939, "grad_norm": 0.0665319636464119, "learning_rate": 3.762701378014096e-05, "loss": 46.0058, "step": 4287 }, { "epoch": 0.5854324527271486, "grad_norm": 0.0986555889248848, "learning_rate": 3.7605947097774816e-05, "loss": 46.0064, "step": 4288 }, { "epoch": 0.5855689808178033, "grad_norm": 0.09997288137674332, "learning_rate": 3.758488275940867e-05, "loss": 46.0061, "step": 4289 }, { "epoch": 0.585705508908458, "grad_norm": 0.06580162048339844, "learning_rate": 3.756382076902629e-05, "loss": 46.0103, "step": 4290 }, { "epoch": 0.5858420369991125, "grad_norm": 0.07698946446180344, "learning_rate": 3.7542761130610985e-05, "loss": 46.0011, "step": 4291 }, { "epoch": 0.5859785650897672, "grad_norm": 0.03202710300683975, "learning_rate": 3.7521703848145615e-05, "loss": 46.0045, "step": 4292 }, { "epoch": 0.5861150931804219, "grad_norm": 0.0379021055996418, "learning_rate": 3.750064892561257e-05, "loss": 46.0057, "step": 4293 }, { "epoch": 0.5862516212710766, "grad_norm": 0.14360937476158142, "learning_rate": 3.747959636699385e-05, "loss": 46.0009, "step": 4294 }, { "epoch": 0.5863881493617312, "grad_norm": 0.0669412687420845, "learning_rate": 3.745854617627097e-05, "loss": 46.0073, "step": 4295 }, { "epoch": 0.5865246774523858, "grad_norm": 0.14032770693302155, "learning_rate": 3.743749835742498e-05, "loss": 46.0096, "step": 4296 }, { "epoch": 0.5866612055430405, "grad_norm": 0.29445409774780273, "learning_rate": 3.741645291443654e-05, "loss": 46.0016, "step": 4297 }, { "epoch": 0.5867977336336951, "grad_norm": 0.10499759018421173, "learning_rate": 3.739540985128583e-05, "loss": 46.0064, "step": 4298 }, { "epoch": 0.5869342617243498, "grad_norm": 0.12879155576229095, "learning_rate": 3.737436917195255e-05, "loss": 46.0, "step": 4299 }, { "epoch": 0.5870707898150045, "grad_norm": 0.12872737646102905, "learning_rate": 3.735333088041596e-05, "loss": 46.0037, "step": 4300 }, { "epoch": 0.587207317905659, "grad_norm": 0.12918731570243835, "learning_rate": 3.733229498065493e-05, "loss": 46.007, "step": 4301 }, { "epoch": 0.5873438459963137, "grad_norm": 0.09211447089910507, "learning_rate": 3.731126147664782e-05, "loss": 46.0024, "step": 4302 }, { "epoch": 0.5874803740869684, "grad_norm": 0.06117982789874077, "learning_rate": 3.729023037237252e-05, "loss": 46.0052, "step": 4303 }, { "epoch": 0.5876169021776231, "grad_norm": 0.05547470971941948, "learning_rate": 3.726920167180652e-05, "loss": 46.0019, 
"step": 4304 }, { "epoch": 0.5877534302682776, "grad_norm": 0.09759073704481125, "learning_rate": 3.724817537892683e-05, "loss": 46.0061, "step": 4305 }, { "epoch": 0.5878899583589323, "grad_norm": 0.036053985357284546, "learning_rate": 3.722715149771001e-05, "loss": 46.0039, "step": 4306 }, { "epoch": 0.588026486449587, "grad_norm": 0.04423439875245094, "learning_rate": 3.720613003213212e-05, "loss": 46.0038, "step": 4307 }, { "epoch": 0.5881630145402417, "grad_norm": 0.03751080110669136, "learning_rate": 3.718511098616884e-05, "loss": 46.0036, "step": 4308 }, { "epoch": 0.5882995426308963, "grad_norm": 0.10478569567203522, "learning_rate": 3.7164094363795354e-05, "loss": 46.0019, "step": 4309 }, { "epoch": 0.5884360707215509, "grad_norm": 0.21864309906959534, "learning_rate": 3.714308016898636e-05, "loss": 46.0031, "step": 4310 }, { "epoch": 0.5885725988122056, "grad_norm": 0.043107517063617706, "learning_rate": 3.712206840571616e-05, "loss": 46.0064, "step": 4311 }, { "epoch": 0.5887091269028603, "grad_norm": 0.1518174558877945, "learning_rate": 3.7101059077958545e-05, "loss": 46.0026, "step": 4312 }, { "epoch": 0.5888456549935149, "grad_norm": 0.07207447290420532, "learning_rate": 3.708005218968685e-05, "loss": 46.0109, "step": 4313 }, { "epoch": 0.5889821830841696, "grad_norm": 0.08164936304092407, "learning_rate": 3.705904774487396e-05, "loss": 46.0041, "step": 4314 }, { "epoch": 0.5891187111748242, "grad_norm": 0.07013317942619324, "learning_rate": 3.7038045747492336e-05, "loss": 46.0011, "step": 4315 }, { "epoch": 0.5892552392654788, "grad_norm": 0.04303300008177757, "learning_rate": 3.701704620151389e-05, "loss": 46.0025, "step": 4316 }, { "epoch": 0.5893917673561335, "grad_norm": 0.0663682147860527, "learning_rate": 3.699604911091013e-05, "loss": 46.0054, "step": 4317 }, { "epoch": 0.5895282954467882, "grad_norm": 0.05712326243519783, "learning_rate": 3.6975054479652104e-05, "loss": 46.0026, "step": 4318 }, { "epoch": 0.5896648235374429, "grad_norm": 0.08883316814899445, "learning_rate": 3.695406231171038e-05, "loss": 46.0079, "step": 4319 }, { "epoch": 0.5898013516280974, "grad_norm": 0.06615108251571655, "learning_rate": 3.6933072611055054e-05, "loss": 46.0054, "step": 4320 }, { "epoch": 0.5899378797187521, "grad_norm": 0.0927659198641777, "learning_rate": 3.6912085381655734e-05, "loss": 46.0029, "step": 4321 }, { "epoch": 0.5900744078094068, "grad_norm": 0.05156198889017105, "learning_rate": 3.6891100627481624e-05, "loss": 46.0002, "step": 4322 }, { "epoch": 0.5902109359000615, "grad_norm": 0.0534687265753746, "learning_rate": 3.687011835250141e-05, "loss": 46.0025, "step": 4323 }, { "epoch": 0.5903474639907161, "grad_norm": 0.04731634259223938, "learning_rate": 3.6849138560683305e-05, "loss": 46.0115, "step": 4324 }, { "epoch": 0.5904839920813707, "grad_norm": 0.07578995078802109, "learning_rate": 3.682816125599511e-05, "loss": 46.0011, "step": 4325 }, { "epoch": 0.5906205201720254, "grad_norm": 0.11654839664697647, "learning_rate": 3.680718644240407e-05, "loss": 46.001, "step": 4326 }, { "epoch": 0.59075704826268, "grad_norm": 0.16289417445659637, "learning_rate": 3.678621412387703e-05, "loss": 46.0046, "step": 4327 }, { "epoch": 0.5908935763533347, "grad_norm": 0.21898220479488373, "learning_rate": 3.676524430438032e-05, "loss": 46.0035, "step": 4328 }, { "epoch": 0.5910301044439894, "grad_norm": 0.062218643724918365, "learning_rate": 3.674427698787985e-05, "loss": 46.004, "step": 4329 }, { "epoch": 0.591166632534644, "grad_norm": 0.08002422749996185, "learning_rate": 
3.672331217834098e-05, "loss": 46.0078, "step": 4330 }, { "epoch": 0.5913031606252986, "grad_norm": 0.10204236954450607, "learning_rate": 3.670234987972865e-05, "loss": 46.0046, "step": 4331 }, { "epoch": 0.5914396887159533, "grad_norm": 0.12423589080572128, "learning_rate": 3.668139009600731e-05, "loss": 46.0023, "step": 4332 }, { "epoch": 0.591576216806608, "grad_norm": 0.12721998989582062, "learning_rate": 3.666043283114097e-05, "loss": 46.0054, "step": 4333 }, { "epoch": 0.5917127448972627, "grad_norm": 0.08459702134132385, "learning_rate": 3.6639478089093074e-05, "loss": 46.0005, "step": 4334 }, { "epoch": 0.5918492729879172, "grad_norm": 0.14385032653808594, "learning_rate": 3.661852587382667e-05, "loss": 46.0052, "step": 4335 }, { "epoch": 0.5919858010785719, "grad_norm": 0.03450857847929001, "learning_rate": 3.659757618930432e-05, "loss": 46.0135, "step": 4336 }, { "epoch": 0.5921223291692266, "grad_norm": 0.14188598096370697, "learning_rate": 3.657662903948808e-05, "loss": 46.0042, "step": 4337 }, { "epoch": 0.5922588572598813, "grad_norm": 0.03717031702399254, "learning_rate": 3.655568442833952e-05, "loss": 46.0075, "step": 4338 }, { "epoch": 0.5923953853505358, "grad_norm": 0.11317181587219238, "learning_rate": 3.6534742359819754e-05, "loss": 46.0009, "step": 4339 }, { "epoch": 0.5925319134411905, "grad_norm": 0.06808724999427795, "learning_rate": 3.6513802837889425e-05, "loss": 46.0002, "step": 4340 }, { "epoch": 0.5926684415318452, "grad_norm": 0.03163086250424385, "learning_rate": 3.649286586650866e-05, "loss": 46.0023, "step": 4341 }, { "epoch": 0.5928049696224998, "grad_norm": 0.03407124802470207, "learning_rate": 3.6471931449637124e-05, "loss": 46.0035, "step": 4342 }, { "epoch": 0.5929414977131545, "grad_norm": 0.079999178647995, "learning_rate": 3.645099959123402e-05, "loss": 46.0008, "step": 4343 }, { "epoch": 0.5930780258038091, "grad_norm": 0.07053667306900024, "learning_rate": 3.643007029525801e-05, "loss": 46.0062, "step": 4344 }, { "epoch": 0.5932145538944638, "grad_norm": 0.08338662981987, "learning_rate": 3.640914356566731e-05, "loss": 46.0015, "step": 4345 }, { "epoch": 0.5933510819851184, "grad_norm": 0.03604087233543396, "learning_rate": 3.638821940641965e-05, "loss": 46.0021, "step": 4346 }, { "epoch": 0.5934876100757731, "grad_norm": 0.19087360799312592, "learning_rate": 3.636729782147229e-05, "loss": 46.0014, "step": 4347 }, { "epoch": 0.5936241381664278, "grad_norm": 0.13813664019107819, "learning_rate": 3.634637881478196e-05, "loss": 46.0075, "step": 4348 }, { "epoch": 0.5937606662570823, "grad_norm": 0.15297341346740723, "learning_rate": 3.632546239030491e-05, "loss": 46.0029, "step": 4349 }, { "epoch": 0.593897194347737, "grad_norm": 0.1052624061703682, "learning_rate": 3.630454855199694e-05, "loss": 46.0, "step": 4350 }, { "epoch": 0.5940337224383917, "grad_norm": 0.030981192365288734, "learning_rate": 3.6283637303813344e-05, "loss": 46.0055, "step": 4351 }, { "epoch": 0.5941702505290464, "grad_norm": 0.0931968241930008, "learning_rate": 3.62627286497089e-05, "loss": 46.0045, "step": 4352 }, { "epoch": 0.594306778619701, "grad_norm": 0.08448714762926102, "learning_rate": 3.62418225936379e-05, "loss": 46.0048, "step": 4353 }, { "epoch": 0.5944433067103556, "grad_norm": 0.08227349817752838, "learning_rate": 3.62209191395542e-05, "loss": 46.002, "step": 4354 }, { "epoch": 0.5945798348010103, "grad_norm": 0.058360204100608826, "learning_rate": 3.620001829141111e-05, "loss": 46.0007, "step": 4355 }, { "epoch": 0.594716362891665, "grad_norm": 
0.04601885378360748, "learning_rate": 3.6179120053161416e-05, "loss": 46.0032, "step": 4356 }, { "epoch": 0.5948528909823196, "grad_norm": 0.03014664724469185, "learning_rate": 3.6158224428757535e-05, "loss": 46.003, "step": 4357 }, { "epoch": 0.5949894190729743, "grad_norm": 0.05767808109521866, "learning_rate": 3.6137331422151255e-05, "loss": 46.0062, "step": 4358 }, { "epoch": 0.5951259471636289, "grad_norm": 0.05631568655371666, "learning_rate": 3.6116441037293937e-05, "loss": 46.0028, "step": 4359 }, { "epoch": 0.5952624752542836, "grad_norm": 0.04386984184384346, "learning_rate": 3.6095553278136406e-05, "loss": 46.0055, "step": 4360 }, { "epoch": 0.5953990033449382, "grad_norm": 0.10023115575313568, "learning_rate": 3.607466814862907e-05, "loss": 46.0028, "step": 4361 }, { "epoch": 0.5955355314355929, "grad_norm": 0.10370411723852158, "learning_rate": 3.605378565272175e-05, "loss": 46.0036, "step": 4362 }, { "epoch": 0.5956720595262476, "grad_norm": 0.0919298455119133, "learning_rate": 3.60329057943638e-05, "loss": 46.0128, "step": 4363 }, { "epoch": 0.5958085876169021, "grad_norm": 0.08302462846040726, "learning_rate": 3.6012028577504106e-05, "loss": 46.0061, "step": 4364 }, { "epoch": 0.5959451157075568, "grad_norm": 0.06262465566396713, "learning_rate": 3.5991154006091023e-05, "loss": 46.0061, "step": 4365 }, { "epoch": 0.5960816437982115, "grad_norm": 0.08482460677623749, "learning_rate": 3.5970282084072405e-05, "loss": 46.0044, "step": 4366 }, { "epoch": 0.5962181718888662, "grad_norm": 0.0317610427737236, "learning_rate": 3.5949412815395594e-05, "loss": 46.0096, "step": 4367 }, { "epoch": 0.5963546999795208, "grad_norm": 0.06994505226612091, "learning_rate": 3.592854620400748e-05, "loss": 46.0115, "step": 4368 }, { "epoch": 0.5964912280701754, "grad_norm": 0.07669384032487869, "learning_rate": 3.590768225385441e-05, "loss": 46.0086, "step": 4369 }, { "epoch": 0.5966277561608301, "grad_norm": 0.115550696849823, "learning_rate": 3.5886820968882206e-05, "loss": 46.0114, "step": 4370 }, { "epoch": 0.5967642842514848, "grad_norm": 0.08419157564640045, "learning_rate": 3.586596235303627e-05, "loss": 46.0078, "step": 4371 }, { "epoch": 0.5969008123421394, "grad_norm": 0.11112983524799347, "learning_rate": 3.5845106410261414e-05, "loss": 46.0083, "step": 4372 }, { "epoch": 0.597037340432794, "grad_norm": 0.035214923322200775, "learning_rate": 3.5824253144501984e-05, "loss": 46.0051, "step": 4373 }, { "epoch": 0.5971738685234487, "grad_norm": 0.08452166616916656, "learning_rate": 3.580340255970177e-05, "loss": 46.002, "step": 4374 }, { "epoch": 0.5973103966141033, "grad_norm": 0.04793662205338478, "learning_rate": 3.578255465980417e-05, "loss": 46.0067, "step": 4375 }, { "epoch": 0.597446924704758, "grad_norm": 0.08468640595674515, "learning_rate": 3.576170944875195e-05, "loss": 46.0061, "step": 4376 }, { "epoch": 0.5975834527954127, "grad_norm": 0.0737350583076477, "learning_rate": 3.574086693048741e-05, "loss": 46.0034, "step": 4377 }, { "epoch": 0.5977199808860673, "grad_norm": 0.08933068066835403, "learning_rate": 3.5720027108952395e-05, "loss": 46.0052, "step": 4378 }, { "epoch": 0.5978565089767219, "grad_norm": 0.17222310602664948, "learning_rate": 3.569918998808817e-05, "loss": 46.0042, "step": 4379 }, { "epoch": 0.5979930370673766, "grad_norm": 0.07497809082269669, "learning_rate": 3.56783555718355e-05, "loss": 46.006, "step": 4380 }, { "epoch": 0.5981295651580313, "grad_norm": 0.08348838239908218, "learning_rate": 3.5657523864134656e-05, "loss": 46.0023, "step": 4381 }, { 
"epoch": 0.598266093248686, "grad_norm": 0.06986185163259506, "learning_rate": 3.56366948689254e-05, "loss": 46.0029, "step": 4382 }, { "epoch": 0.5984026213393405, "grad_norm": 0.0889706090092659, "learning_rate": 3.5615868590146995e-05, "loss": 46.0005, "step": 4383 }, { "epoch": 0.5985391494299952, "grad_norm": 0.0912967324256897, "learning_rate": 3.5595045031738125e-05, "loss": 46.0029, "step": 4384 }, { "epoch": 0.5986756775206499, "grad_norm": 0.06904101371765137, "learning_rate": 3.557422419763703e-05, "loss": 46.0137, "step": 4385 }, { "epoch": 0.5988122056113045, "grad_norm": 0.06315898150205612, "learning_rate": 3.555340609178141e-05, "loss": 46.0069, "step": 4386 }, { "epoch": 0.5989487337019592, "grad_norm": 0.16565854847431183, "learning_rate": 3.553259071810843e-05, "loss": 46.0028, "step": 4387 }, { "epoch": 0.5990852617926138, "grad_norm": 0.09017941355705261, "learning_rate": 3.551177808055476e-05, "loss": 46.0037, "step": 4388 }, { "epoch": 0.5992217898832685, "grad_norm": 0.05858726426959038, "learning_rate": 3.549096818305655e-05, "loss": 46.0058, "step": 4389 }, { "epoch": 0.5993583179739231, "grad_norm": 0.12049282342195511, "learning_rate": 3.547016102954944e-05, "loss": 46.005, "step": 4390 }, { "epoch": 0.5994948460645778, "grad_norm": 0.07481376081705093, "learning_rate": 3.544935662396854e-05, "loss": 46.0, "step": 4391 }, { "epoch": 0.5996313741552325, "grad_norm": 0.059108372777700424, "learning_rate": 3.54285549702484e-05, "loss": 46.0059, "step": 4392 }, { "epoch": 0.599767902245887, "grad_norm": 0.17801479995250702, "learning_rate": 3.540775607232316e-05, "loss": 46.0061, "step": 4393 }, { "epoch": 0.5999044303365417, "grad_norm": 0.22576040029525757, "learning_rate": 3.538695993412631e-05, "loss": 46.0127, "step": 4394 }, { "epoch": 0.6000409584271964, "grad_norm": 0.29897722601890564, "learning_rate": 3.5366166559590894e-05, "loss": 46.0047, "step": 4395 }, { "epoch": 0.6001774865178511, "grad_norm": 0.0765308290719986, "learning_rate": 3.534537595264944e-05, "loss": 46.0031, "step": 4396 }, { "epoch": 0.6003140146085058, "grad_norm": 0.08068645000457764, "learning_rate": 3.532458811723391e-05, "loss": 46.0049, "step": 4397 }, { "epoch": 0.6004505426991603, "grad_norm": 0.12831254303455353, "learning_rate": 3.530380305727576e-05, "loss": 46.0004, "step": 4398 }, { "epoch": 0.600587070789815, "grad_norm": 0.12276868522167206, "learning_rate": 3.528302077670591e-05, "loss": 46.0018, "step": 4399 }, { "epoch": 0.6007235988804697, "grad_norm": 0.1233544796705246, "learning_rate": 3.5262241279454785e-05, "loss": 46.0018, "step": 4400 }, { "epoch": 0.6008601269711243, "grad_norm": 0.08760356158018112, "learning_rate": 3.524146456945229e-05, "loss": 46.0026, "step": 4401 }, { "epoch": 0.6009966550617789, "grad_norm": 0.034584421664476395, "learning_rate": 3.522069065062771e-05, "loss": 46.0007, "step": 4402 }, { "epoch": 0.6011331831524336, "grad_norm": 0.038604628294706345, "learning_rate": 3.519991952690992e-05, "loss": 46.0034, "step": 4403 }, { "epoch": 0.6012697112430883, "grad_norm": 0.11173518002033234, "learning_rate": 3.5179151202227215e-05, "loss": 46.0025, "step": 4404 }, { "epoch": 0.6014062393337429, "grad_norm": 0.06896651536226273, "learning_rate": 3.515838568050736e-05, "loss": 46.002, "step": 4405 }, { "epoch": 0.6015427674243976, "grad_norm": 0.03670615330338478, "learning_rate": 3.5137622965677564e-05, "loss": 46.0031, "step": 4406 }, { "epoch": 0.6016792955150522, "grad_norm": 0.03644955903291702, "learning_rate": 3.5116863061664556e-05, 
"loss": 46.0014, "step": 4407 }, { "epoch": 0.6018158236057068, "grad_norm": 0.05525347590446472, "learning_rate": 3.509610597239452e-05, "loss": 46.0046, "step": 4408 }, { "epoch": 0.6019523516963615, "grad_norm": 0.03206522762775421, "learning_rate": 3.507535170179307e-05, "loss": 46.0015, "step": 4409 }, { "epoch": 0.6020888797870162, "grad_norm": 0.07896500825881958, "learning_rate": 3.505460025378535e-05, "loss": 46.0039, "step": 4410 }, { "epoch": 0.6022254078776709, "grad_norm": 0.08707083761692047, "learning_rate": 3.503385163229591e-05, "loss": 46.0074, "step": 4411 }, { "epoch": 0.6023619359683254, "grad_norm": 0.15682901442050934, "learning_rate": 3.5013105841248795e-05, "loss": 46.016, "step": 4412 }, { "epoch": 0.6024984640589801, "grad_norm": 0.10615840554237366, "learning_rate": 3.49923628845675e-05, "loss": 46.006, "step": 4413 }, { "epoch": 0.6026349921496348, "grad_norm": 0.16351549327373505, "learning_rate": 3.497162276617503e-05, "loss": 46.0109, "step": 4414 }, { "epoch": 0.6027715202402895, "grad_norm": 0.0441865436732769, "learning_rate": 3.495088548999379e-05, "loss": 46.0043, "step": 4415 }, { "epoch": 0.6029080483309441, "grad_norm": 0.11977081000804901, "learning_rate": 3.493015105994567e-05, "loss": 46.0007, "step": 4416 }, { "epoch": 0.6030445764215987, "grad_norm": 0.03557894378900528, "learning_rate": 3.490941947995204e-05, "loss": 46.0037, "step": 4417 }, { "epoch": 0.6031811045122534, "grad_norm": 0.0643799677491188, "learning_rate": 3.488869075393373e-05, "loss": 46.0043, "step": 4418 }, { "epoch": 0.603317632602908, "grad_norm": 0.062019214034080505, "learning_rate": 3.4867964885811e-05, "loss": 46.0061, "step": 4419 }, { "epoch": 0.6034541606935627, "grad_norm": 0.09654542058706284, "learning_rate": 3.4847241879503575e-05, "loss": 46.0014, "step": 4420 }, { "epoch": 0.6035906887842174, "grad_norm": 0.033531833440065384, "learning_rate": 3.482652173893067e-05, "loss": 46.0047, "step": 4421 }, { "epoch": 0.603727216874872, "grad_norm": 0.08665689080953598, "learning_rate": 3.480580446801095e-05, "loss": 46.0043, "step": 4422 }, { "epoch": 0.6038637449655266, "grad_norm": 0.11660393327474594, "learning_rate": 3.478509007066249e-05, "loss": 46.0046, "step": 4423 }, { "epoch": 0.6040002730561813, "grad_norm": 0.050875164568424225, "learning_rate": 3.4764378550802915e-05, "loss": 46.0075, "step": 4424 }, { "epoch": 0.604136801146836, "grad_norm": 0.05600744113326073, "learning_rate": 3.474366991234919e-05, "loss": 46.0054, "step": 4425 }, { "epoch": 0.6042733292374907, "grad_norm": 0.15942251682281494, "learning_rate": 3.472296415921783e-05, "loss": 46.0068, "step": 4426 }, { "epoch": 0.6044098573281452, "grad_norm": 0.04356178641319275, "learning_rate": 3.470226129532475e-05, "loss": 46.0095, "step": 4427 }, { "epoch": 0.6045463854187999, "grad_norm": 0.11671172082424164, "learning_rate": 3.468156132458535e-05, "loss": 46.0054, "step": 4428 }, { "epoch": 0.6046829135094546, "grad_norm": 0.0747804269194603, "learning_rate": 3.466086425091446e-05, "loss": 46.0049, "step": 4429 }, { "epoch": 0.6048194416001093, "grad_norm": 0.09105555713176727, "learning_rate": 3.464017007822638e-05, "loss": 46.0048, "step": 4430 }, { "epoch": 0.6049559696907639, "grad_norm": 0.12296337634325027, "learning_rate": 3.4619478810434845e-05, "loss": 46.0025, "step": 4431 }, { "epoch": 0.6050924977814185, "grad_norm": 0.04539744183421135, "learning_rate": 3.459879045145307e-05, "loss": 46.0076, "step": 4432 }, { "epoch": 0.6052290258720732, "grad_norm": 0.08707727491855621, 
"learning_rate": 3.4578105005193675e-05, "loss": 46.0081, "step": 4433 }, { "epoch": 0.6053655539627278, "grad_norm": 0.05195476487278938, "learning_rate": 3.455742247556874e-05, "loss": 46.004, "step": 4434 }, { "epoch": 0.6055020820533825, "grad_norm": 0.07239328324794769, "learning_rate": 3.453674286648985e-05, "loss": 46.0054, "step": 4435 }, { "epoch": 0.6056386101440371, "grad_norm": 0.13089999556541443, "learning_rate": 3.4516066181867955e-05, "loss": 46.0094, "step": 4436 }, { "epoch": 0.6057751382346918, "grad_norm": 0.06729764491319656, "learning_rate": 3.4495392425613503e-05, "loss": 46.0046, "step": 4437 }, { "epoch": 0.6059116663253464, "grad_norm": 0.06910049170255661, "learning_rate": 3.4474721601636404e-05, "loss": 46.0002, "step": 4438 }, { "epoch": 0.6060481944160011, "grad_norm": 0.1308211237192154, "learning_rate": 3.4454053713845945e-05, "loss": 46.0094, "step": 4439 }, { "epoch": 0.6061847225066558, "grad_norm": 0.06315009295940399, "learning_rate": 3.443338876615092e-05, "loss": 46.0074, "step": 4440 }, { "epoch": 0.6063212505973103, "grad_norm": 0.07268992066383362, "learning_rate": 3.441272676245952e-05, "loss": 46.0052, "step": 4441 }, { "epoch": 0.606457778687965, "grad_norm": 0.047160495072603226, "learning_rate": 3.439206770667946e-05, "loss": 46.0002, "step": 4442 }, { "epoch": 0.6065943067786197, "grad_norm": 0.05835330858826637, "learning_rate": 3.4371411602717784e-05, "loss": 46.0011, "step": 4443 }, { "epoch": 0.6067308348692744, "grad_norm": 0.03351099044084549, "learning_rate": 3.4350758454481045e-05, "loss": 46.0079, "step": 4444 }, { "epoch": 0.606867362959929, "grad_norm": 0.1551922857761383, "learning_rate": 3.4330108265875254e-05, "loss": 46.0094, "step": 4445 }, { "epoch": 0.6070038910505836, "grad_norm": 0.09057211875915527, "learning_rate": 3.430946104080583e-05, "loss": 46.0074, "step": 4446 }, { "epoch": 0.6071404191412383, "grad_norm": 0.12899525463581085, "learning_rate": 3.428881678317763e-05, "loss": 46.0013, "step": 4447 }, { "epoch": 0.607276947231893, "grad_norm": 0.17337369918823242, "learning_rate": 3.426817549689493e-05, "loss": 46.0018, "step": 4448 }, { "epoch": 0.6074134753225476, "grad_norm": 0.12488137930631638, "learning_rate": 3.4247537185861514e-05, "loss": 46.0047, "step": 4449 }, { "epoch": 0.6075500034132023, "grad_norm": 0.30541759729385376, "learning_rate": 3.4226901853980554e-05, "loss": 46.0, "step": 4450 }, { "epoch": 0.6076865315038569, "grad_norm": 0.31408047676086426, "learning_rate": 3.420626950515462e-05, "loss": 46.0007, "step": 4451 }, { "epoch": 0.6078230595945115, "grad_norm": 0.08922439813613892, "learning_rate": 3.418564014328583e-05, "loss": 46.0043, "step": 4452 }, { "epoch": 0.6079595876851662, "grad_norm": 0.0782141238451004, "learning_rate": 3.4165013772275626e-05, "loss": 46.0017, "step": 4453 }, { "epoch": 0.6080961157758209, "grad_norm": 0.048420120030641556, "learning_rate": 3.414439039602495e-05, "loss": 46.0002, "step": 4454 }, { "epoch": 0.6082326438664756, "grad_norm": 0.09339205175638199, "learning_rate": 3.4123770018434125e-05, "loss": 46.0024, "step": 4455 }, { "epoch": 0.6083691719571301, "grad_norm": 0.06397505104541779, "learning_rate": 3.4103152643402985e-05, "loss": 46.004, "step": 4456 }, { "epoch": 0.6085057000477848, "grad_norm": 0.038910601288080215, "learning_rate": 3.408253827483071e-05, "loss": 46.0005, "step": 4457 }, { "epoch": 0.6086422281384395, "grad_norm": 0.03227005526423454, "learning_rate": 3.406192691661598e-05, "loss": 46.0037, "step": 4458 }, { "epoch": 
0.6087787562290942, "grad_norm": 0.08500204235315323, "learning_rate": 3.404131857265682e-05, "loss": 46.0037, "step": 4459 }, { "epoch": 0.6089152843197488, "grad_norm": 0.054018113762140274, "learning_rate": 3.402071324685082e-05, "loss": 46.0054, "step": 4460 }, { "epoch": 0.6090518124104034, "grad_norm": 0.10801033675670624, "learning_rate": 3.400011094309487e-05, "loss": 46.0085, "step": 4461 }, { "epoch": 0.6091883405010581, "grad_norm": 0.10957902669906616, "learning_rate": 3.397951166528534e-05, "loss": 46.004, "step": 4462 }, { "epoch": 0.6093248685917128, "grad_norm": 0.11573906242847443, "learning_rate": 3.395891541731805e-05, "loss": 46.0016, "step": 4463 }, { "epoch": 0.6094613966823674, "grad_norm": 0.08529449254274368, "learning_rate": 3.393832220308821e-05, "loss": 46.0052, "step": 4464 }, { "epoch": 0.6095979247730221, "grad_norm": 0.06786014139652252, "learning_rate": 3.391773202649047e-05, "loss": 46.0022, "step": 4465 }, { "epoch": 0.6097344528636767, "grad_norm": 0.1241271048784256, "learning_rate": 3.389714489141889e-05, "loss": 46.002, "step": 4466 }, { "epoch": 0.6098709809543313, "grad_norm": 0.05683095008134842, "learning_rate": 3.387656080176699e-05, "loss": 46.0046, "step": 4467 }, { "epoch": 0.610007509044986, "grad_norm": 0.05700958892703056, "learning_rate": 3.38559797614277e-05, "loss": 46.0007, "step": 4468 }, { "epoch": 0.6101440371356407, "grad_norm": 0.03749581798911095, "learning_rate": 3.383540177429333e-05, "loss": 46.0047, "step": 4469 }, { "epoch": 0.6102805652262953, "grad_norm": 0.0917622298002243, "learning_rate": 3.381482684425571e-05, "loss": 46.0062, "step": 4470 }, { "epoch": 0.6104170933169499, "grad_norm": 0.14039354026317596, "learning_rate": 3.379425497520598e-05, "loss": 46.0125, "step": 4471 }, { "epoch": 0.6105536214076046, "grad_norm": 0.0921136811375618, "learning_rate": 3.377368617103478e-05, "loss": 46.0061, "step": 4472 }, { "epoch": 0.6106901494982593, "grad_norm": 0.06732282042503357, "learning_rate": 3.3753120435632116e-05, "loss": 46.0039, "step": 4473 }, { "epoch": 0.610826677588914, "grad_norm": 0.07454404979944229, "learning_rate": 3.373255777288748e-05, "loss": 46.002, "step": 4474 }, { "epoch": 0.6109632056795685, "grad_norm": 0.21821412444114685, "learning_rate": 3.371199818668971e-05, "loss": 46.0138, "step": 4475 }, { "epoch": 0.6110997337702232, "grad_norm": 0.06654957681894302, "learning_rate": 3.36914416809271e-05, "loss": 46.0023, "step": 4476 }, { "epoch": 0.6112362618608779, "grad_norm": 0.07606367021799088, "learning_rate": 3.3670888259487375e-05, "loss": 46.0041, "step": 4477 }, { "epoch": 0.6113727899515325, "grad_norm": 0.09395492821931839, "learning_rate": 3.365033792625767e-05, "loss": 46.004, "step": 4478 }, { "epoch": 0.6115093180421872, "grad_norm": 0.05660363659262657, "learning_rate": 3.3629790685124484e-05, "loss": 46.0081, "step": 4479 }, { "epoch": 0.6116458461328418, "grad_norm": 0.050514522939920425, "learning_rate": 3.360924653997378e-05, "loss": 46.0056, "step": 4480 }, { "epoch": 0.6117823742234965, "grad_norm": 0.06680817157030106, "learning_rate": 3.358870549469096e-05, "loss": 46.0022, "step": 4481 }, { "epoch": 0.6119189023141511, "grad_norm": 0.1630132496356964, "learning_rate": 3.3568167553160787e-05, "loss": 46.0045, "step": 4482 }, { "epoch": 0.6120554304048058, "grad_norm": 0.041735172271728516, "learning_rate": 3.3547632719267444e-05, "loss": 46.0035, "step": 4483 }, { "epoch": 0.6121919584954605, "grad_norm": 0.12745948135852814, "learning_rate": 3.352710099689457e-05, "loss": 
46.0058, "step": 4484 }, { "epoch": 0.612328486586115, "grad_norm": 0.1218743622303009, "learning_rate": 3.350657238992516e-05, "loss": 46.0, "step": 4485 }, { "epoch": 0.6124650146767697, "grad_norm": 0.13234339654445648, "learning_rate": 3.3486046902241664e-05, "loss": 46.0052, "step": 4486 }, { "epoch": 0.6126015427674244, "grad_norm": 0.05586312338709831, "learning_rate": 3.3465524537725875e-05, "loss": 46.0036, "step": 4487 }, { "epoch": 0.6127380708580791, "grad_norm": 0.08620762079954147, "learning_rate": 3.344500530025912e-05, "loss": 46.0046, "step": 4488 }, { "epoch": 0.6128745989487338, "grad_norm": 0.03764060512185097, "learning_rate": 3.3424489193722013e-05, "loss": 46.0036, "step": 4489 }, { "epoch": 0.6130111270393883, "grad_norm": 0.0421682745218277, "learning_rate": 3.340397622199462e-05, "loss": 46.0057, "step": 4490 }, { "epoch": 0.613147655130043, "grad_norm": 0.07224363833665848, "learning_rate": 3.338346638895642e-05, "loss": 46.0075, "step": 4491 }, { "epoch": 0.6132841832206977, "grad_norm": 0.07549507170915604, "learning_rate": 3.3362959698486306e-05, "loss": 46.0068, "step": 4492 }, { "epoch": 0.6134207113113523, "grad_norm": 0.06632731854915619, "learning_rate": 3.334245615446255e-05, "loss": 46.0028, "step": 4493 }, { "epoch": 0.613557239402007, "grad_norm": 0.040623705834150314, "learning_rate": 3.332195576076283e-05, "loss": 46.0006, "step": 4494 }, { "epoch": 0.6136937674926616, "grad_norm": 0.22971110045909882, "learning_rate": 3.330145852126428e-05, "loss": 46.0041, "step": 4495 }, { "epoch": 0.6138302955833163, "grad_norm": 0.08484344929456711, "learning_rate": 3.3280964439843376e-05, "loss": 46.0011, "step": 4496 }, { "epoch": 0.6139668236739709, "grad_norm": 0.17664486169815063, "learning_rate": 3.3260473520376e-05, "loss": 46.0031, "step": 4497 }, { "epoch": 0.6141033517646256, "grad_norm": 0.03784787654876709, "learning_rate": 3.323998576673749e-05, "loss": 46.0, "step": 4498 }, { "epoch": 0.6142398798552802, "grad_norm": 0.3492957055568695, "learning_rate": 3.321950118280253e-05, "loss": 46.0074, "step": 4499 }, { "epoch": 0.6143764079459348, "grad_norm": 0.10065589100122452, "learning_rate": 3.319901977244525e-05, "loss": 46.0059, "step": 4500 }, { "epoch": 0.6145129360365895, "grad_norm": 0.08518275618553162, "learning_rate": 3.317854153953911e-05, "loss": 46.0126, "step": 4501 }, { "epoch": 0.6146494641272442, "grad_norm": 0.06854823976755142, "learning_rate": 3.315806648795704e-05, "loss": 46.0063, "step": 4502 }, { "epoch": 0.6147859922178989, "grad_norm": 0.09186793863773346, "learning_rate": 3.3137594621571353e-05, "loss": 46.0026, "step": 4503 }, { "epoch": 0.6149225203085534, "grad_norm": 0.03790552541613579, "learning_rate": 3.311712594425373e-05, "loss": 46.0, "step": 4504 }, { "epoch": 0.6150590483992081, "grad_norm": 0.059265296906232834, "learning_rate": 3.30966604598753e-05, "loss": 46.0005, "step": 4505 }, { "epoch": 0.6151955764898628, "grad_norm": 0.11641985177993774, "learning_rate": 3.307619817230651e-05, "loss": 46.0047, "step": 4506 }, { "epoch": 0.6153321045805175, "grad_norm": 0.041874565184116364, "learning_rate": 3.305573908541728e-05, "loss": 46.0038, "step": 4507 }, { "epoch": 0.6154686326711721, "grad_norm": 0.08744334429502487, "learning_rate": 3.303528320307688e-05, "loss": 46.0006, "step": 4508 }, { "epoch": 0.6156051607618267, "grad_norm": 0.06722111254930496, "learning_rate": 3.3014830529153994e-05, "loss": 46.0072, "step": 4509 }, { "epoch": 0.6157416888524814, "grad_norm": 0.08663570880889893, "learning_rate": 
3.2994381067516696e-05, "loss": 46.0095, "step": 4510 }, { "epoch": 0.615878216943136, "grad_norm": 0.0636630430817604, "learning_rate": 3.2973934822032423e-05, "loss": 46.0074, "step": 4511 }, { "epoch": 0.6160147450337907, "grad_norm": 0.07207613438367844, "learning_rate": 3.2953491796568063e-05, "loss": 46.0096, "step": 4512 }, { "epoch": 0.6161512731244454, "grad_norm": 0.09541121870279312, "learning_rate": 3.293305199498985e-05, "loss": 46.0072, "step": 4513 }, { "epoch": 0.6162878012151, "grad_norm": 0.06648455560207367, "learning_rate": 3.291261542116342e-05, "loss": 46.0083, "step": 4514 }, { "epoch": 0.6164243293057546, "grad_norm": 0.05397643521428108, "learning_rate": 3.2892182078953785e-05, "loss": 46.0082, "step": 4515 }, { "epoch": 0.6165608573964093, "grad_norm": 0.16415931284427643, "learning_rate": 3.287175197222537e-05, "loss": 46.0051, "step": 4516 }, { "epoch": 0.616697385487064, "grad_norm": 0.09156833589076996, "learning_rate": 3.285132510484198e-05, "loss": 46.0039, "step": 4517 }, { "epoch": 0.6168339135777187, "grad_norm": 0.060234688222408295, "learning_rate": 3.283090148066679e-05, "loss": 46.002, "step": 4518 }, { "epoch": 0.6169704416683732, "grad_norm": 0.1194780245423317, "learning_rate": 3.281048110356241e-05, "loss": 46.0055, "step": 4519 }, { "epoch": 0.6171069697590279, "grad_norm": 0.0924786701798439, "learning_rate": 3.279006397739077e-05, "loss": 46.013, "step": 4520 }, { "epoch": 0.6172434978496826, "grad_norm": 0.06693924218416214, "learning_rate": 3.2769650106013236e-05, "loss": 46.0075, "step": 4521 }, { "epoch": 0.6173800259403373, "grad_norm": 0.13940265774726868, "learning_rate": 3.274923949329051e-05, "loss": 46.0071, "step": 4522 }, { "epoch": 0.6175165540309919, "grad_norm": 0.10261955857276917, "learning_rate": 3.2728832143082755e-05, "loss": 46.0025, "step": 4523 }, { "epoch": 0.6176530821216465, "grad_norm": 0.1707039475440979, "learning_rate": 3.2708428059249436e-05, "loss": 46.0065, "step": 4524 }, { "epoch": 0.6177896102123012, "grad_norm": 0.03649646043777466, "learning_rate": 3.268802724564944e-05, "loss": 46.0034, "step": 4525 }, { "epoch": 0.6179261383029558, "grad_norm": 0.03332537040114403, "learning_rate": 3.2667629706141015e-05, "loss": 46.0015, "step": 4526 }, { "epoch": 0.6180626663936105, "grad_norm": 0.041939593851566315, "learning_rate": 3.264723544458185e-05, "loss": 46.0029, "step": 4527 }, { "epoch": 0.6181991944842652, "grad_norm": 0.046000923961400986, "learning_rate": 3.262684446482892e-05, "loss": 46.0096, "step": 4528 }, { "epoch": 0.6183357225749198, "grad_norm": 0.14526598155498505, "learning_rate": 3.2606456770738636e-05, "loss": 46.0053, "step": 4529 }, { "epoch": 0.6184722506655744, "grad_norm": 0.05272410809993744, "learning_rate": 3.2586072366166807e-05, "loss": 46.002, "step": 4530 }, { "epoch": 0.6186087787562291, "grad_norm": 0.047309719026088715, "learning_rate": 3.256569125496858e-05, "loss": 46.0051, "step": 4531 }, { "epoch": 0.6187453068468838, "grad_norm": 0.03672179579734802, "learning_rate": 3.254531344099847e-05, "loss": 46.002, "step": 4532 }, { "epoch": 0.6188818349375383, "grad_norm": 0.06911537796258926, "learning_rate": 3.25249389281104e-05, "loss": 46.0054, "step": 4533 }, { "epoch": 0.619018363028193, "grad_norm": 0.12484586983919144, "learning_rate": 3.250456772015767e-05, "loss": 46.0084, "step": 4534 }, { "epoch": 0.6191548911188477, "grad_norm": 0.20555727183818817, "learning_rate": 3.248419982099293e-05, "loss": 46.0054, "step": 4535 }, { "epoch": 0.6192914192095024, "grad_norm": 
0.0687088817358017, "learning_rate": 3.246383523446822e-05, "loss": 46.0033, "step": 4536 }, { "epoch": 0.619427947300157, "grad_norm": 0.20400714874267578, "learning_rate": 3.244347396443498e-05, "loss": 46.0033, "step": 4537 }, { "epoch": 0.6195644753908116, "grad_norm": 0.13164359331130981, "learning_rate": 3.2423116014743945e-05, "loss": 46.0017, "step": 4538 }, { "epoch": 0.6197010034814663, "grad_norm": 0.08778772503137589, "learning_rate": 3.2402761389245305e-05, "loss": 46.0034, "step": 4539 }, { "epoch": 0.619837531572121, "grad_norm": 0.04187837243080139, "learning_rate": 3.238241009178857e-05, "loss": 46.0035, "step": 4540 }, { "epoch": 0.6199740596627756, "grad_norm": 0.0791497603058815, "learning_rate": 3.236206212622265e-05, "loss": 46.003, "step": 4541 }, { "epoch": 0.6201105877534303, "grad_norm": 0.08875452727079391, "learning_rate": 3.234171749639581e-05, "loss": 46.0066, "step": 4542 }, { "epoch": 0.6202471158440849, "grad_norm": 0.18070253729820251, "learning_rate": 3.232137620615566e-05, "loss": 46.0042, "step": 4543 }, { "epoch": 0.6203836439347395, "grad_norm": 0.10951925814151764, "learning_rate": 3.230103825934925e-05, "loss": 46.0024, "step": 4544 }, { "epoch": 0.6205201720253942, "grad_norm": 0.12382256984710693, "learning_rate": 3.228070365982293e-05, "loss": 46.0002, "step": 4545 }, { "epoch": 0.6206567001160489, "grad_norm": 0.11967556923627853, "learning_rate": 3.226037241142243e-05, "loss": 46.0055, "step": 4546 }, { "epoch": 0.6207932282067036, "grad_norm": 0.2914543151855469, "learning_rate": 3.2240044517992865e-05, "loss": 46.0046, "step": 4547 }, { "epoch": 0.6209297562973581, "grad_norm": 0.34095004200935364, "learning_rate": 3.2219719983378714e-05, "loss": 46.0063, "step": 4548 }, { "epoch": 0.6210662843880128, "grad_norm": 0.09049853682518005, "learning_rate": 3.2199398811423814e-05, "loss": 46.0072, "step": 4549 }, { "epoch": 0.6212028124786675, "grad_norm": 0.06460541486740112, "learning_rate": 3.217908100597133e-05, "loss": 46.0037, "step": 4550 }, { "epoch": 0.6213393405693222, "grad_norm": 0.11948754638433456, "learning_rate": 3.215876657086389e-05, "loss": 46.0056, "step": 4551 }, { "epoch": 0.6214758686599768, "grad_norm": 0.04774630814790726, "learning_rate": 3.2138455509943366e-05, "loss": 46.0033, "step": 4552 }, { "epoch": 0.6216123967506314, "grad_norm": 0.24433806538581848, "learning_rate": 3.211814782705108e-05, "loss": 46.0069, "step": 4553 }, { "epoch": 0.6217489248412861, "grad_norm": 0.11666325479745865, "learning_rate": 3.209784352602763e-05, "loss": 46.0088, "step": 4554 }, { "epoch": 0.6218854529319408, "grad_norm": 0.056736260652542114, "learning_rate": 3.2077542610713093e-05, "loss": 46.0027, "step": 4555 }, { "epoch": 0.6220219810225954, "grad_norm": 0.04487067461013794, "learning_rate": 3.20572450849468e-05, "loss": 46.0024, "step": 4556 }, { "epoch": 0.6221585091132501, "grad_norm": 0.04770679026842117, "learning_rate": 3.203695095256747e-05, "loss": 46.005, "step": 4557 }, { "epoch": 0.6222950372039047, "grad_norm": 0.08080880343914032, "learning_rate": 3.201666021741322e-05, "loss": 46.0006, "step": 4558 }, { "epoch": 0.6224315652945593, "grad_norm": 0.03922185301780701, "learning_rate": 3.199637288332148e-05, "loss": 46.006, "step": 4559 }, { "epoch": 0.622568093385214, "grad_norm": 0.07565814256668091, "learning_rate": 3.197608895412905e-05, "loss": 46.0087, "step": 4560 }, { "epoch": 0.6227046214758687, "grad_norm": 0.051719412207603455, "learning_rate": 3.1955808433672065e-05, "loss": 46.001, "step": 4561 }, { "epoch": 
0.6228411495665234, "grad_norm": 0.07565624266862869, "learning_rate": 3.1935531325786074e-05, "loss": 46.0062, "step": 4562 }, { "epoch": 0.6229776776571779, "grad_norm": 0.09571212530136108, "learning_rate": 3.191525763430594e-05, "loss": 46.0059, "step": 4563 }, { "epoch": 0.6231142057478326, "grad_norm": 0.0971112847328186, "learning_rate": 3.189498736306584e-05, "loss": 46.0042, "step": 4564 }, { "epoch": 0.6232507338384873, "grad_norm": 0.057434991002082825, "learning_rate": 3.187472051589941e-05, "loss": 46.0008, "step": 4565 }, { "epoch": 0.623387261929142, "grad_norm": 0.03532848134636879, "learning_rate": 3.1854457096639534e-05, "loss": 46.0103, "step": 4566 }, { "epoch": 0.6235237900197965, "grad_norm": 0.14656709134578705, "learning_rate": 3.183419710911851e-05, "loss": 46.0131, "step": 4567 }, { "epoch": 0.6236603181104512, "grad_norm": 0.07054292410612106, "learning_rate": 3.181394055716794e-05, "loss": 46.0082, "step": 4568 }, { "epoch": 0.6237968462011059, "grad_norm": 0.16381940245628357, "learning_rate": 3.1793687444618834e-05, "loss": 46.0033, "step": 4569 }, { "epoch": 0.6239333742917605, "grad_norm": 0.05053531751036644, "learning_rate": 3.177343777530151e-05, "loss": 46.002, "step": 4570 }, { "epoch": 0.6240699023824152, "grad_norm": 0.10566481202840805, "learning_rate": 3.1753191553045634e-05, "loss": 46.0045, "step": 4571 }, { "epoch": 0.6242064304730698, "grad_norm": 0.04983733221888542, "learning_rate": 3.173294878168025e-05, "loss": 46.0019, "step": 4572 }, { "epoch": 0.6243429585637245, "grad_norm": 0.10711231082677841, "learning_rate": 3.171270946503373e-05, "loss": 46.0072, "step": 4573 }, { "epoch": 0.6244794866543791, "grad_norm": 0.054744262248277664, "learning_rate": 3.169247360693377e-05, "loss": 46.0023, "step": 4574 }, { "epoch": 0.6246160147450338, "grad_norm": 0.05453157797455788, "learning_rate": 3.167224121120745e-05, "loss": 46.0001, "step": 4575 }, { "epoch": 0.6247525428356885, "grad_norm": 0.13131502270698547, "learning_rate": 3.165201228168119e-05, "loss": 46.0088, "step": 4576 }, { "epoch": 0.624889070926343, "grad_norm": 0.15902508795261383, "learning_rate": 3.163178682218074e-05, "loss": 46.0117, "step": 4577 }, { "epoch": 0.6250255990169977, "grad_norm": 0.09789088368415833, "learning_rate": 3.161156483653119e-05, "loss": 46.0093, "step": 4578 }, { "epoch": 0.6251621271076524, "grad_norm": 0.07676657289266586, "learning_rate": 3.159134632855699e-05, "loss": 46.0002, "step": 4579 }, { "epoch": 0.6252986551983071, "grad_norm": 0.04997127503156662, "learning_rate": 3.157113130208191e-05, "loss": 46.0126, "step": 4580 }, { "epoch": 0.6254351832889617, "grad_norm": 0.09231780469417572, "learning_rate": 3.1550919760929116e-05, "loss": 46.0032, "step": 4581 }, { "epoch": 0.6255717113796163, "grad_norm": 0.10441574454307556, "learning_rate": 3.153071170892101e-05, "loss": 46.001, "step": 4582 }, { "epoch": 0.625708239470271, "grad_norm": 0.04877331480383873, "learning_rate": 3.151050714987946e-05, "loss": 46.0026, "step": 4583 }, { "epoch": 0.6258447675609257, "grad_norm": 0.10178231447935104, "learning_rate": 3.1490306087625574e-05, "loss": 46.0036, "step": 4584 }, { "epoch": 0.6259812956515803, "grad_norm": 0.0571167878806591, "learning_rate": 3.147010852597987e-05, "loss": 46.0041, "step": 4585 }, { "epoch": 0.626117823742235, "grad_norm": 0.05723447725176811, "learning_rate": 3.144991446876212e-05, "loss": 46.007, "step": 4586 }, { "epoch": 0.6262543518328896, "grad_norm": 0.06631124764680862, "learning_rate": 3.142972391979153e-05, "loss": 
46.005, "step": 4587 }, { "epoch": 0.6263908799235443, "grad_norm": 0.061154067516326904, "learning_rate": 3.140953688288658e-05, "loss": 46.0003, "step": 4588 }, { "epoch": 0.6265274080141989, "grad_norm": 0.06005484610795975, "learning_rate": 3.13893533618651e-05, "loss": 46.0071, "step": 4589 }, { "epoch": 0.6266639361048536, "grad_norm": 0.09905415773391724, "learning_rate": 3.136917336054426e-05, "loss": 46.0125, "step": 4590 }, { "epoch": 0.6268004641955083, "grad_norm": 0.09662015736103058, "learning_rate": 3.134899688274058e-05, "loss": 46.0022, "step": 4591 }, { "epoch": 0.6269369922861628, "grad_norm": 0.041027866303920746, "learning_rate": 3.132882393226986e-05, "loss": 46.0062, "step": 4592 }, { "epoch": 0.6270735203768175, "grad_norm": 0.16689112782478333, "learning_rate": 3.130865451294727e-05, "loss": 46.007, "step": 4593 }, { "epoch": 0.6272100484674722, "grad_norm": 0.12646625936031342, "learning_rate": 3.128848862858734e-05, "loss": 46.0002, "step": 4594 }, { "epoch": 0.6273465765581269, "grad_norm": 0.2983611822128296, "learning_rate": 3.1268326283003904e-05, "loss": 46.007, "step": 4595 }, { "epoch": 0.6274831046487815, "grad_norm": 0.13704490661621094, "learning_rate": 3.124816748001008e-05, "loss": 46.0043, "step": 4596 }, { "epoch": 0.6276196327394361, "grad_norm": 0.11226113885641098, "learning_rate": 3.1228012223418404e-05, "loss": 46.0052, "step": 4597 }, { "epoch": 0.6277561608300908, "grad_norm": 0.24353379011154175, "learning_rate": 3.120786051704068e-05, "loss": 46.003, "step": 4598 }, { "epoch": 0.6278926889207455, "grad_norm": 0.17430125176906586, "learning_rate": 3.118771236468807e-05, "loss": 46.0, "step": 4599 }, { "epoch": 0.6280292170114001, "grad_norm": 0.2133212834596634, "learning_rate": 3.116756777017102e-05, "loss": 46.0028, "step": 4600 }, { "epoch": 0.6281657451020547, "grad_norm": 0.09712287783622742, "learning_rate": 3.114742673729938e-05, "loss": 46.0049, "step": 4601 }, { "epoch": 0.6283022731927094, "grad_norm": 0.0928475633263588, "learning_rate": 3.1127289269882253e-05, "loss": 46.0033, "step": 4602 }, { "epoch": 0.628438801283364, "grad_norm": 0.049613796174526215, "learning_rate": 3.110715537172809e-05, "loss": 46.0002, "step": 4603 }, { "epoch": 0.6285753293740187, "grad_norm": 0.08613001555204391, "learning_rate": 3.10870250466447e-05, "loss": 46.0009, "step": 4604 }, { "epoch": 0.6287118574646734, "grad_norm": 0.08995973318815231, "learning_rate": 3.106689829843919e-05, "loss": 46.0027, "step": 4605 }, { "epoch": 0.628848385555328, "grad_norm": 0.05751065909862518, "learning_rate": 3.104677513091797e-05, "loss": 46.0019, "step": 4606 }, { "epoch": 0.6289849136459826, "grad_norm": 0.16759870946407318, "learning_rate": 3.102665554788678e-05, "loss": 46.001, "step": 4607 }, { "epoch": 0.6291214417366373, "grad_norm": 0.06407050788402557, "learning_rate": 3.100653955315073e-05, "loss": 46.0041, "step": 4608 }, { "epoch": 0.629257969827292, "grad_norm": 0.0567891001701355, "learning_rate": 3.098642715051422e-05, "loss": 46.0018, "step": 4609 }, { "epoch": 0.6293944979179467, "grad_norm": 0.0403071753680706, "learning_rate": 3.0966318343780917e-05, "loss": 46.0071, "step": 4610 }, { "epoch": 0.6295310260086012, "grad_norm": 0.13815295696258545, "learning_rate": 3.09462131367539e-05, "loss": 46.005, "step": 4611 }, { "epoch": 0.6296675540992559, "grad_norm": 0.08300232887268066, "learning_rate": 3.092611153323552e-05, "loss": 46.0027, "step": 4612 }, { "epoch": 0.6298040821899106, "grad_norm": 0.09941402822732925, "learning_rate": 
3.090601353702746e-05, "loss": 46.0072, "step": 4613 }, { "epoch": 0.6299406102805652, "grad_norm": 0.09820212423801422, "learning_rate": 3.0885919151930674e-05, "loss": 46.0006, "step": 4614 }, { "epoch": 0.6300771383712199, "grad_norm": 0.06900647282600403, "learning_rate": 3.086582838174551e-05, "loss": 46.0041, "step": 4615 }, { "epoch": 0.6302136664618745, "grad_norm": 0.10828868299722672, "learning_rate": 3.084574123027159e-05, "loss": 46.006, "step": 4616 }, { "epoch": 0.6303501945525292, "grad_norm": 0.10592019557952881, "learning_rate": 3.082565770130783e-05, "loss": 46.0102, "step": 4617 }, { "epoch": 0.6304867226431838, "grad_norm": 0.1509520411491394, "learning_rate": 3.0805577798652525e-05, "loss": 46.0016, "step": 4618 }, { "epoch": 0.6306232507338385, "grad_norm": 0.06290264427661896, "learning_rate": 3.078550152610321e-05, "loss": 46.0006, "step": 4619 }, { "epoch": 0.6307597788244932, "grad_norm": 0.05548783391714096, "learning_rate": 3.0765428887456794e-05, "loss": 46.0077, "step": 4620 }, { "epoch": 0.6308963069151478, "grad_norm": 0.06709595769643784, "learning_rate": 3.0745359886509446e-05, "loss": 46.0023, "step": 4621 }, { "epoch": 0.6310328350058024, "grad_norm": 0.05121311917901039, "learning_rate": 3.0725294527056716e-05, "loss": 46.001, "step": 4622 }, { "epoch": 0.6311693630964571, "grad_norm": 0.11325531452894211, "learning_rate": 3.070523281289338e-05, "loss": 46.0138, "step": 4623 }, { "epoch": 0.6313058911871118, "grad_norm": 0.04973983019590378, "learning_rate": 3.068517474781359e-05, "loss": 46.0079, "step": 4624 }, { "epoch": 0.6314424192777665, "grad_norm": 0.14332814514636993, "learning_rate": 3.06651203356108e-05, "loss": 46.0027, "step": 4625 }, { "epoch": 0.631578947368421, "grad_norm": 0.05995268002152443, "learning_rate": 3.0645069580077745e-05, "loss": 46.0032, "step": 4626 }, { "epoch": 0.6317154754590757, "grad_norm": 0.03412984311580658, "learning_rate": 3.0625022485006484e-05, "loss": 46.0048, "step": 4627 }, { "epoch": 0.6318520035497304, "grad_norm": 0.2029622346162796, "learning_rate": 3.060497905418836e-05, "loss": 46.001, "step": 4628 }, { "epoch": 0.631988531640385, "grad_norm": 0.04881516844034195, "learning_rate": 3.0584939291414095e-05, "loss": 46.0024, "step": 4629 }, { "epoch": 0.6321250597310396, "grad_norm": 0.05594048276543617, "learning_rate": 3.056490320047364e-05, "loss": 46.0128, "step": 4630 }, { "epoch": 0.6322615878216943, "grad_norm": 0.10044699162244797, "learning_rate": 3.0544870785156266e-05, "loss": 46.0018, "step": 4631 }, { "epoch": 0.632398115912349, "grad_norm": 0.08106653392314911, "learning_rate": 3.05248420492506e-05, "loss": 46.0075, "step": 4632 }, { "epoch": 0.6325346440030036, "grad_norm": 0.0761239230632782, "learning_rate": 3.0504816996544512e-05, "loss": 46.0053, "step": 4633 }, { "epoch": 0.6326711720936583, "grad_norm": 0.058438509702682495, "learning_rate": 3.04847956308252e-05, "loss": 46.0085, "step": 4634 }, { "epoch": 0.6328077001843129, "grad_norm": 0.03653903305530548, "learning_rate": 3.0464777955879155e-05, "loss": 46.0078, "step": 4635 }, { "epoch": 0.6329442282749675, "grad_norm": 0.14104998111724854, "learning_rate": 3.0444763975492208e-05, "loss": 46.0036, "step": 4636 }, { "epoch": 0.6330807563656222, "grad_norm": 0.0627252608537674, "learning_rate": 3.0424753693449437e-05, "loss": 46.0046, "step": 4637 }, { "epoch": 0.6332172844562769, "grad_norm": 0.06386160105466843, "learning_rate": 3.040474711353525e-05, "loss": 46.0052, "step": 4638 }, { "epoch": 0.6333538125469316, "grad_norm": 
0.25657951831817627, "learning_rate": 3.0384744239533353e-05, "loss": 46.0076, "step": 4639 }, { "epoch": 0.6334903406375861, "grad_norm": 0.054040975868701935, "learning_rate": 3.0364745075226765e-05, "loss": 46.002, "step": 4640 }, { "epoch": 0.6336268687282408, "grad_norm": 0.049654748290777206, "learning_rate": 3.0344749624397762e-05, "loss": 46.0019, "step": 4641 }, { "epoch": 0.6337633968188955, "grad_norm": 0.03481779992580414, "learning_rate": 3.032475789082795e-05, "loss": 46.0036, "step": 4642 }, { "epoch": 0.6338999249095502, "grad_norm": 0.1168549582362175, "learning_rate": 3.0304769878298235e-05, "loss": 46.0023, "step": 4643 }, { "epoch": 0.6340364530002048, "grad_norm": 0.05290257930755615, "learning_rate": 3.0284785590588804e-05, "loss": 46.0005, "step": 4644 }, { "epoch": 0.6341729810908594, "grad_norm": 0.1102796196937561, "learning_rate": 3.026480503147912e-05, "loss": 46.0036, "step": 4645 }, { "epoch": 0.6343095091815141, "grad_norm": 0.10004972666501999, "learning_rate": 3.024482820474802e-05, "loss": 46.0034, "step": 4646 }, { "epoch": 0.6344460372721688, "grad_norm": 0.3043212592601776, "learning_rate": 3.0224855114173532e-05, "loss": 46.0081, "step": 4647 }, { "epoch": 0.6345825653628234, "grad_norm": 0.1219576820731163, "learning_rate": 3.0204885763533064e-05, "loss": 46.0018, "step": 4648 }, { "epoch": 0.6347190934534781, "grad_norm": 0.05279252305626869, "learning_rate": 3.0184920156603223e-05, "loss": 46.0096, "step": 4649 }, { "epoch": 0.6348556215441327, "grad_norm": 0.14519484341144562, "learning_rate": 3.0164958297160027e-05, "loss": 46.0054, "step": 4650 }, { "epoch": 0.6349921496347873, "grad_norm": 0.1345735341310501, "learning_rate": 3.014500018897869e-05, "loss": 46.0143, "step": 4651 }, { "epoch": 0.635128677725442, "grad_norm": 0.08857832849025726, "learning_rate": 3.0125045835833742e-05, "loss": 46.0022, "step": 4652 }, { "epoch": 0.6352652058160967, "grad_norm": 0.03969201445579529, "learning_rate": 3.010509524149902e-05, "loss": 46.0012, "step": 4653 }, { "epoch": 0.6354017339067514, "grad_norm": 0.06840205192565918, "learning_rate": 3.0085148409747654e-05, "loss": 46.0051, "step": 4654 }, { "epoch": 0.6355382619974059, "grad_norm": 0.055424146354198456, "learning_rate": 3.0065205344352027e-05, "loss": 46.0044, "step": 4655 }, { "epoch": 0.6356747900880606, "grad_norm": 0.11025039851665497, "learning_rate": 3.0045266049083827e-05, "loss": 46.0032, "step": 4656 }, { "epoch": 0.6358113181787153, "grad_norm": 0.05694626644253731, "learning_rate": 3.0025330527714046e-05, "loss": 46.0022, "step": 4657 }, { "epoch": 0.63594784626937, "grad_norm": 0.1051018014550209, "learning_rate": 3.000539878401296e-05, "loss": 46.0013, "step": 4658 }, { "epoch": 0.6360843743600246, "grad_norm": 0.07724828273057938, "learning_rate": 2.9985470821750095e-05, "loss": 46.0011, "step": 4659 }, { "epoch": 0.6362209024506792, "grad_norm": 0.14080318808555603, "learning_rate": 2.9965546644694288e-05, "loss": 46.0067, "step": 4660 }, { "epoch": 0.6363574305413339, "grad_norm": 0.0979226604104042, "learning_rate": 2.9945626256613678e-05, "loss": 46.006, "step": 4661 }, { "epoch": 0.6364939586319885, "grad_norm": 0.10992138832807541, "learning_rate": 2.992570966127566e-05, "loss": 46.0005, "step": 4662 }, { "epoch": 0.6366304867226432, "grad_norm": 0.06215823441743851, "learning_rate": 2.990579686244689e-05, "loss": 46.0043, "step": 4663 }, { "epoch": 0.6367670148132978, "grad_norm": 0.03986900672316551, "learning_rate": 2.988588786389339e-05, "loss": 46.0026, "step": 4664 }, { 
"epoch": 0.6369035429039525, "grad_norm": 0.04558256268501282, "learning_rate": 2.9865982669380373e-05, "loss": 46.0106, "step": 4665 }, { "epoch": 0.6370400709946071, "grad_norm": 0.102702297270298, "learning_rate": 2.9846081282672377e-05, "loss": 46.0026, "step": 4666 }, { "epoch": 0.6371765990852618, "grad_norm": 0.10449610650539398, "learning_rate": 2.982618370753319e-05, "loss": 46.016, "step": 4667 }, { "epoch": 0.6373131271759165, "grad_norm": 0.10499338805675507, "learning_rate": 2.9806289947725947e-05, "loss": 46.0031, "step": 4668 }, { "epoch": 0.637449655266571, "grad_norm": 0.16130296885967255, "learning_rate": 2.9786400007012975e-05, "loss": 46.0083, "step": 4669 }, { "epoch": 0.6375861833572257, "grad_norm": 0.18988144397735596, "learning_rate": 2.9766513889155922e-05, "loss": 46.0055, "step": 4670 }, { "epoch": 0.6377227114478804, "grad_norm": 0.1779356598854065, "learning_rate": 2.974663159791573e-05, "loss": 46.001, "step": 4671 }, { "epoch": 0.6378592395385351, "grad_norm": 0.04672381281852722, "learning_rate": 2.97267531370526e-05, "loss": 46.0, "step": 4672 }, { "epoch": 0.6379957676291897, "grad_norm": 0.0891195610165596, "learning_rate": 2.9706878510325975e-05, "loss": 46.0106, "step": 4673 }, { "epoch": 0.6381322957198443, "grad_norm": 0.11720253527164459, "learning_rate": 2.9687007721494616e-05, "loss": 46.0099, "step": 4674 }, { "epoch": 0.638268823810499, "grad_norm": 0.13089783489704132, "learning_rate": 2.9667140774316553e-05, "loss": 46.0088, "step": 4675 }, { "epoch": 0.6384053519011537, "grad_norm": 0.08934959769248962, "learning_rate": 2.964727767254909e-05, "loss": 46.0076, "step": 4676 }, { "epoch": 0.6385418799918083, "grad_norm": 0.044297657907009125, "learning_rate": 2.9627418419948753e-05, "loss": 46.0001, "step": 4677 }, { "epoch": 0.638678408082463, "grad_norm": 0.10006847977638245, "learning_rate": 2.9607563020271446e-05, "loss": 46.0108, "step": 4678 }, { "epoch": 0.6388149361731176, "grad_norm": 0.09591533988714218, "learning_rate": 2.958771147727224e-05, "loss": 46.0073, "step": 4679 }, { "epoch": 0.6389514642637723, "grad_norm": 0.07383422553539276, "learning_rate": 2.956786379470553e-05, "loss": 46.0081, "step": 4680 }, { "epoch": 0.6390879923544269, "grad_norm": 0.1241079792380333, "learning_rate": 2.9548019976324938e-05, "loss": 46.0076, "step": 4681 }, { "epoch": 0.6392245204450816, "grad_norm": 0.0649513527750969, "learning_rate": 2.9528180025883445e-05, "loss": 46.0077, "step": 4682 }, { "epoch": 0.6393610485357363, "grad_norm": 0.04798297956585884, "learning_rate": 2.9508343947133195e-05, "loss": 46.0002, "step": 4683 }, { "epoch": 0.6394975766263908, "grad_norm": 0.2512376308441162, "learning_rate": 2.9488511743825646e-05, "loss": 46.0062, "step": 4684 }, { "epoch": 0.6396341047170455, "grad_norm": 0.06962765008211136, "learning_rate": 2.9468683419711552e-05, "loss": 46.0008, "step": 4685 }, { "epoch": 0.6397706328077002, "grad_norm": 0.18680931627750397, "learning_rate": 2.94488589785409e-05, "loss": 46.0, "step": 4686 }, { "epoch": 0.6399071608983549, "grad_norm": 0.11666570603847504, "learning_rate": 2.942903842406292e-05, "loss": 46.0021, "step": 4687 }, { "epoch": 0.6400436889890095, "grad_norm": 0.07213084399700165, "learning_rate": 2.9409221760026147e-05, "loss": 46.0112, "step": 4688 }, { "epoch": 0.6401802170796641, "grad_norm": 0.10328512638807297, "learning_rate": 2.9389408990178373e-05, "loss": 46.0023, "step": 4689 }, { "epoch": 0.6403167451703188, "grad_norm": 0.09569231420755386, "learning_rate": 2.936960011826666e-05, 
"loss": 46.0027, "step": 4690 }, { "epoch": 0.6404532732609735, "grad_norm": 0.09780403971672058, "learning_rate": 2.9349795148037284e-05, "loss": 46.0099, "step": 4691 }, { "epoch": 0.6405898013516281, "grad_norm": 0.06607670336961746, "learning_rate": 2.9329994083235857e-05, "loss": 46.0055, "step": 4692 }, { "epoch": 0.6407263294422828, "grad_norm": 0.05519267916679382, "learning_rate": 2.9310196927607192e-05, "loss": 46.0045, "step": 4693 }, { "epoch": 0.6408628575329374, "grad_norm": 0.09503115713596344, "learning_rate": 2.9290403684895406e-05, "loss": 46.0048, "step": 4694 }, { "epoch": 0.640999385623592, "grad_norm": 0.051873210817575455, "learning_rate": 2.9270614358843816e-05, "loss": 46.0067, "step": 4695 }, { "epoch": 0.6411359137142467, "grad_norm": 0.12564176321029663, "learning_rate": 2.9250828953195088e-05, "loss": 46.002, "step": 4696 }, { "epoch": 0.6412724418049014, "grad_norm": 0.1572524458169937, "learning_rate": 2.9231047471691063e-05, "loss": 46.008, "step": 4697 }, { "epoch": 0.641408969895556, "grad_norm": 0.06374054402112961, "learning_rate": 2.921126991807287e-05, "loss": 46.0091, "step": 4698 }, { "epoch": 0.6415454979862106, "grad_norm": 0.2555488049983978, "learning_rate": 2.9191496296080935e-05, "loss": 46.0035, "step": 4699 }, { "epoch": 0.6416820260768653, "grad_norm": 0.3438304364681244, "learning_rate": 2.9171726609454874e-05, "loss": 46.0, "step": 4700 }, { "epoch": 0.64181855416752, "grad_norm": 0.11261097341775894, "learning_rate": 2.9151960861933614e-05, "loss": 46.0037, "step": 4701 }, { "epoch": 0.6419550822581747, "grad_norm": 0.11238928884267807, "learning_rate": 2.913219905725526e-05, "loss": 46.0012, "step": 4702 }, { "epoch": 0.6420916103488292, "grad_norm": 0.1035400927066803, "learning_rate": 2.911244119915727e-05, "loss": 46.0034, "step": 4703 }, { "epoch": 0.6422281384394839, "grad_norm": 0.11268958449363708, "learning_rate": 2.9092687291376296e-05, "loss": 46.0042, "step": 4704 }, { "epoch": 0.6423646665301386, "grad_norm": 0.03359321877360344, "learning_rate": 2.907293733764826e-05, "loss": 46.0003, "step": 4705 }, { "epoch": 0.6425011946207932, "grad_norm": 0.11333522945642471, "learning_rate": 2.9053191341708318e-05, "loss": 46.0071, "step": 4706 }, { "epoch": 0.6426377227114479, "grad_norm": 0.054761484265327454, "learning_rate": 2.9033449307290912e-05, "loss": 46.0053, "step": 4707 }, { "epoch": 0.6427742508021025, "grad_norm": 0.06534621864557266, "learning_rate": 2.9013711238129693e-05, "loss": 46.002, "step": 4708 }, { "epoch": 0.6429107788927572, "grad_norm": 0.09796672314405441, "learning_rate": 2.8993977137957595e-05, "loss": 46.0052, "step": 4709 }, { "epoch": 0.6430473069834118, "grad_norm": 0.0955348089337349, "learning_rate": 2.897424701050679e-05, "loss": 46.0027, "step": 4710 }, { "epoch": 0.6431838350740665, "grad_norm": 0.0772956907749176, "learning_rate": 2.8954520859508692e-05, "loss": 46.002, "step": 4711 }, { "epoch": 0.6433203631647212, "grad_norm": 0.1752818077802658, "learning_rate": 2.893479868869397e-05, "loss": 46.01, "step": 4712 }, { "epoch": 0.6434568912553758, "grad_norm": 0.09373238682746887, "learning_rate": 2.8915080501792542e-05, "loss": 46.0085, "step": 4713 }, { "epoch": 0.6435934193460304, "grad_norm": 0.03671931475400925, "learning_rate": 2.8895366302533567e-05, "loss": 46.0063, "step": 4714 }, { "epoch": 0.6437299474366851, "grad_norm": 0.0472961887717247, "learning_rate": 2.8875656094645464e-05, "loss": 46.0047, "step": 4715 }, { "epoch": 0.6438664755273398, "grad_norm": 0.1164243295788765, 
"learning_rate": 2.885594988185587e-05, "loss": 46.0046, "step": 4716 }, { "epoch": 0.6440030036179945, "grad_norm": 0.1737416833639145, "learning_rate": 2.883624766789169e-05, "loss": 46.0033, "step": 4717 }, { "epoch": 0.644139531708649, "grad_norm": 0.2091880440711975, "learning_rate": 2.8816549456479064e-05, "loss": 46.0041, "step": 4718 }, { "epoch": 0.6442760597993037, "grad_norm": 0.14715908467769623, "learning_rate": 2.879685525134338e-05, "loss": 46.0027, "step": 4719 }, { "epoch": 0.6444125878899584, "grad_norm": 0.09529249370098114, "learning_rate": 2.8777165056209256e-05, "loss": 46.0042, "step": 4720 }, { "epoch": 0.644549115980613, "grad_norm": 0.05609596520662308, "learning_rate": 2.875747887480057e-05, "loss": 46.0015, "step": 4721 }, { "epoch": 0.6446856440712677, "grad_norm": 0.045765068382024765, "learning_rate": 2.873779671084042e-05, "loss": 46.0018, "step": 4722 }, { "epoch": 0.6448221721619223, "grad_norm": 0.07264920324087143, "learning_rate": 2.8718118568051145e-05, "loss": 46.0036, "step": 4723 }, { "epoch": 0.644958700252577, "grad_norm": 0.10702801495790482, "learning_rate": 2.8698444450154393e-05, "loss": 46.007, "step": 4724 }, { "epoch": 0.6450952283432316, "grad_norm": 0.1312239021062851, "learning_rate": 2.867877436087092e-05, "loss": 46.0074, "step": 4725 }, { "epoch": 0.6452317564338863, "grad_norm": 0.11333466321229935, "learning_rate": 2.8659108303920822e-05, "loss": 46.0022, "step": 4726 }, { "epoch": 0.6453682845245409, "grad_norm": 0.1322934329509735, "learning_rate": 2.8639446283023386e-05, "loss": 46.0093, "step": 4727 }, { "epoch": 0.6455048126151955, "grad_norm": 0.06821990758180618, "learning_rate": 2.86197883018972e-05, "loss": 46.0082, "step": 4728 }, { "epoch": 0.6456413407058502, "grad_norm": 0.0796675980091095, "learning_rate": 2.860013436425999e-05, "loss": 46.0081, "step": 4729 }, { "epoch": 0.6457778687965049, "grad_norm": 0.25648030638694763, "learning_rate": 2.8580484473828762e-05, "loss": 46.0072, "step": 4730 }, { "epoch": 0.6459143968871596, "grad_norm": 0.054088201373815536, "learning_rate": 2.8560838634319796e-05, "loss": 46.0084, "step": 4731 }, { "epoch": 0.6460509249778141, "grad_norm": 0.07256486266851425, "learning_rate": 2.8541196849448583e-05, "loss": 46.0051, "step": 4732 }, { "epoch": 0.6461874530684688, "grad_norm": 0.03981205075979233, "learning_rate": 2.8521559122929786e-05, "loss": 46.004, "step": 4733 }, { "epoch": 0.6463239811591235, "grad_norm": 0.04326188191771507, "learning_rate": 2.8501925458477353e-05, "loss": 46.001, "step": 4734 }, { "epoch": 0.6464605092497782, "grad_norm": 0.08594322949647903, "learning_rate": 2.8482295859804496e-05, "loss": 46.0001, "step": 4735 }, { "epoch": 0.6465970373404328, "grad_norm": 0.19009698927402496, "learning_rate": 2.846267033062362e-05, "loss": 46.0064, "step": 4736 }, { "epoch": 0.6467335654310874, "grad_norm": 0.058170538395643234, "learning_rate": 2.8443048874646343e-05, "loss": 46.0073, "step": 4737 }, { "epoch": 0.6468700935217421, "grad_norm": 0.05463339760899544, "learning_rate": 2.8423431495583546e-05, "loss": 46.0113, "step": 4738 }, { "epoch": 0.6470066216123967, "grad_norm": 0.0741669163107872, "learning_rate": 2.8403818197145316e-05, "loss": 46.0023, "step": 4739 }, { "epoch": 0.6471431497030514, "grad_norm": 0.18060268461704254, "learning_rate": 2.8384208983040994e-05, "loss": 46.0039, "step": 4740 }, { "epoch": 0.6472796777937061, "grad_norm": 0.10554227232933044, "learning_rate": 2.836460385697911e-05, "loss": 46.0055, "step": 4741 }, { "epoch": 
0.6474162058843607, "grad_norm": 0.050257936120033264, "learning_rate": 2.834500282266746e-05, "loss": 46.0015, "step": 4742 }, { "epoch": 0.6475527339750153, "grad_norm": 0.08740747720003128, "learning_rate": 2.832540588381305e-05, "loss": 46.0013, "step": 4743 }, { "epoch": 0.64768926206567, "grad_norm": 0.24838687479496002, "learning_rate": 2.8305813044122097e-05, "loss": 46.0075, "step": 4744 }, { "epoch": 0.6478257901563247, "grad_norm": 0.12099893391132355, "learning_rate": 2.8286224307300073e-05, "loss": 46.0013, "step": 4745 }, { "epoch": 0.6479623182469794, "grad_norm": 0.08489709347486496, "learning_rate": 2.8266639677051647e-05, "loss": 46.0104, "step": 4746 }, { "epoch": 0.6480988463376339, "grad_norm": 0.07468590885400772, "learning_rate": 2.8247059157080736e-05, "loss": 46.002, "step": 4747 }, { "epoch": 0.6482353744282886, "grad_norm": 0.24630199372768402, "learning_rate": 2.8227482751090445e-05, "loss": 46.0031, "step": 4748 }, { "epoch": 0.6483719025189433, "grad_norm": 0.13917523622512817, "learning_rate": 2.8207910462783138e-05, "loss": 46.002, "step": 4749 }, { "epoch": 0.648508430609598, "grad_norm": 0.759537398815155, "learning_rate": 2.8188342295860383e-05, "loss": 46.0049, "step": 4750 }, { "epoch": 0.6486449587002526, "grad_norm": 0.07110653817653656, "learning_rate": 2.816877825402294e-05, "loss": 46.0061, "step": 4751 }, { "epoch": 0.6487814867909072, "grad_norm": 0.1953609734773636, "learning_rate": 2.8149218340970884e-05, "loss": 46.007, "step": 4752 }, { "epoch": 0.6489180148815619, "grad_norm": 0.15844453871250153, "learning_rate": 2.812966256040339e-05, "loss": 46.0016, "step": 4753 }, { "epoch": 0.6490545429722165, "grad_norm": 0.13615483045578003, "learning_rate": 2.8110110916018917e-05, "loss": 46.0023, "step": 4754 }, { "epoch": 0.6491910710628712, "grad_norm": 0.18911349773406982, "learning_rate": 2.8090563411515115e-05, "loss": 46.0006, "step": 4755 }, { "epoch": 0.6493275991535259, "grad_norm": 0.08866765350103378, "learning_rate": 2.8071020050588925e-05, "loss": 46.0017, "step": 4756 }, { "epoch": 0.6494641272441805, "grad_norm": 0.04026784747838974, "learning_rate": 2.8051480836936385e-05, "loss": 46.0027, "step": 4757 }, { "epoch": 0.6496006553348351, "grad_norm": 0.030250880867242813, "learning_rate": 2.8031945774252798e-05, "loss": 46.0009, "step": 4758 }, { "epoch": 0.6497371834254898, "grad_norm": 0.05392301455140114, "learning_rate": 2.801241486623275e-05, "loss": 46.0034, "step": 4759 }, { "epoch": 0.6498737115161445, "grad_norm": 0.04165443778038025, "learning_rate": 2.7992888116569977e-05, "loss": 46.0074, "step": 4760 }, { "epoch": 0.650010239606799, "grad_norm": 0.08742102980613708, "learning_rate": 2.7973365528957395e-05, "loss": 46.0101, "step": 4761 }, { "epoch": 0.6501467676974537, "grad_norm": 0.10692781209945679, "learning_rate": 2.7953847107087172e-05, "loss": 46.008, "step": 4762 }, { "epoch": 0.6502832957881084, "grad_norm": 0.08035540580749512, "learning_rate": 2.7934332854650737e-05, "loss": 46.007, "step": 4763 }, { "epoch": 0.6504198238787631, "grad_norm": 0.14423607289791107, "learning_rate": 2.791482277533868e-05, "loss": 46.0048, "step": 4764 }, { "epoch": 0.6505563519694177, "grad_norm": 0.05647960677742958, "learning_rate": 2.7895316872840738e-05, "loss": 46.0026, "step": 4765 }, { "epoch": 0.6506928800600723, "grad_norm": 0.12445896863937378, "learning_rate": 2.787581515084599e-05, "loss": 46.0004, "step": 4766 }, { "epoch": 0.650829408150727, "grad_norm": 0.07062388211488724, "learning_rate": 2.7856317613042643e-05, 
"loss": 46.0077, "step": 4767 }, { "epoch": 0.6509659362413817, "grad_norm": 0.18188750743865967, "learning_rate": 2.7836824263118144e-05, "loss": 46.0047, "step": 4768 }, { "epoch": 0.6511024643320363, "grad_norm": 0.0500834584236145, "learning_rate": 2.7817335104759078e-05, "loss": 46.0033, "step": 4769 }, { "epoch": 0.651238992422691, "grad_norm": 0.08266730606555939, "learning_rate": 2.779785014165135e-05, "loss": 46.0052, "step": 4770 }, { "epoch": 0.6513755205133456, "grad_norm": 0.19548416137695312, "learning_rate": 2.7778369377479996e-05, "loss": 46.0156, "step": 4771 }, { "epoch": 0.6515120486040002, "grad_norm": 0.06721244007349014, "learning_rate": 2.7758892815929265e-05, "loss": 46.0065, "step": 4772 }, { "epoch": 0.6516485766946549, "grad_norm": 0.06863345205783844, "learning_rate": 2.7739420460682635e-05, "loss": 46.004, "step": 4773 }, { "epoch": 0.6517851047853096, "grad_norm": 0.1077580377459526, "learning_rate": 2.771995231542277e-05, "loss": 46.0073, "step": 4774 }, { "epoch": 0.6519216328759643, "grad_norm": 0.13528481125831604, "learning_rate": 2.7700488383831548e-05, "loss": 46.0079, "step": 4775 }, { "epoch": 0.6520581609666188, "grad_norm": 0.12392151355743408, "learning_rate": 2.7681028669590036e-05, "loss": 46.0041, "step": 4776 }, { "epoch": 0.6521946890572735, "grad_norm": 0.03640040010213852, "learning_rate": 2.7661573176378523e-05, "loss": 46.0111, "step": 4777 }, { "epoch": 0.6523312171479282, "grad_norm": 0.044853027909994125, "learning_rate": 2.7642121907876484e-05, "loss": 46.0, "step": 4778 }, { "epoch": 0.6524677452385829, "grad_norm": 0.05173332244157791, "learning_rate": 2.7622674867762606e-05, "loss": 46.0063, "step": 4779 }, { "epoch": 0.6526042733292375, "grad_norm": 0.11247345060110092, "learning_rate": 2.7603232059714757e-05, "loss": 46.0056, "step": 4780 }, { "epoch": 0.6527408014198921, "grad_norm": 0.12495771050453186, "learning_rate": 2.758379348741004e-05, "loss": 46.0031, "step": 4781 }, { "epoch": 0.6528773295105468, "grad_norm": 0.07004411518573761, "learning_rate": 2.756435915452471e-05, "loss": 46.0028, "step": 4782 }, { "epoch": 0.6530138576012015, "grad_norm": 0.08164798468351364, "learning_rate": 2.754492906473425e-05, "loss": 46.0059, "step": 4783 }, { "epoch": 0.6531503856918561, "grad_norm": 0.08836957812309265, "learning_rate": 2.752550322171338e-05, "loss": 46.0073, "step": 4784 }, { "epoch": 0.6532869137825108, "grad_norm": 0.04025871679186821, "learning_rate": 2.7506081629135916e-05, "loss": 46.0045, "step": 4785 }, { "epoch": 0.6534234418731654, "grad_norm": 0.07421176135540009, "learning_rate": 2.748666429067496e-05, "loss": 46.009, "step": 4786 }, { "epoch": 0.65355996996382, "grad_norm": 0.159263014793396, "learning_rate": 2.746725121000273e-05, "loss": 46.0146, "step": 4787 }, { "epoch": 0.6536964980544747, "grad_norm": 0.09141864627599716, "learning_rate": 2.744784239079077e-05, "loss": 46.0047, "step": 4788 }, { "epoch": 0.6538330261451294, "grad_norm": 0.040342651307582855, "learning_rate": 2.7428437836709663e-05, "loss": 46.0014, "step": 4789 }, { "epoch": 0.6539695542357841, "grad_norm": 0.1044471487402916, "learning_rate": 2.7409037551429263e-05, "loss": 46.0044, "step": 4790 }, { "epoch": 0.6541060823264386, "grad_norm": 0.07580877095460892, "learning_rate": 2.7389641538618637e-05, "loss": 46.006, "step": 4791 }, { "epoch": 0.6542426104170933, "grad_norm": 0.0687091276049614, "learning_rate": 2.7370249801946026e-05, "loss": 46.0041, "step": 4792 }, { "epoch": 0.654379138507748, "grad_norm": 0.04282739758491516, 
"learning_rate": 2.735086234507881e-05, "loss": 46.0067, "step": 4793 }, { "epoch": 0.6545156665984027, "grad_norm": 0.06786416471004486, "learning_rate": 2.73314791716836e-05, "loss": 46.0047, "step": 4794 }, { "epoch": 0.6546521946890572, "grad_norm": 0.10447469353675842, "learning_rate": 2.7312100285426246e-05, "loss": 46.0108, "step": 4795 }, { "epoch": 0.6547887227797119, "grad_norm": 0.1994817852973938, "learning_rate": 2.7292725689971733e-05, "loss": 46.0002, "step": 4796 }, { "epoch": 0.6549252508703666, "grad_norm": 0.055715061724185944, "learning_rate": 2.727335538898418e-05, "loss": 46.0063, "step": 4797 }, { "epoch": 0.6550617789610212, "grad_norm": 0.35650140047073364, "learning_rate": 2.725398938612702e-05, "loss": 46.0047, "step": 4798 }, { "epoch": 0.6551983070516759, "grad_norm": 0.08465246111154556, "learning_rate": 2.7234627685062798e-05, "loss": 46.0044, "step": 4799 }, { "epoch": 0.6553348351423305, "grad_norm": 0.13254281878471375, "learning_rate": 2.7215270289453264e-05, "loss": 46.0, "step": 4800 }, { "epoch": 0.6554713632329852, "grad_norm": 0.17921675741672516, "learning_rate": 2.7195917202959287e-05, "loss": 46.0119, "step": 4801 }, { "epoch": 0.6556078913236398, "grad_norm": 0.15715597569942474, "learning_rate": 2.7176568429241047e-05, "loss": 46.0027, "step": 4802 }, { "epoch": 0.6557444194142945, "grad_norm": 0.11356619745492935, "learning_rate": 2.7157223971957813e-05, "loss": 46.0059, "step": 4803 }, { "epoch": 0.6558809475049492, "grad_norm": 0.044449154287576675, "learning_rate": 2.7137883834768073e-05, "loss": 46.0089, "step": 4804 }, { "epoch": 0.6560174755956037, "grad_norm": 0.07532574981451035, "learning_rate": 2.711854802132949e-05, "loss": 46.0025, "step": 4805 }, { "epoch": 0.6561540036862584, "grad_norm": 0.09055738151073456, "learning_rate": 2.709921653529891e-05, "loss": 46.0, "step": 4806 }, { "epoch": 0.6562905317769131, "grad_norm": 0.09871842712163925, "learning_rate": 2.707988938033235e-05, "loss": 46.0013, "step": 4807 }, { "epoch": 0.6564270598675678, "grad_norm": 0.03725261241197586, "learning_rate": 2.7060566560085032e-05, "loss": 46.0015, "step": 4808 }, { "epoch": 0.6565635879582224, "grad_norm": 0.14663530886173248, "learning_rate": 2.7041248078211345e-05, "loss": 46.0069, "step": 4809 }, { "epoch": 0.656700116048877, "grad_norm": 0.05916503816843033, "learning_rate": 2.702193393836485e-05, "loss": 46.0041, "step": 4810 }, { "epoch": 0.6568366441395317, "grad_norm": 0.09166333079338074, "learning_rate": 2.7002624144198306e-05, "loss": 46.0044, "step": 4811 }, { "epoch": 0.6569731722301864, "grad_norm": 0.09446903318166733, "learning_rate": 2.6983318699363624e-05, "loss": 46.0081, "step": 4812 }, { "epoch": 0.657109700320841, "grad_norm": 0.05116857960820198, "learning_rate": 2.6964017607511916e-05, "loss": 46.005, "step": 4813 }, { "epoch": 0.6572462284114957, "grad_norm": 0.07296491414308548, "learning_rate": 2.6944720872293456e-05, "loss": 46.01, "step": 4814 }, { "epoch": 0.6573827565021503, "grad_norm": 0.06187593564391136, "learning_rate": 2.6925428497357707e-05, "loss": 46.0052, "step": 4815 }, { "epoch": 0.657519284592805, "grad_norm": 0.11945225298404694, "learning_rate": 2.6906140486353294e-05, "loss": 46.0069, "step": 4816 }, { "epoch": 0.6576558126834596, "grad_norm": 0.1213914304971695, "learning_rate": 2.688685684292802e-05, "loss": 46.007, "step": 4817 }, { "epoch": 0.6577923407741143, "grad_norm": 0.12250461429357529, "learning_rate": 2.686757757072886e-05, "loss": 46.0082, "step": 4818 }, { "epoch": 0.657928868864769, 
"grad_norm": 0.14148138463497162, "learning_rate": 2.684830267340201e-05, "loss": 46.0024, "step": 4819 }, { "epoch": 0.6580653969554235, "grad_norm": 0.08034166693687439, "learning_rate": 2.6829032154592743e-05, "loss": 46.0099, "step": 4820 }, { "epoch": 0.6582019250460782, "grad_norm": 0.1273563802242279, "learning_rate": 2.680976601794558e-05, "loss": 46.0036, "step": 4821 }, { "epoch": 0.6583384531367329, "grad_norm": 0.09743737429380417, "learning_rate": 2.6790504267104167e-05, "loss": 46.0069, "step": 4822 }, { "epoch": 0.6584749812273876, "grad_norm": 0.06770643591880798, "learning_rate": 2.6771246905711393e-05, "loss": 46.0088, "step": 4823 }, { "epoch": 0.6586115093180421, "grad_norm": 0.13797840476036072, "learning_rate": 2.6751993937409226e-05, "loss": 46.0078, "step": 4824 }, { "epoch": 0.6587480374086968, "grad_norm": 0.04027314484119415, "learning_rate": 2.6732745365838828e-05, "loss": 46.0002, "step": 4825 }, { "epoch": 0.6588845654993515, "grad_norm": 0.03713482990860939, "learning_rate": 2.67135011946406e-05, "loss": 46.0046, "step": 4826 }, { "epoch": 0.6590210935900062, "grad_norm": 0.15030567348003387, "learning_rate": 2.6694261427454048e-05, "loss": 46.004, "step": 4827 }, { "epoch": 0.6591576216806608, "grad_norm": 0.09018111228942871, "learning_rate": 2.6675026067917808e-05, "loss": 46.0038, "step": 4828 }, { "epoch": 0.6592941497713154, "grad_norm": 0.05882930010557175, "learning_rate": 2.6655795119669746e-05, "loss": 46.0043, "step": 4829 }, { "epoch": 0.6594306778619701, "grad_norm": 0.16176079213619232, "learning_rate": 2.66365685863469e-05, "loss": 46.0044, "step": 4830 }, { "epoch": 0.6595672059526247, "grad_norm": 0.046270258724689484, "learning_rate": 2.6617346471585437e-05, "loss": 46.0023, "step": 4831 }, { "epoch": 0.6597037340432794, "grad_norm": 0.17053107917308807, "learning_rate": 2.6598128779020693e-05, "loss": 46.0065, "step": 4832 }, { "epoch": 0.6598402621339341, "grad_norm": 0.07112212479114532, "learning_rate": 2.6578915512287185e-05, "loss": 46.0047, "step": 4833 }, { "epoch": 0.6599767902245887, "grad_norm": 0.05074160173535347, "learning_rate": 2.6559706675018576e-05, "loss": 46.0047, "step": 4834 }, { "epoch": 0.6601133183152433, "grad_norm": 0.06939174234867096, "learning_rate": 2.65405022708477e-05, "loss": 46.0043, "step": 4835 }, { "epoch": 0.660249846405898, "grad_norm": 0.09983623027801514, "learning_rate": 2.6521302303406546e-05, "loss": 46.0077, "step": 4836 }, { "epoch": 0.6603863744965527, "grad_norm": 0.07441040128469467, "learning_rate": 2.6502106776326284e-05, "loss": 46.0052, "step": 4837 }, { "epoch": 0.6605229025872074, "grad_norm": 0.1524616777896881, "learning_rate": 2.648291569323721e-05, "loss": 46.0014, "step": 4838 }, { "epoch": 0.6606594306778619, "grad_norm": 0.03465586155653, "learning_rate": 2.646372905776881e-05, "loss": 46.0066, "step": 4839 }, { "epoch": 0.6607959587685166, "grad_norm": 0.035771168768405914, "learning_rate": 2.6444546873549713e-05, "loss": 46.0056, "step": 4840 }, { "epoch": 0.6609324868591713, "grad_norm": 0.05988804250955582, "learning_rate": 2.6425369144207713e-05, "loss": 46.0092, "step": 4841 }, { "epoch": 0.661069014949826, "grad_norm": 0.05560154840350151, "learning_rate": 2.6406195873369754e-05, "loss": 46.002, "step": 4842 }, { "epoch": 0.6612055430404806, "grad_norm": 0.12982569634914398, "learning_rate": 2.638702706466195e-05, "loss": 46.0038, "step": 4843 }, { "epoch": 0.6613420711311352, "grad_norm": 0.1728799194097519, "learning_rate": 2.636786272170956e-05, "loss": 46.0024, "step": 
4844 }, { "epoch": 0.6614785992217899, "grad_norm": 0.09249196946620941, "learning_rate": 2.6348702848136997e-05, "loss": 46.009, "step": 4845 }, { "epoch": 0.6616151273124445, "grad_norm": 0.05743841826915741, "learning_rate": 2.6329547447567836e-05, "loss": 46.004, "step": 4846 }, { "epoch": 0.6617516554030992, "grad_norm": 0.2176378071308136, "learning_rate": 2.6310396523624804e-05, "loss": 46.0099, "step": 4847 }, { "epoch": 0.6618881834937539, "grad_norm": 0.060791727155447006, "learning_rate": 2.629125007992978e-05, "loss": 46.0053, "step": 4848 }, { "epoch": 0.6620247115844085, "grad_norm": 0.2111022025346756, "learning_rate": 2.62721081201038e-05, "loss": 46.0, "step": 4849 }, { "epoch": 0.6621612396750631, "grad_norm": 0.12944187223911285, "learning_rate": 2.6252970647767015e-05, "loss": 46.0, "step": 4850 }, { "epoch": 0.6622977677657178, "grad_norm": 0.09870366007089615, "learning_rate": 2.623383766653883e-05, "loss": 46.0076, "step": 4851 }, { "epoch": 0.6624342958563725, "grad_norm": 0.05138515308499336, "learning_rate": 2.621470918003768e-05, "loss": 46.0042, "step": 4852 }, { "epoch": 0.6625708239470272, "grad_norm": 0.03600022941827774, "learning_rate": 2.61955851918812e-05, "loss": 46.0, "step": 4853 }, { "epoch": 0.6627073520376817, "grad_norm": 0.0968981608748436, "learning_rate": 2.617646570568617e-05, "loss": 46.0019, "step": 4854 }, { "epoch": 0.6628438801283364, "grad_norm": 0.04908251389861107, "learning_rate": 2.6157350725068586e-05, "loss": 46.0026, "step": 4855 }, { "epoch": 0.6629804082189911, "grad_norm": 0.1402931809425354, "learning_rate": 2.6138240253643463e-05, "loss": 46.0017, "step": 4856 }, { "epoch": 0.6631169363096457, "grad_norm": 0.2280590683221817, "learning_rate": 2.6119134295025026e-05, "loss": 46.0029, "step": 4857 }, { "epoch": 0.6632534644003003, "grad_norm": 0.06455282121896744, "learning_rate": 2.6100032852826705e-05, "loss": 46.0051, "step": 4858 }, { "epoch": 0.663389992490955, "grad_norm": 0.06709683686494827, "learning_rate": 2.6080935930661003e-05, "loss": 46.0035, "step": 4859 }, { "epoch": 0.6635265205816097, "grad_norm": 0.07872819155454636, "learning_rate": 2.606184353213956e-05, "loss": 46.0055, "step": 4860 }, { "epoch": 0.6636630486722643, "grad_norm": 0.07133118808269501, "learning_rate": 2.6042755660873186e-05, "loss": 46.0005, "step": 4861 }, { "epoch": 0.663799576762919, "grad_norm": 0.05082842707633972, "learning_rate": 2.602367232047187e-05, "loss": 46.0114, "step": 4862 }, { "epoch": 0.6639361048535736, "grad_norm": 0.08184070140123367, "learning_rate": 2.6004593514544705e-05, "loss": 46.0055, "step": 4863 }, { "epoch": 0.6640726329442282, "grad_norm": 0.12773053348064423, "learning_rate": 2.598551924669989e-05, "loss": 46.0112, "step": 4864 }, { "epoch": 0.6642091610348829, "grad_norm": 0.18643413484096527, "learning_rate": 2.596644952054485e-05, "loss": 46.0034, "step": 4865 }, { "epoch": 0.6643456891255376, "grad_norm": 0.09498260170221329, "learning_rate": 2.59473843396861e-05, "loss": 46.0089, "step": 4866 }, { "epoch": 0.6644822172161923, "grad_norm": 0.08536917716264725, "learning_rate": 2.5928323707729306e-05, "loss": 46.0012, "step": 4867 }, { "epoch": 0.6646187453068468, "grad_norm": 0.08905645459890366, "learning_rate": 2.5909267628279234e-05, "loss": 46.0064, "step": 4868 }, { "epoch": 0.6647552733975015, "grad_norm": 0.06258310377597809, "learning_rate": 2.589021610493987e-05, "loss": 46.0044, "step": 4869 }, { "epoch": 0.6648918014881562, "grad_norm": 0.1878410428762436, "learning_rate": 
2.5871169141314277e-05, "loss": 46.006, "step": 4870 }, { "epoch": 0.6650283295788109, "grad_norm": 0.17325296998023987, "learning_rate": 2.5852126741004677e-05, "loss": 46.0092, "step": 4871 }, { "epoch": 0.6651648576694655, "grad_norm": 0.054065071046352386, "learning_rate": 2.583308890761243e-05, "loss": 46.0034, "step": 4872 }, { "epoch": 0.6653013857601201, "grad_norm": 0.04022978991270065, "learning_rate": 2.581405564473801e-05, "loss": 46.0049, "step": 4873 }, { "epoch": 0.6654379138507748, "grad_norm": 0.0631607323884964, "learning_rate": 2.5795026955981067e-05, "loss": 46.0027, "step": 4874 }, { "epoch": 0.6655744419414295, "grad_norm": 0.09858386963605881, "learning_rate": 2.577600284494035e-05, "loss": 46.0039, "step": 4875 }, { "epoch": 0.6657109700320841, "grad_norm": 0.1187295988202095, "learning_rate": 2.5756983315213744e-05, "loss": 46.006, "step": 4876 }, { "epoch": 0.6658474981227388, "grad_norm": 0.15623953938484192, "learning_rate": 2.57379683703983e-05, "loss": 46.001, "step": 4877 }, { "epoch": 0.6659840262133934, "grad_norm": 0.07633761316537857, "learning_rate": 2.5718958014090157e-05, "loss": 46.004, "step": 4878 }, { "epoch": 0.666120554304048, "grad_norm": 0.15115055441856384, "learning_rate": 2.5699952249884664e-05, "loss": 46.0011, "step": 4879 }, { "epoch": 0.6662570823947027, "grad_norm": 0.03515613079071045, "learning_rate": 2.5680951081376183e-05, "loss": 46.0026, "step": 4880 }, { "epoch": 0.6663936104853574, "grad_norm": 0.06567024439573288, "learning_rate": 2.56619545121583e-05, "loss": 46.0052, "step": 4881 }, { "epoch": 0.6665301385760121, "grad_norm": 0.05932092294096947, "learning_rate": 2.564296254582368e-05, "loss": 46.0029, "step": 4882 }, { "epoch": 0.6666666666666666, "grad_norm": 0.06943706423044205, "learning_rate": 2.562397518596419e-05, "loss": 46.0031, "step": 4883 }, { "epoch": 0.6668031947573213, "grad_norm": 0.0383334644138813, "learning_rate": 2.5604992436170737e-05, "loss": 46.0049, "step": 4884 }, { "epoch": 0.666939722847976, "grad_norm": 0.27586331963539124, "learning_rate": 2.5586014300033378e-05, "loss": 46.0075, "step": 4885 }, { "epoch": 0.6670762509386307, "grad_norm": 0.08055560290813446, "learning_rate": 2.556704078114136e-05, "loss": 46.0062, "step": 4886 }, { "epoch": 0.6672127790292853, "grad_norm": 0.14201059937477112, "learning_rate": 2.5548071883083002e-05, "loss": 46.0056, "step": 4887 }, { "epoch": 0.6673493071199399, "grad_norm": 0.08869993686676025, "learning_rate": 2.5529107609445733e-05, "loss": 46.0067, "step": 4888 }, { "epoch": 0.6674858352105946, "grad_norm": 0.10569900274276733, "learning_rate": 2.5510147963816134e-05, "loss": 46.0037, "step": 4889 }, { "epoch": 0.6676223633012492, "grad_norm": 0.029444841668009758, "learning_rate": 2.5491192949779937e-05, "loss": 46.0058, "step": 4890 }, { "epoch": 0.6677588913919039, "grad_norm": 0.040562715381383896, "learning_rate": 2.5472242570921966e-05, "loss": 46.0057, "step": 4891 }, { "epoch": 0.6678954194825585, "grad_norm": 0.11156721413135529, "learning_rate": 2.5453296830826135e-05, "loss": 46.0053, "step": 4892 }, { "epoch": 0.6680319475732132, "grad_norm": 0.1777917891740799, "learning_rate": 2.5434355733075556e-05, "loss": 46.0019, "step": 4893 }, { "epoch": 0.6681684756638678, "grad_norm": 0.08963904529809952, "learning_rate": 2.5415419281252417e-05, "loss": 46.0005, "step": 4894 }, { "epoch": 0.6683050037545225, "grad_norm": 0.050397321581840515, "learning_rate": 2.5396487478938054e-05, "loss": 46.0074, "step": 4895 }, { "epoch": 0.6684415318451772, 
"grad_norm": 0.0708584114909172, "learning_rate": 2.5377560329712846e-05, "loss": 46.0018, "step": 4896 }, { "epoch": 0.6685780599358317, "grad_norm": 0.1719769835472107, "learning_rate": 2.5358637837156407e-05, "loss": 46.0023, "step": 4897 }, { "epoch": 0.6687145880264864, "grad_norm": 0.25412046909332275, "learning_rate": 2.53397200048474e-05, "loss": 46.0023, "step": 4898 }, { "epoch": 0.6688511161171411, "grad_norm": 0.23478588461875916, "learning_rate": 2.5320806836363625e-05, "loss": 46.0029, "step": 4899 }, { "epoch": 0.6689876442077958, "grad_norm": 0.15307077765464783, "learning_rate": 2.5301898335281994e-05, "loss": 46.0, "step": 4900 }, { "epoch": 0.6691241722984504, "grad_norm": 0.10651998221874237, "learning_rate": 2.5282994505178526e-05, "loss": 46.0117, "step": 4901 }, { "epoch": 0.669260700389105, "grad_norm": 0.09331129491329193, "learning_rate": 2.526409534962838e-05, "loss": 46.0038, "step": 4902 }, { "epoch": 0.6693972284797597, "grad_norm": 0.07424173504114151, "learning_rate": 2.524520087220583e-05, "loss": 46.0034, "step": 4903 }, { "epoch": 0.6695337565704144, "grad_norm": 0.1277969926595688, "learning_rate": 2.522631107648424e-05, "loss": 46.0003, "step": 4904 }, { "epoch": 0.669670284661069, "grad_norm": 0.07572255283594131, "learning_rate": 2.5207425966036106e-05, "loss": 46.0006, "step": 4905 }, { "epoch": 0.6698068127517237, "grad_norm": 0.07823578268289566, "learning_rate": 2.5188545544433047e-05, "loss": 46.0027, "step": 4906 }, { "epoch": 0.6699433408423783, "grad_norm": 0.08578945696353912, "learning_rate": 2.5169669815245772e-05, "loss": 46.0045, "step": 4907 }, { "epoch": 0.670079868933033, "grad_norm": 0.059107955545186996, "learning_rate": 2.5150798782044122e-05, "loss": 46.0022, "step": 4908 }, { "epoch": 0.6702163970236876, "grad_norm": 0.0861542671918869, "learning_rate": 2.513193244839704e-05, "loss": 46.0038, "step": 4909 }, { "epoch": 0.6703529251143423, "grad_norm": 0.0426943302154541, "learning_rate": 2.5113070817872588e-05, "loss": 46.0026, "step": 4910 }, { "epoch": 0.670489453204997, "grad_norm": 0.048978086560964584, "learning_rate": 2.5094213894037923e-05, "loss": 46.0054, "step": 4911 }, { "epoch": 0.6706259812956515, "grad_norm": 0.031831320375204086, "learning_rate": 2.507536168045933e-05, "loss": 46.0, "step": 4912 }, { "epoch": 0.6707625093863062, "grad_norm": 0.03294705972075462, "learning_rate": 2.505651418070219e-05, "loss": 46.0087, "step": 4913 }, { "epoch": 0.6708990374769609, "grad_norm": 0.09761302173137665, "learning_rate": 2.503767139833101e-05, "loss": 46.0033, "step": 4914 }, { "epoch": 0.6710355655676156, "grad_norm": 0.060125261545181274, "learning_rate": 2.5018833336909376e-05, "loss": 46.0054, "step": 4915 }, { "epoch": 0.6711720936582702, "grad_norm": 0.0598856583237648, "learning_rate": 2.500000000000001e-05, "loss": 46.0028, "step": 4916 }, { "epoch": 0.6713086217489248, "grad_norm": 0.08304653316736221, "learning_rate": 2.498117139116471e-05, "loss": 46.0082, "step": 4917 }, { "epoch": 0.6714451498395795, "grad_norm": 0.08228328824043274, "learning_rate": 2.4962347513964446e-05, "loss": 46.0065, "step": 4918 }, { "epoch": 0.6715816779302342, "grad_norm": 0.05354752764105797, "learning_rate": 2.4943528371959197e-05, "loss": 46.0021, "step": 4919 }, { "epoch": 0.6717182060208888, "grad_norm": 0.15029428899288177, "learning_rate": 2.4924713968708106e-05, "loss": 46.0044, "step": 4920 }, { "epoch": 0.6718547341115434, "grad_norm": 0.05599668622016907, "learning_rate": 2.49059043077694e-05, "loss": 46.0031, "step": 4921 
}, { "epoch": 0.6719912622021981, "grad_norm": 0.076009601354599, "learning_rate": 2.4887099392700468e-05, "loss": 46.0048, "step": 4922 }, { "epoch": 0.6721277902928527, "grad_norm": 0.03382216766476631, "learning_rate": 2.4868299227057696e-05, "loss": 46.0054, "step": 4923 }, { "epoch": 0.6722643183835074, "grad_norm": 0.05003516003489494, "learning_rate": 2.4849503814396624e-05, "loss": 46.0073, "step": 4924 }, { "epoch": 0.6724008464741621, "grad_norm": 0.0725400522351265, "learning_rate": 2.483071315827194e-05, "loss": 46.0131, "step": 4925 }, { "epoch": 0.6725373745648167, "grad_norm": 0.09088070690631866, "learning_rate": 2.4811927262237368e-05, "loss": 46.0047, "step": 4926 }, { "epoch": 0.6726739026554713, "grad_norm": 0.17356882989406586, "learning_rate": 2.4793146129845764e-05, "loss": 46.0111, "step": 4927 }, { "epoch": 0.672810430746126, "grad_norm": 0.0700259879231453, "learning_rate": 2.4774369764649025e-05, "loss": 46.0008, "step": 4928 }, { "epoch": 0.6729469588367807, "grad_norm": 0.09606499969959259, "learning_rate": 2.4755598170198242e-05, "loss": 46.0088, "step": 4929 }, { "epoch": 0.6730834869274354, "grad_norm": 0.15034924447536469, "learning_rate": 2.4736831350043536e-05, "loss": 46.005, "step": 4930 }, { "epoch": 0.6732200150180899, "grad_norm": 0.07535277307033539, "learning_rate": 2.471806930773415e-05, "loss": 46.0057, "step": 4931 }, { "epoch": 0.6733565431087446, "grad_norm": 0.0899248719215393, "learning_rate": 2.4699312046818413e-05, "loss": 46.0063, "step": 4932 }, { "epoch": 0.6734930711993993, "grad_norm": 0.10441923141479492, "learning_rate": 2.4680559570843747e-05, "loss": 46.0083, "step": 4933 }, { "epoch": 0.673629599290054, "grad_norm": 0.1277170032262802, "learning_rate": 2.466181188335669e-05, "loss": 46.0029, "step": 4934 }, { "epoch": 0.6737661273807086, "grad_norm": 0.07716192305088043, "learning_rate": 2.464306898790285e-05, "loss": 46.0039, "step": 4935 }, { "epoch": 0.6739026554713632, "grad_norm": 0.07210058718919754, "learning_rate": 2.4624330888026946e-05, "loss": 46.0056, "step": 4936 }, { "epoch": 0.6740391835620179, "grad_norm": 0.06510110199451447, "learning_rate": 2.4605597587272776e-05, "loss": 46.0057, "step": 4937 }, { "epoch": 0.6741757116526725, "grad_norm": 0.1745390146970749, "learning_rate": 2.458686908918324e-05, "loss": 46.0032, "step": 4938 }, { "epoch": 0.6743122397433272, "grad_norm": 0.05823897570371628, "learning_rate": 2.4568145397300325e-05, "loss": 46.0026, "step": 4939 }, { "epoch": 0.6744487678339819, "grad_norm": 0.06559072434902191, "learning_rate": 2.4549426515165114e-05, "loss": 46.011, "step": 4940 }, { "epoch": 0.6745852959246365, "grad_norm": 0.05454976484179497, "learning_rate": 2.453071244631778e-05, "loss": 46.0073, "step": 4941 }, { "epoch": 0.6747218240152911, "grad_norm": 0.10653302073478699, "learning_rate": 2.4512003194297578e-05, "loss": 46.0027, "step": 4942 }, { "epoch": 0.6748583521059458, "grad_norm": 0.21637769043445587, "learning_rate": 2.4493298762642857e-05, "loss": 46.0057, "step": 4943 }, { "epoch": 0.6749948801966005, "grad_norm": 0.15382452309131622, "learning_rate": 2.447459915489106e-05, "loss": 46.001, "step": 4944 }, { "epoch": 0.6751314082872552, "grad_norm": 0.20166277885437012, "learning_rate": 2.4455904374578686e-05, "loss": 46.0103, "step": 4945 }, { "epoch": 0.6752679363779097, "grad_norm": 0.06403713673353195, "learning_rate": 2.443721442524141e-05, "loss": 46.0018, "step": 4946 }, { "epoch": 0.6754044644685644, "grad_norm": 0.18377482891082764, "learning_rate": 
2.4418529310413875e-05, "loss": 46.0039, "step": 4947 }, { "epoch": 0.6755409925592191, "grad_norm": 0.1610170304775238, "learning_rate": 2.4399849033629878e-05, "loss": 46.0103, "step": 4948 }, { "epoch": 0.6756775206498737, "grad_norm": 0.15722724795341492, "learning_rate": 2.4381173598422274e-05, "loss": 46.0029, "step": 4949 }, { "epoch": 0.6758140487405284, "grad_norm": 0.08481474965810776, "learning_rate": 2.436250300832307e-05, "loss": 46.0031, "step": 4950 }, { "epoch": 0.675950576831183, "grad_norm": 0.08426827937364578, "learning_rate": 2.4343837266863246e-05, "loss": 46.0052, "step": 4951 }, { "epoch": 0.6760871049218377, "grad_norm": 0.031087931245565414, "learning_rate": 2.432517637757293e-05, "loss": 46.0016, "step": 4952 }, { "epoch": 0.6762236330124923, "grad_norm": 0.07667192071676254, "learning_rate": 2.4306520343981354e-05, "loss": 46.0038, "step": 4953 }, { "epoch": 0.676360161103147, "grad_norm": 0.06999168545007706, "learning_rate": 2.4287869169616806e-05, "loss": 46.0028, "step": 4954 }, { "epoch": 0.6764966891938016, "grad_norm": 0.032955896109342575, "learning_rate": 2.4269222858006613e-05, "loss": 46.003, "step": 4955 }, { "epoch": 0.6766332172844562, "grad_norm": 0.19107869267463684, "learning_rate": 2.425058141267722e-05, "loss": 46.0027, "step": 4956 }, { "epoch": 0.6767697453751109, "grad_norm": 0.10031161457300186, "learning_rate": 2.4231944837154193e-05, "loss": 46.0023, "step": 4957 }, { "epoch": 0.6769062734657656, "grad_norm": 0.1628161519765854, "learning_rate": 2.4213313134962133e-05, "loss": 46.0054, "step": 4958 }, { "epoch": 0.6770428015564203, "grad_norm": 0.12189193069934845, "learning_rate": 2.4194686309624663e-05, "loss": 46.0053, "step": 4959 }, { "epoch": 0.6771793296470748, "grad_norm": 0.08201862126588821, "learning_rate": 2.4176064364664613e-05, "loss": 46.0059, "step": 4960 }, { "epoch": 0.6773158577377295, "grad_norm": 0.24425961077213287, "learning_rate": 2.4157447303603787e-05, "loss": 46.0054, "step": 4961 }, { "epoch": 0.6774523858283842, "grad_norm": 0.05425616353750229, "learning_rate": 2.4138835129963127e-05, "loss": 46.0051, "step": 4962 }, { "epoch": 0.6775889139190389, "grad_norm": 0.051187459379434586, "learning_rate": 2.412022784726256e-05, "loss": 46.0081, "step": 4963 }, { "epoch": 0.6777254420096935, "grad_norm": 0.04869277402758598, "learning_rate": 2.410162545902121e-05, "loss": 46.007, "step": 4964 }, { "epoch": 0.6778619701003481, "grad_norm": 0.11022113263607025, "learning_rate": 2.40830279687572e-05, "loss": 46.0098, "step": 4965 }, { "epoch": 0.6779984981910028, "grad_norm": 0.11804317682981491, "learning_rate": 2.4064435379987737e-05, "loss": 46.0013, "step": 4966 }, { "epoch": 0.6781350262816574, "grad_norm": 0.09153899550437927, "learning_rate": 2.4045847696229107e-05, "loss": 46.0045, "step": 4967 }, { "epoch": 0.6782715543723121, "grad_norm": 0.061087366193532944, "learning_rate": 2.4027264920996668e-05, "loss": 46.0051, "step": 4968 }, { "epoch": 0.6784080824629668, "grad_norm": 0.12203724682331085, "learning_rate": 2.4008687057804853e-05, "loss": 46.0037, "step": 4969 }, { "epoch": 0.6785446105536214, "grad_norm": 0.10453901439905167, "learning_rate": 2.3990114110167163e-05, "loss": 46.0054, "step": 4970 }, { "epoch": 0.678681138644276, "grad_norm": 0.10884836316108704, "learning_rate": 2.397154608159616e-05, "loss": 46.0061, "step": 4971 }, { "epoch": 0.6788176667349307, "grad_norm": 0.1000281423330307, "learning_rate": 2.3952982975603496e-05, "loss": 46.0041, "step": 4972 }, { "epoch": 0.6789541948255854, 
"grad_norm": 0.12657088041305542, "learning_rate": 2.3934424795699865e-05, "loss": 46.0119, "step": 4973 }, { "epoch": 0.6790907229162401, "grad_norm": 0.03392605483531952, "learning_rate": 2.391587154539507e-05, "loss": 46.0111, "step": 4974 }, { "epoch": 0.6792272510068946, "grad_norm": 0.09126529842615128, "learning_rate": 2.3897323228197932e-05, "loss": 46.0134, "step": 4975 }, { "epoch": 0.6793637790975493, "grad_norm": 0.10315334796905518, "learning_rate": 2.387877984761638e-05, "loss": 46.0039, "step": 4976 }, { "epoch": 0.679500307188204, "grad_norm": 0.06547874212265015, "learning_rate": 2.386024140715736e-05, "loss": 46.0043, "step": 4977 }, { "epoch": 0.6796368352788587, "grad_norm": 0.06127988174557686, "learning_rate": 2.3841707910326988e-05, "loss": 46.0057, "step": 4978 }, { "epoch": 0.6797733633695133, "grad_norm": 0.06712260842323303, "learning_rate": 2.382317936063031e-05, "loss": 46.0061, "step": 4979 }, { "epoch": 0.6799098914601679, "grad_norm": 0.08855251967906952, "learning_rate": 2.3804655761571514e-05, "loss": 46.0024, "step": 4980 }, { "epoch": 0.6800464195508226, "grad_norm": 0.060649313032627106, "learning_rate": 2.378613711665383e-05, "loss": 46.0074, "step": 4981 }, { "epoch": 0.6801829476414772, "grad_norm": 0.06834857910871506, "learning_rate": 2.376762342937961e-05, "loss": 46.0043, "step": 4982 }, { "epoch": 0.6803194757321319, "grad_norm": 0.039080217480659485, "learning_rate": 2.374911470325016e-05, "loss": 46.0003, "step": 4983 }, { "epoch": 0.6804560038227866, "grad_norm": 0.08346065878868103, "learning_rate": 2.373061094176591e-05, "loss": 46.002, "step": 4984 }, { "epoch": 0.6805925319134412, "grad_norm": 0.05340850353240967, "learning_rate": 2.3712112148426375e-05, "loss": 46.0006, "step": 4985 }, { "epoch": 0.6807290600040958, "grad_norm": 0.05342715233564377, "learning_rate": 2.3693618326730104e-05, "loss": 46.0056, "step": 4986 }, { "epoch": 0.6808655880947505, "grad_norm": 0.15549542009830475, "learning_rate": 2.3675129480174673e-05, "loss": 46.0129, "step": 4987 }, { "epoch": 0.6810021161854052, "grad_norm": 0.09258656203746796, "learning_rate": 2.3656645612256746e-05, "loss": 46.0037, "step": 4988 }, { "epoch": 0.6811386442760597, "grad_norm": 0.10214484483003616, "learning_rate": 2.363816672647208e-05, "loss": 46.0045, "step": 4989 }, { "epoch": 0.6812751723667144, "grad_norm": 0.05938047170639038, "learning_rate": 2.3619692826315455e-05, "loss": 46.0058, "step": 4990 }, { "epoch": 0.6814117004573691, "grad_norm": 0.03447167947888374, "learning_rate": 2.360122391528066e-05, "loss": 46.0017, "step": 4991 }, { "epoch": 0.6815482285480238, "grad_norm": 0.05443109571933746, "learning_rate": 2.3582759996860636e-05, "loss": 46.0059, "step": 4992 }, { "epoch": 0.6816847566386784, "grad_norm": 0.13812755048274994, "learning_rate": 2.356430107454733e-05, "loss": 46.0049, "step": 4993 }, { "epoch": 0.681821284729333, "grad_norm": 0.13524273037910461, "learning_rate": 2.3545847151831756e-05, "loss": 46.0062, "step": 4994 }, { "epoch": 0.6819578128199877, "grad_norm": 0.09652671217918396, "learning_rate": 2.3527398232203924e-05, "loss": 46.0066, "step": 4995 }, { "epoch": 0.6820943409106424, "grad_norm": 0.10961989313364029, "learning_rate": 2.3508954319152998e-05, "loss": 46.0036, "step": 4996 }, { "epoch": 0.682230869001297, "grad_norm": 0.2282295674085617, "learning_rate": 2.349051541616713e-05, "loss": 46.0088, "step": 4997 }, { "epoch": 0.6823673970919517, "grad_norm": 0.2604938745498657, "learning_rate": 2.347208152673354e-05, "loss": 46.0023, 
"step": 4998 }, { "epoch": 0.6825039251826063, "grad_norm": 0.29736506938934326, "learning_rate": 2.3453652654338502e-05, "loss": 46.0045, "step": 4999 }, { "epoch": 0.682640453273261, "grad_norm": 0.12608785927295685, "learning_rate": 2.343522880246734e-05, "loss": 46.0021, "step": 5000 }, { "epoch": 0.6827769813639156, "grad_norm": 0.11937367916107178, "learning_rate": 2.3416809974604424e-05, "loss": 46.0112, "step": 5001 }, { "epoch": 0.6829135094545703, "grad_norm": 0.05002835392951965, "learning_rate": 2.3398396174233178e-05, "loss": 46.0008, "step": 5002 }, { "epoch": 0.683050037545225, "grad_norm": 0.09755067527294159, "learning_rate": 2.3379987404836074e-05, "loss": 46.0052, "step": 5003 }, { "epoch": 0.6831865656358795, "grad_norm": 0.054576605558395386, "learning_rate": 2.336158366989463e-05, "loss": 46.0008, "step": 5004 }, { "epoch": 0.6833230937265342, "grad_norm": 0.09905961900949478, "learning_rate": 2.334318497288942e-05, "loss": 46.0051, "step": 5005 }, { "epoch": 0.6834596218171889, "grad_norm": 0.06639552116394043, "learning_rate": 2.3324791317300066e-05, "loss": 46.0025, "step": 5006 }, { "epoch": 0.6835961499078436, "grad_norm": 0.10017801076173782, "learning_rate": 2.330640270660521e-05, "loss": 46.002, "step": 5007 }, { "epoch": 0.6837326779984982, "grad_norm": 0.07683484256267548, "learning_rate": 2.3288019144282586e-05, "loss": 46.0013, "step": 5008 }, { "epoch": 0.6838692060891528, "grad_norm": 0.04013121500611305, "learning_rate": 2.326964063380893e-05, "loss": 46.0016, "step": 5009 }, { "epoch": 0.6840057341798075, "grad_norm": 0.0418909527361393, "learning_rate": 2.3251267178660047e-05, "loss": 46.0014, "step": 5010 }, { "epoch": 0.6841422622704622, "grad_norm": 0.08200832456350327, "learning_rate": 2.323289878231078e-05, "loss": 46.0039, "step": 5011 }, { "epoch": 0.6842787903611168, "grad_norm": 0.17319494485855103, "learning_rate": 2.321453544823499e-05, "loss": 46.0039, "step": 5012 }, { "epoch": 0.6844153184517715, "grad_norm": 0.05526414141058922, "learning_rate": 2.319617717990567e-05, "loss": 46.0005, "step": 5013 }, { "epoch": 0.6845518465424261, "grad_norm": 0.09422070533037186, "learning_rate": 2.3177823980794727e-05, "loss": 46.0011, "step": 5014 }, { "epoch": 0.6846883746330807, "grad_norm": 0.14105235040187836, "learning_rate": 2.315947585437319e-05, "loss": 46.0051, "step": 5015 }, { "epoch": 0.6848249027237354, "grad_norm": 0.04092694818973541, "learning_rate": 2.31411328041111e-05, "loss": 46.0073, "step": 5016 }, { "epoch": 0.6849614308143901, "grad_norm": 0.11112425476312637, "learning_rate": 2.31227948334776e-05, "loss": 46.0068, "step": 5017 }, { "epoch": 0.6850979589050448, "grad_norm": 0.07866758853197098, "learning_rate": 2.3104461945940763e-05, "loss": 46.0034, "step": 5018 }, { "epoch": 0.6852344869956993, "grad_norm": 0.08372779190540314, "learning_rate": 2.308613414496777e-05, "loss": 46.013, "step": 5019 }, { "epoch": 0.685371015086354, "grad_norm": 0.04412125051021576, "learning_rate": 2.306781143402485e-05, "loss": 46.0063, "step": 5020 }, { "epoch": 0.6855075431770087, "grad_norm": 0.16308295726776123, "learning_rate": 2.3049493816577245e-05, "loss": 46.0049, "step": 5021 }, { "epoch": 0.6856440712676634, "grad_norm": 0.10040201246738434, "learning_rate": 2.3031181296089242e-05, "loss": 46.0071, "step": 5022 }, { "epoch": 0.6857805993583179, "grad_norm": 0.19359032809734344, "learning_rate": 2.3012873876024114e-05, "loss": 46.0044, "step": 5023 }, { "epoch": 0.6859171274489726, "grad_norm": 0.06987074017524719, "learning_rate": 
2.299457155984427e-05, "loss": 46.0036, "step": 5024 }, { "epoch": 0.6860536555396273, "grad_norm": 0.15236900746822357, "learning_rate": 2.2976274351011074e-05, "loss": 46.0027, "step": 5025 }, { "epoch": 0.686190183630282, "grad_norm": 0.2026595026254654, "learning_rate": 2.295798225298495e-05, "loss": 46.0094, "step": 5026 }, { "epoch": 0.6863267117209366, "grad_norm": 0.07638730853796005, "learning_rate": 2.2939695269225353e-05, "loss": 46.01, "step": 5027 }, { "epoch": 0.6864632398115912, "grad_norm": 0.08137550950050354, "learning_rate": 2.2921413403190772e-05, "loss": 46.0039, "step": 5028 }, { "epoch": 0.6865997679022459, "grad_norm": 0.06297550350427628, "learning_rate": 2.2903136658338736e-05, "loss": 46.0089, "step": 5029 }, { "epoch": 0.6867362959929005, "grad_norm": 0.0675877034664154, "learning_rate": 2.288486503812578e-05, "loss": 46.0049, "step": 5030 }, { "epoch": 0.6868728240835552, "grad_norm": 0.058461010456085205, "learning_rate": 2.2866598546007507e-05, "loss": 46.0016, "step": 5031 }, { "epoch": 0.6870093521742099, "grad_norm": 0.07248587906360626, "learning_rate": 2.284833718543851e-05, "loss": 46.0031, "step": 5032 }, { "epoch": 0.6871458802648645, "grad_norm": 0.08430295437574387, "learning_rate": 2.2830080959872446e-05, "loss": 46.0023, "step": 5033 }, { "epoch": 0.6872824083555191, "grad_norm": 0.10253546386957169, "learning_rate": 2.2811829872761986e-05, "loss": 46.0018, "step": 5034 }, { "epoch": 0.6874189364461738, "grad_norm": 0.07824158668518066, "learning_rate": 2.279358392755882e-05, "loss": 46.0106, "step": 5035 }, { "epoch": 0.6875554645368285, "grad_norm": 0.12725666165351868, "learning_rate": 2.2775343127713683e-05, "loss": 46.0104, "step": 5036 }, { "epoch": 0.6876919926274832, "grad_norm": 0.04404649883508682, "learning_rate": 2.2757107476676324e-05, "loss": 46.0002, "step": 5037 }, { "epoch": 0.6878285207181377, "grad_norm": 0.09856435656547546, "learning_rate": 2.273887697789553e-05, "loss": 46.0107, "step": 5038 }, { "epoch": 0.6879650488087924, "grad_norm": 0.1477057784795761, "learning_rate": 2.2720651634819102e-05, "loss": 46.0092, "step": 5039 }, { "epoch": 0.6881015768994471, "grad_norm": 0.08273810893297195, "learning_rate": 2.2702431450893876e-05, "loss": 46.0018, "step": 5040 }, { "epoch": 0.6882381049901017, "grad_norm": 0.0530787818133831, "learning_rate": 2.2684216429565698e-05, "loss": 46.0054, "step": 5041 }, { "epoch": 0.6883746330807564, "grad_norm": 0.0351531095802784, "learning_rate": 2.2666006574279454e-05, "loss": 46.0042, "step": 5042 }, { "epoch": 0.688511161171411, "grad_norm": 0.09655667096376419, "learning_rate": 2.264780188847904e-05, "loss": 46.0046, "step": 5043 }, { "epoch": 0.6886476892620657, "grad_norm": 0.045648179948329926, "learning_rate": 2.2629602375607372e-05, "loss": 46.0115, "step": 5044 }, { "epoch": 0.6887842173527203, "grad_norm": 0.07255356758832932, "learning_rate": 2.261140803910644e-05, "loss": 46.006, "step": 5045 }, { "epoch": 0.688920745443375, "grad_norm": 0.11541979759931564, "learning_rate": 2.2593218882417168e-05, "loss": 46.0014, "step": 5046 }, { "epoch": 0.6890572735340297, "grad_norm": 0.14597953855991364, "learning_rate": 2.2575034908979546e-05, "loss": 46.0031, "step": 5047 }, { "epoch": 0.6891938016246842, "grad_norm": 0.08211957663297653, "learning_rate": 2.2556856122232574e-05, "loss": 46.0038, "step": 5048 }, { "epoch": 0.6893303297153389, "grad_norm": 0.09999687969684601, "learning_rate": 2.253868252561433e-05, "loss": 46.0148, "step": 5049 }, { "epoch": 0.6894668578059936, 
"grad_norm": 0.12699414789676666, "learning_rate": 2.25205141225618e-05, "loss": 46.0059, "step": 5050 }, { "epoch": 0.6896033858966483, "grad_norm": 0.14138081669807434, "learning_rate": 2.250235091651105e-05, "loss": 46.0058, "step": 5051 }, { "epoch": 0.6897399139873028, "grad_norm": 0.0585128478705883, "learning_rate": 2.24841929108972e-05, "loss": 46.0033, "step": 5052 }, { "epoch": 0.6898764420779575, "grad_norm": 0.06521966308355331, "learning_rate": 2.2466040109154335e-05, "loss": 46.0005, "step": 5053 }, { "epoch": 0.6900129701686122, "grad_norm": 0.21377728879451752, "learning_rate": 2.244789251471554e-05, "loss": 46.0006, "step": 5054 }, { "epoch": 0.6901494982592669, "grad_norm": 0.045059237629175186, "learning_rate": 2.2429750131012934e-05, "loss": 46.0, "step": 5055 }, { "epoch": 0.6902860263499215, "grad_norm": 0.09248613566160202, "learning_rate": 2.24116129614777e-05, "loss": 46.005, "step": 5056 }, { "epoch": 0.6904225544405761, "grad_norm": 0.12657278776168823, "learning_rate": 2.2393481009539996e-05, "loss": 46.0017, "step": 5057 }, { "epoch": 0.6905590825312308, "grad_norm": 0.11635568737983704, "learning_rate": 2.237535427862893e-05, "loss": 46.0007, "step": 5058 }, { "epoch": 0.6906956106218854, "grad_norm": 0.11867185682058334, "learning_rate": 2.235723277217274e-05, "loss": 46.0015, "step": 5059 }, { "epoch": 0.6908321387125401, "grad_norm": 0.07674683630466461, "learning_rate": 2.23391164935986e-05, "loss": 46.0056, "step": 5060 }, { "epoch": 0.6909686668031948, "grad_norm": 0.048816245049238205, "learning_rate": 2.232100544633273e-05, "loss": 46.0034, "step": 5061 }, { "epoch": 0.6911051948938494, "grad_norm": 0.052430421113967896, "learning_rate": 2.2302899633800294e-05, "loss": 46.0095, "step": 5062 }, { "epoch": 0.691241722984504, "grad_norm": 0.07924339920282364, "learning_rate": 2.2284799059425576e-05, "loss": 46.0052, "step": 5063 }, { "epoch": 0.6913782510751587, "grad_norm": 0.07021056115627289, "learning_rate": 2.2266703726631784e-05, "loss": 46.0042, "step": 5064 }, { "epoch": 0.6915147791658134, "grad_norm": 0.03493840992450714, "learning_rate": 2.2248613638841164e-05, "loss": 46.0044, "step": 5065 }, { "epoch": 0.6916513072564681, "grad_norm": 0.044022947549819946, "learning_rate": 2.2230528799474964e-05, "loss": 46.0147, "step": 5066 }, { "epoch": 0.6917878353471226, "grad_norm": 0.07177254557609558, "learning_rate": 2.2212449211953446e-05, "loss": 46.0001, "step": 5067 }, { "epoch": 0.6919243634377773, "grad_norm": 0.04214945435523987, "learning_rate": 2.2194374879695877e-05, "loss": 46.0047, "step": 5068 }, { "epoch": 0.692060891528432, "grad_norm": 0.23512811958789825, "learning_rate": 2.2176305806120524e-05, "loss": 46.002, "step": 5069 }, { "epoch": 0.6921974196190867, "grad_norm": 0.2048012763261795, "learning_rate": 2.2158241994644664e-05, "loss": 46.002, "step": 5070 }, { "epoch": 0.6923339477097413, "grad_norm": 0.09096959233283997, "learning_rate": 2.2140183448684583e-05, "loss": 46.002, "step": 5071 }, { "epoch": 0.6924704758003959, "grad_norm": 0.11827035993337631, "learning_rate": 2.212213017165554e-05, "loss": 46.0028, "step": 5072 }, { "epoch": 0.6926070038910506, "grad_norm": 0.13538721203804016, "learning_rate": 2.210408216697189e-05, "loss": 46.0013, "step": 5073 }, { "epoch": 0.6927435319817052, "grad_norm": 0.18293237686157227, "learning_rate": 2.208603943804686e-05, "loss": 46.0046, "step": 5074 }, { "epoch": 0.6928800600723599, "grad_norm": 0.11757819354534149, "learning_rate": 2.2068001988292773e-05, "loss": 46.0098, "step": 
5075 }, { "epoch": 0.6930165881630146, "grad_norm": 0.039146408438682556, "learning_rate": 2.2049969821120897e-05, "loss": 46.0097, "step": 5076 }, { "epoch": 0.6931531162536692, "grad_norm": 0.13343454897403717, "learning_rate": 2.2031942939941592e-05, "loss": 46.0044, "step": 5077 }, { "epoch": 0.6932896443443238, "grad_norm": 0.14763742685317993, "learning_rate": 2.201392134816409e-05, "loss": 46.0019, "step": 5078 }, { "epoch": 0.6934261724349785, "grad_norm": 0.10968134552240372, "learning_rate": 2.19959050491967e-05, "loss": 46.0114, "step": 5079 }, { "epoch": 0.6935627005256332, "grad_norm": 0.05662928521633148, "learning_rate": 2.1977894046446745e-05, "loss": 46.0087, "step": 5080 }, { "epoch": 0.6936992286162879, "grad_norm": 0.26862889528274536, "learning_rate": 2.1959888343320523e-05, "loss": 46.0045, "step": 5081 }, { "epoch": 0.6938357567069424, "grad_norm": 0.05120716243982315, "learning_rate": 2.1941887943223288e-05, "loss": 46.0009, "step": 5082 }, { "epoch": 0.6939722847975971, "grad_norm": 0.08388612419366837, "learning_rate": 2.1923892849559335e-05, "loss": 46.0019, "step": 5083 }, { "epoch": 0.6941088128882518, "grad_norm": 0.09234266728162766, "learning_rate": 2.1905903065731974e-05, "loss": 46.0019, "step": 5084 }, { "epoch": 0.6942453409789064, "grad_norm": 0.077538400888443, "learning_rate": 2.1887918595143496e-05, "loss": 46.0021, "step": 5085 }, { "epoch": 0.694381869069561, "grad_norm": 0.15929323434829712, "learning_rate": 2.186993944119512e-05, "loss": 46.0082, "step": 5086 }, { "epoch": 0.6945183971602157, "grad_norm": 0.043431077152490616, "learning_rate": 2.185196560728717e-05, "loss": 46.0073, "step": 5087 }, { "epoch": 0.6946549252508704, "grad_norm": 0.10348663479089737, "learning_rate": 2.1833997096818898e-05, "loss": 46.0054, "step": 5088 }, { "epoch": 0.694791453341525, "grad_norm": 0.2011130452156067, "learning_rate": 2.181603391318857e-05, "loss": 46.0049, "step": 5089 }, { "epoch": 0.6949279814321797, "grad_norm": 0.06866198033094406, "learning_rate": 2.1798076059793392e-05, "loss": 46.0028, "step": 5090 }, { "epoch": 0.6950645095228343, "grad_norm": 0.046408165246248245, "learning_rate": 2.1780123540029655e-05, "loss": 46.0041, "step": 5091 }, { "epoch": 0.695201037613489, "grad_norm": 0.2607335150241852, "learning_rate": 2.176217635729258e-05, "loss": 46.0126, "step": 5092 }, { "epoch": 0.6953375657041436, "grad_norm": 0.06043127924203873, "learning_rate": 2.1744234514976385e-05, "loss": 46.0011, "step": 5093 }, { "epoch": 0.6954740937947983, "grad_norm": 0.08197708427906036, "learning_rate": 2.172629801647429e-05, "loss": 46.0057, "step": 5094 }, { "epoch": 0.695610621885453, "grad_norm": 0.08721667528152466, "learning_rate": 2.170836686517849e-05, "loss": 46.0056, "step": 5095 }, { "epoch": 0.6957471499761075, "grad_norm": 0.10921205580234528, "learning_rate": 2.1690441064480187e-05, "loss": 46.0035, "step": 5096 }, { "epoch": 0.6958836780667622, "grad_norm": 0.08921442925930023, "learning_rate": 2.167252061776956e-05, "loss": 46.0086, "step": 5097 }, { "epoch": 0.6960202061574169, "grad_norm": 0.32457372546195984, "learning_rate": 2.1654605528435773e-05, "loss": 46.0081, "step": 5098 }, { "epoch": 0.6961567342480716, "grad_norm": 0.23795047402381897, "learning_rate": 2.1636695799866984e-05, "loss": 46.0018, "step": 5099 }, { "epoch": 0.6962932623387262, "grad_norm": 0.11100006848573685, "learning_rate": 2.161879143545033e-05, "loss": 46.0, "step": 5100 }, { "epoch": 0.6964297904293808, "grad_norm": 0.08989828079938889, "learning_rate": 
2.1600892438571945e-05, "loss": 46.0055, "step": 5101 }, { "epoch": 0.6965663185200355, "grad_norm": 0.08597107231616974, "learning_rate": 2.1582998812616932e-05, "loss": 46.0063, "step": 5102 }, { "epoch": 0.6967028466106902, "grad_norm": 0.17220374941825867, "learning_rate": 2.1565110560969394e-05, "loss": 46.0019, "step": 5103 }, { "epoch": 0.6968393747013448, "grad_norm": 0.06592816114425659, "learning_rate": 2.15472276870124e-05, "loss": 46.0028, "step": 5104 }, { "epoch": 0.6969759027919995, "grad_norm": 0.09084564447402954, "learning_rate": 2.1529350194128023e-05, "loss": 46.0002, "step": 5105 }, { "epoch": 0.6971124308826541, "grad_norm": 0.17472386360168457, "learning_rate": 2.15114780856973e-05, "loss": 46.0034, "step": 5106 }, { "epoch": 0.6972489589733087, "grad_norm": 0.11805611848831177, "learning_rate": 2.1493611365100257e-05, "loss": 46.0007, "step": 5107 }, { "epoch": 0.6973854870639634, "grad_norm": 0.09158288687467575, "learning_rate": 2.1475750035715915e-05, "loss": 46.0027, "step": 5108 }, { "epoch": 0.6975220151546181, "grad_norm": 0.08734694868326187, "learning_rate": 2.1457894100922238e-05, "loss": 46.0004, "step": 5109 }, { "epoch": 0.6976585432452728, "grad_norm": 0.28821542859077454, "learning_rate": 2.1440043564096213e-05, "loss": 46.0046, "step": 5110 }, { "epoch": 0.6977950713359273, "grad_norm": 0.05882445350289345, "learning_rate": 2.142219842861376e-05, "loss": 46.0078, "step": 5111 }, { "epoch": 0.697931599426582, "grad_norm": 0.020530808717012405, "learning_rate": 2.140435869784986e-05, "loss": 46.0119, "step": 5112 }, { "epoch": 0.6980681275172367, "grad_norm": 0.0994158610701561, "learning_rate": 2.1386524375178358e-05, "loss": 46.0018, "step": 5113 }, { "epoch": 0.6982046556078914, "grad_norm": 0.12898339331150055, "learning_rate": 2.136869546397216e-05, "loss": 46.0086, "step": 5114 }, { "epoch": 0.698341183698546, "grad_norm": 0.09510669112205505, "learning_rate": 2.1350871967603093e-05, "loss": 46.0061, "step": 5115 }, { "epoch": 0.6984777117892006, "grad_norm": 0.05003180354833603, "learning_rate": 2.1333053889442032e-05, "loss": 46.0052, "step": 5116 }, { "epoch": 0.6986142398798553, "grad_norm": 0.20191466808319092, "learning_rate": 2.131524123285879e-05, "loss": 46.0004, "step": 5117 }, { "epoch": 0.69875076797051, "grad_norm": 0.12953762710094452, "learning_rate": 2.129743400122209e-05, "loss": 46.0034, "step": 5118 }, { "epoch": 0.6988872960611646, "grad_norm": 0.09067478775978088, "learning_rate": 2.127963219789974e-05, "loss": 46.0056, "step": 5119 }, { "epoch": 0.6990238241518192, "grad_norm": 0.05521351844072342, "learning_rate": 2.1261835826258454e-05, "loss": 46.0005, "step": 5120 }, { "epoch": 0.6991603522424739, "grad_norm": 0.07503247261047363, "learning_rate": 2.1244044889663955e-05, "loss": 46.0077, "step": 5121 }, { "epoch": 0.6992968803331285, "grad_norm": 0.10969360172748566, "learning_rate": 2.1226259391480856e-05, "loss": 46.0042, "step": 5122 }, { "epoch": 0.6994334084237832, "grad_norm": 0.057731982320547104, "learning_rate": 2.1208479335072866e-05, "loss": 46.0101, "step": 5123 }, { "epoch": 0.6995699365144379, "grad_norm": 0.05181717500090599, "learning_rate": 2.1190704723802584e-05, "loss": 46.0154, "step": 5124 }, { "epoch": 0.6997064646050924, "grad_norm": 0.18011587858200073, "learning_rate": 2.1172935561031586e-05, "loss": 46.0024, "step": 5125 }, { "epoch": 0.6998429926957471, "grad_norm": 0.11388000100851059, "learning_rate": 2.115517185012044e-05, "loss": 46.0132, "step": 5126 }, { "epoch": 0.6999795207864018, 
"grad_norm": 0.04288913309574127, "learning_rate": 2.1137413594428672e-05, "loss": 46.0078, "step": 5127 }, { "epoch": 0.7001160488770565, "grad_norm": 0.12249672412872314, "learning_rate": 2.111966079731477e-05, "loss": 46.0081, "step": 5128 }, { "epoch": 0.7002525769677111, "grad_norm": 0.05429196357727051, "learning_rate": 2.1101913462136196e-05, "loss": 46.0085, "step": 5129 }, { "epoch": 0.7003891050583657, "grad_norm": 0.1946028769016266, "learning_rate": 2.1084171592249378e-05, "loss": 46.0049, "step": 5130 }, { "epoch": 0.7005256331490204, "grad_norm": 0.05308947712182999, "learning_rate": 2.1066435191009715e-05, "loss": 46.0055, "step": 5131 }, { "epoch": 0.7006621612396751, "grad_norm": 0.05175310745835304, "learning_rate": 2.1048704261771566e-05, "loss": 46.0096, "step": 5132 }, { "epoch": 0.7007986893303297, "grad_norm": 0.11442005634307861, "learning_rate": 2.1030978807888257e-05, "loss": 46.003, "step": 5133 }, { "epoch": 0.7009352174209844, "grad_norm": 0.11002989858388901, "learning_rate": 2.101325883271208e-05, "loss": 46.003, "step": 5134 }, { "epoch": 0.701071745511639, "grad_norm": 0.09066247940063477, "learning_rate": 2.0995544339594285e-05, "loss": 46.0051, "step": 5135 }, { "epoch": 0.7012082736022937, "grad_norm": 0.04648731276392937, "learning_rate": 2.0977835331885093e-05, "loss": 46.0058, "step": 5136 }, { "epoch": 0.7013448016929483, "grad_norm": 0.05413609743118286, "learning_rate": 2.0960131812933682e-05, "loss": 46.005, "step": 5137 }, { "epoch": 0.701481329783603, "grad_norm": 0.039827436208724976, "learning_rate": 2.0942433786088193e-05, "loss": 46.0054, "step": 5138 }, { "epoch": 0.7016178578742577, "grad_norm": 0.0891973078250885, "learning_rate": 2.0924741254695718e-05, "loss": 46.0045, "step": 5139 }, { "epoch": 0.7017543859649122, "grad_norm": 0.06192993372678757, "learning_rate": 2.090705422210237e-05, "loss": 46.0036, "step": 5140 }, { "epoch": 0.7018909140555669, "grad_norm": 0.04570925608277321, "learning_rate": 2.0889372691653115e-05, "loss": 46.002, "step": 5141 }, { "epoch": 0.7020274421462216, "grad_norm": 0.060639478266239166, "learning_rate": 2.087169666669196e-05, "loss": 46.0066, "step": 5142 }, { "epoch": 0.7021639702368763, "grad_norm": 0.12573112547397614, "learning_rate": 2.085402615056183e-05, "loss": 46.006, "step": 5143 }, { "epoch": 0.7023004983275309, "grad_norm": 0.03591382876038551, "learning_rate": 2.0836361146604672e-05, "loss": 46.0006, "step": 5144 }, { "epoch": 0.7024370264181855, "grad_norm": 0.21914894878864288, "learning_rate": 2.0818701658161294e-05, "loss": 46.0074, "step": 5145 }, { "epoch": 0.7025735545088402, "grad_norm": 0.056073617190122604, "learning_rate": 2.080104768857152e-05, "loss": 46.0056, "step": 5146 }, { "epoch": 0.7027100825994949, "grad_norm": 0.219005286693573, "learning_rate": 2.0783399241174138e-05, "loss": 46.0128, "step": 5147 }, { "epoch": 0.7028466106901495, "grad_norm": 0.04803166165947914, "learning_rate": 2.0765756319306894e-05, "loss": 46.0054, "step": 5148 }, { "epoch": 0.7029831387808041, "grad_norm": 0.15981616079807281, "learning_rate": 2.074811892630642e-05, "loss": 46.0036, "step": 5149 }, { "epoch": 0.7031196668714588, "grad_norm": 0.26398077607154846, "learning_rate": 2.0730487065508365e-05, "loss": 46.0018, "step": 5150 }, { "epoch": 0.7032561949621134, "grad_norm": 0.11462284624576569, "learning_rate": 2.0712860740247338e-05, "loss": 46.0062, "step": 5151 }, { "epoch": 0.7033927230527681, "grad_norm": 0.13481062650680542, "learning_rate": 2.069523995385689e-05, "loss": 46.0011, 
"step": 5152 }, { "epoch": 0.7035292511434228, "grad_norm": 0.09193302690982819, "learning_rate": 2.0677624709669474e-05, "loss": 46.0048, "step": 5153 }, { "epoch": 0.7036657792340774, "grad_norm": 0.20138688385486603, "learning_rate": 2.066001501101657e-05, "loss": 46.001, "step": 5154 }, { "epoch": 0.703802307324732, "grad_norm": 0.054655108600854874, "learning_rate": 2.0642410861228574e-05, "loss": 46.0036, "step": 5155 }, { "epoch": 0.7039388354153867, "grad_norm": 0.09285002946853638, "learning_rate": 2.0624812263634847e-05, "loss": 46.004, "step": 5156 }, { "epoch": 0.7040753635060414, "grad_norm": 0.04675108566880226, "learning_rate": 2.0607219221563633e-05, "loss": 46.0002, "step": 5157 }, { "epoch": 0.7042118915966961, "grad_norm": 0.0700058564543724, "learning_rate": 2.0589631738342237e-05, "loss": 46.0016, "step": 5158 }, { "epoch": 0.7043484196873506, "grad_norm": 0.07849103212356567, "learning_rate": 2.0572049817296835e-05, "loss": 46.0027, "step": 5159 }, { "epoch": 0.7044849477780053, "grad_norm": 0.0597328320145607, "learning_rate": 2.0554473461752578e-05, "loss": 46.0058, "step": 5160 }, { "epoch": 0.70462147586866, "grad_norm": 0.08762061595916748, "learning_rate": 2.0536902675033548e-05, "loss": 46.0047, "step": 5161 }, { "epoch": 0.7047580039593146, "grad_norm": 0.09379762411117554, "learning_rate": 2.0519337460462797e-05, "loss": 46.0027, "step": 5162 }, { "epoch": 0.7048945320499693, "grad_norm": 0.08116351068019867, "learning_rate": 2.0501777821362294e-05, "loss": 46.0122, "step": 5163 }, { "epoch": 0.7050310601406239, "grad_norm": 0.04597565159201622, "learning_rate": 2.048422376105299e-05, "loss": 46.0057, "step": 5164 }, { "epoch": 0.7051675882312786, "grad_norm": 0.0960649773478508, "learning_rate": 2.0466675282854747e-05, "loss": 46.0083, "step": 5165 }, { "epoch": 0.7053041163219332, "grad_norm": 0.19374443590641022, "learning_rate": 2.044913239008639e-05, "loss": 46.0066, "step": 5166 }, { "epoch": 0.7054406444125879, "grad_norm": 0.09569646418094635, "learning_rate": 2.0431595086065682e-05, "loss": 46.0025, "step": 5167 }, { "epoch": 0.7055771725032426, "grad_norm": 0.13034628331661224, "learning_rate": 2.0414063374109327e-05, "loss": 46.0086, "step": 5168 }, { "epoch": 0.7057137005938972, "grad_norm": 0.06839288026094437, "learning_rate": 2.0396537257532976e-05, "loss": 46.007, "step": 5169 }, { "epoch": 0.7058502286845518, "grad_norm": 0.09085565060377121, "learning_rate": 2.0379016739651226e-05, "loss": 46.0026, "step": 5170 }, { "epoch": 0.7059867567752065, "grad_norm": 0.10229804366827011, "learning_rate": 2.0361501823777584e-05, "loss": 46.0086, "step": 5171 }, { "epoch": 0.7061232848658612, "grad_norm": 0.0355328693985939, "learning_rate": 2.034399251322458e-05, "loss": 46.0043, "step": 5172 }, { "epoch": 0.7062598129565159, "grad_norm": 0.044846389442682266, "learning_rate": 2.0326488811303573e-05, "loss": 46.0026, "step": 5173 }, { "epoch": 0.7063963410471704, "grad_norm": 0.09504590183496475, "learning_rate": 2.0308990721324927e-05, "loss": 46.0129, "step": 5174 }, { "epoch": 0.7065328691378251, "grad_norm": 0.03950425982475281, "learning_rate": 2.029149824659793e-05, "loss": 46.0067, "step": 5175 }, { "epoch": 0.7066693972284798, "grad_norm": 0.08720805495977402, "learning_rate": 2.027401139043084e-05, "loss": 46.0062, "step": 5176 }, { "epoch": 0.7068059253191344, "grad_norm": 0.06144655868411064, "learning_rate": 2.025653015613079e-05, "loss": 46.001, "step": 5177 }, { "epoch": 0.7069424534097891, "grad_norm": 0.0894489511847496, 
"learning_rate": 2.0239054547003877e-05, "loss": 46.0065, "step": 5178 }, { "epoch": 0.7070789815004437, "grad_norm": 0.07005266845226288, "learning_rate": 2.022158456635518e-05, "loss": 46.0006, "step": 5179 }, { "epoch": 0.7072155095910984, "grad_norm": 0.07192141562700272, "learning_rate": 2.020412021748866e-05, "loss": 46.0011, "step": 5180 }, { "epoch": 0.707352037681753, "grad_norm": 0.14554628729820251, "learning_rate": 2.0186661503707205e-05, "loss": 46.0017, "step": 5181 }, { "epoch": 0.7074885657724077, "grad_norm": 0.10300369560718536, "learning_rate": 2.0169208428312647e-05, "loss": 46.0046, "step": 5182 }, { "epoch": 0.7076250938630623, "grad_norm": 0.03740731254220009, "learning_rate": 2.0151760994605805e-05, "loss": 46.0001, "step": 5183 }, { "epoch": 0.707761621953717, "grad_norm": 0.0778786689043045, "learning_rate": 2.0134319205886394e-05, "loss": 46.0113, "step": 5184 }, { "epoch": 0.7078981500443716, "grad_norm": 0.22881224751472473, "learning_rate": 2.0116883065452994e-05, "loss": 46.0057, "step": 5185 }, { "epoch": 0.7080346781350263, "grad_norm": 0.13596513867378235, "learning_rate": 2.0099452576603244e-05, "loss": 46.0061, "step": 5186 }, { "epoch": 0.708171206225681, "grad_norm": 0.12914326786994934, "learning_rate": 2.0082027742633618e-05, "loss": 46.0035, "step": 5187 }, { "epoch": 0.7083077343163355, "grad_norm": 0.0909707248210907, "learning_rate": 2.006460856683958e-05, "loss": 46.0068, "step": 5188 }, { "epoch": 0.7084442624069902, "grad_norm": 0.08536619693040848, "learning_rate": 2.0047195052515443e-05, "loss": 46.0065, "step": 5189 }, { "epoch": 0.7085807904976449, "grad_norm": 0.16486145555973053, "learning_rate": 2.0029787202954546e-05, "loss": 46.0011, "step": 5190 }, { "epoch": 0.7087173185882996, "grad_norm": 0.041401129215955734, "learning_rate": 2.0012385021449108e-05, "loss": 46.0048, "step": 5191 }, { "epoch": 0.7088538466789542, "grad_norm": 0.07908639311790466, "learning_rate": 1.9994988511290274e-05, "loss": 46.0078, "step": 5192 }, { "epoch": 0.7089903747696088, "grad_norm": 0.08911649137735367, "learning_rate": 1.997759767576813e-05, "loss": 46.0017, "step": 5193 }, { "epoch": 0.7091269028602635, "grad_norm": 0.027235236018896103, "learning_rate": 1.9960212518171672e-05, "loss": 46.0049, "step": 5194 }, { "epoch": 0.7092634309509182, "grad_norm": 0.12643979489803314, "learning_rate": 1.994283304178884e-05, "loss": 46.0044, "step": 5195 }, { "epoch": 0.7093999590415728, "grad_norm": 0.08735233545303345, "learning_rate": 1.9925459249906485e-05, "loss": 46.0002, "step": 5196 }, { "epoch": 0.7095364871322275, "grad_norm": 0.12676003575325012, "learning_rate": 1.99080911458104e-05, "loss": 46.0039, "step": 5197 }, { "epoch": 0.7096730152228821, "grad_norm": 0.05440109223127365, "learning_rate": 1.9890728732785285e-05, "loss": 46.0, "step": 5198 }, { "epoch": 0.7098095433135367, "grad_norm": 0.3001730144023895, "learning_rate": 1.9873372014114773e-05, "loss": 46.004, "step": 5199 }, { "epoch": 0.7099460714041914, "grad_norm": 0.07254945486783981, "learning_rate": 1.985602099308142e-05, "loss": 46.0009, "step": 5200 }, { "epoch": 0.7100825994948461, "grad_norm": 0.05639840289950371, "learning_rate": 1.9838675672966696e-05, "loss": 46.0106, "step": 5201 }, { "epoch": 0.7102191275855008, "grad_norm": 0.07395826280117035, "learning_rate": 1.982133605705101e-05, "loss": 46.0048, "step": 5202 }, { "epoch": 0.7103556556761553, "grad_norm": 0.07803581655025482, "learning_rate": 1.980400214861367e-05, "loss": 46.0055, "step": 5203 }, { "epoch": 
0.71049218376681, "grad_norm": 0.06422106921672821, "learning_rate": 1.9786673950932927e-05, "loss": 46.0009, "step": 5204 }, { "epoch": 0.7106287118574647, "grad_norm": 0.062141381204128265, "learning_rate": 1.9769351467285936e-05, "loss": 46.0016, "step": 5205 }, { "epoch": 0.7107652399481194, "grad_norm": 0.14852620661258698, "learning_rate": 1.9752034700948752e-05, "loss": 46.0043, "step": 5206 }, { "epoch": 0.710901768038774, "grad_norm": 0.059917107224464417, "learning_rate": 1.973472365519644e-05, "loss": 46.001, "step": 5207 }, { "epoch": 0.7110382961294286, "grad_norm": 0.10829856991767883, "learning_rate": 1.971741833330285e-05, "loss": 46.0017, "step": 5208 }, { "epoch": 0.7111748242200833, "grad_norm": 0.06492184102535248, "learning_rate": 1.9700118738540845e-05, "loss": 46.0028, "step": 5209 }, { "epoch": 0.7113113523107379, "grad_norm": 0.09659125655889511, "learning_rate": 1.9682824874182155e-05, "loss": 46.0068, "step": 5210 }, { "epoch": 0.7114478804013926, "grad_norm": 0.10635489970445633, "learning_rate": 1.966553674349747e-05, "loss": 46.0072, "step": 5211 }, { "epoch": 0.7115844084920473, "grad_norm": 0.13351090252399445, "learning_rate": 1.964825434975639e-05, "loss": 46.0091, "step": 5212 }, { "epoch": 0.7117209365827019, "grad_norm": 0.15740156173706055, "learning_rate": 1.963097769622735e-05, "loss": 46.0111, "step": 5213 }, { "epoch": 0.7118574646733565, "grad_norm": 0.06589256972074509, "learning_rate": 1.9613706786177816e-05, "loss": 46.0008, "step": 5214 }, { "epoch": 0.7119939927640112, "grad_norm": 0.12891656160354614, "learning_rate": 1.9596441622874097e-05, "loss": 46.0141, "step": 5215 }, { "epoch": 0.7121305208546659, "grad_norm": 0.14135286211967468, "learning_rate": 1.9579182209581448e-05, "loss": 46.0153, "step": 5216 }, { "epoch": 0.7122670489453204, "grad_norm": 0.03281141817569733, "learning_rate": 1.9561928549563968e-05, "loss": 46.0041, "step": 5217 }, { "epoch": 0.7124035770359751, "grad_norm": 0.06838557124137878, "learning_rate": 1.9544680646084773e-05, "loss": 46.0103, "step": 5218 }, { "epoch": 0.7125401051266298, "grad_norm": 0.1425132006406784, "learning_rate": 1.952743850240582e-05, "loss": 46.0121, "step": 5219 }, { "epoch": 0.7126766332172845, "grad_norm": 0.036407869309186935, "learning_rate": 1.9510202121788e-05, "loss": 46.0107, "step": 5220 }, { "epoch": 0.7128131613079391, "grad_norm": 0.1228683739900589, "learning_rate": 1.9492971507491105e-05, "loss": 46.0071, "step": 5221 }, { "epoch": 0.7129496893985937, "grad_norm": 0.151469886302948, "learning_rate": 1.947574666277383e-05, "loss": 46.0046, "step": 5222 }, { "epoch": 0.7130862174892484, "grad_norm": 0.06929731369018555, "learning_rate": 1.9458527590893803e-05, "loss": 46.0, "step": 5223 }, { "epoch": 0.7132227455799031, "grad_norm": 0.05061166733503342, "learning_rate": 1.9441314295107537e-05, "loss": 46.0001, "step": 5224 }, { "epoch": 0.7133592736705577, "grad_norm": 0.18184326589107513, "learning_rate": 1.9424106778670465e-05, "loss": 46.0101, "step": 5225 }, { "epoch": 0.7134958017612124, "grad_norm": 0.11618611216545105, "learning_rate": 1.9406905044836926e-05, "loss": 46.0036, "step": 5226 }, { "epoch": 0.713632329851867, "grad_norm": 0.07159554213285446, "learning_rate": 1.9389709096860155e-05, "loss": 46.0045, "step": 5227 }, { "epoch": 0.7137688579425217, "grad_norm": 0.052161939442157745, "learning_rate": 1.9372518937992306e-05, "loss": 46.0077, "step": 5228 }, { "epoch": 0.7139053860331763, "grad_norm": 0.08121146261692047, "learning_rate": 1.9355334571484436e-05, 
"loss": 46.0055, "step": 5229 }, { "epoch": 0.714041914123831, "grad_norm": 0.07842765003442764, "learning_rate": 1.93381560005865e-05, "loss": 46.0024, "step": 5230 }, { "epoch": 0.7141784422144857, "grad_norm": 0.16416840255260468, "learning_rate": 1.932098322854736e-05, "loss": 46.0099, "step": 5231 }, { "epoch": 0.7143149703051402, "grad_norm": 0.09169413894414902, "learning_rate": 1.930381625861477e-05, "loss": 46.0051, "step": 5232 }, { "epoch": 0.7144514983957949, "grad_norm": 0.197474405169487, "learning_rate": 1.9286655094035422e-05, "loss": 46.0091, "step": 5233 }, { "epoch": 0.7145880264864496, "grad_norm": 0.07755966484546661, "learning_rate": 1.926949973805487e-05, "loss": 46.0037, "step": 5234 }, { "epoch": 0.7147245545771043, "grad_norm": 0.06431570649147034, "learning_rate": 1.925235019391759e-05, "loss": 46.0066, "step": 5235 }, { "epoch": 0.7148610826677589, "grad_norm": 0.08679851144552231, "learning_rate": 1.923520646486695e-05, "loss": 46.0019, "step": 5236 }, { "epoch": 0.7149976107584135, "grad_norm": 0.08994612097740173, "learning_rate": 1.921806855414523e-05, "loss": 46.0095, "step": 5237 }, { "epoch": 0.7151341388490682, "grad_norm": 0.07908709347248077, "learning_rate": 1.9200936464993586e-05, "loss": 46.0032, "step": 5238 }, { "epoch": 0.7152706669397229, "grad_norm": 0.03593314439058304, "learning_rate": 1.918381020065213e-05, "loss": 46.0016, "step": 5239 }, { "epoch": 0.7154071950303775, "grad_norm": 0.13920776546001434, "learning_rate": 1.916668976435979e-05, "loss": 46.0093, "step": 5240 }, { "epoch": 0.7155437231210322, "grad_norm": 0.0481339655816555, "learning_rate": 1.9149575159354455e-05, "loss": 46.0028, "step": 5241 }, { "epoch": 0.7156802512116868, "grad_norm": 0.10939805954694748, "learning_rate": 1.913246638887286e-05, "loss": 46.0031, "step": 5242 }, { "epoch": 0.7158167793023414, "grad_norm": 0.039200861006975174, "learning_rate": 1.9115363456150724e-05, "loss": 46.0038, "step": 5243 }, { "epoch": 0.7159533073929961, "grad_norm": 0.19473020732402802, "learning_rate": 1.9098266364422553e-05, "loss": 46.0044, "step": 5244 }, { "epoch": 0.7160898354836508, "grad_norm": 0.11482709646224976, "learning_rate": 1.90811751169218e-05, "loss": 46.01, "step": 5245 }, { "epoch": 0.7162263635743054, "grad_norm": 0.19279418885707855, "learning_rate": 1.9064089716880845e-05, "loss": 46.0076, "step": 5246 }, { "epoch": 0.71636289166496, "grad_norm": 0.1402665674686432, "learning_rate": 1.9047010167530933e-05, "loss": 46.0074, "step": 5247 }, { "epoch": 0.7164994197556147, "grad_norm": 0.3061736524105072, "learning_rate": 1.9029936472102162e-05, "loss": 46.0058, "step": 5248 }, { "epoch": 0.7166359478462694, "grad_norm": 0.3206253945827484, "learning_rate": 1.9012868633823555e-05, "loss": 46.0063, "step": 5249 }, { "epoch": 0.7167724759369241, "grad_norm": 0.3918476700782776, "learning_rate": 1.8995806655923075e-05, "loss": 46.0061, "step": 5250 }, { "epoch": 0.7169090040275786, "grad_norm": 0.1272118240594864, "learning_rate": 1.8978750541627532e-05, "loss": 46.0069, "step": 5251 }, { "epoch": 0.7170455321182333, "grad_norm": 0.1047128215432167, "learning_rate": 1.8961700294162577e-05, "loss": 46.0022, "step": 5252 }, { "epoch": 0.717182060208888, "grad_norm": 0.06548454612493515, "learning_rate": 1.894465591675285e-05, "loss": 46.0023, "step": 5253 }, { "epoch": 0.7173185882995426, "grad_norm": 0.05019192770123482, "learning_rate": 1.8927617412621822e-05, "loss": 46.0028, "step": 5254 }, { "epoch": 0.7174551163901973, "grad_norm": 0.0470040962100029, 
"learning_rate": 1.891058478499188e-05, "loss": 46.0011, "step": 5255 }, { "epoch": 0.7175916444808519, "grad_norm": 0.054133109748363495, "learning_rate": 1.8893558037084237e-05, "loss": 46.0027, "step": 5256 }, { "epoch": 0.7177281725715066, "grad_norm": 0.08166297525167465, "learning_rate": 1.887653717211909e-05, "loss": 46.0016, "step": 5257 }, { "epoch": 0.7178647006621612, "grad_norm": 0.11193818598985672, "learning_rate": 1.885952219331546e-05, "loss": 46.003, "step": 5258 }, { "epoch": 0.7180012287528159, "grad_norm": 0.11662346869707108, "learning_rate": 1.8842513103891268e-05, "loss": 46.0007, "step": 5259 }, { "epoch": 0.7181377568434706, "grad_norm": 0.14808106422424316, "learning_rate": 1.8825509907063327e-05, "loss": 46.002, "step": 5260 }, { "epoch": 0.7182742849341252, "grad_norm": 0.05607736110687256, "learning_rate": 1.8808512606047328e-05, "loss": 46.0088, "step": 5261 }, { "epoch": 0.7184108130247798, "grad_norm": 0.1198188066482544, "learning_rate": 1.8791521204057856e-05, "loss": 46.0085, "step": 5262 }, { "epoch": 0.7185473411154345, "grad_norm": 0.07292872667312622, "learning_rate": 1.877453570430837e-05, "loss": 46.0079, "step": 5263 }, { "epoch": 0.7186838692060892, "grad_norm": 0.15939420461654663, "learning_rate": 1.875755611001122e-05, "loss": 46.0031, "step": 5264 }, { "epoch": 0.7188203972967439, "grad_norm": 0.05481604114174843, "learning_rate": 1.874058242437764e-05, "loss": 46.0079, "step": 5265 }, { "epoch": 0.7189569253873984, "grad_norm": 0.09659121930599213, "learning_rate": 1.8723614650617723e-05, "loss": 46.0052, "step": 5266 }, { "epoch": 0.7190934534780531, "grad_norm": 0.1037779375910759, "learning_rate": 1.8706652791940515e-05, "loss": 46.0031, "step": 5267 }, { "epoch": 0.7192299815687078, "grad_norm": 0.11536254733800888, "learning_rate": 1.8689696851553844e-05, "loss": 46.0075, "step": 5268 }, { "epoch": 0.7193665096593624, "grad_norm": 0.06232769042253494, "learning_rate": 1.867274683266449e-05, "loss": 46.0034, "step": 5269 }, { "epoch": 0.7195030377500171, "grad_norm": 0.09001810103654861, "learning_rate": 1.8655802738478063e-05, "loss": 46.0067, "step": 5270 }, { "epoch": 0.7196395658406717, "grad_norm": 0.18099626898765564, "learning_rate": 1.863886457219914e-05, "loss": 46.0061, "step": 5271 }, { "epoch": 0.7197760939313264, "grad_norm": 0.0519765205681324, "learning_rate": 1.8621932337031055e-05, "loss": 46.0044, "step": 5272 }, { "epoch": 0.719912622021981, "grad_norm": 0.11737678200006485, "learning_rate": 1.8605006036176098e-05, "loss": 46.0108, "step": 5273 }, { "epoch": 0.7200491501126357, "grad_norm": 0.10283605009317398, "learning_rate": 1.8588085672835443e-05, "loss": 46.0128, "step": 5274 }, { "epoch": 0.7201856782032904, "grad_norm": 0.07424657791852951, "learning_rate": 1.8571171250209128e-05, "loss": 46.0082, "step": 5275 }, { "epoch": 0.7203222062939449, "grad_norm": 0.08705967664718628, "learning_rate": 1.855426277149602e-05, "loss": 46.004, "step": 5276 }, { "epoch": 0.7204587343845996, "grad_norm": 0.06578507274389267, "learning_rate": 1.8537360239893897e-05, "loss": 46.0161, "step": 5277 }, { "epoch": 0.7205952624752543, "grad_norm": 0.08843359351158142, "learning_rate": 1.852046365859946e-05, "loss": 46.0032, "step": 5278 }, { "epoch": 0.720731790565909, "grad_norm": 0.1044355034828186, "learning_rate": 1.850357303080823e-05, "loss": 46.0091, "step": 5279 }, { "epoch": 0.7208683186565635, "grad_norm": 0.07189581543207169, "learning_rate": 1.8486688359714565e-05, "loss": 46.0103, "step": 5280 }, { "epoch": 
0.7210048467472182, "grad_norm": 0.12152832001447678, "learning_rate": 1.8469809648511798e-05, "loss": 46.0042, "step": 5281 }, { "epoch": 0.7211413748378729, "grad_norm": 0.17689067125320435, "learning_rate": 1.8452936900392065e-05, "loss": 46.0049, "step": 5282 }, { "epoch": 0.7212779029285276, "grad_norm": 0.0805056169629097, "learning_rate": 1.84360701185464e-05, "loss": 46.0029, "step": 5283 }, { "epoch": 0.7214144310191822, "grad_norm": 0.0930216982960701, "learning_rate": 1.8419209306164652e-05, "loss": 46.0097, "step": 5284 }, { "epoch": 0.7215509591098368, "grad_norm": 0.049265045672655106, "learning_rate": 1.840235446643564e-05, "loss": 46.0004, "step": 5285 }, { "epoch": 0.7216874872004915, "grad_norm": 0.047425951808691025, "learning_rate": 1.8385505602546983e-05, "loss": 46.0047, "step": 5286 }, { "epoch": 0.7218240152911461, "grad_norm": 0.04126010462641716, "learning_rate": 1.8368662717685187e-05, "loss": 46.0028, "step": 5287 }, { "epoch": 0.7219605433818008, "grad_norm": 0.152616947889328, "learning_rate": 1.8351825815035628e-05, "loss": 46.0047, "step": 5288 }, { "epoch": 0.7220970714724555, "grad_norm": 0.1060638576745987, "learning_rate": 1.8334994897782554e-05, "loss": 46.0076, "step": 5289 }, { "epoch": 0.7222335995631101, "grad_norm": 0.11704757809638977, "learning_rate": 1.831816996910908e-05, "loss": 46.0039, "step": 5290 }, { "epoch": 0.7223701276537647, "grad_norm": 0.08203765749931335, "learning_rate": 1.830135103219718e-05, "loss": 46.0038, "step": 5291 }, { "epoch": 0.7225066557444194, "grad_norm": 0.083570197224617, "learning_rate": 1.82845380902277e-05, "loss": 46.0061, "step": 5292 }, { "epoch": 0.7226431838350741, "grad_norm": 0.13784277439117432, "learning_rate": 1.8267731146380357e-05, "loss": 46.0035, "step": 5293 }, { "epoch": 0.7227797119257288, "grad_norm": 0.053149230778217316, "learning_rate": 1.8250930203833732e-05, "loss": 46.0027, "step": 5294 }, { "epoch": 0.7229162400163833, "grad_norm": 0.09497136622667313, "learning_rate": 1.823413526576527e-05, "loss": 46.0025, "step": 5295 }, { "epoch": 0.723052768107038, "grad_norm": 0.3273810148239136, "learning_rate": 1.8217346335351272e-05, "loss": 46.0049, "step": 5296 }, { "epoch": 0.7231892961976927, "grad_norm": 0.13317608833312988, "learning_rate": 1.8200563415766914e-05, "loss": 46.0094, "step": 5297 }, { "epoch": 0.7233258242883474, "grad_norm": 0.3120952546596527, "learning_rate": 1.8183786510186235e-05, "loss": 46.0147, "step": 5298 }, { "epoch": 0.723462352379002, "grad_norm": 0.26940369606018066, "learning_rate": 1.816701562178213e-05, "loss": 46.0043, "step": 5299 }, { "epoch": 0.7235988804696566, "grad_norm": 0.2720176577568054, "learning_rate": 1.815025075372636e-05, "loss": 46.0068, "step": 5300 }, { "epoch": 0.7237354085603113, "grad_norm": 0.05258096382021904, "learning_rate": 1.8133491909189547e-05, "loss": 46.0119, "step": 5301 }, { "epoch": 0.7238719366509659, "grad_norm": 0.16392959654331207, "learning_rate": 1.8116739091341172e-05, "loss": 46.0034, "step": 5302 }, { "epoch": 0.7240084647416206, "grad_norm": 0.20461377501487732, "learning_rate": 1.8099992303349577e-05, "loss": 46.0085, "step": 5303 }, { "epoch": 0.7241449928322753, "grad_norm": 0.15692614018917084, "learning_rate": 1.8083251548381967e-05, "loss": 46.0023, "step": 5304 }, { "epoch": 0.7242815209229299, "grad_norm": 0.05253392457962036, "learning_rate": 1.8066516829604385e-05, "loss": 46.0013, "step": 5305 }, { "epoch": 0.7244180490135845, "grad_norm": 0.04466629773378372, "learning_rate": 1.804978815018178e-05, 
"loss": 46.0007, "step": 5306 }, { "epoch": 0.7245545771042392, "grad_norm": 0.10207663476467133, "learning_rate": 1.8033065513277937e-05, "loss": 46.0027, "step": 5307 }, { "epoch": 0.7246911051948939, "grad_norm": 0.19589634239673615, "learning_rate": 1.801634892205545e-05, "loss": 46.006, "step": 5308 }, { "epoch": 0.7248276332855486, "grad_norm": 0.09111536294221878, "learning_rate": 1.7999638379675814e-05, "loss": 46.0081, "step": 5309 }, { "epoch": 0.7249641613762031, "grad_norm": 0.04469921812415123, "learning_rate": 1.7982933889299407e-05, "loss": 46.0015, "step": 5310 }, { "epoch": 0.7251006894668578, "grad_norm": 0.03910407796502113, "learning_rate": 1.796623545408544e-05, "loss": 46.0083, "step": 5311 }, { "epoch": 0.7252372175575125, "grad_norm": 0.05065545067191124, "learning_rate": 1.794954307719191e-05, "loss": 46.0056, "step": 5312 }, { "epoch": 0.7253737456481671, "grad_norm": 0.06040341407060623, "learning_rate": 1.7932856761775783e-05, "loss": 46.0058, "step": 5313 }, { "epoch": 0.7255102737388217, "grad_norm": 0.10523628443479538, "learning_rate": 1.791617651099281e-05, "loss": 46.0114, "step": 5314 }, { "epoch": 0.7256468018294764, "grad_norm": 0.1608646661043167, "learning_rate": 1.7899502327997624e-05, "loss": 46.008, "step": 5315 }, { "epoch": 0.7257833299201311, "grad_norm": 0.08977536112070084, "learning_rate": 1.7882834215943644e-05, "loss": 46.0015, "step": 5316 }, { "epoch": 0.7259198580107857, "grad_norm": 0.15688104927539825, "learning_rate": 1.7866172177983247e-05, "loss": 46.0059, "step": 5317 }, { "epoch": 0.7260563861014404, "grad_norm": 0.05061528831720352, "learning_rate": 1.7849516217267593e-05, "loss": 46.0109, "step": 5318 }, { "epoch": 0.726192914192095, "grad_norm": 0.0746210366487503, "learning_rate": 1.7832866336946698e-05, "loss": 46.0108, "step": 5319 }, { "epoch": 0.7263294422827496, "grad_norm": 0.15195424854755402, "learning_rate": 1.7816222540169435e-05, "loss": 46.0072, "step": 5320 }, { "epoch": 0.7264659703734043, "grad_norm": 0.09558065235614777, "learning_rate": 1.7799584830083538e-05, "loss": 46.0047, "step": 5321 }, { "epoch": 0.726602498464059, "grad_norm": 0.05772695690393448, "learning_rate": 1.7782953209835574e-05, "loss": 46.0074, "step": 5322 }, { "epoch": 0.7267390265547137, "grad_norm": 0.26707372069358826, "learning_rate": 1.7766327682570967e-05, "loss": 46.0038, "step": 5323 }, { "epoch": 0.7268755546453682, "grad_norm": 0.0874713808298111, "learning_rate": 1.7749708251433984e-05, "loss": 46.0026, "step": 5324 }, { "epoch": 0.7270120827360229, "grad_norm": 0.2446008026599884, "learning_rate": 1.7733094919567745e-05, "loss": 46.0063, "step": 5325 }, { "epoch": 0.7271486108266776, "grad_norm": 0.11917874217033386, "learning_rate": 1.771648769011421e-05, "loss": 46.0032, "step": 5326 }, { "epoch": 0.7272851389173323, "grad_norm": 0.07293413579463959, "learning_rate": 1.769988656621418e-05, "loss": 46.0031, "step": 5327 }, { "epoch": 0.7274216670079869, "grad_norm": 0.09124890714883804, "learning_rate": 1.768329155100732e-05, "loss": 46.003, "step": 5328 }, { "epoch": 0.7275581950986415, "grad_norm": 0.11262402683496475, "learning_rate": 1.766670264763213e-05, "loss": 46.0004, "step": 5329 }, { "epoch": 0.7276947231892962, "grad_norm": 0.16831693053245544, "learning_rate": 1.765011985922594e-05, "loss": 46.0061, "step": 5330 }, { "epoch": 0.7278312512799509, "grad_norm": 0.06389530748128891, "learning_rate": 1.763354318892494e-05, "loss": 46.008, "step": 5331 }, { "epoch": 0.7279677793706055, "grad_norm": 0.0699084997177124, 
"learning_rate": 1.7616972639864166e-05, "loss": 46.0059, "step": 5332 }, { "epoch": 0.7281043074612602, "grad_norm": 0.039655040949583054, "learning_rate": 1.7600408215177472e-05, "loss": 46.0082, "step": 5333 }, { "epoch": 0.7282408355519148, "grad_norm": 0.06580458581447601, "learning_rate": 1.7583849917997615e-05, "loss": 46.0044, "step": 5334 }, { "epoch": 0.7283773636425694, "grad_norm": 0.030041389167308807, "learning_rate": 1.7567297751456096e-05, "loss": 46.0095, "step": 5335 }, { "epoch": 0.7285138917332241, "grad_norm": 0.10109765827655792, "learning_rate": 1.755075171868334e-05, "loss": 46.0027, "step": 5336 }, { "epoch": 0.7286504198238788, "grad_norm": 0.044945862144231796, "learning_rate": 1.7534211822808556e-05, "loss": 46.0, "step": 5337 }, { "epoch": 0.7287869479145335, "grad_norm": 0.11990824341773987, "learning_rate": 1.7517678066959865e-05, "loss": 46.0049, "step": 5338 }, { "epoch": 0.728923476005188, "grad_norm": 0.06522159278392792, "learning_rate": 1.7501150454264137e-05, "loss": 46.0161, "step": 5339 }, { "epoch": 0.7290600040958427, "grad_norm": 0.04154995456337929, "learning_rate": 1.7484628987847125e-05, "loss": 46.0082, "step": 5340 }, { "epoch": 0.7291965321864974, "grad_norm": 0.0465766116976738, "learning_rate": 1.746811367083344e-05, "loss": 46.0029, "step": 5341 }, { "epoch": 0.7293330602771521, "grad_norm": 0.03792663663625717, "learning_rate": 1.745160450634652e-05, "loss": 46.0012, "step": 5342 }, { "epoch": 0.7294695883678067, "grad_norm": 0.06525614112615585, "learning_rate": 1.7435101497508584e-05, "loss": 46.0073, "step": 5343 }, { "epoch": 0.7296061164584613, "grad_norm": 0.12308293581008911, "learning_rate": 1.7418604647440733e-05, "loss": 46.0032, "step": 5344 }, { "epoch": 0.729742644549116, "grad_norm": 0.07487914711236954, "learning_rate": 1.7402113959262932e-05, "loss": 46.0017, "step": 5345 }, { "epoch": 0.7298791726397706, "grad_norm": 0.2686620354652405, "learning_rate": 1.738562943609396e-05, "loss": 46.0084, "step": 5346 }, { "epoch": 0.7300157007304253, "grad_norm": 0.2511179447174072, "learning_rate": 1.7369151081051355e-05, "loss": 46.0051, "step": 5347 }, { "epoch": 0.7301522288210799, "grad_norm": 0.12429425120353699, "learning_rate": 1.7352678897251607e-05, "loss": 46.0026, "step": 5348 }, { "epoch": 0.7302887569117346, "grad_norm": 0.1282913088798523, "learning_rate": 1.7336212887809965e-05, "loss": 46.0, "step": 5349 }, { "epoch": 0.7304252850023892, "grad_norm": 0.2897639870643616, "learning_rate": 1.7319753055840553e-05, "loss": 46.0015, "step": 5350 }, { "epoch": 0.7305618130930439, "grad_norm": 0.09277843683958054, "learning_rate": 1.7303299404456252e-05, "loss": 46.006, "step": 5351 }, { "epoch": 0.7306983411836986, "grad_norm": 0.13819581270217896, "learning_rate": 1.7286851936768866e-05, "loss": 46.0096, "step": 5352 }, { "epoch": 0.7308348692743531, "grad_norm": 0.07666270434856415, "learning_rate": 1.7270410655888986e-05, "loss": 46.0034, "step": 5353 }, { "epoch": 0.7309713973650078, "grad_norm": 0.04584527760744095, "learning_rate": 1.725397556492602e-05, "loss": 46.0005, "step": 5354 }, { "epoch": 0.7311079254556625, "grad_norm": 0.06694572418928146, "learning_rate": 1.7237546666988235e-05, "loss": 46.0016, "step": 5355 }, { "epoch": 0.7312444535463172, "grad_norm": 0.10261277109384537, "learning_rate": 1.7221123965182713e-05, "loss": 46.0054, "step": 5356 }, { "epoch": 0.7313809816369718, "grad_norm": 0.06096161901950836, "learning_rate": 1.720470746261535e-05, "loss": 46.0002, "step": 5357 }, { "epoch": 
0.7315175097276264, "grad_norm": 0.22287146747112274, "learning_rate": 1.718829716239089e-05, "loss": 46.0078, "step": 5358 }, { "epoch": 0.7316540378182811, "grad_norm": 0.10562193393707275, "learning_rate": 1.717189306761291e-05, "loss": 46.0037, "step": 5359 }, { "epoch": 0.7317905659089358, "grad_norm": 0.03857995942234993, "learning_rate": 1.7155495181383786e-05, "loss": 46.0046, "step": 5360 }, { "epoch": 0.7319270939995904, "grad_norm": 0.04753446951508522, "learning_rate": 1.7139103506804737e-05, "loss": 46.0089, "step": 5361 }, { "epoch": 0.7320636220902451, "grad_norm": 0.05965697392821312, "learning_rate": 1.712271804697581e-05, "loss": 46.0076, "step": 5362 }, { "epoch": 0.7322001501808997, "grad_norm": 0.11120961606502533, "learning_rate": 1.710633880499587e-05, "loss": 46.0013, "step": 5363 }, { "epoch": 0.7323366782715544, "grad_norm": 0.05367395654320717, "learning_rate": 1.7089965783962608e-05, "loss": 46.0072, "step": 5364 }, { "epoch": 0.732473206362209, "grad_norm": 0.042408574372529984, "learning_rate": 1.7073598986972517e-05, "loss": 46.0109, "step": 5365 }, { "epoch": 0.7326097344528637, "grad_norm": 0.10406532883644104, "learning_rate": 1.7057238417120993e-05, "loss": 46.0042, "step": 5366 }, { "epoch": 0.7327462625435184, "grad_norm": 0.07456917315721512, "learning_rate": 1.7040884077502133e-05, "loss": 46.0082, "step": 5367 }, { "epoch": 0.7328827906341729, "grad_norm": 0.10831788927316666, "learning_rate": 1.7024535971208948e-05, "loss": 46.0075, "step": 5368 }, { "epoch": 0.7330193187248276, "grad_norm": 0.13253560662269592, "learning_rate": 1.7008194101333212e-05, "loss": 46.006, "step": 5369 }, { "epoch": 0.7331558468154823, "grad_norm": 0.07692983001470566, "learning_rate": 1.699185847096561e-05, "loss": 46.0075, "step": 5370 }, { "epoch": 0.733292374906137, "grad_norm": 0.07200937718153, "learning_rate": 1.697552908319553e-05, "loss": 46.0064, "step": 5371 }, { "epoch": 0.7334289029967916, "grad_norm": 0.04683661833405495, "learning_rate": 1.695920594111123e-05, "loss": 46.0089, "step": 5372 }, { "epoch": 0.7335654310874462, "grad_norm": 0.08781977742910385, "learning_rate": 1.694288904779982e-05, "loss": 46.007, "step": 5373 }, { "epoch": 0.7337019591781009, "grad_norm": 0.0637134239077568, "learning_rate": 1.6926578406347215e-05, "loss": 46.0071, "step": 5374 }, { "epoch": 0.7338384872687556, "grad_norm": 0.09934572130441666, "learning_rate": 1.691027401983809e-05, "loss": 46.0028, "step": 5375 }, { "epoch": 0.7339750153594102, "grad_norm": 0.10775907337665558, "learning_rate": 1.6893975891355972e-05, "loss": 46.0037, "step": 5376 }, { "epoch": 0.7341115434500648, "grad_norm": 0.10067916661500931, "learning_rate": 1.687768402398326e-05, "loss": 46.0105, "step": 5377 }, { "epoch": 0.7342480715407195, "grad_norm": 0.092503622174263, "learning_rate": 1.6861398420801105e-05, "loss": 46.0101, "step": 5378 }, { "epoch": 0.7343845996313741, "grad_norm": 0.060784321278333664, "learning_rate": 1.6845119084889443e-05, "loss": 46.0038, "step": 5379 }, { "epoch": 0.7345211277220288, "grad_norm": 0.0479910708963871, "learning_rate": 1.6828846019327127e-05, "loss": 46.0062, "step": 5380 }, { "epoch": 0.7346576558126835, "grad_norm": 0.23504577577114105, "learning_rate": 1.6812579227191748e-05, "loss": 46.0115, "step": 5381 }, { "epoch": 0.7347941839033381, "grad_norm": 0.12136361002922058, "learning_rate": 1.6796318711559744e-05, "loss": 46.0094, "step": 5382 }, { "epoch": 0.7349307119939927, "grad_norm": 0.06396830826997757, "learning_rate": 1.6780064475506296e-05, 
"loss": 46.0034, "step": 5383 }, { "epoch": 0.7350672400846474, "grad_norm": 0.23416943848133087, "learning_rate": 1.676381652210552e-05, "loss": 46.0074, "step": 5384 }, { "epoch": 0.7352037681753021, "grad_norm": 0.17572827637195587, "learning_rate": 1.6747574854430243e-05, "loss": 46.0024, "step": 5385 }, { "epoch": 0.7353402962659568, "grad_norm": 0.05276840552687645, "learning_rate": 1.6731339475552144e-05, "loss": 46.0009, "step": 5386 }, { "epoch": 0.7354768243566113, "grad_norm": 0.12149900197982788, "learning_rate": 1.671511038854171e-05, "loss": 46.0055, "step": 5387 }, { "epoch": 0.735613352447266, "grad_norm": 0.06876447796821594, "learning_rate": 1.6698887596468233e-05, "loss": 46.0054, "step": 5388 }, { "epoch": 0.7357498805379207, "grad_norm": 0.08302745968103409, "learning_rate": 1.6682671102399805e-05, "loss": 46.0033, "step": 5389 }, { "epoch": 0.7358864086285754, "grad_norm": 0.0973598062992096, "learning_rate": 1.6666460909403352e-05, "loss": 46.0065, "step": 5390 }, { "epoch": 0.73602293671923, "grad_norm": 0.12575586140155792, "learning_rate": 1.6650257020544585e-05, "loss": 46.0028, "step": 5391 }, { "epoch": 0.7361594648098846, "grad_norm": 0.03935452178120613, "learning_rate": 1.6634059438888033e-05, "loss": 46.009, "step": 5392 }, { "epoch": 0.7362959929005393, "grad_norm": 0.11232893913984299, "learning_rate": 1.6617868167497036e-05, "loss": 46.0006, "step": 5393 }, { "epoch": 0.7364325209911939, "grad_norm": 0.08109261840581894, "learning_rate": 1.660168320943373e-05, "loss": 46.001, "step": 5394 }, { "epoch": 0.7365690490818486, "grad_norm": 0.4949767291545868, "learning_rate": 1.658550456775906e-05, "loss": 46.0043, "step": 5395 }, { "epoch": 0.7367055771725033, "grad_norm": 0.1577654778957367, "learning_rate": 1.6569332245532776e-05, "loss": 46.0057, "step": 5396 }, { "epoch": 0.7368421052631579, "grad_norm": 0.2697887718677521, "learning_rate": 1.6553166245813443e-05, "loss": 46.0035, "step": 5397 }, { "epoch": 0.7369786333538125, "grad_norm": 0.13978305459022522, "learning_rate": 1.653700657165843e-05, "loss": 46.0049, "step": 5398 }, { "epoch": 0.7371151614444672, "grad_norm": 0.22209419310092926, "learning_rate": 1.652085322612389e-05, "loss": 46.005, "step": 5399 }, { "epoch": 0.7372516895351219, "grad_norm": 0.1221342384815216, "learning_rate": 1.650470621226477e-05, "loss": 46.0016, "step": 5400 }, { "epoch": 0.7373882176257766, "grad_norm": 0.2416047304868698, "learning_rate": 1.648856553313489e-05, "loss": 46.0034, "step": 5401 }, { "epoch": 0.7375247457164311, "grad_norm": 0.04126585274934769, "learning_rate": 1.6472431191786813e-05, "loss": 46.003, "step": 5402 }, { "epoch": 0.7376612738070858, "grad_norm": 0.05683629959821701, "learning_rate": 1.6456303191271883e-05, "loss": 46.0001, "step": 5403 }, { "epoch": 0.7377978018977405, "grad_norm": 0.09062965214252472, "learning_rate": 1.6440181534640275e-05, "loss": 46.0044, "step": 5404 }, { "epoch": 0.7379343299883951, "grad_norm": 0.1460847556591034, "learning_rate": 1.6424066224940998e-05, "loss": 46.002, "step": 5405 }, { "epoch": 0.7380708580790498, "grad_norm": 0.08085830509662628, "learning_rate": 1.640795726522183e-05, "loss": 46.0046, "step": 5406 }, { "epoch": 0.7382073861697044, "grad_norm": 0.05993195250630379, "learning_rate": 1.6391854658529287e-05, "loss": 46.0002, "step": 5407 }, { "epoch": 0.7383439142603591, "grad_norm": 0.15321318805217743, "learning_rate": 1.6375758407908802e-05, "loss": 46.0023, "step": 5408 }, { "epoch": 0.7384804423510137, "grad_norm": 0.061659593135118484, 
"learning_rate": 1.635966851640452e-05, "loss": 46.0016, "step": 5409 }, { "epoch": 0.7386169704416684, "grad_norm": 0.10772497951984406, "learning_rate": 1.634358498705943e-05, "loss": 46.0017, "step": 5410 }, { "epoch": 0.738753498532323, "grad_norm": 0.10187914222478867, "learning_rate": 1.6327507822915244e-05, "loss": 46.0061, "step": 5411 }, { "epoch": 0.7388900266229776, "grad_norm": 0.18368183076381683, "learning_rate": 1.6311437027012584e-05, "loss": 46.0058, "step": 5412 }, { "epoch": 0.7390265547136323, "grad_norm": 0.07517113536596298, "learning_rate": 1.6295372602390767e-05, "loss": 46.0075, "step": 5413 }, { "epoch": 0.739163082804287, "grad_norm": 0.1156243309378624, "learning_rate": 1.627931455208797e-05, "loss": 46.0051, "step": 5414 }, { "epoch": 0.7392996108949417, "grad_norm": 0.1071356013417244, "learning_rate": 1.6263262879141124e-05, "loss": 46.0015, "step": 5415 }, { "epoch": 0.7394361389855962, "grad_norm": 0.04690391197800636, "learning_rate": 1.624721758658597e-05, "loss": 46.0035, "step": 5416 }, { "epoch": 0.7395726670762509, "grad_norm": 0.09242847561836243, "learning_rate": 1.6231178677457047e-05, "loss": 46.0066, "step": 5417 }, { "epoch": 0.7397091951669056, "grad_norm": 0.06877323985099792, "learning_rate": 1.6215146154787674e-05, "loss": 46.0096, "step": 5418 }, { "epoch": 0.7398457232575603, "grad_norm": 0.09247279167175293, "learning_rate": 1.6199120021609976e-05, "loss": 46.0003, "step": 5419 }, { "epoch": 0.7399822513482149, "grad_norm": 0.08279221504926682, "learning_rate": 1.618310028095486e-05, "loss": 46.0033, "step": 5420 }, { "epoch": 0.7401187794388695, "grad_norm": 0.052157264202833176, "learning_rate": 1.6167086935852027e-05, "loss": 46.0007, "step": 5421 }, { "epoch": 0.7402553075295242, "grad_norm": 0.15105421841144562, "learning_rate": 1.6151079989329964e-05, "loss": 46.0089, "step": 5422 }, { "epoch": 0.7403918356201789, "grad_norm": 0.12230578809976578, "learning_rate": 1.613507944441596e-05, "loss": 46.0023, "step": 5423 }, { "epoch": 0.7405283637108335, "grad_norm": 0.057427216321229935, "learning_rate": 1.6119085304136082e-05, "loss": 46.0039, "step": 5424 }, { "epoch": 0.7406648918014882, "grad_norm": 0.061058562248945236, "learning_rate": 1.6103097571515186e-05, "loss": 46.0041, "step": 5425 }, { "epoch": 0.7408014198921428, "grad_norm": 0.13891039788722992, "learning_rate": 1.6087116249576924e-05, "loss": 46.0088, "step": 5426 }, { "epoch": 0.7409379479827974, "grad_norm": 0.07750581949949265, "learning_rate": 1.6071141341343732e-05, "loss": 46.0042, "step": 5427 }, { "epoch": 0.7410744760734521, "grad_norm": 0.10438138991594315, "learning_rate": 1.6055172849836825e-05, "loss": 46.0012, "step": 5428 }, { "epoch": 0.7412110041641068, "grad_norm": 0.17802530527114868, "learning_rate": 1.6039210778076213e-05, "loss": 46.0033, "step": 5429 }, { "epoch": 0.7413475322547615, "grad_norm": 0.11366543918848038, "learning_rate": 1.6023255129080696e-05, "loss": 46.0055, "step": 5430 }, { "epoch": 0.741484060345416, "grad_norm": 0.18453694880008698, "learning_rate": 1.6007305905867848e-05, "loss": 46.0079, "step": 5431 }, { "epoch": 0.7416205884360707, "grad_norm": 0.31165289878845215, "learning_rate": 1.599136311145402e-05, "loss": 46.0065, "step": 5432 }, { "epoch": 0.7417571165267254, "grad_norm": 0.06537707149982452, "learning_rate": 1.5975426748854416e-05, "loss": 46.0035, "step": 5433 }, { "epoch": 0.74189364461738, "grad_norm": 0.06993164867162704, "learning_rate": 1.5959496821082905e-05, "loss": 46.0017, "step": 5434 }, { "epoch": 
0.7420301727080347, "grad_norm": 0.09955190867185593, "learning_rate": 1.5943573331152227e-05, "loss": 46.0007, "step": 5435 }, { "epoch": 0.7421667007986893, "grad_norm": 0.07138502597808838, "learning_rate": 1.592765628207386e-05, "loss": 46.0059, "step": 5436 }, { "epoch": 0.742303228889344, "grad_norm": 0.062660813331604, "learning_rate": 1.5911745676858135e-05, "loss": 46.0036, "step": 5437 }, { "epoch": 0.7424397569799986, "grad_norm": 0.0448494479060173, "learning_rate": 1.5895841518514065e-05, "loss": 46.0108, "step": 5438 }, { "epoch": 0.7425762850706533, "grad_norm": 0.13105006515979767, "learning_rate": 1.587994381004949e-05, "loss": 46.0033, "step": 5439 }, { "epoch": 0.742712813161308, "grad_norm": 0.08791687339544296, "learning_rate": 1.5864052554471065e-05, "loss": 46.0047, "step": 5440 }, { "epoch": 0.7428493412519626, "grad_norm": 0.12187783420085907, "learning_rate": 1.5848167754784186e-05, "loss": 46.0033, "step": 5441 }, { "epoch": 0.7429858693426172, "grad_norm": 0.12232446670532227, "learning_rate": 1.5832289413993005e-05, "loss": 46.002, "step": 5442 }, { "epoch": 0.7431223974332719, "grad_norm": 0.038381125777959824, "learning_rate": 1.5816417535100487e-05, "loss": 46.0022, "step": 5443 }, { "epoch": 0.7432589255239266, "grad_norm": 0.08864034712314606, "learning_rate": 1.5800552121108393e-05, "loss": 46.0108, "step": 5444 }, { "epoch": 0.7433954536145811, "grad_norm": 0.11416912078857422, "learning_rate": 1.578469317501724e-05, "loss": 46.0014, "step": 5445 }, { "epoch": 0.7435319817052358, "grad_norm": 0.08968479931354523, "learning_rate": 1.5768840699826265e-05, "loss": 46.0, "step": 5446 }, { "epoch": 0.7436685097958905, "grad_norm": 0.19183535873889923, "learning_rate": 1.575299469853359e-05, "loss": 46.0112, "step": 5447 }, { "epoch": 0.7438050378865452, "grad_norm": 0.09258013963699341, "learning_rate": 1.573715517413604e-05, "loss": 46.0055, "step": 5448 }, { "epoch": 0.7439415659771998, "grad_norm": 0.1643524020910263, "learning_rate": 1.5721322129629236e-05, "loss": 46.0013, "step": 5449 }, { "epoch": 0.7440780940678544, "grad_norm": 0.10184331983327866, "learning_rate": 1.5705495568007566e-05, "loss": 46.0, "step": 5450 }, { "epoch": 0.7442146221585091, "grad_norm": 0.053965356200933456, "learning_rate": 1.5689675492264193e-05, "loss": 46.008, "step": 5451 }, { "epoch": 0.7443511502491638, "grad_norm": 0.07161077111959457, "learning_rate": 1.5673861905391067e-05, "loss": 46.0048, "step": 5452 }, { "epoch": 0.7444876783398184, "grad_norm": 0.11086741089820862, "learning_rate": 1.565805481037889e-05, "loss": 46.0016, "step": 5453 }, { "epoch": 0.7446242064304731, "grad_norm": 0.04855387285351753, "learning_rate": 1.5642254210217157e-05, "loss": 46.0038, "step": 5454 }, { "epoch": 0.7447607345211277, "grad_norm": 0.07063181698322296, "learning_rate": 1.562646010789411e-05, "loss": 46.003, "step": 5455 }, { "epoch": 0.7448972626117824, "grad_norm": 0.04436143487691879, "learning_rate": 1.5610672506396784e-05, "loss": 46.0084, "step": 5456 }, { "epoch": 0.745033790702437, "grad_norm": 0.09390319883823395, "learning_rate": 1.5594891408710982e-05, "loss": 46.0057, "step": 5457 }, { "epoch": 0.7451703187930917, "grad_norm": 0.0906984806060791, "learning_rate": 1.557911681782127e-05, "loss": 46.0007, "step": 5458 }, { "epoch": 0.7453068468837464, "grad_norm": 0.08012797683477402, "learning_rate": 1.556334873671098e-05, "loss": 46.0032, "step": 5459 }, { "epoch": 0.7454433749744009, "grad_norm": 0.09573224186897278, "learning_rate": 1.5547587168362206e-05, "loss": 
46.0089, "step": 5460 }, { "epoch": 0.7455799030650556, "grad_norm": 0.09213871508836746, "learning_rate": 1.5531832115755868e-05, "loss": 46.0015, "step": 5461 }, { "epoch": 0.7457164311557103, "grad_norm": 0.09547095000743866, "learning_rate": 1.551608358187156e-05, "loss": 46.0082, "step": 5462 }, { "epoch": 0.745852959246365, "grad_norm": 0.09927356243133545, "learning_rate": 1.550034156968771e-05, "loss": 46.009, "step": 5463 }, { "epoch": 0.7459894873370196, "grad_norm": 0.1518290936946869, "learning_rate": 1.5484606082181478e-05, "loss": 46.0033, "step": 5464 }, { "epoch": 0.7461260154276742, "grad_norm": 0.11610133200883865, "learning_rate": 1.5468877122328858e-05, "loss": 46.0034, "step": 5465 }, { "epoch": 0.7462625435183289, "grad_norm": 0.0871618464589119, "learning_rate": 1.54531546931045e-05, "loss": 46.002, "step": 5466 }, { "epoch": 0.7463990716089836, "grad_norm": 0.07464397698640823, "learning_rate": 1.5437438797481885e-05, "loss": 46.0073, "step": 5467 }, { "epoch": 0.7465355996996382, "grad_norm": 0.11260885745286942, "learning_rate": 1.5421729438433274e-05, "loss": 46.0, "step": 5468 }, { "epoch": 0.7466721277902929, "grad_norm": 0.046967122703790665, "learning_rate": 1.540602661892967e-05, "loss": 46.0046, "step": 5469 }, { "epoch": 0.7468086558809475, "grad_norm": 0.14949579536914825, "learning_rate": 1.539033034194081e-05, "loss": 46.0048, "step": 5470 }, { "epoch": 0.7469451839716021, "grad_norm": 0.06460648030042648, "learning_rate": 1.5374640610435215e-05, "loss": 46.0007, "step": 5471 }, { "epoch": 0.7470817120622568, "grad_norm": 0.11784359067678452, "learning_rate": 1.535895742738021e-05, "loss": 46.0118, "step": 5472 }, { "epoch": 0.7472182401529115, "grad_norm": 0.11706537753343582, "learning_rate": 1.5343280795741837e-05, "loss": 46.0026, "step": 5473 }, { "epoch": 0.7473547682435661, "grad_norm": 0.036085180938243866, "learning_rate": 1.532761071848487e-05, "loss": 46.0051, "step": 5474 }, { "epoch": 0.7474912963342207, "grad_norm": 0.06279760599136353, "learning_rate": 1.531194719857292e-05, "loss": 46.0079, "step": 5475 }, { "epoch": 0.7476278244248754, "grad_norm": 0.09516369551420212, "learning_rate": 1.5296290238968303e-05, "loss": 46.0098, "step": 5476 }, { "epoch": 0.7477643525155301, "grad_norm": 0.08935368806123734, "learning_rate": 1.5280639842632134e-05, "loss": 46.0053, "step": 5477 }, { "epoch": 0.7479008806061848, "grad_norm": 0.037887092679739, "learning_rate": 1.52649960125242e-05, "loss": 46.008, "step": 5478 }, { "epoch": 0.7480374086968393, "grad_norm": 0.07405024021863937, "learning_rate": 1.5249358751603171e-05, "loss": 46.003, "step": 5479 }, { "epoch": 0.748173936787494, "grad_norm": 0.04815881699323654, "learning_rate": 1.5233728062826386e-05, "loss": 46.0066, "step": 5480 }, { "epoch": 0.7483104648781487, "grad_norm": 0.10593284666538239, "learning_rate": 1.521810394914997e-05, "loss": 46.0125, "step": 5481 }, { "epoch": 0.7484469929688033, "grad_norm": 0.17286375164985657, "learning_rate": 1.5202486413528806e-05, "loss": 46.0046, "step": 5482 }, { "epoch": 0.748583521059458, "grad_norm": 0.06346042454242706, "learning_rate": 1.5186875458916522e-05, "loss": 46.0043, "step": 5483 }, { "epoch": 0.7487200491501126, "grad_norm": 0.08684305101633072, "learning_rate": 1.517127108826551e-05, "loss": 46.0094, "step": 5484 }, { "epoch": 0.7488565772407673, "grad_norm": 0.047315407544374466, "learning_rate": 1.5155673304526919e-05, "loss": 46.0089, "step": 5485 }, { "epoch": 0.7489931053314219, "grad_norm": 0.06190827488899231, 
"learning_rate": 1.5140082110650645e-05, "loss": 46.0054, "step": 5486 }, { "epoch": 0.7491296334220766, "grad_norm": 0.11548011004924774, "learning_rate": 1.5124497509585339e-05, "loss": 46.0047, "step": 5487 }, { "epoch": 0.7492661615127313, "grad_norm": 0.09149407595396042, "learning_rate": 1.5108919504278408e-05, "loss": 46.0029, "step": 5488 }, { "epoch": 0.7494026896033859, "grad_norm": 0.04740230366587639, "learning_rate": 1.5093348097676014e-05, "loss": 46.0086, "step": 5489 }, { "epoch": 0.7495392176940405, "grad_norm": 0.05763833969831467, "learning_rate": 1.5077783292723058e-05, "loss": 46.0046, "step": 5490 }, { "epoch": 0.7496757457846952, "grad_norm": 0.10461901873350143, "learning_rate": 1.5062225092363213e-05, "loss": 46.0011, "step": 5491 }, { "epoch": 0.7498122738753499, "grad_norm": 0.13867789506912231, "learning_rate": 1.5046673499538894e-05, "loss": 46.0016, "step": 5492 }, { "epoch": 0.7499488019660046, "grad_norm": 0.21289250254631042, "learning_rate": 1.503112851719125e-05, "loss": 46.0048, "step": 5493 }, { "epoch": 0.7500853300566591, "grad_norm": 0.028058428317308426, "learning_rate": 1.5015590148260205e-05, "loss": 46.008, "step": 5494 }, { "epoch": 0.7502218581473138, "grad_norm": 0.035968244075775146, "learning_rate": 1.5000058395684418e-05, "loss": 46.0039, "step": 5495 }, { "epoch": 0.7503583862379685, "grad_norm": 0.11002418398857117, "learning_rate": 1.4984533262401285e-05, "loss": 46.0009, "step": 5496 }, { "epoch": 0.7504949143286231, "grad_norm": 0.2074373960494995, "learning_rate": 1.496901475134701e-05, "loss": 46.0055, "step": 5497 }, { "epoch": 0.7506314424192778, "grad_norm": 0.11169390380382538, "learning_rate": 1.495350286545646e-05, "loss": 46.004, "step": 5498 }, { "epoch": 0.7507679705099324, "grad_norm": 0.16610172390937805, "learning_rate": 1.493799760766328e-05, "loss": 46.0028, "step": 5499 }, { "epoch": 0.7509044986005871, "grad_norm": 0.12002262473106384, "learning_rate": 1.4922498980899907e-05, "loss": 46.0032, "step": 5500 }, { "epoch": 0.7510410266912417, "grad_norm": 0.11277088522911072, "learning_rate": 1.4907006988097483e-05, "loss": 46.0023, "step": 5501 }, { "epoch": 0.7511775547818964, "grad_norm": 0.07373197376728058, "learning_rate": 1.4891521632185873e-05, "loss": 46.0041, "step": 5502 }, { "epoch": 0.7513140828725511, "grad_norm": 0.04678313434123993, "learning_rate": 1.4876042916093708e-05, "loss": 46.0005, "step": 5503 }, { "epoch": 0.7514506109632056, "grad_norm": 0.09702575206756592, "learning_rate": 1.4860570842748412e-05, "loss": 46.0039, "step": 5504 }, { "epoch": 0.7515871390538603, "grad_norm": 0.14338792860507965, "learning_rate": 1.4845105415076093e-05, "loss": 46.0027, "step": 5505 }, { "epoch": 0.751723667144515, "grad_norm": 0.12629978358745575, "learning_rate": 1.4829646636001582e-05, "loss": 46.0017, "step": 5506 }, { "epoch": 0.7518601952351697, "grad_norm": 0.11340802162885666, "learning_rate": 1.4814194508448531e-05, "loss": 46.0056, "step": 5507 }, { "epoch": 0.7519967233258242, "grad_norm": 0.036225128918886185, "learning_rate": 1.4798749035339277e-05, "loss": 46.0034, "step": 5508 }, { "epoch": 0.7521332514164789, "grad_norm": 0.11289902031421661, "learning_rate": 1.4783310219594937e-05, "loss": 46.0033, "step": 5509 }, { "epoch": 0.7522697795071336, "grad_norm": 0.04426981508731842, "learning_rate": 1.4767878064135288e-05, "loss": 46.0016, "step": 5510 }, { "epoch": 0.7524063075977883, "grad_norm": 0.03953317552804947, "learning_rate": 1.475245257187896e-05, "loss": 46.001, "step": 5511 }, { "epoch": 
0.7525428356884429, "grad_norm": 0.09013182669878006, "learning_rate": 1.4737033745743245e-05, "loss": 46.0044, "step": 5512 }, { "epoch": 0.7526793637790975, "grad_norm": 0.10082049667835236, "learning_rate": 1.4721621588644202e-05, "loss": 46.0031, "step": 5513 }, { "epoch": 0.7528158918697522, "grad_norm": 0.16157154738903046, "learning_rate": 1.4706216103496623e-05, "loss": 46.0021, "step": 5514 }, { "epoch": 0.7529524199604068, "grad_norm": 0.10698329657316208, "learning_rate": 1.4690817293214043e-05, "loss": 46.002, "step": 5515 }, { "epoch": 0.7530889480510615, "grad_norm": 0.06708524376153946, "learning_rate": 1.4675425160708722e-05, "loss": 46.0065, "step": 5516 }, { "epoch": 0.7532254761417162, "grad_norm": 0.20307935774326324, "learning_rate": 1.466003970889167e-05, "loss": 46.0049, "step": 5517 }, { "epoch": 0.7533620042323708, "grad_norm": 0.11220499128103256, "learning_rate": 1.4644660940672627e-05, "loss": 46.0053, "step": 5518 }, { "epoch": 0.7534985323230254, "grad_norm": 0.05145056173205376, "learning_rate": 1.4629288858960077e-05, "loss": 46.0037, "step": 5519 }, { "epoch": 0.7536350604136801, "grad_norm": 0.04559796303510666, "learning_rate": 1.4613923466661227e-05, "loss": 46.0056, "step": 5520 }, { "epoch": 0.7537715885043348, "grad_norm": 0.11851554363965988, "learning_rate": 1.4598564766682032e-05, "loss": 46.0078, "step": 5521 }, { "epoch": 0.7539081165949895, "grad_norm": 0.04320730268955231, "learning_rate": 1.4583212761927167e-05, "loss": 46.0022, "step": 5522 }, { "epoch": 0.754044644685644, "grad_norm": 0.14115899801254272, "learning_rate": 1.456786745530006e-05, "loss": 46.0082, "step": 5523 }, { "epoch": 0.7541811727762987, "grad_norm": 0.08957122266292572, "learning_rate": 1.4552528849702851e-05, "loss": 46.0085, "step": 5524 }, { "epoch": 0.7543177008669534, "grad_norm": 0.04198008030653, "learning_rate": 1.4537196948036425e-05, "loss": 46.0038, "step": 5525 }, { "epoch": 0.754454228957608, "grad_norm": 0.04739667475223541, "learning_rate": 1.4521871753200394e-05, "loss": 46.0028, "step": 5526 }, { "epoch": 0.7545907570482627, "grad_norm": 0.04576529935002327, "learning_rate": 1.4506553268093093e-05, "loss": 46.0082, "step": 5527 }, { "epoch": 0.7547272851389173, "grad_norm": 0.12560878694057465, "learning_rate": 1.4491241495611646e-05, "loss": 46.0034, "step": 5528 }, { "epoch": 0.754863813229572, "grad_norm": 0.20861421525478363, "learning_rate": 1.4475936438651816e-05, "loss": 46.008, "step": 5529 }, { "epoch": 0.7550003413202266, "grad_norm": 0.0406682975590229, "learning_rate": 1.4460638100108148e-05, "loss": 46.0042, "step": 5530 }, { "epoch": 0.7551368694108813, "grad_norm": 0.07173184305429459, "learning_rate": 1.4445346482873906e-05, "loss": 46.0067, "step": 5531 }, { "epoch": 0.755273397501536, "grad_norm": 0.052061211317777634, "learning_rate": 1.4430061589841121e-05, "loss": 46.0094, "step": 5532 }, { "epoch": 0.7554099255921906, "grad_norm": 0.09766284376382828, "learning_rate": 1.4414783423900474e-05, "loss": 46.0088, "step": 5533 }, { "epoch": 0.7555464536828452, "grad_norm": 0.1169370710849762, "learning_rate": 1.4399511987941416e-05, "loss": 46.0086, "step": 5534 }, { "epoch": 0.7556829817734999, "grad_norm": 0.1714611053466797, "learning_rate": 1.4384247284852165e-05, "loss": 46.0059, "step": 5535 }, { "epoch": 0.7558195098641546, "grad_norm": 0.033609021455049515, "learning_rate": 1.4368989317519615e-05, "loss": 46.0027, "step": 5536 }, { "epoch": 0.7559560379548093, "grad_norm": 0.12651553750038147, "learning_rate": 
1.4353738088829377e-05, "loss": 46.0173, "step": 5537 }, { "epoch": 0.7560925660454638, "grad_norm": 0.07466648519039154, "learning_rate": 1.4338493601665803e-05, "loss": 46.0039, "step": 5538 }, { "epoch": 0.7562290941361185, "grad_norm": 0.1034608781337738, "learning_rate": 1.4323255858912011e-05, "loss": 46.0037, "step": 5539 }, { "epoch": 0.7563656222267732, "grad_norm": 0.0546271875500679, "learning_rate": 1.4308024863449804e-05, "loss": 46.0016, "step": 5540 }, { "epoch": 0.7565021503174278, "grad_norm": 0.12728621065616608, "learning_rate": 1.429280061815967e-05, "loss": 46.0039, "step": 5541 }, { "epoch": 0.7566386784080824, "grad_norm": 0.07160945981740952, "learning_rate": 1.4277583125920901e-05, "loss": 46.0025, "step": 5542 }, { "epoch": 0.7567752064987371, "grad_norm": 0.06913544237613678, "learning_rate": 1.4262372389611472e-05, "loss": 46.001, "step": 5543 }, { "epoch": 0.7569117345893918, "grad_norm": 0.09449836611747742, "learning_rate": 1.4247168412108075e-05, "loss": 46.0061, "step": 5544 }, { "epoch": 0.7570482626800464, "grad_norm": 0.14330917596817017, "learning_rate": 1.423197119628613e-05, "loss": 46.0081, "step": 5545 }, { "epoch": 0.7571847907707011, "grad_norm": 0.13240833580493927, "learning_rate": 1.4216780745019787e-05, "loss": 46.0039, "step": 5546 }, { "epoch": 0.7573213188613557, "grad_norm": 0.1957051306962967, "learning_rate": 1.4201597061181898e-05, "loss": 46.0009, "step": 5547 }, { "epoch": 0.7574578469520103, "grad_norm": 0.2716710865497589, "learning_rate": 1.4186420147644053e-05, "loss": 46.009, "step": 5548 }, { "epoch": 0.757594375042665, "grad_norm": 0.21198777854442596, "learning_rate": 1.417125000727656e-05, "loss": 46.0063, "step": 5549 }, { "epoch": 0.7577309031333197, "grad_norm": 0.11904077976942062, "learning_rate": 1.4156086642948429e-05, "loss": 46.003, "step": 5550 }, { "epoch": 0.7578674312239744, "grad_norm": 0.10085376352071762, "learning_rate": 1.4140930057527407e-05, "loss": 46.0085, "step": 5551 }, { "epoch": 0.7580039593146289, "grad_norm": 0.06283503025770187, "learning_rate": 1.412578025387995e-05, "loss": 46.0078, "step": 5552 }, { "epoch": 0.7581404874052836, "grad_norm": 0.03294713422656059, "learning_rate": 1.4110637234871238e-05, "loss": 46.0009, "step": 5553 }, { "epoch": 0.7582770154959383, "grad_norm": 0.05103800445795059, "learning_rate": 1.4095501003365164e-05, "loss": 46.0007, "step": 5554 }, { "epoch": 0.758413543586593, "grad_norm": 0.07666995376348495, "learning_rate": 1.4080371562224331e-05, "loss": 46.002, "step": 5555 }, { "epoch": 0.7585500716772476, "grad_norm": 0.040000252425670624, "learning_rate": 1.4065248914310064e-05, "loss": 46.0044, "step": 5556 }, { "epoch": 0.7586865997679022, "grad_norm": 0.08872611820697784, "learning_rate": 1.4050133062482412e-05, "loss": 46.0001, "step": 5557 }, { "epoch": 0.7588231278585569, "grad_norm": 0.037521250545978546, "learning_rate": 1.4035024009600117e-05, "loss": 46.0033, "step": 5558 }, { "epoch": 0.7589596559492116, "grad_norm": 0.057347264140844345, "learning_rate": 1.4019921758520644e-05, "loss": 46.0005, "step": 5559 }, { "epoch": 0.7590961840398662, "grad_norm": 0.11811425536870956, "learning_rate": 1.4004826312100216e-05, "loss": 46.0014, "step": 5560 }, { "epoch": 0.7592327121305209, "grad_norm": 0.07316473126411438, "learning_rate": 1.398973767319368e-05, "loss": 46.0054, "step": 5561 }, { "epoch": 0.7593692402211755, "grad_norm": 0.13004691898822784, "learning_rate": 1.3974655844654666e-05, "loss": 46.009, "step": 5562 }, { "epoch": 0.7595057683118301, 
"grad_norm": 0.06622076034545898, "learning_rate": 1.3959580829335472e-05, "loss": 46.0073, "step": 5563 }, { "epoch": 0.7596422964024848, "grad_norm": 0.08213396370410919, "learning_rate": 1.394451263008718e-05, "loss": 46.0076, "step": 5564 }, { "epoch": 0.7597788244931395, "grad_norm": 0.041061971336603165, "learning_rate": 1.392945124975949e-05, "loss": 46.0055, "step": 5565 }, { "epoch": 0.7599153525837942, "grad_norm": 0.0486404187977314, "learning_rate": 1.3914396691200843e-05, "loss": 46.0084, "step": 5566 }, { "epoch": 0.7600518806744487, "grad_norm": 0.07275395840406418, "learning_rate": 1.3899348957258446e-05, "loss": 46.0118, "step": 5567 }, { "epoch": 0.7601884087651034, "grad_norm": 0.08506298065185547, "learning_rate": 1.388430805077816e-05, "loss": 46.0028, "step": 5568 }, { "epoch": 0.7603249368557581, "grad_norm": 0.1104656308889389, "learning_rate": 1.3869273974604541e-05, "loss": 46.0017, "step": 5569 }, { "epoch": 0.7604614649464128, "grad_norm": 0.18311496078968048, "learning_rate": 1.3854246731580872e-05, "loss": 46.002, "step": 5570 }, { "epoch": 0.7605979930370673, "grad_norm": 0.14220935106277466, "learning_rate": 1.3839226324549192e-05, "loss": 46.0065, "step": 5571 }, { "epoch": 0.760734521127722, "grad_norm": 0.10887166112661362, "learning_rate": 1.3824212756350196e-05, "loss": 46.0062, "step": 5572 }, { "epoch": 0.7608710492183767, "grad_norm": 0.14147351682186127, "learning_rate": 1.3809206029823252e-05, "loss": 46.0032, "step": 5573 }, { "epoch": 0.7610075773090313, "grad_norm": 0.07178936153650284, "learning_rate": 1.3794206147806521e-05, "loss": 46.0057, "step": 5574 }, { "epoch": 0.761144105399686, "grad_norm": 0.07436658442020416, "learning_rate": 1.3779213113136807e-05, "loss": 46.0054, "step": 5575 }, { "epoch": 0.7612806334903406, "grad_norm": 0.08615552634000778, "learning_rate": 1.3764226928649654e-05, "loss": 46.0163, "step": 5576 }, { "epoch": 0.7614171615809953, "grad_norm": 0.16972313821315765, "learning_rate": 1.3749247597179255e-05, "loss": 46.0017, "step": 5577 }, { "epoch": 0.7615536896716499, "grad_norm": 0.09387530386447906, "learning_rate": 1.3734275121558582e-05, "loss": 46.005, "step": 5578 }, { "epoch": 0.7616902177623046, "grad_norm": 0.061109308153390884, "learning_rate": 1.371930950461926e-05, "loss": 46.0131, "step": 5579 }, { "epoch": 0.7618267458529593, "grad_norm": 0.11561349034309387, "learning_rate": 1.3704350749191642e-05, "loss": 46.004, "step": 5580 }, { "epoch": 0.7619632739436139, "grad_norm": 0.1245012879371643, "learning_rate": 1.3689398858104751e-05, "loss": 46.0033, "step": 5581 }, { "epoch": 0.7620998020342685, "grad_norm": 0.08885253220796585, "learning_rate": 1.3674453834186352e-05, "loss": 46.0052, "step": 5582 }, { "epoch": 0.7622363301249232, "grad_norm": 0.08344367891550064, "learning_rate": 1.3659515680262885e-05, "loss": 46.0004, "step": 5583 }, { "epoch": 0.7623728582155779, "grad_norm": 0.053883589804172516, "learning_rate": 1.3644584399159498e-05, "loss": 46.0014, "step": 5584 }, { "epoch": 0.7625093863062326, "grad_norm": 0.06692665815353394, "learning_rate": 1.362965999370004e-05, "loss": 46.0052, "step": 5585 }, { "epoch": 0.7626459143968871, "grad_norm": 0.22042740881443024, "learning_rate": 1.3614742466707058e-05, "loss": 46.0012, "step": 5586 }, { "epoch": 0.7627824424875418, "grad_norm": 0.11078871041536331, "learning_rate": 1.3599831821001802e-05, "loss": 46.0017, "step": 5587 }, { "epoch": 0.7629189705781965, "grad_norm": 0.055126722902059555, "learning_rate": 1.3584928059404205e-05, "loss": 
46.0041, "step": 5588 }, { "epoch": 0.7630554986688511, "grad_norm": 0.06835740059614182, "learning_rate": 1.357003118473293e-05, "loss": 46.0019, "step": 5589 }, { "epoch": 0.7631920267595058, "grad_norm": 0.08007984608411789, "learning_rate": 1.3555141199805305e-05, "loss": 46.0055, "step": 5590 }, { "epoch": 0.7633285548501604, "grad_norm": 0.06130852922797203, "learning_rate": 1.3540258107437354e-05, "loss": 46.0027, "step": 5591 }, { "epoch": 0.763465082940815, "grad_norm": 0.13438451290130615, "learning_rate": 1.3525381910443862e-05, "loss": 46.0005, "step": 5592 }, { "epoch": 0.7636016110314697, "grad_norm": 0.08548463135957718, "learning_rate": 1.351051261163821e-05, "loss": 46.0022, "step": 5593 }, { "epoch": 0.7637381391221244, "grad_norm": 0.1316452920436859, "learning_rate": 1.3495650213832527e-05, "loss": 46.0, "step": 5594 }, { "epoch": 0.7638746672127791, "grad_norm": 0.19424159824848175, "learning_rate": 1.3480794719837659e-05, "loss": 46.0056, "step": 5595 }, { "epoch": 0.7640111953034336, "grad_norm": 0.04501059651374817, "learning_rate": 1.3465946132463126e-05, "loss": 46.0005, "step": 5596 }, { "epoch": 0.7641477233940883, "grad_norm": 0.12329796701669693, "learning_rate": 1.3451104454517105e-05, "loss": 46.0056, "step": 5597 }, { "epoch": 0.764284251484743, "grad_norm": 0.19247834384441376, "learning_rate": 1.3436269688806497e-05, "loss": 46.0054, "step": 5598 }, { "epoch": 0.7644207795753977, "grad_norm": 0.0961967334151268, "learning_rate": 1.3421441838136928e-05, "loss": 46.0017, "step": 5599 }, { "epoch": 0.7645573076660523, "grad_norm": 0.09641324728727341, "learning_rate": 1.3406620905312683e-05, "loss": 46.0, "step": 5600 }, { "epoch": 0.7646938357567069, "grad_norm": 0.046438440680503845, "learning_rate": 1.3391806893136699e-05, "loss": 46.0033, "step": 5601 }, { "epoch": 0.7648303638473616, "grad_norm": 0.15021951496601105, "learning_rate": 1.337699980441069e-05, "loss": 46.0013, "step": 5602 }, { "epoch": 0.7649668919380163, "grad_norm": 0.14225243031978607, "learning_rate": 1.3362199641935003e-05, "loss": 46.003, "step": 5603 }, { "epoch": 0.7651034200286709, "grad_norm": 0.07186275720596313, "learning_rate": 1.3347406408508695e-05, "loss": 46.0016, "step": 5604 }, { "epoch": 0.7652399481193255, "grad_norm": 0.04103955999016762, "learning_rate": 1.3332620106929473e-05, "loss": 46.0007, "step": 5605 }, { "epoch": 0.7653764762099802, "grad_norm": 0.08995547890663147, "learning_rate": 1.3317840739993803e-05, "loss": 46.0027, "step": 5606 }, { "epoch": 0.7655130043006348, "grad_norm": 0.12650354206562042, "learning_rate": 1.3303068310496797e-05, "loss": 46.0046, "step": 5607 }, { "epoch": 0.7656495323912895, "grad_norm": 0.1067887395620346, "learning_rate": 1.3288302821232246e-05, "loss": 46.001, "step": 5608 }, { "epoch": 0.7657860604819442, "grad_norm": 0.21279634535312653, "learning_rate": 1.3273544274992656e-05, "loss": 46.0035, "step": 5609 }, { "epoch": 0.7659225885725988, "grad_norm": 0.07982078939676285, "learning_rate": 1.3258792674569198e-05, "loss": 46.0051, "step": 5610 }, { "epoch": 0.7660591166632534, "grad_norm": 0.07457965612411499, "learning_rate": 1.3244048022751742e-05, "loss": 46.0042, "step": 5611 }, { "epoch": 0.7661956447539081, "grad_norm": 0.17062942683696747, "learning_rate": 1.3229310322328847e-05, "loss": 46.0022, "step": 5612 }, { "epoch": 0.7663321728445628, "grad_norm": 0.09794100373983383, "learning_rate": 1.3214579576087743e-05, "loss": 46.0033, "step": 5613 }, { "epoch": 0.7664687009352175, "grad_norm": 0.052369896322488785, 
"learning_rate": 1.3199855786814352e-05, "loss": 46.0083, "step": 5614 }, { "epoch": 0.766605229025872, "grad_norm": 0.08680252730846405, "learning_rate": 1.318513895729328e-05, "loss": 46.0046, "step": 5615 }, { "epoch": 0.7667417571165267, "grad_norm": 0.07481471449136734, "learning_rate": 1.3170429090307823e-05, "loss": 46.0084, "step": 5616 }, { "epoch": 0.7668782852071814, "grad_norm": 0.0747191533446312, "learning_rate": 1.315572618863995e-05, "loss": 46.005, "step": 5617 }, { "epoch": 0.767014813297836, "grad_norm": 0.10668200254440308, "learning_rate": 1.3141030255070318e-05, "loss": 46.008, "step": 5618 }, { "epoch": 0.7671513413884907, "grad_norm": 0.04996383190155029, "learning_rate": 1.3126341292378269e-05, "loss": 46.0046, "step": 5619 }, { "epoch": 0.7672878694791453, "grad_norm": 0.15961186587810516, "learning_rate": 1.3111659303341822e-05, "loss": 46.0081, "step": 5620 }, { "epoch": 0.7674243975698, "grad_norm": 0.11136925965547562, "learning_rate": 1.3096984290737674e-05, "loss": 46.0069, "step": 5621 }, { "epoch": 0.7675609256604546, "grad_norm": 0.1272493600845337, "learning_rate": 1.3082316257341209e-05, "loss": 46.0019, "step": 5622 }, { "epoch": 0.7676974537511093, "grad_norm": 0.08518612384796143, "learning_rate": 1.3067655205926488e-05, "loss": 46.0036, "step": 5623 }, { "epoch": 0.767833981841764, "grad_norm": 0.06118878349661827, "learning_rate": 1.3053001139266258e-05, "loss": 46.0131, "step": 5624 }, { "epoch": 0.7679705099324186, "grad_norm": 0.08676407486200333, "learning_rate": 1.3038354060131936e-05, "loss": 46.0037, "step": 5625 }, { "epoch": 0.7681070380230732, "grad_norm": 0.05196283012628555, "learning_rate": 1.30237139712936e-05, "loss": 46.0035, "step": 5626 }, { "epoch": 0.7682435661137279, "grad_norm": 0.29424750804901123, "learning_rate": 1.300908087552008e-05, "loss": 46.0088, "step": 5627 }, { "epoch": 0.7683800942043826, "grad_norm": 0.04583558812737465, "learning_rate": 1.2994454775578784e-05, "loss": 46.0036, "step": 5628 }, { "epoch": 0.7685166222950373, "grad_norm": 0.19812387228012085, "learning_rate": 1.2979835674235857e-05, "loss": 46.0034, "step": 5629 }, { "epoch": 0.7686531503856918, "grad_norm": 0.2646755278110504, "learning_rate": 1.296522357425609e-05, "loss": 46.0035, "step": 5630 }, { "epoch": 0.7687896784763465, "grad_norm": 0.04370562359690666, "learning_rate": 1.2950618478403021e-05, "loss": 46.0106, "step": 5631 }, { "epoch": 0.7689262065670012, "grad_norm": 0.03579622507095337, "learning_rate": 1.2936020389438752e-05, "loss": 46.0091, "step": 5632 }, { "epoch": 0.7690627346576558, "grad_norm": 0.07349875569343567, "learning_rate": 1.2921429310124123e-05, "loss": 46.0042, "step": 5633 }, { "epoch": 0.7691992627483105, "grad_norm": 0.07353857904672623, "learning_rate": 1.2906845243218668e-05, "loss": 46.0062, "step": 5634 }, { "epoch": 0.7693357908389651, "grad_norm": 0.059213265776634216, "learning_rate": 1.2892268191480572e-05, "loss": 46.0067, "step": 5635 }, { "epoch": 0.7694723189296198, "grad_norm": 0.11525087058544159, "learning_rate": 1.2877698157666662e-05, "loss": 46.0087, "step": 5636 }, { "epoch": 0.7696088470202744, "grad_norm": 0.10177426785230637, "learning_rate": 1.2863135144532462e-05, "loss": 46.0056, "step": 5637 }, { "epoch": 0.7697453751109291, "grad_norm": 0.06960554420948029, "learning_rate": 1.2848579154832202e-05, "loss": 46.01, "step": 5638 }, { "epoch": 0.7698819032015837, "grad_norm": 0.15230852365493774, "learning_rate": 1.2834030191318747e-05, "loss": 46.0026, "step": 5639 }, { "epoch": 
0.7700184312922383, "grad_norm": 0.11944232881069183, "learning_rate": 1.2819488256743628e-05, "loss": 46.0033, "step": 5640 }, { "epoch": 0.770154959382893, "grad_norm": 0.11520679295063019, "learning_rate": 1.2804953353857068e-05, "loss": 46.0019, "step": 5641 }, { "epoch": 0.7702914874735477, "grad_norm": 0.11875377595424652, "learning_rate": 1.2790425485407947e-05, "loss": 46.0003, "step": 5642 }, { "epoch": 0.7704280155642024, "grad_norm": 0.047124169766902924, "learning_rate": 1.2775904654143823e-05, "loss": 46.0001, "step": 5643 }, { "epoch": 0.7705645436548569, "grad_norm": 0.11697347462177277, "learning_rate": 1.2761390862810907e-05, "loss": 46.0026, "step": 5644 }, { "epoch": 0.7707010717455116, "grad_norm": 0.13967040181159973, "learning_rate": 1.2746884114154106e-05, "loss": 46.0009, "step": 5645 }, { "epoch": 0.7708375998361663, "grad_norm": 0.11135353893041611, "learning_rate": 1.273238441091697e-05, "loss": 46.0053, "step": 5646 }, { "epoch": 0.770974127926821, "grad_norm": 0.04117932915687561, "learning_rate": 1.2717891755841722e-05, "loss": 46.0024, "step": 5647 }, { "epoch": 0.7711106560174756, "grad_norm": 0.48490986227989197, "learning_rate": 1.2703406151669266e-05, "loss": 46.0021, "step": 5648 }, { "epoch": 0.7712471841081302, "grad_norm": 0.14994315803050995, "learning_rate": 1.2688927601139156e-05, "loss": 46.0091, "step": 5649 }, { "epoch": 0.7713837121987849, "grad_norm": 0.7956830859184265, "learning_rate": 1.2674456106989618e-05, "loss": 46.0, "step": 5650 }, { "epoch": 0.7715202402894396, "grad_norm": 0.21616613864898682, "learning_rate": 1.265999167195755e-05, "loss": 46.0044, "step": 5651 }, { "epoch": 0.7716567683800942, "grad_norm": 0.09510566294193268, "learning_rate": 1.2645534298778505e-05, "loss": 46.0023, "step": 5652 }, { "epoch": 0.7717932964707489, "grad_norm": 0.11243732273578644, "learning_rate": 1.2631083990186698e-05, "loss": 46.0092, "step": 5653 }, { "epoch": 0.7719298245614035, "grad_norm": 0.08249913901090622, "learning_rate": 1.261664074891501e-05, "loss": 46.0016, "step": 5654 }, { "epoch": 0.7720663526520581, "grad_norm": 0.05913643166422844, "learning_rate": 1.2602204577695025e-05, "loss": 46.002, "step": 5655 }, { "epoch": 0.7722028807427128, "grad_norm": 0.051593389362096786, "learning_rate": 1.2587775479256918e-05, "loss": 46.0009, "step": 5656 }, { "epoch": 0.7723394088333675, "grad_norm": 0.09709769487380981, "learning_rate": 1.2573353456329573e-05, "loss": 46.0025, "step": 5657 }, { "epoch": 0.7724759369240222, "grad_norm": 0.16866035759449005, "learning_rate": 1.255893851164051e-05, "loss": 46.0045, "step": 5658 }, { "epoch": 0.7726124650146767, "grad_norm": 0.06055013835430145, "learning_rate": 1.2544530647915974e-05, "loss": 46.0033, "step": 5659 }, { "epoch": 0.7727489931053314, "grad_norm": 0.05415462329983711, "learning_rate": 1.253012986788078e-05, "loss": 46.0085, "step": 5660 }, { "epoch": 0.7728855211959861, "grad_norm": 0.16470696032047272, "learning_rate": 1.2515736174258441e-05, "loss": 46.0047, "step": 5661 }, { "epoch": 0.7730220492866408, "grad_norm": 0.05531174689531326, "learning_rate": 1.2501349569771165e-05, "loss": 46.0037, "step": 5662 }, { "epoch": 0.7731585773772954, "grad_norm": 0.03976884111762047, "learning_rate": 1.2486970057139791e-05, "loss": 46.0045, "step": 5663 }, { "epoch": 0.77329510546795, "grad_norm": 0.03262689337134361, "learning_rate": 1.2472597639083789e-05, "loss": 46.002, "step": 5664 }, { "epoch": 0.7734316335586047, "grad_norm": 0.07505180686712265, "learning_rate": 
1.2458232318321305e-05, "loss": 46.0073, "step": 5665 }, { "epoch": 0.7735681616492593, "grad_norm": 0.08765189349651337, "learning_rate": 1.2443874097569187e-05, "loss": 46.0168, "step": 5666 }, { "epoch": 0.773704689739914, "grad_norm": 0.07255131751298904, "learning_rate": 1.2429522979542902e-05, "loss": 46.0055, "step": 5667 }, { "epoch": 0.7738412178305686, "grad_norm": 0.05851122736930847, "learning_rate": 1.2415178966956532e-05, "loss": 46.0106, "step": 5668 }, { "epoch": 0.7739777459212233, "grad_norm": 0.09017754346132278, "learning_rate": 1.24008420625229e-05, "loss": 46.0036, "step": 5669 }, { "epoch": 0.7741142740118779, "grad_norm": 0.03959783911705017, "learning_rate": 1.2386512268953437e-05, "loss": 46.0071, "step": 5670 }, { "epoch": 0.7742508021025326, "grad_norm": 0.1772712618112564, "learning_rate": 1.2372189588958249e-05, "loss": 46.0085, "step": 5671 }, { "epoch": 0.7743873301931873, "grad_norm": 0.0633809044957161, "learning_rate": 1.235787402524603e-05, "loss": 46.0066, "step": 5672 }, { "epoch": 0.7745238582838418, "grad_norm": 0.054992806166410446, "learning_rate": 1.2343565580524236e-05, "loss": 46.0093, "step": 5673 }, { "epoch": 0.7746603863744965, "grad_norm": 0.08734973520040512, "learning_rate": 1.2329264257498908e-05, "loss": 46.0056, "step": 5674 }, { "epoch": 0.7747969144651512, "grad_norm": 0.08960077911615372, "learning_rate": 1.2314970058874747e-05, "loss": 46.0026, "step": 5675 }, { "epoch": 0.7749334425558059, "grad_norm": 0.09703106433153152, "learning_rate": 1.2300682987355122e-05, "loss": 46.0083, "step": 5676 }, { "epoch": 0.7750699706464605, "grad_norm": 0.035594455897808075, "learning_rate": 1.2286403045642036e-05, "loss": 46.0053, "step": 5677 }, { "epoch": 0.7752064987371151, "grad_norm": 0.10288213193416595, "learning_rate": 1.2272130236436168e-05, "loss": 46.0072, "step": 5678 }, { "epoch": 0.7753430268277698, "grad_norm": 0.17127029597759247, "learning_rate": 1.225786456243682e-05, "loss": 46.004, "step": 5679 }, { "epoch": 0.7754795549184245, "grad_norm": 0.13961049914360046, "learning_rate": 1.2243606026341975e-05, "loss": 46.0106, "step": 5680 }, { "epoch": 0.7756160830090791, "grad_norm": 0.08722224831581116, "learning_rate": 1.2229354630848232e-05, "loss": 46.008, "step": 5681 }, { "epoch": 0.7757526110997338, "grad_norm": 0.12053588777780533, "learning_rate": 1.2215110378650868e-05, "loss": 46.0074, "step": 5682 }, { "epoch": 0.7758891391903884, "grad_norm": 0.10405191779136658, "learning_rate": 1.2200873272443792e-05, "loss": 46.0191, "step": 5683 }, { "epoch": 0.776025667281043, "grad_norm": 0.11627548187971115, "learning_rate": 1.2186643314919571e-05, "loss": 46.005, "step": 5684 }, { "epoch": 0.7761621953716977, "grad_norm": 0.047295622527599335, "learning_rate": 1.2172420508769417e-05, "loss": 46.0059, "step": 5685 }, { "epoch": 0.7762987234623524, "grad_norm": 0.11546719074249268, "learning_rate": 1.2158204856683176e-05, "loss": 46.0023, "step": 5686 }, { "epoch": 0.7764352515530071, "grad_norm": 0.06428378075361252, "learning_rate": 1.2143996361349397e-05, "loss": 46.0017, "step": 5687 }, { "epoch": 0.7765717796436616, "grad_norm": 0.06277608871459961, "learning_rate": 1.2129795025455187e-05, "loss": 46.0085, "step": 5688 }, { "epoch": 0.7767083077343163, "grad_norm": 0.16683776676654816, "learning_rate": 1.211560085168636e-05, "loss": 46.0038, "step": 5689 }, { "epoch": 0.776844835824971, "grad_norm": 0.051197346299886703, "learning_rate": 1.2101413842727345e-05, "loss": 46.0042, "step": 5690 }, { "epoch": 0.7769813639156257, 
"grad_norm": 0.044428762048482895, "learning_rate": 1.2087234001261278e-05, "loss": 46.0072, "step": 5691 }, { "epoch": 0.7771178920062803, "grad_norm": 0.1320255994796753, "learning_rate": 1.2073061329969843e-05, "loss": 46.0004, "step": 5692 }, { "epoch": 0.7772544200969349, "grad_norm": 0.11414042860269547, "learning_rate": 1.2058895831533429e-05, "loss": 46.0054, "step": 5693 }, { "epoch": 0.7773909481875896, "grad_norm": 0.04836287721991539, "learning_rate": 1.204473750863107e-05, "loss": 46.0017, "step": 5694 }, { "epoch": 0.7775274762782443, "grad_norm": 0.15028896927833557, "learning_rate": 1.2030586363940443e-05, "loss": 46.0018, "step": 5695 }, { "epoch": 0.7776640043688989, "grad_norm": 0.05919278785586357, "learning_rate": 1.2016442400137823e-05, "loss": 46.0029, "step": 5696 }, { "epoch": 0.7778005324595536, "grad_norm": 0.2607901990413666, "learning_rate": 1.2002305619898153e-05, "loss": 46.0049, "step": 5697 }, { "epoch": 0.7779370605502082, "grad_norm": 0.1793382465839386, "learning_rate": 1.1988176025895058e-05, "loss": 46.0065, "step": 5698 }, { "epoch": 0.7780735886408628, "grad_norm": 0.11992447078227997, "learning_rate": 1.1974053620800763e-05, "loss": 46.0038, "step": 5699 }, { "epoch": 0.7782101167315175, "grad_norm": 0.34471654891967773, "learning_rate": 1.1959938407286097e-05, "loss": 46.0, "step": 5700 }, { "epoch": 0.7783466448221722, "grad_norm": 0.1594083160161972, "learning_rate": 1.1945830388020623e-05, "loss": 46.0172, "step": 5701 }, { "epoch": 0.7784831729128268, "grad_norm": 0.0855049267411232, "learning_rate": 1.1931729565672462e-05, "loss": 46.0071, "step": 5702 }, { "epoch": 0.7786197010034814, "grad_norm": 0.051178380846977234, "learning_rate": 1.1917635942908429e-05, "loss": 46.0007, "step": 5703 }, { "epoch": 0.7787562290941361, "grad_norm": 0.0828336849808693, "learning_rate": 1.1903549522393909e-05, "loss": 46.0021, "step": 5704 }, { "epoch": 0.7788927571847908, "grad_norm": 0.04268272966146469, "learning_rate": 1.1889470306792999e-05, "loss": 46.0021, "step": 5705 }, { "epoch": 0.7790292852754455, "grad_norm": 0.038221221417188644, "learning_rate": 1.1875398298768398e-05, "loss": 46.0026, "step": 5706 }, { "epoch": 0.7791658133661, "grad_norm": 0.04043285548686981, "learning_rate": 1.1861333500981448e-05, "loss": 46.0025, "step": 5707 }, { "epoch": 0.7793023414567547, "grad_norm": 0.07267355918884277, "learning_rate": 1.1847275916092115e-05, "loss": 46.0056, "step": 5708 }, { "epoch": 0.7794388695474094, "grad_norm": 0.12419770658016205, "learning_rate": 1.183322554675902e-05, "loss": 46.0033, "step": 5709 }, { "epoch": 0.779575397638064, "grad_norm": 0.11191888898611069, "learning_rate": 1.1819182395639406e-05, "loss": 46.0047, "step": 5710 }, { "epoch": 0.7797119257287187, "grad_norm": 0.09443730860948563, "learning_rate": 1.1805146465389155e-05, "loss": 46.0018, "step": 5711 }, { "epoch": 0.7798484538193733, "grad_norm": 0.03915949910879135, "learning_rate": 1.1791117758662784e-05, "loss": 46.0005, "step": 5712 }, { "epoch": 0.779984981910028, "grad_norm": 0.08650901913642883, "learning_rate": 1.1777096278113448e-05, "loss": 46.0089, "step": 5713 }, { "epoch": 0.7801215100006826, "grad_norm": 0.0925716981291771, "learning_rate": 1.176308202639293e-05, "loss": 46.0073, "step": 5714 }, { "epoch": 0.7802580380913373, "grad_norm": 0.12096685916185379, "learning_rate": 1.1749075006151638e-05, "loss": 46.0116, "step": 5715 }, { "epoch": 0.780394566181992, "grad_norm": 0.09972070902585983, "learning_rate": 1.1735075220038633e-05, "loss": 46.0071, 
"step": 5716 }, { "epoch": 0.7805310942726466, "grad_norm": 0.058127839118242264, "learning_rate": 1.1721082670701588e-05, "loss": 46.0028, "step": 5717 }, { "epoch": 0.7806676223633012, "grad_norm": 0.07738945633172989, "learning_rate": 1.1707097360786818e-05, "loss": 46.0027, "step": 5718 }, { "epoch": 0.7808041504539559, "grad_norm": 0.085357666015625, "learning_rate": 1.1693119292939269e-05, "loss": 46.0, "step": 5719 }, { "epoch": 0.7809406785446106, "grad_norm": 0.0663960799574852, "learning_rate": 1.1679148469802509e-05, "loss": 46.0051, "step": 5720 }, { "epoch": 0.7810772066352653, "grad_norm": 0.1951136291027069, "learning_rate": 1.1665184894018733e-05, "loss": 46.0083, "step": 5721 }, { "epoch": 0.7812137347259198, "grad_norm": 0.13752393424510956, "learning_rate": 1.1651228568228811e-05, "loss": 46.0063, "step": 5722 }, { "epoch": 0.7813502628165745, "grad_norm": 0.12629210948944092, "learning_rate": 1.1637279495072167e-05, "loss": 46.0085, "step": 5723 }, { "epoch": 0.7814867909072292, "grad_norm": 0.0805964469909668, "learning_rate": 1.1623337677186902e-05, "loss": 46.0055, "step": 5724 }, { "epoch": 0.7816233189978838, "grad_norm": 0.0852535143494606, "learning_rate": 1.1609403117209721e-05, "loss": 46.0077, "step": 5725 }, { "epoch": 0.7817598470885385, "grad_norm": 0.1421537846326828, "learning_rate": 1.1595475817776008e-05, "loss": 46.0034, "step": 5726 }, { "epoch": 0.7818963751791931, "grad_norm": 0.07367920875549316, "learning_rate": 1.1581555781519704e-05, "loss": 46.006, "step": 5727 }, { "epoch": 0.7820329032698478, "grad_norm": 0.16967613995075226, "learning_rate": 1.1567643011073392e-05, "loss": 46.0039, "step": 5728 }, { "epoch": 0.7821694313605024, "grad_norm": 0.12262607365846634, "learning_rate": 1.1553737509068335e-05, "loss": 46.003, "step": 5729 }, { "epoch": 0.7823059594511571, "grad_norm": 0.05117976665496826, "learning_rate": 1.1539839278134384e-05, "loss": 46.0015, "step": 5730 }, { "epoch": 0.7824424875418118, "grad_norm": 0.09732077270746231, "learning_rate": 1.1525948320899976e-05, "loss": 46.0098, "step": 5731 }, { "epoch": 0.7825790156324663, "grad_norm": 0.15296554565429688, "learning_rate": 1.151206463999222e-05, "loss": 46.0052, "step": 5732 }, { "epoch": 0.782715543723121, "grad_norm": 0.1309371441602707, "learning_rate": 1.1498188238036861e-05, "loss": 46.0042, "step": 5733 }, { "epoch": 0.7828520718137757, "grad_norm": 0.13778959214687347, "learning_rate": 1.1484319117658233e-05, "loss": 46.0044, "step": 5734 }, { "epoch": 0.7829885999044304, "grad_norm": 0.07867218554019928, "learning_rate": 1.1470457281479301e-05, "loss": 46.0016, "step": 5735 }, { "epoch": 0.7831251279950849, "grad_norm": 0.2469724714756012, "learning_rate": 1.1456602732121663e-05, "loss": 46.0116, "step": 5736 }, { "epoch": 0.7832616560857396, "grad_norm": 0.12267932295799255, "learning_rate": 1.144275547220553e-05, "loss": 46.0087, "step": 5737 }, { "epoch": 0.7833981841763943, "grad_norm": 0.055693648755550385, "learning_rate": 1.1428915504349735e-05, "loss": 46.0019, "step": 5738 }, { "epoch": 0.783534712267049, "grad_norm": 0.1112055853009224, "learning_rate": 1.1415082831171736e-05, "loss": 46.0022, "step": 5739 }, { "epoch": 0.7836712403577036, "grad_norm": 0.04429011419415474, "learning_rate": 1.1401257455287612e-05, "loss": 46.0025, "step": 5740 }, { "epoch": 0.7838077684483582, "grad_norm": 0.036667849868535995, "learning_rate": 1.1387439379312054e-05, "loss": 46.0001, "step": 5741 }, { "epoch": 0.7839442965390129, "grad_norm": 0.0718018189072609, 
"learning_rate": 1.1373628605858378e-05, "loss": 46.0032, "step": 5742 }, { "epoch": 0.7840808246296675, "grad_norm": 0.18336980044841766, "learning_rate": 1.135982513753852e-05, "loss": 46.0024, "step": 5743 }, { "epoch": 0.7842173527203222, "grad_norm": 0.1481619030237198, "learning_rate": 1.134602897696303e-05, "loss": 46.0044, "step": 5744 }, { "epoch": 0.7843538808109769, "grad_norm": 0.25043347477912903, "learning_rate": 1.1332240126741083e-05, "loss": 46.0059, "step": 5745 }, { "epoch": 0.7844904089016315, "grad_norm": 0.1453932821750641, "learning_rate": 1.1318458589480469e-05, "loss": 46.0029, "step": 5746 }, { "epoch": 0.7846269369922861, "grad_norm": 0.06856336444616318, "learning_rate": 1.1304684367787582e-05, "loss": 46.0053, "step": 5747 }, { "epoch": 0.7847634650829408, "grad_norm": 0.24149984121322632, "learning_rate": 1.1290917464267458e-05, "loss": 46.0022, "step": 5748 }, { "epoch": 0.7848999931735955, "grad_norm": 0.18614135682582855, "learning_rate": 1.127715788152372e-05, "loss": 46.0034, "step": 5749 }, { "epoch": 0.7850365212642502, "grad_norm": 0.11879680305719376, "learning_rate": 1.1263405622158636e-05, "loss": 46.0, "step": 5750 }, { "epoch": 0.7851730493549047, "grad_norm": 0.058967359364032745, "learning_rate": 1.1249660688773061e-05, "loss": 46.011, "step": 5751 }, { "epoch": 0.7853095774455594, "grad_norm": 0.06784307956695557, "learning_rate": 1.1235923083966488e-05, "loss": 46.0107, "step": 5752 }, { "epoch": 0.7854461055362141, "grad_norm": 0.03961225599050522, "learning_rate": 1.1222192810336995e-05, "loss": 46.0026, "step": 5753 }, { "epoch": 0.7855826336268688, "grad_norm": 0.03252869099378586, "learning_rate": 1.1208469870481331e-05, "loss": 46.0, "step": 5754 }, { "epoch": 0.7857191617175234, "grad_norm": 0.033174820244312286, "learning_rate": 1.1194754266994777e-05, "loss": 46.0, "step": 5755 }, { "epoch": 0.785855689808178, "grad_norm": 0.06005732715129852, "learning_rate": 1.118104600247129e-05, "loss": 46.0057, "step": 5756 }, { "epoch": 0.7859922178988327, "grad_norm": 0.12169983237981796, "learning_rate": 1.1167345079503399e-05, "loss": 46.001, "step": 5757 }, { "epoch": 0.7861287459894873, "grad_norm": 0.07105310261249542, "learning_rate": 1.1153651500682306e-05, "loss": 46.0026, "step": 5758 }, { "epoch": 0.786265274080142, "grad_norm": 0.06846984475851059, "learning_rate": 1.1139965268597735e-05, "loss": 46.0057, "step": 5759 }, { "epoch": 0.7864018021707967, "grad_norm": 0.0478234626352787, "learning_rate": 1.1126286385838075e-05, "loss": 46.0035, "step": 5760 }, { "epoch": 0.7865383302614513, "grad_norm": 0.06804048269987106, "learning_rate": 1.1112614854990338e-05, "loss": 46.008, "step": 5761 }, { "epoch": 0.7866748583521059, "grad_norm": 0.057323481887578964, "learning_rate": 1.1098950678640124e-05, "loss": 46.0135, "step": 5762 }, { "epoch": 0.7868113864427606, "grad_norm": 0.04270123690366745, "learning_rate": 1.108529385937162e-05, "loss": 46.0018, "step": 5763 }, { "epoch": 0.7869479145334153, "grad_norm": 0.08276030421257019, "learning_rate": 1.107164439976764e-05, "loss": 46.0035, "step": 5764 }, { "epoch": 0.78708444262407, "grad_norm": 0.10893051326274872, "learning_rate": 1.1058002302409636e-05, "loss": 46.0114, "step": 5765 }, { "epoch": 0.7872209707147245, "grad_norm": 0.05165398493409157, "learning_rate": 1.104436756987765e-05, "loss": 46.0111, "step": 5766 }, { "epoch": 0.7873574988053792, "grad_norm": 0.07170470803976059, "learning_rate": 1.1030740204750278e-05, "loss": 46.0035, "step": 5767 }, { "epoch": 
0.7874940268960339, "grad_norm": 0.07336630672216415, "learning_rate": 1.1017120209604803e-05, "loss": 46.0013, "step": 5768 }, { "epoch": 0.7876305549866885, "grad_norm": 0.07153099030256271, "learning_rate": 1.100350758701707e-05, "loss": 46.006, "step": 5769 }, { "epoch": 0.7877670830773431, "grad_norm": 0.032103732228279114, "learning_rate": 1.0989902339561553e-05, "loss": 46.0043, "step": 5770 }, { "epoch": 0.7879036111679978, "grad_norm": 0.12614946067333221, "learning_rate": 1.0976304469811277e-05, "loss": 46.0036, "step": 5771 }, { "epoch": 0.7880401392586525, "grad_norm": 0.1028219535946846, "learning_rate": 1.0962713980337947e-05, "loss": 46.0031, "step": 5772 }, { "epoch": 0.7881766673493071, "grad_norm": 0.12103339284658432, "learning_rate": 1.0949130873711827e-05, "loss": 46.008, "step": 5773 }, { "epoch": 0.7883131954399618, "grad_norm": 0.09402978420257568, "learning_rate": 1.0935555152501797e-05, "loss": 46.0126, "step": 5774 }, { "epoch": 0.7884497235306164, "grad_norm": 0.07511723041534424, "learning_rate": 1.0921986819275327e-05, "loss": 46.0028, "step": 5775 }, { "epoch": 0.788586251621271, "grad_norm": 0.07545360922813416, "learning_rate": 1.090842587659851e-05, "loss": 46.0069, "step": 5776 }, { "epoch": 0.7887227797119257, "grad_norm": 0.09917052835226059, "learning_rate": 1.0894872327036033e-05, "loss": 46.0097, "step": 5777 }, { "epoch": 0.7888593078025804, "grad_norm": 0.11971193552017212, "learning_rate": 1.088132617315118e-05, "loss": 46.0027, "step": 5778 }, { "epoch": 0.7889958358932351, "grad_norm": 0.06205608695745468, "learning_rate": 1.0867787417505832e-05, "loss": 46.0038, "step": 5779 }, { "epoch": 0.7891323639838896, "grad_norm": 0.12837688624858856, "learning_rate": 1.0854256062660489e-05, "loss": 46.0019, "step": 5780 }, { "epoch": 0.7892688920745443, "grad_norm": 0.11311503499746323, "learning_rate": 1.0840732111174228e-05, "loss": 46.0115, "step": 5781 }, { "epoch": 0.789405420165199, "grad_norm": 0.07282049208879471, "learning_rate": 1.0827215565604776e-05, "loss": 46.006, "step": 5782 }, { "epoch": 0.7895419482558537, "grad_norm": 0.08604851365089417, "learning_rate": 1.0813706428508375e-05, "loss": 46.0024, "step": 5783 }, { "epoch": 0.7896784763465083, "grad_norm": 0.1633615493774414, "learning_rate": 1.0800204702439937e-05, "loss": 46.004, "step": 5784 }, { "epoch": 0.7898150044371629, "grad_norm": 0.054695989936590195, "learning_rate": 1.078671038995293e-05, "loss": 46.0092, "step": 5785 }, { "epoch": 0.7899515325278176, "grad_norm": 0.08423034101724625, "learning_rate": 1.0773223493599482e-05, "loss": 46.0088, "step": 5786 }, { "epoch": 0.7900880606184723, "grad_norm": 0.1360296607017517, "learning_rate": 1.0759744015930228e-05, "loss": 46.0047, "step": 5787 }, { "epoch": 0.7902245887091269, "grad_norm": 0.1510058045387268, "learning_rate": 1.0746271959494453e-05, "loss": 46.0035, "step": 5788 }, { "epoch": 0.7903611167997816, "grad_norm": 0.03499859571456909, "learning_rate": 1.0732807326840056e-05, "loss": 46.0009, "step": 5789 }, { "epoch": 0.7904976448904362, "grad_norm": 0.06073112413287163, "learning_rate": 1.0719350120513511e-05, "loss": 46.0047, "step": 5790 }, { "epoch": 0.7906341729810908, "grad_norm": 0.09183137118816376, "learning_rate": 1.0705900343059855e-05, "loss": 46.0061, "step": 5791 }, { "epoch": 0.7907707010717455, "grad_norm": 0.08523363620042801, "learning_rate": 1.0692457997022748e-05, "loss": 46.0047, "step": 5792 }, { "epoch": 0.7909072291624002, "grad_norm": 0.11317682266235352, "learning_rate": 
1.0679023084944479e-05, "loss": 46.0078, "step": 5793 }, { "epoch": 0.7910437572530549, "grad_norm": 0.07528141885995865, "learning_rate": 1.0665595609365892e-05, "loss": 46.0002, "step": 5794 }, { "epoch": 0.7911802853437094, "grad_norm": 0.1228296160697937, "learning_rate": 1.0652175572826394e-05, "loss": 46.002, "step": 5795 }, { "epoch": 0.7913168134343641, "grad_norm": 0.16216251254081726, "learning_rate": 1.0638762977864069e-05, "loss": 46.0038, "step": 5796 }, { "epoch": 0.7914533415250188, "grad_norm": 0.04984297603368759, "learning_rate": 1.062535782701552e-05, "loss": 46.0007, "step": 5797 }, { "epoch": 0.7915898696156735, "grad_norm": 0.3681827187538147, "learning_rate": 1.0611960122815994e-05, "loss": 46.0045, "step": 5798 }, { "epoch": 0.791726397706328, "grad_norm": 0.3101103603839874, "learning_rate": 1.0598569867799257e-05, "loss": 46.0045, "step": 5799 }, { "epoch": 0.7918629257969827, "grad_norm": 0.12757830321788788, "learning_rate": 1.0585187064497753e-05, "loss": 46.0, "step": 5800 }, { "epoch": 0.7919994538876374, "grad_norm": 0.26632383465766907, "learning_rate": 1.057181171544247e-05, "loss": 46.0045, "step": 5801 }, { "epoch": 0.792135981978292, "grad_norm": 0.04872816428542137, "learning_rate": 1.055844382316299e-05, "loss": 46.006, "step": 5802 }, { "epoch": 0.7922725100689467, "grad_norm": 0.06447499245405197, "learning_rate": 1.0545083390187488e-05, "loss": 46.0021, "step": 5803 }, { "epoch": 0.7924090381596013, "grad_norm": 0.03252457454800606, "learning_rate": 1.0531730419042735e-05, "loss": 46.0035, "step": 5804 }, { "epoch": 0.792545566250256, "grad_norm": 0.042077988386154175, "learning_rate": 1.0518384912254076e-05, "loss": 46.0005, "step": 5805 }, { "epoch": 0.7926820943409106, "grad_norm": 0.3534030616283417, "learning_rate": 1.0505046872345454e-05, "loss": 46.0027, "step": 5806 }, { "epoch": 0.7928186224315653, "grad_norm": 0.09426290541887283, "learning_rate": 1.04917163018394e-05, "loss": 46.0067, "step": 5807 }, { "epoch": 0.79295515052222, "grad_norm": 0.0804297924041748, "learning_rate": 1.0478393203257037e-05, "loss": 46.0019, "step": 5808 }, { "epoch": 0.7930916786128746, "grad_norm": 0.14284199476242065, "learning_rate": 1.046507757911806e-05, "loss": 46.0016, "step": 5809 }, { "epoch": 0.7932282067035292, "grad_norm": 0.13482679426670074, "learning_rate": 1.045176943194076e-05, "loss": 46.0027, "step": 5810 }, { "epoch": 0.7933647347941839, "grad_norm": 0.09341976791620255, "learning_rate": 1.0438468764242022e-05, "loss": 46.0032, "step": 5811 }, { "epoch": 0.7935012628848386, "grad_norm": 0.0517374724149704, "learning_rate": 1.0425175578537299e-05, "loss": 46.0087, "step": 5812 }, { "epoch": 0.7936377909754933, "grad_norm": 0.049808647483587265, "learning_rate": 1.0411889877340641e-05, "loss": 46.0017, "step": 5813 }, { "epoch": 0.7937743190661478, "grad_norm": 0.11142638325691223, "learning_rate": 1.039861166316468e-05, "loss": 46.0078, "step": 5814 }, { "epoch": 0.7939108471568025, "grad_norm": 0.07447032630443573, "learning_rate": 1.0385340938520627e-05, "loss": 46.0064, "step": 5815 }, { "epoch": 0.7940473752474572, "grad_norm": 0.0623822920024395, "learning_rate": 1.0372077705918287e-05, "loss": 46.0041, "step": 5816 }, { "epoch": 0.7941839033381118, "grad_norm": 0.14779381453990936, "learning_rate": 1.0358821967866033e-05, "loss": 46.0039, "step": 5817 }, { "epoch": 0.7943204314287665, "grad_norm": 0.05942235887050629, "learning_rate": 1.0345573726870843e-05, "loss": 46.002, "step": 5818 }, { "epoch": 0.7944569595194211, "grad_norm": 
0.08322162181138992, "learning_rate": 1.0332332985438248e-05, "loss": 46.0024, "step": 5819 }, { "epoch": 0.7945934876100758, "grad_norm": 0.08986788988113403, "learning_rate": 1.0319099746072374e-05, "loss": 46.0104, "step": 5820 }, { "epoch": 0.7947300157007304, "grad_norm": 0.09268641471862793, "learning_rate": 1.0305874011275968e-05, "loss": 46.004, "step": 5821 }, { "epoch": 0.7948665437913851, "grad_norm": 0.06824496388435364, "learning_rate": 1.0292655783550275e-05, "loss": 46.0036, "step": 5822 }, { "epoch": 0.7950030718820398, "grad_norm": 0.048043400049209595, "learning_rate": 1.0279445065395187e-05, "loss": 46.0012, "step": 5823 }, { "epoch": 0.7951395999726943, "grad_norm": 0.08589378744363785, "learning_rate": 1.0266241859309134e-05, "loss": 46.0022, "step": 5824 }, { "epoch": 0.795276128063349, "grad_norm": 0.08791028708219528, "learning_rate": 1.0253046167789193e-05, "loss": 46.0036, "step": 5825 }, { "epoch": 0.7954126561540037, "grad_norm": 0.15372999012470245, "learning_rate": 1.023985799333092e-05, "loss": 46.0016, "step": 5826 }, { "epoch": 0.7955491842446584, "grad_norm": 0.06991811841726303, "learning_rate": 1.0226677338428514e-05, "loss": 46.0062, "step": 5827 }, { "epoch": 0.795685712335313, "grad_norm": 0.13604678213596344, "learning_rate": 1.0213504205574758e-05, "loss": 46.0066, "step": 5828 }, { "epoch": 0.7958222404259676, "grad_norm": 0.09337177127599716, "learning_rate": 1.0200338597260988e-05, "loss": 46.003, "step": 5829 }, { "epoch": 0.7959587685166223, "grad_norm": 0.2529038190841675, "learning_rate": 1.018718051597713e-05, "loss": 46.0052, "step": 5830 }, { "epoch": 0.796095296607277, "grad_norm": 0.11357038468122482, "learning_rate": 1.017402996421164e-05, "loss": 46.0025, "step": 5831 }, { "epoch": 0.7962318246979316, "grad_norm": 0.07177452743053436, "learning_rate": 1.016088694445163e-05, "loss": 46.0071, "step": 5832 }, { "epoch": 0.7963683527885862, "grad_norm": 0.1164173036813736, "learning_rate": 1.0147751459182736e-05, "loss": 46.0033, "step": 5833 }, { "epoch": 0.7965048808792409, "grad_norm": 0.042638082057237625, "learning_rate": 1.013462351088918e-05, "loss": 46.0024, "step": 5834 }, { "epoch": 0.7966414089698955, "grad_norm": 0.15164819359779358, "learning_rate": 1.0121503102053753e-05, "loss": 46.0041, "step": 5835 }, { "epoch": 0.7967779370605502, "grad_norm": 0.07500284165143967, "learning_rate": 1.0108390235157828e-05, "loss": 46.0031, "step": 5836 }, { "epoch": 0.7969144651512049, "grad_norm": 0.13623467087745667, "learning_rate": 1.0095284912681353e-05, "loss": 46.0098, "step": 5837 }, { "epoch": 0.7970509932418595, "grad_norm": 0.04425523430109024, "learning_rate": 1.0082187137102844e-05, "loss": 46.0034, "step": 5838 }, { "epoch": 0.7971875213325141, "grad_norm": 0.030956588685512543, "learning_rate": 1.006909691089939e-05, "loss": 46.0079, "step": 5839 }, { "epoch": 0.7973240494231688, "grad_norm": 0.08078370243310928, "learning_rate": 1.0056014236546646e-05, "loss": 46.011, "step": 5840 }, { "epoch": 0.7974605775138235, "grad_norm": 0.10914067178964615, "learning_rate": 1.0042939116518862e-05, "loss": 46.0101, "step": 5841 }, { "epoch": 0.7975971056044782, "grad_norm": 0.12075760960578918, "learning_rate": 1.0029871553288827e-05, "loss": 46.0068, "step": 5842 }, { "epoch": 0.7977336336951327, "grad_norm": 0.11396639794111252, "learning_rate": 1.0016811549327931e-05, "loss": 46.0, "step": 5843 }, { "epoch": 0.7978701617857874, "grad_norm": 0.2142723947763443, "learning_rate": 1.0003759107106114e-05, "loss": 46.002, "step": 5844 }, 
{ "epoch": 0.7980066898764421, "grad_norm": 0.07956168055534363, "learning_rate": 9.990714229091896e-06, "loss": 46.006, "step": 5845 }, { "epoch": 0.7981432179670968, "grad_norm": 0.22335563600063324, "learning_rate": 9.977676917752354e-06, "loss": 46.0074, "step": 5846 }, { "epoch": 0.7982797460577514, "grad_norm": 0.07547445595264435, "learning_rate": 9.964647175553155e-06, "loss": 46.0062, "step": 5847 }, { "epoch": 0.798416274148406, "grad_norm": 0.12065991014242172, "learning_rate": 9.951625004958498e-06, "loss": 46.0, "step": 5848 }, { "epoch": 0.7985528022390607, "grad_norm": 0.04918050020933151, "learning_rate": 9.938610408431226e-06, "loss": 46.0, "step": 5849 }, { "epoch": 0.7986893303297153, "grad_norm": 0.0713077038526535, "learning_rate": 9.925603388432647e-06, "loss": 46.002, "step": 5850 }, { "epoch": 0.79882585842037, "grad_norm": 0.09158943593502045, "learning_rate": 9.912603947422705e-06, "loss": 46.0088, "step": 5851 }, { "epoch": 0.7989623865110247, "grad_norm": 0.09967497736215591, "learning_rate": 9.899612087859884e-06, "loss": 46.0004, "step": 5852 }, { "epoch": 0.7990989146016793, "grad_norm": 0.08443131297826767, "learning_rate": 9.886627812201277e-06, "loss": 46.0064, "step": 5853 }, { "epoch": 0.7992354426923339, "grad_norm": 0.13616536557674408, "learning_rate": 9.873651122902472e-06, "loss": 46.004, "step": 5854 }, { "epoch": 0.7993719707829886, "grad_norm": 0.07256169617176056, "learning_rate": 9.860682022417655e-06, "loss": 46.0082, "step": 5855 }, { "epoch": 0.7995084988736433, "grad_norm": 0.08552873879671097, "learning_rate": 9.84772051319961e-06, "loss": 46.0013, "step": 5856 }, { "epoch": 0.799645026964298, "grad_norm": 0.07092173397541046, "learning_rate": 9.834766597699658e-06, "loss": 46.0031, "step": 5857 }, { "epoch": 0.7997815550549525, "grad_norm": 0.06685712188482285, "learning_rate": 9.82182027836765e-06, "loss": 46.0019, "step": 5858 }, { "epoch": 0.7999180831456072, "grad_norm": 0.030255571007728577, "learning_rate": 9.808881557652038e-06, "loss": 46.0029, "step": 5859 }, { "epoch": 0.8000546112362619, "grad_norm": 0.06789059191942215, "learning_rate": 9.795950437999852e-06, "loss": 46.0028, "step": 5860 }, { "epoch": 0.8001911393269165, "grad_norm": 0.05537659302353859, "learning_rate": 9.783026921856669e-06, "loss": 46.0022, "step": 5861 }, { "epoch": 0.8003276674175712, "grad_norm": 0.08121687918901443, "learning_rate": 9.770111011666583e-06, "loss": 46.0034, "step": 5862 }, { "epoch": 0.8004641955082258, "grad_norm": 0.11652178317308426, "learning_rate": 9.757202709872327e-06, "loss": 46.0079, "step": 5863 }, { "epoch": 0.8006007235988805, "grad_norm": 0.1466701477766037, "learning_rate": 9.744302018915142e-06, "loss": 46.0126, "step": 5864 }, { "epoch": 0.8007372516895351, "grad_norm": 0.2904626131057739, "learning_rate": 9.731408941234854e-06, "loss": 46.0085, "step": 5865 }, { "epoch": 0.8008737797801898, "grad_norm": 0.04651442915201187, "learning_rate": 9.718523479269815e-06, "loss": 46.0045, "step": 5866 }, { "epoch": 0.8010103078708444, "grad_norm": 0.08006881177425385, "learning_rate": 9.705645635456989e-06, "loss": 46.0061, "step": 5867 }, { "epoch": 0.801146835961499, "grad_norm": 0.11526630073785782, "learning_rate": 9.692775412231863e-06, "loss": 46.0063, "step": 5868 }, { "epoch": 0.8012833640521537, "grad_norm": 0.05932728946208954, "learning_rate": 9.67991281202849e-06, "loss": 46.0005, "step": 5869 }, { "epoch": 0.8014198921428084, "grad_norm": 0.073017917573452, "learning_rate": 9.667057837279487e-06, "loss": 46.0078, 
"step": 5870 }, { "epoch": 0.8015564202334631, "grad_norm": 0.058755967766046524, "learning_rate": 9.65421049041602e-06, "loss": 46.0018, "step": 5871 }, { "epoch": 0.8016929483241176, "grad_norm": 0.07644613087177277, "learning_rate": 9.641370773867819e-06, "loss": 46.0064, "step": 5872 }, { "epoch": 0.8018294764147723, "grad_norm": 0.035758789628744125, "learning_rate": 9.628538690063171e-06, "loss": 46.006, "step": 5873 }, { "epoch": 0.801966004505427, "grad_norm": 0.08525457233190536, "learning_rate": 9.615714241428918e-06, "loss": 46.0024, "step": 5874 }, { "epoch": 0.8021025325960817, "grad_norm": 0.10871477425098419, "learning_rate": 9.602897430390457e-06, "loss": 46.0009, "step": 5875 }, { "epoch": 0.8022390606867363, "grad_norm": 0.08078046888113022, "learning_rate": 9.590088259371738e-06, "loss": 46.004, "step": 5876 }, { "epoch": 0.8023755887773909, "grad_norm": 0.09142289310693741, "learning_rate": 9.57728673079527e-06, "loss": 46.0037, "step": 5877 }, { "epoch": 0.8025121168680456, "grad_norm": 0.07498084008693695, "learning_rate": 9.564492847082119e-06, "loss": 46.0066, "step": 5878 }, { "epoch": 0.8026486449587003, "grad_norm": 0.12365581840276718, "learning_rate": 9.5517066106519e-06, "loss": 46.0049, "step": 5879 }, { "epoch": 0.8027851730493549, "grad_norm": 0.10755892097949982, "learning_rate": 9.538928023922778e-06, "loss": 46.0055, "step": 5880 }, { "epoch": 0.8029217011400096, "grad_norm": 0.041219186037778854, "learning_rate": 9.526157089311504e-06, "loss": 46.004, "step": 5881 }, { "epoch": 0.8030582292306642, "grad_norm": 0.0401761420071125, "learning_rate": 9.513393809233323e-06, "loss": 46.0066, "step": 5882 }, { "epoch": 0.8031947573213188, "grad_norm": 0.10216628015041351, "learning_rate": 9.500638186102079e-06, "loss": 46.0048, "step": 5883 }, { "epoch": 0.8033312854119735, "grad_norm": 0.06926777213811874, "learning_rate": 9.487890222330137e-06, "loss": 46.0024, "step": 5884 }, { "epoch": 0.8034678135026282, "grad_norm": 0.055062901228666306, "learning_rate": 9.47514992032847e-06, "loss": 46.0102, "step": 5885 }, { "epoch": 0.8036043415932829, "grad_norm": 0.11969775706529617, "learning_rate": 9.462417282506519e-06, "loss": 46.0034, "step": 5886 }, { "epoch": 0.8037408696839374, "grad_norm": 0.05663412809371948, "learning_rate": 9.449692311272323e-06, "loss": 46.0038, "step": 5887 }, { "epoch": 0.8038773977745921, "grad_norm": 0.09353141486644745, "learning_rate": 9.43697500903249e-06, "loss": 46.0034, "step": 5888 }, { "epoch": 0.8040139258652468, "grad_norm": 0.060910433530807495, "learning_rate": 9.424265378192154e-06, "loss": 46.0049, "step": 5889 }, { "epoch": 0.8041504539559015, "grad_norm": 0.12023376673460007, "learning_rate": 9.411563421154978e-06, "loss": 46.003, "step": 5890 }, { "epoch": 0.8042869820465561, "grad_norm": 0.17763890326023102, "learning_rate": 9.398869140323186e-06, "loss": 46.0043, "step": 5891 }, { "epoch": 0.8044235101372107, "grad_norm": 0.034410443156957626, "learning_rate": 9.386182538097582e-06, "loss": 46.0068, "step": 5892 }, { "epoch": 0.8045600382278654, "grad_norm": 0.1533651351928711, "learning_rate": 9.3735036168775e-06, "loss": 46.0014, "step": 5893 }, { "epoch": 0.80469656631852, "grad_norm": 0.13996151089668274, "learning_rate": 9.360832379060774e-06, "loss": 46.0033, "step": 5894 }, { "epoch": 0.8048330944091747, "grad_norm": 0.19542676210403442, "learning_rate": 9.348168827043868e-06, "loss": 46.0011, "step": 5895 }, { "epoch": 0.8049696224998293, "grad_norm": 0.06674681603908539, "learning_rate": 
9.335512963221732e-06, "loss": 46.0062, "step": 5896 }, { "epoch": 0.805106150590484, "grad_norm": 0.11416524648666382, "learning_rate": 9.322864789987896e-06, "loss": 46.001, "step": 5897 }, { "epoch": 0.8052426786811386, "grad_norm": 0.17729733884334564, "learning_rate": 9.310224309734384e-06, "loss": 46.0044, "step": 5898 }, { "epoch": 0.8053792067717933, "grad_norm": 0.19683051109313965, "learning_rate": 9.297591524851834e-06, "loss": 46.0032, "step": 5899 }, { "epoch": 0.805515734862448, "grad_norm": 0.11876920610666275, "learning_rate": 9.284966437729387e-06, "loss": 46.0, "step": 5900 }, { "epoch": 0.8056522629531025, "grad_norm": 0.09236893057823181, "learning_rate": 9.272349050754736e-06, "loss": 46.0078, "step": 5901 }, { "epoch": 0.8057887910437572, "grad_norm": 0.05658535659313202, "learning_rate": 9.259739366314123e-06, "loss": 46.0007, "step": 5902 }, { "epoch": 0.8059253191344119, "grad_norm": 0.04136950150132179, "learning_rate": 9.247137386792321e-06, "loss": 46.0041, "step": 5903 }, { "epoch": 0.8060618472250666, "grad_norm": 0.24273079633712769, "learning_rate": 9.234543114572652e-06, "loss": 46.0031, "step": 5904 }, { "epoch": 0.8061983753157212, "grad_norm": 0.07003217935562134, "learning_rate": 9.221956552036992e-06, "loss": 46.0033, "step": 5905 }, { "epoch": 0.8063349034063758, "grad_norm": 0.03829824551939964, "learning_rate": 9.209377701565747e-06, "loss": 46.0006, "step": 5906 }, { "epoch": 0.8064714314970305, "grad_norm": 0.09543004631996155, "learning_rate": 9.196806565537864e-06, "loss": 46.0021, "step": 5907 }, { "epoch": 0.8066079595876852, "grad_norm": 0.09481719881296158, "learning_rate": 9.184243146330829e-06, "loss": 46.0013, "step": 5908 }, { "epoch": 0.8067444876783398, "grad_norm": 0.07018060237169266, "learning_rate": 9.171687446320676e-06, "loss": 46.0026, "step": 5909 }, { "epoch": 0.8068810157689945, "grad_norm": 0.04712304845452309, "learning_rate": 9.159139467881978e-06, "loss": 46.0035, "step": 5910 }, { "epoch": 0.8070175438596491, "grad_norm": 0.04333058372139931, "learning_rate": 9.146599213387846e-06, "loss": 46.0105, "step": 5911 }, { "epoch": 0.8071540719503038, "grad_norm": 0.055260069668293, "learning_rate": 9.13406668520993e-06, "loss": 46.0065, "step": 5912 }, { "epoch": 0.8072906000409584, "grad_norm": 0.14302298426628113, "learning_rate": 9.12154188571842e-06, "loss": 46.0094, "step": 5913 }, { "epoch": 0.8074271281316131, "grad_norm": 0.035834625363349915, "learning_rate": 9.109024817282041e-06, "loss": 46.0054, "step": 5914 }, { "epoch": 0.8075636562222678, "grad_norm": 0.07218681275844574, "learning_rate": 9.096515482268047e-06, "loss": 46.007, "step": 5915 }, { "epoch": 0.8077001843129223, "grad_norm": 0.03238914906978607, "learning_rate": 9.084013883042275e-06, "loss": 46.0109, "step": 5916 }, { "epoch": 0.807836712403577, "grad_norm": 0.09234192222356796, "learning_rate": 9.071520021969027e-06, "loss": 46.006, "step": 5917 }, { "epoch": 0.8079732404942317, "grad_norm": 0.13992471992969513, "learning_rate": 9.059033901411196e-06, "loss": 46.0064, "step": 5918 }, { "epoch": 0.8081097685848864, "grad_norm": 0.10941995680332184, "learning_rate": 9.046555523730178e-06, "loss": 46.0072, "step": 5919 }, { "epoch": 0.808246296675541, "grad_norm": 0.044685233384370804, "learning_rate": 9.03408489128596e-06, "loss": 46.0007, "step": 5920 }, { "epoch": 0.8083828247661956, "grad_norm": 0.08290278166532516, "learning_rate": 9.021622006436987e-06, "loss": 46.0083, "step": 5921 }, { "epoch": 0.8085193528568503, "grad_norm": 
0.0790628120303154, "learning_rate": 9.00916687154028e-06, "loss": 46.0063, "step": 5922 }, { "epoch": 0.808655880947505, "grad_norm": 0.13341034948825836, "learning_rate": 8.99671948895141e-06, "loss": 46.0045, "step": 5923 }, { "epoch": 0.8087924090381596, "grad_norm": 0.07326343655586243, "learning_rate": 8.984279861024453e-06, "loss": 46.0123, "step": 5924 }, { "epoch": 0.8089289371288143, "grad_norm": 0.1404658854007721, "learning_rate": 8.971847990112036e-06, "loss": 46.0039, "step": 5925 }, { "epoch": 0.8090654652194689, "grad_norm": 0.11832907050848007, "learning_rate": 8.959423878565287e-06, "loss": 46.0014, "step": 5926 }, { "epoch": 0.8092019933101235, "grad_norm": 0.22275836765766144, "learning_rate": 8.947007528733914e-06, "loss": 46.0066, "step": 5927 }, { "epoch": 0.8093385214007782, "grad_norm": 0.18691939115524292, "learning_rate": 8.934598942966127e-06, "loss": 46.0013, "step": 5928 }, { "epoch": 0.8094750494914329, "grad_norm": 0.18901090323925018, "learning_rate": 8.922198123608677e-06, "loss": 46.0029, "step": 5929 }, { "epoch": 0.8096115775820875, "grad_norm": 0.07463503628969193, "learning_rate": 8.909805073006843e-06, "loss": 46.0061, "step": 5930 }, { "epoch": 0.8097481056727421, "grad_norm": 0.063026562333107, "learning_rate": 8.89741979350443e-06, "loss": 46.0021, "step": 5931 }, { "epoch": 0.8098846337633968, "grad_norm": 0.07125533372163773, "learning_rate": 8.885042287443784e-06, "loss": 46.0057, "step": 5932 }, { "epoch": 0.8100211618540515, "grad_norm": 0.07546109706163406, "learning_rate": 8.872672557165778e-06, "loss": 46.0019, "step": 5933 }, { "epoch": 0.8101576899447062, "grad_norm": 0.07451112568378448, "learning_rate": 8.860310605009803e-06, "loss": 46.0038, "step": 5934 }, { "epoch": 0.8102942180353607, "grad_norm": 0.08210200071334839, "learning_rate": 8.847956433313797e-06, "loss": 46.0021, "step": 5935 }, { "epoch": 0.8104307461260154, "grad_norm": 0.08025896549224854, "learning_rate": 8.835610044414211e-06, "loss": 46.0009, "step": 5936 }, { "epoch": 0.8105672742166701, "grad_norm": 0.13270145654678345, "learning_rate": 8.823271440646036e-06, "loss": 46.0029, "step": 5937 }, { "epoch": 0.8107038023073248, "grad_norm": 0.07072658091783524, "learning_rate": 8.810940624342785e-06, "loss": 46.0067, "step": 5938 }, { "epoch": 0.8108403303979794, "grad_norm": 0.041666921228170395, "learning_rate": 8.79861759783649e-06, "loss": 46.0044, "step": 5939 }, { "epoch": 0.810976858488634, "grad_norm": 0.02914901077747345, "learning_rate": 8.786302363457733e-06, "loss": 46.004, "step": 5940 }, { "epoch": 0.8111133865792887, "grad_norm": 0.08993687480688095, "learning_rate": 8.773994923535589e-06, "loss": 46.003, "step": 5941 }, { "epoch": 0.8112499146699433, "grad_norm": 0.15100060403347015, "learning_rate": 8.761695280397697e-06, "loss": 46.0013, "step": 5942 }, { "epoch": 0.811386442760598, "grad_norm": 0.19519123435020447, "learning_rate": 8.749403436370185e-06, "loss": 46.0048, "step": 5943 }, { "epoch": 0.8115229708512527, "grad_norm": 0.06684621423482895, "learning_rate": 8.737119393777738e-06, "loss": 46.0026, "step": 5944 }, { "epoch": 0.8116594989419073, "grad_norm": 0.07332012802362442, "learning_rate": 8.72484315494354e-06, "loss": 46.0047, "step": 5945 }, { "epoch": 0.8117960270325619, "grad_norm": 0.11932935565710068, "learning_rate": 8.712574722189314e-06, "loss": 46.0003, "step": 5946 }, { "epoch": 0.8119325551232166, "grad_norm": 0.07714877277612686, "learning_rate": 8.700314097835289e-06, "loss": 46.0053, "step": 5947 }, { "epoch": 
0.8120690832138713, "grad_norm": 0.05920381471514702, "learning_rate": 8.688061284200266e-06, "loss": 46.0037, "step": 5948 }, { "epoch": 0.812205611304526, "grad_norm": 0.12333227694034576, "learning_rate": 8.675816283601496e-06, "loss": 46.0025, "step": 5949 }, { "epoch": 0.8123421393951805, "grad_norm": 0.09516225010156631, "learning_rate": 8.663579098354807e-06, "loss": 46.0033, "step": 5950 }, { "epoch": 0.8124786674858352, "grad_norm": 0.05783263221383095, "learning_rate": 8.651349730774516e-06, "loss": 46.0049, "step": 5951 }, { "epoch": 0.8126151955764899, "grad_norm": 0.036463383585214615, "learning_rate": 8.639128183173518e-06, "loss": 46.0037, "step": 5952 }, { "epoch": 0.8127517236671445, "grad_norm": 0.05300349369645119, "learning_rate": 8.626914457863145e-06, "loss": 46.0084, "step": 5953 }, { "epoch": 0.8128882517577992, "grad_norm": 0.0941852256655693, "learning_rate": 8.614708557153295e-06, "loss": 46.0064, "step": 5954 }, { "epoch": 0.8130247798484538, "grad_norm": 0.05898924916982651, "learning_rate": 8.60251048335241e-06, "loss": 46.004, "step": 5955 }, { "epoch": 0.8131613079391085, "grad_norm": 0.1458372324705124, "learning_rate": 8.590320238767423e-06, "loss": 46.0005, "step": 5956 }, { "epoch": 0.8132978360297631, "grad_norm": 0.2047327160835266, "learning_rate": 8.578137825703775e-06, "loss": 46.0002, "step": 5957 }, { "epoch": 0.8134343641204178, "grad_norm": 0.1339966356754303, "learning_rate": 8.565963246465425e-06, "loss": 46.0055, "step": 5958 }, { "epoch": 0.8135708922110725, "grad_norm": 0.048188742250204086, "learning_rate": 8.553796503354899e-06, "loss": 46.0044, "step": 5959 }, { "epoch": 0.813707420301727, "grad_norm": 0.03245240077376366, "learning_rate": 8.541637598673208e-06, "loss": 46.0048, "step": 5960 }, { "epoch": 0.8138439483923817, "grad_norm": 0.08267378062009811, "learning_rate": 8.529486534719838e-06, "loss": 46.0065, "step": 5961 }, { "epoch": 0.8139804764830364, "grad_norm": 0.07600201666355133, "learning_rate": 8.517343313792881e-06, "loss": 46.002, "step": 5962 }, { "epoch": 0.8141170045736911, "grad_norm": 0.20869910717010498, "learning_rate": 8.50520793818887e-06, "loss": 46.003, "step": 5963 }, { "epoch": 0.8142535326643456, "grad_norm": 0.07419902086257935, "learning_rate": 8.493080410202913e-06, "loss": 46.0104, "step": 5964 }, { "epoch": 0.8143900607550003, "grad_norm": 0.04914389178156853, "learning_rate": 8.480960732128557e-06, "loss": 46.006, "step": 5965 }, { "epoch": 0.814526588845655, "grad_norm": 0.1032484695315361, "learning_rate": 8.468848906257947e-06, "loss": 46.0016, "step": 5966 }, { "epoch": 0.8146631169363097, "grad_norm": 0.06239796057343483, "learning_rate": 8.456744934881706e-06, "loss": 46.007, "step": 5967 }, { "epoch": 0.8147996450269643, "grad_norm": 0.09587502479553223, "learning_rate": 8.444648820288958e-06, "loss": 46.0013, "step": 5968 }, { "epoch": 0.8149361731176189, "grad_norm": 0.13233664631843567, "learning_rate": 8.432560564767366e-06, "loss": 46.003, "step": 5969 }, { "epoch": 0.8150727012082736, "grad_norm": 0.18131539225578308, "learning_rate": 8.420480170603102e-06, "loss": 46.0066, "step": 5970 }, { "epoch": 0.8152092292989283, "grad_norm": 0.050610244274139404, "learning_rate": 8.40840764008083e-06, "loss": 46.0017, "step": 5971 }, { "epoch": 0.8153457573895829, "grad_norm": 0.11452195793390274, "learning_rate": 8.396342975483752e-06, "loss": 46.0036, "step": 5972 }, { "epoch": 0.8154822854802376, "grad_norm": 0.18434710800647736, "learning_rate": 8.384286179093575e-06, "loss": 46.0035, "step": 
5973 }, { "epoch": 0.8156188135708922, "grad_norm": 0.10748566687107086, "learning_rate": 8.372237253190512e-06, "loss": 46.0069, "step": 5974 }, { "epoch": 0.8157553416615468, "grad_norm": 0.036327384412288666, "learning_rate": 8.360196200053282e-06, "loss": 46.002, "step": 5975 }, { "epoch": 0.8158918697522015, "grad_norm": 0.06790892034769058, "learning_rate": 8.34816302195916e-06, "loss": 46.0046, "step": 5976 }, { "epoch": 0.8160283978428562, "grad_norm": 0.10653574019670486, "learning_rate": 8.336137721183856e-06, "loss": 46.0084, "step": 5977 }, { "epoch": 0.8161649259335109, "grad_norm": 0.08631613850593567, "learning_rate": 8.324120300001648e-06, "loss": 46.003, "step": 5978 }, { "epoch": 0.8163014540241654, "grad_norm": 0.06537650525569916, "learning_rate": 8.312110760685293e-06, "loss": 46.0073, "step": 5979 }, { "epoch": 0.8164379821148201, "grad_norm": 0.03579774871468544, "learning_rate": 8.30010910550611e-06, "loss": 46.0, "step": 5980 }, { "epoch": 0.8165745102054748, "grad_norm": 0.04948611184954643, "learning_rate": 8.288115336733842e-06, "loss": 46.004, "step": 5981 }, { "epoch": 0.8167110382961295, "grad_norm": 0.08255138248205185, "learning_rate": 8.2761294566368e-06, "loss": 46.0016, "step": 5982 }, { "epoch": 0.8168475663867841, "grad_norm": 0.0994102731347084, "learning_rate": 8.2641514674818e-06, "loss": 46.01, "step": 5983 }, { "epoch": 0.8169840944774387, "grad_norm": 0.12283330410718918, "learning_rate": 8.252181371534157e-06, "loss": 46.008, "step": 5984 }, { "epoch": 0.8171206225680934, "grad_norm": 0.1769472360610962, "learning_rate": 8.240219171057678e-06, "loss": 46.0011, "step": 5985 }, { "epoch": 0.817257150658748, "grad_norm": 0.11141671240329742, "learning_rate": 8.228264868314672e-06, "loss": 46.0023, "step": 5986 }, { "epoch": 0.8173936787494027, "grad_norm": 0.07510065287351608, "learning_rate": 8.216318465566014e-06, "loss": 46.0033, "step": 5987 }, { "epoch": 0.8175302068400574, "grad_norm": 0.1655244678258896, "learning_rate": 8.204379965071035e-06, "loss": 46.0047, "step": 5988 }, { "epoch": 0.817666734930712, "grad_norm": 0.18520191311836243, "learning_rate": 8.19244936908754e-06, "loss": 46.0008, "step": 5989 }, { "epoch": 0.8178032630213666, "grad_norm": 0.14335767924785614, "learning_rate": 8.180526679871919e-06, "loss": 46.0037, "step": 5990 }, { "epoch": 0.8179397911120213, "grad_norm": 0.13842709362506866, "learning_rate": 8.168611899679013e-06, "loss": 46.0032, "step": 5991 }, { "epoch": 0.818076319202676, "grad_norm": 0.10836932808160782, "learning_rate": 8.156705030762197e-06, "loss": 46.0069, "step": 5992 }, { "epoch": 0.8182128472933305, "grad_norm": 0.05301300436258316, "learning_rate": 8.14480607537329e-06, "loss": 46.0011, "step": 5993 }, { "epoch": 0.8183493753839852, "grad_norm": 0.0419105589389801, "learning_rate": 8.132915035762696e-06, "loss": 46.0037, "step": 5994 }, { "epoch": 0.8184859034746399, "grad_norm": 0.08563525229692459, "learning_rate": 8.121031914179272e-06, "loss": 46.0057, "step": 5995 }, { "epoch": 0.8186224315652946, "grad_norm": 0.2169228196144104, "learning_rate": 8.109156712870397e-06, "loss": 46.0035, "step": 5996 }, { "epoch": 0.8187589596559492, "grad_norm": 0.08241445571184158, "learning_rate": 8.097289434081933e-06, "loss": 46.005, "step": 5997 }, { "epoch": 0.8188954877466038, "grad_norm": 0.20509202778339386, "learning_rate": 8.08543008005826e-06, "loss": 46.0037, "step": 5998 }, { "epoch": 0.8190320158372585, "grad_norm": 0.18722376227378845, "learning_rate": 8.073578653042251e-06, "loss": 46.0017, 
"step": 5999 }, { "epoch": 0.8191685439279132, "grad_norm": 0.10572522133588791, "learning_rate": 8.061735155275285e-06, "loss": 46.0052, "step": 6000 }, { "epoch": 0.8193050720185678, "grad_norm": 0.10645358264446259, "learning_rate": 8.049899588997244e-06, "loss": 46.0148, "step": 6001 }, { "epoch": 0.8194416001092225, "grad_norm": 0.0982646495103836, "learning_rate": 8.038071956446496e-06, "loss": 46.0014, "step": 6002 }, { "epoch": 0.8195781281998771, "grad_norm": 0.12638995051383972, "learning_rate": 8.026252259859928e-06, "loss": 46.0017, "step": 6003 }, { "epoch": 0.8197146562905318, "grad_norm": 0.03399622067809105, "learning_rate": 8.014440501472909e-06, "loss": 46.0058, "step": 6004 }, { "epoch": 0.8198511843811864, "grad_norm": 0.12535589933395386, "learning_rate": 8.002636683519315e-06, "loss": 46.0004, "step": 6005 }, { "epoch": 0.8199877124718411, "grad_norm": 0.09858422726392746, "learning_rate": 7.99084080823152e-06, "loss": 46.0033, "step": 6006 }, { "epoch": 0.8201242405624958, "grad_norm": 0.049560099840164185, "learning_rate": 7.979052877840398e-06, "loss": 46.0018, "step": 6007 }, { "epoch": 0.8202607686531503, "grad_norm": 0.08653762191534042, "learning_rate": 7.967272894575312e-06, "loss": 46.0025, "step": 6008 }, { "epoch": 0.820397296743805, "grad_norm": 0.12859395146369934, "learning_rate": 7.955500860664129e-06, "loss": 46.0063, "step": 6009 }, { "epoch": 0.8205338248344597, "grad_norm": 0.0611502006649971, "learning_rate": 7.94373677833321e-06, "loss": 46.0077, "step": 6010 }, { "epoch": 0.8206703529251144, "grad_norm": 0.08904537558555603, "learning_rate": 7.931980649807419e-06, "loss": 46.0072, "step": 6011 }, { "epoch": 0.820806881015769, "grad_norm": 0.05709671974182129, "learning_rate": 7.920232477310103e-06, "loss": 46.001, "step": 6012 }, { "epoch": 0.8209434091064236, "grad_norm": 0.05768035724759102, "learning_rate": 7.908492263063111e-06, "loss": 46.0025, "step": 6013 }, { "epoch": 0.8210799371970783, "grad_norm": 0.18711970746517181, "learning_rate": 7.89676000928678e-06, "loss": 46.0048, "step": 6014 }, { "epoch": 0.821216465287733, "grad_norm": 0.07377524673938751, "learning_rate": 7.885035718199984e-06, "loss": 46.0057, "step": 6015 }, { "epoch": 0.8213529933783876, "grad_norm": 0.034605689346790314, "learning_rate": 7.873319392020013e-06, "loss": 46.0081, "step": 6016 }, { "epoch": 0.8214895214690423, "grad_norm": 0.03718679025769234, "learning_rate": 7.861611032962712e-06, "loss": 46.0073, "step": 6017 }, { "epoch": 0.8216260495596969, "grad_norm": 0.08924690634012222, "learning_rate": 7.849910643242392e-06, "loss": 46.0043, "step": 6018 }, { "epoch": 0.8217625776503515, "grad_norm": 0.1583210676908493, "learning_rate": 7.83821822507188e-06, "loss": 46.0065, "step": 6019 }, { "epoch": 0.8218991057410062, "grad_norm": 0.1282387226819992, "learning_rate": 7.826533780662482e-06, "loss": 46.0088, "step": 6020 }, { "epoch": 0.8220356338316609, "grad_norm": 0.04746085777878761, "learning_rate": 7.814857312223967e-06, "loss": 46.0087, "step": 6021 }, { "epoch": 0.8221721619223156, "grad_norm": 0.13691987097263336, "learning_rate": 7.803188821964652e-06, "loss": 46.0104, "step": 6022 }, { "epoch": 0.8223086900129701, "grad_norm": 0.0932530090212822, "learning_rate": 7.791528312091307e-06, "loss": 46.0045, "step": 6023 }, { "epoch": 0.8224452181036248, "grad_norm": 0.14227940142154694, "learning_rate": 7.77987578480921e-06, "loss": 46.0033, "step": 6024 }, { "epoch": 0.8225817461942795, "grad_norm": 0.03251093626022339, "learning_rate": 
7.768231242322094e-06, "loss": 46.0053, "step": 6025 }, { "epoch": 0.8227182742849342, "grad_norm": 0.17647387087345123, "learning_rate": 7.756594686832235e-06, "loss": 46.0102, "step": 6026 }, { "epoch": 0.8228548023755887, "grad_norm": 0.05886781960725784, "learning_rate": 7.744966120540376e-06, "loss": 46.0021, "step": 6027 }, { "epoch": 0.8229913304662434, "grad_norm": 0.06378333270549774, "learning_rate": 7.733345545645727e-06, "loss": 46.0058, "step": 6028 }, { "epoch": 0.8231278585568981, "grad_norm": 0.08237350732088089, "learning_rate": 7.721732964346018e-06, "loss": 46.0083, "step": 6029 }, { "epoch": 0.8232643866475527, "grad_norm": 0.07293424755334854, "learning_rate": 7.71012837883745e-06, "loss": 46.0054, "step": 6030 }, { "epoch": 0.8234009147382074, "grad_norm": 0.13019222021102905, "learning_rate": 7.698531791314723e-06, "loss": 46.009, "step": 6031 }, { "epoch": 0.823537442828862, "grad_norm": 0.12359003722667694, "learning_rate": 7.686943203971009e-06, "loss": 46.0011, "step": 6032 }, { "epoch": 0.8236739709195167, "grad_norm": 0.11732473224401474, "learning_rate": 7.675362618997983e-06, "loss": 46.0151, "step": 6033 }, { "epoch": 0.8238104990101713, "grad_norm": 0.07434398680925369, "learning_rate": 7.663790038585793e-06, "loss": 46.0073, "step": 6034 }, { "epoch": 0.823947027100826, "grad_norm": 0.059117913246154785, "learning_rate": 7.652225464923086e-06, "loss": 46.0027, "step": 6035 }, { "epoch": 0.8240835551914807, "grad_norm": 0.09749983996152878, "learning_rate": 7.640668900196984e-06, "loss": 46.004, "step": 6036 }, { "epoch": 0.8242200832821353, "grad_norm": 0.06091352924704552, "learning_rate": 7.629120346593105e-06, "loss": 46.0037, "step": 6037 }, { "epoch": 0.8243566113727899, "grad_norm": 0.151980921626091, "learning_rate": 7.617579806295539e-06, "loss": 46.004, "step": 6038 }, { "epoch": 0.8244931394634446, "grad_norm": 0.15078437328338623, "learning_rate": 7.6060472814868665e-06, "loss": 46.0073, "step": 6039 }, { "epoch": 0.8246296675540993, "grad_norm": 0.05726735293865204, "learning_rate": 7.5945227743481616e-06, "loss": 46.0076, "step": 6040 }, { "epoch": 0.824766195644754, "grad_norm": 0.0581265352666378, "learning_rate": 7.583006287058963e-06, "loss": 46.0051, "step": 6041 }, { "epoch": 0.8249027237354085, "grad_norm": 0.08179526031017303, "learning_rate": 7.571497821797297e-06, "loss": 46.0031, "step": 6042 }, { "epoch": 0.8250392518260632, "grad_norm": 0.28460294008255005, "learning_rate": 7.559997380739714e-06, "loss": 46.0034, "step": 6043 }, { "epoch": 0.8251757799167179, "grad_norm": 0.061760131269693375, "learning_rate": 7.5485049660611704e-06, "loss": 46.0019, "step": 6044 }, { "epoch": 0.8253123080073725, "grad_norm": 0.13007239997386932, "learning_rate": 7.537020579935167e-06, "loss": 46.002, "step": 6045 }, { "epoch": 0.8254488360980272, "grad_norm": 0.32157158851623535, "learning_rate": 7.525544224533642e-06, "loss": 46.0051, "step": 6046 }, { "epoch": 0.8255853641886818, "grad_norm": 0.11048310995101929, "learning_rate": 7.5140759020270845e-06, "loss": 46.0007, "step": 6047 }, { "epoch": 0.8257218922793365, "grad_norm": 0.10676388442516327, "learning_rate": 7.502615614584374e-06, "loss": 46.0029, "step": 6048 }, { "epoch": 0.8258584203699911, "grad_norm": 0.556442141532898, "learning_rate": 7.4911633643729215e-06, "loss": 46.0074, "step": 6049 }, { "epoch": 0.8259949484606458, "grad_norm": 0.09750233590602875, "learning_rate": 7.479719153558623e-06, "loss": 46.0024, "step": 6050 }, { "epoch": 0.8261314765513005, "grad_norm": 
0.22688940167427063, "learning_rate": 7.468282984305852e-06, "loss": 46.02, "step": 6051 }, { "epoch": 0.826268004641955, "grad_norm": 0.05792313069105148, "learning_rate": 7.456854858777418e-06, "loss": 46.0002, "step": 6052 }, { "epoch": 0.8264045327326097, "grad_norm": 0.0689968690276146, "learning_rate": 7.445434779134653e-06, "loss": 46.0001, "step": 6053 }, { "epoch": 0.8265410608232644, "grad_norm": 0.04637899622321129, "learning_rate": 7.434022747537373e-06, "loss": 46.0074, "step": 6054 }, { "epoch": 0.8266775889139191, "grad_norm": 0.03404096141457558, "learning_rate": 7.422618766143852e-06, "loss": 46.0039, "step": 6055 }, { "epoch": 0.8268141170045737, "grad_norm": 0.05015493929386139, "learning_rate": 7.411222837110821e-06, "loss": 46.0059, "step": 6056 }, { "epoch": 0.8269506450952283, "grad_norm": 0.10109277069568634, "learning_rate": 7.399834962593532e-06, "loss": 46.003, "step": 6057 }, { "epoch": 0.827087173185883, "grad_norm": 0.036067619919776917, "learning_rate": 7.388455144745693e-06, "loss": 46.0041, "step": 6058 }, { "epoch": 0.8272237012765377, "grad_norm": 0.05580616742372513, "learning_rate": 7.377083385719496e-06, "loss": 46.0071, "step": 6059 }, { "epoch": 0.8273602293671923, "grad_norm": 0.12154435366392136, "learning_rate": 7.3657196876655676e-06, "loss": 46.0045, "step": 6060 }, { "epoch": 0.8274967574578469, "grad_norm": 0.06478314846754074, "learning_rate": 7.3543640527330815e-06, "loss": 46.0047, "step": 6061 }, { "epoch": 0.8276332855485016, "grad_norm": 0.1426214873790741, "learning_rate": 7.343016483069637e-06, "loss": 46.0048, "step": 6062 }, { "epoch": 0.8277698136391562, "grad_norm": 0.03235672414302826, "learning_rate": 7.331676980821317e-06, "loss": 46.0122, "step": 6063 }, { "epoch": 0.8279063417298109, "grad_norm": 0.14168477058410645, "learning_rate": 7.320345548132679e-06, "loss": 46.0089, "step": 6064 }, { "epoch": 0.8280428698204656, "grad_norm": 0.061205435544252396, "learning_rate": 7.309022187146764e-06, "loss": 46.0031, "step": 6065 }, { "epoch": 0.8281793979111202, "grad_norm": 0.08335790783166885, "learning_rate": 7.29770690000508e-06, "loss": 46.0037, "step": 6066 }, { "epoch": 0.8283159260017748, "grad_norm": 0.2071860134601593, "learning_rate": 7.2863996888476e-06, "loss": 46.0031, "step": 6067 }, { "epoch": 0.8284524540924295, "grad_norm": 0.12686733901500702, "learning_rate": 7.27510055581278e-06, "loss": 46.0011, "step": 6068 }, { "epoch": 0.8285889821830842, "grad_norm": 0.048894528299570084, "learning_rate": 7.26380950303755e-06, "loss": 46.0016, "step": 6069 }, { "epoch": 0.8287255102737389, "grad_norm": 0.028289075940847397, "learning_rate": 7.252526532657306e-06, "loss": 46.0091, "step": 6070 }, { "epoch": 0.8288620383643934, "grad_norm": 0.08121158182621002, "learning_rate": 7.2412516468059135e-06, "loss": 46.0073, "step": 6071 }, { "epoch": 0.8289985664550481, "grad_norm": 0.22146326303482056, "learning_rate": 7.2299848476157176e-06, "loss": 46.0061, "step": 6072 }, { "epoch": 0.8291350945457028, "grad_norm": 0.1996936947107315, "learning_rate": 7.218726137217518e-06, "loss": 46.0076, "step": 6073 }, { "epoch": 0.8292716226363575, "grad_norm": 0.1407512128353119, "learning_rate": 7.207475517740597e-06, "loss": 46.0061, "step": 6074 }, { "epoch": 0.8294081507270121, "grad_norm": 0.06108040735125542, "learning_rate": 7.1962329913127345e-06, "loss": 46.0047, "step": 6075 }, { "epoch": 0.8295446788176667, "grad_norm": 0.07648029923439026, "learning_rate": 7.184998560060114e-06, "loss": 46.0049, "step": 6076 }, { "epoch": 
0.8296812069083214, "grad_norm": 0.07422847300767899, "learning_rate": 7.173772226107434e-06, "loss": 46.0027, "step": 6077 }, { "epoch": 0.829817734998976, "grad_norm": 0.12291015684604645, "learning_rate": 7.162553991577847e-06, "loss": 46.0048, "step": 6078 }, { "epoch": 0.8299542630896307, "grad_norm": 0.03870971128344536, "learning_rate": 7.151343858593007e-06, "loss": 46.0112, "step": 6079 }, { "epoch": 0.8300907911802854, "grad_norm": 0.11775518953800201, "learning_rate": 7.140141829272978e-06, "loss": 46.0052, "step": 6080 }, { "epoch": 0.83022731927094, "grad_norm": 0.03470349311828613, "learning_rate": 7.128947905736322e-06, "loss": 46.0077, "step": 6081 }, { "epoch": 0.8303638473615946, "grad_norm": 0.12610211968421936, "learning_rate": 7.117762090100083e-06, "loss": 46.0101, "step": 6082 }, { "epoch": 0.8305003754522493, "grad_norm": 0.0489531047642231, "learning_rate": 7.1065843844797645e-06, "loss": 46.0073, "step": 6083 }, { "epoch": 0.830636903542904, "grad_norm": 0.08801731467247009, "learning_rate": 7.095414790989291e-06, "loss": 46.0061, "step": 6084 }, { "epoch": 0.8307734316335587, "grad_norm": 0.06951206922531128, "learning_rate": 7.084253311741101e-06, "loss": 46.0028, "step": 6085 }, { "epoch": 0.8309099597242132, "grad_norm": 0.244242325425148, "learning_rate": 7.0730999488461126e-06, "loss": 46.0012, "step": 6086 }, { "epoch": 0.8310464878148679, "grad_norm": 0.08469612151384354, "learning_rate": 7.06195470441367e-06, "loss": 46.0043, "step": 6087 }, { "epoch": 0.8311830159055226, "grad_norm": 0.06869438290596008, "learning_rate": 7.050817580551566e-06, "loss": 46.004, "step": 6088 }, { "epoch": 0.8313195439961772, "grad_norm": 0.097316674888134, "learning_rate": 7.039688579366121e-06, "loss": 46.0015, "step": 6089 }, { "epoch": 0.8314560720868319, "grad_norm": 0.0877746194601059, "learning_rate": 7.02856770296208e-06, "loss": 46.0051, "step": 6090 }, { "epoch": 0.8315926001774865, "grad_norm": 0.16605941951274872, "learning_rate": 7.017454953442654e-06, "loss": 46.0048, "step": 6091 }, { "epoch": 0.8317291282681412, "grad_norm": 0.049366965889930725, "learning_rate": 7.006350332909495e-06, "loss": 46.0078, "step": 6092 }, { "epoch": 0.8318656563587958, "grad_norm": 0.10642926394939423, "learning_rate": 6.995253843462773e-06, "loss": 46.004, "step": 6093 }, { "epoch": 0.8320021844494505, "grad_norm": 0.11018841713666916, "learning_rate": 6.984165487201083e-06, "loss": 46.0053, "step": 6094 }, { "epoch": 0.8321387125401051, "grad_norm": 0.21221759915351868, "learning_rate": 6.9730852662214745e-06, "loss": 46.006, "step": 6095 }, { "epoch": 0.8322752406307597, "grad_norm": 0.06317576766014099, "learning_rate": 6.962013182619487e-06, "loss": 46.0015, "step": 6096 }, { "epoch": 0.8324117687214144, "grad_norm": 0.15660235285758972, "learning_rate": 6.95094923848909e-06, "loss": 46.0016, "step": 6097 }, { "epoch": 0.8325482968120691, "grad_norm": 0.11748997122049332, "learning_rate": 6.939893435922745e-06, "loss": 46.0057, "step": 6098 }, { "epoch": 0.8326848249027238, "grad_norm": 0.2605035901069641, "learning_rate": 6.92884577701135e-06, "loss": 46.0032, "step": 6099 }, { "epoch": 0.8328213529933783, "grad_norm": 0.08642741292715073, "learning_rate": 6.917806263844268e-06, "loss": 46.0, "step": 6100 }, { "epoch": 0.832957881084033, "grad_norm": 0.09709883481264114, "learning_rate": 6.906774898509333e-06, "loss": 46.017, "step": 6101 }, { "epoch": 0.8330944091746877, "grad_norm": 0.03645646572113037, "learning_rate": 6.8957516830928215e-06, "loss": 46.006, "step": 6102 
}, { "epoch": 0.8332309372653424, "grad_norm": 0.1122979074716568, "learning_rate": 6.8847366196794825e-06, "loss": 46.0064, "step": 6103 }, { "epoch": 0.833367465355997, "grad_norm": 0.06446164101362228, "learning_rate": 6.873729710352511e-06, "loss": 46.0008, "step": 6104 }, { "epoch": 0.8335039934466516, "grad_norm": 0.06393400579690933, "learning_rate": 6.862730957193575e-06, "loss": 46.0027, "step": 6105 }, { "epoch": 0.8336405215373063, "grad_norm": 0.06039058417081833, "learning_rate": 6.851740362282788e-06, "loss": 46.0027, "step": 6106 }, { "epoch": 0.833777049627961, "grad_norm": 0.04073313623666763, "learning_rate": 6.840757927698715e-06, "loss": 46.0001, "step": 6107 }, { "epoch": 0.8339135777186156, "grad_norm": 0.12527039647102356, "learning_rate": 6.829783655518401e-06, "loss": 46.0018, "step": 6108 }, { "epoch": 0.8340501058092703, "grad_norm": 0.041363779455423355, "learning_rate": 6.818817547817313e-06, "loss": 46.0006, "step": 6109 }, { "epoch": 0.8341866338999249, "grad_norm": 0.09068888425827026, "learning_rate": 6.807859606669431e-06, "loss": 46.0021, "step": 6110 }, { "epoch": 0.8343231619905795, "grad_norm": 0.1332424134016037, "learning_rate": 6.7969098341471135e-06, "loss": 46.003, "step": 6111 }, { "epoch": 0.8344596900812342, "grad_norm": 0.13207131624221802, "learning_rate": 6.785968232321233e-06, "loss": 46.0031, "step": 6112 }, { "epoch": 0.8345962181718889, "grad_norm": 0.0770987793803215, "learning_rate": 6.775034803261082e-06, "loss": 46.0013, "step": 6113 }, { "epoch": 0.8347327462625436, "grad_norm": 0.039541322737932205, "learning_rate": 6.764109549034447e-06, "loss": 46.0039, "step": 6114 }, { "epoch": 0.8348692743531981, "grad_norm": 0.12872567772865295, "learning_rate": 6.753192471707542e-06, "loss": 46.0095, "step": 6115 }, { "epoch": 0.8350058024438528, "grad_norm": 0.05751314014196396, "learning_rate": 6.742283573345004e-06, "loss": 46.0109, "step": 6116 }, { "epoch": 0.8351423305345075, "grad_norm": 0.09330077469348907, "learning_rate": 6.731382856009993e-06, "loss": 46.0102, "step": 6117 }, { "epoch": 0.8352788586251622, "grad_norm": 0.13137857615947723, "learning_rate": 6.720490321764072e-06, "loss": 46.0074, "step": 6118 }, { "epoch": 0.8354153867158168, "grad_norm": 0.04454739764332771, "learning_rate": 6.7096059726672826e-06, "loss": 46.0027, "step": 6119 }, { "epoch": 0.8355519148064714, "grad_norm": 0.17801840603351593, "learning_rate": 6.698729810778065e-06, "loss": 46.0023, "step": 6120 }, { "epoch": 0.8356884428971261, "grad_norm": 0.26545488834381104, "learning_rate": 6.6878618381533915e-06, "loss": 46.0049, "step": 6121 }, { "epoch": 0.8358249709877807, "grad_norm": 0.10738561302423477, "learning_rate": 6.67700205684863e-06, "loss": 46.0011, "step": 6122 }, { "epoch": 0.8359614990784354, "grad_norm": 0.10425274819135666, "learning_rate": 6.6661504689176206e-06, "loss": 46.0019, "step": 6123 }, { "epoch": 0.83609802716909, "grad_norm": 0.09685317426919937, "learning_rate": 6.655307076412637e-06, "loss": 46.0099, "step": 6124 }, { "epoch": 0.8362345552597447, "grad_norm": 0.08820068091154099, "learning_rate": 6.644471881384423e-06, "loss": 46.0079, "step": 6125 }, { "epoch": 0.8363710833503993, "grad_norm": 0.06761814653873444, "learning_rate": 6.633644885882162e-06, "loss": 46.0047, "step": 6126 }, { "epoch": 0.836507611441054, "grad_norm": 0.10741560906171799, "learning_rate": 6.622826091953482e-06, "loss": 46.0081, "step": 6127 }, { "epoch": 0.8366441395317087, "grad_norm": 0.1729738861322403, "learning_rate": 6.61201550164447e-06, 
"loss": 46.004, "step": 6128 }, { "epoch": 0.8367806676223633, "grad_norm": 0.06941565126180649, "learning_rate": 6.601213116999655e-06, "loss": 46.0096, "step": 6129 }, { "epoch": 0.8369171957130179, "grad_norm": 0.11558625102043152, "learning_rate": 6.590418940062021e-06, "loss": 46.0037, "step": 6130 }, { "epoch": 0.8370537238036726, "grad_norm": 0.1280505359172821, "learning_rate": 6.579632972872985e-06, "loss": 46.0079, "step": 6131 }, { "epoch": 0.8371902518943273, "grad_norm": 0.08397727459669113, "learning_rate": 6.5688552174724246e-06, "loss": 46.0084, "step": 6132 }, { "epoch": 0.837326779984982, "grad_norm": 0.05542052537202835, "learning_rate": 6.558085675898668e-06, "loss": 46.0074, "step": 6133 }, { "epoch": 0.8374633080756365, "grad_norm": 0.06408143788576126, "learning_rate": 6.547324350188477e-06, "loss": 46.0067, "step": 6134 }, { "epoch": 0.8375998361662912, "grad_norm": 0.03796890750527382, "learning_rate": 6.536571242377065e-06, "loss": 46.0016, "step": 6135 }, { "epoch": 0.8377363642569459, "grad_norm": 0.1249837800860405, "learning_rate": 6.525826354498088e-06, "loss": 46.0028, "step": 6136 }, { "epoch": 0.8378728923476005, "grad_norm": 0.06939119845628738, "learning_rate": 6.515089688583659e-06, "loss": 46.0049, "step": 6137 }, { "epoch": 0.8380094204382552, "grad_norm": 0.073392853140831, "learning_rate": 6.5043612466643256e-06, "loss": 46.0141, "step": 6138 }, { "epoch": 0.8381459485289098, "grad_norm": 0.16204950213432312, "learning_rate": 6.493641030769082e-06, "loss": 46.0063, "step": 6139 }, { "epoch": 0.8382824766195645, "grad_norm": 0.03843788430094719, "learning_rate": 6.482929042925362e-06, "loss": 46.0029, "step": 6140 }, { "epoch": 0.8384190047102191, "grad_norm": 0.08909065276384354, "learning_rate": 6.472225285159045e-06, "loss": 46.0017, "step": 6141 }, { "epoch": 0.8385555328008738, "grad_norm": 0.051948919892311096, "learning_rate": 6.461529759494489e-06, "loss": 46.002, "step": 6142 }, { "epoch": 0.8386920608915285, "grad_norm": 0.06876768171787262, "learning_rate": 6.450842467954427e-06, "loss": 46.0018, "step": 6143 }, { "epoch": 0.838828588982183, "grad_norm": 0.03402873873710632, "learning_rate": 6.440163412560085e-06, "loss": 46.006, "step": 6144 }, { "epoch": 0.8389651170728377, "grad_norm": 0.06429588049650192, "learning_rate": 6.429492595331105e-06, "loss": 46.0051, "step": 6145 }, { "epoch": 0.8391016451634924, "grad_norm": 0.052849285304546356, "learning_rate": 6.418830018285621e-06, "loss": 46.0026, "step": 6146 }, { "epoch": 0.8392381732541471, "grad_norm": 0.17560140788555145, "learning_rate": 6.408175683440132e-06, "loss": 46.0035, "step": 6147 }, { "epoch": 0.8393747013448017, "grad_norm": 0.1917664259672165, "learning_rate": 6.397529592809614e-06, "loss": 46.0047, "step": 6148 }, { "epoch": 0.8395112294354563, "grad_norm": 0.15823040902614594, "learning_rate": 6.386891748407525e-06, "loss": 46.0044, "step": 6149 }, { "epoch": 0.839647757526111, "grad_norm": 0.24817152321338654, "learning_rate": 6.37626215224571e-06, "loss": 46.0, "step": 6150 }, { "epoch": 0.8397842856167657, "grad_norm": 0.08763664215803146, "learning_rate": 6.365640806334455e-06, "loss": 46.0042, "step": 6151 }, { "epoch": 0.8399208137074203, "grad_norm": 0.061033859848976135, "learning_rate": 6.355027712682498e-06, "loss": 46.0027, "step": 6152 }, { "epoch": 0.840057341798075, "grad_norm": 0.10757091641426086, "learning_rate": 6.344422873297046e-06, "loss": 46.0035, "step": 6153 }, { "epoch": 0.8401938698887296, "grad_norm": 0.04874802380800247, "learning_rate": 
6.3338262901837084e-06, "loss": 46.0027, "step": 6154 }, { "epoch": 0.8403303979793842, "grad_norm": 0.05956596881151199, "learning_rate": 6.323237965346518e-06, "loss": 46.006, "step": 6155 }, { "epoch": 0.8404669260700389, "grad_norm": 0.20538052916526794, "learning_rate": 6.312657900788e-06, "loss": 46.0004, "step": 6156 }, { "epoch": 0.8406034541606936, "grad_norm": 0.06089404225349426, "learning_rate": 6.302086098509075e-06, "loss": 46.004, "step": 6157 }, { "epoch": 0.8407399822513482, "grad_norm": 0.11093447357416153, "learning_rate": 6.291522560509122e-06, "loss": 46.0028, "step": 6158 }, { "epoch": 0.8408765103420028, "grad_norm": 0.12928754091262817, "learning_rate": 6.280967288785922e-06, "loss": 46.0032, "step": 6159 }, { "epoch": 0.8410130384326575, "grad_norm": 0.05277484282851219, "learning_rate": 6.2704202853357485e-06, "loss": 46.0041, "step": 6160 }, { "epoch": 0.8411495665233122, "grad_norm": 0.05415710061788559, "learning_rate": 6.259881552153274e-06, "loss": 46.0042, "step": 6161 }, { "epoch": 0.8412860946139669, "grad_norm": 0.09116220474243164, "learning_rate": 6.249351091231615e-06, "loss": 46.0128, "step": 6162 }, { "epoch": 0.8414226227046214, "grad_norm": 0.0792597308754921, "learning_rate": 6.238828904562316e-06, "loss": 46.0062, "step": 6163 }, { "epoch": 0.8415591507952761, "grad_norm": 0.05169396847486496, "learning_rate": 6.2283149941353755e-06, "loss": 46.0042, "step": 6164 }, { "epoch": 0.8416956788859308, "grad_norm": 0.08008744567632675, "learning_rate": 6.217809361939203e-06, "loss": 46.008, "step": 6165 }, { "epoch": 0.8418322069765855, "grad_norm": 0.056986004114151, "learning_rate": 6.20731200996067e-06, "loss": 46.0029, "step": 6166 }, { "epoch": 0.8419687350672401, "grad_norm": 0.06399812549352646, "learning_rate": 6.196822940185048e-06, "loss": 46.0075, "step": 6167 }, { "epoch": 0.8421052631578947, "grad_norm": 0.10743512958288193, "learning_rate": 6.1863421545960806e-06, "loss": 46.0048, "step": 6168 }, { "epoch": 0.8422417912485494, "grad_norm": 0.0944955050945282, "learning_rate": 6.1758696551758976e-06, "loss": 46.0035, "step": 6169 }, { "epoch": 0.842378319339204, "grad_norm": 0.25673708319664, "learning_rate": 6.165405443905132e-06, "loss": 46.0068, "step": 6170 }, { "epoch": 0.8425148474298587, "grad_norm": 0.0836816281080246, "learning_rate": 6.154949522762771e-06, "loss": 46.0035, "step": 6171 }, { "epoch": 0.8426513755205134, "grad_norm": 0.10451210290193558, "learning_rate": 6.1445018937262784e-06, "loss": 46.0012, "step": 6172 }, { "epoch": 0.842787903611168, "grad_norm": 0.09137729555368423, "learning_rate": 6.13406255877153e-06, "loss": 46.0078, "step": 6173 }, { "epoch": 0.8429244317018226, "grad_norm": 0.1019613966345787, "learning_rate": 6.12363151987288e-06, "loss": 46.0011, "step": 6174 }, { "epoch": 0.8430609597924773, "grad_norm": 0.296228289604187, "learning_rate": 6.113208779003043e-06, "loss": 46.0121, "step": 6175 }, { "epoch": 0.843197487883132, "grad_norm": 0.07737822085618973, "learning_rate": 6.102794338133194e-06, "loss": 46.0129, "step": 6176 }, { "epoch": 0.8433340159737867, "grad_norm": 0.044169049710035324, "learning_rate": 6.092388199232973e-06, "loss": 46.0093, "step": 6177 }, { "epoch": 0.8434705440644412, "grad_norm": 0.08454393595457077, "learning_rate": 6.0819903642704175e-06, "loss": 46.0071, "step": 6178 }, { "epoch": 0.8436070721550959, "grad_norm": 0.1175178587436676, "learning_rate": 6.071600835211966e-06, "loss": 46.0026, "step": 6179 }, { "epoch": 0.8437436002457506, "grad_norm": 0.12188303470611572, 
"learning_rate": 6.061219614022534e-06, "loss": 46.004, "step": 6180 }, { "epoch": 0.8438801283364052, "grad_norm": 0.08681346476078033, "learning_rate": 6.050846702665453e-06, "loss": 46.0044, "step": 6181 }, { "epoch": 0.8440166564270599, "grad_norm": 0.06800805032253265, "learning_rate": 6.04048210310249e-06, "loss": 46.0072, "step": 6182 }, { "epoch": 0.8441531845177145, "grad_norm": 0.038432031869888306, "learning_rate": 6.030125817293792e-06, "loss": 46.0153, "step": 6183 }, { "epoch": 0.8442897126083692, "grad_norm": 0.15436340868473053, "learning_rate": 6.019777847198005e-06, "loss": 46.0042, "step": 6184 }, { "epoch": 0.8444262406990238, "grad_norm": 0.1063585877418518, "learning_rate": 6.009438194772154e-06, "loss": 46.0135, "step": 6185 }, { "epoch": 0.8445627687896785, "grad_norm": 0.10903940349817276, "learning_rate": 5.999106861971715e-06, "loss": 46.0063, "step": 6186 }, { "epoch": 0.8446992968803332, "grad_norm": 0.043781500309705734, "learning_rate": 5.9887838507505515e-06, "loss": 46.0049, "step": 6187 }, { "epoch": 0.8448358249709877, "grad_norm": 0.07723747193813324, "learning_rate": 5.978469163061018e-06, "loss": 46.0098, "step": 6188 }, { "epoch": 0.8449723530616424, "grad_norm": 0.09217383712530136, "learning_rate": 5.9681628008538385e-06, "loss": 46.0019, "step": 6189 }, { "epoch": 0.8451088811522971, "grad_norm": 0.08317796885967255, "learning_rate": 5.957864766078186e-06, "loss": 46.0017, "step": 6190 }, { "epoch": 0.8452454092429518, "grad_norm": 0.065298892557621, "learning_rate": 5.947575060681659e-06, "loss": 46.0056, "step": 6191 }, { "epoch": 0.8453819373336063, "grad_norm": 0.08831185847520828, "learning_rate": 5.937293686610274e-06, "loss": 46.0018, "step": 6192 }, { "epoch": 0.845518465424261, "grad_norm": 0.06422191858291626, "learning_rate": 5.927020645808473e-06, "loss": 46.0093, "step": 6193 }, { "epoch": 0.8456549935149157, "grad_norm": 0.05868272855877876, "learning_rate": 5.916755940219132e-06, "loss": 46.006, "step": 6194 }, { "epoch": 0.8457915216055704, "grad_norm": 0.15055608749389648, "learning_rate": 5.90649957178353e-06, "loss": 46.0035, "step": 6195 }, { "epoch": 0.845928049696225, "grad_norm": 0.09214746952056885, "learning_rate": 5.896251542441394e-06, "loss": 46.0012, "step": 6196 }, { "epoch": 0.8460645777868796, "grad_norm": 0.0797540619969368, "learning_rate": 5.886011854130857e-06, "loss": 46.0, "step": 6197 }, { "epoch": 0.8462011058775343, "grad_norm": 0.22085632383823395, "learning_rate": 5.875780508788476e-06, "loss": 46.0074, "step": 6198 }, { "epoch": 0.846337633968189, "grad_norm": 0.25238552689552307, "learning_rate": 5.86555750834924e-06, "loss": 46.0031, "step": 6199 }, { "epoch": 0.8464741620588436, "grad_norm": 0.07208089530467987, "learning_rate": 5.855342854746543e-06, "loss": 46.0029, "step": 6200 }, { "epoch": 0.8466106901494983, "grad_norm": 0.0876237079501152, "learning_rate": 5.845136549912222e-06, "loss": 46.0118, "step": 6201 }, { "epoch": 0.8467472182401529, "grad_norm": 0.0378757119178772, "learning_rate": 5.834938595776518e-06, "loss": 46.0002, "step": 6202 }, { "epoch": 0.8468837463308075, "grad_norm": 0.0763121023774147, "learning_rate": 5.824748994268098e-06, "loss": 46.0012, "step": 6203 }, { "epoch": 0.8470202744214622, "grad_norm": 0.13305479288101196, "learning_rate": 5.814567747314048e-06, "loss": 46.0033, "step": 6204 }, { "epoch": 0.8471568025121169, "grad_norm": 0.2069501429796219, "learning_rate": 5.804394856839879e-06, "loss": 46.0032, "step": 6205 }, { "epoch": 0.8472933306027716, "grad_norm": 
0.05316414684057236, "learning_rate": 5.794230324769517e-06, "loss": 46.0018, "step": 6206 }, { "epoch": 0.8474298586934261, "grad_norm": 0.07309497892856598, "learning_rate": 5.784074153025309e-06, "loss": 46.0002, "step": 6207 }, { "epoch": 0.8475663867840808, "grad_norm": 0.060111045837402344, "learning_rate": 5.7739263435280076e-06, "loss": 46.0037, "step": 6208 }, { "epoch": 0.8477029148747355, "grad_norm": 0.0409068800508976, "learning_rate": 5.763786898196821e-06, "loss": 46.0017, "step": 6209 }, { "epoch": 0.8478394429653902, "grad_norm": 0.10164167732000351, "learning_rate": 5.7536558189493495e-06, "loss": 46.0018, "step": 6210 }, { "epoch": 0.8479759710560448, "grad_norm": 0.08834372460842133, "learning_rate": 5.743533107701593e-06, "loss": 46.0029, "step": 6211 }, { "epoch": 0.8481124991466994, "grad_norm": 0.18597055971622467, "learning_rate": 5.733418766367987e-06, "loss": 46.0037, "step": 6212 }, { "epoch": 0.8482490272373541, "grad_norm": 0.052618447691202164, "learning_rate": 5.723312796861407e-06, "loss": 46.0022, "step": 6213 }, { "epoch": 0.8483855553280087, "grad_norm": 0.10731707513332367, "learning_rate": 5.71321520109312e-06, "loss": 46.0022, "step": 6214 }, { "epoch": 0.8485220834186634, "grad_norm": 0.371520459651947, "learning_rate": 5.70312598097279e-06, "loss": 46.0051, "step": 6215 }, { "epoch": 0.8486586115093181, "grad_norm": 0.13528549671173096, "learning_rate": 5.693045138408548e-06, "loss": 46.0037, "step": 6216 }, { "epoch": 0.8487951395999727, "grad_norm": 0.03202936425805092, "learning_rate": 5.682972675306897e-06, "loss": 46.007, "step": 6217 }, { "epoch": 0.8489316676906273, "grad_norm": 0.22396743297576904, "learning_rate": 5.67290859357279e-06, "loss": 46.0076, "step": 6218 }, { "epoch": 0.849068195781282, "grad_norm": 0.1305983066558838, "learning_rate": 5.662852895109544e-06, "loss": 46.0004, "step": 6219 }, { "epoch": 0.8492047238719367, "grad_norm": 0.0358603373169899, "learning_rate": 5.652805581818943e-06, "loss": 46.0047, "step": 6220 }, { "epoch": 0.8493412519625912, "grad_norm": 0.12681376934051514, "learning_rate": 5.6427666556011675e-06, "loss": 46.0016, "step": 6221 }, { "epoch": 0.8494777800532459, "grad_norm": 0.06972219794988632, "learning_rate": 5.632736118354804e-06, "loss": 46.006, "step": 6222 }, { "epoch": 0.8496143081439006, "grad_norm": 0.04579448699951172, "learning_rate": 5.62271397197685e-06, "loss": 46.0011, "step": 6223 }, { "epoch": 0.8497508362345553, "grad_norm": 0.04295389726758003, "learning_rate": 5.612700218362738e-06, "loss": 46.0025, "step": 6224 }, { "epoch": 0.84988736432521, "grad_norm": 0.09665113687515259, "learning_rate": 5.602694859406288e-06, "loss": 46.0042, "step": 6225 }, { "epoch": 0.8500238924158645, "grad_norm": 0.06018732115626335, "learning_rate": 5.592697896999749e-06, "loss": 46.0051, "step": 6226 }, { "epoch": 0.8501604205065192, "grad_norm": 0.13203175365924835, "learning_rate": 5.58270933303377e-06, "loss": 46.0079, "step": 6227 }, { "epoch": 0.8502969485971739, "grad_norm": 0.12878674268722534, "learning_rate": 5.572729169397422e-06, "loss": 46.0008, "step": 6228 }, { "epoch": 0.8504334766878285, "grad_norm": 0.13498997688293457, "learning_rate": 5.5627574079781804e-06, "loss": 46.0048, "step": 6229 }, { "epoch": 0.8505700047784832, "grad_norm": 0.06516686826944351, "learning_rate": 5.552794050661942e-06, "loss": 46.007, "step": 6230 }, { "epoch": 0.8507065328691378, "grad_norm": 0.2249339073896408, "learning_rate": 5.542839099332997e-06, "loss": 46.0061, "step": 6231 }, { "epoch": 
0.8508430609597925, "grad_norm": 0.122530996799469, "learning_rate": 5.532892555874059e-06, "loss": 46.0072, "step": 6232 }, { "epoch": 0.8509795890504471, "grad_norm": 0.046025484800338745, "learning_rate": 5.522954422166249e-06, "loss": 46.0115, "step": 6233 }, { "epoch": 0.8511161171411018, "grad_norm": 0.07992717623710632, "learning_rate": 5.513024700089092e-06, "loss": 46.0021, "step": 6234 }, { "epoch": 0.8512526452317565, "grad_norm": 0.06898265331983566, "learning_rate": 5.503103391520531e-06, "loss": 46.0013, "step": 6235 }, { "epoch": 0.851389173322411, "grad_norm": 0.20360061526298523, "learning_rate": 5.493190498336903e-06, "loss": 46.0015, "step": 6236 }, { "epoch": 0.8515257014130657, "grad_norm": 0.06443516910076141, "learning_rate": 5.483286022412992e-06, "loss": 46.0016, "step": 6237 }, { "epoch": 0.8516622295037204, "grad_norm": 0.07976426929235458, "learning_rate": 5.473389965621933e-06, "loss": 46.0069, "step": 6238 }, { "epoch": 0.8517987575943751, "grad_norm": 0.07185877114534378, "learning_rate": 5.463502329835307e-06, "loss": 46.002, "step": 6239 }, { "epoch": 0.8519352856850297, "grad_norm": 0.09070443361997604, "learning_rate": 5.453623116923085e-06, "loss": 46.0038, "step": 6240 }, { "epoch": 0.8520718137756843, "grad_norm": 0.06313758343458176, "learning_rate": 5.4437523287536765e-06, "loss": 46.001, "step": 6241 }, { "epoch": 0.852208341866339, "grad_norm": 0.06845686584711075, "learning_rate": 5.433889967193851e-06, "loss": 46.0085, "step": 6242 }, { "epoch": 0.8523448699569937, "grad_norm": 0.06695538759231567, "learning_rate": 5.424036034108804e-06, "loss": 46.0012, "step": 6243 }, { "epoch": 0.8524813980476483, "grad_norm": 0.08711196482181549, "learning_rate": 5.414190531362162e-06, "loss": 46.0029, "step": 6244 }, { "epoch": 0.852617926138303, "grad_norm": 0.1087542474269867, "learning_rate": 5.404353460815936e-06, "loss": 46.0075, "step": 6245 }, { "epoch": 0.8527544542289576, "grad_norm": 0.3180321156978607, "learning_rate": 5.394524824330522e-06, "loss": 46.0043, "step": 6246 }, { "epoch": 0.8528909823196122, "grad_norm": 0.3016775846481323, "learning_rate": 5.384704623764736e-06, "loss": 46.0075, "step": 6247 }, { "epoch": 0.8530275104102669, "grad_norm": 0.501380443572998, "learning_rate": 5.374892860975833e-06, "loss": 46.0092, "step": 6248 }, { "epoch": 0.8531640385009216, "grad_norm": 0.13894036412239075, "learning_rate": 5.365089537819434e-06, "loss": 46.0047, "step": 6249 }, { "epoch": 0.8533005665915763, "grad_norm": 0.10643784701824188, "learning_rate": 5.355294656149546e-06, "loss": 46.0, "step": 6250 }, { "epoch": 0.8534370946822308, "grad_norm": 0.14977635443210602, "learning_rate": 5.345508217818634e-06, "loss": 46.0138, "step": 6251 }, { "epoch": 0.8535736227728855, "grad_norm": 0.07418506592512131, "learning_rate": 5.335730224677537e-06, "loss": 46.0093, "step": 6252 }, { "epoch": 0.8537101508635402, "grad_norm": 0.10644843429327011, "learning_rate": 5.325960678575498e-06, "loss": 46.0024, "step": 6253 }, { "epoch": 0.8538466789541949, "grad_norm": 0.08437232673168182, "learning_rate": 5.316199581360131e-06, "loss": 46.0025, "step": 6254 }, { "epoch": 0.8539832070448494, "grad_norm": 0.12532442808151245, "learning_rate": 5.3064469348775235e-06, "loss": 46.0033, "step": 6255 }, { "epoch": 0.8541197351355041, "grad_norm": 0.17127573490142822, "learning_rate": 5.296702740972104e-06, "loss": 46.0018, "step": 6256 }, { "epoch": 0.8542562632261588, "grad_norm": 0.029678741469979286, "learning_rate": 5.286967001486737e-06, "loss": 46.0025, 
"step": 6257 }, { "epoch": 0.8543927913168134, "grad_norm": 0.10604444891214371, "learning_rate": 5.277239718262661e-06, "loss": 46.002, "step": 6258 }, { "epoch": 0.8545293194074681, "grad_norm": 0.11882036179304123, "learning_rate": 5.267520893139533e-06, "loss": 46.008, "step": 6259 }, { "epoch": 0.8546658474981227, "grad_norm": 0.09004377573728561, "learning_rate": 5.257810527955409e-06, "loss": 46.0034, "step": 6260 }, { "epoch": 0.8548023755887774, "grad_norm": 0.09785200655460358, "learning_rate": 5.248108624546738e-06, "loss": 46.0052, "step": 6261 }, { "epoch": 0.854938903679432, "grad_norm": 0.09282954037189484, "learning_rate": 5.2384151847483796e-06, "loss": 46.004, "step": 6262 }, { "epoch": 0.8550754317700867, "grad_norm": 0.0414278618991375, "learning_rate": 5.228730210393579e-06, "loss": 46.0022, "step": 6263 }, { "epoch": 0.8552119598607414, "grad_norm": 0.06583976000547409, "learning_rate": 5.219053703313987e-06, "loss": 46.0041, "step": 6264 }, { "epoch": 0.855348487951396, "grad_norm": 0.11196476966142654, "learning_rate": 5.209385665339656e-06, "loss": 46.0047, "step": 6265 }, { "epoch": 0.8554850160420506, "grad_norm": 0.09136521816253662, "learning_rate": 5.199726098299035e-06, "loss": 46.0111, "step": 6266 }, { "epoch": 0.8556215441327053, "grad_norm": 0.1343361884355545, "learning_rate": 5.1900750040189685e-06, "loss": 46.0067, "step": 6267 }, { "epoch": 0.85575807222336, "grad_norm": 0.08278503268957138, "learning_rate": 5.180432384324691e-06, "loss": 46.0009, "step": 6268 }, { "epoch": 0.8558946003140147, "grad_norm": 0.0781792476773262, "learning_rate": 5.17079824103987e-06, "loss": 46.0073, "step": 6269 }, { "epoch": 0.8560311284046692, "grad_norm": 0.027263283729553223, "learning_rate": 5.161172575986517e-06, "loss": 46.0048, "step": 6270 }, { "epoch": 0.8561676564953239, "grad_norm": 0.0413278304040432, "learning_rate": 5.151555390985075e-06, "loss": 46.0002, "step": 6271 }, { "epoch": 0.8563041845859786, "grad_norm": 0.174760639667511, "learning_rate": 5.14194668785436e-06, "loss": 46.0088, "step": 6272 }, { "epoch": 0.8564407126766332, "grad_norm": 0.07294417172670364, "learning_rate": 5.132346468411636e-06, "loss": 46.0069, "step": 6273 }, { "epoch": 0.8565772407672879, "grad_norm": 0.1732466220855713, "learning_rate": 5.122754734472496e-06, "loss": 46.0067, "step": 6274 }, { "epoch": 0.8567137688579425, "grad_norm": 0.10206674039363861, "learning_rate": 5.113171487850949e-06, "loss": 46.0045, "step": 6275 }, { "epoch": 0.8568502969485972, "grad_norm": 0.14966391026973724, "learning_rate": 5.103596730359428e-06, "loss": 46.0027, "step": 6276 }, { "epoch": 0.8569868250392518, "grad_norm": 0.1250821202993393, "learning_rate": 5.094030463808747e-06, "loss": 46.0071, "step": 6277 }, { "epoch": 0.8571233531299065, "grad_norm": 0.08549633622169495, "learning_rate": 5.08447269000808e-06, "loss": 46.0125, "step": 6278 }, { "epoch": 0.8572598812205612, "grad_norm": 0.15584120154380798, "learning_rate": 5.074923410765025e-06, "loss": 46.0091, "step": 6279 }, { "epoch": 0.8573964093112157, "grad_norm": 0.041059188544750214, "learning_rate": 5.065382627885584e-06, "loss": 46.0129, "step": 6280 }, { "epoch": 0.8575329374018704, "grad_norm": 0.08157568424940109, "learning_rate": 5.055850343174146e-06, "loss": 46.0052, "step": 6281 }, { "epoch": 0.8576694654925251, "grad_norm": 0.058933671563863754, "learning_rate": 5.046326558433451e-06, "loss": 46.0148, "step": 6282 }, { "epoch": 0.8578059935831798, "grad_norm": 0.1016300842165947, "learning_rate": 
5.036811275464698e-06, "loss": 46.004, "step": 6283 }, { "epoch": 0.8579425216738344, "grad_norm": 0.07694025337696075, "learning_rate": 5.027304496067431e-06, "loss": 46.001, "step": 6284 }, { "epoch": 0.858079049764489, "grad_norm": 0.07719104737043381, "learning_rate": 5.017806222039617e-06, "loss": 46.0059, "step": 6285 }, { "epoch": 0.8582155778551437, "grad_norm": 0.13854746520519257, "learning_rate": 5.0083164551775585e-06, "loss": 46.0048, "step": 6286 }, { "epoch": 0.8583521059457984, "grad_norm": 0.0857921615242958, "learning_rate": 4.998835197276025e-06, "loss": 46.0049, "step": 6287 }, { "epoch": 0.858488634036453, "grad_norm": 0.07101180404424667, "learning_rate": 4.989362450128132e-06, "loss": 46.0025, "step": 6288 }, { "epoch": 0.8586251621271076, "grad_norm": 0.14180888235569, "learning_rate": 4.979898215525386e-06, "loss": 46.0008, "step": 6289 }, { "epoch": 0.8587616902177623, "grad_norm": 0.0757468119263649, "learning_rate": 4.970442495257699e-06, "loss": 46.0052, "step": 6290 }, { "epoch": 0.858898218308417, "grad_norm": 0.10137920826673508, "learning_rate": 4.960995291113357e-06, "loss": 46.0052, "step": 6291 }, { "epoch": 0.8590347463990716, "grad_norm": 0.06480114907026291, "learning_rate": 4.951556604879048e-06, "loss": 46.0013, "step": 6292 }, { "epoch": 0.8591712744897263, "grad_norm": 0.062010589987039566, "learning_rate": 4.942126438339839e-06, "loss": 46.0045, "step": 6293 }, { "epoch": 0.8593078025803809, "grad_norm": 0.13040238618850708, "learning_rate": 4.9327047932792025e-06, "loss": 46.0087, "step": 6294 }, { "epoch": 0.8594443306710355, "grad_norm": 0.13852538168430328, "learning_rate": 4.92329167147898e-06, "loss": 46.001, "step": 6295 }, { "epoch": 0.8595808587616902, "grad_norm": 0.0985373929142952, "learning_rate": 4.913887074719409e-06, "loss": 46.0023, "step": 6296 }, { "epoch": 0.8597173868523449, "grad_norm": 0.049778129905462265, "learning_rate": 4.904491004779116e-06, "loss": 46.0029, "step": 6297 }, { "epoch": 0.8598539149429996, "grad_norm": 0.23525266349315643, "learning_rate": 4.895103463435108e-06, "loss": 46.0097, "step": 6298 }, { "epoch": 0.8599904430336541, "grad_norm": 0.18074829876422882, "learning_rate": 4.8857244524627975e-06, "loss": 46.0148, "step": 6299 }, { "epoch": 0.8601269711243088, "grad_norm": 0.17561691999435425, "learning_rate": 4.876353973635955e-06, "loss": 46.0011, "step": 6300 }, { "epoch": 0.8602634992149635, "grad_norm": 0.1084398403763771, "learning_rate": 4.866992028726769e-06, "loss": 46.0069, "step": 6301 }, { "epoch": 0.8604000273056182, "grad_norm": 0.16469942033290863, "learning_rate": 4.857638619505789e-06, "loss": 46.0034, "step": 6302 }, { "epoch": 0.8605365553962728, "grad_norm": 0.06088807433843613, "learning_rate": 4.84829374774195e-06, "loss": 46.0049, "step": 6303 }, { "epoch": 0.8606730834869274, "grad_norm": 0.03890854865312576, "learning_rate": 4.838957415202605e-06, "loss": 46.0009, "step": 6304 }, { "epoch": 0.8608096115775821, "grad_norm": 0.2199796587228775, "learning_rate": 4.829629623653465e-06, "loss": 46.0087, "step": 6305 }, { "epoch": 0.8609461396682367, "grad_norm": 0.04636335372924805, "learning_rate": 4.820310374858611e-06, "loss": 46.0003, "step": 6306 }, { "epoch": 0.8610826677588914, "grad_norm": 0.16936033964157104, "learning_rate": 4.810999670580523e-06, "loss": 46.0022, "step": 6307 }, { "epoch": 0.8612191958495461, "grad_norm": 0.05558651313185692, "learning_rate": 4.8016975125801e-06, "loss": 46.0009, "step": 6308 }, { "epoch": 0.8613557239402007, "grad_norm": 
0.08061353117227554, "learning_rate": 4.7924039026165875e-06, "loss": 46.0074, "step": 6309 }, { "epoch": 0.8614922520308553, "grad_norm": 0.0302040446549654, "learning_rate": 4.783118842447587e-06, "loss": 46.013, "step": 6310 }, { "epoch": 0.86162878012151, "grad_norm": 0.07129661738872528, "learning_rate": 4.773842333829148e-06, "loss": 46.011, "step": 6311 }, { "epoch": 0.8617653082121647, "grad_norm": 0.03952343016862869, "learning_rate": 4.764574378515668e-06, "loss": 46.0014, "step": 6312 }, { "epoch": 0.8619018363028194, "grad_norm": 0.10225177556276321, "learning_rate": 4.755314978259934e-06, "loss": 46.0143, "step": 6313 }, { "epoch": 0.8620383643934739, "grad_norm": 0.12329023331403732, "learning_rate": 4.746064134813078e-06, "loss": 46.0063, "step": 6314 }, { "epoch": 0.8621748924841286, "grad_norm": 0.0677800253033638, "learning_rate": 4.736821849924683e-06, "loss": 46.0167, "step": 6315 }, { "epoch": 0.8623114205747833, "grad_norm": 0.17329756915569305, "learning_rate": 4.727588125342669e-06, "loss": 46.0032, "step": 6316 }, { "epoch": 0.862447948665438, "grad_norm": 0.02971787191927433, "learning_rate": 4.7183629628133385e-06, "loss": 46.0106, "step": 6317 }, { "epoch": 0.8625844767560925, "grad_norm": 0.17098282277584076, "learning_rate": 4.70914636408139e-06, "loss": 46.014, "step": 6318 }, { "epoch": 0.8627210048467472, "grad_norm": 0.08451289683580399, "learning_rate": 4.699938330889886e-06, "loss": 46.0071, "step": 6319 }, { "epoch": 0.8628575329374019, "grad_norm": 0.13098810613155365, "learning_rate": 4.6907388649802765e-06, "loss": 46.0049, "step": 6320 }, { "epoch": 0.8629940610280565, "grad_norm": 0.06466472148895264, "learning_rate": 4.681547968092398e-06, "loss": 46.0034, "step": 6321 }, { "epoch": 0.8631305891187112, "grad_norm": 0.06506135314702988, "learning_rate": 4.67236564196446e-06, "loss": 46.0027, "step": 6322 }, { "epoch": 0.8632671172093658, "grad_norm": 0.06343895196914673, "learning_rate": 4.6631918883330465e-06, "loss": 46.0035, "step": 6323 }, { "epoch": 0.8634036453000205, "grad_norm": 0.12591898441314697, "learning_rate": 4.654026708933129e-06, "loss": 46.0079, "step": 6324 }, { "epoch": 0.8635401733906751, "grad_norm": 0.12393563240766525, "learning_rate": 4.644870105498045e-06, "loss": 46.0054, "step": 6325 }, { "epoch": 0.8636767014813298, "grad_norm": 0.19827647507190704, "learning_rate": 4.63572207975953e-06, "loss": 46.0047, "step": 6326 }, { "epoch": 0.8638132295719845, "grad_norm": 0.11271895468235016, "learning_rate": 4.626582633447685e-06, "loss": 46.004, "step": 6327 }, { "epoch": 0.863949757662639, "grad_norm": 0.028815651312470436, "learning_rate": 4.617451768290976e-06, "loss": 46.009, "step": 6328 }, { "epoch": 0.8640862857532937, "grad_norm": 0.08468382805585861, "learning_rate": 4.6083294860162715e-06, "loss": 46.0065, "step": 6329 }, { "epoch": 0.8642228138439484, "grad_norm": 0.10054520517587662, "learning_rate": 4.599215788348804e-06, "loss": 46.0061, "step": 6330 }, { "epoch": 0.8643593419346031, "grad_norm": 0.06935746222734451, "learning_rate": 4.590110677012177e-06, "loss": 46.0064, "step": 6331 }, { "epoch": 0.8644958700252577, "grad_norm": 0.040280357003211975, "learning_rate": 4.5810141537283855e-06, "loss": 46.0035, "step": 6332 }, { "epoch": 0.8646323981159123, "grad_norm": 0.05889052525162697, "learning_rate": 4.571926220217776e-06, "loss": 46.0077, "step": 6333 }, { "epoch": 0.864768926206567, "grad_norm": 0.06306872516870499, "learning_rate": 4.562846878199101e-06, "loss": 46.003, "step": 6334 }, { "epoch": 
0.8649054542972217, "grad_norm": 0.04309394955635071, "learning_rate": 4.5537761293894535e-06, "loss": 46.0052, "step": 6335 }, { "epoch": 0.8650419823878763, "grad_norm": 0.0345597080886364, "learning_rate": 4.54471397550435e-06, "loss": 46.015, "step": 6336 }, { "epoch": 0.865178510478531, "grad_norm": 0.2072208672761917, "learning_rate": 4.535660418257631e-06, "loss": 46.0131, "step": 6337 }, { "epoch": 0.8653150385691856, "grad_norm": 0.08090057224035263, "learning_rate": 4.5266154593615326e-06, "loss": 46.0048, "step": 6338 }, { "epoch": 0.8654515666598402, "grad_norm": 0.0800909772515297, "learning_rate": 4.5175791005266535e-06, "loss": 46.0016, "step": 6339 }, { "epoch": 0.8655880947504949, "grad_norm": 0.08551318943500519, "learning_rate": 4.508551343462014e-06, "loss": 46.0008, "step": 6340 }, { "epoch": 0.8657246228411496, "grad_norm": 0.07786394655704498, "learning_rate": 4.499532189874939e-06, "loss": 46.0097, "step": 6341 }, { "epoch": 0.8658611509318043, "grad_norm": 0.05035264417529106, "learning_rate": 4.4905216414711516e-06, "loss": 46.0008, "step": 6342 }, { "epoch": 0.8659976790224588, "grad_norm": 0.06016929820179939, "learning_rate": 4.481519699954778e-06, "loss": 46.0048, "step": 6343 }, { "epoch": 0.8661342071131135, "grad_norm": 0.1089681088924408, "learning_rate": 4.4725263670282905e-06, "loss": 46.0024, "step": 6344 }, { "epoch": 0.8662707352037682, "grad_norm": 0.07395387440919876, "learning_rate": 4.463541644392516e-06, "loss": 46.0011, "step": 6345 }, { "epoch": 0.8664072632944229, "grad_norm": 0.5110549330711365, "learning_rate": 4.454565533746674e-06, "loss": 46.0097, "step": 6346 }, { "epoch": 0.8665437913850775, "grad_norm": 0.14075511693954468, "learning_rate": 4.445598036788368e-06, "loss": 46.0059, "step": 6347 }, { "epoch": 0.8666803194757321, "grad_norm": 0.11841179430484772, "learning_rate": 4.436639155213556e-06, "loss": 46.0029, "step": 6348 }, { "epoch": 0.8668168475663868, "grad_norm": 0.4675506055355072, "learning_rate": 4.427688890716547e-06, "loss": 46.0028, "step": 6349 }, { "epoch": 0.8669533756570414, "grad_norm": 0.15410633385181427, "learning_rate": 4.418747244990068e-06, "loss": 46.004, "step": 6350 }, { "epoch": 0.8670899037476961, "grad_norm": 0.17977851629257202, "learning_rate": 4.409814219725178e-06, "loss": 46.0118, "step": 6351 }, { "epoch": 0.8672264318383507, "grad_norm": 0.026908690109848976, "learning_rate": 4.400889816611315e-06, "loss": 46.0012, "step": 6352 }, { "epoch": 0.8673629599290054, "grad_norm": 0.10139092057943344, "learning_rate": 4.3919740373362935e-06, "loss": 46.0058, "step": 6353 }, { "epoch": 0.86749948801966, "grad_norm": 0.08596333116292953, "learning_rate": 4.38306688358629e-06, "loss": 46.0051, "step": 6354 }, { "epoch": 0.8676360161103147, "grad_norm": 0.07495640963315964, "learning_rate": 4.374168357045849e-06, "loss": 46.0049, "step": 6355 }, { "epoch": 0.8677725442009694, "grad_norm": 0.08146247267723083, "learning_rate": 4.365278459397892e-06, "loss": 46.0057, "step": 6356 }, { "epoch": 0.867909072291624, "grad_norm": 0.05982358753681183, "learning_rate": 4.3563971923236965e-06, "loss": 46.002, "step": 6357 }, { "epoch": 0.8680456003822786, "grad_norm": 0.11438316106796265, "learning_rate": 4.3475245575029185e-06, "loss": 46.0015, "step": 6358 }, { "epoch": 0.8681821284729333, "grad_norm": 0.1559840589761734, "learning_rate": 4.338660556613577e-06, "loss": 46.0015, "step": 6359 }, { "epoch": 0.868318656563588, "grad_norm": 0.07280874997377396, "learning_rate": 4.329805191332053e-06, "loss": 46.0069, 
"step": 6360 }, { "epoch": 0.8684551846542427, "grad_norm": 0.1580931395292282, "learning_rate": 4.320958463333108e-06, "loss": 46.0033, "step": 6361 }, { "epoch": 0.8685917127448972, "grad_norm": 0.07451383024454117, "learning_rate": 4.312120374289857e-06, "loss": 46.005, "step": 6362 }, { "epoch": 0.8687282408355519, "grad_norm": 0.11794652044773102, "learning_rate": 4.303290925873771e-06, "loss": 46.0062, "step": 6363 }, { "epoch": 0.8688647689262066, "grad_norm": 0.076228067278862, "learning_rate": 4.29447011975474e-06, "loss": 46.0104, "step": 6364 }, { "epoch": 0.8690012970168612, "grad_norm": 0.1668386310338974, "learning_rate": 4.285657957600941e-06, "loss": 46.0013, "step": 6365 }, { "epoch": 0.8691378251075159, "grad_norm": 0.1727270632982254, "learning_rate": 4.2768544410789755e-06, "loss": 46.006, "step": 6366 }, { "epoch": 0.8692743531981705, "grad_norm": 0.09881479293107986, "learning_rate": 4.268059571853778e-06, "loss": 46.0081, "step": 6367 }, { "epoch": 0.8694108812888252, "grad_norm": 0.09169193357229233, "learning_rate": 4.259273351588683e-06, "loss": 46.0048, "step": 6368 }, { "epoch": 0.8695474093794798, "grad_norm": 0.05397145077586174, "learning_rate": 4.2504957819453515e-06, "loss": 46.0091, "step": 6369 }, { "epoch": 0.8696839374701345, "grad_norm": 0.14817968010902405, "learning_rate": 4.241726864583811e-06, "loss": 46.006, "step": 6370 }, { "epoch": 0.8698204655607892, "grad_norm": 0.08073265105485916, "learning_rate": 4.232966601162486e-06, "loss": 46.0047, "step": 6371 }, { "epoch": 0.8699569936514437, "grad_norm": 0.04619685932993889, "learning_rate": 4.224214993338149e-06, "loss": 46.0011, "step": 6372 }, { "epoch": 0.8700935217420984, "grad_norm": 0.08142074197530746, "learning_rate": 4.2154720427659055e-06, "loss": 46.0069, "step": 6373 }, { "epoch": 0.8702300498327531, "grad_norm": 0.08773069828748703, "learning_rate": 4.206737751099255e-06, "loss": 46.0015, "step": 6374 }, { "epoch": 0.8703665779234078, "grad_norm": 0.08009561896324158, "learning_rate": 4.198012119990064e-06, "loss": 46.0029, "step": 6375 }, { "epoch": 0.8705031060140624, "grad_norm": 0.13649305701255798, "learning_rate": 4.189295151088551e-06, "loss": 46.0095, "step": 6376 }, { "epoch": 0.870639634104717, "grad_norm": 0.05281144008040428, "learning_rate": 4.180586846043272e-06, "loss": 46.0009, "step": 6377 }, { "epoch": 0.8707761621953717, "grad_norm": 0.04136611521244049, "learning_rate": 4.1718872065011904e-06, "loss": 46.0003, "step": 6378 }, { "epoch": 0.8709126902860264, "grad_norm": 0.1517009139060974, "learning_rate": 4.163196234107603e-06, "loss": 46.0022, "step": 6379 }, { "epoch": 0.871049218376681, "grad_norm": 0.08282803744077682, "learning_rate": 4.154513930506171e-06, "loss": 46.0092, "step": 6380 }, { "epoch": 0.8711857464673357, "grad_norm": 0.11389295756816864, "learning_rate": 4.145840297338904e-06, "loss": 46.0072, "step": 6381 }, { "epoch": 0.8713222745579903, "grad_norm": 0.05120983347296715, "learning_rate": 4.137175336246207e-06, "loss": 46.0127, "step": 6382 }, { "epoch": 0.871458802648645, "grad_norm": 0.05733403563499451, "learning_rate": 4.128519048866808e-06, "loss": 46.0032, "step": 6383 }, { "epoch": 0.8715953307392996, "grad_norm": 0.12491089105606079, "learning_rate": 4.119871436837819e-06, "loss": 46.0033, "step": 6384 }, { "epoch": 0.8717318588299543, "grad_norm": 0.06557711958885193, "learning_rate": 4.111232501794699e-06, "loss": 46.0041, "step": 6385 }, { "epoch": 0.8718683869206089, "grad_norm": 0.12021549046039581, "learning_rate": 
4.102602245371268e-06, "loss": 46.0042, "step": 6386 }, { "epoch": 0.8720049150112635, "grad_norm": 0.051115743815898895, "learning_rate": 4.093980669199698e-06, "loss": 46.0037, "step": 6387 }, { "epoch": 0.8721414431019182, "grad_norm": 0.0841289609670639, "learning_rate": 4.085367774910543e-06, "loss": 46.0054, "step": 6388 }, { "epoch": 0.8722779711925729, "grad_norm": 0.06576114892959595, "learning_rate": 4.076763564132685e-06, "loss": 46.0007, "step": 6389 }, { "epoch": 0.8724144992832276, "grad_norm": 0.040514279156923294, "learning_rate": 4.068168038493392e-06, "loss": 46.0019, "step": 6390 }, { "epoch": 0.8725510273738821, "grad_norm": 0.08983656018972397, "learning_rate": 4.059581199618256e-06, "loss": 46.0001, "step": 6391 }, { "epoch": 0.8726875554645368, "grad_norm": 0.11722396314144135, "learning_rate": 4.051003049131263e-06, "loss": 46.0081, "step": 6392 }, { "epoch": 0.8728240835551915, "grad_norm": 0.06088083237409592, "learning_rate": 4.042433588654732e-06, "loss": 46.0055, "step": 6393 }, { "epoch": 0.8729606116458462, "grad_norm": 0.04294413700699806, "learning_rate": 4.033872819809342e-06, "loss": 46.0013, "step": 6394 }, { "epoch": 0.8730971397365008, "grad_norm": 0.2701825201511383, "learning_rate": 4.025320744214134e-06, "loss": 46.0065, "step": 6395 }, { "epoch": 0.8732336678271554, "grad_norm": 0.1790284365415573, "learning_rate": 4.016777363486501e-06, "loss": 46.0076, "step": 6396 }, { "epoch": 0.8733701959178101, "grad_norm": 0.22147029638290405, "learning_rate": 4.008242679242192e-06, "loss": 46.0022, "step": 6397 }, { "epoch": 0.8735067240084647, "grad_norm": 0.3341263234615326, "learning_rate": 3.999716693095312e-06, "loss": 46.0023, "step": 6398 }, { "epoch": 0.8736432520991194, "grad_norm": 0.08721467852592468, "learning_rate": 3.991199406658314e-06, "loss": 46.0011, "step": 6399 }, { "epoch": 0.8737797801897741, "grad_norm": 0.17829303443431854, "learning_rate": 3.982690821542035e-06, "loss": 46.0, "step": 6400 }, { "epoch": 0.8739163082804287, "grad_norm": 0.13139715790748596, "learning_rate": 3.974190939355621e-06, "loss": 46.0113, "step": 6401 }, { "epoch": 0.8740528363710833, "grad_norm": 0.11140086501836777, "learning_rate": 3.965699761706593e-06, "loss": 46.0045, "step": 6402 }, { "epoch": 0.874189364461738, "grad_norm": 0.02987714111804962, "learning_rate": 3.957217290200843e-06, "loss": 46.0035, "step": 6403 }, { "epoch": 0.8743258925523927, "grad_norm": 0.06751630455255508, "learning_rate": 3.948743526442606e-06, "loss": 46.0009, "step": 6404 }, { "epoch": 0.8744624206430474, "grad_norm": 0.08098773658275604, "learning_rate": 3.940278472034448e-06, "loss": 46.0056, "step": 6405 }, { "epoch": 0.8745989487337019, "grad_norm": 0.04693634435534477, "learning_rate": 3.9318221285773e-06, "loss": 46.0037, "step": 6406 }, { "epoch": 0.8747354768243566, "grad_norm": 0.13936494290828705, "learning_rate": 3.923374497670474e-06, "loss": 46.0021, "step": 6407 }, { "epoch": 0.8748720049150113, "grad_norm": 0.03975704684853554, "learning_rate": 3.914935580911605e-06, "loss": 46.0049, "step": 6408 }, { "epoch": 0.875008533005666, "grad_norm": 0.03562704846262932, "learning_rate": 3.906505379896663e-06, "loss": 46.0036, "step": 6409 }, { "epoch": 0.8751450610963206, "grad_norm": 0.2016821801662445, "learning_rate": 3.898083896220023e-06, "loss": 46.0015, "step": 6410 }, { "epoch": 0.8752815891869752, "grad_norm": 0.05976088345050812, "learning_rate": 3.889671131474359e-06, "loss": 46.002, "step": 6411 }, { "epoch": 0.8754181172776299, "grad_norm": 
0.03431818634271622, "learning_rate": 3.881267087250745e-06, "loss": 46.0074, "step": 6412 }, { "epoch": 0.8755546453682845, "grad_norm": 0.12351123988628387, "learning_rate": 3.872871765138536e-06, "loss": 46.0031, "step": 6413 }, { "epoch": 0.8756911734589392, "grad_norm": 0.07610096782445908, "learning_rate": 3.864485166725517e-06, "loss": 46.005, "step": 6414 }, { "epoch": 0.8758277015495939, "grad_norm": 0.039659325033426285, "learning_rate": 3.856107293597777e-06, "loss": 46.002, "step": 6415 }, { "epoch": 0.8759642296402484, "grad_norm": 0.0594668872654438, "learning_rate": 3.847738147339758e-06, "loss": 46.0043, "step": 6416 }, { "epoch": 0.8761007577309031, "grad_norm": 0.1150984987616539, "learning_rate": 3.83937772953426e-06, "loss": 46.01, "step": 6417 }, { "epoch": 0.8762372858215578, "grad_norm": 0.09221015125513077, "learning_rate": 3.8310260417624315e-06, "loss": 46.0038, "step": 6418 }, { "epoch": 0.8763738139122125, "grad_norm": 0.11619951575994492, "learning_rate": 3.822683085603768e-06, "loss": 46.0061, "step": 6419 }, { "epoch": 0.876510342002867, "grad_norm": 0.09854526072740555, "learning_rate": 3.8143488626361133e-06, "loss": 46.0025, "step": 6420 }, { "epoch": 0.8766468700935217, "grad_norm": 0.08349821716547012, "learning_rate": 3.8060233744356633e-06, "loss": 46.0104, "step": 6421 }, { "epoch": 0.8767833981841764, "grad_norm": 0.16098985075950623, "learning_rate": 3.7977066225769575e-06, "loss": 46.0102, "step": 6422 }, { "epoch": 0.8769199262748311, "grad_norm": 0.07960782945156097, "learning_rate": 3.7893986086328824e-06, "loss": 46.0094, "step": 6423 }, { "epoch": 0.8770564543654857, "grad_norm": 0.10914652049541473, "learning_rate": 3.781099334174676e-06, "loss": 46.006, "step": 6424 }, { "epoch": 0.8771929824561403, "grad_norm": 0.08572352677583694, "learning_rate": 3.772808800771921e-06, "loss": 46.0092, "step": 6425 }, { "epoch": 0.877329510546795, "grad_norm": 0.12243523448705673, "learning_rate": 3.7645270099925466e-06, "loss": 46.006, "step": 6426 }, { "epoch": 0.8774660386374497, "grad_norm": 0.13334520161151886, "learning_rate": 3.756253963402834e-06, "loss": 46.0134, "step": 6427 }, { "epoch": 0.8776025667281043, "grad_norm": 0.09834010899066925, "learning_rate": 3.7479896625674027e-06, "loss": 46.0089, "step": 6428 }, { "epoch": 0.877739094818759, "grad_norm": 0.10123912245035172, "learning_rate": 3.73973410904922e-06, "loss": 46.0003, "step": 6429 }, { "epoch": 0.8778756229094136, "grad_norm": 0.1677052229642868, "learning_rate": 3.731487304409598e-06, "loss": 46.0042, "step": 6430 }, { "epoch": 0.8780121510000682, "grad_norm": 0.08505240082740784, "learning_rate": 3.7232492502082163e-06, "loss": 46.01, "step": 6431 }, { "epoch": 0.8781486790907229, "grad_norm": 0.09153187274932861, "learning_rate": 3.715019948003057e-06, "loss": 46.0048, "step": 6432 }, { "epoch": 0.8782852071813776, "grad_norm": 0.2322947084903717, "learning_rate": 3.70679939935048e-06, "loss": 46.0025, "step": 6433 }, { "epoch": 0.8784217352720323, "grad_norm": 0.0945059061050415, "learning_rate": 3.6985876058051703e-06, "loss": 46.0072, "step": 6434 }, { "epoch": 0.8785582633626868, "grad_norm": 0.03179434314370155, "learning_rate": 3.6903845689201965e-06, "loss": 46.0045, "step": 6435 }, { "epoch": 0.8786947914533415, "grad_norm": 0.2558063566684723, "learning_rate": 3.6821902902469065e-06, "loss": 46.0027, "step": 6436 }, { "epoch": 0.8788313195439962, "grad_norm": 0.09672646224498749, "learning_rate": 3.6740047713350324e-06, "loss": 46.0008, "step": 6437 }, { "epoch": 
0.8789678476346509, "grad_norm": 0.03533100709319115, "learning_rate": 3.6658280137326585e-06, "loss": 46.0032, "step": 6438 }, { "epoch": 0.8791043757253055, "grad_norm": 0.04337439313530922, "learning_rate": 3.6576600189861977e-06, "loss": 46.002, "step": 6439 }, { "epoch": 0.8792409038159601, "grad_norm": 0.07139609009027481, "learning_rate": 3.649500788640392e-06, "loss": 46.0028, "step": 6440 }, { "epoch": 0.8793774319066148, "grad_norm": 0.14071647822856903, "learning_rate": 3.641350324238335e-06, "loss": 46.0034, "step": 6441 }, { "epoch": 0.8795139599972694, "grad_norm": 0.07212480902671814, "learning_rate": 3.6332086273214827e-06, "loss": 46.0019, "step": 6442 }, { "epoch": 0.8796504880879241, "grad_norm": 0.08181378245353699, "learning_rate": 3.625075699429625e-06, "loss": 46.0021, "step": 6443 }, { "epoch": 0.8797870161785788, "grad_norm": 0.05315856635570526, "learning_rate": 3.6169515421008495e-06, "loss": 46.0039, "step": 6444 }, { "epoch": 0.8799235442692334, "grad_norm": 0.046132300049066544, "learning_rate": 3.608836156871653e-06, "loss": 46.0012, "step": 6445 }, { "epoch": 0.880060072359888, "grad_norm": 0.3092614710330963, "learning_rate": 3.600729545276832e-06, "loss": 46.0067, "step": 6446 }, { "epoch": 0.8801966004505427, "grad_norm": 0.2476194202899933, "learning_rate": 3.592631708849531e-06, "loss": 46.0119, "step": 6447 }, { "epoch": 0.8803331285411974, "grad_norm": 0.1581500917673111, "learning_rate": 3.5845426491212364e-06, "loss": 46.0024, "step": 6448 }, { "epoch": 0.880469656631852, "grad_norm": 0.0684967115521431, "learning_rate": 3.57646236762178e-06, "loss": 46.0028, "step": 6449 }, { "epoch": 0.8806061847225066, "grad_norm": 0.08397220075130463, "learning_rate": 3.568390865879323e-06, "loss": 46.0024, "step": 6450 }, { "epoch": 0.8807427128131613, "grad_norm": 0.18890132009983063, "learning_rate": 3.560328145420372e-06, "loss": 46.0152, "step": 6451 }, { "epoch": 0.880879240903816, "grad_norm": 0.1269419640302658, "learning_rate": 3.5522742077697734e-06, "loss": 46.0024, "step": 6452 }, { "epoch": 0.8810157689944706, "grad_norm": 0.1664755642414093, "learning_rate": 3.544229054450715e-06, "loss": 46.0046, "step": 6453 }, { "epoch": 0.8811522970851252, "grad_norm": 0.10712799429893494, "learning_rate": 3.5361926869847182e-06, "loss": 46.0062, "step": 6454 }, { "epoch": 0.8812888251757799, "grad_norm": 0.06161699444055557, "learning_rate": 3.5281651068916455e-06, "loss": 46.0067, "step": 6455 }, { "epoch": 0.8814253532664346, "grad_norm": 0.060070302337408066, "learning_rate": 3.520146315689693e-06, "loss": 46.0014, "step": 6456 }, { "epoch": 0.8815618813570892, "grad_norm": 0.17000432312488556, "learning_rate": 3.5121363148953977e-06, "loss": 46.0004, "step": 6457 }, { "epoch": 0.8816984094477439, "grad_norm": 0.10035306215286255, "learning_rate": 3.5041351060236425e-06, "loss": 46.0018, "step": 6458 }, { "epoch": 0.8818349375383985, "grad_norm": 0.06055273115634918, "learning_rate": 3.496142690587628e-06, "loss": 46.0006, "step": 6459 }, { "epoch": 0.8819714656290532, "grad_norm": 0.08026725053787231, "learning_rate": 3.4881590700989177e-06, "loss": 46.007, "step": 6460 }, { "epoch": 0.8821079937197078, "grad_norm": 0.03337721526622772, "learning_rate": 3.480184246067386e-06, "loss": 46.0076, "step": 6461 }, { "epoch": 0.8822445218103625, "grad_norm": 0.13256090879440308, "learning_rate": 3.47221822000125e-06, "loss": 46.0041, "step": 6462 }, { "epoch": 0.8823810499010172, "grad_norm": 0.09090083092451096, "learning_rate": 3.464260993407098e-06, "loss": 
46.004, "step": 6463 }, { "epoch": 0.8825175779916717, "grad_norm": 0.048707664012908936, "learning_rate": 3.4563125677897932e-06, "loss": 46.0077, "step": 6464 }, { "epoch": 0.8826541060823264, "grad_norm": 0.04058847948908806, "learning_rate": 3.4483729446525724e-06, "loss": 46.0054, "step": 6465 }, { "epoch": 0.8827906341729811, "grad_norm": 0.09472561627626419, "learning_rate": 3.4404421254970008e-06, "loss": 46.0018, "step": 6466 }, { "epoch": 0.8829271622636358, "grad_norm": 0.12029547244310379, "learning_rate": 3.4325201118229954e-06, "loss": 46.0034, "step": 6467 }, { "epoch": 0.8830636903542904, "grad_norm": 0.06481167674064636, "learning_rate": 3.424606905128774e-06, "loss": 46.0049, "step": 6468 }, { "epoch": 0.883200218444945, "grad_norm": 0.033905576914548874, "learning_rate": 3.416702506910896e-06, "loss": 46.0089, "step": 6469 }, { "epoch": 0.8833367465355997, "grad_norm": 0.1541006863117218, "learning_rate": 3.4088069186642866e-06, "loss": 46.0079, "step": 6470 }, { "epoch": 0.8834732746262544, "grad_norm": 0.14689305424690247, "learning_rate": 3.4009201418821857e-06, "loss": 46.0015, "step": 6471 }, { "epoch": 0.883609802716909, "grad_norm": 0.07359305769205093, "learning_rate": 3.3930421780561382e-06, "loss": 46.0059, "step": 6472 }, { "epoch": 0.8837463308075637, "grad_norm": 0.1269724816083908, "learning_rate": 3.3851730286760587e-06, "loss": 46.0019, "step": 6473 }, { "epoch": 0.8838828588982183, "grad_norm": 0.08136024326086044, "learning_rate": 3.377312695230189e-06, "loss": 46.0021, "step": 6474 }, { "epoch": 0.884019386988873, "grad_norm": 0.03827705234289169, "learning_rate": 3.3694611792051132e-06, "loss": 46.0023, "step": 6475 }, { "epoch": 0.8841559150795276, "grad_norm": 0.055505480617284775, "learning_rate": 3.361618482085693e-06, "loss": 46.0039, "step": 6476 }, { "epoch": 0.8842924431701823, "grad_norm": 0.20173132419586182, "learning_rate": 3.353784605355198e-06, "loss": 46.0084, "step": 6477 }, { "epoch": 0.884428971260837, "grad_norm": 0.07146106660366058, "learning_rate": 3.3459595504951867e-06, "loss": 46.0038, "step": 6478 }, { "epoch": 0.8845654993514915, "grad_norm": 0.0603923462331295, "learning_rate": 3.338143318985554e-06, "loss": 46.0009, "step": 6479 }, { "epoch": 0.8847020274421462, "grad_norm": 0.05518396943807602, "learning_rate": 3.3303359123045163e-06, "loss": 46.0069, "step": 6480 }, { "epoch": 0.8848385555328009, "grad_norm": 0.06804000586271286, "learning_rate": 3.322537331928649e-06, "loss": 46.0046, "step": 6481 }, { "epoch": 0.8849750836234556, "grad_norm": 0.07270091772079468, "learning_rate": 3.3147475793328387e-06, "loss": 46.0056, "step": 6482 }, { "epoch": 0.8851116117141101, "grad_norm": 0.059493109583854675, "learning_rate": 3.3069666559903066e-06, "loss": 46.0051, "step": 6483 }, { "epoch": 0.8852481398047648, "grad_norm": 0.15815384685993195, "learning_rate": 3.299194563372604e-06, "loss": 46.0041, "step": 6484 }, { "epoch": 0.8853846678954195, "grad_norm": 0.11210992187261581, "learning_rate": 3.29143130294961e-06, "loss": 46.0025, "step": 6485 }, { "epoch": 0.8855211959860741, "grad_norm": 0.0440499521791935, "learning_rate": 3.283676876189534e-06, "loss": 46.0001, "step": 6486 }, { "epoch": 0.8856577240767288, "grad_norm": 0.15704584121704102, "learning_rate": 3.275931284558925e-06, "loss": 46.0115, "step": 6487 }, { "epoch": 0.8857942521673834, "grad_norm": 0.11805671453475952, "learning_rate": 3.2681945295226446e-06, "loss": 46.0069, "step": 6488 }, { "epoch": 0.8859307802580381, "grad_norm": 0.1507607400417328, 
"learning_rate": 3.2604666125438887e-06, "loss": 46.0091, "step": 6489 }, { "epoch": 0.8860673083486927, "grad_norm": 0.14998261630535126, "learning_rate": 3.252747535084194e-06, "loss": 46.0026, "step": 6490 }, { "epoch": 0.8862038364393474, "grad_norm": 0.09246719628572464, "learning_rate": 3.245037298603404e-06, "loss": 46.0056, "step": 6491 }, { "epoch": 0.8863403645300021, "grad_norm": 0.07928751409053802, "learning_rate": 3.2373359045597127e-06, "loss": 46.0036, "step": 6492 }, { "epoch": 0.8864768926206567, "grad_norm": 0.05545226112008095, "learning_rate": 3.229643354409617e-06, "loss": 46.008, "step": 6493 }, { "epoch": 0.8866134207113113, "grad_norm": 0.054664041846990585, "learning_rate": 3.2219596496079586e-06, "loss": 46.0018, "step": 6494 }, { "epoch": 0.886749948801966, "grad_norm": 0.1110149547457695, "learning_rate": 3.214284791607919e-06, "loss": 46.0004, "step": 6495 }, { "epoch": 0.8868864768926207, "grad_norm": 0.23346339166164398, "learning_rate": 3.2066187818609706e-06, "loss": 46.0054, "step": 6496 }, { "epoch": 0.8870230049832754, "grad_norm": 0.11471693962812424, "learning_rate": 3.1989616218169317e-06, "loss": 46.0051, "step": 6497 }, { "epoch": 0.8871595330739299, "grad_norm": 0.16896580159664154, "learning_rate": 3.1913133129239603e-06, "loss": 46.0007, "step": 6498 }, { "epoch": 0.8872960611645846, "grad_norm": 0.11590029299259186, "learning_rate": 3.183673856628533e-06, "loss": 46.0019, "step": 6499 }, { "epoch": 0.8874325892552393, "grad_norm": 0.2005118429660797, "learning_rate": 3.1760432543754217e-06, "loss": 46.0, "step": 6500 }, { "epoch": 0.8875691173458939, "grad_norm": 0.05351467803120613, "learning_rate": 3.168421507607755e-06, "loss": 46.0069, "step": 6501 }, { "epoch": 0.8877056454365486, "grad_norm": 0.052932627499103546, "learning_rate": 3.1608086177669914e-06, "loss": 46.0027, "step": 6502 }, { "epoch": 0.8878421735272032, "grad_norm": 0.029281893745064735, "learning_rate": 3.153204586292907e-06, "loss": 46.0043, "step": 6503 }, { "epoch": 0.8879787016178579, "grad_norm": 0.11101015657186508, "learning_rate": 3.145609414623574e-06, "loss": 46.0055, "step": 6504 }, { "epoch": 0.8881152297085125, "grad_norm": 0.09191956371068954, "learning_rate": 3.1380231041954366e-06, "loss": 46.0055, "step": 6505 }, { "epoch": 0.8882517577991672, "grad_norm": 0.05612753704190254, "learning_rate": 3.1304456564432372e-06, "loss": 46.0024, "step": 6506 }, { "epoch": 0.8883882858898219, "grad_norm": 0.10963650792837143, "learning_rate": 3.1228770728000455e-06, "loss": 46.0026, "step": 6507 }, { "epoch": 0.8885248139804764, "grad_norm": 0.05351071432232857, "learning_rate": 3.115317354697239e-06, "loss": 46.0025, "step": 6508 }, { "epoch": 0.8886613420711311, "grad_norm": 0.034159544855356216, "learning_rate": 3.1077665035645465e-06, "loss": 46.0031, "step": 6509 }, { "epoch": 0.8887978701617858, "grad_norm": 0.19627976417541504, "learning_rate": 3.1002245208300084e-06, "loss": 46.0037, "step": 6510 }, { "epoch": 0.8889343982524405, "grad_norm": 0.11347929388284683, "learning_rate": 3.0926914079199844e-06, "loss": 46.0099, "step": 6511 }, { "epoch": 0.8890709263430951, "grad_norm": 0.032607149332761765, "learning_rate": 3.085167166259162e-06, "loss": 46.0078, "step": 6512 }, { "epoch": 0.8892074544337497, "grad_norm": 0.044253215193748474, "learning_rate": 3.077651797270542e-06, "loss": 46.0029, "step": 6513 }, { "epoch": 0.8893439825244044, "grad_norm": 0.060733530670404434, "learning_rate": 3.07014530237546e-06, "loss": 46.012, "step": 6514 }, { "epoch": 
0.8894805106150591, "grad_norm": 0.11486084014177322, "learning_rate": 3.062647682993558e-06, "loss": 46.0045, "step": 6515 }, { "epoch": 0.8896170387057137, "grad_norm": 0.12552179396152496, "learning_rate": 3.0551589405428182e-06, "loss": 46.01, "step": 6516 }, { "epoch": 0.8897535667963683, "grad_norm": 0.13673348724842072, "learning_rate": 3.0476790764395245e-06, "loss": 46.0132, "step": 6517 }, { "epoch": 0.889890094887023, "grad_norm": 0.147305428981781, "learning_rate": 3.0402080920982955e-06, "loss": 46.0079, "step": 6518 }, { "epoch": 0.8900266229776777, "grad_norm": 0.0694836899638176, "learning_rate": 3.0327459889320676e-06, "loss": 46.004, "step": 6519 }, { "epoch": 0.8901631510683323, "grad_norm": 0.10554865747690201, "learning_rate": 3.0252927683520948e-06, "loss": 46.0014, "step": 6520 }, { "epoch": 0.890299679158987, "grad_norm": 0.06081525981426239, "learning_rate": 3.01784843176795e-06, "loss": 46.0068, "step": 6521 }, { "epoch": 0.8904362072496416, "grad_norm": 0.17026200890541077, "learning_rate": 3.0104129805875292e-06, "loss": 46.0049, "step": 6522 }, { "epoch": 0.8905727353402962, "grad_norm": 0.1103358194231987, "learning_rate": 3.0029864162170464e-06, "loss": 46.0052, "step": 6523 }, { "epoch": 0.8907092634309509, "grad_norm": 0.11488151550292969, "learning_rate": 2.9955687400610332e-06, "loss": 46.0044, "step": 6524 }, { "epoch": 0.8908457915216056, "grad_norm": 0.027208402752876282, "learning_rate": 2.9881599535223516e-06, "loss": 46.0074, "step": 6525 }, { "epoch": 0.8909823196122603, "grad_norm": 0.15961121022701263, "learning_rate": 2.9807600580021634e-06, "loss": 46.0124, "step": 6526 }, { "epoch": 0.8911188477029148, "grad_norm": 0.07938924431800842, "learning_rate": 2.973369054899966e-06, "loss": 46.0064, "step": 6527 }, { "epoch": 0.8912553757935695, "grad_norm": 0.043605126440525055, "learning_rate": 2.965986945613558e-06, "loss": 46.0057, "step": 6528 }, { "epoch": 0.8913919038842242, "grad_norm": 0.04931819066405296, "learning_rate": 2.958613731539073e-06, "loss": 46.0001, "step": 6529 }, { "epoch": 0.8915284319748789, "grad_norm": 0.04531484097242355, "learning_rate": 2.951249414070961e-06, "loss": 46.0016, "step": 6530 }, { "epoch": 0.8916649600655335, "grad_norm": 0.2810179889202118, "learning_rate": 2.943893994601976e-06, "loss": 46.0054, "step": 6531 }, { "epoch": 0.8918014881561881, "grad_norm": 0.14851410686969757, "learning_rate": 2.9365474745231934e-06, "loss": 46.0084, "step": 6532 }, { "epoch": 0.8919380162468428, "grad_norm": 0.09384735673666, "learning_rate": 2.9292098552240077e-06, "loss": 46.0026, "step": 6533 }, { "epoch": 0.8920745443374974, "grad_norm": 0.037772342562675476, "learning_rate": 2.9218811380921473e-06, "loss": 46.0044, "step": 6534 }, { "epoch": 0.8922110724281521, "grad_norm": 0.05703674256801605, "learning_rate": 2.9145613245136263e-06, "loss": 46.0025, "step": 6535 }, { "epoch": 0.8923476005188068, "grad_norm": 0.10809153318405151, "learning_rate": 2.9072504158727865e-06, "loss": 46.0041, "step": 6536 }, { "epoch": 0.8924841286094614, "grad_norm": 0.1760617047548294, "learning_rate": 2.8999484135523004e-06, "loss": 46.0026, "step": 6537 }, { "epoch": 0.892620656700116, "grad_norm": 0.05457278713583946, "learning_rate": 2.892655318933152e-06, "loss": 46.0079, "step": 6538 }, { "epoch": 0.8927571847907707, "grad_norm": 0.11025259643793106, "learning_rate": 2.8853711333946044e-06, "loss": 46.0048, "step": 6539 }, { "epoch": 0.8928937128814254, "grad_norm": 0.060633763670921326, "learning_rate": 2.878095858314278e-06, 
"loss": 46.001, "step": 6540 }, { "epoch": 0.8930302409720801, "grad_norm": 0.06728050112724304, "learning_rate": 2.8708294950681057e-06, "loss": 46.0003, "step": 6541 }, { "epoch": 0.8931667690627346, "grad_norm": 0.055884238332509995, "learning_rate": 2.8635720450303104e-06, "loss": 46.0018, "step": 6542 }, { "epoch": 0.8933032971533893, "grad_norm": 0.09461211413145065, "learning_rate": 2.85632350957345e-06, "loss": 46.0011, "step": 6543 }, { "epoch": 0.893439825244044, "grad_norm": 0.0887874960899353, "learning_rate": 2.8490838900683824e-06, "loss": 46.0043, "step": 6544 }, { "epoch": 0.8935763533346986, "grad_norm": 0.08220485597848892, "learning_rate": 2.8418531878842914e-06, "loss": 46.0057, "step": 6545 }, { "epoch": 0.8937128814253532, "grad_norm": 0.14857639372348785, "learning_rate": 2.8346314043886657e-06, "loss": 46.0013, "step": 6546 }, { "epoch": 0.8938494095160079, "grad_norm": 0.24916960299015045, "learning_rate": 2.827418540947313e-06, "loss": 46.009, "step": 6547 }, { "epoch": 0.8939859376066626, "grad_norm": 0.5876787900924683, "learning_rate": 2.820214598924348e-06, "loss": 46.0073, "step": 6548 }, { "epoch": 0.8941224656973172, "grad_norm": 0.1799488067626953, "learning_rate": 2.8130195796822024e-06, "loss": 46.0, "step": 6549 }, { "epoch": 0.8942589937879719, "grad_norm": 0.12206609547138214, "learning_rate": 2.8058334845816213e-06, "loss": 46.0, "step": 6550 }, { "epoch": 0.8943955218786265, "grad_norm": 0.22094731032848358, "learning_rate": 2.7986563149816626e-06, "loss": 46.0094, "step": 6551 }, { "epoch": 0.8945320499692812, "grad_norm": 0.12981094419956207, "learning_rate": 2.79148807223969e-06, "loss": 46.0034, "step": 6552 }, { "epoch": 0.8946685780599358, "grad_norm": 0.04555484652519226, "learning_rate": 2.7843287577113796e-06, "loss": 46.0023, "step": 6553 }, { "epoch": 0.8948051061505905, "grad_norm": 0.03315846249461174, "learning_rate": 2.777178372750727e-06, "loss": 46.0002, "step": 6554 }, { "epoch": 0.8949416342412452, "grad_norm": 0.08718633651733398, "learning_rate": 2.7700369187100394e-06, "loss": 46.0008, "step": 6555 }, { "epoch": 0.8950781623318997, "grad_norm": 0.03384488448500633, "learning_rate": 2.762904396939919e-06, "loss": 46.0002, "step": 6556 }, { "epoch": 0.8952146904225544, "grad_norm": 0.05206296220421791, "learning_rate": 2.7557808087892824e-06, "loss": 46.0001, "step": 6557 }, { "epoch": 0.8953512185132091, "grad_norm": 0.07032153755426407, "learning_rate": 2.748666155605395e-06, "loss": 46.0009, "step": 6558 }, { "epoch": 0.8954877466038638, "grad_norm": 0.1463831067085266, "learning_rate": 2.7415604387337757e-06, "loss": 46.0034, "step": 6559 }, { "epoch": 0.8956242746945184, "grad_norm": 0.11515834182500839, "learning_rate": 2.7344636595182828e-06, "loss": 46.0049, "step": 6560 }, { "epoch": 0.895760802785173, "grad_norm": 0.1254366636276245, "learning_rate": 2.7273758193010757e-06, "loss": 46.0047, "step": 6561 }, { "epoch": 0.8958973308758277, "grad_norm": 0.09830262511968613, "learning_rate": 2.7202969194226436e-06, "loss": 46.0109, "step": 6562 }, { "epoch": 0.8960338589664824, "grad_norm": 0.06717722862958908, "learning_rate": 2.71322696122176e-06, "loss": 46.005, "step": 6563 }, { "epoch": 0.896170387057137, "grad_norm": 0.1164201945066452, "learning_rate": 2.706165946035505e-06, "loss": 46.0028, "step": 6564 }, { "epoch": 0.8963069151477917, "grad_norm": 0.07616259902715683, "learning_rate": 2.699113875199294e-06, "loss": 46.0092, "step": 6565 }, { "epoch": 0.8964434432384463, "grad_norm": 0.06703346967697144, 
"learning_rate": 2.6920707500468435e-06, "loss": 46.0017, "step": 6566 }, { "epoch": 0.8965799713291009, "grad_norm": 0.08795315027236938, "learning_rate": 2.685036571910143e-06, "loss": 46.0067, "step": 6567 }, { "epoch": 0.8967164994197556, "grad_norm": 0.05399882420897484, "learning_rate": 2.6780113421195298e-06, "loss": 46.0033, "step": 6568 }, { "epoch": 0.8968530275104103, "grad_norm": 0.12155161052942276, "learning_rate": 2.6709950620036452e-06, "loss": 46.0099, "step": 6569 }, { "epoch": 0.896989555601065, "grad_norm": 0.0668846145272255, "learning_rate": 2.663987732889428e-06, "loss": 46.003, "step": 6570 }, { "epoch": 0.8971260836917195, "grad_norm": 0.07595466077327728, "learning_rate": 2.6569893561021074e-06, "loss": 46.0057, "step": 6571 }, { "epoch": 0.8972626117823742, "grad_norm": 0.03409165143966675, "learning_rate": 2.6499999329652527e-06, "loss": 46.0055, "step": 6572 }, { "epoch": 0.8973991398730289, "grad_norm": 0.08206338435411453, "learning_rate": 2.643019464800722e-06, "loss": 46.0063, "step": 6573 }, { "epoch": 0.8975356679636836, "grad_norm": 0.09334874153137207, "learning_rate": 2.6360479529286888e-06, "loss": 46.0114, "step": 6574 }, { "epoch": 0.8976721960543382, "grad_norm": 0.0623357780277729, "learning_rate": 2.6290853986676023e-06, "loss": 46.0099, "step": 6575 }, { "epoch": 0.8978087241449928, "grad_norm": 0.1009753942489624, "learning_rate": 2.62213180333426e-06, "loss": 46.0008, "step": 6576 }, { "epoch": 0.8979452522356475, "grad_norm": 0.07862342149019241, "learning_rate": 2.615187168243749e-06, "loss": 46.0046, "step": 6577 }, { "epoch": 0.8980817803263021, "grad_norm": 0.08651077002286911, "learning_rate": 2.6082514947094516e-06, "loss": 46.0049, "step": 6578 }, { "epoch": 0.8982183084169568, "grad_norm": 0.1496649980545044, "learning_rate": 2.6013247840430633e-06, "loss": 46.0052, "step": 6579 }, { "epoch": 0.8983548365076114, "grad_norm": 0.15557408332824707, "learning_rate": 2.594407037554586e-06, "loss": 46.0019, "step": 6580 }, { "epoch": 0.8984913645982661, "grad_norm": 0.05639243125915527, "learning_rate": 2.5874982565523233e-06, "loss": 46.0015, "step": 6581 }, { "epoch": 0.8986278926889207, "grad_norm": 0.10333550721406937, "learning_rate": 2.5805984423428908e-06, "loss": 46.0046, "step": 6582 }, { "epoch": 0.8987644207795754, "grad_norm": 0.05890995264053345, "learning_rate": 2.5737075962311896e-06, "loss": 46.005, "step": 6583 }, { "epoch": 0.8989009488702301, "grad_norm": 0.08597596734762192, "learning_rate": 2.5668257195204437e-06, "loss": 46.0072, "step": 6584 }, { "epoch": 0.8990374769608847, "grad_norm": 0.09850165992975235, "learning_rate": 2.5599528135121787e-06, "loss": 46.0041, "step": 6585 }, { "epoch": 0.8991740050515393, "grad_norm": 0.059254713356494904, "learning_rate": 2.5530888795062104e-06, "loss": 46.0, "step": 6586 }, { "epoch": 0.899310533142194, "grad_norm": 0.037503328174352646, "learning_rate": 2.546233918800672e-06, "loss": 46.0015, "step": 6587 }, { "epoch": 0.8994470612328487, "grad_norm": 0.056100599467754364, "learning_rate": 2.5393879326919946e-06, "loss": 46.001, "step": 6588 }, { "epoch": 0.8995835893235034, "grad_norm": 0.14744621515274048, "learning_rate": 2.532550922474897e-06, "loss": 46.0033, "step": 6589 }, { "epoch": 0.8997201174141579, "grad_norm": 0.09557776153087616, "learning_rate": 2.525722889442439e-06, "loss": 46.0048, "step": 6590 }, { "epoch": 0.8998566455048126, "grad_norm": 0.12413590401411057, "learning_rate": 2.518903834885944e-06, "loss": 46.0066, "step": 6591 }, { "epoch": 
0.8999931735954673, "grad_norm": 0.12514495849609375, "learning_rate": 2.512093760095052e-06, "loss": 46.0016, "step": 6592 }, { "epoch": 0.9001297016861219, "grad_norm": 0.18977557122707367, "learning_rate": 2.5052926663577e-06, "loss": 46.0043, "step": 6593 }, { "epoch": 0.9002662297767766, "grad_norm": 0.06549373269081116, "learning_rate": 2.4985005549601483e-06, "loss": 46.0077, "step": 6594 }, { "epoch": 0.9004027578674312, "grad_norm": 0.08890310674905777, "learning_rate": 2.491717427186929e-06, "loss": 46.0034, "step": 6595 }, { "epoch": 0.9005392859580859, "grad_norm": 0.13648618757724762, "learning_rate": 2.4849432843208785e-06, "loss": 46.0031, "step": 6596 }, { "epoch": 0.9006758140487405, "grad_norm": 0.09024087339639664, "learning_rate": 2.4781781276431604e-06, "loss": 46.0031, "step": 6597 }, { "epoch": 0.9008123421393952, "grad_norm": 0.04649169370532036, "learning_rate": 2.4714219584332175e-06, "loss": 46.0017, "step": 6598 }, { "epoch": 0.9009488702300499, "grad_norm": 0.16363628208637238, "learning_rate": 2.4646747779687895e-06, "loss": 46.002, "step": 6599 }, { "epoch": 0.9010853983207044, "grad_norm": 0.1637767255306244, "learning_rate": 2.4579365875259165e-06, "loss": 46.0069, "step": 6600 }, { "epoch": 0.9012219264113591, "grad_norm": 0.14495576918125153, "learning_rate": 2.451207388378962e-06, "loss": 46.0067, "step": 6601 }, { "epoch": 0.9013584545020138, "grad_norm": 0.06934591382741928, "learning_rate": 2.4444871818005753e-06, "loss": 46.003, "step": 6602 }, { "epoch": 0.9014949825926685, "grad_norm": 0.0667373538017273, "learning_rate": 2.437775969061673e-06, "loss": 46.0015, "step": 6603 }, { "epoch": 0.9016315106833231, "grad_norm": 0.06775915622711182, "learning_rate": 2.4310737514315286e-06, "loss": 46.002, "step": 6604 }, { "epoch": 0.9017680387739777, "grad_norm": 0.0462862029671669, "learning_rate": 2.4243805301776723e-06, "loss": 46.0073, "step": 6605 }, { "epoch": 0.9019045668646324, "grad_norm": 0.0450550839304924, "learning_rate": 2.4176963065659587e-06, "loss": 46.0015, "step": 6606 }, { "epoch": 0.9020410949552871, "grad_norm": 0.03244467452168465, "learning_rate": 2.4110210818604982e-06, "loss": 46.0032, "step": 6607 }, { "epoch": 0.9021776230459417, "grad_norm": 0.040680140256881714, "learning_rate": 2.4043548573237595e-06, "loss": 46.0022, "step": 6608 }, { "epoch": 0.9023141511365964, "grad_norm": 0.07952241599559784, "learning_rate": 2.397697634216467e-06, "loss": 46.0027, "step": 6609 }, { "epoch": 0.902450679227251, "grad_norm": 0.19551369547843933, "learning_rate": 2.3910494137976523e-06, "loss": 46.0043, "step": 6610 }, { "epoch": 0.9025872073179056, "grad_norm": 0.052909817546606064, "learning_rate": 2.3844101973246546e-06, "loss": 46.0064, "step": 6611 }, { "epoch": 0.9027237354085603, "grad_norm": 0.14671124517917633, "learning_rate": 2.377779986053097e-06, "loss": 46.0047, "step": 6612 }, { "epoch": 0.902860263499215, "grad_norm": 0.10273399949073792, "learning_rate": 2.3711587812368983e-06, "loss": 46.0051, "step": 6613 }, { "epoch": 0.9029967915898696, "grad_norm": 0.10425538569688797, "learning_rate": 2.364546584128291e-06, "loss": 46.0082, "step": 6614 }, { "epoch": 0.9031333196805242, "grad_norm": 0.11395939439535141, "learning_rate": 2.357943395977791e-06, "loss": 46.0085, "step": 6615 }, { "epoch": 0.9032698477711789, "grad_norm": 0.09273294359445572, "learning_rate": 2.3513492180342102e-06, "loss": 46.0104, "step": 6616 }, { "epoch": 0.9034063758618336, "grad_norm": 0.09576912224292755, "learning_rate": 2.3447640515446567e-06, 
"loss": 46.0105, "step": 6617 }, { "epoch": 0.9035429039524883, "grad_norm": 0.07698849588632584, "learning_rate": 2.3381878977545403e-06, "loss": 46.0066, "step": 6618 }, { "epoch": 0.9036794320431428, "grad_norm": 0.08688672631978989, "learning_rate": 2.3316207579075654e-06, "loss": 46.0053, "step": 6619 }, { "epoch": 0.9038159601337975, "grad_norm": 0.1981174796819687, "learning_rate": 2.3250626332457224e-06, "loss": 46.0013, "step": 6620 }, { "epoch": 0.9039524882244522, "grad_norm": 0.07843770831823349, "learning_rate": 2.3185135250093137e-06, "loss": 46.0073, "step": 6621 }, { "epoch": 0.9040890163151069, "grad_norm": 0.07325591892004013, "learning_rate": 2.3119734344369094e-06, "loss": 46.0039, "step": 6622 }, { "epoch": 0.9042255444057615, "grad_norm": 0.06960899382829666, "learning_rate": 2.3054423627654087e-06, "loss": 46.0099, "step": 6623 }, { "epoch": 0.9043620724964161, "grad_norm": 0.048938870429992676, "learning_rate": 2.2989203112299684e-06, "loss": 46.0015, "step": 6624 }, { "epoch": 0.9044986005870708, "grad_norm": 0.06937189400196075, "learning_rate": 2.2924072810640796e-06, "loss": 46.0004, "step": 6625 }, { "epoch": 0.9046351286777254, "grad_norm": 0.10751316696405411, "learning_rate": 2.2859032734994957e-06, "loss": 46.0015, "step": 6626 }, { "epoch": 0.9047716567683801, "grad_norm": 0.15651454031467438, "learning_rate": 2.2794082897662663e-06, "loss": 46.0072, "step": 6627 }, { "epoch": 0.9049081848590348, "grad_norm": 0.06168805807828903, "learning_rate": 2.2729223310927472e-06, "loss": 46.007, "step": 6628 }, { "epoch": 0.9050447129496894, "grad_norm": 0.15351688861846924, "learning_rate": 2.2664453987055913e-06, "loss": 46.0049, "step": 6629 }, { "epoch": 0.905181241040344, "grad_norm": 0.05587732046842575, "learning_rate": 2.2599774938297238e-06, "loss": 46.0026, "step": 6630 }, { "epoch": 0.9053177691309987, "grad_norm": 0.1351049542427063, "learning_rate": 2.253518617688377e-06, "loss": 46.0034, "step": 6631 }, { "epoch": 0.9054542972216534, "grad_norm": 0.06693077832460403, "learning_rate": 2.247068771503075e-06, "loss": 46.0009, "step": 6632 }, { "epoch": 0.9055908253123081, "grad_norm": 0.07641613483428955, "learning_rate": 2.240627956493635e-06, "loss": 46.0094, "step": 6633 }, { "epoch": 0.9057273534029626, "grad_norm": 0.032466642558574677, "learning_rate": 2.234196173878156e-06, "loss": 46.0013, "step": 6634 }, { "epoch": 0.9058638814936173, "grad_norm": 0.04234271124005318, "learning_rate": 2.2277734248730307e-06, "loss": 46.0097, "step": 6635 }, { "epoch": 0.906000409584272, "grad_norm": 0.05517300218343735, "learning_rate": 2.221359710692961e-06, "loss": 46.0084, "step": 6636 }, { "epoch": 0.9061369376749266, "grad_norm": 0.12859518826007843, "learning_rate": 2.214955032550925e-06, "loss": 46.0031, "step": 6637 }, { "epoch": 0.9062734657655813, "grad_norm": 0.08623842149972916, "learning_rate": 2.208559391658194e-06, "loss": 46.0094, "step": 6638 }, { "epoch": 0.9064099938562359, "grad_norm": 0.041314639151096344, "learning_rate": 2.202172789224327e-06, "loss": 46.0077, "step": 6639 }, { "epoch": 0.9065465219468906, "grad_norm": 0.06387760490179062, "learning_rate": 2.195795226457176e-06, "loss": 46.0018, "step": 6640 }, { "epoch": 0.9066830500375452, "grad_norm": 0.07362432032823563, "learning_rate": 2.1894267045628856e-06, "loss": 46.0017, "step": 6641 }, { "epoch": 0.9068195781281999, "grad_norm": 0.062211908400058746, "learning_rate": 2.1830672247458993e-06, "loss": 46.0044, "step": 6642 }, { "epoch": 0.9069561062188545, "grad_norm": 
0.20057694613933563, "learning_rate": 2.1767167882089268e-06, "loss": 46.0033, "step": 6643 }, { "epoch": 0.9070926343095091, "grad_norm": 0.09381841123104095, "learning_rate": 2.1703753961529904e-06, "loss": 46.0023, "step": 6644 }, { "epoch": 0.9072291624001638, "grad_norm": 0.13915705680847168, "learning_rate": 2.1640430497773867e-06, "loss": 46.0028, "step": 6645 }, { "epoch": 0.9073656904908185, "grad_norm": 0.2525423467159271, "learning_rate": 2.157719750279713e-06, "loss": 46.009, "step": 6646 }, { "epoch": 0.9075022185814732, "grad_norm": 0.08368133008480072, "learning_rate": 2.1514054988558464e-06, "loss": 46.0071, "step": 6647 }, { "epoch": 0.9076387466721277, "grad_norm": 0.22908446192741394, "learning_rate": 2.145100296699959e-06, "loss": 46.0072, "step": 6648 }, { "epoch": 0.9077752747627824, "grad_norm": 0.22080403566360474, "learning_rate": 2.1388041450045137e-06, "loss": 46.007, "step": 6649 }, { "epoch": 0.9079118028534371, "grad_norm": 0.11584609746932983, "learning_rate": 2.1325170449602528e-06, "loss": 46.0, "step": 6650 }, { "epoch": 0.9080483309440918, "grad_norm": 0.15033037960529327, "learning_rate": 2.126238997756208e-06, "loss": 46.0083, "step": 6651 }, { "epoch": 0.9081848590347464, "grad_norm": 0.08025525510311127, "learning_rate": 2.1199700045797077e-06, "loss": 46.0021, "step": 6652 }, { "epoch": 0.908321387125401, "grad_norm": 0.13504081964492798, "learning_rate": 2.113710066616359e-06, "loss": 46.0063, "step": 6653 }, { "epoch": 0.9084579152160557, "grad_norm": 0.14639493823051453, "learning_rate": 2.107459185050059e-06, "loss": 46.0035, "step": 6654 }, { "epoch": 0.9085944433067104, "grad_norm": 0.11444094032049179, "learning_rate": 2.101217361062996e-06, "loss": 46.0004, "step": 6655 }, { "epoch": 0.908730971397365, "grad_norm": 0.08330193161964417, "learning_rate": 2.0949845958356416e-06, "loss": 46.0041, "step": 6656 }, { "epoch": 0.9088674994880197, "grad_norm": 0.03549402579665184, "learning_rate": 2.0887608905467594e-06, "loss": 46.0014, "step": 6657 }, { "epoch": 0.9090040275786743, "grad_norm": 0.04791359230875969, "learning_rate": 2.08254624637339e-06, "loss": 46.0015, "step": 6658 }, { "epoch": 0.9091405556693289, "grad_norm": 0.131149023771286, "learning_rate": 2.076340664490861e-06, "loss": 46.0054, "step": 6659 }, { "epoch": 0.9092770837599836, "grad_norm": 0.06999377906322479, "learning_rate": 2.0701441460727886e-06, "loss": 46.0002, "step": 6660 }, { "epoch": 0.9094136118506383, "grad_norm": 0.06904777139425278, "learning_rate": 2.0639566922911024e-06, "loss": 46.0096, "step": 6661 }, { "epoch": 0.909550139941293, "grad_norm": 0.04411187395453453, "learning_rate": 2.0577783043159603e-06, "loss": 46.0075, "step": 6662 }, { "epoch": 0.9096866680319475, "grad_norm": 0.16808955371379852, "learning_rate": 2.0516089833158447e-06, "loss": 46.003, "step": 6663 }, { "epoch": 0.9098231961226022, "grad_norm": 0.05629805102944374, "learning_rate": 2.0454487304575277e-06, "loss": 46.0081, "step": 6664 }, { "epoch": 0.9099597242132569, "grad_norm": 0.04384401813149452, "learning_rate": 2.039297546906055e-06, "loss": 46.0047, "step": 6665 }, { "epoch": 0.9100962523039116, "grad_norm": 0.028815988451242447, "learning_rate": 2.0331554338247406e-06, "loss": 46.0082, "step": 6666 }, { "epoch": 0.9102327803945662, "grad_norm": 0.11900016665458679, "learning_rate": 2.0270223923752053e-06, "loss": 46.0061, "step": 6667 }, { "epoch": 0.9103693084852208, "grad_norm": 0.12809918820858002, "learning_rate": 2.0208984237173546e-06, "loss": 46.0122, "step": 6668 }, { 
"epoch": 0.9105058365758755, "grad_norm": 0.10810062289237976, "learning_rate": 2.014783529009379e-06, "loss": 46.0034, "step": 6669 }, { "epoch": 0.9106423646665301, "grad_norm": 0.10393380373716354, "learning_rate": 2.0086777094077137e-06, "loss": 46.0072, "step": 6670 }, { "epoch": 0.9107788927571848, "grad_norm": 0.02861216478049755, "learning_rate": 2.0025809660671412e-06, "loss": 46.0117, "step": 6671 }, { "epoch": 0.9109154208478395, "grad_norm": 0.11254635453224182, "learning_rate": 1.996493300140684e-06, "loss": 46.0042, "step": 6672 }, { "epoch": 0.9110519489384941, "grad_norm": 0.08628802001476288, "learning_rate": 1.9904147127796646e-06, "loss": 46.0037, "step": 6673 }, { "epoch": 0.9111884770291487, "grad_norm": 0.05554351583123207, "learning_rate": 1.9843452051336696e-06, "loss": 46.004, "step": 6674 }, { "epoch": 0.9113250051198034, "grad_norm": 0.05089298635721207, "learning_rate": 1.9782847783505978e-06, "loss": 46.0077, "step": 6675 }, { "epoch": 0.9114615332104581, "grad_norm": 0.09159374237060547, "learning_rate": 1.972233433576609e-06, "loss": 46.0106, "step": 6676 }, { "epoch": 0.9115980613011126, "grad_norm": 0.04602527990937233, "learning_rate": 1.9661911719561445e-06, "loss": 46.0014, "step": 6677 }, { "epoch": 0.9117345893917673, "grad_norm": 0.05745755508542061, "learning_rate": 1.9601579946319503e-06, "loss": 46.0031, "step": 6678 }, { "epoch": 0.911871117482422, "grad_norm": 0.08873550593852997, "learning_rate": 1.9541339027450256e-06, "loss": 46.0025, "step": 6679 }, { "epoch": 0.9120076455730767, "grad_norm": 0.16451172530651093, "learning_rate": 1.9481188974346696e-06, "loss": 46.0023, "step": 6680 }, { "epoch": 0.9121441736637314, "grad_norm": 0.16778989136219025, "learning_rate": 1.9421129798384565e-06, "loss": 46.0027, "step": 6681 }, { "epoch": 0.9122807017543859, "grad_norm": 0.06277430802583694, "learning_rate": 1.9361161510922434e-06, "loss": 46.0051, "step": 6682 }, { "epoch": 0.9124172298450406, "grad_norm": 0.09322349727153778, "learning_rate": 1.9301284123301676e-06, "loss": 46.0014, "step": 6683 }, { "epoch": 0.9125537579356953, "grad_norm": 0.06889473646879196, "learning_rate": 1.924149764684646e-06, "loss": 46.0069, "step": 6684 }, { "epoch": 0.9126902860263499, "grad_norm": 0.24801255762577057, "learning_rate": 1.9181802092863965e-06, "loss": 46.0006, "step": 6685 }, { "epoch": 0.9128268141170046, "grad_norm": 0.11183443665504456, "learning_rate": 1.912219747264371e-06, "loss": 46.0077, "step": 6686 }, { "epoch": 0.9129633422076592, "grad_norm": 0.08577326685190201, "learning_rate": 1.9062683797458458e-06, "loss": 46.0042, "step": 6687 }, { "epoch": 0.9130998702983139, "grad_norm": 0.04744362458586693, "learning_rate": 1.9003261078563539e-06, "loss": 46.002, "step": 6688 }, { "epoch": 0.9132363983889685, "grad_norm": 0.07302679866552353, "learning_rate": 1.8943929327197296e-06, "loss": 46.0074, "step": 6689 }, { "epoch": 0.9133729264796232, "grad_norm": 0.06263314187526703, "learning_rate": 1.8884688554580587e-06, "loss": 46.0025, "step": 6690 }, { "epoch": 0.9135094545702779, "grad_norm": 0.09779287129640579, "learning_rate": 1.8825538771917173e-06, "loss": 46.0084, "step": 6691 }, { "epoch": 0.9136459826609324, "grad_norm": 0.07389087975025177, "learning_rate": 1.876647999039377e-06, "loss": 46.0001, "step": 6692 }, { "epoch": 0.9137825107515871, "grad_norm": 0.04830813780426979, "learning_rate": 1.8707512221179725e-06, "loss": 46.0025, "step": 6693 }, { "epoch": 0.9139190388422418, "grad_norm": 0.09250009059906006, "learning_rate": 
1.864863547542711e-06, "loss": 46.0046, "step": 6694 }, { "epoch": 0.9140555669328965, "grad_norm": 0.19655542075634003, "learning_rate": 1.8589849764270806e-06, "loss": 46.0057, "step": 6695 }, { "epoch": 0.9141920950235511, "grad_norm": 0.08576341718435287, "learning_rate": 1.8531155098828802e-06, "loss": 46.003, "step": 6696 }, { "epoch": 0.9143286231142057, "grad_norm": 0.32300952076911926, "learning_rate": 1.847255149020144e-06, "loss": 46.004, "step": 6697 }, { "epoch": 0.9144651512048604, "grad_norm": 0.2019822895526886, "learning_rate": 1.8414038949471912e-06, "loss": 46.002, "step": 6698 }, { "epoch": 0.9146016792955151, "grad_norm": 0.05147014558315277, "learning_rate": 1.8355617487706477e-06, "loss": 46.0, "step": 6699 }, { "epoch": 0.9147382073861697, "grad_norm": 0.3028050363063812, "learning_rate": 1.829728711595391e-06, "loss": 46.0, "step": 6700 }, { "epoch": 0.9148747354768244, "grad_norm": 0.06185584515333176, "learning_rate": 1.8239047845245828e-06, "loss": 46.0072, "step": 6701 }, { "epoch": 0.915011263567479, "grad_norm": 0.023672737181186676, "learning_rate": 1.818089968659653e-06, "loss": 46.0054, "step": 6702 }, { "epoch": 0.9151477916581336, "grad_norm": 0.11707445234060287, "learning_rate": 1.8122842651003224e-06, "loss": 46.0047, "step": 6703 }, { "epoch": 0.9152843197487883, "grad_norm": 0.12078045308589935, "learning_rate": 1.8064876749445903e-06, "loss": 46.0003, "step": 6704 }, { "epoch": 0.915420847839443, "grad_norm": 0.08872400224208832, "learning_rate": 1.8007001992887128e-06, "loss": 46.0016, "step": 6705 }, { "epoch": 0.9155573759300977, "grad_norm": 0.1843036264181137, "learning_rate": 1.7949218392272427e-06, "loss": 46.0007, "step": 6706 }, { "epoch": 0.9156939040207522, "grad_norm": 0.0893876701593399, "learning_rate": 1.7891525958529942e-06, "loss": 46.0042, "step": 6707 }, { "epoch": 0.9158304321114069, "grad_norm": 0.052444469183683395, "learning_rate": 1.7833924702570725e-06, "loss": 46.0032, "step": 6708 }, { "epoch": 0.9159669602020616, "grad_norm": 0.20061609148979187, "learning_rate": 1.7776414635288396e-06, "loss": 46.0082, "step": 6709 }, { "epoch": 0.9161034882927163, "grad_norm": 0.07619928568601608, "learning_rate": 1.7718995767559533e-06, "loss": 46.001, "step": 6710 }, { "epoch": 0.9162400163833708, "grad_norm": 0.06425197422504425, "learning_rate": 1.766166811024328e-06, "loss": 46.0102, "step": 6711 }, { "epoch": 0.9163765444740255, "grad_norm": 0.06378146260976791, "learning_rate": 1.7604431674181631e-06, "loss": 46.0106, "step": 6712 }, { "epoch": 0.9165130725646802, "grad_norm": 0.055262353271245956, "learning_rate": 1.754728647019932e-06, "loss": 46.0069, "step": 6713 }, { "epoch": 0.9166496006553349, "grad_norm": 0.06399935483932495, "learning_rate": 1.749023250910381e-06, "loss": 46.004, "step": 6714 }, { "epoch": 0.9167861287459895, "grad_norm": 0.10756520181894302, "learning_rate": 1.7433269801685303e-06, "loss": 46.0083, "step": 6715 }, { "epoch": 0.9169226568366441, "grad_norm": 0.1361415982246399, "learning_rate": 1.737639835871685e-06, "loss": 46.0006, "step": 6716 }, { "epoch": 0.9170591849272988, "grad_norm": 0.13266243040561676, "learning_rate": 1.7319618190954013e-06, "loss": 46.0034, "step": 6717 }, { "epoch": 0.9171957130179534, "grad_norm": 0.04906153306365013, "learning_rate": 1.7262929309135312e-06, "loss": 46.0097, "step": 6718 }, { "epoch": 0.9173322411086081, "grad_norm": 0.15726886689662933, "learning_rate": 1.720633172398184e-06, "loss": 46.0082, "step": 6719 }, { "epoch": 0.9174687691992628, "grad_norm": 
0.11211641132831573, "learning_rate": 1.7149825446197586e-06, "loss": 46.0062, "step": 6720 }, { "epoch": 0.9176052972899174, "grad_norm": 0.10814480483531952, "learning_rate": 1.7093410486469175e-06, "loss": 46.0046, "step": 6721 }, { "epoch": 0.917741825380572, "grad_norm": 0.11198922246694565, "learning_rate": 1.70370868554659e-06, "loss": 46.0064, "step": 6722 }, { "epoch": 0.9178783534712267, "grad_norm": 0.06387977302074432, "learning_rate": 1.6980854563839855e-06, "loss": 46.0091, "step": 6723 }, { "epoch": 0.9180148815618814, "grad_norm": 0.08580218255519867, "learning_rate": 1.6924713622225975e-06, "loss": 46.0039, "step": 6724 }, { "epoch": 0.918151409652536, "grad_norm": 0.10275151580572128, "learning_rate": 1.686866404124171e-06, "loss": 46.0023, "step": 6725 }, { "epoch": 0.9182879377431906, "grad_norm": 0.07370983064174652, "learning_rate": 1.6812705831487362e-06, "loss": 46.0102, "step": 6726 }, { "epoch": 0.9184244658338453, "grad_norm": 0.128872349858284, "learning_rate": 1.6756839003545798e-06, "loss": 46.0026, "step": 6727 }, { "epoch": 0.9185609939245, "grad_norm": 0.08479044586420059, "learning_rate": 1.6701063567982899e-06, "loss": 46.0074, "step": 6728 }, { "epoch": 0.9186975220151546, "grad_norm": 0.0605442076921463, "learning_rate": 1.664537953534695e-06, "loss": 46.0033, "step": 6729 }, { "epoch": 0.9188340501058093, "grad_norm": 0.13991855084896088, "learning_rate": 1.658978691616908e-06, "loss": 46.0037, "step": 6730 }, { "epoch": 0.9189705781964639, "grad_norm": 0.0645413026213646, "learning_rate": 1.6534285720963216e-06, "loss": 46.0079, "step": 6731 }, { "epoch": 0.9191071062871186, "grad_norm": 0.08401951938867569, "learning_rate": 1.6478875960225904e-06, "loss": 46.0058, "step": 6732 }, { "epoch": 0.9192436343777732, "grad_norm": 0.10026422142982483, "learning_rate": 1.642355764443637e-06, "loss": 46.0021, "step": 6733 }, { "epoch": 0.9193801624684279, "grad_norm": 0.17389023303985596, "learning_rate": 1.6368330784056473e-06, "loss": 46.0095, "step": 6734 }, { "epoch": 0.9195166905590826, "grad_norm": 0.04882121831178665, "learning_rate": 1.6313195389531021e-06, "loss": 46.0062, "step": 6735 }, { "epoch": 0.9196532186497371, "grad_norm": 0.11449748277664185, "learning_rate": 1.6258151471287396e-06, "loss": 46.0044, "step": 6736 }, { "epoch": 0.9197897467403918, "grad_norm": 0.10529565811157227, "learning_rate": 1.620319903973555e-06, "loss": 46.0065, "step": 6737 }, { "epoch": 0.9199262748310465, "grad_norm": 0.04799014702439308, "learning_rate": 1.6148338105268335e-06, "loss": 46.0013, "step": 6738 }, { "epoch": 0.9200628029217012, "grad_norm": 0.07683337479829788, "learning_rate": 1.6093568678261173e-06, "loss": 46.003, "step": 6739 }, { "epoch": 0.9201993310123557, "grad_norm": 0.04152224585413933, "learning_rate": 1.6038890769072222e-06, "loss": 46.0004, "step": 6740 }, { "epoch": 0.9203358591030104, "grad_norm": 0.03946460783481598, "learning_rate": 1.5984304388042382e-06, "loss": 46.0, "step": 6741 }, { "epoch": 0.9204723871936651, "grad_norm": 0.0467805340886116, "learning_rate": 1.5929809545495111e-06, "loss": 46.0089, "step": 6742 }, { "epoch": 0.9206089152843198, "grad_norm": 0.06340005248785019, "learning_rate": 1.5875406251736724e-06, "loss": 46.0013, "step": 6743 }, { "epoch": 0.9207454433749744, "grad_norm": 0.3033835291862488, "learning_rate": 1.5821094517056046e-06, "loss": 46.0062, "step": 6744 }, { "epoch": 0.920881971465629, "grad_norm": 0.0525091327726841, "learning_rate": 1.5766874351724747e-06, "loss": 46.0, "step": 6745 }, { "epoch": 
0.9210184995562837, "grad_norm": 0.22114337980747223, "learning_rate": 1.5712745765997072e-06, "loss": 46.0051, "step": 6746 }, { "epoch": 0.9211550276469384, "grad_norm": 0.23225148022174835, "learning_rate": 1.5658708770109943e-06, "loss": 46.0035, "step": 6747 }, { "epoch": 0.921291555737593, "grad_norm": 0.26744985580444336, "learning_rate": 1.560476337428307e-06, "loss": 46.0045, "step": 6748 }, { "epoch": 0.9214280838282477, "grad_norm": 0.11941954493522644, "learning_rate": 1.5550909588718688e-06, "loss": 46.0024, "step": 6749 }, { "epoch": 0.9215646119189023, "grad_norm": 0.6431065201759338, "learning_rate": 1.5497147423601866e-06, "loss": 46.0022, "step": 6750 }, { "epoch": 0.9217011400095569, "grad_norm": 0.26708126068115234, "learning_rate": 1.5443476889100138e-06, "loss": 46.0092, "step": 6751 }, { "epoch": 0.9218376681002116, "grad_norm": 0.06727378070354462, "learning_rate": 1.5389897995364056e-06, "loss": 46.0067, "step": 6752 }, { "epoch": 0.9219741961908663, "grad_norm": 0.1181660071015358, "learning_rate": 1.5336410752526454e-06, "loss": 46.0016, "step": 6753 }, { "epoch": 0.922110724281521, "grad_norm": 0.13896578550338745, "learning_rate": 1.5283015170702963e-06, "loss": 46.0019, "step": 6754 }, { "epoch": 0.9222472523721755, "grad_norm": 0.045336537063121796, "learning_rate": 1.5229711259991953e-06, "loss": 46.004, "step": 6755 }, { "epoch": 0.9223837804628302, "grad_norm": 0.19606101512908936, "learning_rate": 1.5176499030474577e-06, "loss": 46.0036, "step": 6756 }, { "epoch": 0.9225203085534849, "grad_norm": 0.20661170780658722, "learning_rate": 1.5123378492214291e-06, "loss": 46.0022, "step": 6757 }, { "epoch": 0.9226568366441396, "grad_norm": 0.06926345080137253, "learning_rate": 1.5070349655257388e-06, "loss": 46.0006, "step": 6758 }, { "epoch": 0.9227933647347942, "grad_norm": 0.0463615283370018, "learning_rate": 1.5017412529633012e-06, "loss": 46.0076, "step": 6759 }, { "epoch": 0.9229298928254488, "grad_norm": 0.09668014943599701, "learning_rate": 1.4964567125352713e-06, "loss": 46.0012, "step": 6760 }, { "epoch": 0.9230664209161035, "grad_norm": 0.04170968383550644, "learning_rate": 1.4911813452410717e-06, "loss": 46.0002, "step": 6761 }, { "epoch": 0.9232029490067581, "grad_norm": 0.06216321140527725, "learning_rate": 1.4859151520783988e-06, "loss": 46.0063, "step": 6762 }, { "epoch": 0.9233394770974128, "grad_norm": 0.1327812373638153, "learning_rate": 1.4806581340432112e-06, "loss": 46.0058, "step": 6763 }, { "epoch": 0.9234760051880675, "grad_norm": 0.0954553484916687, "learning_rate": 1.4754102921297364e-06, "loss": 46.0064, "step": 6764 }, { "epoch": 0.9236125332787221, "grad_norm": 0.09496381133794785, "learning_rate": 1.4701716273304521e-06, "loss": 46.0025, "step": 6765 }, { "epoch": 0.9237490613693767, "grad_norm": 0.141459122300148, "learning_rate": 1.4649421406361164e-06, "loss": 46.0034, "step": 6766 }, { "epoch": 0.9238855894600314, "grad_norm": 0.11492995917797089, "learning_rate": 1.4597218330357487e-06, "loss": 46.0082, "step": 6767 }, { "epoch": 0.9240221175506861, "grad_norm": 0.09954671561717987, "learning_rate": 1.4545107055166263e-06, "loss": 46.008, "step": 6768 }, { "epoch": 0.9241586456413408, "grad_norm": 0.07038677483797073, "learning_rate": 1.449308759064283e-06, "loss": 46.0092, "step": 6769 }, { "epoch": 0.9242951737319953, "grad_norm": 0.20773449540138245, "learning_rate": 1.4441159946625372e-06, "loss": 46.004, "step": 6770 }, { "epoch": 0.92443170182265, "grad_norm": 0.19040319323539734, "learning_rate": 1.438932413293459e-06, 
"loss": 46.0123, "step": 6771 }, { "epoch": 0.9245682299133047, "grad_norm": 0.09066922962665558, "learning_rate": 1.4337580159373864e-06, "loss": 46.0044, "step": 6772 }, { "epoch": 0.9247047580039593, "grad_norm": 0.07689357548952103, "learning_rate": 1.4285928035729035e-06, "loss": 46.0043, "step": 6773 }, { "epoch": 0.9248412860946139, "grad_norm": 0.0945148840546608, "learning_rate": 1.423436777176884e-06, "loss": 46.0046, "step": 6774 }, { "epoch": 0.9249778141852686, "grad_norm": 0.03817044198513031, "learning_rate": 1.418289937724443e-06, "loss": 46.004, "step": 6775 }, { "epoch": 0.9251143422759233, "grad_norm": 0.0447092168033123, "learning_rate": 1.413152286188968e-06, "loss": 46.0055, "step": 6776 }, { "epoch": 0.9252508703665779, "grad_norm": 0.09802338480949402, "learning_rate": 1.4080238235421096e-06, "loss": 46.0056, "step": 6777 }, { "epoch": 0.9253873984572326, "grad_norm": 0.10320574045181274, "learning_rate": 1.4029045507537697e-06, "loss": 46.0022, "step": 6778 }, { "epoch": 0.9255239265478872, "grad_norm": 0.06866712868213654, "learning_rate": 1.397794468792135e-06, "loss": 46.008, "step": 6779 }, { "epoch": 0.9256604546385419, "grad_norm": 0.10114262998104095, "learning_rate": 1.3926935786236217e-06, "loss": 46.0065, "step": 6780 }, { "epoch": 0.9257969827291965, "grad_norm": 0.04542737454175949, "learning_rate": 1.387601881212941e-06, "loss": 46.0061, "step": 6781 }, { "epoch": 0.9259335108198512, "grad_norm": 0.08486956357955933, "learning_rate": 1.3825193775230394e-06, "loss": 46.0079, "step": 6782 }, { "epoch": 0.9260700389105059, "grad_norm": 0.11898256838321686, "learning_rate": 1.3774460685151368e-06, "loss": 46.0089, "step": 6783 }, { "epoch": 0.9262065670011604, "grad_norm": 0.14172276854515076, "learning_rate": 1.3723819551487217e-06, "loss": 46.007, "step": 6784 }, { "epoch": 0.9263430950918151, "grad_norm": 0.10692764073610306, "learning_rate": 1.3673270383815217e-06, "loss": 46.0077, "step": 6785 }, { "epoch": 0.9264796231824698, "grad_norm": 0.11052722483873367, "learning_rate": 1.362281319169545e-06, "loss": 46.0131, "step": 6786 }, { "epoch": 0.9266161512731245, "grad_norm": 0.07590071111917496, "learning_rate": 1.3572447984670444e-06, "loss": 46.0064, "step": 6787 }, { "epoch": 0.9267526793637791, "grad_norm": 0.09943448752164841, "learning_rate": 1.3522174772265584e-06, "loss": 46.0103, "step": 6788 }, { "epoch": 0.9268892074544337, "grad_norm": 0.04657799378037453, "learning_rate": 1.3471993563988483e-06, "loss": 46.0056, "step": 6789 }, { "epoch": 0.9270257355450884, "grad_norm": 0.12995028495788574, "learning_rate": 1.3421904369329662e-06, "loss": 46.0073, "step": 6790 }, { "epoch": 0.9271622636357431, "grad_norm": 0.11856118589639664, "learning_rate": 1.3371907197762212e-06, "loss": 46.0078, "step": 6791 }, { "epoch": 0.9272987917263977, "grad_norm": 0.10882357507944107, "learning_rate": 1.3322002058741678e-06, "loss": 46.0085, "step": 6792 }, { "epoch": 0.9274353198170524, "grad_norm": 0.10458596795797348, "learning_rate": 1.3272188961706233e-06, "loss": 46.0041, "step": 6793 }, { "epoch": 0.927571847907707, "grad_norm": 0.04171836003661156, "learning_rate": 1.3222467916076619e-06, "loss": 46.0033, "step": 6794 }, { "epoch": 0.9277083759983616, "grad_norm": 0.1733270287513733, "learning_rate": 1.317283893125637e-06, "loss": 46.0008, "step": 6795 }, { "epoch": 0.9278449040890163, "grad_norm": 0.36702263355255127, "learning_rate": 1.3123302016631477e-06, "loss": 46.0044, "step": 6796 }, { "epoch": 0.927981432179671, "grad_norm": 
0.15219739079475403, "learning_rate": 1.3073857181570393e-06, "loss": 46.009, "step": 6797 }, { "epoch": 0.9281179602703257, "grad_norm": 0.06298134475946426, "learning_rate": 1.30245044354243e-06, "loss": 46.0, "step": 6798 }, { "epoch": 0.9282544883609802, "grad_norm": 0.13447003066539764, "learning_rate": 1.297524378752696e-06, "loss": 46.0049, "step": 6799 }, { "epoch": 0.9283910164516349, "grad_norm": 0.12459953129291534, "learning_rate": 1.2926075247194748e-06, "loss": 46.0042, "step": 6800 }, { "epoch": 0.9285275445422896, "grad_norm": 0.10796579718589783, "learning_rate": 1.287699882372645e-06, "loss": 46.0071, "step": 6801 }, { "epoch": 0.9286640726329443, "grad_norm": 0.1069621592760086, "learning_rate": 1.2828014526403586e-06, "loss": 46.0022, "step": 6802 }, { "epoch": 0.9288006007235989, "grad_norm": 0.06332067400217056, "learning_rate": 1.2779122364490249e-06, "loss": 46.0046, "step": 6803 }, { "epoch": 0.9289371288142535, "grad_norm": 0.09229280054569244, "learning_rate": 1.2730322347233036e-06, "loss": 46.0011, "step": 6804 }, { "epoch": 0.9290736569049082, "grad_norm": 0.07603983581066132, "learning_rate": 1.2681614483861182e-06, "loss": 46.0004, "step": 6805 }, { "epoch": 0.9292101849955628, "grad_norm": 0.07415536046028137, "learning_rate": 1.2632998783586425e-06, "loss": 46.0026, "step": 6806 }, { "epoch": 0.9293467130862175, "grad_norm": 0.14149107038974762, "learning_rate": 1.2584475255603134e-06, "loss": 46.0053, "step": 6807 }, { "epoch": 0.9294832411768721, "grad_norm": 0.13768018782138824, "learning_rate": 1.2536043909088191e-06, "loss": 46.003, "step": 6808 }, { "epoch": 0.9296197692675268, "grad_norm": 0.07692079246044159, "learning_rate": 1.2487704753201157e-06, "loss": 46.0027, "step": 6809 }, { "epoch": 0.9297562973581814, "grad_norm": 0.03404892981052399, "learning_rate": 1.2439457797083942e-06, "loss": 46.0015, "step": 6810 }, { "epoch": 0.9298928254488361, "grad_norm": 0.08715056627988815, "learning_rate": 1.2391303049861303e-06, "loss": 46.0055, "step": 6811 }, { "epoch": 0.9300293535394908, "grad_norm": 0.028262238949537277, "learning_rate": 1.2343240520640287e-06, "loss": 46.0078, "step": 6812 }, { "epoch": 0.9301658816301454, "grad_norm": 0.06369028985500336, "learning_rate": 1.229527021851068e-06, "loss": 46.0033, "step": 6813 }, { "epoch": 0.9303024097208, "grad_norm": 0.04658006876707077, "learning_rate": 1.2247392152544779e-06, "loss": 46.009, "step": 6814 }, { "epoch": 0.9304389378114547, "grad_norm": 0.12427383661270142, "learning_rate": 1.2199606331797342e-06, "loss": 46.0104, "step": 6815 }, { "epoch": 0.9305754659021094, "grad_norm": 0.08266838639974594, "learning_rate": 1.2151912765305862e-06, "loss": 46.0021, "step": 6816 }, { "epoch": 0.930711993992764, "grad_norm": 0.14271098375320435, "learning_rate": 1.2104311462090289e-06, "loss": 46.0095, "step": 6817 }, { "epoch": 0.9308485220834186, "grad_norm": 0.04773583635687828, "learning_rate": 1.205680243115298e-06, "loss": 46.0067, "step": 6818 }, { "epoch": 0.9309850501740733, "grad_norm": 0.06910636276006699, "learning_rate": 1.2009385681479245e-06, "loss": 46.0077, "step": 6819 }, { "epoch": 0.931121578264728, "grad_norm": 0.05498082935810089, "learning_rate": 1.196206122203647e-06, "loss": 46.0084, "step": 6820 }, { "epoch": 0.9312581063553826, "grad_norm": 0.08856254816055298, "learning_rate": 1.1914829061774824e-06, "loss": 46.0027, "step": 6821 }, { "epoch": 0.9313946344460373, "grad_norm": 0.056587208062410355, "learning_rate": 1.1867689209626997e-06, "loss": 46.007, "step": 6822 }, { 
"epoch": 0.9315311625366919, "grad_norm": 0.11377828568220139, "learning_rate": 1.1820641674508303e-06, "loss": 46.0069, "step": 6823 }, { "epoch": 0.9316676906273466, "grad_norm": 0.20811580121517181, "learning_rate": 1.1773686465316401e-06, "loss": 46.0014, "step": 6824 }, { "epoch": 0.9318042187180012, "grad_norm": 0.09878051280975342, "learning_rate": 1.1726823590931635e-06, "loss": 46.0089, "step": 6825 }, { "epoch": 0.9319407468086559, "grad_norm": 0.04018838331103325, "learning_rate": 1.168005306021691e-06, "loss": 46.003, "step": 6826 }, { "epoch": 0.9320772748993106, "grad_norm": 0.04384996369481087, "learning_rate": 1.1633374882017545e-06, "loss": 46.0068, "step": 6827 }, { "epoch": 0.9322138029899651, "grad_norm": 0.11781659722328186, "learning_rate": 1.158678906516153e-06, "loss": 46.0092, "step": 6828 }, { "epoch": 0.9323503310806198, "grad_norm": 0.09412620216608047, "learning_rate": 1.1540295618459151e-06, "loss": 46.0111, "step": 6829 }, { "epoch": 0.9324868591712745, "grad_norm": 0.07324753701686859, "learning_rate": 1.1493894550703543e-06, "loss": 46.0087, "step": 6830 }, { "epoch": 0.9326233872619292, "grad_norm": 0.0760478600859642, "learning_rate": 1.1447585870670186e-06, "loss": 46.0062, "step": 6831 }, { "epoch": 0.9327599153525838, "grad_norm": 0.15409456193447113, "learning_rate": 1.1401369587117073e-06, "loss": 46.0005, "step": 6832 }, { "epoch": 0.9328964434432384, "grad_norm": 0.10055575519800186, "learning_rate": 1.1355245708784767e-06, "loss": 46.0052, "step": 6833 }, { "epoch": 0.9330329715338931, "grad_norm": 0.03515884652733803, "learning_rate": 1.130921424439635e-06, "loss": 46.0049, "step": 6834 }, { "epoch": 0.9331694996245478, "grad_norm": 0.11437547206878662, "learning_rate": 1.1263275202657464e-06, "loss": 46.0024, "step": 6835 }, { "epoch": 0.9333060277152024, "grad_norm": 0.16586244106292725, "learning_rate": 1.1217428592256218e-06, "loss": 46.003, "step": 6836 }, { "epoch": 0.9334425558058571, "grad_norm": 0.12067394703626633, "learning_rate": 1.1171674421863287e-06, "loss": 46.0103, "step": 6837 }, { "epoch": 0.9335790838965117, "grad_norm": 0.1435748040676117, "learning_rate": 1.1126012700131806e-06, "loss": 46.0029, "step": 6838 }, { "epoch": 0.9337156119871663, "grad_norm": 0.041171845048666, "learning_rate": 1.108044343569742e-06, "loss": 46.0034, "step": 6839 }, { "epoch": 0.933852140077821, "grad_norm": 0.05056198313832283, "learning_rate": 1.1034966637178346e-06, "loss": 46.0063, "step": 6840 }, { "epoch": 0.9339886681684757, "grad_norm": 0.08814932405948639, "learning_rate": 1.0989582313175374e-06, "loss": 46.0047, "step": 6841 }, { "epoch": 0.9341251962591303, "grad_norm": 0.0719960629940033, "learning_rate": 1.0944290472271578e-06, "loss": 46.006, "step": 6842 }, { "epoch": 0.9342617243497849, "grad_norm": 0.06941012293100357, "learning_rate": 1.0899091123032835e-06, "loss": 46.0018, "step": 6843 }, { "epoch": 0.9343982524404396, "grad_norm": 0.09896448999643326, "learning_rate": 1.0853984274007244e-06, "loss": 46.0032, "step": 6844 }, { "epoch": 0.9345347805310943, "grad_norm": 0.21830058097839355, "learning_rate": 1.0808969933725654e-06, "loss": 46.0046, "step": 6845 }, { "epoch": 0.934671308621749, "grad_norm": 0.21733933687210083, "learning_rate": 1.0764048110701197e-06, "loss": 46.0021, "step": 6846 }, { "epoch": 0.9348078367124035, "grad_norm": 0.18560531735420227, "learning_rate": 1.0719218813429744e-06, "loss": 46.0039, "step": 6847 }, { "epoch": 0.9349443648030582, "grad_norm": 0.3104836344718933, "learning_rate": 
1.0674482050389455e-06, "loss": 46.0041, "step": 6848 }, { "epoch": 0.9350808928937129, "grad_norm": 0.23475243151187897, "learning_rate": 1.0629837830041123e-06, "loss": 46.0021, "step": 6849 }, { "epoch": 0.9352174209843676, "grad_norm": 0.1619904786348343, "learning_rate": 1.0585286160827934e-06, "loss": 46.0, "step": 6850 }, { "epoch": 0.9353539490750222, "grad_norm": 0.17036284506320953, "learning_rate": 1.0540827051175818e-06, "loss": 46.0101, "step": 6851 }, { "epoch": 0.9354904771656768, "grad_norm": 0.11389373242855072, "learning_rate": 1.0496460509492767e-06, "loss": 46.0004, "step": 6852 }, { "epoch": 0.9356270052563315, "grad_norm": 0.06486818939447403, "learning_rate": 1.0452186544169684e-06, "loss": 46.0, "step": 6853 }, { "epoch": 0.9357635333469861, "grad_norm": 0.15077140927314758, "learning_rate": 1.0408005163579648e-06, "loss": 46.0047, "step": 6854 }, { "epoch": 0.9359000614376408, "grad_norm": 0.12472234666347504, "learning_rate": 1.036391637607853e-06, "loss": 46.0002, "step": 6855 }, { "epoch": 0.9360365895282955, "grad_norm": 0.10812906175851822, "learning_rate": 1.031992019000444e-06, "loss": 46.0012, "step": 6856 }, { "epoch": 0.9361731176189501, "grad_norm": 0.12299193441867828, "learning_rate": 1.0276016613678053e-06, "loss": 46.0016, "step": 6857 }, { "epoch": 0.9363096457096047, "grad_norm": 0.0406714603304863, "learning_rate": 1.0232205655402615e-06, "loss": 46.0012, "step": 6858 }, { "epoch": 0.9364461738002594, "grad_norm": 0.037381984293460846, "learning_rate": 1.018848732346378e-06, "loss": 46.0015, "step": 6859 }, { "epoch": 0.9365827018909141, "grad_norm": 0.1486472636461258, "learning_rate": 1.0144861626129598e-06, "loss": 46.0059, "step": 6860 }, { "epoch": 0.9367192299815688, "grad_norm": 0.1068272590637207, "learning_rate": 1.0101328571650693e-06, "loss": 46.0013, "step": 6861 }, { "epoch": 0.9368557580722233, "grad_norm": 0.09068349748849869, "learning_rate": 1.005788816826031e-06, "loss": 46.0113, "step": 6862 }, { "epoch": 0.936992286162878, "grad_norm": 0.04873505234718323, "learning_rate": 1.0014540424173935e-06, "loss": 46.0038, "step": 6863 }, { "epoch": 0.9371288142535327, "grad_norm": 0.09556855261325836, "learning_rate": 9.971285347589565e-07, "loss": 46.0072, "step": 6864 }, { "epoch": 0.9372653423441873, "grad_norm": 0.19294068217277527, "learning_rate": 9.928122946687822e-07, "loss": 46.0053, "step": 6865 }, { "epoch": 0.937401870434842, "grad_norm": 0.1276911348104477, "learning_rate": 9.885053229631624e-07, "loss": 46.0037, "step": 6866 }, { "epoch": 0.9375383985254966, "grad_norm": 0.14404723048210144, "learning_rate": 9.842076204566619e-07, "loss": 46.0071, "step": 6867 }, { "epoch": 0.9376749266161513, "grad_norm": 0.12315364181995392, "learning_rate": 9.799191879620473e-07, "loss": 46.0082, "step": 6868 }, { "epoch": 0.9378114547068059, "grad_norm": 0.10397592186927795, "learning_rate": 9.756400262903808e-07, "loss": 46.012, "step": 6869 }, { "epoch": 0.9379479827974606, "grad_norm": 0.1841948926448822, "learning_rate": 9.71370136250943e-07, "loss": 46.0085, "step": 6870 }, { "epoch": 0.9380845108881152, "grad_norm": 0.13925699889659882, "learning_rate": 9.671095186512657e-07, "loss": 46.0063, "step": 6871 }, { "epoch": 0.9382210389787699, "grad_norm": 0.08372156322002411, "learning_rate": 9.628581742971377e-07, "loss": 46.0, "step": 6872 }, { "epoch": 0.9383575670694245, "grad_norm": 0.0694214403629303, "learning_rate": 9.586161039925768e-07, "loss": 46.0081, "step": 6873 }, { "epoch": 0.9384940951600792, "grad_norm": 
0.08195605874061584, "learning_rate": 9.543833085398634e-07, "loss": 46.0053, "step": 6874 }, { "epoch": 0.9386306232507339, "grad_norm": 0.08467517048120499, "learning_rate": 9.501597887395131e-07, "loss": 46.0036, "step": 6875 }, { "epoch": 0.9387671513413884, "grad_norm": 0.09814326465129852, "learning_rate": 9.459455453902866e-07, "loss": 46.0084, "step": 6876 }, { "epoch": 0.9389036794320431, "grad_norm": 0.09502071142196655, "learning_rate": 9.41740579289202e-07, "loss": 46.0068, "step": 6877 }, { "epoch": 0.9390402075226978, "grad_norm": 0.159391388297081, "learning_rate": 9.375448912315066e-07, "loss": 46.0048, "step": 6878 }, { "epoch": 0.9391767356133525, "grad_norm": 0.13746854662895203, "learning_rate": 9.3335848201071e-07, "loss": 46.0087, "step": 6879 }, { "epoch": 0.9393132637040071, "grad_norm": 0.05817277356982231, "learning_rate": 9.291813524185511e-07, "loss": 46.0041, "step": 6880 }, { "epoch": 0.9394497917946617, "grad_norm": 0.0752512738108635, "learning_rate": 9.250135032450258e-07, "loss": 46.0066, "step": 6881 }, { "epoch": 0.9395863198853164, "grad_norm": 0.03367074579000473, "learning_rate": 9.208549352783591e-07, "loss": 46.0063, "step": 6882 }, { "epoch": 0.939722847975971, "grad_norm": 0.16520939767360687, "learning_rate": 9.167056493050496e-07, "loss": 46.0033, "step": 6883 }, { "epoch": 0.9398593760666257, "grad_norm": 0.1287623792886734, "learning_rate": 9.125656461098142e-07, "loss": 46.0053, "step": 6884 }, { "epoch": 0.9399959041572804, "grad_norm": 0.051931124180555344, "learning_rate": 9.084349264756154e-07, "loss": 46.0063, "step": 6885 }, { "epoch": 0.940132432247935, "grad_norm": 0.045916393399238586, "learning_rate": 9.043134911836781e-07, "loss": 46.0, "step": 6886 }, { "epoch": 0.9402689603385896, "grad_norm": 0.06237133964896202, "learning_rate": 9.002013410134569e-07, "loss": 46.0046, "step": 6887 }, { "epoch": 0.9404054884292443, "grad_norm": 0.035000987350940704, "learning_rate": 8.960984767426517e-07, "loss": 46.0007, "step": 6888 }, { "epoch": 0.940542016519899, "grad_norm": 0.10650096088647842, "learning_rate": 8.92004899147203e-07, "loss": 46.0059, "step": 6889 }, { "epoch": 0.9406785446105537, "grad_norm": 0.08362699300050735, "learning_rate": 8.879206090013137e-07, "loss": 46.005, "step": 6890 }, { "epoch": 0.9408150727012082, "grad_norm": 0.06492463499307632, "learning_rate": 8.838456070774104e-07, "loss": 46.0, "step": 6891 }, { "epoch": 0.9409516007918629, "grad_norm": 0.17197369039058685, "learning_rate": 8.797798941461654e-07, "loss": 46.0048, "step": 6892 }, { "epoch": 0.9410881288825176, "grad_norm": 0.22447915375232697, "learning_rate": 8.757234709765027e-07, "loss": 46.0048, "step": 6893 }, { "epoch": 0.9412246569731723, "grad_norm": 0.11290010809898376, "learning_rate": 8.716763383355864e-07, "loss": 46.0022, "step": 6894 }, { "epoch": 0.9413611850638269, "grad_norm": 0.09200143069028854, "learning_rate": 8.676384969888263e-07, "loss": 46.0011, "step": 6895 }, { "epoch": 0.9414977131544815, "grad_norm": 0.12536276876926422, "learning_rate": 8.636099476998561e-07, "loss": 46.0003, "step": 6896 }, { "epoch": 0.9416342412451362, "grad_norm": 0.1629967987537384, "learning_rate": 8.595906912305773e-07, "loss": 46.0047, "step": 6897 }, { "epoch": 0.9417707693357908, "grad_norm": 0.33932313323020935, "learning_rate": 8.55580728341121e-07, "loss": 46.005, "step": 6898 }, { "epoch": 0.9419072974264455, "grad_norm": 0.2794952392578125, "learning_rate": 8.515800597898637e-07, "loss": 46.0031, "step": 6899 }, { "epoch": 
0.9420438255171002, "grad_norm": 0.06958658993244171, "learning_rate": 8.475886863334281e-07, "loss": 46.0116, "step": 6900 }, { "epoch": 0.9421803536077548, "grad_norm": 0.1140713021159172, "learning_rate": 8.436066087266714e-07, "loss": 46.0027, "step": 6901 }, { "epoch": 0.9423168816984094, "grad_norm": 0.034325093030929565, "learning_rate": 8.396338277226911e-07, "loss": 46.0, "step": 6902 }, { "epoch": 0.9424534097890641, "grad_norm": 0.17633271217346191, "learning_rate": 8.356703440728364e-07, "loss": 46.0014, "step": 6903 }, { "epoch": 0.9425899378797188, "grad_norm": 0.14169487357139587, "learning_rate": 8.317161585266964e-07, "loss": 46.0022, "step": 6904 }, { "epoch": 0.9427264659703734, "grad_norm": 0.08843012899160385, "learning_rate": 8.277712718320896e-07, "loss": 46.0012, "step": 6905 }, { "epoch": 0.942862994061028, "grad_norm": 0.054148029536008835, "learning_rate": 8.238356847350914e-07, "loss": 46.0003, "step": 6906 }, { "epoch": 0.9429995221516827, "grad_norm": 0.09819937497377396, "learning_rate": 8.199093979800065e-07, "loss": 46.0035, "step": 6907 }, { "epoch": 0.9431360502423374, "grad_norm": 0.10623885691165924, "learning_rate": 8.15992412309391e-07, "loss": 46.0035, "step": 6908 }, { "epoch": 0.943272578332992, "grad_norm": 0.06738341599702835, "learning_rate": 8.120847284640354e-07, "loss": 46.0088, "step": 6909 }, { "epoch": 0.9434091064236466, "grad_norm": 0.04905662685632706, "learning_rate": 8.081863471829709e-07, "loss": 46.0031, "step": 6910 }, { "epoch": 0.9435456345143013, "grad_norm": 0.10146837681531906, "learning_rate": 8.042972692034745e-07, "loss": 46.0058, "step": 6911 }, { "epoch": 0.943682162604956, "grad_norm": 0.050095733255147934, "learning_rate": 8.004174952610577e-07, "loss": 46.008, "step": 6912 }, { "epoch": 0.9438186906956106, "grad_norm": 0.024542683735489845, "learning_rate": 7.965470260894725e-07, "loss": 46.0078, "step": 6913 }, { "epoch": 0.9439552187862653, "grad_norm": 0.03275177255272865, "learning_rate": 7.926858624207112e-07, "loss": 46.0027, "step": 6914 }, { "epoch": 0.9440917468769199, "grad_norm": 0.16421373188495636, "learning_rate": 7.888340049850173e-07, "loss": 46.009, "step": 6915 }, { "epoch": 0.9442282749675746, "grad_norm": 0.05003058537840843, "learning_rate": 7.849914545108639e-07, "loss": 46.0063, "step": 6916 }, { "epoch": 0.9443648030582292, "grad_norm": 0.0722368061542511, "learning_rate": 7.811582117249527e-07, "loss": 46.0012, "step": 6917 }, { "epoch": 0.9445013311488839, "grad_norm": 0.13277366757392883, "learning_rate": 7.773342773522596e-07, "loss": 46.0177, "step": 6918 }, { "epoch": 0.9446378592395386, "grad_norm": 0.028346022590994835, "learning_rate": 7.735196521159616e-07, "loss": 46.0029, "step": 6919 }, { "epoch": 0.9447743873301931, "grad_norm": 0.10357624292373657, "learning_rate": 7.697143367374982e-07, "loss": 46.0074, "step": 6920 }, { "epoch": 0.9449109154208478, "grad_norm": 0.09462922066450119, "learning_rate": 7.659183319365381e-07, "loss": 46.0026, "step": 6921 }, { "epoch": 0.9450474435115025, "grad_norm": 0.05707864835858345, "learning_rate": 7.621316384309963e-07, "loss": 46.0077, "step": 6922 }, { "epoch": 0.9451839716021572, "grad_norm": 0.09295187145471573, "learning_rate": 7.583542569370272e-07, "loss": 46.0043, "step": 6923 }, { "epoch": 0.9453204996928118, "grad_norm": 0.07268337160348892, "learning_rate": 7.545861881690097e-07, "loss": 46.0103, "step": 6924 }, { "epoch": 0.9454570277834664, "grad_norm": 0.04354250058531761, "learning_rate": 7.508274328395848e-07, "loss": 46.0061, 
"step": 6925 }, { "epoch": 0.9455935558741211, "grad_norm": 0.07064705342054367, "learning_rate": 7.470779916596116e-07, "loss": 46.0, "step": 6926 }, { "epoch": 0.9457300839647758, "grad_norm": 0.10206176340579987, "learning_rate": 7.433378653382006e-07, "loss": 46.0084, "step": 6927 }, { "epoch": 0.9458666120554304, "grad_norm": 0.1324591487646103, "learning_rate": 7.396070545826806e-07, "loss": 46.0074, "step": 6928 }, { "epoch": 0.9460031401460851, "grad_norm": 0.12024889141321182, "learning_rate": 7.358855600986537e-07, "loss": 46.0046, "step": 6929 }, { "epoch": 0.9461396682367397, "grad_norm": 0.06082021817564964, "learning_rate": 7.321733825899291e-07, "loss": 46.0039, "step": 6930 }, { "epoch": 0.9462761963273943, "grad_norm": 0.05268791317939758, "learning_rate": 7.284705227585676e-07, "loss": 46.0076, "step": 6931 }, { "epoch": 0.946412724418049, "grad_norm": 0.08758164197206497, "learning_rate": 7.247769813048644e-07, "loss": 46.0109, "step": 6932 }, { "epoch": 0.9465492525087037, "grad_norm": 0.058164868503808975, "learning_rate": 7.210927589273552e-07, "loss": 46.0074, "step": 6933 }, { "epoch": 0.9466857805993584, "grad_norm": 0.259356290102005, "learning_rate": 7.174178563228051e-07, "loss": 46.0097, "step": 6934 }, { "epoch": 0.9468223086900129, "grad_norm": 0.17507486045360565, "learning_rate": 7.137522741862246e-07, "loss": 46.0082, "step": 6935 }, { "epoch": 0.9469588367806676, "grad_norm": 0.09167280048131943, "learning_rate": 7.100960132108648e-07, "loss": 46.0009, "step": 6936 }, { "epoch": 0.9470953648713223, "grad_norm": 0.09322277456521988, "learning_rate": 7.064490740882057e-07, "loss": 46.0123, "step": 6937 }, { "epoch": 0.947231892961977, "grad_norm": 0.20074842870235443, "learning_rate": 7.02811457507968e-07, "loss": 46.0051, "step": 6938 }, { "epoch": 0.9473684210526315, "grad_norm": 0.07635287940502167, "learning_rate": 6.991831641581015e-07, "loss": 46.005, "step": 6939 }, { "epoch": 0.9475049491432862, "grad_norm": 0.030790025368332863, "learning_rate": 6.955641947248126e-07, "loss": 46.0069, "step": 6940 }, { "epoch": 0.9476414772339409, "grad_norm": 0.19854773581027985, "learning_rate": 6.919545498925206e-07, "loss": 46.003, "step": 6941 }, { "epoch": 0.9477780053245956, "grad_norm": 0.09742925316095352, "learning_rate": 6.883542303438962e-07, "loss": 46.01, "step": 6942 }, { "epoch": 0.9479145334152502, "grad_norm": 0.06210726127028465, "learning_rate": 6.847632367598389e-07, "loss": 46.0103, "step": 6943 }, { "epoch": 0.9480510615059048, "grad_norm": 0.1092444583773613, "learning_rate": 6.811815698194945e-07, "loss": 46.0021, "step": 6944 }, { "epoch": 0.9481875895965595, "grad_norm": 0.04307679831981659, "learning_rate": 6.776092302002323e-07, "loss": 46.0046, "step": 6945 }, { "epoch": 0.9483241176872141, "grad_norm": 0.140605628490448, "learning_rate": 6.74046218577673e-07, "loss": 46.0059, "step": 6946 }, { "epoch": 0.9484606457778688, "grad_norm": 0.08600953966379166, "learning_rate": 6.704925356256553e-07, "loss": 46.0012, "step": 6947 }, { "epoch": 0.9485971738685235, "grad_norm": 0.2193940430879593, "learning_rate": 6.669481820162638e-07, "loss": 46.0036, "step": 6948 }, { "epoch": 0.948733701959178, "grad_norm": 0.09651878476142883, "learning_rate": 6.634131584198122e-07, "loss": 46.0042, "step": 6949 }, { "epoch": 0.9488702300498327, "grad_norm": 0.11122124642133713, "learning_rate": 6.598874655048714e-07, "loss": 46.002, "step": 6950 }, { "epoch": 0.9490067581404874, "grad_norm": 0.06454525142908096, "learning_rate": 6.563711039382137e-07, 
"loss": 46.0054, "step": 6951 }, { "epoch": 0.9491432862311421, "grad_norm": 0.17408327758312225, "learning_rate": 6.52864074384868e-07, "loss": 46.0034, "step": 6952 }, { "epoch": 0.9492798143217968, "grad_norm": 0.057936087250709534, "learning_rate": 6.493663775080982e-07, "loss": 46.003, "step": 6953 }, { "epoch": 0.9494163424124513, "grad_norm": 0.05345294252038002, "learning_rate": 6.458780139694032e-07, "loss": 46.0013, "step": 6954 }, { "epoch": 0.949552870503106, "grad_norm": 0.08148936182260513, "learning_rate": 6.42398984428505e-07, "loss": 46.0027, "step": 6955 }, { "epoch": 0.9496893985937607, "grad_norm": 0.11465345323085785, "learning_rate": 6.389292895433607e-07, "loss": 46.0035, "step": 6956 }, { "epoch": 0.9498259266844153, "grad_norm": 0.35127905011177063, "learning_rate": 6.354689299701844e-07, "loss": 46.009, "step": 6957 }, { "epoch": 0.94996245477507, "grad_norm": 0.18658971786499023, "learning_rate": 6.320179063634024e-07, "loss": 46.0028, "step": 6958 }, { "epoch": 0.9500989828657246, "grad_norm": 0.10089928656816483, "learning_rate": 6.285762193756817e-07, "loss": 46.0003, "step": 6959 }, { "epoch": 0.9502355109563793, "grad_norm": 0.15687218308448792, "learning_rate": 6.251438696579293e-07, "loss": 46.0024, "step": 6960 }, { "epoch": 0.9503720390470339, "grad_norm": 0.05431222543120384, "learning_rate": 6.217208578592759e-07, "loss": 46.0119, "step": 6961 }, { "epoch": 0.9505085671376886, "grad_norm": 0.10426490008831024, "learning_rate": 6.183071846270983e-07, "loss": 46.0037, "step": 6962 }, { "epoch": 0.9506450952283433, "grad_norm": 0.04488224536180496, "learning_rate": 6.149028506069909e-07, "loss": 46.0085, "step": 6963 }, { "epoch": 0.9507816233189978, "grad_norm": 0.11251979321241379, "learning_rate": 6.115078564427945e-07, "loss": 46.01, "step": 6964 }, { "epoch": 0.9509181514096525, "grad_norm": 0.09327413141727448, "learning_rate": 6.081222027765843e-07, "loss": 46.0048, "step": 6965 }, { "epoch": 0.9510546795003072, "grad_norm": 0.0498911589384079, "learning_rate": 6.047458902486647e-07, "loss": 46.0036, "step": 6966 }, { "epoch": 0.9511912075909619, "grad_norm": 0.05903641879558563, "learning_rate": 6.013789194975749e-07, "loss": 46.006, "step": 6967 }, { "epoch": 0.9513277356816164, "grad_norm": 0.03856969624757767, "learning_rate": 5.980212911600836e-07, "loss": 46.0117, "step": 6968 }, { "epoch": 0.9514642637722711, "grad_norm": 0.10870201885700226, "learning_rate": 5.946730058711935e-07, "loss": 46.0018, "step": 6969 }, { "epoch": 0.9516007918629258, "grad_norm": 0.08802196383476257, "learning_rate": 5.91334064264143e-07, "loss": 46.0028, "step": 6970 }, { "epoch": 0.9517373199535805, "grad_norm": 0.10287310928106308, "learning_rate": 5.880044669704099e-07, "loss": 46.0053, "step": 6971 }, { "epoch": 0.9518738480442351, "grad_norm": 0.1097191721200943, "learning_rate": 5.846842146196852e-07, "loss": 46.0043, "step": 6972 }, { "epoch": 0.9520103761348897, "grad_norm": 0.055113162845373154, "learning_rate": 5.813733078399164e-07, "loss": 46.0025, "step": 6973 }, { "epoch": 0.9521469042255444, "grad_norm": 0.1519494652748108, "learning_rate": 5.780717472572638e-07, "loss": 46.0048, "step": 6974 }, { "epoch": 0.952283432316199, "grad_norm": 0.12530197203159332, "learning_rate": 5.747795334961336e-07, "loss": 46.0099, "step": 6975 }, { "epoch": 0.9524199604068537, "grad_norm": 0.11176592856645584, "learning_rate": 5.714966671791556e-07, "loss": 46.0073, "step": 6976 }, { "epoch": 0.9525564884975084, "grad_norm": 0.15323077142238617, "learning_rate": 
5.682231489271938e-07, "loss": 46.0071, "step": 6977 }, { "epoch": 0.952693016588163, "grad_norm": 0.12638357281684875, "learning_rate": 5.649589793593591e-07, "loss": 46.0029, "step": 6978 }, { "epoch": 0.9528295446788176, "grad_norm": 0.20770449936389923, "learning_rate": 5.617041590929628e-07, "loss": 46.0074, "step": 6979 }, { "epoch": 0.9529660727694723, "grad_norm": 0.050461236387491226, "learning_rate": 5.584586887435739e-07, "loss": 46.0042, "step": 6980 }, { "epoch": 0.953102600860127, "grad_norm": 0.10567734390497208, "learning_rate": 5.552225689249846e-07, "loss": 46.0048, "step": 6981 }, { "epoch": 0.9532391289507817, "grad_norm": 0.1779739260673523, "learning_rate": 5.519958002492221e-07, "loss": 46.0051, "step": 6982 }, { "epoch": 0.9533756570414362, "grad_norm": 0.1124066412448883, "learning_rate": 5.487783833265425e-07, "loss": 46.0013, "step": 6983 }, { "epoch": 0.9535121851320909, "grad_norm": 0.11008557677268982, "learning_rate": 5.45570318765426e-07, "loss": 46.0048, "step": 6984 }, { "epoch": 0.9536487132227456, "grad_norm": 0.055017564445734024, "learning_rate": 5.423716071725981e-07, "loss": 46.0073, "step": 6985 }, { "epoch": 0.9537852413134003, "grad_norm": 0.09516242891550064, "learning_rate": 5.391822491530085e-07, "loss": 46.0016, "step": 6986 }, { "epoch": 0.9539217694040549, "grad_norm": 0.07182832807302475, "learning_rate": 5.360022453098357e-07, "loss": 46.0091, "step": 6987 }, { "epoch": 0.9540582974947095, "grad_norm": 0.04664941132068634, "learning_rate": 5.328315962444874e-07, "loss": 46.0027, "step": 6988 }, { "epoch": 0.9541948255853642, "grad_norm": 0.1028062254190445, "learning_rate": 5.296703025566175e-07, "loss": 46.0034, "step": 6989 }, { "epoch": 0.9543313536760188, "grad_norm": 0.12866486608982086, "learning_rate": 5.265183648440919e-07, "loss": 46.0031, "step": 6990 }, { "epoch": 0.9544678817666735, "grad_norm": 0.15095901489257812, "learning_rate": 5.233757837030118e-07, "loss": 46.0056, "step": 6991 }, { "epoch": 0.9546044098573282, "grad_norm": 0.045242372900247574, "learning_rate": 5.202425597277183e-07, "loss": 46.005, "step": 6992 }, { "epoch": 0.9547409379479828, "grad_norm": 0.14112210273742676, "learning_rate": 5.171186935107708e-07, "loss": 46.002, "step": 6993 }, { "epoch": 0.9548774660386374, "grad_norm": 0.046833448112010956, "learning_rate": 5.14004185642969e-07, "loss": 46.0047, "step": 6994 }, { "epoch": 0.9550139941292921, "grad_norm": 0.061061080545186996, "learning_rate": 5.108990367133304e-07, "loss": 46.0023, "step": 6995 }, { "epoch": 0.9551505222199468, "grad_norm": 0.3781220614910126, "learning_rate": 5.078032473091188e-07, "loss": 46.0036, "step": 6996 }, { "epoch": 0.9552870503106015, "grad_norm": 0.18927641212940216, "learning_rate": 5.047168180158101e-07, "loss": 46.0046, "step": 6997 }, { "epoch": 0.955423578401256, "grad_norm": 0.0661652684211731, "learning_rate": 5.016397494171265e-07, "loss": 46.0012, "step": 6998 }, { "epoch": 0.9555601064919107, "grad_norm": 0.08351822197437286, "learning_rate": 4.98572042095008e-07, "loss": 46.0025, "step": 6999 }, { "epoch": 0.9556966345825654, "grad_norm": 0.08313079923391342, "learning_rate": 4.955136966296292e-07, "loss": 46.0, "step": 7000 }, { "epoch": 0.95583316267322, "grad_norm": 0.2776692509651184, "learning_rate": 4.924647135993942e-07, "loss": 46.0048, "step": 7001 }, { "epoch": 0.9559696907638746, "grad_norm": 0.0676804780960083, "learning_rate": 4.894250935809364e-07, "loss": 46.0024, "step": 7002 }, { "epoch": 0.9561062188545293, "grad_norm": 0.1363213211297989, 
"learning_rate": 4.863948371491122e-07, "loss": 46.0021, "step": 7003 }, { "epoch": 0.956242746945184, "grad_norm": 0.06328721344470978, "learning_rate": 4.833739448770247e-07, "loss": 46.0027, "step": 7004 }, { "epoch": 0.9563792750358386, "grad_norm": 0.09872937202453613, "learning_rate": 4.803624173359833e-07, "loss": 46.0002, "step": 7005 }, { "epoch": 0.9565158031264933, "grad_norm": 0.06226339936256409, "learning_rate": 4.77360255095538e-07, "loss": 46.0013, "step": 7006 }, { "epoch": 0.9566523312171479, "grad_norm": 0.1212535873055458, "learning_rate": 4.743674587234737e-07, "loss": 46.0019, "step": 7007 }, { "epoch": 0.9567888593078026, "grad_norm": 0.03763558715581894, "learning_rate": 4.7138402878578736e-07, "loss": 46.0017, "step": 7008 }, { "epoch": 0.9569253873984572, "grad_norm": 0.07306963950395584, "learning_rate": 4.684099658467223e-07, "loss": 46.0019, "step": 7009 }, { "epoch": 0.9570619154891119, "grad_norm": 0.16501565277576447, "learning_rate": 4.6544527046873953e-07, "loss": 46.0033, "step": 7010 }, { "epoch": 0.9571984435797666, "grad_norm": 0.094601109623909, "learning_rate": 4.624899432125296e-07, "loss": 46.0083, "step": 7011 }, { "epoch": 0.9573349716704211, "grad_norm": 0.03492524102330208, "learning_rate": 4.595439846370064e-07, "loss": 46.0073, "step": 7012 }, { "epoch": 0.9574714997610758, "grad_norm": 0.09824098646640778, "learning_rate": 4.566073952993355e-07, "loss": 46.0013, "step": 7013 }, { "epoch": 0.9576080278517305, "grad_norm": 0.06130605190992355, "learning_rate": 4.536801757548781e-07, "loss": 46.0104, "step": 7014 }, { "epoch": 0.9577445559423852, "grad_norm": 0.05644996836781502, "learning_rate": 4.5076232655724695e-07, "loss": 46.0124, "step": 7015 }, { "epoch": 0.9578810840330398, "grad_norm": 0.09736155718564987, "learning_rate": 4.4785384825826173e-07, "loss": 46.0055, "step": 7016 }, { "epoch": 0.9580176121236944, "grad_norm": 0.06964929401874542, "learning_rate": 4.449547414079991e-07, "loss": 46.0047, "step": 7017 }, { "epoch": 0.9581541402143491, "grad_norm": 0.08945920318365097, "learning_rate": 4.420650065547427e-07, "loss": 46.0032, "step": 7018 }, { "epoch": 0.9582906683050038, "grad_norm": 0.12674036622047424, "learning_rate": 4.3918464424499984e-07, "loss": 46.0168, "step": 7019 }, { "epoch": 0.9584271963956584, "grad_norm": 0.07263398170471191, "learning_rate": 4.3631365502351807e-07, "loss": 46.0024, "step": 7020 }, { "epoch": 0.9585637244863131, "grad_norm": 0.4187130630016327, "learning_rate": 4.334520394332686e-07, "loss": 46.0087, "step": 7021 }, { "epoch": 0.9587002525769677, "grad_norm": 0.06423017382621765, "learning_rate": 4.305997980154519e-07, "loss": 46.0113, "step": 7022 }, { "epoch": 0.9588367806676223, "grad_norm": 0.16717016696929932, "learning_rate": 4.277569313094809e-07, "loss": 46.0098, "step": 7023 }, { "epoch": 0.958973308758277, "grad_norm": 0.07602465152740479, "learning_rate": 4.2492343985301443e-07, "loss": 46.0064, "step": 7024 }, { "epoch": 0.9591098368489317, "grad_norm": 0.06755789369344711, "learning_rate": 4.220993241819293e-07, "loss": 46.0023, "step": 7025 }, { "epoch": 0.9592463649395864, "grad_norm": 0.08068063855171204, "learning_rate": 4.192845848303373e-07, "loss": 46.0024, "step": 7026 }, { "epoch": 0.9593828930302409, "grad_norm": 0.06310666352510452, "learning_rate": 4.164792223305569e-07, "loss": 46.003, "step": 7027 }, { "epoch": 0.9595194211208956, "grad_norm": 0.03446575999259949, "learning_rate": 4.1368323721315825e-07, "loss": 46.0022, "step": 7028 }, { "epoch": 0.9596559492115503, 
"grad_norm": 0.03199775144457817, "learning_rate": 4.108966300069239e-07, "loss": 46.0061, "step": 7029 }, { "epoch": 0.959792477302205, "grad_norm": 0.07025648653507233, "learning_rate": 4.0811940123886004e-07, "loss": 46.0121, "step": 7030 }, { "epoch": 0.9599290053928596, "grad_norm": 0.09752369672060013, "learning_rate": 4.0535155143420765e-07, "loss": 46.0066, "step": 7031 }, { "epoch": 0.9600655334835142, "grad_norm": 0.09167366474866867, "learning_rate": 4.025930811164369e-07, "loss": 46.0045, "step": 7032 }, { "epoch": 0.9602020615741689, "grad_norm": 0.04597092419862747, "learning_rate": 3.998439908072249e-07, "loss": 46.0013, "step": 7033 }, { "epoch": 0.9603385896648235, "grad_norm": 0.07277274876832962, "learning_rate": 3.971042810265002e-07, "loss": 46.0036, "step": 7034 }, { "epoch": 0.9604751177554782, "grad_norm": 0.19522015750408173, "learning_rate": 3.943739522923928e-07, "loss": 46.011, "step": 7035 }, { "epoch": 0.9606116458461328, "grad_norm": 0.1344897300004959, "learning_rate": 3.916530051212841e-07, "loss": 46.0066, "step": 7036 }, { "epoch": 0.9607481739367875, "grad_norm": 0.11266939342021942, "learning_rate": 3.889414400277569e-07, "loss": 46.0024, "step": 7037 }, { "epoch": 0.9608847020274421, "grad_norm": 0.03876335546374321, "learning_rate": 3.862392575246343e-07, "loss": 46.005, "step": 7038 }, { "epoch": 0.9610212301180968, "grad_norm": 0.04259270429611206, "learning_rate": 3.8354645812296306e-07, "loss": 46.0054, "step": 7039 }, { "epoch": 0.9611577582087515, "grad_norm": 0.07908140867948532, "learning_rate": 3.8086304233200807e-07, "loss": 46.0036, "step": 7040 }, { "epoch": 0.961294286299406, "grad_norm": 0.10258141160011292, "learning_rate": 3.781890106592689e-07, "loss": 46.0006, "step": 7041 }, { "epoch": 0.9614308143900607, "grad_norm": 0.07387635856866837, "learning_rate": 3.7552436361046884e-07, "loss": 46.0055, "step": 7042 }, { "epoch": 0.9615673424807154, "grad_norm": 0.13900095224380493, "learning_rate": 3.7286910168954914e-07, "loss": 46.0045, "step": 7043 }, { "epoch": 0.9617038705713701, "grad_norm": 0.09079175442457199, "learning_rate": 3.702232253986804e-07, "loss": 46.0049, "step": 7044 }, { "epoch": 0.9618403986620248, "grad_norm": 0.15789593756198883, "learning_rate": 3.6758673523826225e-07, "loss": 46.0071, "step": 7045 }, { "epoch": 0.9619769267526793, "grad_norm": 0.18449178338050842, "learning_rate": 3.649596317069126e-07, "loss": 46.0041, "step": 7046 }, { "epoch": 0.962113454843334, "grad_norm": 0.28473979234695435, "learning_rate": 3.623419153014784e-07, "loss": 46.0064, "step": 7047 }, { "epoch": 0.9622499829339887, "grad_norm": 0.11489143967628479, "learning_rate": 3.597335865170304e-07, "loss": 46.0089, "step": 7048 }, { "epoch": 0.9623865110246433, "grad_norm": 0.16180576384067535, "learning_rate": 3.5713464584686273e-07, "loss": 46.0006, "step": 7049 }, { "epoch": 0.962523039115298, "grad_norm": 0.2144034057855606, "learning_rate": 3.545450937824935e-07, "loss": 46.0011, "step": 7050 }, { "epoch": 0.9626595672059526, "grad_norm": 0.09911711513996124, "learning_rate": 3.5196493081366967e-07, "loss": 46.0078, "step": 7051 }, { "epoch": 0.9627960952966073, "grad_norm": 0.17295798659324646, "learning_rate": 3.4939415742835655e-07, "loss": 46.0019, "step": 7052 }, { "epoch": 0.9629326233872619, "grad_norm": 0.0951714962720871, "learning_rate": 3.468327741127486e-07, "loss": 46.0018, "step": 7053 }, { "epoch": 0.9630691514779166, "grad_norm": 0.10347151011228561, "learning_rate": 3.442807813512583e-07, "loss": 46.0035, "step": 7054 
}, { "epoch": 0.9632056795685713, "grad_norm": 0.22633132338523865, "learning_rate": 3.417381796265273e-07, "loss": 46.0049, "step": 7055 }, { "epoch": 0.9633422076592258, "grad_norm": 0.047777723520994186, "learning_rate": 3.3920496941942657e-07, "loss": 46.002, "step": 7056 }, { "epoch": 0.9634787357498805, "grad_norm": 0.14216388761997223, "learning_rate": 3.3668115120903954e-07, "loss": 46.0048, "step": 7057 }, { "epoch": 0.9636152638405352, "grad_norm": 0.07215403020381927, "learning_rate": 3.3416672547267325e-07, "loss": 46.0042, "step": 7058 }, { "epoch": 0.9637517919311899, "grad_norm": 0.04733399674296379, "learning_rate": 3.3166169268586957e-07, "loss": 46.0042, "step": 7059 }, { "epoch": 0.9638883200218445, "grad_norm": 0.06736130267381668, "learning_rate": 3.291660533223828e-07, "loss": 46.0101, "step": 7060 }, { "epoch": 0.9640248481124991, "grad_norm": 0.03229368478059769, "learning_rate": 3.266798078542077e-07, "loss": 46.0017, "step": 7061 }, { "epoch": 0.9641613762031538, "grad_norm": 0.2435784935951233, "learning_rate": 3.242029567515348e-07, "loss": 46.0118, "step": 7062 }, { "epoch": 0.9642979042938085, "grad_norm": 0.08515594154596329, "learning_rate": 3.217355004828004e-07, "loss": 46.0011, "step": 7063 }, { "epoch": 0.9644344323844631, "grad_norm": 0.04602936655282974, "learning_rate": 3.1927743951465914e-07, "loss": 46.0024, "step": 7064 }, { "epoch": 0.9645709604751177, "grad_norm": 0.036774687469005585, "learning_rate": 3.1682877431198353e-07, "loss": 46.009, "step": 7065 }, { "epoch": 0.9647074885657724, "grad_norm": 0.06910528987646103, "learning_rate": 3.143895053378698e-07, "loss": 46.013, "step": 7066 }, { "epoch": 0.964844016656427, "grad_norm": 0.08251021802425385, "learning_rate": 3.1195963305364894e-07, "loss": 46.0075, "step": 7067 }, { "epoch": 0.9649805447470817, "grad_norm": 0.09395699948072433, "learning_rate": 3.095391579188589e-07, "loss": 46.0053, "step": 7068 }, { "epoch": 0.9651170728377364, "grad_norm": 0.043019089847803116, "learning_rate": 3.0712808039126685e-07, "loss": 46.0069, "step": 7069 }, { "epoch": 0.965253600928391, "grad_norm": 0.13021165132522583, "learning_rate": 3.0472640092686355e-07, "loss": 46.0039, "step": 7070 }, { "epoch": 0.9653901290190456, "grad_norm": 0.07317258417606354, "learning_rate": 3.0233411997986904e-07, "loss": 46.0053, "step": 7071 }, { "epoch": 0.9655266571097003, "grad_norm": 0.21500803530216217, "learning_rate": 2.9995123800270476e-07, "loss": 46.0071, "step": 7072 }, { "epoch": 0.965663185200355, "grad_norm": 0.03951704129576683, "learning_rate": 2.975777554460379e-07, "loss": 46.004, "step": 7073 }, { "epoch": 0.9657997132910097, "grad_norm": 0.08986201137304306, "learning_rate": 2.9521367275874844e-07, "loss": 46.0017, "step": 7074 }, { "epoch": 0.9659362413816642, "grad_norm": 0.08408564329147339, "learning_rate": 2.928589903879342e-07, "loss": 46.0027, "step": 7075 }, { "epoch": 0.9660727694723189, "grad_norm": 0.09055382758378983, "learning_rate": 2.9051370877892226e-07, "loss": 46.0016, "step": 7076 }, { "epoch": 0.9662092975629736, "grad_norm": 0.06586837023496628, "learning_rate": 2.8817782837526343e-07, "loss": 46.0031, "step": 7077 }, { "epoch": 0.9663458256536283, "grad_norm": 0.06981931626796722, "learning_rate": 2.858513496187154e-07, "loss": 46.0101, "step": 7078 }, { "epoch": 0.9664823537442829, "grad_norm": 0.11279299855232239, "learning_rate": 2.8353427294927627e-07, "loss": 46.0137, "step": 7079 }, { "epoch": 0.9666188818349375, "grad_norm": 0.1102173924446106, "learning_rate": 
2.8122659880516213e-07, "loss": 46.0036, "step": 7080 }, { "epoch": 0.9667554099255922, "grad_norm": 0.10839895904064178, "learning_rate": 2.789283276228016e-07, "loss": 46.0082, "step": 7081 }, { "epoch": 0.9668919380162468, "grad_norm": 0.08555541932582855, "learning_rate": 2.7663945983684713e-07, "loss": 46.0035, "step": 7082 }, { "epoch": 0.9670284661069015, "grad_norm": 0.13209228217601776, "learning_rate": 2.7435999588018567e-07, "loss": 46.0042, "step": 7083 }, { "epoch": 0.9671649941975562, "grad_norm": 0.09820085763931274, "learning_rate": 2.7208993618390576e-07, "loss": 46.0108, "step": 7084 }, { "epoch": 0.9673015222882108, "grad_norm": 0.12806051969528198, "learning_rate": 2.698292811773362e-07, "loss": 46.0063, "step": 7085 }, { "epoch": 0.9674380503788654, "grad_norm": 0.10604225099086761, "learning_rate": 2.675780312880127e-07, "loss": 46.0055, "step": 7086 }, { "epoch": 0.9675745784695201, "grad_norm": 0.059482015669345856, "learning_rate": 2.653361869417059e-07, "loss": 46.0126, "step": 7087 }, { "epoch": 0.9677111065601748, "grad_norm": 0.11784996092319489, "learning_rate": 2.631037485623933e-07, "loss": 46.0021, "step": 7088 }, { "epoch": 0.9678476346508295, "grad_norm": 0.11169984936714172, "learning_rate": 2.6088071657228706e-07, "loss": 46.0031, "step": 7089 }, { "epoch": 0.967984162741484, "grad_norm": 0.11371095478534698, "learning_rate": 2.5866709139180103e-07, "loss": 46.0063, "step": 7090 }, { "epoch": 0.9681206908321387, "grad_norm": 0.1076866015791893, "learning_rate": 2.5646287343959464e-07, "loss": 46.0087, "step": 7091 }, { "epoch": 0.9682572189227934, "grad_norm": 0.10492290556430817, "learning_rate": 2.542680631325289e-07, "loss": 46.0015, "step": 7092 }, { "epoch": 0.968393747013448, "grad_norm": 0.3201570212841034, "learning_rate": 2.5208266088569966e-07, "loss": 46.0071, "step": 7093 }, { "epoch": 0.9685302751041027, "grad_norm": 0.07352815568447113, "learning_rate": 2.499066671124095e-07, "loss": 46.0015, "step": 7094 }, { "epoch": 0.9686668031947573, "grad_norm": 0.18468260765075684, "learning_rate": 2.4774008222419596e-07, "loss": 46.003, "step": 7095 }, { "epoch": 0.968803331285412, "grad_norm": 0.09998919814825058, "learning_rate": 2.455829066308035e-07, "loss": 46.003, "step": 7096 }, { "epoch": 0.9689398593760666, "grad_norm": 0.1068774163722992, "learning_rate": 2.434351407402058e-07, "loss": 46.0036, "step": 7097 }, { "epoch": 0.9690763874667213, "grad_norm": 0.1510547548532486, "learning_rate": 2.412967849586001e-07, "loss": 46.0024, "step": 7098 }, { "epoch": 0.9692129155573759, "grad_norm": 0.30026623606681824, "learning_rate": 2.391678396903907e-07, "loss": 46.0009, "step": 7099 }, { "epoch": 0.9693494436480306, "grad_norm": 0.213755264878273, "learning_rate": 2.3704830533821108e-07, "loss": 46.0044, "step": 7100 }, { "epoch": 0.9694859717386852, "grad_norm": 0.13603056967258453, "learning_rate": 2.3493818230291286e-07, "loss": 46.0119, "step": 7101 }, { "epoch": 0.9696224998293399, "grad_norm": 0.03541295602917671, "learning_rate": 2.3283747098357677e-07, "loss": 46.0053, "step": 7102 }, { "epoch": 0.9697590279199946, "grad_norm": 0.193925142288208, "learning_rate": 2.307461717774906e-07, "loss": 46.0049, "step": 7103 }, { "epoch": 0.9698955560106491, "grad_norm": 0.17536532878875732, "learning_rate": 2.2866428508016568e-07, "loss": 46.0038, "step": 7104 }, { "epoch": 0.9700320841013038, "grad_norm": 0.13933449983596802, "learning_rate": 2.2659181128533713e-07, "loss": 46.0022, "step": 7105 }, { "epoch": 0.9701686121919585, "grad_norm": 
0.09835812449455261, "learning_rate": 2.245287507849525e-07, "loss": 46.0066, "step": 7106 }, { "epoch": 0.9703051402826132, "grad_norm": 0.0783412829041481, "learning_rate": 2.2247510396918859e-07, "loss": 46.0044, "step": 7107 }, { "epoch": 0.9704416683732678, "grad_norm": 0.13357435166835785, "learning_rate": 2.2043087122644023e-07, "loss": 46.0025, "step": 7108 }, { "epoch": 0.9705781964639224, "grad_norm": 0.12823739647865295, "learning_rate": 2.1839605294330933e-07, "loss": 46.0055, "step": 7109 }, { "epoch": 0.9707147245545771, "grad_norm": 0.0972222238779068, "learning_rate": 2.1637064950463247e-07, "loss": 46.0094, "step": 7110 }, { "epoch": 0.9708512526452318, "grad_norm": 0.08541359752416611, "learning_rate": 2.143546612934644e-07, "loss": 46.005, "step": 7111 }, { "epoch": 0.9709877807358864, "grad_norm": 0.06323409080505371, "learning_rate": 2.1234808869106117e-07, "loss": 46.0035, "step": 7112 }, { "epoch": 0.9711243088265411, "grad_norm": 0.061274804174900055, "learning_rate": 2.1035093207693036e-07, "loss": 46.009, "step": 7113 }, { "epoch": 0.9712608369171957, "grad_norm": 0.06812640279531479, "learning_rate": 2.083631918287643e-07, "loss": 46.0059, "step": 7114 }, { "epoch": 0.9713973650078503, "grad_norm": 0.05312773957848549, "learning_rate": 2.063848683224956e-07, "loss": 46.0003, "step": 7115 }, { "epoch": 0.971533893098505, "grad_norm": 0.09627732634544373, "learning_rate": 2.0441596193227497e-07, "loss": 46.0087, "step": 7116 }, { "epoch": 0.9716704211891597, "grad_norm": 0.07566139101982117, "learning_rate": 2.0245647303046568e-07, "loss": 46.0026, "step": 7117 }, { "epoch": 0.9718069492798144, "grad_norm": 0.11304798722267151, "learning_rate": 2.0050640198764347e-07, "loss": 46.0036, "step": 7118 }, { "epoch": 0.9719434773704689, "grad_norm": 0.06032518297433853, "learning_rate": 1.985657491726245e-07, "loss": 46.003, "step": 7119 }, { "epoch": 0.9720800054611236, "grad_norm": 0.0727953091263771, "learning_rate": 1.9663451495242068e-07, "loss": 46.0045, "step": 7120 }, { "epoch": 0.9722165335517783, "grad_norm": 0.06881820410490036, "learning_rate": 1.9471269969227878e-07, "loss": 46.0078, "step": 7121 }, { "epoch": 0.972353061642433, "grad_norm": 0.057409241795539856, "learning_rate": 1.9280030375565804e-07, "loss": 46.0076, "step": 7122 }, { "epoch": 0.9724895897330876, "grad_norm": 0.041979074478149414, "learning_rate": 1.9089732750423028e-07, "loss": 46.0056, "step": 7123 }, { "epoch": 0.9726261178237422, "grad_norm": 0.08132766932249069, "learning_rate": 1.8900377129790202e-07, "loss": 46.007, "step": 7124 }, { "epoch": 0.9727626459143969, "grad_norm": 0.16842536628246307, "learning_rate": 1.871196354947813e-07, "loss": 46.0082, "step": 7125 }, { "epoch": 0.9728991740050515, "grad_norm": 0.19767284393310547, "learning_rate": 1.8524492045119967e-07, "loss": 46.0047, "step": 7126 }, { "epoch": 0.9730357020957062, "grad_norm": 0.09396287798881531, "learning_rate": 1.8337962652171249e-07, "loss": 46.0017, "step": 7127 }, { "epoch": 0.9731722301863609, "grad_norm": 0.07244996726512909, "learning_rate": 1.8152375405909305e-07, "loss": 46.0015, "step": 7128 }, { "epoch": 0.9733087582770155, "grad_norm": 0.12894263863563538, "learning_rate": 1.7967730341432176e-07, "loss": 46.0043, "step": 7129 }, { "epoch": 0.9734452863676701, "grad_norm": 0.16393700242042542, "learning_rate": 1.778402749366137e-07, "loss": 46.0046, "step": 7130 }, { "epoch": 0.9735818144583248, "grad_norm": 0.0899428129196167, "learning_rate": 1.7601266897338542e-07, "loss": 46.0069, "step": 7131 
}, { "epoch": 0.9737183425489795, "grad_norm": 0.11820844560861588, "learning_rate": 1.7419448587027708e-07, "loss": 46.0105, "step": 7132 }, { "epoch": 0.973854870639634, "grad_norm": 0.1485210806131363, "learning_rate": 1.723857259711581e-07, "loss": 46.0064, "step": 7133 }, { "epoch": 0.9739913987302887, "grad_norm": 0.07663711160421371, "learning_rate": 1.705863896181048e-07, "loss": 46.0067, "step": 7134 }, { "epoch": 0.9741279268209434, "grad_norm": 0.14216692745685577, "learning_rate": 1.6879647715140611e-07, "loss": 46.0041, "step": 7135 }, { "epoch": 0.9742644549115981, "grad_norm": 0.05832170695066452, "learning_rate": 1.6701598890958014e-07, "loss": 46.0078, "step": 7136 }, { "epoch": 0.9744009830022528, "grad_norm": 0.0745471715927124, "learning_rate": 1.6524492522935753e-07, "loss": 46.0003, "step": 7137 }, { "epoch": 0.9745375110929073, "grad_norm": 0.10810956358909607, "learning_rate": 1.63483286445687e-07, "loss": 46.0029, "step": 7138 }, { "epoch": 0.974674039183562, "grad_norm": 0.07453179359436035, "learning_rate": 1.6173107289173538e-07, "loss": 46.0042, "step": 7139 }, { "epoch": 0.9748105672742167, "grad_norm": 0.17998754978179932, "learning_rate": 1.5998828489888763e-07, "loss": 46.0023, "step": 7140 }, { "epoch": 0.9749470953648713, "grad_norm": 0.08225401490926743, "learning_rate": 1.5825492279674668e-07, "loss": 46.0072, "step": 7141 }, { "epoch": 0.975083623455526, "grad_norm": 0.10022734850645065, "learning_rate": 1.5653098691312263e-07, "loss": 46.0024, "step": 7142 }, { "epoch": 0.9752201515461806, "grad_norm": 0.04875979945063591, "learning_rate": 1.5481647757406015e-07, "loss": 46.0, "step": 7143 }, { "epoch": 0.9753566796368353, "grad_norm": 0.15972889959812164, "learning_rate": 1.5311139510380545e-07, "loss": 46.002, "step": 7144 }, { "epoch": 0.9754932077274899, "grad_norm": 0.0891147032380104, "learning_rate": 1.5141573982483392e-07, "loss": 46.0018, "step": 7145 }, { "epoch": 0.9756297358181446, "grad_norm": 0.1982298195362091, "learning_rate": 1.4972951205782793e-07, "loss": 46.0101, "step": 7146 }, { "epoch": 0.9757662639087993, "grad_norm": 0.22120720148086548, "learning_rate": 1.4805271212169902e-07, "loss": 46.0054, "step": 7147 }, { "epoch": 0.9759027919994538, "grad_norm": 0.09720038622617722, "learning_rate": 1.4638534033356577e-07, "loss": 46.0075, "step": 7148 }, { "epoch": 0.9760393200901085, "grad_norm": 0.25006312131881714, "learning_rate": 1.447273970087648e-07, "loss": 46.0042, "step": 7149 }, { "epoch": 0.9761758481807632, "grad_norm": 0.14676909148693085, "learning_rate": 1.4307888246085088e-07, "loss": 46.0059, "step": 7150 }, { "epoch": 0.9763123762714179, "grad_norm": 0.08185182511806488, "learning_rate": 1.4143979700159682e-07, "loss": 46.008, "step": 7151 }, { "epoch": 0.9764489043620725, "grad_norm": 0.08078208565711975, "learning_rate": 1.3981014094099353e-07, "loss": 46.0013, "step": 7152 }, { "epoch": 0.9765854324527271, "grad_norm": 0.12377557158470154, "learning_rate": 1.3818991458723894e-07, "loss": 46.0008, "step": 7153 }, { "epoch": 0.9767219605433818, "grad_norm": 0.18877796828746796, "learning_rate": 1.3657911824676574e-07, "loss": 46.0014, "step": 7154 }, { "epoch": 0.9768584886340365, "grad_norm": 0.040742017328739166, "learning_rate": 1.3497775222420793e-07, "loss": 46.0002, "step": 7155 }, { "epoch": 0.9769950167246911, "grad_norm": 0.04738624021410942, "learning_rate": 1.333858168224178e-07, "loss": 46.005, "step": 7156 }, { "epoch": 0.9771315448153458, "grad_norm": 0.05165252089500427, "learning_rate": 
1.3180331234246556e-07, "loss": 46.0045, "step": 7157 }, { "epoch": 0.9772680729060004, "grad_norm": 0.09031161665916443, "learning_rate": 1.3023023908364518e-07, "loss": 46.0075, "step": 7158 }, { "epoch": 0.977404600996655, "grad_norm": 0.055213626474142075, "learning_rate": 1.286665973434631e-07, "loss": 46.0002, "step": 7159 }, { "epoch": 0.9775411290873097, "grad_norm": 0.08890896290540695, "learning_rate": 1.2711238741762722e-07, "loss": 46.004, "step": 7160 }, { "epoch": 0.9776776571779644, "grad_norm": 0.08437825739383698, "learning_rate": 1.2556760960008575e-07, "loss": 46.0016, "step": 7161 }, { "epoch": 0.9778141852686191, "grad_norm": 0.20216180384159088, "learning_rate": 1.240322641829883e-07, "loss": 46.0019, "step": 7162 }, { "epoch": 0.9779507133592736, "grad_norm": 0.1490899920463562, "learning_rate": 1.225063514567082e-07, "loss": 46.0074, "step": 7163 }, { "epoch": 0.9780872414499283, "grad_norm": 0.0793762356042862, "learning_rate": 1.2098987170982013e-07, "loss": 46.0034, "step": 7164 }, { "epoch": 0.978223769540583, "grad_norm": 0.08808130770921707, "learning_rate": 1.1948282522913357e-07, "loss": 46.0061, "step": 7165 }, { "epoch": 0.9783602976312377, "grad_norm": 0.047708965837955475, "learning_rate": 1.1798521229965942e-07, "loss": 46.0011, "step": 7166 }, { "epoch": 0.9784968257218922, "grad_norm": 0.06762000173330307, "learning_rate": 1.1649703320463779e-07, "loss": 46.0053, "step": 7167 }, { "epoch": 0.9786333538125469, "grad_norm": 0.14670976996421814, "learning_rate": 1.1501828822551575e-07, "loss": 46.0038, "step": 7168 }, { "epoch": 0.9787698819032016, "grad_norm": 0.061925239861011505, "learning_rate": 1.1354897764195293e-07, "loss": 46.0049, "step": 7169 }, { "epoch": 0.9789064099938563, "grad_norm": 0.08076687157154083, "learning_rate": 1.1208910173183817e-07, "loss": 46.0056, "step": 7170 }, { "epoch": 0.9790429380845109, "grad_norm": 0.08908099681138992, "learning_rate": 1.1063866077125618e-07, "loss": 46.0037, "step": 7171 }, { "epoch": 0.9791794661751655, "grad_norm": 0.06807133555412292, "learning_rate": 1.0919765503453195e-07, "loss": 46.0035, "step": 7172 }, { "epoch": 0.9793159942658202, "grad_norm": 0.08383559435606003, "learning_rate": 1.0776608479418082e-07, "loss": 46.0052, "step": 7173 }, { "epoch": 0.9794525223564748, "grad_norm": 0.07002658396959305, "learning_rate": 1.063439503209529e-07, "loss": 46.0028, "step": 7174 }, { "epoch": 0.9795890504471295, "grad_norm": 0.0548606738448143, "learning_rate": 1.0493125188379971e-07, "loss": 46.0023, "step": 7175 }, { "epoch": 0.9797255785377842, "grad_norm": 0.16602098941802979, "learning_rate": 1.0352798974990752e-07, "loss": 46.0117, "step": 7176 }, { "epoch": 0.9798621066284388, "grad_norm": 0.10093922168016434, "learning_rate": 1.0213416418465294e-07, "loss": 46.0034, "step": 7177 }, { "epoch": 0.9799986347190934, "grad_norm": 0.08100040256977081, "learning_rate": 1.0074977545164177e-07, "loss": 46.011, "step": 7178 }, { "epoch": 0.9801351628097481, "grad_norm": 0.046713173389434814, "learning_rate": 9.937482381270346e-08, "loss": 46.0018, "step": 7179 }, { "epoch": 0.9802716909004028, "grad_norm": 0.06428004056215286, "learning_rate": 9.800930952786336e-08, "loss": 46.0027, "step": 7180 }, { "epoch": 0.9804082189910575, "grad_norm": 0.09634747356176376, "learning_rate": 9.665323285537598e-08, "loss": 46.0029, "step": 7181 }, { "epoch": 0.980544747081712, "grad_norm": 0.10261856019496918, "learning_rate": 9.530659405169728e-08, "loss": 46.0024, "step": 7182 }, { "epoch": 0.9806812751723667, 
"grad_norm": 0.08636260032653809, "learning_rate": 9.396939337152355e-08, "loss": 46.016, "step": 7183 }, { "epoch": 0.9808178032630214, "grad_norm": 0.0685448870062828, "learning_rate": 9.264163106774137e-08, "loss": 46.0086, "step": 7184 }, { "epoch": 0.980954331353676, "grad_norm": 0.10635057091712952, "learning_rate": 9.132330739145545e-08, "loss": 46.0068, "step": 7185 }, { "epoch": 0.9810908594443307, "grad_norm": 0.04230385273694992, "learning_rate": 9.001442259200521e-08, "loss": 46.0053, "step": 7186 }, { "epoch": 0.9812273875349853, "grad_norm": 0.07328132539987564, "learning_rate": 8.871497691691489e-08, "loss": 46.0056, "step": 7187 }, { "epoch": 0.98136391562564, "grad_norm": 0.09978116303682327, "learning_rate": 8.742497061195454e-08, "loss": 46.0054, "step": 7188 }, { "epoch": 0.9815004437162946, "grad_norm": 0.05884576961398125, "learning_rate": 8.614440392108458e-08, "loss": 46.0009, "step": 7189 }, { "epoch": 0.9816369718069493, "grad_norm": 0.03834863379597664, "learning_rate": 8.487327708648907e-08, "loss": 46.0009, "step": 7190 }, { "epoch": 0.981773499897604, "grad_norm": 0.03978651016950607, "learning_rate": 8.361159034857569e-08, "loss": 46.0047, "step": 7191 }, { "epoch": 0.9819100279882585, "grad_norm": 0.07296937704086304, "learning_rate": 8.235934394594802e-08, "loss": 46.0005, "step": 7192 }, { "epoch": 0.9820465560789132, "grad_norm": 0.03498697653412819, "learning_rate": 8.11165381154444e-08, "loss": 46.0101, "step": 7193 }, { "epoch": 0.9821830841695679, "grad_norm": 0.038850992918014526, "learning_rate": 7.988317309209902e-08, "loss": 46.0043, "step": 7194 }, { "epoch": 0.9823196122602226, "grad_norm": 0.10181787610054016, "learning_rate": 7.865924910916977e-08, "loss": 46.001, "step": 7195 }, { "epoch": 0.9824561403508771, "grad_norm": 0.4068288803100586, "learning_rate": 7.744476639813814e-08, "loss": 46.0063, "step": 7196 }, { "epoch": 0.9825926684415318, "grad_norm": 0.17444883286952972, "learning_rate": 7.623972518868705e-08, "loss": 46.006, "step": 7197 }, { "epoch": 0.9827291965321865, "grad_norm": 0.11943278461694717, "learning_rate": 7.5044125708712e-08, "loss": 46.0, "step": 7198 }, { "epoch": 0.9828657246228412, "grad_norm": 0.13627932965755463, "learning_rate": 7.38579681843321e-08, "loss": 46.0006, "step": 7199 }, { "epoch": 0.9830022527134958, "grad_norm": 0.09473912417888641, "learning_rate": 7.268125283987348e-08, "loss": 46.0, "step": 7200 }, { "epoch": 0.9831387808041504, "grad_norm": 0.08604719489812851, "learning_rate": 7.151397989788588e-08, "loss": 46.0028, "step": 7201 }, { "epoch": 0.9832753088948051, "grad_norm": 0.03411213681101799, "learning_rate": 7.035614957912606e-08, "loss": 46.0001, "step": 7202 }, { "epoch": 0.9834118369854598, "grad_norm": 0.039187826216220856, "learning_rate": 6.92077621025633e-08, "loss": 46.0053, "step": 7203 }, { "epoch": 0.9835483650761144, "grad_norm": 0.059648871421813965, "learning_rate": 6.806881768539052e-08, "loss": 46.0022, "step": 7204 }, { "epoch": 0.9836848931667691, "grad_norm": 0.041936662048101425, "learning_rate": 6.693931654299657e-08, "loss": 46.0009, "step": 7205 }, { "epoch": 0.9838214212574237, "grad_norm": 0.08943246304988861, "learning_rate": 6.581925888900498e-08, "loss": 46.0064, "step": 7206 }, { "epoch": 0.9839579493480783, "grad_norm": 0.09488219022750854, "learning_rate": 6.470864493524075e-08, "loss": 46.0043, "step": 7207 }, { "epoch": 0.984094477438733, "grad_norm": 0.05033031851053238, "learning_rate": 6.360747489175256e-08, "loss": 46.0046, "step": 7208 }, { "epoch": 
0.9842310055293877, "grad_norm": 0.1277073323726654, "learning_rate": 6.251574896679046e-08, "loss": 46.0052, "step": 7209 }, { "epoch": 0.9843675336200424, "grad_norm": 0.05453884229063988, "learning_rate": 6.14334673668282e-08, "loss": 46.0048, "step": 7210 }, { "epoch": 0.9845040617106969, "grad_norm": 0.061872418969869614, "learning_rate": 6.036063029654649e-08, "loss": 46.0076, "step": 7211 }, { "epoch": 0.9846405898013516, "grad_norm": 0.053107887506484985, "learning_rate": 5.929723795884967e-08, "loss": 46.0111, "step": 7212 }, { "epoch": 0.9847771178920063, "grad_norm": 0.0883575901389122, "learning_rate": 5.8243290554838014e-08, "loss": 46.01, "step": 7213 }, { "epoch": 0.984913645982661, "grad_norm": 0.10030897706747055, "learning_rate": 5.7198788283852057e-08, "loss": 46.0021, "step": 7214 }, { "epoch": 0.9850501740733156, "grad_norm": 0.07091116905212402, "learning_rate": 5.6163731343422675e-08, "loss": 46.0035, "step": 7215 }, { "epoch": 0.9851867021639702, "grad_norm": 0.07437314093112946, "learning_rate": 5.51381199293044e-08, "loss": 46.0071, "step": 7216 }, { "epoch": 0.9853232302546249, "grad_norm": 0.055116403847932816, "learning_rate": 5.412195423545874e-08, "loss": 46.0039, "step": 7217 }, { "epoch": 0.9854597583452795, "grad_norm": 0.10955941677093506, "learning_rate": 5.31152344540764e-08, "loss": 46.0068, "step": 7218 }, { "epoch": 0.9855962864359342, "grad_norm": 0.03393147885799408, "learning_rate": 5.2117960775543986e-08, "loss": 46.0066, "step": 7219 }, { "epoch": 0.9857328145265889, "grad_norm": 0.04151192307472229, "learning_rate": 5.1130133388471724e-08, "loss": 46.0126, "step": 7220 }, { "epoch": 0.9858693426172435, "grad_norm": 0.10087965428829193, "learning_rate": 5.015175247967685e-08, "loss": 46.0049, "step": 7221 }, { "epoch": 0.9860058707078981, "grad_norm": 0.07426264137029648, "learning_rate": 4.9182818234200235e-08, "loss": 46.0075, "step": 7222 }, { "epoch": 0.9861423987985528, "grad_norm": 0.059685610234737396, "learning_rate": 4.8223330835284184e-08, "loss": 46.0048, "step": 7223 }, { "epoch": 0.9862789268892075, "grad_norm": 0.06613564491271973, "learning_rate": 4.727329046438911e-08, "loss": 46.002, "step": 7224 }, { "epoch": 0.9864154549798622, "grad_norm": 0.07357048243284225, "learning_rate": 4.6332697301193496e-08, "loss": 46.0064, "step": 7225 }, { "epoch": 0.9865519830705167, "grad_norm": 0.19530758261680603, "learning_rate": 4.540155152358283e-08, "loss": 46.0062, "step": 7226 }, { "epoch": 0.9866885111611714, "grad_norm": 0.09539202600717545, "learning_rate": 4.447985330765514e-08, "loss": 46.0043, "step": 7227 }, { "epoch": 0.9868250392518261, "grad_norm": 0.04313179850578308, "learning_rate": 4.356760282773209e-08, "loss": 46.0006, "step": 7228 }, { "epoch": 0.9869615673424808, "grad_norm": 0.04633820801973343, "learning_rate": 4.266480025633679e-08, "loss": 46.0093, "step": 7229 }, { "epoch": 0.9870980954331353, "grad_norm": 0.04100847616791725, "learning_rate": 4.177144576420489e-08, "loss": 46.0027, "step": 7230 }, { "epoch": 0.98723462352379, "grad_norm": 0.1251312494277954, "learning_rate": 4.088753952030122e-08, "loss": 46.0043, "step": 7231 }, { "epoch": 0.9873711516144447, "grad_norm": 0.04459778219461441, "learning_rate": 4.0013081691786524e-08, "loss": 46.0116, "step": 7232 }, { "epoch": 0.9875076797050993, "grad_norm": 0.19127972424030304, "learning_rate": 3.9148072444039616e-08, "loss": 46.0086, "step": 7233 }, { "epoch": 0.987644207795754, "grad_norm": 0.11693168431520462, "learning_rate": 3.8292511940657415e-08, "loss": 
46.0036, "step": 7234 }, { "epoch": 0.9877807358864086, "grad_norm": 0.05080500990152359, "learning_rate": 3.744640034344382e-08, "loss": 46.0033, "step": 7235 }, { "epoch": 0.9879172639770633, "grad_norm": 0.18874023854732513, "learning_rate": 3.660973781242083e-08, "loss": 46.0089, "step": 7236 }, { "epoch": 0.9880537920677179, "grad_norm": 0.06047139689326286, "learning_rate": 3.5782524505811876e-08, "loss": 46.009, "step": 7237 }, { "epoch": 0.9881903201583726, "grad_norm": 0.05019622668623924, "learning_rate": 3.496476058006959e-08, "loss": 46.0073, "step": 7238 }, { "epoch": 0.9883268482490273, "grad_norm": 0.05337301269173622, "learning_rate": 3.415644618985359e-08, "loss": 46.003, "step": 7239 }, { "epoch": 0.9884633763396818, "grad_norm": 0.06359133124351501, "learning_rate": 3.3357581488030475e-08, "loss": 46.0037, "step": 7240 }, { "epoch": 0.9885999044303365, "grad_norm": 0.05159619078040123, "learning_rate": 3.256816662568496e-08, "loss": 46.011, "step": 7241 }, { "epoch": 0.9887364325209912, "grad_norm": 0.07998304069042206, "learning_rate": 3.178820175211428e-08, "loss": 46.0029, "step": 7242 }, { "epoch": 0.9888729606116459, "grad_norm": 0.03504487872123718, "learning_rate": 3.1017687014828215e-08, "loss": 46.004, "step": 7243 }, { "epoch": 0.9890094887023005, "grad_norm": 0.0823051929473877, "learning_rate": 3.0256622559543536e-08, "loss": 46.0024, "step": 7244 }, { "epoch": 0.9891460167929551, "grad_norm": 0.422367662191391, "learning_rate": 2.950500853020066e-08, "loss": 46.0013, "step": 7245 }, { "epoch": 0.9892825448836098, "grad_norm": 0.1806757152080536, "learning_rate": 2.8762845068941445e-08, "loss": 46.0088, "step": 7246 }, { "epoch": 0.9894190729742645, "grad_norm": 0.17935673892498016, "learning_rate": 2.8030132316136938e-08, "loss": 46.0132, "step": 7247 }, { "epoch": 0.9895556010649191, "grad_norm": 0.2891080975532532, "learning_rate": 2.730687041034852e-08, "loss": 46.0057, "step": 7248 }, { "epoch": 0.9896921291555738, "grad_norm": 0.04798796772956848, "learning_rate": 2.6593059488366766e-08, "loss": 46.0012, "step": 7249 }, { "epoch": 0.9898286572462284, "grad_norm": 0.425402969121933, "learning_rate": 2.5888699685189255e-08, "loss": 46.0015, "step": 7250 }, { "epoch": 0.989965185336883, "grad_norm": 0.1112116277217865, "learning_rate": 2.519379113402609e-08, "loss": 46.0062, "step": 7251 }, { "epoch": 0.9901017134275377, "grad_norm": 0.053855422884225845, "learning_rate": 2.4508333966305473e-08, "loss": 46.0038, "step": 7252 }, { "epoch": 0.9902382415181924, "grad_norm": 0.03137766197323799, "learning_rate": 2.3832328311651496e-08, "loss": 46.006, "step": 7253 }, { "epoch": 0.9903747696088471, "grad_norm": 0.10431533306837082, "learning_rate": 2.3165774297922992e-08, "loss": 46.0022, "step": 7254 }, { "epoch": 0.9905112976995016, "grad_norm": 0.04208254814147949, "learning_rate": 2.2508672051174685e-08, "loss": 46.0001, "step": 7255 }, { "epoch": 0.9906478257901563, "grad_norm": 0.045661814510822296, "learning_rate": 2.1861021695684935e-08, "loss": 46.0032, "step": 7256 }, { "epoch": 0.990784353880811, "grad_norm": 0.030271239578723907, "learning_rate": 2.122282335393355e-08, "loss": 46.0025, "step": 7257 }, { "epoch": 0.9909208819714657, "grad_norm": 0.0907672792673111, "learning_rate": 2.059407714662398e-08, "loss": 46.0016, "step": 7258 }, { "epoch": 0.9910574100621203, "grad_norm": 0.08112984895706177, "learning_rate": 1.9974783192661107e-08, "loss": 46.0013, "step": 7259 }, { "epoch": 0.9911939381527749, "grad_norm": 0.13127990067005157, 
"learning_rate": 1.9364941609167907e-08, "loss": 46.0027, "step": 7260 }, { "epoch": 0.9913304662434296, "grad_norm": 0.07560381293296814, "learning_rate": 1.8764552511485457e-08, "loss": 46.0118, "step": 7261 }, { "epoch": 0.9914669943340843, "grad_norm": 0.03843251243233681, "learning_rate": 1.817361601315626e-08, "loss": 46.0048, "step": 7262 }, { "epoch": 0.9916035224247389, "grad_norm": 0.03968435525894165, "learning_rate": 1.7592132225946468e-08, "loss": 46.0057, "step": 7263 }, { "epoch": 0.9917400505153935, "grad_norm": 0.054471638053655624, "learning_rate": 1.702010125981812e-08, "loss": 46.0006, "step": 7264 }, { "epoch": 0.9918765786060482, "grad_norm": 0.1916276067495346, "learning_rate": 1.6457523222956907e-08, "loss": 46.0005, "step": 7265 }, { "epoch": 0.9920131066967028, "grad_norm": 0.2849438786506653, "learning_rate": 1.5904398221766592e-08, "loss": 46.0049, "step": 7266 }, { "epoch": 0.9921496347873575, "grad_norm": 0.12652307748794556, "learning_rate": 1.5360726360852397e-08, "loss": 46.0085, "step": 7267 }, { "epoch": 0.9922861628780122, "grad_norm": 0.11404263973236084, "learning_rate": 1.482650774303207e-08, "loss": 46.0012, "step": 7268 }, { "epoch": 0.9924226909686668, "grad_norm": 0.055355679243803024, "learning_rate": 1.430174246934146e-08, "loss": 46.0068, "step": 7269 }, { "epoch": 0.9925592190593214, "grad_norm": 0.09978245943784714, "learning_rate": 1.3786430639023407e-08, "loss": 46.0037, "step": 7270 }, { "epoch": 0.9926957471499761, "grad_norm": 0.05256585776805878, "learning_rate": 1.3280572349538834e-08, "loss": 46.0084, "step": 7271 }, { "epoch": 0.9928322752406308, "grad_norm": 0.1046704575419426, "learning_rate": 1.278416769655566e-08, "loss": 46.0039, "step": 7272 }, { "epoch": 0.9929688033312855, "grad_norm": 0.10620303452014923, "learning_rate": 1.2297216773954346e-08, "loss": 46.0041, "step": 7273 }, { "epoch": 0.99310533142194, "grad_norm": 0.11976180225610733, "learning_rate": 1.1819719673827889e-08, "loss": 46.0065, "step": 7274 }, { "epoch": 0.9932418595125947, "grad_norm": 0.28400561213493347, "learning_rate": 1.1351676486487383e-08, "loss": 46.0087, "step": 7275 }, { "epoch": 0.9933783876032494, "grad_norm": 0.07839032262563705, "learning_rate": 1.0893087300439809e-08, "loss": 46.0029, "step": 7276 }, { "epoch": 0.993514915693904, "grad_norm": 0.1288733035326004, "learning_rate": 1.0443952202426887e-08, "loss": 46.0086, "step": 7277 }, { "epoch": 0.9936514437845587, "grad_norm": 0.05590146407485008, "learning_rate": 1.0004271277386234e-08, "loss": 46.0053, "step": 7278 }, { "epoch": 0.9937879718752133, "grad_norm": 0.04046904668211937, "learning_rate": 9.574044608468003e-09, "loss": 46.0029, "step": 7279 }, { "epoch": 0.993924499965868, "grad_norm": 0.05340801179409027, "learning_rate": 9.153272277040436e-09, "loss": 46.0055, "step": 7280 }, { "epoch": 0.9940610280565226, "grad_norm": 0.0846848338842392, "learning_rate": 8.741954362678772e-09, "loss": 46.0034, "step": 7281 }, { "epoch": 0.9941975561471773, "grad_norm": 0.08063561469316483, "learning_rate": 8.340090943176338e-09, "loss": 46.0035, "step": 7282 }, { "epoch": 0.994334084237832, "grad_norm": 0.049741119146347046, "learning_rate": 7.947682094533449e-09, "loss": 46.0018, "step": 7283 }, { "epoch": 0.9944706123284865, "grad_norm": 0.0649300292134285, "learning_rate": 7.564727890968514e-09, "loss": 46.0059, "step": 7284 }, { "epoch": 0.9946071404191412, "grad_norm": 0.06503751128911972, "learning_rate": 7.1912284048958336e-09, "loss": 46.0024, "step": 7285 }, { "epoch": 
0.9947436685097959, "grad_norm": 0.08364452421665192, "learning_rate": 6.8271837069588955e-09, "loss": 46.0006, "step": 7286 }, { "epoch": 0.9948801966004506, "grad_norm": 0.06732216477394104, "learning_rate": 6.472593866013732e-09, "loss": 46.0, "step": 7287 }, { "epoch": 0.9950167246911052, "grad_norm": 0.08693568408489227, "learning_rate": 6.127458949106713e-09, "loss": 46.005, "step": 7288 }, { "epoch": 0.9951532527817598, "grad_norm": 0.053449422121047974, "learning_rate": 5.7917790215245015e-09, "loss": 46.0021, "step": 7289 }, { "epoch": 0.9952897808724145, "grad_norm": 0.05752718821167946, "learning_rate": 5.4655541467441006e-09, "loss": 46.0032, "step": 7290 }, { "epoch": 0.9954263089630692, "grad_norm": 0.04035717621445656, "learning_rate": 5.148784386460604e-09, "loss": 46.001, "step": 7291 }, { "epoch": 0.9955628370537238, "grad_norm": 0.05446084216237068, "learning_rate": 4.841469800592746e-09, "loss": 46.008, "step": 7292 }, { "epoch": 0.9956993651443784, "grad_norm": 0.05863227695226669, "learning_rate": 4.543610447249602e-09, "loss": 46.0042, "step": 7293 }, { "epoch": 0.9958358932350331, "grad_norm": 0.05528897047042847, "learning_rate": 4.2552063827694386e-09, "loss": 46.0031, "step": 7294 }, { "epoch": 0.9959724213256878, "grad_norm": 0.07325014472007751, "learning_rate": 3.976257661691962e-09, "loss": 46.0055, "step": 7295 }, { "epoch": 0.9961089494163424, "grad_norm": 0.20819850265979767, "learning_rate": 3.7067643367749705e-09, "loss": 46.0083, "step": 7296 }, { "epoch": 0.9962454775069971, "grad_norm": 0.16535024344921112, "learning_rate": 3.446726458988803e-09, "loss": 46.0058, "step": 7297 }, { "epoch": 0.9963820055976517, "grad_norm": 0.06618311256170273, "learning_rate": 3.1961440775107878e-09, "loss": 46.0018, "step": 7298 }, { "epoch": 0.9965185336883063, "grad_norm": 0.20235642790794373, "learning_rate": 2.9550172397252442e-09, "loss": 46.0093, "step": 7299 }, { "epoch": 0.996655061778961, "grad_norm": 0.04973575845360756, "learning_rate": 2.723345991245685e-09, "loss": 46.0, "step": 7300 }, { "epoch": 0.9967915898696157, "grad_norm": 0.07193689048290253, "learning_rate": 2.5011303758759596e-09, "loss": 46.0096, "step": 7301 }, { "epoch": 0.9969281179602704, "grad_norm": 0.057107098400592804, "learning_rate": 2.288370435654663e-09, "loss": 46.0006, "step": 7302 }, { "epoch": 0.9970646460509249, "grad_norm": 0.10516843944787979, "learning_rate": 2.0850662108051755e-09, "loss": 46.0045, "step": 7303 }, { "epoch": 0.9972011741415796, "grad_norm": 0.15865033864974976, "learning_rate": 1.8912177397856224e-09, "loss": 46.0095, "step": 7304 }, { "epoch": 0.9973377022322343, "grad_norm": 0.07561293244361877, "learning_rate": 1.706825059255568e-09, "loss": 46.0012, "step": 7305 }, { "epoch": 0.997474230322889, "grad_norm": 0.08446773886680603, "learning_rate": 1.5318882040926686e-09, "loss": 46.0098, "step": 7306 }, { "epoch": 0.9976107584135436, "grad_norm": 0.11638659983873367, "learning_rate": 1.3664072073704681e-09, "loss": 46.0044, "step": 7307 }, { "epoch": 0.9977472865041982, "grad_norm": 0.049882251769304276, "learning_rate": 1.210382100397256e-09, "loss": 46.0102, "step": 7308 }, { "epoch": 0.9978838145948529, "grad_norm": 0.08385684341192245, "learning_rate": 1.063812912671658e-09, "loss": 46.0012, "step": 7309 }, { "epoch": 0.9980203426855075, "grad_norm": 0.15993371605873108, "learning_rate": 9.266996719159426e-10, "loss": 46.003, "step": 7310 }, { "epoch": 0.9981568707761622, "grad_norm": 0.04674162715673447, "learning_rate": 7.990424040649202e-10, "loss": 
46.0063, "step": 7311 }, { "epoch": 0.9982933988668169, "grad_norm": 0.17500679194927216, "learning_rate": 6.808411332548393e-10, "loss": 46.0085, "step": 7312 }, { "epoch": 0.9984299269574715, "grad_norm": 0.11019917577505112, "learning_rate": 5.720958818511424e-10, "loss": 46.0036, "step": 7313 }, { "epoch": 0.9985664550481261, "grad_norm": 0.09337335079908371, "learning_rate": 4.72806670409609e-10, "loss": 46.0022, "step": 7314 }, { "epoch": 0.9987029831387808, "grad_norm": 0.09085293859243393, "learning_rate": 3.829735177096616e-10, "loss": 46.0003, "step": 7315 }, { "epoch": 0.9988395112294355, "grad_norm": 0.1213827133178711, "learning_rate": 3.0259644074326353e-10, "loss": 46.0042, "step": 7316 }, { "epoch": 0.9989760393200902, "grad_norm": 0.07254913449287415, "learning_rate": 2.3167545471491914e-10, "loss": 46.0039, "step": 7317 }, { "epoch": 0.9991125674107447, "grad_norm": 0.05589550361037254, "learning_rate": 1.7021057303057142e-10, "loss": 46.0002, "step": 7318 }, { "epoch": 0.9992490955013994, "grad_norm": 0.0650186687707901, "learning_rate": 1.1820180731980656e-10, "loss": 46.0024, "step": 7319 }, { "epoch": 0.9993856235920541, "grad_norm": 0.07707148045301437, "learning_rate": 7.564916741364947e-11, "loss": 46.0048, "step": 7320 }, { "epoch": 0.9995221516827087, "grad_norm": 0.06899863481521606, "learning_rate": 4.2552661366768164e-11, "loss": 46.0075, "step": 7321 }, { "epoch": 0.9996586797733634, "grad_norm": 0.13619117438793182, "learning_rate": 1.8912295429718285e-11, "loss": 46.0019, "step": 7322 }, { "epoch": 0.999795207864018, "grad_norm": 0.06259801238775253, "learning_rate": 4.728074082249734e-12, "loss": 46.0084, "step": 7323 }, { "epoch": 0.9999317359546727, "grad_norm": 0.11005455255508423, "learning_rate": 0.0, "loss": 46.0028, "step": 7324 } ], "logging_steps": 1, "max_steps": 7324, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 239, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 687479785488384.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }