{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.994404853053773, "eval_steps": 500, "global_step": 15900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000628227025541355, "grad_norm": 37.5, "learning_rate": 2.5000000000000004e-07, "loss": 1.9002, "step": 2 }, { "epoch": 0.00125645405108271, "grad_norm": 5.65625, "learning_rate": 5.000000000000001e-07, "loss": 1.6234, "step": 4 }, { "epoch": 0.001884681076624065, "grad_norm": 5.15625, "learning_rate": 7.5e-07, "loss": 1.6223, "step": 6 }, { "epoch": 0.00251290810216542, "grad_norm": 4.8125, "learning_rate": 1.0000000000000002e-06, "loss": 1.6607, "step": 8 }, { "epoch": 0.003141135127706775, "grad_norm": 5.03125, "learning_rate": 1.25e-06, "loss": 1.6644, "step": 10 }, { "epoch": 0.00376936215324813, "grad_norm": 5.71875, "learning_rate": 1.5e-06, "loss": 1.8307, "step": 12 }, { "epoch": 0.004397589178789485, "grad_norm": 5.28125, "learning_rate": 1.75e-06, "loss": 1.623, "step": 14 }, { "epoch": 0.00502581620433084, "grad_norm": 3.625, "learning_rate": 2.0000000000000003e-06, "loss": 1.6915, "step": 16 }, { "epoch": 0.005654043229872195, "grad_norm": 3.609375, "learning_rate": 2.25e-06, "loss": 1.8222, "step": 18 }, { "epoch": 0.00628227025541355, "grad_norm": 3.21875, "learning_rate": 2.5e-06, "loss": 1.6702, "step": 20 }, { "epoch": 0.006910497280954905, "grad_norm": 2.140625, "learning_rate": 2.7500000000000004e-06, "loss": 1.6118, "step": 22 }, { "epoch": 0.00753872430649626, "grad_norm": 2.890625, "learning_rate": 3e-06, "loss": 1.6499, "step": 24 }, { "epoch": 0.008166951332037615, "grad_norm": 2.421875, "learning_rate": 3.2500000000000002e-06, "loss": 1.4785, "step": 26 }, { "epoch": 0.00879517835757897, "grad_norm": 2.421875, "learning_rate": 3.5e-06, "loss": 1.7235, "step": 28 }, { "epoch": 0.009423405383120325, "grad_norm": 2.25, "learning_rate": 3.7500000000000005e-06, "loss": 1.6245, "step": 30 }, { "epoch": 0.01005163240866168, "grad_norm": 1.6640625, "learning_rate": 4.000000000000001e-06, "loss": 1.5486, "step": 32 }, { "epoch": 0.010679859434203035, "grad_norm": 2.09375, "learning_rate": 4.25e-06, "loss": 1.4448, "step": 34 }, { "epoch": 0.01130808645974439, "grad_norm": 1.5078125, "learning_rate": 4.5e-06, "loss": 1.48, "step": 36 }, { "epoch": 0.011936313485285744, "grad_norm": 1.6796875, "learning_rate": 4.75e-06, "loss": 1.5128, "step": 38 }, { "epoch": 0.0125645405108271, "grad_norm": 1.5234375, "learning_rate": 5e-06, "loss": 1.4774, "step": 40 }, { "epoch": 0.013192767536368456, "grad_norm": 1.546875, "learning_rate": 5.2500000000000006e-06, "loss": 1.3692, "step": 42 }, { "epoch": 0.01382099456190981, "grad_norm": 1.34375, "learning_rate": 5.500000000000001e-06, "loss": 1.5056, "step": 44 }, { "epoch": 0.014449221587451166, "grad_norm": 1.21875, "learning_rate": 5.75e-06, "loss": 1.4744, "step": 46 }, { "epoch": 0.01507744861299252, "grad_norm": 1.0078125, "learning_rate": 6e-06, "loss": 1.5776, "step": 48 }, { "epoch": 0.015705675638533874, "grad_norm": 1.1015625, "learning_rate": 6.25e-06, "loss": 1.486, "step": 50 }, { "epoch": 0.01633390266407523, "grad_norm": 0.85546875, "learning_rate": 6.5000000000000004e-06, "loss": 1.5541, "step": 52 }, { "epoch": 0.016962129689616585, "grad_norm": 0.984375, "learning_rate": 6.750000000000001e-06, "loss": 1.3999, "step": 54 }, { "epoch": 0.01759035671515794, "grad_norm": 0.96875, "learning_rate": 7e-06, "loss": 1.4455, "step": 56 }, { "epoch": 0.018218583740699294, "grad_norm": 1.0546875, "learning_rate": 7.25e-06, "loss": 1.5221, "step": 58 }, { "epoch": 0.01884681076624065, "grad_norm": 0.88671875, "learning_rate": 7.500000000000001e-06, "loss": 1.4798, "step": 60 }, { "epoch": 0.019475037791782005, "grad_norm": 0.9140625, "learning_rate": 7.75e-06, "loss": 1.4334, "step": 62 }, { "epoch": 0.02010326481732336, "grad_norm": 0.98046875, "learning_rate": 8.000000000000001e-06, "loss": 1.3293, "step": 64 }, { "epoch": 0.020731491842864717, "grad_norm": 0.83984375, "learning_rate": 8.25e-06, "loss": 1.4174, "step": 66 }, { "epoch": 0.02135971886840607, "grad_norm": 0.859375, "learning_rate": 8.5e-06, "loss": 1.4177, "step": 68 }, { "epoch": 0.021987945893947425, "grad_norm": 0.84765625, "learning_rate": 8.750000000000001e-06, "loss": 1.3708, "step": 70 }, { "epoch": 0.02261617291948878, "grad_norm": 0.90234375, "learning_rate": 9e-06, "loss": 1.4062, "step": 72 }, { "epoch": 0.023244399945030136, "grad_norm": 0.83984375, "learning_rate": 9.250000000000001e-06, "loss": 1.3829, "step": 74 }, { "epoch": 0.02387262697057149, "grad_norm": 0.9375, "learning_rate": 9.5e-06, "loss": 1.3441, "step": 76 }, { "epoch": 0.024500853996112845, "grad_norm": 0.90234375, "learning_rate": 9.75e-06, "loss": 1.5625, "step": 78 }, { "epoch": 0.0251290810216542, "grad_norm": 0.81640625, "learning_rate": 1e-05, "loss": 1.4504, "step": 80 }, { "epoch": 0.025757308047195556, "grad_norm": 0.83984375, "learning_rate": 1.025e-05, "loss": 1.4672, "step": 82 }, { "epoch": 0.026385535072736912, "grad_norm": 0.82421875, "learning_rate": 1.0500000000000001e-05, "loss": 1.4405, "step": 84 }, { "epoch": 0.027013762098278264, "grad_norm": 1.0546875, "learning_rate": 1.075e-05, "loss": 1.3557, "step": 86 }, { "epoch": 0.02764198912381962, "grad_norm": 0.90234375, "learning_rate": 1.1000000000000001e-05, "loss": 1.4309, "step": 88 }, { "epoch": 0.028270216149360976, "grad_norm": 0.78125, "learning_rate": 1.125e-05, "loss": 1.3528, "step": 90 }, { "epoch": 0.02889844317490233, "grad_norm": 0.96875, "learning_rate": 1.15e-05, "loss": 1.4093, "step": 92 }, { "epoch": 0.029526670200443684, "grad_norm": 0.87890625, "learning_rate": 1.1750000000000001e-05, "loss": 1.4324, "step": 94 }, { "epoch": 0.03015489722598504, "grad_norm": 0.875, "learning_rate": 1.2e-05, "loss": 1.4622, "step": 96 }, { "epoch": 0.030783124251526395, "grad_norm": 0.85546875, "learning_rate": 1.2250000000000001e-05, "loss": 1.5166, "step": 98 }, { "epoch": 0.03141135127706775, "grad_norm": 0.76953125, "learning_rate": 1.25e-05, "loss": 1.4729, "step": 100 }, { "epoch": 0.032039578302609104, "grad_norm": 0.8828125, "learning_rate": 1.275e-05, "loss": 1.4201, "step": 102 }, { "epoch": 0.03266780532815046, "grad_norm": 0.97265625, "learning_rate": 1.3000000000000001e-05, "loss": 1.3646, "step": 104 }, { "epoch": 0.033296032353691815, "grad_norm": 0.859375, "learning_rate": 1.325e-05, "loss": 1.3105, "step": 106 }, { "epoch": 0.03392425937923317, "grad_norm": 0.78125, "learning_rate": 1.3500000000000001e-05, "loss": 1.5302, "step": 108 }, { "epoch": 0.03455248640477453, "grad_norm": 0.875, "learning_rate": 1.375e-05, "loss": 1.3979, "step": 110 }, { "epoch": 0.03518071343031588, "grad_norm": 0.796875, "learning_rate": 1.4e-05, "loss": 1.3961, "step": 112 }, { "epoch": 0.03580894045585724, "grad_norm": 0.796875, "learning_rate": 1.425e-05, "loss": 1.3645, "step": 114 }, { "epoch": 0.03643716748139859, "grad_norm": 0.7421875, "learning_rate": 1.45e-05, "loss": 1.306, "step": 116 }, { "epoch": 0.03706539450693994, "grad_norm": 0.8828125, "learning_rate": 1.4750000000000003e-05, "loss": 1.3799, "step": 118 }, { "epoch": 0.0376936215324813, "grad_norm": 0.73828125, "learning_rate": 1.5000000000000002e-05, "loss": 1.3281, "step": 120 }, { "epoch": 0.038321848558022654, "grad_norm": 0.87890625, "learning_rate": 1.525e-05, "loss": 1.4052, "step": 122 }, { "epoch": 0.03895007558356401, "grad_norm": 0.8203125, "learning_rate": 1.55e-05, "loss": 1.4946, "step": 124 }, { "epoch": 0.039578302609105366, "grad_norm": 0.80859375, "learning_rate": 1.575e-05, "loss": 1.4292, "step": 126 }, { "epoch": 0.04020652963464672, "grad_norm": 1.0078125, "learning_rate": 1.6000000000000003e-05, "loss": 1.4858, "step": 128 }, { "epoch": 0.04083475666018808, "grad_norm": 0.9765625, "learning_rate": 1.6250000000000002e-05, "loss": 1.2745, "step": 130 }, { "epoch": 0.04146298368572943, "grad_norm": 0.8046875, "learning_rate": 1.65e-05, "loss": 1.4684, "step": 132 }, { "epoch": 0.04209121071127078, "grad_norm": 0.80859375, "learning_rate": 1.675e-05, "loss": 1.4275, "step": 134 }, { "epoch": 0.04271943773681214, "grad_norm": 0.90625, "learning_rate": 1.7e-05, "loss": 1.2831, "step": 136 }, { "epoch": 0.043347664762353494, "grad_norm": 0.953125, "learning_rate": 1.7250000000000003e-05, "loss": 1.445, "step": 138 }, { "epoch": 0.04397589178789485, "grad_norm": 0.80859375, "learning_rate": 1.7500000000000002e-05, "loss": 1.3457, "step": 140 }, { "epoch": 0.044604118813436205, "grad_norm": 0.8046875, "learning_rate": 1.775e-05, "loss": 1.3961, "step": 142 }, { "epoch": 0.04523234583897756, "grad_norm": 0.75, "learning_rate": 1.8e-05, "loss": 1.2985, "step": 144 }, { "epoch": 0.04586057286451892, "grad_norm": 0.81640625, "learning_rate": 1.825e-05, "loss": 1.3075, "step": 146 }, { "epoch": 0.04648879989006027, "grad_norm": 0.76953125, "learning_rate": 1.8500000000000002e-05, "loss": 1.3602, "step": 148 }, { "epoch": 0.04711702691560163, "grad_norm": 0.8828125, "learning_rate": 1.8750000000000002e-05, "loss": 1.4481, "step": 150 }, { "epoch": 0.04774525394114298, "grad_norm": 0.80078125, "learning_rate": 1.9e-05, "loss": 1.409, "step": 152 }, { "epoch": 0.04837348096668433, "grad_norm": 0.80859375, "learning_rate": 1.925e-05, "loss": 1.357, "step": 154 }, { "epoch": 0.04900170799222569, "grad_norm": 0.77734375, "learning_rate": 1.95e-05, "loss": 1.2841, "step": 156 }, { "epoch": 0.049629935017767045, "grad_norm": 0.7421875, "learning_rate": 1.9750000000000002e-05, "loss": 1.4336, "step": 158 }, { "epoch": 0.0502581620433084, "grad_norm": 0.9609375, "learning_rate": 2e-05, "loss": 1.3853, "step": 160 }, { "epoch": 0.050886389068849756, "grad_norm": 0.89453125, "learning_rate": 1.9997461123452876e-05, "loss": 1.3465, "step": 162 }, { "epoch": 0.05151461609439111, "grad_norm": 0.84375, "learning_rate": 1.9994922246905744e-05, "loss": 1.3669, "step": 164 }, { "epoch": 0.05214284311993247, "grad_norm": 0.7421875, "learning_rate": 1.999238337035862e-05, "loss": 1.6046, "step": 166 }, { "epoch": 0.052771070145473824, "grad_norm": 0.81640625, "learning_rate": 1.998984449381149e-05, "loss": 1.2993, "step": 168 }, { "epoch": 0.05339929717101517, "grad_norm": 0.79296875, "learning_rate": 1.9987305617264362e-05, "loss": 1.4495, "step": 170 }, { "epoch": 0.05402752419655653, "grad_norm": 0.7734375, "learning_rate": 1.9984766740717233e-05, "loss": 1.3141, "step": 172 }, { "epoch": 0.054655751222097884, "grad_norm": 0.71484375, "learning_rate": 1.9982227864170108e-05, "loss": 1.4852, "step": 174 }, { "epoch": 0.05528397824763924, "grad_norm": 0.875, "learning_rate": 1.997968898762298e-05, "loss": 1.4228, "step": 176 }, { "epoch": 0.055912205273180596, "grad_norm": 0.80078125, "learning_rate": 1.997715011107585e-05, "loss": 1.5617, "step": 178 }, { "epoch": 0.05654043229872195, "grad_norm": 0.82421875, "learning_rate": 1.9974611234528722e-05, "loss": 1.2938, "step": 180 }, { "epoch": 0.05716865932426331, "grad_norm": 0.7421875, "learning_rate": 1.9972072357981597e-05, "loss": 1.469, "step": 182 }, { "epoch": 0.05779688634980466, "grad_norm": 0.82421875, "learning_rate": 1.9969533481434465e-05, "loss": 1.41, "step": 184 }, { "epoch": 0.05842511337534602, "grad_norm": 0.7265625, "learning_rate": 1.996699460488734e-05, "loss": 1.4414, "step": 186 }, { "epoch": 0.05905334040088737, "grad_norm": 0.8828125, "learning_rate": 1.996445572834021e-05, "loss": 1.2717, "step": 188 }, { "epoch": 0.059681567426428724, "grad_norm": 0.7890625, "learning_rate": 1.9961916851793083e-05, "loss": 1.3179, "step": 190 }, { "epoch": 0.06030979445197008, "grad_norm": 0.88671875, "learning_rate": 1.9959377975245954e-05, "loss": 1.4353, "step": 192 }, { "epoch": 0.060938021477511435, "grad_norm": 0.79296875, "learning_rate": 1.995683909869883e-05, "loss": 1.4721, "step": 194 }, { "epoch": 0.06156624850305279, "grad_norm": 0.7890625, "learning_rate": 1.99543002221517e-05, "loss": 1.4394, "step": 196 }, { "epoch": 0.06219447552859415, "grad_norm": 0.765625, "learning_rate": 1.995176134560457e-05, "loss": 1.4004, "step": 198 }, { "epoch": 0.0628227025541355, "grad_norm": 0.703125, "learning_rate": 1.9949222469057443e-05, "loss": 1.3159, "step": 200 }, { "epoch": 0.06345092957967685, "grad_norm": 0.7890625, "learning_rate": 1.9946683592510318e-05, "loss": 1.408, "step": 202 }, { "epoch": 0.06407915660521821, "grad_norm": 0.77734375, "learning_rate": 1.994414471596319e-05, "loss": 1.362, "step": 204 }, { "epoch": 0.06470738363075956, "grad_norm": 0.7421875, "learning_rate": 1.994160583941606e-05, "loss": 1.321, "step": 206 }, { "epoch": 0.06533561065630092, "grad_norm": 0.76953125, "learning_rate": 1.9939066962868932e-05, "loss": 1.3576, "step": 208 }, { "epoch": 0.06596383768184227, "grad_norm": 0.77734375, "learning_rate": 1.9936528086321803e-05, "loss": 1.4552, "step": 210 }, { "epoch": 0.06659206470738363, "grad_norm": 0.875, "learning_rate": 1.9933989209774675e-05, "loss": 1.3144, "step": 212 }, { "epoch": 0.06722029173292499, "grad_norm": 0.8515625, "learning_rate": 1.993145033322755e-05, "loss": 1.3575, "step": 214 }, { "epoch": 0.06784851875846634, "grad_norm": 0.828125, "learning_rate": 1.992891145668042e-05, "loss": 1.25, "step": 216 }, { "epoch": 0.0684767457840077, "grad_norm": 0.75, "learning_rate": 1.9926372580133292e-05, "loss": 1.4611, "step": 218 }, { "epoch": 0.06910497280954905, "grad_norm": 0.73828125, "learning_rate": 1.9923833703586163e-05, "loss": 1.2994, "step": 220 }, { "epoch": 0.06973319983509041, "grad_norm": 0.9375, "learning_rate": 1.9921294827039038e-05, "loss": 1.2697, "step": 222 }, { "epoch": 0.07036142686063176, "grad_norm": 0.81640625, "learning_rate": 1.991875595049191e-05, "loss": 1.4699, "step": 224 }, { "epoch": 0.07098965388617312, "grad_norm": 0.92578125, "learning_rate": 1.991621707394478e-05, "loss": 1.4276, "step": 226 }, { "epoch": 0.07161788091171448, "grad_norm": 0.74609375, "learning_rate": 1.9913678197397652e-05, "loss": 1.4052, "step": 228 }, { "epoch": 0.07224610793725583, "grad_norm": 0.8203125, "learning_rate": 1.9911139320850527e-05, "loss": 1.3744, "step": 230 }, { "epoch": 0.07287433496279717, "grad_norm": 0.87109375, "learning_rate": 1.9908600444303395e-05, "loss": 1.363, "step": 232 }, { "epoch": 0.07350256198833853, "grad_norm": 0.859375, "learning_rate": 1.990606156775627e-05, "loss": 1.421, "step": 234 }, { "epoch": 0.07413078901387989, "grad_norm": 0.76171875, "learning_rate": 1.990352269120914e-05, "loss": 1.5131, "step": 236 }, { "epoch": 0.07475901603942124, "grad_norm": 1.5625, "learning_rate": 1.9900983814662013e-05, "loss": 1.3155, "step": 238 }, { "epoch": 0.0753872430649626, "grad_norm": 0.78125, "learning_rate": 1.9898444938114884e-05, "loss": 1.3595, "step": 240 }, { "epoch": 0.07601547009050395, "grad_norm": 0.7890625, "learning_rate": 1.989590606156776e-05, "loss": 1.3706, "step": 242 }, { "epoch": 0.07664369711604531, "grad_norm": 0.72265625, "learning_rate": 1.989336718502063e-05, "loss": 1.3058, "step": 244 }, { "epoch": 0.07727192414158666, "grad_norm": 0.71484375, "learning_rate": 1.98908283084735e-05, "loss": 1.3404, "step": 246 }, { "epoch": 0.07790015116712802, "grad_norm": 0.9453125, "learning_rate": 1.9888289431926376e-05, "loss": 1.235, "step": 248 }, { "epoch": 0.07852837819266938, "grad_norm": 0.84765625, "learning_rate": 1.9885750555379248e-05, "loss": 1.3668, "step": 250 }, { "epoch": 0.07915660521821073, "grad_norm": 0.71875, "learning_rate": 1.988321167883212e-05, "loss": 1.3602, "step": 252 }, { "epoch": 0.07978483224375209, "grad_norm": 0.828125, "learning_rate": 1.988067280228499e-05, "loss": 1.3833, "step": 254 }, { "epoch": 0.08041305926929344, "grad_norm": 0.796875, "learning_rate": 1.9878133925737865e-05, "loss": 1.4476, "step": 256 }, { "epoch": 0.0810412862948348, "grad_norm": 0.68359375, "learning_rate": 1.9875595049190733e-05, "loss": 1.4111, "step": 258 }, { "epoch": 0.08166951332037616, "grad_norm": 0.88671875, "learning_rate": 1.9873056172643608e-05, "loss": 1.3636, "step": 260 }, { "epoch": 0.08229774034591751, "grad_norm": 0.78515625, "learning_rate": 1.987051729609648e-05, "loss": 1.2524, "step": 262 }, { "epoch": 0.08292596737145887, "grad_norm": 0.9609375, "learning_rate": 1.986797841954935e-05, "loss": 1.4048, "step": 264 }, { "epoch": 0.08355419439700022, "grad_norm": 0.7109375, "learning_rate": 1.9865439543002222e-05, "loss": 1.3619, "step": 266 }, { "epoch": 0.08418242142254156, "grad_norm": 0.796875, "learning_rate": 1.9862900666455097e-05, "loss": 1.4125, "step": 268 }, { "epoch": 0.08481064844808292, "grad_norm": 0.6875, "learning_rate": 1.986036178990797e-05, "loss": 1.4536, "step": 270 }, { "epoch": 0.08543887547362428, "grad_norm": 0.8125, "learning_rate": 1.985782291336084e-05, "loss": 1.392, "step": 272 }, { "epoch": 0.08606710249916563, "grad_norm": 0.77734375, "learning_rate": 1.985528403681371e-05, "loss": 1.393, "step": 274 }, { "epoch": 0.08669532952470699, "grad_norm": 0.91015625, "learning_rate": 1.9852745160266586e-05, "loss": 1.3635, "step": 276 }, { "epoch": 0.08732355655024834, "grad_norm": 0.75, "learning_rate": 1.9850206283719454e-05, "loss": 1.4626, "step": 278 }, { "epoch": 0.0879517835757897, "grad_norm": 0.8671875, "learning_rate": 1.984766740717233e-05, "loss": 1.3507, "step": 280 }, { "epoch": 0.08858001060133106, "grad_norm": 0.83203125, "learning_rate": 1.98451285306252e-05, "loss": 1.4432, "step": 282 }, { "epoch": 0.08920823762687241, "grad_norm": 0.83203125, "learning_rate": 1.984258965407807e-05, "loss": 1.3932, "step": 284 }, { "epoch": 0.08983646465241377, "grad_norm": 0.8203125, "learning_rate": 1.9840050777530943e-05, "loss": 1.391, "step": 286 }, { "epoch": 0.09046469167795512, "grad_norm": 0.7109375, "learning_rate": 1.9837511900983818e-05, "loss": 1.4163, "step": 288 }, { "epoch": 0.09109291870349648, "grad_norm": 1.171875, "learning_rate": 1.983497302443669e-05, "loss": 1.4135, "step": 290 }, { "epoch": 0.09172114572903783, "grad_norm": 0.8515625, "learning_rate": 1.983243414788956e-05, "loss": 1.4099, "step": 292 }, { "epoch": 0.09234937275457919, "grad_norm": 0.76171875, "learning_rate": 1.982989527134243e-05, "loss": 1.2512, "step": 294 }, { "epoch": 0.09297759978012055, "grad_norm": 0.734375, "learning_rate": 1.9827356394795306e-05, "loss": 1.255, "step": 296 }, { "epoch": 0.0936058268056619, "grad_norm": 0.87109375, "learning_rate": 1.9824817518248174e-05, "loss": 1.2295, "step": 298 }, { "epoch": 0.09423405383120326, "grad_norm": 0.765625, "learning_rate": 1.982227864170105e-05, "loss": 1.4514, "step": 300 }, { "epoch": 0.09486228085674461, "grad_norm": 0.8828125, "learning_rate": 1.981973976515392e-05, "loss": 1.3137, "step": 302 }, { "epoch": 0.09549050788228595, "grad_norm": 0.86328125, "learning_rate": 1.9817200888606792e-05, "loss": 1.3511, "step": 304 }, { "epoch": 0.09611873490782731, "grad_norm": 0.85546875, "learning_rate": 1.9814662012059663e-05, "loss": 1.3035, "step": 306 }, { "epoch": 0.09674696193336867, "grad_norm": 0.734375, "learning_rate": 1.9812123135512538e-05, "loss": 1.4151, "step": 308 }, { "epoch": 0.09737518895891002, "grad_norm": 0.79296875, "learning_rate": 1.980958425896541e-05, "loss": 1.3819, "step": 310 }, { "epoch": 0.09800341598445138, "grad_norm": 0.76953125, "learning_rate": 1.980704538241828e-05, "loss": 1.3212, "step": 312 }, { "epoch": 0.09863164300999273, "grad_norm": 0.86328125, "learning_rate": 1.9804506505871152e-05, "loss": 1.4313, "step": 314 }, { "epoch": 0.09925987003553409, "grad_norm": 0.828125, "learning_rate": 1.9801967629324027e-05, "loss": 1.4021, "step": 316 }, { "epoch": 0.09988809706107545, "grad_norm": 0.7578125, "learning_rate": 1.97994287527769e-05, "loss": 1.3191, "step": 318 }, { "epoch": 0.1005163240866168, "grad_norm": 0.734375, "learning_rate": 1.979688987622977e-05, "loss": 1.3138, "step": 320 }, { "epoch": 0.10114455111215816, "grad_norm": 0.78125, "learning_rate": 1.979435099968264e-05, "loss": 1.3881, "step": 322 }, { "epoch": 0.10177277813769951, "grad_norm": 0.7265625, "learning_rate": 1.9791812123135513e-05, "loss": 1.1786, "step": 324 }, { "epoch": 0.10240100516324087, "grad_norm": 0.73828125, "learning_rate": 1.9789273246588384e-05, "loss": 1.246, "step": 326 }, { "epoch": 0.10302923218878222, "grad_norm": 0.8515625, "learning_rate": 1.978673437004126e-05, "loss": 1.359, "step": 328 }, { "epoch": 0.10365745921432358, "grad_norm": 0.91796875, "learning_rate": 1.978419549349413e-05, "loss": 1.271, "step": 330 }, { "epoch": 0.10428568623986494, "grad_norm": 0.75, "learning_rate": 1.9781656616947e-05, "loss": 1.4137, "step": 332 }, { "epoch": 0.10491391326540629, "grad_norm": 0.75390625, "learning_rate": 1.9779117740399876e-05, "loss": 1.3471, "step": 334 }, { "epoch": 0.10554214029094765, "grad_norm": 0.70703125, "learning_rate": 1.9776578863852748e-05, "loss": 1.4421, "step": 336 }, { "epoch": 0.10617036731648899, "grad_norm": 0.73828125, "learning_rate": 1.977403998730562e-05, "loss": 1.2823, "step": 338 }, { "epoch": 0.10679859434203035, "grad_norm": 0.76171875, "learning_rate": 1.977150111075849e-05, "loss": 1.463, "step": 340 }, { "epoch": 0.1074268213675717, "grad_norm": 0.74609375, "learning_rate": 1.9768962234211365e-05, "loss": 1.2987, "step": 342 }, { "epoch": 0.10805504839311306, "grad_norm": 0.88671875, "learning_rate": 1.9766423357664237e-05, "loss": 1.4113, "step": 344 }, { "epoch": 0.10868327541865441, "grad_norm": 0.7578125, "learning_rate": 1.9763884481117108e-05, "loss": 1.4153, "step": 346 }, { "epoch": 0.10931150244419577, "grad_norm": 0.703125, "learning_rate": 1.976134560456998e-05, "loss": 1.3976, "step": 348 }, { "epoch": 0.10993972946973712, "grad_norm": 0.78515625, "learning_rate": 1.975880672802285e-05, "loss": 1.395, "step": 350 }, { "epoch": 0.11056795649527848, "grad_norm": 0.83984375, "learning_rate": 1.9756267851475722e-05, "loss": 1.533, "step": 352 }, { "epoch": 0.11119618352081984, "grad_norm": 0.796875, "learning_rate": 1.9753728974928597e-05, "loss": 1.3265, "step": 354 }, { "epoch": 0.11182441054636119, "grad_norm": 0.76171875, "learning_rate": 1.9751190098381468e-05, "loss": 1.4088, "step": 356 }, { "epoch": 0.11245263757190255, "grad_norm": 0.8671875, "learning_rate": 1.974865122183434e-05, "loss": 1.4432, "step": 358 }, { "epoch": 0.1130808645974439, "grad_norm": 0.984375, "learning_rate": 1.974611234528721e-05, "loss": 1.2292, "step": 360 }, { "epoch": 0.11370909162298526, "grad_norm": 0.73828125, "learning_rate": 1.9743573468740086e-05, "loss": 1.3708, "step": 362 }, { "epoch": 0.11433731864852661, "grad_norm": 0.73046875, "learning_rate": 1.9741034592192957e-05, "loss": 1.2918, "step": 364 }, { "epoch": 0.11496554567406797, "grad_norm": 0.79296875, "learning_rate": 1.973849571564583e-05, "loss": 1.4335, "step": 366 }, { "epoch": 0.11559377269960933, "grad_norm": 0.78515625, "learning_rate": 1.97359568390987e-05, "loss": 1.2187, "step": 368 }, { "epoch": 0.11622199972515068, "grad_norm": 10.625, "learning_rate": 1.9733417962551575e-05, "loss": 1.2494, "step": 370 }, { "epoch": 0.11685022675069204, "grad_norm": 0.75, "learning_rate": 1.9730879086004443e-05, "loss": 1.2348, "step": 372 }, { "epoch": 0.11747845377623338, "grad_norm": 0.6953125, "learning_rate": 1.9728340209457317e-05, "loss": 1.3933, "step": 374 }, { "epoch": 0.11810668080177474, "grad_norm": 0.8515625, "learning_rate": 1.972580133291019e-05, "loss": 1.445, "step": 376 }, { "epoch": 0.11873490782731609, "grad_norm": 0.8515625, "learning_rate": 1.972326245636306e-05, "loss": 1.3521, "step": 378 }, { "epoch": 0.11936313485285745, "grad_norm": 0.69921875, "learning_rate": 1.972072357981593e-05, "loss": 1.4533, "step": 380 }, { "epoch": 0.1199913618783988, "grad_norm": 0.78515625, "learning_rate": 1.9718184703268806e-05, "loss": 1.2909, "step": 382 }, { "epoch": 0.12061958890394016, "grad_norm": 0.8203125, "learning_rate": 1.9715645826721678e-05, "loss": 1.5502, "step": 384 }, { "epoch": 0.12124781592948151, "grad_norm": 0.84765625, "learning_rate": 1.971310695017455e-05, "loss": 1.3525, "step": 386 }, { "epoch": 0.12187604295502287, "grad_norm": 0.796875, "learning_rate": 1.971056807362742e-05, "loss": 1.5028, "step": 388 }, { "epoch": 0.12250426998056423, "grad_norm": 6.21875, "learning_rate": 1.9708029197080295e-05, "loss": 1.3943, "step": 390 }, { "epoch": 0.12313249700610558, "grad_norm": 0.765625, "learning_rate": 1.9705490320533163e-05, "loss": 1.5042, "step": 392 }, { "epoch": 0.12376072403164694, "grad_norm": 0.79296875, "learning_rate": 1.9702951443986038e-05, "loss": 1.3527, "step": 394 }, { "epoch": 0.1243889510571883, "grad_norm": 0.734375, "learning_rate": 1.970041256743891e-05, "loss": 1.5268, "step": 396 }, { "epoch": 0.12501717808272964, "grad_norm": 0.77734375, "learning_rate": 1.969787369089178e-05, "loss": 1.2923, "step": 398 }, { "epoch": 0.125645405108271, "grad_norm": 0.85546875, "learning_rate": 1.9695334814344652e-05, "loss": 1.4865, "step": 400 }, { "epoch": 0.12627363213381235, "grad_norm": 0.828125, "learning_rate": 1.9692795937797527e-05, "loss": 1.2926, "step": 402 }, { "epoch": 0.1269018591593537, "grad_norm": 0.796875, "learning_rate": 1.96902570612504e-05, "loss": 1.3763, "step": 404 }, { "epoch": 0.12753008618489506, "grad_norm": 0.77734375, "learning_rate": 1.968771818470327e-05, "loss": 1.4208, "step": 406 }, { "epoch": 0.12815831321043641, "grad_norm": 0.8515625, "learning_rate": 1.968517930815614e-05, "loss": 1.2802, "step": 408 }, { "epoch": 0.12878654023597777, "grad_norm": 0.7578125, "learning_rate": 1.9682640431609016e-05, "loss": 1.3137, "step": 410 }, { "epoch": 0.12941476726151913, "grad_norm": 0.734375, "learning_rate": 1.9680101555061887e-05, "loss": 1.2313, "step": 412 }, { "epoch": 0.13004299428706048, "grad_norm": 0.73828125, "learning_rate": 1.967756267851476e-05, "loss": 1.3286, "step": 414 }, { "epoch": 0.13067122131260184, "grad_norm": 0.86328125, "learning_rate": 1.967502380196763e-05, "loss": 1.3544, "step": 416 }, { "epoch": 0.1312994483381432, "grad_norm": 0.796875, "learning_rate": 1.96724849254205e-05, "loss": 1.4726, "step": 418 }, { "epoch": 0.13192767536368455, "grad_norm": 0.71875, "learning_rate": 1.9669946048873376e-05, "loss": 1.3215, "step": 420 }, { "epoch": 0.1325559023892259, "grad_norm": 0.78515625, "learning_rate": 1.9667407172326248e-05, "loss": 1.5521, "step": 422 }, { "epoch": 0.13318412941476726, "grad_norm": 0.796875, "learning_rate": 1.966486829577912e-05, "loss": 1.3127, "step": 424 }, { "epoch": 0.13381235644030862, "grad_norm": 0.8359375, "learning_rate": 1.966232941923199e-05, "loss": 1.2696, "step": 426 }, { "epoch": 0.13444058346584997, "grad_norm": 0.8046875, "learning_rate": 1.9659790542684865e-05, "loss": 1.2138, "step": 428 }, { "epoch": 0.13506881049139133, "grad_norm": 0.7890625, "learning_rate": 1.9657251666137736e-05, "loss": 1.4204, "step": 430 }, { "epoch": 0.13569703751693268, "grad_norm": 0.75390625, "learning_rate": 1.9654712789590608e-05, "loss": 1.2865, "step": 432 }, { "epoch": 0.13632526454247404, "grad_norm": 0.73828125, "learning_rate": 1.965217391304348e-05, "loss": 1.2856, "step": 434 }, { "epoch": 0.1369534915680154, "grad_norm": 0.73046875, "learning_rate": 1.9649635036496354e-05, "loss": 1.4284, "step": 436 }, { "epoch": 0.13758171859355675, "grad_norm": 0.73828125, "learning_rate": 1.9647096159949225e-05, "loss": 1.3569, "step": 438 }, { "epoch": 0.1382099456190981, "grad_norm": 0.671875, "learning_rate": 1.9644557283402097e-05, "loss": 1.3295, "step": 440 }, { "epoch": 0.13883817264463946, "grad_norm": 0.7421875, "learning_rate": 1.9642018406854968e-05, "loss": 1.2948, "step": 442 }, { "epoch": 0.13946639967018082, "grad_norm": 0.79296875, "learning_rate": 1.963947953030784e-05, "loss": 1.4097, "step": 444 }, { "epoch": 0.14009462669572217, "grad_norm": 1.0703125, "learning_rate": 1.963694065376071e-05, "loss": 1.3674, "step": 446 }, { "epoch": 0.14072285372126353, "grad_norm": 0.8671875, "learning_rate": 1.9634401777213586e-05, "loss": 1.4544, "step": 448 }, { "epoch": 0.14135108074680489, "grad_norm": 0.72265625, "learning_rate": 1.9631862900666457e-05, "loss": 1.3385, "step": 450 }, { "epoch": 0.14197930777234624, "grad_norm": 0.75390625, "learning_rate": 1.962932402411933e-05, "loss": 1.3962, "step": 452 }, { "epoch": 0.1426075347978876, "grad_norm": 0.80078125, "learning_rate": 1.96267851475722e-05, "loss": 1.3257, "step": 454 }, { "epoch": 0.14323576182342895, "grad_norm": 0.8359375, "learning_rate": 1.9624246271025075e-05, "loss": 1.3572, "step": 456 }, { "epoch": 0.1438639888489703, "grad_norm": 0.73046875, "learning_rate": 1.9621707394477946e-05, "loss": 1.5115, "step": 458 }, { "epoch": 0.14449221587451166, "grad_norm": 0.7578125, "learning_rate": 1.9619168517930817e-05, "loss": 1.3532, "step": 460 }, { "epoch": 0.14512044290005302, "grad_norm": 0.8046875, "learning_rate": 1.961662964138369e-05, "loss": 1.3612, "step": 462 }, { "epoch": 0.14574866992559435, "grad_norm": 0.73828125, "learning_rate": 1.9614090764836564e-05, "loss": 1.3881, "step": 464 }, { "epoch": 0.1463768969511357, "grad_norm": 0.94140625, "learning_rate": 1.961155188828943e-05, "loss": 1.534, "step": 466 }, { "epoch": 0.14700512397667706, "grad_norm": 0.71484375, "learning_rate": 1.9609013011742306e-05, "loss": 1.4607, "step": 468 }, { "epoch": 0.14763335100221842, "grad_norm": 0.72265625, "learning_rate": 1.9606474135195178e-05, "loss": 1.3466, "step": 470 }, { "epoch": 0.14826157802775977, "grad_norm": 0.7109375, "learning_rate": 1.960393525864805e-05, "loss": 1.3187, "step": 472 }, { "epoch": 0.14888980505330113, "grad_norm": 0.77734375, "learning_rate": 1.960139638210092e-05, "loss": 1.36, "step": 474 }, { "epoch": 0.14951803207884248, "grad_norm": 0.859375, "learning_rate": 1.9598857505553795e-05, "loss": 1.2185, "step": 476 }, { "epoch": 0.15014625910438384, "grad_norm": 0.71484375, "learning_rate": 1.9596318629006667e-05, "loss": 1.4085, "step": 478 }, { "epoch": 0.1507744861299252, "grad_norm": 0.98046875, "learning_rate": 1.9593779752459538e-05, "loss": 1.3917, "step": 480 }, { "epoch": 0.15140271315546655, "grad_norm": 0.7421875, "learning_rate": 1.959124087591241e-05, "loss": 1.3497, "step": 482 }, { "epoch": 0.1520309401810079, "grad_norm": 0.76171875, "learning_rate": 1.9588701999365284e-05, "loss": 1.3855, "step": 484 }, { "epoch": 0.15265916720654926, "grad_norm": 0.81640625, "learning_rate": 1.9586163122818152e-05, "loss": 1.4071, "step": 486 }, { "epoch": 0.15328739423209062, "grad_norm": 0.80859375, "learning_rate": 1.9583624246271027e-05, "loss": 1.2817, "step": 488 }, { "epoch": 0.15391562125763197, "grad_norm": 0.75, "learning_rate": 1.9581085369723898e-05, "loss": 1.3758, "step": 490 }, { "epoch": 0.15454384828317333, "grad_norm": 0.78125, "learning_rate": 1.957854649317677e-05, "loss": 1.4021, "step": 492 }, { "epoch": 0.15517207530871469, "grad_norm": 0.75390625, "learning_rate": 1.957600761662964e-05, "loss": 1.4163, "step": 494 }, { "epoch": 0.15580030233425604, "grad_norm": 0.7890625, "learning_rate": 1.9573468740082516e-05, "loss": 1.3127, "step": 496 }, { "epoch": 0.1564285293597974, "grad_norm": 0.79296875, "learning_rate": 1.9570929863535387e-05, "loss": 1.2817, "step": 498 }, { "epoch": 0.15705675638533875, "grad_norm": 0.76171875, "learning_rate": 1.956839098698826e-05, "loss": 1.3561, "step": 500 }, { "epoch": 0.1576849834108801, "grad_norm": 0.76171875, "learning_rate": 1.956585211044113e-05, "loss": 1.317, "step": 502 }, { "epoch": 0.15831321043642146, "grad_norm": 1.3671875, "learning_rate": 1.9563313233894005e-05, "loss": 1.4507, "step": 504 }, { "epoch": 0.15894143746196282, "grad_norm": 1.0703125, "learning_rate": 1.9560774357346876e-05, "loss": 1.3715, "step": 506 }, { "epoch": 0.15956966448750418, "grad_norm": 0.765625, "learning_rate": 1.9558235480799747e-05, "loss": 1.3502, "step": 508 }, { "epoch": 0.16019789151304553, "grad_norm": 0.72265625, "learning_rate": 1.9555696604252622e-05, "loss": 1.4822, "step": 510 }, { "epoch": 0.1608261185385869, "grad_norm": 0.75, "learning_rate": 1.955315772770549e-05, "loss": 1.2966, "step": 512 }, { "epoch": 0.16145434556412824, "grad_norm": 0.80078125, "learning_rate": 1.9550618851158365e-05, "loss": 1.2823, "step": 514 }, { "epoch": 0.1620825725896696, "grad_norm": 0.87109375, "learning_rate": 1.9548079974611236e-05, "loss": 1.3542, "step": 516 }, { "epoch": 0.16271079961521095, "grad_norm": 0.72265625, "learning_rate": 1.9545541098064108e-05, "loss": 1.2341, "step": 518 }, { "epoch": 0.1633390266407523, "grad_norm": 0.90625, "learning_rate": 1.954300222151698e-05, "loss": 1.4864, "step": 520 }, { "epoch": 0.16396725366629367, "grad_norm": 0.80078125, "learning_rate": 1.9540463344969854e-05, "loss": 1.4035, "step": 522 }, { "epoch": 0.16459548069183502, "grad_norm": 0.7734375, "learning_rate": 1.9537924468422725e-05, "loss": 1.3945, "step": 524 }, { "epoch": 0.16522370771737638, "grad_norm": 0.94921875, "learning_rate": 1.9535385591875597e-05, "loss": 1.3526, "step": 526 }, { "epoch": 0.16585193474291773, "grad_norm": 0.71484375, "learning_rate": 1.9532846715328468e-05, "loss": 1.4006, "step": 528 }, { "epoch": 0.1664801617684591, "grad_norm": 0.703125, "learning_rate": 1.9530307838781343e-05, "loss": 1.3114, "step": 530 }, { "epoch": 0.16710838879400045, "grad_norm": 0.75390625, "learning_rate": 1.9527768962234214e-05, "loss": 1.4793, "step": 532 }, { "epoch": 0.16773661581954177, "grad_norm": 0.7734375, "learning_rate": 1.9525230085687086e-05, "loss": 1.3226, "step": 534 }, { "epoch": 0.16836484284508313, "grad_norm": 0.90625, "learning_rate": 1.9522691209139957e-05, "loss": 1.3562, "step": 536 }, { "epoch": 0.16899306987062448, "grad_norm": 0.91796875, "learning_rate": 1.952015233259283e-05, "loss": 1.3007, "step": 538 }, { "epoch": 0.16962129689616584, "grad_norm": 0.69140625, "learning_rate": 1.95176134560457e-05, "loss": 1.4449, "step": 540 }, { "epoch": 0.1702495239217072, "grad_norm": 0.74609375, "learning_rate": 1.9515074579498575e-05, "loss": 1.3835, "step": 542 }, { "epoch": 0.17087775094724855, "grad_norm": 0.74609375, "learning_rate": 1.9512535702951446e-05, "loss": 1.4734, "step": 544 }, { "epoch": 0.1715059779727899, "grad_norm": 0.7109375, "learning_rate": 1.9509996826404317e-05, "loss": 1.4759, "step": 546 }, { "epoch": 0.17213420499833126, "grad_norm": 0.6953125, "learning_rate": 1.950745794985719e-05, "loss": 1.3622, "step": 548 }, { "epoch": 0.17276243202387262, "grad_norm": 0.703125, "learning_rate": 1.9504919073310063e-05, "loss": 1.5, "step": 550 }, { "epoch": 0.17339065904941398, "grad_norm": 0.765625, "learning_rate": 1.9502380196762935e-05, "loss": 1.4584, "step": 552 }, { "epoch": 0.17401888607495533, "grad_norm": 0.78125, "learning_rate": 1.9499841320215806e-05, "loss": 1.1847, "step": 554 }, { "epoch": 0.1746471131004967, "grad_norm": 0.8046875, "learning_rate": 1.9497302443668678e-05, "loss": 1.4887, "step": 556 }, { "epoch": 0.17527534012603804, "grad_norm": 0.703125, "learning_rate": 1.9494763567121552e-05, "loss": 1.3411, "step": 558 }, { "epoch": 0.1759035671515794, "grad_norm": 0.8125, "learning_rate": 1.949222469057442e-05, "loss": 1.4525, "step": 560 }, { "epoch": 0.17653179417712075, "grad_norm": 0.78125, "learning_rate": 1.9489685814027295e-05, "loss": 1.2743, "step": 562 }, { "epoch": 0.1771600212026621, "grad_norm": 0.77734375, "learning_rate": 1.9487146937480167e-05, "loss": 1.385, "step": 564 }, { "epoch": 0.17778824822820347, "grad_norm": 0.89453125, "learning_rate": 1.9484608060933038e-05, "loss": 1.3988, "step": 566 }, { "epoch": 0.17841647525374482, "grad_norm": 0.78125, "learning_rate": 1.948206918438591e-05, "loss": 1.2637, "step": 568 }, { "epoch": 0.17904470227928618, "grad_norm": 0.73046875, "learning_rate": 1.9479530307838784e-05, "loss": 1.3832, "step": 570 }, { "epoch": 0.17967292930482753, "grad_norm": 0.83984375, "learning_rate": 1.9476991431291655e-05, "loss": 1.367, "step": 572 }, { "epoch": 0.1803011563303689, "grad_norm": 0.85546875, "learning_rate": 1.9474452554744527e-05, "loss": 1.3174, "step": 574 }, { "epoch": 0.18092938335591024, "grad_norm": 0.6875, "learning_rate": 1.9471913678197398e-05, "loss": 1.2966, "step": 576 }, { "epoch": 0.1815576103814516, "grad_norm": 0.796875, "learning_rate": 1.9469374801650273e-05, "loss": 1.4582, "step": 578 }, { "epoch": 0.18218583740699296, "grad_norm": 0.69921875, "learning_rate": 1.946683592510314e-05, "loss": 1.3229, "step": 580 }, { "epoch": 0.1828140644325343, "grad_norm": 0.734375, "learning_rate": 1.9464297048556016e-05, "loss": 1.2895, "step": 582 }, { "epoch": 0.18344229145807567, "grad_norm": 0.73046875, "learning_rate": 1.9461758172008887e-05, "loss": 1.5382, "step": 584 }, { "epoch": 0.18407051848361702, "grad_norm": 0.92578125, "learning_rate": 1.945921929546176e-05, "loss": 1.4349, "step": 586 }, { "epoch": 0.18469874550915838, "grad_norm": 0.828125, "learning_rate": 1.945668041891463e-05, "loss": 1.3861, "step": 588 }, { "epoch": 0.18532697253469974, "grad_norm": 0.76953125, "learning_rate": 1.9454141542367505e-05, "loss": 1.2897, "step": 590 }, { "epoch": 0.1859551995602411, "grad_norm": 0.8671875, "learning_rate": 1.9451602665820376e-05, "loss": 1.3362, "step": 592 }, { "epoch": 0.18658342658578245, "grad_norm": 0.8046875, "learning_rate": 1.9449063789273247e-05, "loss": 1.3954, "step": 594 }, { "epoch": 0.1872116536113238, "grad_norm": 0.734375, "learning_rate": 1.9446524912726122e-05, "loss": 1.3541, "step": 596 }, { "epoch": 0.18783988063686516, "grad_norm": 0.80859375, "learning_rate": 1.9443986036178994e-05, "loss": 1.4498, "step": 598 }, { "epoch": 0.18846810766240651, "grad_norm": 0.83203125, "learning_rate": 1.9441447159631865e-05, "loss": 1.3767, "step": 600 }, { "epoch": 0.18909633468794787, "grad_norm": 0.9453125, "learning_rate": 1.9438908283084736e-05, "loss": 1.245, "step": 602 }, { "epoch": 0.18972456171348923, "grad_norm": 0.66015625, "learning_rate": 1.943636940653761e-05, "loss": 1.4371, "step": 604 }, { "epoch": 0.19035278873903055, "grad_norm": 0.7890625, "learning_rate": 1.943383052999048e-05, "loss": 1.3194, "step": 606 }, { "epoch": 0.1909810157645719, "grad_norm": 0.7421875, "learning_rate": 1.9431291653443354e-05, "loss": 1.3339, "step": 608 }, { "epoch": 0.19160924279011327, "grad_norm": 0.7578125, "learning_rate": 1.9428752776896225e-05, "loss": 1.3773, "step": 610 }, { "epoch": 0.19223746981565462, "grad_norm": 0.7265625, "learning_rate": 1.9426213900349097e-05, "loss": 1.3456, "step": 612 }, { "epoch": 0.19286569684119598, "grad_norm": 1.015625, "learning_rate": 1.9423675023801968e-05, "loss": 1.3713, "step": 614 }, { "epoch": 0.19349392386673733, "grad_norm": 0.73828125, "learning_rate": 1.9421136147254843e-05, "loss": 1.4541, "step": 616 }, { "epoch": 0.1941221508922787, "grad_norm": 0.71484375, "learning_rate": 1.9418597270707714e-05, "loss": 1.4132, "step": 618 }, { "epoch": 0.19475037791782004, "grad_norm": 0.90234375, "learning_rate": 1.9416058394160586e-05, "loss": 1.461, "step": 620 }, { "epoch": 0.1953786049433614, "grad_norm": 0.765625, "learning_rate": 1.9413519517613457e-05, "loss": 1.2741, "step": 622 }, { "epoch": 0.19600683196890276, "grad_norm": 0.75, "learning_rate": 1.9410980641066332e-05, "loss": 1.4783, "step": 624 }, { "epoch": 0.1966350589944441, "grad_norm": 0.9296875, "learning_rate": 1.94084417645192e-05, "loss": 1.3676, "step": 626 }, { "epoch": 0.19726328601998547, "grad_norm": 0.83984375, "learning_rate": 1.9405902887972074e-05, "loss": 1.2958, "step": 628 }, { "epoch": 0.19789151304552682, "grad_norm": 0.8125, "learning_rate": 1.9403364011424946e-05, "loss": 1.3159, "step": 630 }, { "epoch": 0.19851974007106818, "grad_norm": 0.7578125, "learning_rate": 1.9400825134877817e-05, "loss": 1.3835, "step": 632 }, { "epoch": 0.19914796709660953, "grad_norm": 0.890625, "learning_rate": 1.939828625833069e-05, "loss": 1.2554, "step": 634 }, { "epoch": 0.1997761941221509, "grad_norm": 0.671875, "learning_rate": 1.9395747381783563e-05, "loss": 1.3815, "step": 636 }, { "epoch": 0.20040442114769225, "grad_norm": 0.78125, "learning_rate": 1.9393208505236435e-05, "loss": 1.4323, "step": 638 }, { "epoch": 0.2010326481732336, "grad_norm": 0.7265625, "learning_rate": 1.9390669628689306e-05, "loss": 1.3292, "step": 640 }, { "epoch": 0.20166087519877496, "grad_norm": 0.82421875, "learning_rate": 1.9388130752142178e-05, "loss": 1.2865, "step": 642 }, { "epoch": 0.20228910222431631, "grad_norm": 1.015625, "learning_rate": 1.9385591875595052e-05, "loss": 1.3822, "step": 644 }, { "epoch": 0.20291732924985767, "grad_norm": 0.75, "learning_rate": 1.9383052999047924e-05, "loss": 1.3657, "step": 646 }, { "epoch": 0.20354555627539903, "grad_norm": 0.82421875, "learning_rate": 1.9380514122500795e-05, "loss": 1.3554, "step": 648 }, { "epoch": 0.20417378330094038, "grad_norm": 0.75, "learning_rate": 1.9377975245953666e-05, "loss": 1.331, "step": 650 }, { "epoch": 0.20480201032648174, "grad_norm": 0.96484375, "learning_rate": 1.9375436369406538e-05, "loss": 1.3798, "step": 652 }, { "epoch": 0.2054302373520231, "grad_norm": 0.80078125, "learning_rate": 1.937289749285941e-05, "loss": 1.4126, "step": 654 }, { "epoch": 0.20605846437756445, "grad_norm": 0.7265625, "learning_rate": 1.9370358616312284e-05, "loss": 1.5412, "step": 656 }, { "epoch": 0.2066866914031058, "grad_norm": 0.6875, "learning_rate": 1.9367819739765155e-05, "loss": 1.4367, "step": 658 }, { "epoch": 0.20731491842864716, "grad_norm": 0.8828125, "learning_rate": 1.9365280863218027e-05, "loss": 1.3944, "step": 660 }, { "epoch": 0.20794314545418852, "grad_norm": 0.74609375, "learning_rate": 1.9362741986670898e-05, "loss": 1.4311, "step": 662 }, { "epoch": 0.20857137247972987, "grad_norm": 0.73046875, "learning_rate": 1.9360203110123773e-05, "loss": 1.452, "step": 664 }, { "epoch": 0.20919959950527123, "grad_norm": 0.73828125, "learning_rate": 1.9357664233576644e-05, "loss": 1.3529, "step": 666 }, { "epoch": 0.20982782653081258, "grad_norm": 0.703125, "learning_rate": 1.9355125357029516e-05, "loss": 1.3444, "step": 668 }, { "epoch": 0.21045605355635394, "grad_norm": 0.70703125, "learning_rate": 1.9352586480482387e-05, "loss": 1.352, "step": 670 }, { "epoch": 0.2110842805818953, "grad_norm": 0.81640625, "learning_rate": 1.9350047603935262e-05, "loss": 1.455, "step": 672 }, { "epoch": 0.21171250760743665, "grad_norm": 0.7578125, "learning_rate": 1.934750872738813e-05, "loss": 1.2581, "step": 674 }, { "epoch": 0.21234073463297798, "grad_norm": 0.8515625, "learning_rate": 1.9344969850841005e-05, "loss": 1.3224, "step": 676 }, { "epoch": 0.21296896165851933, "grad_norm": 0.6875, "learning_rate": 1.9342430974293876e-05, "loss": 1.4604, "step": 678 }, { "epoch": 0.2135971886840607, "grad_norm": 0.75, "learning_rate": 1.9339892097746747e-05, "loss": 1.2345, "step": 680 }, { "epoch": 0.21422541570960205, "grad_norm": 0.70703125, "learning_rate": 1.9337353221199622e-05, "loss": 1.4289, "step": 682 }, { "epoch": 0.2148536427351434, "grad_norm": 0.875, "learning_rate": 1.9334814344652494e-05, "loss": 1.4216, "step": 684 }, { "epoch": 0.21548186976068476, "grad_norm": 0.796875, "learning_rate": 1.9332275468105365e-05, "loss": 1.4541, "step": 686 }, { "epoch": 0.2161100967862261, "grad_norm": 0.83203125, "learning_rate": 1.9329736591558236e-05, "loss": 1.3089, "step": 688 }, { "epoch": 0.21673832381176747, "grad_norm": 0.8828125, "learning_rate": 1.932719771501111e-05, "loss": 1.3822, "step": 690 }, { "epoch": 0.21736655083730883, "grad_norm": 0.78125, "learning_rate": 1.9324658838463982e-05, "loss": 1.2839, "step": 692 }, { "epoch": 0.21799477786285018, "grad_norm": 0.7421875, "learning_rate": 1.9322119961916854e-05, "loss": 1.2813, "step": 694 }, { "epoch": 0.21862300488839154, "grad_norm": 0.7265625, "learning_rate": 1.9319581085369725e-05, "loss": 1.3437, "step": 696 }, { "epoch": 0.2192512319139329, "grad_norm": 0.69140625, "learning_rate": 1.93170422088226e-05, "loss": 1.3649, "step": 698 }, { "epoch": 0.21987945893947425, "grad_norm": 0.70703125, "learning_rate": 1.9314503332275468e-05, "loss": 1.3949, "step": 700 }, { "epoch": 0.2205076859650156, "grad_norm": 0.9921875, "learning_rate": 1.9311964455728343e-05, "loss": 1.3488, "step": 702 }, { "epoch": 0.22113591299055696, "grad_norm": 0.8671875, "learning_rate": 1.9309425579181214e-05, "loss": 1.268, "step": 704 }, { "epoch": 0.22176414001609832, "grad_norm": 0.875, "learning_rate": 1.9306886702634085e-05, "loss": 1.3855, "step": 706 }, { "epoch": 0.22239236704163967, "grad_norm": 0.765625, "learning_rate": 1.9304347826086957e-05, "loss": 1.2877, "step": 708 }, { "epoch": 0.22302059406718103, "grad_norm": 0.7734375, "learning_rate": 1.930180894953983e-05, "loss": 1.3324, "step": 710 }, { "epoch": 0.22364882109272238, "grad_norm": 0.75390625, "learning_rate": 1.9299270072992703e-05, "loss": 1.3602, "step": 712 }, { "epoch": 0.22427704811826374, "grad_norm": 0.76171875, "learning_rate": 1.9296731196445574e-05, "loss": 1.3006, "step": 714 }, { "epoch": 0.2249052751438051, "grad_norm": 0.7578125, "learning_rate": 1.9294192319898446e-05, "loss": 1.3667, "step": 716 }, { "epoch": 0.22553350216934645, "grad_norm": 0.70703125, "learning_rate": 1.929165344335132e-05, "loss": 1.3536, "step": 718 }, { "epoch": 0.2261617291948878, "grad_norm": 0.73828125, "learning_rate": 1.928911456680419e-05, "loss": 1.4343, "step": 720 }, { "epoch": 0.22678995622042916, "grad_norm": 0.76171875, "learning_rate": 1.9286575690257063e-05, "loss": 1.355, "step": 722 }, { "epoch": 0.22741818324597052, "grad_norm": 0.88671875, "learning_rate": 1.9284036813709935e-05, "loss": 1.3999, "step": 724 }, { "epoch": 0.22804641027151187, "grad_norm": 0.9140625, "learning_rate": 1.9281497937162806e-05, "loss": 1.3638, "step": 726 }, { "epoch": 0.22867463729705323, "grad_norm": 0.7265625, "learning_rate": 1.9278959060615677e-05, "loss": 1.2724, "step": 728 }, { "epoch": 0.22930286432259459, "grad_norm": 1.0, "learning_rate": 1.9276420184068552e-05, "loss": 1.3783, "step": 730 }, { "epoch": 0.22993109134813594, "grad_norm": 0.7578125, "learning_rate": 1.9273881307521424e-05, "loss": 1.2429, "step": 732 }, { "epoch": 0.2305593183736773, "grad_norm": 0.76171875, "learning_rate": 1.9271342430974295e-05, "loss": 1.4618, "step": 734 }, { "epoch": 0.23118754539921865, "grad_norm": 0.70703125, "learning_rate": 1.9268803554427166e-05, "loss": 1.3145, "step": 736 }, { "epoch": 0.23181577242476, "grad_norm": 0.74609375, "learning_rate": 1.926626467788004e-05, "loss": 1.3562, "step": 738 }, { "epoch": 0.23244399945030136, "grad_norm": 0.7734375, "learning_rate": 1.9263725801332913e-05, "loss": 1.3047, "step": 740 }, { "epoch": 0.23307222647584272, "grad_norm": 0.765625, "learning_rate": 1.9261186924785784e-05, "loss": 1.4534, "step": 742 }, { "epoch": 0.23370045350138408, "grad_norm": 0.84375, "learning_rate": 1.9258648048238655e-05, "loss": 1.3435, "step": 744 }, { "epoch": 0.23432868052692543, "grad_norm": 0.75, "learning_rate": 1.9256109171691527e-05, "loss": 1.3951, "step": 746 }, { "epoch": 0.23495690755246676, "grad_norm": 0.68359375, "learning_rate": 1.9253570295144398e-05, "loss": 1.3799, "step": 748 }, { "epoch": 0.23558513457800812, "grad_norm": 0.8046875, "learning_rate": 1.9251031418597273e-05, "loss": 1.5794, "step": 750 }, { "epoch": 0.23621336160354947, "grad_norm": 0.73828125, "learning_rate": 1.9248492542050144e-05, "loss": 1.3543, "step": 752 }, { "epoch": 0.23684158862909083, "grad_norm": 0.71484375, "learning_rate": 1.9245953665503016e-05, "loss": 1.2956, "step": 754 }, { "epoch": 0.23746981565463218, "grad_norm": 0.8359375, "learning_rate": 1.9243414788955887e-05, "loss": 1.2537, "step": 756 }, { "epoch": 0.23809804268017354, "grad_norm": 0.83203125, "learning_rate": 1.9240875912408762e-05, "loss": 1.3696, "step": 758 }, { "epoch": 0.2387262697057149, "grad_norm": 0.8359375, "learning_rate": 1.9238337035861633e-05, "loss": 1.4097, "step": 760 }, { "epoch": 0.23935449673125625, "grad_norm": 1.015625, "learning_rate": 1.9235798159314505e-05, "loss": 1.307, "step": 762 }, { "epoch": 0.2399827237567976, "grad_norm": 0.91015625, "learning_rate": 1.923325928276738e-05, "loss": 1.2294, "step": 764 }, { "epoch": 0.24061095078233896, "grad_norm": 0.796875, "learning_rate": 1.923072040622025e-05, "loss": 1.3091, "step": 766 }, { "epoch": 0.24123917780788032, "grad_norm": 0.859375, "learning_rate": 1.9228181529673122e-05, "loss": 1.3432, "step": 768 }, { "epoch": 0.24186740483342167, "grad_norm": 0.80078125, "learning_rate": 1.9225642653125993e-05, "loss": 1.3201, "step": 770 }, { "epoch": 0.24249563185896303, "grad_norm": 0.80078125, "learning_rate": 1.9223103776578865e-05, "loss": 1.4521, "step": 772 }, { "epoch": 0.24312385888450438, "grad_norm": 0.95703125, "learning_rate": 1.9220564900031736e-05, "loss": 1.4254, "step": 774 }, { "epoch": 0.24375208591004574, "grad_norm": 0.8125, "learning_rate": 1.921802602348461e-05, "loss": 1.3347, "step": 776 }, { "epoch": 0.2443803129355871, "grad_norm": 0.7890625, "learning_rate": 1.9215487146937482e-05, "loss": 1.2956, "step": 778 }, { "epoch": 0.24500853996112845, "grad_norm": 0.7734375, "learning_rate": 1.9212948270390354e-05, "loss": 1.3128, "step": 780 }, { "epoch": 0.2456367669866698, "grad_norm": 0.84375, "learning_rate": 1.9210409393843225e-05, "loss": 1.3065, "step": 782 }, { "epoch": 0.24626499401221116, "grad_norm": 1.015625, "learning_rate": 1.92078705172961e-05, "loss": 1.2968, "step": 784 }, { "epoch": 0.24689322103775252, "grad_norm": 0.87890625, "learning_rate": 1.920533164074897e-05, "loss": 1.3041, "step": 786 }, { "epoch": 0.24752144806329388, "grad_norm": 0.8046875, "learning_rate": 1.9202792764201843e-05, "loss": 1.4266, "step": 788 }, { "epoch": 0.24814967508883523, "grad_norm": 0.68359375, "learning_rate": 1.9200253887654714e-05, "loss": 1.4958, "step": 790 }, { "epoch": 0.2487779021143766, "grad_norm": 0.9375, "learning_rate": 1.919771501110759e-05, "loss": 1.4217, "step": 792 }, { "epoch": 0.24940612913991794, "grad_norm": 0.70703125, "learning_rate": 1.9195176134560457e-05, "loss": 1.3905, "step": 794 }, { "epoch": 0.25003435616545927, "grad_norm": 0.8203125, "learning_rate": 1.919263725801333e-05, "loss": 1.3715, "step": 796 }, { "epoch": 0.25066258319100065, "grad_norm": 0.9296875, "learning_rate": 1.9190098381466203e-05, "loss": 1.4086, "step": 798 }, { "epoch": 0.251290810216542, "grad_norm": 0.84375, "learning_rate": 1.9187559504919074e-05, "loss": 1.4157, "step": 800 }, { "epoch": 0.25191903724208337, "grad_norm": 0.7109375, "learning_rate": 1.9185020628371946e-05, "loss": 1.2557, "step": 802 }, { "epoch": 0.2525472642676247, "grad_norm": 0.7734375, "learning_rate": 1.918248175182482e-05, "loss": 1.3713, "step": 804 }, { "epoch": 0.2531754912931661, "grad_norm": 0.7265625, "learning_rate": 1.9179942875277692e-05, "loss": 1.3549, "step": 806 }, { "epoch": 0.2538037183187074, "grad_norm": 0.7734375, "learning_rate": 1.9177403998730563e-05, "loss": 1.4184, "step": 808 }, { "epoch": 0.2544319453442488, "grad_norm": 0.828125, "learning_rate": 1.9174865122183435e-05, "loss": 1.3112, "step": 810 }, { "epoch": 0.2550601723697901, "grad_norm": 0.7421875, "learning_rate": 1.917232624563631e-05, "loss": 1.3818, "step": 812 }, { "epoch": 0.2556883993953315, "grad_norm": 0.796875, "learning_rate": 1.9169787369089177e-05, "loss": 1.4245, "step": 814 }, { "epoch": 0.25631662642087283, "grad_norm": 0.91015625, "learning_rate": 1.9167248492542052e-05, "loss": 1.3986, "step": 816 }, { "epoch": 0.2569448534464142, "grad_norm": 0.7421875, "learning_rate": 1.9164709615994924e-05, "loss": 1.3054, "step": 818 }, { "epoch": 0.25757308047195554, "grad_norm": 0.8046875, "learning_rate": 1.9162170739447795e-05, "loss": 1.3303, "step": 820 }, { "epoch": 0.2582013074974969, "grad_norm": 0.77734375, "learning_rate": 1.9159631862900666e-05, "loss": 1.3877, "step": 822 }, { "epoch": 0.25882953452303825, "grad_norm": 0.8359375, "learning_rate": 1.915709298635354e-05, "loss": 1.3464, "step": 824 }, { "epoch": 0.25945776154857964, "grad_norm": 0.74609375, "learning_rate": 1.9154554109806412e-05, "loss": 1.4358, "step": 826 }, { "epoch": 0.26008598857412096, "grad_norm": 0.73046875, "learning_rate": 1.9152015233259284e-05, "loss": 1.2982, "step": 828 }, { "epoch": 0.26071421559966235, "grad_norm": 0.796875, "learning_rate": 1.9149476356712155e-05, "loss": 1.398, "step": 830 }, { "epoch": 0.2613424426252037, "grad_norm": 0.69921875, "learning_rate": 1.914693748016503e-05, "loss": 1.2641, "step": 832 }, { "epoch": 0.26197066965074506, "grad_norm": 0.88671875, "learning_rate": 1.91443986036179e-05, "loss": 1.3669, "step": 834 }, { "epoch": 0.2625988966762864, "grad_norm": 0.796875, "learning_rate": 1.9141859727070773e-05, "loss": 1.3182, "step": 836 }, { "epoch": 0.26322712370182777, "grad_norm": 0.734375, "learning_rate": 1.9139320850523644e-05, "loss": 1.3939, "step": 838 }, { "epoch": 0.2638553507273691, "grad_norm": 0.66796875, "learning_rate": 1.9136781973976516e-05, "loss": 1.4948, "step": 840 }, { "epoch": 0.2644835777529105, "grad_norm": 0.88671875, "learning_rate": 1.9134243097429387e-05, "loss": 1.34, "step": 842 }, { "epoch": 0.2651118047784518, "grad_norm": 0.890625, "learning_rate": 1.913170422088226e-05, "loss": 1.3576, "step": 844 }, { "epoch": 0.2657400318039932, "grad_norm": 0.71875, "learning_rate": 1.9129165344335133e-05, "loss": 1.3366, "step": 846 }, { "epoch": 0.2663682588295345, "grad_norm": 0.8359375, "learning_rate": 1.9126626467788004e-05, "loss": 1.4665, "step": 848 }, { "epoch": 0.2669964858550759, "grad_norm": 0.69140625, "learning_rate": 1.912408759124088e-05, "loss": 1.4036, "step": 850 }, { "epoch": 0.26762471288061723, "grad_norm": 0.73046875, "learning_rate": 1.912154871469375e-05, "loss": 1.2714, "step": 852 }, { "epoch": 0.26825293990615856, "grad_norm": 0.71875, "learning_rate": 1.9119009838146622e-05, "loss": 1.3858, "step": 854 }, { "epoch": 0.26888116693169994, "grad_norm": 0.734375, "learning_rate": 1.9116470961599493e-05, "loss": 1.4882, "step": 856 }, { "epoch": 0.26950939395724127, "grad_norm": 0.77734375, "learning_rate": 1.9113932085052368e-05, "loss": 1.2592, "step": 858 }, { "epoch": 0.27013762098278266, "grad_norm": 0.75, "learning_rate": 1.911139320850524e-05, "loss": 1.4349, "step": 860 }, { "epoch": 0.270765848008324, "grad_norm": 0.921875, "learning_rate": 1.910885433195811e-05, "loss": 1.2003, "step": 862 }, { "epoch": 0.27139407503386537, "grad_norm": 0.703125, "learning_rate": 1.9106315455410982e-05, "loss": 1.4485, "step": 864 }, { "epoch": 0.2720223020594067, "grad_norm": 0.78125, "learning_rate": 1.9103776578863854e-05, "loss": 1.2389, "step": 866 }, { "epoch": 0.2726505290849481, "grad_norm": 0.75, "learning_rate": 1.9101237702316725e-05, "loss": 1.4348, "step": 868 }, { "epoch": 0.2732787561104894, "grad_norm": 0.78125, "learning_rate": 1.90986988257696e-05, "loss": 1.4559, "step": 870 }, { "epoch": 0.2739069831360308, "grad_norm": 0.796875, "learning_rate": 1.909615994922247e-05, "loss": 1.4004, "step": 872 }, { "epoch": 0.2745352101615721, "grad_norm": 0.8046875, "learning_rate": 1.9093621072675343e-05, "loss": 1.3105, "step": 874 }, { "epoch": 0.2751634371871135, "grad_norm": 0.78125, "learning_rate": 1.9091082196128214e-05, "loss": 1.2796, "step": 876 }, { "epoch": 0.27579166421265483, "grad_norm": 0.74609375, "learning_rate": 1.908854331958109e-05, "loss": 1.4628, "step": 878 }, { "epoch": 0.2764198912381962, "grad_norm": 0.71484375, "learning_rate": 1.908600444303396e-05, "loss": 1.3618, "step": 880 }, { "epoch": 0.27704811826373754, "grad_norm": 0.73828125, "learning_rate": 1.908346556648683e-05, "loss": 1.3635, "step": 882 }, { "epoch": 0.2776763452892789, "grad_norm": 0.69921875, "learning_rate": 1.9080926689939703e-05, "loss": 1.3921, "step": 884 }, { "epoch": 0.27830457231482025, "grad_norm": 0.70703125, "learning_rate": 1.9078387813392578e-05, "loss": 1.3431, "step": 886 }, { "epoch": 0.27893279934036164, "grad_norm": 0.796875, "learning_rate": 1.9075848936845446e-05, "loss": 1.3725, "step": 888 }, { "epoch": 0.27956102636590296, "grad_norm": 0.6640625, "learning_rate": 1.907331006029832e-05, "loss": 1.2754, "step": 890 }, { "epoch": 0.28018925339144435, "grad_norm": 0.99609375, "learning_rate": 1.9070771183751192e-05, "loss": 1.1762, "step": 892 }, { "epoch": 0.2808174804169857, "grad_norm": 0.80859375, "learning_rate": 1.9068232307204063e-05, "loss": 1.301, "step": 894 }, { "epoch": 0.28144570744252706, "grad_norm": 0.68359375, "learning_rate": 1.9065693430656935e-05, "loss": 1.2999, "step": 896 }, { "epoch": 0.2820739344680684, "grad_norm": 0.76171875, "learning_rate": 1.906315455410981e-05, "loss": 1.355, "step": 898 }, { "epoch": 0.28270216149360977, "grad_norm": 0.71875, "learning_rate": 1.906061567756268e-05, "loss": 1.4332, "step": 900 }, { "epoch": 0.2833303885191511, "grad_norm": 0.96875, "learning_rate": 1.9058076801015552e-05, "loss": 1.4116, "step": 902 }, { "epoch": 0.2839586155446925, "grad_norm": 0.8203125, "learning_rate": 1.9055537924468423e-05, "loss": 1.3064, "step": 904 }, { "epoch": 0.2845868425702338, "grad_norm": 0.81640625, "learning_rate": 1.9052999047921298e-05, "loss": 1.5111, "step": 906 }, { "epoch": 0.2852150695957752, "grad_norm": 0.8125, "learning_rate": 1.9050460171374166e-05, "loss": 1.2457, "step": 908 }, { "epoch": 0.2858432966213165, "grad_norm": 0.78125, "learning_rate": 1.904792129482704e-05, "loss": 1.346, "step": 910 }, { "epoch": 0.2864715236468579, "grad_norm": 0.75390625, "learning_rate": 1.9045382418279912e-05, "loss": 1.3722, "step": 912 }, { "epoch": 0.28709975067239923, "grad_norm": 0.8203125, "learning_rate": 1.9042843541732784e-05, "loss": 1.3245, "step": 914 }, { "epoch": 0.2877279776979406, "grad_norm": 0.87109375, "learning_rate": 1.9040304665185655e-05, "loss": 1.42, "step": 916 }, { "epoch": 0.28835620472348195, "grad_norm": 0.83984375, "learning_rate": 1.903776578863853e-05, "loss": 1.405, "step": 918 }, { "epoch": 0.28898443174902333, "grad_norm": 0.703125, "learning_rate": 1.90352269120914e-05, "loss": 1.3066, "step": 920 }, { "epoch": 0.28961265877456466, "grad_norm": 0.8046875, "learning_rate": 1.9032688035544273e-05, "loss": 1.3226, "step": 922 }, { "epoch": 0.29024088580010604, "grad_norm": 0.875, "learning_rate": 1.9030149158997144e-05, "loss": 1.1937, "step": 924 }, { "epoch": 0.29086911282564737, "grad_norm": 0.78125, "learning_rate": 1.902761028245002e-05, "loss": 1.3474, "step": 926 }, { "epoch": 0.2914973398511887, "grad_norm": 0.76171875, "learning_rate": 1.9025071405902887e-05, "loss": 1.3306, "step": 928 }, { "epoch": 0.2921255668767301, "grad_norm": 0.89453125, "learning_rate": 1.902253252935576e-05, "loss": 1.3498, "step": 930 }, { "epoch": 0.2927537939022714, "grad_norm": 0.92578125, "learning_rate": 1.9019993652808633e-05, "loss": 1.4435, "step": 932 }, { "epoch": 0.2933820209278128, "grad_norm": 0.75, "learning_rate": 1.9017454776261504e-05, "loss": 1.3682, "step": 934 }, { "epoch": 0.2940102479533541, "grad_norm": 0.8203125, "learning_rate": 1.901491589971438e-05, "loss": 1.3391, "step": 936 }, { "epoch": 0.2946384749788955, "grad_norm": 0.7109375, "learning_rate": 1.901237702316725e-05, "loss": 1.5098, "step": 938 }, { "epoch": 0.29526670200443683, "grad_norm": 0.7109375, "learning_rate": 1.9009838146620122e-05, "loss": 1.4432, "step": 940 }, { "epoch": 0.2958949290299782, "grad_norm": 0.703125, "learning_rate": 1.9007299270072993e-05, "loss": 1.3789, "step": 942 }, { "epoch": 0.29652315605551954, "grad_norm": 0.703125, "learning_rate": 1.9004760393525868e-05, "loss": 1.2943, "step": 944 }, { "epoch": 0.2971513830810609, "grad_norm": 0.8671875, "learning_rate": 1.900222151697874e-05, "loss": 1.3753, "step": 946 }, { "epoch": 0.29777961010660225, "grad_norm": 0.70703125, "learning_rate": 1.899968264043161e-05, "loss": 1.3704, "step": 948 }, { "epoch": 0.29840783713214364, "grad_norm": 0.78515625, "learning_rate": 1.8997143763884482e-05, "loss": 1.4176, "step": 950 }, { "epoch": 0.29903606415768497, "grad_norm": 0.7890625, "learning_rate": 1.8994604887337357e-05, "loss": 1.2448, "step": 952 }, { "epoch": 0.29966429118322635, "grad_norm": 0.7421875, "learning_rate": 1.8992066010790225e-05, "loss": 1.2357, "step": 954 }, { "epoch": 0.3002925182087677, "grad_norm": 0.72265625, "learning_rate": 1.89895271342431e-05, "loss": 1.4002, "step": 956 }, { "epoch": 0.30092074523430906, "grad_norm": 0.796875, "learning_rate": 1.898698825769597e-05, "loss": 1.3756, "step": 958 }, { "epoch": 0.3015489722598504, "grad_norm": 0.75390625, "learning_rate": 1.8984449381148842e-05, "loss": 1.2851, "step": 960 }, { "epoch": 0.3021771992853918, "grad_norm": 0.79296875, "learning_rate": 1.8981910504601714e-05, "loss": 1.3339, "step": 962 }, { "epoch": 0.3028054263109331, "grad_norm": 0.6953125, "learning_rate": 1.897937162805459e-05, "loss": 1.4284, "step": 964 }, { "epoch": 0.3034336533364745, "grad_norm": 0.83203125, "learning_rate": 1.897683275150746e-05, "loss": 1.3142, "step": 966 }, { "epoch": 0.3040618803620158, "grad_norm": 0.76953125, "learning_rate": 1.897429387496033e-05, "loss": 1.4217, "step": 968 }, { "epoch": 0.3046901073875572, "grad_norm": 0.7890625, "learning_rate": 1.8971754998413203e-05, "loss": 1.4308, "step": 970 }, { "epoch": 0.3053183344130985, "grad_norm": 0.75390625, "learning_rate": 1.8969216121866078e-05, "loss": 1.2463, "step": 972 }, { "epoch": 0.3059465614386399, "grad_norm": 0.72265625, "learning_rate": 1.896667724531895e-05, "loss": 1.3149, "step": 974 }, { "epoch": 0.30657478846418124, "grad_norm": 0.91796875, "learning_rate": 1.896413836877182e-05, "loss": 1.3623, "step": 976 }, { "epoch": 0.3072030154897226, "grad_norm": 0.69921875, "learning_rate": 1.8961599492224692e-05, "loss": 1.5585, "step": 978 }, { "epoch": 0.30783124251526395, "grad_norm": 0.71875, "learning_rate": 1.8959060615677563e-05, "loss": 1.2155, "step": 980 }, { "epoch": 0.30845946954080533, "grad_norm": 0.703125, "learning_rate": 1.8956521739130434e-05, "loss": 1.384, "step": 982 }, { "epoch": 0.30908769656634666, "grad_norm": 0.890625, "learning_rate": 1.895398286258331e-05, "loss": 1.2724, "step": 984 }, { "epoch": 0.30971592359188804, "grad_norm": 0.75, "learning_rate": 1.895144398603618e-05, "loss": 1.3504, "step": 986 }, { "epoch": 0.31034415061742937, "grad_norm": 0.7578125, "learning_rate": 1.8948905109489052e-05, "loss": 1.3144, "step": 988 }, { "epoch": 0.31097237764297075, "grad_norm": 0.71484375, "learning_rate": 1.8946366232941923e-05, "loss": 1.3399, "step": 990 }, { "epoch": 0.3116006046685121, "grad_norm": 0.796875, "learning_rate": 1.8943827356394798e-05, "loss": 1.3355, "step": 992 }, { "epoch": 0.31222883169405347, "grad_norm": 0.78125, "learning_rate": 1.894128847984767e-05, "loss": 1.2823, "step": 994 }, { "epoch": 0.3128570587195948, "grad_norm": 0.78125, "learning_rate": 1.893874960330054e-05, "loss": 1.4969, "step": 996 }, { "epoch": 0.3134852857451361, "grad_norm": 0.7890625, "learning_rate": 1.8936210726753412e-05, "loss": 1.3046, "step": 998 }, { "epoch": 0.3141135127706775, "grad_norm": 0.7109375, "learning_rate": 1.8933671850206287e-05, "loss": 1.4317, "step": 1000 }, { "epoch": 0.31474173979621883, "grad_norm": 0.71875, "learning_rate": 1.8931132973659155e-05, "loss": 1.3786, "step": 1002 }, { "epoch": 0.3153699668217602, "grad_norm": 0.73046875, "learning_rate": 1.892859409711203e-05, "loss": 1.3259, "step": 1004 }, { "epoch": 0.31599819384730155, "grad_norm": 0.72265625, "learning_rate": 1.89260552205649e-05, "loss": 1.3619, "step": 1006 }, { "epoch": 0.31662642087284293, "grad_norm": 0.7578125, "learning_rate": 1.8923516344017773e-05, "loss": 1.4299, "step": 1008 }, { "epoch": 0.31725464789838426, "grad_norm": 0.78515625, "learning_rate": 1.8920977467470644e-05, "loss": 1.389, "step": 1010 }, { "epoch": 0.31788287492392564, "grad_norm": 0.8046875, "learning_rate": 1.891843859092352e-05, "loss": 1.3459, "step": 1012 }, { "epoch": 0.31851110194946697, "grad_norm": 0.765625, "learning_rate": 1.891589971437639e-05, "loss": 1.4309, "step": 1014 }, { "epoch": 0.31913932897500835, "grad_norm": 0.76953125, "learning_rate": 1.891336083782926e-05, "loss": 1.3712, "step": 1016 }, { "epoch": 0.3197675560005497, "grad_norm": 0.80859375, "learning_rate": 1.8910821961282133e-05, "loss": 1.3044, "step": 1018 }, { "epoch": 0.32039578302609106, "grad_norm": 0.6796875, "learning_rate": 1.8908283084735008e-05, "loss": 1.3589, "step": 1020 }, { "epoch": 0.3210240100516324, "grad_norm": 0.69140625, "learning_rate": 1.890574420818788e-05, "loss": 1.2593, "step": 1022 }, { "epoch": 0.3216522370771738, "grad_norm": 0.87109375, "learning_rate": 1.890320533164075e-05, "loss": 1.3657, "step": 1024 }, { "epoch": 0.3222804641027151, "grad_norm": 0.6796875, "learning_rate": 1.8900666455093625e-05, "loss": 1.2129, "step": 1026 }, { "epoch": 0.3229086911282565, "grad_norm": 0.71875, "learning_rate": 1.8898127578546493e-05, "loss": 1.09, "step": 1028 }, { "epoch": 0.3235369181537978, "grad_norm": 0.8671875, "learning_rate": 1.8895588701999368e-05, "loss": 1.3569, "step": 1030 }, { "epoch": 0.3241651451793392, "grad_norm": 0.78515625, "learning_rate": 1.889304982545224e-05, "loss": 1.4419, "step": 1032 }, { "epoch": 0.3247933722048805, "grad_norm": 0.7578125, "learning_rate": 1.889051094890511e-05, "loss": 1.3802, "step": 1034 }, { "epoch": 0.3254215992304219, "grad_norm": 0.75390625, "learning_rate": 1.8887972072357982e-05, "loss": 1.312, "step": 1036 }, { "epoch": 0.32604982625596324, "grad_norm": 0.74609375, "learning_rate": 1.8885433195810857e-05, "loss": 1.4378, "step": 1038 }, { "epoch": 0.3266780532815046, "grad_norm": 0.83203125, "learning_rate": 1.8882894319263728e-05, "loss": 1.2541, "step": 1040 }, { "epoch": 0.32730628030704595, "grad_norm": 0.7421875, "learning_rate": 1.88803554427166e-05, "loss": 1.3656, "step": 1042 }, { "epoch": 0.32793450733258733, "grad_norm": 0.7578125, "learning_rate": 1.887781656616947e-05, "loss": 1.4039, "step": 1044 }, { "epoch": 0.32856273435812866, "grad_norm": 0.72265625, "learning_rate": 1.8875277689622346e-05, "loss": 1.3563, "step": 1046 }, { "epoch": 0.32919096138367004, "grad_norm": 0.88671875, "learning_rate": 1.8872738813075214e-05, "loss": 1.285, "step": 1048 }, { "epoch": 0.3298191884092114, "grad_norm": 0.84375, "learning_rate": 1.887019993652809e-05, "loss": 1.2465, "step": 1050 }, { "epoch": 0.33044741543475276, "grad_norm": 0.92578125, "learning_rate": 1.886766105998096e-05, "loss": 1.2184, "step": 1052 }, { "epoch": 0.3310756424602941, "grad_norm": 0.69921875, "learning_rate": 1.886512218343383e-05, "loss": 1.3098, "step": 1054 }, { "epoch": 0.33170386948583547, "grad_norm": 0.76171875, "learning_rate": 1.8862583306886703e-05, "loss": 1.318, "step": 1056 }, { "epoch": 0.3323320965113768, "grad_norm": 0.91015625, "learning_rate": 1.8860044430339577e-05, "loss": 1.2984, "step": 1058 }, { "epoch": 0.3329603235369182, "grad_norm": 0.78515625, "learning_rate": 1.885750555379245e-05, "loss": 1.4075, "step": 1060 }, { "epoch": 0.3335885505624595, "grad_norm": 0.94140625, "learning_rate": 1.885496667724532e-05, "loss": 1.354, "step": 1062 }, { "epoch": 0.3342167775880009, "grad_norm": 0.74609375, "learning_rate": 1.885242780069819e-05, "loss": 1.2434, "step": 1064 }, { "epoch": 0.3348450046135422, "grad_norm": 0.8359375, "learning_rate": 1.8849888924151066e-05, "loss": 1.4308, "step": 1066 }, { "epoch": 0.33547323163908355, "grad_norm": 0.8984375, "learning_rate": 1.8847350047603938e-05, "loss": 1.2561, "step": 1068 }, { "epoch": 0.33610145866462493, "grad_norm": 0.875, "learning_rate": 1.884481117105681e-05, "loss": 1.4753, "step": 1070 }, { "epoch": 0.33672968569016626, "grad_norm": 0.69921875, "learning_rate": 1.884227229450968e-05, "loss": 1.369, "step": 1072 }, { "epoch": 0.33735791271570764, "grad_norm": 0.76171875, "learning_rate": 1.8839733417962552e-05, "loss": 1.4776, "step": 1074 }, { "epoch": 0.33798613974124897, "grad_norm": 0.73046875, "learning_rate": 1.8837194541415423e-05, "loss": 1.3619, "step": 1076 }, { "epoch": 0.33861436676679035, "grad_norm": 0.77734375, "learning_rate": 1.8834655664868298e-05, "loss": 1.2684, "step": 1078 }, { "epoch": 0.3392425937923317, "grad_norm": 0.7421875, "learning_rate": 1.883211678832117e-05, "loss": 1.4172, "step": 1080 }, { "epoch": 0.33987082081787306, "grad_norm": 0.890625, "learning_rate": 1.882957791177404e-05, "loss": 1.501, "step": 1082 }, { "epoch": 0.3404990478434144, "grad_norm": 0.82421875, "learning_rate": 1.8827039035226912e-05, "loss": 1.4823, "step": 1084 }, { "epoch": 0.3411272748689558, "grad_norm": 0.8828125, "learning_rate": 1.8824500158679787e-05, "loss": 1.2784, "step": 1086 }, { "epoch": 0.3417555018944971, "grad_norm": 0.76171875, "learning_rate": 1.882196128213266e-05, "loss": 1.359, "step": 1088 }, { "epoch": 0.3423837289200385, "grad_norm": 0.79296875, "learning_rate": 1.881942240558553e-05, "loss": 1.2725, "step": 1090 }, { "epoch": 0.3430119559455798, "grad_norm": 0.76171875, "learning_rate": 1.88168835290384e-05, "loss": 1.2185, "step": 1092 }, { "epoch": 0.3436401829711212, "grad_norm": 0.703125, "learning_rate": 1.8814344652491276e-05, "loss": 1.3709, "step": 1094 }, { "epoch": 0.3442684099966625, "grad_norm": 0.79296875, "learning_rate": 1.8811805775944144e-05, "loss": 1.4139, "step": 1096 }, { "epoch": 0.3448966370222039, "grad_norm": 0.69921875, "learning_rate": 1.880926689939702e-05, "loss": 1.5253, "step": 1098 }, { "epoch": 0.34552486404774524, "grad_norm": 0.72265625, "learning_rate": 1.880672802284989e-05, "loss": 1.2929, "step": 1100 }, { "epoch": 0.3461530910732866, "grad_norm": 0.90625, "learning_rate": 1.880418914630276e-05, "loss": 1.3314, "step": 1102 }, { "epoch": 0.34678131809882795, "grad_norm": 0.70703125, "learning_rate": 1.8801650269755633e-05, "loss": 1.1409, "step": 1104 }, { "epoch": 0.34740954512436933, "grad_norm": 0.765625, "learning_rate": 1.8799111393208508e-05, "loss": 1.4453, "step": 1106 }, { "epoch": 0.34803777214991066, "grad_norm": 0.671875, "learning_rate": 1.879657251666138e-05, "loss": 1.3495, "step": 1108 }, { "epoch": 0.34866599917545205, "grad_norm": 0.77734375, "learning_rate": 1.879403364011425e-05, "loss": 1.3406, "step": 1110 }, { "epoch": 0.3492942262009934, "grad_norm": 0.85546875, "learning_rate": 1.8791494763567125e-05, "loss": 1.2358, "step": 1112 }, { "epoch": 0.34992245322653476, "grad_norm": 0.83984375, "learning_rate": 1.8788955887019997e-05, "loss": 1.3972, "step": 1114 }, { "epoch": 0.3505506802520761, "grad_norm": 0.72265625, "learning_rate": 1.8786417010472868e-05, "loss": 1.3597, "step": 1116 }, { "epoch": 0.35117890727761747, "grad_norm": 0.66015625, "learning_rate": 1.878387813392574e-05, "loss": 1.3003, "step": 1118 }, { "epoch": 0.3518071343031588, "grad_norm": 0.86328125, "learning_rate": 1.8781339257378614e-05, "loss": 1.2663, "step": 1120 }, { "epoch": 0.3524353613287002, "grad_norm": 0.73828125, "learning_rate": 1.8778800380831482e-05, "loss": 1.4089, "step": 1122 }, { "epoch": 0.3530635883542415, "grad_norm": 0.828125, "learning_rate": 1.8776261504284357e-05, "loss": 1.3793, "step": 1124 }, { "epoch": 0.3536918153797829, "grad_norm": 0.796875, "learning_rate": 1.8773722627737228e-05, "loss": 1.4041, "step": 1126 }, { "epoch": 0.3543200424053242, "grad_norm": 0.8046875, "learning_rate": 1.87711837511901e-05, "loss": 1.252, "step": 1128 }, { "epoch": 0.3549482694308656, "grad_norm": 0.76953125, "learning_rate": 1.876864487464297e-05, "loss": 1.3771, "step": 1130 }, { "epoch": 0.35557649645640693, "grad_norm": 0.86328125, "learning_rate": 1.8766105998095846e-05, "loss": 1.2952, "step": 1132 }, { "epoch": 0.3562047234819483, "grad_norm": 0.7734375, "learning_rate": 1.8763567121548717e-05, "loss": 1.2377, "step": 1134 }, { "epoch": 0.35683295050748964, "grad_norm": 0.78125, "learning_rate": 1.876102824500159e-05, "loss": 1.429, "step": 1136 }, { "epoch": 0.35746117753303097, "grad_norm": 0.7734375, "learning_rate": 1.875848936845446e-05, "loss": 1.3617, "step": 1138 }, { "epoch": 0.35808940455857236, "grad_norm": 0.7109375, "learning_rate": 1.8755950491907335e-05, "loss": 1.4136, "step": 1140 }, { "epoch": 0.3587176315841137, "grad_norm": 0.80859375, "learning_rate": 1.8753411615360203e-05, "loss": 1.2859, "step": 1142 }, { "epoch": 0.35934585860965507, "grad_norm": 0.6796875, "learning_rate": 1.8750872738813077e-05, "loss": 1.2145, "step": 1144 }, { "epoch": 0.3599740856351964, "grad_norm": 0.70703125, "learning_rate": 1.874833386226595e-05, "loss": 1.294, "step": 1146 }, { "epoch": 0.3606023126607378, "grad_norm": 0.8203125, "learning_rate": 1.874579498571882e-05, "loss": 1.1749, "step": 1148 }, { "epoch": 0.3612305396862791, "grad_norm": 0.75, "learning_rate": 1.874325610917169e-05, "loss": 1.2759, "step": 1150 }, { "epoch": 0.3618587667118205, "grad_norm": 0.76953125, "learning_rate": 1.8740717232624566e-05, "loss": 1.2798, "step": 1152 }, { "epoch": 0.3624869937373618, "grad_norm": 0.83203125, "learning_rate": 1.8738178356077438e-05, "loss": 1.3493, "step": 1154 }, { "epoch": 0.3631152207629032, "grad_norm": 0.76953125, "learning_rate": 1.873563947953031e-05, "loss": 1.4311, "step": 1156 }, { "epoch": 0.36374344778844453, "grad_norm": 0.765625, "learning_rate": 1.873310060298318e-05, "loss": 1.2613, "step": 1158 }, { "epoch": 0.3643716748139859, "grad_norm": 1.0078125, "learning_rate": 1.8730561726436055e-05, "loss": 1.3474, "step": 1160 }, { "epoch": 0.36499990183952724, "grad_norm": 0.7109375, "learning_rate": 1.8728022849888923e-05, "loss": 1.4257, "step": 1162 }, { "epoch": 0.3656281288650686, "grad_norm": 0.78125, "learning_rate": 1.8725483973341798e-05, "loss": 1.3411, "step": 1164 }, { "epoch": 0.36625635589060995, "grad_norm": 0.734375, "learning_rate": 1.872294509679467e-05, "loss": 1.3309, "step": 1166 }, { "epoch": 0.36688458291615134, "grad_norm": 0.8984375, "learning_rate": 1.872040622024754e-05, "loss": 1.4467, "step": 1168 }, { "epoch": 0.36751280994169266, "grad_norm": 0.8515625, "learning_rate": 1.8717867343700412e-05, "loss": 1.2754, "step": 1170 }, { "epoch": 0.36814103696723405, "grad_norm": 0.7890625, "learning_rate": 1.8715328467153287e-05, "loss": 1.4556, "step": 1172 }, { "epoch": 0.3687692639927754, "grad_norm": 0.84375, "learning_rate": 1.871278959060616e-05, "loss": 1.3598, "step": 1174 }, { "epoch": 0.36939749101831676, "grad_norm": 0.6875, "learning_rate": 1.871025071405903e-05, "loss": 1.2428, "step": 1176 }, { "epoch": 0.3700257180438581, "grad_norm": 0.8046875, "learning_rate": 1.87077118375119e-05, "loss": 1.3761, "step": 1178 }, { "epoch": 0.37065394506939947, "grad_norm": 0.78515625, "learning_rate": 1.8705172960964776e-05, "loss": 1.3929, "step": 1180 }, { "epoch": 0.3712821720949408, "grad_norm": 0.8671875, "learning_rate": 1.8702634084417647e-05, "loss": 1.2633, "step": 1182 }, { "epoch": 0.3719103991204822, "grad_norm": 0.828125, "learning_rate": 1.870009520787052e-05, "loss": 1.4286, "step": 1184 }, { "epoch": 0.3725386261460235, "grad_norm": 0.7734375, "learning_rate": 1.869755633132339e-05, "loss": 1.2967, "step": 1186 }, { "epoch": 0.3731668531715649, "grad_norm": 1.015625, "learning_rate": 1.869501745477626e-05, "loss": 1.3566, "step": 1188 }, { "epoch": 0.3737950801971062, "grad_norm": 0.71875, "learning_rate": 1.8692478578229133e-05, "loss": 1.3837, "step": 1190 }, { "epoch": 0.3744233072226476, "grad_norm": 0.9296875, "learning_rate": 1.8689939701682008e-05, "loss": 1.369, "step": 1192 }, { "epoch": 0.37505153424818893, "grad_norm": 0.71484375, "learning_rate": 1.868740082513488e-05, "loss": 1.4206, "step": 1194 }, { "epoch": 0.3756797612737303, "grad_norm": 0.75390625, "learning_rate": 1.868486194858775e-05, "loss": 1.3345, "step": 1196 }, { "epoch": 0.37630798829927165, "grad_norm": 0.84765625, "learning_rate": 1.8682323072040625e-05, "loss": 1.3843, "step": 1198 }, { "epoch": 0.37693621532481303, "grad_norm": 0.71484375, "learning_rate": 1.8679784195493496e-05, "loss": 1.4273, "step": 1200 }, { "epoch": 0.37756444235035436, "grad_norm": 0.7734375, "learning_rate": 1.8677245318946368e-05, "loss": 1.3729, "step": 1202 }, { "epoch": 0.37819266937589574, "grad_norm": 1.15625, "learning_rate": 1.867470644239924e-05, "loss": 1.1632, "step": 1204 }, { "epoch": 0.37882089640143707, "grad_norm": 0.6796875, "learning_rate": 1.8672167565852114e-05, "loss": 1.3493, "step": 1206 }, { "epoch": 0.37944912342697845, "grad_norm": 0.7578125, "learning_rate": 1.8669628689304985e-05, "loss": 1.3056, "step": 1208 }, { "epoch": 0.3800773504525198, "grad_norm": 0.7265625, "learning_rate": 1.8667089812757857e-05, "loss": 1.414, "step": 1210 }, { "epoch": 0.3807055774780611, "grad_norm": 0.8359375, "learning_rate": 1.8664550936210728e-05, "loss": 1.33, "step": 1212 }, { "epoch": 0.3813338045036025, "grad_norm": 0.80859375, "learning_rate": 1.86620120596636e-05, "loss": 1.378, "step": 1214 }, { "epoch": 0.3819620315291438, "grad_norm": 0.95703125, "learning_rate": 1.865947318311647e-05, "loss": 1.2628, "step": 1216 }, { "epoch": 0.3825902585546852, "grad_norm": 0.73046875, "learning_rate": 1.8656934306569346e-05, "loss": 1.2875, "step": 1218 }, { "epoch": 0.38321848558022653, "grad_norm": 0.78515625, "learning_rate": 1.8654395430022217e-05, "loss": 1.3463, "step": 1220 }, { "epoch": 0.3838467126057679, "grad_norm": 0.80078125, "learning_rate": 1.865185655347509e-05, "loss": 1.3272, "step": 1222 }, { "epoch": 0.38447493963130924, "grad_norm": 0.71484375, "learning_rate": 1.864931767692796e-05, "loss": 1.3908, "step": 1224 }, { "epoch": 0.3851031666568506, "grad_norm": 0.6796875, "learning_rate": 1.8646778800380835e-05, "loss": 1.3235, "step": 1226 }, { "epoch": 0.38573139368239195, "grad_norm": 0.74609375, "learning_rate": 1.8644239923833706e-05, "loss": 1.2354, "step": 1228 }, { "epoch": 0.38635962070793334, "grad_norm": 0.88671875, "learning_rate": 1.8641701047286577e-05, "loss": 1.2592, "step": 1230 }, { "epoch": 0.38698784773347467, "grad_norm": 0.7265625, "learning_rate": 1.863916217073945e-05, "loss": 1.3272, "step": 1232 }, { "epoch": 0.38761607475901605, "grad_norm": 0.77734375, "learning_rate": 1.8636623294192323e-05, "loss": 1.2147, "step": 1234 }, { "epoch": 0.3882443017845574, "grad_norm": 0.7734375, "learning_rate": 1.863408441764519e-05, "loss": 1.3168, "step": 1236 }, { "epoch": 0.38887252881009876, "grad_norm": 0.73828125, "learning_rate": 1.8631545541098066e-05, "loss": 1.2581, "step": 1238 }, { "epoch": 0.3895007558356401, "grad_norm": 0.84375, "learning_rate": 1.8629006664550938e-05, "loss": 1.404, "step": 1240 }, { "epoch": 0.3901289828611815, "grad_norm": 0.79296875, "learning_rate": 1.862646778800381e-05, "loss": 1.3546, "step": 1242 }, { "epoch": 0.3907572098867228, "grad_norm": 0.74609375, "learning_rate": 1.862392891145668e-05, "loss": 1.2896, "step": 1244 }, { "epoch": 0.3913854369122642, "grad_norm": 0.74609375, "learning_rate": 1.8621390034909555e-05, "loss": 1.3196, "step": 1246 }, { "epoch": 0.3920136639378055, "grad_norm": 0.72265625, "learning_rate": 1.8618851158362427e-05, "loss": 1.3084, "step": 1248 }, { "epoch": 0.3926418909633469, "grad_norm": 0.75390625, "learning_rate": 1.8616312281815298e-05, "loss": 1.2459, "step": 1250 }, { "epoch": 0.3932701179888882, "grad_norm": 0.73828125, "learning_rate": 1.861377340526817e-05, "loss": 1.3642, "step": 1252 }, { "epoch": 0.3938983450144296, "grad_norm": 0.9140625, "learning_rate": 1.8611234528721044e-05, "loss": 1.2232, "step": 1254 }, { "epoch": 0.39452657203997094, "grad_norm": 0.6875, "learning_rate": 1.8608695652173912e-05, "loss": 1.2384, "step": 1256 }, { "epoch": 0.3951547990655123, "grad_norm": 0.6640625, "learning_rate": 1.8606156775626787e-05, "loss": 1.3031, "step": 1258 }, { "epoch": 0.39578302609105365, "grad_norm": 0.67578125, "learning_rate": 1.8603617899079658e-05, "loss": 1.3142, "step": 1260 }, { "epoch": 0.39641125311659503, "grad_norm": 0.875, "learning_rate": 1.860107902253253e-05, "loss": 1.2851, "step": 1262 }, { "epoch": 0.39703948014213636, "grad_norm": 0.73828125, "learning_rate": 1.85985401459854e-05, "loss": 1.3063, "step": 1264 }, { "epoch": 0.39766770716767774, "grad_norm": 0.7578125, "learning_rate": 1.8596001269438276e-05, "loss": 1.4062, "step": 1266 }, { "epoch": 0.39829593419321907, "grad_norm": 0.78125, "learning_rate": 1.8593462392891147e-05, "loss": 1.2698, "step": 1268 }, { "epoch": 0.39892416121876045, "grad_norm": 0.6796875, "learning_rate": 1.859092351634402e-05, "loss": 1.3242, "step": 1270 }, { "epoch": 0.3995523882443018, "grad_norm": 0.70703125, "learning_rate": 1.858838463979689e-05, "loss": 1.3655, "step": 1272 }, { "epoch": 0.40018061526984317, "grad_norm": 0.75, "learning_rate": 1.8585845763249765e-05, "loss": 1.259, "step": 1274 }, { "epoch": 0.4008088422953845, "grad_norm": 0.8984375, "learning_rate": 1.8583306886702636e-05, "loss": 1.2373, "step": 1276 }, { "epoch": 0.4014370693209259, "grad_norm": 0.75390625, "learning_rate": 1.8580768010155507e-05, "loss": 1.3231, "step": 1278 }, { "epoch": 0.4020652963464672, "grad_norm": 0.7421875, "learning_rate": 1.8578229133608382e-05, "loss": 1.3715, "step": 1280 }, { "epoch": 0.40269352337200853, "grad_norm": 0.91015625, "learning_rate": 1.857569025706125e-05, "loss": 1.4227, "step": 1282 }, { "epoch": 0.4033217503975499, "grad_norm": 0.72265625, "learning_rate": 1.8573151380514125e-05, "loss": 1.4352, "step": 1284 }, { "epoch": 0.40394997742309124, "grad_norm": 0.8359375, "learning_rate": 1.8570612503966996e-05, "loss": 1.3358, "step": 1286 }, { "epoch": 0.40457820444863263, "grad_norm": 0.7734375, "learning_rate": 1.8568073627419868e-05, "loss": 1.3508, "step": 1288 }, { "epoch": 0.40520643147417396, "grad_norm": 0.94921875, "learning_rate": 1.856553475087274e-05, "loss": 1.4527, "step": 1290 }, { "epoch": 0.40583465849971534, "grad_norm": 0.68359375, "learning_rate": 1.8562995874325614e-05, "loss": 1.4456, "step": 1292 }, { "epoch": 0.40646288552525667, "grad_norm": 0.90625, "learning_rate": 1.8560456997778485e-05, "loss": 1.3093, "step": 1294 }, { "epoch": 0.40709111255079805, "grad_norm": 0.74609375, "learning_rate": 1.8557918121231357e-05, "loss": 1.4534, "step": 1296 }, { "epoch": 0.4077193395763394, "grad_norm": 0.9609375, "learning_rate": 1.8555379244684228e-05, "loss": 1.2337, "step": 1298 }, { "epoch": 0.40834756660188076, "grad_norm": 0.71875, "learning_rate": 1.8552840368137103e-05, "loss": 1.212, "step": 1300 }, { "epoch": 0.4089757936274221, "grad_norm": 0.70703125, "learning_rate": 1.8550301491589974e-05, "loss": 1.3673, "step": 1302 }, { "epoch": 0.4096040206529635, "grad_norm": 0.6875, "learning_rate": 1.8547762615042846e-05, "loss": 1.3345, "step": 1304 }, { "epoch": 0.4102322476785048, "grad_norm": 0.70703125, "learning_rate": 1.8545223738495717e-05, "loss": 1.3542, "step": 1306 }, { "epoch": 0.4108604747040462, "grad_norm": 0.828125, "learning_rate": 1.854268486194859e-05, "loss": 1.4953, "step": 1308 }, { "epoch": 0.4114887017295875, "grad_norm": 0.7421875, "learning_rate": 1.854014598540146e-05, "loss": 1.4254, "step": 1310 }, { "epoch": 0.4121169287551289, "grad_norm": 0.71875, "learning_rate": 1.8537607108854335e-05, "loss": 1.3089, "step": 1312 }, { "epoch": 0.4127451557806702, "grad_norm": 0.73046875, "learning_rate": 1.8535068232307206e-05, "loss": 1.3985, "step": 1314 }, { "epoch": 0.4133733828062116, "grad_norm": 0.828125, "learning_rate": 1.8532529355760077e-05, "loss": 1.45, "step": 1316 }, { "epoch": 0.41400160983175294, "grad_norm": 0.71875, "learning_rate": 1.852999047921295e-05, "loss": 1.472, "step": 1318 }, { "epoch": 0.4146298368572943, "grad_norm": 0.69140625, "learning_rate": 1.8527451602665823e-05, "loss": 1.4135, "step": 1320 }, { "epoch": 0.41525806388283565, "grad_norm": 0.76171875, "learning_rate": 1.8524912726118695e-05, "loss": 1.2985, "step": 1322 }, { "epoch": 0.41588629090837703, "grad_norm": 0.84375, "learning_rate": 1.8522373849571566e-05, "loss": 1.292, "step": 1324 }, { "epoch": 0.41651451793391836, "grad_norm": 0.73046875, "learning_rate": 1.8519834973024438e-05, "loss": 1.3459, "step": 1326 }, { "epoch": 0.41714274495945974, "grad_norm": 0.72265625, "learning_rate": 1.8517296096477312e-05, "loss": 1.3259, "step": 1328 }, { "epoch": 0.41777097198500107, "grad_norm": 0.70703125, "learning_rate": 1.851475721993018e-05, "loss": 1.3027, "step": 1330 }, { "epoch": 0.41839919901054246, "grad_norm": 0.671875, "learning_rate": 1.8512218343383055e-05, "loss": 1.3385, "step": 1332 }, { "epoch": 0.4190274260360838, "grad_norm": 0.7109375, "learning_rate": 1.8509679466835926e-05, "loss": 1.3775, "step": 1334 }, { "epoch": 0.41965565306162517, "grad_norm": 0.79296875, "learning_rate": 1.8507140590288798e-05, "loss": 1.1561, "step": 1336 }, { "epoch": 0.4202838800871665, "grad_norm": 0.8125, "learning_rate": 1.850460171374167e-05, "loss": 1.2644, "step": 1338 }, { "epoch": 0.4209121071127079, "grad_norm": 0.72265625, "learning_rate": 1.8502062837194544e-05, "loss": 1.3686, "step": 1340 }, { "epoch": 0.4215403341382492, "grad_norm": 0.79296875, "learning_rate": 1.8499523960647415e-05, "loss": 1.4161, "step": 1342 }, { "epoch": 0.4221685611637906, "grad_norm": 0.796875, "learning_rate": 1.8496985084100287e-05, "loss": 1.3431, "step": 1344 }, { "epoch": 0.4227967881893319, "grad_norm": 0.80859375, "learning_rate": 1.8494446207553158e-05, "loss": 1.3203, "step": 1346 }, { "epoch": 0.4234250152148733, "grad_norm": 0.8359375, "learning_rate": 1.8491907331006033e-05, "loss": 1.3866, "step": 1348 }, { "epoch": 0.42405324224041463, "grad_norm": 0.73828125, "learning_rate": 1.84893684544589e-05, "loss": 1.307, "step": 1350 }, { "epoch": 0.42468146926595596, "grad_norm": 0.75390625, "learning_rate": 1.8486829577911776e-05, "loss": 1.3054, "step": 1352 }, { "epoch": 0.42530969629149734, "grad_norm": 0.73828125, "learning_rate": 1.8484290701364647e-05, "loss": 1.263, "step": 1354 }, { "epoch": 0.42593792331703867, "grad_norm": 0.7421875, "learning_rate": 1.848175182481752e-05, "loss": 1.2961, "step": 1356 }, { "epoch": 0.42656615034258005, "grad_norm": 0.70703125, "learning_rate": 1.847921294827039e-05, "loss": 1.386, "step": 1358 }, { "epoch": 0.4271943773681214, "grad_norm": 0.79296875, "learning_rate": 1.8476674071723265e-05, "loss": 1.2587, "step": 1360 }, { "epoch": 0.42782260439366276, "grad_norm": 0.80078125, "learning_rate": 1.8474135195176136e-05, "loss": 1.3613, "step": 1362 }, { "epoch": 0.4284508314192041, "grad_norm": 0.734375, "learning_rate": 1.8471596318629007e-05, "loss": 1.4578, "step": 1364 }, { "epoch": 0.4290790584447455, "grad_norm": 0.75, "learning_rate": 1.8469057442081882e-05, "loss": 1.4915, "step": 1366 }, { "epoch": 0.4297072854702868, "grad_norm": 0.984375, "learning_rate": 1.8466518565534754e-05, "loss": 1.2513, "step": 1368 }, { "epoch": 0.4303355124958282, "grad_norm": 0.78125, "learning_rate": 1.8463979688987625e-05, "loss": 1.3317, "step": 1370 }, { "epoch": 0.4309637395213695, "grad_norm": 0.76171875, "learning_rate": 1.8461440812440496e-05, "loss": 1.3281, "step": 1372 }, { "epoch": 0.4315919665469109, "grad_norm": 0.89453125, "learning_rate": 1.845890193589337e-05, "loss": 1.2836, "step": 1374 }, { "epoch": 0.4322201935724522, "grad_norm": 0.96875, "learning_rate": 1.845636305934624e-05, "loss": 1.3258, "step": 1376 }, { "epoch": 0.4328484205979936, "grad_norm": 0.703125, "learning_rate": 1.8453824182799114e-05, "loss": 1.3192, "step": 1378 }, { "epoch": 0.43347664762353494, "grad_norm": 0.7890625, "learning_rate": 1.8451285306251985e-05, "loss": 1.2383, "step": 1380 }, { "epoch": 0.4341048746490763, "grad_norm": 0.6953125, "learning_rate": 1.8448746429704857e-05, "loss": 1.4198, "step": 1382 }, { "epoch": 0.43473310167461765, "grad_norm": 0.84375, "learning_rate": 1.8446207553157728e-05, "loss": 1.3262, "step": 1384 }, { "epoch": 0.43536132870015903, "grad_norm": 0.90234375, "learning_rate": 1.8443668676610603e-05, "loss": 1.3783, "step": 1386 }, { "epoch": 0.43598955572570036, "grad_norm": 0.8046875, "learning_rate": 1.8441129800063474e-05, "loss": 1.3803, "step": 1388 }, { "epoch": 0.43661778275124175, "grad_norm": 0.8359375, "learning_rate": 1.8438590923516346e-05, "loss": 1.2537, "step": 1390 }, { "epoch": 0.4372460097767831, "grad_norm": 0.74609375, "learning_rate": 1.8436052046969217e-05, "loss": 1.4251, "step": 1392 }, { "epoch": 0.43787423680232446, "grad_norm": 0.80078125, "learning_rate": 1.843351317042209e-05, "loss": 1.3708, "step": 1394 }, { "epoch": 0.4385024638278658, "grad_norm": 0.81640625, "learning_rate": 1.8430974293874963e-05, "loss": 1.3983, "step": 1396 }, { "epoch": 0.43913069085340717, "grad_norm": 0.703125, "learning_rate": 1.8428435417327834e-05, "loss": 1.3208, "step": 1398 }, { "epoch": 0.4397589178789485, "grad_norm": 0.6484375, "learning_rate": 1.8425896540780706e-05, "loss": 1.2447, "step": 1400 }, { "epoch": 0.4403871449044899, "grad_norm": 0.7265625, "learning_rate": 1.8423357664233577e-05, "loss": 1.4995, "step": 1402 }, { "epoch": 0.4410153719300312, "grad_norm": 0.69140625, "learning_rate": 1.842081878768645e-05, "loss": 1.2333, "step": 1404 }, { "epoch": 0.4416435989555726, "grad_norm": 0.72265625, "learning_rate": 1.8418279911139323e-05, "loss": 1.438, "step": 1406 }, { "epoch": 0.4422718259811139, "grad_norm": 0.6796875, "learning_rate": 1.8415741034592195e-05, "loss": 1.3648, "step": 1408 }, { "epoch": 0.4429000530066553, "grad_norm": 0.87890625, "learning_rate": 1.8413202158045066e-05, "loss": 1.3982, "step": 1410 }, { "epoch": 0.44352828003219663, "grad_norm": 0.7734375, "learning_rate": 1.8410663281497937e-05, "loss": 1.2714, "step": 1412 }, { "epoch": 0.444156507057738, "grad_norm": 0.66015625, "learning_rate": 1.8408124404950812e-05, "loss": 1.3464, "step": 1414 }, { "epoch": 0.44478473408327934, "grad_norm": 0.671875, "learning_rate": 1.8405585528403684e-05, "loss": 1.3379, "step": 1416 }, { "epoch": 0.4454129611088207, "grad_norm": 0.73046875, "learning_rate": 1.8403046651856555e-05, "loss": 1.3022, "step": 1418 }, { "epoch": 0.44604118813436205, "grad_norm": 0.765625, "learning_rate": 1.8400507775309426e-05, "loss": 1.3677, "step": 1420 }, { "epoch": 0.4466694151599034, "grad_norm": 0.6796875, "learning_rate": 1.83979688987623e-05, "loss": 1.3101, "step": 1422 }, { "epoch": 0.44729764218544477, "grad_norm": 0.94140625, "learning_rate": 1.839543002221517e-05, "loss": 1.2118, "step": 1424 }, { "epoch": 0.4479258692109861, "grad_norm": 2.84375, "learning_rate": 1.8392891145668044e-05, "loss": 1.2927, "step": 1426 }, { "epoch": 0.4485540962365275, "grad_norm": 0.88671875, "learning_rate": 1.8390352269120915e-05, "loss": 1.4683, "step": 1428 }, { "epoch": 0.4491823232620688, "grad_norm": 0.75, "learning_rate": 1.8387813392573787e-05, "loss": 1.2949, "step": 1430 }, { "epoch": 0.4498105502876102, "grad_norm": 0.75, "learning_rate": 1.8385274516026658e-05, "loss": 1.3789, "step": 1432 }, { "epoch": 0.4504387773131515, "grad_norm": 0.7265625, "learning_rate": 1.8382735639479533e-05, "loss": 1.3308, "step": 1434 }, { "epoch": 0.4510670043386929, "grad_norm": 0.78125, "learning_rate": 1.8380196762932404e-05, "loss": 1.3221, "step": 1436 }, { "epoch": 0.45169523136423423, "grad_norm": 0.703125, "learning_rate": 1.8377657886385276e-05, "loss": 1.353, "step": 1438 }, { "epoch": 0.4523234583897756, "grad_norm": 0.84765625, "learning_rate": 1.8375119009838147e-05, "loss": 1.2386, "step": 1440 }, { "epoch": 0.45295168541531694, "grad_norm": 0.70703125, "learning_rate": 1.8372580133291022e-05, "loss": 1.5192, "step": 1442 }, { "epoch": 0.4535799124408583, "grad_norm": 0.7890625, "learning_rate": 1.837004125674389e-05, "loss": 1.4076, "step": 1444 }, { "epoch": 0.45420813946639965, "grad_norm": 0.6953125, "learning_rate": 1.8367502380196765e-05, "loss": 1.4394, "step": 1446 }, { "epoch": 0.45483636649194104, "grad_norm": 0.96484375, "learning_rate": 1.836496350364964e-05, "loss": 1.3238, "step": 1448 }, { "epoch": 0.45546459351748236, "grad_norm": 0.75390625, "learning_rate": 1.8362424627102507e-05, "loss": 1.2685, "step": 1450 }, { "epoch": 0.45609282054302375, "grad_norm": 0.7890625, "learning_rate": 1.8359885750555382e-05, "loss": 1.3883, "step": 1452 }, { "epoch": 0.4567210475685651, "grad_norm": 0.71484375, "learning_rate": 1.8357346874008253e-05, "loss": 1.3735, "step": 1454 }, { "epoch": 0.45734927459410646, "grad_norm": 0.7265625, "learning_rate": 1.8354807997461125e-05, "loss": 1.4432, "step": 1456 }, { "epoch": 0.4579775016196478, "grad_norm": 0.8046875, "learning_rate": 1.8352269120913996e-05, "loss": 1.3395, "step": 1458 }, { "epoch": 0.45860572864518917, "grad_norm": 0.78515625, "learning_rate": 1.834973024436687e-05, "loss": 1.2355, "step": 1460 }, { "epoch": 0.4592339556707305, "grad_norm": 0.703125, "learning_rate": 1.8347191367819742e-05, "loss": 1.4257, "step": 1462 }, { "epoch": 0.4598621826962719, "grad_norm": 0.78515625, "learning_rate": 1.8344652491272614e-05, "loss": 1.4014, "step": 1464 }, { "epoch": 0.4604904097218132, "grad_norm": 0.66015625, "learning_rate": 1.8342113614725485e-05, "loss": 1.4452, "step": 1466 }, { "epoch": 0.4611186367473546, "grad_norm": 0.7578125, "learning_rate": 1.833957473817836e-05, "loss": 1.2609, "step": 1468 }, { "epoch": 0.4617468637728959, "grad_norm": 3.109375, "learning_rate": 1.8337035861631228e-05, "loss": 1.3392, "step": 1470 }, { "epoch": 0.4623750907984373, "grad_norm": 0.8359375, "learning_rate": 1.8334496985084103e-05, "loss": 1.4992, "step": 1472 }, { "epoch": 0.46300331782397863, "grad_norm": 0.71875, "learning_rate": 1.8331958108536974e-05, "loss": 1.3606, "step": 1474 }, { "epoch": 0.46363154484952, "grad_norm": 0.73046875, "learning_rate": 1.8329419231989845e-05, "loss": 1.4007, "step": 1476 }, { "epoch": 0.46425977187506134, "grad_norm": 0.71484375, "learning_rate": 1.8326880355442717e-05, "loss": 1.3096, "step": 1478 }, { "epoch": 0.46488799890060273, "grad_norm": 0.75, "learning_rate": 1.832434147889559e-05, "loss": 1.3873, "step": 1480 }, { "epoch": 0.46551622592614406, "grad_norm": 0.75390625, "learning_rate": 1.8321802602348463e-05, "loss": 1.3962, "step": 1482 }, { "epoch": 0.46614445295168544, "grad_norm": 1.1171875, "learning_rate": 1.8319263725801334e-05, "loss": 1.3512, "step": 1484 }, { "epoch": 0.46677267997722677, "grad_norm": 0.671875, "learning_rate": 1.8316724849254206e-05, "loss": 1.3546, "step": 1486 }, { "epoch": 0.46740090700276815, "grad_norm": 0.76953125, "learning_rate": 1.831418597270708e-05, "loss": 1.3739, "step": 1488 }, { "epoch": 0.4680291340283095, "grad_norm": 0.71875, "learning_rate": 1.831164709615995e-05, "loss": 1.3045, "step": 1490 }, { "epoch": 0.46865736105385086, "grad_norm": 0.72265625, "learning_rate": 1.8309108219612823e-05, "loss": 1.385, "step": 1492 }, { "epoch": 0.4692855880793922, "grad_norm": 0.765625, "learning_rate": 1.8306569343065695e-05, "loss": 1.282, "step": 1494 }, { "epoch": 0.4699138151049335, "grad_norm": 0.671875, "learning_rate": 1.8304030466518566e-05, "loss": 1.5008, "step": 1496 }, { "epoch": 0.4705420421304749, "grad_norm": 0.78515625, "learning_rate": 1.8301491589971437e-05, "loss": 1.422, "step": 1498 }, { "epoch": 0.47117026915601623, "grad_norm": 0.70703125, "learning_rate": 1.8298952713424312e-05, "loss": 1.3419, "step": 1500 }, { "epoch": 0.4717984961815576, "grad_norm": 0.72265625, "learning_rate": 1.8296413836877184e-05, "loss": 1.3878, "step": 1502 }, { "epoch": 0.47242672320709894, "grad_norm": 0.7109375, "learning_rate": 1.8293874960330055e-05, "loss": 1.4378, "step": 1504 }, { "epoch": 0.4730549502326403, "grad_norm": 0.69140625, "learning_rate": 1.8291336083782926e-05, "loss": 1.4115, "step": 1506 }, { "epoch": 0.47368317725818165, "grad_norm": 0.75, "learning_rate": 1.82887972072358e-05, "loss": 1.2909, "step": 1508 }, { "epoch": 0.47431140428372304, "grad_norm": 0.74609375, "learning_rate": 1.8286258330688672e-05, "loss": 1.3813, "step": 1510 }, { "epoch": 0.47493963130926437, "grad_norm": 0.7890625, "learning_rate": 1.8283719454141544e-05, "loss": 1.3018, "step": 1512 }, { "epoch": 0.47556785833480575, "grad_norm": 0.7109375, "learning_rate": 1.8281180577594415e-05, "loss": 1.228, "step": 1514 }, { "epoch": 0.4761960853603471, "grad_norm": 0.65625, "learning_rate": 1.8278641701047287e-05, "loss": 1.3985, "step": 1516 }, { "epoch": 0.47682431238588846, "grad_norm": 0.67578125, "learning_rate": 1.8276102824500158e-05, "loss": 1.4065, "step": 1518 }, { "epoch": 0.4774525394114298, "grad_norm": 0.7421875, "learning_rate": 1.8273563947953033e-05, "loss": 1.34, "step": 1520 }, { "epoch": 0.47808076643697117, "grad_norm": 0.73046875, "learning_rate": 1.8271025071405904e-05, "loss": 1.3451, "step": 1522 }, { "epoch": 0.4787089934625125, "grad_norm": 0.75, "learning_rate": 1.8268486194858776e-05, "loss": 1.3477, "step": 1524 }, { "epoch": 0.4793372204880539, "grad_norm": 0.734375, "learning_rate": 1.8265947318311647e-05, "loss": 1.3247, "step": 1526 }, { "epoch": 0.4799654475135952, "grad_norm": 0.73046875, "learning_rate": 1.8263408441764522e-05, "loss": 1.21, "step": 1528 }, { "epoch": 0.4805936745391366, "grad_norm": 0.71875, "learning_rate": 1.8260869565217393e-05, "loss": 1.3398, "step": 1530 }, { "epoch": 0.4812219015646779, "grad_norm": 0.734375, "learning_rate": 1.8258330688670264e-05, "loss": 1.3262, "step": 1532 }, { "epoch": 0.4818501285902193, "grad_norm": 0.75, "learning_rate": 1.825579181212314e-05, "loss": 1.4908, "step": 1534 }, { "epoch": 0.48247835561576063, "grad_norm": 0.7890625, "learning_rate": 1.825325293557601e-05, "loss": 1.3113, "step": 1536 }, { "epoch": 0.483106582641302, "grad_norm": 0.6640625, "learning_rate": 1.8250714059028882e-05, "loss": 1.2718, "step": 1538 }, { "epoch": 0.48373480966684335, "grad_norm": 0.7265625, "learning_rate": 1.8248175182481753e-05, "loss": 1.406, "step": 1540 }, { "epoch": 0.48436303669238473, "grad_norm": 0.6953125, "learning_rate": 1.8245636305934625e-05, "loss": 1.3577, "step": 1542 }, { "epoch": 0.48499126371792606, "grad_norm": 0.7421875, "learning_rate": 1.8243097429387496e-05, "loss": 1.3054, "step": 1544 }, { "epoch": 0.48561949074346744, "grad_norm": 0.78515625, "learning_rate": 1.824055855284037e-05, "loss": 1.3842, "step": 1546 }, { "epoch": 0.48624771776900877, "grad_norm": 0.76953125, "learning_rate": 1.8238019676293242e-05, "loss": 1.3789, "step": 1548 }, { "epoch": 0.48687594479455015, "grad_norm": 0.65625, "learning_rate": 1.8235480799746114e-05, "loss": 1.3439, "step": 1550 }, { "epoch": 0.4875041718200915, "grad_norm": 0.69921875, "learning_rate": 1.8232941923198985e-05, "loss": 1.3715, "step": 1552 }, { "epoch": 0.48813239884563286, "grad_norm": 0.70703125, "learning_rate": 1.823040304665186e-05, "loss": 1.4506, "step": 1554 }, { "epoch": 0.4887606258711742, "grad_norm": 0.69140625, "learning_rate": 1.822786417010473e-05, "loss": 1.4064, "step": 1556 }, { "epoch": 0.4893888528967156, "grad_norm": 0.7578125, "learning_rate": 1.8225325293557603e-05, "loss": 1.3322, "step": 1558 }, { "epoch": 0.4900170799222569, "grad_norm": 0.76953125, "learning_rate": 1.8222786417010474e-05, "loss": 1.3075, "step": 1560 }, { "epoch": 0.4906453069477983, "grad_norm": 0.703125, "learning_rate": 1.822024754046335e-05, "loss": 1.4187, "step": 1562 }, { "epoch": 0.4912735339733396, "grad_norm": 0.8984375, "learning_rate": 1.8217708663916217e-05, "loss": 1.3365, "step": 1564 }, { "epoch": 0.49190176099888094, "grad_norm": 0.7578125, "learning_rate": 1.821516978736909e-05, "loss": 1.3593, "step": 1566 }, { "epoch": 0.4925299880244223, "grad_norm": 0.7421875, "learning_rate": 1.8212630910821963e-05, "loss": 1.298, "step": 1568 }, { "epoch": 0.49315821504996366, "grad_norm": 0.71875, "learning_rate": 1.8210092034274834e-05, "loss": 1.4256, "step": 1570 }, { "epoch": 0.49378644207550504, "grad_norm": 0.7890625, "learning_rate": 1.8207553157727706e-05, "loss": 1.4808, "step": 1572 }, { "epoch": 0.49441466910104637, "grad_norm": 0.875, "learning_rate": 1.820501428118058e-05, "loss": 1.4233, "step": 1574 }, { "epoch": 0.49504289612658775, "grad_norm": 0.7890625, "learning_rate": 1.8202475404633452e-05, "loss": 1.3273, "step": 1576 }, { "epoch": 0.4956711231521291, "grad_norm": 0.73828125, "learning_rate": 1.8199936528086323e-05, "loss": 1.3384, "step": 1578 }, { "epoch": 0.49629935017767046, "grad_norm": 0.7421875, "learning_rate": 1.8197397651539195e-05, "loss": 1.4455, "step": 1580 }, { "epoch": 0.4969275772032118, "grad_norm": 0.7421875, "learning_rate": 1.819485877499207e-05, "loss": 1.3235, "step": 1582 }, { "epoch": 0.4975558042287532, "grad_norm": 0.73828125, "learning_rate": 1.8192319898444937e-05, "loss": 1.3602, "step": 1584 }, { "epoch": 0.4981840312542945, "grad_norm": 0.890625, "learning_rate": 1.8189781021897812e-05, "loss": 1.2892, "step": 1586 }, { "epoch": 0.4988122582798359, "grad_norm": 0.85546875, "learning_rate": 1.8187242145350684e-05, "loss": 1.2329, "step": 1588 }, { "epoch": 0.4994404853053772, "grad_norm": 0.68359375, "learning_rate": 1.8184703268803555e-05, "loss": 1.2801, "step": 1590 }, { "epoch": 0.5000687123309185, "grad_norm": 0.76171875, "learning_rate": 1.8182164392256426e-05, "loss": 1.3857, "step": 1592 }, { "epoch": 0.50069693935646, "grad_norm": 0.8671875, "learning_rate": 1.81796255157093e-05, "loss": 1.3113, "step": 1594 }, { "epoch": 0.5013251663820013, "grad_norm": 0.78125, "learning_rate": 1.8177086639162172e-05, "loss": 1.4523, "step": 1596 }, { "epoch": 0.5019533934075426, "grad_norm": 0.77734375, "learning_rate": 1.8174547762615044e-05, "loss": 1.3314, "step": 1598 }, { "epoch": 0.502581620433084, "grad_norm": 0.796875, "learning_rate": 1.8172008886067915e-05, "loss": 1.3618, "step": 1600 }, { "epoch": 0.5032098474586254, "grad_norm": 0.8203125, "learning_rate": 1.816947000952079e-05, "loss": 1.3553, "step": 1602 }, { "epoch": 0.5038380744841667, "grad_norm": 0.671875, "learning_rate": 1.816693113297366e-05, "loss": 1.4201, "step": 1604 }, { "epoch": 0.5044663015097081, "grad_norm": 0.8125, "learning_rate": 1.8164392256426533e-05, "loss": 1.309, "step": 1606 }, { "epoch": 0.5050945285352494, "grad_norm": 0.7734375, "learning_rate": 1.8161853379879404e-05, "loss": 1.3145, "step": 1608 }, { "epoch": 0.5057227555607908, "grad_norm": 0.87890625, "learning_rate": 1.8159314503332275e-05, "loss": 1.3546, "step": 1610 }, { "epoch": 0.5063509825863322, "grad_norm": 0.7109375, "learning_rate": 1.8156775626785147e-05, "loss": 1.2818, "step": 1612 }, { "epoch": 0.5069792096118735, "grad_norm": 0.796875, "learning_rate": 1.815423675023802e-05, "loss": 1.4176, "step": 1614 }, { "epoch": 0.5076074366374148, "grad_norm": 0.734375, "learning_rate": 1.8151697873690893e-05, "loss": 1.3501, "step": 1616 }, { "epoch": 0.5082356636629562, "grad_norm": 0.74609375, "learning_rate": 1.8149158997143764e-05, "loss": 1.3265, "step": 1618 }, { "epoch": 0.5088638906884976, "grad_norm": 0.79296875, "learning_rate": 1.814662012059664e-05, "loss": 1.333, "step": 1620 }, { "epoch": 0.5094921177140389, "grad_norm": 0.7265625, "learning_rate": 1.814408124404951e-05, "loss": 1.2086, "step": 1622 }, { "epoch": 0.5101203447395802, "grad_norm": 0.8046875, "learning_rate": 1.8141542367502382e-05, "loss": 1.2181, "step": 1624 }, { "epoch": 0.5107485717651217, "grad_norm": 0.72265625, "learning_rate": 1.8139003490955253e-05, "loss": 1.3269, "step": 1626 }, { "epoch": 0.511376798790663, "grad_norm": 0.67578125, "learning_rate": 1.8136464614408128e-05, "loss": 1.2733, "step": 1628 }, { "epoch": 0.5120050258162043, "grad_norm": 0.69921875, "learning_rate": 1.8133925737861e-05, "loss": 1.253, "step": 1630 }, { "epoch": 0.5126332528417457, "grad_norm": 0.71484375, "learning_rate": 1.813138686131387e-05, "loss": 1.511, "step": 1632 }, { "epoch": 0.5132614798672871, "grad_norm": 0.671875, "learning_rate": 1.8128847984766742e-05, "loss": 1.2451, "step": 1634 }, { "epoch": 0.5138897068928284, "grad_norm": 0.66015625, "learning_rate": 1.8126309108219614e-05, "loss": 1.2587, "step": 1636 }, { "epoch": 0.5145179339183698, "grad_norm": 0.875, "learning_rate": 1.8123770231672485e-05, "loss": 1.301, "step": 1638 }, { "epoch": 0.5151461609439111, "grad_norm": 0.8046875, "learning_rate": 1.812123135512536e-05, "loss": 1.4174, "step": 1640 }, { "epoch": 0.5157743879694524, "grad_norm": 0.7265625, "learning_rate": 1.811869247857823e-05, "loss": 1.2725, "step": 1642 }, { "epoch": 0.5164026149949938, "grad_norm": 0.81640625, "learning_rate": 1.8116153602031103e-05, "loss": 1.3744, "step": 1644 }, { "epoch": 0.5170308420205352, "grad_norm": 0.734375, "learning_rate": 1.8113614725483974e-05, "loss": 1.2455, "step": 1646 }, { "epoch": 0.5176590690460765, "grad_norm": 0.68359375, "learning_rate": 1.811107584893685e-05, "loss": 1.4318, "step": 1648 }, { "epoch": 0.5182872960716178, "grad_norm": 0.80859375, "learning_rate": 1.810853697238972e-05, "loss": 1.3426, "step": 1650 }, { "epoch": 0.5189155230971593, "grad_norm": 0.73046875, "learning_rate": 1.810599809584259e-05, "loss": 1.1767, "step": 1652 }, { "epoch": 0.5195437501227006, "grad_norm": 0.73046875, "learning_rate": 1.8103459219295463e-05, "loss": 1.2447, "step": 1654 }, { "epoch": 0.5201719771482419, "grad_norm": 0.87890625, "learning_rate": 1.8100920342748338e-05, "loss": 1.3263, "step": 1656 }, { "epoch": 0.5208002041737833, "grad_norm": 0.74609375, "learning_rate": 1.8098381466201206e-05, "loss": 1.3857, "step": 1658 }, { "epoch": 0.5214284311993247, "grad_norm": 0.7421875, "learning_rate": 1.809584258965408e-05, "loss": 1.4577, "step": 1660 }, { "epoch": 0.522056658224866, "grad_norm": 0.77734375, "learning_rate": 1.8093303713106952e-05, "loss": 1.4192, "step": 1662 }, { "epoch": 0.5226848852504073, "grad_norm": 0.72265625, "learning_rate": 1.8090764836559823e-05, "loss": 1.2375, "step": 1664 }, { "epoch": 0.5233131122759487, "grad_norm": 0.7578125, "learning_rate": 1.8088225960012695e-05, "loss": 1.3199, "step": 1666 }, { "epoch": 0.5239413393014901, "grad_norm": 0.859375, "learning_rate": 1.808568708346557e-05, "loss": 1.3239, "step": 1668 }, { "epoch": 0.5245695663270314, "grad_norm": 0.63671875, "learning_rate": 1.808314820691844e-05, "loss": 1.4124, "step": 1670 }, { "epoch": 0.5251977933525728, "grad_norm": 0.8046875, "learning_rate": 1.8080609330371312e-05, "loss": 1.3539, "step": 1672 }, { "epoch": 0.5258260203781141, "grad_norm": 0.75, "learning_rate": 1.8078070453824183e-05, "loss": 1.2979, "step": 1674 }, { "epoch": 0.5264542474036555, "grad_norm": 0.67578125, "learning_rate": 1.8075531577277058e-05, "loss": 1.3582, "step": 1676 }, { "epoch": 0.5270824744291969, "grad_norm": 0.7890625, "learning_rate": 1.8072992700729926e-05, "loss": 1.2866, "step": 1678 }, { "epoch": 0.5277107014547382, "grad_norm": 0.83984375, "learning_rate": 1.80704538241828e-05, "loss": 1.1929, "step": 1680 }, { "epoch": 0.5283389284802795, "grad_norm": 0.6875, "learning_rate": 1.8067914947635672e-05, "loss": 1.2081, "step": 1682 }, { "epoch": 0.528967155505821, "grad_norm": 0.67578125, "learning_rate": 1.8065376071088544e-05, "loss": 1.4058, "step": 1684 }, { "epoch": 0.5295953825313623, "grad_norm": 0.69140625, "learning_rate": 1.8062837194541415e-05, "loss": 1.3689, "step": 1686 }, { "epoch": 0.5302236095569036, "grad_norm": 0.7734375, "learning_rate": 1.806029831799429e-05, "loss": 1.2963, "step": 1688 }, { "epoch": 0.530851836582445, "grad_norm": 0.76953125, "learning_rate": 1.805775944144716e-05, "loss": 1.3622, "step": 1690 }, { "epoch": 0.5314800636079864, "grad_norm": 0.765625, "learning_rate": 1.8055220564900033e-05, "loss": 1.2601, "step": 1692 }, { "epoch": 0.5321082906335277, "grad_norm": 0.78515625, "learning_rate": 1.8052681688352904e-05, "loss": 1.2963, "step": 1694 }, { "epoch": 0.532736517659069, "grad_norm": 0.7109375, "learning_rate": 1.805014281180578e-05, "loss": 1.3702, "step": 1696 }, { "epoch": 0.5333647446846104, "grad_norm": 0.734375, "learning_rate": 1.804760393525865e-05, "loss": 1.2802, "step": 1698 }, { "epoch": 0.5339929717101518, "grad_norm": 1.0078125, "learning_rate": 1.804506505871152e-05, "loss": 1.2703, "step": 1700 }, { "epoch": 0.5346211987356931, "grad_norm": 0.71875, "learning_rate": 1.8042526182164393e-05, "loss": 1.2858, "step": 1702 }, { "epoch": 0.5352494257612345, "grad_norm": 1.03125, "learning_rate": 1.8039987305617264e-05, "loss": 1.2983, "step": 1704 }, { "epoch": 0.5358776527867758, "grad_norm": 0.71484375, "learning_rate": 1.803744842907014e-05, "loss": 1.4192, "step": 1706 }, { "epoch": 0.5365058798123171, "grad_norm": 0.71484375, "learning_rate": 1.803490955252301e-05, "loss": 1.4011, "step": 1708 }, { "epoch": 0.5371341068378586, "grad_norm": 0.6796875, "learning_rate": 1.8032370675975882e-05, "loss": 1.3894, "step": 1710 }, { "epoch": 0.5377623338633999, "grad_norm": 0.75390625, "learning_rate": 1.8029831799428753e-05, "loss": 1.5168, "step": 1712 }, { "epoch": 0.5383905608889412, "grad_norm": 1.4609375, "learning_rate": 1.8027292922881628e-05, "loss": 1.3708, "step": 1714 }, { "epoch": 0.5390187879144825, "grad_norm": 0.76953125, "learning_rate": 1.80247540463345e-05, "loss": 1.3817, "step": 1716 }, { "epoch": 0.539647014940024, "grad_norm": 0.7578125, "learning_rate": 1.802221516978737e-05, "loss": 1.3174, "step": 1718 }, { "epoch": 0.5402752419655653, "grad_norm": 0.73828125, "learning_rate": 1.8019676293240242e-05, "loss": 1.3609, "step": 1720 }, { "epoch": 0.5409034689911066, "grad_norm": 0.734375, "learning_rate": 1.8017137416693117e-05, "loss": 1.4835, "step": 1722 }, { "epoch": 0.541531696016648, "grad_norm": 0.69921875, "learning_rate": 1.801459854014599e-05, "loss": 1.5052, "step": 1724 }, { "epoch": 0.5421599230421894, "grad_norm": 0.72265625, "learning_rate": 1.801205966359886e-05, "loss": 1.3482, "step": 1726 }, { "epoch": 0.5427881500677307, "grad_norm": 0.79296875, "learning_rate": 1.800952078705173e-05, "loss": 1.21, "step": 1728 }, { "epoch": 0.5434163770932721, "grad_norm": 0.75390625, "learning_rate": 1.8006981910504602e-05, "loss": 1.3702, "step": 1730 }, { "epoch": 0.5440446041188134, "grad_norm": 0.7578125, "learning_rate": 1.8004443033957474e-05, "loss": 1.4266, "step": 1732 }, { "epoch": 0.5446728311443548, "grad_norm": 0.671875, "learning_rate": 1.800190415741035e-05, "loss": 1.339, "step": 1734 }, { "epoch": 0.5453010581698962, "grad_norm": 0.74609375, "learning_rate": 1.799936528086322e-05, "loss": 1.3851, "step": 1736 }, { "epoch": 0.5459292851954375, "grad_norm": 0.69140625, "learning_rate": 1.799682640431609e-05, "loss": 1.4017, "step": 1738 }, { "epoch": 0.5465575122209788, "grad_norm": 0.734375, "learning_rate": 1.7994287527768963e-05, "loss": 1.2933, "step": 1740 }, { "epoch": 0.5471857392465203, "grad_norm": 0.74609375, "learning_rate": 1.7991748651221838e-05, "loss": 1.3104, "step": 1742 }, { "epoch": 0.5478139662720616, "grad_norm": 0.65625, "learning_rate": 1.798920977467471e-05, "loss": 1.231, "step": 1744 }, { "epoch": 0.5484421932976029, "grad_norm": 0.7578125, "learning_rate": 1.798667089812758e-05, "loss": 1.4584, "step": 1746 }, { "epoch": 0.5490704203231442, "grad_norm": 0.75390625, "learning_rate": 1.798413202158045e-05, "loss": 1.2988, "step": 1748 }, { "epoch": 0.5496986473486857, "grad_norm": 0.7578125, "learning_rate": 1.7981593145033326e-05, "loss": 1.2553, "step": 1750 }, { "epoch": 0.550326874374227, "grad_norm": 0.75, "learning_rate": 1.7979054268486194e-05, "loss": 1.3824, "step": 1752 }, { "epoch": 0.5509551013997683, "grad_norm": 0.765625, "learning_rate": 1.797651539193907e-05, "loss": 1.4831, "step": 1754 }, { "epoch": 0.5515833284253097, "grad_norm": 0.8046875, "learning_rate": 1.797397651539194e-05, "loss": 1.3839, "step": 1756 }, { "epoch": 0.5522115554508511, "grad_norm": 0.7578125, "learning_rate": 1.7971437638844812e-05, "loss": 1.4556, "step": 1758 }, { "epoch": 0.5528397824763924, "grad_norm": 0.67578125, "learning_rate": 1.7968898762297683e-05, "loss": 1.3564, "step": 1760 }, { "epoch": 0.5534680095019338, "grad_norm": 0.81640625, "learning_rate": 1.7966359885750558e-05, "loss": 1.4027, "step": 1762 }, { "epoch": 0.5540962365274751, "grad_norm": 0.93359375, "learning_rate": 1.796382100920343e-05, "loss": 1.3738, "step": 1764 }, { "epoch": 0.5547244635530165, "grad_norm": 0.8203125, "learning_rate": 1.79612821326563e-05, "loss": 1.4116, "step": 1766 }, { "epoch": 0.5553526905785579, "grad_norm": 0.9140625, "learning_rate": 1.7958743256109172e-05, "loss": 1.4383, "step": 1768 }, { "epoch": 0.5559809176040992, "grad_norm": 0.76171875, "learning_rate": 1.7956204379562047e-05, "loss": 1.2174, "step": 1770 }, { "epoch": 0.5566091446296405, "grad_norm": 0.75390625, "learning_rate": 1.7953665503014915e-05, "loss": 1.2893, "step": 1772 }, { "epoch": 0.557237371655182, "grad_norm": 0.796875, "learning_rate": 1.795112662646779e-05, "loss": 1.291, "step": 1774 }, { "epoch": 0.5578655986807233, "grad_norm": 0.82421875, "learning_rate": 1.794858774992066e-05, "loss": 1.4798, "step": 1776 }, { "epoch": 0.5584938257062646, "grad_norm": 0.9296875, "learning_rate": 1.7946048873373533e-05, "loss": 1.2961, "step": 1778 }, { "epoch": 0.5591220527318059, "grad_norm": 2.1875, "learning_rate": 1.7943509996826404e-05, "loss": 1.3342, "step": 1780 }, { "epoch": 0.5597502797573473, "grad_norm": 0.890625, "learning_rate": 1.794097112027928e-05, "loss": 1.2582, "step": 1782 }, { "epoch": 0.5603785067828887, "grad_norm": 0.66796875, "learning_rate": 1.793843224373215e-05, "loss": 1.3106, "step": 1784 }, { "epoch": 0.56100673380843, "grad_norm": 0.8125, "learning_rate": 1.793589336718502e-05, "loss": 1.3369, "step": 1786 }, { "epoch": 0.5616349608339714, "grad_norm": 0.859375, "learning_rate": 1.7933354490637893e-05, "loss": 1.2346, "step": 1788 }, { "epoch": 0.5622631878595127, "grad_norm": 0.76171875, "learning_rate": 1.7930815614090768e-05, "loss": 1.2644, "step": 1790 }, { "epoch": 0.5628914148850541, "grad_norm": 0.8359375, "learning_rate": 1.792827673754364e-05, "loss": 1.3247, "step": 1792 }, { "epoch": 0.5635196419105954, "grad_norm": 0.7734375, "learning_rate": 1.792573786099651e-05, "loss": 1.2764, "step": 1794 }, { "epoch": 0.5641478689361368, "grad_norm": 0.71484375, "learning_rate": 1.7923198984449385e-05, "loss": 1.2428, "step": 1796 }, { "epoch": 0.5647760959616781, "grad_norm": 0.80078125, "learning_rate": 1.7920660107902253e-05, "loss": 1.4744, "step": 1798 }, { "epoch": 0.5654043229872195, "grad_norm": 0.7421875, "learning_rate": 1.7918121231355128e-05, "loss": 1.3754, "step": 1800 }, { "epoch": 0.5660325500127609, "grad_norm": 0.8828125, "learning_rate": 1.7915582354808e-05, "loss": 1.3084, "step": 1802 }, { "epoch": 0.5666607770383022, "grad_norm": 0.75, "learning_rate": 1.791304347826087e-05, "loss": 1.3269, "step": 1804 }, { "epoch": 0.5672890040638435, "grad_norm": 0.7265625, "learning_rate": 1.7910504601713742e-05, "loss": 1.3141, "step": 1806 }, { "epoch": 0.567917231089385, "grad_norm": 0.90234375, "learning_rate": 1.7907965725166617e-05, "loss": 1.1482, "step": 1808 }, { "epoch": 0.5685454581149263, "grad_norm": 0.69921875, "learning_rate": 1.7905426848619488e-05, "loss": 1.3093, "step": 1810 }, { "epoch": 0.5691736851404676, "grad_norm": 0.6640625, "learning_rate": 1.790288797207236e-05, "loss": 1.4742, "step": 1812 }, { "epoch": 0.569801912166009, "grad_norm": 0.8203125, "learning_rate": 1.790034909552523e-05, "loss": 1.3429, "step": 1814 }, { "epoch": 0.5704301391915504, "grad_norm": 0.77734375, "learning_rate": 1.7897810218978106e-05, "loss": 1.3247, "step": 1816 }, { "epoch": 0.5710583662170917, "grad_norm": 0.6875, "learning_rate": 1.7895271342430974e-05, "loss": 1.386, "step": 1818 }, { "epoch": 0.571686593242633, "grad_norm": 0.6796875, "learning_rate": 1.789273246588385e-05, "loss": 1.3501, "step": 1820 }, { "epoch": 0.5723148202681744, "grad_norm": 0.73828125, "learning_rate": 1.789019358933672e-05, "loss": 1.2759, "step": 1822 }, { "epoch": 0.5729430472937158, "grad_norm": 0.78515625, "learning_rate": 1.788765471278959e-05, "loss": 1.2834, "step": 1824 }, { "epoch": 0.5735712743192571, "grad_norm": 0.765625, "learning_rate": 1.7885115836242463e-05, "loss": 1.3764, "step": 1826 }, { "epoch": 0.5741995013447985, "grad_norm": 0.80859375, "learning_rate": 1.7882576959695337e-05, "loss": 1.2428, "step": 1828 }, { "epoch": 0.5748277283703398, "grad_norm": 0.78125, "learning_rate": 1.788003808314821e-05, "loss": 1.3577, "step": 1830 }, { "epoch": 0.5754559553958812, "grad_norm": 0.94921875, "learning_rate": 1.787749920660108e-05, "loss": 1.2091, "step": 1832 }, { "epoch": 0.5760841824214226, "grad_norm": 0.83203125, "learning_rate": 1.787496033005395e-05, "loss": 1.4, "step": 1834 }, { "epoch": 0.5767124094469639, "grad_norm": 0.7109375, "learning_rate": 1.7872421453506826e-05, "loss": 1.3621, "step": 1836 }, { "epoch": 0.5773406364725052, "grad_norm": 0.828125, "learning_rate": 1.7869882576959698e-05, "loss": 1.3756, "step": 1838 }, { "epoch": 0.5779688634980467, "grad_norm": 0.68359375, "learning_rate": 1.786734370041257e-05, "loss": 1.3658, "step": 1840 }, { "epoch": 0.578597090523588, "grad_norm": 0.7109375, "learning_rate": 1.786480482386544e-05, "loss": 1.2812, "step": 1842 }, { "epoch": 0.5792253175491293, "grad_norm": 0.73828125, "learning_rate": 1.7862265947318312e-05, "loss": 1.4921, "step": 1844 }, { "epoch": 0.5798535445746706, "grad_norm": 0.77734375, "learning_rate": 1.7859727070771183e-05, "loss": 1.3042, "step": 1846 }, { "epoch": 0.5804817716002121, "grad_norm": 1.203125, "learning_rate": 1.7857188194224058e-05, "loss": 1.1429, "step": 1848 }, { "epoch": 0.5811099986257534, "grad_norm": 0.73046875, "learning_rate": 1.785464931767693e-05, "loss": 1.4471, "step": 1850 }, { "epoch": 0.5817382256512947, "grad_norm": 0.6953125, "learning_rate": 1.78521104411298e-05, "loss": 1.3808, "step": 1852 }, { "epoch": 0.5823664526768361, "grad_norm": 0.94140625, "learning_rate": 1.7849571564582672e-05, "loss": 1.3266, "step": 1854 }, { "epoch": 0.5829946797023774, "grad_norm": 0.68359375, "learning_rate": 1.7847032688035547e-05, "loss": 1.4399, "step": 1856 }, { "epoch": 0.5836229067279188, "grad_norm": 0.75, "learning_rate": 1.784449381148842e-05, "loss": 1.2884, "step": 1858 }, { "epoch": 0.5842511337534602, "grad_norm": 0.6796875, "learning_rate": 1.784195493494129e-05, "loss": 1.3308, "step": 1860 }, { "epoch": 0.5848793607790015, "grad_norm": 0.7890625, "learning_rate": 1.783941605839416e-05, "loss": 1.3215, "step": 1862 }, { "epoch": 0.5855075878045428, "grad_norm": 0.8671875, "learning_rate": 1.7836877181847036e-05, "loss": 1.4684, "step": 1864 }, { "epoch": 0.5861358148300843, "grad_norm": 0.6875, "learning_rate": 1.7834338305299904e-05, "loss": 1.293, "step": 1866 }, { "epoch": 0.5867640418556256, "grad_norm": 0.7578125, "learning_rate": 1.783179942875278e-05, "loss": 1.2667, "step": 1868 }, { "epoch": 0.5873922688811669, "grad_norm": 0.76953125, "learning_rate": 1.782926055220565e-05, "loss": 1.3243, "step": 1870 }, { "epoch": 0.5880204959067082, "grad_norm": 0.79296875, "learning_rate": 1.782672167565852e-05, "loss": 1.2651, "step": 1872 }, { "epoch": 0.5886487229322497, "grad_norm": 0.69921875, "learning_rate": 1.7824182799111393e-05, "loss": 1.2973, "step": 1874 }, { "epoch": 0.589276949957791, "grad_norm": 0.73828125, "learning_rate": 1.7821643922564268e-05, "loss": 1.2823, "step": 1876 }, { "epoch": 0.5899051769833323, "grad_norm": 0.94921875, "learning_rate": 1.781910504601714e-05, "loss": 1.415, "step": 1878 }, { "epoch": 0.5905334040088737, "grad_norm": 0.76171875, "learning_rate": 1.781656616947001e-05, "loss": 1.2477, "step": 1880 }, { "epoch": 0.5911616310344151, "grad_norm": 0.80078125, "learning_rate": 1.7814027292922885e-05, "loss": 1.2649, "step": 1882 }, { "epoch": 0.5917898580599564, "grad_norm": 0.64453125, "learning_rate": 1.7811488416375756e-05, "loss": 1.3797, "step": 1884 }, { "epoch": 0.5924180850854978, "grad_norm": 0.75390625, "learning_rate": 1.7808949539828628e-05, "loss": 1.3717, "step": 1886 }, { "epoch": 0.5930463121110391, "grad_norm": 0.70703125, "learning_rate": 1.78064106632815e-05, "loss": 1.2677, "step": 1888 }, { "epoch": 0.5936745391365805, "grad_norm": 0.78515625, "learning_rate": 1.7803871786734374e-05, "loss": 1.4157, "step": 1890 }, { "epoch": 0.5943027661621219, "grad_norm": 0.6875, "learning_rate": 1.7801332910187242e-05, "loss": 1.2478, "step": 1892 }, { "epoch": 0.5949309931876632, "grad_norm": 0.73046875, "learning_rate": 1.7798794033640117e-05, "loss": 1.3108, "step": 1894 }, { "epoch": 0.5955592202132045, "grad_norm": 0.75, "learning_rate": 1.7796255157092988e-05, "loss": 1.3043, "step": 1896 }, { "epoch": 0.596187447238746, "grad_norm": 0.703125, "learning_rate": 1.779371628054586e-05, "loss": 1.259, "step": 1898 }, { "epoch": 0.5968156742642873, "grad_norm": 0.83203125, "learning_rate": 1.779117740399873e-05, "loss": 1.2671, "step": 1900 }, { "epoch": 0.5974439012898286, "grad_norm": 0.7734375, "learning_rate": 1.7788638527451606e-05, "loss": 1.3808, "step": 1902 }, { "epoch": 0.5980721283153699, "grad_norm": 0.8828125, "learning_rate": 1.7786099650904477e-05, "loss": 1.3359, "step": 1904 }, { "epoch": 0.5987003553409114, "grad_norm": 0.8359375, "learning_rate": 1.778356077435735e-05, "loss": 1.3205, "step": 1906 }, { "epoch": 0.5993285823664527, "grad_norm": 0.73828125, "learning_rate": 1.778102189781022e-05, "loss": 1.3357, "step": 1908 }, { "epoch": 0.599956809391994, "grad_norm": 0.76953125, "learning_rate": 1.7778483021263095e-05, "loss": 1.3952, "step": 1910 }, { "epoch": 0.6005850364175354, "grad_norm": 0.828125, "learning_rate": 1.7775944144715963e-05, "loss": 1.2332, "step": 1912 }, { "epoch": 0.6012132634430768, "grad_norm": 0.828125, "learning_rate": 1.7773405268168837e-05, "loss": 1.3406, "step": 1914 }, { "epoch": 0.6018414904686181, "grad_norm": 0.71875, "learning_rate": 1.777086639162171e-05, "loss": 1.3423, "step": 1916 }, { "epoch": 0.6024697174941595, "grad_norm": 0.74609375, "learning_rate": 1.776832751507458e-05, "loss": 1.3578, "step": 1918 }, { "epoch": 0.6030979445197008, "grad_norm": 0.65625, "learning_rate": 1.776578863852745e-05, "loss": 1.3513, "step": 1920 }, { "epoch": 0.6037261715452421, "grad_norm": 0.8203125, "learning_rate": 1.7763249761980326e-05, "loss": 1.2171, "step": 1922 }, { "epoch": 0.6043543985707835, "grad_norm": 0.72265625, "learning_rate": 1.7760710885433198e-05, "loss": 1.3335, "step": 1924 }, { "epoch": 0.6049826255963249, "grad_norm": 0.83203125, "learning_rate": 1.775817200888607e-05, "loss": 1.3946, "step": 1926 }, { "epoch": 0.6056108526218662, "grad_norm": 0.76953125, "learning_rate": 1.775563313233894e-05, "loss": 1.2521, "step": 1928 }, { "epoch": 0.6062390796474075, "grad_norm": 0.78125, "learning_rate": 1.7753094255791815e-05, "loss": 1.4663, "step": 1930 }, { "epoch": 0.606867306672949, "grad_norm": 0.71484375, "learning_rate": 1.7750555379244687e-05, "loss": 1.1201, "step": 1932 }, { "epoch": 0.6074955336984903, "grad_norm": 0.78515625, "learning_rate": 1.7748016502697558e-05, "loss": 1.4028, "step": 1934 }, { "epoch": 0.6081237607240316, "grad_norm": 0.7734375, "learning_rate": 1.774547762615043e-05, "loss": 1.2642, "step": 1936 }, { "epoch": 0.608751987749573, "grad_norm": 0.76953125, "learning_rate": 1.77429387496033e-05, "loss": 1.2945, "step": 1938 }, { "epoch": 0.6093802147751144, "grad_norm": 0.76953125, "learning_rate": 1.7740399873056172e-05, "loss": 1.265, "step": 1940 }, { "epoch": 0.6100084418006557, "grad_norm": 0.859375, "learning_rate": 1.7737860996509047e-05, "loss": 1.4148, "step": 1942 }, { "epoch": 0.610636668826197, "grad_norm": 0.66796875, "learning_rate": 1.7735322119961918e-05, "loss": 1.2506, "step": 1944 }, { "epoch": 0.6112648958517384, "grad_norm": 0.90234375, "learning_rate": 1.773278324341479e-05, "loss": 1.3281, "step": 1946 }, { "epoch": 0.6118931228772798, "grad_norm": 0.7109375, "learning_rate": 1.773024436686766e-05, "loss": 1.273, "step": 1948 }, { "epoch": 0.6125213499028211, "grad_norm": 0.75, "learning_rate": 1.7727705490320536e-05, "loss": 1.2533, "step": 1950 }, { "epoch": 0.6131495769283625, "grad_norm": 0.77734375, "learning_rate": 1.7725166613773407e-05, "loss": 1.2262, "step": 1952 }, { "epoch": 0.6137778039539038, "grad_norm": 0.72265625, "learning_rate": 1.772262773722628e-05, "loss": 1.2834, "step": 1954 }, { "epoch": 0.6144060309794452, "grad_norm": 0.6796875, "learning_rate": 1.772008886067915e-05, "loss": 1.286, "step": 1956 }, { "epoch": 0.6150342580049866, "grad_norm": 0.7265625, "learning_rate": 1.7717549984132025e-05, "loss": 1.2474, "step": 1958 }, { "epoch": 0.6156624850305279, "grad_norm": 0.71484375, "learning_rate": 1.7715011107584893e-05, "loss": 1.345, "step": 1960 }, { "epoch": 0.6162907120560692, "grad_norm": 0.79296875, "learning_rate": 1.7712472231037767e-05, "loss": 1.2253, "step": 1962 }, { "epoch": 0.6169189390816107, "grad_norm": 0.76953125, "learning_rate": 1.770993335449064e-05, "loss": 1.3628, "step": 1964 }, { "epoch": 0.617547166107152, "grad_norm": 0.76953125, "learning_rate": 1.770739447794351e-05, "loss": 1.2676, "step": 1966 }, { "epoch": 0.6181753931326933, "grad_norm": 0.72265625, "learning_rate": 1.7704855601396385e-05, "loss": 1.2463, "step": 1968 }, { "epoch": 0.6188036201582346, "grad_norm": 0.7109375, "learning_rate": 1.7702316724849256e-05, "loss": 1.3617, "step": 1970 }, { "epoch": 0.6194318471837761, "grad_norm": 0.83984375, "learning_rate": 1.7699777848302128e-05, "loss": 1.4785, "step": 1972 }, { "epoch": 0.6200600742093174, "grad_norm": 0.76953125, "learning_rate": 1.7697238971755e-05, "loss": 1.2933, "step": 1974 }, { "epoch": 0.6206883012348587, "grad_norm": 0.70703125, "learning_rate": 1.7694700095207874e-05, "loss": 1.4211, "step": 1976 }, { "epoch": 0.6213165282604001, "grad_norm": 0.734375, "learning_rate": 1.7692161218660745e-05, "loss": 1.4411, "step": 1978 }, { "epoch": 0.6219447552859415, "grad_norm": 0.70703125, "learning_rate": 1.7689622342113617e-05, "loss": 1.2598, "step": 1980 }, { "epoch": 0.6225729823114828, "grad_norm": 0.71875, "learning_rate": 1.7687083465566488e-05, "loss": 1.2098, "step": 1982 }, { "epoch": 0.6232012093370242, "grad_norm": 0.69921875, "learning_rate": 1.7684544589019363e-05, "loss": 1.3236, "step": 1984 }, { "epoch": 0.6238294363625655, "grad_norm": 0.73046875, "learning_rate": 1.768200571247223e-05, "loss": 1.3541, "step": 1986 }, { "epoch": 0.6244576633881069, "grad_norm": 0.84765625, "learning_rate": 1.7679466835925106e-05, "loss": 1.2746, "step": 1988 }, { "epoch": 0.6250858904136483, "grad_norm": 0.86328125, "learning_rate": 1.7676927959377977e-05, "loss": 1.3703, "step": 1990 }, { "epoch": 0.6257141174391896, "grad_norm": 0.80859375, "learning_rate": 1.767438908283085e-05, "loss": 1.2673, "step": 1992 }, { "epoch": 0.6263423444647309, "grad_norm": 0.88671875, "learning_rate": 1.767185020628372e-05, "loss": 1.2734, "step": 1994 }, { "epoch": 0.6269705714902722, "grad_norm": 0.8125, "learning_rate": 1.7669311329736595e-05, "loss": 1.2994, "step": 1996 }, { "epoch": 0.6275987985158137, "grad_norm": 0.84765625, "learning_rate": 1.7666772453189466e-05, "loss": 1.2314, "step": 1998 }, { "epoch": 0.628227025541355, "grad_norm": 0.71875, "learning_rate": 1.7664233576642337e-05, "loss": 1.3692, "step": 2000 }, { "epoch": 0.6288552525668963, "grad_norm": 0.703125, "learning_rate": 1.766169470009521e-05, "loss": 1.1083, "step": 2002 }, { "epoch": 0.6294834795924377, "grad_norm": 0.71875, "learning_rate": 1.7659155823548083e-05, "loss": 1.3513, "step": 2004 }, { "epoch": 0.6301117066179791, "grad_norm": 0.71875, "learning_rate": 1.765661694700095e-05, "loss": 1.2768, "step": 2006 }, { "epoch": 0.6307399336435204, "grad_norm": 0.77734375, "learning_rate": 1.7654078070453826e-05, "loss": 1.399, "step": 2008 }, { "epoch": 0.6313681606690618, "grad_norm": 0.7734375, "learning_rate": 1.7651539193906698e-05, "loss": 1.3596, "step": 2010 }, { "epoch": 0.6319963876946031, "grad_norm": 0.99609375, "learning_rate": 1.764900031735957e-05, "loss": 1.3298, "step": 2012 }, { "epoch": 0.6326246147201445, "grad_norm": 0.81640625, "learning_rate": 1.764646144081244e-05, "loss": 1.3194, "step": 2014 }, { "epoch": 0.6332528417456859, "grad_norm": 0.78125, "learning_rate": 1.7643922564265315e-05, "loss": 1.2478, "step": 2016 }, { "epoch": 0.6338810687712272, "grad_norm": 0.78125, "learning_rate": 1.7641383687718187e-05, "loss": 1.285, "step": 2018 }, { "epoch": 0.6345092957967685, "grad_norm": 0.75, "learning_rate": 1.7638844811171058e-05, "loss": 1.4251, "step": 2020 }, { "epoch": 0.63513752282231, "grad_norm": 0.97265625, "learning_rate": 1.763630593462393e-05, "loss": 1.281, "step": 2022 }, { "epoch": 0.6357657498478513, "grad_norm": 0.859375, "learning_rate": 1.7633767058076804e-05, "loss": 1.3546, "step": 2024 }, { "epoch": 0.6363939768733926, "grad_norm": 0.6796875, "learning_rate": 1.7631228181529672e-05, "loss": 1.2134, "step": 2026 }, { "epoch": 0.6370222038989339, "grad_norm": 0.7734375, "learning_rate": 1.7628689304982547e-05, "loss": 1.352, "step": 2028 }, { "epoch": 0.6376504309244754, "grad_norm": 0.69140625, "learning_rate": 1.7626150428435418e-05, "loss": 1.3923, "step": 2030 }, { "epoch": 0.6382786579500167, "grad_norm": 0.69140625, "learning_rate": 1.762361155188829e-05, "loss": 1.3658, "step": 2032 }, { "epoch": 0.638906884975558, "grad_norm": 0.96875, "learning_rate": 1.762107267534116e-05, "loss": 1.2421, "step": 2034 }, { "epoch": 0.6395351120010994, "grad_norm": 0.71875, "learning_rate": 1.7618533798794036e-05, "loss": 1.3427, "step": 2036 }, { "epoch": 0.6401633390266408, "grad_norm": 0.8515625, "learning_rate": 1.7615994922246907e-05, "loss": 1.3342, "step": 2038 }, { "epoch": 0.6407915660521821, "grad_norm": 0.7578125, "learning_rate": 1.761345604569978e-05, "loss": 1.4221, "step": 2040 }, { "epoch": 0.6414197930777235, "grad_norm": 0.73046875, "learning_rate": 1.761091716915265e-05, "loss": 1.3898, "step": 2042 }, { "epoch": 0.6420480201032648, "grad_norm": 0.703125, "learning_rate": 1.7608378292605525e-05, "loss": 1.5576, "step": 2044 }, { "epoch": 0.6426762471288062, "grad_norm": 0.79296875, "learning_rate": 1.7605839416058396e-05, "loss": 1.3117, "step": 2046 }, { "epoch": 0.6433044741543475, "grad_norm": 0.76171875, "learning_rate": 1.7603300539511267e-05, "loss": 1.2932, "step": 2048 }, { "epoch": 0.6439327011798889, "grad_norm": 0.7734375, "learning_rate": 1.7600761662964142e-05, "loss": 1.2463, "step": 2050 }, { "epoch": 0.6445609282054302, "grad_norm": 0.703125, "learning_rate": 1.759822278641701e-05, "loss": 1.3657, "step": 2052 }, { "epoch": 0.6451891552309716, "grad_norm": 0.7734375, "learning_rate": 1.7595683909869885e-05, "loss": 1.4386, "step": 2054 }, { "epoch": 0.645817382256513, "grad_norm": 0.80078125, "learning_rate": 1.7593145033322756e-05, "loss": 1.3022, "step": 2056 }, { "epoch": 0.6464456092820543, "grad_norm": 1.0546875, "learning_rate": 1.7590606156775628e-05, "loss": 1.5185, "step": 2058 }, { "epoch": 0.6470738363075956, "grad_norm": 0.890625, "learning_rate": 1.75880672802285e-05, "loss": 1.1322, "step": 2060 }, { "epoch": 0.647702063333137, "grad_norm": 0.671875, "learning_rate": 1.7585528403681374e-05, "loss": 1.3653, "step": 2062 }, { "epoch": 0.6483302903586784, "grad_norm": 0.81640625, "learning_rate": 1.7582989527134245e-05, "loss": 1.2649, "step": 2064 }, { "epoch": 0.6489585173842197, "grad_norm": 0.7578125, "learning_rate": 1.7580450650587117e-05, "loss": 1.4218, "step": 2066 }, { "epoch": 0.649586744409761, "grad_norm": 0.84375, "learning_rate": 1.7577911774039988e-05, "loss": 1.42, "step": 2068 }, { "epoch": 0.6502149714353024, "grad_norm": 0.83984375, "learning_rate": 1.7575372897492863e-05, "loss": 1.2207, "step": 2070 }, { "epoch": 0.6508431984608438, "grad_norm": 0.73046875, "learning_rate": 1.7572834020945734e-05, "loss": 1.3358, "step": 2072 }, { "epoch": 0.6514714254863851, "grad_norm": 0.921875, "learning_rate": 1.7570295144398606e-05, "loss": 1.2739, "step": 2074 }, { "epoch": 0.6520996525119265, "grad_norm": 0.8125, "learning_rate": 1.7567756267851477e-05, "loss": 1.3296, "step": 2076 }, { "epoch": 0.6527278795374678, "grad_norm": 0.76171875, "learning_rate": 1.756521739130435e-05, "loss": 1.3499, "step": 2078 }, { "epoch": 0.6533561065630092, "grad_norm": 0.78515625, "learning_rate": 1.756267851475722e-05, "loss": 1.3431, "step": 2080 }, { "epoch": 0.6539843335885506, "grad_norm": 0.6796875, "learning_rate": 1.7560139638210094e-05, "loss": 1.4784, "step": 2082 }, { "epoch": 0.6546125606140919, "grad_norm": 0.7265625, "learning_rate": 1.7557600761662966e-05, "loss": 1.2925, "step": 2084 }, { "epoch": 0.6552407876396332, "grad_norm": 0.828125, "learning_rate": 1.7555061885115837e-05, "loss": 1.2877, "step": 2086 }, { "epoch": 0.6558690146651747, "grad_norm": 0.74609375, "learning_rate": 1.755252300856871e-05, "loss": 1.3971, "step": 2088 }, { "epoch": 0.656497241690716, "grad_norm": 0.69921875, "learning_rate": 1.7549984132021583e-05, "loss": 1.2842, "step": 2090 }, { "epoch": 0.6571254687162573, "grad_norm": 0.734375, "learning_rate": 1.7547445255474455e-05, "loss": 1.1632, "step": 2092 }, { "epoch": 0.6577536957417986, "grad_norm": 0.828125, "learning_rate": 1.7544906378927326e-05, "loss": 1.3903, "step": 2094 }, { "epoch": 0.6583819227673401, "grad_norm": 0.7421875, "learning_rate": 1.7542367502380198e-05, "loss": 1.4009, "step": 2096 }, { "epoch": 0.6590101497928814, "grad_norm": 0.75390625, "learning_rate": 1.7539828625833072e-05, "loss": 1.353, "step": 2098 }, { "epoch": 0.6596383768184227, "grad_norm": 0.7421875, "learning_rate": 1.753728974928594e-05, "loss": 1.4069, "step": 2100 }, { "epoch": 0.6602666038439641, "grad_norm": 0.828125, "learning_rate": 1.7534750872738815e-05, "loss": 1.2694, "step": 2102 }, { "epoch": 0.6608948308695055, "grad_norm": 0.6796875, "learning_rate": 1.7532211996191686e-05, "loss": 1.2923, "step": 2104 }, { "epoch": 0.6615230578950468, "grad_norm": 0.75390625, "learning_rate": 1.7529673119644558e-05, "loss": 1.2756, "step": 2106 }, { "epoch": 0.6621512849205882, "grad_norm": 0.7421875, "learning_rate": 1.752713424309743e-05, "loss": 1.4151, "step": 2108 }, { "epoch": 0.6627795119461295, "grad_norm": 0.71875, "learning_rate": 1.7524595366550304e-05, "loss": 1.3067, "step": 2110 }, { "epoch": 0.6634077389716709, "grad_norm": 0.70703125, "learning_rate": 1.7522056490003175e-05, "loss": 1.3295, "step": 2112 }, { "epoch": 0.6640359659972123, "grad_norm": 0.7421875, "learning_rate": 1.7519517613456047e-05, "loss": 1.3994, "step": 2114 }, { "epoch": 0.6646641930227536, "grad_norm": 0.79296875, "learning_rate": 1.7516978736908918e-05, "loss": 1.3512, "step": 2116 }, { "epoch": 0.6652924200482949, "grad_norm": 0.71484375, "learning_rate": 1.7514439860361793e-05, "loss": 1.2393, "step": 2118 }, { "epoch": 0.6659206470738364, "grad_norm": 0.7734375, "learning_rate": 1.751190098381466e-05, "loss": 1.2977, "step": 2120 }, { "epoch": 0.6665488740993777, "grad_norm": 0.73828125, "learning_rate": 1.7509362107267536e-05, "loss": 1.4039, "step": 2122 }, { "epoch": 0.667177101124919, "grad_norm": 0.7265625, "learning_rate": 1.7506823230720407e-05, "loss": 1.3294, "step": 2124 }, { "epoch": 0.6678053281504603, "grad_norm": 0.69921875, "learning_rate": 1.750428435417328e-05, "loss": 1.2816, "step": 2126 }, { "epoch": 0.6684335551760018, "grad_norm": 0.75, "learning_rate": 1.750174547762615e-05, "loss": 1.3298, "step": 2128 }, { "epoch": 0.6690617822015431, "grad_norm": 0.6796875, "learning_rate": 1.7499206601079025e-05, "loss": 1.3823, "step": 2130 }, { "epoch": 0.6696900092270844, "grad_norm": 0.72265625, "learning_rate": 1.7496667724531896e-05, "loss": 1.2973, "step": 2132 }, { "epoch": 0.6703182362526258, "grad_norm": 0.67578125, "learning_rate": 1.7494128847984767e-05, "loss": 1.3873, "step": 2134 }, { "epoch": 0.6709464632781671, "grad_norm": 0.71484375, "learning_rate": 1.7491589971437642e-05, "loss": 1.3746, "step": 2136 }, { "epoch": 0.6715746903037085, "grad_norm": 0.71875, "learning_rate": 1.7489051094890514e-05, "loss": 1.2803, "step": 2138 }, { "epoch": 0.6722029173292499, "grad_norm": 0.78515625, "learning_rate": 1.7486512218343385e-05, "loss": 1.3632, "step": 2140 }, { "epoch": 0.6728311443547912, "grad_norm": 0.75, "learning_rate": 1.7483973341796256e-05, "loss": 1.3377, "step": 2142 }, { "epoch": 0.6734593713803325, "grad_norm": 0.69921875, "learning_rate": 1.748143446524913e-05, "loss": 1.2896, "step": 2144 }, { "epoch": 0.674087598405874, "grad_norm": 0.890625, "learning_rate": 1.7478895588702e-05, "loss": 1.2543, "step": 2146 }, { "epoch": 0.6747158254314153, "grad_norm": 0.87109375, "learning_rate": 1.7476356712154874e-05, "loss": 1.2882, "step": 2148 }, { "epoch": 0.6753440524569566, "grad_norm": 0.86328125, "learning_rate": 1.7473817835607745e-05, "loss": 1.3234, "step": 2150 }, { "epoch": 0.6759722794824979, "grad_norm": 0.75390625, "learning_rate": 1.7471278959060617e-05, "loss": 1.2965, "step": 2152 }, { "epoch": 0.6766005065080394, "grad_norm": 0.67578125, "learning_rate": 1.7468740082513488e-05, "loss": 1.4172, "step": 2154 }, { "epoch": 0.6772287335335807, "grad_norm": 0.73828125, "learning_rate": 1.7466201205966363e-05, "loss": 1.3369, "step": 2156 }, { "epoch": 0.677856960559122, "grad_norm": 0.7265625, "learning_rate": 1.7463662329419234e-05, "loss": 1.3239, "step": 2158 }, { "epoch": 0.6784851875846634, "grad_norm": 0.7265625, "learning_rate": 1.7461123452872105e-05, "loss": 1.2926, "step": 2160 }, { "epoch": 0.6791134146102048, "grad_norm": 0.83203125, "learning_rate": 1.7458584576324977e-05, "loss": 1.3588, "step": 2162 }, { "epoch": 0.6797416416357461, "grad_norm": 0.87109375, "learning_rate": 1.745604569977785e-05, "loss": 1.1972, "step": 2164 }, { "epoch": 0.6803698686612875, "grad_norm": 0.71484375, "learning_rate": 1.7453506823230723e-05, "loss": 1.2391, "step": 2166 }, { "epoch": 0.6809980956868288, "grad_norm": 0.82421875, "learning_rate": 1.7450967946683594e-05, "loss": 1.3438, "step": 2168 }, { "epoch": 0.6816263227123702, "grad_norm": 0.72265625, "learning_rate": 1.7448429070136466e-05, "loss": 1.4117, "step": 2170 }, { "epoch": 0.6822545497379116, "grad_norm": 0.79296875, "learning_rate": 1.7445890193589337e-05, "loss": 1.388, "step": 2172 }, { "epoch": 0.6828827767634529, "grad_norm": 0.6875, "learning_rate": 1.744335131704221e-05, "loss": 1.3602, "step": 2174 }, { "epoch": 0.6835110037889942, "grad_norm": 1.0546875, "learning_rate": 1.7440812440495083e-05, "loss": 1.2999, "step": 2176 }, { "epoch": 0.6841392308145356, "grad_norm": 0.828125, "learning_rate": 1.7438273563947955e-05, "loss": 1.3296, "step": 2178 }, { "epoch": 0.684767457840077, "grad_norm": 0.78125, "learning_rate": 1.7435734687400826e-05, "loss": 1.302, "step": 2180 }, { "epoch": 0.6853956848656183, "grad_norm": 0.73046875, "learning_rate": 1.7433195810853697e-05, "loss": 1.321, "step": 2182 }, { "epoch": 0.6860239118911596, "grad_norm": 0.78515625, "learning_rate": 1.7430656934306572e-05, "loss": 1.3628, "step": 2184 }, { "epoch": 0.6866521389167011, "grad_norm": 0.8671875, "learning_rate": 1.7428118057759444e-05, "loss": 1.3183, "step": 2186 }, { "epoch": 0.6872803659422424, "grad_norm": 0.921875, "learning_rate": 1.7425579181212315e-05, "loss": 1.4956, "step": 2188 }, { "epoch": 0.6879085929677837, "grad_norm": 0.7265625, "learning_rate": 1.7423040304665186e-05, "loss": 1.4264, "step": 2190 }, { "epoch": 0.688536819993325, "grad_norm": 0.765625, "learning_rate": 1.742050142811806e-05, "loss": 1.3176, "step": 2192 }, { "epoch": 0.6891650470188665, "grad_norm": 0.78515625, "learning_rate": 1.741796255157093e-05, "loss": 1.3268, "step": 2194 }, { "epoch": 0.6897932740444078, "grad_norm": 1.046875, "learning_rate": 1.7415423675023804e-05, "loss": 1.346, "step": 2196 }, { "epoch": 0.6904215010699492, "grad_norm": 0.80078125, "learning_rate": 1.7412884798476675e-05, "loss": 1.3614, "step": 2198 }, { "epoch": 0.6910497280954905, "grad_norm": 0.7265625, "learning_rate": 1.7410345921929547e-05, "loss": 1.2779, "step": 2200 }, { "epoch": 0.6916779551210319, "grad_norm": 0.7265625, "learning_rate": 1.7407807045382418e-05, "loss": 1.2913, "step": 2202 }, { "epoch": 0.6923061821465732, "grad_norm": 0.86328125, "learning_rate": 1.7405268168835293e-05, "loss": 1.3669, "step": 2204 }, { "epoch": 0.6929344091721146, "grad_norm": 0.71484375, "learning_rate": 1.7402729292288164e-05, "loss": 1.2992, "step": 2206 }, { "epoch": 0.6935626361976559, "grad_norm": 0.80078125, "learning_rate": 1.7400190415741036e-05, "loss": 1.3157, "step": 2208 }, { "epoch": 0.6941908632231972, "grad_norm": 0.828125, "learning_rate": 1.7397651539193907e-05, "loss": 1.1712, "step": 2210 }, { "epoch": 0.6948190902487387, "grad_norm": 0.78515625, "learning_rate": 1.7395112662646782e-05, "loss": 1.1813, "step": 2212 }, { "epoch": 0.69544731727428, "grad_norm": 0.76171875, "learning_rate": 1.739257378609965e-05, "loss": 1.3688, "step": 2214 }, { "epoch": 0.6960755442998213, "grad_norm": 0.9453125, "learning_rate": 1.7390034909552525e-05, "loss": 1.3554, "step": 2216 }, { "epoch": 0.6967037713253627, "grad_norm": 0.87890625, "learning_rate": 1.7387496033005396e-05, "loss": 1.3605, "step": 2218 }, { "epoch": 0.6973319983509041, "grad_norm": 0.890625, "learning_rate": 1.7384957156458267e-05, "loss": 1.2138, "step": 2220 }, { "epoch": 0.6979602253764454, "grad_norm": 0.8515625, "learning_rate": 1.7382418279911142e-05, "loss": 1.309, "step": 2222 }, { "epoch": 0.6985884524019867, "grad_norm": 0.78515625, "learning_rate": 1.7379879403364013e-05, "loss": 1.2578, "step": 2224 }, { "epoch": 0.6992166794275281, "grad_norm": 0.78515625, "learning_rate": 1.7377340526816885e-05, "loss": 1.3457, "step": 2226 }, { "epoch": 0.6998449064530695, "grad_norm": 0.7890625, "learning_rate": 1.7374801650269756e-05, "loss": 1.2938, "step": 2228 }, { "epoch": 0.7004731334786108, "grad_norm": 0.8984375, "learning_rate": 1.737226277372263e-05, "loss": 1.284, "step": 2230 }, { "epoch": 0.7011013605041522, "grad_norm": 0.72265625, "learning_rate": 1.7369723897175502e-05, "loss": 1.415, "step": 2232 }, { "epoch": 0.7017295875296935, "grad_norm": 0.94921875, "learning_rate": 1.7367185020628374e-05, "loss": 1.2291, "step": 2234 }, { "epoch": 0.7023578145552349, "grad_norm": 0.74609375, "learning_rate": 1.7364646144081245e-05, "loss": 1.374, "step": 2236 }, { "epoch": 0.7029860415807763, "grad_norm": 0.6953125, "learning_rate": 1.736210726753412e-05, "loss": 1.4697, "step": 2238 }, { "epoch": 0.7036142686063176, "grad_norm": 0.71484375, "learning_rate": 1.7359568390986988e-05, "loss": 1.2784, "step": 2240 }, { "epoch": 0.7042424956318589, "grad_norm": 0.73828125, "learning_rate": 1.7357029514439863e-05, "loss": 1.2381, "step": 2242 }, { "epoch": 0.7048707226574004, "grad_norm": 0.78125, "learning_rate": 1.7354490637892734e-05, "loss": 1.2173, "step": 2244 }, { "epoch": 0.7054989496829417, "grad_norm": 0.77734375, "learning_rate": 1.7351951761345605e-05, "loss": 1.2839, "step": 2246 }, { "epoch": 0.706127176708483, "grad_norm": 0.6953125, "learning_rate": 1.7349412884798477e-05, "loss": 1.3768, "step": 2248 }, { "epoch": 0.7067554037340243, "grad_norm": 0.81640625, "learning_rate": 1.734687400825135e-05, "loss": 1.3607, "step": 2250 }, { "epoch": 0.7073836307595658, "grad_norm": 0.703125, "learning_rate": 1.7344335131704223e-05, "loss": 1.3943, "step": 2252 }, { "epoch": 0.7080118577851071, "grad_norm": 0.6875, "learning_rate": 1.7341796255157094e-05, "loss": 1.4092, "step": 2254 }, { "epoch": 0.7086400848106484, "grad_norm": 0.75, "learning_rate": 1.7339257378609966e-05, "loss": 1.2429, "step": 2256 }, { "epoch": 0.7092683118361898, "grad_norm": 0.7109375, "learning_rate": 1.733671850206284e-05, "loss": 1.458, "step": 2258 }, { "epoch": 0.7098965388617312, "grad_norm": 0.67578125, "learning_rate": 1.7334179625515712e-05, "loss": 1.3227, "step": 2260 }, { "epoch": 0.7105247658872725, "grad_norm": 0.79296875, "learning_rate": 1.7331640748968583e-05, "loss": 1.4453, "step": 2262 }, { "epoch": 0.7111529929128139, "grad_norm": 0.74609375, "learning_rate": 1.7329101872421455e-05, "loss": 1.2725, "step": 2264 }, { "epoch": 0.7117812199383552, "grad_norm": 0.74609375, "learning_rate": 1.7326562995874326e-05, "loss": 1.2165, "step": 2266 }, { "epoch": 0.7124094469638966, "grad_norm": 0.76171875, "learning_rate": 1.7324024119327197e-05, "loss": 1.4287, "step": 2268 }, { "epoch": 0.713037673989438, "grad_norm": 0.6484375, "learning_rate": 1.7321485242780072e-05, "loss": 1.2783, "step": 2270 }, { "epoch": 0.7136659010149793, "grad_norm": 0.6875, "learning_rate": 1.7318946366232944e-05, "loss": 1.3641, "step": 2272 }, { "epoch": 0.7142941280405206, "grad_norm": 0.7109375, "learning_rate": 1.7316407489685815e-05, "loss": 1.3313, "step": 2274 }, { "epoch": 0.7149223550660619, "grad_norm": 0.68359375, "learning_rate": 1.7313868613138686e-05, "loss": 1.3461, "step": 2276 }, { "epoch": 0.7155505820916034, "grad_norm": 0.6953125, "learning_rate": 1.731132973659156e-05, "loss": 1.3159, "step": 2278 }, { "epoch": 0.7161788091171447, "grad_norm": 0.765625, "learning_rate": 1.7308790860044432e-05, "loss": 1.2575, "step": 2280 }, { "epoch": 0.716807036142686, "grad_norm": 0.65625, "learning_rate": 1.7306251983497304e-05, "loss": 1.3816, "step": 2282 }, { "epoch": 0.7174352631682274, "grad_norm": 0.71875, "learning_rate": 1.7303713106950175e-05, "loss": 1.4548, "step": 2284 }, { "epoch": 0.7180634901937688, "grad_norm": 0.83984375, "learning_rate": 1.730117423040305e-05, "loss": 1.2777, "step": 2286 }, { "epoch": 0.7186917172193101, "grad_norm": 0.7421875, "learning_rate": 1.7298635353855918e-05, "loss": 1.3142, "step": 2288 }, { "epoch": 0.7193199442448515, "grad_norm": 0.7890625, "learning_rate": 1.7296096477308793e-05, "loss": 1.2618, "step": 2290 }, { "epoch": 0.7199481712703928, "grad_norm": 0.70703125, "learning_rate": 1.7293557600761664e-05, "loss": 1.3586, "step": 2292 }, { "epoch": 0.7205763982959342, "grad_norm": 0.77734375, "learning_rate": 1.7291018724214536e-05, "loss": 1.2284, "step": 2294 }, { "epoch": 0.7212046253214756, "grad_norm": 0.76953125, "learning_rate": 1.7288479847667407e-05, "loss": 1.3143, "step": 2296 }, { "epoch": 0.7218328523470169, "grad_norm": 0.73828125, "learning_rate": 1.728594097112028e-05, "loss": 1.2988, "step": 2298 }, { "epoch": 0.7224610793725582, "grad_norm": 0.78125, "learning_rate": 1.7283402094573153e-05, "loss": 1.3754, "step": 2300 }, { "epoch": 0.7230893063980997, "grad_norm": 0.69140625, "learning_rate": 1.7280863218026024e-05, "loss": 1.3633, "step": 2302 }, { "epoch": 0.723717533423641, "grad_norm": 0.71875, "learning_rate": 1.7278324341478896e-05, "loss": 1.4773, "step": 2304 }, { "epoch": 0.7243457604491823, "grad_norm": 0.77734375, "learning_rate": 1.727578546493177e-05, "loss": 1.351, "step": 2306 }, { "epoch": 0.7249739874747236, "grad_norm": 0.73828125, "learning_rate": 1.7273246588384642e-05, "loss": 1.3577, "step": 2308 }, { "epoch": 0.7256022145002651, "grad_norm": 0.81640625, "learning_rate": 1.7270707711837513e-05, "loss": 1.2883, "step": 2310 }, { "epoch": 0.7262304415258064, "grad_norm": 0.67578125, "learning_rate": 1.7268168835290388e-05, "loss": 1.4049, "step": 2312 }, { "epoch": 0.7268586685513477, "grad_norm": 0.6796875, "learning_rate": 1.7265629958743256e-05, "loss": 1.3443, "step": 2314 }, { "epoch": 0.7274868955768891, "grad_norm": 0.90234375, "learning_rate": 1.726309108219613e-05, "loss": 1.2131, "step": 2316 }, { "epoch": 0.7281151226024305, "grad_norm": 0.71875, "learning_rate": 1.7260552205649002e-05, "loss": 1.353, "step": 2318 }, { "epoch": 0.7287433496279718, "grad_norm": 0.73828125, "learning_rate": 1.7258013329101874e-05, "loss": 1.2911, "step": 2320 }, { "epoch": 0.7293715766535132, "grad_norm": 0.90625, "learning_rate": 1.7255474452554745e-05, "loss": 1.3567, "step": 2322 }, { "epoch": 0.7299998036790545, "grad_norm": 0.70703125, "learning_rate": 1.725293557600762e-05, "loss": 1.3589, "step": 2324 }, { "epoch": 0.7306280307045959, "grad_norm": 0.87109375, "learning_rate": 1.725039669946049e-05, "loss": 1.3734, "step": 2326 }, { "epoch": 0.7312562577301372, "grad_norm": 0.6484375, "learning_rate": 1.7247857822913363e-05, "loss": 1.3007, "step": 2328 }, { "epoch": 0.7318844847556786, "grad_norm": 0.80859375, "learning_rate": 1.7245318946366234e-05, "loss": 1.3652, "step": 2330 }, { "epoch": 0.7325127117812199, "grad_norm": 0.72265625, "learning_rate": 1.724278006981911e-05, "loss": 1.3929, "step": 2332 }, { "epoch": 0.7331409388067613, "grad_norm": 0.6640625, "learning_rate": 1.7240241193271977e-05, "loss": 1.2893, "step": 2334 }, { "epoch": 0.7337691658323027, "grad_norm": 0.77734375, "learning_rate": 1.723770231672485e-05, "loss": 1.4986, "step": 2336 }, { "epoch": 0.734397392857844, "grad_norm": 0.6875, "learning_rate": 1.7235163440177723e-05, "loss": 1.3224, "step": 2338 }, { "epoch": 0.7350256198833853, "grad_norm": 0.77734375, "learning_rate": 1.7232624563630594e-05, "loss": 1.422, "step": 2340 }, { "epoch": 0.7356538469089268, "grad_norm": 0.703125, "learning_rate": 1.7230085687083466e-05, "loss": 1.4021, "step": 2342 }, { "epoch": 0.7362820739344681, "grad_norm": 0.67578125, "learning_rate": 1.722754681053634e-05, "loss": 1.3948, "step": 2344 }, { "epoch": 0.7369103009600094, "grad_norm": 0.73046875, "learning_rate": 1.7225007933989212e-05, "loss": 1.2958, "step": 2346 }, { "epoch": 0.7375385279855508, "grad_norm": 0.734375, "learning_rate": 1.7222469057442083e-05, "loss": 1.2972, "step": 2348 }, { "epoch": 0.7381667550110921, "grad_norm": 0.68359375, "learning_rate": 1.7219930180894955e-05, "loss": 1.3356, "step": 2350 }, { "epoch": 0.7387949820366335, "grad_norm": 0.82421875, "learning_rate": 1.721739130434783e-05, "loss": 1.2247, "step": 2352 }, { "epoch": 0.7394232090621748, "grad_norm": 0.70703125, "learning_rate": 1.7214852427800697e-05, "loss": 1.3243, "step": 2354 }, { "epoch": 0.7400514360877162, "grad_norm": 0.7265625, "learning_rate": 1.7212313551253572e-05, "loss": 1.4064, "step": 2356 }, { "epoch": 0.7406796631132575, "grad_norm": 0.77734375, "learning_rate": 1.7209774674706443e-05, "loss": 1.4806, "step": 2358 }, { "epoch": 0.7413078901387989, "grad_norm": 0.85546875, "learning_rate": 1.7207235798159315e-05, "loss": 1.3769, "step": 2360 }, { "epoch": 0.7419361171643403, "grad_norm": 0.71875, "learning_rate": 1.7204696921612186e-05, "loss": 1.2256, "step": 2362 }, { "epoch": 0.7425643441898816, "grad_norm": 0.78125, "learning_rate": 1.720215804506506e-05, "loss": 1.389, "step": 2364 }, { "epoch": 0.7431925712154229, "grad_norm": 0.6796875, "learning_rate": 1.7199619168517932e-05, "loss": 1.4362, "step": 2366 }, { "epoch": 0.7438207982409644, "grad_norm": 0.8984375, "learning_rate": 1.7197080291970804e-05, "loss": 1.4191, "step": 2368 }, { "epoch": 0.7444490252665057, "grad_norm": 0.7265625, "learning_rate": 1.7194541415423675e-05, "loss": 1.3115, "step": 2370 }, { "epoch": 0.745077252292047, "grad_norm": 0.7578125, "learning_rate": 1.719200253887655e-05, "loss": 1.4019, "step": 2372 }, { "epoch": 0.7457054793175883, "grad_norm": 0.734375, "learning_rate": 1.718946366232942e-05, "loss": 1.3587, "step": 2374 }, { "epoch": 0.7463337063431298, "grad_norm": 0.87109375, "learning_rate": 1.7186924785782293e-05, "loss": 1.3749, "step": 2376 }, { "epoch": 0.7469619333686711, "grad_norm": 0.6875, "learning_rate": 1.7184385909235164e-05, "loss": 1.3042, "step": 2378 }, { "epoch": 0.7475901603942124, "grad_norm": 0.73828125, "learning_rate": 1.7181847032688035e-05, "loss": 1.2356, "step": 2380 }, { "epoch": 0.7482183874197538, "grad_norm": 0.7734375, "learning_rate": 1.7179308156140907e-05, "loss": 1.2864, "step": 2382 }, { "epoch": 0.7488466144452952, "grad_norm": 0.69921875, "learning_rate": 1.717676927959378e-05, "loss": 1.3995, "step": 2384 }, { "epoch": 0.7494748414708365, "grad_norm": 0.78125, "learning_rate": 1.7174230403046653e-05, "loss": 1.2924, "step": 2386 }, { "epoch": 0.7501030684963779, "grad_norm": 0.81640625, "learning_rate": 1.7171691526499524e-05, "loss": 1.2801, "step": 2388 }, { "epoch": 0.7507312955219192, "grad_norm": 0.7890625, "learning_rate": 1.7169152649952396e-05, "loss": 1.2726, "step": 2390 }, { "epoch": 0.7513595225474606, "grad_norm": 0.734375, "learning_rate": 1.716661377340527e-05, "loss": 1.35, "step": 2392 }, { "epoch": 0.751987749573002, "grad_norm": 0.796875, "learning_rate": 1.7164074896858142e-05, "loss": 1.2783, "step": 2394 }, { "epoch": 0.7526159765985433, "grad_norm": 0.78515625, "learning_rate": 1.7161536020311013e-05, "loss": 1.3665, "step": 2396 }, { "epoch": 0.7532442036240846, "grad_norm": 0.98046875, "learning_rate": 1.7158997143763888e-05, "loss": 1.3679, "step": 2398 }, { "epoch": 0.7538724306496261, "grad_norm": 0.78515625, "learning_rate": 1.715645826721676e-05, "loss": 1.3874, "step": 2400 }, { "epoch": 0.7545006576751674, "grad_norm": 0.75390625, "learning_rate": 1.715391939066963e-05, "loss": 1.2631, "step": 2402 }, { "epoch": 0.7551288847007087, "grad_norm": 0.796875, "learning_rate": 1.7151380514122502e-05, "loss": 1.2159, "step": 2404 }, { "epoch": 0.75575711172625, "grad_norm": 0.74609375, "learning_rate": 1.7148841637575374e-05, "loss": 1.3067, "step": 2406 }, { "epoch": 0.7563853387517915, "grad_norm": 0.7109375, "learning_rate": 1.7146302761028245e-05, "loss": 1.3503, "step": 2408 }, { "epoch": 0.7570135657773328, "grad_norm": 0.7421875, "learning_rate": 1.714376388448112e-05, "loss": 1.369, "step": 2410 }, { "epoch": 0.7576417928028741, "grad_norm": 0.88671875, "learning_rate": 1.714122500793399e-05, "loss": 1.2634, "step": 2412 }, { "epoch": 0.7582700198284155, "grad_norm": 0.75390625, "learning_rate": 1.7138686131386862e-05, "loss": 1.2631, "step": 2414 }, { "epoch": 0.7588982468539569, "grad_norm": 0.72265625, "learning_rate": 1.7136147254839734e-05, "loss": 1.33, "step": 2416 }, { "epoch": 0.7595264738794982, "grad_norm": 0.72265625, "learning_rate": 1.713360837829261e-05, "loss": 1.3229, "step": 2418 }, { "epoch": 0.7601547009050396, "grad_norm": 1.1640625, "learning_rate": 1.713106950174548e-05, "loss": 1.2857, "step": 2420 }, { "epoch": 0.7607829279305809, "grad_norm": 0.875, "learning_rate": 1.712853062519835e-05, "loss": 1.3812, "step": 2422 }, { "epoch": 0.7614111549561222, "grad_norm": 0.6953125, "learning_rate": 1.7125991748651223e-05, "loss": 1.3809, "step": 2424 }, { "epoch": 0.7620393819816637, "grad_norm": 0.7890625, "learning_rate": 1.7123452872104098e-05, "loss": 1.3366, "step": 2426 }, { "epoch": 0.762667609007205, "grad_norm": 0.77734375, "learning_rate": 1.7120913995556966e-05, "loss": 1.3628, "step": 2428 }, { "epoch": 0.7632958360327463, "grad_norm": 0.8046875, "learning_rate": 1.711837511900984e-05, "loss": 1.3394, "step": 2430 }, { "epoch": 0.7639240630582876, "grad_norm": 0.7265625, "learning_rate": 1.7115836242462712e-05, "loss": 1.4378, "step": 2432 }, { "epoch": 0.7645522900838291, "grad_norm": 0.7890625, "learning_rate": 1.7113297365915583e-05, "loss": 1.1978, "step": 2434 }, { "epoch": 0.7651805171093704, "grad_norm": 0.75, "learning_rate": 1.7110758489368454e-05, "loss": 1.2939, "step": 2436 }, { "epoch": 0.7658087441349117, "grad_norm": 0.7109375, "learning_rate": 1.710821961282133e-05, "loss": 1.3248, "step": 2438 }, { "epoch": 0.7664369711604531, "grad_norm": 0.7578125, "learning_rate": 1.71056807362742e-05, "loss": 1.2087, "step": 2440 }, { "epoch": 0.7670651981859945, "grad_norm": 0.81640625, "learning_rate": 1.7103141859727072e-05, "loss": 1.1633, "step": 2442 }, { "epoch": 0.7676934252115358, "grad_norm": 1.078125, "learning_rate": 1.7100602983179943e-05, "loss": 1.2432, "step": 2444 }, { "epoch": 0.7683216522370772, "grad_norm": 0.75390625, "learning_rate": 1.7098064106632818e-05, "loss": 1.3272, "step": 2446 }, { "epoch": 0.7689498792626185, "grad_norm": 0.78125, "learning_rate": 1.7095525230085686e-05, "loss": 1.2589, "step": 2448 }, { "epoch": 0.7695781062881599, "grad_norm": 0.71484375, "learning_rate": 1.709298635353856e-05, "loss": 1.375, "step": 2450 }, { "epoch": 0.7702063333137013, "grad_norm": 0.72265625, "learning_rate": 1.7090447476991432e-05, "loss": 1.2817, "step": 2452 }, { "epoch": 0.7708345603392426, "grad_norm": 0.75390625, "learning_rate": 1.7087908600444304e-05, "loss": 1.2879, "step": 2454 }, { "epoch": 0.7714627873647839, "grad_norm": 1.234375, "learning_rate": 1.7085369723897175e-05, "loss": 1.2573, "step": 2456 }, { "epoch": 0.7720910143903253, "grad_norm": 0.7890625, "learning_rate": 1.708283084735005e-05, "loss": 1.343, "step": 2458 }, { "epoch": 0.7727192414158667, "grad_norm": 0.7109375, "learning_rate": 1.708029197080292e-05, "loss": 1.357, "step": 2460 }, { "epoch": 0.773347468441408, "grad_norm": 0.74609375, "learning_rate": 1.7077753094255793e-05, "loss": 1.3493, "step": 2462 }, { "epoch": 0.7739756954669493, "grad_norm": 0.78515625, "learning_rate": 1.7075214217708664e-05, "loss": 1.2568, "step": 2464 }, { "epoch": 0.7746039224924908, "grad_norm": 0.73828125, "learning_rate": 1.707267534116154e-05, "loss": 1.3476, "step": 2466 }, { "epoch": 0.7752321495180321, "grad_norm": 0.76171875, "learning_rate": 1.707013646461441e-05, "loss": 1.2797, "step": 2468 }, { "epoch": 0.7758603765435734, "grad_norm": 0.75390625, "learning_rate": 1.706759758806728e-05, "loss": 1.3368, "step": 2470 }, { "epoch": 0.7764886035691148, "grad_norm": 0.671875, "learning_rate": 1.7065058711520153e-05, "loss": 1.2807, "step": 2472 }, { "epoch": 0.7771168305946562, "grad_norm": 0.71484375, "learning_rate": 1.7062519834973024e-05, "loss": 1.5012, "step": 2474 }, { "epoch": 0.7777450576201975, "grad_norm": 0.734375, "learning_rate": 1.7059980958425896e-05, "loss": 1.3204, "step": 2476 }, { "epoch": 0.7783732846457388, "grad_norm": 0.8046875, "learning_rate": 1.705744208187877e-05, "loss": 1.3475, "step": 2478 }, { "epoch": 0.7790015116712802, "grad_norm": 0.796875, "learning_rate": 1.7054903205331642e-05, "loss": 1.2051, "step": 2480 }, { "epoch": 0.7796297386968216, "grad_norm": 0.68359375, "learning_rate": 1.7052364328784513e-05, "loss": 1.3502, "step": 2482 }, { "epoch": 0.780257965722363, "grad_norm": 0.9140625, "learning_rate": 1.7049825452237388e-05, "loss": 1.2337, "step": 2484 }, { "epoch": 0.7808861927479043, "grad_norm": 0.77734375, "learning_rate": 1.704728657569026e-05, "loss": 1.3524, "step": 2486 }, { "epoch": 0.7815144197734456, "grad_norm": 0.82421875, "learning_rate": 1.704474769914313e-05, "loss": 1.3843, "step": 2488 }, { "epoch": 0.7821426467989869, "grad_norm": 0.6953125, "learning_rate": 1.7042208822596002e-05, "loss": 1.3905, "step": 2490 }, { "epoch": 0.7827708738245284, "grad_norm": 0.69921875, "learning_rate": 1.7039669946048877e-05, "loss": 1.3168, "step": 2492 }, { "epoch": 0.7833991008500697, "grad_norm": 0.79296875, "learning_rate": 1.7037131069501748e-05, "loss": 1.233, "step": 2494 }, { "epoch": 0.784027327875611, "grad_norm": 0.77734375, "learning_rate": 1.703459219295462e-05, "loss": 1.3278, "step": 2496 }, { "epoch": 0.7846555549011524, "grad_norm": 0.6953125, "learning_rate": 1.703205331640749e-05, "loss": 1.2751, "step": 2498 }, { "epoch": 0.7852837819266938, "grad_norm": 0.796875, "learning_rate": 1.7029514439860362e-05, "loss": 1.3463, "step": 2500 }, { "epoch": 0.7859120089522351, "grad_norm": 0.80859375, "learning_rate": 1.7026975563313234e-05, "loss": 1.2921, "step": 2502 }, { "epoch": 0.7865402359777764, "grad_norm": 0.71484375, "learning_rate": 1.702443668676611e-05, "loss": 1.1402, "step": 2504 }, { "epoch": 0.7871684630033178, "grad_norm": 1.125, "learning_rate": 1.702189781021898e-05, "loss": 1.2382, "step": 2506 }, { "epoch": 0.7877966900288592, "grad_norm": 0.63671875, "learning_rate": 1.701935893367185e-05, "loss": 1.3848, "step": 2508 }, { "epoch": 0.7884249170544005, "grad_norm": 0.7578125, "learning_rate": 1.7016820057124723e-05, "loss": 1.2577, "step": 2510 }, { "epoch": 0.7890531440799419, "grad_norm": 0.74609375, "learning_rate": 1.7014281180577597e-05, "loss": 1.4976, "step": 2512 }, { "epoch": 0.7896813711054832, "grad_norm": 0.65234375, "learning_rate": 1.701174230403047e-05, "loss": 1.3051, "step": 2514 }, { "epoch": 0.7903095981310246, "grad_norm": 0.75, "learning_rate": 1.700920342748334e-05, "loss": 1.3637, "step": 2516 }, { "epoch": 0.790937825156566, "grad_norm": 0.828125, "learning_rate": 1.700666455093621e-05, "loss": 1.2335, "step": 2518 }, { "epoch": 0.7915660521821073, "grad_norm": 0.73828125, "learning_rate": 1.7004125674389086e-05, "loss": 1.2534, "step": 2520 }, { "epoch": 0.7921942792076486, "grad_norm": 0.78515625, "learning_rate": 1.7001586797841954e-05, "loss": 1.4272, "step": 2522 }, { "epoch": 0.7928225062331901, "grad_norm": 0.66796875, "learning_rate": 1.699904792129483e-05, "loss": 1.2296, "step": 2524 }, { "epoch": 0.7934507332587314, "grad_norm": 0.765625, "learning_rate": 1.69965090447477e-05, "loss": 1.3799, "step": 2526 }, { "epoch": 0.7940789602842727, "grad_norm": 0.625, "learning_rate": 1.6993970168200572e-05, "loss": 1.4241, "step": 2528 }, { "epoch": 0.794707187309814, "grad_norm": 0.8125, "learning_rate": 1.6991431291653443e-05, "loss": 1.2411, "step": 2530 }, { "epoch": 0.7953354143353555, "grad_norm": 1.078125, "learning_rate": 1.6988892415106318e-05, "loss": 1.3962, "step": 2532 }, { "epoch": 0.7959636413608968, "grad_norm": 0.8828125, "learning_rate": 1.698635353855919e-05, "loss": 1.3154, "step": 2534 }, { "epoch": 0.7965918683864381, "grad_norm": 0.62890625, "learning_rate": 1.698381466201206e-05, "loss": 1.3236, "step": 2536 }, { "epoch": 0.7972200954119795, "grad_norm": 0.80859375, "learning_rate": 1.6981275785464932e-05, "loss": 1.2605, "step": 2538 }, { "epoch": 0.7978483224375209, "grad_norm": 0.7578125, "learning_rate": 1.6978736908917807e-05, "loss": 1.2216, "step": 2540 }, { "epoch": 0.7984765494630622, "grad_norm": 0.6875, "learning_rate": 1.6976198032370675e-05, "loss": 1.3394, "step": 2542 }, { "epoch": 0.7991047764886036, "grad_norm": 0.73828125, "learning_rate": 1.697365915582355e-05, "loss": 1.331, "step": 2544 }, { "epoch": 0.7997330035141449, "grad_norm": 0.72265625, "learning_rate": 1.697112027927642e-05, "loss": 1.3703, "step": 2546 }, { "epoch": 0.8003612305396863, "grad_norm": 0.828125, "learning_rate": 1.6968581402729293e-05, "loss": 1.3128, "step": 2548 }, { "epoch": 0.8009894575652277, "grad_norm": 0.8125, "learning_rate": 1.6966042526182164e-05, "loss": 1.278, "step": 2550 }, { "epoch": 0.801617684590769, "grad_norm": 0.65625, "learning_rate": 1.696350364963504e-05, "loss": 1.3876, "step": 2552 }, { "epoch": 0.8022459116163103, "grad_norm": 0.71484375, "learning_rate": 1.696096477308791e-05, "loss": 1.2858, "step": 2554 }, { "epoch": 0.8028741386418518, "grad_norm": 0.6953125, "learning_rate": 1.695842589654078e-05, "loss": 1.412, "step": 2556 }, { "epoch": 0.8035023656673931, "grad_norm": 0.7109375, "learning_rate": 1.6955887019993653e-05, "loss": 1.4499, "step": 2558 }, { "epoch": 0.8041305926929344, "grad_norm": 0.9140625, "learning_rate": 1.6953348143446528e-05, "loss": 1.291, "step": 2560 }, { "epoch": 0.8047588197184757, "grad_norm": 0.90625, "learning_rate": 1.69508092668994e-05, "loss": 1.4154, "step": 2562 }, { "epoch": 0.8053870467440171, "grad_norm": 0.82421875, "learning_rate": 1.694827039035227e-05, "loss": 1.4474, "step": 2564 }, { "epoch": 0.8060152737695585, "grad_norm": 0.79296875, "learning_rate": 1.6945731513805145e-05, "loss": 1.3263, "step": 2566 }, { "epoch": 0.8066435007950998, "grad_norm": 0.84375, "learning_rate": 1.6943192637258013e-05, "loss": 1.3238, "step": 2568 }, { "epoch": 0.8072717278206412, "grad_norm": 0.83984375, "learning_rate": 1.6940653760710888e-05, "loss": 1.4225, "step": 2570 }, { "epoch": 0.8078999548461825, "grad_norm": 0.70703125, "learning_rate": 1.693811488416376e-05, "loss": 1.2038, "step": 2572 }, { "epoch": 0.8085281818717239, "grad_norm": 0.8359375, "learning_rate": 1.693557600761663e-05, "loss": 1.1913, "step": 2574 }, { "epoch": 0.8091564088972653, "grad_norm": 0.76953125, "learning_rate": 1.6933037131069502e-05, "loss": 1.3431, "step": 2576 }, { "epoch": 0.8097846359228066, "grad_norm": 0.87890625, "learning_rate": 1.6930498254522377e-05, "loss": 1.3336, "step": 2578 }, { "epoch": 0.8104128629483479, "grad_norm": 0.87890625, "learning_rate": 1.6927959377975248e-05, "loss": 1.2205, "step": 2580 }, { "epoch": 0.8110410899738894, "grad_norm": 0.69921875, "learning_rate": 1.692542050142812e-05, "loss": 1.3004, "step": 2582 }, { "epoch": 0.8116693169994307, "grad_norm": 0.75390625, "learning_rate": 1.692288162488099e-05, "loss": 1.3125, "step": 2584 }, { "epoch": 0.812297544024972, "grad_norm": 0.6953125, "learning_rate": 1.6920342748333866e-05, "loss": 1.4572, "step": 2586 }, { "epoch": 0.8129257710505133, "grad_norm": 0.74609375, "learning_rate": 1.6917803871786737e-05, "loss": 1.2809, "step": 2588 }, { "epoch": 0.8135539980760548, "grad_norm": 0.66796875, "learning_rate": 1.691526499523961e-05, "loss": 1.2979, "step": 2590 }, { "epoch": 0.8141822251015961, "grad_norm": 0.890625, "learning_rate": 1.691272611869248e-05, "loss": 1.3751, "step": 2592 }, { "epoch": 0.8148104521271374, "grad_norm": 0.8125, "learning_rate": 1.691018724214535e-05, "loss": 1.3556, "step": 2594 }, { "epoch": 0.8154386791526788, "grad_norm": 0.734375, "learning_rate": 1.6907648365598223e-05, "loss": 1.2648, "step": 2596 }, { "epoch": 0.8160669061782202, "grad_norm": 0.77734375, "learning_rate": 1.6905109489051097e-05, "loss": 1.3499, "step": 2598 }, { "epoch": 0.8166951332037615, "grad_norm": 0.8359375, "learning_rate": 1.690257061250397e-05, "loss": 1.3424, "step": 2600 }, { "epoch": 0.8173233602293029, "grad_norm": 0.72265625, "learning_rate": 1.690003173595684e-05, "loss": 1.3746, "step": 2602 }, { "epoch": 0.8179515872548442, "grad_norm": 0.78515625, "learning_rate": 1.689749285940971e-05, "loss": 1.3152, "step": 2604 }, { "epoch": 0.8185798142803856, "grad_norm": 0.7109375, "learning_rate": 1.6894953982862586e-05, "loss": 1.3755, "step": 2606 }, { "epoch": 0.819208041305927, "grad_norm": 0.84765625, "learning_rate": 1.6892415106315458e-05, "loss": 1.2247, "step": 2608 }, { "epoch": 0.8198362683314683, "grad_norm": 0.69921875, "learning_rate": 1.688987622976833e-05, "loss": 1.4328, "step": 2610 }, { "epoch": 0.8204644953570096, "grad_norm": 0.6796875, "learning_rate": 1.68873373532212e-05, "loss": 1.2965, "step": 2612 }, { "epoch": 0.821092722382551, "grad_norm": 0.91015625, "learning_rate": 1.6884798476674075e-05, "loss": 1.2175, "step": 2614 }, { "epoch": 0.8217209494080924, "grad_norm": 0.8828125, "learning_rate": 1.6882259600126943e-05, "loss": 1.1868, "step": 2616 }, { "epoch": 0.8223491764336337, "grad_norm": 0.9296875, "learning_rate": 1.6879720723579818e-05, "loss": 1.331, "step": 2618 }, { "epoch": 0.822977403459175, "grad_norm": 0.69140625, "learning_rate": 1.687718184703269e-05, "loss": 1.3342, "step": 2620 }, { "epoch": 0.8236056304847165, "grad_norm": 0.68359375, "learning_rate": 1.687464297048556e-05, "loss": 1.3036, "step": 2622 }, { "epoch": 0.8242338575102578, "grad_norm": 0.75390625, "learning_rate": 1.6872104093938432e-05, "loss": 1.2481, "step": 2624 }, { "epoch": 0.8248620845357991, "grad_norm": 0.703125, "learning_rate": 1.6869565217391307e-05, "loss": 1.3175, "step": 2626 }, { "epoch": 0.8254903115613405, "grad_norm": 0.97265625, "learning_rate": 1.686702634084418e-05, "loss": 1.3181, "step": 2628 }, { "epoch": 0.8261185385868819, "grad_norm": 0.7421875, "learning_rate": 1.686448746429705e-05, "loss": 1.3106, "step": 2630 }, { "epoch": 0.8267467656124232, "grad_norm": 0.82421875, "learning_rate": 1.686194858774992e-05, "loss": 1.3216, "step": 2632 }, { "epoch": 0.8273749926379645, "grad_norm": 0.85546875, "learning_rate": 1.6859409711202796e-05, "loss": 1.3221, "step": 2634 }, { "epoch": 0.8280032196635059, "grad_norm": 0.7734375, "learning_rate": 1.6856870834655664e-05, "loss": 1.3614, "step": 2636 }, { "epoch": 0.8286314466890472, "grad_norm": 0.7890625, "learning_rate": 1.685433195810854e-05, "loss": 1.3956, "step": 2638 }, { "epoch": 0.8292596737145886, "grad_norm": 0.6875, "learning_rate": 1.685179308156141e-05, "loss": 1.1662, "step": 2640 }, { "epoch": 0.82988790074013, "grad_norm": 0.76953125, "learning_rate": 1.684925420501428e-05, "loss": 1.2505, "step": 2642 }, { "epoch": 0.8305161277656713, "grad_norm": 0.86328125, "learning_rate": 1.6846715328467153e-05, "loss": 1.251, "step": 2644 }, { "epoch": 0.8311443547912126, "grad_norm": 0.78515625, "learning_rate": 1.6844176451920028e-05, "loss": 1.398, "step": 2646 }, { "epoch": 0.8317725818167541, "grad_norm": 0.79296875, "learning_rate": 1.68416375753729e-05, "loss": 1.2618, "step": 2648 }, { "epoch": 0.8324008088422954, "grad_norm": 0.66796875, "learning_rate": 1.683909869882577e-05, "loss": 1.3516, "step": 2650 }, { "epoch": 0.8330290358678367, "grad_norm": 0.74609375, "learning_rate": 1.6836559822278645e-05, "loss": 1.4359, "step": 2652 }, { "epoch": 0.833657262893378, "grad_norm": 0.703125, "learning_rate": 1.6834020945731516e-05, "loss": 1.3158, "step": 2654 }, { "epoch": 0.8342854899189195, "grad_norm": 0.81640625, "learning_rate": 1.6831482069184388e-05, "loss": 1.2849, "step": 2656 }, { "epoch": 0.8349137169444608, "grad_norm": 0.734375, "learning_rate": 1.682894319263726e-05, "loss": 1.4921, "step": 2658 }, { "epoch": 0.8355419439700021, "grad_norm": 0.94921875, "learning_rate": 1.6826404316090134e-05, "loss": 1.2774, "step": 2660 }, { "epoch": 0.8361701709955435, "grad_norm": 0.78125, "learning_rate": 1.6823865439543002e-05, "loss": 1.3282, "step": 2662 }, { "epoch": 0.8367983980210849, "grad_norm": 0.75, "learning_rate": 1.6821326562995877e-05, "loss": 1.2604, "step": 2664 }, { "epoch": 0.8374266250466262, "grad_norm": 0.75390625, "learning_rate": 1.6818787686448748e-05, "loss": 1.2322, "step": 2666 }, { "epoch": 0.8380548520721676, "grad_norm": 0.75, "learning_rate": 1.681624880990162e-05, "loss": 1.3847, "step": 2668 }, { "epoch": 0.8386830790977089, "grad_norm": 1.0, "learning_rate": 1.681370993335449e-05, "loss": 1.2521, "step": 2670 }, { "epoch": 0.8393113061232503, "grad_norm": 0.73046875, "learning_rate": 1.6811171056807366e-05, "loss": 1.4187, "step": 2672 }, { "epoch": 0.8399395331487917, "grad_norm": 0.7109375, "learning_rate": 1.6808632180260237e-05, "loss": 1.195, "step": 2674 }, { "epoch": 0.840567760174333, "grad_norm": 1.015625, "learning_rate": 1.680609330371311e-05, "loss": 1.3454, "step": 2676 }, { "epoch": 0.8411959871998743, "grad_norm": 0.78515625, "learning_rate": 1.680355442716598e-05, "loss": 1.453, "step": 2678 }, { "epoch": 0.8418242142254158, "grad_norm": 0.68359375, "learning_rate": 1.6801015550618855e-05, "loss": 1.4218, "step": 2680 }, { "epoch": 0.8424524412509571, "grad_norm": 0.85546875, "learning_rate": 1.6798476674071723e-05, "loss": 1.4194, "step": 2682 }, { "epoch": 0.8430806682764984, "grad_norm": 0.80859375, "learning_rate": 1.6795937797524597e-05, "loss": 1.3225, "step": 2684 }, { "epoch": 0.8437088953020397, "grad_norm": 0.7578125, "learning_rate": 1.679339892097747e-05, "loss": 1.205, "step": 2686 }, { "epoch": 0.8443371223275812, "grad_norm": 1.046875, "learning_rate": 1.679086004443034e-05, "loss": 1.425, "step": 2688 }, { "epoch": 0.8449653493531225, "grad_norm": 0.6875, "learning_rate": 1.678832116788321e-05, "loss": 1.3743, "step": 2690 }, { "epoch": 0.8455935763786638, "grad_norm": 0.83203125, "learning_rate": 1.6785782291336086e-05, "loss": 1.2462, "step": 2692 }, { "epoch": 0.8462218034042052, "grad_norm": 0.671875, "learning_rate": 1.6783243414788958e-05, "loss": 1.3989, "step": 2694 }, { "epoch": 0.8468500304297466, "grad_norm": 0.76953125, "learning_rate": 1.678070453824183e-05, "loss": 1.4101, "step": 2696 }, { "epoch": 0.8474782574552879, "grad_norm": 0.71484375, "learning_rate": 1.67781656616947e-05, "loss": 1.2639, "step": 2698 }, { "epoch": 0.8481064844808293, "grad_norm": 0.79296875, "learning_rate": 1.6775626785147575e-05, "loss": 1.3388, "step": 2700 }, { "epoch": 0.8487347115063706, "grad_norm": 0.78515625, "learning_rate": 1.6773087908600447e-05, "loss": 1.363, "step": 2702 }, { "epoch": 0.8493629385319119, "grad_norm": 0.828125, "learning_rate": 1.6770549032053318e-05, "loss": 1.2831, "step": 2704 }, { "epoch": 0.8499911655574534, "grad_norm": 0.7109375, "learning_rate": 1.676801015550619e-05, "loss": 1.2638, "step": 2706 }, { "epoch": 0.8506193925829947, "grad_norm": 0.6875, "learning_rate": 1.676547127895906e-05, "loss": 1.3733, "step": 2708 }, { "epoch": 0.851247619608536, "grad_norm": 0.6796875, "learning_rate": 1.6762932402411932e-05, "loss": 1.3726, "step": 2710 }, { "epoch": 0.8518758466340773, "grad_norm": 0.73828125, "learning_rate": 1.6760393525864807e-05, "loss": 1.3406, "step": 2712 }, { "epoch": 0.8525040736596188, "grad_norm": 0.69921875, "learning_rate": 1.6757854649317678e-05, "loss": 1.4331, "step": 2714 }, { "epoch": 0.8531323006851601, "grad_norm": 0.7890625, "learning_rate": 1.675531577277055e-05, "loss": 1.302, "step": 2716 }, { "epoch": 0.8537605277107014, "grad_norm": 0.79296875, "learning_rate": 1.675277689622342e-05, "loss": 1.3428, "step": 2718 }, { "epoch": 0.8543887547362428, "grad_norm": 0.765625, "learning_rate": 1.6750238019676296e-05, "loss": 1.2827, "step": 2720 }, { "epoch": 0.8550169817617842, "grad_norm": 0.67578125, "learning_rate": 1.6747699143129167e-05, "loss": 1.2744, "step": 2722 }, { "epoch": 0.8556452087873255, "grad_norm": 0.75, "learning_rate": 1.674516026658204e-05, "loss": 1.2999, "step": 2724 }, { "epoch": 0.8562734358128669, "grad_norm": 0.9765625, "learning_rate": 1.674262139003491e-05, "loss": 1.2288, "step": 2726 }, { "epoch": 0.8569016628384082, "grad_norm": 0.7109375, "learning_rate": 1.6740082513487785e-05, "loss": 1.3101, "step": 2728 }, { "epoch": 0.8575298898639496, "grad_norm": 0.71484375, "learning_rate": 1.6737543636940653e-05, "loss": 1.309, "step": 2730 }, { "epoch": 0.858158116889491, "grad_norm": 0.69921875, "learning_rate": 1.6735004760393527e-05, "loss": 1.3683, "step": 2732 }, { "epoch": 0.8587863439150323, "grad_norm": 1.015625, "learning_rate": 1.67324658838464e-05, "loss": 1.2708, "step": 2734 }, { "epoch": 0.8594145709405736, "grad_norm": 0.7578125, "learning_rate": 1.672992700729927e-05, "loss": 1.5443, "step": 2736 }, { "epoch": 0.860042797966115, "grad_norm": 0.73046875, "learning_rate": 1.6727388130752145e-05, "loss": 1.3305, "step": 2738 }, { "epoch": 0.8606710249916564, "grad_norm": 0.86328125, "learning_rate": 1.6724849254205016e-05, "loss": 1.3512, "step": 2740 }, { "epoch": 0.8612992520171977, "grad_norm": 0.73828125, "learning_rate": 1.6722310377657888e-05, "loss": 1.3854, "step": 2742 }, { "epoch": 0.861927479042739, "grad_norm": 0.75390625, "learning_rate": 1.671977150111076e-05, "loss": 1.2901, "step": 2744 }, { "epoch": 0.8625557060682805, "grad_norm": 0.68359375, "learning_rate": 1.6717232624563634e-05, "loss": 1.3502, "step": 2746 }, { "epoch": 0.8631839330938218, "grad_norm": 0.7578125, "learning_rate": 1.6714693748016505e-05, "loss": 1.1293, "step": 2748 }, { "epoch": 0.8638121601193631, "grad_norm": 0.74609375, "learning_rate": 1.6712154871469377e-05, "loss": 1.3325, "step": 2750 }, { "epoch": 0.8644403871449045, "grad_norm": 0.7890625, "learning_rate": 1.6709615994922248e-05, "loss": 1.4138, "step": 2752 }, { "epoch": 0.8650686141704459, "grad_norm": 0.69140625, "learning_rate": 1.6707077118375123e-05, "loss": 1.2818, "step": 2754 }, { "epoch": 0.8656968411959872, "grad_norm": 0.73046875, "learning_rate": 1.670453824182799e-05, "loss": 1.2926, "step": 2756 }, { "epoch": 0.8663250682215285, "grad_norm": 0.6953125, "learning_rate": 1.6701999365280866e-05, "loss": 1.3686, "step": 2758 }, { "epoch": 0.8669532952470699, "grad_norm": 0.8359375, "learning_rate": 1.6699460488733737e-05, "loss": 1.2924, "step": 2760 }, { "epoch": 0.8675815222726113, "grad_norm": 0.78515625, "learning_rate": 1.669692161218661e-05, "loss": 1.4022, "step": 2762 }, { "epoch": 0.8682097492981526, "grad_norm": 0.8359375, "learning_rate": 1.669438273563948e-05, "loss": 1.429, "step": 2764 }, { "epoch": 0.868837976323694, "grad_norm": 0.7890625, "learning_rate": 1.6691843859092355e-05, "loss": 1.2911, "step": 2766 }, { "epoch": 0.8694662033492353, "grad_norm": 0.73046875, "learning_rate": 1.6689304982545226e-05, "loss": 1.4, "step": 2768 }, { "epoch": 0.8700944303747767, "grad_norm": 0.88671875, "learning_rate": 1.6686766105998097e-05, "loss": 1.3409, "step": 2770 }, { "epoch": 0.8707226574003181, "grad_norm": 1.0390625, "learning_rate": 1.668422722945097e-05, "loss": 1.2781, "step": 2772 }, { "epoch": 0.8713508844258594, "grad_norm": 0.8359375, "learning_rate": 1.6681688352903843e-05, "loss": 1.3083, "step": 2774 }, { "epoch": 0.8719791114514007, "grad_norm": 0.73046875, "learning_rate": 1.667914947635671e-05, "loss": 1.2491, "step": 2776 }, { "epoch": 0.872607338476942, "grad_norm": 0.67578125, "learning_rate": 1.6676610599809586e-05, "loss": 1.3156, "step": 2778 }, { "epoch": 0.8732355655024835, "grad_norm": 0.8515625, "learning_rate": 1.6674071723262458e-05, "loss": 1.2403, "step": 2780 }, { "epoch": 0.8738637925280248, "grad_norm": 0.74609375, "learning_rate": 1.667153284671533e-05, "loss": 1.4226, "step": 2782 }, { "epoch": 0.8744920195535661, "grad_norm": 0.84765625, "learning_rate": 1.66689939701682e-05, "loss": 1.2981, "step": 2784 }, { "epoch": 0.8751202465791075, "grad_norm": 0.6953125, "learning_rate": 1.6666455093621075e-05, "loss": 1.256, "step": 2786 }, { "epoch": 0.8757484736046489, "grad_norm": 0.734375, "learning_rate": 1.6663916217073946e-05, "loss": 1.255, "step": 2788 }, { "epoch": 0.8763767006301902, "grad_norm": 0.7265625, "learning_rate": 1.6661377340526818e-05, "loss": 1.2185, "step": 2790 }, { "epoch": 0.8770049276557316, "grad_norm": 0.6640625, "learning_rate": 1.665883846397969e-05, "loss": 1.4315, "step": 2792 }, { "epoch": 0.8776331546812729, "grad_norm": 0.703125, "learning_rate": 1.6656299587432564e-05, "loss": 1.4531, "step": 2794 }, { "epoch": 0.8782613817068143, "grad_norm": 0.8828125, "learning_rate": 1.6653760710885435e-05, "loss": 1.2937, "step": 2796 }, { "epoch": 0.8788896087323557, "grad_norm": 0.9375, "learning_rate": 1.6651221834338307e-05, "loss": 1.2382, "step": 2798 }, { "epoch": 0.879517835757897, "grad_norm": 0.8515625, "learning_rate": 1.6648682957791178e-05, "loss": 1.2398, "step": 2800 }, { "epoch": 0.8801460627834383, "grad_norm": 0.7890625, "learning_rate": 1.664614408124405e-05, "loss": 1.3117, "step": 2802 }, { "epoch": 0.8807742898089798, "grad_norm": 0.88671875, "learning_rate": 1.664360520469692e-05, "loss": 1.35, "step": 2804 }, { "epoch": 0.8814025168345211, "grad_norm": 0.8125, "learning_rate": 1.6641066328149796e-05, "loss": 1.4186, "step": 2806 }, { "epoch": 0.8820307438600624, "grad_norm": 0.67578125, "learning_rate": 1.6638527451602667e-05, "loss": 1.2733, "step": 2808 }, { "epoch": 0.8826589708856037, "grad_norm": 0.734375, "learning_rate": 1.663598857505554e-05, "loss": 1.312, "step": 2810 }, { "epoch": 0.8832871979111452, "grad_norm": 0.76171875, "learning_rate": 1.663344969850841e-05, "loss": 1.2711, "step": 2812 }, { "epoch": 0.8839154249366865, "grad_norm": 0.7265625, "learning_rate": 1.6630910821961285e-05, "loss": 1.3649, "step": 2814 }, { "epoch": 0.8845436519622278, "grad_norm": 0.734375, "learning_rate": 1.6628371945414156e-05, "loss": 1.5247, "step": 2816 }, { "epoch": 0.8851718789877692, "grad_norm": 0.76171875, "learning_rate": 1.6625833068867027e-05, "loss": 1.2794, "step": 2818 }, { "epoch": 0.8858001060133106, "grad_norm": 0.68359375, "learning_rate": 1.66232941923199e-05, "loss": 1.2475, "step": 2820 }, { "epoch": 0.8864283330388519, "grad_norm": 0.8359375, "learning_rate": 1.6620755315772774e-05, "loss": 1.1927, "step": 2822 }, { "epoch": 0.8870565600643933, "grad_norm": 0.8828125, "learning_rate": 1.6618216439225645e-05, "loss": 1.2817, "step": 2824 }, { "epoch": 0.8876847870899346, "grad_norm": 0.72265625, "learning_rate": 1.6615677562678516e-05, "loss": 1.3958, "step": 2826 }, { "epoch": 0.888313014115476, "grad_norm": 0.69140625, "learning_rate": 1.6613138686131388e-05, "loss": 1.4729, "step": 2828 }, { "epoch": 0.8889412411410174, "grad_norm": 0.87109375, "learning_rate": 1.661059980958426e-05, "loss": 1.2705, "step": 2830 }, { "epoch": 0.8895694681665587, "grad_norm": 0.73828125, "learning_rate": 1.6608060933037134e-05, "loss": 1.4104, "step": 2832 }, { "epoch": 0.8901976951921, "grad_norm": 0.78125, "learning_rate": 1.6605522056490005e-05, "loss": 1.3625, "step": 2834 }, { "epoch": 0.8908259222176415, "grad_norm": 0.8046875, "learning_rate": 1.6602983179942877e-05, "loss": 1.3754, "step": 2836 }, { "epoch": 0.8914541492431828, "grad_norm": 0.7578125, "learning_rate": 1.6600444303395748e-05, "loss": 1.2763, "step": 2838 }, { "epoch": 0.8920823762687241, "grad_norm": 0.6953125, "learning_rate": 1.6597905426848623e-05, "loss": 1.1745, "step": 2840 }, { "epoch": 0.8927106032942654, "grad_norm": 0.75390625, "learning_rate": 1.6595366550301494e-05, "loss": 1.2782, "step": 2842 }, { "epoch": 0.8933388303198068, "grad_norm": 0.796875, "learning_rate": 1.6592827673754366e-05, "loss": 1.3032, "step": 2844 }, { "epoch": 0.8939670573453482, "grad_norm": 0.7265625, "learning_rate": 1.6590288797207237e-05, "loss": 1.4176, "step": 2846 }, { "epoch": 0.8945952843708895, "grad_norm": 0.71875, "learning_rate": 1.658774992066011e-05, "loss": 1.3804, "step": 2848 }, { "epoch": 0.8952235113964309, "grad_norm": 0.6796875, "learning_rate": 1.658521104411298e-05, "loss": 1.3173, "step": 2850 }, { "epoch": 0.8958517384219722, "grad_norm": 0.86328125, "learning_rate": 1.6582672167565854e-05, "loss": 1.1673, "step": 2852 }, { "epoch": 0.8964799654475136, "grad_norm": 0.69921875, "learning_rate": 1.6580133291018726e-05, "loss": 1.3201, "step": 2854 }, { "epoch": 0.897108192473055, "grad_norm": 0.6953125, "learning_rate": 1.6577594414471597e-05, "loss": 1.3234, "step": 2856 }, { "epoch": 0.8977364194985963, "grad_norm": 2.359375, "learning_rate": 1.657505553792447e-05, "loss": 1.4672, "step": 2858 }, { "epoch": 0.8983646465241376, "grad_norm": 0.78515625, "learning_rate": 1.6572516661377343e-05, "loss": 1.3377, "step": 2860 }, { "epoch": 0.898992873549679, "grad_norm": 0.71484375, "learning_rate": 1.6569977784830215e-05, "loss": 1.2545, "step": 2862 }, { "epoch": 0.8996211005752204, "grad_norm": 0.8984375, "learning_rate": 1.6567438908283086e-05, "loss": 1.2684, "step": 2864 }, { "epoch": 0.9002493276007617, "grad_norm": 0.7421875, "learning_rate": 1.6564900031735957e-05, "loss": 1.2329, "step": 2866 }, { "epoch": 0.900877554626303, "grad_norm": 0.6796875, "learning_rate": 1.6562361155188832e-05, "loss": 1.4101, "step": 2868 }, { "epoch": 0.9015057816518445, "grad_norm": 0.7578125, "learning_rate": 1.65598222786417e-05, "loss": 1.2965, "step": 2870 }, { "epoch": 0.9021340086773858, "grad_norm": 0.90234375, "learning_rate": 1.6557283402094575e-05, "loss": 1.331, "step": 2872 }, { "epoch": 0.9027622357029271, "grad_norm": 0.765625, "learning_rate": 1.6554744525547446e-05, "loss": 1.4061, "step": 2874 }, { "epoch": 0.9033904627284685, "grad_norm": 0.76953125, "learning_rate": 1.6552205649000318e-05, "loss": 1.3482, "step": 2876 }, { "epoch": 0.9040186897540099, "grad_norm": 0.68359375, "learning_rate": 1.654966677245319e-05, "loss": 1.3822, "step": 2878 }, { "epoch": 0.9046469167795512, "grad_norm": 0.79296875, "learning_rate": 1.6547127895906064e-05, "loss": 1.2013, "step": 2880 }, { "epoch": 0.9052751438050926, "grad_norm": 0.75390625, "learning_rate": 1.6544589019358935e-05, "loss": 1.2415, "step": 2882 }, { "epoch": 0.9059033708306339, "grad_norm": 0.8984375, "learning_rate": 1.6542050142811807e-05, "loss": 1.3142, "step": 2884 }, { "epoch": 0.9065315978561753, "grad_norm": 0.7734375, "learning_rate": 1.6539511266264678e-05, "loss": 1.3292, "step": 2886 }, { "epoch": 0.9071598248817166, "grad_norm": 0.7421875, "learning_rate": 1.6536972389717553e-05, "loss": 1.3243, "step": 2888 }, { "epoch": 0.907788051907258, "grad_norm": 0.75, "learning_rate": 1.6534433513170424e-05, "loss": 1.2548, "step": 2890 }, { "epoch": 0.9084162789327993, "grad_norm": 0.78515625, "learning_rate": 1.6531894636623296e-05, "loss": 1.3526, "step": 2892 }, { "epoch": 0.9090445059583407, "grad_norm": 0.7890625, "learning_rate": 1.6529355760076167e-05, "loss": 1.3198, "step": 2894 }, { "epoch": 0.9096727329838821, "grad_norm": 0.6875, "learning_rate": 1.652681688352904e-05, "loss": 1.0987, "step": 2896 }, { "epoch": 0.9103009600094234, "grad_norm": 0.7890625, "learning_rate": 1.652427800698191e-05, "loss": 1.2387, "step": 2898 }, { "epoch": 0.9109291870349647, "grad_norm": 0.71484375, "learning_rate": 1.6521739130434785e-05, "loss": 1.1774, "step": 2900 }, { "epoch": 0.9115574140605062, "grad_norm": 0.78515625, "learning_rate": 1.6519200253887656e-05, "loss": 1.2341, "step": 2902 }, { "epoch": 0.9121856410860475, "grad_norm": 0.796875, "learning_rate": 1.6516661377340527e-05, "loss": 1.2046, "step": 2904 }, { "epoch": 0.9128138681115888, "grad_norm": 0.7890625, "learning_rate": 1.65141225007934e-05, "loss": 1.477, "step": 2906 }, { "epoch": 0.9134420951371302, "grad_norm": 0.7109375, "learning_rate": 1.6511583624246273e-05, "loss": 1.4045, "step": 2908 }, { "epoch": 0.9140703221626716, "grad_norm": 0.7734375, "learning_rate": 1.6509044747699145e-05, "loss": 1.2798, "step": 2910 }, { "epoch": 0.9146985491882129, "grad_norm": 0.703125, "learning_rate": 1.6506505871152016e-05, "loss": 1.3729, "step": 2912 }, { "epoch": 0.9153267762137542, "grad_norm": 0.84375, "learning_rate": 1.650396699460489e-05, "loss": 1.3434, "step": 2914 }, { "epoch": 0.9159550032392956, "grad_norm": 0.73046875, "learning_rate": 1.650142811805776e-05, "loss": 1.3866, "step": 2916 }, { "epoch": 0.9165832302648369, "grad_norm": 0.71875, "learning_rate": 1.6498889241510634e-05, "loss": 1.2233, "step": 2918 }, { "epoch": 0.9172114572903783, "grad_norm": 0.95703125, "learning_rate": 1.6496350364963505e-05, "loss": 1.2631, "step": 2920 }, { "epoch": 0.9178396843159197, "grad_norm": 0.6640625, "learning_rate": 1.6493811488416377e-05, "loss": 1.3768, "step": 2922 }, { "epoch": 0.918467911341461, "grad_norm": 0.84375, "learning_rate": 1.6491272611869248e-05, "loss": 1.2722, "step": 2924 }, { "epoch": 0.9190961383670023, "grad_norm": 0.83203125, "learning_rate": 1.6488733735322123e-05, "loss": 1.2799, "step": 2926 }, { "epoch": 0.9197243653925438, "grad_norm": 0.859375, "learning_rate": 1.6486194858774994e-05, "loss": 1.2571, "step": 2928 }, { "epoch": 0.9203525924180851, "grad_norm": 0.71875, "learning_rate": 1.6483655982227865e-05, "loss": 1.2148, "step": 2930 }, { "epoch": 0.9209808194436264, "grad_norm": 0.74609375, "learning_rate": 1.6481117105680737e-05, "loss": 1.3129, "step": 2932 }, { "epoch": 0.9216090464691677, "grad_norm": 0.71484375, "learning_rate": 1.647857822913361e-05, "loss": 1.2683, "step": 2934 }, { "epoch": 0.9222372734947092, "grad_norm": 0.703125, "learning_rate": 1.6476039352586483e-05, "loss": 1.356, "step": 2936 }, { "epoch": 0.9228655005202505, "grad_norm": 0.74609375, "learning_rate": 1.6473500476039354e-05, "loss": 1.2901, "step": 2938 }, { "epoch": 0.9234937275457918, "grad_norm": 0.68359375, "learning_rate": 1.6470961599492226e-05, "loss": 1.4158, "step": 2940 }, { "epoch": 0.9241219545713332, "grad_norm": 0.796875, "learning_rate": 1.6468422722945097e-05, "loss": 1.2391, "step": 2942 }, { "epoch": 0.9247501815968746, "grad_norm": 0.74609375, "learning_rate": 1.646588384639797e-05, "loss": 1.3964, "step": 2944 }, { "epoch": 0.9253784086224159, "grad_norm": 0.71875, "learning_rate": 1.6463344969850843e-05, "loss": 1.3187, "step": 2946 }, { "epoch": 0.9260066356479573, "grad_norm": 0.6640625, "learning_rate": 1.6460806093303715e-05, "loss": 1.3794, "step": 2948 }, { "epoch": 0.9266348626734986, "grad_norm": 0.69140625, "learning_rate": 1.6458267216756586e-05, "loss": 1.3897, "step": 2950 }, { "epoch": 0.92726308969904, "grad_norm": 0.73046875, "learning_rate": 1.6455728340209457e-05, "loss": 1.2514, "step": 2952 }, { "epoch": 0.9278913167245814, "grad_norm": 0.7265625, "learning_rate": 1.6453189463662332e-05, "loss": 1.2275, "step": 2954 }, { "epoch": 0.9285195437501227, "grad_norm": 0.8828125, "learning_rate": 1.6450650587115204e-05, "loss": 1.3661, "step": 2956 }, { "epoch": 0.929147770775664, "grad_norm": 0.703125, "learning_rate": 1.6448111710568075e-05, "loss": 1.3095, "step": 2958 }, { "epoch": 0.9297759978012055, "grad_norm": 0.80859375, "learning_rate": 1.6445572834020946e-05, "loss": 1.4244, "step": 2960 }, { "epoch": 0.9304042248267468, "grad_norm": 0.69921875, "learning_rate": 1.644303395747382e-05, "loss": 1.3683, "step": 2962 }, { "epoch": 0.9310324518522881, "grad_norm": 0.71875, "learning_rate": 1.644049508092669e-05, "loss": 1.512, "step": 2964 }, { "epoch": 0.9316606788778294, "grad_norm": 0.80859375, "learning_rate": 1.6437956204379564e-05, "loss": 1.4732, "step": 2966 }, { "epoch": 0.9322889059033709, "grad_norm": 0.734375, "learning_rate": 1.6435417327832435e-05, "loss": 1.34, "step": 2968 }, { "epoch": 0.9329171329289122, "grad_norm": 0.77734375, "learning_rate": 1.6432878451285307e-05, "loss": 1.2436, "step": 2970 }, { "epoch": 0.9335453599544535, "grad_norm": 0.7421875, "learning_rate": 1.6430339574738178e-05, "loss": 1.3719, "step": 2972 }, { "epoch": 0.9341735869799949, "grad_norm": 0.79296875, "learning_rate": 1.6427800698191053e-05, "loss": 1.3081, "step": 2974 }, { "epoch": 0.9348018140055363, "grad_norm": 0.74609375, "learning_rate": 1.6425261821643924e-05, "loss": 1.3141, "step": 2976 }, { "epoch": 0.9354300410310776, "grad_norm": 0.73828125, "learning_rate": 1.6422722945096796e-05, "loss": 1.3385, "step": 2978 }, { "epoch": 0.936058268056619, "grad_norm": 0.73046875, "learning_rate": 1.6420184068549667e-05, "loss": 1.3452, "step": 2980 }, { "epoch": 0.9366864950821603, "grad_norm": 0.80078125, "learning_rate": 1.6417645192002542e-05, "loss": 1.2058, "step": 2982 }, { "epoch": 0.9373147221077017, "grad_norm": 0.75390625, "learning_rate": 1.641510631545541e-05, "loss": 1.3226, "step": 2984 }, { "epoch": 0.937942949133243, "grad_norm": 0.7109375, "learning_rate": 1.6412567438908284e-05, "loss": 1.362, "step": 2986 }, { "epoch": 0.9385711761587844, "grad_norm": 0.734375, "learning_rate": 1.6410028562361156e-05, "loss": 1.2904, "step": 2988 }, { "epoch": 0.9391994031843257, "grad_norm": 0.875, "learning_rate": 1.6407489685814027e-05, "loss": 1.3076, "step": 2990 }, { "epoch": 0.939827630209867, "grad_norm": 1.2890625, "learning_rate": 1.64049508092669e-05, "loss": 1.2634, "step": 2992 }, { "epoch": 0.9404558572354085, "grad_norm": 0.77734375, "learning_rate": 1.6402411932719773e-05, "loss": 1.3545, "step": 2994 }, { "epoch": 0.9410840842609498, "grad_norm": 0.73046875, "learning_rate": 1.6399873056172645e-05, "loss": 1.3319, "step": 2996 }, { "epoch": 0.9417123112864911, "grad_norm": 0.75, "learning_rate": 1.6397334179625516e-05, "loss": 1.3534, "step": 2998 }, { "epoch": 0.9423405383120325, "grad_norm": 0.68359375, "learning_rate": 1.639479530307839e-05, "loss": 1.2353, "step": 3000 }, { "epoch": 0.9429687653375739, "grad_norm": 0.68359375, "learning_rate": 1.6392256426531262e-05, "loss": 1.2108, "step": 3002 }, { "epoch": 0.9435969923631152, "grad_norm": 0.71875, "learning_rate": 1.6389717549984134e-05, "loss": 1.2961, "step": 3004 }, { "epoch": 0.9442252193886566, "grad_norm": 0.734375, "learning_rate": 1.6387178673437005e-05, "loss": 1.2746, "step": 3006 }, { "epoch": 0.9448534464141979, "grad_norm": 0.72265625, "learning_rate": 1.638463979688988e-05, "loss": 1.2231, "step": 3008 }, { "epoch": 0.9454816734397393, "grad_norm": 0.85546875, "learning_rate": 1.6382100920342748e-05, "loss": 1.4304, "step": 3010 }, { "epoch": 0.9461099004652807, "grad_norm": 0.87890625, "learning_rate": 1.6379562043795623e-05, "loss": 1.336, "step": 3012 }, { "epoch": 0.946738127490822, "grad_norm": 0.78125, "learning_rate": 1.6377023167248494e-05, "loss": 1.2532, "step": 3014 }, { "epoch": 0.9473663545163633, "grad_norm": 0.73046875, "learning_rate": 1.6374484290701365e-05, "loss": 1.3438, "step": 3016 }, { "epoch": 0.9479945815419047, "grad_norm": 0.765625, "learning_rate": 1.6371945414154237e-05, "loss": 1.3412, "step": 3018 }, { "epoch": 0.9486228085674461, "grad_norm": 0.7578125, "learning_rate": 1.636940653760711e-05, "loss": 1.2943, "step": 3020 }, { "epoch": 0.9492510355929874, "grad_norm": 0.71484375, "learning_rate": 1.6366867661059983e-05, "loss": 1.3679, "step": 3022 }, { "epoch": 0.9498792626185287, "grad_norm": 0.6953125, "learning_rate": 1.6364328784512854e-05, "loss": 1.2899, "step": 3024 }, { "epoch": 0.9505074896440702, "grad_norm": 0.87109375, "learning_rate": 1.6361789907965726e-05, "loss": 1.4329, "step": 3026 }, { "epoch": 0.9511357166696115, "grad_norm": 0.6875, "learning_rate": 1.63592510314186e-05, "loss": 1.2883, "step": 3028 }, { "epoch": 0.9517639436951528, "grad_norm": 0.66796875, "learning_rate": 1.6356712154871472e-05, "loss": 1.2225, "step": 3030 }, { "epoch": 0.9523921707206942, "grad_norm": 0.796875, "learning_rate": 1.6354173278324343e-05, "loss": 1.3128, "step": 3032 }, { "epoch": 0.9530203977462356, "grad_norm": 0.8828125, "learning_rate": 1.6351634401777215e-05, "loss": 1.3125, "step": 3034 }, { "epoch": 0.9536486247717769, "grad_norm": 0.7265625, "learning_rate": 1.6349095525230086e-05, "loss": 1.3294, "step": 3036 }, { "epoch": 0.9542768517973182, "grad_norm": 0.79296875, "learning_rate": 1.6346556648682957e-05, "loss": 1.2799, "step": 3038 }, { "epoch": 0.9549050788228596, "grad_norm": 0.703125, "learning_rate": 1.6344017772135832e-05, "loss": 1.3259, "step": 3040 }, { "epoch": 0.955533305848401, "grad_norm": 0.66796875, "learning_rate": 1.6341478895588704e-05, "loss": 1.2925, "step": 3042 }, { "epoch": 0.9561615328739423, "grad_norm": 0.81640625, "learning_rate": 1.6338940019041575e-05, "loss": 1.4347, "step": 3044 }, { "epoch": 0.9567897598994837, "grad_norm": 0.71875, "learning_rate": 1.6336401142494446e-05, "loss": 1.1746, "step": 3046 }, { "epoch": 0.957417986925025, "grad_norm": 0.8671875, "learning_rate": 1.633386226594732e-05, "loss": 1.3328, "step": 3048 }, { "epoch": 0.9580462139505664, "grad_norm": 0.6953125, "learning_rate": 1.6331323389400192e-05, "loss": 1.3283, "step": 3050 }, { "epoch": 0.9586744409761078, "grad_norm": 0.67578125, "learning_rate": 1.6328784512853064e-05, "loss": 1.4036, "step": 3052 }, { "epoch": 0.9593026680016491, "grad_norm": 0.83984375, "learning_rate": 1.6326245636305935e-05, "loss": 1.2255, "step": 3054 }, { "epoch": 0.9599308950271904, "grad_norm": 0.7265625, "learning_rate": 1.632370675975881e-05, "loss": 1.2382, "step": 3056 }, { "epoch": 0.9605591220527318, "grad_norm": 0.72265625, "learning_rate": 1.6321167883211678e-05, "loss": 1.311, "step": 3058 }, { "epoch": 0.9611873490782732, "grad_norm": 0.7109375, "learning_rate": 1.6318629006664553e-05, "loss": 1.2916, "step": 3060 }, { "epoch": 0.9618155761038145, "grad_norm": 0.734375, "learning_rate": 1.6316090130117424e-05, "loss": 1.2996, "step": 3062 }, { "epoch": 0.9624438031293558, "grad_norm": 0.6796875, "learning_rate": 1.6313551253570295e-05, "loss": 1.3253, "step": 3064 }, { "epoch": 0.9630720301548972, "grad_norm": 0.66796875, "learning_rate": 1.6311012377023167e-05, "loss": 1.3582, "step": 3066 }, { "epoch": 0.9637002571804386, "grad_norm": 0.671875, "learning_rate": 1.630847350047604e-05, "loss": 1.3637, "step": 3068 }, { "epoch": 0.9643284842059799, "grad_norm": 0.80859375, "learning_rate": 1.6305934623928913e-05, "loss": 1.2882, "step": 3070 }, { "epoch": 0.9649567112315213, "grad_norm": 1.1171875, "learning_rate": 1.6303395747381784e-05, "loss": 1.2281, "step": 3072 }, { "epoch": 0.9655849382570626, "grad_norm": 0.80078125, "learning_rate": 1.6300856870834656e-05, "loss": 1.3915, "step": 3074 }, { "epoch": 0.966213165282604, "grad_norm": 0.75, "learning_rate": 1.629831799428753e-05, "loss": 1.401, "step": 3076 }, { "epoch": 0.9668413923081454, "grad_norm": 0.73828125, "learning_rate": 1.62957791177404e-05, "loss": 1.2698, "step": 3078 }, { "epoch": 0.9674696193336867, "grad_norm": 0.75, "learning_rate": 1.6293240241193273e-05, "loss": 1.3833, "step": 3080 }, { "epoch": 0.968097846359228, "grad_norm": 0.84375, "learning_rate": 1.6290701364646148e-05, "loss": 1.2167, "step": 3082 }, { "epoch": 0.9687260733847695, "grad_norm": 0.6875, "learning_rate": 1.6288162488099016e-05, "loss": 1.3872, "step": 3084 }, { "epoch": 0.9693543004103108, "grad_norm": 0.65625, "learning_rate": 1.628562361155189e-05, "loss": 1.1942, "step": 3086 }, { "epoch": 0.9699825274358521, "grad_norm": 0.91015625, "learning_rate": 1.6283084735004762e-05, "loss": 1.3442, "step": 3088 }, { "epoch": 0.9706107544613934, "grad_norm": 0.68359375, "learning_rate": 1.6280545858457634e-05, "loss": 1.1895, "step": 3090 }, { "epoch": 0.9712389814869349, "grad_norm": 0.765625, "learning_rate": 1.6278006981910505e-05, "loss": 1.2817, "step": 3092 }, { "epoch": 0.9718672085124762, "grad_norm": 0.85546875, "learning_rate": 1.627546810536338e-05, "loss": 1.3469, "step": 3094 }, { "epoch": 0.9724954355380175, "grad_norm": 0.76171875, "learning_rate": 1.627292922881625e-05, "loss": 1.3717, "step": 3096 }, { "epoch": 0.9731236625635589, "grad_norm": 0.6953125, "learning_rate": 1.6270390352269123e-05, "loss": 1.3015, "step": 3098 }, { "epoch": 0.9737518895891003, "grad_norm": 0.703125, "learning_rate": 1.6267851475721994e-05, "loss": 1.4265, "step": 3100 }, { "epoch": 0.9743801166146416, "grad_norm": 0.80859375, "learning_rate": 1.626531259917487e-05, "loss": 1.2424, "step": 3102 }, { "epoch": 0.975008343640183, "grad_norm": 0.8125, "learning_rate": 1.6262773722627737e-05, "loss": 1.3252, "step": 3104 }, { "epoch": 0.9756365706657243, "grad_norm": 0.87890625, "learning_rate": 1.626023484608061e-05, "loss": 1.3464, "step": 3106 }, { "epoch": 0.9762647976912657, "grad_norm": 0.81640625, "learning_rate": 1.6257695969533483e-05, "loss": 1.3007, "step": 3108 }, { "epoch": 0.9768930247168071, "grad_norm": 0.90234375, "learning_rate": 1.6255157092986354e-05, "loss": 1.2549, "step": 3110 }, { "epoch": 0.9775212517423484, "grad_norm": 0.76171875, "learning_rate": 1.6252618216439226e-05, "loss": 1.3299, "step": 3112 }, { "epoch": 0.9781494787678897, "grad_norm": 0.77734375, "learning_rate": 1.62500793398921e-05, "loss": 1.3118, "step": 3114 }, { "epoch": 0.9787777057934312, "grad_norm": 0.765625, "learning_rate": 1.6247540463344972e-05, "loss": 1.3114, "step": 3116 }, { "epoch": 0.9794059328189725, "grad_norm": 0.66796875, "learning_rate": 1.6245001586797843e-05, "loss": 1.2729, "step": 3118 }, { "epoch": 0.9800341598445138, "grad_norm": 0.69921875, "learning_rate": 1.6242462710250715e-05, "loss": 1.3769, "step": 3120 }, { "epoch": 0.9806623868700551, "grad_norm": 0.65234375, "learning_rate": 1.623992383370359e-05, "loss": 1.2581, "step": 3122 }, { "epoch": 0.9812906138955966, "grad_norm": 0.73046875, "learning_rate": 1.623738495715646e-05, "loss": 1.3578, "step": 3124 }, { "epoch": 0.9819188409211379, "grad_norm": 0.67578125, "learning_rate": 1.6234846080609332e-05, "loss": 1.3055, "step": 3126 }, { "epoch": 0.9825470679466792, "grad_norm": 1.3125, "learning_rate": 1.6232307204062203e-05, "loss": 1.2103, "step": 3128 }, { "epoch": 0.9831752949722206, "grad_norm": 0.79296875, "learning_rate": 1.6229768327515075e-05, "loss": 1.3818, "step": 3130 }, { "epoch": 0.9838035219977619, "grad_norm": 0.703125, "learning_rate": 1.6227229450967946e-05, "loss": 1.3531, "step": 3132 }, { "epoch": 0.9844317490233033, "grad_norm": 0.73046875, "learning_rate": 1.622469057442082e-05, "loss": 1.2813, "step": 3134 }, { "epoch": 0.9850599760488447, "grad_norm": 0.7578125, "learning_rate": 1.6222151697873692e-05, "loss": 1.3331, "step": 3136 }, { "epoch": 0.985688203074386, "grad_norm": 0.72265625, "learning_rate": 1.6219612821326564e-05, "loss": 1.3681, "step": 3138 }, { "epoch": 0.9863164300999273, "grad_norm": 0.7421875, "learning_rate": 1.6217073944779435e-05, "loss": 1.3545, "step": 3140 }, { "epoch": 0.9869446571254687, "grad_norm": 0.703125, "learning_rate": 1.621453506823231e-05, "loss": 1.392, "step": 3142 }, { "epoch": 0.9875728841510101, "grad_norm": 0.79296875, "learning_rate": 1.621199619168518e-05, "loss": 1.3232, "step": 3144 }, { "epoch": 0.9882011111765514, "grad_norm": 0.7578125, "learning_rate": 1.6209457315138053e-05, "loss": 1.2144, "step": 3146 }, { "epoch": 0.9888293382020927, "grad_norm": 0.6796875, "learning_rate": 1.6206918438590924e-05, "loss": 1.3129, "step": 3148 }, { "epoch": 0.9894575652276342, "grad_norm": 0.74609375, "learning_rate": 1.62043795620438e-05, "loss": 1.3398, "step": 3150 }, { "epoch": 0.9900857922531755, "grad_norm": 0.79296875, "learning_rate": 1.6201840685496667e-05, "loss": 1.3094, "step": 3152 }, { "epoch": 0.9907140192787168, "grad_norm": 0.66796875, "learning_rate": 1.619930180894954e-05, "loss": 1.436, "step": 3154 }, { "epoch": 0.9913422463042582, "grad_norm": 0.828125, "learning_rate": 1.6196762932402413e-05, "loss": 1.4225, "step": 3156 }, { "epoch": 0.9919704733297996, "grad_norm": 0.76953125, "learning_rate": 1.6194224055855284e-05, "loss": 1.2521, "step": 3158 }, { "epoch": 0.9925987003553409, "grad_norm": 0.7265625, "learning_rate": 1.6191685179308156e-05, "loss": 1.2926, "step": 3160 }, { "epoch": 0.9932269273808823, "grad_norm": 0.66796875, "learning_rate": 1.618914630276103e-05, "loss": 1.222, "step": 3162 }, { "epoch": 0.9938551544064236, "grad_norm": 0.87890625, "learning_rate": 1.6186607426213902e-05, "loss": 1.3083, "step": 3164 }, { "epoch": 0.994483381431965, "grad_norm": 0.66015625, "learning_rate": 1.6184068549666773e-05, "loss": 1.3349, "step": 3166 }, { "epoch": 0.9951116084575063, "grad_norm": 0.66015625, "learning_rate": 1.6181529673119648e-05, "loss": 1.306, "step": 3168 }, { "epoch": 0.9957398354830477, "grad_norm": 0.796875, "learning_rate": 1.617899079657252e-05, "loss": 1.2053, "step": 3170 }, { "epoch": 0.996368062508589, "grad_norm": 0.91015625, "learning_rate": 1.617645192002539e-05, "loss": 1.2241, "step": 3172 }, { "epoch": 0.9969962895341304, "grad_norm": 0.78125, "learning_rate": 1.6173913043478262e-05, "loss": 1.1861, "step": 3174 }, { "epoch": 0.9976245165596718, "grad_norm": 0.6953125, "learning_rate": 1.6171374166931137e-05, "loss": 1.3181, "step": 3176 }, { "epoch": 0.9982527435852131, "grad_norm": 0.7265625, "learning_rate": 1.6168835290384005e-05, "loss": 1.2654, "step": 3178 }, { "epoch": 0.9988809706107544, "grad_norm": 0.77734375, "learning_rate": 1.616629641383688e-05, "loss": 1.435, "step": 3180 }, { "epoch": 0.9995091976362959, "grad_norm": 0.75, "learning_rate": 1.616375753728975e-05, "loss": 1.4596, "step": 3182 }, { "epoch": 1.000137424661837, "grad_norm": 0.68359375, "learning_rate": 1.6161218660742622e-05, "loss": 1.3658, "step": 3184 }, { "epoch": 1.0007656516873786, "grad_norm": 0.67578125, "learning_rate": 1.6158679784195494e-05, "loss": 1.278, "step": 3186 }, { "epoch": 1.00139387871292, "grad_norm": 0.6640625, "learning_rate": 1.615614090764837e-05, "loss": 1.3165, "step": 3188 }, { "epoch": 1.0020221057384613, "grad_norm": 0.68359375, "learning_rate": 1.615360203110124e-05, "loss": 1.292, "step": 3190 }, { "epoch": 1.0026503327640026, "grad_norm": 0.7109375, "learning_rate": 1.615106315455411e-05, "loss": 1.2257, "step": 3192 }, { "epoch": 1.003278559789544, "grad_norm": 0.78125, "learning_rate": 1.6148524278006983e-05, "loss": 1.256, "step": 3194 }, { "epoch": 1.0039067868150853, "grad_norm": 0.83984375, "learning_rate": 1.6145985401459858e-05, "loss": 1.2126, "step": 3196 }, { "epoch": 1.0045350138406266, "grad_norm": 0.75390625, "learning_rate": 1.6143446524912726e-05, "loss": 1.2712, "step": 3198 }, { "epoch": 1.005163240866168, "grad_norm": 0.76953125, "learning_rate": 1.61409076483656e-05, "loss": 1.1845, "step": 3200 }, { "epoch": 1.0057914678917093, "grad_norm": 0.796875, "learning_rate": 1.613836877181847e-05, "loss": 1.2542, "step": 3202 }, { "epoch": 1.0064196949172508, "grad_norm": 0.8125, "learning_rate": 1.6135829895271343e-05, "loss": 1.2691, "step": 3204 }, { "epoch": 1.0070479219427921, "grad_norm": 0.78125, "learning_rate": 1.6133291018724214e-05, "loss": 1.1913, "step": 3206 }, { "epoch": 1.0076761489683335, "grad_norm": 0.78125, "learning_rate": 1.613075214217709e-05, "loss": 1.1205, "step": 3208 }, { "epoch": 1.0083043759938748, "grad_norm": 0.7578125, "learning_rate": 1.612821326562996e-05, "loss": 1.2168, "step": 3210 }, { "epoch": 1.0089326030194161, "grad_norm": 0.73046875, "learning_rate": 1.6125674389082832e-05, "loss": 1.0832, "step": 3212 }, { "epoch": 1.0095608300449574, "grad_norm": 0.76953125, "learning_rate": 1.6123135512535703e-05, "loss": 1.2185, "step": 3214 }, { "epoch": 1.0101890570704988, "grad_norm": 0.78515625, "learning_rate": 1.6120596635988578e-05, "loss": 1.3084, "step": 3216 }, { "epoch": 1.01081728409604, "grad_norm": 0.82421875, "learning_rate": 1.6118057759441446e-05, "loss": 1.1159, "step": 3218 }, { "epoch": 1.0114455111215817, "grad_norm": 0.7421875, "learning_rate": 1.611551888289432e-05, "loss": 1.165, "step": 3220 }, { "epoch": 1.012073738147123, "grad_norm": 0.73046875, "learning_rate": 1.6112980006347192e-05, "loss": 1.1485, "step": 3222 }, { "epoch": 1.0127019651726643, "grad_norm": 0.90234375, "learning_rate": 1.6110441129800064e-05, "loss": 1.23, "step": 3224 }, { "epoch": 1.0133301921982056, "grad_norm": 0.6875, "learning_rate": 1.6107902253252935e-05, "loss": 1.1168, "step": 3226 }, { "epoch": 1.013958419223747, "grad_norm": 0.8359375, "learning_rate": 1.610536337670581e-05, "loss": 1.0645, "step": 3228 }, { "epoch": 1.0145866462492883, "grad_norm": 0.7890625, "learning_rate": 1.610282450015868e-05, "loss": 1.2309, "step": 3230 }, { "epoch": 1.0152148732748296, "grad_norm": 0.83203125, "learning_rate": 1.6100285623611553e-05, "loss": 1.2314, "step": 3232 }, { "epoch": 1.015843100300371, "grad_norm": 0.7734375, "learning_rate": 1.6097746747064424e-05, "loss": 1.3229, "step": 3234 }, { "epoch": 1.0164713273259125, "grad_norm": 0.73828125, "learning_rate": 1.60952078705173e-05, "loss": 1.1837, "step": 3236 }, { "epoch": 1.0170995543514538, "grad_norm": 0.8203125, "learning_rate": 1.609266899397017e-05, "loss": 1.2404, "step": 3238 }, { "epoch": 1.0177277813769952, "grad_norm": 0.765625, "learning_rate": 1.609013011742304e-05, "loss": 1.344, "step": 3240 }, { "epoch": 1.0183560084025365, "grad_norm": 0.74609375, "learning_rate": 1.6087591240875913e-05, "loss": 1.3881, "step": 3242 }, { "epoch": 1.0189842354280778, "grad_norm": 0.7890625, "learning_rate": 1.6085052364328784e-05, "loss": 1.3539, "step": 3244 }, { "epoch": 1.0196124624536191, "grad_norm": 0.72265625, "learning_rate": 1.6082513487781656e-05, "loss": 1.2862, "step": 3246 }, { "epoch": 1.0202406894791605, "grad_norm": 0.73046875, "learning_rate": 1.607997461123453e-05, "loss": 1.3068, "step": 3248 }, { "epoch": 1.0208689165047018, "grad_norm": 0.6875, "learning_rate": 1.6077435734687402e-05, "loss": 1.2403, "step": 3250 }, { "epoch": 1.0214971435302433, "grad_norm": 0.7578125, "learning_rate": 1.6074896858140273e-05, "loss": 1.2603, "step": 3252 }, { "epoch": 1.0221253705557847, "grad_norm": 0.83203125, "learning_rate": 1.6072357981593148e-05, "loss": 1.1728, "step": 3254 }, { "epoch": 1.022753597581326, "grad_norm": 0.7734375, "learning_rate": 1.606981910504602e-05, "loss": 1.1989, "step": 3256 }, { "epoch": 1.0233818246068673, "grad_norm": 0.84765625, "learning_rate": 1.606728022849889e-05, "loss": 1.2041, "step": 3258 }, { "epoch": 1.0240100516324087, "grad_norm": 0.84375, "learning_rate": 1.6064741351951762e-05, "loss": 1.2197, "step": 3260 }, { "epoch": 1.02463827865795, "grad_norm": 0.81640625, "learning_rate": 1.6062202475404637e-05, "loss": 1.1039, "step": 3262 }, { "epoch": 1.0252665056834913, "grad_norm": 0.74609375, "learning_rate": 1.6059663598857508e-05, "loss": 1.2697, "step": 3264 }, { "epoch": 1.0258947327090326, "grad_norm": 0.8515625, "learning_rate": 1.605712472231038e-05, "loss": 1.152, "step": 3266 }, { "epoch": 1.026522959734574, "grad_norm": 0.83203125, "learning_rate": 1.605458584576325e-05, "loss": 1.2728, "step": 3268 }, { "epoch": 1.0271511867601155, "grad_norm": 0.77734375, "learning_rate": 1.6052046969216122e-05, "loss": 1.1901, "step": 3270 }, { "epoch": 1.0277794137856568, "grad_norm": 0.765625, "learning_rate": 1.6049508092668994e-05, "loss": 1.2918, "step": 3272 }, { "epoch": 1.0284076408111982, "grad_norm": 0.7890625, "learning_rate": 1.604696921612187e-05, "loss": 1.2328, "step": 3274 }, { "epoch": 1.0290358678367395, "grad_norm": 0.7578125, "learning_rate": 1.604443033957474e-05, "loss": 1.2688, "step": 3276 }, { "epoch": 1.0296640948622808, "grad_norm": 0.78125, "learning_rate": 1.604189146302761e-05, "loss": 1.1562, "step": 3278 }, { "epoch": 1.0302923218878222, "grad_norm": 0.83203125, "learning_rate": 1.6039352586480483e-05, "loss": 1.2546, "step": 3280 }, { "epoch": 1.0309205489133635, "grad_norm": 0.75, "learning_rate": 1.6036813709933357e-05, "loss": 1.1542, "step": 3282 }, { "epoch": 1.0315487759389048, "grad_norm": 0.87109375, "learning_rate": 1.603427483338623e-05, "loss": 1.2115, "step": 3284 }, { "epoch": 1.0321770029644464, "grad_norm": 0.8671875, "learning_rate": 1.60317359568391e-05, "loss": 1.3227, "step": 3286 }, { "epoch": 1.0328052299899877, "grad_norm": 0.77734375, "learning_rate": 1.602919708029197e-05, "loss": 1.2416, "step": 3288 }, { "epoch": 1.033433457015529, "grad_norm": 0.8125, "learning_rate": 1.6026658203744846e-05, "loss": 1.2146, "step": 3290 }, { "epoch": 1.0340616840410704, "grad_norm": 0.75390625, "learning_rate": 1.6024119327197714e-05, "loss": 1.2237, "step": 3292 }, { "epoch": 1.0346899110666117, "grad_norm": 0.734375, "learning_rate": 1.602158045065059e-05, "loss": 1.1345, "step": 3294 }, { "epoch": 1.035318138092153, "grad_norm": 0.76171875, "learning_rate": 1.601904157410346e-05, "loss": 1.1062, "step": 3296 }, { "epoch": 1.0359463651176943, "grad_norm": 0.74609375, "learning_rate": 1.6016502697556332e-05, "loss": 1.2563, "step": 3298 }, { "epoch": 1.0365745921432357, "grad_norm": 0.7890625, "learning_rate": 1.6013963821009203e-05, "loss": 1.1443, "step": 3300 }, { "epoch": 1.0372028191687772, "grad_norm": 0.8125, "learning_rate": 1.6011424944462078e-05, "loss": 1.3163, "step": 3302 }, { "epoch": 1.0378310461943185, "grad_norm": 0.80078125, "learning_rate": 1.600888606791495e-05, "loss": 1.2362, "step": 3304 }, { "epoch": 1.0384592732198599, "grad_norm": 0.828125, "learning_rate": 1.600634719136782e-05, "loss": 1.2361, "step": 3306 }, { "epoch": 1.0390875002454012, "grad_norm": 0.859375, "learning_rate": 1.6003808314820692e-05, "loss": 1.2125, "step": 3308 }, { "epoch": 1.0397157272709425, "grad_norm": 0.9140625, "learning_rate": 1.6001269438273567e-05, "loss": 1.1605, "step": 3310 }, { "epoch": 1.0403439542964839, "grad_norm": 0.88671875, "learning_rate": 1.5998730561726435e-05, "loss": 1.1425, "step": 3312 }, { "epoch": 1.0409721813220252, "grad_norm": 0.72265625, "learning_rate": 1.599619168517931e-05, "loss": 1.1267, "step": 3314 }, { "epoch": 1.0416004083475665, "grad_norm": 0.79296875, "learning_rate": 1.599365280863218e-05, "loss": 1.3268, "step": 3316 }, { "epoch": 1.042228635373108, "grad_norm": 0.85546875, "learning_rate": 1.5991113932085053e-05, "loss": 1.2864, "step": 3318 }, { "epoch": 1.0428568623986494, "grad_norm": 0.796875, "learning_rate": 1.5988575055537924e-05, "loss": 1.3753, "step": 3320 }, { "epoch": 1.0434850894241907, "grad_norm": 0.734375, "learning_rate": 1.59860361789908e-05, "loss": 1.279, "step": 3322 }, { "epoch": 1.044113316449732, "grad_norm": 0.77734375, "learning_rate": 1.598349730244367e-05, "loss": 1.2186, "step": 3324 }, { "epoch": 1.0447415434752734, "grad_norm": 0.765625, "learning_rate": 1.598095842589654e-05, "loss": 1.2752, "step": 3326 }, { "epoch": 1.0453697705008147, "grad_norm": 0.88671875, "learning_rate": 1.5978419549349413e-05, "loss": 1.0901, "step": 3328 }, { "epoch": 1.045997997526356, "grad_norm": 0.71875, "learning_rate": 1.5975880672802288e-05, "loss": 1.2462, "step": 3330 }, { "epoch": 1.0466262245518974, "grad_norm": 0.81640625, "learning_rate": 1.597334179625516e-05, "loss": 1.2196, "step": 3332 }, { "epoch": 1.0472544515774387, "grad_norm": 0.77734375, "learning_rate": 1.597080291970803e-05, "loss": 1.181, "step": 3334 }, { "epoch": 1.0478826786029802, "grad_norm": 0.828125, "learning_rate": 1.5968264043160902e-05, "loss": 1.2889, "step": 3336 }, { "epoch": 1.0485109056285216, "grad_norm": 0.79296875, "learning_rate": 1.5965725166613773e-05, "loss": 1.2918, "step": 3338 }, { "epoch": 1.049139132654063, "grad_norm": 0.8515625, "learning_rate": 1.5963186290066648e-05, "loss": 1.2074, "step": 3340 }, { "epoch": 1.0497673596796042, "grad_norm": 0.72265625, "learning_rate": 1.596064741351952e-05, "loss": 1.0973, "step": 3342 }, { "epoch": 1.0503955867051455, "grad_norm": 0.85546875, "learning_rate": 1.595810853697239e-05, "loss": 1.2965, "step": 3344 }, { "epoch": 1.0510238137306869, "grad_norm": 0.90625, "learning_rate": 1.5955569660425262e-05, "loss": 1.1753, "step": 3346 }, { "epoch": 1.0516520407562282, "grad_norm": 0.89453125, "learning_rate": 1.5953030783878137e-05, "loss": 1.1732, "step": 3348 }, { "epoch": 1.0522802677817698, "grad_norm": 0.8125, "learning_rate": 1.5950491907331008e-05, "loss": 1.2432, "step": 3350 }, { "epoch": 1.052908494807311, "grad_norm": 0.74609375, "learning_rate": 1.594795303078388e-05, "loss": 1.3405, "step": 3352 }, { "epoch": 1.0535367218328524, "grad_norm": 0.73828125, "learning_rate": 1.594541415423675e-05, "loss": 1.2937, "step": 3354 }, { "epoch": 1.0541649488583937, "grad_norm": 0.76953125, "learning_rate": 1.5942875277689626e-05, "loss": 1.2403, "step": 3356 }, { "epoch": 1.054793175883935, "grad_norm": 0.71875, "learning_rate": 1.5940336401142497e-05, "loss": 1.1935, "step": 3358 }, { "epoch": 1.0554214029094764, "grad_norm": 0.80859375, "learning_rate": 1.593779752459537e-05, "loss": 1.1554, "step": 3360 }, { "epoch": 1.0560496299350177, "grad_norm": 0.79296875, "learning_rate": 1.593525864804824e-05, "loss": 1.2863, "step": 3362 }, { "epoch": 1.056677856960559, "grad_norm": 0.76953125, "learning_rate": 1.593271977150111e-05, "loss": 1.3232, "step": 3364 }, { "epoch": 1.0573060839861004, "grad_norm": 0.87109375, "learning_rate": 1.5930180894953983e-05, "loss": 1.165, "step": 3366 }, { "epoch": 1.057934311011642, "grad_norm": 0.859375, "learning_rate": 1.5927642018406857e-05, "loss": 1.2739, "step": 3368 }, { "epoch": 1.0585625380371833, "grad_norm": 0.72265625, "learning_rate": 1.592510314185973e-05, "loss": 1.1609, "step": 3370 }, { "epoch": 1.0591907650627246, "grad_norm": 0.7421875, "learning_rate": 1.59225642653126e-05, "loss": 1.3293, "step": 3372 }, { "epoch": 1.059818992088266, "grad_norm": 0.76171875, "learning_rate": 1.592002538876547e-05, "loss": 1.2502, "step": 3374 }, { "epoch": 1.0604472191138072, "grad_norm": 0.7578125, "learning_rate": 1.5917486512218346e-05, "loss": 1.1905, "step": 3376 }, { "epoch": 1.0610754461393486, "grad_norm": 0.79296875, "learning_rate": 1.5914947635671218e-05, "loss": 1.3572, "step": 3378 }, { "epoch": 1.06170367316489, "grad_norm": 0.96875, "learning_rate": 1.591240875912409e-05, "loss": 1.2005, "step": 3380 }, { "epoch": 1.0623319001904312, "grad_norm": 0.74609375, "learning_rate": 1.590986988257696e-05, "loss": 1.2663, "step": 3382 }, { "epoch": 1.0629601272159728, "grad_norm": 0.80078125, "learning_rate": 1.5907331006029835e-05, "loss": 1.2143, "step": 3384 }, { "epoch": 1.063588354241514, "grad_norm": 0.73828125, "learning_rate": 1.5904792129482703e-05, "loss": 1.2849, "step": 3386 }, { "epoch": 1.0642165812670554, "grad_norm": 0.765625, "learning_rate": 1.5902253252935578e-05, "loss": 1.2787, "step": 3388 }, { "epoch": 1.0648448082925968, "grad_norm": 0.8046875, "learning_rate": 1.589971437638845e-05, "loss": 1.1279, "step": 3390 }, { "epoch": 1.065473035318138, "grad_norm": 0.7890625, "learning_rate": 1.589717549984132e-05, "loss": 1.2399, "step": 3392 }, { "epoch": 1.0661012623436794, "grad_norm": 0.8125, "learning_rate": 1.5894636623294192e-05, "loss": 1.2965, "step": 3394 }, { "epoch": 1.0667294893692207, "grad_norm": 0.7265625, "learning_rate": 1.5892097746747067e-05, "loss": 1.1753, "step": 3396 }, { "epoch": 1.067357716394762, "grad_norm": 0.80859375, "learning_rate": 1.5889558870199938e-05, "loss": 1.229, "step": 3398 }, { "epoch": 1.0679859434203034, "grad_norm": 0.73046875, "learning_rate": 1.588701999365281e-05, "loss": 1.2592, "step": 3400 }, { "epoch": 1.068614170445845, "grad_norm": 0.78125, "learning_rate": 1.588448111710568e-05, "loss": 1.2941, "step": 3402 }, { "epoch": 1.0692423974713863, "grad_norm": 0.75390625, "learning_rate": 1.5881942240558556e-05, "loss": 1.2962, "step": 3404 }, { "epoch": 1.0698706244969276, "grad_norm": 0.796875, "learning_rate": 1.5879403364011424e-05, "loss": 1.2558, "step": 3406 }, { "epoch": 1.070498851522469, "grad_norm": 0.7578125, "learning_rate": 1.58768644874643e-05, "loss": 1.3021, "step": 3408 }, { "epoch": 1.0711270785480103, "grad_norm": 0.80859375, "learning_rate": 1.587432561091717e-05, "loss": 1.2742, "step": 3410 }, { "epoch": 1.0717553055735516, "grad_norm": 0.796875, "learning_rate": 1.587178673437004e-05, "loss": 1.2945, "step": 3412 }, { "epoch": 1.072383532599093, "grad_norm": 0.83984375, "learning_rate": 1.5869247857822913e-05, "loss": 1.323, "step": 3414 }, { "epoch": 1.0730117596246345, "grad_norm": 0.76953125, "learning_rate": 1.5866708981275787e-05, "loss": 1.2498, "step": 3416 }, { "epoch": 1.0736399866501758, "grad_norm": 0.765625, "learning_rate": 1.586417010472866e-05, "loss": 1.0935, "step": 3418 }, { "epoch": 1.0742682136757171, "grad_norm": 0.77734375, "learning_rate": 1.586163122818153e-05, "loss": 1.1667, "step": 3420 }, { "epoch": 1.0748964407012584, "grad_norm": 0.75390625, "learning_rate": 1.58590923516344e-05, "loss": 1.2579, "step": 3422 }, { "epoch": 1.0755246677267998, "grad_norm": 0.79296875, "learning_rate": 1.5856553475087276e-05, "loss": 1.1673, "step": 3424 }, { "epoch": 1.076152894752341, "grad_norm": 0.73046875, "learning_rate": 1.5854014598540148e-05, "loss": 1.411, "step": 3426 }, { "epoch": 1.0767811217778824, "grad_norm": 0.82421875, "learning_rate": 1.585147572199302e-05, "loss": 1.1398, "step": 3428 }, { "epoch": 1.0774093488034238, "grad_norm": 0.76953125, "learning_rate": 1.5848936845445894e-05, "loss": 1.2636, "step": 3430 }, { "epoch": 1.078037575828965, "grad_norm": 0.71484375, "learning_rate": 1.5846397968898762e-05, "loss": 1.2364, "step": 3432 }, { "epoch": 1.0786658028545066, "grad_norm": 0.78515625, "learning_rate": 1.5843859092351637e-05, "loss": 1.4491, "step": 3434 }, { "epoch": 1.079294029880048, "grad_norm": 0.828125, "learning_rate": 1.5841320215804508e-05, "loss": 1.1533, "step": 3436 }, { "epoch": 1.0799222569055893, "grad_norm": 0.95703125, "learning_rate": 1.583878133925738e-05, "loss": 1.229, "step": 3438 }, { "epoch": 1.0805504839311306, "grad_norm": 0.76171875, "learning_rate": 1.583624246271025e-05, "loss": 1.4361, "step": 3440 }, { "epoch": 1.081178710956672, "grad_norm": 0.7890625, "learning_rate": 1.5833703586163126e-05, "loss": 1.3107, "step": 3442 }, { "epoch": 1.0818069379822133, "grad_norm": 0.84765625, "learning_rate": 1.5831164709615997e-05, "loss": 1.1652, "step": 3444 }, { "epoch": 1.0824351650077546, "grad_norm": 0.8359375, "learning_rate": 1.582862583306887e-05, "loss": 1.2367, "step": 3446 }, { "epoch": 1.083063392033296, "grad_norm": 0.76953125, "learning_rate": 1.582608695652174e-05, "loss": 1.178, "step": 3448 }, { "epoch": 1.0836916190588375, "grad_norm": 0.82421875, "learning_rate": 1.5823548079974615e-05, "loss": 1.2211, "step": 3450 }, { "epoch": 1.0843198460843788, "grad_norm": 0.9140625, "learning_rate": 1.5821009203427486e-05, "loss": 1.2025, "step": 3452 }, { "epoch": 1.0849480731099201, "grad_norm": 0.83203125, "learning_rate": 1.5818470326880357e-05, "loss": 1.2151, "step": 3454 }, { "epoch": 1.0855763001354615, "grad_norm": 0.79296875, "learning_rate": 1.581593145033323e-05, "loss": 1.2384, "step": 3456 }, { "epoch": 1.0862045271610028, "grad_norm": 0.85546875, "learning_rate": 1.58133925737861e-05, "loss": 1.2462, "step": 3458 }, { "epoch": 1.0868327541865441, "grad_norm": 0.78515625, "learning_rate": 1.581085369723897e-05, "loss": 1.1743, "step": 3460 }, { "epoch": 1.0874609812120855, "grad_norm": 0.7890625, "learning_rate": 1.5808314820691846e-05, "loss": 1.2484, "step": 3462 }, { "epoch": 1.0880892082376268, "grad_norm": 0.8203125, "learning_rate": 1.5805775944144718e-05, "loss": 1.1292, "step": 3464 }, { "epoch": 1.088717435263168, "grad_norm": 0.7890625, "learning_rate": 1.580323706759759e-05, "loss": 1.3436, "step": 3466 }, { "epoch": 1.0893456622887097, "grad_norm": 0.79296875, "learning_rate": 1.580069819105046e-05, "loss": 1.106, "step": 3468 }, { "epoch": 1.089973889314251, "grad_norm": 0.7109375, "learning_rate": 1.5798159314503335e-05, "loss": 1.1988, "step": 3470 }, { "epoch": 1.0906021163397923, "grad_norm": 0.84765625, "learning_rate": 1.5795620437956207e-05, "loss": 1.4247, "step": 3472 }, { "epoch": 1.0912303433653336, "grad_norm": 0.79296875, "learning_rate": 1.5793081561409078e-05, "loss": 1.2297, "step": 3474 }, { "epoch": 1.091858570390875, "grad_norm": 0.78125, "learning_rate": 1.579054268486195e-05, "loss": 1.1387, "step": 3476 }, { "epoch": 1.0924867974164163, "grad_norm": 0.73046875, "learning_rate": 1.5788003808314824e-05, "loss": 1.3101, "step": 3478 }, { "epoch": 1.0931150244419576, "grad_norm": 0.98046875, "learning_rate": 1.5785464931767692e-05, "loss": 1.2612, "step": 3480 }, { "epoch": 1.0937432514674992, "grad_norm": 0.79296875, "learning_rate": 1.5782926055220567e-05, "loss": 1.1812, "step": 3482 }, { "epoch": 1.0943714784930405, "grad_norm": 0.7265625, "learning_rate": 1.5780387178673438e-05, "loss": 1.2531, "step": 3484 }, { "epoch": 1.0949997055185818, "grad_norm": 0.91796875, "learning_rate": 1.577784830212631e-05, "loss": 1.1636, "step": 3486 }, { "epoch": 1.0956279325441232, "grad_norm": 0.75, "learning_rate": 1.577530942557918e-05, "loss": 1.2318, "step": 3488 }, { "epoch": 1.0962561595696645, "grad_norm": 0.859375, "learning_rate": 1.5772770549032056e-05, "loss": 1.337, "step": 3490 }, { "epoch": 1.0968843865952058, "grad_norm": 3.765625, "learning_rate": 1.5770231672484927e-05, "loss": 1.2689, "step": 3492 }, { "epoch": 1.0975126136207471, "grad_norm": 0.80078125, "learning_rate": 1.57676927959378e-05, "loss": 1.2185, "step": 3494 }, { "epoch": 1.0981408406462885, "grad_norm": 0.8359375, "learning_rate": 1.576515391939067e-05, "loss": 1.3053, "step": 3496 }, { "epoch": 1.0987690676718298, "grad_norm": 0.76171875, "learning_rate": 1.5762615042843545e-05, "loss": 1.1346, "step": 3498 }, { "epoch": 1.0993972946973714, "grad_norm": 0.7109375, "learning_rate": 1.5760076166296413e-05, "loss": 1.2331, "step": 3500 }, { "epoch": 1.1000255217229127, "grad_norm": 0.74609375, "learning_rate": 1.5757537289749287e-05, "loss": 1.2669, "step": 3502 }, { "epoch": 1.100653748748454, "grad_norm": 0.78515625, "learning_rate": 1.575499841320216e-05, "loss": 1.2222, "step": 3504 }, { "epoch": 1.1012819757739953, "grad_norm": 0.8046875, "learning_rate": 1.575245953665503e-05, "loss": 1.1517, "step": 3506 }, { "epoch": 1.1019102027995367, "grad_norm": 0.79296875, "learning_rate": 1.57499206601079e-05, "loss": 1.2206, "step": 3508 }, { "epoch": 1.102538429825078, "grad_norm": 0.7890625, "learning_rate": 1.5747381783560776e-05, "loss": 1.2261, "step": 3510 }, { "epoch": 1.1031666568506193, "grad_norm": 2.125, "learning_rate": 1.5744842907013648e-05, "loss": 1.0953, "step": 3512 }, { "epoch": 1.1037948838761606, "grad_norm": 0.83984375, "learning_rate": 1.574230403046652e-05, "loss": 1.3215, "step": 3514 }, { "epoch": 1.1044231109017022, "grad_norm": 0.79296875, "learning_rate": 1.5739765153919394e-05, "loss": 1.3369, "step": 3516 }, { "epoch": 1.1050513379272435, "grad_norm": 0.84375, "learning_rate": 1.5737226277372265e-05, "loss": 1.2915, "step": 3518 }, { "epoch": 1.1056795649527849, "grad_norm": 0.85546875, "learning_rate": 1.5734687400825137e-05, "loss": 1.3005, "step": 3520 }, { "epoch": 1.1063077919783262, "grad_norm": 0.83984375, "learning_rate": 1.5732148524278008e-05, "loss": 1.2166, "step": 3522 }, { "epoch": 1.1069360190038675, "grad_norm": 0.86328125, "learning_rate": 1.5729609647730883e-05, "loss": 1.1558, "step": 3524 }, { "epoch": 1.1075642460294088, "grad_norm": 0.77734375, "learning_rate": 1.572707077118375e-05, "loss": 1.2904, "step": 3526 }, { "epoch": 1.1081924730549502, "grad_norm": 0.859375, "learning_rate": 1.5724531894636626e-05, "loss": 1.1995, "step": 3528 }, { "epoch": 1.1088207000804915, "grad_norm": 0.78515625, "learning_rate": 1.5721993018089497e-05, "loss": 1.3063, "step": 3530 }, { "epoch": 1.109448927106033, "grad_norm": 0.85546875, "learning_rate": 1.571945414154237e-05, "loss": 1.3033, "step": 3532 }, { "epoch": 1.1100771541315744, "grad_norm": 0.81640625, "learning_rate": 1.571691526499524e-05, "loss": 1.2816, "step": 3534 }, { "epoch": 1.1107053811571157, "grad_norm": 0.81640625, "learning_rate": 1.5714376388448114e-05, "loss": 1.3709, "step": 3536 }, { "epoch": 1.111333608182657, "grad_norm": 0.8125, "learning_rate": 1.5711837511900986e-05, "loss": 1.1248, "step": 3538 }, { "epoch": 1.1119618352081984, "grad_norm": 0.81640625, "learning_rate": 1.5709298635353857e-05, "loss": 1.2373, "step": 3540 }, { "epoch": 1.1125900622337397, "grad_norm": 0.8671875, "learning_rate": 1.570675975880673e-05, "loss": 1.3827, "step": 3542 }, { "epoch": 1.113218289259281, "grad_norm": 0.90234375, "learning_rate": 1.5704220882259603e-05, "loss": 1.2102, "step": 3544 }, { "epoch": 1.1138465162848223, "grad_norm": 0.80859375, "learning_rate": 1.570168200571247e-05, "loss": 1.1883, "step": 3546 }, { "epoch": 1.114474743310364, "grad_norm": 0.828125, "learning_rate": 1.5699143129165346e-05, "loss": 1.1861, "step": 3548 }, { "epoch": 1.1151029703359052, "grad_norm": 0.80078125, "learning_rate": 1.5696604252618218e-05, "loss": 1.3095, "step": 3550 }, { "epoch": 1.1157311973614465, "grad_norm": 0.76171875, "learning_rate": 1.569406537607109e-05, "loss": 1.1794, "step": 3552 }, { "epoch": 1.1163594243869879, "grad_norm": 0.84765625, "learning_rate": 1.569152649952396e-05, "loss": 1.2195, "step": 3554 }, { "epoch": 1.1169876514125292, "grad_norm": 0.7578125, "learning_rate": 1.5688987622976835e-05, "loss": 1.3026, "step": 3556 }, { "epoch": 1.1176158784380705, "grad_norm": 0.859375, "learning_rate": 1.5686448746429706e-05, "loss": 1.308, "step": 3558 }, { "epoch": 1.1182441054636119, "grad_norm": 0.734375, "learning_rate": 1.5683909869882578e-05, "loss": 1.192, "step": 3560 }, { "epoch": 1.1188723324891532, "grad_norm": 0.76953125, "learning_rate": 1.568137099333545e-05, "loss": 1.1283, "step": 3562 }, { "epoch": 1.1195005595146945, "grad_norm": 0.765625, "learning_rate": 1.5678832116788324e-05, "loss": 1.2358, "step": 3564 }, { "epoch": 1.120128786540236, "grad_norm": 0.77734375, "learning_rate": 1.5676293240241195e-05, "loss": 1.1856, "step": 3566 }, { "epoch": 1.1207570135657774, "grad_norm": 0.828125, "learning_rate": 1.5673754363694067e-05, "loss": 1.2214, "step": 3568 }, { "epoch": 1.1213852405913187, "grad_norm": 0.796875, "learning_rate": 1.5671215487146938e-05, "loss": 1.1597, "step": 3570 }, { "epoch": 1.12201346761686, "grad_norm": 0.796875, "learning_rate": 1.566867661059981e-05, "loss": 1.1691, "step": 3572 }, { "epoch": 1.1226416946424014, "grad_norm": 0.78125, "learning_rate": 1.566613773405268e-05, "loss": 1.3458, "step": 3574 }, { "epoch": 1.1232699216679427, "grad_norm": 0.7890625, "learning_rate": 1.5663598857505556e-05, "loss": 1.0832, "step": 3576 }, { "epoch": 1.123898148693484, "grad_norm": 0.93359375, "learning_rate": 1.5661059980958427e-05, "loss": 1.2571, "step": 3578 }, { "epoch": 1.1245263757190254, "grad_norm": 0.78515625, "learning_rate": 1.56585211044113e-05, "loss": 1.3284, "step": 3580 }, { "epoch": 1.125154602744567, "grad_norm": 0.8984375, "learning_rate": 1.565598222786417e-05, "loss": 1.1614, "step": 3582 }, { "epoch": 1.1257828297701082, "grad_norm": 0.78125, "learning_rate": 1.5653443351317045e-05, "loss": 1.208, "step": 3584 }, { "epoch": 1.1264110567956496, "grad_norm": 0.75, "learning_rate": 1.5650904474769916e-05, "loss": 1.1761, "step": 3586 }, { "epoch": 1.127039283821191, "grad_norm": 0.734375, "learning_rate": 1.5648365598222787e-05, "loss": 1.3036, "step": 3588 }, { "epoch": 1.1276675108467322, "grad_norm": 0.765625, "learning_rate": 1.564582672167566e-05, "loss": 1.1291, "step": 3590 }, { "epoch": 1.1282957378722736, "grad_norm": 0.8125, "learning_rate": 1.5643287845128534e-05, "loss": 1.2135, "step": 3592 }, { "epoch": 1.1289239648978149, "grad_norm": 0.7421875, "learning_rate": 1.56407489685814e-05, "loss": 1.2834, "step": 3594 }, { "epoch": 1.1295521919233562, "grad_norm": 0.828125, "learning_rate": 1.5638210092034276e-05, "loss": 1.328, "step": 3596 }, { "epoch": 1.1301804189488975, "grad_norm": 0.79296875, "learning_rate": 1.5635671215487148e-05, "loss": 1.346, "step": 3598 }, { "epoch": 1.130808645974439, "grad_norm": 0.74609375, "learning_rate": 1.563313233894002e-05, "loss": 1.0939, "step": 3600 }, { "epoch": 1.1314368729999804, "grad_norm": 0.8046875, "learning_rate": 1.5630593462392894e-05, "loss": 1.243, "step": 3602 }, { "epoch": 1.1320651000255217, "grad_norm": 0.84765625, "learning_rate": 1.5628054585845765e-05, "loss": 1.2313, "step": 3604 }, { "epoch": 1.132693327051063, "grad_norm": 0.81640625, "learning_rate": 1.5625515709298637e-05, "loss": 1.2229, "step": 3606 }, { "epoch": 1.1333215540766044, "grad_norm": 0.890625, "learning_rate": 1.5622976832751508e-05, "loss": 1.3329, "step": 3608 }, { "epoch": 1.1339497811021457, "grad_norm": 0.96484375, "learning_rate": 1.5620437956204383e-05, "loss": 1.1156, "step": 3610 }, { "epoch": 1.134578008127687, "grad_norm": 0.828125, "learning_rate": 1.5617899079657254e-05, "loss": 1.1354, "step": 3612 }, { "epoch": 1.1352062351532286, "grad_norm": 0.85546875, "learning_rate": 1.5615360203110125e-05, "loss": 1.2974, "step": 3614 }, { "epoch": 1.13583446217877, "grad_norm": 0.76953125, "learning_rate": 1.5612821326562997e-05, "loss": 1.3404, "step": 3616 }, { "epoch": 1.1364626892043113, "grad_norm": 0.84765625, "learning_rate": 1.561028245001587e-05, "loss": 1.2677, "step": 3618 }, { "epoch": 1.1370909162298526, "grad_norm": 0.82421875, "learning_rate": 1.560774357346874e-05, "loss": 1.1016, "step": 3620 }, { "epoch": 1.137719143255394, "grad_norm": 0.703125, "learning_rate": 1.5605204696921614e-05, "loss": 1.2943, "step": 3622 }, { "epoch": 1.1383473702809352, "grad_norm": 0.796875, "learning_rate": 1.5602665820374486e-05, "loss": 1.196, "step": 3624 }, { "epoch": 1.1389755973064766, "grad_norm": 0.79296875, "learning_rate": 1.5600126943827357e-05, "loss": 1.1846, "step": 3626 }, { "epoch": 1.139603824332018, "grad_norm": 0.8046875, "learning_rate": 1.559758806728023e-05, "loss": 1.2626, "step": 3628 }, { "epoch": 1.1402320513575592, "grad_norm": 0.80078125, "learning_rate": 1.5595049190733103e-05, "loss": 1.3086, "step": 3630 }, { "epoch": 1.1408602783831008, "grad_norm": 0.9453125, "learning_rate": 1.5592510314185975e-05, "loss": 1.2576, "step": 3632 }, { "epoch": 1.141488505408642, "grad_norm": 0.8515625, "learning_rate": 1.5589971437638846e-05, "loss": 1.3336, "step": 3634 }, { "epoch": 1.1421167324341834, "grad_norm": 0.71484375, "learning_rate": 1.5587432561091717e-05, "loss": 1.1999, "step": 3636 }, { "epoch": 1.1427449594597248, "grad_norm": 0.90625, "learning_rate": 1.5584893684544592e-05, "loss": 1.2824, "step": 3638 }, { "epoch": 1.143373186485266, "grad_norm": 0.8125, "learning_rate": 1.558235480799746e-05, "loss": 1.3522, "step": 3640 }, { "epoch": 1.1440014135108074, "grad_norm": 0.96484375, "learning_rate": 1.5579815931450335e-05, "loss": 1.2944, "step": 3642 }, { "epoch": 1.1446296405363487, "grad_norm": 0.77734375, "learning_rate": 1.5577277054903206e-05, "loss": 1.2327, "step": 3644 }, { "epoch": 1.1452578675618903, "grad_norm": 0.796875, "learning_rate": 1.5574738178356078e-05, "loss": 1.3088, "step": 3646 }, { "epoch": 1.1458860945874316, "grad_norm": 0.8046875, "learning_rate": 1.557219930180895e-05, "loss": 1.2625, "step": 3648 }, { "epoch": 1.146514321612973, "grad_norm": 0.79296875, "learning_rate": 1.5569660425261824e-05, "loss": 1.3164, "step": 3650 }, { "epoch": 1.1471425486385143, "grad_norm": 0.875, "learning_rate": 1.5567121548714695e-05, "loss": 1.1474, "step": 3652 }, { "epoch": 1.1477707756640556, "grad_norm": 0.76953125, "learning_rate": 1.5564582672167567e-05, "loss": 1.3335, "step": 3654 }, { "epoch": 1.148399002689597, "grad_norm": 0.74609375, "learning_rate": 1.5562043795620438e-05, "loss": 1.2914, "step": 3656 }, { "epoch": 1.1490272297151383, "grad_norm": 0.8125, "learning_rate": 1.5559504919073313e-05, "loss": 1.2283, "step": 3658 }, { "epoch": 1.1496554567406796, "grad_norm": 0.8203125, "learning_rate": 1.5556966042526184e-05, "loss": 1.2828, "step": 3660 }, { "epoch": 1.150283683766221, "grad_norm": 0.96875, "learning_rate": 1.5554427165979056e-05, "loss": 1.2971, "step": 3662 }, { "epoch": 1.1509119107917622, "grad_norm": 0.80859375, "learning_rate": 1.5551888289431927e-05, "loss": 1.1939, "step": 3664 }, { "epoch": 1.1515401378173038, "grad_norm": 0.72265625, "learning_rate": 1.55493494128848e-05, "loss": 1.2101, "step": 3666 }, { "epoch": 1.1521683648428451, "grad_norm": 0.79296875, "learning_rate": 1.554681053633767e-05, "loss": 1.3979, "step": 3668 }, { "epoch": 1.1527965918683865, "grad_norm": 0.84375, "learning_rate": 1.5544271659790545e-05, "loss": 1.1002, "step": 3670 }, { "epoch": 1.1534248188939278, "grad_norm": 0.80859375, "learning_rate": 1.5541732783243416e-05, "loss": 1.2375, "step": 3672 }, { "epoch": 1.1540530459194691, "grad_norm": 0.71875, "learning_rate": 1.5539193906696287e-05, "loss": 1.315, "step": 3674 }, { "epoch": 1.1546812729450104, "grad_norm": 0.74609375, "learning_rate": 1.553665503014916e-05, "loss": 1.3209, "step": 3676 }, { "epoch": 1.1553094999705518, "grad_norm": 0.7265625, "learning_rate": 1.5534116153602033e-05, "loss": 1.2149, "step": 3678 }, { "epoch": 1.1559377269960933, "grad_norm": 0.73828125, "learning_rate": 1.5531577277054905e-05, "loss": 1.2292, "step": 3680 }, { "epoch": 1.1565659540216346, "grad_norm": 0.79296875, "learning_rate": 1.5529038400507776e-05, "loss": 1.148, "step": 3682 }, { "epoch": 1.157194181047176, "grad_norm": 0.73828125, "learning_rate": 1.552649952396065e-05, "loss": 1.2629, "step": 3684 }, { "epoch": 1.1578224080727173, "grad_norm": 0.7734375, "learning_rate": 1.5523960647413522e-05, "loss": 1.13, "step": 3686 }, { "epoch": 1.1584506350982586, "grad_norm": 0.7578125, "learning_rate": 1.5521421770866394e-05, "loss": 1.2941, "step": 3688 }, { "epoch": 1.1590788621238, "grad_norm": 0.8046875, "learning_rate": 1.5518882894319265e-05, "loss": 1.2437, "step": 3690 }, { "epoch": 1.1597070891493413, "grad_norm": 0.84765625, "learning_rate": 1.5516344017772136e-05, "loss": 1.3109, "step": 3692 }, { "epoch": 1.1603353161748826, "grad_norm": 0.8125, "learning_rate": 1.5513805141225008e-05, "loss": 1.0916, "step": 3694 }, { "epoch": 1.160963543200424, "grad_norm": 0.84375, "learning_rate": 1.5511266264677883e-05, "loss": 1.1996, "step": 3696 }, { "epoch": 1.1615917702259655, "grad_norm": 0.94140625, "learning_rate": 1.5508727388130754e-05, "loss": 1.2492, "step": 3698 }, { "epoch": 1.1622199972515068, "grad_norm": 0.77734375, "learning_rate": 1.5506188511583625e-05, "loss": 1.2623, "step": 3700 }, { "epoch": 1.1628482242770481, "grad_norm": 0.83203125, "learning_rate": 1.5503649635036497e-05, "loss": 1.4083, "step": 3702 }, { "epoch": 1.1634764513025895, "grad_norm": 0.78515625, "learning_rate": 1.550111075848937e-05, "loss": 1.2405, "step": 3704 }, { "epoch": 1.1641046783281308, "grad_norm": 0.86328125, "learning_rate": 1.5498571881942243e-05, "loss": 1.0913, "step": 3706 }, { "epoch": 1.1647329053536721, "grad_norm": 0.83984375, "learning_rate": 1.5496033005395114e-05, "loss": 1.1841, "step": 3708 }, { "epoch": 1.1653611323792135, "grad_norm": 0.77734375, "learning_rate": 1.5493494128847986e-05, "loss": 1.3798, "step": 3710 }, { "epoch": 1.165989359404755, "grad_norm": 0.765625, "learning_rate": 1.549095525230086e-05, "loss": 1.3218, "step": 3712 }, { "epoch": 1.1666175864302963, "grad_norm": 1.0, "learning_rate": 1.548841637575373e-05, "loss": 1.3271, "step": 3714 }, { "epoch": 1.1672458134558377, "grad_norm": 0.796875, "learning_rate": 1.5485877499206603e-05, "loss": 1.3283, "step": 3716 }, { "epoch": 1.167874040481379, "grad_norm": 0.88671875, "learning_rate": 1.5483338622659475e-05, "loss": 1.1618, "step": 3718 }, { "epoch": 1.1685022675069203, "grad_norm": 0.83984375, "learning_rate": 1.5480799746112346e-05, "loss": 1.2939, "step": 3720 }, { "epoch": 1.1691304945324617, "grad_norm": 0.8203125, "learning_rate": 1.5478260869565217e-05, "loss": 1.2001, "step": 3722 }, { "epoch": 1.169758721558003, "grad_norm": 0.8125, "learning_rate": 1.5475721993018092e-05, "loss": 1.2964, "step": 3724 }, { "epoch": 1.1703869485835443, "grad_norm": 0.75390625, "learning_rate": 1.5473183116470964e-05, "loss": 1.2945, "step": 3726 }, { "epoch": 1.1710151756090856, "grad_norm": 0.7578125, "learning_rate": 1.5470644239923835e-05, "loss": 1.3001, "step": 3728 }, { "epoch": 1.1716434026346272, "grad_norm": 0.78515625, "learning_rate": 1.5468105363376706e-05, "loss": 1.0717, "step": 3730 }, { "epoch": 1.1722716296601685, "grad_norm": 0.890625, "learning_rate": 1.546556648682958e-05, "loss": 1.1697, "step": 3732 }, { "epoch": 1.1728998566857098, "grad_norm": 0.91796875, "learning_rate": 1.546302761028245e-05, "loss": 1.1945, "step": 3734 }, { "epoch": 1.1735280837112512, "grad_norm": 0.765625, "learning_rate": 1.5460488733735324e-05, "loss": 1.1745, "step": 3736 }, { "epoch": 1.1741563107367925, "grad_norm": 0.83203125, "learning_rate": 1.5457949857188195e-05, "loss": 1.2856, "step": 3738 }, { "epoch": 1.1747845377623338, "grad_norm": 0.76953125, "learning_rate": 1.5455410980641067e-05, "loss": 1.2289, "step": 3740 }, { "epoch": 1.1754127647878752, "grad_norm": 0.8671875, "learning_rate": 1.5452872104093938e-05, "loss": 1.1443, "step": 3742 }, { "epoch": 1.1760409918134165, "grad_norm": 0.81640625, "learning_rate": 1.5450333227546813e-05, "loss": 1.2261, "step": 3744 }, { "epoch": 1.176669218838958, "grad_norm": 0.77734375, "learning_rate": 1.5447794350999684e-05, "loss": 1.2703, "step": 3746 }, { "epoch": 1.1772974458644994, "grad_norm": 0.73828125, "learning_rate": 1.5445255474452556e-05, "loss": 1.2568, "step": 3748 }, { "epoch": 1.1779256728900407, "grad_norm": 0.7265625, "learning_rate": 1.5442716597905427e-05, "loss": 1.2741, "step": 3750 }, { "epoch": 1.178553899915582, "grad_norm": 0.796875, "learning_rate": 1.54401777213583e-05, "loss": 1.2063, "step": 3752 }, { "epoch": 1.1791821269411233, "grad_norm": 0.75390625, "learning_rate": 1.5437638844811173e-05, "loss": 1.2187, "step": 3754 }, { "epoch": 1.1798103539666647, "grad_norm": 0.77734375, "learning_rate": 1.5435099968264044e-05, "loss": 1.2115, "step": 3756 }, { "epoch": 1.180438580992206, "grad_norm": 0.84375, "learning_rate": 1.5432561091716916e-05, "loss": 1.2467, "step": 3758 }, { "epoch": 1.1810668080177473, "grad_norm": 0.76953125, "learning_rate": 1.5430022215169787e-05, "loss": 1.3909, "step": 3760 }, { "epoch": 1.1816950350432887, "grad_norm": 0.8359375, "learning_rate": 1.542748333862266e-05, "loss": 1.3045, "step": 3762 }, { "epoch": 1.1823232620688302, "grad_norm": 0.76953125, "learning_rate": 1.5424944462075533e-05, "loss": 1.2189, "step": 3764 }, { "epoch": 1.1829514890943715, "grad_norm": 0.80078125, "learning_rate": 1.5422405585528405e-05, "loss": 1.0881, "step": 3766 }, { "epoch": 1.1835797161199129, "grad_norm": 0.74609375, "learning_rate": 1.5419866708981276e-05, "loss": 1.2768, "step": 3768 }, { "epoch": 1.1842079431454542, "grad_norm": 0.84765625, "learning_rate": 1.541732783243415e-05, "loss": 1.3732, "step": 3770 }, { "epoch": 1.1848361701709955, "grad_norm": 0.8203125, "learning_rate": 1.5414788955887022e-05, "loss": 1.3118, "step": 3772 }, { "epoch": 1.1854643971965368, "grad_norm": 0.8359375, "learning_rate": 1.5412250079339894e-05, "loss": 1.3224, "step": 3774 }, { "epoch": 1.1860926242220782, "grad_norm": 0.828125, "learning_rate": 1.5409711202792765e-05, "loss": 1.1572, "step": 3776 }, { "epoch": 1.1867208512476197, "grad_norm": 0.73828125, "learning_rate": 1.540717232624564e-05, "loss": 1.2863, "step": 3778 }, { "epoch": 1.187349078273161, "grad_norm": 0.76171875, "learning_rate": 1.5404633449698508e-05, "loss": 1.1624, "step": 3780 }, { "epoch": 1.1879773052987024, "grad_norm": 0.78515625, "learning_rate": 1.5402094573151383e-05, "loss": 1.3295, "step": 3782 }, { "epoch": 1.1886055323242437, "grad_norm": 0.79296875, "learning_rate": 1.5399555696604254e-05, "loss": 1.3428, "step": 3784 }, { "epoch": 1.189233759349785, "grad_norm": 0.8046875, "learning_rate": 1.5397016820057125e-05, "loss": 1.2549, "step": 3786 }, { "epoch": 1.1898619863753264, "grad_norm": 0.734375, "learning_rate": 1.5394477943509997e-05, "loss": 1.2031, "step": 3788 }, { "epoch": 1.1904902134008677, "grad_norm": 0.74609375, "learning_rate": 1.539193906696287e-05, "loss": 1.13, "step": 3790 }, { "epoch": 1.191118440426409, "grad_norm": 0.82421875, "learning_rate": 1.5389400190415743e-05, "loss": 1.2701, "step": 3792 }, { "epoch": 1.1917466674519503, "grad_norm": 0.828125, "learning_rate": 1.5386861313868614e-05, "loss": 1.208, "step": 3794 }, { "epoch": 1.192374894477492, "grad_norm": 0.80078125, "learning_rate": 1.5384322437321486e-05, "loss": 1.2884, "step": 3796 }, { "epoch": 1.1930031215030332, "grad_norm": 0.78125, "learning_rate": 1.538178356077436e-05, "loss": 1.1525, "step": 3798 }, { "epoch": 1.1936313485285746, "grad_norm": 0.8515625, "learning_rate": 1.5379244684227232e-05, "loss": 1.3432, "step": 3800 }, { "epoch": 1.1942595755541159, "grad_norm": 0.73828125, "learning_rate": 1.5376705807680103e-05, "loss": 1.1941, "step": 3802 }, { "epoch": 1.1948878025796572, "grad_norm": 0.76953125, "learning_rate": 1.5374166931132975e-05, "loss": 1.1627, "step": 3804 }, { "epoch": 1.1955160296051985, "grad_norm": 0.83984375, "learning_rate": 1.5371628054585846e-05, "loss": 1.1927, "step": 3806 }, { "epoch": 1.1961442566307399, "grad_norm": 0.73046875, "learning_rate": 1.5369089178038717e-05, "loss": 1.2496, "step": 3808 }, { "epoch": 1.1967724836562812, "grad_norm": 0.7578125, "learning_rate": 1.5366550301491592e-05, "loss": 1.3009, "step": 3810 }, { "epoch": 1.1974007106818227, "grad_norm": 0.76953125, "learning_rate": 1.5364011424944463e-05, "loss": 1.2321, "step": 3812 }, { "epoch": 1.198028937707364, "grad_norm": 0.78515625, "learning_rate": 1.5361472548397335e-05, "loss": 1.3292, "step": 3814 }, { "epoch": 1.1986571647329054, "grad_norm": 0.8125, "learning_rate": 1.5358933671850206e-05, "loss": 1.1956, "step": 3816 }, { "epoch": 1.1992853917584467, "grad_norm": 0.77734375, "learning_rate": 1.535639479530308e-05, "loss": 1.1833, "step": 3818 }, { "epoch": 1.199913618783988, "grad_norm": 0.828125, "learning_rate": 1.5353855918755952e-05, "loss": 1.3083, "step": 3820 }, { "epoch": 1.2005418458095294, "grad_norm": 0.84375, "learning_rate": 1.5351317042208824e-05, "loss": 1.2926, "step": 3822 }, { "epoch": 1.2011700728350707, "grad_norm": 0.859375, "learning_rate": 1.5348778165661695e-05, "loss": 1.2154, "step": 3824 }, { "epoch": 1.201798299860612, "grad_norm": 0.828125, "learning_rate": 1.534623928911457e-05, "loss": 1.2833, "step": 3826 }, { "epoch": 1.2024265268861534, "grad_norm": 0.84765625, "learning_rate": 1.5343700412567438e-05, "loss": 1.2619, "step": 3828 }, { "epoch": 1.203054753911695, "grad_norm": 0.89453125, "learning_rate": 1.5341161536020313e-05, "loss": 1.1693, "step": 3830 }, { "epoch": 1.2036829809372362, "grad_norm": 0.82421875, "learning_rate": 1.5338622659473184e-05, "loss": 1.388, "step": 3832 }, { "epoch": 1.2043112079627776, "grad_norm": 0.73828125, "learning_rate": 1.5336083782926055e-05, "loss": 1.1663, "step": 3834 }, { "epoch": 1.204939434988319, "grad_norm": 0.80078125, "learning_rate": 1.5333544906378927e-05, "loss": 1.2422, "step": 3836 }, { "epoch": 1.2055676620138602, "grad_norm": 0.76953125, "learning_rate": 1.53310060298318e-05, "loss": 1.3582, "step": 3838 }, { "epoch": 1.2061958890394016, "grad_norm": 0.75, "learning_rate": 1.5328467153284673e-05, "loss": 1.279, "step": 3840 }, { "epoch": 1.2068241160649429, "grad_norm": 0.796875, "learning_rate": 1.5325928276737544e-05, "loss": 1.1224, "step": 3842 }, { "epoch": 1.2074523430904844, "grad_norm": 0.75, "learning_rate": 1.5323389400190416e-05, "loss": 1.164, "step": 3844 }, { "epoch": 1.2080805701160258, "grad_norm": 0.76171875, "learning_rate": 1.532085052364329e-05, "loss": 1.2088, "step": 3846 }, { "epoch": 1.208708797141567, "grad_norm": 0.80078125, "learning_rate": 1.531831164709616e-05, "loss": 1.2329, "step": 3848 }, { "epoch": 1.2093370241671084, "grad_norm": 0.7890625, "learning_rate": 1.5315772770549033e-05, "loss": 1.251, "step": 3850 }, { "epoch": 1.2099652511926497, "grad_norm": 0.88671875, "learning_rate": 1.5313233894001908e-05, "loss": 1.2042, "step": 3852 }, { "epoch": 1.210593478218191, "grad_norm": 0.8046875, "learning_rate": 1.5310695017454776e-05, "loss": 1.143, "step": 3854 }, { "epoch": 1.2112217052437324, "grad_norm": 0.7734375, "learning_rate": 1.530815614090765e-05, "loss": 1.319, "step": 3856 }, { "epoch": 1.2118499322692737, "grad_norm": 0.80078125, "learning_rate": 1.5305617264360522e-05, "loss": 1.2545, "step": 3858 }, { "epoch": 1.212478159294815, "grad_norm": 0.75390625, "learning_rate": 1.5303078387813394e-05, "loss": 1.2631, "step": 3860 }, { "epoch": 1.2131063863203566, "grad_norm": 0.84375, "learning_rate": 1.5300539511266265e-05, "loss": 1.2877, "step": 3862 }, { "epoch": 1.213734613345898, "grad_norm": 0.76953125, "learning_rate": 1.529800063471914e-05, "loss": 1.2363, "step": 3864 }, { "epoch": 1.2143628403714393, "grad_norm": 0.87890625, "learning_rate": 1.529546175817201e-05, "loss": 1.1702, "step": 3866 }, { "epoch": 1.2149910673969806, "grad_norm": 0.7734375, "learning_rate": 1.5292922881624882e-05, "loss": 1.25, "step": 3868 }, { "epoch": 1.215619294422522, "grad_norm": 0.83984375, "learning_rate": 1.5290384005077754e-05, "loss": 1.3495, "step": 3870 }, { "epoch": 1.2162475214480633, "grad_norm": 0.7734375, "learning_rate": 1.528784512853063e-05, "loss": 1.2843, "step": 3872 }, { "epoch": 1.2168757484736046, "grad_norm": 0.7109375, "learning_rate": 1.5285306251983497e-05, "loss": 1.2542, "step": 3874 }, { "epoch": 1.217503975499146, "grad_norm": 0.74609375, "learning_rate": 1.528276737543637e-05, "loss": 1.3101, "step": 3876 }, { "epoch": 1.2181322025246875, "grad_norm": 0.7734375, "learning_rate": 1.5280228498889243e-05, "loss": 1.28, "step": 3878 }, { "epoch": 1.2187604295502288, "grad_norm": 0.7734375, "learning_rate": 1.5277689622342114e-05, "loss": 1.2766, "step": 3880 }, { "epoch": 1.2193886565757701, "grad_norm": 0.796875, "learning_rate": 1.5275150745794986e-05, "loss": 1.236, "step": 3882 }, { "epoch": 1.2200168836013114, "grad_norm": 0.79296875, "learning_rate": 1.527261186924786e-05, "loss": 1.1882, "step": 3884 }, { "epoch": 1.2206451106268528, "grad_norm": 0.82421875, "learning_rate": 1.5270072992700732e-05, "loss": 1.1035, "step": 3886 }, { "epoch": 1.221273337652394, "grad_norm": 0.796875, "learning_rate": 1.5267534116153603e-05, "loss": 1.1533, "step": 3888 }, { "epoch": 1.2219015646779354, "grad_norm": 0.8125, "learning_rate": 1.5264995239606474e-05, "loss": 1.1649, "step": 3890 }, { "epoch": 1.2225297917034768, "grad_norm": 0.875, "learning_rate": 1.526245636305935e-05, "loss": 1.2608, "step": 3892 }, { "epoch": 1.223158018729018, "grad_norm": 0.83984375, "learning_rate": 1.525991748651222e-05, "loss": 1.4731, "step": 3894 }, { "epoch": 1.2237862457545596, "grad_norm": 0.7578125, "learning_rate": 1.5257378609965092e-05, "loss": 1.276, "step": 3896 }, { "epoch": 1.224414472780101, "grad_norm": 0.7265625, "learning_rate": 1.5254839733417963e-05, "loss": 1.1836, "step": 3898 }, { "epoch": 1.2250426998056423, "grad_norm": 0.8515625, "learning_rate": 1.5252300856870836e-05, "loss": 1.1804, "step": 3900 }, { "epoch": 1.2256709268311836, "grad_norm": 0.765625, "learning_rate": 1.5249761980323708e-05, "loss": 1.3125, "step": 3902 }, { "epoch": 1.226299153856725, "grad_norm": 0.7421875, "learning_rate": 1.5247223103776581e-05, "loss": 1.2143, "step": 3904 }, { "epoch": 1.2269273808822663, "grad_norm": 0.80859375, "learning_rate": 1.5244684227229452e-05, "loss": 1.2005, "step": 3906 }, { "epoch": 1.2275556079078076, "grad_norm": 0.8359375, "learning_rate": 1.5242145350682325e-05, "loss": 1.2187, "step": 3908 }, { "epoch": 1.2281838349333492, "grad_norm": 0.734375, "learning_rate": 1.5239606474135195e-05, "loss": 1.3649, "step": 3910 }, { "epoch": 1.2288120619588905, "grad_norm": 0.85546875, "learning_rate": 1.5237067597588068e-05, "loss": 1.2188, "step": 3912 }, { "epoch": 1.2294402889844318, "grad_norm": 0.80078125, "learning_rate": 1.523452872104094e-05, "loss": 1.1949, "step": 3914 }, { "epoch": 1.2300685160099731, "grad_norm": 0.796875, "learning_rate": 1.5231989844493813e-05, "loss": 1.2312, "step": 3916 }, { "epoch": 1.2306967430355145, "grad_norm": 0.765625, "learning_rate": 1.5229450967946684e-05, "loss": 1.1944, "step": 3918 }, { "epoch": 1.2313249700610558, "grad_norm": 0.81640625, "learning_rate": 1.5226912091399557e-05, "loss": 1.2278, "step": 3920 }, { "epoch": 1.2319531970865971, "grad_norm": 0.7578125, "learning_rate": 1.5224373214852428e-05, "loss": 1.2428, "step": 3922 }, { "epoch": 1.2325814241121384, "grad_norm": 0.73828125, "learning_rate": 1.5221834338305302e-05, "loss": 1.3336, "step": 3924 }, { "epoch": 1.2332096511376798, "grad_norm": 0.76171875, "learning_rate": 1.5219295461758173e-05, "loss": 1.3555, "step": 3926 }, { "epoch": 1.2338378781632213, "grad_norm": 0.875, "learning_rate": 1.5216756585211046e-05, "loss": 1.1514, "step": 3928 }, { "epoch": 1.2344661051887627, "grad_norm": 0.70703125, "learning_rate": 1.5214217708663916e-05, "loss": 1.3315, "step": 3930 }, { "epoch": 1.235094332214304, "grad_norm": 0.7890625, "learning_rate": 1.521167883211679e-05, "loss": 1.2199, "step": 3932 }, { "epoch": 1.2357225592398453, "grad_norm": 0.84375, "learning_rate": 1.520913995556966e-05, "loss": 1.1983, "step": 3934 }, { "epoch": 1.2363507862653866, "grad_norm": 0.73828125, "learning_rate": 1.5206601079022533e-05, "loss": 1.2382, "step": 3936 }, { "epoch": 1.236979013290928, "grad_norm": 0.91015625, "learning_rate": 1.5204062202475406e-05, "loss": 1.1452, "step": 3938 }, { "epoch": 1.2376072403164693, "grad_norm": 0.84765625, "learning_rate": 1.5201523325928278e-05, "loss": 1.2165, "step": 3940 }, { "epoch": 1.2382354673420108, "grad_norm": 0.95703125, "learning_rate": 1.519898444938115e-05, "loss": 1.1627, "step": 3942 }, { "epoch": 1.2388636943675522, "grad_norm": 0.8125, "learning_rate": 1.5196445572834022e-05, "loss": 1.1965, "step": 3944 }, { "epoch": 1.2394919213930935, "grad_norm": 0.69921875, "learning_rate": 1.5193906696286895e-05, "loss": 1.19, "step": 3946 }, { "epoch": 1.2401201484186348, "grad_norm": 0.796875, "learning_rate": 1.5191367819739767e-05, "loss": 1.2672, "step": 3948 }, { "epoch": 1.2407483754441762, "grad_norm": 0.84765625, "learning_rate": 1.518882894319264e-05, "loss": 1.2684, "step": 3950 }, { "epoch": 1.2413766024697175, "grad_norm": 0.76953125, "learning_rate": 1.5186290066645511e-05, "loss": 1.3026, "step": 3952 }, { "epoch": 1.2420048294952588, "grad_norm": 0.7421875, "learning_rate": 1.5183751190098384e-05, "loss": 1.2142, "step": 3954 }, { "epoch": 1.2426330565208001, "grad_norm": 0.7421875, "learning_rate": 1.5181212313551254e-05, "loss": 1.2394, "step": 3956 }, { "epoch": 1.2432612835463415, "grad_norm": 0.84765625, "learning_rate": 1.5178673437004129e-05, "loss": 1.1269, "step": 3958 }, { "epoch": 1.2438895105718828, "grad_norm": 0.80078125, "learning_rate": 1.5176134560456998e-05, "loss": 1.2404, "step": 3960 }, { "epoch": 1.2445177375974243, "grad_norm": 0.8671875, "learning_rate": 1.5173595683909871e-05, "loss": 1.2585, "step": 3962 }, { "epoch": 1.2451459646229657, "grad_norm": 0.80859375, "learning_rate": 1.5171056807362743e-05, "loss": 1.2264, "step": 3964 }, { "epoch": 1.245774191648507, "grad_norm": 0.80859375, "learning_rate": 1.5168517930815616e-05, "loss": 1.3453, "step": 3966 }, { "epoch": 1.2464024186740483, "grad_norm": 0.80859375, "learning_rate": 1.5165979054268487e-05, "loss": 1.2773, "step": 3968 }, { "epoch": 1.2470306456995897, "grad_norm": 0.8671875, "learning_rate": 1.516344017772136e-05, "loss": 1.15, "step": 3970 }, { "epoch": 1.247658872725131, "grad_norm": 0.80078125, "learning_rate": 1.5160901301174232e-05, "loss": 1.3222, "step": 3972 }, { "epoch": 1.2482870997506723, "grad_norm": 0.796875, "learning_rate": 1.5158362424627105e-05, "loss": 1.1521, "step": 3974 }, { "epoch": 1.2489153267762139, "grad_norm": 0.91015625, "learning_rate": 1.5155823548079976e-05, "loss": 1.2704, "step": 3976 }, { "epoch": 1.2495435538017552, "grad_norm": 0.76171875, "learning_rate": 1.515328467153285e-05, "loss": 1.3046, "step": 3978 }, { "epoch": 1.2501717808272965, "grad_norm": 0.75390625, "learning_rate": 1.5150745794985719e-05, "loss": 1.1866, "step": 3980 }, { "epoch": 1.2508000078528378, "grad_norm": 0.73828125, "learning_rate": 1.5148206918438592e-05, "loss": 1.3763, "step": 3982 }, { "epoch": 1.2514282348783792, "grad_norm": 0.76171875, "learning_rate": 1.5145668041891463e-05, "loss": 1.3093, "step": 3984 }, { "epoch": 1.2520564619039205, "grad_norm": 0.79296875, "learning_rate": 1.5143129165344336e-05, "loss": 1.1302, "step": 3986 }, { "epoch": 1.2526846889294618, "grad_norm": 0.8203125, "learning_rate": 1.5140590288797208e-05, "loss": 1.2198, "step": 3988 }, { "epoch": 1.2533129159550032, "grad_norm": 0.7421875, "learning_rate": 1.5138051412250081e-05, "loss": 1.2666, "step": 3990 }, { "epoch": 1.2539411429805445, "grad_norm": 0.921875, "learning_rate": 1.5135512535702952e-05, "loss": 1.0958, "step": 3992 }, { "epoch": 1.2545693700060858, "grad_norm": 0.8515625, "learning_rate": 1.5132973659155825e-05, "loss": 1.2114, "step": 3994 }, { "epoch": 1.2551975970316274, "grad_norm": 0.78515625, "learning_rate": 1.5130434782608697e-05, "loss": 1.3153, "step": 3996 }, { "epoch": 1.2558258240571687, "grad_norm": 0.8125, "learning_rate": 1.512789590606157e-05, "loss": 1.2129, "step": 3998 }, { "epoch": 1.25645405108271, "grad_norm": 0.7890625, "learning_rate": 1.512535702951444e-05, "loss": 1.2922, "step": 4000 }, { "epoch": 1.2570822781082513, "grad_norm": 0.7578125, "learning_rate": 1.5122818152967314e-05, "loss": 1.1252, "step": 4002 }, { "epoch": 1.2577105051337927, "grad_norm": 0.77734375, "learning_rate": 1.5120279276420184e-05, "loss": 1.1488, "step": 4004 }, { "epoch": 1.258338732159334, "grad_norm": 0.80078125, "learning_rate": 1.5117740399873057e-05, "loss": 1.2191, "step": 4006 }, { "epoch": 1.2589669591848756, "grad_norm": 0.859375, "learning_rate": 1.5115201523325928e-05, "loss": 1.345, "step": 4008 }, { "epoch": 1.2595951862104169, "grad_norm": 0.7734375, "learning_rate": 1.5112662646778801e-05, "loss": 1.2722, "step": 4010 }, { "epoch": 1.2602234132359582, "grad_norm": 0.75390625, "learning_rate": 1.5110123770231673e-05, "loss": 1.2438, "step": 4012 }, { "epoch": 1.2608516402614995, "grad_norm": 0.7734375, "learning_rate": 1.5107584893684546e-05, "loss": 1.2488, "step": 4014 }, { "epoch": 1.2614798672870409, "grad_norm": 0.765625, "learning_rate": 1.5105046017137417e-05, "loss": 1.1744, "step": 4016 }, { "epoch": 1.2621080943125822, "grad_norm": 0.78515625, "learning_rate": 1.510250714059029e-05, "loss": 1.2071, "step": 4018 }, { "epoch": 1.2627363213381235, "grad_norm": 0.75, "learning_rate": 1.5099968264043162e-05, "loss": 1.3156, "step": 4020 }, { "epoch": 1.2633645483636649, "grad_norm": 0.890625, "learning_rate": 1.5097429387496035e-05, "loss": 1.1231, "step": 4022 }, { "epoch": 1.2639927753892062, "grad_norm": 0.796875, "learning_rate": 1.5094890510948908e-05, "loss": 1.151, "step": 4024 }, { "epoch": 1.2646210024147475, "grad_norm": 0.75, "learning_rate": 1.5092351634401778e-05, "loss": 1.3427, "step": 4026 }, { "epoch": 1.265249229440289, "grad_norm": 0.796875, "learning_rate": 1.5089812757854652e-05, "loss": 1.2093, "step": 4028 }, { "epoch": 1.2658774564658304, "grad_norm": 0.765625, "learning_rate": 1.5087273881307522e-05, "loss": 1.179, "step": 4030 }, { "epoch": 1.2665056834913717, "grad_norm": 0.8125, "learning_rate": 1.5084735004760395e-05, "loss": 1.2487, "step": 4032 }, { "epoch": 1.267133910516913, "grad_norm": 0.81640625, "learning_rate": 1.5082196128213267e-05, "loss": 1.2788, "step": 4034 }, { "epoch": 1.2677621375424544, "grad_norm": 0.78515625, "learning_rate": 1.507965725166614e-05, "loss": 1.2915, "step": 4036 }, { "epoch": 1.2683903645679957, "grad_norm": 0.8046875, "learning_rate": 1.5077118375119011e-05, "loss": 1.2832, "step": 4038 }, { "epoch": 1.2690185915935372, "grad_norm": 0.84375, "learning_rate": 1.5074579498571884e-05, "loss": 1.2789, "step": 4040 }, { "epoch": 1.2696468186190786, "grad_norm": 0.74609375, "learning_rate": 1.5072040622024755e-05, "loss": 1.0852, "step": 4042 }, { "epoch": 1.27027504564462, "grad_norm": 0.8828125, "learning_rate": 1.5069501745477629e-05, "loss": 1.2983, "step": 4044 }, { "epoch": 1.2709032726701612, "grad_norm": 0.8203125, "learning_rate": 1.50669628689305e-05, "loss": 1.2256, "step": 4046 }, { "epoch": 1.2715314996957026, "grad_norm": 0.79296875, "learning_rate": 1.5064423992383373e-05, "loss": 1.2011, "step": 4048 }, { "epoch": 1.2721597267212439, "grad_norm": 0.7109375, "learning_rate": 1.5061885115836243e-05, "loss": 1.2055, "step": 4050 }, { "epoch": 1.2727879537467852, "grad_norm": 0.828125, "learning_rate": 1.5059346239289116e-05, "loss": 1.1325, "step": 4052 }, { "epoch": 1.2734161807723265, "grad_norm": 0.828125, "learning_rate": 1.5056807362741987e-05, "loss": 1.182, "step": 4054 }, { "epoch": 1.2740444077978679, "grad_norm": 1.1796875, "learning_rate": 1.505426848619486e-05, "loss": 1.2868, "step": 4056 }, { "epoch": 1.2746726348234092, "grad_norm": 0.7578125, "learning_rate": 1.5051729609647732e-05, "loss": 1.3885, "step": 4058 }, { "epoch": 1.2753008618489505, "grad_norm": 0.7265625, "learning_rate": 1.5049190733100605e-05, "loss": 1.2637, "step": 4060 }, { "epoch": 1.275929088874492, "grad_norm": 0.7265625, "learning_rate": 1.5046651856553476e-05, "loss": 1.2665, "step": 4062 }, { "epoch": 1.2765573159000334, "grad_norm": 0.72265625, "learning_rate": 1.5044112980006349e-05, "loss": 1.3012, "step": 4064 }, { "epoch": 1.2771855429255747, "grad_norm": 0.953125, "learning_rate": 1.504157410345922e-05, "loss": 1.2597, "step": 4066 }, { "epoch": 1.277813769951116, "grad_norm": 0.7265625, "learning_rate": 1.5039035226912094e-05, "loss": 1.2633, "step": 4068 }, { "epoch": 1.2784419969766574, "grad_norm": 0.7890625, "learning_rate": 1.5036496350364965e-05, "loss": 1.2377, "step": 4070 }, { "epoch": 1.2790702240021987, "grad_norm": 0.80859375, "learning_rate": 1.5033957473817838e-05, "loss": 1.4411, "step": 4072 }, { "epoch": 1.2796984510277403, "grad_norm": 0.75390625, "learning_rate": 1.5031418597270708e-05, "loss": 1.1997, "step": 4074 }, { "epoch": 1.2803266780532816, "grad_norm": 0.77734375, "learning_rate": 1.502887972072358e-05, "loss": 1.2864, "step": 4076 }, { "epoch": 1.280954905078823, "grad_norm": 0.8125, "learning_rate": 1.5026340844176452e-05, "loss": 1.293, "step": 4078 }, { "epoch": 1.2815831321043643, "grad_norm": 0.77734375, "learning_rate": 1.5023801967629325e-05, "loss": 1.2866, "step": 4080 }, { "epoch": 1.2822113591299056, "grad_norm": 0.73046875, "learning_rate": 1.5021263091082197e-05, "loss": 1.2788, "step": 4082 }, { "epoch": 1.282839586155447, "grad_norm": 0.828125, "learning_rate": 1.501872421453507e-05, "loss": 1.1705, "step": 4084 }, { "epoch": 1.2834678131809882, "grad_norm": 0.86328125, "learning_rate": 1.5016185337987941e-05, "loss": 1.1257, "step": 4086 }, { "epoch": 1.2840960402065296, "grad_norm": 0.76953125, "learning_rate": 1.5013646461440814e-05, "loss": 1.2574, "step": 4088 }, { "epoch": 1.284724267232071, "grad_norm": 0.6953125, "learning_rate": 1.5011107584893686e-05, "loss": 1.2126, "step": 4090 }, { "epoch": 1.2853524942576122, "grad_norm": 0.6953125, "learning_rate": 1.5008568708346559e-05, "loss": 1.281, "step": 4092 }, { "epoch": 1.2859807212831538, "grad_norm": 2.875, "learning_rate": 1.5006029831799428e-05, "loss": 1.2376, "step": 4094 }, { "epoch": 1.286608948308695, "grad_norm": 0.76171875, "learning_rate": 1.5003490955252303e-05, "loss": 1.2656, "step": 4096 }, { "epoch": 1.2872371753342364, "grad_norm": 0.83203125, "learning_rate": 1.5000952078705173e-05, "loss": 1.3466, "step": 4098 }, { "epoch": 1.2878654023597778, "grad_norm": 0.79296875, "learning_rate": 1.4998413202158046e-05, "loss": 1.266, "step": 4100 }, { "epoch": 1.288493629385319, "grad_norm": 0.78125, "learning_rate": 1.4995874325610917e-05, "loss": 1.283, "step": 4102 }, { "epoch": 1.2891218564108604, "grad_norm": 0.74609375, "learning_rate": 1.499333544906379e-05, "loss": 1.2892, "step": 4104 }, { "epoch": 1.289750083436402, "grad_norm": 0.76953125, "learning_rate": 1.4990796572516662e-05, "loss": 1.2792, "step": 4106 }, { "epoch": 1.2903783104619433, "grad_norm": 0.7421875, "learning_rate": 1.4988257695969535e-05, "loss": 1.3327, "step": 4108 }, { "epoch": 1.2910065374874846, "grad_norm": 0.859375, "learning_rate": 1.4985718819422408e-05, "loss": 1.1793, "step": 4110 }, { "epoch": 1.291634764513026, "grad_norm": 0.75, "learning_rate": 1.498317994287528e-05, "loss": 1.3966, "step": 4112 }, { "epoch": 1.2922629915385673, "grad_norm": 0.78515625, "learning_rate": 1.4980641066328152e-05, "loss": 1.235, "step": 4114 }, { "epoch": 1.2928912185641086, "grad_norm": 0.8984375, "learning_rate": 1.4978102189781024e-05, "loss": 1.1285, "step": 4116 }, { "epoch": 1.29351944558965, "grad_norm": 0.7578125, "learning_rate": 1.4975563313233897e-05, "loss": 1.1071, "step": 4118 }, { "epoch": 1.2941476726151913, "grad_norm": 0.7421875, "learning_rate": 1.4973024436686766e-05, "loss": 1.3497, "step": 4120 }, { "epoch": 1.2947758996407326, "grad_norm": 0.89453125, "learning_rate": 1.4970485560139641e-05, "loss": 1.2041, "step": 4122 }, { "epoch": 1.295404126666274, "grad_norm": 0.8203125, "learning_rate": 1.4967946683592511e-05, "loss": 1.2132, "step": 4124 }, { "epoch": 1.2960323536918152, "grad_norm": 0.8828125, "learning_rate": 1.4965407807045384e-05, "loss": 1.2878, "step": 4126 }, { "epoch": 1.2966605807173568, "grad_norm": 0.91015625, "learning_rate": 1.4962868930498255e-05, "loss": 1.337, "step": 4128 }, { "epoch": 1.2972888077428981, "grad_norm": 0.82421875, "learning_rate": 1.4960330053951128e-05, "loss": 1.2482, "step": 4130 }, { "epoch": 1.2979170347684394, "grad_norm": 0.78515625, "learning_rate": 1.4957791177404e-05, "loss": 1.3392, "step": 4132 }, { "epoch": 1.2985452617939808, "grad_norm": 0.76953125, "learning_rate": 1.4955252300856873e-05, "loss": 1.3088, "step": 4134 }, { "epoch": 1.299173488819522, "grad_norm": 0.78125, "learning_rate": 1.4952713424309744e-05, "loss": 1.3334, "step": 4136 }, { "epoch": 1.2998017158450634, "grad_norm": 0.91796875, "learning_rate": 1.4950174547762617e-05, "loss": 1.1723, "step": 4138 }, { "epoch": 1.300429942870605, "grad_norm": 0.78515625, "learning_rate": 1.4947635671215489e-05, "loss": 1.3257, "step": 4140 }, { "epoch": 1.3010581698961463, "grad_norm": 0.8125, "learning_rate": 1.4945096794668362e-05, "loss": 1.3229, "step": 4142 }, { "epoch": 1.3016863969216876, "grad_norm": 0.76953125, "learning_rate": 1.4942557918121231e-05, "loss": 1.3304, "step": 4144 }, { "epoch": 1.302314623947229, "grad_norm": 0.80078125, "learning_rate": 1.4940019041574105e-05, "loss": 1.3206, "step": 4146 }, { "epoch": 1.3029428509727703, "grad_norm": 0.796875, "learning_rate": 1.4937480165026976e-05, "loss": 1.1349, "step": 4148 }, { "epoch": 1.3035710779983116, "grad_norm": 0.8046875, "learning_rate": 1.4934941288479849e-05, "loss": 1.2338, "step": 4150 }, { "epoch": 1.304199305023853, "grad_norm": 0.82421875, "learning_rate": 1.493240241193272e-05, "loss": 1.1981, "step": 4152 }, { "epoch": 1.3048275320493943, "grad_norm": 0.96875, "learning_rate": 1.4929863535385593e-05, "loss": 1.2488, "step": 4154 }, { "epoch": 1.3054557590749356, "grad_norm": 0.75390625, "learning_rate": 1.4927324658838465e-05, "loss": 1.1739, "step": 4156 }, { "epoch": 1.306083986100477, "grad_norm": 0.703125, "learning_rate": 1.4924785782291338e-05, "loss": 1.1962, "step": 4158 }, { "epoch": 1.3067122131260185, "grad_norm": 0.875, "learning_rate": 1.492224690574421e-05, "loss": 1.2288, "step": 4160 }, { "epoch": 1.3073404401515598, "grad_norm": 0.81640625, "learning_rate": 1.4919708029197082e-05, "loss": 1.2643, "step": 4162 }, { "epoch": 1.3079686671771011, "grad_norm": 0.859375, "learning_rate": 1.4917169152649952e-05, "loss": 1.3054, "step": 4164 }, { "epoch": 1.3085968942026425, "grad_norm": 0.7265625, "learning_rate": 1.4914630276102827e-05, "loss": 1.135, "step": 4166 }, { "epoch": 1.3092251212281838, "grad_norm": 0.92578125, "learning_rate": 1.4912091399555697e-05, "loss": 1.1145, "step": 4168 }, { "epoch": 1.3098533482537251, "grad_norm": 0.78125, "learning_rate": 1.490955252300857e-05, "loss": 1.3165, "step": 4170 }, { "epoch": 1.3104815752792667, "grad_norm": 0.8828125, "learning_rate": 1.4907013646461441e-05, "loss": 1.2578, "step": 4172 }, { "epoch": 1.311109802304808, "grad_norm": 0.84375, "learning_rate": 1.4904474769914314e-05, "loss": 1.3692, "step": 4174 }, { "epoch": 1.3117380293303493, "grad_norm": 0.8359375, "learning_rate": 1.4901935893367185e-05, "loss": 1.3078, "step": 4176 }, { "epoch": 1.3123662563558907, "grad_norm": 0.9296875, "learning_rate": 1.4899397016820059e-05, "loss": 1.2137, "step": 4178 }, { "epoch": 1.312994483381432, "grad_norm": 0.84375, "learning_rate": 1.489685814027293e-05, "loss": 1.2295, "step": 4180 }, { "epoch": 1.3136227104069733, "grad_norm": 0.85546875, "learning_rate": 1.4894319263725803e-05, "loss": 1.1217, "step": 4182 }, { "epoch": 1.3142509374325146, "grad_norm": 0.80859375, "learning_rate": 1.4891780387178674e-05, "loss": 1.2411, "step": 4184 }, { "epoch": 1.314879164458056, "grad_norm": 0.78125, "learning_rate": 1.4889241510631547e-05, "loss": 1.3081, "step": 4186 }, { "epoch": 1.3155073914835973, "grad_norm": 0.828125, "learning_rate": 1.4886702634084417e-05, "loss": 1.3283, "step": 4188 }, { "epoch": 1.3161356185091386, "grad_norm": 0.76171875, "learning_rate": 1.488416375753729e-05, "loss": 1.2081, "step": 4190 }, { "epoch": 1.3167638455346802, "grad_norm": 0.7734375, "learning_rate": 1.4881624880990162e-05, "loss": 1.2085, "step": 4192 }, { "epoch": 1.3173920725602215, "grad_norm": 0.765625, "learning_rate": 1.4879086004443035e-05, "loss": 1.3019, "step": 4194 }, { "epoch": 1.3180202995857628, "grad_norm": 0.75390625, "learning_rate": 1.4876547127895908e-05, "loss": 1.2424, "step": 4196 }, { "epoch": 1.3186485266113042, "grad_norm": 0.76171875, "learning_rate": 1.4874008251348779e-05, "loss": 1.2922, "step": 4198 }, { "epoch": 1.3192767536368455, "grad_norm": 0.7890625, "learning_rate": 1.4871469374801652e-05, "loss": 1.262, "step": 4200 }, { "epoch": 1.3199049806623868, "grad_norm": 0.7265625, "learning_rate": 1.4868930498254524e-05, "loss": 1.2194, "step": 4202 }, { "epoch": 1.3205332076879281, "grad_norm": 0.75, "learning_rate": 1.4866391621707397e-05, "loss": 1.1209, "step": 4204 }, { "epoch": 1.3211614347134697, "grad_norm": 0.80078125, "learning_rate": 1.4863852745160268e-05, "loss": 1.3312, "step": 4206 }, { "epoch": 1.321789661739011, "grad_norm": 0.86328125, "learning_rate": 1.4861313868613141e-05, "loss": 1.2484, "step": 4208 }, { "epoch": 1.3224178887645524, "grad_norm": 0.87890625, "learning_rate": 1.4858774992066013e-05, "loss": 1.2436, "step": 4210 }, { "epoch": 1.3230461157900937, "grad_norm": 0.7890625, "learning_rate": 1.4856236115518886e-05, "loss": 1.2811, "step": 4212 }, { "epoch": 1.323674342815635, "grad_norm": 2.375, "learning_rate": 1.4853697238971755e-05, "loss": 1.0794, "step": 4214 }, { "epoch": 1.3243025698411763, "grad_norm": 0.82421875, "learning_rate": 1.4851158362424628e-05, "loss": 1.2972, "step": 4216 }, { "epoch": 1.3249307968667177, "grad_norm": 0.7734375, "learning_rate": 1.48486194858775e-05, "loss": 1.2615, "step": 4218 }, { "epoch": 1.325559023892259, "grad_norm": 0.7734375, "learning_rate": 1.4846080609330373e-05, "loss": 1.2853, "step": 4220 }, { "epoch": 1.3261872509178003, "grad_norm": 0.83984375, "learning_rate": 1.4843541732783244e-05, "loss": 1.2105, "step": 4222 }, { "epoch": 1.3268154779433416, "grad_norm": 0.87109375, "learning_rate": 1.4841002856236117e-05, "loss": 1.0318, "step": 4224 }, { "epoch": 1.3274437049688832, "grad_norm": 0.7578125, "learning_rate": 1.4838463979688989e-05, "loss": 1.2111, "step": 4226 }, { "epoch": 1.3280719319944245, "grad_norm": 0.86328125, "learning_rate": 1.4835925103141862e-05, "loss": 1.0525, "step": 4228 }, { "epoch": 1.3287001590199659, "grad_norm": 0.78515625, "learning_rate": 1.4833386226594733e-05, "loss": 1.2753, "step": 4230 }, { "epoch": 1.3293283860455072, "grad_norm": 0.83203125, "learning_rate": 1.4830847350047606e-05, "loss": 1.1766, "step": 4232 }, { "epoch": 1.3299566130710485, "grad_norm": 0.765625, "learning_rate": 1.4828308473500478e-05, "loss": 1.3159, "step": 4234 }, { "epoch": 1.3305848400965898, "grad_norm": 0.7890625, "learning_rate": 1.482576959695335e-05, "loss": 1.1587, "step": 4236 }, { "epoch": 1.3312130671221314, "grad_norm": 0.8203125, "learning_rate": 1.482323072040622e-05, "loss": 1.1362, "step": 4238 }, { "epoch": 1.3318412941476727, "grad_norm": 0.9140625, "learning_rate": 1.4820691843859093e-05, "loss": 1.2305, "step": 4240 }, { "epoch": 1.332469521173214, "grad_norm": 0.77734375, "learning_rate": 1.4818152967311965e-05, "loss": 1.163, "step": 4242 }, { "epoch": 1.3330977481987554, "grad_norm": 0.80078125, "learning_rate": 1.4815614090764838e-05, "loss": 1.2654, "step": 4244 }, { "epoch": 1.3337259752242967, "grad_norm": 0.76171875, "learning_rate": 1.481307521421771e-05, "loss": 1.1764, "step": 4246 }, { "epoch": 1.334354202249838, "grad_norm": 0.7890625, "learning_rate": 1.4810536337670582e-05, "loss": 1.2239, "step": 4248 }, { "epoch": 1.3349824292753794, "grad_norm": 0.77734375, "learning_rate": 1.4807997461123454e-05, "loss": 1.2153, "step": 4250 }, { "epoch": 1.3356106563009207, "grad_norm": 0.73046875, "learning_rate": 1.4805458584576327e-05, "loss": 1.1689, "step": 4252 }, { "epoch": 1.336238883326462, "grad_norm": 0.8671875, "learning_rate": 1.4802919708029198e-05, "loss": 1.2603, "step": 4254 }, { "epoch": 1.3368671103520033, "grad_norm": 0.8046875, "learning_rate": 1.4800380831482071e-05, "loss": 1.1904, "step": 4256 }, { "epoch": 1.337495337377545, "grad_norm": 0.7578125, "learning_rate": 1.4797841954934941e-05, "loss": 1.4012, "step": 4258 }, { "epoch": 1.3381235644030862, "grad_norm": 0.9453125, "learning_rate": 1.4795303078387816e-05, "loss": 1.2366, "step": 4260 }, { "epoch": 1.3387517914286275, "grad_norm": 0.7734375, "learning_rate": 1.4792764201840685e-05, "loss": 1.2833, "step": 4262 }, { "epoch": 1.3393800184541689, "grad_norm": 0.7578125, "learning_rate": 1.4790225325293558e-05, "loss": 1.3163, "step": 4264 }, { "epoch": 1.3400082454797102, "grad_norm": 0.80859375, "learning_rate": 1.478768644874643e-05, "loss": 1.2823, "step": 4266 }, { "epoch": 1.3406364725052515, "grad_norm": 0.8515625, "learning_rate": 1.4785147572199303e-05, "loss": 1.1118, "step": 4268 }, { "epoch": 1.3412646995307929, "grad_norm": 0.7578125, "learning_rate": 1.4782608695652174e-05, "loss": 1.1947, "step": 4270 }, { "epoch": 1.3418929265563344, "grad_norm": 0.71875, "learning_rate": 1.4780069819105047e-05, "loss": 1.3296, "step": 4272 }, { "epoch": 1.3425211535818757, "grad_norm": 0.7578125, "learning_rate": 1.4777530942557919e-05, "loss": 1.3039, "step": 4274 }, { "epoch": 1.343149380607417, "grad_norm": 0.9609375, "learning_rate": 1.4774992066010792e-05, "loss": 1.1281, "step": 4276 }, { "epoch": 1.3437776076329584, "grad_norm": 0.79296875, "learning_rate": 1.4772453189463663e-05, "loss": 1.2095, "step": 4278 }, { "epoch": 1.3444058346584997, "grad_norm": 0.796875, "learning_rate": 1.4769914312916536e-05, "loss": 1.2037, "step": 4280 }, { "epoch": 1.345034061684041, "grad_norm": 0.79296875, "learning_rate": 1.476737543636941e-05, "loss": 1.2451, "step": 4282 }, { "epoch": 1.3456622887095824, "grad_norm": 0.76953125, "learning_rate": 1.4764836559822279e-05, "loss": 1.2812, "step": 4284 }, { "epoch": 1.3462905157351237, "grad_norm": 0.8125, "learning_rate": 1.4762297683275154e-05, "loss": 1.1563, "step": 4286 }, { "epoch": 1.346918742760665, "grad_norm": 0.74609375, "learning_rate": 1.4759758806728024e-05, "loss": 1.2046, "step": 4288 }, { "epoch": 1.3475469697862064, "grad_norm": 0.8046875, "learning_rate": 1.4757219930180897e-05, "loss": 1.1475, "step": 4290 }, { "epoch": 1.348175196811748, "grad_norm": 0.84375, "learning_rate": 1.4754681053633768e-05, "loss": 1.19, "step": 4292 }, { "epoch": 1.3488034238372892, "grad_norm": 0.83203125, "learning_rate": 1.4752142177086641e-05, "loss": 1.1384, "step": 4294 }, { "epoch": 1.3494316508628306, "grad_norm": 0.75390625, "learning_rate": 1.4749603300539512e-05, "loss": 1.3681, "step": 4296 }, { "epoch": 1.350059877888372, "grad_norm": 0.77734375, "learning_rate": 1.4747064423992386e-05, "loss": 1.4912, "step": 4298 }, { "epoch": 1.3506881049139132, "grad_norm": 0.796875, "learning_rate": 1.4744525547445257e-05, "loss": 1.2292, "step": 4300 }, { "epoch": 1.3513163319394546, "grad_norm": 0.80859375, "learning_rate": 1.474198667089813e-05, "loss": 1.2261, "step": 4302 }, { "epoch": 1.351944558964996, "grad_norm": 0.7578125, "learning_rate": 1.4739447794351001e-05, "loss": 1.3621, "step": 4304 }, { "epoch": 1.3525727859905374, "grad_norm": 0.8515625, "learning_rate": 1.4736908917803874e-05, "loss": 1.1424, "step": 4306 }, { "epoch": 1.3532010130160788, "grad_norm": 0.8203125, "learning_rate": 1.4734370041256744e-05, "loss": 1.1803, "step": 4308 }, { "epoch": 1.35382924004162, "grad_norm": 0.73828125, "learning_rate": 1.4731831164709617e-05, "loss": 1.2921, "step": 4310 }, { "epoch": 1.3544574670671614, "grad_norm": 0.77734375, "learning_rate": 1.4729292288162489e-05, "loss": 1.359, "step": 4312 }, { "epoch": 1.3550856940927027, "grad_norm": 0.7578125, "learning_rate": 1.4726753411615362e-05, "loss": 1.2099, "step": 4314 }, { "epoch": 1.355713921118244, "grad_norm": 0.73046875, "learning_rate": 1.4724214535068233e-05, "loss": 1.3183, "step": 4316 }, { "epoch": 1.3563421481437854, "grad_norm": 0.80078125, "learning_rate": 1.4721675658521106e-05, "loss": 1.3491, "step": 4318 }, { "epoch": 1.3569703751693267, "grad_norm": 0.8203125, "learning_rate": 1.4719136781973978e-05, "loss": 1.239, "step": 4320 }, { "epoch": 1.357598602194868, "grad_norm": 0.80859375, "learning_rate": 1.471659790542685e-05, "loss": 1.2861, "step": 4322 }, { "epoch": 1.3582268292204096, "grad_norm": 0.79296875, "learning_rate": 1.4714059028879722e-05, "loss": 1.2679, "step": 4324 }, { "epoch": 1.358855056245951, "grad_norm": 0.72265625, "learning_rate": 1.4711520152332595e-05, "loss": 1.2569, "step": 4326 }, { "epoch": 1.3594832832714923, "grad_norm": 0.7890625, "learning_rate": 1.4708981275785465e-05, "loss": 1.2359, "step": 4328 }, { "epoch": 1.3601115102970336, "grad_norm": 0.85546875, "learning_rate": 1.470644239923834e-05, "loss": 1.2838, "step": 4330 }, { "epoch": 1.360739737322575, "grad_norm": 0.8359375, "learning_rate": 1.470390352269121e-05, "loss": 1.302, "step": 4332 }, { "epoch": 1.3613679643481162, "grad_norm": 0.83984375, "learning_rate": 1.4701364646144082e-05, "loss": 1.3001, "step": 4334 }, { "epoch": 1.3619961913736578, "grad_norm": 0.828125, "learning_rate": 1.4698825769596954e-05, "loss": 1.2303, "step": 4336 }, { "epoch": 1.3626244183991991, "grad_norm": 0.83984375, "learning_rate": 1.4696286893049827e-05, "loss": 1.2391, "step": 4338 }, { "epoch": 1.3632526454247405, "grad_norm": 0.8515625, "learning_rate": 1.4693748016502698e-05, "loss": 1.2481, "step": 4340 }, { "epoch": 1.3638808724502818, "grad_norm": 0.76171875, "learning_rate": 1.4691209139955571e-05, "loss": 1.2994, "step": 4342 }, { "epoch": 1.364509099475823, "grad_norm": 0.96484375, "learning_rate": 1.4688670263408443e-05, "loss": 1.392, "step": 4344 }, { "epoch": 1.3651373265013644, "grad_norm": 0.83984375, "learning_rate": 1.4686131386861316e-05, "loss": 1.3212, "step": 4346 }, { "epoch": 1.3657655535269058, "grad_norm": 0.81640625, "learning_rate": 1.4683592510314187e-05, "loss": 1.3631, "step": 4348 }, { "epoch": 1.366393780552447, "grad_norm": 0.79296875, "learning_rate": 1.468105363376706e-05, "loss": 1.2164, "step": 4350 }, { "epoch": 1.3670220075779884, "grad_norm": 0.82421875, "learning_rate": 1.467851475721993e-05, "loss": 1.1514, "step": 4352 }, { "epoch": 1.3676502346035297, "grad_norm": 0.79296875, "learning_rate": 1.4675975880672803e-05, "loss": 1.3862, "step": 4354 }, { "epoch": 1.368278461629071, "grad_norm": 0.890625, "learning_rate": 1.4673437004125674e-05, "loss": 1.2115, "step": 4356 }, { "epoch": 1.3689066886546126, "grad_norm": 0.91796875, "learning_rate": 1.4670898127578547e-05, "loss": 1.1759, "step": 4358 }, { "epoch": 1.369534915680154, "grad_norm": 0.78515625, "learning_rate": 1.4668359251031419e-05, "loss": 1.2677, "step": 4360 }, { "epoch": 1.3701631427056953, "grad_norm": 0.79296875, "learning_rate": 1.4665820374484292e-05, "loss": 1.287, "step": 4362 }, { "epoch": 1.3707913697312366, "grad_norm": 0.76171875, "learning_rate": 1.4663281497937163e-05, "loss": 1.3592, "step": 4364 }, { "epoch": 1.371419596756778, "grad_norm": 0.828125, "learning_rate": 1.4660742621390036e-05, "loss": 1.2563, "step": 4366 }, { "epoch": 1.3720478237823193, "grad_norm": 0.76953125, "learning_rate": 1.465820374484291e-05, "loss": 1.1968, "step": 4368 }, { "epoch": 1.3726760508078608, "grad_norm": 0.78125, "learning_rate": 1.465566486829578e-05, "loss": 1.3271, "step": 4370 }, { "epoch": 1.3733042778334021, "grad_norm": 0.74609375, "learning_rate": 1.4653125991748654e-05, "loss": 1.304, "step": 4372 }, { "epoch": 1.3739325048589435, "grad_norm": 0.7421875, "learning_rate": 1.4650587115201525e-05, "loss": 1.3881, "step": 4374 }, { "epoch": 1.3745607318844848, "grad_norm": 0.765625, "learning_rate": 1.4648048238654398e-05, "loss": 1.2676, "step": 4376 }, { "epoch": 1.3751889589100261, "grad_norm": 0.76171875, "learning_rate": 1.4645509362107268e-05, "loss": 1.2412, "step": 4378 }, { "epoch": 1.3758171859355675, "grad_norm": 0.921875, "learning_rate": 1.4642970485560141e-05, "loss": 1.1142, "step": 4380 }, { "epoch": 1.3764454129611088, "grad_norm": 0.78515625, "learning_rate": 1.4640431609013012e-05, "loss": 1.2977, "step": 4382 }, { "epoch": 1.37707363998665, "grad_norm": 0.76953125, "learning_rate": 1.4637892732465885e-05, "loss": 1.1754, "step": 4384 }, { "epoch": 1.3777018670121914, "grad_norm": 0.7890625, "learning_rate": 1.4635353855918757e-05, "loss": 1.3088, "step": 4386 }, { "epoch": 1.3783300940377328, "grad_norm": 0.79296875, "learning_rate": 1.463281497937163e-05, "loss": 1.359, "step": 4388 }, { "epoch": 1.3789583210632743, "grad_norm": 0.7578125, "learning_rate": 1.4630276102824501e-05, "loss": 1.1869, "step": 4390 }, { "epoch": 1.3795865480888156, "grad_norm": 0.7265625, "learning_rate": 1.4627737226277374e-05, "loss": 1.3206, "step": 4392 }, { "epoch": 1.380214775114357, "grad_norm": 0.7109375, "learning_rate": 1.4625198349730246e-05, "loss": 1.308, "step": 4394 }, { "epoch": 1.3808430021398983, "grad_norm": 0.77734375, "learning_rate": 1.4622659473183119e-05, "loss": 1.2614, "step": 4396 }, { "epoch": 1.3814712291654396, "grad_norm": 0.8671875, "learning_rate": 1.462012059663599e-05, "loss": 1.1872, "step": 4398 }, { "epoch": 1.382099456190981, "grad_norm": 0.82421875, "learning_rate": 1.4617581720088863e-05, "loss": 1.2658, "step": 4400 }, { "epoch": 1.3827276832165225, "grad_norm": 0.82421875, "learning_rate": 1.4615042843541733e-05, "loss": 1.3195, "step": 4402 }, { "epoch": 1.3833559102420638, "grad_norm": 0.83984375, "learning_rate": 1.4612503966994606e-05, "loss": 1.1573, "step": 4404 }, { "epoch": 1.3839841372676052, "grad_norm": 0.85546875, "learning_rate": 1.4609965090447477e-05, "loss": 1.2768, "step": 4406 }, { "epoch": 1.3846123642931465, "grad_norm": 0.83984375, "learning_rate": 1.460742621390035e-05, "loss": 1.2157, "step": 4408 }, { "epoch": 1.3852405913186878, "grad_norm": 0.76171875, "learning_rate": 1.4604887337353222e-05, "loss": 1.1608, "step": 4410 }, { "epoch": 1.3858688183442291, "grad_norm": 0.8984375, "learning_rate": 1.4602348460806095e-05, "loss": 1.1619, "step": 4412 }, { "epoch": 1.3864970453697705, "grad_norm": 0.73828125, "learning_rate": 1.4599809584258966e-05, "loss": 1.1692, "step": 4414 }, { "epoch": 1.3871252723953118, "grad_norm": 0.76171875, "learning_rate": 1.459727070771184e-05, "loss": 1.3201, "step": 4416 }, { "epoch": 1.3877534994208531, "grad_norm": 0.890625, "learning_rate": 1.459473183116471e-05, "loss": 1.2667, "step": 4418 }, { "epoch": 1.3883817264463945, "grad_norm": 0.84375, "learning_rate": 1.4592192954617584e-05, "loss": 1.3936, "step": 4420 }, { "epoch": 1.3890099534719358, "grad_norm": 0.796875, "learning_rate": 1.4589654078070454e-05, "loss": 1.271, "step": 4422 }, { "epoch": 1.3896381804974773, "grad_norm": 0.84375, "learning_rate": 1.4587115201523328e-05, "loss": 1.2298, "step": 4424 }, { "epoch": 1.3902664075230187, "grad_norm": 0.7421875, "learning_rate": 1.4584576324976198e-05, "loss": 1.3023, "step": 4426 }, { "epoch": 1.39089463454856, "grad_norm": 0.78125, "learning_rate": 1.4582037448429071e-05, "loss": 1.0285, "step": 4428 }, { "epoch": 1.3915228615741013, "grad_norm": 0.90234375, "learning_rate": 1.4579498571881942e-05, "loss": 1.345, "step": 4430 }, { "epoch": 1.3921510885996426, "grad_norm": 0.76953125, "learning_rate": 1.4576959695334816e-05, "loss": 1.3171, "step": 4432 }, { "epoch": 1.392779315625184, "grad_norm": 0.83203125, "learning_rate": 1.4574420818787687e-05, "loss": 1.1312, "step": 4434 }, { "epoch": 1.3934075426507255, "grad_norm": 0.80078125, "learning_rate": 1.457188194224056e-05, "loss": 1.281, "step": 4436 }, { "epoch": 1.3940357696762669, "grad_norm": 0.8046875, "learning_rate": 1.4569343065693431e-05, "loss": 1.3061, "step": 4438 }, { "epoch": 1.3946639967018082, "grad_norm": 0.90625, "learning_rate": 1.4566804189146304e-05, "loss": 1.2167, "step": 4440 }, { "epoch": 1.3952922237273495, "grad_norm": 0.80859375, "learning_rate": 1.4564265312599176e-05, "loss": 1.2837, "step": 4442 }, { "epoch": 1.3959204507528908, "grad_norm": 0.9140625, "learning_rate": 1.4561726436052049e-05, "loss": 1.1497, "step": 4444 }, { "epoch": 1.3965486777784322, "grad_norm": 1.1015625, "learning_rate": 1.4559187559504919e-05, "loss": 1.2668, "step": 4446 }, { "epoch": 1.3971769048039735, "grad_norm": 0.81640625, "learning_rate": 1.4556648682957792e-05, "loss": 1.2495, "step": 4448 }, { "epoch": 1.3978051318295148, "grad_norm": 0.73046875, "learning_rate": 1.4554109806410663e-05, "loss": 1.3183, "step": 4450 }, { "epoch": 1.3984333588550562, "grad_norm": 0.88671875, "learning_rate": 1.4551570929863536e-05, "loss": 1.2387, "step": 4452 }, { "epoch": 1.3990615858805975, "grad_norm": 0.84375, "learning_rate": 1.454903205331641e-05, "loss": 1.3808, "step": 4454 }, { "epoch": 1.399689812906139, "grad_norm": 0.796875, "learning_rate": 1.454649317676928e-05, "loss": 1.2578, "step": 4456 }, { "epoch": 1.4003180399316804, "grad_norm": 0.8125, "learning_rate": 1.4543954300222154e-05, "loss": 1.3209, "step": 4458 }, { "epoch": 1.4009462669572217, "grad_norm": 0.83984375, "learning_rate": 1.4541415423675025e-05, "loss": 1.2369, "step": 4460 }, { "epoch": 1.401574493982763, "grad_norm": 0.765625, "learning_rate": 1.4538876547127898e-05, "loss": 1.2486, "step": 4462 }, { "epoch": 1.4022027210083043, "grad_norm": 0.7421875, "learning_rate": 1.453633767058077e-05, "loss": 1.1907, "step": 4464 }, { "epoch": 1.4028309480338457, "grad_norm": 0.74609375, "learning_rate": 1.4533798794033643e-05, "loss": 1.1616, "step": 4466 }, { "epoch": 1.4034591750593872, "grad_norm": 0.84375, "learning_rate": 1.4531259917486514e-05, "loss": 1.1429, "step": 4468 }, { "epoch": 1.4040874020849285, "grad_norm": 0.8125, "learning_rate": 1.4528721040939387e-05, "loss": 1.2914, "step": 4470 }, { "epoch": 1.4047156291104699, "grad_norm": 0.89453125, "learning_rate": 1.4526182164392257e-05, "loss": 1.3573, "step": 4472 }, { "epoch": 1.4053438561360112, "grad_norm": 0.8125, "learning_rate": 1.452364328784513e-05, "loss": 1.258, "step": 4474 }, { "epoch": 1.4059720831615525, "grad_norm": 0.76171875, "learning_rate": 1.4521104411298001e-05, "loss": 1.3615, "step": 4476 }, { "epoch": 1.4066003101870939, "grad_norm": 0.734375, "learning_rate": 1.4518565534750874e-05, "loss": 1.2793, "step": 4478 }, { "epoch": 1.4072285372126352, "grad_norm": 0.75390625, "learning_rate": 1.4516026658203746e-05, "loss": 1.2621, "step": 4480 }, { "epoch": 1.4078567642381765, "grad_norm": 0.734375, "learning_rate": 1.4513487781656619e-05, "loss": 1.2854, "step": 4482 }, { "epoch": 1.4084849912637178, "grad_norm": 0.80859375, "learning_rate": 1.451094890510949e-05, "loss": 1.2765, "step": 4484 }, { "epoch": 1.4091132182892592, "grad_norm": 0.7734375, "learning_rate": 1.4508410028562363e-05, "loss": 1.2565, "step": 4486 }, { "epoch": 1.4097414453148005, "grad_norm": 0.83203125, "learning_rate": 1.4505871152015235e-05, "loss": 1.1619, "step": 4488 }, { "epoch": 1.410369672340342, "grad_norm": 0.73828125, "learning_rate": 1.4503332275468108e-05, "loss": 1.2787, "step": 4490 }, { "epoch": 1.4109978993658834, "grad_norm": 0.7890625, "learning_rate": 1.4500793398920977e-05, "loss": 1.1383, "step": 4492 }, { "epoch": 1.4116261263914247, "grad_norm": 0.84375, "learning_rate": 1.4498254522373852e-05, "loss": 1.2907, "step": 4494 }, { "epoch": 1.412254353416966, "grad_norm": 0.7734375, "learning_rate": 1.4495715645826722e-05, "loss": 1.2964, "step": 4496 }, { "epoch": 1.4128825804425074, "grad_norm": 0.78125, "learning_rate": 1.4493176769279595e-05, "loss": 1.4016, "step": 4498 }, { "epoch": 1.4135108074680487, "grad_norm": 0.83203125, "learning_rate": 1.4490637892732466e-05, "loss": 1.1816, "step": 4500 }, { "epoch": 1.4141390344935902, "grad_norm": 0.796875, "learning_rate": 1.448809901618534e-05, "loss": 1.2369, "step": 4502 }, { "epoch": 1.4147672615191316, "grad_norm": 0.8515625, "learning_rate": 1.448556013963821e-05, "loss": 1.2545, "step": 4504 }, { "epoch": 1.415395488544673, "grad_norm": 0.76171875, "learning_rate": 1.4483021263091084e-05, "loss": 1.3477, "step": 4506 }, { "epoch": 1.4160237155702142, "grad_norm": 0.8671875, "learning_rate": 1.4480482386543955e-05, "loss": 1.2051, "step": 4508 }, { "epoch": 1.4166519425957556, "grad_norm": 0.890625, "learning_rate": 1.4477943509996828e-05, "loss": 1.1868, "step": 4510 }, { "epoch": 1.4172801696212969, "grad_norm": 0.91015625, "learning_rate": 1.44754046334497e-05, "loss": 1.2113, "step": 4512 }, { "epoch": 1.4179083966468382, "grad_norm": 0.78125, "learning_rate": 1.4472865756902573e-05, "loss": 1.2932, "step": 4514 }, { "epoch": 1.4185366236723795, "grad_norm": 0.80859375, "learning_rate": 1.4470326880355442e-05, "loss": 1.2502, "step": 4516 }, { "epoch": 1.4191648506979209, "grad_norm": 0.8515625, "learning_rate": 1.4467788003808315e-05, "loss": 1.3597, "step": 4518 }, { "epoch": 1.4197930777234622, "grad_norm": 0.796875, "learning_rate": 1.4465249127261187e-05, "loss": 1.2579, "step": 4520 }, { "epoch": 1.4204213047490037, "grad_norm": 0.765625, "learning_rate": 1.446271025071406e-05, "loss": 1.315, "step": 4522 }, { "epoch": 1.421049531774545, "grad_norm": 0.80078125, "learning_rate": 1.4460171374166931e-05, "loss": 1.1785, "step": 4524 }, { "epoch": 1.4216777588000864, "grad_norm": 0.75, "learning_rate": 1.4457632497619804e-05, "loss": 1.2612, "step": 4526 }, { "epoch": 1.4223059858256277, "grad_norm": 0.76953125, "learning_rate": 1.4455093621072676e-05, "loss": 1.2523, "step": 4528 }, { "epoch": 1.422934212851169, "grad_norm": 0.83984375, "learning_rate": 1.4452554744525549e-05, "loss": 1.288, "step": 4530 }, { "epoch": 1.4235624398767104, "grad_norm": 0.7421875, "learning_rate": 1.445001586797842e-05, "loss": 1.2778, "step": 4532 }, { "epoch": 1.424190666902252, "grad_norm": 0.8203125, "learning_rate": 1.4447476991431293e-05, "loss": 1.3666, "step": 4534 }, { "epoch": 1.4248188939277933, "grad_norm": 0.8671875, "learning_rate": 1.4444938114884165e-05, "loss": 1.2365, "step": 4536 }, { "epoch": 1.4254471209533346, "grad_norm": 0.81640625, "learning_rate": 1.4442399238337038e-05, "loss": 1.4104, "step": 4538 }, { "epoch": 1.426075347978876, "grad_norm": 0.78125, "learning_rate": 1.4439860361789911e-05, "loss": 1.3859, "step": 4540 }, { "epoch": 1.4267035750044172, "grad_norm": 0.77734375, "learning_rate": 1.443732148524278e-05, "loss": 1.2868, "step": 4542 }, { "epoch": 1.4273318020299586, "grad_norm": 0.91796875, "learning_rate": 1.4434782608695654e-05, "loss": 1.2102, "step": 4544 }, { "epoch": 1.4279600290555, "grad_norm": 0.81640625, "learning_rate": 1.4432243732148525e-05, "loss": 1.4068, "step": 4546 }, { "epoch": 1.4285882560810412, "grad_norm": 0.75, "learning_rate": 1.4429704855601398e-05, "loss": 1.1468, "step": 4548 }, { "epoch": 1.4292164831065826, "grad_norm": 0.74609375, "learning_rate": 1.442716597905427e-05, "loss": 1.114, "step": 4550 }, { "epoch": 1.4298447101321239, "grad_norm": 0.7578125, "learning_rate": 1.4424627102507143e-05, "loss": 1.1786, "step": 4552 }, { "epoch": 1.4304729371576652, "grad_norm": 0.8515625, "learning_rate": 1.4422088225960014e-05, "loss": 1.2882, "step": 4554 }, { "epoch": 1.4311011641832068, "grad_norm": 0.875, "learning_rate": 1.4419549349412887e-05, "loss": 1.2154, "step": 4556 }, { "epoch": 1.431729391208748, "grad_norm": 0.91015625, "learning_rate": 1.4417010472865758e-05, "loss": 1.3404, "step": 4558 }, { "epoch": 1.4323576182342894, "grad_norm": 0.83203125, "learning_rate": 1.4414471596318631e-05, "loss": 1.17, "step": 4560 }, { "epoch": 1.4329858452598307, "grad_norm": 0.7578125, "learning_rate": 1.4411932719771503e-05, "loss": 1.2197, "step": 4562 }, { "epoch": 1.433614072285372, "grad_norm": 0.8515625, "learning_rate": 1.4409393843224376e-05, "loss": 1.2704, "step": 4564 }, { "epoch": 1.4342422993109134, "grad_norm": 0.73828125, "learning_rate": 1.4406854966677246e-05, "loss": 1.217, "step": 4566 }, { "epoch": 1.434870526336455, "grad_norm": 0.796875, "learning_rate": 1.4404316090130119e-05, "loss": 1.158, "step": 4568 }, { "epoch": 1.4354987533619963, "grad_norm": 0.78125, "learning_rate": 1.440177721358299e-05, "loss": 1.2378, "step": 4570 }, { "epoch": 1.4361269803875376, "grad_norm": 0.7421875, "learning_rate": 1.4399238337035863e-05, "loss": 1.1346, "step": 4572 }, { "epoch": 1.436755207413079, "grad_norm": 0.83984375, "learning_rate": 1.4396699460488735e-05, "loss": 1.3427, "step": 4574 }, { "epoch": 1.4373834344386203, "grad_norm": 0.80078125, "learning_rate": 1.4394160583941608e-05, "loss": 1.2679, "step": 4576 }, { "epoch": 1.4380116614641616, "grad_norm": 0.76953125, "learning_rate": 1.4391621707394479e-05, "loss": 1.1892, "step": 4578 }, { "epoch": 1.438639888489703, "grad_norm": 0.73828125, "learning_rate": 1.4389082830847352e-05, "loss": 1.3267, "step": 4580 }, { "epoch": 1.4392681155152443, "grad_norm": 0.734375, "learning_rate": 1.4386543954300223e-05, "loss": 1.2524, "step": 4582 }, { "epoch": 1.4398963425407856, "grad_norm": 0.734375, "learning_rate": 1.4384005077753097e-05, "loss": 1.1367, "step": 4584 }, { "epoch": 1.440524569566327, "grad_norm": 0.7578125, "learning_rate": 1.4381466201205966e-05, "loss": 1.3201, "step": 4586 }, { "epoch": 1.4411527965918685, "grad_norm": 0.75390625, "learning_rate": 1.437892732465884e-05, "loss": 1.2344, "step": 4588 }, { "epoch": 1.4417810236174098, "grad_norm": 0.8125, "learning_rate": 1.437638844811171e-05, "loss": 1.1569, "step": 4590 }, { "epoch": 1.4424092506429511, "grad_norm": 0.75, "learning_rate": 1.4373849571564584e-05, "loss": 1.1568, "step": 4592 }, { "epoch": 1.4430374776684924, "grad_norm": 0.82421875, "learning_rate": 1.4371310695017455e-05, "loss": 1.1668, "step": 4594 }, { "epoch": 1.4436657046940338, "grad_norm": 0.8046875, "learning_rate": 1.4368771818470328e-05, "loss": 1.318, "step": 4596 }, { "epoch": 1.444293931719575, "grad_norm": 0.7578125, "learning_rate": 1.43662329419232e-05, "loss": 1.3324, "step": 4598 }, { "epoch": 1.4449221587451166, "grad_norm": 0.73828125, "learning_rate": 1.4363694065376073e-05, "loss": 1.2083, "step": 4600 }, { "epoch": 1.445550385770658, "grad_norm": 0.75, "learning_rate": 1.4361155188828944e-05, "loss": 1.1944, "step": 4602 }, { "epoch": 1.4461786127961993, "grad_norm": 0.84375, "learning_rate": 1.4358616312281817e-05, "loss": 1.2482, "step": 4604 }, { "epoch": 1.4468068398217406, "grad_norm": 0.75390625, "learning_rate": 1.4356077435734688e-05, "loss": 1.162, "step": 4606 }, { "epoch": 1.447435066847282, "grad_norm": 0.83984375, "learning_rate": 1.4353538559187562e-05, "loss": 1.1867, "step": 4608 }, { "epoch": 1.4480632938728233, "grad_norm": 0.828125, "learning_rate": 1.4350999682640431e-05, "loss": 1.2864, "step": 4610 }, { "epoch": 1.4486915208983646, "grad_norm": 0.95703125, "learning_rate": 1.4348460806093304e-05, "loss": 1.1464, "step": 4612 }, { "epoch": 1.449319747923906, "grad_norm": 0.8046875, "learning_rate": 1.4345921929546176e-05, "loss": 1.1811, "step": 4614 }, { "epoch": 1.4499479749494473, "grad_norm": 0.796875, "learning_rate": 1.4343383052999049e-05, "loss": 1.2498, "step": 4616 }, { "epoch": 1.4505762019749886, "grad_norm": 0.7890625, "learning_rate": 1.434084417645192e-05, "loss": 1.192, "step": 4618 }, { "epoch": 1.4512044290005301, "grad_norm": 0.78515625, "learning_rate": 1.4338305299904793e-05, "loss": 1.3238, "step": 4620 }, { "epoch": 1.4518326560260715, "grad_norm": 0.85546875, "learning_rate": 1.4335766423357665e-05, "loss": 1.1391, "step": 4622 }, { "epoch": 1.4524608830516128, "grad_norm": 0.77734375, "learning_rate": 1.4333227546810538e-05, "loss": 1.2083, "step": 4624 }, { "epoch": 1.4530891100771541, "grad_norm": 0.84375, "learning_rate": 1.433068867026341e-05, "loss": 1.2331, "step": 4626 }, { "epoch": 1.4537173371026955, "grad_norm": 0.79296875, "learning_rate": 1.4328149793716282e-05, "loss": 1.2771, "step": 4628 }, { "epoch": 1.4543455641282368, "grad_norm": 0.8359375, "learning_rate": 1.4325610917169155e-05, "loss": 1.1523, "step": 4630 }, { "epoch": 1.4549737911537781, "grad_norm": 0.81640625, "learning_rate": 1.4323072040622027e-05, "loss": 1.3079, "step": 4632 }, { "epoch": 1.4556020181793197, "grad_norm": 0.765625, "learning_rate": 1.43205331640749e-05, "loss": 1.1524, "step": 4634 }, { "epoch": 1.456230245204861, "grad_norm": 0.78125, "learning_rate": 1.431799428752777e-05, "loss": 1.1978, "step": 4636 }, { "epoch": 1.4568584722304023, "grad_norm": 0.78125, "learning_rate": 1.4315455410980642e-05, "loss": 1.2417, "step": 4638 }, { "epoch": 1.4574866992559437, "grad_norm": 0.7734375, "learning_rate": 1.4312916534433514e-05, "loss": 1.3005, "step": 4640 }, { "epoch": 1.458114926281485, "grad_norm": 0.90234375, "learning_rate": 1.4310377657886387e-05, "loss": 1.2385, "step": 4642 }, { "epoch": 1.4587431533070263, "grad_norm": 0.76171875, "learning_rate": 1.4307838781339258e-05, "loss": 1.324, "step": 4644 }, { "epoch": 1.4593713803325676, "grad_norm": 0.74609375, "learning_rate": 1.4305299904792131e-05, "loss": 1.3922, "step": 4646 }, { "epoch": 1.459999607358109, "grad_norm": 0.91015625, "learning_rate": 1.4302761028245003e-05, "loss": 1.3316, "step": 4648 }, { "epoch": 1.4606278343836503, "grad_norm": 0.88671875, "learning_rate": 1.4300222151697876e-05, "loss": 1.2378, "step": 4650 }, { "epoch": 1.4612560614091916, "grad_norm": 0.85546875, "learning_rate": 1.4297683275150747e-05, "loss": 1.2244, "step": 4652 }, { "epoch": 1.4618842884347332, "grad_norm": 0.7578125, "learning_rate": 1.429514439860362e-05, "loss": 1.2384, "step": 4654 }, { "epoch": 1.4625125154602745, "grad_norm": 0.76953125, "learning_rate": 1.429260552205649e-05, "loss": 1.39, "step": 4656 }, { "epoch": 1.4631407424858158, "grad_norm": 0.79296875, "learning_rate": 1.4290066645509365e-05, "loss": 1.1775, "step": 4658 }, { "epoch": 1.4637689695113572, "grad_norm": 0.75390625, "learning_rate": 1.4287527768962234e-05, "loss": 1.2629, "step": 4660 }, { "epoch": 1.4643971965368985, "grad_norm": 0.90234375, "learning_rate": 1.4284988892415108e-05, "loss": 1.1894, "step": 4662 }, { "epoch": 1.4650254235624398, "grad_norm": 0.75, "learning_rate": 1.4282450015867979e-05, "loss": 1.4307, "step": 4664 }, { "epoch": 1.4656536505879814, "grad_norm": 0.7734375, "learning_rate": 1.4279911139320852e-05, "loss": 1.2757, "step": 4666 }, { "epoch": 1.4662818776135227, "grad_norm": 0.80078125, "learning_rate": 1.4277372262773723e-05, "loss": 1.1575, "step": 4668 }, { "epoch": 1.466910104639064, "grad_norm": 0.75, "learning_rate": 1.4274833386226596e-05, "loss": 1.2476, "step": 4670 }, { "epoch": 1.4675383316646053, "grad_norm": 0.7421875, "learning_rate": 1.4272294509679468e-05, "loss": 1.1947, "step": 4672 }, { "epoch": 1.4681665586901467, "grad_norm": 0.8046875, "learning_rate": 1.4269755633132341e-05, "loss": 1.2767, "step": 4674 }, { "epoch": 1.468794785715688, "grad_norm": 0.76953125, "learning_rate": 1.4267216756585212e-05, "loss": 1.1294, "step": 4676 }, { "epoch": 1.4694230127412293, "grad_norm": 0.88671875, "learning_rate": 1.4264677880038085e-05, "loss": 1.1821, "step": 4678 }, { "epoch": 1.4700512397667707, "grad_norm": 0.80859375, "learning_rate": 1.4262139003490955e-05, "loss": 1.1981, "step": 4680 }, { "epoch": 1.470679466792312, "grad_norm": 0.8671875, "learning_rate": 1.4259600126943828e-05, "loss": 1.2921, "step": 4682 }, { "epoch": 1.4713076938178533, "grad_norm": 0.87109375, "learning_rate": 1.42570612503967e-05, "loss": 1.2258, "step": 4684 }, { "epoch": 1.4719359208433949, "grad_norm": 0.79296875, "learning_rate": 1.4254522373849573e-05, "loss": 1.1364, "step": 4686 }, { "epoch": 1.4725641478689362, "grad_norm": 0.8671875, "learning_rate": 1.4251983497302444e-05, "loss": 1.2497, "step": 4688 }, { "epoch": 1.4731923748944775, "grad_norm": 0.796875, "learning_rate": 1.4249444620755317e-05, "loss": 1.1485, "step": 4690 }, { "epoch": 1.4738206019200188, "grad_norm": 0.8125, "learning_rate": 1.4246905744208188e-05, "loss": 1.1257, "step": 4692 }, { "epoch": 1.4744488289455602, "grad_norm": 0.7421875, "learning_rate": 1.4244366867661061e-05, "loss": 1.2718, "step": 4694 }, { "epoch": 1.4750770559711015, "grad_norm": 0.90234375, "learning_rate": 1.4241827991113933e-05, "loss": 1.0906, "step": 4696 }, { "epoch": 1.4757052829966428, "grad_norm": 0.7578125, "learning_rate": 1.4239289114566806e-05, "loss": 1.2542, "step": 4698 }, { "epoch": 1.4763335100221844, "grad_norm": 0.8125, "learning_rate": 1.4236750238019676e-05, "loss": 1.2242, "step": 4700 }, { "epoch": 1.4769617370477257, "grad_norm": 0.78125, "learning_rate": 1.423421136147255e-05, "loss": 1.1202, "step": 4702 }, { "epoch": 1.477589964073267, "grad_norm": 0.78515625, "learning_rate": 1.423167248492542e-05, "loss": 1.2829, "step": 4704 }, { "epoch": 1.4782181910988084, "grad_norm": 0.7421875, "learning_rate": 1.4229133608378293e-05, "loss": 1.2276, "step": 4706 }, { "epoch": 1.4788464181243497, "grad_norm": 0.82421875, "learning_rate": 1.4226594731831165e-05, "loss": 1.0959, "step": 4708 }, { "epoch": 1.479474645149891, "grad_norm": 0.91796875, "learning_rate": 1.4224055855284038e-05, "loss": 1.2402, "step": 4710 }, { "epoch": 1.4801028721754323, "grad_norm": 0.73828125, "learning_rate": 1.422151697873691e-05, "loss": 1.41, "step": 4712 }, { "epoch": 1.4807310992009737, "grad_norm": 0.8515625, "learning_rate": 1.4218978102189782e-05, "loss": 1.2751, "step": 4714 }, { "epoch": 1.481359326226515, "grad_norm": 0.7734375, "learning_rate": 1.4216439225642655e-05, "loss": 1.1848, "step": 4716 }, { "epoch": 1.4819875532520563, "grad_norm": 0.85546875, "learning_rate": 1.4213900349095527e-05, "loss": 1.1541, "step": 4718 }, { "epoch": 1.4826157802775979, "grad_norm": 0.79296875, "learning_rate": 1.42113614725484e-05, "loss": 1.248, "step": 4720 }, { "epoch": 1.4832440073031392, "grad_norm": 0.84375, "learning_rate": 1.4208822596001271e-05, "loss": 1.0783, "step": 4722 }, { "epoch": 1.4838722343286805, "grad_norm": 0.82421875, "learning_rate": 1.4206283719454144e-05, "loss": 1.3559, "step": 4724 }, { "epoch": 1.4845004613542219, "grad_norm": 0.83984375, "learning_rate": 1.4203744842907014e-05, "loss": 1.2525, "step": 4726 }, { "epoch": 1.4851286883797632, "grad_norm": 0.83203125, "learning_rate": 1.4201205966359889e-05, "loss": 1.3513, "step": 4728 }, { "epoch": 1.4857569154053045, "grad_norm": 0.84765625, "learning_rate": 1.4198667089812758e-05, "loss": 1.1958, "step": 4730 }, { "epoch": 1.486385142430846, "grad_norm": 0.83984375, "learning_rate": 1.4196128213265631e-05, "loss": 1.2256, "step": 4732 }, { "epoch": 1.4870133694563874, "grad_norm": 0.828125, "learning_rate": 1.4193589336718503e-05, "loss": 1.2719, "step": 4734 }, { "epoch": 1.4876415964819287, "grad_norm": 0.8984375, "learning_rate": 1.4191050460171376e-05, "loss": 1.186, "step": 4736 }, { "epoch": 1.48826982350747, "grad_norm": 0.78125, "learning_rate": 1.4188511583624247e-05, "loss": 1.3374, "step": 4738 }, { "epoch": 1.4888980505330114, "grad_norm": 0.90234375, "learning_rate": 1.418597270707712e-05, "loss": 1.139, "step": 4740 }, { "epoch": 1.4895262775585527, "grad_norm": 0.84375, "learning_rate": 1.4183433830529992e-05, "loss": 1.269, "step": 4742 }, { "epoch": 1.490154504584094, "grad_norm": 0.796875, "learning_rate": 1.4180894953982865e-05, "loss": 1.1428, "step": 4744 }, { "epoch": 1.4907827316096354, "grad_norm": 0.73046875, "learning_rate": 1.4178356077435736e-05, "loss": 1.304, "step": 4746 }, { "epoch": 1.4914109586351767, "grad_norm": 0.81640625, "learning_rate": 1.4175817200888609e-05, "loss": 1.4207, "step": 4748 }, { "epoch": 1.492039185660718, "grad_norm": 0.7109375, "learning_rate": 1.4173278324341479e-05, "loss": 1.2348, "step": 4750 }, { "epoch": 1.4926674126862596, "grad_norm": 0.796875, "learning_rate": 1.4170739447794352e-05, "loss": 1.2315, "step": 4752 }, { "epoch": 1.493295639711801, "grad_norm": 0.78515625, "learning_rate": 1.4168200571247223e-05, "loss": 1.408, "step": 4754 }, { "epoch": 1.4939238667373422, "grad_norm": 0.796875, "learning_rate": 1.4165661694700096e-05, "loss": 1.2126, "step": 4756 }, { "epoch": 1.4945520937628836, "grad_norm": 0.80859375, "learning_rate": 1.4163122818152968e-05, "loss": 1.3488, "step": 4758 }, { "epoch": 1.4951803207884249, "grad_norm": 0.75, "learning_rate": 1.416058394160584e-05, "loss": 1.2919, "step": 4760 }, { "epoch": 1.4958085478139662, "grad_norm": 0.79296875, "learning_rate": 1.4158045065058712e-05, "loss": 1.2386, "step": 4762 }, { "epoch": 1.4964367748395075, "grad_norm": 0.93359375, "learning_rate": 1.4155506188511585e-05, "loss": 1.1495, "step": 4764 }, { "epoch": 1.497065001865049, "grad_norm": 0.80859375, "learning_rate": 1.4152967311964457e-05, "loss": 1.0899, "step": 4766 }, { "epoch": 1.4976932288905904, "grad_norm": 0.703125, "learning_rate": 1.415042843541733e-05, "loss": 1.1979, "step": 4768 }, { "epoch": 1.4983214559161318, "grad_norm": 0.8125, "learning_rate": 1.4147889558870201e-05, "loss": 1.2938, "step": 4770 }, { "epoch": 1.498949682941673, "grad_norm": 1.6875, "learning_rate": 1.4145350682323074e-05, "loss": 1.1939, "step": 4772 }, { "epoch": 1.4995779099672144, "grad_norm": 0.76953125, "learning_rate": 1.4142811805775944e-05, "loss": 1.2171, "step": 4774 }, { "epoch": 1.5002061369927557, "grad_norm": 0.8046875, "learning_rate": 1.4140272929228817e-05, "loss": 1.202, "step": 4776 }, { "epoch": 1.500834364018297, "grad_norm": 0.90234375, "learning_rate": 1.4137734052681688e-05, "loss": 1.2284, "step": 4778 }, { "epoch": 1.5014625910438384, "grad_norm": 0.75, "learning_rate": 1.4135195176134561e-05, "loss": 1.2907, "step": 4780 }, { "epoch": 1.5020908180693797, "grad_norm": 0.87109375, "learning_rate": 1.4132656299587433e-05, "loss": 1.304, "step": 4782 }, { "epoch": 1.502719045094921, "grad_norm": 0.85546875, "learning_rate": 1.4130117423040306e-05, "loss": 1.3084, "step": 4784 }, { "epoch": 1.5033472721204624, "grad_norm": 0.7890625, "learning_rate": 1.4127578546493177e-05, "loss": 1.277, "step": 4786 }, { "epoch": 1.503975499146004, "grad_norm": 0.7734375, "learning_rate": 1.412503966994605e-05, "loss": 1.2962, "step": 4788 }, { "epoch": 1.5046037261715453, "grad_norm": 0.73046875, "learning_rate": 1.4122500793398922e-05, "loss": 1.1253, "step": 4790 }, { "epoch": 1.5052319531970866, "grad_norm": 0.95703125, "learning_rate": 1.4119961916851795e-05, "loss": 1.1104, "step": 4792 }, { "epoch": 1.505860180222628, "grad_norm": 0.7890625, "learning_rate": 1.4117423040304664e-05, "loss": 1.2353, "step": 4794 }, { "epoch": 1.5064884072481695, "grad_norm": 0.81640625, "learning_rate": 1.411488416375754e-05, "loss": 1.2573, "step": 4796 }, { "epoch": 1.5071166342737108, "grad_norm": 0.85546875, "learning_rate": 1.4112345287210412e-05, "loss": 1.2517, "step": 4798 }, { "epoch": 1.5077448612992521, "grad_norm": 0.75390625, "learning_rate": 1.4109806410663282e-05, "loss": 1.3036, "step": 4800 }, { "epoch": 1.5083730883247934, "grad_norm": 0.8046875, "learning_rate": 1.4107267534116155e-05, "loss": 1.364, "step": 4802 }, { "epoch": 1.5090013153503348, "grad_norm": 0.890625, "learning_rate": 1.4104728657569026e-05, "loss": 1.1784, "step": 4804 }, { "epoch": 1.509629542375876, "grad_norm": 0.74609375, "learning_rate": 1.41021897810219e-05, "loss": 1.1693, "step": 4806 }, { "epoch": 1.5102577694014174, "grad_norm": 0.8203125, "learning_rate": 1.4099650904474771e-05, "loss": 1.2911, "step": 4808 }, { "epoch": 1.5108859964269588, "grad_norm": 0.796875, "learning_rate": 1.4097112027927644e-05, "loss": 1.1448, "step": 4810 }, { "epoch": 1.5115142234525, "grad_norm": 0.828125, "learning_rate": 1.4094573151380515e-05, "loss": 1.3268, "step": 4812 }, { "epoch": 1.5121424504780414, "grad_norm": 0.7890625, "learning_rate": 1.4092034274833388e-05, "loss": 1.3592, "step": 4814 }, { "epoch": 1.5127706775035827, "grad_norm": 0.8125, "learning_rate": 1.408949539828626e-05, "loss": 1.2284, "step": 4816 }, { "epoch": 1.513398904529124, "grad_norm": 0.82421875, "learning_rate": 1.4086956521739133e-05, "loss": 1.3063, "step": 4818 }, { "epoch": 1.5140271315546656, "grad_norm": 0.859375, "learning_rate": 1.4084417645192003e-05, "loss": 1.291, "step": 4820 }, { "epoch": 1.514655358580207, "grad_norm": 0.8515625, "learning_rate": 1.4081878768644877e-05, "loss": 1.1867, "step": 4822 }, { "epoch": 1.5152835856057483, "grad_norm": 0.7890625, "learning_rate": 1.4079339892097747e-05, "loss": 1.1726, "step": 4824 }, { "epoch": 1.5159118126312896, "grad_norm": 0.76171875, "learning_rate": 1.407680101555062e-05, "loss": 1.2812, "step": 4826 }, { "epoch": 1.5165400396568312, "grad_norm": 0.83984375, "learning_rate": 1.4074262139003492e-05, "loss": 1.2503, "step": 4828 }, { "epoch": 1.5171682666823725, "grad_norm": 0.8125, "learning_rate": 1.4071723262456365e-05, "loss": 1.1713, "step": 4830 }, { "epoch": 1.5177964937079138, "grad_norm": 0.7578125, "learning_rate": 1.4069184385909236e-05, "loss": 1.2758, "step": 4832 }, { "epoch": 1.5184247207334551, "grad_norm": 0.796875, "learning_rate": 1.4066645509362109e-05, "loss": 1.4859, "step": 4834 }, { "epoch": 1.5190529477589965, "grad_norm": 0.8125, "learning_rate": 1.406410663281498e-05, "loss": 1.1555, "step": 4836 }, { "epoch": 1.5196811747845378, "grad_norm": 0.953125, "learning_rate": 1.4061567756267854e-05, "loss": 1.211, "step": 4838 }, { "epoch": 1.5203094018100791, "grad_norm": 0.89453125, "learning_rate": 1.4059028879720725e-05, "loss": 1.1481, "step": 4840 }, { "epoch": 1.5209376288356204, "grad_norm": 0.84765625, "learning_rate": 1.4056490003173598e-05, "loss": 1.1373, "step": 4842 }, { "epoch": 1.5215658558611618, "grad_norm": 0.7734375, "learning_rate": 1.4053951126626468e-05, "loss": 1.2099, "step": 4844 }, { "epoch": 1.522194082886703, "grad_norm": 0.8125, "learning_rate": 1.405141225007934e-05, "loss": 1.331, "step": 4846 }, { "epoch": 1.5228223099122444, "grad_norm": 0.7578125, "learning_rate": 1.4048873373532212e-05, "loss": 1.2244, "step": 4848 }, { "epoch": 1.5234505369377858, "grad_norm": 0.7109375, "learning_rate": 1.4046334496985085e-05, "loss": 1.301, "step": 4850 }, { "epoch": 1.524078763963327, "grad_norm": 0.83984375, "learning_rate": 1.4043795620437957e-05, "loss": 1.2133, "step": 4852 }, { "epoch": 1.5247069909888686, "grad_norm": 0.828125, "learning_rate": 1.404125674389083e-05, "loss": 1.1242, "step": 4854 }, { "epoch": 1.52533521801441, "grad_norm": 0.84375, "learning_rate": 1.4038717867343701e-05, "loss": 1.2627, "step": 4856 }, { "epoch": 1.5259634450399513, "grad_norm": 0.71875, "learning_rate": 1.4036178990796574e-05, "loss": 1.3087, "step": 4858 }, { "epoch": 1.5265916720654926, "grad_norm": 0.85546875, "learning_rate": 1.4033640114249446e-05, "loss": 1.1323, "step": 4860 }, { "epoch": 1.5272198990910342, "grad_norm": 0.75390625, "learning_rate": 1.4031101237702319e-05, "loss": 1.2379, "step": 4862 }, { "epoch": 1.5278481261165755, "grad_norm": 0.765625, "learning_rate": 1.4028562361155188e-05, "loss": 1.2671, "step": 4864 }, { "epoch": 1.5284763531421168, "grad_norm": 0.765625, "learning_rate": 1.4026023484608063e-05, "loss": 1.2881, "step": 4866 }, { "epoch": 1.5291045801676582, "grad_norm": 0.85546875, "learning_rate": 1.4023484608060933e-05, "loss": 1.2102, "step": 4868 }, { "epoch": 1.5297328071931995, "grad_norm": 0.828125, "learning_rate": 1.4020945731513806e-05, "loss": 1.2949, "step": 4870 }, { "epoch": 1.5303610342187408, "grad_norm": 0.7734375, "learning_rate": 1.4018406854966677e-05, "loss": 1.2383, "step": 4872 }, { "epoch": 1.5309892612442821, "grad_norm": 0.96484375, "learning_rate": 1.401586797841955e-05, "loss": 1.1815, "step": 4874 }, { "epoch": 1.5316174882698235, "grad_norm": 0.8125, "learning_rate": 1.4013329101872422e-05, "loss": 1.2497, "step": 4876 }, { "epoch": 1.5322457152953648, "grad_norm": 0.8125, "learning_rate": 1.4010790225325295e-05, "loss": 1.1109, "step": 4878 }, { "epoch": 1.5328739423209061, "grad_norm": 0.8359375, "learning_rate": 1.4008251348778166e-05, "loss": 1.2732, "step": 4880 }, { "epoch": 1.5335021693464475, "grad_norm": 0.81640625, "learning_rate": 1.400571247223104e-05, "loss": 1.2428, "step": 4882 }, { "epoch": 1.5341303963719888, "grad_norm": 0.87890625, "learning_rate": 1.4003173595683912e-05, "loss": 1.2501, "step": 4884 }, { "epoch": 1.5347586233975303, "grad_norm": 0.83984375, "learning_rate": 1.4000634719136784e-05, "loss": 1.2886, "step": 4886 }, { "epoch": 1.5353868504230717, "grad_norm": 0.828125, "learning_rate": 1.3998095842589657e-05, "loss": 1.2391, "step": 4888 }, { "epoch": 1.536015077448613, "grad_norm": 0.828125, "learning_rate": 1.3995556966042526e-05, "loss": 1.2547, "step": 4890 }, { "epoch": 1.5366433044741543, "grad_norm": 0.7734375, "learning_rate": 1.3993018089495401e-05, "loss": 1.2538, "step": 4892 }, { "epoch": 1.5372715314996959, "grad_norm": 0.8671875, "learning_rate": 1.3990479212948271e-05, "loss": 1.2798, "step": 4894 }, { "epoch": 1.5378997585252372, "grad_norm": 0.7734375, "learning_rate": 1.3987940336401144e-05, "loss": 1.2126, "step": 4896 }, { "epoch": 1.5385279855507785, "grad_norm": 0.79296875, "learning_rate": 1.3985401459854015e-05, "loss": 1.2583, "step": 4898 }, { "epoch": 1.5391562125763198, "grad_norm": 0.83203125, "learning_rate": 1.3982862583306888e-05, "loss": 1.2099, "step": 4900 }, { "epoch": 1.5397844396018612, "grad_norm": 0.859375, "learning_rate": 1.398032370675976e-05, "loss": 1.3456, "step": 4902 }, { "epoch": 1.5404126666274025, "grad_norm": 0.78125, "learning_rate": 1.3977784830212633e-05, "loss": 1.2055, "step": 4904 }, { "epoch": 1.5410408936529438, "grad_norm": 0.78515625, "learning_rate": 1.3975245953665504e-05, "loss": 1.2495, "step": 4906 }, { "epoch": 1.5416691206784852, "grad_norm": 0.78125, "learning_rate": 1.3972707077118377e-05, "loss": 1.2034, "step": 4908 }, { "epoch": 1.5422973477040265, "grad_norm": 0.734375, "learning_rate": 1.3970168200571249e-05, "loss": 1.2081, "step": 4910 }, { "epoch": 1.5429255747295678, "grad_norm": 0.79296875, "learning_rate": 1.3967629324024122e-05, "loss": 1.0711, "step": 4912 }, { "epoch": 1.5435538017551091, "grad_norm": 0.75390625, "learning_rate": 1.3965090447476991e-05, "loss": 1.233, "step": 4914 }, { "epoch": 1.5441820287806505, "grad_norm": 0.7578125, "learning_rate": 1.3962551570929865e-05, "loss": 1.2307, "step": 4916 }, { "epoch": 1.5448102558061918, "grad_norm": 0.7890625, "learning_rate": 1.3960012694382736e-05, "loss": 1.3425, "step": 4918 }, { "epoch": 1.5454384828317334, "grad_norm": 0.7578125, "learning_rate": 1.3957473817835609e-05, "loss": 1.2415, "step": 4920 }, { "epoch": 1.5460667098572747, "grad_norm": 0.859375, "learning_rate": 1.395493494128848e-05, "loss": 1.1973, "step": 4922 }, { "epoch": 1.546694936882816, "grad_norm": 0.71875, "learning_rate": 1.3952396064741353e-05, "loss": 1.2275, "step": 4924 }, { "epoch": 1.5473231639083573, "grad_norm": 0.8125, "learning_rate": 1.3949857188194225e-05, "loss": 1.416, "step": 4926 }, { "epoch": 1.5479513909338989, "grad_norm": 0.80859375, "learning_rate": 1.3947318311647098e-05, "loss": 1.262, "step": 4928 }, { "epoch": 1.5485796179594402, "grad_norm": 0.75, "learning_rate": 1.394477943509997e-05, "loss": 1.0942, "step": 4930 }, { "epoch": 1.5492078449849815, "grad_norm": 0.8203125, "learning_rate": 1.3942240558552842e-05, "loss": 1.2883, "step": 4932 }, { "epoch": 1.5498360720105229, "grad_norm": 0.84765625, "learning_rate": 1.3939701682005714e-05, "loss": 1.2058, "step": 4934 }, { "epoch": 1.5504642990360642, "grad_norm": 0.75390625, "learning_rate": 1.3937162805458587e-05, "loss": 1.2756, "step": 4936 }, { "epoch": 1.5510925260616055, "grad_norm": 0.8359375, "learning_rate": 1.3934623928911457e-05, "loss": 1.2265, "step": 4938 }, { "epoch": 1.5517207530871469, "grad_norm": 0.82421875, "learning_rate": 1.393208505236433e-05, "loss": 1.0057, "step": 4940 }, { "epoch": 1.5523489801126882, "grad_norm": 0.77734375, "learning_rate": 1.3929546175817201e-05, "loss": 1.1519, "step": 4942 }, { "epoch": 1.5529772071382295, "grad_norm": 0.7734375, "learning_rate": 1.3927007299270074e-05, "loss": 1.3281, "step": 4944 }, { "epoch": 1.5536054341637708, "grad_norm": 0.74609375, "learning_rate": 1.3924468422722945e-05, "loss": 1.267, "step": 4946 }, { "epoch": 1.5542336611893122, "grad_norm": 0.8046875, "learning_rate": 1.3921929546175819e-05, "loss": 1.2482, "step": 4948 }, { "epoch": 1.5548618882148535, "grad_norm": 0.7578125, "learning_rate": 1.391939066962869e-05, "loss": 1.3847, "step": 4950 }, { "epoch": 1.555490115240395, "grad_norm": 0.7890625, "learning_rate": 1.3916851793081563e-05, "loss": 1.4112, "step": 4952 }, { "epoch": 1.5561183422659364, "grad_norm": 0.85546875, "learning_rate": 1.3914312916534434e-05, "loss": 1.2111, "step": 4954 }, { "epoch": 1.5567465692914777, "grad_norm": 0.7734375, "learning_rate": 1.3911774039987307e-05, "loss": 1.4313, "step": 4956 }, { "epoch": 1.557374796317019, "grad_norm": 0.74609375, "learning_rate": 1.3909235163440177e-05, "loss": 1.2122, "step": 4958 }, { "epoch": 1.5580030233425606, "grad_norm": 0.7578125, "learning_rate": 1.3906696286893052e-05, "loss": 1.2285, "step": 4960 }, { "epoch": 1.558631250368102, "grad_norm": 0.890625, "learning_rate": 1.3904157410345922e-05, "loss": 1.2074, "step": 4962 }, { "epoch": 1.5592594773936432, "grad_norm": 0.79296875, "learning_rate": 1.3901618533798795e-05, "loss": 1.3096, "step": 4964 }, { "epoch": 1.5598877044191846, "grad_norm": 0.859375, "learning_rate": 1.3899079657251666e-05, "loss": 1.1488, "step": 4966 }, { "epoch": 1.560515931444726, "grad_norm": 0.79296875, "learning_rate": 1.3896540780704539e-05, "loss": 1.358, "step": 4968 }, { "epoch": 1.5611441584702672, "grad_norm": 0.78515625, "learning_rate": 1.3894001904157412e-05, "loss": 1.2134, "step": 4970 }, { "epoch": 1.5617723854958085, "grad_norm": 0.7890625, "learning_rate": 1.3891463027610284e-05, "loss": 1.3555, "step": 4972 }, { "epoch": 1.5624006125213499, "grad_norm": 0.87109375, "learning_rate": 1.3888924151063157e-05, "loss": 1.2697, "step": 4974 }, { "epoch": 1.5630288395468912, "grad_norm": 0.94140625, "learning_rate": 1.3886385274516028e-05, "loss": 1.1357, "step": 4976 }, { "epoch": 1.5636570665724325, "grad_norm": 0.8359375, "learning_rate": 1.3883846397968901e-05, "loss": 1.1122, "step": 4978 }, { "epoch": 1.5642852935979739, "grad_norm": 0.9375, "learning_rate": 1.3881307521421772e-05, "loss": 1.3238, "step": 4980 }, { "epoch": 1.5649135206235152, "grad_norm": 0.85546875, "learning_rate": 1.3878768644874646e-05, "loss": 1.1624, "step": 4982 }, { "epoch": 1.5655417476490565, "grad_norm": 0.83203125, "learning_rate": 1.3876229768327515e-05, "loss": 1.1995, "step": 4984 }, { "epoch": 1.566169974674598, "grad_norm": 0.921875, "learning_rate": 1.387369089178039e-05, "loss": 1.433, "step": 4986 }, { "epoch": 1.5667982017001394, "grad_norm": 0.828125, "learning_rate": 1.387115201523326e-05, "loss": 1.2223, "step": 4988 }, { "epoch": 1.5674264287256807, "grad_norm": 0.8203125, "learning_rate": 1.3868613138686133e-05, "loss": 1.2335, "step": 4990 }, { "epoch": 1.568054655751222, "grad_norm": 0.78515625, "learning_rate": 1.3866074262139004e-05, "loss": 1.2387, "step": 4992 }, { "epoch": 1.5686828827767636, "grad_norm": 0.74609375, "learning_rate": 1.3863535385591877e-05, "loss": 1.2488, "step": 4994 }, { "epoch": 1.569311109802305, "grad_norm": 0.765625, "learning_rate": 1.3860996509044749e-05, "loss": 1.3808, "step": 4996 }, { "epoch": 1.5699393368278463, "grad_norm": 0.73828125, "learning_rate": 1.3858457632497622e-05, "loss": 1.4113, "step": 4998 }, { "epoch": 1.5705675638533876, "grad_norm": 0.8671875, "learning_rate": 1.3855918755950493e-05, "loss": 1.2078, "step": 5000 }, { "epoch": 1.571195790878929, "grad_norm": 0.7734375, "learning_rate": 1.3853379879403366e-05, "loss": 1.2806, "step": 5002 }, { "epoch": 1.5718240179044702, "grad_norm": 0.8515625, "learning_rate": 1.3850841002856238e-05, "loss": 1.2592, "step": 5004 }, { "epoch": 1.5724522449300116, "grad_norm": 0.76171875, "learning_rate": 1.384830212630911e-05, "loss": 1.2786, "step": 5006 }, { "epoch": 1.573080471955553, "grad_norm": 0.8515625, "learning_rate": 1.384576324976198e-05, "loss": 1.2668, "step": 5008 }, { "epoch": 1.5737086989810942, "grad_norm": 0.796875, "learning_rate": 1.3843224373214853e-05, "loss": 1.1614, "step": 5010 }, { "epoch": 1.5743369260066356, "grad_norm": 0.78515625, "learning_rate": 1.3840685496667725e-05, "loss": 1.1552, "step": 5012 }, { "epoch": 1.5749651530321769, "grad_norm": 0.79296875, "learning_rate": 1.3838146620120598e-05, "loss": 1.295, "step": 5014 }, { "epoch": 1.5755933800577182, "grad_norm": 0.8203125, "learning_rate": 1.383560774357347e-05, "loss": 1.2958, "step": 5016 }, { "epoch": 1.5762216070832598, "grad_norm": 0.73828125, "learning_rate": 1.3833068867026342e-05, "loss": 1.3272, "step": 5018 }, { "epoch": 1.576849834108801, "grad_norm": 0.8828125, "learning_rate": 1.3830529990479214e-05, "loss": 1.2085, "step": 5020 }, { "epoch": 1.5774780611343424, "grad_norm": 0.82421875, "learning_rate": 1.3827991113932087e-05, "loss": 1.28, "step": 5022 }, { "epoch": 1.5781062881598837, "grad_norm": 0.78125, "learning_rate": 1.3825452237384958e-05, "loss": 1.2884, "step": 5024 }, { "epoch": 1.5787345151854253, "grad_norm": 0.69140625, "learning_rate": 1.3822913360837831e-05, "loss": 1.3882, "step": 5026 }, { "epoch": 1.5793627422109666, "grad_norm": 0.77734375, "learning_rate": 1.3820374484290701e-05, "loss": 1.3267, "step": 5028 }, { "epoch": 1.579990969236508, "grad_norm": 0.859375, "learning_rate": 1.3817835607743576e-05, "loss": 1.2475, "step": 5030 }, { "epoch": 1.5806191962620493, "grad_norm": 0.6953125, "learning_rate": 1.3815296731196445e-05, "loss": 1.2973, "step": 5032 }, { "epoch": 1.5812474232875906, "grad_norm": 0.80078125, "learning_rate": 1.3812757854649318e-05, "loss": 1.1669, "step": 5034 }, { "epoch": 1.581875650313132, "grad_norm": 0.78515625, "learning_rate": 1.381021897810219e-05, "loss": 1.2783, "step": 5036 }, { "epoch": 1.5825038773386733, "grad_norm": 0.765625, "learning_rate": 1.3807680101555063e-05, "loss": 1.1044, "step": 5038 }, { "epoch": 1.5831321043642146, "grad_norm": 0.71875, "learning_rate": 1.3805141225007934e-05, "loss": 1.2039, "step": 5040 }, { "epoch": 1.583760331389756, "grad_norm": 0.76171875, "learning_rate": 1.3802602348460807e-05, "loss": 1.3272, "step": 5042 }, { "epoch": 1.5843885584152972, "grad_norm": 0.859375, "learning_rate": 1.3800063471913679e-05, "loss": 1.1171, "step": 5044 }, { "epoch": 1.5850167854408386, "grad_norm": 0.83984375, "learning_rate": 1.3797524595366552e-05, "loss": 1.085, "step": 5046 }, { "epoch": 1.58564501246638, "grad_norm": 1.0703125, "learning_rate": 1.3794985718819423e-05, "loss": 1.1656, "step": 5048 }, { "epoch": 1.5862732394919212, "grad_norm": 0.8203125, "learning_rate": 1.3792446842272296e-05, "loss": 1.2341, "step": 5050 }, { "epoch": 1.5869014665174628, "grad_norm": 0.8203125, "learning_rate": 1.3789907965725166e-05, "loss": 1.2506, "step": 5052 }, { "epoch": 1.587529693543004, "grad_norm": 0.78125, "learning_rate": 1.3787369089178039e-05, "loss": 1.2872, "step": 5054 }, { "epoch": 1.5881579205685454, "grad_norm": 0.81640625, "learning_rate": 1.3784830212630914e-05, "loss": 1.1942, "step": 5056 }, { "epoch": 1.5887861475940868, "grad_norm": 0.75390625, "learning_rate": 1.3782291336083783e-05, "loss": 1.2892, "step": 5058 }, { "epoch": 1.5894143746196283, "grad_norm": 0.796875, "learning_rate": 1.3779752459536657e-05, "loss": 1.2254, "step": 5060 }, { "epoch": 1.5900426016451696, "grad_norm": 0.765625, "learning_rate": 1.3777213582989528e-05, "loss": 1.2721, "step": 5062 }, { "epoch": 1.590670828670711, "grad_norm": 0.78515625, "learning_rate": 1.3774674706442401e-05, "loss": 1.2791, "step": 5064 }, { "epoch": 1.5912990556962523, "grad_norm": 0.76953125, "learning_rate": 1.3772135829895272e-05, "loss": 1.2474, "step": 5066 }, { "epoch": 1.5919272827217936, "grad_norm": 0.796875, "learning_rate": 1.3769596953348145e-05, "loss": 1.2996, "step": 5068 }, { "epoch": 1.592555509747335, "grad_norm": 0.796875, "learning_rate": 1.3767058076801017e-05, "loss": 1.1624, "step": 5070 }, { "epoch": 1.5931837367728763, "grad_norm": 0.79296875, "learning_rate": 1.376451920025389e-05, "loss": 1.1621, "step": 5072 }, { "epoch": 1.5938119637984176, "grad_norm": 0.8203125, "learning_rate": 1.3761980323706761e-05, "loss": 1.1595, "step": 5074 }, { "epoch": 1.594440190823959, "grad_norm": 0.75, "learning_rate": 1.3759441447159634e-05, "loss": 1.2585, "step": 5076 }, { "epoch": 1.5950684178495003, "grad_norm": 0.859375, "learning_rate": 1.3756902570612504e-05, "loss": 1.1875, "step": 5078 }, { "epoch": 1.5956966448750416, "grad_norm": 0.8359375, "learning_rate": 1.3754363694065377e-05, "loss": 1.286, "step": 5080 }, { "epoch": 1.596324871900583, "grad_norm": 0.7421875, "learning_rate": 1.3751824817518249e-05, "loss": 1.313, "step": 5082 }, { "epoch": 1.5969530989261245, "grad_norm": 0.8984375, "learning_rate": 1.3749285940971122e-05, "loss": 1.2761, "step": 5084 }, { "epoch": 1.5975813259516658, "grad_norm": 0.82421875, "learning_rate": 1.3746747064423993e-05, "loss": 1.2133, "step": 5086 }, { "epoch": 1.5982095529772071, "grad_norm": 0.7890625, "learning_rate": 1.3744208187876866e-05, "loss": 1.2465, "step": 5088 }, { "epoch": 1.5988377800027485, "grad_norm": 0.76171875, "learning_rate": 1.3741669311329737e-05, "loss": 1.3734, "step": 5090 }, { "epoch": 1.59946600702829, "grad_norm": 0.80859375, "learning_rate": 1.373913043478261e-05, "loss": 1.4389, "step": 5092 }, { "epoch": 1.6000942340538313, "grad_norm": 0.8125, "learning_rate": 1.3736591558235482e-05, "loss": 1.2194, "step": 5094 }, { "epoch": 1.6007224610793727, "grad_norm": 0.86328125, "learning_rate": 1.3734052681688355e-05, "loss": 1.1488, "step": 5096 }, { "epoch": 1.601350688104914, "grad_norm": 0.87109375, "learning_rate": 1.3731513805141226e-05, "loss": 1.324, "step": 5098 }, { "epoch": 1.6019789151304553, "grad_norm": 0.8125, "learning_rate": 1.37289749285941e-05, "loss": 1.2289, "step": 5100 }, { "epoch": 1.6026071421559966, "grad_norm": 0.76953125, "learning_rate": 1.3726436052046969e-05, "loss": 1.1883, "step": 5102 }, { "epoch": 1.603235369181538, "grad_norm": 0.83203125, "learning_rate": 1.3723897175499842e-05, "loss": 1.2215, "step": 5104 }, { "epoch": 1.6038635962070793, "grad_norm": 0.81640625, "learning_rate": 1.3721358298952714e-05, "loss": 1.2423, "step": 5106 }, { "epoch": 1.6044918232326206, "grad_norm": 0.7890625, "learning_rate": 1.3718819422405587e-05, "loss": 1.1615, "step": 5108 }, { "epoch": 1.605120050258162, "grad_norm": 0.7578125, "learning_rate": 1.3716280545858458e-05, "loss": 1.1432, "step": 5110 }, { "epoch": 1.6057482772837033, "grad_norm": 0.8203125, "learning_rate": 1.3713741669311331e-05, "loss": 1.3432, "step": 5112 }, { "epoch": 1.6063765043092446, "grad_norm": 0.8828125, "learning_rate": 1.3711202792764203e-05, "loss": 1.2572, "step": 5114 }, { "epoch": 1.6070047313347862, "grad_norm": 0.83984375, "learning_rate": 1.3708663916217076e-05, "loss": 1.141, "step": 5116 }, { "epoch": 1.6076329583603275, "grad_norm": 0.80078125, "learning_rate": 1.3706125039669947e-05, "loss": 1.2724, "step": 5118 }, { "epoch": 1.6082611853858688, "grad_norm": 0.98046875, "learning_rate": 1.370358616312282e-05, "loss": 1.2636, "step": 5120 }, { "epoch": 1.6088894124114101, "grad_norm": 0.89453125, "learning_rate": 1.370104728657569e-05, "loss": 1.2074, "step": 5122 }, { "epoch": 1.6095176394369515, "grad_norm": 0.81640625, "learning_rate": 1.3698508410028565e-05, "loss": 1.3499, "step": 5124 }, { "epoch": 1.610145866462493, "grad_norm": 0.76953125, "learning_rate": 1.3695969533481434e-05, "loss": 1.3866, "step": 5126 }, { "epoch": 1.6107740934880344, "grad_norm": 0.796875, "learning_rate": 1.3693430656934307e-05, "loss": 1.2968, "step": 5128 }, { "epoch": 1.6114023205135757, "grad_norm": 0.75, "learning_rate": 1.3690891780387179e-05, "loss": 1.4032, "step": 5130 }, { "epoch": 1.612030547539117, "grad_norm": 0.73046875, "learning_rate": 1.3688352903840052e-05, "loss": 1.3089, "step": 5132 }, { "epoch": 1.6126587745646583, "grad_norm": 0.7734375, "learning_rate": 1.3685814027292923e-05, "loss": 1.332, "step": 5134 }, { "epoch": 1.6132870015901997, "grad_norm": 0.79296875, "learning_rate": 1.3683275150745796e-05, "loss": 1.1898, "step": 5136 }, { "epoch": 1.613915228615741, "grad_norm": 0.87890625, "learning_rate": 1.3680736274198668e-05, "loss": 1.2405, "step": 5138 }, { "epoch": 1.6145434556412823, "grad_norm": 0.77734375, "learning_rate": 1.367819739765154e-05, "loss": 1.2174, "step": 5140 }, { "epoch": 1.6151716826668236, "grad_norm": 0.75, "learning_rate": 1.3675658521104414e-05, "loss": 1.0467, "step": 5142 }, { "epoch": 1.615799909692365, "grad_norm": 0.765625, "learning_rate": 1.3673119644557285e-05, "loss": 1.2141, "step": 5144 }, { "epoch": 1.6164281367179063, "grad_norm": 0.81640625, "learning_rate": 1.3670580768010158e-05, "loss": 1.362, "step": 5146 }, { "epoch": 1.6170563637434476, "grad_norm": 0.80078125, "learning_rate": 1.3668041891463028e-05, "loss": 1.239, "step": 5148 }, { "epoch": 1.6176845907689892, "grad_norm": 0.890625, "learning_rate": 1.3665503014915903e-05, "loss": 1.3551, "step": 5150 }, { "epoch": 1.6183128177945305, "grad_norm": 0.89453125, "learning_rate": 1.3662964138368772e-05, "loss": 1.3027, "step": 5152 }, { "epoch": 1.6189410448200718, "grad_norm": 0.70703125, "learning_rate": 1.3660425261821645e-05, "loss": 1.262, "step": 5154 }, { "epoch": 1.6195692718456132, "grad_norm": 0.81640625, "learning_rate": 1.3657886385274517e-05, "loss": 1.3053, "step": 5156 }, { "epoch": 1.6201974988711547, "grad_norm": 0.74609375, "learning_rate": 1.365534750872739e-05, "loss": 1.3218, "step": 5158 }, { "epoch": 1.620825725896696, "grad_norm": 0.765625, "learning_rate": 1.3652808632180261e-05, "loss": 1.2845, "step": 5160 }, { "epoch": 1.6214539529222374, "grad_norm": 0.953125, "learning_rate": 1.3650269755633134e-05, "loss": 1.1991, "step": 5162 }, { "epoch": 1.6220821799477787, "grad_norm": 0.734375, "learning_rate": 1.3647730879086006e-05, "loss": 1.3062, "step": 5164 }, { "epoch": 1.62271040697332, "grad_norm": 0.80859375, "learning_rate": 1.3645192002538879e-05, "loss": 1.0548, "step": 5166 }, { "epoch": 1.6233386339988614, "grad_norm": 0.80078125, "learning_rate": 1.364265312599175e-05, "loss": 1.1821, "step": 5168 }, { "epoch": 1.6239668610244027, "grad_norm": 0.78125, "learning_rate": 1.3640114249444623e-05, "loss": 1.1822, "step": 5170 }, { "epoch": 1.624595088049944, "grad_norm": 0.90234375, "learning_rate": 1.3637575372897493e-05, "loss": 1.2382, "step": 5172 }, { "epoch": 1.6252233150754853, "grad_norm": 0.828125, "learning_rate": 1.3635036496350366e-05, "loss": 1.2857, "step": 5174 }, { "epoch": 1.6258515421010267, "grad_norm": 0.87890625, "learning_rate": 1.3632497619803237e-05, "loss": 1.0598, "step": 5176 }, { "epoch": 1.626479769126568, "grad_norm": 0.765625, "learning_rate": 1.362995874325611e-05, "loss": 1.2989, "step": 5178 }, { "epoch": 1.6271079961521093, "grad_norm": 0.84375, "learning_rate": 1.3627419866708982e-05, "loss": 1.2732, "step": 5180 }, { "epoch": 1.6277362231776509, "grad_norm": 0.77734375, "learning_rate": 1.3624880990161855e-05, "loss": 1.1808, "step": 5182 }, { "epoch": 1.6283644502031922, "grad_norm": 0.79296875, "learning_rate": 1.3622342113614726e-05, "loss": 1.2011, "step": 5184 }, { "epoch": 1.6289926772287335, "grad_norm": 0.7734375, "learning_rate": 1.36198032370676e-05, "loss": 1.2063, "step": 5186 }, { "epoch": 1.6296209042542749, "grad_norm": 0.91015625, "learning_rate": 1.361726436052047e-05, "loss": 1.243, "step": 5188 }, { "epoch": 1.6302491312798162, "grad_norm": 0.90625, "learning_rate": 1.3614725483973344e-05, "loss": 1.3291, "step": 5190 }, { "epoch": 1.6308773583053577, "grad_norm": 0.76953125, "learning_rate": 1.3612186607426214e-05, "loss": 1.2388, "step": 5192 }, { "epoch": 1.631505585330899, "grad_norm": 0.8203125, "learning_rate": 1.3609647730879088e-05, "loss": 1.2186, "step": 5194 }, { "epoch": 1.6321338123564404, "grad_norm": 0.83203125, "learning_rate": 1.3607108854331958e-05, "loss": 1.3375, "step": 5196 }, { "epoch": 1.6327620393819817, "grad_norm": 0.7890625, "learning_rate": 1.3604569977784831e-05, "loss": 1.1837, "step": 5198 }, { "epoch": 1.633390266407523, "grad_norm": 0.87890625, "learning_rate": 1.3602031101237702e-05, "loss": 1.1972, "step": 5200 }, { "epoch": 1.6340184934330644, "grad_norm": 0.80859375, "learning_rate": 1.3599492224690576e-05, "loss": 1.303, "step": 5202 }, { "epoch": 1.6346467204586057, "grad_norm": 1.015625, "learning_rate": 1.3596953348143447e-05, "loss": 1.2091, "step": 5204 }, { "epoch": 1.635274947484147, "grad_norm": 0.7734375, "learning_rate": 1.359441447159632e-05, "loss": 1.2598, "step": 5206 }, { "epoch": 1.6359031745096884, "grad_norm": 0.8671875, "learning_rate": 1.3591875595049191e-05, "loss": 1.0553, "step": 5208 }, { "epoch": 1.6365314015352297, "grad_norm": 0.8203125, "learning_rate": 1.3589336718502064e-05, "loss": 1.2889, "step": 5210 }, { "epoch": 1.637159628560771, "grad_norm": 1.2109375, "learning_rate": 1.3586797841954936e-05, "loss": 1.1413, "step": 5212 }, { "epoch": 1.6377878555863123, "grad_norm": 0.828125, "learning_rate": 1.3584258965407809e-05, "loss": 1.1636, "step": 5214 }, { "epoch": 1.638416082611854, "grad_norm": 1.0625, "learning_rate": 1.3581720088860679e-05, "loss": 1.276, "step": 5216 }, { "epoch": 1.6390443096373952, "grad_norm": 0.80859375, "learning_rate": 1.3579181212313552e-05, "loss": 1.305, "step": 5218 }, { "epoch": 1.6396725366629366, "grad_norm": 0.7734375, "learning_rate": 1.3576642335766423e-05, "loss": 1.201, "step": 5220 }, { "epoch": 1.6403007636884779, "grad_norm": 0.80078125, "learning_rate": 1.3574103459219296e-05, "loss": 1.4048, "step": 5222 }, { "epoch": 1.6409289907140194, "grad_norm": 0.80078125, "learning_rate": 1.3571564582672168e-05, "loss": 1.2899, "step": 5224 }, { "epoch": 1.6415572177395608, "grad_norm": 0.765625, "learning_rate": 1.356902570612504e-05, "loss": 1.2957, "step": 5226 }, { "epoch": 1.642185444765102, "grad_norm": 0.7734375, "learning_rate": 1.3566486829577914e-05, "loss": 1.1528, "step": 5228 }, { "epoch": 1.6428136717906434, "grad_norm": 0.79296875, "learning_rate": 1.3563947953030785e-05, "loss": 1.3055, "step": 5230 }, { "epoch": 1.6434418988161847, "grad_norm": 0.953125, "learning_rate": 1.3561409076483658e-05, "loss": 1.0612, "step": 5232 }, { "epoch": 1.644070125841726, "grad_norm": 0.80078125, "learning_rate": 1.355887019993653e-05, "loss": 1.1704, "step": 5234 }, { "epoch": 1.6446983528672674, "grad_norm": 0.89453125, "learning_rate": 1.3556331323389403e-05, "loss": 1.1888, "step": 5236 }, { "epoch": 1.6453265798928087, "grad_norm": 0.75390625, "learning_rate": 1.3553792446842274e-05, "loss": 1.2332, "step": 5238 }, { "epoch": 1.64595480691835, "grad_norm": 0.74609375, "learning_rate": 1.3551253570295147e-05, "loss": 1.2469, "step": 5240 }, { "epoch": 1.6465830339438914, "grad_norm": 0.80859375, "learning_rate": 1.3548714693748017e-05, "loss": 1.2556, "step": 5242 }, { "epoch": 1.6472112609694327, "grad_norm": 0.80859375, "learning_rate": 1.354617581720089e-05, "loss": 1.2354, "step": 5244 }, { "epoch": 1.647839487994974, "grad_norm": 0.75390625, "learning_rate": 1.3543636940653761e-05, "loss": 1.4014, "step": 5246 }, { "epoch": 1.6484677150205156, "grad_norm": 0.75, "learning_rate": 1.3541098064106634e-05, "loss": 1.3373, "step": 5248 }, { "epoch": 1.649095942046057, "grad_norm": 0.91015625, "learning_rate": 1.3538559187559506e-05, "loss": 1.3094, "step": 5250 }, { "epoch": 1.6497241690715982, "grad_norm": 0.765625, "learning_rate": 1.3536020311012379e-05, "loss": 1.2154, "step": 5252 }, { "epoch": 1.6503523960971396, "grad_norm": 0.7890625, "learning_rate": 1.353348143446525e-05, "loss": 1.1919, "step": 5254 }, { "epoch": 1.6509806231226811, "grad_norm": 0.80859375, "learning_rate": 1.3530942557918123e-05, "loss": 1.2142, "step": 5256 }, { "epoch": 1.6516088501482225, "grad_norm": 0.78125, "learning_rate": 1.3528403681370995e-05, "loss": 1.2202, "step": 5258 }, { "epoch": 1.6522370771737638, "grad_norm": 0.78515625, "learning_rate": 1.3525864804823868e-05, "loss": 1.3794, "step": 5260 }, { "epoch": 1.652865304199305, "grad_norm": 0.765625, "learning_rate": 1.3523325928276739e-05, "loss": 1.1983, "step": 5262 }, { "epoch": 1.6534935312248464, "grad_norm": 0.8046875, "learning_rate": 1.3520787051729612e-05, "loss": 1.3406, "step": 5264 }, { "epoch": 1.6541217582503878, "grad_norm": 0.96484375, "learning_rate": 1.3518248175182482e-05, "loss": 1.1838, "step": 5266 }, { "epoch": 1.654749985275929, "grad_norm": 0.77734375, "learning_rate": 1.3515709298635355e-05, "loss": 1.3651, "step": 5268 }, { "epoch": 1.6553782123014704, "grad_norm": 0.703125, "learning_rate": 1.3513170422088226e-05, "loss": 1.2919, "step": 5270 }, { "epoch": 1.6560064393270117, "grad_norm": 0.71875, "learning_rate": 1.35106315455411e-05, "loss": 1.3154, "step": 5272 }, { "epoch": 1.656634666352553, "grad_norm": 0.8125, "learning_rate": 1.350809266899397e-05, "loss": 1.2643, "step": 5274 }, { "epoch": 1.6572628933780944, "grad_norm": 0.77734375, "learning_rate": 1.3505553792446844e-05, "loss": 1.3239, "step": 5276 }, { "epoch": 1.6578911204036357, "grad_norm": 0.9140625, "learning_rate": 1.3503014915899715e-05, "loss": 1.2137, "step": 5278 }, { "epoch": 1.658519347429177, "grad_norm": 0.82421875, "learning_rate": 1.3500476039352588e-05, "loss": 1.2215, "step": 5280 }, { "epoch": 1.6591475744547186, "grad_norm": 0.953125, "learning_rate": 1.349793716280546e-05, "loss": 1.1606, "step": 5282 }, { "epoch": 1.65977580148026, "grad_norm": 0.7734375, "learning_rate": 1.3495398286258333e-05, "loss": 1.2496, "step": 5284 }, { "epoch": 1.6604040285058013, "grad_norm": 0.73828125, "learning_rate": 1.3492859409711202e-05, "loss": 1.1227, "step": 5286 }, { "epoch": 1.6610322555313426, "grad_norm": 0.75, "learning_rate": 1.3490320533164077e-05, "loss": 1.2112, "step": 5288 }, { "epoch": 1.6616604825568841, "grad_norm": 0.79296875, "learning_rate": 1.3487781656616947e-05, "loss": 1.206, "step": 5290 }, { "epoch": 1.6622887095824255, "grad_norm": 0.90625, "learning_rate": 1.348524278006982e-05, "loss": 1.2691, "step": 5292 }, { "epoch": 1.6629169366079668, "grad_norm": 0.81640625, "learning_rate": 1.3482703903522691e-05, "loss": 1.2359, "step": 5294 }, { "epoch": 1.6635451636335081, "grad_norm": 0.765625, "learning_rate": 1.3480165026975564e-05, "loss": 1.2491, "step": 5296 }, { "epoch": 1.6641733906590495, "grad_norm": 0.8984375, "learning_rate": 1.3477626150428436e-05, "loss": 1.1253, "step": 5298 }, { "epoch": 1.6648016176845908, "grad_norm": 0.79296875, "learning_rate": 1.3475087273881309e-05, "loss": 1.2954, "step": 5300 }, { "epoch": 1.6654298447101321, "grad_norm": 0.88671875, "learning_rate": 1.347254839733418e-05, "loss": 1.178, "step": 5302 }, { "epoch": 1.6660580717356734, "grad_norm": 0.81640625, "learning_rate": 1.3470009520787053e-05, "loss": 1.2116, "step": 5304 }, { "epoch": 1.6666862987612148, "grad_norm": 0.90234375, "learning_rate": 1.3467470644239925e-05, "loss": 1.2565, "step": 5306 }, { "epoch": 1.667314525786756, "grad_norm": 0.8984375, "learning_rate": 1.3464931767692798e-05, "loss": 1.0026, "step": 5308 }, { "epoch": 1.6679427528122974, "grad_norm": 0.83203125, "learning_rate": 1.3462392891145667e-05, "loss": 1.292, "step": 5310 }, { "epoch": 1.6685709798378388, "grad_norm": 0.86328125, "learning_rate": 1.345985401459854e-05, "loss": 1.104, "step": 5312 }, { "epoch": 1.6691992068633803, "grad_norm": 0.828125, "learning_rate": 1.3457315138051414e-05, "loss": 1.2361, "step": 5314 }, { "epoch": 1.6698274338889216, "grad_norm": 0.90234375, "learning_rate": 1.3454776261504285e-05, "loss": 1.2665, "step": 5316 }, { "epoch": 1.670455660914463, "grad_norm": 0.8125, "learning_rate": 1.3452237384957158e-05, "loss": 1.2824, "step": 5318 }, { "epoch": 1.6710838879400043, "grad_norm": 0.8515625, "learning_rate": 1.344969850841003e-05, "loss": 1.1185, "step": 5320 }, { "epoch": 1.6717121149655458, "grad_norm": 0.77734375, "learning_rate": 1.3447159631862903e-05, "loss": 1.2738, "step": 5322 }, { "epoch": 1.6723403419910872, "grad_norm": 0.77734375, "learning_rate": 1.3444620755315774e-05, "loss": 1.2713, "step": 5324 }, { "epoch": 1.6729685690166285, "grad_norm": 0.91015625, "learning_rate": 1.3442081878768647e-05, "loss": 1.1093, "step": 5326 }, { "epoch": 1.6735967960421698, "grad_norm": 0.86328125, "learning_rate": 1.3439543002221518e-05, "loss": 1.3263, "step": 5328 }, { "epoch": 1.6742250230677111, "grad_norm": 0.76953125, "learning_rate": 1.3437004125674391e-05, "loss": 1.3777, "step": 5330 }, { "epoch": 1.6748532500932525, "grad_norm": 1.0546875, "learning_rate": 1.3434465249127263e-05, "loss": 1.1151, "step": 5332 }, { "epoch": 1.6754814771187938, "grad_norm": 0.77734375, "learning_rate": 1.3431926372580136e-05, "loss": 1.5733, "step": 5334 }, { "epoch": 1.6761097041443351, "grad_norm": 0.796875, "learning_rate": 1.3429387496033006e-05, "loss": 1.1432, "step": 5336 }, { "epoch": 1.6767379311698765, "grad_norm": 0.8671875, "learning_rate": 1.3426848619485879e-05, "loss": 1.3853, "step": 5338 }, { "epoch": 1.6773661581954178, "grad_norm": 0.78125, "learning_rate": 1.342430974293875e-05, "loss": 1.3414, "step": 5340 }, { "epoch": 1.6779943852209591, "grad_norm": 0.85546875, "learning_rate": 1.3421770866391623e-05, "loss": 1.2539, "step": 5342 }, { "epoch": 1.6786226122465004, "grad_norm": 0.99609375, "learning_rate": 1.3419231989844494e-05, "loss": 1.2596, "step": 5344 }, { "epoch": 1.6792508392720418, "grad_norm": 0.890625, "learning_rate": 1.3416693113297368e-05, "loss": 1.1853, "step": 5346 }, { "epoch": 1.6798790662975833, "grad_norm": 0.76953125, "learning_rate": 1.3414154236750239e-05, "loss": 1.3223, "step": 5348 }, { "epoch": 1.6805072933231247, "grad_norm": 0.83203125, "learning_rate": 1.3411615360203112e-05, "loss": 1.3358, "step": 5350 }, { "epoch": 1.681135520348666, "grad_norm": 0.796875, "learning_rate": 1.3409076483655983e-05, "loss": 1.1666, "step": 5352 }, { "epoch": 1.6817637473742073, "grad_norm": 0.9453125, "learning_rate": 1.3406537607108856e-05, "loss": 1.2604, "step": 5354 }, { "epoch": 1.6823919743997489, "grad_norm": 0.76953125, "learning_rate": 1.3403998730561726e-05, "loss": 1.2772, "step": 5356 }, { "epoch": 1.6830202014252902, "grad_norm": 0.87109375, "learning_rate": 1.3401459854014601e-05, "loss": 1.2507, "step": 5358 }, { "epoch": 1.6836484284508315, "grad_norm": 0.82421875, "learning_rate": 1.339892097746747e-05, "loss": 1.1198, "step": 5360 }, { "epoch": 1.6842766554763728, "grad_norm": 0.79296875, "learning_rate": 1.3396382100920344e-05, "loss": 1.3049, "step": 5362 }, { "epoch": 1.6849048825019142, "grad_norm": 0.828125, "learning_rate": 1.3393843224373215e-05, "loss": 1.1426, "step": 5364 }, { "epoch": 1.6855331095274555, "grad_norm": 0.83984375, "learning_rate": 1.3391304347826088e-05, "loss": 1.1442, "step": 5366 }, { "epoch": 1.6861613365529968, "grad_norm": 0.8359375, "learning_rate": 1.338876547127896e-05, "loss": 1.2436, "step": 5368 }, { "epoch": 1.6867895635785382, "grad_norm": 0.7734375, "learning_rate": 1.3386226594731833e-05, "loss": 1.2264, "step": 5370 }, { "epoch": 1.6874177906040795, "grad_norm": 0.828125, "learning_rate": 1.3383687718184704e-05, "loss": 1.2157, "step": 5372 }, { "epoch": 1.6880460176296208, "grad_norm": 0.77734375, "learning_rate": 1.3381148841637577e-05, "loss": 1.2976, "step": 5374 }, { "epoch": 1.6886742446551621, "grad_norm": 0.81640625, "learning_rate": 1.3378609965090448e-05, "loss": 1.2154, "step": 5376 }, { "epoch": 1.6893024716807035, "grad_norm": 0.7265625, "learning_rate": 1.3376071088543322e-05, "loss": 1.1889, "step": 5378 }, { "epoch": 1.689930698706245, "grad_norm": 0.83203125, "learning_rate": 1.3373532211996191e-05, "loss": 1.2453, "step": 5380 }, { "epoch": 1.6905589257317863, "grad_norm": 0.796875, "learning_rate": 1.3370993335449064e-05, "loss": 1.1871, "step": 5382 }, { "epoch": 1.6911871527573277, "grad_norm": 0.74609375, "learning_rate": 1.3368454458901936e-05, "loss": 1.3467, "step": 5384 }, { "epoch": 1.691815379782869, "grad_norm": 0.77734375, "learning_rate": 1.3365915582354809e-05, "loss": 1.3004, "step": 5386 }, { "epoch": 1.6924436068084106, "grad_norm": 0.9140625, "learning_rate": 1.336337670580768e-05, "loss": 1.3259, "step": 5388 }, { "epoch": 1.6930718338339519, "grad_norm": 0.85546875, "learning_rate": 1.3360837829260553e-05, "loss": 1.2515, "step": 5390 }, { "epoch": 1.6937000608594932, "grad_norm": 0.80859375, "learning_rate": 1.3358298952713425e-05, "loss": 1.2712, "step": 5392 }, { "epoch": 1.6943282878850345, "grad_norm": 0.76171875, "learning_rate": 1.3355760076166298e-05, "loss": 1.2064, "step": 5394 }, { "epoch": 1.6949565149105759, "grad_norm": 0.75, "learning_rate": 1.3353221199619169e-05, "loss": 1.3884, "step": 5396 }, { "epoch": 1.6955847419361172, "grad_norm": 0.796875, "learning_rate": 1.3350682323072042e-05, "loss": 1.24, "step": 5398 }, { "epoch": 1.6962129689616585, "grad_norm": 0.77734375, "learning_rate": 1.3348143446524915e-05, "loss": 1.2544, "step": 5400 }, { "epoch": 1.6968411959871998, "grad_norm": 0.83984375, "learning_rate": 1.3345604569977787e-05, "loss": 1.2522, "step": 5402 }, { "epoch": 1.6974694230127412, "grad_norm": 0.83984375, "learning_rate": 1.334306569343066e-05, "loss": 1.2362, "step": 5404 }, { "epoch": 1.6980976500382825, "grad_norm": 0.9375, "learning_rate": 1.334052681688353e-05, "loss": 1.3574, "step": 5406 }, { "epoch": 1.6987258770638238, "grad_norm": 0.765625, "learning_rate": 1.3337987940336402e-05, "loss": 1.274, "step": 5408 }, { "epoch": 1.6993541040893652, "grad_norm": 0.87890625, "learning_rate": 1.3335449063789274e-05, "loss": 1.2817, "step": 5410 }, { "epoch": 1.6999823311149065, "grad_norm": 0.765625, "learning_rate": 1.3332910187242147e-05, "loss": 1.3324, "step": 5412 }, { "epoch": 1.700610558140448, "grad_norm": 0.76953125, "learning_rate": 1.3330371310695018e-05, "loss": 1.2094, "step": 5414 }, { "epoch": 1.7012387851659894, "grad_norm": 0.8203125, "learning_rate": 1.3327832434147891e-05, "loss": 1.1927, "step": 5416 }, { "epoch": 1.7018670121915307, "grad_norm": 0.8203125, "learning_rate": 1.3325293557600763e-05, "loss": 1.166, "step": 5418 }, { "epoch": 1.702495239217072, "grad_norm": 0.859375, "learning_rate": 1.3322754681053636e-05, "loss": 1.3582, "step": 5420 }, { "epoch": 1.7031234662426136, "grad_norm": 0.80078125, "learning_rate": 1.3320215804506507e-05, "loss": 1.3184, "step": 5422 }, { "epoch": 1.703751693268155, "grad_norm": 0.8046875, "learning_rate": 1.331767692795938e-05, "loss": 1.2835, "step": 5424 }, { "epoch": 1.7043799202936962, "grad_norm": 0.80859375, "learning_rate": 1.3315138051412252e-05, "loss": 1.2446, "step": 5426 }, { "epoch": 1.7050081473192376, "grad_norm": 0.8359375, "learning_rate": 1.3312599174865125e-05, "loss": 1.2357, "step": 5428 }, { "epoch": 1.7056363743447789, "grad_norm": 0.921875, "learning_rate": 1.3310060298317994e-05, "loss": 1.2273, "step": 5430 }, { "epoch": 1.7062646013703202, "grad_norm": 0.8203125, "learning_rate": 1.3307521421770867e-05, "loss": 1.2684, "step": 5432 }, { "epoch": 1.7068928283958615, "grad_norm": 0.85546875, "learning_rate": 1.3304982545223739e-05, "loss": 1.2107, "step": 5434 }, { "epoch": 1.7075210554214029, "grad_norm": 0.77734375, "learning_rate": 1.3302443668676612e-05, "loss": 1.3441, "step": 5436 }, { "epoch": 1.7081492824469442, "grad_norm": 0.69921875, "learning_rate": 1.3299904792129483e-05, "loss": 1.2601, "step": 5438 }, { "epoch": 1.7087775094724855, "grad_norm": 0.78515625, "learning_rate": 1.3297365915582356e-05, "loss": 1.2648, "step": 5440 }, { "epoch": 1.7094057364980269, "grad_norm": 0.80078125, "learning_rate": 1.3294827039035228e-05, "loss": 1.1744, "step": 5442 }, { "epoch": 1.7100339635235682, "grad_norm": 0.8203125, "learning_rate": 1.3292288162488101e-05, "loss": 1.0949, "step": 5444 }, { "epoch": 1.7106621905491097, "grad_norm": 0.8515625, "learning_rate": 1.3289749285940972e-05, "loss": 1.1723, "step": 5446 }, { "epoch": 1.711290417574651, "grad_norm": 0.86328125, "learning_rate": 1.3287210409393845e-05, "loss": 1.2938, "step": 5448 }, { "epoch": 1.7119186446001924, "grad_norm": 0.81640625, "learning_rate": 1.3284671532846715e-05, "loss": 1.3799, "step": 5450 }, { "epoch": 1.7125468716257337, "grad_norm": 0.77734375, "learning_rate": 1.3282132656299588e-05, "loss": 1.0393, "step": 5452 }, { "epoch": 1.7131750986512753, "grad_norm": 0.75390625, "learning_rate": 1.327959377975246e-05, "loss": 1.0883, "step": 5454 }, { "epoch": 1.7138033256768166, "grad_norm": 0.82421875, "learning_rate": 1.3277054903205333e-05, "loss": 1.1746, "step": 5456 }, { "epoch": 1.714431552702358, "grad_norm": 0.8984375, "learning_rate": 1.3274516026658204e-05, "loss": 1.2404, "step": 5458 }, { "epoch": 1.7150597797278992, "grad_norm": 0.84765625, "learning_rate": 1.3271977150111077e-05, "loss": 1.1545, "step": 5460 }, { "epoch": 1.7156880067534406, "grad_norm": 0.8828125, "learning_rate": 1.3269438273563948e-05, "loss": 1.3406, "step": 5462 }, { "epoch": 1.716316233778982, "grad_norm": 0.8125, "learning_rate": 1.3266899397016821e-05, "loss": 1.2526, "step": 5464 }, { "epoch": 1.7169444608045232, "grad_norm": 0.8046875, "learning_rate": 1.3264360520469693e-05, "loss": 1.2369, "step": 5466 }, { "epoch": 1.7175726878300646, "grad_norm": 0.84375, "learning_rate": 1.3261821643922566e-05, "loss": 1.2515, "step": 5468 }, { "epoch": 1.7182009148556059, "grad_norm": 0.79296875, "learning_rate": 1.3259282767375437e-05, "loss": 1.1968, "step": 5470 }, { "epoch": 1.7188291418811472, "grad_norm": 0.84375, "learning_rate": 1.325674389082831e-05, "loss": 1.2784, "step": 5472 }, { "epoch": 1.7194573689066885, "grad_norm": 0.8125, "learning_rate": 1.325420501428118e-05, "loss": 1.2446, "step": 5474 }, { "epoch": 1.7200855959322299, "grad_norm": 1.2421875, "learning_rate": 1.3251666137734053e-05, "loss": 1.1592, "step": 5476 }, { "epoch": 1.7207138229577712, "grad_norm": 0.80859375, "learning_rate": 1.3249127261186925e-05, "loss": 1.2955, "step": 5478 }, { "epoch": 1.7213420499833127, "grad_norm": 0.76171875, "learning_rate": 1.3246588384639798e-05, "loss": 1.334, "step": 5480 }, { "epoch": 1.721970277008854, "grad_norm": 0.8359375, "learning_rate": 1.3244049508092669e-05, "loss": 1.114, "step": 5482 }, { "epoch": 1.7225985040343954, "grad_norm": 0.859375, "learning_rate": 1.3241510631545542e-05, "loss": 1.0863, "step": 5484 }, { "epoch": 1.7232267310599367, "grad_norm": 0.78515625, "learning_rate": 1.3238971754998415e-05, "loss": 1.3088, "step": 5486 }, { "epoch": 1.7238549580854783, "grad_norm": 0.765625, "learning_rate": 1.3236432878451287e-05, "loss": 1.0641, "step": 5488 }, { "epoch": 1.7244831851110196, "grad_norm": 0.796875, "learning_rate": 1.323389400190416e-05, "loss": 1.3067, "step": 5490 }, { "epoch": 1.725111412136561, "grad_norm": 0.80859375, "learning_rate": 1.3231355125357031e-05, "loss": 1.2372, "step": 5492 }, { "epoch": 1.7257396391621023, "grad_norm": 0.90234375, "learning_rate": 1.3228816248809904e-05, "loss": 1.0898, "step": 5494 }, { "epoch": 1.7263678661876436, "grad_norm": 0.80078125, "learning_rate": 1.3226277372262775e-05, "loss": 1.297, "step": 5496 }, { "epoch": 1.726996093213185, "grad_norm": 0.72265625, "learning_rate": 1.3223738495715649e-05, "loss": 1.2627, "step": 5498 }, { "epoch": 1.7276243202387263, "grad_norm": 0.7421875, "learning_rate": 1.3221199619168518e-05, "loss": 1.3876, "step": 5500 }, { "epoch": 1.7282525472642676, "grad_norm": 0.85546875, "learning_rate": 1.3218660742621391e-05, "loss": 1.1941, "step": 5502 }, { "epoch": 1.728880774289809, "grad_norm": 0.7578125, "learning_rate": 1.3216121866074263e-05, "loss": 1.2344, "step": 5504 }, { "epoch": 1.7295090013153502, "grad_norm": 0.86328125, "learning_rate": 1.3213582989527136e-05, "loss": 1.1713, "step": 5506 }, { "epoch": 1.7301372283408916, "grad_norm": 0.80859375, "learning_rate": 1.3211044112980007e-05, "loss": 1.242, "step": 5508 }, { "epoch": 1.730765455366433, "grad_norm": 0.74609375, "learning_rate": 1.320850523643288e-05, "loss": 1.1983, "step": 5510 }, { "epoch": 1.7313936823919744, "grad_norm": 0.80078125, "learning_rate": 1.3205966359885752e-05, "loss": 1.2646, "step": 5512 }, { "epoch": 1.7320219094175158, "grad_norm": 0.80078125, "learning_rate": 1.3203427483338625e-05, "loss": 1.3675, "step": 5514 }, { "epoch": 1.732650136443057, "grad_norm": 0.82421875, "learning_rate": 1.3200888606791496e-05, "loss": 1.2915, "step": 5516 }, { "epoch": 1.7332783634685984, "grad_norm": 0.8046875, "learning_rate": 1.3198349730244369e-05, "loss": 1.3231, "step": 5518 }, { "epoch": 1.73390659049414, "grad_norm": 0.8203125, "learning_rate": 1.3195810853697239e-05, "loss": 1.228, "step": 5520 }, { "epoch": 1.7345348175196813, "grad_norm": 0.91796875, "learning_rate": 1.3193271977150114e-05, "loss": 1.154, "step": 5522 }, { "epoch": 1.7351630445452226, "grad_norm": 0.82421875, "learning_rate": 1.3190733100602983e-05, "loss": 1.2426, "step": 5524 }, { "epoch": 1.735791271570764, "grad_norm": 0.76953125, "learning_rate": 1.3188194224055856e-05, "loss": 1.1921, "step": 5526 }, { "epoch": 1.7364194985963053, "grad_norm": 0.90625, "learning_rate": 1.3185655347508728e-05, "loss": 1.2466, "step": 5528 }, { "epoch": 1.7370477256218466, "grad_norm": 0.7578125, "learning_rate": 1.31831164709616e-05, "loss": 1.2553, "step": 5530 }, { "epoch": 1.737675952647388, "grad_norm": 0.765625, "learning_rate": 1.3180577594414472e-05, "loss": 1.2312, "step": 5532 }, { "epoch": 1.7383041796729293, "grad_norm": 0.765625, "learning_rate": 1.3178038717867345e-05, "loss": 1.2277, "step": 5534 }, { "epoch": 1.7389324066984706, "grad_norm": 0.84375, "learning_rate": 1.3175499841320217e-05, "loss": 1.2192, "step": 5536 }, { "epoch": 1.739560633724012, "grad_norm": 0.796875, "learning_rate": 1.317296096477309e-05, "loss": 1.1536, "step": 5538 }, { "epoch": 1.7401888607495533, "grad_norm": 0.890625, "learning_rate": 1.3170422088225961e-05, "loss": 1.16, "step": 5540 }, { "epoch": 1.7408170877750946, "grad_norm": 0.90625, "learning_rate": 1.3167883211678834e-05, "loss": 1.472, "step": 5542 }, { "epoch": 1.7414453148006361, "grad_norm": 0.796875, "learning_rate": 1.3165344335131704e-05, "loss": 1.3138, "step": 5544 }, { "epoch": 1.7420735418261775, "grad_norm": 0.79296875, "learning_rate": 1.3162805458584577e-05, "loss": 1.2316, "step": 5546 }, { "epoch": 1.7427017688517188, "grad_norm": 0.8359375, "learning_rate": 1.3160266582037448e-05, "loss": 1.2371, "step": 5548 }, { "epoch": 1.7433299958772601, "grad_norm": 0.8125, "learning_rate": 1.3157727705490321e-05, "loss": 1.302, "step": 5550 }, { "epoch": 1.7439582229028014, "grad_norm": 0.83203125, "learning_rate": 1.3155188828943193e-05, "loss": 1.1977, "step": 5552 }, { "epoch": 1.744586449928343, "grad_norm": 0.84765625, "learning_rate": 1.3152649952396066e-05, "loss": 1.234, "step": 5554 }, { "epoch": 1.7452146769538843, "grad_norm": 0.80859375, "learning_rate": 1.3150111075848937e-05, "loss": 1.2929, "step": 5556 }, { "epoch": 1.7458429039794257, "grad_norm": 0.77734375, "learning_rate": 1.314757219930181e-05, "loss": 1.2919, "step": 5558 }, { "epoch": 1.746471131004967, "grad_norm": 0.71875, "learning_rate": 1.3145033322754682e-05, "loss": 1.3428, "step": 5560 }, { "epoch": 1.7470993580305083, "grad_norm": 0.875, "learning_rate": 1.3142494446207555e-05, "loss": 1.1509, "step": 5562 }, { "epoch": 1.7477275850560496, "grad_norm": 0.96875, "learning_rate": 1.3139955569660424e-05, "loss": 1.1809, "step": 5564 }, { "epoch": 1.748355812081591, "grad_norm": 0.78515625, "learning_rate": 1.31374166931133e-05, "loss": 1.2189, "step": 5566 }, { "epoch": 1.7489840391071323, "grad_norm": 0.85546875, "learning_rate": 1.3134877816566169e-05, "loss": 1.1861, "step": 5568 }, { "epoch": 1.7496122661326736, "grad_norm": 0.875, "learning_rate": 1.3132338940019042e-05, "loss": 1.1364, "step": 5570 }, { "epoch": 1.750240493158215, "grad_norm": 0.77734375, "learning_rate": 1.3129800063471915e-05, "loss": 1.1878, "step": 5572 }, { "epoch": 1.7508687201837563, "grad_norm": 0.76953125, "learning_rate": 1.3127261186924786e-05, "loss": 1.1927, "step": 5574 }, { "epoch": 1.7514969472092976, "grad_norm": 0.8359375, "learning_rate": 1.312472231037766e-05, "loss": 1.1183, "step": 5576 }, { "epoch": 1.7521251742348392, "grad_norm": 0.80078125, "learning_rate": 1.3122183433830531e-05, "loss": 1.293, "step": 5578 }, { "epoch": 1.7527534012603805, "grad_norm": 0.7890625, "learning_rate": 1.3119644557283404e-05, "loss": 1.2526, "step": 5580 }, { "epoch": 1.7533816282859218, "grad_norm": 0.83203125, "learning_rate": 1.3117105680736275e-05, "loss": 1.1872, "step": 5582 }, { "epoch": 1.7540098553114631, "grad_norm": 0.88671875, "learning_rate": 1.3114566804189148e-05, "loss": 1.2216, "step": 5584 }, { "epoch": 1.7546380823370047, "grad_norm": 0.734375, "learning_rate": 1.311202792764202e-05, "loss": 1.1968, "step": 5586 }, { "epoch": 1.755266309362546, "grad_norm": 0.80078125, "learning_rate": 1.3109489051094893e-05, "loss": 1.3127, "step": 5588 }, { "epoch": 1.7558945363880873, "grad_norm": 0.81640625, "learning_rate": 1.3106950174547763e-05, "loss": 1.143, "step": 5590 }, { "epoch": 1.7565227634136287, "grad_norm": 1.0546875, "learning_rate": 1.3104411298000637e-05, "loss": 1.2036, "step": 5592 }, { "epoch": 1.75715099043917, "grad_norm": 0.875, "learning_rate": 1.3101872421453507e-05, "loss": 1.3434, "step": 5594 }, { "epoch": 1.7577792174647113, "grad_norm": 0.97265625, "learning_rate": 1.309933354490638e-05, "loss": 1.1842, "step": 5596 }, { "epoch": 1.7584074444902527, "grad_norm": 0.796875, "learning_rate": 1.3096794668359251e-05, "loss": 1.2543, "step": 5598 }, { "epoch": 1.759035671515794, "grad_norm": 0.91015625, "learning_rate": 1.3094255791812125e-05, "loss": 1.2244, "step": 5600 }, { "epoch": 1.7596638985413353, "grad_norm": 0.8046875, "learning_rate": 1.3091716915264996e-05, "loss": 1.1647, "step": 5602 }, { "epoch": 1.7602921255668766, "grad_norm": 0.79296875, "learning_rate": 1.3089178038717869e-05, "loss": 1.3046, "step": 5604 }, { "epoch": 1.760920352592418, "grad_norm": 0.765625, "learning_rate": 1.308663916217074e-05, "loss": 1.1823, "step": 5606 }, { "epoch": 1.7615485796179593, "grad_norm": 0.875, "learning_rate": 1.3084100285623613e-05, "loss": 1.3261, "step": 5608 }, { "epoch": 1.7621768066435008, "grad_norm": 0.73828125, "learning_rate": 1.3081561409076485e-05, "loss": 1.2197, "step": 5610 }, { "epoch": 1.7628050336690422, "grad_norm": 0.75, "learning_rate": 1.3079022532529358e-05, "loss": 1.3214, "step": 5612 }, { "epoch": 1.7634332606945835, "grad_norm": 0.76171875, "learning_rate": 1.3076483655982228e-05, "loss": 1.3303, "step": 5614 }, { "epoch": 1.7640614877201248, "grad_norm": 0.796875, "learning_rate": 1.30739447794351e-05, "loss": 1.2344, "step": 5616 }, { "epoch": 1.7646897147456662, "grad_norm": 0.90625, "learning_rate": 1.3071405902887972e-05, "loss": 1.2934, "step": 5618 }, { "epoch": 1.7653179417712077, "grad_norm": 0.8203125, "learning_rate": 1.3068867026340845e-05, "loss": 1.1482, "step": 5620 }, { "epoch": 1.765946168796749, "grad_norm": 0.70703125, "learning_rate": 1.3066328149793717e-05, "loss": 1.2069, "step": 5622 }, { "epoch": 1.7665743958222904, "grad_norm": 0.82421875, "learning_rate": 1.306378927324659e-05, "loss": 1.1891, "step": 5624 }, { "epoch": 1.7672026228478317, "grad_norm": 0.71484375, "learning_rate": 1.3061250396699461e-05, "loss": 1.3099, "step": 5626 }, { "epoch": 1.767830849873373, "grad_norm": 0.7578125, "learning_rate": 1.3058711520152334e-05, "loss": 1.2631, "step": 5628 }, { "epoch": 1.7684590768989144, "grad_norm": 0.8125, "learning_rate": 1.3056172643605205e-05, "loss": 1.2516, "step": 5630 }, { "epoch": 1.7690873039244557, "grad_norm": 0.8046875, "learning_rate": 1.3053633767058079e-05, "loss": 1.2557, "step": 5632 }, { "epoch": 1.769715530949997, "grad_norm": 0.92578125, "learning_rate": 1.305109489051095e-05, "loss": 1.2499, "step": 5634 }, { "epoch": 1.7703437579755383, "grad_norm": 0.765625, "learning_rate": 1.3048556013963823e-05, "loss": 1.3002, "step": 5636 }, { "epoch": 1.7709719850010797, "grad_norm": 0.82421875, "learning_rate": 1.3046017137416693e-05, "loss": 1.2968, "step": 5638 }, { "epoch": 1.771600212026621, "grad_norm": 0.8125, "learning_rate": 1.3043478260869566e-05, "loss": 1.3062, "step": 5640 }, { "epoch": 1.7722284390521623, "grad_norm": 0.765625, "learning_rate": 1.3040939384322437e-05, "loss": 1.1898, "step": 5642 }, { "epoch": 1.7728566660777039, "grad_norm": 0.8203125, "learning_rate": 1.303840050777531e-05, "loss": 1.3012, "step": 5644 }, { "epoch": 1.7734848931032452, "grad_norm": 0.82421875, "learning_rate": 1.3035861631228182e-05, "loss": 1.1432, "step": 5646 }, { "epoch": 1.7741131201287865, "grad_norm": 0.75390625, "learning_rate": 1.3033322754681055e-05, "loss": 1.2746, "step": 5648 }, { "epoch": 1.7747413471543279, "grad_norm": 0.8359375, "learning_rate": 1.3030783878133926e-05, "loss": 1.2069, "step": 5650 }, { "epoch": 1.7753695741798694, "grad_norm": 0.8125, "learning_rate": 1.3028245001586799e-05, "loss": 1.4756, "step": 5652 }, { "epoch": 1.7759978012054107, "grad_norm": 0.79296875, "learning_rate": 1.302570612503967e-05, "loss": 1.2794, "step": 5654 }, { "epoch": 1.776626028230952, "grad_norm": 0.8125, "learning_rate": 1.3023167248492544e-05, "loss": 1.4529, "step": 5656 }, { "epoch": 1.7772542552564934, "grad_norm": 0.74609375, "learning_rate": 1.3020628371945417e-05, "loss": 1.2728, "step": 5658 }, { "epoch": 1.7778824822820347, "grad_norm": 0.796875, "learning_rate": 1.3018089495398288e-05, "loss": 1.2532, "step": 5660 }, { "epoch": 1.778510709307576, "grad_norm": 0.76171875, "learning_rate": 1.3015550618851161e-05, "loss": 1.1706, "step": 5662 }, { "epoch": 1.7791389363331174, "grad_norm": 0.78125, "learning_rate": 1.301301174230403e-05, "loss": 1.3033, "step": 5664 }, { "epoch": 1.7797671633586587, "grad_norm": 0.74609375, "learning_rate": 1.3010472865756904e-05, "loss": 1.2695, "step": 5666 }, { "epoch": 1.7803953903842, "grad_norm": 0.84375, "learning_rate": 1.3007933989209775e-05, "loss": 1.1627, "step": 5668 }, { "epoch": 1.7810236174097414, "grad_norm": 0.87109375, "learning_rate": 1.3005395112662648e-05, "loss": 1.2485, "step": 5670 }, { "epoch": 1.7816518444352827, "grad_norm": 0.77734375, "learning_rate": 1.300285623611552e-05, "loss": 1.2374, "step": 5672 }, { "epoch": 1.782280071460824, "grad_norm": 0.76953125, "learning_rate": 1.3000317359568393e-05, "loss": 1.302, "step": 5674 }, { "epoch": 1.7829082984863656, "grad_norm": 0.8828125, "learning_rate": 1.2997778483021264e-05, "loss": 1.3329, "step": 5676 }, { "epoch": 1.783536525511907, "grad_norm": 0.77734375, "learning_rate": 1.2995239606474137e-05, "loss": 1.2451, "step": 5678 }, { "epoch": 1.7841647525374482, "grad_norm": 0.81640625, "learning_rate": 1.2992700729927009e-05, "loss": 1.2296, "step": 5680 }, { "epoch": 1.7847929795629895, "grad_norm": 0.76171875, "learning_rate": 1.2990161853379882e-05, "loss": 1.2764, "step": 5682 }, { "epoch": 1.785421206588531, "grad_norm": 0.76953125, "learning_rate": 1.2987622976832751e-05, "loss": 1.3022, "step": 5684 }, { "epoch": 1.7860494336140724, "grad_norm": 0.78515625, "learning_rate": 1.2985084100285626e-05, "loss": 1.1862, "step": 5686 }, { "epoch": 1.7866776606396138, "grad_norm": 0.9375, "learning_rate": 1.2982545223738496e-05, "loss": 1.097, "step": 5688 }, { "epoch": 1.787305887665155, "grad_norm": 0.7421875, "learning_rate": 1.2980006347191369e-05, "loss": 1.3138, "step": 5690 }, { "epoch": 1.7879341146906964, "grad_norm": 0.73828125, "learning_rate": 1.297746747064424e-05, "loss": 1.2888, "step": 5692 }, { "epoch": 1.7885623417162377, "grad_norm": 0.8671875, "learning_rate": 1.2974928594097113e-05, "loss": 1.3605, "step": 5694 }, { "epoch": 1.789190568741779, "grad_norm": 0.7734375, "learning_rate": 1.2972389717549985e-05, "loss": 1.2925, "step": 5696 }, { "epoch": 1.7898187957673204, "grad_norm": 0.89453125, "learning_rate": 1.2969850841002858e-05, "loss": 1.0879, "step": 5698 }, { "epoch": 1.7904470227928617, "grad_norm": 0.78515625, "learning_rate": 1.296731196445573e-05, "loss": 1.1732, "step": 5700 }, { "epoch": 1.791075249818403, "grad_norm": 0.71875, "learning_rate": 1.2964773087908602e-05, "loss": 1.2151, "step": 5702 }, { "epoch": 1.7917034768439444, "grad_norm": 0.7734375, "learning_rate": 1.2962234211361474e-05, "loss": 1.1779, "step": 5704 }, { "epoch": 1.7923317038694857, "grad_norm": 0.8125, "learning_rate": 1.2959695334814347e-05, "loss": 1.2626, "step": 5706 }, { "epoch": 1.792959930895027, "grad_norm": 0.7890625, "learning_rate": 1.2957156458267216e-05, "loss": 1.1991, "step": 5708 }, { "epoch": 1.7935881579205686, "grad_norm": 0.74609375, "learning_rate": 1.295461758172009e-05, "loss": 1.3407, "step": 5710 }, { "epoch": 1.79421638494611, "grad_norm": 0.78515625, "learning_rate": 1.2952078705172961e-05, "loss": 1.2508, "step": 5712 }, { "epoch": 1.7948446119716512, "grad_norm": 0.74609375, "learning_rate": 1.2949539828625834e-05, "loss": 1.4018, "step": 5714 }, { "epoch": 1.7954728389971926, "grad_norm": 0.76953125, "learning_rate": 1.2947000952078705e-05, "loss": 1.1721, "step": 5716 }, { "epoch": 1.7961010660227341, "grad_norm": 0.79296875, "learning_rate": 1.2944462075531578e-05, "loss": 1.2232, "step": 5718 }, { "epoch": 1.7967292930482754, "grad_norm": 0.73828125, "learning_rate": 1.294192319898445e-05, "loss": 1.2397, "step": 5720 }, { "epoch": 1.7973575200738168, "grad_norm": 0.8203125, "learning_rate": 1.2939384322437323e-05, "loss": 1.172, "step": 5722 }, { "epoch": 1.797985747099358, "grad_norm": 0.73828125, "learning_rate": 1.2936845445890194e-05, "loss": 1.3208, "step": 5724 }, { "epoch": 1.7986139741248994, "grad_norm": 0.77734375, "learning_rate": 1.2934306569343067e-05, "loss": 1.2101, "step": 5726 }, { "epoch": 1.7992422011504408, "grad_norm": 0.84375, "learning_rate": 1.2931767692795937e-05, "loss": 1.2136, "step": 5728 }, { "epoch": 1.799870428175982, "grad_norm": 0.81640625, "learning_rate": 1.2929228816248812e-05, "loss": 1.3315, "step": 5730 }, { "epoch": 1.8004986552015234, "grad_norm": 0.82421875, "learning_rate": 1.2926689939701682e-05, "loss": 1.1851, "step": 5732 }, { "epoch": 1.8011268822270647, "grad_norm": 0.83984375, "learning_rate": 1.2924151063154555e-05, "loss": 1.1484, "step": 5734 }, { "epoch": 1.801755109252606, "grad_norm": 0.78515625, "learning_rate": 1.2921612186607426e-05, "loss": 1.1852, "step": 5736 }, { "epoch": 1.8023833362781474, "grad_norm": 0.82421875, "learning_rate": 1.2919073310060299e-05, "loss": 1.2899, "step": 5738 }, { "epoch": 1.8030115633036887, "grad_norm": 0.84375, "learning_rate": 1.291653443351317e-05, "loss": 1.2624, "step": 5740 }, { "epoch": 1.8036397903292303, "grad_norm": 0.73828125, "learning_rate": 1.2913995556966044e-05, "loss": 1.1763, "step": 5742 }, { "epoch": 1.8042680173547716, "grad_norm": 0.828125, "learning_rate": 1.2911456680418917e-05, "loss": 1.384, "step": 5744 }, { "epoch": 1.804896244380313, "grad_norm": 0.83203125, "learning_rate": 1.2908917803871788e-05, "loss": 1.241, "step": 5746 }, { "epoch": 1.8055244714058543, "grad_norm": 0.83203125, "learning_rate": 1.2906378927324661e-05, "loss": 1.3487, "step": 5748 }, { "epoch": 1.8061526984313958, "grad_norm": 0.76953125, "learning_rate": 1.2903840050777532e-05, "loss": 1.2097, "step": 5750 }, { "epoch": 1.8067809254569371, "grad_norm": 0.72265625, "learning_rate": 1.2901301174230406e-05, "loss": 1.2788, "step": 5752 }, { "epoch": 1.8074091524824785, "grad_norm": 0.7578125, "learning_rate": 1.2898762297683275e-05, "loss": 1.2052, "step": 5754 }, { "epoch": 1.8080373795080198, "grad_norm": 0.765625, "learning_rate": 1.289622342113615e-05, "loss": 1.3531, "step": 5756 }, { "epoch": 1.8086656065335611, "grad_norm": 0.83984375, "learning_rate": 1.289368454458902e-05, "loss": 1.2512, "step": 5758 }, { "epoch": 1.8092938335591024, "grad_norm": 0.765625, "learning_rate": 1.2891145668041893e-05, "loss": 1.2617, "step": 5760 }, { "epoch": 1.8099220605846438, "grad_norm": 0.80078125, "learning_rate": 1.2888606791494764e-05, "loss": 1.2172, "step": 5762 }, { "epoch": 1.810550287610185, "grad_norm": 0.80859375, "learning_rate": 1.2886067914947637e-05, "loss": 1.3569, "step": 5764 }, { "epoch": 1.8111785146357264, "grad_norm": 0.8828125, "learning_rate": 1.2883529038400509e-05, "loss": 1.1964, "step": 5766 }, { "epoch": 1.8118067416612678, "grad_norm": 0.85546875, "learning_rate": 1.2880990161853382e-05, "loss": 1.1786, "step": 5768 }, { "epoch": 1.812434968686809, "grad_norm": 0.7578125, "learning_rate": 1.2878451285306253e-05, "loss": 1.2148, "step": 5770 }, { "epoch": 1.8130631957123504, "grad_norm": 0.90234375, "learning_rate": 1.2875912408759126e-05, "loss": 1.1685, "step": 5772 }, { "epoch": 1.8136914227378917, "grad_norm": 0.8046875, "learning_rate": 1.2873373532211998e-05, "loss": 1.1826, "step": 5774 }, { "epoch": 1.8143196497634333, "grad_norm": 0.8828125, "learning_rate": 1.287083465566487e-05, "loss": 1.2594, "step": 5776 }, { "epoch": 1.8149478767889746, "grad_norm": 0.828125, "learning_rate": 1.286829577911774e-05, "loss": 1.209, "step": 5778 }, { "epoch": 1.815576103814516, "grad_norm": 0.83984375, "learning_rate": 1.2865756902570613e-05, "loss": 1.2065, "step": 5780 }, { "epoch": 1.8162043308400573, "grad_norm": 0.78125, "learning_rate": 1.2863218026023485e-05, "loss": 1.1886, "step": 5782 }, { "epoch": 1.8168325578655988, "grad_norm": 0.79296875, "learning_rate": 1.2860679149476358e-05, "loss": 1.2196, "step": 5784 }, { "epoch": 1.8174607848911402, "grad_norm": 0.92578125, "learning_rate": 1.285814027292923e-05, "loss": 1.1996, "step": 5786 }, { "epoch": 1.8180890119166815, "grad_norm": 0.7578125, "learning_rate": 1.2855601396382102e-05, "loss": 1.1933, "step": 5788 }, { "epoch": 1.8187172389422228, "grad_norm": 0.80859375, "learning_rate": 1.2853062519834974e-05, "loss": 1.2438, "step": 5790 }, { "epoch": 1.8193454659677641, "grad_norm": 0.8671875, "learning_rate": 1.2850523643287847e-05, "loss": 1.2136, "step": 5792 }, { "epoch": 1.8199736929933055, "grad_norm": 0.78125, "learning_rate": 1.2847984766740718e-05, "loss": 1.3761, "step": 5794 }, { "epoch": 1.8206019200188468, "grad_norm": 0.75390625, "learning_rate": 1.2845445890193591e-05, "loss": 1.1976, "step": 5796 }, { "epoch": 1.8212301470443881, "grad_norm": 0.78125, "learning_rate": 1.2842907013646463e-05, "loss": 1.3462, "step": 5798 }, { "epoch": 1.8218583740699295, "grad_norm": 0.875, "learning_rate": 1.2840368137099336e-05, "loss": 1.2293, "step": 5800 }, { "epoch": 1.8224866010954708, "grad_norm": 0.92578125, "learning_rate": 1.2837829260552205e-05, "loss": 1.2233, "step": 5802 }, { "epoch": 1.823114828121012, "grad_norm": 0.78125, "learning_rate": 1.2835290384005078e-05, "loss": 1.2824, "step": 5804 }, { "epoch": 1.8237430551465534, "grad_norm": 0.75390625, "learning_rate": 1.283275150745795e-05, "loss": 1.2794, "step": 5806 }, { "epoch": 1.824371282172095, "grad_norm": 0.78515625, "learning_rate": 1.2830212630910823e-05, "loss": 1.1825, "step": 5808 }, { "epoch": 1.8249995091976363, "grad_norm": 0.9296875, "learning_rate": 1.2827673754363694e-05, "loss": 1.1168, "step": 5810 }, { "epoch": 1.8256277362231776, "grad_norm": 0.83984375, "learning_rate": 1.2825134877816567e-05, "loss": 1.2021, "step": 5812 }, { "epoch": 1.826255963248719, "grad_norm": 0.81640625, "learning_rate": 1.2822596001269439e-05, "loss": 1.3024, "step": 5814 }, { "epoch": 1.8268841902742605, "grad_norm": 0.76953125, "learning_rate": 1.2820057124722312e-05, "loss": 1.2621, "step": 5816 }, { "epoch": 1.8275124172998019, "grad_norm": 0.80078125, "learning_rate": 1.2817518248175183e-05, "loss": 1.0934, "step": 5818 }, { "epoch": 1.8281406443253432, "grad_norm": 0.7890625, "learning_rate": 1.2814979371628056e-05, "loss": 1.3914, "step": 5820 }, { "epoch": 1.8287688713508845, "grad_norm": 0.8828125, "learning_rate": 1.2812440495080926e-05, "loss": 1.1312, "step": 5822 }, { "epoch": 1.8293970983764258, "grad_norm": 0.83203125, "learning_rate": 1.28099016185338e-05, "loss": 1.1222, "step": 5824 }, { "epoch": 1.8300253254019672, "grad_norm": 0.73828125, "learning_rate": 1.280736274198667e-05, "loss": 1.2303, "step": 5826 }, { "epoch": 1.8306535524275085, "grad_norm": 0.86328125, "learning_rate": 1.2804823865439543e-05, "loss": 1.2426, "step": 5828 }, { "epoch": 1.8312817794530498, "grad_norm": 0.77734375, "learning_rate": 1.2802284988892417e-05, "loss": 1.1024, "step": 5830 }, { "epoch": 1.8319100064785911, "grad_norm": 0.8515625, "learning_rate": 1.2799746112345288e-05, "loss": 1.3141, "step": 5832 }, { "epoch": 1.8325382335041325, "grad_norm": 0.75390625, "learning_rate": 1.2797207235798161e-05, "loss": 1.1605, "step": 5834 }, { "epoch": 1.8331664605296738, "grad_norm": 0.81640625, "learning_rate": 1.2794668359251032e-05, "loss": 1.1718, "step": 5836 }, { "epoch": 1.8337946875552151, "grad_norm": 0.7421875, "learning_rate": 1.2792129482703905e-05, "loss": 1.1882, "step": 5838 }, { "epoch": 1.8344229145807565, "grad_norm": 0.80859375, "learning_rate": 1.2789590606156777e-05, "loss": 1.3604, "step": 5840 }, { "epoch": 1.835051141606298, "grad_norm": 0.79296875, "learning_rate": 1.278705172960965e-05, "loss": 1.1974, "step": 5842 }, { "epoch": 1.8356793686318393, "grad_norm": 0.8828125, "learning_rate": 1.2784512853062521e-05, "loss": 1.1672, "step": 5844 }, { "epoch": 1.8363075956573807, "grad_norm": 0.80859375, "learning_rate": 1.2781973976515394e-05, "loss": 1.2177, "step": 5846 }, { "epoch": 1.836935822682922, "grad_norm": 0.91796875, "learning_rate": 1.2779435099968264e-05, "loss": 1.2313, "step": 5848 }, { "epoch": 1.8375640497084635, "grad_norm": 0.76171875, "learning_rate": 1.2776896223421139e-05, "loss": 1.3003, "step": 5850 }, { "epoch": 1.8381922767340049, "grad_norm": 0.84375, "learning_rate": 1.2774357346874009e-05, "loss": 1.192, "step": 5852 }, { "epoch": 1.8388205037595462, "grad_norm": 0.96484375, "learning_rate": 1.2771818470326882e-05, "loss": 1.2617, "step": 5854 }, { "epoch": 1.8394487307850875, "grad_norm": 0.76171875, "learning_rate": 1.2769279593779753e-05, "loss": 1.0411, "step": 5856 }, { "epoch": 1.8400769578106289, "grad_norm": 0.76953125, "learning_rate": 1.2766740717232626e-05, "loss": 1.2904, "step": 5858 }, { "epoch": 1.8407051848361702, "grad_norm": 0.79296875, "learning_rate": 1.2764201840685497e-05, "loss": 1.3183, "step": 5860 }, { "epoch": 1.8413334118617115, "grad_norm": 0.83984375, "learning_rate": 1.276166296413837e-05, "loss": 1.3205, "step": 5862 }, { "epoch": 1.8419616388872528, "grad_norm": 0.828125, "learning_rate": 1.2759124087591242e-05, "loss": 1.2196, "step": 5864 }, { "epoch": 1.8425898659127942, "grad_norm": 0.78515625, "learning_rate": 1.2756585211044115e-05, "loss": 1.243, "step": 5866 }, { "epoch": 1.8432180929383355, "grad_norm": 0.82421875, "learning_rate": 1.2754046334496986e-05, "loss": 1.1317, "step": 5868 }, { "epoch": 1.8438463199638768, "grad_norm": 0.82421875, "learning_rate": 1.275150745794986e-05, "loss": 1.2339, "step": 5870 }, { "epoch": 1.8444745469894182, "grad_norm": 0.77734375, "learning_rate": 1.2748968581402729e-05, "loss": 1.1604, "step": 5872 }, { "epoch": 1.8451027740149597, "grad_norm": 0.79296875, "learning_rate": 1.2746429704855602e-05, "loss": 1.1184, "step": 5874 }, { "epoch": 1.845731001040501, "grad_norm": 0.78125, "learning_rate": 1.2743890828308474e-05, "loss": 1.2737, "step": 5876 }, { "epoch": 1.8463592280660424, "grad_norm": 0.78125, "learning_rate": 1.2741351951761347e-05, "loss": 1.2273, "step": 5878 }, { "epoch": 1.8469874550915837, "grad_norm": 0.79296875, "learning_rate": 1.2738813075214218e-05, "loss": 1.2768, "step": 5880 }, { "epoch": 1.8476156821171252, "grad_norm": 0.73828125, "learning_rate": 1.2736274198667091e-05, "loss": 1.3388, "step": 5882 }, { "epoch": 1.8482439091426666, "grad_norm": 0.85546875, "learning_rate": 1.2733735322119962e-05, "loss": 1.2175, "step": 5884 }, { "epoch": 1.848872136168208, "grad_norm": 0.75390625, "learning_rate": 1.2731196445572836e-05, "loss": 1.2659, "step": 5886 }, { "epoch": 1.8495003631937492, "grad_norm": 0.84765625, "learning_rate": 1.2728657569025707e-05, "loss": 1.2642, "step": 5888 }, { "epoch": 1.8501285902192905, "grad_norm": 0.8203125, "learning_rate": 1.272611869247858e-05, "loss": 1.0971, "step": 5890 }, { "epoch": 1.8507568172448319, "grad_norm": 0.74609375, "learning_rate": 1.272357981593145e-05, "loss": 1.3001, "step": 5892 }, { "epoch": 1.8513850442703732, "grad_norm": 0.6953125, "learning_rate": 1.2721040939384324e-05, "loss": 1.457, "step": 5894 }, { "epoch": 1.8520132712959145, "grad_norm": 0.72265625, "learning_rate": 1.2718502062837194e-05, "loss": 1.3597, "step": 5896 }, { "epoch": 1.8526414983214559, "grad_norm": 0.89453125, "learning_rate": 1.2715963186290067e-05, "loss": 1.2853, "step": 5898 }, { "epoch": 1.8532697253469972, "grad_norm": 0.7734375, "learning_rate": 1.2713424309742939e-05, "loss": 1.2468, "step": 5900 }, { "epoch": 1.8538979523725385, "grad_norm": 0.76953125, "learning_rate": 1.2710885433195812e-05, "loss": 1.3252, "step": 5902 }, { "epoch": 1.8545261793980798, "grad_norm": 0.7421875, "learning_rate": 1.2708346556648683e-05, "loss": 1.3121, "step": 5904 }, { "epoch": 1.8551544064236212, "grad_norm": 0.81640625, "learning_rate": 1.2705807680101556e-05, "loss": 1.1819, "step": 5906 }, { "epoch": 1.8557826334491627, "grad_norm": 0.8359375, "learning_rate": 1.2703268803554428e-05, "loss": 1.2418, "step": 5908 }, { "epoch": 1.856410860474704, "grad_norm": 0.71875, "learning_rate": 1.27007299270073e-05, "loss": 1.3177, "step": 5910 }, { "epoch": 1.8570390875002454, "grad_norm": 0.78515625, "learning_rate": 1.2698191050460172e-05, "loss": 1.2514, "step": 5912 }, { "epoch": 1.8576673145257867, "grad_norm": 0.89453125, "learning_rate": 1.2695652173913045e-05, "loss": 1.2886, "step": 5914 }, { "epoch": 1.8582955415513283, "grad_norm": 0.82421875, "learning_rate": 1.2693113297365918e-05, "loss": 1.3111, "step": 5916 }, { "epoch": 1.8589237685768696, "grad_norm": 0.90625, "learning_rate": 1.2690574420818788e-05, "loss": 1.1563, "step": 5918 }, { "epoch": 1.859551995602411, "grad_norm": 2.40625, "learning_rate": 1.2688035544271663e-05, "loss": 1.2238, "step": 5920 }, { "epoch": 1.8601802226279522, "grad_norm": 0.8515625, "learning_rate": 1.2685496667724532e-05, "loss": 1.2487, "step": 5922 }, { "epoch": 1.8608084496534936, "grad_norm": 0.80859375, "learning_rate": 1.2682957791177405e-05, "loss": 1.2178, "step": 5924 }, { "epoch": 1.861436676679035, "grad_norm": 0.8125, "learning_rate": 1.2680418914630277e-05, "loss": 1.1426, "step": 5926 }, { "epoch": 1.8620649037045762, "grad_norm": 0.80859375, "learning_rate": 1.267788003808315e-05, "loss": 1.2623, "step": 5928 }, { "epoch": 1.8626931307301176, "grad_norm": 0.87109375, "learning_rate": 1.2675341161536021e-05, "loss": 1.0865, "step": 5930 }, { "epoch": 1.8633213577556589, "grad_norm": 0.85546875, "learning_rate": 1.2672802284988894e-05, "loss": 1.1582, "step": 5932 }, { "epoch": 1.8639495847812002, "grad_norm": 0.84765625, "learning_rate": 1.2670263408441766e-05, "loss": 0.9822, "step": 5934 }, { "epoch": 1.8645778118067415, "grad_norm": 1.0625, "learning_rate": 1.2667724531894639e-05, "loss": 1.1975, "step": 5936 }, { "epoch": 1.8652060388322829, "grad_norm": 0.87109375, "learning_rate": 1.266518565534751e-05, "loss": 1.1873, "step": 5938 }, { "epoch": 1.8658342658578244, "grad_norm": 0.8125, "learning_rate": 1.2662646778800383e-05, "loss": 1.2006, "step": 5940 }, { "epoch": 1.8664624928833657, "grad_norm": 0.78515625, "learning_rate": 1.2660107902253253e-05, "loss": 1.2259, "step": 5942 }, { "epoch": 1.867090719908907, "grad_norm": 0.8359375, "learning_rate": 1.2657569025706126e-05, "loss": 1.1884, "step": 5944 }, { "epoch": 1.8677189469344484, "grad_norm": 0.8671875, "learning_rate": 1.2655030149158997e-05, "loss": 1.1981, "step": 5946 }, { "epoch": 1.86834717395999, "grad_norm": 0.77734375, "learning_rate": 1.265249127261187e-05, "loss": 1.1178, "step": 5948 }, { "epoch": 1.8689754009855313, "grad_norm": 0.7578125, "learning_rate": 1.2649952396064742e-05, "loss": 1.2778, "step": 5950 }, { "epoch": 1.8696036280110726, "grad_norm": 0.78125, "learning_rate": 1.2647413519517615e-05, "loss": 1.171, "step": 5952 }, { "epoch": 1.870231855036614, "grad_norm": 0.7890625, "learning_rate": 1.2644874642970486e-05, "loss": 1.2154, "step": 5954 }, { "epoch": 1.8708600820621553, "grad_norm": 0.79296875, "learning_rate": 1.264233576642336e-05, "loss": 1.2481, "step": 5956 }, { "epoch": 1.8714883090876966, "grad_norm": 0.84375, "learning_rate": 1.263979688987623e-05, "loss": 1.2464, "step": 5958 }, { "epoch": 1.872116536113238, "grad_norm": 0.76171875, "learning_rate": 1.2637258013329104e-05, "loss": 1.1656, "step": 5960 }, { "epoch": 1.8727447631387792, "grad_norm": 0.73046875, "learning_rate": 1.2634719136781975e-05, "loss": 1.3967, "step": 5962 }, { "epoch": 1.8733729901643206, "grad_norm": 0.81640625, "learning_rate": 1.2632180260234848e-05, "loss": 1.295, "step": 5964 }, { "epoch": 1.874001217189862, "grad_norm": 0.71875, "learning_rate": 1.2629641383687718e-05, "loss": 1.1594, "step": 5966 }, { "epoch": 1.8746294442154032, "grad_norm": 0.86328125, "learning_rate": 1.2627102507140591e-05, "loss": 1.1973, "step": 5968 }, { "epoch": 1.8752576712409446, "grad_norm": 0.9765625, "learning_rate": 1.2624563630593462e-05, "loss": 1.3587, "step": 5970 }, { "epoch": 1.8758858982664859, "grad_norm": 0.80078125, "learning_rate": 1.2622024754046335e-05, "loss": 1.3327, "step": 5972 }, { "epoch": 1.8765141252920274, "grad_norm": 0.8515625, "learning_rate": 1.2619485877499207e-05, "loss": 1.1387, "step": 5974 }, { "epoch": 1.8771423523175688, "grad_norm": 0.9140625, "learning_rate": 1.261694700095208e-05, "loss": 1.1447, "step": 5976 }, { "epoch": 1.87777057934311, "grad_norm": 0.75, "learning_rate": 1.2614408124404951e-05, "loss": 1.1872, "step": 5978 }, { "epoch": 1.8783988063686514, "grad_norm": 0.7890625, "learning_rate": 1.2611869247857824e-05, "loss": 1.2919, "step": 5980 }, { "epoch": 1.879027033394193, "grad_norm": 0.78125, "learning_rate": 1.2609330371310696e-05, "loss": 1.3053, "step": 5982 }, { "epoch": 1.8796552604197343, "grad_norm": 0.8125, "learning_rate": 1.2606791494763569e-05, "loss": 1.1493, "step": 5984 }, { "epoch": 1.8802834874452756, "grad_norm": 0.765625, "learning_rate": 1.2604252618216439e-05, "loss": 1.2792, "step": 5986 }, { "epoch": 1.880911714470817, "grad_norm": 0.765625, "learning_rate": 1.2601713741669313e-05, "loss": 1.3208, "step": 5988 }, { "epoch": 1.8815399414963583, "grad_norm": 0.80859375, "learning_rate": 1.2599174865122183e-05, "loss": 1.1665, "step": 5990 }, { "epoch": 1.8821681685218996, "grad_norm": 0.73828125, "learning_rate": 1.2596635988575056e-05, "loss": 1.2419, "step": 5992 }, { "epoch": 1.882796395547441, "grad_norm": 0.83203125, "learning_rate": 1.2594097112027927e-05, "loss": 1.4174, "step": 5994 }, { "epoch": 1.8834246225729823, "grad_norm": 0.8984375, "learning_rate": 1.25915582354808e-05, "loss": 1.2457, "step": 5996 }, { "epoch": 1.8840528495985236, "grad_norm": 0.80859375, "learning_rate": 1.2589019358933672e-05, "loss": 1.2441, "step": 5998 }, { "epoch": 1.884681076624065, "grad_norm": 0.8046875, "learning_rate": 1.2586480482386545e-05, "loss": 1.2268, "step": 6000 }, { "epoch": 1.8853093036496062, "grad_norm": 0.8828125, "learning_rate": 1.2583941605839418e-05, "loss": 1.117, "step": 6002 }, { "epoch": 1.8859375306751476, "grad_norm": 0.76953125, "learning_rate": 1.258140272929229e-05, "loss": 1.2595, "step": 6004 }, { "epoch": 1.8865657577006891, "grad_norm": 1.0078125, "learning_rate": 1.2578863852745163e-05, "loss": 1.2623, "step": 6006 }, { "epoch": 1.8871939847262305, "grad_norm": 0.8671875, "learning_rate": 1.2576324976198034e-05, "loss": 1.2805, "step": 6008 }, { "epoch": 1.8878222117517718, "grad_norm": 0.8359375, "learning_rate": 1.2573786099650907e-05, "loss": 1.1744, "step": 6010 }, { "epoch": 1.8884504387773131, "grad_norm": 0.875, "learning_rate": 1.2571247223103777e-05, "loss": 1.3327, "step": 6012 }, { "epoch": 1.8890786658028547, "grad_norm": 0.73046875, "learning_rate": 1.2568708346556651e-05, "loss": 1.1917, "step": 6014 }, { "epoch": 1.889706892828396, "grad_norm": 0.75390625, "learning_rate": 1.2566169470009521e-05, "loss": 1.4492, "step": 6016 }, { "epoch": 1.8903351198539373, "grad_norm": 0.8515625, "learning_rate": 1.2563630593462394e-05, "loss": 1.1507, "step": 6018 }, { "epoch": 1.8909633468794786, "grad_norm": 0.80859375, "learning_rate": 1.2561091716915266e-05, "loss": 1.2665, "step": 6020 }, { "epoch": 1.89159157390502, "grad_norm": 0.8671875, "learning_rate": 1.2558552840368139e-05, "loss": 1.2222, "step": 6022 }, { "epoch": 1.8922198009305613, "grad_norm": 0.81640625, "learning_rate": 1.255601396382101e-05, "loss": 1.2067, "step": 6024 }, { "epoch": 1.8928480279561026, "grad_norm": 0.82421875, "learning_rate": 1.2553475087273883e-05, "loss": 1.1897, "step": 6026 }, { "epoch": 1.893476254981644, "grad_norm": 0.9375, "learning_rate": 1.2550936210726755e-05, "loss": 1.1622, "step": 6028 }, { "epoch": 1.8941044820071853, "grad_norm": 0.91796875, "learning_rate": 1.2548397334179628e-05, "loss": 1.3068, "step": 6030 }, { "epoch": 1.8947327090327266, "grad_norm": 0.8984375, "learning_rate": 1.2545858457632499e-05, "loss": 1.2051, "step": 6032 }, { "epoch": 1.895360936058268, "grad_norm": 0.76171875, "learning_rate": 1.2543319581085372e-05, "loss": 1.1956, "step": 6034 }, { "epoch": 1.8959891630838093, "grad_norm": 0.8125, "learning_rate": 1.2540780704538242e-05, "loss": 1.3253, "step": 6036 }, { "epoch": 1.8966173901093508, "grad_norm": 0.83203125, "learning_rate": 1.2538241827991115e-05, "loss": 1.2558, "step": 6038 }, { "epoch": 1.8972456171348921, "grad_norm": 0.80078125, "learning_rate": 1.2535702951443986e-05, "loss": 1.2556, "step": 6040 }, { "epoch": 1.8978738441604335, "grad_norm": 0.8125, "learning_rate": 1.253316407489686e-05, "loss": 1.1596, "step": 6042 }, { "epoch": 1.8985020711859748, "grad_norm": 0.80859375, "learning_rate": 1.253062519834973e-05, "loss": 1.2663, "step": 6044 }, { "epoch": 1.8991302982115161, "grad_norm": 0.81640625, "learning_rate": 1.2528086321802604e-05, "loss": 1.2155, "step": 6046 }, { "epoch": 1.8997585252370577, "grad_norm": 0.75390625, "learning_rate": 1.2525547445255475e-05, "loss": 1.272, "step": 6048 }, { "epoch": 1.900386752262599, "grad_norm": 0.78125, "learning_rate": 1.2523008568708348e-05, "loss": 1.2012, "step": 6050 }, { "epoch": 1.9010149792881403, "grad_norm": 0.76171875, "learning_rate": 1.252046969216122e-05, "loss": 1.2281, "step": 6052 }, { "epoch": 1.9016432063136817, "grad_norm": 0.81640625, "learning_rate": 1.2517930815614093e-05, "loss": 1.2045, "step": 6054 }, { "epoch": 1.902271433339223, "grad_norm": 0.79296875, "learning_rate": 1.2515391939066962e-05, "loss": 1.2902, "step": 6056 }, { "epoch": 1.9028996603647643, "grad_norm": 0.80078125, "learning_rate": 1.2512853062519837e-05, "loss": 1.2146, "step": 6058 }, { "epoch": 1.9035278873903057, "grad_norm": 0.83203125, "learning_rate": 1.2510314185972707e-05, "loss": 1.1062, "step": 6060 }, { "epoch": 1.904156114415847, "grad_norm": 0.83984375, "learning_rate": 1.250777530942558e-05, "loss": 1.1466, "step": 6062 }, { "epoch": 1.9047843414413883, "grad_norm": 0.7890625, "learning_rate": 1.2505236432878451e-05, "loss": 1.2591, "step": 6064 }, { "epoch": 1.9054125684669296, "grad_norm": 0.8203125, "learning_rate": 1.2502697556331324e-05, "loss": 1.2363, "step": 6066 }, { "epoch": 1.906040795492471, "grad_norm": 0.7265625, "learning_rate": 1.2500158679784196e-05, "loss": 1.3046, "step": 6068 }, { "epoch": 1.9066690225180123, "grad_norm": 0.765625, "learning_rate": 1.2497619803237069e-05, "loss": 1.3882, "step": 6070 }, { "epoch": 1.9072972495435538, "grad_norm": 0.81640625, "learning_rate": 1.249508092668994e-05, "loss": 1.2277, "step": 6072 }, { "epoch": 1.9079254765690952, "grad_norm": 0.76171875, "learning_rate": 1.2492542050142813e-05, "loss": 1.2135, "step": 6074 }, { "epoch": 1.9085537035946365, "grad_norm": 0.80859375, "learning_rate": 1.2490003173595685e-05, "loss": 1.171, "step": 6076 }, { "epoch": 1.9091819306201778, "grad_norm": 0.8359375, "learning_rate": 1.2487464297048558e-05, "loss": 1.2015, "step": 6078 }, { "epoch": 1.9098101576457194, "grad_norm": 0.78515625, "learning_rate": 1.2484925420501427e-05, "loss": 1.3251, "step": 6080 }, { "epoch": 1.9104383846712607, "grad_norm": 0.74609375, "learning_rate": 1.24823865439543e-05, "loss": 1.3621, "step": 6082 }, { "epoch": 1.911066611696802, "grad_norm": 0.71484375, "learning_rate": 1.2479847667407172e-05, "loss": 1.2972, "step": 6084 }, { "epoch": 1.9116948387223434, "grad_norm": 0.8671875, "learning_rate": 1.2477308790860045e-05, "loss": 1.1877, "step": 6086 }, { "epoch": 1.9123230657478847, "grad_norm": 0.78125, "learning_rate": 1.2474769914312918e-05, "loss": 1.221, "step": 6088 }, { "epoch": 1.912951292773426, "grad_norm": 0.85546875, "learning_rate": 1.247223103776579e-05, "loss": 1.3329, "step": 6090 }, { "epoch": 1.9135795197989673, "grad_norm": 0.84375, "learning_rate": 1.2469692161218662e-05, "loss": 1.2478, "step": 6092 }, { "epoch": 1.9142077468245087, "grad_norm": 0.859375, "learning_rate": 1.2467153284671534e-05, "loss": 1.2821, "step": 6094 }, { "epoch": 1.91483597385005, "grad_norm": 0.765625, "learning_rate": 1.2464614408124407e-05, "loss": 1.1848, "step": 6096 }, { "epoch": 1.9154642008755913, "grad_norm": 0.81640625, "learning_rate": 1.2462075531577278e-05, "loss": 1.2984, "step": 6098 }, { "epoch": 1.9160924279011327, "grad_norm": 0.69921875, "learning_rate": 1.2459536655030151e-05, "loss": 1.1486, "step": 6100 }, { "epoch": 1.916720654926674, "grad_norm": 0.74609375, "learning_rate": 1.2456997778483023e-05, "loss": 1.3557, "step": 6102 }, { "epoch": 1.9173488819522155, "grad_norm": 0.76953125, "learning_rate": 1.2454458901935896e-05, "loss": 1.2475, "step": 6104 }, { "epoch": 1.9179771089777569, "grad_norm": 0.83984375, "learning_rate": 1.2451920025388766e-05, "loss": 1.1533, "step": 6106 }, { "epoch": 1.9186053360032982, "grad_norm": 0.796875, "learning_rate": 1.2449381148841639e-05, "loss": 1.2381, "step": 6108 }, { "epoch": 1.9192335630288395, "grad_norm": 1.0234375, "learning_rate": 1.244684227229451e-05, "loss": 1.2304, "step": 6110 }, { "epoch": 1.919861790054381, "grad_norm": 0.75, "learning_rate": 1.2444303395747383e-05, "loss": 1.3272, "step": 6112 }, { "epoch": 1.9204900170799224, "grad_norm": 0.796875, "learning_rate": 1.2441764519200254e-05, "loss": 1.1725, "step": 6114 }, { "epoch": 1.9211182441054637, "grad_norm": 0.953125, "learning_rate": 1.2439225642653128e-05, "loss": 1.0932, "step": 6116 }, { "epoch": 1.921746471131005, "grad_norm": 0.75390625, "learning_rate": 1.2436686766105999e-05, "loss": 1.2361, "step": 6118 }, { "epoch": 1.9223746981565464, "grad_norm": 0.7421875, "learning_rate": 1.2434147889558872e-05, "loss": 1.3047, "step": 6120 }, { "epoch": 1.9230029251820877, "grad_norm": 0.86328125, "learning_rate": 1.2431609013011743e-05, "loss": 1.3639, "step": 6122 }, { "epoch": 1.923631152207629, "grad_norm": 0.9140625, "learning_rate": 1.2429070136464616e-05, "loss": 1.1849, "step": 6124 }, { "epoch": 1.9242593792331704, "grad_norm": 0.77734375, "learning_rate": 1.2426531259917488e-05, "loss": 1.2506, "step": 6126 }, { "epoch": 1.9248876062587117, "grad_norm": 0.79296875, "learning_rate": 1.2423992383370361e-05, "loss": 1.2748, "step": 6128 }, { "epoch": 1.925515833284253, "grad_norm": 0.921875, "learning_rate": 1.242145350682323e-05, "loss": 1.3513, "step": 6130 }, { "epoch": 1.9261440603097943, "grad_norm": 0.75, "learning_rate": 1.2418914630276104e-05, "loss": 1.3052, "step": 6132 }, { "epoch": 1.9267722873353357, "grad_norm": 0.796875, "learning_rate": 1.2416375753728975e-05, "loss": 1.1691, "step": 6134 }, { "epoch": 1.927400514360877, "grad_norm": 0.74609375, "learning_rate": 1.2413836877181848e-05, "loss": 1.1503, "step": 6136 }, { "epoch": 1.9280287413864186, "grad_norm": 0.85546875, "learning_rate": 1.241129800063472e-05, "loss": 1.218, "step": 6138 }, { "epoch": 1.9286569684119599, "grad_norm": 0.93359375, "learning_rate": 1.2408759124087593e-05, "loss": 1.1987, "step": 6140 }, { "epoch": 1.9292851954375012, "grad_norm": 0.76171875, "learning_rate": 1.2406220247540464e-05, "loss": 1.1148, "step": 6142 }, { "epoch": 1.9299134224630425, "grad_norm": 0.80859375, "learning_rate": 1.2403681370993337e-05, "loss": 1.3232, "step": 6144 }, { "epoch": 1.930541649488584, "grad_norm": 0.7734375, "learning_rate": 1.2401142494446208e-05, "loss": 1.2638, "step": 6146 }, { "epoch": 1.9311698765141254, "grad_norm": 0.8359375, "learning_rate": 1.2398603617899081e-05, "loss": 1.282, "step": 6148 }, { "epoch": 1.9317981035396667, "grad_norm": 0.80859375, "learning_rate": 1.2396064741351951e-05, "loss": 1.292, "step": 6150 }, { "epoch": 1.932426330565208, "grad_norm": 0.796875, "learning_rate": 1.2393525864804826e-05, "loss": 1.3647, "step": 6152 }, { "epoch": 1.9330545575907494, "grad_norm": 0.828125, "learning_rate": 1.2390986988257696e-05, "loss": 1.1766, "step": 6154 }, { "epoch": 1.9336827846162907, "grad_norm": 0.7578125, "learning_rate": 1.2388448111710569e-05, "loss": 1.2779, "step": 6156 }, { "epoch": 1.934311011641832, "grad_norm": 0.8984375, "learning_rate": 1.238590923516344e-05, "loss": 1.2979, "step": 6158 }, { "epoch": 1.9349392386673734, "grad_norm": 0.7890625, "learning_rate": 1.2383370358616313e-05, "loss": 1.3134, "step": 6160 }, { "epoch": 1.9355674656929147, "grad_norm": 0.96875, "learning_rate": 1.2380831482069185e-05, "loss": 1.1554, "step": 6162 }, { "epoch": 1.936195692718456, "grad_norm": 0.8046875, "learning_rate": 1.2378292605522058e-05, "loss": 1.3649, "step": 6164 }, { "epoch": 1.9368239197439974, "grad_norm": 0.76953125, "learning_rate": 1.2375753728974929e-05, "loss": 1.2077, "step": 6166 }, { "epoch": 1.9374521467695387, "grad_norm": 0.84765625, "learning_rate": 1.2373214852427802e-05, "loss": 1.288, "step": 6168 }, { "epoch": 1.9380803737950802, "grad_norm": 0.85546875, "learning_rate": 1.2370675975880673e-05, "loss": 1.3333, "step": 6170 }, { "epoch": 1.9387086008206216, "grad_norm": 0.84375, "learning_rate": 1.2368137099333547e-05, "loss": 1.345, "step": 6172 }, { "epoch": 1.939336827846163, "grad_norm": 0.83203125, "learning_rate": 1.236559822278642e-05, "loss": 1.3079, "step": 6174 }, { "epoch": 1.9399650548717042, "grad_norm": 0.80859375, "learning_rate": 1.236305934623929e-05, "loss": 1.2672, "step": 6176 }, { "epoch": 1.9405932818972458, "grad_norm": 0.75390625, "learning_rate": 1.2360520469692162e-05, "loss": 1.2107, "step": 6178 }, { "epoch": 1.941221508922787, "grad_norm": 0.76171875, "learning_rate": 1.2357981593145034e-05, "loss": 1.2125, "step": 6180 }, { "epoch": 1.9418497359483284, "grad_norm": 0.77734375, "learning_rate": 1.2355442716597907e-05, "loss": 1.2236, "step": 6182 }, { "epoch": 1.9424779629738698, "grad_norm": 0.8359375, "learning_rate": 1.2352903840050778e-05, "loss": 1.1823, "step": 6184 }, { "epoch": 1.943106189999411, "grad_norm": 0.9765625, "learning_rate": 1.2350364963503651e-05, "loss": 1.2182, "step": 6186 }, { "epoch": 1.9437344170249524, "grad_norm": 0.8125, "learning_rate": 1.2347826086956523e-05, "loss": 1.2996, "step": 6188 }, { "epoch": 1.9443626440504937, "grad_norm": 0.796875, "learning_rate": 1.2345287210409396e-05, "loss": 1.1519, "step": 6190 }, { "epoch": 1.944990871076035, "grad_norm": 0.7578125, "learning_rate": 1.2342748333862267e-05, "loss": 1.125, "step": 6192 }, { "epoch": 1.9456190981015764, "grad_norm": 0.8046875, "learning_rate": 1.234020945731514e-05, "loss": 1.1524, "step": 6194 }, { "epoch": 1.9462473251271177, "grad_norm": 0.765625, "learning_rate": 1.2337670580768012e-05, "loss": 1.2544, "step": 6196 }, { "epoch": 1.946875552152659, "grad_norm": 0.8125, "learning_rate": 1.2335131704220885e-05, "loss": 1.1555, "step": 6198 }, { "epoch": 1.9475037791782004, "grad_norm": 0.75390625, "learning_rate": 1.2332592827673754e-05, "loss": 1.2127, "step": 6200 }, { "epoch": 1.9481320062037417, "grad_norm": 0.984375, "learning_rate": 1.2330053951126627e-05, "loss": 1.2985, "step": 6202 }, { "epoch": 1.9487602332292833, "grad_norm": 0.83984375, "learning_rate": 1.2327515074579499e-05, "loss": 1.3043, "step": 6204 }, { "epoch": 1.9493884602548246, "grad_norm": 0.78515625, "learning_rate": 1.2324976198032372e-05, "loss": 1.2911, "step": 6206 }, { "epoch": 1.950016687280366, "grad_norm": 0.875, "learning_rate": 1.2322437321485243e-05, "loss": 1.2217, "step": 6208 }, { "epoch": 1.9506449143059073, "grad_norm": 0.7734375, "learning_rate": 1.2319898444938116e-05, "loss": 1.2718, "step": 6210 }, { "epoch": 1.9512731413314488, "grad_norm": 0.79296875, "learning_rate": 1.2317359568390988e-05, "loss": 1.3247, "step": 6212 }, { "epoch": 1.9519013683569901, "grad_norm": 0.7890625, "learning_rate": 1.231482069184386e-05, "loss": 1.31, "step": 6214 }, { "epoch": 1.9525295953825315, "grad_norm": 0.7265625, "learning_rate": 1.2312281815296732e-05, "loss": 1.2022, "step": 6216 }, { "epoch": 1.9531578224080728, "grad_norm": 0.78515625, "learning_rate": 1.2309742938749605e-05, "loss": 1.1422, "step": 6218 }, { "epoch": 1.9537860494336141, "grad_norm": 0.78125, "learning_rate": 1.2307204062202475e-05, "loss": 1.1487, "step": 6220 }, { "epoch": 1.9544142764591554, "grad_norm": 0.9609375, "learning_rate": 1.230466518565535e-05, "loss": 1.2259, "step": 6222 }, { "epoch": 1.9550425034846968, "grad_norm": 0.859375, "learning_rate": 1.230212630910822e-05, "loss": 1.1845, "step": 6224 }, { "epoch": 1.955670730510238, "grad_norm": 0.76953125, "learning_rate": 1.2299587432561093e-05, "loss": 1.2533, "step": 6226 }, { "epoch": 1.9562989575357794, "grad_norm": 0.78515625, "learning_rate": 1.2297048556013964e-05, "loss": 1.2527, "step": 6228 }, { "epoch": 1.9569271845613208, "grad_norm": 0.78125, "learning_rate": 1.2294509679466837e-05, "loss": 1.2083, "step": 6230 }, { "epoch": 1.957555411586862, "grad_norm": 0.75, "learning_rate": 1.2291970802919708e-05, "loss": 1.2065, "step": 6232 }, { "epoch": 1.9581836386124034, "grad_norm": 0.7578125, "learning_rate": 1.2289431926372581e-05, "loss": 1.2186, "step": 6234 }, { "epoch": 1.958811865637945, "grad_norm": 0.78125, "learning_rate": 1.2286893049825453e-05, "loss": 1.2687, "step": 6236 }, { "epoch": 1.9594400926634863, "grad_norm": 0.76171875, "learning_rate": 1.2284354173278326e-05, "loss": 1.2311, "step": 6238 }, { "epoch": 1.9600683196890276, "grad_norm": 0.87890625, "learning_rate": 1.2281815296731197e-05, "loss": 1.1847, "step": 6240 }, { "epoch": 1.960696546714569, "grad_norm": 0.7578125, "learning_rate": 1.227927642018407e-05, "loss": 1.2003, "step": 6242 }, { "epoch": 1.9613247737401105, "grad_norm": 0.7890625, "learning_rate": 1.227673754363694e-05, "loss": 1.4039, "step": 6244 }, { "epoch": 1.9619530007656518, "grad_norm": 0.828125, "learning_rate": 1.2274198667089813e-05, "loss": 1.2274, "step": 6246 }, { "epoch": 1.9625812277911932, "grad_norm": 0.82421875, "learning_rate": 1.2271659790542684e-05, "loss": 1.2743, "step": 6248 }, { "epoch": 1.9632094548167345, "grad_norm": 0.734375, "learning_rate": 1.2269120913995558e-05, "loss": 1.414, "step": 6250 }, { "epoch": 1.9638376818422758, "grad_norm": 0.90625, "learning_rate": 1.2266582037448429e-05, "loss": 1.2246, "step": 6252 }, { "epoch": 1.9644659088678171, "grad_norm": 0.73046875, "learning_rate": 1.2264043160901302e-05, "loss": 1.2005, "step": 6254 }, { "epoch": 1.9650941358933585, "grad_norm": 0.91015625, "learning_rate": 1.2261504284354175e-05, "loss": 1.2002, "step": 6256 }, { "epoch": 1.9657223629188998, "grad_norm": 0.7421875, "learning_rate": 1.2258965407807046e-05, "loss": 1.2792, "step": 6258 }, { "epoch": 1.9663505899444411, "grad_norm": 0.796875, "learning_rate": 1.225642653125992e-05, "loss": 1.0757, "step": 6260 }, { "epoch": 1.9669788169699824, "grad_norm": 0.7890625, "learning_rate": 1.2253887654712791e-05, "loss": 1.3089, "step": 6262 }, { "epoch": 1.9676070439955238, "grad_norm": 0.828125, "learning_rate": 1.2251348778165664e-05, "loss": 1.2696, "step": 6264 }, { "epoch": 1.968235271021065, "grad_norm": 0.7890625, "learning_rate": 1.2248809901618535e-05, "loss": 1.2951, "step": 6266 }, { "epoch": 1.9688634980466064, "grad_norm": 0.74609375, "learning_rate": 1.2246271025071408e-05, "loss": 1.232, "step": 6268 }, { "epoch": 1.969491725072148, "grad_norm": 0.76171875, "learning_rate": 1.2243732148524278e-05, "loss": 1.3751, "step": 6270 }, { "epoch": 1.9701199520976893, "grad_norm": 0.83203125, "learning_rate": 1.2241193271977151e-05, "loss": 1.3549, "step": 6272 }, { "epoch": 1.9707481791232306, "grad_norm": 1.046875, "learning_rate": 1.2238654395430023e-05, "loss": 1.2652, "step": 6274 }, { "epoch": 1.971376406148772, "grad_norm": 0.80859375, "learning_rate": 1.2236115518882896e-05, "loss": 1.2687, "step": 6276 }, { "epoch": 1.9720046331743135, "grad_norm": 0.7890625, "learning_rate": 1.2233576642335767e-05, "loss": 1.4137, "step": 6278 }, { "epoch": 1.9726328601998548, "grad_norm": 0.828125, "learning_rate": 1.223103776578864e-05, "loss": 1.1847, "step": 6280 }, { "epoch": 1.9732610872253962, "grad_norm": 0.78125, "learning_rate": 1.2228498889241512e-05, "loss": 1.3383, "step": 6282 }, { "epoch": 1.9738893142509375, "grad_norm": 0.8125, "learning_rate": 1.2225960012694385e-05, "loss": 1.2893, "step": 6284 }, { "epoch": 1.9745175412764788, "grad_norm": 0.765625, "learning_rate": 1.2223421136147256e-05, "loss": 1.2479, "step": 6286 }, { "epoch": 1.9751457683020202, "grad_norm": 0.8359375, "learning_rate": 1.2220882259600129e-05, "loss": 1.2674, "step": 6288 }, { "epoch": 1.9757739953275615, "grad_norm": 0.71484375, "learning_rate": 1.2218343383053e-05, "loss": 1.332, "step": 6290 }, { "epoch": 1.9764022223531028, "grad_norm": 0.8203125, "learning_rate": 1.2215804506505874e-05, "loss": 1.3086, "step": 6292 }, { "epoch": 1.9770304493786441, "grad_norm": 0.74609375, "learning_rate": 1.2213265629958743e-05, "loss": 1.3607, "step": 6294 }, { "epoch": 1.9776586764041855, "grad_norm": 0.78515625, "learning_rate": 1.2210726753411616e-05, "loss": 1.2653, "step": 6296 }, { "epoch": 1.9782869034297268, "grad_norm": 0.77734375, "learning_rate": 1.2208187876864488e-05, "loss": 1.3309, "step": 6298 }, { "epoch": 1.9789151304552681, "grad_norm": 0.80859375, "learning_rate": 1.220564900031736e-05, "loss": 1.3084, "step": 6300 }, { "epoch": 1.9795433574808097, "grad_norm": 0.82421875, "learning_rate": 1.2203110123770232e-05, "loss": 1.2319, "step": 6302 }, { "epoch": 1.980171584506351, "grad_norm": 0.76171875, "learning_rate": 1.2200571247223105e-05, "loss": 1.193, "step": 6304 }, { "epoch": 1.9807998115318923, "grad_norm": 0.7890625, "learning_rate": 1.2198032370675977e-05, "loss": 1.1819, "step": 6306 }, { "epoch": 1.9814280385574337, "grad_norm": 0.75390625, "learning_rate": 1.219549349412885e-05, "loss": 1.2694, "step": 6308 }, { "epoch": 1.9820562655829752, "grad_norm": 0.796875, "learning_rate": 1.2192954617581721e-05, "loss": 1.2051, "step": 6310 }, { "epoch": 1.9826844926085165, "grad_norm": 0.8125, "learning_rate": 1.2190415741034594e-05, "loss": 1.1432, "step": 6312 }, { "epoch": 1.9833127196340579, "grad_norm": 0.859375, "learning_rate": 1.2187876864487464e-05, "loss": 1.1966, "step": 6314 }, { "epoch": 1.9839409466595992, "grad_norm": 0.73046875, "learning_rate": 1.2185337987940337e-05, "loss": 1.1952, "step": 6316 }, { "epoch": 1.9845691736851405, "grad_norm": 0.796875, "learning_rate": 1.2182799111393208e-05, "loss": 1.2597, "step": 6318 }, { "epoch": 1.9851974007106818, "grad_norm": 1.2578125, "learning_rate": 1.2180260234846081e-05, "loss": 1.1049, "step": 6320 }, { "epoch": 1.9858256277362232, "grad_norm": 0.73828125, "learning_rate": 1.2177721358298953e-05, "loss": 1.2622, "step": 6322 }, { "epoch": 1.9864538547617645, "grad_norm": 0.75390625, "learning_rate": 1.2175182481751826e-05, "loss": 1.2554, "step": 6324 }, { "epoch": 1.9870820817873058, "grad_norm": 0.8046875, "learning_rate": 1.2172643605204697e-05, "loss": 1.4156, "step": 6326 }, { "epoch": 1.9877103088128472, "grad_norm": 0.78125, "learning_rate": 1.217010472865757e-05, "loss": 1.2517, "step": 6328 }, { "epoch": 1.9883385358383885, "grad_norm": 0.83203125, "learning_rate": 1.2167565852110442e-05, "loss": 1.3161, "step": 6330 }, { "epoch": 1.9889667628639298, "grad_norm": 0.828125, "learning_rate": 1.2165026975563315e-05, "loss": 1.2247, "step": 6332 }, { "epoch": 1.9895949898894711, "grad_norm": 0.8203125, "learning_rate": 1.2162488099016186e-05, "loss": 1.1215, "step": 6334 }, { "epoch": 1.9902232169150127, "grad_norm": 0.8046875, "learning_rate": 1.215994922246906e-05, "loss": 1.1817, "step": 6336 }, { "epoch": 1.990851443940554, "grad_norm": 0.8515625, "learning_rate": 1.2157410345921929e-05, "loss": 1.2248, "step": 6338 }, { "epoch": 1.9914796709660953, "grad_norm": 0.7734375, "learning_rate": 1.2154871469374802e-05, "loss": 1.3265, "step": 6340 }, { "epoch": 1.9921078979916367, "grad_norm": 0.81640625, "learning_rate": 1.2152332592827675e-05, "loss": 1.2027, "step": 6342 }, { "epoch": 1.9927361250171782, "grad_norm": 1.234375, "learning_rate": 1.2149793716280546e-05, "loss": 1.1223, "step": 6344 }, { "epoch": 1.9933643520427196, "grad_norm": 0.86328125, "learning_rate": 1.214725483973342e-05, "loss": 1.2041, "step": 6346 }, { "epoch": 1.9939925790682609, "grad_norm": 0.86328125, "learning_rate": 1.2144715963186291e-05, "loss": 1.2209, "step": 6348 }, { "epoch": 1.9946208060938022, "grad_norm": 0.83984375, "learning_rate": 1.2142177086639164e-05, "loss": 1.3876, "step": 6350 }, { "epoch": 1.9952490331193435, "grad_norm": 0.83203125, "learning_rate": 1.2139638210092035e-05, "loss": 1.341, "step": 6352 }, { "epoch": 1.9958772601448849, "grad_norm": 0.83203125, "learning_rate": 1.2137099333544908e-05, "loss": 1.2775, "step": 6354 }, { "epoch": 1.9965054871704262, "grad_norm": 0.8046875, "learning_rate": 1.213456045699778e-05, "loss": 1.2379, "step": 6356 }, { "epoch": 1.9971337141959675, "grad_norm": 0.85546875, "learning_rate": 1.2132021580450653e-05, "loss": 1.311, "step": 6358 }, { "epoch": 1.9977619412215089, "grad_norm": 0.84765625, "learning_rate": 1.2129482703903524e-05, "loss": 1.3089, "step": 6360 }, { "epoch": 1.9983901682470502, "grad_norm": 0.84765625, "learning_rate": 1.2126943827356397e-05, "loss": 1.1843, "step": 6362 }, { "epoch": 1.9990183952725915, "grad_norm": 0.7734375, "learning_rate": 1.2124404950809267e-05, "loss": 1.1485, "step": 6364 }, { "epoch": 1.9996466222981328, "grad_norm": 0.875, "learning_rate": 1.212186607426214e-05, "loss": 1.0792, "step": 6366 }, { "epoch": 2.000274849323674, "grad_norm": 0.7265625, "learning_rate": 1.2119327197715011e-05, "loss": 1.3043, "step": 6368 }, { "epoch": 2.0009030763492155, "grad_norm": 0.78515625, "learning_rate": 1.2116788321167885e-05, "loss": 1.1913, "step": 6370 }, { "epoch": 2.0015313033747573, "grad_norm": 0.8046875, "learning_rate": 1.2114249444620756e-05, "loss": 1.1807, "step": 6372 }, { "epoch": 2.0021595304002986, "grad_norm": 0.91015625, "learning_rate": 1.2111710568073629e-05, "loss": 1.1107, "step": 6374 }, { "epoch": 2.00278775742584, "grad_norm": 0.89453125, "learning_rate": 1.21091716915265e-05, "loss": 1.2526, "step": 6376 }, { "epoch": 2.0034159844513812, "grad_norm": 0.78125, "learning_rate": 1.2106632814979373e-05, "loss": 1.1686, "step": 6378 }, { "epoch": 2.0040442114769226, "grad_norm": 0.87890625, "learning_rate": 1.2104093938432245e-05, "loss": 1.0691, "step": 6380 }, { "epoch": 2.004672438502464, "grad_norm": 0.96875, "learning_rate": 1.2101555061885118e-05, "loss": 1.199, "step": 6382 }, { "epoch": 2.0053006655280052, "grad_norm": 0.875, "learning_rate": 1.2099016185337988e-05, "loss": 1.194, "step": 6384 }, { "epoch": 2.0059288925535466, "grad_norm": 1.0, "learning_rate": 1.2096477308790862e-05, "loss": 1.2757, "step": 6386 }, { "epoch": 2.006557119579088, "grad_norm": 0.75, "learning_rate": 1.2093938432243732e-05, "loss": 1.2772, "step": 6388 }, { "epoch": 2.007185346604629, "grad_norm": 0.87890625, "learning_rate": 1.2091399555696605e-05, "loss": 1.2131, "step": 6390 }, { "epoch": 2.0078135736301705, "grad_norm": 0.90625, "learning_rate": 1.2088860679149477e-05, "loss": 1.0315, "step": 6392 }, { "epoch": 2.008441800655712, "grad_norm": 0.8203125, "learning_rate": 1.208632180260235e-05, "loss": 1.207, "step": 6394 }, { "epoch": 2.009070027681253, "grad_norm": 0.90625, "learning_rate": 1.2083782926055221e-05, "loss": 1.1659, "step": 6396 }, { "epoch": 2.0096982547067945, "grad_norm": 0.859375, "learning_rate": 1.2081244049508094e-05, "loss": 1.1712, "step": 6398 }, { "epoch": 2.010326481732336, "grad_norm": 1.0390625, "learning_rate": 1.2078705172960965e-05, "loss": 1.235, "step": 6400 }, { "epoch": 2.010954708757877, "grad_norm": 0.9296875, "learning_rate": 1.2076166296413839e-05, "loss": 1.1337, "step": 6402 }, { "epoch": 2.0115829357834185, "grad_norm": 0.8046875, "learning_rate": 1.207362741986671e-05, "loss": 1.1306, "step": 6404 }, { "epoch": 2.0122111628089603, "grad_norm": 0.8125, "learning_rate": 1.2071088543319583e-05, "loss": 1.1945, "step": 6406 }, { "epoch": 2.0128393898345016, "grad_norm": 0.859375, "learning_rate": 1.2068549666772453e-05, "loss": 1.1251, "step": 6408 }, { "epoch": 2.013467616860043, "grad_norm": 0.8359375, "learning_rate": 1.2066010790225326e-05, "loss": 1.1349, "step": 6410 }, { "epoch": 2.0140958438855843, "grad_norm": 0.8984375, "learning_rate": 1.2063471913678197e-05, "loss": 1.0783, "step": 6412 }, { "epoch": 2.0147240709111256, "grad_norm": 0.9296875, "learning_rate": 1.206093303713107e-05, "loss": 1.0695, "step": 6414 }, { "epoch": 2.015352297936667, "grad_norm": 1.0390625, "learning_rate": 1.2058394160583942e-05, "loss": 1.1497, "step": 6416 }, { "epoch": 2.0159805249622083, "grad_norm": 0.8046875, "learning_rate": 1.2055855284036815e-05, "loss": 1.3358, "step": 6418 }, { "epoch": 2.0166087519877496, "grad_norm": 0.90625, "learning_rate": 1.2053316407489686e-05, "loss": 1.1467, "step": 6420 }, { "epoch": 2.017236979013291, "grad_norm": 0.80078125, "learning_rate": 1.2050777530942559e-05, "loss": 1.1837, "step": 6422 }, { "epoch": 2.0178652060388322, "grad_norm": 0.87109375, "learning_rate": 1.204823865439543e-05, "loss": 1.1541, "step": 6424 }, { "epoch": 2.0184934330643736, "grad_norm": 0.8828125, "learning_rate": 1.2045699777848304e-05, "loss": 1.2496, "step": 6426 }, { "epoch": 2.019121660089915, "grad_norm": 0.8203125, "learning_rate": 1.2043160901301177e-05, "loss": 1.1149, "step": 6428 }, { "epoch": 2.019749887115456, "grad_norm": 0.8359375, "learning_rate": 1.2040622024754048e-05, "loss": 1.236, "step": 6430 }, { "epoch": 2.0203781141409975, "grad_norm": 0.90625, "learning_rate": 1.2038083148206921e-05, "loss": 1.1907, "step": 6432 }, { "epoch": 2.021006341166539, "grad_norm": 0.80078125, "learning_rate": 1.203554427165979e-05, "loss": 1.1428, "step": 6434 }, { "epoch": 2.02163456819208, "grad_norm": 0.8515625, "learning_rate": 1.2033005395112664e-05, "loss": 1.2013, "step": 6436 }, { "epoch": 2.022262795217622, "grad_norm": 0.921875, "learning_rate": 1.2030466518565535e-05, "loss": 1.2182, "step": 6438 }, { "epoch": 2.0228910222431633, "grad_norm": 0.85546875, "learning_rate": 1.2027927642018408e-05, "loss": 1.1166, "step": 6440 }, { "epoch": 2.0235192492687046, "grad_norm": 0.828125, "learning_rate": 1.202538876547128e-05, "loss": 1.1307, "step": 6442 }, { "epoch": 2.024147476294246, "grad_norm": 0.89453125, "learning_rate": 1.2022849888924153e-05, "loss": 0.9865, "step": 6444 }, { "epoch": 2.0247757033197873, "grad_norm": 0.84375, "learning_rate": 1.2020311012377024e-05, "loss": 1.0833, "step": 6446 }, { "epoch": 2.0254039303453286, "grad_norm": 0.89453125, "learning_rate": 1.2017772135829897e-05, "loss": 1.1807, "step": 6448 }, { "epoch": 2.02603215737087, "grad_norm": 0.94140625, "learning_rate": 1.2015233259282769e-05, "loss": 1.0344, "step": 6450 }, { "epoch": 2.0266603843964113, "grad_norm": 0.83984375, "learning_rate": 1.2012694382735642e-05, "loss": 1.0775, "step": 6452 }, { "epoch": 2.0272886114219526, "grad_norm": 0.86328125, "learning_rate": 1.2010155506188511e-05, "loss": 1.1049, "step": 6454 }, { "epoch": 2.027916838447494, "grad_norm": 0.83984375, "learning_rate": 1.2007616629641386e-05, "loss": 1.3446, "step": 6456 }, { "epoch": 2.0285450654730353, "grad_norm": 0.91015625, "learning_rate": 1.2005077753094256e-05, "loss": 1.3205, "step": 6458 }, { "epoch": 2.0291732924985766, "grad_norm": 0.8671875, "learning_rate": 1.2002538876547129e-05, "loss": 1.28, "step": 6460 }, { "epoch": 2.029801519524118, "grad_norm": 0.828125, "learning_rate": 1.2e-05, "loss": 1.157, "step": 6462 }, { "epoch": 2.0304297465496592, "grad_norm": 0.8828125, "learning_rate": 1.1997461123452873e-05, "loss": 1.132, "step": 6464 }, { "epoch": 2.0310579735752006, "grad_norm": 0.8671875, "learning_rate": 1.1994922246905745e-05, "loss": 1.1373, "step": 6466 }, { "epoch": 2.031686200600742, "grad_norm": 0.89453125, "learning_rate": 1.1992383370358618e-05, "loss": 1.2225, "step": 6468 }, { "epoch": 2.0323144276262832, "grad_norm": 0.84765625, "learning_rate": 1.198984449381149e-05, "loss": 1.21, "step": 6470 }, { "epoch": 2.032942654651825, "grad_norm": 0.80859375, "learning_rate": 1.1987305617264362e-05, "loss": 1.1602, "step": 6472 }, { "epoch": 2.0335708816773663, "grad_norm": 0.8828125, "learning_rate": 1.1984766740717234e-05, "loss": 1.1242, "step": 6474 }, { "epoch": 2.0341991087029077, "grad_norm": 0.875, "learning_rate": 1.1982227864170107e-05, "loss": 1.1352, "step": 6476 }, { "epoch": 2.034827335728449, "grad_norm": 0.87890625, "learning_rate": 1.1979688987622976e-05, "loss": 1.1541, "step": 6478 }, { "epoch": 2.0354555627539903, "grad_norm": 0.8515625, "learning_rate": 1.197715011107585e-05, "loss": 1.1356, "step": 6480 }, { "epoch": 2.0360837897795316, "grad_norm": 0.84375, "learning_rate": 1.1974611234528721e-05, "loss": 1.3641, "step": 6482 }, { "epoch": 2.036712016805073, "grad_norm": 1.21875, "learning_rate": 1.1972072357981594e-05, "loss": 1.0959, "step": 6484 }, { "epoch": 2.0373402438306143, "grad_norm": 0.859375, "learning_rate": 1.1969533481434465e-05, "loss": 1.2647, "step": 6486 }, { "epoch": 2.0379684708561556, "grad_norm": 0.81640625, "learning_rate": 1.1966994604887338e-05, "loss": 1.0983, "step": 6488 }, { "epoch": 2.038596697881697, "grad_norm": 0.84375, "learning_rate": 1.196445572834021e-05, "loss": 1.1839, "step": 6490 }, { "epoch": 2.0392249249072383, "grad_norm": 0.859375, "learning_rate": 1.1961916851793083e-05, "loss": 1.2528, "step": 6492 }, { "epoch": 2.0398531519327796, "grad_norm": 0.87890625, "learning_rate": 1.1959377975245954e-05, "loss": 1.1767, "step": 6494 }, { "epoch": 2.040481378958321, "grad_norm": 0.87890625, "learning_rate": 1.1956839098698827e-05, "loss": 1.2149, "step": 6496 }, { "epoch": 2.0411096059838623, "grad_norm": 0.88671875, "learning_rate": 1.1954300222151699e-05, "loss": 1.1772, "step": 6498 }, { "epoch": 2.0417378330094036, "grad_norm": 0.91796875, "learning_rate": 1.1951761345604572e-05, "loss": 1.1659, "step": 6500 }, { "epoch": 2.042366060034945, "grad_norm": 0.9140625, "learning_rate": 1.1949222469057442e-05, "loss": 1.1775, "step": 6502 }, { "epoch": 2.0429942870604867, "grad_norm": 0.953125, "learning_rate": 1.1946683592510315e-05, "loss": 1.2286, "step": 6504 }, { "epoch": 2.043622514086028, "grad_norm": 0.8203125, "learning_rate": 1.1944144715963186e-05, "loss": 1.265, "step": 6506 }, { "epoch": 2.0442507411115693, "grad_norm": 0.89453125, "learning_rate": 1.1941605839416059e-05, "loss": 1.2133, "step": 6508 }, { "epoch": 2.0448789681371107, "grad_norm": 1.0, "learning_rate": 1.193906696286893e-05, "loss": 1.0619, "step": 6510 }, { "epoch": 2.045507195162652, "grad_norm": 0.8515625, "learning_rate": 1.1936528086321803e-05, "loss": 1.292, "step": 6512 }, { "epoch": 2.0461354221881933, "grad_norm": 0.91015625, "learning_rate": 1.1933989209774677e-05, "loss": 1.1345, "step": 6514 }, { "epoch": 2.0467636492137347, "grad_norm": 0.87109375, "learning_rate": 1.1931450333227548e-05, "loss": 1.0742, "step": 6516 }, { "epoch": 2.047391876239276, "grad_norm": 0.88671875, "learning_rate": 1.1928911456680421e-05, "loss": 1.0563, "step": 6518 }, { "epoch": 2.0480201032648173, "grad_norm": 0.84765625, "learning_rate": 1.1926372580133292e-05, "loss": 1.0728, "step": 6520 }, { "epoch": 2.0486483302903586, "grad_norm": 0.87109375, "learning_rate": 1.1923833703586165e-05, "loss": 1.2013, "step": 6522 }, { "epoch": 2.0492765573159, "grad_norm": 0.76953125, "learning_rate": 1.1921294827039037e-05, "loss": 1.2654, "step": 6524 }, { "epoch": 2.0499047843414413, "grad_norm": 0.8984375, "learning_rate": 1.191875595049191e-05, "loss": 1.1785, "step": 6526 }, { "epoch": 2.0505330113669826, "grad_norm": 0.98046875, "learning_rate": 1.191621707394478e-05, "loss": 1.166, "step": 6528 }, { "epoch": 2.051161238392524, "grad_norm": 0.8125, "learning_rate": 1.1913678197397653e-05, "loss": 1.0234, "step": 6530 }, { "epoch": 2.0517894654180653, "grad_norm": 0.8203125, "learning_rate": 1.1911139320850524e-05, "loss": 1.1007, "step": 6532 }, { "epoch": 2.0524176924436066, "grad_norm": 0.8828125, "learning_rate": 1.1908600444303397e-05, "loss": 1.2466, "step": 6534 }, { "epoch": 2.053045919469148, "grad_norm": 1.0234375, "learning_rate": 1.1906061567756269e-05, "loss": 1.1554, "step": 6536 }, { "epoch": 2.0536741464946897, "grad_norm": 0.79296875, "learning_rate": 1.1903522691209142e-05, "loss": 1.293, "step": 6538 }, { "epoch": 2.054302373520231, "grad_norm": 0.7890625, "learning_rate": 1.1900983814662013e-05, "loss": 1.1603, "step": 6540 }, { "epoch": 2.0549306005457724, "grad_norm": 0.98046875, "learning_rate": 1.1898444938114886e-05, "loss": 1.1157, "step": 6542 }, { "epoch": 2.0555588275713137, "grad_norm": 0.84765625, "learning_rate": 1.1895906061567757e-05, "loss": 1.102, "step": 6544 }, { "epoch": 2.056187054596855, "grad_norm": 0.8046875, "learning_rate": 1.189336718502063e-05, "loss": 1.1276, "step": 6546 }, { "epoch": 2.0568152816223964, "grad_norm": 0.8125, "learning_rate": 1.18908283084735e-05, "loss": 1.3061, "step": 6548 }, { "epoch": 2.0574435086479377, "grad_norm": 0.83984375, "learning_rate": 1.1888289431926375e-05, "loss": 1.2868, "step": 6550 }, { "epoch": 2.058071735673479, "grad_norm": 0.90234375, "learning_rate": 1.1885750555379245e-05, "loss": 1.2344, "step": 6552 }, { "epoch": 2.0586999626990203, "grad_norm": 0.91015625, "learning_rate": 1.1883211678832118e-05, "loss": 1.1805, "step": 6554 }, { "epoch": 2.0593281897245617, "grad_norm": 0.8671875, "learning_rate": 1.1880672802284989e-05, "loss": 1.1347, "step": 6556 }, { "epoch": 2.059956416750103, "grad_norm": 0.8828125, "learning_rate": 1.1878133925737862e-05, "loss": 1.1707, "step": 6558 }, { "epoch": 2.0605846437756443, "grad_norm": 0.8828125, "learning_rate": 1.1875595049190734e-05, "loss": 1.2383, "step": 6560 }, { "epoch": 2.0612128708011856, "grad_norm": 0.83203125, "learning_rate": 1.1873056172643607e-05, "loss": 1.1534, "step": 6562 }, { "epoch": 2.061841097826727, "grad_norm": 0.83984375, "learning_rate": 1.1870517296096478e-05, "loss": 1.1963, "step": 6564 }, { "epoch": 2.0624693248522683, "grad_norm": 0.828125, "learning_rate": 1.1867978419549351e-05, "loss": 1.2344, "step": 6566 }, { "epoch": 2.0630975518778096, "grad_norm": 0.8359375, "learning_rate": 1.1865439543002223e-05, "loss": 1.158, "step": 6568 }, { "epoch": 2.0637257789033514, "grad_norm": 0.8359375, "learning_rate": 1.1862900666455096e-05, "loss": 1.2401, "step": 6570 }, { "epoch": 2.0643540059288927, "grad_norm": 0.85546875, "learning_rate": 1.1860361789907965e-05, "loss": 1.1387, "step": 6572 }, { "epoch": 2.064982232954434, "grad_norm": 0.8359375, "learning_rate": 1.1857822913360838e-05, "loss": 1.1706, "step": 6574 }, { "epoch": 2.0656104599799754, "grad_norm": 0.80859375, "learning_rate": 1.185528403681371e-05, "loss": 1.2223, "step": 6576 }, { "epoch": 2.0662386870055167, "grad_norm": 0.828125, "learning_rate": 1.1852745160266583e-05, "loss": 1.1918, "step": 6578 }, { "epoch": 2.066866914031058, "grad_norm": 0.8203125, "learning_rate": 1.1850206283719454e-05, "loss": 1.1429, "step": 6580 }, { "epoch": 2.0674951410565994, "grad_norm": 0.91796875, "learning_rate": 1.1847667407172327e-05, "loss": 1.1263, "step": 6582 }, { "epoch": 2.0681233680821407, "grad_norm": 0.84375, "learning_rate": 1.1845128530625199e-05, "loss": 1.2082, "step": 6584 }, { "epoch": 2.068751595107682, "grad_norm": 0.90625, "learning_rate": 1.1842589654078072e-05, "loss": 1.1829, "step": 6586 }, { "epoch": 2.0693798221332234, "grad_norm": 0.8046875, "learning_rate": 1.1840050777530943e-05, "loss": 1.0186, "step": 6588 }, { "epoch": 2.0700080491587647, "grad_norm": 0.828125, "learning_rate": 1.1837511900983816e-05, "loss": 1.2465, "step": 6590 }, { "epoch": 2.070636276184306, "grad_norm": 0.8671875, "learning_rate": 1.1834973024436686e-05, "loss": 1.1994, "step": 6592 }, { "epoch": 2.0712645032098473, "grad_norm": 0.8828125, "learning_rate": 1.183243414788956e-05, "loss": 1.1866, "step": 6594 }, { "epoch": 2.0718927302353887, "grad_norm": 0.89453125, "learning_rate": 1.182989527134243e-05, "loss": 1.2716, "step": 6596 }, { "epoch": 2.07252095726093, "grad_norm": 0.921875, "learning_rate": 1.1827356394795303e-05, "loss": 1.3113, "step": 6598 }, { "epoch": 2.0731491842864713, "grad_norm": 0.859375, "learning_rate": 1.1824817518248176e-05, "loss": 1.1559, "step": 6600 }, { "epoch": 2.0737774113120127, "grad_norm": 0.83984375, "learning_rate": 1.1822278641701048e-05, "loss": 1.0895, "step": 6602 }, { "epoch": 2.0744056383375544, "grad_norm": 0.87109375, "learning_rate": 1.1819739765153921e-05, "loss": 1.2286, "step": 6604 }, { "epoch": 2.0750338653630958, "grad_norm": 0.9375, "learning_rate": 1.1817200888606792e-05, "loss": 1.1859, "step": 6606 }, { "epoch": 2.075662092388637, "grad_norm": 0.87890625, "learning_rate": 1.1814662012059665e-05, "loss": 1.0625, "step": 6608 }, { "epoch": 2.0762903194141784, "grad_norm": 0.84375, "learning_rate": 1.1812123135512537e-05, "loss": 1.1966, "step": 6610 }, { "epoch": 2.0769185464397197, "grad_norm": 0.90234375, "learning_rate": 1.180958425896541e-05, "loss": 1.2155, "step": 6612 }, { "epoch": 2.077546773465261, "grad_norm": 0.828125, "learning_rate": 1.1807045382418281e-05, "loss": 1.2382, "step": 6614 }, { "epoch": 2.0781750004908024, "grad_norm": 0.875, "learning_rate": 1.1804506505871154e-05, "loss": 1.3356, "step": 6616 }, { "epoch": 2.0788032275163437, "grad_norm": 0.859375, "learning_rate": 1.1801967629324024e-05, "loss": 1.1531, "step": 6618 }, { "epoch": 2.079431454541885, "grad_norm": 0.828125, "learning_rate": 1.1799428752776899e-05, "loss": 1.1634, "step": 6620 }, { "epoch": 2.0800596815674264, "grad_norm": 0.81640625, "learning_rate": 1.1796889876229768e-05, "loss": 1.0372, "step": 6622 }, { "epoch": 2.0806879085929677, "grad_norm": 0.82421875, "learning_rate": 1.1794350999682642e-05, "loss": 1.1603, "step": 6624 }, { "epoch": 2.081316135618509, "grad_norm": 0.87890625, "learning_rate": 1.1791812123135513e-05, "loss": 1.2412, "step": 6626 }, { "epoch": 2.0819443626440504, "grad_norm": 0.765625, "learning_rate": 1.1789273246588386e-05, "loss": 1.2592, "step": 6628 }, { "epoch": 2.0825725896695917, "grad_norm": 0.82421875, "learning_rate": 1.1786734370041257e-05, "loss": 1.197, "step": 6630 }, { "epoch": 2.083200816695133, "grad_norm": 0.86328125, "learning_rate": 1.178419549349413e-05, "loss": 1.0592, "step": 6632 }, { "epoch": 2.0838290437206743, "grad_norm": 0.82421875, "learning_rate": 1.1781656616947002e-05, "loss": 1.1009, "step": 6634 }, { "epoch": 2.084457270746216, "grad_norm": 0.8515625, "learning_rate": 1.1779117740399875e-05, "loss": 1.1818, "step": 6636 }, { "epoch": 2.0850854977717574, "grad_norm": 0.875, "learning_rate": 1.1776578863852746e-05, "loss": 1.1779, "step": 6638 }, { "epoch": 2.0857137247972988, "grad_norm": 0.95703125, "learning_rate": 1.177403998730562e-05, "loss": 1.1441, "step": 6640 }, { "epoch": 2.08634195182284, "grad_norm": 0.8125, "learning_rate": 1.1771501110758489e-05, "loss": 1.0996, "step": 6642 }, { "epoch": 2.0869701788483814, "grad_norm": 0.8828125, "learning_rate": 1.1768962234211362e-05, "loss": 1.0396, "step": 6644 }, { "epoch": 2.0875984058739228, "grad_norm": 0.83203125, "learning_rate": 1.1766423357664234e-05, "loss": 1.1889, "step": 6646 }, { "epoch": 2.088226632899464, "grad_norm": 0.89453125, "learning_rate": 1.1763884481117107e-05, "loss": 1.0357, "step": 6648 }, { "epoch": 2.0888548599250054, "grad_norm": 0.8359375, "learning_rate": 1.1761345604569978e-05, "loss": 1.2735, "step": 6650 }, { "epoch": 2.0894830869505467, "grad_norm": 0.828125, "learning_rate": 1.1758806728022851e-05, "loss": 1.0615, "step": 6652 }, { "epoch": 2.090111313976088, "grad_norm": 0.80859375, "learning_rate": 1.1756267851475722e-05, "loss": 1.2334, "step": 6654 }, { "epoch": 2.0907395410016294, "grad_norm": 0.84375, "learning_rate": 1.1753728974928596e-05, "loss": 1.1619, "step": 6656 }, { "epoch": 2.0913677680271707, "grad_norm": 1.0234375, "learning_rate": 1.1751190098381467e-05, "loss": 1.2739, "step": 6658 }, { "epoch": 2.091995995052712, "grad_norm": 0.90234375, "learning_rate": 1.174865122183434e-05, "loss": 1.1238, "step": 6660 }, { "epoch": 2.0926242220782534, "grad_norm": 0.8359375, "learning_rate": 1.1746112345287211e-05, "loss": 1.2821, "step": 6662 }, { "epoch": 2.0932524491037947, "grad_norm": 0.859375, "learning_rate": 1.1743573468740084e-05, "loss": 1.128, "step": 6664 }, { "epoch": 2.093880676129336, "grad_norm": 0.875, "learning_rate": 1.1741034592192954e-05, "loss": 1.2666, "step": 6666 }, { "epoch": 2.0945089031548774, "grad_norm": 1.0, "learning_rate": 1.1738495715645827e-05, "loss": 1.2499, "step": 6668 }, { "epoch": 2.095137130180419, "grad_norm": 0.81640625, "learning_rate": 1.1735956839098699e-05, "loss": 1.1686, "step": 6670 }, { "epoch": 2.0957653572059605, "grad_norm": 0.8515625, "learning_rate": 1.1733417962551572e-05, "loss": 1.2688, "step": 6672 }, { "epoch": 2.096393584231502, "grad_norm": 0.86328125, "learning_rate": 1.1730879086004443e-05, "loss": 1.1446, "step": 6674 }, { "epoch": 2.097021811257043, "grad_norm": 0.9453125, "learning_rate": 1.1728340209457316e-05, "loss": 1.0533, "step": 6676 }, { "epoch": 2.0976500382825845, "grad_norm": 0.8203125, "learning_rate": 1.1725801332910188e-05, "loss": 1.1993, "step": 6678 }, { "epoch": 2.098278265308126, "grad_norm": 0.8515625, "learning_rate": 1.172326245636306e-05, "loss": 1.2439, "step": 6680 }, { "epoch": 2.098906492333667, "grad_norm": 0.875, "learning_rate": 1.1720723579815932e-05, "loss": 1.1796, "step": 6682 }, { "epoch": 2.0995347193592084, "grad_norm": 0.86328125, "learning_rate": 1.1718184703268805e-05, "loss": 1.1686, "step": 6684 }, { "epoch": 2.1001629463847498, "grad_norm": 0.90234375, "learning_rate": 1.1715645826721678e-05, "loss": 1.2925, "step": 6686 }, { "epoch": 2.100791173410291, "grad_norm": 0.8671875, "learning_rate": 1.171310695017455e-05, "loss": 1.1615, "step": 6688 }, { "epoch": 2.1014194004358324, "grad_norm": 0.8125, "learning_rate": 1.1710568073627423e-05, "loss": 1.1193, "step": 6690 }, { "epoch": 2.1020476274613737, "grad_norm": 0.87109375, "learning_rate": 1.1708029197080292e-05, "loss": 1.1156, "step": 6692 }, { "epoch": 2.102675854486915, "grad_norm": 0.828125, "learning_rate": 1.1705490320533165e-05, "loss": 1.3214, "step": 6694 }, { "epoch": 2.1033040815124564, "grad_norm": 0.828125, "learning_rate": 1.1702951443986037e-05, "loss": 1.3833, "step": 6696 }, { "epoch": 2.1039323085379977, "grad_norm": 0.92578125, "learning_rate": 1.170041256743891e-05, "loss": 1.1531, "step": 6698 }, { "epoch": 2.1045605355635395, "grad_norm": 0.8515625, "learning_rate": 1.1697873690891781e-05, "loss": 1.0297, "step": 6700 }, { "epoch": 2.105188762589081, "grad_norm": 0.87109375, "learning_rate": 1.1695334814344654e-05, "loss": 1.1562, "step": 6702 }, { "epoch": 2.105816989614622, "grad_norm": 0.87109375, "learning_rate": 1.1692795937797526e-05, "loss": 1.171, "step": 6704 }, { "epoch": 2.1064452166401635, "grad_norm": 0.87890625, "learning_rate": 1.1690257061250399e-05, "loss": 1.0549, "step": 6706 }, { "epoch": 2.107073443665705, "grad_norm": 0.8828125, "learning_rate": 1.168771818470327e-05, "loss": 1.094, "step": 6708 }, { "epoch": 2.107701670691246, "grad_norm": 0.84375, "learning_rate": 1.1685179308156143e-05, "loss": 1.1696, "step": 6710 }, { "epoch": 2.1083298977167875, "grad_norm": 0.828125, "learning_rate": 1.1682640431609013e-05, "loss": 1.1725, "step": 6712 }, { "epoch": 2.108958124742329, "grad_norm": 0.875, "learning_rate": 1.1680101555061888e-05, "loss": 1.3241, "step": 6714 }, { "epoch": 2.10958635176787, "grad_norm": 0.890625, "learning_rate": 1.1677562678514757e-05, "loss": 1.0591, "step": 6716 }, { "epoch": 2.1102145787934115, "grad_norm": 0.99609375, "learning_rate": 1.167502380196763e-05, "loss": 1.1485, "step": 6718 }, { "epoch": 2.110842805818953, "grad_norm": 0.83203125, "learning_rate": 1.1672484925420502e-05, "loss": 1.1271, "step": 6720 }, { "epoch": 2.111471032844494, "grad_norm": 0.83984375, "learning_rate": 1.1669946048873375e-05, "loss": 1.1642, "step": 6722 }, { "epoch": 2.1120992598700354, "grad_norm": 0.82421875, "learning_rate": 1.1667407172326246e-05, "loss": 1.2039, "step": 6724 }, { "epoch": 2.1127274868955768, "grad_norm": 0.87109375, "learning_rate": 1.166486829577912e-05, "loss": 1.1125, "step": 6726 }, { "epoch": 2.113355713921118, "grad_norm": 0.96484375, "learning_rate": 1.166232941923199e-05, "loss": 1.2027, "step": 6728 }, { "epoch": 2.1139839409466594, "grad_norm": 0.9375, "learning_rate": 1.1659790542684864e-05, "loss": 1.1512, "step": 6730 }, { "epoch": 2.1146121679722008, "grad_norm": 0.9453125, "learning_rate": 1.1657251666137735e-05, "loss": 1.2524, "step": 6732 }, { "epoch": 2.1152403949977425, "grad_norm": 0.8828125, "learning_rate": 1.1654712789590608e-05, "loss": 1.1181, "step": 6734 }, { "epoch": 2.115868622023284, "grad_norm": 0.890625, "learning_rate": 1.1652173913043478e-05, "loss": 1.1363, "step": 6736 }, { "epoch": 2.116496849048825, "grad_norm": 0.859375, "learning_rate": 1.1649635036496351e-05, "loss": 1.2479, "step": 6738 }, { "epoch": 2.1171250760743665, "grad_norm": 0.94140625, "learning_rate": 1.1647096159949222e-05, "loss": 1.1432, "step": 6740 }, { "epoch": 2.117753303099908, "grad_norm": 0.953125, "learning_rate": 1.1644557283402095e-05, "loss": 1.2668, "step": 6742 }, { "epoch": 2.118381530125449, "grad_norm": 0.88671875, "learning_rate": 1.1642018406854967e-05, "loss": 1.1356, "step": 6744 }, { "epoch": 2.1190097571509905, "grad_norm": 0.9140625, "learning_rate": 1.163947953030784e-05, "loss": 1.0993, "step": 6746 }, { "epoch": 2.119637984176532, "grad_norm": 0.98046875, "learning_rate": 1.1636940653760711e-05, "loss": 1.0308, "step": 6748 }, { "epoch": 2.120266211202073, "grad_norm": 0.94921875, "learning_rate": 1.1634401777213584e-05, "loss": 1.1342, "step": 6750 }, { "epoch": 2.1208944382276145, "grad_norm": 0.9765625, "learning_rate": 1.1631862900666456e-05, "loss": 1.0846, "step": 6752 }, { "epoch": 2.121522665253156, "grad_norm": 0.96484375, "learning_rate": 1.1629324024119329e-05, "loss": 1.1746, "step": 6754 }, { "epoch": 2.122150892278697, "grad_norm": 0.8359375, "learning_rate": 1.1626785147572199e-05, "loss": 1.114, "step": 6756 }, { "epoch": 2.1227791193042385, "grad_norm": 0.82421875, "learning_rate": 1.1624246271025073e-05, "loss": 1.123, "step": 6758 }, { "epoch": 2.12340734632978, "grad_norm": 0.828125, "learning_rate": 1.1621707394477943e-05, "loss": 1.2326, "step": 6760 }, { "epoch": 2.124035573355321, "grad_norm": 0.84765625, "learning_rate": 1.1619168517930816e-05, "loss": 1.2209, "step": 6762 }, { "epoch": 2.1246638003808624, "grad_norm": 0.9375, "learning_rate": 1.1616629641383687e-05, "loss": 0.9569, "step": 6764 }, { "epoch": 2.125292027406404, "grad_norm": 0.83984375, "learning_rate": 1.161409076483656e-05, "loss": 1.139, "step": 6766 }, { "epoch": 2.1259202544319455, "grad_norm": 0.81640625, "learning_rate": 1.1611551888289432e-05, "loss": 1.2186, "step": 6768 }, { "epoch": 2.126548481457487, "grad_norm": 0.8828125, "learning_rate": 1.1609013011742305e-05, "loss": 1.2514, "step": 6770 }, { "epoch": 2.127176708483028, "grad_norm": 0.85546875, "learning_rate": 1.1606474135195178e-05, "loss": 1.2252, "step": 6772 }, { "epoch": 2.1278049355085695, "grad_norm": 0.8515625, "learning_rate": 1.160393525864805e-05, "loss": 1.1557, "step": 6774 }, { "epoch": 2.128433162534111, "grad_norm": 0.88671875, "learning_rate": 1.1601396382100923e-05, "loss": 1.2549, "step": 6776 }, { "epoch": 2.129061389559652, "grad_norm": 0.8984375, "learning_rate": 1.1598857505553794e-05, "loss": 1.1201, "step": 6778 }, { "epoch": 2.1296896165851935, "grad_norm": 0.96484375, "learning_rate": 1.1596318629006667e-05, "loss": 1.2005, "step": 6780 }, { "epoch": 2.130317843610735, "grad_norm": 0.80078125, "learning_rate": 1.1593779752459537e-05, "loss": 1.1851, "step": 6782 }, { "epoch": 2.130946070636276, "grad_norm": 0.8515625, "learning_rate": 1.1591240875912411e-05, "loss": 1.0721, "step": 6784 }, { "epoch": 2.1315742976618175, "grad_norm": 0.8515625, "learning_rate": 1.1588701999365281e-05, "loss": 1.1945, "step": 6786 }, { "epoch": 2.132202524687359, "grad_norm": 0.85546875, "learning_rate": 1.1586163122818154e-05, "loss": 1.1655, "step": 6788 }, { "epoch": 2.1328307517129, "grad_norm": 0.80859375, "learning_rate": 1.1583624246271026e-05, "loss": 1.0261, "step": 6790 }, { "epoch": 2.1334589787384415, "grad_norm": 0.84375, "learning_rate": 1.1581085369723899e-05, "loss": 1.1892, "step": 6792 }, { "epoch": 2.134087205763983, "grad_norm": 0.875, "learning_rate": 1.157854649317677e-05, "loss": 1.196, "step": 6794 }, { "epoch": 2.134715432789524, "grad_norm": 0.9140625, "learning_rate": 1.1576007616629643e-05, "loss": 1.2396, "step": 6796 }, { "epoch": 2.1353436598150655, "grad_norm": 0.90234375, "learning_rate": 1.1573468740082514e-05, "loss": 1.2077, "step": 6798 }, { "epoch": 2.135971886840607, "grad_norm": 0.92578125, "learning_rate": 1.1570929863535388e-05, "loss": 1.1368, "step": 6800 }, { "epoch": 2.1366001138661486, "grad_norm": 0.85546875, "learning_rate": 1.1568390986988259e-05, "loss": 1.1799, "step": 6802 }, { "epoch": 2.13722834089169, "grad_norm": 0.83984375, "learning_rate": 1.1565852110441132e-05, "loss": 1.1639, "step": 6804 }, { "epoch": 2.137856567917231, "grad_norm": 0.859375, "learning_rate": 1.1563313233894002e-05, "loss": 1.207, "step": 6806 }, { "epoch": 2.1384847949427725, "grad_norm": 0.88671875, "learning_rate": 1.1560774357346875e-05, "loss": 1.0282, "step": 6808 }, { "epoch": 2.139113021968314, "grad_norm": 0.859375, "learning_rate": 1.1558235480799746e-05, "loss": 1.095, "step": 6810 }, { "epoch": 2.139741248993855, "grad_norm": 0.8515625, "learning_rate": 1.155569660425262e-05, "loss": 1.1784, "step": 6812 }, { "epoch": 2.1403694760193965, "grad_norm": 0.85546875, "learning_rate": 1.155315772770549e-05, "loss": 1.2698, "step": 6814 }, { "epoch": 2.140997703044938, "grad_norm": 0.88671875, "learning_rate": 1.1550618851158364e-05, "loss": 1.1665, "step": 6816 }, { "epoch": 2.141625930070479, "grad_norm": 0.8046875, "learning_rate": 1.1548079974611235e-05, "loss": 1.1065, "step": 6818 }, { "epoch": 2.1422541570960205, "grad_norm": 0.7890625, "learning_rate": 1.1545541098064108e-05, "loss": 1.1438, "step": 6820 }, { "epoch": 2.142882384121562, "grad_norm": 0.84375, "learning_rate": 1.154300222151698e-05, "loss": 1.1842, "step": 6822 }, { "epoch": 2.143510611147103, "grad_norm": 0.875, "learning_rate": 1.1540463344969853e-05, "loss": 1.1727, "step": 6824 }, { "epoch": 2.1441388381726445, "grad_norm": 0.87109375, "learning_rate": 1.1537924468422724e-05, "loss": 1.2433, "step": 6826 }, { "epoch": 2.144767065198186, "grad_norm": 0.95703125, "learning_rate": 1.1535385591875597e-05, "loss": 1.1275, "step": 6828 }, { "epoch": 2.145395292223727, "grad_norm": 0.81640625, "learning_rate": 1.1532846715328467e-05, "loss": 1.1126, "step": 6830 }, { "epoch": 2.146023519249269, "grad_norm": 1.578125, "learning_rate": 1.153030783878134e-05, "loss": 1.0254, "step": 6832 }, { "epoch": 2.1466517462748103, "grad_norm": 0.84765625, "learning_rate": 1.1527768962234211e-05, "loss": 1.1911, "step": 6834 }, { "epoch": 2.1472799733003516, "grad_norm": 0.875, "learning_rate": 1.1525230085687084e-05, "loss": 1.2118, "step": 6836 }, { "epoch": 2.147908200325893, "grad_norm": 0.93359375, "learning_rate": 1.1522691209139956e-05, "loss": 1.081, "step": 6838 }, { "epoch": 2.1485364273514342, "grad_norm": 0.9296875, "learning_rate": 1.1520152332592829e-05, "loss": 1.2892, "step": 6840 }, { "epoch": 2.1491646543769756, "grad_norm": 0.83203125, "learning_rate": 1.15176134560457e-05, "loss": 1.0985, "step": 6842 }, { "epoch": 2.149792881402517, "grad_norm": 0.890625, "learning_rate": 1.1515074579498573e-05, "loss": 1.0704, "step": 6844 }, { "epoch": 2.1504211084280582, "grad_norm": 0.875, "learning_rate": 1.1512535702951445e-05, "loss": 1.0915, "step": 6846 }, { "epoch": 2.1510493354535996, "grad_norm": 0.859375, "learning_rate": 1.1509996826404318e-05, "loss": 1.1404, "step": 6848 }, { "epoch": 2.151677562479141, "grad_norm": 0.8828125, "learning_rate": 1.1507457949857187e-05, "loss": 1.0769, "step": 6850 }, { "epoch": 2.152305789504682, "grad_norm": 0.90625, "learning_rate": 1.1504919073310062e-05, "loss": 1.0789, "step": 6852 }, { "epoch": 2.1529340165302235, "grad_norm": 0.88671875, "learning_rate": 1.1502380196762932e-05, "loss": 1.2021, "step": 6854 }, { "epoch": 2.153562243555765, "grad_norm": 0.90234375, "learning_rate": 1.1499841320215805e-05, "loss": 1.1089, "step": 6856 }, { "epoch": 2.154190470581306, "grad_norm": 0.81640625, "learning_rate": 1.1497302443668678e-05, "loss": 1.1592, "step": 6858 }, { "epoch": 2.1548186976068475, "grad_norm": 0.9296875, "learning_rate": 1.149476356712155e-05, "loss": 1.2386, "step": 6860 }, { "epoch": 2.155446924632389, "grad_norm": 0.90234375, "learning_rate": 1.1492224690574422e-05, "loss": 1.1695, "step": 6862 }, { "epoch": 2.15607515165793, "grad_norm": 0.8515625, "learning_rate": 1.1489685814027294e-05, "loss": 1.1925, "step": 6864 }, { "epoch": 2.1567033786834715, "grad_norm": 0.8515625, "learning_rate": 1.1487146937480167e-05, "loss": 1.1248, "step": 6866 }, { "epoch": 2.1573316057090133, "grad_norm": 0.8828125, "learning_rate": 1.1484608060933038e-05, "loss": 1.1595, "step": 6868 }, { "epoch": 2.1579598327345546, "grad_norm": 0.859375, "learning_rate": 1.1482069184385911e-05, "loss": 1.2658, "step": 6870 }, { "epoch": 2.158588059760096, "grad_norm": 0.8671875, "learning_rate": 1.1479530307838783e-05, "loss": 1.1204, "step": 6872 }, { "epoch": 2.1592162867856373, "grad_norm": 0.84375, "learning_rate": 1.1476991431291656e-05, "loss": 1.2738, "step": 6874 }, { "epoch": 2.1598445138111786, "grad_norm": 0.90234375, "learning_rate": 1.1474452554744525e-05, "loss": 1.183, "step": 6876 }, { "epoch": 2.16047274083672, "grad_norm": 0.8671875, "learning_rate": 1.14719136781974e-05, "loss": 1.2339, "step": 6878 }, { "epoch": 2.1611009678622612, "grad_norm": 0.8125, "learning_rate": 1.146937480165027e-05, "loss": 1.2331, "step": 6880 }, { "epoch": 2.1617291948878026, "grad_norm": 0.9296875, "learning_rate": 1.1466835925103143e-05, "loss": 1.2599, "step": 6882 }, { "epoch": 2.162357421913344, "grad_norm": 0.82421875, "learning_rate": 1.1464297048556014e-05, "loss": 1.1194, "step": 6884 }, { "epoch": 2.1629856489388852, "grad_norm": 0.8671875, "learning_rate": 1.1461758172008887e-05, "loss": 1.1056, "step": 6886 }, { "epoch": 2.1636138759644266, "grad_norm": 0.8828125, "learning_rate": 1.1459219295461759e-05, "loss": 1.1664, "step": 6888 }, { "epoch": 2.164242102989968, "grad_norm": 0.890625, "learning_rate": 1.1456680418914632e-05, "loss": 1.3292, "step": 6890 }, { "epoch": 2.164870330015509, "grad_norm": 0.8828125, "learning_rate": 1.1454141542367503e-05, "loss": 1.1255, "step": 6892 }, { "epoch": 2.1654985570410505, "grad_norm": 0.80078125, "learning_rate": 1.1451602665820376e-05, "loss": 1.193, "step": 6894 }, { "epoch": 2.166126784066592, "grad_norm": 0.88671875, "learning_rate": 1.1449063789273248e-05, "loss": 1.1852, "step": 6896 }, { "epoch": 2.1667550110921336, "grad_norm": 0.91796875, "learning_rate": 1.1446524912726121e-05, "loss": 1.1961, "step": 6898 }, { "epoch": 2.167383238117675, "grad_norm": 0.8828125, "learning_rate": 1.144398603617899e-05, "loss": 1.0907, "step": 6900 }, { "epoch": 2.1680114651432163, "grad_norm": 0.88671875, "learning_rate": 1.1441447159631864e-05, "loss": 1.3612, "step": 6902 }, { "epoch": 2.1686396921687576, "grad_norm": 0.9140625, "learning_rate": 1.1438908283084735e-05, "loss": 1.1934, "step": 6904 }, { "epoch": 2.169267919194299, "grad_norm": 0.85546875, "learning_rate": 1.1436369406537608e-05, "loss": 1.2283, "step": 6906 }, { "epoch": 2.1698961462198403, "grad_norm": 0.83203125, "learning_rate": 1.143383052999048e-05, "loss": 1.1552, "step": 6908 }, { "epoch": 2.1705243732453816, "grad_norm": 0.82421875, "learning_rate": 1.1431291653443353e-05, "loss": 1.173, "step": 6910 }, { "epoch": 2.171152600270923, "grad_norm": 0.8515625, "learning_rate": 1.1428752776896224e-05, "loss": 1.148, "step": 6912 }, { "epoch": 2.1717808272964643, "grad_norm": 0.90625, "learning_rate": 1.1426213900349097e-05, "loss": 1.2096, "step": 6914 }, { "epoch": 2.1724090543220056, "grad_norm": 0.8671875, "learning_rate": 1.1423675023801968e-05, "loss": 1.193, "step": 6916 }, { "epoch": 2.173037281347547, "grad_norm": 0.8359375, "learning_rate": 1.1421136147254841e-05, "loss": 1.1284, "step": 6918 }, { "epoch": 2.1736655083730883, "grad_norm": 0.87109375, "learning_rate": 1.1418597270707711e-05, "loss": 1.1559, "step": 6920 }, { "epoch": 2.1742937353986296, "grad_norm": 0.87890625, "learning_rate": 1.1416058394160586e-05, "loss": 1.3299, "step": 6922 }, { "epoch": 2.174921962424171, "grad_norm": 0.84375, "learning_rate": 1.1413519517613456e-05, "loss": 1.2832, "step": 6924 }, { "epoch": 2.1755501894497122, "grad_norm": 0.875, "learning_rate": 1.1410980641066329e-05, "loss": 1.1829, "step": 6926 }, { "epoch": 2.1761784164752536, "grad_norm": 0.828125, "learning_rate": 1.14084417645192e-05, "loss": 1.2522, "step": 6928 }, { "epoch": 2.176806643500795, "grad_norm": 0.828125, "learning_rate": 1.1405902887972073e-05, "loss": 1.0834, "step": 6930 }, { "epoch": 2.177434870526336, "grad_norm": 0.796875, "learning_rate": 1.1403364011424945e-05, "loss": 1.225, "step": 6932 }, { "epoch": 2.178063097551878, "grad_norm": 0.95703125, "learning_rate": 1.1400825134877818e-05, "loss": 1.0854, "step": 6934 }, { "epoch": 2.1786913245774193, "grad_norm": 0.89453125, "learning_rate": 1.1398286258330689e-05, "loss": 1.1618, "step": 6936 }, { "epoch": 2.1793195516029606, "grad_norm": 0.875, "learning_rate": 1.1395747381783562e-05, "loss": 1.2538, "step": 6938 }, { "epoch": 2.179947778628502, "grad_norm": 0.90625, "learning_rate": 1.1393208505236433e-05, "loss": 1.2129, "step": 6940 }, { "epoch": 2.1805760056540433, "grad_norm": 0.84765625, "learning_rate": 1.1390669628689307e-05, "loss": 1.1104, "step": 6942 }, { "epoch": 2.1812042326795846, "grad_norm": 0.80078125, "learning_rate": 1.138813075214218e-05, "loss": 1.1527, "step": 6944 }, { "epoch": 2.181832459705126, "grad_norm": 0.91015625, "learning_rate": 1.138559187559505e-05, "loss": 1.1983, "step": 6946 }, { "epoch": 2.1824606867306673, "grad_norm": 0.81640625, "learning_rate": 1.1383052999047924e-05, "loss": 1.1743, "step": 6948 }, { "epoch": 2.1830889137562086, "grad_norm": 0.91015625, "learning_rate": 1.1380514122500794e-05, "loss": 1.0212, "step": 6950 }, { "epoch": 2.18371714078175, "grad_norm": 0.859375, "learning_rate": 1.1377975245953667e-05, "loss": 1.1853, "step": 6952 }, { "epoch": 2.1843453678072913, "grad_norm": 0.87890625, "learning_rate": 1.1375436369406538e-05, "loss": 1.1346, "step": 6954 }, { "epoch": 2.1849735948328326, "grad_norm": 0.8828125, "learning_rate": 1.1372897492859411e-05, "loss": 1.3125, "step": 6956 }, { "epoch": 2.185601821858374, "grad_norm": 0.8359375, "learning_rate": 1.1370358616312283e-05, "loss": 1.3227, "step": 6958 }, { "epoch": 2.1862300488839153, "grad_norm": 0.8828125, "learning_rate": 1.1367819739765156e-05, "loss": 1.2947, "step": 6960 }, { "epoch": 2.1868582759094566, "grad_norm": 0.93359375, "learning_rate": 1.1365280863218027e-05, "loss": 1.1531, "step": 6962 }, { "epoch": 2.1874865029349984, "grad_norm": 0.9375, "learning_rate": 1.13627419866709e-05, "loss": 1.0524, "step": 6964 }, { "epoch": 2.1881147299605397, "grad_norm": 0.8515625, "learning_rate": 1.1360203110123772e-05, "loss": 1.1572, "step": 6966 }, { "epoch": 2.188742956986081, "grad_norm": 0.94921875, "learning_rate": 1.1357664233576645e-05, "loss": 1.3277, "step": 6968 }, { "epoch": 2.1893711840116223, "grad_norm": 0.92578125, "learning_rate": 1.1355125357029514e-05, "loss": 1.2082, "step": 6970 }, { "epoch": 2.1899994110371637, "grad_norm": 0.8359375, "learning_rate": 1.1352586480482387e-05, "loss": 1.2906, "step": 6972 }, { "epoch": 2.190627638062705, "grad_norm": 0.88671875, "learning_rate": 1.1350047603935259e-05, "loss": 1.1647, "step": 6974 }, { "epoch": 2.1912558650882463, "grad_norm": 0.8046875, "learning_rate": 1.1347508727388132e-05, "loss": 1.2105, "step": 6976 }, { "epoch": 2.1918840921137877, "grad_norm": 0.88671875, "learning_rate": 1.1344969850841003e-05, "loss": 1.2325, "step": 6978 }, { "epoch": 2.192512319139329, "grad_norm": 0.79296875, "learning_rate": 1.1342430974293876e-05, "loss": 1.2232, "step": 6980 }, { "epoch": 2.1931405461648703, "grad_norm": 0.87109375, "learning_rate": 1.1339892097746748e-05, "loss": 1.2276, "step": 6982 }, { "epoch": 2.1937687731904116, "grad_norm": 0.859375, "learning_rate": 1.133735322119962e-05, "loss": 1.1263, "step": 6984 }, { "epoch": 2.194397000215953, "grad_norm": 0.8515625, "learning_rate": 1.1334814344652492e-05, "loss": 1.1551, "step": 6986 }, { "epoch": 2.1950252272414943, "grad_norm": 0.875, "learning_rate": 1.1332275468105365e-05, "loss": 1.1824, "step": 6988 }, { "epoch": 2.1956534542670356, "grad_norm": 0.8828125, "learning_rate": 1.1329736591558237e-05, "loss": 1.2166, "step": 6990 }, { "epoch": 2.196281681292577, "grad_norm": 0.8359375, "learning_rate": 1.132719771501111e-05, "loss": 1.2635, "step": 6992 }, { "epoch": 2.1969099083181183, "grad_norm": 0.8984375, "learning_rate": 1.132465883846398e-05, "loss": 1.2588, "step": 6994 }, { "epoch": 2.1975381353436596, "grad_norm": 0.83203125, "learning_rate": 1.1322119961916852e-05, "loss": 1.3271, "step": 6996 }, { "epoch": 2.1981663623692014, "grad_norm": 0.8359375, "learning_rate": 1.1319581085369724e-05, "loss": 1.1563, "step": 6998 }, { "epoch": 2.1987945893947427, "grad_norm": 0.8828125, "learning_rate": 1.1317042208822597e-05, "loss": 1.109, "step": 7000 }, { "epoch": 2.199422816420284, "grad_norm": 0.80859375, "learning_rate": 1.1314503332275468e-05, "loss": 1.144, "step": 7002 }, { "epoch": 2.2000510434458254, "grad_norm": 0.83203125, "learning_rate": 1.1311964455728341e-05, "loss": 1.2422, "step": 7004 }, { "epoch": 2.2006792704713667, "grad_norm": 0.87109375, "learning_rate": 1.1309425579181213e-05, "loss": 1.2329, "step": 7006 }, { "epoch": 2.201307497496908, "grad_norm": 0.92578125, "learning_rate": 1.1306886702634086e-05, "loss": 1.1037, "step": 7008 }, { "epoch": 2.2019357245224493, "grad_norm": 0.92578125, "learning_rate": 1.1304347826086957e-05, "loss": 1.2786, "step": 7010 }, { "epoch": 2.2025639515479907, "grad_norm": 0.8125, "learning_rate": 1.130180894953983e-05, "loss": 1.3284, "step": 7012 }, { "epoch": 2.203192178573532, "grad_norm": 0.90625, "learning_rate": 1.12992700729927e-05, "loss": 1.1047, "step": 7014 }, { "epoch": 2.2038204055990733, "grad_norm": 0.88671875, "learning_rate": 1.1296731196445575e-05, "loss": 1.2299, "step": 7016 }, { "epoch": 2.2044486326246147, "grad_norm": 0.8359375, "learning_rate": 1.1294192319898444e-05, "loss": 1.1535, "step": 7018 }, { "epoch": 2.205076859650156, "grad_norm": 0.9609375, "learning_rate": 1.1291653443351318e-05, "loss": 1.3391, "step": 7020 }, { "epoch": 2.2057050866756973, "grad_norm": 0.8828125, "learning_rate": 1.1289114566804189e-05, "loss": 1.092, "step": 7022 }, { "epoch": 2.2063333137012386, "grad_norm": 0.9140625, "learning_rate": 1.1286575690257062e-05, "loss": 1.2479, "step": 7024 }, { "epoch": 2.20696154072678, "grad_norm": 1.0078125, "learning_rate": 1.1284036813709933e-05, "loss": 1.0299, "step": 7026 }, { "epoch": 2.2075897677523213, "grad_norm": 0.91796875, "learning_rate": 1.1281497937162806e-05, "loss": 1.1123, "step": 7028 }, { "epoch": 2.208217994777863, "grad_norm": 0.86328125, "learning_rate": 1.127895906061568e-05, "loss": 1.1984, "step": 7030 }, { "epoch": 2.2088462218034044, "grad_norm": 0.84765625, "learning_rate": 1.1276420184068551e-05, "loss": 1.1251, "step": 7032 }, { "epoch": 2.2094744488289457, "grad_norm": 0.9453125, "learning_rate": 1.1273881307521424e-05, "loss": 1.0623, "step": 7034 }, { "epoch": 2.210102675854487, "grad_norm": 0.85546875, "learning_rate": 1.1271342430974295e-05, "loss": 1.1495, "step": 7036 }, { "epoch": 2.2107309028800284, "grad_norm": 0.87890625, "learning_rate": 1.1268803554427168e-05, "loss": 1.1898, "step": 7038 }, { "epoch": 2.2113591299055697, "grad_norm": 0.97265625, "learning_rate": 1.1266264677880038e-05, "loss": 1.1346, "step": 7040 }, { "epoch": 2.211987356931111, "grad_norm": 0.8125, "learning_rate": 1.1263725801332913e-05, "loss": 1.0782, "step": 7042 }, { "epoch": 2.2126155839566524, "grad_norm": 0.953125, "learning_rate": 1.1261186924785783e-05, "loss": 1.0988, "step": 7044 }, { "epoch": 2.2132438109821937, "grad_norm": 0.8671875, "learning_rate": 1.1258648048238656e-05, "loss": 1.2726, "step": 7046 }, { "epoch": 2.213872038007735, "grad_norm": 0.89453125, "learning_rate": 1.1256109171691527e-05, "loss": 1.3077, "step": 7048 }, { "epoch": 2.2145002650332763, "grad_norm": 0.890625, "learning_rate": 1.12535702951444e-05, "loss": 1.1586, "step": 7050 }, { "epoch": 2.2151284920588177, "grad_norm": 0.87109375, "learning_rate": 1.1251031418597271e-05, "loss": 1.1196, "step": 7052 }, { "epoch": 2.215756719084359, "grad_norm": 0.87109375, "learning_rate": 1.1248492542050145e-05, "loss": 1.3712, "step": 7054 }, { "epoch": 2.2163849461099003, "grad_norm": 0.8359375, "learning_rate": 1.1245953665503016e-05, "loss": 1.1446, "step": 7056 }, { "epoch": 2.2170131731354417, "grad_norm": 0.8515625, "learning_rate": 1.1243414788955889e-05, "loss": 1.2527, "step": 7058 }, { "epoch": 2.217641400160983, "grad_norm": 0.921875, "learning_rate": 1.124087591240876e-05, "loss": 1.1417, "step": 7060 }, { "epoch": 2.2182696271865243, "grad_norm": 1.734375, "learning_rate": 1.1238337035861633e-05, "loss": 1.1248, "step": 7062 }, { "epoch": 2.218897854212066, "grad_norm": 0.9296875, "learning_rate": 1.1235798159314503e-05, "loss": 1.1408, "step": 7064 }, { "epoch": 2.2195260812376074, "grad_norm": 0.890625, "learning_rate": 1.1233259282767376e-05, "loss": 1.1888, "step": 7066 }, { "epoch": 2.2201543082631487, "grad_norm": 0.875, "learning_rate": 1.1230720406220248e-05, "loss": 1.3227, "step": 7068 }, { "epoch": 2.22078253528869, "grad_norm": 0.91796875, "learning_rate": 1.122818152967312e-05, "loss": 1.2715, "step": 7070 }, { "epoch": 2.2214107623142314, "grad_norm": 0.8125, "learning_rate": 1.1225642653125992e-05, "loss": 1.1767, "step": 7072 }, { "epoch": 2.2220389893397727, "grad_norm": 0.86328125, "learning_rate": 1.1223103776578865e-05, "loss": 1.2829, "step": 7074 }, { "epoch": 2.222667216365314, "grad_norm": 0.95703125, "learning_rate": 1.1220564900031737e-05, "loss": 1.1629, "step": 7076 }, { "epoch": 2.2232954433908554, "grad_norm": 0.9296875, "learning_rate": 1.121802602348461e-05, "loss": 1.252, "step": 7078 }, { "epoch": 2.2239236704163967, "grad_norm": 0.79296875, "learning_rate": 1.1215487146937481e-05, "loss": 1.0698, "step": 7080 }, { "epoch": 2.224551897441938, "grad_norm": 0.88671875, "learning_rate": 1.1212948270390354e-05, "loss": 1.136, "step": 7082 }, { "epoch": 2.2251801244674794, "grad_norm": 0.98046875, "learning_rate": 1.1210409393843224e-05, "loss": 1.1405, "step": 7084 }, { "epoch": 2.2258083514930207, "grad_norm": 0.86328125, "learning_rate": 1.1207870517296099e-05, "loss": 1.2, "step": 7086 }, { "epoch": 2.226436578518562, "grad_norm": 0.89453125, "learning_rate": 1.1205331640748968e-05, "loss": 1.1154, "step": 7088 }, { "epoch": 2.2270648055441034, "grad_norm": 0.85546875, "learning_rate": 1.1202792764201841e-05, "loss": 1.1577, "step": 7090 }, { "epoch": 2.2276930325696447, "grad_norm": 0.85546875, "learning_rate": 1.1200253887654713e-05, "loss": 1.1263, "step": 7092 }, { "epoch": 2.228321259595186, "grad_norm": 0.8515625, "learning_rate": 1.1197715011107586e-05, "loss": 1.0956, "step": 7094 }, { "epoch": 2.228949486620728, "grad_norm": 0.91015625, "learning_rate": 1.1195176134560457e-05, "loss": 1.0752, "step": 7096 }, { "epoch": 2.229577713646269, "grad_norm": 0.83203125, "learning_rate": 1.119263725801333e-05, "loss": 1.08, "step": 7098 }, { "epoch": 2.2302059406718104, "grad_norm": 0.7734375, "learning_rate": 1.1190098381466202e-05, "loss": 1.206, "step": 7100 }, { "epoch": 2.2308341676973518, "grad_norm": 0.875, "learning_rate": 1.1187559504919075e-05, "loss": 1.1989, "step": 7102 }, { "epoch": 2.231462394722893, "grad_norm": 1.0, "learning_rate": 1.1185020628371946e-05, "loss": 1.2279, "step": 7104 }, { "epoch": 2.2320906217484344, "grad_norm": 0.8671875, "learning_rate": 1.1182481751824819e-05, "loss": 1.2506, "step": 7106 }, { "epoch": 2.2327188487739758, "grad_norm": 0.92578125, "learning_rate": 1.1179942875277689e-05, "loss": 1.1432, "step": 7108 }, { "epoch": 2.233347075799517, "grad_norm": 0.85546875, "learning_rate": 1.1177403998730562e-05, "loss": 1.2258, "step": 7110 }, { "epoch": 2.2339753028250584, "grad_norm": 0.88671875, "learning_rate": 1.1174865122183433e-05, "loss": 1.1279, "step": 7112 }, { "epoch": 2.2346035298505997, "grad_norm": 0.87890625, "learning_rate": 1.1172326245636306e-05, "loss": 1.2938, "step": 7114 }, { "epoch": 2.235231756876141, "grad_norm": 0.84765625, "learning_rate": 1.116978736908918e-05, "loss": 1.1135, "step": 7116 }, { "epoch": 2.2358599839016824, "grad_norm": 0.8359375, "learning_rate": 1.116724849254205e-05, "loss": 1.1685, "step": 7118 }, { "epoch": 2.2364882109272237, "grad_norm": 0.85546875, "learning_rate": 1.1164709615994924e-05, "loss": 1.1768, "step": 7120 }, { "epoch": 2.237116437952765, "grad_norm": 0.921875, "learning_rate": 1.1162170739447795e-05, "loss": 1.118, "step": 7122 }, { "epoch": 2.2377446649783064, "grad_norm": 0.95703125, "learning_rate": 1.1159631862900668e-05, "loss": 1.1579, "step": 7124 }, { "epoch": 2.2383728920038477, "grad_norm": 0.8203125, "learning_rate": 1.115709298635354e-05, "loss": 1.2208, "step": 7126 }, { "epoch": 2.239001119029389, "grad_norm": 0.84765625, "learning_rate": 1.1154554109806413e-05, "loss": 1.2217, "step": 7128 }, { "epoch": 2.239629346054931, "grad_norm": 0.8515625, "learning_rate": 1.1152015233259284e-05, "loss": 1.0771, "step": 7130 }, { "epoch": 2.240257573080472, "grad_norm": 0.84765625, "learning_rate": 1.1149476356712157e-05, "loss": 1.366, "step": 7132 }, { "epoch": 2.2408858001060135, "grad_norm": 0.83984375, "learning_rate": 1.1146937480165027e-05, "loss": 1.118, "step": 7134 }, { "epoch": 2.241514027131555, "grad_norm": 0.8984375, "learning_rate": 1.11443986036179e-05, "loss": 1.1676, "step": 7136 }, { "epoch": 2.242142254157096, "grad_norm": 0.8671875, "learning_rate": 1.1141859727070771e-05, "loss": 1.2008, "step": 7138 }, { "epoch": 2.2427704811826374, "grad_norm": 0.875, "learning_rate": 1.1139320850523645e-05, "loss": 1.1776, "step": 7140 }, { "epoch": 2.2433987082081788, "grad_norm": 0.8515625, "learning_rate": 1.1136781973976516e-05, "loss": 1.2754, "step": 7142 }, { "epoch": 2.24402693523372, "grad_norm": 0.80859375, "learning_rate": 1.1134243097429389e-05, "loss": 1.2551, "step": 7144 }, { "epoch": 2.2446551622592614, "grad_norm": 0.8828125, "learning_rate": 1.113170422088226e-05, "loss": 1.1994, "step": 7146 }, { "epoch": 2.2452833892848028, "grad_norm": 0.859375, "learning_rate": 1.1129165344335133e-05, "loss": 1.2192, "step": 7148 }, { "epoch": 2.245911616310344, "grad_norm": 0.9296875, "learning_rate": 1.1126626467788005e-05, "loss": 1.2826, "step": 7150 }, { "epoch": 2.2465398433358854, "grad_norm": 0.90625, "learning_rate": 1.1124087591240878e-05, "loss": 1.2019, "step": 7152 }, { "epoch": 2.2471680703614267, "grad_norm": 0.81640625, "learning_rate": 1.112154871469375e-05, "loss": 1.1506, "step": 7154 }, { "epoch": 2.247796297386968, "grad_norm": 0.9453125, "learning_rate": 1.1119009838146622e-05, "loss": 1.0458, "step": 7156 }, { "epoch": 2.2484245244125094, "grad_norm": 0.8828125, "learning_rate": 1.1116470961599492e-05, "loss": 1.1759, "step": 7158 }, { "epoch": 2.2490527514380507, "grad_norm": 0.95703125, "learning_rate": 1.1113932085052365e-05, "loss": 1.1225, "step": 7160 }, { "epoch": 2.2496809784635925, "grad_norm": 0.87109375, "learning_rate": 1.1111393208505236e-05, "loss": 1.1735, "step": 7162 }, { "epoch": 2.250309205489134, "grad_norm": 0.9140625, "learning_rate": 1.110885433195811e-05, "loss": 1.0964, "step": 7164 }, { "epoch": 2.250937432514675, "grad_norm": 0.94140625, "learning_rate": 1.1106315455410981e-05, "loss": 1.1575, "step": 7166 }, { "epoch": 2.2515656595402165, "grad_norm": 0.828125, "learning_rate": 1.1103776578863854e-05, "loss": 1.308, "step": 7168 }, { "epoch": 2.252193886565758, "grad_norm": 0.9296875, "learning_rate": 1.1101237702316725e-05, "loss": 1.2186, "step": 7170 }, { "epoch": 2.252822113591299, "grad_norm": 0.90625, "learning_rate": 1.1098698825769598e-05, "loss": 1.2044, "step": 7172 }, { "epoch": 2.2534503406168405, "grad_norm": 0.91796875, "learning_rate": 1.109615994922247e-05, "loss": 1.1716, "step": 7174 }, { "epoch": 2.254078567642382, "grad_norm": 0.8984375, "learning_rate": 1.1093621072675343e-05, "loss": 1.1242, "step": 7176 }, { "epoch": 2.254706794667923, "grad_norm": 0.79296875, "learning_rate": 1.1091082196128213e-05, "loss": 1.254, "step": 7178 }, { "epoch": 2.2553350216934644, "grad_norm": 0.95703125, "learning_rate": 1.1088543319581086e-05, "loss": 1.2207, "step": 7180 }, { "epoch": 2.2559632487190058, "grad_norm": 0.88671875, "learning_rate": 1.1086004443033957e-05, "loss": 1.2314, "step": 7182 }, { "epoch": 2.256591475744547, "grad_norm": 0.8671875, "learning_rate": 1.108346556648683e-05, "loss": 1.1507, "step": 7184 }, { "epoch": 2.2572197027700884, "grad_norm": 0.85546875, "learning_rate": 1.1080926689939702e-05, "loss": 1.0976, "step": 7186 }, { "epoch": 2.2578479297956298, "grad_norm": 0.87109375, "learning_rate": 1.1078387813392575e-05, "loss": 1.2232, "step": 7188 }, { "epoch": 2.258476156821171, "grad_norm": 0.8515625, "learning_rate": 1.1075848936845446e-05, "loss": 1.3585, "step": 7190 }, { "epoch": 2.2591043838467124, "grad_norm": 0.93359375, "learning_rate": 1.1073310060298319e-05, "loss": 1.2097, "step": 7192 }, { "epoch": 2.2597326108722537, "grad_norm": 0.81640625, "learning_rate": 1.107077118375119e-05, "loss": 1.1563, "step": 7194 }, { "epoch": 2.260360837897795, "grad_norm": 0.94140625, "learning_rate": 1.1068232307204064e-05, "loss": 1.1191, "step": 7196 }, { "epoch": 2.260989064923337, "grad_norm": 0.98046875, "learning_rate": 1.1065693430656935e-05, "loss": 1.1386, "step": 7198 }, { "epoch": 2.261617291948878, "grad_norm": 0.9765625, "learning_rate": 1.1063154554109808e-05, "loss": 1.0648, "step": 7200 }, { "epoch": 2.2622455189744195, "grad_norm": 0.97265625, "learning_rate": 1.1060615677562681e-05, "loss": 1.2043, "step": 7202 }, { "epoch": 2.262873745999961, "grad_norm": 0.8828125, "learning_rate": 1.105807680101555e-05, "loss": 1.1253, "step": 7204 }, { "epoch": 2.263501973025502, "grad_norm": 0.83203125, "learning_rate": 1.1055537924468424e-05, "loss": 1.2436, "step": 7206 }, { "epoch": 2.2641302000510435, "grad_norm": 1.0234375, "learning_rate": 1.1052999047921295e-05, "loss": 1.1287, "step": 7208 }, { "epoch": 2.264758427076585, "grad_norm": 0.875, "learning_rate": 1.1050460171374168e-05, "loss": 1.1132, "step": 7210 }, { "epoch": 2.265386654102126, "grad_norm": 0.8515625, "learning_rate": 1.104792129482704e-05, "loss": 1.1685, "step": 7212 }, { "epoch": 2.2660148811276675, "grad_norm": 1.0546875, "learning_rate": 1.1045382418279913e-05, "loss": 1.1164, "step": 7214 }, { "epoch": 2.266643108153209, "grad_norm": 0.8515625, "learning_rate": 1.1042843541732784e-05, "loss": 1.259, "step": 7216 }, { "epoch": 2.26727133517875, "grad_norm": 0.78125, "learning_rate": 1.1040304665185657e-05, "loss": 1.2067, "step": 7218 }, { "epoch": 2.2678995622042915, "grad_norm": 0.9375, "learning_rate": 1.1037765788638529e-05, "loss": 1.2234, "step": 7220 }, { "epoch": 2.268527789229833, "grad_norm": 0.8203125, "learning_rate": 1.1035226912091402e-05, "loss": 1.2389, "step": 7222 }, { "epoch": 2.269156016255374, "grad_norm": 0.81640625, "learning_rate": 1.1032688035544273e-05, "loss": 1.307, "step": 7224 }, { "epoch": 2.269784243280916, "grad_norm": 0.91015625, "learning_rate": 1.1030149158997146e-05, "loss": 1.1486, "step": 7226 }, { "epoch": 2.270412470306457, "grad_norm": 0.90234375, "learning_rate": 1.1027610282450016e-05, "loss": 1.2867, "step": 7228 }, { "epoch": 2.2710406973319985, "grad_norm": 1.0390625, "learning_rate": 1.1025071405902889e-05, "loss": 1.1398, "step": 7230 }, { "epoch": 2.27166892435754, "grad_norm": 0.875, "learning_rate": 1.102253252935576e-05, "loss": 1.1913, "step": 7232 }, { "epoch": 2.272297151383081, "grad_norm": 0.828125, "learning_rate": 1.1019993652808633e-05, "loss": 1.2196, "step": 7234 }, { "epoch": 2.2729253784086225, "grad_norm": 0.87109375, "learning_rate": 1.1017454776261505e-05, "loss": 1.209, "step": 7236 }, { "epoch": 2.273553605434164, "grad_norm": 0.84765625, "learning_rate": 1.1014915899714378e-05, "loss": 1.0517, "step": 7238 }, { "epoch": 2.274181832459705, "grad_norm": 0.84765625, "learning_rate": 1.101237702316725e-05, "loss": 1.1559, "step": 7240 }, { "epoch": 2.2748100594852465, "grad_norm": 0.8828125, "learning_rate": 1.1009838146620122e-05, "loss": 1.2096, "step": 7242 }, { "epoch": 2.275438286510788, "grad_norm": 0.82421875, "learning_rate": 1.1007299270072994e-05, "loss": 1.0206, "step": 7244 }, { "epoch": 2.276066513536329, "grad_norm": 0.8671875, "learning_rate": 1.1004760393525867e-05, "loss": 1.2358, "step": 7246 }, { "epoch": 2.2766947405618705, "grad_norm": 0.87109375, "learning_rate": 1.1002221516978736e-05, "loss": 1.1215, "step": 7248 }, { "epoch": 2.277322967587412, "grad_norm": 0.8359375, "learning_rate": 1.0999682640431611e-05, "loss": 1.313, "step": 7250 }, { "epoch": 2.277951194612953, "grad_norm": 0.87890625, "learning_rate": 1.0997143763884481e-05, "loss": 1.1596, "step": 7252 }, { "epoch": 2.2785794216384945, "grad_norm": 0.8515625, "learning_rate": 1.0994604887337354e-05, "loss": 1.1222, "step": 7254 }, { "epoch": 2.279207648664036, "grad_norm": 0.8984375, "learning_rate": 1.0992066010790225e-05, "loss": 1.116, "step": 7256 }, { "epoch": 2.279835875689577, "grad_norm": 0.86328125, "learning_rate": 1.0989527134243098e-05, "loss": 1.1827, "step": 7258 }, { "epoch": 2.2804641027151185, "grad_norm": 0.91796875, "learning_rate": 1.098698825769597e-05, "loss": 1.1286, "step": 7260 }, { "epoch": 2.28109232974066, "grad_norm": 0.83203125, "learning_rate": 1.0984449381148843e-05, "loss": 0.9997, "step": 7262 }, { "epoch": 2.2817205567662016, "grad_norm": 0.875, "learning_rate": 1.0981910504601714e-05, "loss": 1.1823, "step": 7264 }, { "epoch": 2.282348783791743, "grad_norm": 0.8203125, "learning_rate": 1.0979371628054587e-05, "loss": 1.2145, "step": 7266 }, { "epoch": 2.282977010817284, "grad_norm": 0.89453125, "learning_rate": 1.0976832751507459e-05, "loss": 1.1597, "step": 7268 }, { "epoch": 2.2836052378428255, "grad_norm": 0.8359375, "learning_rate": 1.0974293874960332e-05, "loss": 1.1206, "step": 7270 }, { "epoch": 2.284233464868367, "grad_norm": 0.8984375, "learning_rate": 1.0971754998413201e-05, "loss": 1.1522, "step": 7272 }, { "epoch": 2.284861691893908, "grad_norm": 0.8515625, "learning_rate": 1.0969216121866075e-05, "loss": 1.2065, "step": 7274 }, { "epoch": 2.2854899189194495, "grad_norm": 0.9140625, "learning_rate": 1.0966677245318946e-05, "loss": 1.2534, "step": 7276 }, { "epoch": 2.286118145944991, "grad_norm": 0.84375, "learning_rate": 1.0964138368771819e-05, "loss": 1.3111, "step": 7278 }, { "epoch": 2.286746372970532, "grad_norm": 0.83203125, "learning_rate": 1.096159949222469e-05, "loss": 1.1712, "step": 7280 }, { "epoch": 2.2873745999960735, "grad_norm": 0.81640625, "learning_rate": 1.0959060615677563e-05, "loss": 1.2978, "step": 7282 }, { "epoch": 2.288002827021615, "grad_norm": 1.0078125, "learning_rate": 1.0956521739130435e-05, "loss": 1.1384, "step": 7284 }, { "epoch": 2.288631054047156, "grad_norm": 0.91796875, "learning_rate": 1.0953982862583308e-05, "loss": 1.1677, "step": 7286 }, { "epoch": 2.2892592810726975, "grad_norm": 0.84765625, "learning_rate": 1.0951443986036181e-05, "loss": 1.2128, "step": 7288 }, { "epoch": 2.289887508098239, "grad_norm": 1.0703125, "learning_rate": 1.0948905109489052e-05, "loss": 1.274, "step": 7290 }, { "epoch": 2.2905157351237806, "grad_norm": 0.890625, "learning_rate": 1.0946366232941925e-05, "loss": 1.1172, "step": 7292 }, { "epoch": 2.291143962149322, "grad_norm": 0.8671875, "learning_rate": 1.0943827356394797e-05, "loss": 1.2049, "step": 7294 }, { "epoch": 2.2917721891748633, "grad_norm": 0.859375, "learning_rate": 1.094128847984767e-05, "loss": 1.1945, "step": 7296 }, { "epoch": 2.2924004162004046, "grad_norm": 0.8671875, "learning_rate": 1.093874960330054e-05, "loss": 1.0366, "step": 7298 }, { "epoch": 2.293028643225946, "grad_norm": 0.84765625, "learning_rate": 1.0936210726753413e-05, "loss": 1.1452, "step": 7300 }, { "epoch": 2.2936568702514872, "grad_norm": 0.87109375, "learning_rate": 1.0933671850206284e-05, "loss": 1.2821, "step": 7302 }, { "epoch": 2.2942850972770286, "grad_norm": 0.88671875, "learning_rate": 1.0931132973659157e-05, "loss": 1.0961, "step": 7304 }, { "epoch": 2.29491332430257, "grad_norm": 0.78515625, "learning_rate": 1.0928594097112029e-05, "loss": 1.2104, "step": 7306 }, { "epoch": 2.295541551328111, "grad_norm": 0.8515625, "learning_rate": 1.0926055220564902e-05, "loss": 1.29, "step": 7308 }, { "epoch": 2.2961697783536525, "grad_norm": 0.8515625, "learning_rate": 1.0923516344017773e-05, "loss": 1.2525, "step": 7310 }, { "epoch": 2.296798005379194, "grad_norm": 0.8828125, "learning_rate": 1.0920977467470646e-05, "loss": 1.2847, "step": 7312 }, { "epoch": 2.297426232404735, "grad_norm": 0.90234375, "learning_rate": 1.0918438590923517e-05, "loss": 1.1948, "step": 7314 }, { "epoch": 2.2980544594302765, "grad_norm": 0.91796875, "learning_rate": 1.091589971437639e-05, "loss": 1.1595, "step": 7316 }, { "epoch": 2.298682686455818, "grad_norm": 0.84375, "learning_rate": 1.091336083782926e-05, "loss": 1.2342, "step": 7318 }, { "epoch": 2.299310913481359, "grad_norm": 0.83203125, "learning_rate": 1.0910821961282135e-05, "loss": 1.1913, "step": 7320 }, { "epoch": 2.2999391405069005, "grad_norm": 0.97265625, "learning_rate": 1.0908283084735005e-05, "loss": 1.1333, "step": 7322 }, { "epoch": 2.300567367532442, "grad_norm": 0.875, "learning_rate": 1.0905744208187878e-05, "loss": 1.1376, "step": 7324 }, { "epoch": 2.301195594557983, "grad_norm": 0.88671875, "learning_rate": 1.0903205331640749e-05, "loss": 1.1524, "step": 7326 }, { "epoch": 2.3018238215835245, "grad_norm": 0.8515625, "learning_rate": 1.0900666455093622e-05, "loss": 1.1174, "step": 7328 }, { "epoch": 2.3024520486090663, "grad_norm": 0.82421875, "learning_rate": 1.0898127578546494e-05, "loss": 1.1047, "step": 7330 }, { "epoch": 2.3030802756346076, "grad_norm": 0.8203125, "learning_rate": 1.0895588701999367e-05, "loss": 1.1898, "step": 7332 }, { "epoch": 2.303708502660149, "grad_norm": 0.8125, "learning_rate": 1.0893049825452238e-05, "loss": 1.3392, "step": 7334 }, { "epoch": 2.3043367296856903, "grad_norm": 0.8671875, "learning_rate": 1.0890510948905111e-05, "loss": 1.2683, "step": 7336 }, { "epoch": 2.3049649567112316, "grad_norm": 0.7890625, "learning_rate": 1.0887972072357982e-05, "loss": 1.0879, "step": 7338 }, { "epoch": 2.305593183736773, "grad_norm": 0.92578125, "learning_rate": 1.0885433195810856e-05, "loss": 1.095, "step": 7340 }, { "epoch": 2.3062214107623142, "grad_norm": 0.83984375, "learning_rate": 1.0882894319263725e-05, "loss": 1.3192, "step": 7342 }, { "epoch": 2.3068496377878556, "grad_norm": 0.8828125, "learning_rate": 1.0880355442716598e-05, "loss": 1.3008, "step": 7344 }, { "epoch": 2.307477864813397, "grad_norm": 0.8203125, "learning_rate": 1.087781656616947e-05, "loss": 1.1895, "step": 7346 }, { "epoch": 2.3081060918389382, "grad_norm": 0.87109375, "learning_rate": 1.0875277689622343e-05, "loss": 1.1833, "step": 7348 }, { "epoch": 2.3087343188644796, "grad_norm": 0.8515625, "learning_rate": 1.0872738813075214e-05, "loss": 1.1241, "step": 7350 }, { "epoch": 2.309362545890021, "grad_norm": 0.921875, "learning_rate": 1.0870199936528087e-05, "loss": 1.1805, "step": 7352 }, { "epoch": 2.309990772915562, "grad_norm": 0.87109375, "learning_rate": 1.0867661059980959e-05, "loss": 1.0791, "step": 7354 }, { "epoch": 2.3106189999411035, "grad_norm": 0.8828125, "learning_rate": 1.0865122183433832e-05, "loss": 1.1263, "step": 7356 }, { "epoch": 2.3112472269666453, "grad_norm": 0.828125, "learning_rate": 1.0862583306886703e-05, "loss": 1.1829, "step": 7358 }, { "epoch": 2.3118754539921866, "grad_norm": 1.0078125, "learning_rate": 1.0860044430339576e-05, "loss": 1.2385, "step": 7360 }, { "epoch": 2.312503681017728, "grad_norm": 0.83203125, "learning_rate": 1.0857505553792448e-05, "loss": 1.1448, "step": 7362 }, { "epoch": 2.3131319080432693, "grad_norm": 1.140625, "learning_rate": 1.085496667724532e-05, "loss": 1.2102, "step": 7364 }, { "epoch": 2.3137601350688106, "grad_norm": 0.83203125, "learning_rate": 1.085242780069819e-05, "loss": 1.2639, "step": 7366 }, { "epoch": 2.314388362094352, "grad_norm": 0.8515625, "learning_rate": 1.0849888924151063e-05, "loss": 1.1321, "step": 7368 }, { "epoch": 2.3150165891198933, "grad_norm": 0.89453125, "learning_rate": 1.0847350047603935e-05, "loss": 1.0745, "step": 7370 }, { "epoch": 2.3156448161454346, "grad_norm": 0.95703125, "learning_rate": 1.0844811171056808e-05, "loss": 1.1193, "step": 7372 }, { "epoch": 2.316273043170976, "grad_norm": 0.9140625, "learning_rate": 1.0842272294509681e-05, "loss": 1.303, "step": 7374 }, { "epoch": 2.3169012701965173, "grad_norm": 0.8359375, "learning_rate": 1.0839733417962552e-05, "loss": 1.1937, "step": 7376 }, { "epoch": 2.3175294972220586, "grad_norm": 0.8359375, "learning_rate": 1.0837194541415425e-05, "loss": 1.229, "step": 7378 }, { "epoch": 2.3181577242476, "grad_norm": 1.0234375, "learning_rate": 1.0834655664868297e-05, "loss": 1.0703, "step": 7380 }, { "epoch": 2.3187859512731412, "grad_norm": 0.7578125, "learning_rate": 1.083211678832117e-05, "loss": 1.3122, "step": 7382 }, { "epoch": 2.3194141782986826, "grad_norm": 0.875, "learning_rate": 1.0829577911774041e-05, "loss": 1.208, "step": 7384 }, { "epoch": 2.320042405324224, "grad_norm": 0.8125, "learning_rate": 1.0827039035226914e-05, "loss": 1.1552, "step": 7386 }, { "epoch": 2.3206706323497652, "grad_norm": 0.96875, "learning_rate": 1.0824500158679786e-05, "loss": 1.1077, "step": 7388 }, { "epoch": 2.3212988593753066, "grad_norm": 0.81640625, "learning_rate": 1.0821961282132659e-05, "loss": 1.2388, "step": 7390 }, { "epoch": 2.321927086400848, "grad_norm": 0.91015625, "learning_rate": 1.0819422405585528e-05, "loss": 1.2115, "step": 7392 }, { "epoch": 2.322555313426389, "grad_norm": 0.87890625, "learning_rate": 1.0816883529038402e-05, "loss": 1.1479, "step": 7394 }, { "epoch": 2.323183540451931, "grad_norm": 0.8203125, "learning_rate": 1.0814344652491273e-05, "loss": 1.2548, "step": 7396 }, { "epoch": 2.3238117674774723, "grad_norm": 0.90234375, "learning_rate": 1.0811805775944146e-05, "loss": 1.1445, "step": 7398 }, { "epoch": 2.3244399945030136, "grad_norm": 0.84765625, "learning_rate": 1.0809266899397017e-05, "loss": 1.1688, "step": 7400 }, { "epoch": 2.325068221528555, "grad_norm": 0.8203125, "learning_rate": 1.080672802284989e-05, "loss": 1.2903, "step": 7402 }, { "epoch": 2.3256964485540963, "grad_norm": 0.828125, "learning_rate": 1.0804189146302762e-05, "loss": 1.2819, "step": 7404 }, { "epoch": 2.3263246755796376, "grad_norm": 0.87890625, "learning_rate": 1.0801650269755635e-05, "loss": 1.1805, "step": 7406 }, { "epoch": 2.326952902605179, "grad_norm": 0.921875, "learning_rate": 1.0799111393208506e-05, "loss": 0.9431, "step": 7408 }, { "epoch": 2.3275811296307203, "grad_norm": 0.828125, "learning_rate": 1.079657251666138e-05, "loss": 1.0793, "step": 7410 }, { "epoch": 2.3282093566562616, "grad_norm": 1.0078125, "learning_rate": 1.0794033640114249e-05, "loss": 1.1367, "step": 7412 }, { "epoch": 2.328837583681803, "grad_norm": 0.84765625, "learning_rate": 1.0791494763567124e-05, "loss": 1.1915, "step": 7414 }, { "epoch": 2.3294658107073443, "grad_norm": 0.80859375, "learning_rate": 1.0788955887019993e-05, "loss": 1.1777, "step": 7416 }, { "epoch": 2.3300940377328856, "grad_norm": 0.86328125, "learning_rate": 1.0786417010472867e-05, "loss": 1.1717, "step": 7418 }, { "epoch": 2.330722264758427, "grad_norm": 0.83984375, "learning_rate": 1.0783878133925738e-05, "loss": 1.229, "step": 7420 }, { "epoch": 2.3313504917839682, "grad_norm": 0.86328125, "learning_rate": 1.0781339257378611e-05, "loss": 1.1072, "step": 7422 }, { "epoch": 2.33197871880951, "grad_norm": 0.90234375, "learning_rate": 1.0778800380831482e-05, "loss": 1.2157, "step": 7424 }, { "epoch": 2.3326069458350513, "grad_norm": 0.828125, "learning_rate": 1.0776261504284355e-05, "loss": 1.1263, "step": 7426 }, { "epoch": 2.3332351728605927, "grad_norm": 0.83984375, "learning_rate": 1.0773722627737227e-05, "loss": 1.2423, "step": 7428 }, { "epoch": 2.333863399886134, "grad_norm": 0.89453125, "learning_rate": 1.07711837511901e-05, "loss": 1.2594, "step": 7430 }, { "epoch": 2.3344916269116753, "grad_norm": 0.8671875, "learning_rate": 1.0768644874642971e-05, "loss": 1.2169, "step": 7432 }, { "epoch": 2.3351198539372167, "grad_norm": 0.85546875, "learning_rate": 1.0766105998095844e-05, "loss": 1.2165, "step": 7434 }, { "epoch": 2.335748080962758, "grad_norm": 0.84765625, "learning_rate": 1.0763567121548714e-05, "loss": 1.1919, "step": 7436 }, { "epoch": 2.3363763079882993, "grad_norm": 0.91796875, "learning_rate": 1.0761028245001587e-05, "loss": 1.1077, "step": 7438 }, { "epoch": 2.3370045350138406, "grad_norm": 0.84375, "learning_rate": 1.0758489368454459e-05, "loss": 1.1936, "step": 7440 }, { "epoch": 2.337632762039382, "grad_norm": 0.8671875, "learning_rate": 1.0755950491907332e-05, "loss": 1.088, "step": 7442 }, { "epoch": 2.3382609890649233, "grad_norm": 0.92578125, "learning_rate": 1.0753411615360203e-05, "loss": 1.2247, "step": 7444 }, { "epoch": 2.3388892160904646, "grad_norm": 0.90234375, "learning_rate": 1.0750872738813076e-05, "loss": 1.1426, "step": 7446 }, { "epoch": 2.339517443116006, "grad_norm": 0.94921875, "learning_rate": 1.0748333862265947e-05, "loss": 1.1249, "step": 7448 }, { "epoch": 2.3401456701415473, "grad_norm": 0.87109375, "learning_rate": 1.074579498571882e-05, "loss": 0.98, "step": 7450 }, { "epoch": 2.3407738971670886, "grad_norm": 0.96875, "learning_rate": 1.0743256109171692e-05, "loss": 1.1628, "step": 7452 }, { "epoch": 2.34140212419263, "grad_norm": 0.91796875, "learning_rate": 1.0740717232624565e-05, "loss": 1.067, "step": 7454 }, { "epoch": 2.3420303512181713, "grad_norm": 0.890625, "learning_rate": 1.0738178356077435e-05, "loss": 1.1918, "step": 7456 }, { "epoch": 2.3426585782437126, "grad_norm": 0.859375, "learning_rate": 1.073563947953031e-05, "loss": 1.2056, "step": 7458 }, { "epoch": 2.3432868052692544, "grad_norm": 0.859375, "learning_rate": 1.0733100602983183e-05, "loss": 1.1389, "step": 7460 }, { "epoch": 2.3439150322947957, "grad_norm": 0.8125, "learning_rate": 1.0730561726436052e-05, "loss": 1.1479, "step": 7462 }, { "epoch": 2.344543259320337, "grad_norm": 0.97265625, "learning_rate": 1.0728022849888925e-05, "loss": 1.119, "step": 7464 }, { "epoch": 2.3451714863458784, "grad_norm": 0.9296875, "learning_rate": 1.0725483973341797e-05, "loss": 1.0562, "step": 7466 }, { "epoch": 2.3457997133714197, "grad_norm": 0.91015625, "learning_rate": 1.072294509679467e-05, "loss": 1.097, "step": 7468 }, { "epoch": 2.346427940396961, "grad_norm": 0.88671875, "learning_rate": 1.0720406220247541e-05, "loss": 1.2363, "step": 7470 }, { "epoch": 2.3470561674225023, "grad_norm": 0.8125, "learning_rate": 1.0717867343700414e-05, "loss": 1.168, "step": 7472 }, { "epoch": 2.3476843944480437, "grad_norm": 0.9140625, "learning_rate": 1.0715328467153286e-05, "loss": 1.0952, "step": 7474 }, { "epoch": 2.348312621473585, "grad_norm": 0.88671875, "learning_rate": 1.0712789590606159e-05, "loss": 1.2332, "step": 7476 }, { "epoch": 2.3489408484991263, "grad_norm": 0.85546875, "learning_rate": 1.071025071405903e-05, "loss": 1.1856, "step": 7478 }, { "epoch": 2.3495690755246676, "grad_norm": 0.859375, "learning_rate": 1.0707711837511903e-05, "loss": 1.2385, "step": 7480 }, { "epoch": 2.350197302550209, "grad_norm": 0.96875, "learning_rate": 1.0705172960964773e-05, "loss": 1.2637, "step": 7482 }, { "epoch": 2.3508255295757503, "grad_norm": 0.94140625, "learning_rate": 1.0702634084417648e-05, "loss": 1.0995, "step": 7484 }, { "epoch": 2.3514537566012916, "grad_norm": 0.953125, "learning_rate": 1.0700095207870517e-05, "loss": 1.2033, "step": 7486 }, { "epoch": 2.352081983626833, "grad_norm": 1.0546875, "learning_rate": 1.069755633132339e-05, "loss": 1.2124, "step": 7488 }, { "epoch": 2.3527102106523747, "grad_norm": 0.9765625, "learning_rate": 1.0695017454776262e-05, "loss": 1.2302, "step": 7490 }, { "epoch": 2.353338437677916, "grad_norm": 0.8984375, "learning_rate": 1.0692478578229135e-05, "loss": 1.2885, "step": 7492 }, { "epoch": 2.3539666647034574, "grad_norm": 0.796875, "learning_rate": 1.0689939701682006e-05, "loss": 1.1231, "step": 7494 }, { "epoch": 2.3545948917289987, "grad_norm": 0.796875, "learning_rate": 1.068740082513488e-05, "loss": 1.1761, "step": 7496 }, { "epoch": 2.35522311875454, "grad_norm": 0.859375, "learning_rate": 1.068486194858775e-05, "loss": 1.2641, "step": 7498 }, { "epoch": 2.3558513457800814, "grad_norm": 0.79296875, "learning_rate": 1.0682323072040624e-05, "loss": 1.1151, "step": 7500 }, { "epoch": 2.3564795728056227, "grad_norm": 0.86328125, "learning_rate": 1.0679784195493495e-05, "loss": 1.2444, "step": 7502 }, { "epoch": 2.357107799831164, "grad_norm": 0.85546875, "learning_rate": 1.0677245318946368e-05, "loss": 1.272, "step": 7504 }, { "epoch": 2.3577360268567054, "grad_norm": 0.86328125, "learning_rate": 1.0674706442399238e-05, "loss": 1.1819, "step": 7506 }, { "epoch": 2.3583642538822467, "grad_norm": 0.859375, "learning_rate": 1.0672167565852111e-05, "loss": 1.2951, "step": 7508 }, { "epoch": 2.358992480907788, "grad_norm": 0.828125, "learning_rate": 1.0669628689304982e-05, "loss": 1.1947, "step": 7510 }, { "epoch": 2.3596207079333293, "grad_norm": 0.83984375, "learning_rate": 1.0667089812757855e-05, "loss": 1.2004, "step": 7512 }, { "epoch": 2.3602489349588707, "grad_norm": 1.0703125, "learning_rate": 1.0664550936210727e-05, "loss": 1.2573, "step": 7514 }, { "epoch": 2.360877161984412, "grad_norm": 0.83203125, "learning_rate": 1.06620120596636e-05, "loss": 1.1788, "step": 7516 }, { "epoch": 2.3615053890099533, "grad_norm": 0.83984375, "learning_rate": 1.0659473183116471e-05, "loss": 1.3298, "step": 7518 }, { "epoch": 2.3621336160354947, "grad_norm": 0.85546875, "learning_rate": 1.0656934306569344e-05, "loss": 1.2565, "step": 7520 }, { "epoch": 2.362761843061036, "grad_norm": 0.87890625, "learning_rate": 1.0654395430022216e-05, "loss": 1.3338, "step": 7522 }, { "epoch": 2.3633900700865773, "grad_norm": 0.96875, "learning_rate": 1.0651856553475089e-05, "loss": 1.12, "step": 7524 }, { "epoch": 2.364018297112119, "grad_norm": 0.97265625, "learning_rate": 1.064931767692796e-05, "loss": 0.9746, "step": 7526 }, { "epoch": 2.3646465241376604, "grad_norm": 0.859375, "learning_rate": 1.0646778800380833e-05, "loss": 1.2273, "step": 7528 }, { "epoch": 2.3652747511632017, "grad_norm": 0.84765625, "learning_rate": 1.0644239923833703e-05, "loss": 1.2064, "step": 7530 }, { "epoch": 2.365902978188743, "grad_norm": 0.9296875, "learning_rate": 1.0641701047286576e-05, "loss": 1.118, "step": 7532 }, { "epoch": 2.3665312052142844, "grad_norm": 0.99609375, "learning_rate": 1.0639162170739447e-05, "loss": 1.1464, "step": 7534 }, { "epoch": 2.3671594322398257, "grad_norm": 0.8984375, "learning_rate": 1.063662329419232e-05, "loss": 1.2686, "step": 7536 }, { "epoch": 2.367787659265367, "grad_norm": 0.8984375, "learning_rate": 1.0634084417645192e-05, "loss": 1.0966, "step": 7538 }, { "epoch": 2.3684158862909084, "grad_norm": 0.890625, "learning_rate": 1.0631545541098065e-05, "loss": 1.0444, "step": 7540 }, { "epoch": 2.3690441133164497, "grad_norm": 1.015625, "learning_rate": 1.0629006664550936e-05, "loss": 1.1434, "step": 7542 }, { "epoch": 2.369672340341991, "grad_norm": 0.875, "learning_rate": 1.062646778800381e-05, "loss": 1.2808, "step": 7544 }, { "epoch": 2.3703005673675324, "grad_norm": 0.953125, "learning_rate": 1.0623928911456682e-05, "loss": 1.207, "step": 7546 }, { "epoch": 2.3709287943930737, "grad_norm": 0.9296875, "learning_rate": 1.0621390034909554e-05, "loss": 1.2094, "step": 7548 }, { "epoch": 2.371557021418615, "grad_norm": 0.89453125, "learning_rate": 1.0618851158362427e-05, "loss": 1.1322, "step": 7550 }, { "epoch": 2.3721852484441563, "grad_norm": 0.81640625, "learning_rate": 1.0616312281815298e-05, "loss": 1.1648, "step": 7552 }, { "epoch": 2.3728134754696977, "grad_norm": 0.828125, "learning_rate": 1.0613773405268171e-05, "loss": 1.1761, "step": 7554 }, { "epoch": 2.3734417024952394, "grad_norm": 0.83203125, "learning_rate": 1.0611234528721041e-05, "loss": 1.1397, "step": 7556 }, { "epoch": 2.3740699295207808, "grad_norm": 0.87109375, "learning_rate": 1.0608695652173914e-05, "loss": 1.0505, "step": 7558 }, { "epoch": 2.374698156546322, "grad_norm": 0.859375, "learning_rate": 1.0606156775626786e-05, "loss": 1.3071, "step": 7560 }, { "epoch": 2.3753263835718634, "grad_norm": 0.76953125, "learning_rate": 1.0603617899079659e-05, "loss": 1.1697, "step": 7562 }, { "epoch": 2.3759546105974048, "grad_norm": 0.875, "learning_rate": 1.060107902253253e-05, "loss": 1.3307, "step": 7564 }, { "epoch": 2.376582837622946, "grad_norm": 0.96484375, "learning_rate": 1.0598540145985403e-05, "loss": 1.1086, "step": 7566 }, { "epoch": 2.3772110646484874, "grad_norm": 0.875, "learning_rate": 1.0596001269438274e-05, "loss": 1.3303, "step": 7568 }, { "epoch": 2.3778392916740287, "grad_norm": 0.8828125, "learning_rate": 1.0593462392891148e-05, "loss": 1.176, "step": 7570 }, { "epoch": 2.37846751869957, "grad_norm": 0.81640625, "learning_rate": 1.0590923516344019e-05, "loss": 1.3351, "step": 7572 }, { "epoch": 2.3790957457251114, "grad_norm": 0.84375, "learning_rate": 1.0588384639796892e-05, "loss": 1.1274, "step": 7574 }, { "epoch": 2.3797239727506527, "grad_norm": 0.9296875, "learning_rate": 1.0585845763249762e-05, "loss": 1.0805, "step": 7576 }, { "epoch": 2.380352199776194, "grad_norm": 0.99609375, "learning_rate": 1.0583306886702636e-05, "loss": 1.1506, "step": 7578 }, { "epoch": 2.3809804268017354, "grad_norm": 0.90234375, "learning_rate": 1.0580768010155506e-05, "loss": 1.1659, "step": 7580 }, { "epoch": 2.3816086538272767, "grad_norm": 0.82421875, "learning_rate": 1.057822913360838e-05, "loss": 1.2277, "step": 7582 }, { "epoch": 2.382236880852818, "grad_norm": 0.86328125, "learning_rate": 1.057569025706125e-05, "loss": 1.1857, "step": 7584 }, { "epoch": 2.3828651078783594, "grad_norm": 0.90625, "learning_rate": 1.0573151380514124e-05, "loss": 1.1172, "step": 7586 }, { "epoch": 2.3834933349039007, "grad_norm": 0.81640625, "learning_rate": 1.0570612503966995e-05, "loss": 1.1305, "step": 7588 }, { "epoch": 2.384121561929442, "grad_norm": 0.890625, "learning_rate": 1.0568073627419868e-05, "loss": 1.2697, "step": 7590 }, { "epoch": 2.384749788954984, "grad_norm": 0.84375, "learning_rate": 1.056553475087274e-05, "loss": 1.2452, "step": 7592 }, { "epoch": 2.385378015980525, "grad_norm": 0.88671875, "learning_rate": 1.0562995874325613e-05, "loss": 1.2094, "step": 7594 }, { "epoch": 2.3860062430060665, "grad_norm": 0.89453125, "learning_rate": 1.0560456997778484e-05, "loss": 1.1119, "step": 7596 }, { "epoch": 2.386634470031608, "grad_norm": 0.93359375, "learning_rate": 1.0557918121231357e-05, "loss": 1.2432, "step": 7598 }, { "epoch": 2.387262697057149, "grad_norm": 0.96484375, "learning_rate": 1.0555379244684227e-05, "loss": 1.35, "step": 7600 }, { "epoch": 2.3878909240826904, "grad_norm": 0.82421875, "learning_rate": 1.05528403681371e-05, "loss": 1.2152, "step": 7602 }, { "epoch": 2.3885191511082318, "grad_norm": 0.890625, "learning_rate": 1.0550301491589971e-05, "loss": 1.0723, "step": 7604 }, { "epoch": 2.389147378133773, "grad_norm": 0.84375, "learning_rate": 1.0547762615042844e-05, "loss": 1.2081, "step": 7606 }, { "epoch": 2.3897756051593144, "grad_norm": 0.859375, "learning_rate": 1.0545223738495716e-05, "loss": 1.1111, "step": 7608 }, { "epoch": 2.3904038321848557, "grad_norm": 0.83984375, "learning_rate": 1.0542684861948589e-05, "loss": 1.2082, "step": 7610 }, { "epoch": 2.391032059210397, "grad_norm": 0.8828125, "learning_rate": 1.054014598540146e-05, "loss": 1.2288, "step": 7612 }, { "epoch": 2.3916602862359384, "grad_norm": 0.85546875, "learning_rate": 1.0537607108854333e-05, "loss": 1.181, "step": 7614 }, { "epoch": 2.3922885132614797, "grad_norm": 0.90625, "learning_rate": 1.0535068232307205e-05, "loss": 1.1182, "step": 7616 }, { "epoch": 2.392916740287021, "grad_norm": 0.90625, "learning_rate": 1.0532529355760078e-05, "loss": 1.2174, "step": 7618 }, { "epoch": 2.3935449673125624, "grad_norm": 0.8203125, "learning_rate": 1.0529990479212947e-05, "loss": 1.1535, "step": 7620 }, { "epoch": 2.394173194338104, "grad_norm": 0.90234375, "learning_rate": 1.0527451602665822e-05, "loss": 1.1007, "step": 7622 }, { "epoch": 2.3948014213636455, "grad_norm": 0.93359375, "learning_rate": 1.0524912726118692e-05, "loss": 1.2071, "step": 7624 }, { "epoch": 2.395429648389187, "grad_norm": 0.875, "learning_rate": 1.0522373849571565e-05, "loss": 1.2248, "step": 7626 }, { "epoch": 2.396057875414728, "grad_norm": 0.83203125, "learning_rate": 1.0519834973024436e-05, "loss": 1.1908, "step": 7628 }, { "epoch": 2.3966861024402695, "grad_norm": 0.96484375, "learning_rate": 1.051729609647731e-05, "loss": 1.2519, "step": 7630 }, { "epoch": 2.397314329465811, "grad_norm": 0.90234375, "learning_rate": 1.0514757219930182e-05, "loss": 1.1041, "step": 7632 }, { "epoch": 2.397942556491352, "grad_norm": 0.8671875, "learning_rate": 1.0512218343383054e-05, "loss": 1.1484, "step": 7634 }, { "epoch": 2.3985707835168935, "grad_norm": 0.88671875, "learning_rate": 1.0509679466835927e-05, "loss": 1.165, "step": 7636 }, { "epoch": 2.399199010542435, "grad_norm": 0.83203125, "learning_rate": 1.0507140590288798e-05, "loss": 1.2547, "step": 7638 }, { "epoch": 2.399827237567976, "grad_norm": 0.93359375, "learning_rate": 1.0504601713741671e-05, "loss": 1.3175, "step": 7640 }, { "epoch": 2.4004554645935174, "grad_norm": 1.0078125, "learning_rate": 1.0502062837194543e-05, "loss": 1.1751, "step": 7642 }, { "epoch": 2.4010836916190588, "grad_norm": 0.8203125, "learning_rate": 1.0499523960647416e-05, "loss": 1.1984, "step": 7644 }, { "epoch": 2.4017119186446, "grad_norm": 0.84765625, "learning_rate": 1.0496985084100285e-05, "loss": 1.036, "step": 7646 }, { "epoch": 2.4023401456701414, "grad_norm": 0.9375, "learning_rate": 1.049444620755316e-05, "loss": 1.3037, "step": 7648 }, { "epoch": 2.4029683726956828, "grad_norm": 1.046875, "learning_rate": 1.049190733100603e-05, "loss": 1.2453, "step": 7650 }, { "epoch": 2.403596599721224, "grad_norm": 0.8828125, "learning_rate": 1.0489368454458903e-05, "loss": 1.1922, "step": 7652 }, { "epoch": 2.4042248267467654, "grad_norm": 0.8203125, "learning_rate": 1.0486829577911774e-05, "loss": 1.282, "step": 7654 }, { "epoch": 2.4048530537723067, "grad_norm": 0.890625, "learning_rate": 1.0484290701364647e-05, "loss": 1.1515, "step": 7656 }, { "epoch": 2.4054812807978485, "grad_norm": 0.84375, "learning_rate": 1.0481751824817519e-05, "loss": 1.1316, "step": 7658 }, { "epoch": 2.40610950782339, "grad_norm": 0.9296875, "learning_rate": 1.0479212948270392e-05, "loss": 1.1016, "step": 7660 }, { "epoch": 2.406737734848931, "grad_norm": 0.83203125, "learning_rate": 1.0476674071723263e-05, "loss": 1.2394, "step": 7662 }, { "epoch": 2.4073659618744725, "grad_norm": 0.91015625, "learning_rate": 1.0474135195176136e-05, "loss": 1.2902, "step": 7664 }, { "epoch": 2.407994188900014, "grad_norm": 0.8828125, "learning_rate": 1.0471596318629008e-05, "loss": 1.1772, "step": 7666 }, { "epoch": 2.408622415925555, "grad_norm": 0.9609375, "learning_rate": 1.046905744208188e-05, "loss": 1.1476, "step": 7668 }, { "epoch": 2.4092506429510965, "grad_norm": 0.8828125, "learning_rate": 1.046651856553475e-05, "loss": 1.1821, "step": 7670 }, { "epoch": 2.409878869976638, "grad_norm": 0.8515625, "learning_rate": 1.0463979688987624e-05, "loss": 1.1742, "step": 7672 }, { "epoch": 2.410507097002179, "grad_norm": 0.76953125, "learning_rate": 1.0461440812440495e-05, "loss": 1.2042, "step": 7674 }, { "epoch": 2.4111353240277205, "grad_norm": 0.7890625, "learning_rate": 1.0458901935893368e-05, "loss": 1.1789, "step": 7676 }, { "epoch": 2.411763551053262, "grad_norm": 0.87890625, "learning_rate": 1.045636305934624e-05, "loss": 1.297, "step": 7678 }, { "epoch": 2.412391778078803, "grad_norm": 0.9140625, "learning_rate": 1.0453824182799113e-05, "loss": 1.3176, "step": 7680 }, { "epoch": 2.4130200051043444, "grad_norm": 0.92578125, "learning_rate": 1.0451285306251984e-05, "loss": 1.2168, "step": 7682 }, { "epoch": 2.4136482321298858, "grad_norm": 0.86328125, "learning_rate": 1.0448746429704857e-05, "loss": 1.1846, "step": 7684 }, { "epoch": 2.414276459155427, "grad_norm": 0.9375, "learning_rate": 1.0446207553157728e-05, "loss": 1.0846, "step": 7686 }, { "epoch": 2.414904686180969, "grad_norm": 0.84375, "learning_rate": 1.0443668676610601e-05, "loss": 1.4388, "step": 7688 }, { "epoch": 2.41553291320651, "grad_norm": 0.96484375, "learning_rate": 1.0441129800063473e-05, "loss": 1.253, "step": 7690 }, { "epoch": 2.4161611402320515, "grad_norm": 0.84765625, "learning_rate": 1.0438590923516346e-05, "loss": 1.1133, "step": 7692 }, { "epoch": 2.416789367257593, "grad_norm": 0.95703125, "learning_rate": 1.0436052046969216e-05, "loss": 1.1624, "step": 7694 }, { "epoch": 2.417417594283134, "grad_norm": 0.89453125, "learning_rate": 1.0433513170422089e-05, "loss": 1.256, "step": 7696 }, { "epoch": 2.4180458213086755, "grad_norm": 0.90625, "learning_rate": 1.043097429387496e-05, "loss": 1.1515, "step": 7698 }, { "epoch": 2.418674048334217, "grad_norm": 0.88671875, "learning_rate": 1.0428435417327833e-05, "loss": 1.2425, "step": 7700 }, { "epoch": 2.419302275359758, "grad_norm": 0.88671875, "learning_rate": 1.0425896540780704e-05, "loss": 1.1807, "step": 7702 }, { "epoch": 2.4199305023852995, "grad_norm": 0.9296875, "learning_rate": 1.0423357664233578e-05, "loss": 1.1746, "step": 7704 }, { "epoch": 2.420558729410841, "grad_norm": 0.8046875, "learning_rate": 1.0420818787686449e-05, "loss": 1.2989, "step": 7706 }, { "epoch": 2.421186956436382, "grad_norm": 0.90234375, "learning_rate": 1.0418279911139322e-05, "loss": 1.1241, "step": 7708 }, { "epoch": 2.4218151834619235, "grad_norm": 0.94140625, "learning_rate": 1.0415741034592193e-05, "loss": 1.1698, "step": 7710 }, { "epoch": 2.422443410487465, "grad_norm": 0.84375, "learning_rate": 1.0413202158045066e-05, "loss": 1.2301, "step": 7712 }, { "epoch": 2.423071637513006, "grad_norm": 0.83984375, "learning_rate": 1.0410663281497936e-05, "loss": 1.2581, "step": 7714 }, { "epoch": 2.4236998645385475, "grad_norm": 0.83203125, "learning_rate": 1.0408124404950811e-05, "loss": 1.2973, "step": 7716 }, { "epoch": 2.424328091564089, "grad_norm": 0.85546875, "learning_rate": 1.0405585528403684e-05, "loss": 1.2082, "step": 7718 }, { "epoch": 2.42495631858963, "grad_norm": 0.91796875, "learning_rate": 1.0403046651856554e-05, "loss": 1.1844, "step": 7720 }, { "epoch": 2.4255845456151715, "grad_norm": 0.93359375, "learning_rate": 1.0400507775309427e-05, "loss": 1.2662, "step": 7722 }, { "epoch": 2.4262127726407132, "grad_norm": 0.94140625, "learning_rate": 1.0397968898762298e-05, "loss": 1.2522, "step": 7724 }, { "epoch": 2.4268409996662546, "grad_norm": 0.87890625, "learning_rate": 1.0395430022215171e-05, "loss": 1.1265, "step": 7726 }, { "epoch": 2.427469226691796, "grad_norm": 0.95703125, "learning_rate": 1.0392891145668043e-05, "loss": 1.1795, "step": 7728 }, { "epoch": 2.428097453717337, "grad_norm": 0.85546875, "learning_rate": 1.0390352269120916e-05, "loss": 1.1316, "step": 7730 }, { "epoch": 2.4287256807428785, "grad_norm": 1.1171875, "learning_rate": 1.0387813392573787e-05, "loss": 1.1675, "step": 7732 }, { "epoch": 2.42935390776842, "grad_norm": 0.8671875, "learning_rate": 1.038527451602666e-05, "loss": 1.1802, "step": 7734 }, { "epoch": 2.429982134793961, "grad_norm": 0.85546875, "learning_rate": 1.0382735639479532e-05, "loss": 1.216, "step": 7736 }, { "epoch": 2.4306103618195025, "grad_norm": 0.84375, "learning_rate": 1.0380196762932405e-05, "loss": 1.059, "step": 7738 }, { "epoch": 2.431238588845044, "grad_norm": 0.8828125, "learning_rate": 1.0377657886385274e-05, "loss": 1.31, "step": 7740 }, { "epoch": 2.431866815870585, "grad_norm": 0.9765625, "learning_rate": 1.0375119009838149e-05, "loss": 1.1443, "step": 7742 }, { "epoch": 2.4324950428961265, "grad_norm": 0.82421875, "learning_rate": 1.0372580133291019e-05, "loss": 1.0858, "step": 7744 }, { "epoch": 2.433123269921668, "grad_norm": 0.890625, "learning_rate": 1.0370041256743892e-05, "loss": 1.0358, "step": 7746 }, { "epoch": 2.433751496947209, "grad_norm": 0.91796875, "learning_rate": 1.0367502380196763e-05, "loss": 1.2427, "step": 7748 }, { "epoch": 2.4343797239727505, "grad_norm": 0.85546875, "learning_rate": 1.0364963503649636e-05, "loss": 1.265, "step": 7750 }, { "epoch": 2.435007950998292, "grad_norm": 0.94140625, "learning_rate": 1.0362424627102508e-05, "loss": 1.2406, "step": 7752 }, { "epoch": 2.4356361780238336, "grad_norm": 0.94921875, "learning_rate": 1.035988575055538e-05, "loss": 1.2867, "step": 7754 }, { "epoch": 2.436264405049375, "grad_norm": 0.87890625, "learning_rate": 1.0357346874008252e-05, "loss": 1.1434, "step": 7756 }, { "epoch": 2.4368926320749162, "grad_norm": 0.88671875, "learning_rate": 1.0354807997461125e-05, "loss": 1.1873, "step": 7758 }, { "epoch": 2.4375208591004576, "grad_norm": 0.83984375, "learning_rate": 1.0352269120913997e-05, "loss": 1.1372, "step": 7760 }, { "epoch": 2.438149086125999, "grad_norm": 0.87890625, "learning_rate": 1.034973024436687e-05, "loss": 1.0095, "step": 7762 }, { "epoch": 2.4387773131515402, "grad_norm": 0.91015625, "learning_rate": 1.034719136781974e-05, "loss": 1.2339, "step": 7764 }, { "epoch": 2.4394055401770816, "grad_norm": 0.88671875, "learning_rate": 1.0344652491272612e-05, "loss": 1.1116, "step": 7766 }, { "epoch": 2.440033767202623, "grad_norm": 0.9140625, "learning_rate": 1.0342113614725484e-05, "loss": 1.3111, "step": 7768 }, { "epoch": 2.440661994228164, "grad_norm": 0.82421875, "learning_rate": 1.0339574738178357e-05, "loss": 1.119, "step": 7770 }, { "epoch": 2.4412902212537055, "grad_norm": 0.89453125, "learning_rate": 1.0337035861631228e-05, "loss": 1.1471, "step": 7772 }, { "epoch": 2.441918448279247, "grad_norm": 0.95703125, "learning_rate": 1.0334496985084101e-05, "loss": 1.1424, "step": 7774 }, { "epoch": 2.442546675304788, "grad_norm": 0.9140625, "learning_rate": 1.0331958108536973e-05, "loss": 1.2449, "step": 7776 }, { "epoch": 2.4431749023303295, "grad_norm": 0.8515625, "learning_rate": 1.0329419231989846e-05, "loss": 1.1482, "step": 7778 }, { "epoch": 2.443803129355871, "grad_norm": 0.8515625, "learning_rate": 1.0326880355442717e-05, "loss": 1.3518, "step": 7780 }, { "epoch": 2.444431356381412, "grad_norm": 0.8984375, "learning_rate": 1.032434147889559e-05, "loss": 1.2404, "step": 7782 }, { "epoch": 2.4450595834069535, "grad_norm": 0.9296875, "learning_rate": 1.032180260234846e-05, "loss": 1.1999, "step": 7784 }, { "epoch": 2.445687810432495, "grad_norm": 0.796875, "learning_rate": 1.0319263725801335e-05, "loss": 1.1479, "step": 7786 }, { "epoch": 2.446316037458036, "grad_norm": 0.88671875, "learning_rate": 1.0316724849254204e-05, "loss": 1.164, "step": 7788 }, { "epoch": 2.446944264483578, "grad_norm": 0.8828125, "learning_rate": 1.0314185972707077e-05, "loss": 1.2489, "step": 7790 }, { "epoch": 2.4475724915091193, "grad_norm": 0.828125, "learning_rate": 1.0311647096159949e-05, "loss": 1.0448, "step": 7792 }, { "epoch": 2.4482007185346606, "grad_norm": 0.97265625, "learning_rate": 1.0309108219612822e-05, "loss": 1.1342, "step": 7794 }, { "epoch": 2.448828945560202, "grad_norm": 0.91796875, "learning_rate": 1.0306569343065693e-05, "loss": 1.1211, "step": 7796 }, { "epoch": 2.4494571725857432, "grad_norm": 0.9296875, "learning_rate": 1.0304030466518566e-05, "loss": 1.1104, "step": 7798 }, { "epoch": 2.4500853996112846, "grad_norm": 0.91015625, "learning_rate": 1.0301491589971438e-05, "loss": 1.0956, "step": 7800 }, { "epoch": 2.450713626636826, "grad_norm": 0.84375, "learning_rate": 1.0298952713424311e-05, "loss": 1.2627, "step": 7802 }, { "epoch": 2.4513418536623672, "grad_norm": 0.91796875, "learning_rate": 1.0296413836877184e-05, "loss": 1.1842, "step": 7804 }, { "epoch": 2.4519700806879086, "grad_norm": 0.82421875, "learning_rate": 1.0293874960330055e-05, "loss": 1.2095, "step": 7806 }, { "epoch": 2.45259830771345, "grad_norm": 0.90625, "learning_rate": 1.0291336083782928e-05, "loss": 1.2931, "step": 7808 }, { "epoch": 2.453226534738991, "grad_norm": 0.83203125, "learning_rate": 1.0288797207235798e-05, "loss": 1.1803, "step": 7810 }, { "epoch": 2.4538547617645325, "grad_norm": 0.89453125, "learning_rate": 1.0286258330688673e-05, "loss": 1.1717, "step": 7812 }, { "epoch": 2.454482988790074, "grad_norm": 0.83203125, "learning_rate": 1.0283719454141543e-05, "loss": 0.999, "step": 7814 }, { "epoch": 2.455111215815615, "grad_norm": 0.94140625, "learning_rate": 1.0281180577594416e-05, "loss": 1.103, "step": 7816 }, { "epoch": 2.455739442841157, "grad_norm": 0.87109375, "learning_rate": 1.0278641701047287e-05, "loss": 1.2353, "step": 7818 }, { "epoch": 2.4563676698666983, "grad_norm": 0.8359375, "learning_rate": 1.027610282450016e-05, "loss": 1.1041, "step": 7820 }, { "epoch": 2.4569958968922396, "grad_norm": 0.828125, "learning_rate": 1.0273563947953031e-05, "loss": 1.2442, "step": 7822 }, { "epoch": 2.457624123917781, "grad_norm": 0.87109375, "learning_rate": 1.0271025071405905e-05, "loss": 1.1837, "step": 7824 }, { "epoch": 2.4582523509433223, "grad_norm": 0.921875, "learning_rate": 1.0268486194858776e-05, "loss": 1.0694, "step": 7826 }, { "epoch": 2.4588805779688636, "grad_norm": 0.8515625, "learning_rate": 1.0265947318311649e-05, "loss": 1.1621, "step": 7828 }, { "epoch": 2.459508804994405, "grad_norm": 0.890625, "learning_rate": 1.026340844176452e-05, "loss": 1.2203, "step": 7830 }, { "epoch": 2.4601370320199463, "grad_norm": 0.84765625, "learning_rate": 1.0260869565217393e-05, "loss": 1.2414, "step": 7832 }, { "epoch": 2.4607652590454876, "grad_norm": 0.81640625, "learning_rate": 1.0258330688670263e-05, "loss": 1.298, "step": 7834 }, { "epoch": 2.461393486071029, "grad_norm": 0.921875, "learning_rate": 1.0255791812123136e-05, "loss": 1.0458, "step": 7836 }, { "epoch": 2.4620217130965703, "grad_norm": 0.8359375, "learning_rate": 1.0253252935576008e-05, "loss": 1.2697, "step": 7838 }, { "epoch": 2.4626499401221116, "grad_norm": 0.88671875, "learning_rate": 1.025071405902888e-05, "loss": 1.1559, "step": 7840 }, { "epoch": 2.463278167147653, "grad_norm": 0.90625, "learning_rate": 1.0248175182481752e-05, "loss": 1.1766, "step": 7842 }, { "epoch": 2.4639063941731942, "grad_norm": 0.859375, "learning_rate": 1.0245636305934625e-05, "loss": 1.1575, "step": 7844 }, { "epoch": 2.4645346211987356, "grad_norm": 0.85546875, "learning_rate": 1.0243097429387497e-05, "loss": 1.1253, "step": 7846 }, { "epoch": 2.465162848224277, "grad_norm": 0.8515625, "learning_rate": 1.024055855284037e-05, "loss": 1.1804, "step": 7848 }, { "epoch": 2.465791075249818, "grad_norm": 0.86328125, "learning_rate": 1.0238019676293241e-05, "loss": 1.2267, "step": 7850 }, { "epoch": 2.4664193022753595, "grad_norm": 0.796875, "learning_rate": 1.0235480799746114e-05, "loss": 1.2627, "step": 7852 }, { "epoch": 2.467047529300901, "grad_norm": 0.8515625, "learning_rate": 1.0232941923198985e-05, "loss": 1.2447, "step": 7854 }, { "epoch": 2.4676757563264426, "grad_norm": 0.83984375, "learning_rate": 1.0230403046651859e-05, "loss": 1.1281, "step": 7856 }, { "epoch": 2.468303983351984, "grad_norm": 0.8515625, "learning_rate": 1.0227864170104728e-05, "loss": 1.1362, "step": 7858 }, { "epoch": 2.4689322103775253, "grad_norm": 0.90625, "learning_rate": 1.0225325293557601e-05, "loss": 1.1659, "step": 7860 }, { "epoch": 2.4695604374030666, "grad_norm": 0.86328125, "learning_rate": 1.0222786417010473e-05, "loss": 1.1334, "step": 7862 }, { "epoch": 2.470188664428608, "grad_norm": 0.92578125, "learning_rate": 1.0220247540463346e-05, "loss": 1.2789, "step": 7864 }, { "epoch": 2.4708168914541493, "grad_norm": 0.8515625, "learning_rate": 1.0217708663916217e-05, "loss": 1.2958, "step": 7866 }, { "epoch": 2.4714451184796906, "grad_norm": 0.90234375, "learning_rate": 1.021516978736909e-05, "loss": 1.1327, "step": 7868 }, { "epoch": 2.472073345505232, "grad_norm": 0.875, "learning_rate": 1.0212630910821962e-05, "loss": 1.2687, "step": 7870 }, { "epoch": 2.4727015725307733, "grad_norm": 0.953125, "learning_rate": 1.0210092034274835e-05, "loss": 1.2305, "step": 7872 }, { "epoch": 2.4733297995563146, "grad_norm": 0.83203125, "learning_rate": 1.0207553157727706e-05, "loss": 1.3892, "step": 7874 }, { "epoch": 2.473958026581856, "grad_norm": 0.84765625, "learning_rate": 1.0205014281180579e-05, "loss": 1.1705, "step": 7876 }, { "epoch": 2.4745862536073973, "grad_norm": 0.89453125, "learning_rate": 1.0202475404633449e-05, "loss": 1.148, "step": 7878 }, { "epoch": 2.4752144806329386, "grad_norm": 0.91015625, "learning_rate": 1.0199936528086324e-05, "loss": 1.3282, "step": 7880 }, { "epoch": 2.47584270765848, "grad_norm": 0.90625, "learning_rate": 1.0197397651539193e-05, "loss": 1.1612, "step": 7882 }, { "epoch": 2.4764709346840217, "grad_norm": 0.84375, "learning_rate": 1.0194858774992066e-05, "loss": 1.2072, "step": 7884 }, { "epoch": 2.477099161709563, "grad_norm": 0.8671875, "learning_rate": 1.0192319898444938e-05, "loss": 1.3195, "step": 7886 }, { "epoch": 2.4777273887351043, "grad_norm": 0.80859375, "learning_rate": 1.018978102189781e-05, "loss": 1.1765, "step": 7888 }, { "epoch": 2.4783556157606457, "grad_norm": 0.83984375, "learning_rate": 1.0187242145350684e-05, "loss": 1.1609, "step": 7890 }, { "epoch": 2.478983842786187, "grad_norm": 1.0, "learning_rate": 1.0184703268803555e-05, "loss": 1.1592, "step": 7892 }, { "epoch": 2.4796120698117283, "grad_norm": 0.8828125, "learning_rate": 1.0182164392256428e-05, "loss": 1.1972, "step": 7894 }, { "epoch": 2.4802402968372697, "grad_norm": 0.8984375, "learning_rate": 1.01796255157093e-05, "loss": 1.2579, "step": 7896 }, { "epoch": 2.480868523862811, "grad_norm": 0.859375, "learning_rate": 1.0177086639162173e-05, "loss": 1.1676, "step": 7898 }, { "epoch": 2.4814967508883523, "grad_norm": 0.83203125, "learning_rate": 1.0174547762615044e-05, "loss": 1.299, "step": 7900 }, { "epoch": 2.4821249779138936, "grad_norm": 0.89453125, "learning_rate": 1.0172008886067917e-05, "loss": 1.2395, "step": 7902 }, { "epoch": 2.482753204939435, "grad_norm": 0.9296875, "learning_rate": 1.0169470009520787e-05, "loss": 1.2603, "step": 7904 }, { "epoch": 2.4833814319649763, "grad_norm": 0.8515625, "learning_rate": 1.0166931132973662e-05, "loss": 1.129, "step": 7906 }, { "epoch": 2.4840096589905176, "grad_norm": 0.8203125, "learning_rate": 1.0164392256426531e-05, "loss": 1.3505, "step": 7908 }, { "epoch": 2.484637886016059, "grad_norm": 0.8359375, "learning_rate": 1.0161853379879404e-05, "loss": 1.1911, "step": 7910 }, { "epoch": 2.4852661130416003, "grad_norm": 0.85546875, "learning_rate": 1.0159314503332276e-05, "loss": 1.2497, "step": 7912 }, { "epoch": 2.4858943400671416, "grad_norm": 0.875, "learning_rate": 1.0156775626785149e-05, "loss": 1.2195, "step": 7914 }, { "epoch": 2.486522567092683, "grad_norm": 0.8984375, "learning_rate": 1.015423675023802e-05, "loss": 1.2527, "step": 7916 }, { "epoch": 2.4871507941182243, "grad_norm": 0.8046875, "learning_rate": 1.0151697873690893e-05, "loss": 1.2442, "step": 7918 }, { "epoch": 2.4877790211437656, "grad_norm": 0.83203125, "learning_rate": 1.0149158997143765e-05, "loss": 1.2383, "step": 7920 }, { "epoch": 2.4884072481693074, "grad_norm": 0.88671875, "learning_rate": 1.0146620120596638e-05, "loss": 1.1643, "step": 7922 }, { "epoch": 2.4890354751948487, "grad_norm": 0.93359375, "learning_rate": 1.014408124404951e-05, "loss": 1.1913, "step": 7924 }, { "epoch": 2.48966370222039, "grad_norm": 0.84765625, "learning_rate": 1.0141542367502382e-05, "loss": 1.1843, "step": 7926 }, { "epoch": 2.4902919292459313, "grad_norm": 0.8359375, "learning_rate": 1.0139003490955252e-05, "loss": 1.1378, "step": 7928 }, { "epoch": 2.4909201562714727, "grad_norm": 0.98046875, "learning_rate": 1.0136464614408125e-05, "loss": 1.1175, "step": 7930 }, { "epoch": 2.491548383297014, "grad_norm": 0.90625, "learning_rate": 1.0133925737860996e-05, "loss": 1.2275, "step": 7932 }, { "epoch": 2.4921766103225553, "grad_norm": 0.80078125, "learning_rate": 1.013138686131387e-05, "loss": 1.0809, "step": 7934 }, { "epoch": 2.4928048373480967, "grad_norm": 0.86328125, "learning_rate": 1.0128847984766741e-05, "loss": 1.223, "step": 7936 }, { "epoch": 2.493433064373638, "grad_norm": 0.90234375, "learning_rate": 1.0126309108219614e-05, "loss": 1.2318, "step": 7938 }, { "epoch": 2.4940612913991793, "grad_norm": 0.80859375, "learning_rate": 1.0123770231672485e-05, "loss": 1.1458, "step": 7940 }, { "epoch": 2.4946895184247206, "grad_norm": 0.85546875, "learning_rate": 1.0121231355125358e-05, "loss": 1.1603, "step": 7942 }, { "epoch": 2.495317745450262, "grad_norm": 0.890625, "learning_rate": 1.011869247857823e-05, "loss": 1.1126, "step": 7944 }, { "epoch": 2.4959459724758033, "grad_norm": 0.8671875, "learning_rate": 1.0116153602031103e-05, "loss": 1.2311, "step": 7946 }, { "epoch": 2.4965741995013446, "grad_norm": 0.8984375, "learning_rate": 1.0113614725483973e-05, "loss": 1.2241, "step": 7948 }, { "epoch": 2.4972024265268864, "grad_norm": 0.8671875, "learning_rate": 1.0111075848936847e-05, "loss": 1.1663, "step": 7950 }, { "epoch": 2.4978306535524277, "grad_norm": 0.8671875, "learning_rate": 1.0108536972389717e-05, "loss": 1.1048, "step": 7952 }, { "epoch": 2.498458880577969, "grad_norm": 0.85546875, "learning_rate": 1.010599809584259e-05, "loss": 1.1416, "step": 7954 }, { "epoch": 2.4990871076035104, "grad_norm": 1.0390625, "learning_rate": 1.0103459219295462e-05, "loss": 1.2229, "step": 7956 }, { "epoch": 2.4997153346290517, "grad_norm": 0.890625, "learning_rate": 1.0100920342748335e-05, "loss": 1.0936, "step": 7958 }, { "epoch": 2.500343561654593, "grad_norm": 0.859375, "learning_rate": 1.0098381466201206e-05, "loss": 1.2353, "step": 7960 }, { "epoch": 2.5009717886801344, "grad_norm": 0.828125, "learning_rate": 1.0095842589654079e-05, "loss": 1.1868, "step": 7962 }, { "epoch": 2.5016000157056757, "grad_norm": 0.87890625, "learning_rate": 1.009330371310695e-05, "loss": 1.1775, "step": 7964 }, { "epoch": 2.502228242731217, "grad_norm": 0.8671875, "learning_rate": 1.0090764836559823e-05, "loss": 1.1873, "step": 7966 }, { "epoch": 2.5028564697567584, "grad_norm": 1.0, "learning_rate": 1.0088225960012695e-05, "loss": 1.2451, "step": 7968 }, { "epoch": 2.5034846967822997, "grad_norm": 0.8828125, "learning_rate": 1.0085687083465568e-05, "loss": 1.1386, "step": 7970 }, { "epoch": 2.504112923807841, "grad_norm": 0.84765625, "learning_rate": 1.0083148206918438e-05, "loss": 1.2973, "step": 7972 }, { "epoch": 2.5047411508333823, "grad_norm": 0.96875, "learning_rate": 1.008060933037131e-05, "loss": 1.1667, "step": 7974 }, { "epoch": 2.5053693778589237, "grad_norm": 0.8828125, "learning_rate": 1.0078070453824185e-05, "loss": 1.3141, "step": 7976 }, { "epoch": 2.505997604884465, "grad_norm": 0.8828125, "learning_rate": 1.0075531577277055e-05, "loss": 1.1125, "step": 7978 }, { "epoch": 2.5066258319100063, "grad_norm": 0.9375, "learning_rate": 1.0072992700729928e-05, "loss": 1.2697, "step": 7980 }, { "epoch": 2.5072540589355476, "grad_norm": 0.93359375, "learning_rate": 1.00704538241828e-05, "loss": 1.1035, "step": 7982 }, { "epoch": 2.507882285961089, "grad_norm": 0.83984375, "learning_rate": 1.0067914947635673e-05, "loss": 1.1884, "step": 7984 }, { "epoch": 2.5085105129866303, "grad_norm": 0.921875, "learning_rate": 1.0065376071088544e-05, "loss": 1.2257, "step": 7986 }, { "epoch": 2.5091387400121716, "grad_norm": 0.90234375, "learning_rate": 1.0062837194541417e-05, "loss": 1.2123, "step": 7988 }, { "epoch": 2.5097669670377134, "grad_norm": 0.890625, "learning_rate": 1.0060298317994289e-05, "loss": 1.2626, "step": 7990 }, { "epoch": 2.5103951940632547, "grad_norm": 0.86328125, "learning_rate": 1.0057759441447162e-05, "loss": 1.2488, "step": 7992 }, { "epoch": 2.511023421088796, "grad_norm": 0.8515625, "learning_rate": 1.0055220564900033e-05, "loss": 1.3123, "step": 7994 }, { "epoch": 2.5116516481143374, "grad_norm": 0.84765625, "learning_rate": 1.0052681688352906e-05, "loss": 1.204, "step": 7996 }, { "epoch": 2.5122798751398787, "grad_norm": 0.96875, "learning_rate": 1.0050142811805776e-05, "loss": 1.1576, "step": 7998 }, { "epoch": 2.51290810216542, "grad_norm": 0.94921875, "learning_rate": 1.0047603935258649e-05, "loss": 1.1342, "step": 8000 }, { "epoch": 2.5135363291909614, "grad_norm": 0.9375, "learning_rate": 1.004506505871152e-05, "loss": 1.1785, "step": 8002 }, { "epoch": 2.5141645562165027, "grad_norm": 0.88671875, "learning_rate": 1.0042526182164393e-05, "loss": 1.1783, "step": 8004 }, { "epoch": 2.514792783242044, "grad_norm": 0.890625, "learning_rate": 1.0039987305617265e-05, "loss": 1.2789, "step": 8006 }, { "epoch": 2.5154210102675854, "grad_norm": 0.890625, "learning_rate": 1.0037448429070138e-05, "loss": 1.3081, "step": 8008 }, { "epoch": 2.5160492372931267, "grad_norm": 0.9375, "learning_rate": 1.0034909552523009e-05, "loss": 1.1454, "step": 8010 }, { "epoch": 2.516677464318668, "grad_norm": 0.921875, "learning_rate": 1.0032370675975882e-05, "loss": 1.1284, "step": 8012 }, { "epoch": 2.51730569134421, "grad_norm": 0.8984375, "learning_rate": 1.0029831799428754e-05, "loss": 1.1488, "step": 8014 }, { "epoch": 2.517933918369751, "grad_norm": 0.83203125, "learning_rate": 1.0027292922881627e-05, "loss": 1.2606, "step": 8016 }, { "epoch": 2.5185621453952924, "grad_norm": 0.8828125, "learning_rate": 1.0024754046334498e-05, "loss": 1.2357, "step": 8018 }, { "epoch": 2.5191903724208338, "grad_norm": 0.90625, "learning_rate": 1.0022215169787371e-05, "loss": 1.1624, "step": 8020 }, { "epoch": 2.519818599446375, "grad_norm": 0.9453125, "learning_rate": 1.001967629324024e-05, "loss": 1.1266, "step": 8022 }, { "epoch": 2.5204468264719164, "grad_norm": 1.0234375, "learning_rate": 1.0017137416693114e-05, "loss": 1.2335, "step": 8024 }, { "epoch": 2.5210750534974578, "grad_norm": 0.87890625, "learning_rate": 1.0014598540145985e-05, "loss": 1.1071, "step": 8026 }, { "epoch": 2.521703280522999, "grad_norm": 0.8671875, "learning_rate": 1.0012059663598858e-05, "loss": 1.1173, "step": 8028 }, { "epoch": 2.5223315075485404, "grad_norm": 0.87109375, "learning_rate": 1.000952078705173e-05, "loss": 1.1481, "step": 8030 }, { "epoch": 2.5229597345740817, "grad_norm": 0.875, "learning_rate": 1.0006981910504603e-05, "loss": 1.2871, "step": 8032 }, { "epoch": 2.523587961599623, "grad_norm": 0.98828125, "learning_rate": 1.0004443033957474e-05, "loss": 1.1876, "step": 8034 }, { "epoch": 2.5242161886251644, "grad_norm": 0.97265625, "learning_rate": 1.0001904157410347e-05, "loss": 1.22, "step": 8036 }, { "epoch": 2.5248444156507057, "grad_norm": 2.921875, "learning_rate": 9.999365280863219e-06, "loss": 1.0271, "step": 8038 }, { "epoch": 2.525472642676247, "grad_norm": 0.83984375, "learning_rate": 9.996826404316092e-06, "loss": 1.2888, "step": 8040 }, { "epoch": 2.5261008697017884, "grad_norm": 0.984375, "learning_rate": 9.994287527768963e-06, "loss": 1.3042, "step": 8042 }, { "epoch": 2.5267290967273297, "grad_norm": 0.84765625, "learning_rate": 9.991748651221835e-06, "loss": 1.2412, "step": 8044 }, { "epoch": 2.527357323752871, "grad_norm": 0.921875, "learning_rate": 9.989209774674708e-06, "loss": 1.2465, "step": 8046 }, { "epoch": 2.5279855507784124, "grad_norm": 1.0546875, "learning_rate": 9.986670898127579e-06, "loss": 1.1235, "step": 8048 }, { "epoch": 2.5286137778039537, "grad_norm": 0.953125, "learning_rate": 9.984132021580452e-06, "loss": 1.1607, "step": 8050 }, { "epoch": 2.529242004829495, "grad_norm": 0.83984375, "learning_rate": 9.981593145033323e-06, "loss": 1.1748, "step": 8052 }, { "epoch": 2.5298702318550363, "grad_norm": 0.89453125, "learning_rate": 9.979054268486196e-06, "loss": 1.0597, "step": 8054 }, { "epoch": 2.530498458880578, "grad_norm": 0.953125, "learning_rate": 9.976515391939068e-06, "loss": 1.0744, "step": 8056 }, { "epoch": 2.5311266859061194, "grad_norm": 0.890625, "learning_rate": 9.97397651539194e-06, "loss": 1.2989, "step": 8058 }, { "epoch": 2.5317549129316608, "grad_norm": 0.921875, "learning_rate": 9.971437638844812e-06, "loss": 1.1052, "step": 8060 }, { "epoch": 2.532383139957202, "grad_norm": 0.8125, "learning_rate": 9.968898762297684e-06, "loss": 1.2326, "step": 8062 }, { "epoch": 2.5330113669827434, "grad_norm": 0.890625, "learning_rate": 9.966359885750557e-06, "loss": 1.258, "step": 8064 }, { "epoch": 2.5336395940082848, "grad_norm": 0.828125, "learning_rate": 9.963821009203428e-06, "loss": 1.3359, "step": 8066 }, { "epoch": 2.534267821033826, "grad_norm": 0.87890625, "learning_rate": 9.9612821326563e-06, "loss": 1.1445, "step": 8068 }, { "epoch": 2.5348960480593674, "grad_norm": 0.8515625, "learning_rate": 9.958743256109173e-06, "loss": 1.1916, "step": 8070 }, { "epoch": 2.5355242750849087, "grad_norm": 0.85546875, "learning_rate": 9.956204379562044e-06, "loss": 1.2186, "step": 8072 }, { "epoch": 2.53615250211045, "grad_norm": 0.953125, "learning_rate": 9.953665503014917e-06, "loss": 1.1839, "step": 8074 }, { "epoch": 2.5367807291359914, "grad_norm": 0.81640625, "learning_rate": 9.951126626467788e-06, "loss": 1.1174, "step": 8076 }, { "epoch": 2.5374089561615327, "grad_norm": 0.85546875, "learning_rate": 9.94858774992066e-06, "loss": 1.3167, "step": 8078 }, { "epoch": 2.5380371831870745, "grad_norm": 0.8359375, "learning_rate": 9.946048873373533e-06, "loss": 1.2239, "step": 8080 }, { "epoch": 2.538665410212616, "grad_norm": 0.8515625, "learning_rate": 9.943509996826404e-06, "loss": 1.2857, "step": 8082 }, { "epoch": 2.539293637238157, "grad_norm": 1.265625, "learning_rate": 9.940971120279277e-06, "loss": 1.2533, "step": 8084 }, { "epoch": 2.5399218642636985, "grad_norm": 0.87890625, "learning_rate": 9.938432243732149e-06, "loss": 1.2453, "step": 8086 }, { "epoch": 2.54055009128924, "grad_norm": 1.4375, "learning_rate": 9.935893367185022e-06, "loss": 1.2452, "step": 8088 }, { "epoch": 2.541178318314781, "grad_norm": 0.93359375, "learning_rate": 9.933354490637893e-06, "loss": 1.1738, "step": 8090 }, { "epoch": 2.5418065453403225, "grad_norm": 0.96484375, "learning_rate": 9.930815614090765e-06, "loss": 1.2676, "step": 8092 }, { "epoch": 2.542434772365864, "grad_norm": 0.921875, "learning_rate": 9.928276737543638e-06, "loss": 1.1425, "step": 8094 }, { "epoch": 2.543062999391405, "grad_norm": 1.03125, "learning_rate": 9.925737860996509e-06, "loss": 1.134, "step": 8096 }, { "epoch": 2.5436912264169464, "grad_norm": 0.875, "learning_rate": 9.923198984449382e-06, "loss": 1.1667, "step": 8098 }, { "epoch": 2.5443194534424878, "grad_norm": 0.90234375, "learning_rate": 9.920660107902254e-06, "loss": 1.1229, "step": 8100 }, { "epoch": 2.544947680468029, "grad_norm": 0.9375, "learning_rate": 9.918121231355127e-06, "loss": 1.2614, "step": 8102 }, { "epoch": 2.5455759074935704, "grad_norm": 0.86328125, "learning_rate": 9.915582354807998e-06, "loss": 1.2031, "step": 8104 }, { "epoch": 2.5462041345191118, "grad_norm": 0.8359375, "learning_rate": 9.913043478260871e-06, "loss": 1.0952, "step": 8106 }, { "epoch": 2.546832361544653, "grad_norm": 0.84375, "learning_rate": 9.910504601713742e-06, "loss": 1.3312, "step": 8108 }, { "epoch": 2.5474605885701944, "grad_norm": 0.90234375, "learning_rate": 9.907965725166616e-06, "loss": 1.1553, "step": 8110 }, { "epoch": 2.5480888155957357, "grad_norm": 0.85546875, "learning_rate": 9.905426848619487e-06, "loss": 1.1358, "step": 8112 }, { "epoch": 2.548717042621277, "grad_norm": 0.890625, "learning_rate": 9.90288797207236e-06, "loss": 1.1889, "step": 8114 }, { "epoch": 2.5493452696468184, "grad_norm": 0.87109375, "learning_rate": 9.900349095525231e-06, "loss": 1.3103, "step": 8116 }, { "epoch": 2.5499734966723597, "grad_norm": 0.77734375, "learning_rate": 9.897810218978103e-06, "loss": 1.2243, "step": 8118 }, { "epoch": 2.550601723697901, "grad_norm": 0.92578125, "learning_rate": 9.895271342430976e-06, "loss": 1.1031, "step": 8120 }, { "epoch": 2.551229950723443, "grad_norm": 0.89453125, "learning_rate": 9.892732465883847e-06, "loss": 1.2186, "step": 8122 }, { "epoch": 2.551858177748984, "grad_norm": 0.89453125, "learning_rate": 9.89019358933672e-06, "loss": 1.2599, "step": 8124 }, { "epoch": 2.5524864047745255, "grad_norm": 0.94921875, "learning_rate": 9.887654712789592e-06, "loss": 1.2472, "step": 8126 }, { "epoch": 2.553114631800067, "grad_norm": 0.8125, "learning_rate": 9.885115836242463e-06, "loss": 1.1494, "step": 8128 }, { "epoch": 2.553742858825608, "grad_norm": 1.0390625, "learning_rate": 9.882576959695336e-06, "loss": 1.0775, "step": 8130 }, { "epoch": 2.5543710858511495, "grad_norm": 0.90625, "learning_rate": 9.880038083148208e-06, "loss": 1.2127, "step": 8132 }, { "epoch": 2.554999312876691, "grad_norm": 0.91796875, "learning_rate": 9.87749920660108e-06, "loss": 1.1725, "step": 8134 }, { "epoch": 2.555627539902232, "grad_norm": 0.86328125, "learning_rate": 9.874960330053952e-06, "loss": 1.2109, "step": 8136 }, { "epoch": 2.5562557669277735, "grad_norm": 0.96875, "learning_rate": 9.872421453506823e-06, "loss": 1.1498, "step": 8138 }, { "epoch": 2.556883993953315, "grad_norm": 0.91796875, "learning_rate": 9.869882576959696e-06, "loss": 1.2461, "step": 8140 }, { "epoch": 2.557512220978856, "grad_norm": 0.984375, "learning_rate": 9.867343700412568e-06, "loss": 1.1263, "step": 8142 }, { "epoch": 2.5581404480043974, "grad_norm": 0.984375, "learning_rate": 9.864804823865441e-06, "loss": 1.0624, "step": 8144 }, { "epoch": 2.558768675029939, "grad_norm": 0.921875, "learning_rate": 9.862265947318312e-06, "loss": 1.1112, "step": 8146 }, { "epoch": 2.5593969020554805, "grad_norm": 0.84375, "learning_rate": 9.859727070771185e-06, "loss": 1.1871, "step": 8148 }, { "epoch": 2.560025129081022, "grad_norm": 0.87890625, "learning_rate": 9.857188194224057e-06, "loss": 1.2094, "step": 8150 }, { "epoch": 2.560653356106563, "grad_norm": 0.9140625, "learning_rate": 9.854649317676928e-06, "loss": 1.0918, "step": 8152 }, { "epoch": 2.5612815831321045, "grad_norm": 0.83984375, "learning_rate": 9.852110441129801e-06, "loss": 1.1145, "step": 8154 }, { "epoch": 2.561909810157646, "grad_norm": 0.84765625, "learning_rate": 9.849571564582673e-06, "loss": 1.178, "step": 8156 }, { "epoch": 2.562538037183187, "grad_norm": 0.90625, "learning_rate": 9.847032688035546e-06, "loss": 1.2077, "step": 8158 }, { "epoch": 2.5631662642087285, "grad_norm": 0.87109375, "learning_rate": 9.844493811488417e-06, "loss": 1.2177, "step": 8160 }, { "epoch": 2.56379449123427, "grad_norm": 0.86328125, "learning_rate": 9.841954934941288e-06, "loss": 1.1381, "step": 8162 }, { "epoch": 2.564422718259811, "grad_norm": 0.9765625, "learning_rate": 9.839416058394161e-06, "loss": 1.1781, "step": 8164 }, { "epoch": 2.5650509452853525, "grad_norm": 0.8359375, "learning_rate": 9.836877181847033e-06, "loss": 1.1931, "step": 8166 }, { "epoch": 2.565679172310894, "grad_norm": 0.79296875, "learning_rate": 9.834338305299906e-06, "loss": 1.203, "step": 8168 }, { "epoch": 2.566307399336435, "grad_norm": 1.03125, "learning_rate": 9.831799428752777e-06, "loss": 1.2147, "step": 8170 }, { "epoch": 2.5669356263619765, "grad_norm": 0.8984375, "learning_rate": 9.829260552205649e-06, "loss": 1.2632, "step": 8172 }, { "epoch": 2.567563853387518, "grad_norm": 0.88671875, "learning_rate": 9.826721675658522e-06, "loss": 1.2249, "step": 8174 }, { "epoch": 2.568192080413059, "grad_norm": 0.8359375, "learning_rate": 9.824182799111393e-06, "loss": 1.1366, "step": 8176 }, { "epoch": 2.5688203074386005, "grad_norm": 0.875, "learning_rate": 9.821643922564266e-06, "loss": 1.1498, "step": 8178 }, { "epoch": 2.569448534464142, "grad_norm": 1.171875, "learning_rate": 9.819105046017138e-06, "loss": 1.121, "step": 8180 }, { "epoch": 2.570076761489683, "grad_norm": 0.8828125, "learning_rate": 9.816566169470009e-06, "loss": 1.1996, "step": 8182 }, { "epoch": 2.5707049885152244, "grad_norm": 0.90625, "learning_rate": 9.814027292922882e-06, "loss": 1.2784, "step": 8184 }, { "epoch": 2.5713332155407658, "grad_norm": 0.90625, "learning_rate": 9.811488416375753e-06, "loss": 1.145, "step": 8186 }, { "epoch": 2.5719614425663075, "grad_norm": 0.8125, "learning_rate": 9.808949539828627e-06, "loss": 1.1618, "step": 8188 }, { "epoch": 2.572589669591849, "grad_norm": 0.97265625, "learning_rate": 9.8064106632815e-06, "loss": 1.2192, "step": 8190 }, { "epoch": 2.57321789661739, "grad_norm": 0.87890625, "learning_rate": 9.803871786734371e-06, "loss": 1.166, "step": 8192 }, { "epoch": 2.5738461236429315, "grad_norm": 0.83984375, "learning_rate": 9.801332910187244e-06, "loss": 1.3588, "step": 8194 }, { "epoch": 2.574474350668473, "grad_norm": 0.91796875, "learning_rate": 9.798794033640115e-06, "loss": 1.0738, "step": 8196 }, { "epoch": 2.575102577694014, "grad_norm": 0.84765625, "learning_rate": 9.796255157092987e-06, "loss": 1.3274, "step": 8198 }, { "epoch": 2.5757308047195555, "grad_norm": 0.828125, "learning_rate": 9.79371628054586e-06, "loss": 1.3462, "step": 8200 }, { "epoch": 2.576359031745097, "grad_norm": 0.8984375, "learning_rate": 9.791177403998731e-06, "loss": 1.1931, "step": 8202 }, { "epoch": 2.576987258770638, "grad_norm": 0.8125, "learning_rate": 9.788638527451604e-06, "loss": 1.0762, "step": 8204 }, { "epoch": 2.5776154857961795, "grad_norm": 0.8671875, "learning_rate": 9.786099650904476e-06, "loss": 1.0787, "step": 8206 }, { "epoch": 2.578243712821721, "grad_norm": 0.875, "learning_rate": 9.783560774357347e-06, "loss": 1.1584, "step": 8208 }, { "epoch": 2.578871939847262, "grad_norm": 0.953125, "learning_rate": 9.78102189781022e-06, "loss": 1.1298, "step": 8210 }, { "epoch": 2.579500166872804, "grad_norm": 0.9609375, "learning_rate": 9.778483021263092e-06, "loss": 1.1622, "step": 8212 }, { "epoch": 2.5801283938983453, "grad_norm": 0.90625, "learning_rate": 9.775944144715965e-06, "loss": 1.2961, "step": 8214 }, { "epoch": 2.5807566209238866, "grad_norm": 0.9453125, "learning_rate": 9.773405268168836e-06, "loss": 1.1895, "step": 8216 }, { "epoch": 2.581384847949428, "grad_norm": 0.9296875, "learning_rate": 9.770866391621709e-06, "loss": 1.1523, "step": 8218 }, { "epoch": 2.5820130749749692, "grad_norm": 0.9375, "learning_rate": 9.76832751507458e-06, "loss": 1.0955, "step": 8220 }, { "epoch": 2.5826413020005106, "grad_norm": 0.890625, "learning_rate": 9.765788638527452e-06, "loss": 1.1717, "step": 8222 }, { "epoch": 2.583269529026052, "grad_norm": 0.99609375, "learning_rate": 9.763249761980325e-06, "loss": 1.1134, "step": 8224 }, { "epoch": 2.583897756051593, "grad_norm": 0.86328125, "learning_rate": 9.760710885433196e-06, "loss": 1.1845, "step": 8226 }, { "epoch": 2.5845259830771345, "grad_norm": 0.91796875, "learning_rate": 9.75817200888607e-06, "loss": 1.0873, "step": 8228 }, { "epoch": 2.585154210102676, "grad_norm": 0.8515625, "learning_rate": 9.75563313233894e-06, "loss": 1.1864, "step": 8230 }, { "epoch": 2.585782437128217, "grad_norm": 0.9140625, "learning_rate": 9.753094255791812e-06, "loss": 1.0575, "step": 8232 }, { "epoch": 2.5864106641537585, "grad_norm": 0.89453125, "learning_rate": 9.750555379244685e-06, "loss": 1.2174, "step": 8234 }, { "epoch": 2.5870388911793, "grad_norm": 0.91015625, "learning_rate": 9.748016502697557e-06, "loss": 1.0819, "step": 8236 }, { "epoch": 2.587667118204841, "grad_norm": 0.83984375, "learning_rate": 9.74547762615043e-06, "loss": 1.3066, "step": 8238 }, { "epoch": 2.5882953452303825, "grad_norm": 0.9296875, "learning_rate": 9.742938749603301e-06, "loss": 1.0422, "step": 8240 }, { "epoch": 2.588923572255924, "grad_norm": 1.0078125, "learning_rate": 9.740399873056172e-06, "loss": 1.315, "step": 8242 }, { "epoch": 2.589551799281465, "grad_norm": 0.953125, "learning_rate": 9.737860996509046e-06, "loss": 1.1542, "step": 8244 }, { "epoch": 2.5901800263070065, "grad_norm": 0.8125, "learning_rate": 9.735322119961917e-06, "loss": 1.1142, "step": 8246 }, { "epoch": 2.590808253332548, "grad_norm": 0.87890625, "learning_rate": 9.73278324341479e-06, "loss": 1.1555, "step": 8248 }, { "epoch": 2.591436480358089, "grad_norm": 0.91796875, "learning_rate": 9.730244366867661e-06, "loss": 1.1318, "step": 8250 }, { "epoch": 2.5920647073836305, "grad_norm": 0.8671875, "learning_rate": 9.727705490320534e-06, "loss": 1.1723, "step": 8252 }, { "epoch": 2.5926929344091723, "grad_norm": 0.8515625, "learning_rate": 9.725166613773406e-06, "loss": 1.1697, "step": 8254 }, { "epoch": 2.5933211614347136, "grad_norm": 0.9921875, "learning_rate": 9.722627737226277e-06, "loss": 1.142, "step": 8256 }, { "epoch": 2.593949388460255, "grad_norm": 0.89453125, "learning_rate": 9.72008886067915e-06, "loss": 1.0324, "step": 8258 }, { "epoch": 2.5945776154857962, "grad_norm": 0.95703125, "learning_rate": 9.717549984132022e-06, "loss": 1.0065, "step": 8260 }, { "epoch": 2.5952058425113376, "grad_norm": 0.82421875, "learning_rate": 9.715011107584895e-06, "loss": 1.104, "step": 8262 }, { "epoch": 2.595834069536879, "grad_norm": 0.890625, "learning_rate": 9.712472231037766e-06, "loss": 1.1189, "step": 8264 }, { "epoch": 2.5964622965624202, "grad_norm": 0.89453125, "learning_rate": 9.709933354490638e-06, "loss": 1.1932, "step": 8266 }, { "epoch": 2.5970905235879616, "grad_norm": 0.8828125, "learning_rate": 9.70739447794351e-06, "loss": 1.2323, "step": 8268 }, { "epoch": 2.597718750613503, "grad_norm": 0.9375, "learning_rate": 9.704855601396382e-06, "loss": 1.1692, "step": 8270 }, { "epoch": 2.598346977639044, "grad_norm": 0.8359375, "learning_rate": 9.702316724849255e-06, "loss": 1.2307, "step": 8272 }, { "epoch": 2.5989752046645855, "grad_norm": 0.88671875, "learning_rate": 9.699777848302128e-06, "loss": 1.112, "step": 8274 }, { "epoch": 2.599603431690127, "grad_norm": 0.8828125, "learning_rate": 9.697238971755e-06, "loss": 1.3361, "step": 8276 }, { "epoch": 2.6002316587156686, "grad_norm": 0.93359375, "learning_rate": 9.694700095207873e-06, "loss": 0.9816, "step": 8278 }, { "epoch": 2.60085988574121, "grad_norm": 0.85546875, "learning_rate": 9.692161218660744e-06, "loss": 1.2718, "step": 8280 }, { "epoch": 2.6014881127667513, "grad_norm": 0.953125, "learning_rate": 9.689622342113615e-06, "loss": 1.2261, "step": 8282 }, { "epoch": 2.6021163397922926, "grad_norm": 1.109375, "learning_rate": 9.687083465566488e-06, "loss": 1.1821, "step": 8284 }, { "epoch": 2.602744566817834, "grad_norm": 0.953125, "learning_rate": 9.68454458901936e-06, "loss": 1.1777, "step": 8286 }, { "epoch": 2.6033727938433753, "grad_norm": 0.875, "learning_rate": 9.682005712472233e-06, "loss": 1.1155, "step": 8288 }, { "epoch": 2.6040010208689166, "grad_norm": 1.546875, "learning_rate": 9.679466835925104e-06, "loss": 1.1201, "step": 8290 }, { "epoch": 2.604629247894458, "grad_norm": 0.87109375, "learning_rate": 9.676927959377976e-06, "loss": 1.0158, "step": 8292 }, { "epoch": 2.6052574749199993, "grad_norm": 0.97265625, "learning_rate": 9.674389082830849e-06, "loss": 1.0516, "step": 8294 }, { "epoch": 2.6058857019455406, "grad_norm": 0.91015625, "learning_rate": 9.67185020628372e-06, "loss": 1.3452, "step": 8296 }, { "epoch": 2.606513928971082, "grad_norm": 0.85546875, "learning_rate": 9.669311329736593e-06, "loss": 1.2381, "step": 8298 }, { "epoch": 2.6071421559966232, "grad_norm": 0.8515625, "learning_rate": 9.666772453189465e-06, "loss": 1.1524, "step": 8300 }, { "epoch": 2.6077703830221646, "grad_norm": 0.984375, "learning_rate": 9.664233576642336e-06, "loss": 1.0884, "step": 8302 }, { "epoch": 2.608398610047706, "grad_norm": 1.0390625, "learning_rate": 9.661694700095209e-06, "loss": 1.2137, "step": 8304 }, { "epoch": 2.6090268370732472, "grad_norm": 0.86328125, "learning_rate": 9.65915582354808e-06, "loss": 1.2906, "step": 8306 }, { "epoch": 2.6096550640987886, "grad_norm": 0.88671875, "learning_rate": 9.656616947000954e-06, "loss": 1.1416, "step": 8308 }, { "epoch": 2.61028329112433, "grad_norm": 0.87109375, "learning_rate": 9.654078070453825e-06, "loss": 1.2919, "step": 8310 }, { "epoch": 2.610911518149871, "grad_norm": 0.82421875, "learning_rate": 9.651539193906698e-06, "loss": 1.3006, "step": 8312 }, { "epoch": 2.6115397451754125, "grad_norm": 0.87109375, "learning_rate": 9.64900031735957e-06, "loss": 1.2853, "step": 8314 }, { "epoch": 2.612167972200954, "grad_norm": 0.953125, "learning_rate": 9.64646144081244e-06, "loss": 1.1489, "step": 8316 }, { "epoch": 2.612796199226495, "grad_norm": 0.99609375, "learning_rate": 9.643922564265314e-06, "loss": 1.2045, "step": 8318 }, { "epoch": 2.613424426252037, "grad_norm": 0.8359375, "learning_rate": 9.641383687718185e-06, "loss": 1.1971, "step": 8320 }, { "epoch": 2.6140526532775783, "grad_norm": 0.83203125, "learning_rate": 9.638844811171058e-06, "loss": 1.1142, "step": 8322 }, { "epoch": 2.6146808803031196, "grad_norm": 0.890625, "learning_rate": 9.63630593462393e-06, "loss": 1.3033, "step": 8324 }, { "epoch": 2.615309107328661, "grad_norm": 0.84375, "learning_rate": 9.633767058076801e-06, "loss": 1.2125, "step": 8326 }, { "epoch": 2.6159373343542023, "grad_norm": 0.828125, "learning_rate": 9.631228181529674e-06, "loss": 1.0768, "step": 8328 }, { "epoch": 2.6165655613797436, "grad_norm": 0.84765625, "learning_rate": 9.628689304982545e-06, "loss": 1.3004, "step": 8330 }, { "epoch": 2.617193788405285, "grad_norm": 0.921875, "learning_rate": 9.626150428435419e-06, "loss": 1.247, "step": 8332 }, { "epoch": 2.6178220154308263, "grad_norm": 0.828125, "learning_rate": 9.62361155188829e-06, "loss": 1.1625, "step": 8334 }, { "epoch": 2.6184502424563676, "grad_norm": 0.953125, "learning_rate": 9.621072675341161e-06, "loss": 1.1265, "step": 8336 }, { "epoch": 2.619078469481909, "grad_norm": 0.87890625, "learning_rate": 9.618533798794034e-06, "loss": 1.2211, "step": 8338 }, { "epoch": 2.6197066965074502, "grad_norm": 0.95703125, "learning_rate": 9.615994922246906e-06, "loss": 1.1047, "step": 8340 }, { "epoch": 2.6203349235329916, "grad_norm": 0.84765625, "learning_rate": 9.613456045699779e-06, "loss": 1.2186, "step": 8342 }, { "epoch": 2.6209631505585333, "grad_norm": 0.8671875, "learning_rate": 9.61091716915265e-06, "loss": 1.0722, "step": 8344 }, { "epoch": 2.6215913775840747, "grad_norm": 0.9609375, "learning_rate": 9.608378292605522e-06, "loss": 1.2318, "step": 8346 }, { "epoch": 2.622219604609616, "grad_norm": 0.85546875, "learning_rate": 9.605839416058395e-06, "loss": 1.1319, "step": 8348 }, { "epoch": 2.6228478316351573, "grad_norm": 0.85546875, "learning_rate": 9.603300539511266e-06, "loss": 1.1703, "step": 8350 }, { "epoch": 2.6234760586606987, "grad_norm": 0.87109375, "learning_rate": 9.60076166296414e-06, "loss": 1.2391, "step": 8352 }, { "epoch": 2.62410428568624, "grad_norm": 0.890625, "learning_rate": 9.59822278641701e-06, "loss": 1.2098, "step": 8354 }, { "epoch": 2.6247325127117813, "grad_norm": 0.91796875, "learning_rate": 9.595683909869884e-06, "loss": 1.239, "step": 8356 }, { "epoch": 2.6253607397373226, "grad_norm": 0.84375, "learning_rate": 9.593145033322755e-06, "loss": 1.2125, "step": 8358 }, { "epoch": 2.625988966762864, "grad_norm": 0.93359375, "learning_rate": 9.590606156775628e-06, "loss": 1.2088, "step": 8360 }, { "epoch": 2.6266171937884053, "grad_norm": 0.828125, "learning_rate": 9.5880672802285e-06, "loss": 1.166, "step": 8362 }, { "epoch": 2.6272454208139466, "grad_norm": 0.9453125, "learning_rate": 9.585528403681373e-06, "loss": 1.204, "step": 8364 }, { "epoch": 2.627873647839488, "grad_norm": 1.03125, "learning_rate": 9.582989527134244e-06, "loss": 1.1946, "step": 8366 }, { "epoch": 2.6285018748650293, "grad_norm": 0.94140625, "learning_rate": 9.580450650587117e-06, "loss": 1.2513, "step": 8368 }, { "epoch": 2.6291301018905706, "grad_norm": 0.91796875, "learning_rate": 9.577911774039988e-06, "loss": 1.2714, "step": 8370 }, { "epoch": 2.629758328916112, "grad_norm": 0.87890625, "learning_rate": 9.57537289749286e-06, "loss": 1.3492, "step": 8372 }, { "epoch": 2.6303865559416533, "grad_norm": 0.93359375, "learning_rate": 9.572834020945733e-06, "loss": 1.2335, "step": 8374 }, { "epoch": 2.6310147829671946, "grad_norm": 0.85546875, "learning_rate": 9.570295144398604e-06, "loss": 1.3027, "step": 8376 }, { "epoch": 2.631643009992736, "grad_norm": 0.9296875, "learning_rate": 9.567756267851477e-06, "loss": 1.2272, "step": 8378 }, { "epoch": 2.6322712370182773, "grad_norm": 0.9140625, "learning_rate": 9.565217391304349e-06, "loss": 1.086, "step": 8380 }, { "epoch": 2.6328994640438186, "grad_norm": 0.85546875, "learning_rate": 9.562678514757222e-06, "loss": 1.2643, "step": 8382 }, { "epoch": 2.6335276910693604, "grad_norm": 0.87109375, "learning_rate": 9.560139638210093e-06, "loss": 1.1945, "step": 8384 }, { "epoch": 2.6341559180949017, "grad_norm": 0.8515625, "learning_rate": 9.557600761662965e-06, "loss": 1.2233, "step": 8386 }, { "epoch": 2.634784145120443, "grad_norm": 0.984375, "learning_rate": 9.555061885115838e-06, "loss": 1.2933, "step": 8388 }, { "epoch": 2.6354123721459843, "grad_norm": 0.93359375, "learning_rate": 9.552523008568709e-06, "loss": 1.2637, "step": 8390 }, { "epoch": 2.6360405991715257, "grad_norm": 0.90625, "learning_rate": 9.549984132021582e-06, "loss": 1.2201, "step": 8392 }, { "epoch": 2.636668826197067, "grad_norm": 0.8359375, "learning_rate": 9.547445255474453e-06, "loss": 1.1775, "step": 8394 }, { "epoch": 2.6372970532226083, "grad_norm": 0.8359375, "learning_rate": 9.544906378927325e-06, "loss": 1.1219, "step": 8396 }, { "epoch": 2.6379252802481497, "grad_norm": 0.90625, "learning_rate": 9.542367502380198e-06, "loss": 1.2112, "step": 8398 }, { "epoch": 2.638553507273691, "grad_norm": 0.96484375, "learning_rate": 9.53982862583307e-06, "loss": 1.1167, "step": 8400 }, { "epoch": 2.6391817342992323, "grad_norm": 0.953125, "learning_rate": 9.537289749285942e-06, "loss": 1.2177, "step": 8402 }, { "epoch": 2.6398099613247736, "grad_norm": 0.8671875, "learning_rate": 9.534750872738814e-06, "loss": 1.2147, "step": 8404 }, { "epoch": 2.640438188350315, "grad_norm": 0.8671875, "learning_rate": 9.532211996191685e-06, "loss": 1.2503, "step": 8406 }, { "epoch": 2.6410664153758563, "grad_norm": 0.8828125, "learning_rate": 9.529673119644558e-06, "loss": 1.2971, "step": 8408 }, { "epoch": 2.641694642401398, "grad_norm": 0.8671875, "learning_rate": 9.52713424309743e-06, "loss": 1.1697, "step": 8410 }, { "epoch": 2.6423228694269394, "grad_norm": 0.83984375, "learning_rate": 9.524595366550303e-06, "loss": 1.1083, "step": 8412 }, { "epoch": 2.6429510964524807, "grad_norm": 0.85546875, "learning_rate": 9.522056490003174e-06, "loss": 1.2256, "step": 8414 }, { "epoch": 2.643579323478022, "grad_norm": 0.828125, "learning_rate": 9.519517613456047e-06, "loss": 1.1345, "step": 8416 }, { "epoch": 2.6442075505035634, "grad_norm": 0.97265625, "learning_rate": 9.516978736908918e-06, "loss": 1.1583, "step": 8418 }, { "epoch": 2.6448357775291047, "grad_norm": 0.91796875, "learning_rate": 9.51443986036179e-06, "loss": 1.1522, "step": 8420 }, { "epoch": 2.645464004554646, "grad_norm": 0.8125, "learning_rate": 9.511900983814663e-06, "loss": 1.1517, "step": 8422 }, { "epoch": 2.6460922315801874, "grad_norm": 0.9609375, "learning_rate": 9.509362107267534e-06, "loss": 1.0531, "step": 8424 }, { "epoch": 2.6467204586057287, "grad_norm": 0.98046875, "learning_rate": 9.506823230720407e-06, "loss": 1.177, "step": 8426 }, { "epoch": 2.64734868563127, "grad_norm": 0.8671875, "learning_rate": 9.504284354173279e-06, "loss": 1.2406, "step": 8428 }, { "epoch": 2.6479769126568113, "grad_norm": 0.8671875, "learning_rate": 9.50174547762615e-06, "loss": 1.2265, "step": 8430 }, { "epoch": 2.6486051396823527, "grad_norm": 0.99609375, "learning_rate": 9.499206601079023e-06, "loss": 1.161, "step": 8432 }, { "epoch": 2.649233366707894, "grad_norm": 0.94921875, "learning_rate": 9.496667724531895e-06, "loss": 1.1494, "step": 8434 }, { "epoch": 2.6498615937334353, "grad_norm": 0.859375, "learning_rate": 9.494128847984768e-06, "loss": 1.1465, "step": 8436 }, { "epoch": 2.6504898207589767, "grad_norm": 0.84375, "learning_rate": 9.491589971437639e-06, "loss": 1.1225, "step": 8438 }, { "epoch": 2.651118047784518, "grad_norm": 0.8828125, "learning_rate": 9.48905109489051e-06, "loss": 1.094, "step": 8440 }, { "epoch": 2.6517462748100593, "grad_norm": 0.859375, "learning_rate": 9.486512218343384e-06, "loss": 1.0814, "step": 8442 }, { "epoch": 2.6523745018356006, "grad_norm": 1.03125, "learning_rate": 9.483973341796255e-06, "loss": 1.0285, "step": 8444 }, { "epoch": 2.653002728861142, "grad_norm": 0.8515625, "learning_rate": 9.481434465249128e-06, "loss": 1.2455, "step": 8446 }, { "epoch": 2.6536309558866833, "grad_norm": 0.86328125, "learning_rate": 9.478895588702001e-06, "loss": 1.1987, "step": 8448 }, { "epoch": 2.654259182912225, "grad_norm": 0.8828125, "learning_rate": 9.476356712154872e-06, "loss": 1.2405, "step": 8450 }, { "epoch": 2.6548874099377664, "grad_norm": 0.92578125, "learning_rate": 9.473817835607746e-06, "loss": 1.0695, "step": 8452 }, { "epoch": 2.6555156369633077, "grad_norm": 0.8984375, "learning_rate": 9.471278959060617e-06, "loss": 1.0887, "step": 8454 }, { "epoch": 2.656143863988849, "grad_norm": 0.90625, "learning_rate": 9.468740082513488e-06, "loss": 1.1871, "step": 8456 }, { "epoch": 2.6567720910143904, "grad_norm": 0.82421875, "learning_rate": 9.466201205966361e-06, "loss": 1.235, "step": 8458 }, { "epoch": 2.6574003180399317, "grad_norm": 0.8671875, "learning_rate": 9.463662329419233e-06, "loss": 1.1305, "step": 8460 }, { "epoch": 2.658028545065473, "grad_norm": 0.83984375, "learning_rate": 9.461123452872106e-06, "loss": 1.0736, "step": 8462 }, { "epoch": 2.6586567720910144, "grad_norm": 0.9453125, "learning_rate": 9.458584576324977e-06, "loss": 1.1682, "step": 8464 }, { "epoch": 2.6592849991165557, "grad_norm": 0.9296875, "learning_rate": 9.456045699777849e-06, "loss": 1.1639, "step": 8466 }, { "epoch": 2.659913226142097, "grad_norm": 0.8671875, "learning_rate": 9.453506823230722e-06, "loss": 1.2223, "step": 8468 }, { "epoch": 2.6605414531676383, "grad_norm": 0.94921875, "learning_rate": 9.450967946683593e-06, "loss": 1.1154, "step": 8470 }, { "epoch": 2.6611696801931797, "grad_norm": 0.87890625, "learning_rate": 9.448429070136466e-06, "loss": 1.0826, "step": 8472 }, { "epoch": 2.661797907218721, "grad_norm": 1.015625, "learning_rate": 9.445890193589338e-06, "loss": 1.2074, "step": 8474 }, { "epoch": 2.6624261342442628, "grad_norm": 0.890625, "learning_rate": 9.443351317042209e-06, "loss": 1.1333, "step": 8476 }, { "epoch": 2.663054361269804, "grad_norm": 1.03125, "learning_rate": 9.440812440495082e-06, "loss": 1.2202, "step": 8478 }, { "epoch": 2.6636825882953454, "grad_norm": 0.87890625, "learning_rate": 9.438273563947953e-06, "loss": 1.2403, "step": 8480 }, { "epoch": 2.6643108153208868, "grad_norm": 0.84375, "learning_rate": 9.435734687400826e-06, "loss": 1.1786, "step": 8482 }, { "epoch": 2.664939042346428, "grad_norm": 0.94921875, "learning_rate": 9.433195810853698e-06, "loss": 1.1325, "step": 8484 }, { "epoch": 2.6655672693719694, "grad_norm": 0.86328125, "learning_rate": 9.430656934306571e-06, "loss": 1.0782, "step": 8486 }, { "epoch": 2.6661954963975107, "grad_norm": 0.94140625, "learning_rate": 9.428118057759442e-06, "loss": 1.217, "step": 8488 }, { "epoch": 2.666823723423052, "grad_norm": 0.875, "learning_rate": 9.425579181212314e-06, "loss": 1.1414, "step": 8490 }, { "epoch": 2.6674519504485934, "grad_norm": 0.83984375, "learning_rate": 9.423040304665187e-06, "loss": 1.2201, "step": 8492 }, { "epoch": 2.6680801774741347, "grad_norm": 0.83203125, "learning_rate": 9.420501428118058e-06, "loss": 1.3012, "step": 8494 }, { "epoch": 2.668708404499676, "grad_norm": 0.90234375, "learning_rate": 9.417962551570931e-06, "loss": 1.2001, "step": 8496 }, { "epoch": 2.6693366315252174, "grad_norm": 0.8515625, "learning_rate": 9.415423675023803e-06, "loss": 1.2408, "step": 8498 }, { "epoch": 2.6699648585507587, "grad_norm": 0.84375, "learning_rate": 9.412884798476674e-06, "loss": 1.129, "step": 8500 }, { "epoch": 2.6705930855763, "grad_norm": 0.8515625, "learning_rate": 9.410345921929547e-06, "loss": 1.3063, "step": 8502 }, { "epoch": 2.6712213126018414, "grad_norm": 1.0234375, "learning_rate": 9.407807045382418e-06, "loss": 1.2523, "step": 8504 }, { "epoch": 2.6718495396273827, "grad_norm": 0.90625, "learning_rate": 9.405268168835291e-06, "loss": 1.1856, "step": 8506 }, { "epoch": 2.672477766652924, "grad_norm": 0.984375, "learning_rate": 9.402729292288163e-06, "loss": 1.1575, "step": 8508 }, { "epoch": 2.6731059936784654, "grad_norm": 0.8515625, "learning_rate": 9.400190415741034e-06, "loss": 1.0457, "step": 8510 }, { "epoch": 2.6737342207040067, "grad_norm": 0.8828125, "learning_rate": 9.397651539193907e-06, "loss": 1.1486, "step": 8512 }, { "epoch": 2.674362447729548, "grad_norm": 0.9609375, "learning_rate": 9.395112662646779e-06, "loss": 1.094, "step": 8514 }, { "epoch": 2.67499067475509, "grad_norm": 0.8671875, "learning_rate": 9.392573786099652e-06, "loss": 1.1416, "step": 8516 }, { "epoch": 2.675618901780631, "grad_norm": 0.828125, "learning_rate": 9.390034909552523e-06, "loss": 1.2959, "step": 8518 }, { "epoch": 2.6762471288061724, "grad_norm": 0.87109375, "learning_rate": 9.387496033005396e-06, "loss": 1.0791, "step": 8520 }, { "epoch": 2.6768753558317138, "grad_norm": 0.8984375, "learning_rate": 9.384957156458268e-06, "loss": 1.2186, "step": 8522 }, { "epoch": 2.677503582857255, "grad_norm": 0.94140625, "learning_rate": 9.382418279911139e-06, "loss": 1.1701, "step": 8524 }, { "epoch": 2.6781318098827964, "grad_norm": 0.8984375, "learning_rate": 9.379879403364012e-06, "loss": 1.0534, "step": 8526 }, { "epoch": 2.6787600369083377, "grad_norm": 0.86328125, "learning_rate": 9.377340526816883e-06, "loss": 1.0403, "step": 8528 }, { "epoch": 2.679388263933879, "grad_norm": 0.859375, "learning_rate": 9.374801650269757e-06, "loss": 1.2593, "step": 8530 }, { "epoch": 2.6800164909594204, "grad_norm": 0.87109375, "learning_rate": 9.37226277372263e-06, "loss": 1.401, "step": 8532 }, { "epoch": 2.6806447179849617, "grad_norm": 0.83203125, "learning_rate": 9.369723897175501e-06, "loss": 1.113, "step": 8534 }, { "epoch": 2.681272945010503, "grad_norm": 0.859375, "learning_rate": 9.367185020628372e-06, "loss": 1.2193, "step": 8536 }, { "epoch": 2.6819011720360444, "grad_norm": 0.89453125, "learning_rate": 9.364646144081245e-06, "loss": 1.2076, "step": 8538 }, { "epoch": 2.6825293990615857, "grad_norm": 0.859375, "learning_rate": 9.362107267534117e-06, "loss": 1.2201, "step": 8540 }, { "epoch": 2.6831576260871275, "grad_norm": 0.96484375, "learning_rate": 9.35956839098699e-06, "loss": 1.2454, "step": 8542 }, { "epoch": 2.683785853112669, "grad_norm": 0.84765625, "learning_rate": 9.357029514439861e-06, "loss": 1.1856, "step": 8544 }, { "epoch": 2.68441408013821, "grad_norm": 1.1640625, "learning_rate": 9.354490637892734e-06, "loss": 1.1561, "step": 8546 }, { "epoch": 2.6850423071637515, "grad_norm": 0.92578125, "learning_rate": 9.351951761345606e-06, "loss": 1.2809, "step": 8548 }, { "epoch": 2.685670534189293, "grad_norm": 1.0078125, "learning_rate": 9.349412884798477e-06, "loss": 1.1202, "step": 8550 }, { "epoch": 2.686298761214834, "grad_norm": 0.8828125, "learning_rate": 9.34687400825135e-06, "loss": 1.2268, "step": 8552 }, { "epoch": 2.6869269882403755, "grad_norm": 0.859375, "learning_rate": 9.344335131704222e-06, "loss": 1.3182, "step": 8554 }, { "epoch": 2.687555215265917, "grad_norm": 0.890625, "learning_rate": 9.341796255157095e-06, "loss": 1.1671, "step": 8556 }, { "epoch": 2.688183442291458, "grad_norm": 0.84375, "learning_rate": 9.339257378609966e-06, "loss": 1.1365, "step": 8558 }, { "epoch": 2.6888116693169994, "grad_norm": 0.91015625, "learning_rate": 9.336718502062837e-06, "loss": 1.3515, "step": 8560 }, { "epoch": 2.6894398963425408, "grad_norm": 0.859375, "learning_rate": 9.33417962551571e-06, "loss": 1.2687, "step": 8562 }, { "epoch": 2.690068123368082, "grad_norm": 0.84375, "learning_rate": 9.331640748968582e-06, "loss": 1.3292, "step": 8564 }, { "epoch": 2.6906963503936234, "grad_norm": 0.8359375, "learning_rate": 9.329101872421455e-06, "loss": 1.1909, "step": 8566 }, { "epoch": 2.6913245774191648, "grad_norm": 0.8984375, "learning_rate": 9.326562995874326e-06, "loss": 1.1728, "step": 8568 }, { "epoch": 2.691952804444706, "grad_norm": 0.95703125, "learning_rate": 9.324024119327198e-06, "loss": 1.129, "step": 8570 }, { "epoch": 2.6925810314702474, "grad_norm": 0.859375, "learning_rate": 9.32148524278007e-06, "loss": 1.1925, "step": 8572 }, { "epoch": 2.6932092584957887, "grad_norm": 0.99609375, "learning_rate": 9.318946366232942e-06, "loss": 1.2486, "step": 8574 }, { "epoch": 2.69383748552133, "grad_norm": 0.84375, "learning_rate": 9.316407489685815e-06, "loss": 1.3359, "step": 8576 }, { "epoch": 2.6944657125468714, "grad_norm": 0.8125, "learning_rate": 9.313868613138687e-06, "loss": 1.1934, "step": 8578 }, { "epoch": 2.6950939395724127, "grad_norm": 0.84765625, "learning_rate": 9.31132973659156e-06, "loss": 1.1376, "step": 8580 }, { "epoch": 2.6957221665979545, "grad_norm": 0.9296875, "learning_rate": 9.308790860044431e-06, "loss": 1.1097, "step": 8582 }, { "epoch": 2.696350393623496, "grad_norm": 1.09375, "learning_rate": 9.306251983497303e-06, "loss": 1.1256, "step": 8584 }, { "epoch": 2.696978620649037, "grad_norm": 0.9453125, "learning_rate": 9.303713106950176e-06, "loss": 1.1685, "step": 8586 }, { "epoch": 2.6976068476745785, "grad_norm": 0.83203125, "learning_rate": 9.301174230403047e-06, "loss": 1.2755, "step": 8588 }, { "epoch": 2.69823507470012, "grad_norm": 0.9140625, "learning_rate": 9.29863535385592e-06, "loss": 1.0006, "step": 8590 }, { "epoch": 2.698863301725661, "grad_norm": 0.88671875, "learning_rate": 9.296096477308791e-06, "loss": 1.1454, "step": 8592 }, { "epoch": 2.6994915287512025, "grad_norm": 0.875, "learning_rate": 9.293557600761663e-06, "loss": 1.1855, "step": 8594 }, { "epoch": 2.700119755776744, "grad_norm": 0.99609375, "learning_rate": 9.291018724214536e-06, "loss": 1.0808, "step": 8596 }, { "epoch": 2.700747982802285, "grad_norm": 0.875, "learning_rate": 9.288479847667407e-06, "loss": 1.2162, "step": 8598 }, { "epoch": 2.7013762098278264, "grad_norm": 1.0625, "learning_rate": 9.28594097112028e-06, "loss": 1.252, "step": 8600 }, { "epoch": 2.7020044368533678, "grad_norm": 0.86328125, "learning_rate": 9.283402094573152e-06, "loss": 1.1132, "step": 8602 }, { "epoch": 2.702632663878909, "grad_norm": 0.8984375, "learning_rate": 9.280863218026023e-06, "loss": 1.2573, "step": 8604 }, { "epoch": 2.7032608909044504, "grad_norm": 0.92578125, "learning_rate": 9.278324341478896e-06, "loss": 0.9743, "step": 8606 }, { "epoch": 2.703889117929992, "grad_norm": 0.953125, "learning_rate": 9.275785464931768e-06, "loss": 1.2334, "step": 8608 }, { "epoch": 2.7045173449555335, "grad_norm": 0.875, "learning_rate": 9.27324658838464e-06, "loss": 1.2431, "step": 8610 }, { "epoch": 2.705145571981075, "grad_norm": 0.88671875, "learning_rate": 9.270707711837512e-06, "loss": 1.1449, "step": 8612 }, { "epoch": 2.705773799006616, "grad_norm": 0.8828125, "learning_rate": 9.268168835290383e-06, "loss": 1.2655, "step": 8614 }, { "epoch": 2.7064020260321575, "grad_norm": 0.91015625, "learning_rate": 9.265629958743256e-06, "loss": 1.0926, "step": 8616 }, { "epoch": 2.707030253057699, "grad_norm": 0.90625, "learning_rate": 9.26309108219613e-06, "loss": 1.2308, "step": 8618 }, { "epoch": 2.70765848008324, "grad_norm": 0.91015625, "learning_rate": 9.260552205649001e-06, "loss": 1.1774, "step": 8620 }, { "epoch": 2.7082867071087815, "grad_norm": 0.91796875, "learning_rate": 9.258013329101874e-06, "loss": 1.2488, "step": 8622 }, { "epoch": 2.708914934134323, "grad_norm": 0.91015625, "learning_rate": 9.255474452554745e-06, "loss": 1.1772, "step": 8624 }, { "epoch": 2.709543161159864, "grad_norm": 0.921875, "learning_rate": 9.252935576007618e-06, "loss": 1.0649, "step": 8626 }, { "epoch": 2.7101713881854055, "grad_norm": 0.9375, "learning_rate": 9.25039669946049e-06, "loss": 1.1924, "step": 8628 }, { "epoch": 2.710799615210947, "grad_norm": 0.88671875, "learning_rate": 9.247857822913361e-06, "loss": 1.2255, "step": 8630 }, { "epoch": 2.711427842236488, "grad_norm": 0.8828125, "learning_rate": 9.245318946366234e-06, "loss": 1.2982, "step": 8632 }, { "epoch": 2.7120560692620295, "grad_norm": 0.890625, "learning_rate": 9.242780069819106e-06, "loss": 1.242, "step": 8634 }, { "epoch": 2.712684296287571, "grad_norm": 0.84765625, "learning_rate": 9.240241193271979e-06, "loss": 1.2739, "step": 8636 }, { "epoch": 2.713312523313112, "grad_norm": 0.890625, "learning_rate": 9.23770231672485e-06, "loss": 1.1578, "step": 8638 }, { "epoch": 2.7139407503386535, "grad_norm": 0.90234375, "learning_rate": 9.235163440177722e-06, "loss": 1.3106, "step": 8640 }, { "epoch": 2.714568977364195, "grad_norm": 0.88671875, "learning_rate": 9.232624563630595e-06, "loss": 1.142, "step": 8642 }, { "epoch": 2.715197204389736, "grad_norm": 0.91015625, "learning_rate": 9.230085687083466e-06, "loss": 1.1661, "step": 8644 }, { "epoch": 2.7158254314152774, "grad_norm": 0.87109375, "learning_rate": 9.227546810536339e-06, "loss": 1.2508, "step": 8646 }, { "epoch": 2.716453658440819, "grad_norm": 0.8984375, "learning_rate": 9.22500793398921e-06, "loss": 1.3473, "step": 8648 }, { "epoch": 2.7170818854663605, "grad_norm": 0.93359375, "learning_rate": 9.222469057442084e-06, "loss": 1.2084, "step": 8650 }, { "epoch": 2.717710112491902, "grad_norm": 0.84765625, "learning_rate": 9.219930180894955e-06, "loss": 1.1336, "step": 8652 }, { "epoch": 2.718338339517443, "grad_norm": 0.859375, "learning_rate": 9.217391304347826e-06, "loss": 1.2546, "step": 8654 }, { "epoch": 2.7189665665429845, "grad_norm": 0.82421875, "learning_rate": 9.2148524278007e-06, "loss": 1.2444, "step": 8656 }, { "epoch": 2.719594793568526, "grad_norm": 0.83984375, "learning_rate": 9.21231355125357e-06, "loss": 1.1496, "step": 8658 }, { "epoch": 2.720223020594067, "grad_norm": 0.90234375, "learning_rate": 9.209774674706444e-06, "loss": 1.0408, "step": 8660 }, { "epoch": 2.7208512476196085, "grad_norm": 0.87890625, "learning_rate": 9.207235798159315e-06, "loss": 1.1055, "step": 8662 }, { "epoch": 2.72147947464515, "grad_norm": 0.8828125, "learning_rate": 9.204696921612187e-06, "loss": 1.203, "step": 8664 }, { "epoch": 2.722107701670691, "grad_norm": 0.921875, "learning_rate": 9.20215804506506e-06, "loss": 1.1595, "step": 8666 }, { "epoch": 2.7227359286962325, "grad_norm": 0.91796875, "learning_rate": 9.199619168517931e-06, "loss": 1.4185, "step": 8668 }, { "epoch": 2.723364155721774, "grad_norm": 0.94921875, "learning_rate": 9.197080291970804e-06, "loss": 1.1604, "step": 8670 }, { "epoch": 2.7239923827473156, "grad_norm": 0.87890625, "learning_rate": 9.194541415423676e-06, "loss": 1.1948, "step": 8672 }, { "epoch": 2.724620609772857, "grad_norm": 0.8203125, "learning_rate": 9.192002538876547e-06, "loss": 1.1164, "step": 8674 }, { "epoch": 2.7252488367983982, "grad_norm": 0.890625, "learning_rate": 9.18946366232942e-06, "loss": 1.1879, "step": 8676 }, { "epoch": 2.7258770638239396, "grad_norm": 0.84375, "learning_rate": 9.186924785782291e-06, "loss": 1.2257, "step": 8678 }, { "epoch": 2.726505290849481, "grad_norm": 0.81640625, "learning_rate": 9.184385909235164e-06, "loss": 1.1763, "step": 8680 }, { "epoch": 2.7271335178750222, "grad_norm": 0.9140625, "learning_rate": 9.181847032688036e-06, "loss": 1.1286, "step": 8682 }, { "epoch": 2.7277617449005636, "grad_norm": 0.8125, "learning_rate": 9.179308156140909e-06, "loss": 1.1175, "step": 8684 }, { "epoch": 2.728389971926105, "grad_norm": 0.8671875, "learning_rate": 9.17676927959378e-06, "loss": 1.2022, "step": 8686 }, { "epoch": 2.729018198951646, "grad_norm": 0.77734375, "learning_rate": 9.174230403046652e-06, "loss": 1.3269, "step": 8688 }, { "epoch": 2.7296464259771875, "grad_norm": 0.859375, "learning_rate": 9.171691526499525e-06, "loss": 1.131, "step": 8690 }, { "epoch": 2.730274653002729, "grad_norm": 0.96875, "learning_rate": 9.169152649952396e-06, "loss": 1.1066, "step": 8692 }, { "epoch": 2.73090288002827, "grad_norm": 0.87109375, "learning_rate": 9.16661377340527e-06, "loss": 1.1499, "step": 8694 }, { "epoch": 2.7315311070538115, "grad_norm": 0.90234375, "learning_rate": 9.16407489685814e-06, "loss": 1.1262, "step": 8696 }, { "epoch": 2.732159334079353, "grad_norm": 1.0, "learning_rate": 9.161536020311012e-06, "loss": 1.1625, "step": 8698 }, { "epoch": 2.732787561104894, "grad_norm": 1.0, "learning_rate": 9.158997143763885e-06, "loss": 1.1464, "step": 8700 }, { "epoch": 2.7334157881304355, "grad_norm": 0.94140625, "learning_rate": 9.156458267216756e-06, "loss": 1.2327, "step": 8702 }, { "epoch": 2.734044015155977, "grad_norm": 0.8359375, "learning_rate": 9.15391939066963e-06, "loss": 1.0969, "step": 8704 }, { "epoch": 2.734672242181518, "grad_norm": 0.828125, "learning_rate": 9.151380514122503e-06, "loss": 1.1112, "step": 8706 }, { "epoch": 2.7353004692070595, "grad_norm": 0.90234375, "learning_rate": 9.148841637575374e-06, "loss": 1.3812, "step": 8708 }, { "epoch": 2.735928696232601, "grad_norm": 0.83203125, "learning_rate": 9.146302761028247e-06, "loss": 1.2572, "step": 8710 }, { "epoch": 2.736556923258142, "grad_norm": 0.8984375, "learning_rate": 9.143763884481118e-06, "loss": 1.142, "step": 8712 }, { "epoch": 2.737185150283684, "grad_norm": 0.8984375, "learning_rate": 9.14122500793399e-06, "loss": 1.144, "step": 8714 }, { "epoch": 2.7378133773092252, "grad_norm": 0.89453125, "learning_rate": 9.138686131386863e-06, "loss": 1.3231, "step": 8716 }, { "epoch": 2.7384416043347666, "grad_norm": 0.8828125, "learning_rate": 9.136147254839734e-06, "loss": 1.1236, "step": 8718 }, { "epoch": 2.739069831360308, "grad_norm": 0.88671875, "learning_rate": 9.133608378292607e-06, "loss": 1.1687, "step": 8720 }, { "epoch": 2.7396980583858492, "grad_norm": 0.86328125, "learning_rate": 9.131069501745479e-06, "loss": 1.2533, "step": 8722 }, { "epoch": 2.7403262854113906, "grad_norm": 0.84375, "learning_rate": 9.12853062519835e-06, "loss": 1.0806, "step": 8724 }, { "epoch": 2.740954512436932, "grad_norm": 0.890625, "learning_rate": 9.125991748651223e-06, "loss": 1.2269, "step": 8726 }, { "epoch": 2.741582739462473, "grad_norm": 0.90234375, "learning_rate": 9.123452872104095e-06, "loss": 1.0659, "step": 8728 }, { "epoch": 2.7422109664880145, "grad_norm": 0.83203125, "learning_rate": 9.120913995556968e-06, "loss": 1.2846, "step": 8730 }, { "epoch": 2.742839193513556, "grad_norm": 0.8828125, "learning_rate": 9.118375119009839e-06, "loss": 1.2342, "step": 8732 }, { "epoch": 2.743467420539097, "grad_norm": 0.79296875, "learning_rate": 9.11583624246271e-06, "loss": 1.2884, "step": 8734 }, { "epoch": 2.7440956475646385, "grad_norm": 0.859375, "learning_rate": 9.113297365915583e-06, "loss": 1.2428, "step": 8736 }, { "epoch": 2.7447238745901803, "grad_norm": 0.8984375, "learning_rate": 9.110758489368455e-06, "loss": 1.2098, "step": 8738 }, { "epoch": 2.7453521016157216, "grad_norm": 0.828125, "learning_rate": 9.108219612821328e-06, "loss": 1.2179, "step": 8740 }, { "epoch": 2.745980328641263, "grad_norm": 0.8203125, "learning_rate": 9.1056807362742e-06, "loss": 1.3499, "step": 8742 }, { "epoch": 2.7466085556668043, "grad_norm": 0.8515625, "learning_rate": 9.103141859727072e-06, "loss": 1.0601, "step": 8744 }, { "epoch": 2.7472367826923456, "grad_norm": 0.828125, "learning_rate": 9.100602983179944e-06, "loss": 1.2922, "step": 8746 }, { "epoch": 2.747865009717887, "grad_norm": 0.8515625, "learning_rate": 9.098064106632815e-06, "loss": 1.1371, "step": 8748 }, { "epoch": 2.7484932367434283, "grad_norm": 0.8828125, "learning_rate": 9.095525230085688e-06, "loss": 1.2278, "step": 8750 }, { "epoch": 2.7491214637689696, "grad_norm": 0.953125, "learning_rate": 9.09298635353856e-06, "loss": 1.1732, "step": 8752 }, { "epoch": 2.749749690794511, "grad_norm": 0.859375, "learning_rate": 9.090447476991433e-06, "loss": 1.1805, "step": 8754 }, { "epoch": 2.7503779178200523, "grad_norm": 1.078125, "learning_rate": 9.087908600444304e-06, "loss": 1.0657, "step": 8756 }, { "epoch": 2.7510061448455936, "grad_norm": 1.15625, "learning_rate": 9.085369723897175e-06, "loss": 1.1152, "step": 8758 }, { "epoch": 2.751634371871135, "grad_norm": 0.96875, "learning_rate": 9.082830847350049e-06, "loss": 1.1851, "step": 8760 }, { "epoch": 2.7522625988966762, "grad_norm": 0.8828125, "learning_rate": 9.08029197080292e-06, "loss": 1.1882, "step": 8762 }, { "epoch": 2.7528908259222176, "grad_norm": 0.8671875, "learning_rate": 9.077753094255793e-06, "loss": 1.1752, "step": 8764 }, { "epoch": 2.753519052947759, "grad_norm": 1.625, "learning_rate": 9.075214217708664e-06, "loss": 1.2417, "step": 8766 }, { "epoch": 2.7541472799733, "grad_norm": 0.84375, "learning_rate": 9.072675341161536e-06, "loss": 1.1084, "step": 8768 }, { "epoch": 2.7547755069988415, "grad_norm": 0.9140625, "learning_rate": 9.070136464614409e-06, "loss": 1.1443, "step": 8770 }, { "epoch": 2.755403734024383, "grad_norm": 1.0078125, "learning_rate": 9.06759758806728e-06, "loss": 1.193, "step": 8772 }, { "epoch": 2.756031961049924, "grad_norm": 0.93359375, "learning_rate": 9.065058711520153e-06, "loss": 1.2437, "step": 8774 }, { "epoch": 2.7566601880754655, "grad_norm": 0.8359375, "learning_rate": 9.062519834973025e-06, "loss": 1.2941, "step": 8776 }, { "epoch": 2.757288415101007, "grad_norm": 0.89453125, "learning_rate": 9.059980958425896e-06, "loss": 1.0076, "step": 8778 }, { "epoch": 2.7579166421265486, "grad_norm": 0.91015625, "learning_rate": 9.057442081878769e-06, "loss": 1.1461, "step": 8780 }, { "epoch": 2.75854486915209, "grad_norm": 0.93359375, "learning_rate": 9.05490320533164e-06, "loss": 1.1561, "step": 8782 }, { "epoch": 2.7591730961776313, "grad_norm": 0.90234375, "learning_rate": 9.052364328784514e-06, "loss": 1.104, "step": 8784 }, { "epoch": 2.7598013232031726, "grad_norm": 0.796875, "learning_rate": 9.049825452237385e-06, "loss": 1.1338, "step": 8786 }, { "epoch": 2.760429550228714, "grad_norm": 0.859375, "learning_rate": 9.047286575690258e-06, "loss": 1.2664, "step": 8788 }, { "epoch": 2.7610577772542553, "grad_norm": 0.8984375, "learning_rate": 9.044747699143131e-06, "loss": 1.1987, "step": 8790 }, { "epoch": 2.7616860042797966, "grad_norm": 0.84765625, "learning_rate": 9.042208822596002e-06, "loss": 1.1966, "step": 8792 }, { "epoch": 2.762314231305338, "grad_norm": 0.87109375, "learning_rate": 9.039669946048874e-06, "loss": 1.3295, "step": 8794 }, { "epoch": 2.7629424583308793, "grad_norm": 0.88671875, "learning_rate": 9.037131069501747e-06, "loss": 1.2477, "step": 8796 }, { "epoch": 2.7635706853564206, "grad_norm": 0.88671875, "learning_rate": 9.034592192954618e-06, "loss": 1.1968, "step": 8798 }, { "epoch": 2.764198912381962, "grad_norm": 0.921875, "learning_rate": 9.032053316407491e-06, "loss": 1.2968, "step": 8800 }, { "epoch": 2.7648271394075032, "grad_norm": 0.8671875, "learning_rate": 9.029514439860363e-06, "loss": 1.22, "step": 8802 }, { "epoch": 2.765455366433045, "grad_norm": 0.98046875, "learning_rate": 9.026975563313234e-06, "loss": 1.184, "step": 8804 }, { "epoch": 2.7660835934585863, "grad_norm": 0.9296875, "learning_rate": 9.024436686766107e-06, "loss": 1.2109, "step": 8806 }, { "epoch": 2.7667118204841277, "grad_norm": 0.82421875, "learning_rate": 9.021897810218979e-06, "loss": 1.2232, "step": 8808 }, { "epoch": 2.767340047509669, "grad_norm": 0.875, "learning_rate": 9.019358933671852e-06, "loss": 1.4004, "step": 8810 }, { "epoch": 2.7679682745352103, "grad_norm": 0.9765625, "learning_rate": 9.016820057124723e-06, "loss": 1.2854, "step": 8812 }, { "epoch": 2.7685965015607517, "grad_norm": 0.9140625, "learning_rate": 9.014281180577596e-06, "loss": 1.0666, "step": 8814 }, { "epoch": 2.769224728586293, "grad_norm": 0.9140625, "learning_rate": 9.011742304030468e-06, "loss": 1.241, "step": 8816 }, { "epoch": 2.7698529556118343, "grad_norm": 0.93359375, "learning_rate": 9.009203427483339e-06, "loss": 1.0892, "step": 8818 }, { "epoch": 2.7704811826373756, "grad_norm": 0.9140625, "learning_rate": 9.006664550936212e-06, "loss": 1.1606, "step": 8820 }, { "epoch": 2.771109409662917, "grad_norm": 0.88671875, "learning_rate": 9.004125674389083e-06, "loss": 1.2497, "step": 8822 }, { "epoch": 2.7717376366884583, "grad_norm": 0.8828125, "learning_rate": 9.001586797841956e-06, "loss": 1.2447, "step": 8824 }, { "epoch": 2.7723658637139996, "grad_norm": 0.98046875, "learning_rate": 8.999047921294828e-06, "loss": 1.1626, "step": 8826 }, { "epoch": 2.772994090739541, "grad_norm": 0.9609375, "learning_rate": 8.9965090447477e-06, "loss": 1.0602, "step": 8828 }, { "epoch": 2.7736223177650823, "grad_norm": 1.0390625, "learning_rate": 8.993970168200572e-06, "loss": 1.1906, "step": 8830 }, { "epoch": 2.7742505447906236, "grad_norm": 0.94921875, "learning_rate": 8.991431291653444e-06, "loss": 1.1996, "step": 8832 }, { "epoch": 2.774878771816165, "grad_norm": 0.8671875, "learning_rate": 8.988892415106317e-06, "loss": 1.2612, "step": 8834 }, { "epoch": 2.7755069988417063, "grad_norm": 0.92578125, "learning_rate": 8.986353538559188e-06, "loss": 1.1974, "step": 8836 }, { "epoch": 2.7761352258672476, "grad_norm": 0.83984375, "learning_rate": 8.98381466201206e-06, "loss": 1.1045, "step": 8838 }, { "epoch": 2.776763452892789, "grad_norm": 0.9609375, "learning_rate": 8.981275785464933e-06, "loss": 1.1507, "step": 8840 }, { "epoch": 2.7773916799183302, "grad_norm": 0.890625, "learning_rate": 8.978736908917804e-06, "loss": 1.2665, "step": 8842 }, { "epoch": 2.7780199069438716, "grad_norm": 0.8984375, "learning_rate": 8.976198032370677e-06, "loss": 1.1378, "step": 8844 }, { "epoch": 2.7786481339694133, "grad_norm": 0.9453125, "learning_rate": 8.973659155823548e-06, "loss": 1.0703, "step": 8846 }, { "epoch": 2.7792763609949547, "grad_norm": 0.90234375, "learning_rate": 8.971120279276422e-06, "loss": 1.0785, "step": 8848 }, { "epoch": 2.779904588020496, "grad_norm": 0.796875, "learning_rate": 8.968581402729293e-06, "loss": 1.2303, "step": 8850 }, { "epoch": 2.7805328150460373, "grad_norm": 0.9609375, "learning_rate": 8.966042526182164e-06, "loss": 1.2005, "step": 8852 }, { "epoch": 2.7811610420715787, "grad_norm": 0.87109375, "learning_rate": 8.963503649635037e-06, "loss": 1.2388, "step": 8854 }, { "epoch": 2.78178926909712, "grad_norm": 0.9609375, "learning_rate": 8.960964773087909e-06, "loss": 1.1309, "step": 8856 }, { "epoch": 2.7824174961226613, "grad_norm": 0.90234375, "learning_rate": 8.958425896540782e-06, "loss": 1.2428, "step": 8858 }, { "epoch": 2.7830457231482026, "grad_norm": 0.875, "learning_rate": 8.955887019993653e-06, "loss": 1.1717, "step": 8860 }, { "epoch": 2.783673950173744, "grad_norm": 0.83984375, "learning_rate": 8.953348143446525e-06, "loss": 1.2138, "step": 8862 }, { "epoch": 2.7843021771992853, "grad_norm": 0.89453125, "learning_rate": 8.950809266899398e-06, "loss": 1.0967, "step": 8864 }, { "epoch": 2.7849304042248266, "grad_norm": 0.87109375, "learning_rate": 8.948270390352269e-06, "loss": 1.164, "step": 8866 }, { "epoch": 2.785558631250368, "grad_norm": 0.875, "learning_rate": 8.945731513805142e-06, "loss": 1.1851, "step": 8868 }, { "epoch": 2.7861868582759097, "grad_norm": 0.85546875, "learning_rate": 8.943192637258013e-06, "loss": 1.09, "step": 8870 }, { "epoch": 2.786815085301451, "grad_norm": 0.94140625, "learning_rate": 8.940653760710885e-06, "loss": 1.2916, "step": 8872 }, { "epoch": 2.7874433123269924, "grad_norm": 0.828125, "learning_rate": 8.938114884163758e-06, "loss": 1.2003, "step": 8874 }, { "epoch": 2.7880715393525337, "grad_norm": 0.890625, "learning_rate": 8.935576007616631e-06, "loss": 1.2229, "step": 8876 }, { "epoch": 2.788699766378075, "grad_norm": 0.91796875, "learning_rate": 8.933037131069502e-06, "loss": 1.2452, "step": 8878 }, { "epoch": 2.7893279934036164, "grad_norm": 0.890625, "learning_rate": 8.930498254522375e-06, "loss": 1.2213, "step": 8880 }, { "epoch": 2.7899562204291577, "grad_norm": 0.94140625, "learning_rate": 8.927959377975247e-06, "loss": 1.0626, "step": 8882 }, { "epoch": 2.790584447454699, "grad_norm": 0.89453125, "learning_rate": 8.92542050142812e-06, "loss": 1.0885, "step": 8884 }, { "epoch": 2.7912126744802404, "grad_norm": 0.91796875, "learning_rate": 8.922881624880991e-06, "loss": 1.1579, "step": 8886 }, { "epoch": 2.7918409015057817, "grad_norm": 0.8671875, "learning_rate": 8.920342748333863e-06, "loss": 1.1024, "step": 8888 }, { "epoch": 2.792469128531323, "grad_norm": 0.875, "learning_rate": 8.917803871786736e-06, "loss": 1.2193, "step": 8890 }, { "epoch": 2.7930973555568643, "grad_norm": 0.921875, "learning_rate": 8.915264995239607e-06, "loss": 1.0511, "step": 8892 }, { "epoch": 2.7937255825824057, "grad_norm": 0.8359375, "learning_rate": 8.91272611869248e-06, "loss": 1.1799, "step": 8894 }, { "epoch": 2.794353809607947, "grad_norm": 0.91796875, "learning_rate": 8.910187242145352e-06, "loss": 1.121, "step": 8896 }, { "epoch": 2.7949820366334883, "grad_norm": 0.90234375, "learning_rate": 8.907648365598223e-06, "loss": 1.3075, "step": 8898 }, { "epoch": 2.7956102636590296, "grad_norm": 0.8984375, "learning_rate": 8.905109489051096e-06, "loss": 1.0603, "step": 8900 }, { "epoch": 2.796238490684571, "grad_norm": 0.859375, "learning_rate": 8.902570612503967e-06, "loss": 1.1152, "step": 8902 }, { "epoch": 2.7968667177101123, "grad_norm": 0.8359375, "learning_rate": 8.90003173595684e-06, "loss": 1.1665, "step": 8904 }, { "epoch": 2.7974949447356536, "grad_norm": 0.92578125, "learning_rate": 8.897492859409712e-06, "loss": 1.1832, "step": 8906 }, { "epoch": 2.798123171761195, "grad_norm": 1.046875, "learning_rate": 8.894953982862583e-06, "loss": 1.162, "step": 8908 }, { "epoch": 2.7987513987867363, "grad_norm": 0.83203125, "learning_rate": 8.892415106315456e-06, "loss": 1.2164, "step": 8910 }, { "epoch": 2.799379625812278, "grad_norm": 0.86328125, "learning_rate": 8.889876229768328e-06, "loss": 1.1247, "step": 8912 }, { "epoch": 2.8000078528378194, "grad_norm": 0.84375, "learning_rate": 8.8873373532212e-06, "loss": 1.0951, "step": 8914 }, { "epoch": 2.8006360798633607, "grad_norm": 1.1953125, "learning_rate": 8.884798476674072e-06, "loss": 1.1273, "step": 8916 }, { "epoch": 2.801264306888902, "grad_norm": 0.859375, "learning_rate": 8.882259600126945e-06, "loss": 1.0575, "step": 8918 }, { "epoch": 2.8018925339144434, "grad_norm": 1.109375, "learning_rate": 8.879720723579817e-06, "loss": 1.2221, "step": 8920 }, { "epoch": 2.8025207609399847, "grad_norm": 0.91015625, "learning_rate": 8.877181847032688e-06, "loss": 1.2034, "step": 8922 }, { "epoch": 2.803148987965526, "grad_norm": 0.84765625, "learning_rate": 8.874642970485561e-06, "loss": 1.1343, "step": 8924 }, { "epoch": 2.8037772149910674, "grad_norm": 0.9140625, "learning_rate": 8.872104093938433e-06, "loss": 1.1435, "step": 8926 }, { "epoch": 2.8044054420166087, "grad_norm": 0.97265625, "learning_rate": 8.869565217391306e-06, "loss": 1.1574, "step": 8928 }, { "epoch": 2.80503366904215, "grad_norm": 0.83984375, "learning_rate": 8.867026340844177e-06, "loss": 1.1683, "step": 8930 }, { "epoch": 2.8056618960676913, "grad_norm": 0.921875, "learning_rate": 8.864487464297048e-06, "loss": 1.1728, "step": 8932 }, { "epoch": 2.8062901230932327, "grad_norm": 0.82421875, "learning_rate": 8.861948587749921e-06, "loss": 1.3423, "step": 8934 }, { "epoch": 2.8069183501187744, "grad_norm": 0.80859375, "learning_rate": 8.859409711202793e-06, "loss": 1.1885, "step": 8936 }, { "epoch": 2.8075465771443158, "grad_norm": 0.8515625, "learning_rate": 8.856870834655666e-06, "loss": 1.2339, "step": 8938 }, { "epoch": 2.808174804169857, "grad_norm": 0.859375, "learning_rate": 8.854331958108537e-06, "loss": 1.1838, "step": 8940 }, { "epoch": 2.8088030311953984, "grad_norm": 0.828125, "learning_rate": 8.851793081561409e-06, "loss": 1.1717, "step": 8942 }, { "epoch": 2.8094312582209398, "grad_norm": 0.89453125, "learning_rate": 8.849254205014282e-06, "loss": 1.1993, "step": 8944 }, { "epoch": 2.810059485246481, "grad_norm": 0.8671875, "learning_rate": 8.846715328467153e-06, "loss": 1.2121, "step": 8946 }, { "epoch": 2.8106877122720224, "grad_norm": 0.9453125, "learning_rate": 8.844176451920026e-06, "loss": 1.1136, "step": 8948 }, { "epoch": 2.8113159392975637, "grad_norm": 0.8828125, "learning_rate": 8.841637575372898e-06, "loss": 1.0336, "step": 8950 }, { "epoch": 2.811944166323105, "grad_norm": 0.84375, "learning_rate": 8.83909869882577e-06, "loss": 1.2212, "step": 8952 }, { "epoch": 2.8125723933486464, "grad_norm": 0.91015625, "learning_rate": 8.836559822278642e-06, "loss": 1.1801, "step": 8954 }, { "epoch": 2.8132006203741877, "grad_norm": 0.86328125, "learning_rate": 8.834020945731513e-06, "loss": 1.2164, "step": 8956 }, { "epoch": 2.813828847399729, "grad_norm": 0.90234375, "learning_rate": 8.831482069184387e-06, "loss": 1.1291, "step": 8958 }, { "epoch": 2.8144570744252704, "grad_norm": 0.8359375, "learning_rate": 8.828943192637258e-06, "loss": 1.1178, "step": 8960 }, { "epoch": 2.8150853014508117, "grad_norm": 1.0, "learning_rate": 8.826404316090131e-06, "loss": 1.0997, "step": 8962 }, { "epoch": 2.815713528476353, "grad_norm": 0.85546875, "learning_rate": 8.823865439543004e-06, "loss": 1.2184, "step": 8964 }, { "epoch": 2.8163417555018944, "grad_norm": 0.94921875, "learning_rate": 8.821326562995875e-06, "loss": 1.1807, "step": 8966 }, { "epoch": 2.8169699825274357, "grad_norm": 0.90234375, "learning_rate": 8.818787686448747e-06, "loss": 1.2236, "step": 8968 }, { "epoch": 2.817598209552977, "grad_norm": 0.90234375, "learning_rate": 8.81624880990162e-06, "loss": 1.2124, "step": 8970 }, { "epoch": 2.8182264365785183, "grad_norm": 0.9296875, "learning_rate": 8.813709933354491e-06, "loss": 1.1715, "step": 8972 }, { "epoch": 2.8188546636040597, "grad_norm": 0.94140625, "learning_rate": 8.811171056807364e-06, "loss": 1.1341, "step": 8974 }, { "epoch": 2.819482890629601, "grad_norm": 0.95703125, "learning_rate": 8.808632180260236e-06, "loss": 1.1516, "step": 8976 }, { "epoch": 2.8201111176551428, "grad_norm": 0.8671875, "learning_rate": 8.806093303713109e-06, "loss": 1.2244, "step": 8978 }, { "epoch": 2.820739344680684, "grad_norm": 0.92578125, "learning_rate": 8.80355442716598e-06, "loss": 1.0713, "step": 8980 }, { "epoch": 2.8213675717062254, "grad_norm": 0.890625, "learning_rate": 8.801015550618852e-06, "loss": 1.2261, "step": 8982 }, { "epoch": 2.8219957987317668, "grad_norm": 0.89453125, "learning_rate": 8.798476674071725e-06, "loss": 1.2586, "step": 8984 }, { "epoch": 2.822624025757308, "grad_norm": 0.890625, "learning_rate": 8.795937797524596e-06, "loss": 1.1711, "step": 8986 }, { "epoch": 2.8232522527828494, "grad_norm": 0.859375, "learning_rate": 8.793398920977469e-06, "loss": 1.2093, "step": 8988 }, { "epoch": 2.8238804798083907, "grad_norm": 0.87890625, "learning_rate": 8.79086004443034e-06, "loss": 1.2378, "step": 8990 }, { "epoch": 2.824508706833932, "grad_norm": 0.9140625, "learning_rate": 8.788321167883212e-06, "loss": 1.1352, "step": 8992 }, { "epoch": 2.8251369338594734, "grad_norm": 0.96484375, "learning_rate": 8.785782291336085e-06, "loss": 1.2086, "step": 8994 }, { "epoch": 2.8257651608850147, "grad_norm": 0.95703125, "learning_rate": 8.783243414788956e-06, "loss": 1.1629, "step": 8996 }, { "epoch": 2.826393387910556, "grad_norm": 0.89453125, "learning_rate": 8.78070453824183e-06, "loss": 1.1229, "step": 8998 }, { "epoch": 2.8270216149360974, "grad_norm": 0.9765625, "learning_rate": 8.7781656616947e-06, "loss": 1.2158, "step": 9000 }, { "epoch": 2.827649841961639, "grad_norm": 0.875, "learning_rate": 8.775626785147572e-06, "loss": 1.2396, "step": 9002 }, { "epoch": 2.8282780689871805, "grad_norm": 0.78125, "learning_rate": 8.773087908600445e-06, "loss": 1.1963, "step": 9004 }, { "epoch": 2.828906296012722, "grad_norm": 0.84375, "learning_rate": 8.770549032053317e-06, "loss": 1.2174, "step": 9006 }, { "epoch": 2.829534523038263, "grad_norm": 0.90625, "learning_rate": 8.76801015550619e-06, "loss": 1.1699, "step": 9008 }, { "epoch": 2.8301627500638045, "grad_norm": 1.015625, "learning_rate": 8.765471278959061e-06, "loss": 1.1734, "step": 9010 }, { "epoch": 2.830790977089346, "grad_norm": 0.8671875, "learning_rate": 8.762932402411934e-06, "loss": 1.1106, "step": 9012 }, { "epoch": 2.831419204114887, "grad_norm": 0.84765625, "learning_rate": 8.760393525864806e-06, "loss": 1.228, "step": 9014 }, { "epoch": 2.8320474311404285, "grad_norm": 0.91796875, "learning_rate": 8.757854649317677e-06, "loss": 1.2233, "step": 9016 }, { "epoch": 2.83267565816597, "grad_norm": 0.80078125, "learning_rate": 8.75531577277055e-06, "loss": 1.265, "step": 9018 }, { "epoch": 2.833303885191511, "grad_norm": 0.88671875, "learning_rate": 8.752776896223421e-06, "loss": 1.2456, "step": 9020 }, { "epoch": 2.8339321122170524, "grad_norm": 0.96484375, "learning_rate": 8.750238019676294e-06, "loss": 1.2257, "step": 9022 }, { "epoch": 2.8345603392425938, "grad_norm": 0.890625, "learning_rate": 8.747699143129166e-06, "loss": 1.1829, "step": 9024 }, { "epoch": 2.835188566268135, "grad_norm": 0.8515625, "learning_rate": 8.745160266582037e-06, "loss": 1.2734, "step": 9026 }, { "epoch": 2.8358167932936764, "grad_norm": 0.9765625, "learning_rate": 8.74262139003491e-06, "loss": 1.1946, "step": 9028 }, { "epoch": 2.8364450203192177, "grad_norm": 0.828125, "learning_rate": 8.740082513487782e-06, "loss": 1.3757, "step": 9030 }, { "epoch": 2.837073247344759, "grad_norm": 0.90625, "learning_rate": 8.737543636940655e-06, "loss": 1.2224, "step": 9032 }, { "epoch": 2.8377014743703004, "grad_norm": 0.83984375, "learning_rate": 8.735004760393526e-06, "loss": 1.1388, "step": 9034 }, { "epoch": 2.8383297013958417, "grad_norm": 0.85546875, "learning_rate": 8.732465883846398e-06, "loss": 1.1863, "step": 9036 }, { "epoch": 2.838957928421383, "grad_norm": 0.87890625, "learning_rate": 8.72992700729927e-06, "loss": 1.2225, "step": 9038 }, { "epoch": 2.8395861554469244, "grad_norm": 0.87890625, "learning_rate": 8.727388130752142e-06, "loss": 1.2286, "step": 9040 }, { "epoch": 2.8402143824724657, "grad_norm": 0.8515625, "learning_rate": 8.724849254205015e-06, "loss": 1.086, "step": 9042 }, { "epoch": 2.8408426094980075, "grad_norm": 0.92578125, "learning_rate": 8.722310377657886e-06, "loss": 1.159, "step": 9044 }, { "epoch": 2.841470836523549, "grad_norm": 0.90234375, "learning_rate": 8.719771501110758e-06, "loss": 1.2525, "step": 9046 }, { "epoch": 2.84209906354909, "grad_norm": 0.88671875, "learning_rate": 8.717232624563633e-06, "loss": 1.0467, "step": 9048 }, { "epoch": 2.8427272905746315, "grad_norm": 0.89453125, "learning_rate": 8.714693748016504e-06, "loss": 1.3227, "step": 9050 }, { "epoch": 2.843355517600173, "grad_norm": 0.91796875, "learning_rate": 8.712154871469375e-06, "loss": 1.0992, "step": 9052 }, { "epoch": 2.843983744625714, "grad_norm": 0.859375, "learning_rate": 8.709615994922248e-06, "loss": 1.1781, "step": 9054 }, { "epoch": 2.8446119716512555, "grad_norm": 0.98828125, "learning_rate": 8.70707711837512e-06, "loss": 1.2496, "step": 9056 }, { "epoch": 2.845240198676797, "grad_norm": 0.94921875, "learning_rate": 8.704538241827993e-06, "loss": 1.2135, "step": 9058 }, { "epoch": 2.845868425702338, "grad_norm": 0.9296875, "learning_rate": 8.701999365280864e-06, "loss": 1.0489, "step": 9060 }, { "epoch": 2.8464966527278794, "grad_norm": 0.97265625, "learning_rate": 8.699460488733736e-06, "loss": 1.0931, "step": 9062 }, { "epoch": 2.8471248797534208, "grad_norm": 0.87109375, "learning_rate": 8.696921612186609e-06, "loss": 1.1408, "step": 9064 }, { "epoch": 2.847753106778962, "grad_norm": 0.87890625, "learning_rate": 8.69438273563948e-06, "loss": 1.2734, "step": 9066 }, { "epoch": 2.848381333804504, "grad_norm": 0.82421875, "learning_rate": 8.691843859092353e-06, "loss": 1.1622, "step": 9068 }, { "epoch": 2.849009560830045, "grad_norm": 0.87890625, "learning_rate": 8.689304982545225e-06, "loss": 1.1151, "step": 9070 }, { "epoch": 2.8496377878555865, "grad_norm": 0.83984375, "learning_rate": 8.686766105998096e-06, "loss": 1.0864, "step": 9072 }, { "epoch": 2.850266014881128, "grad_norm": 0.85546875, "learning_rate": 8.684227229450969e-06, "loss": 1.1366, "step": 9074 }, { "epoch": 2.850894241906669, "grad_norm": 0.828125, "learning_rate": 8.68168835290384e-06, "loss": 1.1638, "step": 9076 }, { "epoch": 2.8515224689322105, "grad_norm": 0.87109375, "learning_rate": 8.679149476356713e-06, "loss": 1.2531, "step": 9078 }, { "epoch": 2.852150695957752, "grad_norm": 0.90234375, "learning_rate": 8.676610599809585e-06, "loss": 1.2749, "step": 9080 }, { "epoch": 2.852778922983293, "grad_norm": 0.859375, "learning_rate": 8.674071723262458e-06, "loss": 1.1556, "step": 9082 }, { "epoch": 2.8534071500088345, "grad_norm": 0.8828125, "learning_rate": 8.67153284671533e-06, "loss": 1.129, "step": 9084 }, { "epoch": 2.854035377034376, "grad_norm": 0.8359375, "learning_rate": 8.6689939701682e-06, "loss": 1.2725, "step": 9086 }, { "epoch": 2.854663604059917, "grad_norm": 0.9921875, "learning_rate": 8.666455093621074e-06, "loss": 1.3089, "step": 9088 }, { "epoch": 2.8552918310854585, "grad_norm": 0.91796875, "learning_rate": 8.663916217073945e-06, "loss": 1.2405, "step": 9090 }, { "epoch": 2.855920058111, "grad_norm": 0.93359375, "learning_rate": 8.661377340526818e-06, "loss": 1.2605, "step": 9092 }, { "epoch": 2.856548285136541, "grad_norm": 1.0703125, "learning_rate": 8.65883846397969e-06, "loss": 1.1111, "step": 9094 }, { "epoch": 2.8571765121620825, "grad_norm": 0.89453125, "learning_rate": 8.656299587432561e-06, "loss": 1.1104, "step": 9096 }, { "epoch": 2.857804739187624, "grad_norm": 0.86328125, "learning_rate": 8.653760710885434e-06, "loss": 1.2555, "step": 9098 }, { "epoch": 2.858432966213165, "grad_norm": 0.89453125, "learning_rate": 8.651221834338305e-06, "loss": 1.2309, "step": 9100 }, { "epoch": 2.8590611932387064, "grad_norm": 0.88671875, "learning_rate": 8.648682957791179e-06, "loss": 1.1012, "step": 9102 }, { "epoch": 2.8596894202642478, "grad_norm": 0.87890625, "learning_rate": 8.64614408124405e-06, "loss": 1.1028, "step": 9104 }, { "epoch": 2.860317647289789, "grad_norm": 0.921875, "learning_rate": 8.643605204696921e-06, "loss": 1.2502, "step": 9106 }, { "epoch": 2.8609458743153304, "grad_norm": 0.86328125, "learning_rate": 8.641066328149794e-06, "loss": 1.2413, "step": 9108 }, { "epoch": 2.861574101340872, "grad_norm": 0.91015625, "learning_rate": 8.638527451602666e-06, "loss": 1.1771, "step": 9110 }, { "epoch": 2.8622023283664135, "grad_norm": 0.89453125, "learning_rate": 8.635988575055539e-06, "loss": 1.2355, "step": 9112 }, { "epoch": 2.862830555391955, "grad_norm": 0.81640625, "learning_rate": 8.63344969850841e-06, "loss": 1.1476, "step": 9114 }, { "epoch": 2.863458782417496, "grad_norm": 0.90625, "learning_rate": 8.630910821961283e-06, "loss": 1.1657, "step": 9116 }, { "epoch": 2.8640870094430375, "grad_norm": 0.8828125, "learning_rate": 8.628371945414155e-06, "loss": 1.2089, "step": 9118 }, { "epoch": 2.864715236468579, "grad_norm": 0.96484375, "learning_rate": 8.625833068867026e-06, "loss": 1.173, "step": 9120 }, { "epoch": 2.86534346349412, "grad_norm": 0.859375, "learning_rate": 8.623294192319899e-06, "loss": 1.2437, "step": 9122 }, { "epoch": 2.8659716905196615, "grad_norm": 0.89453125, "learning_rate": 8.62075531577277e-06, "loss": 1.1699, "step": 9124 }, { "epoch": 2.866599917545203, "grad_norm": 0.84765625, "learning_rate": 8.618216439225644e-06, "loss": 1.2851, "step": 9126 }, { "epoch": 2.867228144570744, "grad_norm": 0.84765625, "learning_rate": 8.615677562678515e-06, "loss": 1.0717, "step": 9128 }, { "epoch": 2.8678563715962855, "grad_norm": 0.88671875, "learning_rate": 8.613138686131386e-06, "loss": 0.9805, "step": 9130 }, { "epoch": 2.868484598621827, "grad_norm": 1.078125, "learning_rate": 8.61059980958426e-06, "loss": 1.1905, "step": 9132 }, { "epoch": 2.8691128256473686, "grad_norm": 0.875, "learning_rate": 8.608060933037133e-06, "loss": 1.3264, "step": 9134 }, { "epoch": 2.86974105267291, "grad_norm": 0.86328125, "learning_rate": 8.605522056490004e-06, "loss": 1.2117, "step": 9136 }, { "epoch": 2.8703692796984512, "grad_norm": 0.9765625, "learning_rate": 8.602983179942877e-06, "loss": 1.1902, "step": 9138 }, { "epoch": 2.8709975067239926, "grad_norm": 0.8203125, "learning_rate": 8.600444303395748e-06, "loss": 1.2074, "step": 9140 }, { "epoch": 2.871625733749534, "grad_norm": 0.89453125, "learning_rate": 8.597905426848621e-06, "loss": 1.2359, "step": 9142 }, { "epoch": 2.872253960775075, "grad_norm": 1.140625, "learning_rate": 8.595366550301493e-06, "loss": 1.0976, "step": 9144 }, { "epoch": 2.8728821878006165, "grad_norm": 0.90234375, "learning_rate": 8.592827673754364e-06, "loss": 1.4076, "step": 9146 }, { "epoch": 2.873510414826158, "grad_norm": 0.91015625, "learning_rate": 8.590288797207237e-06, "loss": 1.2328, "step": 9148 }, { "epoch": 2.874138641851699, "grad_norm": 0.94921875, "learning_rate": 8.587749920660109e-06, "loss": 1.1426, "step": 9150 }, { "epoch": 2.8747668688772405, "grad_norm": 0.95703125, "learning_rate": 8.585211044112982e-06, "loss": 1.1638, "step": 9152 }, { "epoch": 2.875395095902782, "grad_norm": 0.9140625, "learning_rate": 8.582672167565853e-06, "loss": 1.3359, "step": 9154 }, { "epoch": 2.876023322928323, "grad_norm": 0.85546875, "learning_rate": 8.580133291018724e-06, "loss": 1.0855, "step": 9156 }, { "epoch": 2.8766515499538645, "grad_norm": 0.90625, "learning_rate": 8.577594414471598e-06, "loss": 1.2933, "step": 9158 }, { "epoch": 2.877279776979406, "grad_norm": 0.94140625, "learning_rate": 8.575055537924469e-06, "loss": 1.1769, "step": 9160 }, { "epoch": 2.877908004004947, "grad_norm": 0.875, "learning_rate": 8.572516661377342e-06, "loss": 1.296, "step": 9162 }, { "epoch": 2.8785362310304885, "grad_norm": 0.84765625, "learning_rate": 8.569977784830213e-06, "loss": 1.2159, "step": 9164 }, { "epoch": 2.87916445805603, "grad_norm": 0.86328125, "learning_rate": 8.567438908283085e-06, "loss": 1.1394, "step": 9166 }, { "epoch": 2.879792685081571, "grad_norm": 0.9765625, "learning_rate": 8.564900031735958e-06, "loss": 1.1248, "step": 9168 }, { "epoch": 2.8804209121071125, "grad_norm": 0.8125, "learning_rate": 8.56236115518883e-06, "loss": 1.2397, "step": 9170 }, { "epoch": 2.881049139132654, "grad_norm": 0.875, "learning_rate": 8.559822278641702e-06, "loss": 1.2171, "step": 9172 }, { "epoch": 2.881677366158195, "grad_norm": 0.88671875, "learning_rate": 8.557283402094574e-06, "loss": 1.1612, "step": 9174 }, { "epoch": 2.882305593183737, "grad_norm": 0.8359375, "learning_rate": 8.554744525547447e-06, "loss": 1.1409, "step": 9176 }, { "epoch": 2.8829338202092782, "grad_norm": 0.90234375, "learning_rate": 8.552205649000318e-06, "loss": 1.1146, "step": 9178 }, { "epoch": 2.8835620472348196, "grad_norm": 0.84375, "learning_rate": 8.54966677245319e-06, "loss": 1.1532, "step": 9180 }, { "epoch": 2.884190274260361, "grad_norm": 0.88671875, "learning_rate": 8.547127895906063e-06, "loss": 1.1232, "step": 9182 }, { "epoch": 2.8848185012859022, "grad_norm": 0.8828125, "learning_rate": 8.544589019358934e-06, "loss": 1.1765, "step": 9184 }, { "epoch": 2.8854467283114436, "grad_norm": 0.8515625, "learning_rate": 8.542050142811807e-06, "loss": 1.1835, "step": 9186 }, { "epoch": 2.886074955336985, "grad_norm": 0.921875, "learning_rate": 8.539511266264678e-06, "loss": 1.1912, "step": 9188 }, { "epoch": 2.886703182362526, "grad_norm": 0.87890625, "learning_rate": 8.53697238971755e-06, "loss": 1.2664, "step": 9190 }, { "epoch": 2.8873314093880675, "grad_norm": 0.9453125, "learning_rate": 8.534433513170423e-06, "loss": 1.0205, "step": 9192 }, { "epoch": 2.887959636413609, "grad_norm": 0.875, "learning_rate": 8.531894636623294e-06, "loss": 1.0527, "step": 9194 }, { "epoch": 2.88858786343915, "grad_norm": 0.86328125, "learning_rate": 8.529355760076167e-06, "loss": 1.2545, "step": 9196 }, { "epoch": 2.8892160904646915, "grad_norm": 0.84765625, "learning_rate": 8.526816883529039e-06, "loss": 1.0928, "step": 9198 }, { "epoch": 2.8898443174902333, "grad_norm": 0.92578125, "learning_rate": 8.52427800698191e-06, "loss": 1.209, "step": 9200 }, { "epoch": 2.8904725445157746, "grad_norm": 0.84375, "learning_rate": 8.521739130434783e-06, "loss": 1.171, "step": 9202 }, { "epoch": 2.891100771541316, "grad_norm": 0.96875, "learning_rate": 8.519200253887655e-06, "loss": 1.2062, "step": 9204 }, { "epoch": 2.8917289985668573, "grad_norm": 0.8359375, "learning_rate": 8.516661377340528e-06, "loss": 1.2552, "step": 9206 }, { "epoch": 2.8923572255923986, "grad_norm": 1.65625, "learning_rate": 8.514122500793399e-06, "loss": 1.1772, "step": 9208 }, { "epoch": 2.89298545261794, "grad_norm": 0.94140625, "learning_rate": 8.51158362424627e-06, "loss": 1.2433, "step": 9210 }, { "epoch": 2.8936136796434813, "grad_norm": 0.8046875, "learning_rate": 8.509044747699144e-06, "loss": 1.1172, "step": 9212 }, { "epoch": 2.8942419066690226, "grad_norm": 0.90625, "learning_rate": 8.506505871152015e-06, "loss": 1.224, "step": 9214 }, { "epoch": 2.894870133694564, "grad_norm": 0.859375, "learning_rate": 8.503966994604888e-06, "loss": 1.1046, "step": 9216 }, { "epoch": 2.8954983607201052, "grad_norm": 0.8046875, "learning_rate": 8.50142811805776e-06, "loss": 1.0652, "step": 9218 }, { "epoch": 2.8961265877456466, "grad_norm": 0.96484375, "learning_rate": 8.498889241510632e-06, "loss": 1.1309, "step": 9220 }, { "epoch": 2.896754814771188, "grad_norm": 0.89453125, "learning_rate": 8.496350364963506e-06, "loss": 1.0387, "step": 9222 }, { "epoch": 2.8973830417967292, "grad_norm": 0.9375, "learning_rate": 8.493811488416377e-06, "loss": 1.0663, "step": 9224 }, { "epoch": 2.8980112688222706, "grad_norm": 0.82421875, "learning_rate": 8.491272611869248e-06, "loss": 1.2568, "step": 9226 }, { "epoch": 2.898639495847812, "grad_norm": 0.8359375, "learning_rate": 8.488733735322121e-06, "loss": 1.2742, "step": 9228 }, { "epoch": 2.899267722873353, "grad_norm": 0.91796875, "learning_rate": 8.486194858774993e-06, "loss": 1.1714, "step": 9230 }, { "epoch": 2.8998959498988945, "grad_norm": 1.0703125, "learning_rate": 8.483655982227866e-06, "loss": 1.1513, "step": 9232 }, { "epoch": 2.900524176924436, "grad_norm": 0.78125, "learning_rate": 8.481117105680737e-06, "loss": 1.1691, "step": 9234 }, { "epoch": 2.901152403949977, "grad_norm": 0.87890625, "learning_rate": 8.478578229133609e-06, "loss": 1.3591, "step": 9236 }, { "epoch": 2.9017806309755185, "grad_norm": 0.94921875, "learning_rate": 8.476039352586482e-06, "loss": 1.2139, "step": 9238 }, { "epoch": 2.9024088580010603, "grad_norm": 0.80078125, "learning_rate": 8.473500476039353e-06, "loss": 1.1475, "step": 9240 }, { "epoch": 2.9030370850266016, "grad_norm": 0.90234375, "learning_rate": 8.470961599492226e-06, "loss": 1.1807, "step": 9242 }, { "epoch": 2.903665312052143, "grad_norm": 0.8125, "learning_rate": 8.468422722945097e-06, "loss": 1.1678, "step": 9244 }, { "epoch": 2.9042935390776843, "grad_norm": 0.8359375, "learning_rate": 8.46588384639797e-06, "loss": 1.3143, "step": 9246 }, { "epoch": 2.9049217661032256, "grad_norm": 0.86328125, "learning_rate": 8.463344969850842e-06, "loss": 1.1935, "step": 9248 }, { "epoch": 2.905549993128767, "grad_norm": 0.85546875, "learning_rate": 8.460806093303713e-06, "loss": 1.3033, "step": 9250 }, { "epoch": 2.9061782201543083, "grad_norm": 0.859375, "learning_rate": 8.458267216756586e-06, "loss": 1.2278, "step": 9252 }, { "epoch": 2.9068064471798496, "grad_norm": 0.88671875, "learning_rate": 8.455728340209458e-06, "loss": 1.2614, "step": 9254 }, { "epoch": 2.907434674205391, "grad_norm": 0.82421875, "learning_rate": 8.453189463662331e-06, "loss": 1.2318, "step": 9256 }, { "epoch": 2.9080629012309323, "grad_norm": 0.92578125, "learning_rate": 8.450650587115202e-06, "loss": 1.1713, "step": 9258 }, { "epoch": 2.9086911282564736, "grad_norm": 0.859375, "learning_rate": 8.448111710568074e-06, "loss": 1.2783, "step": 9260 }, { "epoch": 2.909319355282015, "grad_norm": 0.921875, "learning_rate": 8.445572834020947e-06, "loss": 1.1782, "step": 9262 }, { "epoch": 2.9099475823075562, "grad_norm": 0.828125, "learning_rate": 8.443033957473818e-06, "loss": 1.2585, "step": 9264 }, { "epoch": 2.910575809333098, "grad_norm": 0.84765625, "learning_rate": 8.440495080926691e-06, "loss": 1.0162, "step": 9266 }, { "epoch": 2.9112040363586393, "grad_norm": 0.95703125, "learning_rate": 8.437956204379563e-06, "loss": 1.1789, "step": 9268 }, { "epoch": 2.9118322633841807, "grad_norm": 0.8671875, "learning_rate": 8.435417327832434e-06, "loss": 1.1599, "step": 9270 }, { "epoch": 2.912460490409722, "grad_norm": 0.83203125, "learning_rate": 8.432878451285307e-06, "loss": 1.2784, "step": 9272 }, { "epoch": 2.9130887174352633, "grad_norm": 0.90234375, "learning_rate": 8.430339574738178e-06, "loss": 1.2503, "step": 9274 }, { "epoch": 2.9137169444608046, "grad_norm": 0.890625, "learning_rate": 8.427800698191051e-06, "loss": 1.2095, "step": 9276 }, { "epoch": 2.914345171486346, "grad_norm": 0.85546875, "learning_rate": 8.425261821643923e-06, "loss": 1.1473, "step": 9278 }, { "epoch": 2.9149733985118873, "grad_norm": 0.94140625, "learning_rate": 8.422722945096796e-06, "loss": 1.2989, "step": 9280 }, { "epoch": 2.9156016255374286, "grad_norm": 0.83984375, "learning_rate": 8.420184068549667e-06, "loss": 1.1785, "step": 9282 }, { "epoch": 2.91622985256297, "grad_norm": 0.859375, "learning_rate": 8.417645192002539e-06, "loss": 1.1599, "step": 9284 }, { "epoch": 2.9168580795885113, "grad_norm": 0.8984375, "learning_rate": 8.415106315455412e-06, "loss": 1.1402, "step": 9286 }, { "epoch": 2.9174863066140526, "grad_norm": 0.80859375, "learning_rate": 8.412567438908283e-06, "loss": 1.056, "step": 9288 }, { "epoch": 2.918114533639594, "grad_norm": 0.98828125, "learning_rate": 8.410028562361156e-06, "loss": 1.1068, "step": 9290 }, { "epoch": 2.9187427606651353, "grad_norm": 0.88671875, "learning_rate": 8.407489685814028e-06, "loss": 1.1869, "step": 9292 }, { "epoch": 2.9193709876906766, "grad_norm": 0.98828125, "learning_rate": 8.404950809266899e-06, "loss": 1.145, "step": 9294 }, { "epoch": 2.919999214716218, "grad_norm": 0.91015625, "learning_rate": 8.402411932719772e-06, "loss": 1.1293, "step": 9296 }, { "epoch": 2.9206274417417593, "grad_norm": 0.94921875, "learning_rate": 8.399873056172643e-06, "loss": 1.1936, "step": 9298 }, { "epoch": 2.9212556687673006, "grad_norm": 0.96484375, "learning_rate": 8.397334179625517e-06, "loss": 1.1045, "step": 9300 }, { "epoch": 2.921883895792842, "grad_norm": 0.8359375, "learning_rate": 8.394795303078388e-06, "loss": 1.2715, "step": 9302 }, { "epoch": 2.9225121228183832, "grad_norm": 0.96875, "learning_rate": 8.392256426531261e-06, "loss": 1.1606, "step": 9304 }, { "epoch": 2.923140349843925, "grad_norm": 0.90625, "learning_rate": 8.389717549984134e-06, "loss": 1.0997, "step": 9306 }, { "epoch": 2.9237685768694663, "grad_norm": 0.92578125, "learning_rate": 8.387178673437005e-06, "loss": 1.0984, "step": 9308 }, { "epoch": 2.9243968038950077, "grad_norm": 0.8984375, "learning_rate": 8.384639796889877e-06, "loss": 1.2038, "step": 9310 }, { "epoch": 2.925025030920549, "grad_norm": 0.890625, "learning_rate": 8.38210092034275e-06, "loss": 1.0816, "step": 9312 }, { "epoch": 2.9256532579460903, "grad_norm": 0.88671875, "learning_rate": 8.379562043795621e-06, "loss": 1.2256, "step": 9314 }, { "epoch": 2.9262814849716317, "grad_norm": 0.875, "learning_rate": 8.377023167248494e-06, "loss": 1.2699, "step": 9316 }, { "epoch": 2.926909711997173, "grad_norm": 0.86328125, "learning_rate": 8.374484290701366e-06, "loss": 1.1746, "step": 9318 }, { "epoch": 2.9275379390227143, "grad_norm": 0.91015625, "learning_rate": 8.371945414154237e-06, "loss": 1.1686, "step": 9320 }, { "epoch": 2.9281661660482556, "grad_norm": 0.89453125, "learning_rate": 8.36940653760711e-06, "loss": 1.1485, "step": 9322 }, { "epoch": 2.928794393073797, "grad_norm": 0.8046875, "learning_rate": 8.366867661059982e-06, "loss": 1.2353, "step": 9324 }, { "epoch": 2.9294226200993383, "grad_norm": 0.91796875, "learning_rate": 8.364328784512855e-06, "loss": 1.2038, "step": 9326 }, { "epoch": 2.9300508471248796, "grad_norm": 0.94140625, "learning_rate": 8.361789907965726e-06, "loss": 1.2617, "step": 9328 }, { "epoch": 2.930679074150421, "grad_norm": 0.87890625, "learning_rate": 8.359251031418597e-06, "loss": 1.1843, "step": 9330 }, { "epoch": 2.9313073011759627, "grad_norm": 0.88671875, "learning_rate": 8.35671215487147e-06, "loss": 1.217, "step": 9332 }, { "epoch": 2.931935528201504, "grad_norm": 0.859375, "learning_rate": 8.354173278324342e-06, "loss": 1.1822, "step": 9334 }, { "epoch": 2.9325637552270454, "grad_norm": 0.9296875, "learning_rate": 8.351634401777215e-06, "loss": 1.1071, "step": 9336 }, { "epoch": 2.9331919822525867, "grad_norm": 0.8515625, "learning_rate": 8.349095525230086e-06, "loss": 1.1019, "step": 9338 }, { "epoch": 2.933820209278128, "grad_norm": 0.93359375, "learning_rate": 8.346556648682958e-06, "loss": 1.1753, "step": 9340 }, { "epoch": 2.9344484363036694, "grad_norm": 0.87109375, "learning_rate": 8.34401777213583e-06, "loss": 1.1814, "step": 9342 }, { "epoch": 2.9350766633292107, "grad_norm": 1.0546875, "learning_rate": 8.341478895588702e-06, "loss": 1.1398, "step": 9344 }, { "epoch": 2.935704890354752, "grad_norm": 1.015625, "learning_rate": 8.338940019041575e-06, "loss": 1.2175, "step": 9346 }, { "epoch": 2.9363331173802933, "grad_norm": 0.92578125, "learning_rate": 8.336401142494447e-06, "loss": 1.057, "step": 9348 }, { "epoch": 2.9369613444058347, "grad_norm": 0.8671875, "learning_rate": 8.33386226594732e-06, "loss": 1.2624, "step": 9350 }, { "epoch": 2.937589571431376, "grad_norm": 0.85546875, "learning_rate": 8.331323389400191e-06, "loss": 1.1828, "step": 9352 }, { "epoch": 2.9382177984569173, "grad_norm": 0.8984375, "learning_rate": 8.328784512853062e-06, "loss": 1.2289, "step": 9354 }, { "epoch": 2.9388460254824587, "grad_norm": 0.96875, "learning_rate": 8.326245636305936e-06, "loss": 1.0668, "step": 9356 }, { "epoch": 2.939474252508, "grad_norm": 0.90625, "learning_rate": 8.323706759758807e-06, "loss": 1.212, "step": 9358 }, { "epoch": 2.9401024795335413, "grad_norm": 0.8671875, "learning_rate": 8.32116788321168e-06, "loss": 1.1254, "step": 9360 }, { "epoch": 2.9407307065590826, "grad_norm": 0.859375, "learning_rate": 8.318629006664551e-06, "loss": 1.048, "step": 9362 }, { "epoch": 2.941358933584624, "grad_norm": 0.89453125, "learning_rate": 8.316090130117423e-06, "loss": 1.0829, "step": 9364 }, { "epoch": 2.9419871606101653, "grad_norm": 0.8671875, "learning_rate": 8.313551253570296e-06, "loss": 1.163, "step": 9366 }, { "epoch": 2.9426153876357066, "grad_norm": 0.87890625, "learning_rate": 8.311012377023167e-06, "loss": 1.3302, "step": 9368 }, { "epoch": 2.943243614661248, "grad_norm": 0.8828125, "learning_rate": 8.30847350047604e-06, "loss": 1.115, "step": 9370 }, { "epoch": 2.9438718416867897, "grad_norm": 0.91796875, "learning_rate": 8.305934623928912e-06, "loss": 1.218, "step": 9372 }, { "epoch": 2.944500068712331, "grad_norm": 0.89453125, "learning_rate": 8.303395747381783e-06, "loss": 1.17, "step": 9374 }, { "epoch": 2.9451282957378724, "grad_norm": 0.875, "learning_rate": 8.300856870834656e-06, "loss": 1.2241, "step": 9376 }, { "epoch": 2.9457565227634137, "grad_norm": 1.0234375, "learning_rate": 8.298317994287528e-06, "loss": 1.1651, "step": 9378 }, { "epoch": 2.946384749788955, "grad_norm": 0.98828125, "learning_rate": 8.2957791177404e-06, "loss": 1.1871, "step": 9380 }, { "epoch": 2.9470129768144964, "grad_norm": 1.0546875, "learning_rate": 8.293240241193272e-06, "loss": 1.2128, "step": 9382 }, { "epoch": 2.9476412038400377, "grad_norm": 0.86328125, "learning_rate": 8.290701364646145e-06, "loss": 1.2018, "step": 9384 }, { "epoch": 2.948269430865579, "grad_norm": 0.85546875, "learning_rate": 8.288162488099016e-06, "loss": 1.2859, "step": 9386 }, { "epoch": 2.9488976578911203, "grad_norm": 0.8359375, "learning_rate": 8.285623611551888e-06, "loss": 1.2422, "step": 9388 }, { "epoch": 2.9495258849166617, "grad_norm": 0.8671875, "learning_rate": 8.283084735004761e-06, "loss": 1.1708, "step": 9390 }, { "epoch": 2.950154111942203, "grad_norm": 0.8046875, "learning_rate": 8.280545858457634e-06, "loss": 1.1229, "step": 9392 }, { "epoch": 2.9507823389677443, "grad_norm": 0.92578125, "learning_rate": 8.278006981910505e-06, "loss": 1.117, "step": 9394 }, { "epoch": 2.9514105659932857, "grad_norm": 0.828125, "learning_rate": 8.275468105363378e-06, "loss": 1.3889, "step": 9396 }, { "epoch": 2.9520387930188274, "grad_norm": 0.828125, "learning_rate": 8.27292922881625e-06, "loss": 1.1619, "step": 9398 }, { "epoch": 2.9526670200443688, "grad_norm": 0.93359375, "learning_rate": 8.270390352269121e-06, "loss": 1.1436, "step": 9400 }, { "epoch": 2.95329524706991, "grad_norm": 0.8984375, "learning_rate": 8.267851475721994e-06, "loss": 1.1739, "step": 9402 }, { "epoch": 2.9539234740954514, "grad_norm": 0.8984375, "learning_rate": 8.265312599174866e-06, "loss": 1.1537, "step": 9404 }, { "epoch": 2.9545517011209927, "grad_norm": 0.87890625, "learning_rate": 8.262773722627739e-06, "loss": 1.1615, "step": 9406 }, { "epoch": 2.955179928146534, "grad_norm": 0.87890625, "learning_rate": 8.26023484608061e-06, "loss": 1.2431, "step": 9408 }, { "epoch": 2.9558081551720754, "grad_norm": 0.9296875, "learning_rate": 8.257695969533483e-06, "loss": 1.1621, "step": 9410 }, { "epoch": 2.9564363821976167, "grad_norm": 0.890625, "learning_rate": 8.255157092986355e-06, "loss": 1.2451, "step": 9412 }, { "epoch": 2.957064609223158, "grad_norm": 0.89453125, "learning_rate": 8.252618216439226e-06, "loss": 1.1724, "step": 9414 }, { "epoch": 2.9576928362486994, "grad_norm": 0.8671875, "learning_rate": 8.250079339892099e-06, "loss": 1.095, "step": 9416 }, { "epoch": 2.9583210632742407, "grad_norm": 0.9453125, "learning_rate": 8.24754046334497e-06, "loss": 1.1788, "step": 9418 }, { "epoch": 2.958949290299782, "grad_norm": 0.84375, "learning_rate": 8.245001586797843e-06, "loss": 1.1435, "step": 9420 }, { "epoch": 2.9595775173253234, "grad_norm": 0.99609375, "learning_rate": 8.242462710250715e-06, "loss": 1.2214, "step": 9422 }, { "epoch": 2.9602057443508647, "grad_norm": 0.83984375, "learning_rate": 8.239923833703586e-06, "loss": 1.1846, "step": 9424 }, { "epoch": 2.960833971376406, "grad_norm": 0.89453125, "learning_rate": 8.23738495715646e-06, "loss": 1.1564, "step": 9426 }, { "epoch": 2.9614621984019474, "grad_norm": 0.875, "learning_rate": 8.23484608060933e-06, "loss": 1.1448, "step": 9428 }, { "epoch": 2.9620904254274887, "grad_norm": 0.8046875, "learning_rate": 8.232307204062204e-06, "loss": 1.3647, "step": 9430 }, { "epoch": 2.96271865245303, "grad_norm": 0.890625, "learning_rate": 8.229768327515075e-06, "loss": 1.221, "step": 9432 }, { "epoch": 2.9633468794785713, "grad_norm": 0.92578125, "learning_rate": 8.227229450967947e-06, "loss": 1.2453, "step": 9434 }, { "epoch": 2.9639751065041127, "grad_norm": 0.921875, "learning_rate": 8.22469057442082e-06, "loss": 1.1119, "step": 9436 }, { "epoch": 2.9646033335296544, "grad_norm": 0.87109375, "learning_rate": 8.222151697873691e-06, "loss": 1.2253, "step": 9438 }, { "epoch": 2.9652315605551958, "grad_norm": 0.91796875, "learning_rate": 8.219612821326564e-06, "loss": 1.191, "step": 9440 }, { "epoch": 2.965859787580737, "grad_norm": 0.94140625, "learning_rate": 8.217073944779435e-06, "loss": 1.1427, "step": 9442 }, { "epoch": 2.9664880146062784, "grad_norm": 0.890625, "learning_rate": 8.214535068232309e-06, "loss": 1.2054, "step": 9444 }, { "epoch": 2.9671162416318198, "grad_norm": 0.91015625, "learning_rate": 8.21199619168518e-06, "loss": 1.2363, "step": 9446 }, { "epoch": 2.967744468657361, "grad_norm": 0.91796875, "learning_rate": 8.209457315138051e-06, "loss": 1.3343, "step": 9448 }, { "epoch": 2.9683726956829024, "grad_norm": 0.8671875, "learning_rate": 8.206918438590924e-06, "loss": 1.0634, "step": 9450 }, { "epoch": 2.9690009227084437, "grad_norm": 0.8125, "learning_rate": 8.204379562043796e-06, "loss": 1.1639, "step": 9452 }, { "epoch": 2.969629149733985, "grad_norm": 0.828125, "learning_rate": 8.201840685496669e-06, "loss": 1.1921, "step": 9454 }, { "epoch": 2.9702573767595264, "grad_norm": 0.8828125, "learning_rate": 8.19930180894954e-06, "loss": 1.3581, "step": 9456 }, { "epoch": 2.9708856037850677, "grad_norm": 0.921875, "learning_rate": 8.196762932402412e-06, "loss": 1.2369, "step": 9458 }, { "epoch": 2.971513830810609, "grad_norm": 0.94140625, "learning_rate": 8.194224055855285e-06, "loss": 1.0998, "step": 9460 }, { "epoch": 2.9721420578361504, "grad_norm": 1.375, "learning_rate": 8.191685179308156e-06, "loss": 1.0054, "step": 9462 }, { "epoch": 2.972770284861692, "grad_norm": 0.8515625, "learning_rate": 8.189146302761029e-06, "loss": 1.2289, "step": 9464 }, { "epoch": 2.9733985118872335, "grad_norm": 0.8359375, "learning_rate": 8.1866074262139e-06, "loss": 1.0944, "step": 9466 }, { "epoch": 2.974026738912775, "grad_norm": 0.90625, "learning_rate": 8.184068549666772e-06, "loss": 1.168, "step": 9468 }, { "epoch": 2.974654965938316, "grad_norm": 0.8203125, "learning_rate": 8.181529673119645e-06, "loss": 1.2147, "step": 9470 }, { "epoch": 2.9752831929638575, "grad_norm": 0.9609375, "learning_rate": 8.178990796572516e-06, "loss": 1.2055, "step": 9472 }, { "epoch": 2.975911419989399, "grad_norm": 0.8203125, "learning_rate": 8.17645192002539e-06, "loss": 1.2641, "step": 9474 }, { "epoch": 2.97653964701494, "grad_norm": 0.88671875, "learning_rate": 8.173913043478263e-06, "loss": 1.2016, "step": 9476 }, { "epoch": 2.9771678740404814, "grad_norm": 0.93359375, "learning_rate": 8.171374166931134e-06, "loss": 1.2389, "step": 9478 }, { "epoch": 2.9777961010660228, "grad_norm": 0.83984375, "learning_rate": 8.168835290384007e-06, "loss": 1.3398, "step": 9480 }, { "epoch": 2.978424328091564, "grad_norm": 0.8203125, "learning_rate": 8.166296413836878e-06, "loss": 1.3117, "step": 9482 }, { "epoch": 2.9790525551171054, "grad_norm": 0.82421875, "learning_rate": 8.16375753728975e-06, "loss": 1.1342, "step": 9484 }, { "epoch": 2.9796807821426468, "grad_norm": 0.82421875, "learning_rate": 8.161218660742623e-06, "loss": 1.183, "step": 9486 }, { "epoch": 2.980309009168188, "grad_norm": 0.8203125, "learning_rate": 8.158679784195494e-06, "loss": 1.1521, "step": 9488 }, { "epoch": 2.9809372361937294, "grad_norm": 0.91796875, "learning_rate": 8.156140907648367e-06, "loss": 1.0698, "step": 9490 }, { "epoch": 2.9815654632192707, "grad_norm": 0.8671875, "learning_rate": 8.153602031101239e-06, "loss": 1.1782, "step": 9492 }, { "epoch": 2.982193690244812, "grad_norm": 0.86328125, "learning_rate": 8.15106315455411e-06, "loss": 1.2666, "step": 9494 }, { "epoch": 2.9828219172703534, "grad_norm": 0.95703125, "learning_rate": 8.148524278006983e-06, "loss": 1.3233, "step": 9496 }, { "epoch": 2.9834501442958947, "grad_norm": 0.8828125, "learning_rate": 8.145985401459855e-06, "loss": 1.1066, "step": 9498 }, { "epoch": 2.984078371321436, "grad_norm": 0.953125, "learning_rate": 8.143446524912728e-06, "loss": 1.1686, "step": 9500 }, { "epoch": 2.9847065983469774, "grad_norm": 0.8203125, "learning_rate": 8.140907648365599e-06, "loss": 1.1282, "step": 9502 }, { "epoch": 2.985334825372519, "grad_norm": 0.8671875, "learning_rate": 8.13836877181847e-06, "loss": 1.221, "step": 9504 }, { "epoch": 2.9859630523980605, "grad_norm": 0.8359375, "learning_rate": 8.135829895271343e-06, "loss": 1.1773, "step": 9506 }, { "epoch": 2.986591279423602, "grad_norm": 0.90625, "learning_rate": 8.133291018724215e-06, "loss": 1.136, "step": 9508 }, { "epoch": 2.987219506449143, "grad_norm": 0.9296875, "learning_rate": 8.130752142177088e-06, "loss": 1.1043, "step": 9510 }, { "epoch": 2.9878477334746845, "grad_norm": 0.828125, "learning_rate": 8.12821326562996e-06, "loss": 1.1536, "step": 9512 }, { "epoch": 2.988475960500226, "grad_norm": 0.90625, "learning_rate": 8.125674389082832e-06, "loss": 1.1983, "step": 9514 }, { "epoch": 2.989104187525767, "grad_norm": 0.8828125, "learning_rate": 8.123135512535704e-06, "loss": 1.1925, "step": 9516 }, { "epoch": 2.9897324145513084, "grad_norm": 0.8203125, "learning_rate": 8.120596635988575e-06, "loss": 1.2503, "step": 9518 }, { "epoch": 2.9903606415768498, "grad_norm": 0.83203125, "learning_rate": 8.118057759441448e-06, "loss": 1.3267, "step": 9520 }, { "epoch": 2.990988868602391, "grad_norm": 1.0703125, "learning_rate": 8.11551888289432e-06, "loss": 1.1573, "step": 9522 }, { "epoch": 2.9916170956279324, "grad_norm": 1.0234375, "learning_rate": 8.112980006347193e-06, "loss": 1.1968, "step": 9524 }, { "epoch": 2.9922453226534738, "grad_norm": 0.8828125, "learning_rate": 8.110441129800064e-06, "loss": 1.2677, "step": 9526 }, { "epoch": 2.992873549679015, "grad_norm": 0.83984375, "learning_rate": 8.107902253252935e-06, "loss": 1.082, "step": 9528 }, { "epoch": 2.993501776704557, "grad_norm": 0.91015625, "learning_rate": 8.105363376705808e-06, "loss": 1.1113, "step": 9530 }, { "epoch": 2.994130003730098, "grad_norm": 0.8984375, "learning_rate": 8.10282450015868e-06, "loss": 1.1446, "step": 9532 }, { "epoch": 2.9947582307556395, "grad_norm": 0.90234375, "learning_rate": 8.100285623611553e-06, "loss": 1.109, "step": 9534 }, { "epoch": 2.995386457781181, "grad_norm": 0.79296875, "learning_rate": 8.097746747064424e-06, "loss": 1.2334, "step": 9536 }, { "epoch": 2.996014684806722, "grad_norm": 0.79296875, "learning_rate": 8.095207870517296e-06, "loss": 1.1576, "step": 9538 }, { "epoch": 2.9966429118322635, "grad_norm": 0.9609375, "learning_rate": 8.092668993970169e-06, "loss": 1.2902, "step": 9540 }, { "epoch": 2.997271138857805, "grad_norm": 0.96484375, "learning_rate": 8.09013011742304e-06, "loss": 1.2216, "step": 9542 }, { "epoch": 2.997899365883346, "grad_norm": 0.84765625, "learning_rate": 8.087591240875913e-06, "loss": 1.2174, "step": 9544 }, { "epoch": 2.9985275929088875, "grad_norm": 0.83984375, "learning_rate": 8.085052364328785e-06, "loss": 1.2307, "step": 9546 }, { "epoch": 2.999155819934429, "grad_norm": 0.96875, "learning_rate": 8.082513487781658e-06, "loss": 1.2016, "step": 9548 }, { "epoch": 2.99978404695997, "grad_norm": 0.99609375, "learning_rate": 8.079974611234529e-06, "loss": 1.2336, "step": 9550 }, { "epoch": 3.0004122739855115, "grad_norm": 0.86328125, "learning_rate": 8.0774357346874e-06, "loss": 1.1413, "step": 9552 }, { "epoch": 3.001040501011053, "grad_norm": 0.83984375, "learning_rate": 8.074896858140274e-06, "loss": 1.1411, "step": 9554 }, { "epoch": 3.001668728036594, "grad_norm": 0.96875, "learning_rate": 8.072357981593145e-06, "loss": 1.1882, "step": 9556 }, { "epoch": 3.0022969550621355, "grad_norm": 0.83203125, "learning_rate": 8.069819105046018e-06, "loss": 1.1242, "step": 9558 }, { "epoch": 3.002925182087677, "grad_norm": 0.93359375, "learning_rate": 8.06728022849889e-06, "loss": 1.2071, "step": 9560 }, { "epoch": 3.003553409113218, "grad_norm": 0.83984375, "learning_rate": 8.064741351951762e-06, "loss": 1.2413, "step": 9562 }, { "epoch": 3.0041816361387594, "grad_norm": 0.87109375, "learning_rate": 8.062202475404634e-06, "loss": 1.1862, "step": 9564 }, { "epoch": 3.004809863164301, "grad_norm": 0.8984375, "learning_rate": 8.059663598857507e-06, "loss": 1.0735, "step": 9566 }, { "epoch": 3.0054380901898425, "grad_norm": 0.92578125, "learning_rate": 8.057124722310378e-06, "loss": 1.3115, "step": 9568 }, { "epoch": 3.006066317215384, "grad_norm": 0.9296875, "learning_rate": 8.054585845763251e-06, "loss": 1.1773, "step": 9570 }, { "epoch": 3.006694544240925, "grad_norm": 0.9375, "learning_rate": 8.052046969216123e-06, "loss": 1.3357, "step": 9572 }, { "epoch": 3.0073227712664665, "grad_norm": 0.875, "learning_rate": 8.049508092668996e-06, "loss": 1.1719, "step": 9574 }, { "epoch": 3.007950998292008, "grad_norm": 0.921875, "learning_rate": 8.046969216121867e-06, "loss": 1.1707, "step": 9576 }, { "epoch": 3.008579225317549, "grad_norm": 1.0390625, "learning_rate": 8.044430339574739e-06, "loss": 1.1256, "step": 9578 }, { "epoch": 3.0092074523430905, "grad_norm": 0.92578125, "learning_rate": 8.041891463027612e-06, "loss": 1.2182, "step": 9580 }, { "epoch": 3.009835679368632, "grad_norm": 0.94140625, "learning_rate": 8.039352586480483e-06, "loss": 1.0409, "step": 9582 }, { "epoch": 3.010463906394173, "grad_norm": 0.88671875, "learning_rate": 8.036813709933356e-06, "loss": 1.224, "step": 9584 }, { "epoch": 3.0110921334197145, "grad_norm": 0.9140625, "learning_rate": 8.034274833386228e-06, "loss": 1.1287, "step": 9586 }, { "epoch": 3.011720360445256, "grad_norm": 0.94921875, "learning_rate": 8.031735956839099e-06, "loss": 1.0691, "step": 9588 }, { "epoch": 3.012348587470797, "grad_norm": 1.03125, "learning_rate": 8.029197080291972e-06, "loss": 1.1738, "step": 9590 }, { "epoch": 3.0129768144963385, "grad_norm": 0.87109375, "learning_rate": 8.026658203744843e-06, "loss": 0.999, "step": 9592 }, { "epoch": 3.01360504152188, "grad_norm": 0.8515625, "learning_rate": 8.024119327197716e-06, "loss": 1.1218, "step": 9594 }, { "epoch": 3.014233268547421, "grad_norm": 0.90625, "learning_rate": 8.021580450650588e-06, "loss": 1.1781, "step": 9596 }, { "epoch": 3.014861495572963, "grad_norm": 0.91796875, "learning_rate": 8.01904157410346e-06, "loss": 1.1791, "step": 9598 }, { "epoch": 3.0154897225985042, "grad_norm": 0.8984375, "learning_rate": 8.016502697556332e-06, "loss": 1.094, "step": 9600 }, { "epoch": 3.0161179496240456, "grad_norm": 0.9453125, "learning_rate": 8.013963821009204e-06, "loss": 1.1885, "step": 9602 }, { "epoch": 3.016746176649587, "grad_norm": 0.87890625, "learning_rate": 8.011424944462077e-06, "loss": 1.1053, "step": 9604 }, { "epoch": 3.017374403675128, "grad_norm": 1.046875, "learning_rate": 8.008886067914948e-06, "loss": 1.0629, "step": 9606 }, { "epoch": 3.0180026307006695, "grad_norm": 0.90625, "learning_rate": 8.006347191367821e-06, "loss": 1.1964, "step": 9608 }, { "epoch": 3.018630857726211, "grad_norm": 1.015625, "learning_rate": 8.003808314820693e-06, "loss": 1.2276, "step": 9610 }, { "epoch": 3.019259084751752, "grad_norm": 0.953125, "learning_rate": 8.001269438273564e-06, "loss": 1.1627, "step": 9612 }, { "epoch": 3.0198873117772935, "grad_norm": 0.921875, "learning_rate": 7.998730561726437e-06, "loss": 1.0934, "step": 9614 }, { "epoch": 3.020515538802835, "grad_norm": 0.921875, "learning_rate": 7.996191685179308e-06, "loss": 1.1188, "step": 9616 }, { "epoch": 3.021143765828376, "grad_norm": 0.90234375, "learning_rate": 7.993652808632181e-06, "loss": 1.2286, "step": 9618 }, { "epoch": 3.0217719928539175, "grad_norm": 0.8828125, "learning_rate": 7.991113932085053e-06, "loss": 1.1258, "step": 9620 }, { "epoch": 3.022400219879459, "grad_norm": 0.9609375, "learning_rate": 7.988575055537924e-06, "loss": 1.2508, "step": 9622 }, { "epoch": 3.023028446905, "grad_norm": 0.9609375, "learning_rate": 7.986036178990797e-06, "loss": 1.2295, "step": 9624 }, { "epoch": 3.0236566739305415, "grad_norm": 0.96484375, "learning_rate": 7.983497302443669e-06, "loss": 1.1972, "step": 9626 }, { "epoch": 3.024284900956083, "grad_norm": 0.9296875, "learning_rate": 7.980958425896542e-06, "loss": 1.045, "step": 9628 }, { "epoch": 3.024913127981624, "grad_norm": 0.91796875, "learning_rate": 7.978419549349413e-06, "loss": 1.1654, "step": 9630 }, { "epoch": 3.025541355007166, "grad_norm": 0.9453125, "learning_rate": 7.975880672802285e-06, "loss": 1.0317, "step": 9632 }, { "epoch": 3.0261695820327073, "grad_norm": 0.8984375, "learning_rate": 7.973341796255158e-06, "loss": 1.254, "step": 9634 }, { "epoch": 3.0267978090582486, "grad_norm": 0.93359375, "learning_rate": 7.970802919708029e-06, "loss": 1.1879, "step": 9636 }, { "epoch": 3.02742603608379, "grad_norm": 0.94140625, "learning_rate": 7.968264043160902e-06, "loss": 1.2707, "step": 9638 }, { "epoch": 3.0280542631093312, "grad_norm": 0.91015625, "learning_rate": 7.965725166613773e-06, "loss": 1.2233, "step": 9640 }, { "epoch": 3.0286824901348726, "grad_norm": 0.92578125, "learning_rate": 7.963186290066645e-06, "loss": 1.1569, "step": 9642 }, { "epoch": 3.029310717160414, "grad_norm": 0.95703125, "learning_rate": 7.960647413519518e-06, "loss": 1.09, "step": 9644 }, { "epoch": 3.029938944185955, "grad_norm": 0.859375, "learning_rate": 7.95810853697239e-06, "loss": 1.3154, "step": 9646 }, { "epoch": 3.0305671712114965, "grad_norm": 1.015625, "learning_rate": 7.955569660425262e-06, "loss": 1.1869, "step": 9648 }, { "epoch": 3.031195398237038, "grad_norm": 1.0078125, "learning_rate": 7.953030783878135e-06, "loss": 1.1007, "step": 9650 }, { "epoch": 3.031823625262579, "grad_norm": 0.8984375, "learning_rate": 7.950491907331007e-06, "loss": 1.2366, "step": 9652 }, { "epoch": 3.0324518522881205, "grad_norm": 0.9296875, "learning_rate": 7.94795303078388e-06, "loss": 1.1909, "step": 9654 }, { "epoch": 3.033080079313662, "grad_norm": 0.875, "learning_rate": 7.945414154236751e-06, "loss": 1.215, "step": 9656 }, { "epoch": 3.033708306339203, "grad_norm": 0.9375, "learning_rate": 7.942875277689623e-06, "loss": 1.1398, "step": 9658 }, { "epoch": 3.0343365333647445, "grad_norm": 0.98046875, "learning_rate": 7.940336401142496e-06, "loss": 1.0704, "step": 9660 }, { "epoch": 3.034964760390286, "grad_norm": 1.03125, "learning_rate": 7.937797524595367e-06, "loss": 1.1312, "step": 9662 }, { "epoch": 3.0355929874158276, "grad_norm": 0.99609375, "learning_rate": 7.93525864804824e-06, "loss": 1.245, "step": 9664 }, { "epoch": 3.036221214441369, "grad_norm": 0.93359375, "learning_rate": 7.932719771501112e-06, "loss": 1.1847, "step": 9666 }, { "epoch": 3.0368494414669103, "grad_norm": 0.953125, "learning_rate": 7.930180894953983e-06, "loss": 1.1145, "step": 9668 }, { "epoch": 3.0374776684924516, "grad_norm": 0.92578125, "learning_rate": 7.927642018406856e-06, "loss": 1.2597, "step": 9670 }, { "epoch": 3.038105895517993, "grad_norm": 0.91796875, "learning_rate": 7.925103141859727e-06, "loss": 1.097, "step": 9672 }, { "epoch": 3.0387341225435343, "grad_norm": 0.94921875, "learning_rate": 7.9225642653126e-06, "loss": 1.1814, "step": 9674 }, { "epoch": 3.0393623495690756, "grad_norm": 0.93359375, "learning_rate": 7.920025388765472e-06, "loss": 1.1166, "step": 9676 }, { "epoch": 3.039990576594617, "grad_norm": 0.94140625, "learning_rate": 7.917486512218345e-06, "loss": 1.0558, "step": 9678 }, { "epoch": 3.0406188036201582, "grad_norm": 0.890625, "learning_rate": 7.914947635671216e-06, "loss": 1.2832, "step": 9680 }, { "epoch": 3.0412470306456996, "grad_norm": 0.921875, "learning_rate": 7.912408759124088e-06, "loss": 1.2709, "step": 9682 }, { "epoch": 3.041875257671241, "grad_norm": 0.94140625, "learning_rate": 7.90986988257696e-06, "loss": 1.1153, "step": 9684 }, { "epoch": 3.0425034846967822, "grad_norm": 0.99609375, "learning_rate": 7.907331006029832e-06, "loss": 1.062, "step": 9686 }, { "epoch": 3.0431317117223236, "grad_norm": 0.93359375, "learning_rate": 7.904792129482705e-06, "loss": 1.084, "step": 9688 }, { "epoch": 3.043759938747865, "grad_norm": 1.015625, "learning_rate": 7.902253252935577e-06, "loss": 1.2204, "step": 9690 }, { "epoch": 3.044388165773406, "grad_norm": 0.90234375, "learning_rate": 7.899714376388448e-06, "loss": 1.2418, "step": 9692 }, { "epoch": 3.0450163927989475, "grad_norm": 0.953125, "learning_rate": 7.897175499841321e-06, "loss": 1.0818, "step": 9694 }, { "epoch": 3.045644619824489, "grad_norm": 0.9765625, "learning_rate": 7.894636623294192e-06, "loss": 1.132, "step": 9696 }, { "epoch": 3.0462728468500306, "grad_norm": 1.03125, "learning_rate": 7.892097746747066e-06, "loss": 1.1114, "step": 9698 }, { "epoch": 3.046901073875572, "grad_norm": 0.9375, "learning_rate": 7.889558870199937e-06, "loss": 1.0377, "step": 9700 }, { "epoch": 3.0475293009011133, "grad_norm": 0.8984375, "learning_rate": 7.887019993652808e-06, "loss": 1.348, "step": 9702 }, { "epoch": 3.0481575279266546, "grad_norm": 0.94921875, "learning_rate": 7.884481117105681e-06, "loss": 1.1789, "step": 9704 }, { "epoch": 3.048785754952196, "grad_norm": 0.89453125, "learning_rate": 7.881942240558553e-06, "loss": 1.2218, "step": 9706 }, { "epoch": 3.0494139819777373, "grad_norm": 0.96875, "learning_rate": 7.879403364011426e-06, "loss": 1.1665, "step": 9708 }, { "epoch": 3.0500422090032786, "grad_norm": 0.9609375, "learning_rate": 7.876864487464297e-06, "loss": 1.1696, "step": 9710 }, { "epoch": 3.05067043602882, "grad_norm": 1.0, "learning_rate": 7.87432561091717e-06, "loss": 1.1098, "step": 9712 }, { "epoch": 3.0512986630543613, "grad_norm": 0.94140625, "learning_rate": 7.871786734370042e-06, "loss": 1.19, "step": 9714 }, { "epoch": 3.0519268900799026, "grad_norm": 0.9140625, "learning_rate": 7.869247857822913e-06, "loss": 1.1369, "step": 9716 }, { "epoch": 3.052555117105444, "grad_norm": 1.0390625, "learning_rate": 7.866708981275786e-06, "loss": 1.048, "step": 9718 }, { "epoch": 3.0531833441309852, "grad_norm": 0.96484375, "learning_rate": 7.864170104728658e-06, "loss": 1.117, "step": 9720 }, { "epoch": 3.0538115711565266, "grad_norm": 0.890625, "learning_rate": 7.86163122818153e-06, "loss": 1.1648, "step": 9722 }, { "epoch": 3.054439798182068, "grad_norm": 0.91015625, "learning_rate": 7.859092351634402e-06, "loss": 1.2634, "step": 9724 }, { "epoch": 3.0550680252076092, "grad_norm": 0.90625, "learning_rate": 7.856553475087273e-06, "loss": 1.1022, "step": 9726 }, { "epoch": 3.0556962522331506, "grad_norm": 0.91015625, "learning_rate": 7.854014598540146e-06, "loss": 1.2666, "step": 9728 }, { "epoch": 3.0563244792586923, "grad_norm": 0.99609375, "learning_rate": 7.851475721993018e-06, "loss": 1.2058, "step": 9730 }, { "epoch": 3.0569527062842337, "grad_norm": 0.9453125, "learning_rate": 7.848936845445891e-06, "loss": 1.1898, "step": 9732 }, { "epoch": 3.057580933309775, "grad_norm": 0.921875, "learning_rate": 7.846397968898764e-06, "loss": 1.2825, "step": 9734 }, { "epoch": 3.0582091603353163, "grad_norm": 1.0390625, "learning_rate": 7.843859092351635e-06, "loss": 1.1024, "step": 9736 }, { "epoch": 3.0588373873608576, "grad_norm": 0.94921875, "learning_rate": 7.841320215804508e-06, "loss": 1.0692, "step": 9738 }, { "epoch": 3.059465614386399, "grad_norm": 1.09375, "learning_rate": 7.83878133925738e-06, "loss": 1.1078, "step": 9740 }, { "epoch": 3.0600938414119403, "grad_norm": 0.98828125, "learning_rate": 7.836242462710251e-06, "loss": 1.2499, "step": 9742 }, { "epoch": 3.0607220684374816, "grad_norm": 0.9609375, "learning_rate": 7.833703586163124e-06, "loss": 1.2793, "step": 9744 }, { "epoch": 3.061350295463023, "grad_norm": 0.84765625, "learning_rate": 7.831164709615996e-06, "loss": 1.2405, "step": 9746 }, { "epoch": 3.0619785224885643, "grad_norm": 0.87890625, "learning_rate": 7.828625833068869e-06, "loss": 1.1427, "step": 9748 }, { "epoch": 3.0626067495141056, "grad_norm": 0.9609375, "learning_rate": 7.82608695652174e-06, "loss": 1.0382, "step": 9750 }, { "epoch": 3.063234976539647, "grad_norm": 0.99609375, "learning_rate": 7.823548079974612e-06, "loss": 1.1375, "step": 9752 }, { "epoch": 3.0638632035651883, "grad_norm": 0.953125, "learning_rate": 7.821009203427485e-06, "loss": 1.1466, "step": 9754 }, { "epoch": 3.0644914305907296, "grad_norm": 0.9375, "learning_rate": 7.818470326880356e-06, "loss": 1.251, "step": 9756 }, { "epoch": 3.065119657616271, "grad_norm": 1.09375, "learning_rate": 7.815931450333229e-06, "loss": 1.1365, "step": 9758 }, { "epoch": 3.0657478846418122, "grad_norm": 0.9140625, "learning_rate": 7.8133925737861e-06, "loss": 1.1269, "step": 9760 }, { "epoch": 3.0663761116673536, "grad_norm": 0.9921875, "learning_rate": 7.810853697238972e-06, "loss": 1.1741, "step": 9762 }, { "epoch": 3.0670043386928953, "grad_norm": 0.9609375, "learning_rate": 7.808314820691845e-06, "loss": 1.1396, "step": 9764 }, { "epoch": 3.0676325657184367, "grad_norm": 0.890625, "learning_rate": 7.805775944144716e-06, "loss": 1.0172, "step": 9766 }, { "epoch": 3.068260792743978, "grad_norm": 0.875, "learning_rate": 7.80323706759759e-06, "loss": 1.102, "step": 9768 }, { "epoch": 3.0688890197695193, "grad_norm": 0.9453125, "learning_rate": 7.80069819105046e-06, "loss": 1.0918, "step": 9770 }, { "epoch": 3.0695172467950607, "grad_norm": 1.0859375, "learning_rate": 7.798159314503334e-06, "loss": 1.0728, "step": 9772 }, { "epoch": 3.070145473820602, "grad_norm": 0.83984375, "learning_rate": 7.795620437956205e-06, "loss": 1.1298, "step": 9774 }, { "epoch": 3.0707737008461433, "grad_norm": 0.82421875, "learning_rate": 7.793081561409077e-06, "loss": 1.1785, "step": 9776 }, { "epoch": 3.0714019278716846, "grad_norm": 0.9609375, "learning_rate": 7.79054268486195e-06, "loss": 1.22, "step": 9778 }, { "epoch": 3.072030154897226, "grad_norm": 0.91015625, "learning_rate": 7.788003808314821e-06, "loss": 1.0492, "step": 9780 }, { "epoch": 3.0726583819227673, "grad_norm": 0.9609375, "learning_rate": 7.785464931767694e-06, "loss": 1.1182, "step": 9782 }, { "epoch": 3.0732866089483086, "grad_norm": 1.0234375, "learning_rate": 7.782926055220565e-06, "loss": 1.1537, "step": 9784 }, { "epoch": 3.07391483597385, "grad_norm": 0.890625, "learning_rate": 7.780387178673437e-06, "loss": 1.061, "step": 9786 }, { "epoch": 3.0745430629993913, "grad_norm": 0.89453125, "learning_rate": 7.77784830212631e-06, "loss": 1.1307, "step": 9788 }, { "epoch": 3.0751712900249326, "grad_norm": 0.859375, "learning_rate": 7.775309425579181e-06, "loss": 1.0999, "step": 9790 }, { "epoch": 3.075799517050474, "grad_norm": 1.1875, "learning_rate": 7.772770549032054e-06, "loss": 1.1043, "step": 9792 }, { "epoch": 3.0764277440760153, "grad_norm": 0.9765625, "learning_rate": 7.770231672484926e-06, "loss": 1.1119, "step": 9794 }, { "epoch": 3.077055971101557, "grad_norm": 0.86328125, "learning_rate": 7.767692795937797e-06, "loss": 1.2127, "step": 9796 }, { "epoch": 3.0776841981270984, "grad_norm": 0.98046875, "learning_rate": 7.76515391939067e-06, "loss": 1.1337, "step": 9798 }, { "epoch": 3.0783124251526397, "grad_norm": 0.953125, "learning_rate": 7.762615042843542e-06, "loss": 1.1153, "step": 9800 }, { "epoch": 3.078940652178181, "grad_norm": 0.9140625, "learning_rate": 7.760076166296415e-06, "loss": 1.0809, "step": 9802 }, { "epoch": 3.0795688792037224, "grad_norm": 0.90625, "learning_rate": 7.757537289749286e-06, "loss": 1.0561, "step": 9804 }, { "epoch": 3.0801971062292637, "grad_norm": 0.9453125, "learning_rate": 7.754998413202157e-06, "loss": 1.1603, "step": 9806 }, { "epoch": 3.080825333254805, "grad_norm": 0.890625, "learning_rate": 7.75245953665503e-06, "loss": 1.208, "step": 9808 }, { "epoch": 3.0814535602803463, "grad_norm": 0.9921875, "learning_rate": 7.749920660107902e-06, "loss": 1.1505, "step": 9810 }, { "epoch": 3.0820817873058877, "grad_norm": 1.1171875, "learning_rate": 7.747381783560775e-06, "loss": 1.0524, "step": 9812 }, { "epoch": 3.082710014331429, "grad_norm": 0.9296875, "learning_rate": 7.744842907013646e-06, "loss": 1.225, "step": 9814 }, { "epoch": 3.0833382413569703, "grad_norm": 0.98828125, "learning_rate": 7.74230403046652e-06, "loss": 1.0884, "step": 9816 }, { "epoch": 3.0839664683825116, "grad_norm": 0.99609375, "learning_rate": 7.739765153919391e-06, "loss": 1.1976, "step": 9818 }, { "epoch": 3.084594695408053, "grad_norm": 1.015625, "learning_rate": 7.737226277372264e-06, "loss": 1.1264, "step": 9820 }, { "epoch": 3.0852229224335943, "grad_norm": 0.9921875, "learning_rate": 7.734687400825135e-06, "loss": 1.2508, "step": 9822 }, { "epoch": 3.0858511494591356, "grad_norm": 0.8984375, "learning_rate": 7.732148524278008e-06, "loss": 1.1333, "step": 9824 }, { "epoch": 3.086479376484677, "grad_norm": 0.96875, "learning_rate": 7.72960964773088e-06, "loss": 1.1799, "step": 9826 }, { "epoch": 3.0871076035102183, "grad_norm": 0.9921875, "learning_rate": 7.727070771183753e-06, "loss": 1.0684, "step": 9828 }, { "epoch": 3.08773583053576, "grad_norm": 0.875, "learning_rate": 7.724531894636624e-06, "loss": 1.1541, "step": 9830 }, { "epoch": 3.0883640575613014, "grad_norm": 0.921875, "learning_rate": 7.721993018089496e-06, "loss": 0.9546, "step": 9832 }, { "epoch": 3.0889922845868427, "grad_norm": 1.0234375, "learning_rate": 7.719454141542369e-06, "loss": 1.0915, "step": 9834 }, { "epoch": 3.089620511612384, "grad_norm": 0.8984375, "learning_rate": 7.71691526499524e-06, "loss": 1.1257, "step": 9836 }, { "epoch": 3.0902487386379254, "grad_norm": 0.9296875, "learning_rate": 7.714376388448113e-06, "loss": 1.1292, "step": 9838 }, { "epoch": 3.0908769656634667, "grad_norm": 0.9921875, "learning_rate": 7.711837511900985e-06, "loss": 1.2024, "step": 9840 }, { "epoch": 3.091505192689008, "grad_norm": 0.8671875, "learning_rate": 7.709298635353858e-06, "loss": 1.1148, "step": 9842 }, { "epoch": 3.0921334197145494, "grad_norm": 0.953125, "learning_rate": 7.706759758806729e-06, "loss": 1.0558, "step": 9844 }, { "epoch": 3.0927616467400907, "grad_norm": 0.87109375, "learning_rate": 7.7042208822596e-06, "loss": 1.0944, "step": 9846 }, { "epoch": 3.093389873765632, "grad_norm": 0.87890625, "learning_rate": 7.701682005712473e-06, "loss": 1.1657, "step": 9848 }, { "epoch": 3.0940181007911733, "grad_norm": 1.1328125, "learning_rate": 7.699143129165345e-06, "loss": 1.1462, "step": 9850 }, { "epoch": 3.0946463278167147, "grad_norm": 0.9375, "learning_rate": 7.696604252618218e-06, "loss": 1.1265, "step": 9852 }, { "epoch": 3.095274554842256, "grad_norm": 0.9140625, "learning_rate": 7.69406537607109e-06, "loss": 0.9607, "step": 9854 }, { "epoch": 3.0959027818677973, "grad_norm": 0.94140625, "learning_rate": 7.69152649952396e-06, "loss": 1.084, "step": 9856 }, { "epoch": 3.0965310088933387, "grad_norm": 0.89453125, "learning_rate": 7.688987622976834e-06, "loss": 1.0735, "step": 9858 }, { "epoch": 3.09715923591888, "grad_norm": 0.8671875, "learning_rate": 7.686448746429705e-06, "loss": 1.1488, "step": 9860 }, { "epoch": 3.0977874629444218, "grad_norm": 0.94921875, "learning_rate": 7.683909869882578e-06, "loss": 1.1955, "step": 9862 }, { "epoch": 3.098415689969963, "grad_norm": 0.9375, "learning_rate": 7.68137099333545e-06, "loss": 1.1951, "step": 9864 }, { "epoch": 3.0990439169955044, "grad_norm": 0.8828125, "learning_rate": 7.678832116788321e-06, "loss": 1.1366, "step": 9866 }, { "epoch": 3.0996721440210457, "grad_norm": 0.94921875, "learning_rate": 7.676293240241194e-06, "loss": 1.1516, "step": 9868 }, { "epoch": 3.100300371046587, "grad_norm": 1.015625, "learning_rate": 7.673754363694065e-06, "loss": 1.1279, "step": 9870 }, { "epoch": 3.1009285980721284, "grad_norm": 0.90625, "learning_rate": 7.671215487146938e-06, "loss": 1.2269, "step": 9872 }, { "epoch": 3.1015568250976697, "grad_norm": 0.94140625, "learning_rate": 7.66867661059981e-06, "loss": 1.1702, "step": 9874 }, { "epoch": 3.102185052123211, "grad_norm": 0.98828125, "learning_rate": 7.666137734052683e-06, "loss": 1.2396, "step": 9876 }, { "epoch": 3.1028132791487524, "grad_norm": 0.97265625, "learning_rate": 7.663598857505554e-06, "loss": 1.1257, "step": 9878 }, { "epoch": 3.1034415061742937, "grad_norm": 0.95703125, "learning_rate": 7.661059980958426e-06, "loss": 0.9795, "step": 9880 }, { "epoch": 3.104069733199835, "grad_norm": 0.96484375, "learning_rate": 7.658521104411299e-06, "loss": 1.0225, "step": 9882 }, { "epoch": 3.1046979602253764, "grad_norm": 0.90234375, "learning_rate": 7.65598222786417e-06, "loss": 1.0617, "step": 9884 }, { "epoch": 3.1053261872509177, "grad_norm": 0.98828125, "learning_rate": 7.653443351317043e-06, "loss": 1.0403, "step": 9886 }, { "epoch": 3.105954414276459, "grad_norm": 0.96484375, "learning_rate": 7.650904474769915e-06, "loss": 1.1762, "step": 9888 }, { "epoch": 3.1065826413020003, "grad_norm": 0.91015625, "learning_rate": 7.648365598222786e-06, "loss": 1.2454, "step": 9890 }, { "epoch": 3.1072108683275417, "grad_norm": 0.9140625, "learning_rate": 7.645826721675659e-06, "loss": 1.079, "step": 9892 }, { "epoch": 3.107839095353083, "grad_norm": 0.91796875, "learning_rate": 7.64328784512853e-06, "loss": 1.1204, "step": 9894 }, { "epoch": 3.1084673223786248, "grad_norm": 0.87890625, "learning_rate": 7.640748968581404e-06, "loss": 1.184, "step": 9896 }, { "epoch": 3.109095549404166, "grad_norm": 0.9765625, "learning_rate": 7.638210092034275e-06, "loss": 1.1159, "step": 9898 }, { "epoch": 3.1097237764297074, "grad_norm": 0.95703125, "learning_rate": 7.635671215487146e-06, "loss": 1.0844, "step": 9900 }, { "epoch": 3.1103520034552488, "grad_norm": 0.91015625, "learning_rate": 7.63313233894002e-06, "loss": 1.2249, "step": 9902 }, { "epoch": 3.11098023048079, "grad_norm": 0.8671875, "learning_rate": 7.63059346239289e-06, "loss": 1.3296, "step": 9904 }, { "epoch": 3.1116084575063314, "grad_norm": 1.0703125, "learning_rate": 7.628054585845765e-06, "loss": 1.0242, "step": 9906 }, { "epoch": 3.1122366845318727, "grad_norm": 0.875, "learning_rate": 7.625515709298636e-06, "loss": 1.0308, "step": 9908 }, { "epoch": 3.112864911557414, "grad_norm": 1.1328125, "learning_rate": 7.622976832751508e-06, "loss": 1.1393, "step": 9910 }, { "epoch": 3.1134931385829554, "grad_norm": 0.859375, "learning_rate": 7.6204379562043805e-06, "loss": 1.0464, "step": 9912 }, { "epoch": 3.1141213656084967, "grad_norm": 0.94921875, "learning_rate": 7.617899079657253e-06, "loss": 1.1938, "step": 9914 }, { "epoch": 3.114749592634038, "grad_norm": 0.91796875, "learning_rate": 7.615360203110125e-06, "loss": 1.1147, "step": 9916 }, { "epoch": 3.1153778196595794, "grad_norm": 0.9296875, "learning_rate": 7.612821326562997e-06, "loss": 1.181, "step": 9918 }, { "epoch": 3.1160060466851207, "grad_norm": 0.98828125, "learning_rate": 7.610282450015869e-06, "loss": 1.1613, "step": 9920 }, { "epoch": 3.116634273710662, "grad_norm": 0.9453125, "learning_rate": 7.607743573468741e-06, "loss": 1.1342, "step": 9922 }, { "epoch": 3.1172625007362034, "grad_norm": 0.90234375, "learning_rate": 7.605204696921613e-06, "loss": 1.1917, "step": 9924 }, { "epoch": 3.1178907277617447, "grad_norm": 0.87890625, "learning_rate": 7.602665820374485e-06, "loss": 1.2218, "step": 9926 }, { "epoch": 3.1185189547872865, "grad_norm": 0.97265625, "learning_rate": 7.6001269438273575e-06, "loss": 1.0742, "step": 9928 }, { "epoch": 3.119147181812828, "grad_norm": 1.015625, "learning_rate": 7.59758806728023e-06, "loss": 1.0696, "step": 9930 }, { "epoch": 3.119775408838369, "grad_norm": 0.8671875, "learning_rate": 7.595049190733101e-06, "loss": 1.211, "step": 9932 }, { "epoch": 3.1204036358639105, "grad_norm": 1.0625, "learning_rate": 7.592510314185973e-06, "loss": 1.0915, "step": 9934 }, { "epoch": 3.121031862889452, "grad_norm": 0.984375, "learning_rate": 7.589971437638846e-06, "loss": 1.0551, "step": 9936 }, { "epoch": 3.121660089914993, "grad_norm": 0.9921875, "learning_rate": 7.587432561091718e-06, "loss": 1.143, "step": 9938 }, { "epoch": 3.1222883169405344, "grad_norm": 0.94140625, "learning_rate": 7.58489368454459e-06, "loss": 1.21, "step": 9940 }, { "epoch": 3.1229165439660758, "grad_norm": 1.0703125, "learning_rate": 7.5823548079974614e-06, "loss": 1.1149, "step": 9942 }, { "epoch": 3.123544770991617, "grad_norm": 0.9375, "learning_rate": 7.579815931450334e-06, "loss": 1.1868, "step": 9944 }, { "epoch": 3.1241729980171584, "grad_norm": 0.8984375, "learning_rate": 7.577277054903206e-06, "loss": 1.2012, "step": 9946 }, { "epoch": 3.1248012250426997, "grad_norm": 0.984375, "learning_rate": 7.574738178356078e-06, "loss": 1.0744, "step": 9948 }, { "epoch": 3.125429452068241, "grad_norm": 0.9921875, "learning_rate": 7.57219930180895e-06, "loss": 0.9798, "step": 9950 }, { "epoch": 3.1260576790937824, "grad_norm": 0.9921875, "learning_rate": 7.569660425261823e-06, "loss": 1.1369, "step": 9952 }, { "epoch": 3.1266859061193237, "grad_norm": 0.953125, "learning_rate": 7.567121548714694e-06, "loss": 1.1483, "step": 9954 }, { "epoch": 3.127314133144865, "grad_norm": 0.90625, "learning_rate": 7.564582672167566e-06, "loss": 1.0818, "step": 9956 }, { "epoch": 3.1279423601704064, "grad_norm": 1.0625, "learning_rate": 7.5620437956204384e-06, "loss": 1.0731, "step": 9958 }, { "epoch": 3.1285705871959477, "grad_norm": 0.91796875, "learning_rate": 7.559504919073311e-06, "loss": 1.1774, "step": 9960 }, { "epoch": 3.1291988142214895, "grad_norm": 0.94140625, "learning_rate": 7.556966042526183e-06, "loss": 1.1607, "step": 9962 }, { "epoch": 3.129827041247031, "grad_norm": 1.09375, "learning_rate": 7.554427165979054e-06, "loss": 1.062, "step": 9964 }, { "epoch": 3.130455268272572, "grad_norm": 1.03125, "learning_rate": 7.5518882894319265e-06, "loss": 1.0606, "step": 9966 }, { "epoch": 3.1310834952981135, "grad_norm": 0.953125, "learning_rate": 7.549349412884799e-06, "loss": 1.0376, "step": 9968 }, { "epoch": 3.131711722323655, "grad_norm": 0.89453125, "learning_rate": 7.546810536337671e-06, "loss": 1.2551, "step": 9970 }, { "epoch": 3.132339949349196, "grad_norm": 0.91796875, "learning_rate": 7.544271659790543e-06, "loss": 1.056, "step": 9972 }, { "epoch": 3.1329681763747375, "grad_norm": 0.96875, "learning_rate": 7.541732783243415e-06, "loss": 1.0927, "step": 9974 }, { "epoch": 3.133596403400279, "grad_norm": 0.93359375, "learning_rate": 7.539193906696287e-06, "loss": 1.1162, "step": 9976 }, { "epoch": 3.13422463042582, "grad_norm": 0.96875, "learning_rate": 7.536655030149159e-06, "loss": 1.1638, "step": 9978 }, { "epoch": 3.1348528574513614, "grad_norm": 0.90234375, "learning_rate": 7.534116153602031e-06, "loss": 1.1686, "step": 9980 }, { "epoch": 3.1354810844769028, "grad_norm": 0.91796875, "learning_rate": 7.5315772770549035e-06, "loss": 1.194, "step": 9982 }, { "epoch": 3.136109311502444, "grad_norm": 0.9609375, "learning_rate": 7.529038400507776e-06, "loss": 1.1348, "step": 9984 }, { "epoch": 3.1367375385279854, "grad_norm": 0.92578125, "learning_rate": 7.526499523960648e-06, "loss": 1.1898, "step": 9986 }, { "epoch": 3.1373657655535268, "grad_norm": 0.83984375, "learning_rate": 7.523960647413519e-06, "loss": 1.1747, "step": 9988 }, { "epoch": 3.137993992579068, "grad_norm": 0.88671875, "learning_rate": 7.5214217708663916e-06, "loss": 1.0769, "step": 9990 }, { "epoch": 3.1386222196046094, "grad_norm": 0.90625, "learning_rate": 7.518882894319265e-06, "loss": 1.1708, "step": 9992 }, { "epoch": 3.139250446630151, "grad_norm": 0.88671875, "learning_rate": 7.516344017772137e-06, "loss": 1.1925, "step": 9994 }, { "epoch": 3.1398786736556925, "grad_norm": 0.89453125, "learning_rate": 7.513805141225009e-06, "loss": 1.2119, "step": 9996 }, { "epoch": 3.140506900681234, "grad_norm": 1.0078125, "learning_rate": 7.511266264677881e-06, "loss": 1.1209, "step": 9998 }, { "epoch": 3.141135127706775, "grad_norm": 0.88671875, "learning_rate": 7.5087273881307535e-06, "loss": 1.1929, "step": 10000 }, { "epoch": 3.1417633547323165, "grad_norm": 1.03125, "learning_rate": 7.506188511583625e-06, "loss": 1.23, "step": 10002 }, { "epoch": 3.142391581757858, "grad_norm": 1.0, "learning_rate": 7.503649635036497e-06, "loss": 1.0612, "step": 10004 }, { "epoch": 3.143019808783399, "grad_norm": 1.015625, "learning_rate": 7.501110758489369e-06, "loss": 1.0114, "step": 10006 }, { "epoch": 3.1436480358089405, "grad_norm": 0.88671875, "learning_rate": 7.498571881942242e-06, "loss": 1.2459, "step": 10008 }, { "epoch": 3.144276262834482, "grad_norm": 0.9375, "learning_rate": 7.496033005395114e-06, "loss": 1.1578, "step": 10010 }, { "epoch": 3.144904489860023, "grad_norm": 1.0078125, "learning_rate": 7.493494128847986e-06, "loss": 1.2484, "step": 10012 }, { "epoch": 3.1455327168855645, "grad_norm": 0.9296875, "learning_rate": 7.4909552523008575e-06, "loss": 1.1003, "step": 10014 }, { "epoch": 3.146160943911106, "grad_norm": 1.03125, "learning_rate": 7.48841637575373e-06, "loss": 1.0281, "step": 10016 }, { "epoch": 3.146789170936647, "grad_norm": 0.98046875, "learning_rate": 7.485877499206602e-06, "loss": 1.0874, "step": 10018 }, { "epoch": 3.1474173979621884, "grad_norm": 0.9609375, "learning_rate": 7.483338622659474e-06, "loss": 1.0475, "step": 10020 }, { "epoch": 3.1480456249877298, "grad_norm": 0.92578125, "learning_rate": 7.480799746112346e-06, "loss": 1.1066, "step": 10022 }, { "epoch": 3.148673852013271, "grad_norm": 1.0078125, "learning_rate": 7.478260869565218e-06, "loss": 1.145, "step": 10024 }, { "epoch": 3.1493020790388124, "grad_norm": 0.89453125, "learning_rate": 7.47572199301809e-06, "loss": 1.1539, "step": 10026 }, { "epoch": 3.149930306064354, "grad_norm": 0.91015625, "learning_rate": 7.473183116470962e-06, "loss": 1.1841, "step": 10028 }, { "epoch": 3.1505585330898955, "grad_norm": 0.9296875, "learning_rate": 7.4706442399238344e-06, "loss": 1.1323, "step": 10030 }, { "epoch": 3.151186760115437, "grad_norm": 0.91796875, "learning_rate": 7.468105363376707e-06, "loss": 1.1475, "step": 10032 }, { "epoch": 3.151814987140978, "grad_norm": 1.0234375, "learning_rate": 7.465566486829579e-06, "loss": 1.0844, "step": 10034 }, { "epoch": 3.1524432141665195, "grad_norm": 0.98046875, "learning_rate": 7.46302761028245e-06, "loss": 1.1418, "step": 10036 }, { "epoch": 3.153071441192061, "grad_norm": 0.93359375, "learning_rate": 7.4604887337353225e-06, "loss": 1.1751, "step": 10038 }, { "epoch": 3.153699668217602, "grad_norm": 1.125, "learning_rate": 7.457949857188195e-06, "loss": 1.1755, "step": 10040 }, { "epoch": 3.1543278952431435, "grad_norm": 0.859375, "learning_rate": 7.455410980641067e-06, "loss": 1.2037, "step": 10042 }, { "epoch": 3.154956122268685, "grad_norm": 0.9765625, "learning_rate": 7.452872104093939e-06, "loss": 1.0926, "step": 10044 }, { "epoch": 3.155584349294226, "grad_norm": 1.0, "learning_rate": 7.450333227546811e-06, "loss": 1.1753, "step": 10046 }, { "epoch": 3.1562125763197675, "grad_norm": 0.9140625, "learning_rate": 7.447794350999683e-06, "loss": 1.2306, "step": 10048 }, { "epoch": 3.156840803345309, "grad_norm": 0.9921875, "learning_rate": 7.445255474452555e-06, "loss": 1.2188, "step": 10050 }, { "epoch": 3.15746903037085, "grad_norm": 0.90234375, "learning_rate": 7.442716597905427e-06, "loss": 1.1345, "step": 10052 }, { "epoch": 3.1580972573963915, "grad_norm": 0.98046875, "learning_rate": 7.4401777213582995e-06, "loss": 1.1176, "step": 10054 }, { "epoch": 3.158725484421933, "grad_norm": 0.9453125, "learning_rate": 7.437638844811172e-06, "loss": 1.2491, "step": 10056 }, { "epoch": 3.1593537114474746, "grad_norm": 0.8828125, "learning_rate": 7.435099968264043e-06, "loss": 1.1737, "step": 10058 }, { "epoch": 3.159981938473016, "grad_norm": 0.89453125, "learning_rate": 7.432561091716915e-06, "loss": 1.1634, "step": 10060 }, { "epoch": 3.1606101654985572, "grad_norm": 0.92578125, "learning_rate": 7.4300222151697876e-06, "loss": 1.2016, "step": 10062 }, { "epoch": 3.1612383925240986, "grad_norm": 1.09375, "learning_rate": 7.42748333862266e-06, "loss": 1.2305, "step": 10064 }, { "epoch": 3.16186661954964, "grad_norm": 1.015625, "learning_rate": 7.424944462075532e-06, "loss": 1.0784, "step": 10066 }, { "epoch": 3.162494846575181, "grad_norm": 1.015625, "learning_rate": 7.422405585528404e-06, "loss": 1.1745, "step": 10068 }, { "epoch": 3.1631230736007225, "grad_norm": 0.953125, "learning_rate": 7.419866708981276e-06, "loss": 1.1402, "step": 10070 }, { "epoch": 3.163751300626264, "grad_norm": 0.90234375, "learning_rate": 7.417327832434148e-06, "loss": 1.1377, "step": 10072 }, { "epoch": 3.164379527651805, "grad_norm": 0.94140625, "learning_rate": 7.41478895588702e-06, "loss": 1.1755, "step": 10074 }, { "epoch": 3.1650077546773465, "grad_norm": 0.93359375, "learning_rate": 7.412250079339892e-06, "loss": 1.1081, "step": 10076 }, { "epoch": 3.165635981702888, "grad_norm": 0.953125, "learning_rate": 7.409711202792765e-06, "loss": 1.119, "step": 10078 }, { "epoch": 3.166264208728429, "grad_norm": 0.890625, "learning_rate": 7.407172326245638e-06, "loss": 1.2574, "step": 10080 }, { "epoch": 3.1668924357539705, "grad_norm": 1.0, "learning_rate": 7.40463344969851e-06, "loss": 1.1278, "step": 10082 }, { "epoch": 3.167520662779512, "grad_norm": 0.97265625, "learning_rate": 7.402094573151381e-06, "loss": 1.0189, "step": 10084 }, { "epoch": 3.168148889805053, "grad_norm": 0.99609375, "learning_rate": 7.3995556966042535e-06, "loss": 1.0706, "step": 10086 }, { "epoch": 3.1687771168305945, "grad_norm": 0.9609375, "learning_rate": 7.397016820057126e-06, "loss": 1.1806, "step": 10088 }, { "epoch": 3.169405343856136, "grad_norm": 0.91015625, "learning_rate": 7.394477943509998e-06, "loss": 1.2527, "step": 10090 }, { "epoch": 3.170033570881677, "grad_norm": 0.9765625, "learning_rate": 7.39193906696287e-06, "loss": 1.3736, "step": 10092 }, { "epoch": 3.170661797907219, "grad_norm": 1.015625, "learning_rate": 7.389400190415742e-06, "loss": 1.1667, "step": 10094 }, { "epoch": 3.1712900249327602, "grad_norm": 0.8828125, "learning_rate": 7.386861313868614e-06, "loss": 1.135, "step": 10096 }, { "epoch": 3.1719182519583016, "grad_norm": 0.88671875, "learning_rate": 7.384322437321486e-06, "loss": 1.2879, "step": 10098 }, { "epoch": 3.172546478983843, "grad_norm": 0.9140625, "learning_rate": 7.381783560774358e-06, "loss": 1.0763, "step": 10100 }, { "epoch": 3.1731747060093842, "grad_norm": 0.9609375, "learning_rate": 7.3792446842272305e-06, "loss": 1.0748, "step": 10102 }, { "epoch": 3.1738029330349256, "grad_norm": 1.078125, "learning_rate": 7.376705807680103e-06, "loss": 1.1931, "step": 10104 }, { "epoch": 3.174431160060467, "grad_norm": 0.9453125, "learning_rate": 7.374166931132974e-06, "loss": 1.2303, "step": 10106 }, { "epoch": 3.175059387086008, "grad_norm": 0.93359375, "learning_rate": 7.371628054585846e-06, "loss": 1.1724, "step": 10108 }, { "epoch": 3.1756876141115495, "grad_norm": 0.93359375, "learning_rate": 7.3690891780387185e-06, "loss": 1.2339, "step": 10110 }, { "epoch": 3.176315841137091, "grad_norm": 0.91015625, "learning_rate": 7.366550301491591e-06, "loss": 1.2258, "step": 10112 }, { "epoch": 3.176944068162632, "grad_norm": 0.90625, "learning_rate": 7.364011424944463e-06, "loss": 1.3114, "step": 10114 }, { "epoch": 3.1775722951881735, "grad_norm": 0.89453125, "learning_rate": 7.361472548397335e-06, "loss": 1.0958, "step": 10116 }, { "epoch": 3.178200522213715, "grad_norm": 0.890625, "learning_rate": 7.358933671850207e-06, "loss": 1.1284, "step": 10118 }, { "epoch": 3.178828749239256, "grad_norm": 0.94921875, "learning_rate": 7.356394795303079e-06, "loss": 1.1064, "step": 10120 }, { "epoch": 3.1794569762647975, "grad_norm": 0.97265625, "learning_rate": 7.353855918755951e-06, "loss": 1.1858, "step": 10122 }, { "epoch": 3.1800852032903393, "grad_norm": 1.0390625, "learning_rate": 7.351317042208823e-06, "loss": 1.0978, "step": 10124 }, { "epoch": 3.1807134303158806, "grad_norm": 1.0390625, "learning_rate": 7.3487781656616955e-06, "loss": 1.0579, "step": 10126 }, { "epoch": 3.181341657341422, "grad_norm": 0.97265625, "learning_rate": 7.346239289114567e-06, "loss": 1.23, "step": 10128 }, { "epoch": 3.1819698843669633, "grad_norm": 0.9921875, "learning_rate": 7.343700412567439e-06, "loss": 1.1933, "step": 10130 }, { "epoch": 3.1825981113925046, "grad_norm": 0.9765625, "learning_rate": 7.341161536020311e-06, "loss": 1.0513, "step": 10132 }, { "epoch": 3.183226338418046, "grad_norm": 1.0546875, "learning_rate": 7.338622659473184e-06, "loss": 1.0088, "step": 10134 }, { "epoch": 3.1838545654435872, "grad_norm": 0.91015625, "learning_rate": 7.336083782926056e-06, "loss": 1.1312, "step": 10136 }, { "epoch": 3.1844827924691286, "grad_norm": 0.90234375, "learning_rate": 7.333544906378928e-06, "loss": 1.1491, "step": 10138 }, { "epoch": 3.18511101949467, "grad_norm": 0.96875, "learning_rate": 7.331006029831799e-06, "loss": 1.0765, "step": 10140 }, { "epoch": 3.1857392465202112, "grad_norm": 0.99609375, "learning_rate": 7.328467153284672e-06, "loss": 0.9256, "step": 10142 }, { "epoch": 3.1863674735457526, "grad_norm": 0.984375, "learning_rate": 7.325928276737544e-06, "loss": 1.1434, "step": 10144 }, { "epoch": 3.186995700571294, "grad_norm": 0.91796875, "learning_rate": 7.323389400190416e-06, "loss": 1.1603, "step": 10146 }, { "epoch": 3.187623927596835, "grad_norm": 0.95703125, "learning_rate": 7.320850523643288e-06, "loss": 1.2294, "step": 10148 }, { "epoch": 3.1882521546223765, "grad_norm": 0.98046875, "learning_rate": 7.3183116470961606e-06, "loss": 1.2099, "step": 10150 }, { "epoch": 3.188880381647918, "grad_norm": 0.90234375, "learning_rate": 7.315772770549032e-06, "loss": 1.2161, "step": 10152 }, { "epoch": 3.189508608673459, "grad_norm": 0.91796875, "learning_rate": 7.313233894001904e-06, "loss": 0.9546, "step": 10154 }, { "epoch": 3.1901368356990005, "grad_norm": 0.953125, "learning_rate": 7.310695017454776e-06, "loss": 1.2637, "step": 10156 }, { "epoch": 3.190765062724542, "grad_norm": 0.9375, "learning_rate": 7.308156140907649e-06, "loss": 1.031, "step": 10158 }, { "epoch": 3.1913932897500836, "grad_norm": 0.984375, "learning_rate": 7.305617264360521e-06, "loss": 1.1905, "step": 10160 }, { "epoch": 3.192021516775625, "grad_norm": 0.9375, "learning_rate": 7.303078387813392e-06, "loss": 1.3106, "step": 10162 }, { "epoch": 3.1926497438011663, "grad_norm": 0.9140625, "learning_rate": 7.300539511266266e-06, "loss": 1.1366, "step": 10164 }, { "epoch": 3.1932779708267076, "grad_norm": 1.03125, "learning_rate": 7.2980006347191376e-06, "loss": 1.2454, "step": 10166 }, { "epoch": 3.193906197852249, "grad_norm": 0.984375, "learning_rate": 7.29546175817201e-06, "loss": 1.1462, "step": 10168 }, { "epoch": 3.1945344248777903, "grad_norm": 0.9921875, "learning_rate": 7.292922881624882e-06, "loss": 1.1659, "step": 10170 }, { "epoch": 3.1951626519033316, "grad_norm": 0.9609375, "learning_rate": 7.290384005077754e-06, "loss": 1.1283, "step": 10172 }, { "epoch": 3.195790878928873, "grad_norm": 1.078125, "learning_rate": 7.2878451285306265e-06, "loss": 1.0473, "step": 10174 }, { "epoch": 3.1964191059544143, "grad_norm": 0.88671875, "learning_rate": 7.285306251983499e-06, "loss": 1.268, "step": 10176 }, { "epoch": 3.1970473329799556, "grad_norm": 1.0078125, "learning_rate": 7.28276737543637e-06, "loss": 1.1784, "step": 10178 }, { "epoch": 3.197675560005497, "grad_norm": 0.953125, "learning_rate": 7.280228498889242e-06, "loss": 1.1936, "step": 10180 }, { "epoch": 3.1983037870310382, "grad_norm": 0.90625, "learning_rate": 7.2776896223421145e-06, "loss": 1.2131, "step": 10182 }, { "epoch": 3.1989320140565796, "grad_norm": 1.0859375, "learning_rate": 7.275150745794987e-06, "loss": 1.043, "step": 10184 }, { "epoch": 3.199560241082121, "grad_norm": 0.953125, "learning_rate": 7.272611869247859e-06, "loss": 1.0352, "step": 10186 }, { "epoch": 3.200188468107662, "grad_norm": 0.90625, "learning_rate": 7.27007299270073e-06, "loss": 1.1692, "step": 10188 }, { "epoch": 3.200816695133204, "grad_norm": 0.97265625, "learning_rate": 7.267534116153603e-06, "loss": 1.1422, "step": 10190 }, { "epoch": 3.2014449221587453, "grad_norm": 0.9296875, "learning_rate": 7.264995239606475e-06, "loss": 1.1398, "step": 10192 }, { "epoch": 3.2020731491842866, "grad_norm": 0.91796875, "learning_rate": 7.262456363059347e-06, "loss": 1.1952, "step": 10194 }, { "epoch": 3.202701376209828, "grad_norm": 0.9609375, "learning_rate": 7.259917486512219e-06, "loss": 1.1287, "step": 10196 }, { "epoch": 3.2033296032353693, "grad_norm": 0.98828125, "learning_rate": 7.2573786099650915e-06, "loss": 1.1601, "step": 10198 }, { "epoch": 3.2039578302609106, "grad_norm": 0.94140625, "learning_rate": 7.254839733417963e-06, "loss": 1.0464, "step": 10200 }, { "epoch": 3.204586057286452, "grad_norm": 0.90625, "learning_rate": 7.252300856870835e-06, "loss": 1.047, "step": 10202 }, { "epoch": 3.2052142843119933, "grad_norm": 0.9765625, "learning_rate": 7.249761980323707e-06, "loss": 1.0959, "step": 10204 }, { "epoch": 3.2058425113375346, "grad_norm": 0.8828125, "learning_rate": 7.24722310377658e-06, "loss": 1.2146, "step": 10206 }, { "epoch": 3.206470738363076, "grad_norm": 0.96484375, "learning_rate": 7.244684227229452e-06, "loss": 1.1376, "step": 10208 }, { "epoch": 3.2070989653886173, "grad_norm": 0.94921875, "learning_rate": 7.242145350682323e-06, "loss": 1.0661, "step": 10210 }, { "epoch": 3.2077271924141586, "grad_norm": 0.90234375, "learning_rate": 7.2396064741351954e-06, "loss": 1.29, "step": 10212 }, { "epoch": 3.2083554194397, "grad_norm": 0.90625, "learning_rate": 7.237067597588068e-06, "loss": 0.9408, "step": 10214 }, { "epoch": 3.2089836464652413, "grad_norm": 0.8984375, "learning_rate": 7.23452872104094e-06, "loss": 1.1794, "step": 10216 }, { "epoch": 3.2096118734907826, "grad_norm": 1.0234375, "learning_rate": 7.231989844493812e-06, "loss": 1.0859, "step": 10218 }, { "epoch": 3.210240100516324, "grad_norm": 0.93359375, "learning_rate": 7.229450967946684e-06, "loss": 1.2767, "step": 10220 }, { "epoch": 3.2108683275418652, "grad_norm": 0.9375, "learning_rate": 7.226912091399556e-06, "loss": 1.0371, "step": 10222 }, { "epoch": 3.2114965545674066, "grad_norm": 0.92578125, "learning_rate": 7.224373214852428e-06, "loss": 1.1416, "step": 10224 }, { "epoch": 3.2121247815929483, "grad_norm": 0.9140625, "learning_rate": 7.2218343383053e-06, "loss": 1.074, "step": 10226 }, { "epoch": 3.2127530086184897, "grad_norm": 0.91015625, "learning_rate": 7.2192954617581724e-06, "loss": 1.2668, "step": 10228 }, { "epoch": 3.213381235644031, "grad_norm": 0.90234375, "learning_rate": 7.216756585211045e-06, "loss": 1.0487, "step": 10230 }, { "epoch": 3.2140094626695723, "grad_norm": 0.9609375, "learning_rate": 7.214217708663917e-06, "loss": 1.1737, "step": 10232 }, { "epoch": 3.2146376896951137, "grad_norm": 0.98828125, "learning_rate": 7.211678832116788e-06, "loss": 1.0189, "step": 10234 }, { "epoch": 3.215265916720655, "grad_norm": 0.99609375, "learning_rate": 7.2091399555696605e-06, "loss": 1.0689, "step": 10236 }, { "epoch": 3.2158941437461963, "grad_norm": 1.0625, "learning_rate": 7.206601079022533e-06, "loss": 1.0584, "step": 10238 }, { "epoch": 3.2165223707717376, "grad_norm": 0.90234375, "learning_rate": 7.204062202475405e-06, "loss": 1.2163, "step": 10240 }, { "epoch": 3.217150597797279, "grad_norm": 0.9453125, "learning_rate": 7.201523325928277e-06, "loss": 1.133, "step": 10242 }, { "epoch": 3.2177788248228203, "grad_norm": 1.03125, "learning_rate": 7.1989844493811486e-06, "loss": 1.1017, "step": 10244 }, { "epoch": 3.2184070518483616, "grad_norm": 1.0234375, "learning_rate": 7.196445572834021e-06, "loss": 1.2524, "step": 10246 }, { "epoch": 3.219035278873903, "grad_norm": 0.94140625, "learning_rate": 7.193906696286893e-06, "loss": 1.2265, "step": 10248 }, { "epoch": 3.2196635058994443, "grad_norm": 0.93359375, "learning_rate": 7.191367819739766e-06, "loss": 1.1439, "step": 10250 }, { "epoch": 3.2202917329249856, "grad_norm": 1.109375, "learning_rate": 7.188828943192638e-06, "loss": 1.0571, "step": 10252 }, { "epoch": 3.220919959950527, "grad_norm": 0.89453125, "learning_rate": 7.1862900666455106e-06, "loss": 1.1079, "step": 10254 }, { "epoch": 3.2215481869760687, "grad_norm": 0.9765625, "learning_rate": 7.183751190098383e-06, "loss": 1.0474, "step": 10256 }, { "epoch": 3.22217641400161, "grad_norm": 0.96875, "learning_rate": 7.181212313551254e-06, "loss": 1.146, "step": 10258 }, { "epoch": 3.2228046410271514, "grad_norm": 0.91015625, "learning_rate": 7.178673437004126e-06, "loss": 1.2181, "step": 10260 }, { "epoch": 3.2234328680526927, "grad_norm": 0.91015625, "learning_rate": 7.176134560456999e-06, "loss": 1.2536, "step": 10262 }, { "epoch": 3.224061095078234, "grad_norm": 0.9296875, "learning_rate": 7.173595683909871e-06, "loss": 1.1294, "step": 10264 }, { "epoch": 3.2246893221037753, "grad_norm": 0.984375, "learning_rate": 7.171056807362743e-06, "loss": 1.1661, "step": 10266 }, { "epoch": 3.2253175491293167, "grad_norm": 0.953125, "learning_rate": 7.168517930815615e-06, "loss": 1.1184, "step": 10268 }, { "epoch": 3.225945776154858, "grad_norm": 0.99609375, "learning_rate": 7.165979054268487e-06, "loss": 0.9504, "step": 10270 }, { "epoch": 3.2265740031803993, "grad_norm": 0.9765625, "learning_rate": 7.163440177721359e-06, "loss": 1.1186, "step": 10272 }, { "epoch": 3.2272022302059407, "grad_norm": 1.0, "learning_rate": 7.160901301174231e-06, "loss": 1.1909, "step": 10274 }, { "epoch": 3.227830457231482, "grad_norm": 0.96875, "learning_rate": 7.158362424627103e-06, "loss": 1.2077, "step": 10276 }, { "epoch": 3.2284586842570233, "grad_norm": 0.87890625, "learning_rate": 7.155823548079976e-06, "loss": 1.2479, "step": 10278 }, { "epoch": 3.2290869112825646, "grad_norm": 0.890625, "learning_rate": 7.153284671532848e-06, "loss": 1.2517, "step": 10280 }, { "epoch": 3.229715138308106, "grad_norm": 0.97265625, "learning_rate": 7.150745794985719e-06, "loss": 1.1495, "step": 10282 }, { "epoch": 3.2303433653336473, "grad_norm": 0.86328125, "learning_rate": 7.1482069184385915e-06, "loss": 1.0794, "step": 10284 }, { "epoch": 3.2309715923591886, "grad_norm": 1.0, "learning_rate": 7.145668041891464e-06, "loss": 1.0907, "step": 10286 }, { "epoch": 3.23159981938473, "grad_norm": 0.9765625, "learning_rate": 7.143129165344336e-06, "loss": 1.0097, "step": 10288 }, { "epoch": 3.2322280464102713, "grad_norm": 0.96484375, "learning_rate": 7.140590288797208e-06, "loss": 1.1427, "step": 10290 }, { "epoch": 3.232856273435813, "grad_norm": 0.8828125, "learning_rate": 7.1380514122500795e-06, "loss": 1.0051, "step": 10292 }, { "epoch": 3.2334845004613544, "grad_norm": 0.9609375, "learning_rate": 7.135512535702952e-06, "loss": 1.1305, "step": 10294 }, { "epoch": 3.2341127274868957, "grad_norm": 0.88671875, "learning_rate": 7.132973659155824e-06, "loss": 1.1982, "step": 10296 }, { "epoch": 3.234740954512437, "grad_norm": 0.9453125, "learning_rate": 7.130434782608696e-06, "loss": 1.1885, "step": 10298 }, { "epoch": 3.2353691815379784, "grad_norm": 0.9375, "learning_rate": 7.1278959060615684e-06, "loss": 1.2612, "step": 10300 }, { "epoch": 3.2359974085635197, "grad_norm": 1.046875, "learning_rate": 7.125357029514441e-06, "loss": 1.1184, "step": 10302 }, { "epoch": 3.236625635589061, "grad_norm": 0.96484375, "learning_rate": 7.122818152967312e-06, "loss": 1.0652, "step": 10304 }, { "epoch": 3.2372538626146024, "grad_norm": 0.87109375, "learning_rate": 7.120279276420184e-06, "loss": 1.1797, "step": 10306 }, { "epoch": 3.2378820896401437, "grad_norm": 0.86328125, "learning_rate": 7.1177403998730565e-06, "loss": 1.1021, "step": 10308 }, { "epoch": 3.238510316665685, "grad_norm": 0.89453125, "learning_rate": 7.115201523325929e-06, "loss": 1.1729, "step": 10310 }, { "epoch": 3.2391385436912263, "grad_norm": 0.97265625, "learning_rate": 7.112662646778801e-06, "loss": 1.1686, "step": 10312 }, { "epoch": 3.2397667707167677, "grad_norm": 0.97265625, "learning_rate": 7.110123770231672e-06, "loss": 1.1495, "step": 10314 }, { "epoch": 3.240394997742309, "grad_norm": 0.90234375, "learning_rate": 7.107584893684545e-06, "loss": 1.155, "step": 10316 }, { "epoch": 3.2410232247678503, "grad_norm": 0.921875, "learning_rate": 7.105046017137417e-06, "loss": 1.0967, "step": 10318 }, { "epoch": 3.2416514517933916, "grad_norm": 0.9296875, "learning_rate": 7.102507140590289e-06, "loss": 1.3103, "step": 10320 }, { "epoch": 3.2422796788189334, "grad_norm": 0.9296875, "learning_rate": 7.099968264043161e-06, "loss": 1.188, "step": 10322 }, { "epoch": 3.2429079058444747, "grad_norm": 0.9609375, "learning_rate": 7.0974293874960335e-06, "loss": 1.1541, "step": 10324 }, { "epoch": 3.243536132870016, "grad_norm": 0.92578125, "learning_rate": 7.094890510948905e-06, "loss": 1.2587, "step": 10326 }, { "epoch": 3.2441643598955574, "grad_norm": 0.9296875, "learning_rate": 7.092351634401777e-06, "loss": 1.1103, "step": 10328 }, { "epoch": 3.2447925869210987, "grad_norm": 0.89453125, "learning_rate": 7.089812757854649e-06, "loss": 1.1622, "step": 10330 }, { "epoch": 3.24542081394664, "grad_norm": 0.9765625, "learning_rate": 7.0872738813075216e-06, "loss": 1.1794, "step": 10332 }, { "epoch": 3.2460490409721814, "grad_norm": 0.9296875, "learning_rate": 7.084735004760394e-06, "loss": 1.1815, "step": 10334 }, { "epoch": 3.2466772679977227, "grad_norm": 0.8984375, "learning_rate": 7.082196128213267e-06, "loss": 1.1693, "step": 10336 }, { "epoch": 3.247305495023264, "grad_norm": 1.203125, "learning_rate": 7.079657251666139e-06, "loss": 1.2519, "step": 10338 }, { "epoch": 3.2479337220488054, "grad_norm": 0.94140625, "learning_rate": 7.0771183751190105e-06, "loss": 1.0799, "step": 10340 }, { "epoch": 3.2485619490743467, "grad_norm": 0.92578125, "learning_rate": 7.074579498571883e-06, "loss": 1.2206, "step": 10342 }, { "epoch": 3.249190176099888, "grad_norm": 0.9609375, "learning_rate": 7.072040622024755e-06, "loss": 1.2237, "step": 10344 }, { "epoch": 3.2498184031254294, "grad_norm": 0.921875, "learning_rate": 7.069501745477627e-06, "loss": 1.1214, "step": 10346 }, { "epoch": 3.2504466301509707, "grad_norm": 0.94921875, "learning_rate": 7.066962868930499e-06, "loss": 1.1268, "step": 10348 }, { "epoch": 3.251074857176512, "grad_norm": 0.9609375, "learning_rate": 7.064423992383372e-06, "loss": 1.0585, "step": 10350 }, { "epoch": 3.2517030842020533, "grad_norm": 0.90625, "learning_rate": 7.061885115836243e-06, "loss": 1.2253, "step": 10352 }, { "epoch": 3.2523313112275947, "grad_norm": 0.984375, "learning_rate": 7.059346239289115e-06, "loss": 1.2403, "step": 10354 }, { "epoch": 3.252959538253136, "grad_norm": 0.97265625, "learning_rate": 7.0568073627419875e-06, "loss": 1.1713, "step": 10356 }, { "epoch": 3.2535877652786778, "grad_norm": 0.875, "learning_rate": 7.05426848619486e-06, "loss": 1.3097, "step": 10358 }, { "epoch": 3.254215992304219, "grad_norm": 0.8828125, "learning_rate": 7.051729609647732e-06, "loss": 1.145, "step": 10360 }, { "epoch": 3.2548442193297604, "grad_norm": 0.99609375, "learning_rate": 7.049190733100604e-06, "loss": 1.1837, "step": 10362 }, { "epoch": 3.2554724463553018, "grad_norm": 0.98828125, "learning_rate": 7.0466518565534755e-06, "loss": 1.1316, "step": 10364 }, { "epoch": 3.256100673380843, "grad_norm": 1.0234375, "learning_rate": 7.044112980006348e-06, "loss": 1.2483, "step": 10366 }, { "epoch": 3.2567289004063844, "grad_norm": 1.0234375, "learning_rate": 7.04157410345922e-06, "loss": 1.2733, "step": 10368 }, { "epoch": 3.2573571274319257, "grad_norm": 1.0078125, "learning_rate": 7.039035226912092e-06, "loss": 1.2181, "step": 10370 }, { "epoch": 3.257985354457467, "grad_norm": 0.9921875, "learning_rate": 7.0364963503649645e-06, "loss": 1.1303, "step": 10372 }, { "epoch": 3.2586135814830084, "grad_norm": 0.9140625, "learning_rate": 7.033957473817836e-06, "loss": 1.1117, "step": 10374 }, { "epoch": 3.2592418085085497, "grad_norm": 0.89453125, "learning_rate": 7.031418597270708e-06, "loss": 1.058, "step": 10376 }, { "epoch": 3.259870035534091, "grad_norm": 1.03125, "learning_rate": 7.02887972072358e-06, "loss": 1.0555, "step": 10378 }, { "epoch": 3.2604982625596324, "grad_norm": 1.015625, "learning_rate": 7.0263408441764525e-06, "loss": 1.1395, "step": 10380 }, { "epoch": 3.2611264895851737, "grad_norm": 0.99609375, "learning_rate": 7.023801967629325e-06, "loss": 1.0492, "step": 10382 }, { "epoch": 3.261754716610715, "grad_norm": 1.0625, "learning_rate": 7.021263091082197e-06, "loss": 1.1549, "step": 10384 }, { "epoch": 3.2623829436362564, "grad_norm": 1.109375, "learning_rate": 7.018724214535068e-06, "loss": 1.0926, "step": 10386 }, { "epoch": 3.263011170661798, "grad_norm": 0.9921875, "learning_rate": 7.016185337987941e-06, "loss": 1.1246, "step": 10388 }, { "epoch": 3.2636393976873395, "grad_norm": 0.93359375, "learning_rate": 7.013646461440813e-06, "loss": 1.1074, "step": 10390 }, { "epoch": 3.264267624712881, "grad_norm": 0.90234375, "learning_rate": 7.011107584893685e-06, "loss": 1.1876, "step": 10392 }, { "epoch": 3.264895851738422, "grad_norm": 1.0, "learning_rate": 7.008568708346557e-06, "loss": 1.1291, "step": 10394 }, { "epoch": 3.2655240787639634, "grad_norm": 0.99609375, "learning_rate": 7.006029831799429e-06, "loss": 1.1616, "step": 10396 }, { "epoch": 3.2661523057895048, "grad_norm": 0.953125, "learning_rate": 7.003490955252301e-06, "loss": 1.2542, "step": 10398 }, { "epoch": 3.266780532815046, "grad_norm": 1.0859375, "learning_rate": 7.000952078705173e-06, "loss": 1.2848, "step": 10400 }, { "epoch": 3.2674087598405874, "grad_norm": 0.99609375, "learning_rate": 6.998413202158045e-06, "loss": 1.1474, "step": 10402 }, { "epoch": 3.2680369868661288, "grad_norm": 0.875, "learning_rate": 6.995874325610918e-06, "loss": 1.2233, "step": 10404 }, { "epoch": 3.26866521389167, "grad_norm": 1.0390625, "learning_rate": 6.99333544906379e-06, "loss": 1.1822, "step": 10406 }, { "epoch": 3.2692934409172114, "grad_norm": 0.9140625, "learning_rate": 6.990796572516661e-06, "loss": 1.0164, "step": 10408 }, { "epoch": 3.2699216679427527, "grad_norm": 0.87890625, "learning_rate": 6.9882576959695334e-06, "loss": 1.1886, "step": 10410 }, { "epoch": 3.270549894968294, "grad_norm": 0.91015625, "learning_rate": 6.985718819422406e-06, "loss": 1.1992, "step": 10412 }, { "epoch": 3.2711781219938354, "grad_norm": 0.94140625, "learning_rate": 6.983179942875278e-06, "loss": 1.1893, "step": 10414 }, { "epoch": 3.2718063490193767, "grad_norm": 0.91015625, "learning_rate": 6.98064106632815e-06, "loss": 1.1882, "step": 10416 }, { "epoch": 3.272434576044918, "grad_norm": 1.0390625, "learning_rate": 6.978102189781022e-06, "loss": 1.2091, "step": 10418 }, { "epoch": 3.2730628030704594, "grad_norm": 1.0390625, "learning_rate": 6.975563313233894e-06, "loss": 1.2254, "step": 10420 }, { "epoch": 3.2736910300960007, "grad_norm": 0.89453125, "learning_rate": 6.973024436686767e-06, "loss": 1.1037, "step": 10422 }, { "epoch": 3.2743192571215425, "grad_norm": 0.9765625, "learning_rate": 6.970485560139639e-06, "loss": 1.0805, "step": 10424 }, { "epoch": 3.274947484147084, "grad_norm": 0.93359375, "learning_rate": 6.967946683592511e-06, "loss": 1.0814, "step": 10426 }, { "epoch": 3.275575711172625, "grad_norm": 0.86328125, "learning_rate": 6.9654078070453835e-06, "loss": 1.248, "step": 10428 }, { "epoch": 3.2762039381981665, "grad_norm": 0.94921875, "learning_rate": 6.962868930498256e-06, "loss": 1.1478, "step": 10430 }, { "epoch": 3.276832165223708, "grad_norm": 0.90625, "learning_rate": 6.960330053951128e-06, "loss": 1.1056, "step": 10432 }, { "epoch": 3.277460392249249, "grad_norm": 0.94921875, "learning_rate": 6.957791177403999e-06, "loss": 1.0772, "step": 10434 }, { "epoch": 3.2780886192747904, "grad_norm": 0.921875, "learning_rate": 6.9552523008568716e-06, "loss": 1.2289, "step": 10436 }, { "epoch": 3.2787168463003318, "grad_norm": 1.0859375, "learning_rate": 6.952713424309744e-06, "loss": 1.3057, "step": 10438 }, { "epoch": 3.279345073325873, "grad_norm": 0.94921875, "learning_rate": 6.950174547762616e-06, "loss": 1.1268, "step": 10440 }, { "epoch": 3.2799733003514144, "grad_norm": 0.95703125, "learning_rate": 6.947635671215488e-06, "loss": 1.2393, "step": 10442 }, { "epoch": 3.2806015273769558, "grad_norm": 0.921875, "learning_rate": 6.9450967946683605e-06, "loss": 1.0518, "step": 10444 }, { "epoch": 3.281229754402497, "grad_norm": 0.859375, "learning_rate": 6.942557918121232e-06, "loss": 1.2375, "step": 10446 }, { "epoch": 3.2818579814280384, "grad_norm": 0.93359375, "learning_rate": 6.940019041574104e-06, "loss": 1.0491, "step": 10448 }, { "epoch": 3.2824862084535797, "grad_norm": 0.93359375, "learning_rate": 6.937480165026976e-06, "loss": 1.1878, "step": 10450 }, { "epoch": 3.283114435479121, "grad_norm": 0.875, "learning_rate": 6.9349412884798485e-06, "loss": 1.0074, "step": 10452 }, { "epoch": 3.283742662504663, "grad_norm": 0.9375, "learning_rate": 6.932402411932721e-06, "loss": 1.1085, "step": 10454 }, { "epoch": 3.284370889530204, "grad_norm": 0.91796875, "learning_rate": 6.929863535385592e-06, "loss": 1.1297, "step": 10456 }, { "epoch": 3.2849991165557455, "grad_norm": 0.984375, "learning_rate": 6.927324658838464e-06, "loss": 1.1374, "step": 10458 }, { "epoch": 3.285627343581287, "grad_norm": 1.09375, "learning_rate": 6.924785782291337e-06, "loss": 1.1152, "step": 10460 }, { "epoch": 3.286255570606828, "grad_norm": 0.91796875, "learning_rate": 6.922246905744209e-06, "loss": 1.0489, "step": 10462 }, { "epoch": 3.2868837976323695, "grad_norm": 0.875, "learning_rate": 6.919708029197081e-06, "loss": 1.0782, "step": 10464 }, { "epoch": 3.287512024657911, "grad_norm": 0.953125, "learning_rate": 6.917169152649953e-06, "loss": 1.1337, "step": 10466 }, { "epoch": 3.288140251683452, "grad_norm": 0.87890625, "learning_rate": 6.914630276102825e-06, "loss": 1.1485, "step": 10468 }, { "epoch": 3.2887684787089935, "grad_norm": 0.95703125, "learning_rate": 6.912091399555697e-06, "loss": 1.2591, "step": 10470 }, { "epoch": 3.289396705734535, "grad_norm": 0.94921875, "learning_rate": 6.909552523008569e-06, "loss": 1.1256, "step": 10472 }, { "epoch": 3.290024932760076, "grad_norm": 0.8828125, "learning_rate": 6.907013646461441e-06, "loss": 1.1078, "step": 10474 }, { "epoch": 3.2906531597856175, "grad_norm": 0.890625, "learning_rate": 6.904474769914314e-06, "loss": 1.3103, "step": 10476 }, { "epoch": 3.291281386811159, "grad_norm": 0.95703125, "learning_rate": 6.901935893367185e-06, "loss": 1.2223, "step": 10478 }, { "epoch": 3.2919096138367, "grad_norm": 0.94921875, "learning_rate": 6.899397016820057e-06, "loss": 1.1194, "step": 10480 }, { "epoch": 3.2925378408622414, "grad_norm": 0.91796875, "learning_rate": 6.8968581402729294e-06, "loss": 1.1599, "step": 10482 }, { "epoch": 3.2931660678877828, "grad_norm": 0.97265625, "learning_rate": 6.894319263725802e-06, "loss": 1.0042, "step": 10484 }, { "epoch": 3.293794294913324, "grad_norm": 0.875, "learning_rate": 6.891780387178674e-06, "loss": 1.0522, "step": 10486 }, { "epoch": 3.2944225219388654, "grad_norm": 0.90625, "learning_rate": 6.889241510631546e-06, "loss": 1.1101, "step": 10488 }, { "epoch": 3.295050748964407, "grad_norm": 0.96875, "learning_rate": 6.8867026340844175e-06, "loss": 1.0947, "step": 10490 }, { "epoch": 3.2956789759899485, "grad_norm": 0.90625, "learning_rate": 6.88416375753729e-06, "loss": 1.1662, "step": 10492 }, { "epoch": 3.29630720301549, "grad_norm": 0.98828125, "learning_rate": 6.881624880990162e-06, "loss": 1.1615, "step": 10494 }, { "epoch": 3.296935430041031, "grad_norm": 0.8515625, "learning_rate": 6.879086004443034e-06, "loss": 1.1144, "step": 10496 }, { "epoch": 3.2975636570665725, "grad_norm": 0.984375, "learning_rate": 6.8765471278959064e-06, "loss": 1.1171, "step": 10498 }, { "epoch": 3.298191884092114, "grad_norm": 0.91796875, "learning_rate": 6.874008251348779e-06, "loss": 1.283, "step": 10500 }, { "epoch": 3.298820111117655, "grad_norm": 0.9765625, "learning_rate": 6.87146937480165e-06, "loss": 1.1525, "step": 10502 }, { "epoch": 3.2994483381431965, "grad_norm": 0.9296875, "learning_rate": 6.868930498254522e-06, "loss": 1.1565, "step": 10504 }, { "epoch": 3.300076565168738, "grad_norm": 0.91015625, "learning_rate": 6.866391621707395e-06, "loss": 1.0658, "step": 10506 }, { "epoch": 3.300704792194279, "grad_norm": 0.9375, "learning_rate": 6.8638527451602676e-06, "loss": 1.2982, "step": 10508 }, { "epoch": 3.3013330192198205, "grad_norm": 0.9296875, "learning_rate": 6.86131386861314e-06, "loss": 1.3325, "step": 10510 }, { "epoch": 3.301961246245362, "grad_norm": 0.9296875, "learning_rate": 6.858774992066012e-06, "loss": 1.1668, "step": 10512 }, { "epoch": 3.302589473270903, "grad_norm": 0.9296875, "learning_rate": 6.856236115518884e-06, "loss": 1.2057, "step": 10514 }, { "epoch": 3.3032177002964445, "grad_norm": 1.015625, "learning_rate": 6.853697238971756e-06, "loss": 1.1159, "step": 10516 }, { "epoch": 3.3038459273219862, "grad_norm": 0.9375, "learning_rate": 6.851158362424628e-06, "loss": 1.1096, "step": 10518 }, { "epoch": 3.3044741543475276, "grad_norm": 0.890625, "learning_rate": 6.8486194858775e-06, "loss": 1.1195, "step": 10520 }, { "epoch": 3.305102381373069, "grad_norm": 0.88671875, "learning_rate": 6.846080609330372e-06, "loss": 1.2133, "step": 10522 }, { "epoch": 3.30573060839861, "grad_norm": 1.015625, "learning_rate": 6.8435417327832446e-06, "loss": 1.2796, "step": 10524 }, { "epoch": 3.3063588354241515, "grad_norm": 1.0234375, "learning_rate": 6.841002856236117e-06, "loss": 1.0225, "step": 10526 }, { "epoch": 3.306987062449693, "grad_norm": 1.0, "learning_rate": 6.838463979688988e-06, "loss": 1.1179, "step": 10528 }, { "epoch": 3.307615289475234, "grad_norm": 0.87109375, "learning_rate": 6.83592510314186e-06, "loss": 1.1149, "step": 10530 }, { "epoch": 3.3082435165007755, "grad_norm": 0.93359375, "learning_rate": 6.833386226594733e-06, "loss": 1.2376, "step": 10532 }, { "epoch": 3.308871743526317, "grad_norm": 1.0078125, "learning_rate": 6.830847350047605e-06, "loss": 1.043, "step": 10534 }, { "epoch": 3.309499970551858, "grad_norm": 0.9765625, "learning_rate": 6.828308473500477e-06, "loss": 1.1284, "step": 10536 }, { "epoch": 3.3101281975773995, "grad_norm": 1.0234375, "learning_rate": 6.8257695969533485e-06, "loss": 1.1421, "step": 10538 }, { "epoch": 3.310756424602941, "grad_norm": 0.90234375, "learning_rate": 6.823230720406221e-06, "loss": 1.0513, "step": 10540 }, { "epoch": 3.311384651628482, "grad_norm": 0.93359375, "learning_rate": 6.820691843859093e-06, "loss": 1.2187, "step": 10542 }, { "epoch": 3.3120128786540235, "grad_norm": 0.96484375, "learning_rate": 6.818152967311965e-06, "loss": 1.2302, "step": 10544 }, { "epoch": 3.312641105679565, "grad_norm": 0.93359375, "learning_rate": 6.815614090764837e-06, "loss": 1.0384, "step": 10546 }, { "epoch": 3.313269332705106, "grad_norm": 0.95703125, "learning_rate": 6.81307521421771e-06, "loss": 1.1188, "step": 10548 }, { "epoch": 3.3138975597306475, "grad_norm": 0.90625, "learning_rate": 6.810536337670581e-06, "loss": 1.0462, "step": 10550 }, { "epoch": 3.314525786756189, "grad_norm": 0.9375, "learning_rate": 6.807997461123453e-06, "loss": 1.232, "step": 10552 }, { "epoch": 3.31515401378173, "grad_norm": 0.953125, "learning_rate": 6.8054585845763255e-06, "loss": 1.0462, "step": 10554 }, { "epoch": 3.315782240807272, "grad_norm": 0.9765625, "learning_rate": 6.802919708029198e-06, "loss": 1.1489, "step": 10556 }, { "epoch": 3.3164104678328132, "grad_norm": 1.015625, "learning_rate": 6.80038083148207e-06, "loss": 1.0657, "step": 10558 }, { "epoch": 3.3170386948583546, "grad_norm": 0.9453125, "learning_rate": 6.797841954934941e-06, "loss": 1.079, "step": 10560 }, { "epoch": 3.317666921883896, "grad_norm": 0.8828125, "learning_rate": 6.7953030783878135e-06, "loss": 1.1705, "step": 10562 }, { "epoch": 3.318295148909437, "grad_norm": 0.921875, "learning_rate": 6.792764201840686e-06, "loss": 1.1055, "step": 10564 }, { "epoch": 3.3189233759349785, "grad_norm": 0.8515625, "learning_rate": 6.790225325293558e-06, "loss": 1.084, "step": 10566 }, { "epoch": 3.31955160296052, "grad_norm": 1.0, "learning_rate": 6.78768644874643e-06, "loss": 1.1487, "step": 10568 }, { "epoch": 3.320179829986061, "grad_norm": 0.93359375, "learning_rate": 6.7851475721993024e-06, "loss": 1.2549, "step": 10570 }, { "epoch": 3.3208080570116025, "grad_norm": 0.890625, "learning_rate": 6.782608695652174e-06, "loss": 1.1988, "step": 10572 }, { "epoch": 3.321436284037144, "grad_norm": 0.91015625, "learning_rate": 6.780069819105046e-06, "loss": 1.1349, "step": 10574 }, { "epoch": 3.322064511062685, "grad_norm": 0.94140625, "learning_rate": 6.777530942557918e-06, "loss": 1.0763, "step": 10576 }, { "epoch": 3.3226927380882265, "grad_norm": 0.8984375, "learning_rate": 6.7749920660107905e-06, "loss": 0.9752, "step": 10578 }, { "epoch": 3.323320965113768, "grad_norm": 0.96875, "learning_rate": 6.772453189463663e-06, "loss": 0.9849, "step": 10580 }, { "epoch": 3.323949192139309, "grad_norm": 1.0390625, "learning_rate": 6.769914312916535e-06, "loss": 1.1948, "step": 10582 }, { "epoch": 3.324577419164851, "grad_norm": 0.91015625, "learning_rate": 6.767375436369406e-06, "loss": 1.2006, "step": 10584 }, { "epoch": 3.3252056461903923, "grad_norm": 0.87109375, "learning_rate": 6.764836559822279e-06, "loss": 1.1086, "step": 10586 }, { "epoch": 3.3258338732159336, "grad_norm": 0.93359375, "learning_rate": 6.762297683275151e-06, "loss": 1.1083, "step": 10588 }, { "epoch": 3.326462100241475, "grad_norm": 0.87109375, "learning_rate": 6.759758806728023e-06, "loss": 1.1684, "step": 10590 }, { "epoch": 3.3270903272670163, "grad_norm": 0.9375, "learning_rate": 6.757219930180896e-06, "loss": 1.2012, "step": 10592 }, { "epoch": 3.3277185542925576, "grad_norm": 0.99609375, "learning_rate": 6.754681053633768e-06, "loss": 1.1999, "step": 10594 }, { "epoch": 3.328346781318099, "grad_norm": 0.9375, "learning_rate": 6.752142177086641e-06, "loss": 1.1609, "step": 10596 }, { "epoch": 3.3289750083436402, "grad_norm": 0.91015625, "learning_rate": 6.749603300539512e-06, "loss": 1.1023, "step": 10598 }, { "epoch": 3.3296032353691816, "grad_norm": 0.94921875, "learning_rate": 6.747064423992384e-06, "loss": 1.2342, "step": 10600 }, { "epoch": 3.330231462394723, "grad_norm": 0.890625, "learning_rate": 6.744525547445256e-06, "loss": 1.1796, "step": 10602 }, { "epoch": 3.3308596894202642, "grad_norm": 0.99609375, "learning_rate": 6.741986670898129e-06, "loss": 1.2729, "step": 10604 }, { "epoch": 3.3314879164458056, "grad_norm": 0.953125, "learning_rate": 6.739447794351001e-06, "loss": 0.9809, "step": 10606 }, { "epoch": 3.332116143471347, "grad_norm": 0.94140625, "learning_rate": 6.736908917803873e-06, "loss": 1.2242, "step": 10608 }, { "epoch": 3.332744370496888, "grad_norm": 1.171875, "learning_rate": 6.7343700412567445e-06, "loss": 1.141, "step": 10610 }, { "epoch": 3.3333725975224295, "grad_norm": 0.88671875, "learning_rate": 6.731831164709617e-06, "loss": 1.1341, "step": 10612 }, { "epoch": 3.334000824547971, "grad_norm": 0.91015625, "learning_rate": 6.729292288162489e-06, "loss": 1.206, "step": 10614 }, { "epoch": 3.334629051573512, "grad_norm": 0.94140625, "learning_rate": 6.726753411615361e-06, "loss": 1.2741, "step": 10616 }, { "epoch": 3.3352572785990535, "grad_norm": 0.984375, "learning_rate": 6.724214535068233e-06, "loss": 1.2351, "step": 10618 }, { "epoch": 3.335885505624595, "grad_norm": 0.890625, "learning_rate": 6.721675658521105e-06, "loss": 1.1367, "step": 10620 }, { "epoch": 3.3365137326501366, "grad_norm": 0.9921875, "learning_rate": 6.719136781973977e-06, "loss": 1.0592, "step": 10622 }, { "epoch": 3.337141959675678, "grad_norm": 0.890625, "learning_rate": 6.716597905426849e-06, "loss": 1.2064, "step": 10624 }, { "epoch": 3.3377701867012193, "grad_norm": 0.97265625, "learning_rate": 6.7140590288797215e-06, "loss": 1.1519, "step": 10626 }, { "epoch": 3.3383984137267606, "grad_norm": 0.98046875, "learning_rate": 6.711520152332594e-06, "loss": 1.1904, "step": 10628 }, { "epoch": 3.339026640752302, "grad_norm": 0.9375, "learning_rate": 6.708981275785466e-06, "loss": 0.9693, "step": 10630 }, { "epoch": 3.3396548677778433, "grad_norm": 0.87109375, "learning_rate": 6.706442399238337e-06, "loss": 1.3857, "step": 10632 }, { "epoch": 3.3402830948033846, "grad_norm": 0.921875, "learning_rate": 6.7039035226912095e-06, "loss": 1.1203, "step": 10634 }, { "epoch": 3.340911321828926, "grad_norm": 0.89453125, "learning_rate": 6.701364646144082e-06, "loss": 1.0453, "step": 10636 }, { "epoch": 3.3415395488544672, "grad_norm": 0.98046875, "learning_rate": 6.698825769596954e-06, "loss": 1.2062, "step": 10638 }, { "epoch": 3.3421677758800086, "grad_norm": 0.88671875, "learning_rate": 6.696286893049826e-06, "loss": 1.1011, "step": 10640 }, { "epoch": 3.34279600290555, "grad_norm": 0.921875, "learning_rate": 6.693748016502698e-06, "loss": 1.149, "step": 10642 }, { "epoch": 3.3434242299310912, "grad_norm": 0.921875, "learning_rate": 6.69120913995557e-06, "loss": 1.1035, "step": 10644 }, { "epoch": 3.3440524569566326, "grad_norm": 0.92578125, "learning_rate": 6.688670263408442e-06, "loss": 1.1818, "step": 10646 }, { "epoch": 3.344680683982174, "grad_norm": 0.95703125, "learning_rate": 6.686131386861314e-06, "loss": 1.1823, "step": 10648 }, { "epoch": 3.3453089110077157, "grad_norm": 0.890625, "learning_rate": 6.6835925103141865e-06, "loss": 1.2015, "step": 10650 }, { "epoch": 3.345937138033257, "grad_norm": 1.03125, "learning_rate": 6.681053633767059e-06, "loss": 1.1749, "step": 10652 }, { "epoch": 3.3465653650587983, "grad_norm": 0.95703125, "learning_rate": 6.67851475721993e-06, "loss": 1.1185, "step": 10654 }, { "epoch": 3.3471935920843396, "grad_norm": 0.90234375, "learning_rate": 6.675975880672802e-06, "loss": 1.0148, "step": 10656 }, { "epoch": 3.347821819109881, "grad_norm": 0.97265625, "learning_rate": 6.673437004125675e-06, "loss": 1.0726, "step": 10658 }, { "epoch": 3.3484500461354223, "grad_norm": 1.0234375, "learning_rate": 6.670898127578547e-06, "loss": 1.1161, "step": 10660 }, { "epoch": 3.3490782731609636, "grad_norm": 1.0703125, "learning_rate": 6.668359251031419e-06, "loss": 1.1386, "step": 10662 }, { "epoch": 3.349706500186505, "grad_norm": 0.90234375, "learning_rate": 6.665820374484291e-06, "loss": 1.0713, "step": 10664 }, { "epoch": 3.3503347272120463, "grad_norm": 1.0078125, "learning_rate": 6.663281497937163e-06, "loss": 1.0811, "step": 10666 }, { "epoch": 3.3509629542375876, "grad_norm": 0.953125, "learning_rate": 6.660742621390035e-06, "loss": 1.1566, "step": 10668 }, { "epoch": 3.351591181263129, "grad_norm": 1.0, "learning_rate": 6.658203744842907e-06, "loss": 1.023, "step": 10670 }, { "epoch": 3.3522194082886703, "grad_norm": 0.9765625, "learning_rate": 6.655664868295779e-06, "loss": 1.1115, "step": 10672 }, { "epoch": 3.3528476353142116, "grad_norm": 0.9296875, "learning_rate": 6.653125991748652e-06, "loss": 1.4462, "step": 10674 }, { "epoch": 3.353475862339753, "grad_norm": 0.88671875, "learning_rate": 6.650587115201523e-06, "loss": 1.0297, "step": 10676 }, { "epoch": 3.3541040893652943, "grad_norm": 1.0859375, "learning_rate": 6.648048238654397e-06, "loss": 1.0831, "step": 10678 }, { "epoch": 3.3547323163908356, "grad_norm": 0.98046875, "learning_rate": 6.645509362107268e-06, "loss": 1.1362, "step": 10680 }, { "epoch": 3.355360543416377, "grad_norm": 0.9375, "learning_rate": 6.6429704855601405e-06, "loss": 1.1074, "step": 10682 }, { "epoch": 3.3559887704419182, "grad_norm": 0.9609375, "learning_rate": 6.640431609013013e-06, "loss": 1.0628, "step": 10684 }, { "epoch": 3.3566169974674596, "grad_norm": 0.93359375, "learning_rate": 6.637892732465885e-06, "loss": 1.178, "step": 10686 }, { "epoch": 3.3572452244930013, "grad_norm": 0.88671875, "learning_rate": 6.635353855918757e-06, "loss": 1.0914, "step": 10688 }, { "epoch": 3.3578734515185427, "grad_norm": 0.9765625, "learning_rate": 6.6328149793716286e-06, "loss": 1.1933, "step": 10690 }, { "epoch": 3.358501678544084, "grad_norm": 0.90234375, "learning_rate": 6.630276102824501e-06, "loss": 1.2403, "step": 10692 }, { "epoch": 3.3591299055696253, "grad_norm": 1.0078125, "learning_rate": 6.627737226277373e-06, "loss": 1.2356, "step": 10694 }, { "epoch": 3.3597581325951666, "grad_norm": 0.96875, "learning_rate": 6.625198349730245e-06, "loss": 1.2369, "step": 10696 }, { "epoch": 3.360386359620708, "grad_norm": 0.94921875, "learning_rate": 6.6226594731831175e-06, "loss": 1.159, "step": 10698 }, { "epoch": 3.3610145866462493, "grad_norm": 1.0, "learning_rate": 6.62012059663599e-06, "loss": 1.2388, "step": 10700 }, { "epoch": 3.3616428136717906, "grad_norm": 1.03125, "learning_rate": 6.617581720088861e-06, "loss": 1.264, "step": 10702 }, { "epoch": 3.362271040697332, "grad_norm": 0.89453125, "learning_rate": 6.615042843541733e-06, "loss": 1.0578, "step": 10704 }, { "epoch": 3.3628992677228733, "grad_norm": 0.8984375, "learning_rate": 6.6125039669946056e-06, "loss": 1.2439, "step": 10706 }, { "epoch": 3.3635274947484146, "grad_norm": 0.86328125, "learning_rate": 6.609965090447478e-06, "loss": 1.2267, "step": 10708 }, { "epoch": 3.364155721773956, "grad_norm": 0.98828125, "learning_rate": 6.60742621390035e-06, "loss": 1.0835, "step": 10710 }, { "epoch": 3.3647839487994973, "grad_norm": 0.94921875, "learning_rate": 6.604887337353222e-06, "loss": 1.1791, "step": 10712 }, { "epoch": 3.3654121758250386, "grad_norm": 1.21875, "learning_rate": 6.602348460806094e-06, "loss": 1.1867, "step": 10714 }, { "epoch": 3.3660404028505804, "grad_norm": 0.921875, "learning_rate": 6.599809584258966e-06, "loss": 1.1989, "step": 10716 }, { "epoch": 3.3666686298761217, "grad_norm": 0.99609375, "learning_rate": 6.597270707711838e-06, "loss": 1.1781, "step": 10718 }, { "epoch": 3.367296856901663, "grad_norm": 1.015625, "learning_rate": 6.59473183116471e-06, "loss": 1.1196, "step": 10720 }, { "epoch": 3.3679250839272044, "grad_norm": 0.9609375, "learning_rate": 6.5921929546175825e-06, "loss": 1.0938, "step": 10722 }, { "epoch": 3.3685533109527457, "grad_norm": 0.8984375, "learning_rate": 6.589654078070454e-06, "loss": 1.1731, "step": 10724 }, { "epoch": 3.369181537978287, "grad_norm": 0.9375, "learning_rate": 6.587115201523326e-06, "loss": 1.1171, "step": 10726 }, { "epoch": 3.3698097650038283, "grad_norm": 0.94140625, "learning_rate": 6.584576324976198e-06, "loss": 1.1154, "step": 10728 }, { "epoch": 3.3704379920293697, "grad_norm": 0.99609375, "learning_rate": 6.582037448429071e-06, "loss": 1.1677, "step": 10730 }, { "epoch": 3.371066219054911, "grad_norm": 0.9375, "learning_rate": 6.579498571881943e-06, "loss": 1.2053, "step": 10732 }, { "epoch": 3.3716944460804523, "grad_norm": 0.984375, "learning_rate": 6.576959695334815e-06, "loss": 1.051, "step": 10734 }, { "epoch": 3.3723226731059937, "grad_norm": 1.0234375, "learning_rate": 6.5744208187876865e-06, "loss": 1.0513, "step": 10736 }, { "epoch": 3.372950900131535, "grad_norm": 1.0625, "learning_rate": 6.571881942240559e-06, "loss": 1.3122, "step": 10738 }, { "epoch": 3.3735791271570763, "grad_norm": 0.97265625, "learning_rate": 6.569343065693431e-06, "loss": 1.2466, "step": 10740 }, { "epoch": 3.3742073541826176, "grad_norm": 1.0390625, "learning_rate": 6.566804189146303e-06, "loss": 1.0188, "step": 10742 }, { "epoch": 3.374835581208159, "grad_norm": 0.921875, "learning_rate": 6.564265312599175e-06, "loss": 1.2077, "step": 10744 }, { "epoch": 3.3754638082337003, "grad_norm": 0.921875, "learning_rate": 6.561726436052047e-06, "loss": 1.111, "step": 10746 }, { "epoch": 3.3760920352592416, "grad_norm": 1.0234375, "learning_rate": 6.559187559504919e-06, "loss": 1.0817, "step": 10748 }, { "epoch": 3.376720262284783, "grad_norm": 0.8984375, "learning_rate": 6.556648682957791e-06, "loss": 1.2191, "step": 10750 }, { "epoch": 3.3773484893103243, "grad_norm": 0.91015625, "learning_rate": 6.5541098064106634e-06, "loss": 1.082, "step": 10752 }, { "epoch": 3.377976716335866, "grad_norm": 0.9375, "learning_rate": 6.551570929863536e-06, "loss": 1.1786, "step": 10754 }, { "epoch": 3.3786049433614074, "grad_norm": 1.0859375, "learning_rate": 6.549032053316408e-06, "loss": 1.1452, "step": 10756 }, { "epoch": 3.3792331703869487, "grad_norm": 1.015625, "learning_rate": 6.546493176769279e-06, "loss": 1.1977, "step": 10758 }, { "epoch": 3.37986139741249, "grad_norm": 1.0078125, "learning_rate": 6.5439543002221515e-06, "loss": 1.1402, "step": 10760 }, { "epoch": 3.3804896244380314, "grad_norm": 0.97265625, "learning_rate": 6.541415423675024e-06, "loss": 1.2007, "step": 10762 }, { "epoch": 3.3811178514635727, "grad_norm": 0.87890625, "learning_rate": 6.538876547127897e-06, "loss": 1.0993, "step": 10764 }, { "epoch": 3.381746078489114, "grad_norm": 0.94140625, "learning_rate": 6.536337670580769e-06, "loss": 1.1458, "step": 10766 }, { "epoch": 3.3823743055146553, "grad_norm": 0.9765625, "learning_rate": 6.533798794033641e-06, "loss": 1.2291, "step": 10768 }, { "epoch": 3.3830025325401967, "grad_norm": 1.03125, "learning_rate": 6.5312599174865135e-06, "loss": 1.0954, "step": 10770 }, { "epoch": 3.383630759565738, "grad_norm": 0.875, "learning_rate": 6.528721040939385e-06, "loss": 1.3269, "step": 10772 }, { "epoch": 3.3842589865912793, "grad_norm": 0.9609375, "learning_rate": 6.526182164392257e-06, "loss": 1.1057, "step": 10774 }, { "epoch": 3.3848872136168207, "grad_norm": 0.96875, "learning_rate": 6.523643287845129e-06, "loss": 1.2137, "step": 10776 }, { "epoch": 3.385515440642362, "grad_norm": 0.8828125, "learning_rate": 6.521104411298002e-06, "loss": 1.1742, "step": 10778 }, { "epoch": 3.3861436676679033, "grad_norm": 0.93359375, "learning_rate": 6.518565534750874e-06, "loss": 1.2112, "step": 10780 }, { "epoch": 3.386771894693445, "grad_norm": 0.94921875, "learning_rate": 6.516026658203746e-06, "loss": 1.1953, "step": 10782 }, { "epoch": 3.3874001217189864, "grad_norm": 1.0, "learning_rate": 6.513487781656617e-06, "loss": 1.0078, "step": 10784 }, { "epoch": 3.3880283487445277, "grad_norm": 0.8828125, "learning_rate": 6.51094890510949e-06, "loss": 1.2054, "step": 10786 }, { "epoch": 3.388656575770069, "grad_norm": 0.93359375, "learning_rate": 6.508410028562362e-06, "loss": 1.0709, "step": 10788 }, { "epoch": 3.3892848027956104, "grad_norm": 1.0703125, "learning_rate": 6.505871152015234e-06, "loss": 1.0578, "step": 10790 }, { "epoch": 3.3899130298211517, "grad_norm": 0.9296875, "learning_rate": 6.503332275468106e-06, "loss": 1.07, "step": 10792 }, { "epoch": 3.390541256846693, "grad_norm": 0.99609375, "learning_rate": 6.5007933989209786e-06, "loss": 1.1555, "step": 10794 }, { "epoch": 3.3911694838722344, "grad_norm": 0.90234375, "learning_rate": 6.49825452237385e-06, "loss": 1.2158, "step": 10796 }, { "epoch": 3.3917977108977757, "grad_norm": 0.92578125, "learning_rate": 6.495715645826722e-06, "loss": 1.1943, "step": 10798 }, { "epoch": 3.392425937923317, "grad_norm": 0.97265625, "learning_rate": 6.493176769279594e-06, "loss": 1.169, "step": 10800 }, { "epoch": 3.3930541649488584, "grad_norm": 0.92578125, "learning_rate": 6.490637892732467e-06, "loss": 1.1648, "step": 10802 }, { "epoch": 3.3936823919743997, "grad_norm": 0.9296875, "learning_rate": 6.488099016185339e-06, "loss": 1.3229, "step": 10804 }, { "epoch": 3.394310618999941, "grad_norm": 0.9375, "learning_rate": 6.48556013963821e-06, "loss": 1.0462, "step": 10806 }, { "epoch": 3.3949388460254823, "grad_norm": 0.94921875, "learning_rate": 6.4830212630910825e-06, "loss": 1.1275, "step": 10808 }, { "epoch": 3.3955670730510237, "grad_norm": 0.9375, "learning_rate": 6.480482386543955e-06, "loss": 1.2203, "step": 10810 }, { "epoch": 3.396195300076565, "grad_norm": 1.0390625, "learning_rate": 6.477943509996827e-06, "loss": 1.1646, "step": 10812 }, { "epoch": 3.3968235271021063, "grad_norm": 0.96484375, "learning_rate": 6.475404633449699e-06, "loss": 1.201, "step": 10814 }, { "epoch": 3.3974517541276477, "grad_norm": 1.0078125, "learning_rate": 6.472865756902571e-06, "loss": 1.1171, "step": 10816 }, { "epoch": 3.398079981153189, "grad_norm": 0.93359375, "learning_rate": 6.470326880355443e-06, "loss": 1.2143, "step": 10818 }, { "epoch": 3.3987082081787308, "grad_norm": 0.9296875, "learning_rate": 6.467788003808315e-06, "loss": 1.3175, "step": 10820 }, { "epoch": 3.399336435204272, "grad_norm": 0.9609375, "learning_rate": 6.465249127261187e-06, "loss": 1.1409, "step": 10822 }, { "epoch": 3.3999646622298134, "grad_norm": 0.95703125, "learning_rate": 6.4627102507140595e-06, "loss": 1.2369, "step": 10824 }, { "epoch": 3.4005928892553547, "grad_norm": 0.9609375, "learning_rate": 6.460171374166932e-06, "loss": 1.1063, "step": 10826 }, { "epoch": 3.401221116280896, "grad_norm": 0.91796875, "learning_rate": 6.457632497619803e-06, "loss": 1.3036, "step": 10828 }, { "epoch": 3.4018493433064374, "grad_norm": 0.921875, "learning_rate": 6.455093621072675e-06, "loss": 1.248, "step": 10830 }, { "epoch": 3.4024775703319787, "grad_norm": 0.91796875, "learning_rate": 6.4525547445255475e-06, "loss": 1.2786, "step": 10832 }, { "epoch": 3.40310579735752, "grad_norm": 0.92578125, "learning_rate": 6.45001586797842e-06, "loss": 1.2079, "step": 10834 }, { "epoch": 3.4037340243830614, "grad_norm": 0.859375, "learning_rate": 6.447476991431292e-06, "loss": 1.1461, "step": 10836 }, { "epoch": 3.4043622514086027, "grad_norm": 0.84765625, "learning_rate": 6.444938114884164e-06, "loss": 1.12, "step": 10838 }, { "epoch": 3.404990478434144, "grad_norm": 0.96484375, "learning_rate": 6.442399238337036e-06, "loss": 1.1413, "step": 10840 }, { "epoch": 3.4056187054596854, "grad_norm": 0.88671875, "learning_rate": 6.439860361789908e-06, "loss": 1.1782, "step": 10842 }, { "epoch": 3.4062469324852267, "grad_norm": 0.90625, "learning_rate": 6.43732148524278e-06, "loss": 1.0453, "step": 10844 }, { "epoch": 3.406875159510768, "grad_norm": 0.9921875, "learning_rate": 6.434782608695652e-06, "loss": 1.1346, "step": 10846 }, { "epoch": 3.40750338653631, "grad_norm": 0.9921875, "learning_rate": 6.4322437321485245e-06, "loss": 0.9915, "step": 10848 }, { "epoch": 3.408131613561851, "grad_norm": 0.90625, "learning_rate": 6.429704855601398e-06, "loss": 1.026, "step": 10850 }, { "epoch": 3.4087598405873925, "grad_norm": 0.95703125, "learning_rate": 6.42716597905427e-06, "loss": 1.1636, "step": 10852 }, { "epoch": 3.409388067612934, "grad_norm": 0.99609375, "learning_rate": 6.424627102507141e-06, "loss": 1.2143, "step": 10854 }, { "epoch": 3.410016294638475, "grad_norm": 0.890625, "learning_rate": 6.4220882259600134e-06, "loss": 1.1237, "step": 10856 }, { "epoch": 3.4106445216640164, "grad_norm": 0.93359375, "learning_rate": 6.419549349412886e-06, "loss": 1.2076, "step": 10858 }, { "epoch": 3.4112727486895578, "grad_norm": 0.93359375, "learning_rate": 6.417010472865758e-06, "loss": 1.2396, "step": 10860 }, { "epoch": 3.411900975715099, "grad_norm": 0.96875, "learning_rate": 6.41447159631863e-06, "loss": 1.1429, "step": 10862 }, { "epoch": 3.4125292027406404, "grad_norm": 1.0078125, "learning_rate": 6.411932719771502e-06, "loss": 1.0231, "step": 10864 }, { "epoch": 3.4131574297661817, "grad_norm": 0.91015625, "learning_rate": 6.409393843224374e-06, "loss": 1.2666, "step": 10866 }, { "epoch": 3.413785656791723, "grad_norm": 0.99609375, "learning_rate": 6.406854966677246e-06, "loss": 1.1413, "step": 10868 }, { "epoch": 3.4144138838172644, "grad_norm": 0.984375, "learning_rate": 6.404316090130118e-06, "loss": 1.1285, "step": 10870 }, { "epoch": 3.4150421108428057, "grad_norm": 0.99609375, "learning_rate": 6.40177721358299e-06, "loss": 1.2473, "step": 10872 }, { "epoch": 3.415670337868347, "grad_norm": 1.0546875, "learning_rate": 6.399238337035863e-06, "loss": 1.0431, "step": 10874 }, { "epoch": 3.4162985648938884, "grad_norm": 0.9296875, "learning_rate": 6.396699460488735e-06, "loss": 1.1339, "step": 10876 }, { "epoch": 3.4169267919194297, "grad_norm": 0.984375, "learning_rate": 6.394160583941606e-06, "loss": 1.1569, "step": 10878 }, { "epoch": 3.417555018944971, "grad_norm": 0.99609375, "learning_rate": 6.3916217073944785e-06, "loss": 1.0074, "step": 10880 }, { "epoch": 3.4181832459705124, "grad_norm": 0.95703125, "learning_rate": 6.389082830847351e-06, "loss": 0.9634, "step": 10882 }, { "epoch": 3.4188114729960537, "grad_norm": 0.90234375, "learning_rate": 6.386543954300223e-06, "loss": 1.1607, "step": 10884 }, { "epoch": 3.4194397000215955, "grad_norm": 0.94140625, "learning_rate": 6.384005077753095e-06, "loss": 1.1907, "step": 10886 }, { "epoch": 3.420067927047137, "grad_norm": 0.90625, "learning_rate": 6.3814662012059666e-06, "loss": 1.2198, "step": 10888 }, { "epoch": 3.420696154072678, "grad_norm": 1.0078125, "learning_rate": 6.378927324658839e-06, "loss": 1.1116, "step": 10890 }, { "epoch": 3.4213243810982195, "grad_norm": 0.9296875, "learning_rate": 6.376388448111711e-06, "loss": 1.1512, "step": 10892 }, { "epoch": 3.421952608123761, "grad_norm": 0.9296875, "learning_rate": 6.373849571564583e-06, "loss": 1.2218, "step": 10894 }, { "epoch": 3.422580835149302, "grad_norm": 1.0625, "learning_rate": 6.3713106950174555e-06, "loss": 1.146, "step": 10896 }, { "epoch": 3.4232090621748434, "grad_norm": 1.0078125, "learning_rate": 6.368771818470328e-06, "loss": 1.2181, "step": 10898 }, { "epoch": 3.4238372892003848, "grad_norm": 0.76953125, "learning_rate": 6.366232941923199e-06, "loss": 1.144, "step": 10900 }, { "epoch": 3.424465516225926, "grad_norm": 0.890625, "learning_rate": 6.363694065376071e-06, "loss": 1.2201, "step": 10902 }, { "epoch": 3.4250937432514674, "grad_norm": 0.96875, "learning_rate": 6.3611551888289435e-06, "loss": 1.0955, "step": 10904 }, { "epoch": 3.4257219702770088, "grad_norm": 0.91796875, "learning_rate": 6.358616312281816e-06, "loss": 1.2795, "step": 10906 }, { "epoch": 3.42635019730255, "grad_norm": 0.8828125, "learning_rate": 6.356077435734688e-06, "loss": 1.2552, "step": 10908 }, { "epoch": 3.4269784243280914, "grad_norm": 0.94140625, "learning_rate": 6.353538559187559e-06, "loss": 1.0161, "step": 10910 }, { "epoch": 3.4276066513536327, "grad_norm": 0.97265625, "learning_rate": 6.350999682640432e-06, "loss": 1.0605, "step": 10912 }, { "epoch": 3.4282348783791745, "grad_norm": 0.88671875, "learning_rate": 6.348460806093304e-06, "loss": 1.0985, "step": 10914 }, { "epoch": 3.428863105404716, "grad_norm": 0.9140625, "learning_rate": 6.345921929546176e-06, "loss": 1.0812, "step": 10916 }, { "epoch": 3.429491332430257, "grad_norm": 0.99609375, "learning_rate": 6.343383052999048e-06, "loss": 1.1428, "step": 10918 }, { "epoch": 3.4301195594557985, "grad_norm": 0.9296875, "learning_rate": 6.3408441764519205e-06, "loss": 1.1921, "step": 10920 }, { "epoch": 3.43074778648134, "grad_norm": 0.91015625, "learning_rate": 6.338305299904792e-06, "loss": 1.3087, "step": 10922 }, { "epoch": 3.431376013506881, "grad_norm": 0.91015625, "learning_rate": 6.335766423357664e-06, "loss": 1.0718, "step": 10924 }, { "epoch": 3.4320042405324225, "grad_norm": 0.9453125, "learning_rate": 6.333227546810536e-06, "loss": 1.1654, "step": 10926 }, { "epoch": 3.432632467557964, "grad_norm": 0.953125, "learning_rate": 6.330688670263409e-06, "loss": 1.1773, "step": 10928 }, { "epoch": 3.433260694583505, "grad_norm": 0.8984375, "learning_rate": 6.328149793716281e-06, "loss": 1.0469, "step": 10930 }, { "epoch": 3.4338889216090465, "grad_norm": 1.1328125, "learning_rate": 6.325610917169153e-06, "loss": 1.0754, "step": 10932 }, { "epoch": 3.434517148634588, "grad_norm": 0.9765625, "learning_rate": 6.3230720406220244e-06, "loss": 1.1134, "step": 10934 }, { "epoch": 3.435145375660129, "grad_norm": 0.95703125, "learning_rate": 6.3205331640748975e-06, "loss": 1.2075, "step": 10936 }, { "epoch": 3.4357736026856704, "grad_norm": 1.015625, "learning_rate": 6.31799428752777e-06, "loss": 1.1066, "step": 10938 }, { "epoch": 3.4364018297112118, "grad_norm": 0.96484375, "learning_rate": 6.315455410980642e-06, "loss": 1.1853, "step": 10940 }, { "epoch": 3.437030056736753, "grad_norm": 0.98046875, "learning_rate": 6.312916534433514e-06, "loss": 1.2086, "step": 10942 }, { "epoch": 3.4376582837622944, "grad_norm": 0.9609375, "learning_rate": 6.3103776578863864e-06, "loss": 1.2887, "step": 10944 }, { "epoch": 3.4382865107878358, "grad_norm": 0.9765625, "learning_rate": 6.307838781339259e-06, "loss": 1.1533, "step": 10946 }, { "epoch": 3.438914737813377, "grad_norm": 0.93359375, "learning_rate": 6.30529990479213e-06, "loss": 1.1553, "step": 10948 }, { "epoch": 3.4395429648389184, "grad_norm": 0.9296875, "learning_rate": 6.302761028245002e-06, "loss": 1.1345, "step": 10950 }, { "epoch": 3.44017119186446, "grad_norm": 1.0625, "learning_rate": 6.3002221516978745e-06, "loss": 1.2416, "step": 10952 }, { "epoch": 3.4407994188900015, "grad_norm": 0.96484375, "learning_rate": 6.297683275150747e-06, "loss": 1.0372, "step": 10954 }, { "epoch": 3.441427645915543, "grad_norm": 0.89453125, "learning_rate": 6.295144398603619e-06, "loss": 1.1715, "step": 10956 }, { "epoch": 3.442055872941084, "grad_norm": 0.95703125, "learning_rate": 6.292605522056491e-06, "loss": 1.0325, "step": 10958 }, { "epoch": 3.4426840999666255, "grad_norm": 1.046875, "learning_rate": 6.2900666455093626e-06, "loss": 1.1205, "step": 10960 }, { "epoch": 3.443312326992167, "grad_norm": 0.94921875, "learning_rate": 6.287527768962235e-06, "loss": 1.1489, "step": 10962 }, { "epoch": 3.443940554017708, "grad_norm": 0.98828125, "learning_rate": 6.284988892415107e-06, "loss": 1.1253, "step": 10964 }, { "epoch": 3.4445687810432495, "grad_norm": 0.95703125, "learning_rate": 6.282450015867979e-06, "loss": 1.1584, "step": 10966 }, { "epoch": 3.445197008068791, "grad_norm": 0.9453125, "learning_rate": 6.2799111393208515e-06, "loss": 1.1465, "step": 10968 }, { "epoch": 3.445825235094332, "grad_norm": 0.9296875, "learning_rate": 6.277372262773723e-06, "loss": 1.2696, "step": 10970 }, { "epoch": 3.4464534621198735, "grad_norm": 0.94921875, "learning_rate": 6.274833386226595e-06, "loss": 1.0707, "step": 10972 }, { "epoch": 3.447081689145415, "grad_norm": 0.90234375, "learning_rate": 6.272294509679467e-06, "loss": 1.0225, "step": 10974 }, { "epoch": 3.447709916170956, "grad_norm": 0.9765625, "learning_rate": 6.2697556331323396e-06, "loss": 1.0989, "step": 10976 }, { "epoch": 3.4483381431964975, "grad_norm": 0.9296875, "learning_rate": 6.267216756585212e-06, "loss": 1.2347, "step": 10978 }, { "epoch": 3.4489663702220392, "grad_norm": 0.98828125, "learning_rate": 6.264677880038084e-06, "loss": 1.1642, "step": 10980 }, { "epoch": 3.4495945972475806, "grad_norm": 0.875, "learning_rate": 6.262139003490955e-06, "loss": 1.1545, "step": 10982 }, { "epoch": 3.450222824273122, "grad_norm": 0.9296875, "learning_rate": 6.259600126943828e-06, "loss": 1.2058, "step": 10984 }, { "epoch": 3.450851051298663, "grad_norm": 0.9140625, "learning_rate": 6.2570612503967e-06, "loss": 1.0851, "step": 10986 }, { "epoch": 3.4514792783242045, "grad_norm": 0.91015625, "learning_rate": 6.254522373849572e-06, "loss": 1.1532, "step": 10988 }, { "epoch": 3.452107505349746, "grad_norm": 0.99609375, "learning_rate": 6.251983497302444e-06, "loss": 1.2667, "step": 10990 }, { "epoch": 3.452735732375287, "grad_norm": 0.93359375, "learning_rate": 6.249444620755316e-06, "loss": 1.1346, "step": 10992 }, { "epoch": 3.4533639594008285, "grad_norm": 1.1640625, "learning_rate": 6.246905744208188e-06, "loss": 1.1293, "step": 10994 }, { "epoch": 3.45399218642637, "grad_norm": 1.0703125, "learning_rate": 6.24436686766106e-06, "loss": 1.0294, "step": 10996 }, { "epoch": 3.454620413451911, "grad_norm": 1.0, "learning_rate": 6.241827991113932e-06, "loss": 0.9804, "step": 10998 }, { "epoch": 3.4552486404774525, "grad_norm": 0.9375, "learning_rate": 6.239289114566805e-06, "loss": 1.2133, "step": 11000 }, { "epoch": 3.455876867502994, "grad_norm": 0.9453125, "learning_rate": 6.236750238019677e-06, "loss": 1.2363, "step": 11002 }, { "epoch": 3.456505094528535, "grad_norm": 0.93359375, "learning_rate": 6.234211361472548e-06, "loss": 1.0909, "step": 11004 }, { "epoch": 3.4571333215540765, "grad_norm": 1.125, "learning_rate": 6.2316724849254205e-06, "loss": 1.0861, "step": 11006 }, { "epoch": 3.457761548579618, "grad_norm": 1.0625, "learning_rate": 6.229133608378293e-06, "loss": 1.0554, "step": 11008 }, { "epoch": 3.458389775605159, "grad_norm": 0.87890625, "learning_rate": 6.226594731831165e-06, "loss": 1.3417, "step": 11010 }, { "epoch": 3.4590180026307005, "grad_norm": 0.8984375, "learning_rate": 6.224055855284037e-06, "loss": 1.1874, "step": 11012 }, { "epoch": 3.459646229656242, "grad_norm": 0.9140625, "learning_rate": 6.221516978736909e-06, "loss": 1.2087, "step": 11014 }, { "epoch": 3.4602744566817836, "grad_norm": 0.91796875, "learning_rate": 6.218978102189781e-06, "loss": 1.3437, "step": 11016 }, { "epoch": 3.460902683707325, "grad_norm": 0.8828125, "learning_rate": 6.216439225642653e-06, "loss": 1.1702, "step": 11018 }, { "epoch": 3.4615309107328662, "grad_norm": 0.921875, "learning_rate": 6.213900349095525e-06, "loss": 1.1392, "step": 11020 }, { "epoch": 3.4621591377584076, "grad_norm": 0.90625, "learning_rate": 6.211361472548398e-06, "loss": 1.1052, "step": 11022 }, { "epoch": 3.462787364783949, "grad_norm": 0.9609375, "learning_rate": 6.2088225960012705e-06, "loss": 1.0205, "step": 11024 }, { "epoch": 3.46341559180949, "grad_norm": 0.9453125, "learning_rate": 6.206283719454143e-06, "loss": 1.2898, "step": 11026 }, { "epoch": 3.4640438188350315, "grad_norm": 1.03125, "learning_rate": 6.203744842907015e-06, "loss": 1.2229, "step": 11028 }, { "epoch": 3.464672045860573, "grad_norm": 0.91015625, "learning_rate": 6.201205966359886e-06, "loss": 1.1456, "step": 11030 }, { "epoch": 3.465300272886114, "grad_norm": 0.93359375, "learning_rate": 6.198667089812759e-06, "loss": 1.1733, "step": 11032 }, { "epoch": 3.4659284999116555, "grad_norm": 0.93359375, "learning_rate": 6.196128213265631e-06, "loss": 1.2323, "step": 11034 }, { "epoch": 3.466556726937197, "grad_norm": 0.890625, "learning_rate": 6.193589336718503e-06, "loss": 1.1309, "step": 11036 }, { "epoch": 3.467184953962738, "grad_norm": 0.94140625, "learning_rate": 6.191050460171375e-06, "loss": 1.1598, "step": 11038 }, { "epoch": 3.4678131809882795, "grad_norm": 0.92578125, "learning_rate": 6.1885115836242475e-06, "loss": 1.0874, "step": 11040 }, { "epoch": 3.468441408013821, "grad_norm": 0.89453125, "learning_rate": 6.185972707077119e-06, "loss": 1.1937, "step": 11042 }, { "epoch": 3.469069635039362, "grad_norm": 0.91796875, "learning_rate": 6.183433830529991e-06, "loss": 1.1319, "step": 11044 }, { "epoch": 3.469697862064904, "grad_norm": 1.0703125, "learning_rate": 6.180894953982863e-06, "loss": 1.1905, "step": 11046 }, { "epoch": 3.4703260890904453, "grad_norm": 0.9765625, "learning_rate": 6.178356077435736e-06, "loss": 1.228, "step": 11048 }, { "epoch": 3.4709543161159866, "grad_norm": 0.99609375, "learning_rate": 6.175817200888608e-06, "loss": 1.1172, "step": 11050 }, { "epoch": 3.471582543141528, "grad_norm": 1.03125, "learning_rate": 6.173278324341479e-06, "loss": 1.1464, "step": 11052 }, { "epoch": 3.4722107701670692, "grad_norm": 0.90234375, "learning_rate": 6.170739447794351e-06, "loss": 1.185, "step": 11054 }, { "epoch": 3.4728389971926106, "grad_norm": 0.859375, "learning_rate": 6.168200571247224e-06, "loss": 1.1739, "step": 11056 }, { "epoch": 3.473467224218152, "grad_norm": 0.91015625, "learning_rate": 6.165661694700096e-06, "loss": 1.1478, "step": 11058 }, { "epoch": 3.4740954512436932, "grad_norm": 0.92578125, "learning_rate": 6.163122818152968e-06, "loss": 1.0971, "step": 11060 }, { "epoch": 3.4747236782692346, "grad_norm": 0.9140625, "learning_rate": 6.16058394160584e-06, "loss": 1.2276, "step": 11062 }, { "epoch": 3.475351905294776, "grad_norm": 1.0, "learning_rate": 6.158045065058712e-06, "loss": 1.1134, "step": 11064 }, { "epoch": 3.475980132320317, "grad_norm": 1.078125, "learning_rate": 6.155506188511584e-06, "loss": 1.1422, "step": 11066 }, { "epoch": 3.4766083593458585, "grad_norm": 0.91796875, "learning_rate": 6.152967311964456e-06, "loss": 1.204, "step": 11068 }, { "epoch": 3.4772365863714, "grad_norm": 0.921875, "learning_rate": 6.150428435417328e-06, "loss": 1.0458, "step": 11070 }, { "epoch": 3.477864813396941, "grad_norm": 0.9375, "learning_rate": 6.147889558870201e-06, "loss": 1.0818, "step": 11072 }, { "epoch": 3.4784930404224825, "grad_norm": 0.89453125, "learning_rate": 6.145350682323072e-06, "loss": 1.2234, "step": 11074 }, { "epoch": 3.479121267448024, "grad_norm": 0.890625, "learning_rate": 6.142811805775944e-06, "loss": 1.2797, "step": 11076 }, { "epoch": 3.479749494473565, "grad_norm": 1.03125, "learning_rate": 6.1402729292288165e-06, "loss": 1.1263, "step": 11078 }, { "epoch": 3.4803777214991065, "grad_norm": 0.87109375, "learning_rate": 6.137734052681689e-06, "loss": 1.0559, "step": 11080 }, { "epoch": 3.4810059485246483, "grad_norm": 0.94140625, "learning_rate": 6.135195176134561e-06, "loss": 1.1086, "step": 11082 }, { "epoch": 3.4816341755501896, "grad_norm": 0.9375, "learning_rate": 6.132656299587433e-06, "loss": 1.1401, "step": 11084 }, { "epoch": 3.482262402575731, "grad_norm": 0.94140625, "learning_rate": 6.1301174230403045e-06, "loss": 1.2093, "step": 11086 }, { "epoch": 3.4828906296012723, "grad_norm": 0.9375, "learning_rate": 6.127578546493177e-06, "loss": 1.0413, "step": 11088 }, { "epoch": 3.4835188566268136, "grad_norm": 1.0859375, "learning_rate": 6.125039669946049e-06, "loss": 1.1005, "step": 11090 }, { "epoch": 3.484147083652355, "grad_norm": 0.92578125, "learning_rate": 6.122500793398921e-06, "loss": 1.1829, "step": 11092 }, { "epoch": 3.4847753106778963, "grad_norm": 0.98046875, "learning_rate": 6.1199619168517935e-06, "loss": 1.2212, "step": 11094 }, { "epoch": 3.4854035377034376, "grad_norm": 0.9609375, "learning_rate": 6.117423040304666e-06, "loss": 1.1858, "step": 11096 }, { "epoch": 3.486031764728979, "grad_norm": 0.890625, "learning_rate": 6.114884163757537e-06, "loss": 1.2006, "step": 11098 }, { "epoch": 3.4866599917545202, "grad_norm": 0.97265625, "learning_rate": 6.112345287210409e-06, "loss": 1.1293, "step": 11100 }, { "epoch": 3.4872882187800616, "grad_norm": 0.93359375, "learning_rate": 6.1098064106632815e-06, "loss": 1.2502, "step": 11102 }, { "epoch": 3.487916445805603, "grad_norm": 0.9375, "learning_rate": 6.107267534116154e-06, "loss": 1.1942, "step": 11104 }, { "epoch": 3.4885446728311442, "grad_norm": 0.94140625, "learning_rate": 6.104728657569026e-06, "loss": 1.2327, "step": 11106 }, { "epoch": 3.4891728998566856, "grad_norm": 0.9453125, "learning_rate": 6.102189781021899e-06, "loss": 1.1035, "step": 11108 }, { "epoch": 3.489801126882227, "grad_norm": 0.9375, "learning_rate": 6.099650904474771e-06, "loss": 1.1476, "step": 11110 }, { "epoch": 3.4904293539077687, "grad_norm": 0.953125, "learning_rate": 6.097112027927643e-06, "loss": 1.0286, "step": 11112 }, { "epoch": 3.49105758093331, "grad_norm": 0.9921875, "learning_rate": 6.094573151380515e-06, "loss": 1.2274, "step": 11114 }, { "epoch": 3.4916858079588513, "grad_norm": 0.8984375, "learning_rate": 6.092034274833387e-06, "loss": 1.0385, "step": 11116 }, { "epoch": 3.4923140349843926, "grad_norm": 0.90234375, "learning_rate": 6.089495398286259e-06, "loss": 1.2014, "step": 11118 }, { "epoch": 3.492942262009934, "grad_norm": 1.0546875, "learning_rate": 6.086956521739132e-06, "loss": 1.0983, "step": 11120 }, { "epoch": 3.4935704890354753, "grad_norm": 0.98828125, "learning_rate": 6.084417645192003e-06, "loss": 1.1603, "step": 11122 }, { "epoch": 3.4941987160610166, "grad_norm": 0.8828125, "learning_rate": 6.081878768644875e-06, "loss": 1.2327, "step": 11124 }, { "epoch": 3.494826943086558, "grad_norm": 0.890625, "learning_rate": 6.0793398920977474e-06, "loss": 1.2577, "step": 11126 }, { "epoch": 3.4954551701120993, "grad_norm": 1.0078125, "learning_rate": 6.07680101555062e-06, "loss": 1.2451, "step": 11128 }, { "epoch": 3.4960833971376406, "grad_norm": 0.9765625, "learning_rate": 6.074262139003492e-06, "loss": 1.2095, "step": 11130 }, { "epoch": 3.496711624163182, "grad_norm": 0.93359375, "learning_rate": 6.071723262456364e-06, "loss": 1.0172, "step": 11132 }, { "epoch": 3.4973398511887233, "grad_norm": 0.84375, "learning_rate": 6.0691843859092355e-06, "loss": 1.1915, "step": 11134 }, { "epoch": 3.4979680782142646, "grad_norm": 0.91796875, "learning_rate": 6.066645509362108e-06, "loss": 1.0484, "step": 11136 }, { "epoch": 3.498596305239806, "grad_norm": 1.109375, "learning_rate": 6.06410663281498e-06, "loss": 1.2477, "step": 11138 }, { "epoch": 3.4992245322653472, "grad_norm": 0.9453125, "learning_rate": 6.061567756267852e-06, "loss": 1.2101, "step": 11140 }, { "epoch": 3.4998527592908886, "grad_norm": 0.8671875, "learning_rate": 6.059028879720724e-06, "loss": 1.1432, "step": 11142 }, { "epoch": 3.50048098631643, "grad_norm": 0.9609375, "learning_rate": 6.056490003173597e-06, "loss": 1.1703, "step": 11144 }, { "epoch": 3.5011092133419712, "grad_norm": 0.984375, "learning_rate": 6.053951126626468e-06, "loss": 1.1637, "step": 11146 }, { "epoch": 3.5017374403675126, "grad_norm": 0.94921875, "learning_rate": 6.05141225007934e-06, "loss": 1.0913, "step": 11148 }, { "epoch": 3.5023656673930543, "grad_norm": 0.98046875, "learning_rate": 6.0488733735322125e-06, "loss": 1.2434, "step": 11150 }, { "epoch": 3.5029938944185957, "grad_norm": 0.9453125, "learning_rate": 6.046334496985085e-06, "loss": 1.2095, "step": 11152 }, { "epoch": 3.503622121444137, "grad_norm": 0.90234375, "learning_rate": 6.043795620437957e-06, "loss": 1.2453, "step": 11154 }, { "epoch": 3.5042503484696783, "grad_norm": 1.0390625, "learning_rate": 6.041256743890828e-06, "loss": 1.1522, "step": 11156 }, { "epoch": 3.5048785754952196, "grad_norm": 0.91796875, "learning_rate": 6.0387178673437006e-06, "loss": 1.1903, "step": 11158 }, { "epoch": 3.505506802520761, "grad_norm": 0.9140625, "learning_rate": 6.036178990796573e-06, "loss": 1.2343, "step": 11160 }, { "epoch": 3.5061350295463023, "grad_norm": 0.9140625, "learning_rate": 6.033640114249445e-06, "loss": 1.1658, "step": 11162 }, { "epoch": 3.5067632565718436, "grad_norm": 0.9921875, "learning_rate": 6.031101237702317e-06, "loss": 0.9975, "step": 11164 }, { "epoch": 3.507391483597385, "grad_norm": 0.9296875, "learning_rate": 6.0285623611551895e-06, "loss": 1.3154, "step": 11166 }, { "epoch": 3.5080197106229263, "grad_norm": 0.9453125, "learning_rate": 6.026023484608061e-06, "loss": 1.2834, "step": 11168 }, { "epoch": 3.5086479376484676, "grad_norm": 0.97265625, "learning_rate": 6.023484608060933e-06, "loss": 1.1296, "step": 11170 }, { "epoch": 3.509276164674009, "grad_norm": 0.98046875, "learning_rate": 6.020945731513805e-06, "loss": 1.1008, "step": 11172 }, { "epoch": 3.5099043916995503, "grad_norm": 0.91796875, "learning_rate": 6.0184068549666775e-06, "loss": 1.2117, "step": 11174 }, { "epoch": 3.510532618725092, "grad_norm": 0.9453125, "learning_rate": 6.01586797841955e-06, "loss": 1.108, "step": 11176 }, { "epoch": 3.5111608457506334, "grad_norm": 1.0234375, "learning_rate": 6.013329101872421e-06, "loss": 1.1307, "step": 11178 }, { "epoch": 3.5117890727761747, "grad_norm": 0.953125, "learning_rate": 6.010790225325293e-06, "loss": 1.0639, "step": 11180 }, { "epoch": 3.512417299801716, "grad_norm": 0.88671875, "learning_rate": 6.008251348778166e-06, "loss": 1.2354, "step": 11182 }, { "epoch": 3.5130455268272573, "grad_norm": 0.94140625, "learning_rate": 6.005712472231038e-06, "loss": 1.1692, "step": 11184 }, { "epoch": 3.5136737538527987, "grad_norm": 0.9609375, "learning_rate": 6.00317359568391e-06, "loss": 1.133, "step": 11186 }, { "epoch": 3.51430198087834, "grad_norm": 0.90234375, "learning_rate": 6.000634719136782e-06, "loss": 1.254, "step": 11188 }, { "epoch": 3.5149302079038813, "grad_norm": 0.98828125, "learning_rate": 5.998095842589654e-06, "loss": 1.1775, "step": 11190 }, { "epoch": 3.5155584349294227, "grad_norm": 1.0234375, "learning_rate": 5.995556966042526e-06, "loss": 1.0842, "step": 11192 }, { "epoch": 3.516186661954964, "grad_norm": 0.88671875, "learning_rate": 5.993018089495399e-06, "loss": 1.0806, "step": 11194 }, { "epoch": 3.5168148889805053, "grad_norm": 0.9453125, "learning_rate": 5.990479212948271e-06, "loss": 1.003, "step": 11196 }, { "epoch": 3.5174431160060466, "grad_norm": 0.9765625, "learning_rate": 5.9879403364011435e-06, "loss": 1.1988, "step": 11198 }, { "epoch": 3.518071343031588, "grad_norm": 0.9140625, "learning_rate": 5.985401459854016e-06, "loss": 1.131, "step": 11200 }, { "epoch": 3.5186995700571293, "grad_norm": 0.95703125, "learning_rate": 5.982862583306888e-06, "loss": 1.0454, "step": 11202 }, { "epoch": 3.5193277970826706, "grad_norm": 0.92578125, "learning_rate": 5.980323706759759e-06, "loss": 1.1296, "step": 11204 }, { "epoch": 3.519956024108212, "grad_norm": 0.9296875, "learning_rate": 5.9777848302126315e-06, "loss": 1.097, "step": 11206 }, { "epoch": 3.5205842511337533, "grad_norm": 1.046875, "learning_rate": 5.975245953665504e-06, "loss": 1.1769, "step": 11208 }, { "epoch": 3.5212124781592946, "grad_norm": 0.9140625, "learning_rate": 5.972707077118376e-06, "loss": 1.1894, "step": 11210 }, { "epoch": 3.521840705184836, "grad_norm": 0.94140625, "learning_rate": 5.970168200571248e-06, "loss": 1.139, "step": 11212 }, { "epoch": 3.5224689322103773, "grad_norm": 1.0703125, "learning_rate": 5.9676293240241204e-06, "loss": 1.0904, "step": 11214 }, { "epoch": 3.523097159235919, "grad_norm": 0.97265625, "learning_rate": 5.965090447476992e-06, "loss": 1.1153, "step": 11216 }, { "epoch": 3.5237253862614604, "grad_norm": 0.97265625, "learning_rate": 5.962551570929864e-06, "loss": 1.0791, "step": 11218 }, { "epoch": 3.5243536132870017, "grad_norm": 0.9453125, "learning_rate": 5.960012694382736e-06, "loss": 1.1225, "step": 11220 }, { "epoch": 3.524981840312543, "grad_norm": 0.88671875, "learning_rate": 5.9574738178356085e-06, "loss": 1.2022, "step": 11222 }, { "epoch": 3.5256100673380844, "grad_norm": 1.046875, "learning_rate": 5.954934941288481e-06, "loss": 1.0843, "step": 11224 }, { "epoch": 3.5262382943636257, "grad_norm": 1.0546875, "learning_rate": 5.952396064741353e-06, "loss": 1.0399, "step": 11226 }, { "epoch": 3.526866521389167, "grad_norm": 0.9921875, "learning_rate": 5.949857188194224e-06, "loss": 1.1219, "step": 11228 }, { "epoch": 3.5274947484147083, "grad_norm": 0.96484375, "learning_rate": 5.947318311647097e-06, "loss": 1.1905, "step": 11230 }, { "epoch": 3.5281229754402497, "grad_norm": 0.96484375, "learning_rate": 5.944779435099969e-06, "loss": 1.2098, "step": 11232 }, { "epoch": 3.528751202465791, "grad_norm": 0.9375, "learning_rate": 5.942240558552841e-06, "loss": 1.0147, "step": 11234 }, { "epoch": 3.5293794294913323, "grad_norm": 0.91015625, "learning_rate": 5.939701682005713e-06, "loss": 1.1371, "step": 11236 }, { "epoch": 3.5300076565168736, "grad_norm": 1.0078125, "learning_rate": 5.937162805458585e-06, "loss": 1.1556, "step": 11238 }, { "epoch": 3.530635883542415, "grad_norm": 1.3359375, "learning_rate": 5.934623928911457e-06, "loss": 1.2628, "step": 11240 }, { "epoch": 3.5312641105679567, "grad_norm": 0.91015625, "learning_rate": 5.932085052364329e-06, "loss": 1.216, "step": 11242 }, { "epoch": 3.531892337593498, "grad_norm": 0.99609375, "learning_rate": 5.929546175817201e-06, "loss": 1.2328, "step": 11244 }, { "epoch": 3.5325205646190394, "grad_norm": 0.98046875, "learning_rate": 5.9270072992700736e-06, "loss": 0.9865, "step": 11246 }, { "epoch": 3.5331487916445807, "grad_norm": 0.95703125, "learning_rate": 5.924468422722946e-06, "loss": 1.2476, "step": 11248 }, { "epoch": 3.533777018670122, "grad_norm": 0.88671875, "learning_rate": 5.921929546175817e-06, "loss": 1.2275, "step": 11250 }, { "epoch": 3.5344052456956634, "grad_norm": 0.87109375, "learning_rate": 5.919390669628689e-06, "loss": 1.1545, "step": 11252 }, { "epoch": 3.5350334727212047, "grad_norm": 0.97265625, "learning_rate": 5.916851793081562e-06, "loss": 1.2343, "step": 11254 }, { "epoch": 3.535661699746746, "grad_norm": 1.015625, "learning_rate": 5.914312916534434e-06, "loss": 1.1394, "step": 11256 }, { "epoch": 3.5362899267722874, "grad_norm": 0.96875, "learning_rate": 5.911774039987306e-06, "loss": 1.2141, "step": 11258 }, { "epoch": 3.5369181537978287, "grad_norm": 0.91796875, "learning_rate": 5.9092351634401775e-06, "loss": 0.9556, "step": 11260 }, { "epoch": 3.53754638082337, "grad_norm": 0.99609375, "learning_rate": 5.90669628689305e-06, "loss": 1.1464, "step": 11262 }, { "epoch": 3.5381746078489114, "grad_norm": 0.921875, "learning_rate": 5.904157410345922e-06, "loss": 1.1737, "step": 11264 }, { "epoch": 3.5388028348744527, "grad_norm": 0.94921875, "learning_rate": 5.901618533798794e-06, "loss": 1.1306, "step": 11266 }, { "epoch": 3.539431061899994, "grad_norm": 0.94921875, "learning_rate": 5.899079657251666e-06, "loss": 1.055, "step": 11268 }, { "epoch": 3.5400592889255353, "grad_norm": 1.015625, "learning_rate": 5.896540780704539e-06, "loss": 1.1041, "step": 11270 }, { "epoch": 3.5406875159510767, "grad_norm": 0.9140625, "learning_rate": 5.89400190415741e-06, "loss": 1.2009, "step": 11272 }, { "epoch": 3.541315742976618, "grad_norm": 0.96875, "learning_rate": 5.891463027610282e-06, "loss": 1.1949, "step": 11274 }, { "epoch": 3.5419439700021593, "grad_norm": 1.0078125, "learning_rate": 5.8889241510631545e-06, "loss": 1.133, "step": 11276 }, { "epoch": 3.5425721970277007, "grad_norm": 0.921875, "learning_rate": 5.886385274516027e-06, "loss": 1.1228, "step": 11278 }, { "epoch": 3.543200424053242, "grad_norm": 0.85546875, "learning_rate": 5.8838463979689e-06, "loss": 1.0944, "step": 11280 }, { "epoch": 3.5438286510787838, "grad_norm": 0.92578125, "learning_rate": 5.881307521421772e-06, "loss": 1.3202, "step": 11282 }, { "epoch": 3.544456878104325, "grad_norm": 0.9765625, "learning_rate": 5.878768644874644e-06, "loss": 1.2548, "step": 11284 }, { "epoch": 3.5450851051298664, "grad_norm": 0.8984375, "learning_rate": 5.876229768327516e-06, "loss": 1.2594, "step": 11286 }, { "epoch": 3.5457133321554077, "grad_norm": 0.88671875, "learning_rate": 5.873690891780388e-06, "loss": 1.155, "step": 11288 }, { "epoch": 3.546341559180949, "grad_norm": 0.93359375, "learning_rate": 5.87115201523326e-06, "loss": 1.0473, "step": 11290 }, { "epoch": 3.5469697862064904, "grad_norm": 0.9375, "learning_rate": 5.868613138686132e-06, "loss": 1.172, "step": 11292 }, { "epoch": 3.5475980132320317, "grad_norm": 0.9140625, "learning_rate": 5.8660742621390045e-06, "loss": 1.1314, "step": 11294 }, { "epoch": 3.548226240257573, "grad_norm": 0.8515625, "learning_rate": 5.863535385591877e-06, "loss": 1.2274, "step": 11296 }, { "epoch": 3.5488544672831144, "grad_norm": 0.8828125, "learning_rate": 5.860996509044748e-06, "loss": 1.0827, "step": 11298 }, { "epoch": 3.5494826943086557, "grad_norm": 1.0234375, "learning_rate": 5.85845763249762e-06, "loss": 1.0691, "step": 11300 }, { "epoch": 3.550110921334197, "grad_norm": 0.97265625, "learning_rate": 5.855918755950493e-06, "loss": 0.939, "step": 11302 }, { "epoch": 3.5507391483597384, "grad_norm": 0.8828125, "learning_rate": 5.853379879403365e-06, "loss": 1.2289, "step": 11304 }, { "epoch": 3.5513673753852797, "grad_norm": 1.1015625, "learning_rate": 5.850841002856237e-06, "loss": 1.3369, "step": 11306 }, { "epoch": 3.5519956024108215, "grad_norm": 0.97265625, "learning_rate": 5.848302126309109e-06, "loss": 1.1913, "step": 11308 }, { "epoch": 3.552623829436363, "grad_norm": 0.95703125, "learning_rate": 5.845763249761981e-06, "loss": 1.2954, "step": 11310 }, { "epoch": 3.553252056461904, "grad_norm": 0.9296875, "learning_rate": 5.843224373214853e-06, "loss": 1.0153, "step": 11312 }, { "epoch": 3.5538802834874454, "grad_norm": 1.0078125, "learning_rate": 5.840685496667725e-06, "loss": 0.9941, "step": 11314 }, { "epoch": 3.5545085105129868, "grad_norm": 0.90625, "learning_rate": 5.838146620120597e-06, "loss": 1.1261, "step": 11316 }, { "epoch": 3.555136737538528, "grad_norm": 0.94140625, "learning_rate": 5.83560774357347e-06, "loss": 1.1552, "step": 11318 }, { "epoch": 3.5557649645640694, "grad_norm": 0.9296875, "learning_rate": 5.833068867026341e-06, "loss": 1.1953, "step": 11320 }, { "epoch": 3.5563931915896108, "grad_norm": 0.9296875, "learning_rate": 5.830529990479213e-06, "loss": 1.1332, "step": 11322 }, { "epoch": 3.557021418615152, "grad_norm": 0.96484375, "learning_rate": 5.827991113932085e-06, "loss": 1.1145, "step": 11324 }, { "epoch": 3.5576496456406934, "grad_norm": 0.93359375, "learning_rate": 5.825452237384958e-06, "loss": 1.3539, "step": 11326 }, { "epoch": 3.5582778726662347, "grad_norm": 0.90625, "learning_rate": 5.82291336083783e-06, "loss": 1.0381, "step": 11328 }, { "epoch": 3.558906099691776, "grad_norm": 0.94140625, "learning_rate": 5.820374484290702e-06, "loss": 1.0749, "step": 11330 }, { "epoch": 3.5595343267173174, "grad_norm": 1.03125, "learning_rate": 5.8178356077435735e-06, "loss": 1.2138, "step": 11332 }, { "epoch": 3.5601625537428587, "grad_norm": 1.078125, "learning_rate": 5.815296731196446e-06, "loss": 1.1305, "step": 11334 }, { "epoch": 3.5607907807684, "grad_norm": 1.078125, "learning_rate": 5.812757854649318e-06, "loss": 1.0445, "step": 11336 }, { "epoch": 3.5614190077939414, "grad_norm": 1.0078125, "learning_rate": 5.81021897810219e-06, "loss": 1.2928, "step": 11338 }, { "epoch": 3.5620472348194827, "grad_norm": 0.99609375, "learning_rate": 5.807680101555062e-06, "loss": 1.0647, "step": 11340 }, { "epoch": 3.562675461845024, "grad_norm": 1.0, "learning_rate": 5.805141225007934e-06, "loss": 1.1451, "step": 11342 }, { "epoch": 3.5633036888705654, "grad_norm": 0.98828125, "learning_rate": 5.802602348460806e-06, "loss": 1.2456, "step": 11344 }, { "epoch": 3.5639319158961067, "grad_norm": 0.98046875, "learning_rate": 5.800063471913678e-06, "loss": 0.9982, "step": 11346 }, { "epoch": 3.5645601429216485, "grad_norm": 1.0078125, "learning_rate": 5.7975245953665505e-06, "loss": 1.2202, "step": 11348 }, { "epoch": 3.56518836994719, "grad_norm": 0.8984375, "learning_rate": 5.794985718819423e-06, "loss": 1.2568, "step": 11350 }, { "epoch": 3.565816596972731, "grad_norm": 0.96484375, "learning_rate": 5.792446842272295e-06, "loss": 1.1494, "step": 11352 }, { "epoch": 3.5664448239982725, "grad_norm": 1.03125, "learning_rate": 5.789907965725166e-06, "loss": 1.1779, "step": 11354 }, { "epoch": 3.567073051023814, "grad_norm": 0.953125, "learning_rate": 5.7873690891780385e-06, "loss": 1.0687, "step": 11356 }, { "epoch": 3.567701278049355, "grad_norm": 0.95703125, "learning_rate": 5.784830212630911e-06, "loss": 1.1963, "step": 11358 }, { "epoch": 3.5683295050748964, "grad_norm": 0.91015625, "learning_rate": 5.782291336083783e-06, "loss": 1.1986, "step": 11360 }, { "epoch": 3.5689577321004378, "grad_norm": 0.90234375, "learning_rate": 5.779752459536655e-06, "loss": 1.1832, "step": 11362 }, { "epoch": 3.569585959125979, "grad_norm": 0.96875, "learning_rate": 5.7772135829895275e-06, "loss": 1.1861, "step": 11364 }, { "epoch": 3.5702141861515204, "grad_norm": 0.94140625, "learning_rate": 5.7746747064424005e-06, "loss": 1.1638, "step": 11366 }, { "epoch": 3.5708424131770617, "grad_norm": 0.953125, "learning_rate": 5.772135829895272e-06, "loss": 1.0138, "step": 11368 }, { "epoch": 3.571470640202603, "grad_norm": 1.0078125, "learning_rate": 5.769596953348144e-06, "loss": 1.2892, "step": 11370 }, { "epoch": 3.572098867228145, "grad_norm": 1.0078125, "learning_rate": 5.767058076801016e-06, "loss": 1.1486, "step": 11372 }, { "epoch": 3.572727094253686, "grad_norm": 0.91015625, "learning_rate": 5.764519200253889e-06, "loss": 1.1013, "step": 11374 }, { "epoch": 3.5733553212792275, "grad_norm": 0.91796875, "learning_rate": 5.761980323706761e-06, "loss": 1.2456, "step": 11376 }, { "epoch": 3.573983548304769, "grad_norm": 1.015625, "learning_rate": 5.759441447159633e-06, "loss": 1.1477, "step": 11378 }, { "epoch": 3.57461177533031, "grad_norm": 0.95703125, "learning_rate": 5.7569025706125045e-06, "loss": 1.1999, "step": 11380 }, { "epoch": 3.5752400023558515, "grad_norm": 0.953125, "learning_rate": 5.754363694065377e-06, "loss": 1.2306, "step": 11382 }, { "epoch": 3.575868229381393, "grad_norm": 0.875, "learning_rate": 5.751824817518249e-06, "loss": 1.2421, "step": 11384 }, { "epoch": 3.576496456406934, "grad_norm": 0.96484375, "learning_rate": 5.749285940971121e-06, "loss": 1.2756, "step": 11386 }, { "epoch": 3.5771246834324755, "grad_norm": 1.0234375, "learning_rate": 5.746747064423993e-06, "loss": 1.0853, "step": 11388 }, { "epoch": 3.577752910458017, "grad_norm": 0.984375, "learning_rate": 5.744208187876866e-06, "loss": 1.2225, "step": 11390 }, { "epoch": 3.578381137483558, "grad_norm": 0.98828125, "learning_rate": 5.741669311329737e-06, "loss": 1.0597, "step": 11392 }, { "epoch": 3.5790093645090995, "grad_norm": 1.046875, "learning_rate": 5.739130434782609e-06, "loss": 1.1556, "step": 11394 }, { "epoch": 3.579637591534641, "grad_norm": 0.9375, "learning_rate": 5.7365915582354814e-06, "loss": 1.1976, "step": 11396 }, { "epoch": 3.580265818560182, "grad_norm": 0.9453125, "learning_rate": 5.734052681688354e-06, "loss": 1.1446, "step": 11398 }, { "epoch": 3.5808940455857234, "grad_norm": 0.875, "learning_rate": 5.731513805141226e-06, "loss": 1.18, "step": 11400 }, { "epoch": 3.5815222726112648, "grad_norm": 0.98828125, "learning_rate": 5.728974928594097e-06, "loss": 1.1415, "step": 11402 }, { "epoch": 3.582150499636806, "grad_norm": 0.92578125, "learning_rate": 5.7264360520469695e-06, "loss": 1.1852, "step": 11404 }, { "epoch": 3.5827787266623474, "grad_norm": 0.9375, "learning_rate": 5.723897175499842e-06, "loss": 1.2331, "step": 11406 }, { "epoch": 3.5834069536878888, "grad_norm": 0.94140625, "learning_rate": 5.721358298952714e-06, "loss": 1.1502, "step": 11408 }, { "epoch": 3.58403518071343, "grad_norm": 0.94140625, "learning_rate": 5.718819422405586e-06, "loss": 1.1276, "step": 11410 }, { "epoch": 3.5846634077389714, "grad_norm": 1.015625, "learning_rate": 5.7162805458584584e-06, "loss": 1.1064, "step": 11412 }, { "epoch": 3.585291634764513, "grad_norm": 0.90625, "learning_rate": 5.71374166931133e-06, "loss": 1.1148, "step": 11414 }, { "epoch": 3.5859198617900545, "grad_norm": 0.95703125, "learning_rate": 5.711202792764202e-06, "loss": 1.1668, "step": 11416 }, { "epoch": 3.586548088815596, "grad_norm": 0.97265625, "learning_rate": 5.708663916217074e-06, "loss": 1.0383, "step": 11418 }, { "epoch": 3.587176315841137, "grad_norm": 1.0234375, "learning_rate": 5.7061250396699465e-06, "loss": 1.0694, "step": 11420 }, { "epoch": 3.5878045428666785, "grad_norm": 0.9375, "learning_rate": 5.703586163122819e-06, "loss": 1.1698, "step": 11422 }, { "epoch": 3.58843276989222, "grad_norm": 1.0, "learning_rate": 5.70104728657569e-06, "loss": 1.0761, "step": 11424 }, { "epoch": 3.589060996917761, "grad_norm": 0.94140625, "learning_rate": 5.698508410028562e-06, "loss": 1.2412, "step": 11426 }, { "epoch": 3.5896892239433025, "grad_norm": 0.90625, "learning_rate": 5.6959695334814346e-06, "loss": 1.2238, "step": 11428 }, { "epoch": 3.590317450968844, "grad_norm": 0.984375, "learning_rate": 5.693430656934307e-06, "loss": 1.1599, "step": 11430 }, { "epoch": 3.590945677994385, "grad_norm": 1.0, "learning_rate": 5.690891780387179e-06, "loss": 1.1274, "step": 11432 }, { "epoch": 3.5915739050199265, "grad_norm": 0.921875, "learning_rate": 5.688352903840051e-06, "loss": 1.1337, "step": 11434 }, { "epoch": 3.592202132045468, "grad_norm": 0.96875, "learning_rate": 5.685814027292923e-06, "loss": 1.0997, "step": 11436 }, { "epoch": 3.5928303590710096, "grad_norm": 0.875, "learning_rate": 5.683275150745795e-06, "loss": 1.1566, "step": 11438 }, { "epoch": 3.593458586096551, "grad_norm": 0.9140625, "learning_rate": 5.680736274198667e-06, "loss": 0.9862, "step": 11440 }, { "epoch": 3.594086813122092, "grad_norm": 0.953125, "learning_rate": 5.678197397651539e-06, "loss": 1.1242, "step": 11442 }, { "epoch": 3.5947150401476335, "grad_norm": 0.9296875, "learning_rate": 5.6756585211044116e-06, "loss": 1.1372, "step": 11444 }, { "epoch": 3.595343267173175, "grad_norm": 0.890625, "learning_rate": 5.673119644557284e-06, "loss": 1.3612, "step": 11446 }, { "epoch": 3.595971494198716, "grad_norm": 0.95703125, "learning_rate": 5.670580768010155e-06, "loss": 1.0862, "step": 11448 }, { "epoch": 3.5965997212242575, "grad_norm": 1.0390625, "learning_rate": 5.668041891463027e-06, "loss": 1.0769, "step": 11450 }, { "epoch": 3.597227948249799, "grad_norm": 0.99609375, "learning_rate": 5.6655030149159005e-06, "loss": 1.1605, "step": 11452 }, { "epoch": 3.59785617527534, "grad_norm": 0.93359375, "learning_rate": 5.662964138368773e-06, "loss": 1.2166, "step": 11454 }, { "epoch": 3.5984844023008815, "grad_norm": 0.90234375, "learning_rate": 5.660425261821645e-06, "loss": 1.1015, "step": 11456 }, { "epoch": 3.599112629326423, "grad_norm": 0.9609375, "learning_rate": 5.657886385274517e-06, "loss": 1.0794, "step": 11458 }, { "epoch": 3.599740856351964, "grad_norm": 0.90234375, "learning_rate": 5.655347508727389e-06, "loss": 0.8758, "step": 11460 }, { "epoch": 3.6003690833775055, "grad_norm": 0.99609375, "learning_rate": 5.652808632180261e-06, "loss": 1.144, "step": 11462 }, { "epoch": 3.600997310403047, "grad_norm": 0.9375, "learning_rate": 5.650269755633133e-06, "loss": 1.0838, "step": 11464 }, { "epoch": 3.601625537428588, "grad_norm": 0.91015625, "learning_rate": 5.647730879086005e-06, "loss": 1.0612, "step": 11466 }, { "epoch": 3.6022537644541295, "grad_norm": 0.9140625, "learning_rate": 5.6451920025388775e-06, "loss": 1.0767, "step": 11468 }, { "epoch": 3.602881991479671, "grad_norm": 0.9453125, "learning_rate": 5.64265312599175e-06, "loss": 1.1736, "step": 11470 }, { "epoch": 3.603510218505212, "grad_norm": 0.9296875, "learning_rate": 5.640114249444622e-06, "loss": 1.3238, "step": 11472 }, { "epoch": 3.6041384455307535, "grad_norm": 0.94140625, "learning_rate": 5.637575372897493e-06, "loss": 1.126, "step": 11474 }, { "epoch": 3.604766672556295, "grad_norm": 0.90625, "learning_rate": 5.6350364963503655e-06, "loss": 1.2382, "step": 11476 }, { "epoch": 3.605394899581836, "grad_norm": 0.921875, "learning_rate": 5.632497619803238e-06, "loss": 1.1958, "step": 11478 }, { "epoch": 3.606023126607378, "grad_norm": 1.0078125, "learning_rate": 5.62995874325611e-06, "loss": 1.3461, "step": 11480 }, { "epoch": 3.606651353632919, "grad_norm": 1.0859375, "learning_rate": 5.627419866708982e-06, "loss": 1.0715, "step": 11482 }, { "epoch": 3.6072795806584605, "grad_norm": 0.9140625, "learning_rate": 5.624880990161854e-06, "loss": 1.2413, "step": 11484 }, { "epoch": 3.607907807684002, "grad_norm": 1.015625, "learning_rate": 5.622342113614726e-06, "loss": 1.067, "step": 11486 }, { "epoch": 3.608536034709543, "grad_norm": 0.90234375, "learning_rate": 5.619803237067598e-06, "loss": 1.1361, "step": 11488 }, { "epoch": 3.6091642617350845, "grad_norm": 0.89453125, "learning_rate": 5.61726436052047e-06, "loss": 1.2633, "step": 11490 }, { "epoch": 3.609792488760626, "grad_norm": 0.99609375, "learning_rate": 5.6147254839733425e-06, "loss": 1.1846, "step": 11492 }, { "epoch": 3.610420715786167, "grad_norm": 1.015625, "learning_rate": 5.612186607426215e-06, "loss": 0.9834, "step": 11494 }, { "epoch": 3.6110489428117085, "grad_norm": 1.015625, "learning_rate": 5.609647730879086e-06, "loss": 1.114, "step": 11496 }, { "epoch": 3.61167716983725, "grad_norm": 0.9140625, "learning_rate": 5.607108854331958e-06, "loss": 1.2863, "step": 11498 }, { "epoch": 3.612305396862791, "grad_norm": 0.93359375, "learning_rate": 5.604569977784831e-06, "loss": 1.1199, "step": 11500 }, { "epoch": 3.6129336238883325, "grad_norm": 1.0, "learning_rate": 5.602031101237703e-06, "loss": 1.1379, "step": 11502 }, { "epoch": 3.6135618509138743, "grad_norm": 1.0234375, "learning_rate": 5.599492224690575e-06, "loss": 1.207, "step": 11504 }, { "epoch": 3.6141900779394156, "grad_norm": 1.0078125, "learning_rate": 5.596953348143446e-06, "loss": 1.1472, "step": 11506 }, { "epoch": 3.614818304964957, "grad_norm": 0.94921875, "learning_rate": 5.594414471596319e-06, "loss": 1.1576, "step": 11508 }, { "epoch": 3.6154465319904983, "grad_norm": 0.9375, "learning_rate": 5.591875595049191e-06, "loss": 1.2488, "step": 11510 }, { "epoch": 3.6160747590160396, "grad_norm": 0.98046875, "learning_rate": 5.589336718502063e-06, "loss": 1.2093, "step": 11512 }, { "epoch": 3.616702986041581, "grad_norm": 0.93359375, "learning_rate": 5.586797841954935e-06, "loss": 1.1613, "step": 11514 }, { "epoch": 3.6173312130671222, "grad_norm": 0.984375, "learning_rate": 5.5842589654078076e-06, "loss": 1.1537, "step": 11516 }, { "epoch": 3.6179594400926636, "grad_norm": 0.9140625, "learning_rate": 5.581720088860679e-06, "loss": 1.2531, "step": 11518 }, { "epoch": 3.618587667118205, "grad_norm": 0.90625, "learning_rate": 5.579181212313551e-06, "loss": 1.101, "step": 11520 }, { "epoch": 3.6192158941437462, "grad_norm": 1.0, "learning_rate": 5.576642335766423e-06, "loss": 1.3074, "step": 11522 }, { "epoch": 3.6198441211692876, "grad_norm": 0.90625, "learning_rate": 5.574103459219296e-06, "loss": 1.1264, "step": 11524 }, { "epoch": 3.620472348194829, "grad_norm": 1.078125, "learning_rate": 5.571564582672168e-06, "loss": 1.0794, "step": 11526 }, { "epoch": 3.62110057522037, "grad_norm": 1.1953125, "learning_rate": 5.56902570612504e-06, "loss": 1.0671, "step": 11528 }, { "epoch": 3.6217288022459115, "grad_norm": 0.9296875, "learning_rate": 5.5664868295779115e-06, "loss": 1.0818, "step": 11530 }, { "epoch": 3.622357029271453, "grad_norm": 1.0078125, "learning_rate": 5.563947953030784e-06, "loss": 1.099, "step": 11532 }, { "epoch": 3.622985256296994, "grad_norm": 0.9453125, "learning_rate": 5.561409076483656e-06, "loss": 1.0239, "step": 11534 }, { "epoch": 3.6236134833225355, "grad_norm": 0.9609375, "learning_rate": 5.558870199936528e-06, "loss": 1.3068, "step": 11536 }, { "epoch": 3.624241710348077, "grad_norm": 0.99609375, "learning_rate": 5.556331323389401e-06, "loss": 1.1156, "step": 11538 }, { "epoch": 3.624869937373618, "grad_norm": 0.8828125, "learning_rate": 5.5537924468422735e-06, "loss": 1.1805, "step": 11540 }, { "epoch": 3.6254981643991595, "grad_norm": 0.87890625, "learning_rate": 5.551253570295146e-06, "loss": 1.0772, "step": 11542 }, { "epoch": 3.626126391424701, "grad_norm": 1.078125, "learning_rate": 5.548714693748017e-06, "loss": 1.0592, "step": 11544 }, { "epoch": 3.6267546184502426, "grad_norm": 0.90625, "learning_rate": 5.546175817200889e-06, "loss": 1.065, "step": 11546 }, { "epoch": 3.627382845475784, "grad_norm": 1.0390625, "learning_rate": 5.5436369406537615e-06, "loss": 1.3523, "step": 11548 }, { "epoch": 3.6280110725013253, "grad_norm": 0.92578125, "learning_rate": 5.541098064106634e-06, "loss": 1.1195, "step": 11550 }, { "epoch": 3.6286392995268666, "grad_norm": 0.953125, "learning_rate": 5.538559187559506e-06, "loss": 1.0374, "step": 11552 }, { "epoch": 3.629267526552408, "grad_norm": 0.94140625, "learning_rate": 5.536020311012377e-06, "loss": 1.1696, "step": 11554 }, { "epoch": 3.6298957535779492, "grad_norm": 0.9765625, "learning_rate": 5.53348143446525e-06, "loss": 0.9872, "step": 11556 }, { "epoch": 3.6305239806034906, "grad_norm": 0.9375, "learning_rate": 5.530942557918122e-06, "loss": 1.0936, "step": 11558 }, { "epoch": 3.631152207629032, "grad_norm": 0.96484375, "learning_rate": 5.528403681370994e-06, "loss": 1.1922, "step": 11560 }, { "epoch": 3.6317804346545732, "grad_norm": 0.921875, "learning_rate": 5.525864804823866e-06, "loss": 1.2428, "step": 11562 }, { "epoch": 3.6324086616801146, "grad_norm": 1.03125, "learning_rate": 5.5233259282767385e-06, "loss": 1.0423, "step": 11564 }, { "epoch": 3.633036888705656, "grad_norm": 0.94921875, "learning_rate": 5.52078705172961e-06, "loss": 1.1871, "step": 11566 }, { "epoch": 3.633665115731197, "grad_norm": 0.9453125, "learning_rate": 5.518248175182482e-06, "loss": 1.1536, "step": 11568 }, { "epoch": 3.634293342756739, "grad_norm": 0.9765625, "learning_rate": 5.515709298635354e-06, "loss": 1.1891, "step": 11570 }, { "epoch": 3.6349215697822803, "grad_norm": 0.9765625, "learning_rate": 5.513170422088227e-06, "loss": 1.059, "step": 11572 }, { "epoch": 3.6355497968078216, "grad_norm": 0.94140625, "learning_rate": 5.510631545541099e-06, "loss": 1.0851, "step": 11574 }, { "epoch": 3.636178023833363, "grad_norm": 0.953125, "learning_rate": 5.508092668993971e-06, "loss": 1.1382, "step": 11576 }, { "epoch": 3.6368062508589043, "grad_norm": 1.0078125, "learning_rate": 5.5055537924468424e-06, "loss": 1.2336, "step": 11578 }, { "epoch": 3.6374344778844456, "grad_norm": 0.94140625, "learning_rate": 5.503014915899715e-06, "loss": 1.1593, "step": 11580 }, { "epoch": 3.638062704909987, "grad_norm": 1.015625, "learning_rate": 5.500476039352587e-06, "loss": 1.1513, "step": 11582 }, { "epoch": 3.6386909319355283, "grad_norm": 0.94140625, "learning_rate": 5.497937162805459e-06, "loss": 0.9616, "step": 11584 }, { "epoch": 3.6393191589610696, "grad_norm": 0.9375, "learning_rate": 5.495398286258331e-06, "loss": 1.1793, "step": 11586 }, { "epoch": 3.639947385986611, "grad_norm": 0.98046875, "learning_rate": 5.492859409711203e-06, "loss": 1.1944, "step": 11588 }, { "epoch": 3.6405756130121523, "grad_norm": 0.9765625, "learning_rate": 5.490320533164075e-06, "loss": 1.0856, "step": 11590 }, { "epoch": 3.6412038400376936, "grad_norm": 0.99609375, "learning_rate": 5.487781656616947e-06, "loss": 1.1474, "step": 11592 }, { "epoch": 3.641832067063235, "grad_norm": 1.0078125, "learning_rate": 5.4852427800698194e-06, "loss": 1.3082, "step": 11594 }, { "epoch": 3.6424602940887763, "grad_norm": 0.91015625, "learning_rate": 5.482703903522692e-06, "loss": 1.2301, "step": 11596 }, { "epoch": 3.6430885211143176, "grad_norm": 1.03125, "learning_rate": 5.480165026975564e-06, "loss": 1.1918, "step": 11598 }, { "epoch": 3.643716748139859, "grad_norm": 0.8984375, "learning_rate": 5.477626150428435e-06, "loss": 1.1904, "step": 11600 }, { "epoch": 3.6443449751654002, "grad_norm": 1.0078125, "learning_rate": 5.4750872738813075e-06, "loss": 1.1756, "step": 11602 }, { "epoch": 3.6449732021909416, "grad_norm": 0.890625, "learning_rate": 5.47254839733418e-06, "loss": 1.3296, "step": 11604 }, { "epoch": 3.645601429216483, "grad_norm": 0.95703125, "learning_rate": 5.470009520787052e-06, "loss": 1.1642, "step": 11606 }, { "epoch": 3.646229656242024, "grad_norm": 0.85546875, "learning_rate": 5.467470644239924e-06, "loss": 1.1055, "step": 11608 }, { "epoch": 3.6468578832675655, "grad_norm": 0.92578125, "learning_rate": 5.4649317676927956e-06, "loss": 1.1195, "step": 11610 }, { "epoch": 3.6474861102931073, "grad_norm": 0.875, "learning_rate": 5.462392891145668e-06, "loss": 1.2059, "step": 11612 }, { "epoch": 3.6481143373186486, "grad_norm": 0.93359375, "learning_rate": 5.45985401459854e-06, "loss": 1.2452, "step": 11614 }, { "epoch": 3.64874256434419, "grad_norm": 0.9453125, "learning_rate": 5.457315138051412e-06, "loss": 1.0831, "step": 11616 }, { "epoch": 3.6493707913697313, "grad_norm": 1.015625, "learning_rate": 5.4547762615042845e-06, "loss": 1.1822, "step": 11618 }, { "epoch": 3.6499990183952726, "grad_norm": 0.96875, "learning_rate": 5.452237384957157e-06, "loss": 1.1627, "step": 11620 }, { "epoch": 3.650627245420814, "grad_norm": 0.953125, "learning_rate": 5.449698508410028e-06, "loss": 1.1668, "step": 11622 }, { "epoch": 3.6512554724463553, "grad_norm": 0.94921875, "learning_rate": 5.447159631862902e-06, "loss": 1.1588, "step": 11624 }, { "epoch": 3.6518836994718966, "grad_norm": 0.87890625, "learning_rate": 5.444620755315773e-06, "loss": 1.2683, "step": 11626 }, { "epoch": 3.652511926497438, "grad_norm": 1.0, "learning_rate": 5.442081878768646e-06, "loss": 0.9844, "step": 11628 }, { "epoch": 3.6531401535229793, "grad_norm": 0.88671875, "learning_rate": 5.439543002221518e-06, "loss": 1.2026, "step": 11630 }, { "epoch": 3.6537683805485206, "grad_norm": 0.96875, "learning_rate": 5.43700412567439e-06, "loss": 1.0655, "step": 11632 }, { "epoch": 3.654396607574062, "grad_norm": 0.98828125, "learning_rate": 5.434465249127262e-06, "loss": 1.1819, "step": 11634 }, { "epoch": 3.6550248345996037, "grad_norm": 0.921875, "learning_rate": 5.431926372580134e-06, "loss": 1.0078, "step": 11636 }, { "epoch": 3.655653061625145, "grad_norm": 0.8984375, "learning_rate": 5.429387496033006e-06, "loss": 1.2018, "step": 11638 }, { "epoch": 3.6562812886506864, "grad_norm": 0.953125, "learning_rate": 5.426848619485878e-06, "loss": 1.035, "step": 11640 }, { "epoch": 3.6569095156762277, "grad_norm": 0.92578125, "learning_rate": 5.42430974293875e-06, "loss": 1.2414, "step": 11642 }, { "epoch": 3.657537742701769, "grad_norm": 0.9375, "learning_rate": 5.421770866391623e-06, "loss": 1.238, "step": 11644 }, { "epoch": 3.6581659697273103, "grad_norm": 0.953125, "learning_rate": 5.419231989844495e-06, "loss": 1.0806, "step": 11646 }, { "epoch": 3.6587941967528517, "grad_norm": 0.984375, "learning_rate": 5.416693113297366e-06, "loss": 1.2051, "step": 11648 }, { "epoch": 3.659422423778393, "grad_norm": 0.94921875, "learning_rate": 5.4141542367502385e-06, "loss": 1.0907, "step": 11650 }, { "epoch": 3.6600506508039343, "grad_norm": 0.9765625, "learning_rate": 5.411615360203111e-06, "loss": 1.1259, "step": 11652 }, { "epoch": 3.6606788778294757, "grad_norm": 0.9921875, "learning_rate": 5.409076483655983e-06, "loss": 0.9945, "step": 11654 }, { "epoch": 3.661307104855017, "grad_norm": 0.92578125, "learning_rate": 5.406537607108855e-06, "loss": 1.1954, "step": 11656 }, { "epoch": 3.6619353318805583, "grad_norm": 0.890625, "learning_rate": 5.403998730561727e-06, "loss": 1.3123, "step": 11658 }, { "epoch": 3.6625635589060996, "grad_norm": 0.97265625, "learning_rate": 5.401459854014599e-06, "loss": 1.1593, "step": 11660 }, { "epoch": 3.663191785931641, "grad_norm": 0.9765625, "learning_rate": 5.398920977467471e-06, "loss": 1.1683, "step": 11662 }, { "epoch": 3.6638200129571823, "grad_norm": 0.94140625, "learning_rate": 5.396382100920343e-06, "loss": 1.0364, "step": 11664 }, { "epoch": 3.6644482399827236, "grad_norm": 0.921875, "learning_rate": 5.3938432243732154e-06, "loss": 1.1765, "step": 11666 }, { "epoch": 3.665076467008265, "grad_norm": 1.0078125, "learning_rate": 5.391304347826088e-06, "loss": 1.1056, "step": 11668 }, { "epoch": 3.6657046940338063, "grad_norm": 1.0234375, "learning_rate": 5.388765471278959e-06, "loss": 1.1184, "step": 11670 }, { "epoch": 3.6663329210593476, "grad_norm": 0.96875, "learning_rate": 5.386226594731831e-06, "loss": 1.27, "step": 11672 }, { "epoch": 3.666961148084889, "grad_norm": 0.98046875, "learning_rate": 5.3836877181847035e-06, "loss": 1.0209, "step": 11674 }, { "epoch": 3.6675893751104303, "grad_norm": 0.921875, "learning_rate": 5.381148841637576e-06, "loss": 1.3329, "step": 11676 }, { "epoch": 3.668217602135972, "grad_norm": 1.0078125, "learning_rate": 5.378609965090448e-06, "loss": 1.1848, "step": 11678 }, { "epoch": 3.6688458291615134, "grad_norm": 0.9453125, "learning_rate": 5.37607108854332e-06, "loss": 1.3145, "step": 11680 }, { "epoch": 3.6694740561870547, "grad_norm": 0.9609375, "learning_rate": 5.373532211996192e-06, "loss": 1.0256, "step": 11682 }, { "epoch": 3.670102283212596, "grad_norm": 0.94921875, "learning_rate": 5.370993335449064e-06, "loss": 1.2281, "step": 11684 }, { "epoch": 3.6707305102381373, "grad_norm": 0.9765625, "learning_rate": 5.368454458901936e-06, "loss": 1.226, "step": 11686 }, { "epoch": 3.6713587372636787, "grad_norm": 0.87890625, "learning_rate": 5.365915582354808e-06, "loss": 1.2453, "step": 11688 }, { "epoch": 3.67198696428922, "grad_norm": 0.91796875, "learning_rate": 5.3633767058076805e-06, "loss": 1.2313, "step": 11690 }, { "epoch": 3.6726151913147613, "grad_norm": 0.90625, "learning_rate": 5.360837829260552e-06, "loss": 1.2045, "step": 11692 }, { "epoch": 3.6732434183403027, "grad_norm": 0.890625, "learning_rate": 5.358298952713424e-06, "loss": 1.2634, "step": 11694 }, { "epoch": 3.673871645365844, "grad_norm": 0.94921875, "learning_rate": 5.355760076166296e-06, "loss": 1.0967, "step": 11696 }, { "epoch": 3.6744998723913853, "grad_norm": 1.0859375, "learning_rate": 5.3532211996191686e-06, "loss": 1.0834, "step": 11698 }, { "epoch": 3.6751280994169266, "grad_norm": 0.95703125, "learning_rate": 5.350682323072041e-06, "loss": 1.0638, "step": 11700 }, { "epoch": 3.6757563264424684, "grad_norm": 0.9609375, "learning_rate": 5.348143446524913e-06, "loss": 1.1455, "step": 11702 }, { "epoch": 3.6763845534680097, "grad_norm": 0.90625, "learning_rate": 5.345604569977784e-06, "loss": 1.1827, "step": 11704 }, { "epoch": 3.677012780493551, "grad_norm": 0.89453125, "learning_rate": 5.343065693430657e-06, "loss": 1.2791, "step": 11706 }, { "epoch": 3.6776410075190924, "grad_norm": 0.9453125, "learning_rate": 5.34052681688353e-06, "loss": 1.1829, "step": 11708 }, { "epoch": 3.6782692345446337, "grad_norm": 0.91796875, "learning_rate": 5.337987940336402e-06, "loss": 1.188, "step": 11710 }, { "epoch": 3.678897461570175, "grad_norm": 0.890625, "learning_rate": 5.335449063789274e-06, "loss": 1.2239, "step": 11712 }, { "epoch": 3.6795256885957164, "grad_norm": 0.90625, "learning_rate": 5.332910187242146e-06, "loss": 1.2597, "step": 11714 }, { "epoch": 3.6801539156212577, "grad_norm": 0.9765625, "learning_rate": 5.330371310695019e-06, "loss": 1.2832, "step": 11716 }, { "epoch": 3.680782142646799, "grad_norm": 1.0859375, "learning_rate": 5.32783243414789e-06, "loss": 1.032, "step": 11718 }, { "epoch": 3.6814103696723404, "grad_norm": 0.875, "learning_rate": 5.325293557600762e-06, "loss": 1.3036, "step": 11720 }, { "epoch": 3.6820385966978817, "grad_norm": 0.953125, "learning_rate": 5.3227546810536345e-06, "loss": 1.2033, "step": 11722 }, { "epoch": 3.682666823723423, "grad_norm": 0.90234375, "learning_rate": 5.320215804506507e-06, "loss": 1.2431, "step": 11724 }, { "epoch": 3.6832950507489643, "grad_norm": 0.9140625, "learning_rate": 5.317676927959379e-06, "loss": 1.2312, "step": 11726 }, { "epoch": 3.6839232777745057, "grad_norm": 0.97265625, "learning_rate": 5.315138051412251e-06, "loss": 1.1801, "step": 11728 }, { "epoch": 3.684551504800047, "grad_norm": 0.95703125, "learning_rate": 5.3125991748651225e-06, "loss": 1.1082, "step": 11730 }, { "epoch": 3.6851797318255883, "grad_norm": 0.99609375, "learning_rate": 5.310060298317995e-06, "loss": 1.2264, "step": 11732 }, { "epoch": 3.6858079588511297, "grad_norm": 0.98828125, "learning_rate": 5.307521421770867e-06, "loss": 1.1202, "step": 11734 }, { "epoch": 3.686436185876671, "grad_norm": 0.90234375, "learning_rate": 5.304982545223739e-06, "loss": 1.2169, "step": 11736 }, { "epoch": 3.6870644129022123, "grad_norm": 0.94921875, "learning_rate": 5.3024436686766115e-06, "loss": 1.1606, "step": 11738 }, { "epoch": 3.6876926399277536, "grad_norm": 1.015625, "learning_rate": 5.299904792129484e-06, "loss": 1.0209, "step": 11740 }, { "epoch": 3.688320866953295, "grad_norm": 0.9453125, "learning_rate": 5.297365915582355e-06, "loss": 1.406, "step": 11742 }, { "epoch": 3.6889490939788367, "grad_norm": 0.953125, "learning_rate": 5.294827039035227e-06, "loss": 1.2754, "step": 11744 }, { "epoch": 3.689577321004378, "grad_norm": 0.9609375, "learning_rate": 5.2922881624880995e-06, "loss": 1.2359, "step": 11746 }, { "epoch": 3.6902055480299194, "grad_norm": 1.0234375, "learning_rate": 5.289749285940972e-06, "loss": 1.1256, "step": 11748 }, { "epoch": 3.6908337750554607, "grad_norm": 0.95703125, "learning_rate": 5.287210409393844e-06, "loss": 1.1917, "step": 11750 }, { "epoch": 3.691462002081002, "grad_norm": 1.0234375, "learning_rate": 5.284671532846715e-06, "loss": 1.1793, "step": 11752 }, { "epoch": 3.6920902291065434, "grad_norm": 0.96875, "learning_rate": 5.282132656299588e-06, "loss": 1.0178, "step": 11754 }, { "epoch": 3.6927184561320847, "grad_norm": 1.0390625, "learning_rate": 5.27959377975246e-06, "loss": 1.0539, "step": 11756 }, { "epoch": 3.693346683157626, "grad_norm": 0.95703125, "learning_rate": 5.277054903205332e-06, "loss": 1.2223, "step": 11758 }, { "epoch": 3.6939749101831674, "grad_norm": 0.921875, "learning_rate": 5.274516026658204e-06, "loss": 1.237, "step": 11760 }, { "epoch": 3.6946031372087087, "grad_norm": 0.90625, "learning_rate": 5.2719771501110765e-06, "loss": 1.0302, "step": 11762 }, { "epoch": 3.69523136423425, "grad_norm": 1.0, "learning_rate": 5.269438273563948e-06, "loss": 1.2322, "step": 11764 }, { "epoch": 3.6958595912597914, "grad_norm": 0.98828125, "learning_rate": 5.26689939701682e-06, "loss": 1.1811, "step": 11766 }, { "epoch": 3.696487818285333, "grad_norm": 0.90625, "learning_rate": 5.264360520469692e-06, "loss": 1.0611, "step": 11768 }, { "epoch": 3.6971160453108745, "grad_norm": 0.921875, "learning_rate": 5.261821643922565e-06, "loss": 1.075, "step": 11770 }, { "epoch": 3.697744272336416, "grad_norm": 0.9765625, "learning_rate": 5.259282767375437e-06, "loss": 1.1552, "step": 11772 }, { "epoch": 3.698372499361957, "grad_norm": 1.0234375, "learning_rate": 5.256743890828308e-06, "loss": 1.0725, "step": 11774 }, { "epoch": 3.6990007263874984, "grad_norm": 0.9453125, "learning_rate": 5.2542050142811804e-06, "loss": 1.1973, "step": 11776 }, { "epoch": 3.6996289534130398, "grad_norm": 0.9296875, "learning_rate": 5.251666137734053e-06, "loss": 1.0253, "step": 11778 }, { "epoch": 3.700257180438581, "grad_norm": 0.98046875, "learning_rate": 5.249127261186925e-06, "loss": 1.1157, "step": 11780 }, { "epoch": 3.7008854074641224, "grad_norm": 0.85546875, "learning_rate": 5.246588384639797e-06, "loss": 1.1586, "step": 11782 }, { "epoch": 3.7015136344896638, "grad_norm": 1.0859375, "learning_rate": 5.244049508092669e-06, "loss": 1.15, "step": 11784 }, { "epoch": 3.702141861515205, "grad_norm": 0.953125, "learning_rate": 5.241510631545541e-06, "loss": 1.1173, "step": 11786 }, { "epoch": 3.7027700885407464, "grad_norm": 1.0390625, "learning_rate": 5.238971754998413e-06, "loss": 1.2061, "step": 11788 }, { "epoch": 3.7033983155662877, "grad_norm": 0.9921875, "learning_rate": 5.236432878451285e-06, "loss": 0.9934, "step": 11790 }, { "epoch": 3.704026542591829, "grad_norm": 0.9296875, "learning_rate": 5.233894001904157e-06, "loss": 1.0623, "step": 11792 }, { "epoch": 3.7046547696173704, "grad_norm": 0.89453125, "learning_rate": 5.2313551253570305e-06, "loss": 1.2121, "step": 11794 }, { "epoch": 3.7052829966429117, "grad_norm": 0.90625, "learning_rate": 5.228816248809903e-06, "loss": 1.1443, "step": 11796 }, { "epoch": 3.705911223668453, "grad_norm": 0.93359375, "learning_rate": 5.226277372262775e-06, "loss": 1.2093, "step": 11798 }, { "epoch": 3.7065394506939944, "grad_norm": 1.0546875, "learning_rate": 5.223738495715646e-06, "loss": 1.0593, "step": 11800 }, { "epoch": 3.7071676777195357, "grad_norm": 1.046875, "learning_rate": 5.2211996191685186e-06, "loss": 1.1497, "step": 11802 }, { "epoch": 3.707795904745077, "grad_norm": 0.97265625, "learning_rate": 5.218660742621391e-06, "loss": 1.2319, "step": 11804 }, { "epoch": 3.7084241317706184, "grad_norm": 0.921875, "learning_rate": 5.216121866074263e-06, "loss": 1.1222, "step": 11806 }, { "epoch": 3.7090523587961597, "grad_norm": 0.98046875, "learning_rate": 5.213582989527135e-06, "loss": 1.2875, "step": 11808 }, { "epoch": 3.7096805858217015, "grad_norm": 0.96875, "learning_rate": 5.2110441129800075e-06, "loss": 1.2012, "step": 11810 }, { "epoch": 3.710308812847243, "grad_norm": 0.93359375, "learning_rate": 5.208505236432879e-06, "loss": 1.1301, "step": 11812 }, { "epoch": 3.710937039872784, "grad_norm": 0.875, "learning_rate": 5.205966359885751e-06, "loss": 1.1852, "step": 11814 }, { "epoch": 3.7115652668983254, "grad_norm": 0.88671875, "learning_rate": 5.203427483338623e-06, "loss": 1.2328, "step": 11816 }, { "epoch": 3.7121934939238668, "grad_norm": 0.95703125, "learning_rate": 5.2008886067914955e-06, "loss": 1.2106, "step": 11818 }, { "epoch": 3.712821720949408, "grad_norm": 0.8984375, "learning_rate": 5.198349730244368e-06, "loss": 1.0475, "step": 11820 }, { "epoch": 3.7134499479749494, "grad_norm": 0.98046875, "learning_rate": 5.19581085369724e-06, "loss": 1.1586, "step": 11822 }, { "epoch": 3.7140781750004908, "grad_norm": 1.0234375, "learning_rate": 5.193271977150111e-06, "loss": 1.1229, "step": 11824 }, { "epoch": 3.714706402026032, "grad_norm": 0.8515625, "learning_rate": 5.190733100602984e-06, "loss": 1.1751, "step": 11826 }, { "epoch": 3.7153346290515734, "grad_norm": 1.0390625, "learning_rate": 5.188194224055856e-06, "loss": 1.1472, "step": 11828 }, { "epoch": 3.7159628560771147, "grad_norm": 0.96875, "learning_rate": 5.185655347508728e-06, "loss": 1.212, "step": 11830 }, { "epoch": 3.716591083102656, "grad_norm": 0.94921875, "learning_rate": 5.1831164709616e-06, "loss": 1.0835, "step": 11832 }, { "epoch": 3.717219310128198, "grad_norm": 0.890625, "learning_rate": 5.180577594414472e-06, "loss": 1.1717, "step": 11834 }, { "epoch": 3.717847537153739, "grad_norm": 0.92578125, "learning_rate": 5.178038717867344e-06, "loss": 1.2557, "step": 11836 }, { "epoch": 3.7184757641792805, "grad_norm": 0.91796875, "learning_rate": 5.175499841320216e-06, "loss": 1.3584, "step": 11838 }, { "epoch": 3.719103991204822, "grad_norm": 1.0234375, "learning_rate": 5.172960964773088e-06, "loss": 1.1168, "step": 11840 }, { "epoch": 3.719732218230363, "grad_norm": 0.8828125, "learning_rate": 5.170422088225961e-06, "loss": 1.0798, "step": 11842 }, { "epoch": 3.7203604452559045, "grad_norm": 0.95703125, "learning_rate": 5.167883211678833e-06, "loss": 1.1726, "step": 11844 }, { "epoch": 3.720988672281446, "grad_norm": 0.96875, "learning_rate": 5.165344335131704e-06, "loss": 1.0255, "step": 11846 }, { "epoch": 3.721616899306987, "grad_norm": 0.95703125, "learning_rate": 5.1628054585845764e-06, "loss": 1.2139, "step": 11848 }, { "epoch": 3.7222451263325285, "grad_norm": 0.87890625, "learning_rate": 5.160266582037449e-06, "loss": 1.1871, "step": 11850 }, { "epoch": 3.72287335335807, "grad_norm": 0.9140625, "learning_rate": 5.157727705490321e-06, "loss": 1.1147, "step": 11852 }, { "epoch": 3.723501580383611, "grad_norm": 1.0546875, "learning_rate": 5.155188828943193e-06, "loss": 1.2063, "step": 11854 }, { "epoch": 3.7241298074091524, "grad_norm": 0.94140625, "learning_rate": 5.1526499523960645e-06, "loss": 1.1751, "step": 11856 }, { "epoch": 3.7247580344346938, "grad_norm": 0.87109375, "learning_rate": 5.150111075848937e-06, "loss": 1.3506, "step": 11858 }, { "epoch": 3.725386261460235, "grad_norm": 0.8828125, "learning_rate": 5.147572199301809e-06, "loss": 1.0863, "step": 11860 }, { "epoch": 3.7260144884857764, "grad_norm": 0.9453125, "learning_rate": 5.145033322754681e-06, "loss": 1.2238, "step": 11862 }, { "epoch": 3.7266427155113178, "grad_norm": 0.97265625, "learning_rate": 5.1424944462075534e-06, "loss": 0.9766, "step": 11864 }, { "epoch": 3.727270942536859, "grad_norm": 0.9375, "learning_rate": 5.139955569660426e-06, "loss": 1.2481, "step": 11866 }, { "epoch": 3.7278991695624004, "grad_norm": 0.9765625, "learning_rate": 5.137416693113297e-06, "loss": 1.2336, "step": 11868 }, { "epoch": 3.7285273965879417, "grad_norm": 0.8671875, "learning_rate": 5.134877816566169e-06, "loss": 1.164, "step": 11870 }, { "epoch": 3.729155623613483, "grad_norm": 0.97265625, "learning_rate": 5.1323389400190415e-06, "loss": 1.0777, "step": 11872 }, { "epoch": 3.7297838506390244, "grad_norm": 0.96484375, "learning_rate": 5.129800063471914e-06, "loss": 1.1229, "step": 11874 }, { "epoch": 3.730412077664566, "grad_norm": 0.92578125, "learning_rate": 5.127261186924786e-06, "loss": 1.0506, "step": 11876 }, { "epoch": 3.7310403046901075, "grad_norm": 0.9609375, "learning_rate": 5.124722310377658e-06, "loss": 1.1452, "step": 11878 }, { "epoch": 3.731668531715649, "grad_norm": 0.96484375, "learning_rate": 5.122183433830531e-06, "loss": 1.199, "step": 11880 }, { "epoch": 3.73229675874119, "grad_norm": 0.8828125, "learning_rate": 5.119644557283403e-06, "loss": 1.1466, "step": 11882 }, { "epoch": 3.7329249857667315, "grad_norm": 0.90625, "learning_rate": 5.117105680736275e-06, "loss": 1.1567, "step": 11884 }, { "epoch": 3.733553212792273, "grad_norm": 0.88671875, "learning_rate": 5.114566804189147e-06, "loss": 1.072, "step": 11886 }, { "epoch": 3.734181439817814, "grad_norm": 0.94140625, "learning_rate": 5.112027927642019e-06, "loss": 1.0938, "step": 11888 }, { "epoch": 3.7348096668433555, "grad_norm": 0.9375, "learning_rate": 5.1094890510948916e-06, "loss": 1.2843, "step": 11890 }, { "epoch": 3.735437893868897, "grad_norm": 0.9296875, "learning_rate": 5.106950174547764e-06, "loss": 1.28, "step": 11892 }, { "epoch": 3.736066120894438, "grad_norm": 1.0078125, "learning_rate": 5.104411298000635e-06, "loss": 1.2049, "step": 11894 }, { "epoch": 3.7366943479199795, "grad_norm": 0.87109375, "learning_rate": 5.101872421453507e-06, "loss": 1.197, "step": 11896 }, { "epoch": 3.737322574945521, "grad_norm": 0.97265625, "learning_rate": 5.09933354490638e-06, "loss": 1.1381, "step": 11898 }, { "epoch": 3.7379508019710626, "grad_norm": 0.91015625, "learning_rate": 5.096794668359252e-06, "loss": 1.1226, "step": 11900 }, { "epoch": 3.738579028996604, "grad_norm": 0.9296875, "learning_rate": 5.094255791812124e-06, "loss": 1.2687, "step": 11902 }, { "epoch": 3.739207256022145, "grad_norm": 0.8984375, "learning_rate": 5.091716915264996e-06, "loss": 1.0659, "step": 11904 }, { "epoch": 3.7398354830476865, "grad_norm": 1.0, "learning_rate": 5.089178038717868e-06, "loss": 1.2246, "step": 11906 }, { "epoch": 3.740463710073228, "grad_norm": 0.87890625, "learning_rate": 5.08663916217074e-06, "loss": 1.1104, "step": 11908 }, { "epoch": 3.741091937098769, "grad_norm": 0.94921875, "learning_rate": 5.084100285623612e-06, "loss": 1.2692, "step": 11910 }, { "epoch": 3.7417201641243105, "grad_norm": 0.9375, "learning_rate": 5.081561409076484e-06, "loss": 1.2342, "step": 11912 }, { "epoch": 3.742348391149852, "grad_norm": 1.0, "learning_rate": 5.079022532529357e-06, "loss": 1.1405, "step": 11914 }, { "epoch": 3.742976618175393, "grad_norm": 0.8984375, "learning_rate": 5.076483655982228e-06, "loss": 1.135, "step": 11916 }, { "epoch": 3.7436048452009345, "grad_norm": 0.96484375, "learning_rate": 5.0739447794351e-06, "loss": 1.1112, "step": 11918 }, { "epoch": 3.744233072226476, "grad_norm": 0.98046875, "learning_rate": 5.0714059028879725e-06, "loss": 1.118, "step": 11920 }, { "epoch": 3.744861299252017, "grad_norm": 1.015625, "learning_rate": 5.068867026340845e-06, "loss": 1.1847, "step": 11922 }, { "epoch": 3.7454895262775585, "grad_norm": 0.92578125, "learning_rate": 5.066328149793717e-06, "loss": 1.2068, "step": 11924 }, { "epoch": 3.7461177533031, "grad_norm": 0.93359375, "learning_rate": 5.063789273246589e-06, "loss": 1.0601, "step": 11926 }, { "epoch": 3.746745980328641, "grad_norm": 1.078125, "learning_rate": 5.0612503966994605e-06, "loss": 0.9802, "step": 11928 }, { "epoch": 3.7473742073541825, "grad_norm": 0.9921875, "learning_rate": 5.058711520152333e-06, "loss": 1.1564, "step": 11930 }, { "epoch": 3.748002434379724, "grad_norm": 1.0078125, "learning_rate": 5.056172643605205e-06, "loss": 1.0987, "step": 11932 }, { "epoch": 3.748630661405265, "grad_norm": 0.921875, "learning_rate": 5.053633767058077e-06, "loss": 1.2393, "step": 11934 }, { "epoch": 3.7492588884308065, "grad_norm": 0.94140625, "learning_rate": 5.0510948905109494e-06, "loss": 1.1374, "step": 11936 }, { "epoch": 3.749887115456348, "grad_norm": 0.890625, "learning_rate": 5.048556013963821e-06, "loss": 1.0968, "step": 11938 }, { "epoch": 3.7505153424818896, "grad_norm": 0.89453125, "learning_rate": 5.046017137416693e-06, "loss": 1.1254, "step": 11940 }, { "epoch": 3.751143569507431, "grad_norm": 0.99609375, "learning_rate": 5.043478260869565e-06, "loss": 0.9335, "step": 11942 }, { "epoch": 3.751771796532972, "grad_norm": 0.91015625, "learning_rate": 5.0409393843224375e-06, "loss": 1.2111, "step": 11944 }, { "epoch": 3.7524000235585135, "grad_norm": 1.109375, "learning_rate": 5.03840050777531e-06, "loss": 1.1728, "step": 11946 }, { "epoch": 3.753028250584055, "grad_norm": 0.9765625, "learning_rate": 5.035861631228182e-06, "loss": 1.126, "step": 11948 }, { "epoch": 3.753656477609596, "grad_norm": 0.9140625, "learning_rate": 5.033322754681053e-06, "loss": 1.165, "step": 11950 }, { "epoch": 3.7542847046351375, "grad_norm": 0.99609375, "learning_rate": 5.030783878133926e-06, "loss": 0.9907, "step": 11952 }, { "epoch": 3.754912931660679, "grad_norm": 0.91015625, "learning_rate": 5.028245001586798e-06, "loss": 1.1371, "step": 11954 }, { "epoch": 3.75554115868622, "grad_norm": 0.9375, "learning_rate": 5.02570612503967e-06, "loss": 1.0414, "step": 11956 }, { "epoch": 3.7561693857117615, "grad_norm": 0.99609375, "learning_rate": 5.023167248492542e-06, "loss": 1.2628, "step": 11958 }, { "epoch": 3.756797612737303, "grad_norm": 0.921875, "learning_rate": 5.0206283719454145e-06, "loss": 1.2723, "step": 11960 }, { "epoch": 3.757425839762844, "grad_norm": 0.95703125, "learning_rate": 5.018089495398286e-06, "loss": 1.1672, "step": 11962 }, { "epoch": 3.7580540667883855, "grad_norm": 0.87109375, "learning_rate": 5.015550618851158e-06, "loss": 1.1774, "step": 11964 }, { "epoch": 3.7586822938139273, "grad_norm": 0.890625, "learning_rate": 5.013011742304031e-06, "loss": 1.1269, "step": 11966 }, { "epoch": 3.7593105208394686, "grad_norm": 0.91796875, "learning_rate": 5.010472865756903e-06, "loss": 1.2015, "step": 11968 }, { "epoch": 3.75993874786501, "grad_norm": 0.94921875, "learning_rate": 5.007933989209776e-06, "loss": 1.0714, "step": 11970 }, { "epoch": 3.7605669748905513, "grad_norm": 0.96484375, "learning_rate": 5.005395112662648e-06, "loss": 1.1061, "step": 11972 }, { "epoch": 3.7611952019160926, "grad_norm": 0.92578125, "learning_rate": 5.00285623611552e-06, "loss": 1.2162, "step": 11974 }, { "epoch": 3.761823428941634, "grad_norm": 1.03125, "learning_rate": 5.0003173595683915e-06, "loss": 1.0886, "step": 11976 }, { "epoch": 3.7624516559671752, "grad_norm": 1.015625, "learning_rate": 4.997778483021264e-06, "loss": 1.068, "step": 11978 }, { "epoch": 3.7630798829927166, "grad_norm": 0.94921875, "learning_rate": 4.995239606474135e-06, "loss": 1.0862, "step": 11980 }, { "epoch": 3.763708110018258, "grad_norm": 0.92578125, "learning_rate": 4.992700729927007e-06, "loss": 1.2137, "step": 11982 }, { "epoch": 3.764336337043799, "grad_norm": 0.9375, "learning_rate": 4.9901618533798796e-06, "loss": 1.18, "step": 11984 }, { "epoch": 3.7649645640693405, "grad_norm": 0.91015625, "learning_rate": 4.987622976832752e-06, "loss": 1.1375, "step": 11986 }, { "epoch": 3.765592791094882, "grad_norm": 0.93359375, "learning_rate": 4.985084100285624e-06, "loss": 1.1603, "step": 11988 }, { "epoch": 3.766221018120423, "grad_norm": 1.0234375, "learning_rate": 4.982545223738496e-06, "loss": 1.0825, "step": 11990 }, { "epoch": 3.7668492451459645, "grad_norm": 0.96875, "learning_rate": 4.9800063471913685e-06, "loss": 1.0698, "step": 11992 }, { "epoch": 3.767477472171506, "grad_norm": 1.1015625, "learning_rate": 4.977467470644241e-06, "loss": 1.2823, "step": 11994 }, { "epoch": 3.768105699197047, "grad_norm": 1.0078125, "learning_rate": 4.974928594097113e-06, "loss": 1.2009, "step": 11996 }, { "epoch": 3.7687339262225885, "grad_norm": 0.99609375, "learning_rate": 4.972389717549984e-06, "loss": 1.2377, "step": 11998 }, { "epoch": 3.76936215324813, "grad_norm": 0.98828125, "learning_rate": 4.9698508410028565e-06, "loss": 1.1723, "step": 12000 }, { "epoch": 3.769990380273671, "grad_norm": 0.98046875, "learning_rate": 4.967311964455729e-06, "loss": 1.0863, "step": 12002 }, { "epoch": 3.7706186072992125, "grad_norm": 0.99609375, "learning_rate": 4.964773087908601e-06, "loss": 1.1009, "step": 12004 }, { "epoch": 3.7712468343247543, "grad_norm": 0.93359375, "learning_rate": 4.962234211361473e-06, "loss": 1.0626, "step": 12006 }, { "epoch": 3.7718750613502956, "grad_norm": 1.2265625, "learning_rate": 4.9596953348143455e-06, "loss": 1.1158, "step": 12008 }, { "epoch": 3.772503288375837, "grad_norm": 0.8828125, "learning_rate": 4.957156458267217e-06, "loss": 1.0807, "step": 12010 }, { "epoch": 3.7731315154013783, "grad_norm": 0.96484375, "learning_rate": 4.954617581720089e-06, "loss": 1.1255, "step": 12012 }, { "epoch": 3.7737597424269196, "grad_norm": 1.03125, "learning_rate": 4.952078705172961e-06, "loss": 1.0589, "step": 12014 }, { "epoch": 3.774387969452461, "grad_norm": 0.953125, "learning_rate": 4.9495398286258335e-06, "loss": 1.122, "step": 12016 }, { "epoch": 3.7750161964780022, "grad_norm": 0.98046875, "learning_rate": 4.947000952078706e-06, "loss": 1.0827, "step": 12018 }, { "epoch": 3.7756444235035436, "grad_norm": 0.98828125, "learning_rate": 4.944462075531577e-06, "loss": 1.1193, "step": 12020 }, { "epoch": 3.776272650529085, "grad_norm": 0.95703125, "learning_rate": 4.941923198984449e-06, "loss": 1.0658, "step": 12022 }, { "epoch": 3.7769008775546262, "grad_norm": 0.93359375, "learning_rate": 4.939384322437322e-06, "loss": 1.2408, "step": 12024 }, { "epoch": 3.7775291045801676, "grad_norm": 0.95703125, "learning_rate": 4.936845445890194e-06, "loss": 1.0518, "step": 12026 }, { "epoch": 3.778157331605709, "grad_norm": 0.93359375, "learning_rate": 4.934306569343066e-06, "loss": 1.0622, "step": 12028 }, { "epoch": 3.77878555863125, "grad_norm": 0.96484375, "learning_rate": 4.931767692795938e-06, "loss": 0.9788, "step": 12030 }, { "epoch": 3.779413785656792, "grad_norm": 1.0234375, "learning_rate": 4.9292288162488105e-06, "loss": 1.1672, "step": 12032 }, { "epoch": 3.7800420126823333, "grad_norm": 0.9609375, "learning_rate": 4.926689939701683e-06, "loss": 1.0933, "step": 12034 }, { "epoch": 3.7806702397078746, "grad_norm": 0.96875, "learning_rate": 4.924151063154555e-06, "loss": 1.1878, "step": 12036 }, { "epoch": 3.781298466733416, "grad_norm": 0.99609375, "learning_rate": 4.921612186607427e-06, "loss": 1.0392, "step": 12038 }, { "epoch": 3.7819266937589573, "grad_norm": 0.98046875, "learning_rate": 4.919073310060299e-06, "loss": 1.1429, "step": 12040 }, { "epoch": 3.7825549207844986, "grad_norm": 0.9921875, "learning_rate": 4.916534433513171e-06, "loss": 1.1327, "step": 12042 }, { "epoch": 3.78318314781004, "grad_norm": 0.92578125, "learning_rate": 4.913995556966043e-06, "loss": 1.2193, "step": 12044 }, { "epoch": 3.7838113748355813, "grad_norm": 0.92578125, "learning_rate": 4.911456680418915e-06, "loss": 1.087, "step": 12046 }, { "epoch": 3.7844396018611226, "grad_norm": 0.99609375, "learning_rate": 4.9089178038717875e-06, "loss": 1.0708, "step": 12048 }, { "epoch": 3.785067828886664, "grad_norm": 1.03125, "learning_rate": 4.906378927324659e-06, "loss": 1.0847, "step": 12050 }, { "epoch": 3.7856960559122053, "grad_norm": 0.94921875, "learning_rate": 4.903840050777531e-06, "loss": 1.2207, "step": 12052 }, { "epoch": 3.7863242829377466, "grad_norm": 1.0078125, "learning_rate": 4.901301174230403e-06, "loss": 1.1499, "step": 12054 }, { "epoch": 3.786952509963288, "grad_norm": 1.0078125, "learning_rate": 4.8987622976832756e-06, "loss": 1.1413, "step": 12056 }, { "epoch": 3.7875807369888292, "grad_norm": 1.0234375, "learning_rate": 4.896223421136148e-06, "loss": 1.1915, "step": 12058 }, { "epoch": 3.7882089640143706, "grad_norm": 0.9453125, "learning_rate": 4.89368454458902e-06, "loss": 1.2213, "step": 12060 }, { "epoch": 3.788837191039912, "grad_norm": 1.1171875, "learning_rate": 4.891145668041891e-06, "loss": 1.0897, "step": 12062 }, { "epoch": 3.7894654180654532, "grad_norm": 0.953125, "learning_rate": 4.888606791494764e-06, "loss": 1.2109, "step": 12064 }, { "epoch": 3.7900936450909946, "grad_norm": 0.9609375, "learning_rate": 4.886067914947636e-06, "loss": 1.2414, "step": 12066 }, { "epoch": 3.790721872116536, "grad_norm": 0.98828125, "learning_rate": 4.883529038400508e-06, "loss": 1.1439, "step": 12068 }, { "epoch": 3.791350099142077, "grad_norm": 1.03125, "learning_rate": 4.88099016185338e-06, "loss": 1.1465, "step": 12070 }, { "epoch": 3.791978326167619, "grad_norm": 0.94140625, "learning_rate": 4.878451285306252e-06, "loss": 1.2355, "step": 12072 }, { "epoch": 3.7926065531931603, "grad_norm": 1.015625, "learning_rate": 4.875912408759125e-06, "loss": 1.1857, "step": 12074 }, { "epoch": 3.7932347802187016, "grad_norm": 0.94921875, "learning_rate": 4.873373532211997e-06, "loss": 1.1957, "step": 12076 }, { "epoch": 3.793863007244243, "grad_norm": 0.92578125, "learning_rate": 4.870834655664869e-06, "loss": 1.2572, "step": 12078 }, { "epoch": 3.7944912342697843, "grad_norm": 0.9921875, "learning_rate": 4.868295779117741e-06, "loss": 1.2248, "step": 12080 }, { "epoch": 3.7951194612953256, "grad_norm": 1.046875, "learning_rate": 4.865756902570613e-06, "loss": 1.2293, "step": 12082 }, { "epoch": 3.795747688320867, "grad_norm": 0.9921875, "learning_rate": 4.863218026023485e-06, "loss": 1.2331, "step": 12084 }, { "epoch": 3.7963759153464083, "grad_norm": 0.8828125, "learning_rate": 4.860679149476357e-06, "loss": 0.993, "step": 12086 }, { "epoch": 3.7970041423719496, "grad_norm": 1.015625, "learning_rate": 4.8581402729292295e-06, "loss": 1.0923, "step": 12088 }, { "epoch": 3.797632369397491, "grad_norm": 0.88671875, "learning_rate": 4.855601396382102e-06, "loss": 1.2149, "step": 12090 }, { "epoch": 3.7982605964230323, "grad_norm": 1.09375, "learning_rate": 4.853062519834973e-06, "loss": 1.0396, "step": 12092 }, { "epoch": 3.7988888234485736, "grad_norm": 0.8515625, "learning_rate": 4.850523643287845e-06, "loss": 1.1284, "step": 12094 }, { "epoch": 3.799517050474115, "grad_norm": 1.0, "learning_rate": 4.847984766740718e-06, "loss": 1.0428, "step": 12096 }, { "epoch": 3.8001452774996567, "grad_norm": 0.87109375, "learning_rate": 4.84544589019359e-06, "loss": 1.0858, "step": 12098 }, { "epoch": 3.800773504525198, "grad_norm": 0.90234375, "learning_rate": 4.842907013646462e-06, "loss": 1.1251, "step": 12100 }, { "epoch": 3.8014017315507393, "grad_norm": 0.94921875, "learning_rate": 4.8403681370993335e-06, "loss": 1.1747, "step": 12102 }, { "epoch": 3.8020299585762807, "grad_norm": 0.88671875, "learning_rate": 4.837829260552206e-06, "loss": 1.1914, "step": 12104 }, { "epoch": 3.802658185601822, "grad_norm": 0.9765625, "learning_rate": 4.835290384005078e-06, "loss": 1.2628, "step": 12106 }, { "epoch": 3.8032864126273633, "grad_norm": 0.90625, "learning_rate": 4.83275150745795e-06, "loss": 1.1276, "step": 12108 }, { "epoch": 3.8039146396529047, "grad_norm": 1.03125, "learning_rate": 4.830212630910822e-06, "loss": 1.0146, "step": 12110 }, { "epoch": 3.804542866678446, "grad_norm": 1.0234375, "learning_rate": 4.827673754363695e-06, "loss": 1.1389, "step": 12112 }, { "epoch": 3.8051710937039873, "grad_norm": 0.87109375, "learning_rate": 4.825134877816566e-06, "loss": 1.2158, "step": 12114 }, { "epoch": 3.8057993207295286, "grad_norm": 0.93359375, "learning_rate": 4.822596001269439e-06, "loss": 1.0963, "step": 12116 }, { "epoch": 3.80642754775507, "grad_norm": 1.015625, "learning_rate": 4.820057124722311e-06, "loss": 1.2093, "step": 12118 }, { "epoch": 3.8070557747806113, "grad_norm": 0.98046875, "learning_rate": 4.8175182481751835e-06, "loss": 1.249, "step": 12120 }, { "epoch": 3.8076840018061526, "grad_norm": 1.0234375, "learning_rate": 4.814979371628055e-06, "loss": 1.1076, "step": 12122 }, { "epoch": 3.808312228831694, "grad_norm": 0.90625, "learning_rate": 4.812440495080927e-06, "loss": 1.0584, "step": 12124 }, { "epoch": 3.8089404558572353, "grad_norm": 0.95703125, "learning_rate": 4.809901618533799e-06, "loss": 1.1624, "step": 12126 }, { "epoch": 3.8095686828827766, "grad_norm": 0.9765625, "learning_rate": 4.807362741986672e-06, "loss": 1.2014, "step": 12128 }, { "epoch": 3.810196909908318, "grad_norm": 0.98046875, "learning_rate": 4.804823865439544e-06, "loss": 1.1427, "step": 12130 }, { "epoch": 3.8108251369338593, "grad_norm": 1.0234375, "learning_rate": 4.802284988892415e-06, "loss": 1.0649, "step": 12132 }, { "epoch": 3.8114533639594006, "grad_norm": 0.92578125, "learning_rate": 4.7997461123452874e-06, "loss": 1.1226, "step": 12134 }, { "epoch": 3.812081590984942, "grad_norm": 0.9453125, "learning_rate": 4.79720723579816e-06, "loss": 1.2498, "step": 12136 }, { "epoch": 3.8127098180104837, "grad_norm": 1.0390625, "learning_rate": 4.794668359251032e-06, "loss": 1.2386, "step": 12138 }, { "epoch": 3.813338045036025, "grad_norm": 0.9609375, "learning_rate": 4.792129482703904e-06, "loss": 1.2659, "step": 12140 }, { "epoch": 3.8139662720615664, "grad_norm": 0.90625, "learning_rate": 4.789590606156776e-06, "loss": 1.2739, "step": 12142 }, { "epoch": 3.8145944990871077, "grad_norm": 0.8984375, "learning_rate": 4.787051729609648e-06, "loss": 1.1676, "step": 12144 }, { "epoch": 3.815222726112649, "grad_norm": 0.90234375, "learning_rate": 4.78451285306252e-06, "loss": 1.0749, "step": 12146 }, { "epoch": 3.8158509531381903, "grad_norm": 0.93359375, "learning_rate": 4.781973976515392e-06, "loss": 1.1828, "step": 12148 }, { "epoch": 3.8164791801637317, "grad_norm": 0.94921875, "learning_rate": 4.779435099968264e-06, "loss": 1.1831, "step": 12150 }, { "epoch": 3.817107407189273, "grad_norm": 0.9140625, "learning_rate": 4.776896223421137e-06, "loss": 1.2788, "step": 12152 }, { "epoch": 3.8177356342148143, "grad_norm": 0.95703125, "learning_rate": 4.774357346874008e-06, "loss": 0.9748, "step": 12154 }, { "epoch": 3.8183638612403557, "grad_norm": 1.0234375, "learning_rate": 4.77181847032688e-06, "loss": 1.0512, "step": 12156 }, { "epoch": 3.818992088265897, "grad_norm": 0.921875, "learning_rate": 4.7692795937797525e-06, "loss": 1.2132, "step": 12158 }, { "epoch": 3.8196203152914383, "grad_norm": 1.03125, "learning_rate": 4.7667407172326256e-06, "loss": 1.1206, "step": 12160 }, { "epoch": 3.8202485423169796, "grad_norm": 0.99609375, "learning_rate": 4.764201840685497e-06, "loss": 1.1343, "step": 12162 }, { "epoch": 3.8208767693425214, "grad_norm": 0.8671875, "learning_rate": 4.761662964138369e-06, "loss": 1.2469, "step": 12164 }, { "epoch": 3.8215049963680627, "grad_norm": 1.0390625, "learning_rate": 4.759124087591241e-06, "loss": 1.1596, "step": 12166 }, { "epoch": 3.822133223393604, "grad_norm": 0.96875, "learning_rate": 4.756585211044114e-06, "loss": 1.149, "step": 12168 }, { "epoch": 3.8227614504191454, "grad_norm": 1.0078125, "learning_rate": 4.754046334496986e-06, "loss": 1.0773, "step": 12170 }, { "epoch": 3.8233896774446867, "grad_norm": 0.90625, "learning_rate": 4.751507457949858e-06, "loss": 1.2292, "step": 12172 }, { "epoch": 3.824017904470228, "grad_norm": 0.9375, "learning_rate": 4.7489685814027295e-06, "loss": 1.267, "step": 12174 }, { "epoch": 3.8246461314957694, "grad_norm": 0.984375, "learning_rate": 4.746429704855602e-06, "loss": 1.1683, "step": 12176 }, { "epoch": 3.8252743585213107, "grad_norm": 0.93359375, "learning_rate": 4.743890828308474e-06, "loss": 1.1635, "step": 12178 }, { "epoch": 3.825902585546852, "grad_norm": 0.91796875, "learning_rate": 4.741351951761346e-06, "loss": 1.1106, "step": 12180 }, { "epoch": 3.8265308125723934, "grad_norm": 0.9609375, "learning_rate": 4.738813075214218e-06, "loss": 1.1508, "step": 12182 }, { "epoch": 3.8271590395979347, "grad_norm": 0.94921875, "learning_rate": 4.73627419866709e-06, "loss": 1.0409, "step": 12184 }, { "epoch": 3.827787266623476, "grad_norm": 1.0234375, "learning_rate": 4.733735322119962e-06, "loss": 1.2153, "step": 12186 }, { "epoch": 3.8284154936490173, "grad_norm": 0.93359375, "learning_rate": 4.731196445572834e-06, "loss": 1.1997, "step": 12188 }, { "epoch": 3.8290437206745587, "grad_norm": 0.94140625, "learning_rate": 4.7286575690257065e-06, "loss": 1.1751, "step": 12190 }, { "epoch": 3.8296719477001, "grad_norm": 0.93359375, "learning_rate": 4.726118692478579e-06, "loss": 1.0904, "step": 12192 }, { "epoch": 3.8303001747256413, "grad_norm": 0.8984375, "learning_rate": 4.723579815931451e-06, "loss": 1.0757, "step": 12194 }, { "epoch": 3.8309284017511827, "grad_norm": 0.94921875, "learning_rate": 4.721040939384322e-06, "loss": 1.0996, "step": 12196 }, { "epoch": 3.831556628776724, "grad_norm": 0.9296875, "learning_rate": 4.7185020628371945e-06, "loss": 1.0379, "step": 12198 }, { "epoch": 3.8321848558022653, "grad_norm": 0.94921875, "learning_rate": 4.715963186290067e-06, "loss": 1.2516, "step": 12200 }, { "epoch": 3.8328130828278066, "grad_norm": 1.0859375, "learning_rate": 4.713424309742939e-06, "loss": 1.0186, "step": 12202 }, { "epoch": 3.8334413098533484, "grad_norm": 0.921875, "learning_rate": 4.710885433195811e-06, "loss": 1.1177, "step": 12204 }, { "epoch": 3.8340695368788897, "grad_norm": 0.9296875, "learning_rate": 4.7083465566486834e-06, "loss": 1.0952, "step": 12206 }, { "epoch": 3.834697763904431, "grad_norm": 0.98828125, "learning_rate": 4.705807680101556e-06, "loss": 1.1326, "step": 12208 }, { "epoch": 3.8353259909299724, "grad_norm": 0.90234375, "learning_rate": 4.703268803554428e-06, "loss": 1.2268, "step": 12210 }, { "epoch": 3.8359542179555137, "grad_norm": 0.8828125, "learning_rate": 4.7007299270073e-06, "loss": 1.243, "step": 12212 }, { "epoch": 3.836582444981055, "grad_norm": 0.9453125, "learning_rate": 4.6981910504601715e-06, "loss": 1.1975, "step": 12214 }, { "epoch": 3.8372106720065964, "grad_norm": 0.94140625, "learning_rate": 4.695652173913044e-06, "loss": 1.0564, "step": 12216 }, { "epoch": 3.8378388990321377, "grad_norm": 0.83203125, "learning_rate": 4.693113297365916e-06, "loss": 1.1739, "step": 12218 }, { "epoch": 3.838467126057679, "grad_norm": 0.9453125, "learning_rate": 4.690574420818788e-06, "loss": 1.1258, "step": 12220 }, { "epoch": 3.8390953530832204, "grad_norm": 0.98046875, "learning_rate": 4.6880355442716604e-06, "loss": 1.2965, "step": 12222 }, { "epoch": 3.8397235801087617, "grad_norm": 0.97265625, "learning_rate": 4.685496667724533e-06, "loss": 1.1632, "step": 12224 }, { "epoch": 3.840351807134303, "grad_norm": 0.96875, "learning_rate": 4.682957791177404e-06, "loss": 0.9985, "step": 12226 }, { "epoch": 3.8409800341598443, "grad_norm": 0.95703125, "learning_rate": 4.680418914630276e-06, "loss": 1.2272, "step": 12228 }, { "epoch": 3.841608261185386, "grad_norm": 0.96875, "learning_rate": 4.6778800380831485e-06, "loss": 1.0628, "step": 12230 }, { "epoch": 3.8422364882109274, "grad_norm": 0.90625, "learning_rate": 4.675341161536021e-06, "loss": 1.1363, "step": 12232 }, { "epoch": 3.8428647152364688, "grad_norm": 1.015625, "learning_rate": 4.672802284988893e-06, "loss": 1.0293, "step": 12234 }, { "epoch": 3.84349294226201, "grad_norm": 0.97265625, "learning_rate": 4.670263408441764e-06, "loss": 1.0746, "step": 12236 }, { "epoch": 3.8441211692875514, "grad_norm": 1.0390625, "learning_rate": 4.6677245318946366e-06, "loss": 0.9883, "step": 12238 }, { "epoch": 3.8447493963130928, "grad_norm": 0.86328125, "learning_rate": 4.665185655347509e-06, "loss": 1.1013, "step": 12240 }, { "epoch": 3.845377623338634, "grad_norm": 0.92578125, "learning_rate": 4.662646778800381e-06, "loss": 1.1302, "step": 12242 }, { "epoch": 3.8460058503641754, "grad_norm": 0.98828125, "learning_rate": 4.660107902253253e-06, "loss": 1.0861, "step": 12244 }, { "epoch": 3.8466340773897167, "grad_norm": 1.0078125, "learning_rate": 4.6575690257061255e-06, "loss": 1.2235, "step": 12246 }, { "epoch": 3.847262304415258, "grad_norm": 0.9921875, "learning_rate": 4.655030149158998e-06, "loss": 1.2454, "step": 12248 }, { "epoch": 3.8478905314407994, "grad_norm": 0.9453125, "learning_rate": 4.65249127261187e-06, "loss": 1.1821, "step": 12250 }, { "epoch": 3.8485187584663407, "grad_norm": 0.95703125, "learning_rate": 4.649952396064742e-06, "loss": 1.0959, "step": 12252 }, { "epoch": 3.849146985491882, "grad_norm": 1.0546875, "learning_rate": 4.647413519517614e-06, "loss": 1.1153, "step": 12254 }, { "epoch": 3.8497752125174234, "grad_norm": 0.953125, "learning_rate": 4.644874642970486e-06, "loss": 1.1424, "step": 12256 }, { "epoch": 3.8504034395429647, "grad_norm": 0.91796875, "learning_rate": 4.642335766423358e-06, "loss": 1.1183, "step": 12258 }, { "epoch": 3.851031666568506, "grad_norm": 0.94921875, "learning_rate": 4.63979688987623e-06, "loss": 1.2609, "step": 12260 }, { "epoch": 3.8516598935940474, "grad_norm": 0.98828125, "learning_rate": 4.6372580133291025e-06, "loss": 1.3048, "step": 12262 }, { "epoch": 3.8522881206195887, "grad_norm": 1.0, "learning_rate": 4.634719136781975e-06, "loss": 1.1693, "step": 12264 }, { "epoch": 3.85291634764513, "grad_norm": 0.921875, "learning_rate": 4.632180260234846e-06, "loss": 1.1647, "step": 12266 }, { "epoch": 3.8535445746706714, "grad_norm": 1.109375, "learning_rate": 4.629641383687718e-06, "loss": 1.0381, "step": 12268 }, { "epoch": 3.854172801696213, "grad_norm": 0.984375, "learning_rate": 4.6271025071405905e-06, "loss": 1.1254, "step": 12270 }, { "epoch": 3.8548010287217545, "grad_norm": 0.91015625, "learning_rate": 4.624563630593463e-06, "loss": 1.1723, "step": 12272 }, { "epoch": 3.855429255747296, "grad_norm": 0.90625, "learning_rate": 4.622024754046335e-06, "loss": 1.1209, "step": 12274 }, { "epoch": 3.856057482772837, "grad_norm": 1.0390625, "learning_rate": 4.619485877499207e-06, "loss": 1.2799, "step": 12276 }, { "epoch": 3.8566857097983784, "grad_norm": 0.95703125, "learning_rate": 4.616947000952079e-06, "loss": 1.2174, "step": 12278 }, { "epoch": 3.8573139368239198, "grad_norm": 0.98046875, "learning_rate": 4.614408124404951e-06, "loss": 1.2086, "step": 12280 }, { "epoch": 3.857942163849461, "grad_norm": 0.95703125, "learning_rate": 4.611869247857823e-06, "loss": 1.17, "step": 12282 }, { "epoch": 3.8585703908750024, "grad_norm": 0.921875, "learning_rate": 4.609330371310695e-06, "loss": 1.1979, "step": 12284 }, { "epoch": 3.8591986179005437, "grad_norm": 0.921875, "learning_rate": 4.6067914947635675e-06, "loss": 1.1368, "step": 12286 }, { "epoch": 3.859826844926085, "grad_norm": 0.9453125, "learning_rate": 4.60425261821644e-06, "loss": 1.1079, "step": 12288 }, { "epoch": 3.8604550719516264, "grad_norm": 0.90625, "learning_rate": 4.601713741669312e-06, "loss": 1.1628, "step": 12290 }, { "epoch": 3.8610832989771677, "grad_norm": 0.8515625, "learning_rate": 4.599174865122184e-06, "loss": 1.1608, "step": 12292 }, { "epoch": 3.8617115260027095, "grad_norm": 0.9765625, "learning_rate": 4.5966359885750564e-06, "loss": 1.1792, "step": 12294 }, { "epoch": 3.862339753028251, "grad_norm": 1.03125, "learning_rate": 4.594097112027928e-06, "loss": 1.1424, "step": 12296 }, { "epoch": 3.862967980053792, "grad_norm": 0.94921875, "learning_rate": 4.5915582354808e-06, "loss": 1.1908, "step": 12298 }, { "epoch": 3.8635962070793335, "grad_norm": 0.875, "learning_rate": 4.589019358933672e-06, "loss": 1.1613, "step": 12300 }, { "epoch": 3.864224434104875, "grad_norm": 0.98828125, "learning_rate": 4.5864804823865445e-06, "loss": 1.2128, "step": 12302 }, { "epoch": 3.864852661130416, "grad_norm": 1.0, "learning_rate": 4.583941605839417e-06, "loss": 1.0869, "step": 12304 }, { "epoch": 3.8654808881559575, "grad_norm": 0.99609375, "learning_rate": 4.581402729292289e-06, "loss": 0.9723, "step": 12306 }, { "epoch": 3.866109115181499, "grad_norm": 1.109375, "learning_rate": 4.57886385274516e-06, "loss": 1.0521, "step": 12308 }, { "epoch": 3.86673734220704, "grad_norm": 0.8828125, "learning_rate": 4.576324976198033e-06, "loss": 1.0835, "step": 12310 }, { "epoch": 3.8673655692325815, "grad_norm": 1.0234375, "learning_rate": 4.573786099650905e-06, "loss": 1.1166, "step": 12312 }, { "epoch": 3.867993796258123, "grad_norm": 0.9921875, "learning_rate": 4.571247223103777e-06, "loss": 1.0926, "step": 12314 }, { "epoch": 3.868622023283664, "grad_norm": 0.96484375, "learning_rate": 4.568708346556649e-06, "loss": 1.0539, "step": 12316 }, { "epoch": 3.8692502503092054, "grad_norm": 1.0390625, "learning_rate": 4.566169470009521e-06, "loss": 1.1853, "step": 12318 }, { "epoch": 3.8698784773347468, "grad_norm": 0.9609375, "learning_rate": 4.563630593462393e-06, "loss": 1.2294, "step": 12320 }, { "epoch": 3.870506704360288, "grad_norm": 0.93359375, "learning_rate": 4.561091716915265e-06, "loss": 1.2653, "step": 12322 }, { "epoch": 3.8711349313858294, "grad_norm": 0.98046875, "learning_rate": 4.558552840368137e-06, "loss": 1.2602, "step": 12324 }, { "epoch": 3.8717631584113708, "grad_norm": 0.95703125, "learning_rate": 4.5560139638210096e-06, "loss": 1.2335, "step": 12326 }, { "epoch": 3.872391385436912, "grad_norm": 0.90625, "learning_rate": 4.553475087273882e-06, "loss": 1.2117, "step": 12328 }, { "epoch": 3.8730196124624534, "grad_norm": 0.89453125, "learning_rate": 4.550936210726753e-06, "loss": 1.158, "step": 12330 }, { "epoch": 3.8736478394879947, "grad_norm": 0.9296875, "learning_rate": 4.548397334179626e-06, "loss": 1.1693, "step": 12332 }, { "epoch": 3.874276066513536, "grad_norm": 0.9140625, "learning_rate": 4.5458584576324985e-06, "loss": 1.1719, "step": 12334 }, { "epoch": 3.874904293539078, "grad_norm": 0.94140625, "learning_rate": 4.543319581085371e-06, "loss": 1.191, "step": 12336 }, { "epoch": 3.875532520564619, "grad_norm": 0.9140625, "learning_rate": 4.540780704538242e-06, "loss": 1.1827, "step": 12338 }, { "epoch": 3.8761607475901605, "grad_norm": 0.99609375, "learning_rate": 4.538241827991114e-06, "loss": 0.9954, "step": 12340 }, { "epoch": 3.876788974615702, "grad_norm": 0.953125, "learning_rate": 4.5357029514439866e-06, "loss": 1.2572, "step": 12342 }, { "epoch": 3.877417201641243, "grad_norm": 0.9453125, "learning_rate": 4.533164074896859e-06, "loss": 1.1503, "step": 12344 }, { "epoch": 3.8780454286667845, "grad_norm": 0.98828125, "learning_rate": 4.530625198349731e-06, "loss": 1.2242, "step": 12346 }, { "epoch": 3.878673655692326, "grad_norm": 0.9453125, "learning_rate": 4.528086321802602e-06, "loss": 1.066, "step": 12348 }, { "epoch": 3.879301882717867, "grad_norm": 1.0234375, "learning_rate": 4.525547445255475e-06, "loss": 1.1375, "step": 12350 }, { "epoch": 3.8799301097434085, "grad_norm": 1.0078125, "learning_rate": 4.523008568708347e-06, "loss": 1.0705, "step": 12352 }, { "epoch": 3.88055833676895, "grad_norm": 0.96875, "learning_rate": 4.520469692161219e-06, "loss": 1.1467, "step": 12354 }, { "epoch": 3.881186563794491, "grad_norm": 1.171875, "learning_rate": 4.517930815614091e-06, "loss": 1.0642, "step": 12356 }, { "epoch": 3.8818147908200324, "grad_norm": 0.91015625, "learning_rate": 4.5153919390669635e-06, "loss": 1.2064, "step": 12358 }, { "epoch": 3.882443017845574, "grad_norm": 0.92578125, "learning_rate": 4.512853062519835e-06, "loss": 1.0946, "step": 12360 }, { "epoch": 3.8830712448711155, "grad_norm": 0.96875, "learning_rate": 4.510314185972707e-06, "loss": 1.2365, "step": 12362 }, { "epoch": 3.883699471896657, "grad_norm": 0.87890625, "learning_rate": 4.507775309425579e-06, "loss": 1.1655, "step": 12364 }, { "epoch": 3.884327698922198, "grad_norm": 0.96875, "learning_rate": 4.505236432878452e-06, "loss": 1.1299, "step": 12366 }, { "epoch": 3.8849559259477395, "grad_norm": 0.9765625, "learning_rate": 4.502697556331324e-06, "loss": 1.2254, "step": 12368 }, { "epoch": 3.885584152973281, "grad_norm": 0.89453125, "learning_rate": 4.500158679784195e-06, "loss": 1.0553, "step": 12370 }, { "epoch": 3.886212379998822, "grad_norm": 0.984375, "learning_rate": 4.4976198032370675e-06, "loss": 1.1731, "step": 12372 }, { "epoch": 3.8868406070243635, "grad_norm": 1.09375, "learning_rate": 4.4950809266899405e-06, "loss": 0.9475, "step": 12374 }, { "epoch": 3.887468834049905, "grad_norm": 0.91796875, "learning_rate": 4.492542050142813e-06, "loss": 1.1945, "step": 12376 }, { "epoch": 3.888097061075446, "grad_norm": 0.96484375, "learning_rate": 4.490003173595684e-06, "loss": 1.0415, "step": 12378 }, { "epoch": 3.8887252881009875, "grad_norm": 1.03125, "learning_rate": 4.487464297048556e-06, "loss": 1.0846, "step": 12380 }, { "epoch": 3.889353515126529, "grad_norm": 0.86328125, "learning_rate": 4.484925420501429e-06, "loss": 1.0884, "step": 12382 }, { "epoch": 3.88998174215207, "grad_norm": 0.95703125, "learning_rate": 4.482386543954301e-06, "loss": 1.1239, "step": 12384 }, { "epoch": 3.8906099691776115, "grad_norm": 0.9453125, "learning_rate": 4.479847667407173e-06, "loss": 1.2658, "step": 12386 }, { "epoch": 3.891238196203153, "grad_norm": 0.94140625, "learning_rate": 4.477308790860045e-06, "loss": 1.2021, "step": 12388 }, { "epoch": 3.891866423228694, "grad_norm": 0.98046875, "learning_rate": 4.474769914312917e-06, "loss": 1.2028, "step": 12390 }, { "epoch": 3.8924946502542355, "grad_norm": 0.87890625, "learning_rate": 4.472231037765789e-06, "loss": 1.2137, "step": 12392 }, { "epoch": 3.893122877279777, "grad_norm": 0.953125, "learning_rate": 4.469692161218661e-06, "loss": 1.2416, "step": 12394 }, { "epoch": 3.893751104305318, "grad_norm": 0.9453125, "learning_rate": 4.467153284671533e-06, "loss": 1.2285, "step": 12396 }, { "epoch": 3.8943793313308595, "grad_norm": 0.90625, "learning_rate": 4.464614408124406e-06, "loss": 0.9647, "step": 12398 }, { "epoch": 3.895007558356401, "grad_norm": 0.93359375, "learning_rate": 4.462075531577277e-06, "loss": 1.0642, "step": 12400 }, { "epoch": 3.8956357853819426, "grad_norm": 1.0, "learning_rate": 4.459536655030149e-06, "loss": 1.2091, "step": 12402 }, { "epoch": 3.896264012407484, "grad_norm": 0.91015625, "learning_rate": 4.4569977784830214e-06, "loss": 1.1449, "step": 12404 }, { "epoch": 3.896892239433025, "grad_norm": 0.9453125, "learning_rate": 4.454458901935894e-06, "loss": 1.043, "step": 12406 }, { "epoch": 3.8975204664585665, "grad_norm": 0.98046875, "learning_rate": 4.451920025388766e-06, "loss": 1.2356, "step": 12408 }, { "epoch": 3.898148693484108, "grad_norm": 0.9609375, "learning_rate": 4.449381148841638e-06, "loss": 1.1759, "step": 12410 }, { "epoch": 3.898776920509649, "grad_norm": 0.890625, "learning_rate": 4.4468422722945095e-06, "loss": 1.077, "step": 12412 }, { "epoch": 3.8994051475351905, "grad_norm": 0.875, "learning_rate": 4.444303395747382e-06, "loss": 1.1653, "step": 12414 }, { "epoch": 3.900033374560732, "grad_norm": 0.9453125, "learning_rate": 4.441764519200254e-06, "loss": 1.1169, "step": 12416 }, { "epoch": 3.900661601586273, "grad_norm": 0.96875, "learning_rate": 4.439225642653126e-06, "loss": 1.1098, "step": 12418 }, { "epoch": 3.9012898286118145, "grad_norm": 0.9140625, "learning_rate": 4.436686766105998e-06, "loss": 1.1159, "step": 12420 }, { "epoch": 3.901918055637356, "grad_norm": 0.95703125, "learning_rate": 4.434147889558871e-06, "loss": 1.3125, "step": 12422 }, { "epoch": 3.902546282662897, "grad_norm": 0.984375, "learning_rate": 4.431609013011743e-06, "loss": 1.0783, "step": 12424 }, { "epoch": 3.903174509688439, "grad_norm": 0.9375, "learning_rate": 4.429070136464615e-06, "loss": 1.1511, "step": 12426 }, { "epoch": 3.9038027367139803, "grad_norm": 1.0234375, "learning_rate": 4.426531259917487e-06, "loss": 1.1327, "step": 12428 }, { "epoch": 3.9044309637395216, "grad_norm": 0.96484375, "learning_rate": 4.423992383370359e-06, "loss": 1.1367, "step": 12430 }, { "epoch": 3.905059190765063, "grad_norm": 1.03125, "learning_rate": 4.421453506823231e-06, "loss": 1.0522, "step": 12432 }, { "epoch": 3.9056874177906042, "grad_norm": 0.88671875, "learning_rate": 4.418914630276103e-06, "loss": 1.2103, "step": 12434 }, { "epoch": 3.9063156448161456, "grad_norm": 0.89453125, "learning_rate": 4.416375753728975e-06, "loss": 1.1193, "step": 12436 }, { "epoch": 3.906943871841687, "grad_norm": 0.97265625, "learning_rate": 4.413836877181848e-06, "loss": 1.3031, "step": 12438 }, { "epoch": 3.9075720988672282, "grad_norm": 0.953125, "learning_rate": 4.41129800063472e-06, "loss": 1.1426, "step": 12440 }, { "epoch": 3.9082003258927696, "grad_norm": 1.0, "learning_rate": 4.408759124087591e-06, "loss": 1.1638, "step": 12442 }, { "epoch": 3.908828552918311, "grad_norm": 0.91015625, "learning_rate": 4.4062202475404635e-06, "loss": 1.1062, "step": 12444 }, { "epoch": 3.909456779943852, "grad_norm": 1.0703125, "learning_rate": 4.403681370993336e-06, "loss": 1.1254, "step": 12446 }, { "epoch": 3.9100850069693935, "grad_norm": 0.89453125, "learning_rate": 4.401142494446208e-06, "loss": 1.2607, "step": 12448 }, { "epoch": 3.910713233994935, "grad_norm": 0.875, "learning_rate": 4.39860361789908e-06, "loss": 1.126, "step": 12450 }, { "epoch": 3.911341461020476, "grad_norm": 0.98828125, "learning_rate": 4.3960647413519515e-06, "loss": 1.1349, "step": 12452 }, { "epoch": 3.9119696880460175, "grad_norm": 1.0234375, "learning_rate": 4.393525864804824e-06, "loss": 1.005, "step": 12454 }, { "epoch": 3.912597915071559, "grad_norm": 0.9921875, "learning_rate": 4.390986988257696e-06, "loss": 1.1345, "step": 12456 }, { "epoch": 3.9132261420971, "grad_norm": 0.91796875, "learning_rate": 4.388448111710568e-06, "loss": 1.1369, "step": 12458 }, { "epoch": 3.9138543691226415, "grad_norm": 0.953125, "learning_rate": 4.3859092351634405e-06, "loss": 1.1372, "step": 12460 }, { "epoch": 3.914482596148183, "grad_norm": 0.984375, "learning_rate": 4.383370358616313e-06, "loss": 1.0209, "step": 12462 }, { "epoch": 3.915110823173724, "grad_norm": 0.94140625, "learning_rate": 4.380831482069185e-06, "loss": 1.1598, "step": 12464 }, { "epoch": 3.9157390501992655, "grad_norm": 0.94921875, "learning_rate": 4.378292605522057e-06, "loss": 1.1645, "step": 12466 }, { "epoch": 3.9163672772248073, "grad_norm": 0.90625, "learning_rate": 4.375753728974929e-06, "loss": 1.1817, "step": 12468 }, { "epoch": 3.9169955042503486, "grad_norm": 0.95703125, "learning_rate": 4.373214852427802e-06, "loss": 1.3065, "step": 12470 }, { "epoch": 3.91762373127589, "grad_norm": 1.078125, "learning_rate": 4.370675975880673e-06, "loss": 1.1078, "step": 12472 }, { "epoch": 3.9182519583014312, "grad_norm": 1.0625, "learning_rate": 4.368137099333545e-06, "loss": 1.1269, "step": 12474 }, { "epoch": 3.9188801853269726, "grad_norm": 0.92578125, "learning_rate": 4.3655982227864174e-06, "loss": 1.1832, "step": 12476 }, { "epoch": 3.919508412352514, "grad_norm": 0.9765625, "learning_rate": 4.36305934623929e-06, "loss": 1.0834, "step": 12478 }, { "epoch": 3.9201366393780552, "grad_norm": 1.0234375, "learning_rate": 4.360520469692162e-06, "loss": 1.1245, "step": 12480 }, { "epoch": 3.9207648664035966, "grad_norm": 0.95703125, "learning_rate": 4.357981593145033e-06, "loss": 1.1531, "step": 12482 }, { "epoch": 3.921393093429138, "grad_norm": 0.9453125, "learning_rate": 4.3554427165979055e-06, "loss": 1.2539, "step": 12484 }, { "epoch": 3.922021320454679, "grad_norm": 1.0234375, "learning_rate": 4.352903840050778e-06, "loss": 1.1369, "step": 12486 }, { "epoch": 3.9226495474802205, "grad_norm": 1.03125, "learning_rate": 4.35036496350365e-06, "loss": 1.069, "step": 12488 }, { "epoch": 3.923277774505762, "grad_norm": 1.015625, "learning_rate": 4.347826086956522e-06, "loss": 1.212, "step": 12490 }, { "epoch": 3.9239060015313036, "grad_norm": 0.921875, "learning_rate": 4.3452872104093944e-06, "loss": 1.2442, "step": 12492 }, { "epoch": 3.924534228556845, "grad_norm": 0.9375, "learning_rate": 4.342748333862266e-06, "loss": 1.1009, "step": 12494 }, { "epoch": 3.9251624555823863, "grad_norm": 0.9921875, "learning_rate": 4.340209457315138e-06, "loss": 1.1885, "step": 12496 }, { "epoch": 3.9257906826079276, "grad_norm": 0.98828125, "learning_rate": 4.33767058076801e-06, "loss": 1.1714, "step": 12498 }, { "epoch": 3.926418909633469, "grad_norm": 0.92578125, "learning_rate": 4.3351317042208825e-06, "loss": 1.0216, "step": 12500 }, { "epoch": 3.9270471366590103, "grad_norm": 0.953125, "learning_rate": 4.332592827673755e-06, "loss": 1.1479, "step": 12502 }, { "epoch": 3.9276753636845516, "grad_norm": 0.96875, "learning_rate": 4.330053951126627e-06, "loss": 1.1281, "step": 12504 }, { "epoch": 3.928303590710093, "grad_norm": 0.921875, "learning_rate": 4.327515074579499e-06, "loss": 1.1531, "step": 12506 }, { "epoch": 3.9289318177356343, "grad_norm": 0.9453125, "learning_rate": 4.324976198032371e-06, "loss": 1.1762, "step": 12508 }, { "epoch": 3.9295600447611756, "grad_norm": 1.09375, "learning_rate": 4.322437321485244e-06, "loss": 1.2529, "step": 12510 }, { "epoch": 3.930188271786717, "grad_norm": 0.94140625, "learning_rate": 4.319898444938115e-06, "loss": 1.2325, "step": 12512 }, { "epoch": 3.9308164988122583, "grad_norm": 0.99609375, "learning_rate": 4.317359568390987e-06, "loss": 1.2266, "step": 12514 }, { "epoch": 3.9314447258377996, "grad_norm": 0.88671875, "learning_rate": 4.3148206918438595e-06, "loss": 1.1749, "step": 12516 }, { "epoch": 3.932072952863341, "grad_norm": 0.90234375, "learning_rate": 4.312281815296732e-06, "loss": 1.1377, "step": 12518 }, { "epoch": 3.9327011798888822, "grad_norm": 1.0078125, "learning_rate": 4.309742938749604e-06, "loss": 1.1638, "step": 12520 }, { "epoch": 3.9333294069144236, "grad_norm": 0.8984375, "learning_rate": 4.307204062202476e-06, "loss": 1.1871, "step": 12522 }, { "epoch": 3.933957633939965, "grad_norm": 0.98046875, "learning_rate": 4.3046651856553476e-06, "loss": 1.15, "step": 12524 }, { "epoch": 3.934585860965506, "grad_norm": 0.97265625, "learning_rate": 4.30212630910822e-06, "loss": 1.0905, "step": 12526 }, { "epoch": 3.9352140879910475, "grad_norm": 0.9609375, "learning_rate": 4.299587432561092e-06, "loss": 1.2026, "step": 12528 }, { "epoch": 3.935842315016589, "grad_norm": 0.90625, "learning_rate": 4.297048556013964e-06, "loss": 1.0666, "step": 12530 }, { "epoch": 3.93647054204213, "grad_norm": 0.953125, "learning_rate": 4.2945096794668365e-06, "loss": 1.2539, "step": 12532 }, { "epoch": 3.937098769067672, "grad_norm": 1.078125, "learning_rate": 4.291970802919708e-06, "loss": 1.0726, "step": 12534 }, { "epoch": 3.9377269960932133, "grad_norm": 0.984375, "learning_rate": 4.28943192637258e-06, "loss": 1.153, "step": 12536 }, { "epoch": 3.9383552231187546, "grad_norm": 0.96875, "learning_rate": 4.286893049825452e-06, "loss": 1.1202, "step": 12538 }, { "epoch": 3.938983450144296, "grad_norm": 0.94921875, "learning_rate": 4.2843541732783245e-06, "loss": 1.1556, "step": 12540 }, { "epoch": 3.9396116771698373, "grad_norm": 0.921875, "learning_rate": 4.281815296731197e-06, "loss": 1.3425, "step": 12542 }, { "epoch": 3.9402399041953786, "grad_norm": 0.98828125, "learning_rate": 4.279276420184069e-06, "loss": 1.0217, "step": 12544 }, { "epoch": 3.94086813122092, "grad_norm": 0.91796875, "learning_rate": 4.276737543636941e-06, "loss": 1.1011, "step": 12546 }, { "epoch": 3.9414963582464613, "grad_norm": 0.8671875, "learning_rate": 4.2741986670898135e-06, "loss": 1.1959, "step": 12548 }, { "epoch": 3.9421245852720026, "grad_norm": 0.875, "learning_rate": 4.271659790542686e-06, "loss": 1.1609, "step": 12550 }, { "epoch": 3.942752812297544, "grad_norm": 0.9296875, "learning_rate": 4.269120913995558e-06, "loss": 1.1995, "step": 12552 }, { "epoch": 3.9433810393230853, "grad_norm": 0.890625, "learning_rate": 4.266582037448429e-06, "loss": 1.2182, "step": 12554 }, { "epoch": 3.9440092663486266, "grad_norm": 0.92578125, "learning_rate": 4.2640431609013015e-06, "loss": 1.165, "step": 12556 }, { "epoch": 3.9446374933741684, "grad_norm": 0.91015625, "learning_rate": 4.261504284354174e-06, "loss": 1.0655, "step": 12558 }, { "epoch": 3.9452657203997097, "grad_norm": 1.09375, "learning_rate": 4.258965407807046e-06, "loss": 1.1886, "step": 12560 }, { "epoch": 3.945893947425251, "grad_norm": 1.0546875, "learning_rate": 4.256426531259918e-06, "loss": 1.2035, "step": 12562 }, { "epoch": 3.9465221744507923, "grad_norm": 0.9140625, "learning_rate": 4.25388765471279e-06, "loss": 1.0162, "step": 12564 }, { "epoch": 3.9471504014763337, "grad_norm": 0.9609375, "learning_rate": 4.251348778165662e-06, "loss": 1.0941, "step": 12566 }, { "epoch": 3.947778628501875, "grad_norm": 1.0, "learning_rate": 4.248809901618534e-06, "loss": 1.191, "step": 12568 }, { "epoch": 3.9484068555274163, "grad_norm": 0.96484375, "learning_rate": 4.246271025071406e-06, "loss": 1.1618, "step": 12570 }, { "epoch": 3.9490350825529577, "grad_norm": 0.97265625, "learning_rate": 4.2437321485242785e-06, "loss": 1.1224, "step": 12572 }, { "epoch": 3.949663309578499, "grad_norm": 0.9140625, "learning_rate": 4.241193271977151e-06, "loss": 1.0541, "step": 12574 }, { "epoch": 3.9502915366040403, "grad_norm": 0.96875, "learning_rate": 4.238654395430022e-06, "loss": 1.0555, "step": 12576 }, { "epoch": 3.9509197636295816, "grad_norm": 0.93359375, "learning_rate": 4.236115518882894e-06, "loss": 1.2063, "step": 12578 }, { "epoch": 3.951547990655123, "grad_norm": 0.92578125, "learning_rate": 4.233576642335767e-06, "loss": 1.2369, "step": 12580 }, { "epoch": 3.9521762176806643, "grad_norm": 1.0, "learning_rate": 4.231037765788639e-06, "loss": 1.1153, "step": 12582 }, { "epoch": 3.9528044447062056, "grad_norm": 0.921875, "learning_rate": 4.228498889241511e-06, "loss": 1.1209, "step": 12584 }, { "epoch": 3.953432671731747, "grad_norm": 1.03125, "learning_rate": 4.2259600126943824e-06, "loss": 1.062, "step": 12586 }, { "epoch": 3.9540608987572883, "grad_norm": 0.9140625, "learning_rate": 4.2234211361472555e-06, "loss": 1.2535, "step": 12588 }, { "epoch": 3.9546891257828296, "grad_norm": 0.93359375, "learning_rate": 4.220882259600128e-06, "loss": 1.1186, "step": 12590 }, { "epoch": 3.955317352808371, "grad_norm": 0.91015625, "learning_rate": 4.218343383053e-06, "loss": 1.062, "step": 12592 }, { "epoch": 3.9559455798339123, "grad_norm": 0.9765625, "learning_rate": 4.215804506505871e-06, "loss": 1.1268, "step": 12594 }, { "epoch": 3.9565738068594536, "grad_norm": 0.96875, "learning_rate": 4.213265629958744e-06, "loss": 1.161, "step": 12596 }, { "epoch": 3.957202033884995, "grad_norm": 1.0234375, "learning_rate": 4.210726753411616e-06, "loss": 1.0971, "step": 12598 }, { "epoch": 3.9578302609105367, "grad_norm": 1.0, "learning_rate": 4.208187876864488e-06, "loss": 1.1588, "step": 12600 }, { "epoch": 3.958458487936078, "grad_norm": 0.9140625, "learning_rate": 4.20564900031736e-06, "loss": 1.2104, "step": 12602 }, { "epoch": 3.9590867149616193, "grad_norm": 0.94140625, "learning_rate": 4.2031101237702325e-06, "loss": 1.1464, "step": 12604 }, { "epoch": 3.9597149419871607, "grad_norm": 0.98046875, "learning_rate": 4.200571247223104e-06, "loss": 1.2806, "step": 12606 }, { "epoch": 3.960343169012702, "grad_norm": 1.0546875, "learning_rate": 4.198032370675976e-06, "loss": 1.1866, "step": 12608 }, { "epoch": 3.9609713960382433, "grad_norm": 0.93359375, "learning_rate": 4.195493494128848e-06, "loss": 1.2247, "step": 12610 }, { "epoch": 3.9615996230637847, "grad_norm": 0.98828125, "learning_rate": 4.1929546175817206e-06, "loss": 1.0871, "step": 12612 }, { "epoch": 3.962227850089326, "grad_norm": 0.8828125, "learning_rate": 4.190415741034593e-06, "loss": 1.236, "step": 12614 }, { "epoch": 3.9628560771148673, "grad_norm": 0.88671875, "learning_rate": 4.187876864487464e-06, "loss": 1.0454, "step": 12616 }, { "epoch": 3.9634843041404086, "grad_norm": 0.90234375, "learning_rate": 4.185337987940336e-06, "loss": 1.2834, "step": 12618 }, { "epoch": 3.96411253116595, "grad_norm": 0.9375, "learning_rate": 4.182799111393209e-06, "loss": 1.1114, "step": 12620 }, { "epoch": 3.9647407581914913, "grad_norm": 0.9921875, "learning_rate": 4.180260234846081e-06, "loss": 1.2861, "step": 12622 }, { "epoch": 3.965368985217033, "grad_norm": 0.9140625, "learning_rate": 4.177721358298953e-06, "loss": 1.0832, "step": 12624 }, { "epoch": 3.9659972122425744, "grad_norm": 0.9765625, "learning_rate": 4.175182481751825e-06, "loss": 1.0841, "step": 12626 }, { "epoch": 3.9666254392681157, "grad_norm": 1.03125, "learning_rate": 4.172643605204697e-06, "loss": 1.2627, "step": 12628 }, { "epoch": 3.967253666293657, "grad_norm": 1.03125, "learning_rate": 4.170104728657569e-06, "loss": 1.1566, "step": 12630 }, { "epoch": 3.9678818933191984, "grad_norm": 1.1015625, "learning_rate": 4.167565852110442e-06, "loss": 1.1956, "step": 12632 }, { "epoch": 3.9685101203447397, "grad_norm": 0.98046875, "learning_rate": 4.165026975563313e-06, "loss": 1.1519, "step": 12634 }, { "epoch": 3.969138347370281, "grad_norm": 0.9765625, "learning_rate": 4.162488099016186e-06, "loss": 1.2159, "step": 12636 }, { "epoch": 3.9697665743958224, "grad_norm": 0.9765625, "learning_rate": 4.159949222469058e-06, "loss": 1.2195, "step": 12638 }, { "epoch": 3.9703948014213637, "grad_norm": 0.84765625, "learning_rate": 4.15741034592193e-06, "loss": 1.2115, "step": 12640 }, { "epoch": 3.971023028446905, "grad_norm": 0.85546875, "learning_rate": 4.154871469374802e-06, "loss": 1.2504, "step": 12642 }, { "epoch": 3.9716512554724464, "grad_norm": 1.0625, "learning_rate": 4.1523325928276745e-06, "loss": 1.2124, "step": 12644 }, { "epoch": 3.9722794824979877, "grad_norm": 0.92578125, "learning_rate": 4.149793716280546e-06, "loss": 1.024, "step": 12646 }, { "epoch": 3.972907709523529, "grad_norm": 0.97265625, "learning_rate": 4.147254839733418e-06, "loss": 1.1354, "step": 12648 }, { "epoch": 3.9735359365490703, "grad_norm": 0.98828125, "learning_rate": 4.14471596318629e-06, "loss": 1.1186, "step": 12650 }, { "epoch": 3.9741641635746117, "grad_norm": 0.96875, "learning_rate": 4.142177086639163e-06, "loss": 1.1617, "step": 12652 }, { "epoch": 3.974792390600153, "grad_norm": 0.97265625, "learning_rate": 4.139638210092035e-06, "loss": 1.1645, "step": 12654 }, { "epoch": 3.9754206176256943, "grad_norm": 0.92578125, "learning_rate": 4.137099333544907e-06, "loss": 1.183, "step": 12656 }, { "epoch": 3.9760488446512356, "grad_norm": 1.03125, "learning_rate": 4.1345604569977784e-06, "loss": 1.1666, "step": 12658 }, { "epoch": 3.976677071676777, "grad_norm": 0.8984375, "learning_rate": 4.132021580450651e-06, "loss": 1.2763, "step": 12660 }, { "epoch": 3.9773052987023183, "grad_norm": 0.875, "learning_rate": 4.129482703903523e-06, "loss": 1.2024, "step": 12662 }, { "epoch": 3.9779335257278596, "grad_norm": 0.95703125, "learning_rate": 4.126943827356395e-06, "loss": 1.2548, "step": 12664 }, { "epoch": 3.9785617527534014, "grad_norm": 0.9609375, "learning_rate": 4.124404950809267e-06, "loss": 1.2977, "step": 12666 }, { "epoch": 3.9791899797789427, "grad_norm": 0.95703125, "learning_rate": 4.121866074262139e-06, "loss": 1.1041, "step": 12668 }, { "epoch": 3.979818206804484, "grad_norm": 0.95703125, "learning_rate": 4.119327197715011e-06, "loss": 1.0995, "step": 12670 }, { "epoch": 3.9804464338300254, "grad_norm": 1.0078125, "learning_rate": 4.116788321167883e-06, "loss": 1.1022, "step": 12672 }, { "epoch": 3.9810746608555667, "grad_norm": 0.98828125, "learning_rate": 4.114249444620756e-06, "loss": 1.1291, "step": 12674 }, { "epoch": 3.981702887881108, "grad_norm": 1.0390625, "learning_rate": 4.111710568073628e-06, "loss": 1.2085, "step": 12676 }, { "epoch": 3.9823311149066494, "grad_norm": 0.9453125, "learning_rate": 4.1091716915265e-06, "loss": 1.124, "step": 12678 }, { "epoch": 3.9829593419321907, "grad_norm": 0.98046875, "learning_rate": 4.106632814979372e-06, "loss": 1.3034, "step": 12680 }, { "epoch": 3.983587568957732, "grad_norm": 0.9296875, "learning_rate": 4.104093938432244e-06, "loss": 1.2619, "step": 12682 }, { "epoch": 3.9842157959832734, "grad_norm": 0.96875, "learning_rate": 4.101555061885117e-06, "loss": 1.1895, "step": 12684 }, { "epoch": 3.9848440230088147, "grad_norm": 0.91796875, "learning_rate": 4.099016185337989e-06, "loss": 1.157, "step": 12686 }, { "epoch": 3.985472250034356, "grad_norm": 1.0, "learning_rate": 4.09647730879086e-06, "loss": 1.2454, "step": 12688 }, { "epoch": 3.986100477059898, "grad_norm": 0.984375, "learning_rate": 4.093938432243732e-06, "loss": 1.233, "step": 12690 }, { "epoch": 3.986728704085439, "grad_norm": 0.8984375, "learning_rate": 4.091399555696605e-06, "loss": 1.0538, "step": 12692 }, { "epoch": 3.9873569311109804, "grad_norm": 0.88671875, "learning_rate": 4.088860679149477e-06, "loss": 1.2349, "step": 12694 }, { "epoch": 3.9879851581365218, "grad_norm": 1.0390625, "learning_rate": 4.086321802602349e-06, "loss": 1.2276, "step": 12696 }, { "epoch": 3.988613385162063, "grad_norm": 0.93359375, "learning_rate": 4.0837829260552205e-06, "loss": 1.206, "step": 12698 }, { "epoch": 3.9892416121876044, "grad_norm": 0.92578125, "learning_rate": 4.081244049508093e-06, "loss": 1.1242, "step": 12700 }, { "epoch": 3.9898698392131458, "grad_norm": 0.9296875, "learning_rate": 4.078705172960965e-06, "loss": 1.0459, "step": 12702 }, { "epoch": 3.990498066238687, "grad_norm": 1.0234375, "learning_rate": 4.076166296413837e-06, "loss": 1.1662, "step": 12704 }, { "epoch": 3.9911262932642284, "grad_norm": 1.0078125, "learning_rate": 4.073627419866709e-06, "loss": 1.0847, "step": 12706 }, { "epoch": 3.9917545202897697, "grad_norm": 0.92578125, "learning_rate": 4.071088543319582e-06, "loss": 1.1892, "step": 12708 }, { "epoch": 3.992382747315311, "grad_norm": 0.9921875, "learning_rate": 4.068549666772453e-06, "loss": 1.1002, "step": 12710 }, { "epoch": 3.9930109743408524, "grad_norm": 0.9375, "learning_rate": 4.066010790225325e-06, "loss": 1.1391, "step": 12712 }, { "epoch": 3.9936392013663937, "grad_norm": 1.0390625, "learning_rate": 4.0634719136781975e-06, "loss": 1.1105, "step": 12714 }, { "epoch": 3.994267428391935, "grad_norm": 0.89453125, "learning_rate": 4.06093303713107e-06, "loss": 1.1659, "step": 12716 }, { "epoch": 3.9948956554174764, "grad_norm": 1.078125, "learning_rate": 4.058394160583942e-06, "loss": 1.124, "step": 12718 }, { "epoch": 3.9955238824430177, "grad_norm": 0.9140625, "learning_rate": 4.055855284036814e-06, "loss": 1.098, "step": 12720 }, { "epoch": 3.996152109468559, "grad_norm": 1.0078125, "learning_rate": 4.053316407489686e-06, "loss": 1.1416, "step": 12722 }, { "epoch": 3.9967803364941004, "grad_norm": 0.8984375, "learning_rate": 4.050777530942559e-06, "loss": 1.2225, "step": 12724 }, { "epoch": 3.9974085635196417, "grad_norm": 1.0625, "learning_rate": 4.048238654395431e-06, "loss": 1.1809, "step": 12726 }, { "epoch": 3.998036790545183, "grad_norm": 0.96875, "learning_rate": 4.045699777848302e-06, "loss": 1.1978, "step": 12728 }, { "epoch": 3.9986650175707243, "grad_norm": 0.9453125, "learning_rate": 4.0431609013011745e-06, "loss": 1.1221, "step": 12730 }, { "epoch": 3.999293244596266, "grad_norm": 0.91796875, "learning_rate": 4.040622024754047e-06, "loss": 1.2161, "step": 12732 }, { "epoch": 3.9999214716218074, "grad_norm": 1.0234375, "learning_rate": 4.038083148206919e-06, "loss": 1.0009, "step": 12734 }, { "epoch": 4.000549698647348, "grad_norm": 0.96484375, "learning_rate": 4.035544271659791e-06, "loss": 1.1304, "step": 12736 }, { "epoch": 4.00117792567289, "grad_norm": 0.84765625, "learning_rate": 4.033005395112663e-06, "loss": 1.2638, "step": 12738 }, { "epoch": 4.001806152698431, "grad_norm": 0.93359375, "learning_rate": 4.030466518565535e-06, "loss": 1.1447, "step": 12740 }, { "epoch": 4.002434379723972, "grad_norm": 0.91015625, "learning_rate": 4.027927642018407e-06, "loss": 1.2587, "step": 12742 }, { "epoch": 4.0030626067495145, "grad_norm": 0.9296875, "learning_rate": 4.025388765471279e-06, "loss": 1.269, "step": 12744 }, { "epoch": 4.003690833775056, "grad_norm": 0.96484375, "learning_rate": 4.0228498889241515e-06, "loss": 1.0855, "step": 12746 }, { "epoch": 4.004319060800597, "grad_norm": 0.98046875, "learning_rate": 4.020311012377024e-06, "loss": 1.1698, "step": 12748 }, { "epoch": 4.0049472878261385, "grad_norm": 0.92578125, "learning_rate": 4.017772135829895e-06, "loss": 1.1687, "step": 12750 }, { "epoch": 4.00557551485168, "grad_norm": 0.953125, "learning_rate": 4.015233259282767e-06, "loss": 1.0411, "step": 12752 }, { "epoch": 4.006203741877221, "grad_norm": 0.89453125, "learning_rate": 4.0126943827356395e-06, "loss": 0.9888, "step": 12754 }, { "epoch": 4.0068319689027625, "grad_norm": 0.94921875, "learning_rate": 4.010155506188512e-06, "loss": 1.209, "step": 12756 }, { "epoch": 4.007460195928304, "grad_norm": 1.0078125, "learning_rate": 4.007616629641384e-06, "loss": 1.111, "step": 12758 }, { "epoch": 4.008088422953845, "grad_norm": 1.0234375, "learning_rate": 4.005077753094256e-06, "loss": 1.0575, "step": 12760 }, { "epoch": 4.0087166499793865, "grad_norm": 0.90234375, "learning_rate": 4.0025388765471284e-06, "loss": 1.2568, "step": 12762 }, { "epoch": 4.009344877004928, "grad_norm": 1.0546875, "learning_rate": 4.000000000000001e-06, "loss": 1.052, "step": 12764 }, { "epoch": 4.009973104030469, "grad_norm": 0.9609375, "learning_rate": 3.997461123452873e-06, "loss": 1.0463, "step": 12766 }, { "epoch": 4.0106013310560105, "grad_norm": 0.96875, "learning_rate": 3.994922246905745e-06, "loss": 1.1147, "step": 12768 }, { "epoch": 4.011229558081552, "grad_norm": 1.0078125, "learning_rate": 3.9923833703586165e-06, "loss": 1.1353, "step": 12770 }, { "epoch": 4.011857785107093, "grad_norm": 0.9765625, "learning_rate": 3.989844493811489e-06, "loss": 0.9974, "step": 12772 }, { "epoch": 4.0124860121326344, "grad_norm": 0.96484375, "learning_rate": 3.987305617264361e-06, "loss": 1.2094, "step": 12774 }, { "epoch": 4.013114239158176, "grad_norm": 1.0546875, "learning_rate": 3.984766740717233e-06, "loss": 1.1667, "step": 12776 }, { "epoch": 4.013742466183717, "grad_norm": 0.96484375, "learning_rate": 3.982227864170105e-06, "loss": 0.9973, "step": 12778 }, { "epoch": 4.014370693209258, "grad_norm": 0.94921875, "learning_rate": 3.979688987622977e-06, "loss": 1.0872, "step": 12780 }, { "epoch": 4.0149989202348, "grad_norm": 1.0703125, "learning_rate": 3.977150111075849e-06, "loss": 1.0737, "step": 12782 }, { "epoch": 4.015627147260341, "grad_norm": 1.0234375, "learning_rate": 3.974611234528721e-06, "loss": 0.9641, "step": 12784 }, { "epoch": 4.016255374285882, "grad_norm": 1.1171875, "learning_rate": 3.9720723579815935e-06, "loss": 1.2179, "step": 12786 }, { "epoch": 4.016883601311424, "grad_norm": 0.94140625, "learning_rate": 3.969533481434466e-06, "loss": 1.1504, "step": 12788 }, { "epoch": 4.017511828336965, "grad_norm": 0.9765625, "learning_rate": 3.966994604887338e-06, "loss": 1.0377, "step": 12790 }, { "epoch": 4.018140055362506, "grad_norm": 0.90625, "learning_rate": 3.964455728340209e-06, "loss": 1.3309, "step": 12792 }, { "epoch": 4.018768282388048, "grad_norm": 0.93359375, "learning_rate": 3.9619168517930816e-06, "loss": 1.0257, "step": 12794 }, { "epoch": 4.019396509413589, "grad_norm": 1.0234375, "learning_rate": 3.959377975245954e-06, "loss": 1.206, "step": 12796 }, { "epoch": 4.02002473643913, "grad_norm": 0.9453125, "learning_rate": 3.956839098698826e-06, "loss": 1.0081, "step": 12798 }, { "epoch": 4.020652963464672, "grad_norm": 0.95703125, "learning_rate": 3.954300222151698e-06, "loss": 1.1653, "step": 12800 }, { "epoch": 4.021281190490213, "grad_norm": 0.953125, "learning_rate": 3.95176134560457e-06, "loss": 1.0933, "step": 12802 }, { "epoch": 4.021909417515754, "grad_norm": 0.9375, "learning_rate": 3.949222469057443e-06, "loss": 1.0876, "step": 12804 }, { "epoch": 4.022537644541296, "grad_norm": 0.875, "learning_rate": 3.946683592510315e-06, "loss": 1.1527, "step": 12806 }, { "epoch": 4.023165871566837, "grad_norm": 0.91015625, "learning_rate": 3.944144715963187e-06, "loss": 1.0136, "step": 12808 }, { "epoch": 4.023794098592379, "grad_norm": 0.9609375, "learning_rate": 3.9416058394160585e-06, "loss": 1.1302, "step": 12810 }, { "epoch": 4.024422325617921, "grad_norm": 0.953125, "learning_rate": 3.939066962868931e-06, "loss": 1.0471, "step": 12812 }, { "epoch": 4.025050552643462, "grad_norm": 0.96484375, "learning_rate": 3.936528086321803e-06, "loss": 1.1518, "step": 12814 }, { "epoch": 4.025678779669003, "grad_norm": 0.94140625, "learning_rate": 3.933989209774675e-06, "loss": 1.1025, "step": 12816 }, { "epoch": 4.0263070066945446, "grad_norm": 0.93359375, "learning_rate": 3.9314503332275475e-06, "loss": 1.3368, "step": 12818 }, { "epoch": 4.026935233720086, "grad_norm": 0.97265625, "learning_rate": 3.92891145668042e-06, "loss": 1.0752, "step": 12820 }, { "epoch": 4.027563460745627, "grad_norm": 0.94921875, "learning_rate": 3.926372580133291e-06, "loss": 1.1852, "step": 12822 }, { "epoch": 4.0281916877711685, "grad_norm": 1.078125, "learning_rate": 3.923833703586163e-06, "loss": 1.0661, "step": 12824 }, { "epoch": 4.02881991479671, "grad_norm": 0.9765625, "learning_rate": 3.9212948270390355e-06, "loss": 1.2044, "step": 12826 }, { "epoch": 4.029448141822251, "grad_norm": 0.90234375, "learning_rate": 3.918755950491908e-06, "loss": 1.1273, "step": 12828 }, { "epoch": 4.0300763688477925, "grad_norm": 0.94921875, "learning_rate": 3.91621707394478e-06, "loss": 1.0986, "step": 12830 }, { "epoch": 4.030704595873334, "grad_norm": 0.95703125, "learning_rate": 3.913678197397651e-06, "loss": 1.2333, "step": 12832 }, { "epoch": 4.031332822898875, "grad_norm": 1.1328125, "learning_rate": 3.911139320850524e-06, "loss": 1.168, "step": 12834 }, { "epoch": 4.0319610499244165, "grad_norm": 1.0234375, "learning_rate": 3.908600444303396e-06, "loss": 1.1292, "step": 12836 }, { "epoch": 4.032589276949958, "grad_norm": 0.98046875, "learning_rate": 3.906061567756268e-06, "loss": 1.0967, "step": 12838 }, { "epoch": 4.033217503975499, "grad_norm": 1.015625, "learning_rate": 3.90352269120914e-06, "loss": 1.0788, "step": 12840 }, { "epoch": 4.0338457310010405, "grad_norm": 0.8984375, "learning_rate": 3.9009838146620125e-06, "loss": 1.0786, "step": 12842 }, { "epoch": 4.034473958026582, "grad_norm": 0.96875, "learning_rate": 3.898444938114884e-06, "loss": 1.0863, "step": 12844 }, { "epoch": 4.035102185052123, "grad_norm": 0.92578125, "learning_rate": 3.895906061567757e-06, "loss": 1.0822, "step": 12846 }, { "epoch": 4.0357304120776645, "grad_norm": 0.94921875, "learning_rate": 3.893367185020629e-06, "loss": 1.1691, "step": 12848 }, { "epoch": 4.036358639103206, "grad_norm": 0.94140625, "learning_rate": 3.890828308473501e-06, "loss": 1.1549, "step": 12850 }, { "epoch": 4.036986866128747, "grad_norm": 0.94921875, "learning_rate": 3.888289431926373e-06, "loss": 1.1599, "step": 12852 }, { "epoch": 4.0376150931542885, "grad_norm": 1.0625, "learning_rate": 3.885750555379245e-06, "loss": 0.9489, "step": 12854 }, { "epoch": 4.03824332017983, "grad_norm": 0.96875, "learning_rate": 3.883211678832117e-06, "loss": 1.1197, "step": 12856 }, { "epoch": 4.038871547205371, "grad_norm": 1.015625, "learning_rate": 3.8806728022849895e-06, "loss": 1.0526, "step": 12858 }, { "epoch": 4.039499774230912, "grad_norm": 0.9375, "learning_rate": 3.878133925737862e-06, "loss": 1.1995, "step": 12860 }, { "epoch": 4.040128001256454, "grad_norm": 0.99609375, "learning_rate": 3.875595049190733e-06, "loss": 1.1496, "step": 12862 }, { "epoch": 4.040756228281995, "grad_norm": 0.98828125, "learning_rate": 3.873056172643605e-06, "loss": 1.1494, "step": 12864 }, { "epoch": 4.041384455307536, "grad_norm": 0.98828125, "learning_rate": 3.870517296096478e-06, "loss": 1.0612, "step": 12866 }, { "epoch": 4.042012682333078, "grad_norm": 1.078125, "learning_rate": 3.86797841954935e-06, "loss": 1.0646, "step": 12868 }, { "epoch": 4.042640909358619, "grad_norm": 1.03125, "learning_rate": 3.865439543002222e-06, "loss": 1.1488, "step": 12870 }, { "epoch": 4.04326913638416, "grad_norm": 1.0625, "learning_rate": 3.862900666455094e-06, "loss": 1.16, "step": 12872 }, { "epoch": 4.043897363409702, "grad_norm": 0.9453125, "learning_rate": 3.860361789907966e-06, "loss": 1.298, "step": 12874 }, { "epoch": 4.044525590435244, "grad_norm": 0.91015625, "learning_rate": 3.857822913360838e-06, "loss": 1.0697, "step": 12876 }, { "epoch": 4.045153817460785, "grad_norm": 0.9765625, "learning_rate": 3.85528403681371e-06, "loss": 1.1631, "step": 12878 }, { "epoch": 4.045782044486327, "grad_norm": 0.921875, "learning_rate": 3.852745160266582e-06, "loss": 1.1346, "step": 12880 }, { "epoch": 4.046410271511868, "grad_norm": 0.93359375, "learning_rate": 3.8502062837194546e-06, "loss": 1.2186, "step": 12882 }, { "epoch": 4.047038498537409, "grad_norm": 0.94921875, "learning_rate": 3.847667407172326e-06, "loss": 1.1479, "step": 12884 }, { "epoch": 4.047666725562951, "grad_norm": 0.95703125, "learning_rate": 3.845128530625198e-06, "loss": 1.1927, "step": 12886 }, { "epoch": 4.048294952588492, "grad_norm": 1.046875, "learning_rate": 3.84258965407807e-06, "loss": 1.0875, "step": 12888 }, { "epoch": 4.048923179614033, "grad_norm": 0.90234375, "learning_rate": 3.8400507775309435e-06, "loss": 1.1342, "step": 12890 }, { "epoch": 4.049551406639575, "grad_norm": 0.84765625, "learning_rate": 3.837511900983815e-06, "loss": 1.1159, "step": 12892 }, { "epoch": 4.050179633665116, "grad_norm": 0.96484375, "learning_rate": 3.834973024436687e-06, "loss": 1.1065, "step": 12894 }, { "epoch": 4.050807860690657, "grad_norm": 0.94921875, "learning_rate": 3.832434147889559e-06, "loss": 1.0296, "step": 12896 }, { "epoch": 4.051436087716199, "grad_norm": 1.0546875, "learning_rate": 3.8298952713424316e-06, "loss": 1.1117, "step": 12898 }, { "epoch": 4.05206431474174, "grad_norm": 0.95703125, "learning_rate": 3.827356394795304e-06, "loss": 1.1958, "step": 12900 }, { "epoch": 4.052692541767281, "grad_norm": 1.09375, "learning_rate": 3.824817518248176e-06, "loss": 1.1713, "step": 12902 }, { "epoch": 4.0533207687928225, "grad_norm": 0.99609375, "learning_rate": 3.822278641701047e-06, "loss": 1.0092, "step": 12904 }, { "epoch": 4.053948995818364, "grad_norm": 1.0390625, "learning_rate": 3.81973976515392e-06, "loss": 1.0783, "step": 12906 }, { "epoch": 4.054577222843905, "grad_norm": 0.92578125, "learning_rate": 3.817200888606792e-06, "loss": 1.1055, "step": 12908 }, { "epoch": 4.0552054498694465, "grad_norm": 0.92578125, "learning_rate": 3.814662012059664e-06, "loss": 1.2228, "step": 12910 }, { "epoch": 4.055833676894988, "grad_norm": 0.9375, "learning_rate": 3.812123135512536e-06, "loss": 1.2225, "step": 12912 }, { "epoch": 4.056461903920529, "grad_norm": 0.9453125, "learning_rate": 3.809584258965408e-06, "loss": 1.1644, "step": 12914 }, { "epoch": 4.0570901309460705, "grad_norm": 1.03125, "learning_rate": 3.8070453824182803e-06, "loss": 1.1235, "step": 12916 }, { "epoch": 4.057718357971612, "grad_norm": 0.95703125, "learning_rate": 3.804506505871152e-06, "loss": 1.0109, "step": 12918 }, { "epoch": 4.058346584997153, "grad_norm": 1.234375, "learning_rate": 3.8019676293240244e-06, "loss": 1.2752, "step": 12920 }, { "epoch": 4.0589748120226945, "grad_norm": 0.953125, "learning_rate": 3.799428752776896e-06, "loss": 1.0734, "step": 12922 }, { "epoch": 4.059603039048236, "grad_norm": 0.97265625, "learning_rate": 3.7968898762297684e-06, "loss": 0.9919, "step": 12924 }, { "epoch": 4.060231266073777, "grad_norm": 0.953125, "learning_rate": 3.7943509996826406e-06, "loss": 1.1991, "step": 12926 }, { "epoch": 4.0608594930993185, "grad_norm": 0.875, "learning_rate": 3.7918121231355124e-06, "loss": 1.1653, "step": 12928 }, { "epoch": 4.06148772012486, "grad_norm": 1.0234375, "learning_rate": 3.7892732465883847e-06, "loss": 1.2273, "step": 12930 }, { "epoch": 4.062115947150401, "grad_norm": 0.94140625, "learning_rate": 3.7867343700412573e-06, "loss": 1.2042, "step": 12932 }, { "epoch": 4.0627441741759425, "grad_norm": 0.97265625, "learning_rate": 3.7841954934941296e-06, "loss": 1.2191, "step": 12934 }, { "epoch": 4.063372401201484, "grad_norm": 1.0234375, "learning_rate": 3.7816566169470014e-06, "loss": 1.0432, "step": 12936 }, { "epoch": 4.064000628227025, "grad_norm": 0.9453125, "learning_rate": 3.7791177403998736e-06, "loss": 1.1528, "step": 12938 }, { "epoch": 4.0646288552525665, "grad_norm": 0.95703125, "learning_rate": 3.776578863852746e-06, "loss": 1.1453, "step": 12940 }, { "epoch": 4.065257082278109, "grad_norm": 0.9375, "learning_rate": 3.7740399873056176e-06, "loss": 1.06, "step": 12942 }, { "epoch": 4.06588530930365, "grad_norm": 1.0546875, "learning_rate": 3.77150111075849e-06, "loss": 1.1953, "step": 12944 }, { "epoch": 4.066513536329191, "grad_norm": 0.9453125, "learning_rate": 3.7689622342113617e-06, "loss": 1.1033, "step": 12946 }, { "epoch": 4.067141763354733, "grad_norm": 0.953125, "learning_rate": 3.766423357664234e-06, "loss": 1.1877, "step": 12948 }, { "epoch": 4.067769990380274, "grad_norm": 0.91796875, "learning_rate": 3.763884481117106e-06, "loss": 1.1879, "step": 12950 }, { "epoch": 4.068398217405815, "grad_norm": 0.92578125, "learning_rate": 3.761345604569978e-06, "loss": 1.1494, "step": 12952 }, { "epoch": 4.069026444431357, "grad_norm": 0.95703125, "learning_rate": 3.75880672802285e-06, "loss": 1.2413, "step": 12954 }, { "epoch": 4.069654671456898, "grad_norm": 0.97265625, "learning_rate": 3.7562678514757224e-06, "loss": 1.2644, "step": 12956 }, { "epoch": 4.070282898482439, "grad_norm": 0.98828125, "learning_rate": 3.753728974928594e-06, "loss": 1.0326, "step": 12958 }, { "epoch": 4.070911125507981, "grad_norm": 0.90625, "learning_rate": 3.7511900983814664e-06, "loss": 1.134, "step": 12960 }, { "epoch": 4.071539352533522, "grad_norm": 0.9140625, "learning_rate": 3.7486512218343387e-06, "loss": 1.1512, "step": 12962 }, { "epoch": 4.072167579559063, "grad_norm": 0.921875, "learning_rate": 3.7461123452872105e-06, "loss": 1.1145, "step": 12964 }, { "epoch": 4.072795806584605, "grad_norm": 1.0234375, "learning_rate": 3.7435734687400827e-06, "loss": 1.0943, "step": 12966 }, { "epoch": 4.073424033610146, "grad_norm": 1.046875, "learning_rate": 3.741034592192955e-06, "loss": 1.066, "step": 12968 }, { "epoch": 4.074052260635687, "grad_norm": 1.0546875, "learning_rate": 3.7384957156458267e-06, "loss": 1.1018, "step": 12970 }, { "epoch": 4.074680487661229, "grad_norm": 0.92578125, "learning_rate": 3.735956839098699e-06, "loss": 1.183, "step": 12972 }, { "epoch": 4.07530871468677, "grad_norm": 0.91796875, "learning_rate": 3.7334179625515708e-06, "loss": 1.097, "step": 12974 }, { "epoch": 4.075936941712311, "grad_norm": 1.0703125, "learning_rate": 3.7308790860044434e-06, "loss": 1.0547, "step": 12976 }, { "epoch": 4.076565168737853, "grad_norm": 0.93359375, "learning_rate": 3.7283402094573156e-06, "loss": 1.1599, "step": 12978 }, { "epoch": 4.077193395763394, "grad_norm": 0.98046875, "learning_rate": 3.725801332910188e-06, "loss": 1.1639, "step": 12980 }, { "epoch": 4.077821622788935, "grad_norm": 0.93359375, "learning_rate": 3.7232624563630597e-06, "loss": 1.0774, "step": 12982 }, { "epoch": 4.078449849814477, "grad_norm": 0.984375, "learning_rate": 3.720723579815932e-06, "loss": 1.0822, "step": 12984 }, { "epoch": 4.079078076840018, "grad_norm": 0.96875, "learning_rate": 3.718184703268804e-06, "loss": 1.0684, "step": 12986 }, { "epoch": 4.079706303865559, "grad_norm": 0.953125, "learning_rate": 3.715645826721676e-06, "loss": 1.2843, "step": 12988 }, { "epoch": 4.0803345308911005, "grad_norm": 0.98046875, "learning_rate": 3.713106950174548e-06, "loss": 1.1601, "step": 12990 }, { "epoch": 4.080962757916642, "grad_norm": 1.1171875, "learning_rate": 3.7105680736274204e-06, "loss": 1.1826, "step": 12992 }, { "epoch": 4.081590984942183, "grad_norm": 0.93359375, "learning_rate": 3.708029197080292e-06, "loss": 1.1141, "step": 12994 }, { "epoch": 4.0822192119677245, "grad_norm": 0.92578125, "learning_rate": 3.7054903205331644e-06, "loss": 1.1176, "step": 12996 }, { "epoch": 4.082847438993266, "grad_norm": 1.0, "learning_rate": 3.7029514439860367e-06, "loss": 1.1293, "step": 12998 }, { "epoch": 4.083475666018807, "grad_norm": 0.94140625, "learning_rate": 3.7004125674389085e-06, "loss": 1.1928, "step": 13000 }, { "epoch": 4.0841038930443485, "grad_norm": 0.91015625, "learning_rate": 3.6978736908917807e-06, "loss": 1.0282, "step": 13002 }, { "epoch": 4.08473212006989, "grad_norm": 0.984375, "learning_rate": 3.6953348143446525e-06, "loss": 1.3174, "step": 13004 }, { "epoch": 4.085360347095431, "grad_norm": 0.96484375, "learning_rate": 3.6927959377975247e-06, "loss": 1.0055, "step": 13006 }, { "epoch": 4.085988574120973, "grad_norm": 0.97265625, "learning_rate": 3.690257061250397e-06, "loss": 1.2523, "step": 13008 }, { "epoch": 4.086616801146515, "grad_norm": 0.9765625, "learning_rate": 3.6877181847032688e-06, "loss": 1.1612, "step": 13010 }, { "epoch": 4.087245028172056, "grad_norm": 0.96484375, "learning_rate": 3.685179308156141e-06, "loss": 1.1353, "step": 13012 }, { "epoch": 4.087873255197597, "grad_norm": 0.96484375, "learning_rate": 3.6826404316090132e-06, "loss": 1.2032, "step": 13014 }, { "epoch": 4.088501482223139, "grad_norm": 0.9609375, "learning_rate": 3.680101555061885e-06, "loss": 1.2032, "step": 13016 }, { "epoch": 4.08912970924868, "grad_norm": 0.91796875, "learning_rate": 3.6775626785147577e-06, "loss": 1.1805, "step": 13018 }, { "epoch": 4.089757936274221, "grad_norm": 1.0234375, "learning_rate": 3.67502380196763e-06, "loss": 1.132, "step": 13020 }, { "epoch": 4.090386163299763, "grad_norm": 0.98828125, "learning_rate": 3.672484925420502e-06, "loss": 1.0084, "step": 13022 }, { "epoch": 4.091014390325304, "grad_norm": 1.109375, "learning_rate": 3.669946048873374e-06, "loss": 1.1804, "step": 13024 }, { "epoch": 4.091642617350845, "grad_norm": 0.9921875, "learning_rate": 3.667407172326246e-06, "loss": 1.1413, "step": 13026 }, { "epoch": 4.092270844376387, "grad_norm": 0.9296875, "learning_rate": 3.664868295779118e-06, "loss": 1.0861, "step": 13028 }, { "epoch": 4.092899071401928, "grad_norm": 0.96875, "learning_rate": 3.66232941923199e-06, "loss": 1.1333, "step": 13030 }, { "epoch": 4.093527298427469, "grad_norm": 0.9765625, "learning_rate": 3.6597905426848624e-06, "loss": 1.1493, "step": 13032 }, { "epoch": 4.094155525453011, "grad_norm": 0.91796875, "learning_rate": 3.6572516661377342e-06, "loss": 1.1086, "step": 13034 }, { "epoch": 4.094783752478552, "grad_norm": 1.0078125, "learning_rate": 3.6547127895906065e-06, "loss": 1.1578, "step": 13036 }, { "epoch": 4.095411979504093, "grad_norm": 1.0078125, "learning_rate": 3.6521739130434787e-06, "loss": 1.2425, "step": 13038 }, { "epoch": 4.096040206529635, "grad_norm": 0.96484375, "learning_rate": 3.6496350364963505e-06, "loss": 1.1495, "step": 13040 }, { "epoch": 4.096668433555176, "grad_norm": 0.984375, "learning_rate": 3.6470961599492227e-06, "loss": 0.9513, "step": 13042 }, { "epoch": 4.097296660580717, "grad_norm": 1.0078125, "learning_rate": 3.644557283402095e-06, "loss": 1.09, "step": 13044 }, { "epoch": 4.097924887606259, "grad_norm": 0.95703125, "learning_rate": 3.6420184068549668e-06, "loss": 1.2044, "step": 13046 }, { "epoch": 4.0985531146318, "grad_norm": 1.0, "learning_rate": 3.639479530307839e-06, "loss": 1.1193, "step": 13048 }, { "epoch": 4.099181341657341, "grad_norm": 1.078125, "learning_rate": 3.6369406537607112e-06, "loss": 1.1474, "step": 13050 }, { "epoch": 4.099809568682883, "grad_norm": 0.96875, "learning_rate": 3.634401777213583e-06, "loss": 1.0775, "step": 13052 }, { "epoch": 4.100437795708424, "grad_norm": 0.89453125, "learning_rate": 3.6318629006664553e-06, "loss": 1.1762, "step": 13054 }, { "epoch": 4.101066022733965, "grad_norm": 0.88671875, "learning_rate": 3.629324024119327e-06, "loss": 1.2647, "step": 13056 }, { "epoch": 4.101694249759507, "grad_norm": 0.9609375, "learning_rate": 3.6267851475721993e-06, "loss": 1.0898, "step": 13058 }, { "epoch": 4.102322476785048, "grad_norm": 0.94921875, "learning_rate": 3.6242462710250715e-06, "loss": 1.0764, "step": 13060 }, { "epoch": 4.102950703810589, "grad_norm": 0.9453125, "learning_rate": 3.621707394477944e-06, "loss": 1.0317, "step": 13062 }, { "epoch": 4.103578930836131, "grad_norm": 0.98046875, "learning_rate": 3.619168517930816e-06, "loss": 1.2475, "step": 13064 }, { "epoch": 4.104207157861672, "grad_norm": 0.98828125, "learning_rate": 3.6166296413836882e-06, "loss": 1.1045, "step": 13066 }, { "epoch": 4.104835384887213, "grad_norm": 1.1171875, "learning_rate": 3.6140907648365604e-06, "loss": 1.1274, "step": 13068 }, { "epoch": 4.1054636119127546, "grad_norm": 0.95703125, "learning_rate": 3.6115518882894323e-06, "loss": 1.1229, "step": 13070 }, { "epoch": 4.106091838938296, "grad_norm": 0.92578125, "learning_rate": 3.6090130117423045e-06, "loss": 1.2849, "step": 13072 }, { "epoch": 4.106720065963838, "grad_norm": 0.94140625, "learning_rate": 3.6064741351951767e-06, "loss": 1.212, "step": 13074 }, { "epoch": 4.107348292989379, "grad_norm": 1.0703125, "learning_rate": 3.6039352586480485e-06, "loss": 1.1514, "step": 13076 }, { "epoch": 4.107976520014921, "grad_norm": 0.9375, "learning_rate": 3.6013963821009207e-06, "loss": 1.1792, "step": 13078 }, { "epoch": 4.108604747040462, "grad_norm": 0.9140625, "learning_rate": 3.5988575055537926e-06, "loss": 1.1968, "step": 13080 }, { "epoch": 4.109232974066003, "grad_norm": 0.98046875, "learning_rate": 3.5963186290066648e-06, "loss": 1.1344, "step": 13082 }, { "epoch": 4.109861201091545, "grad_norm": 1.015625, "learning_rate": 3.593779752459537e-06, "loss": 1.1625, "step": 13084 }, { "epoch": 4.110489428117086, "grad_norm": 1.0234375, "learning_rate": 3.591240875912409e-06, "loss": 1.189, "step": 13086 }, { "epoch": 4.111117655142627, "grad_norm": 0.98828125, "learning_rate": 3.588701999365281e-06, "loss": 1.186, "step": 13088 }, { "epoch": 4.111745882168169, "grad_norm": 0.9140625, "learning_rate": 3.5861631228181533e-06, "loss": 1.1582, "step": 13090 }, { "epoch": 4.11237410919371, "grad_norm": 1.015625, "learning_rate": 3.583624246271025e-06, "loss": 1.1451, "step": 13092 }, { "epoch": 4.113002336219251, "grad_norm": 0.94140625, "learning_rate": 3.5810853697238973e-06, "loss": 1.1356, "step": 13094 }, { "epoch": 4.113630563244793, "grad_norm": 0.9453125, "learning_rate": 3.5785464931767695e-06, "loss": 1.0128, "step": 13096 }, { "epoch": 4.114258790270334, "grad_norm": 0.9453125, "learning_rate": 3.5760076166296413e-06, "loss": 1.1429, "step": 13098 }, { "epoch": 4.114887017295875, "grad_norm": 0.91015625, "learning_rate": 3.5734687400825136e-06, "loss": 1.1854, "step": 13100 }, { "epoch": 4.115515244321417, "grad_norm": 0.96484375, "learning_rate": 3.570929863535386e-06, "loss": 1.0948, "step": 13102 }, { "epoch": 4.116143471346958, "grad_norm": 0.8671875, "learning_rate": 3.5683909869882585e-06, "loss": 1.1388, "step": 13104 }, { "epoch": 4.116771698372499, "grad_norm": 0.9765625, "learning_rate": 3.5658521104411303e-06, "loss": 1.1874, "step": 13106 }, { "epoch": 4.117399925398041, "grad_norm": 1.1484375, "learning_rate": 3.5633132338940025e-06, "loss": 1.1221, "step": 13108 }, { "epoch": 4.118028152423582, "grad_norm": 0.9375, "learning_rate": 3.5607743573468743e-06, "loss": 1.1757, "step": 13110 }, { "epoch": 4.118656379449123, "grad_norm": 0.953125, "learning_rate": 3.5582354807997465e-06, "loss": 1.0752, "step": 13112 }, { "epoch": 4.119284606474665, "grad_norm": 0.92578125, "learning_rate": 3.5556966042526188e-06, "loss": 1.0669, "step": 13114 }, { "epoch": 4.119912833500206, "grad_norm": 0.96484375, "learning_rate": 3.5531577277054906e-06, "loss": 1.0789, "step": 13116 }, { "epoch": 4.120541060525747, "grad_norm": 0.96484375, "learning_rate": 3.550618851158363e-06, "loss": 1.0973, "step": 13118 }, { "epoch": 4.121169287551289, "grad_norm": 1.0, "learning_rate": 3.548079974611235e-06, "loss": 1.2916, "step": 13120 }, { "epoch": 4.12179751457683, "grad_norm": 0.98046875, "learning_rate": 3.545541098064107e-06, "loss": 1.0939, "step": 13122 }, { "epoch": 4.122425741602371, "grad_norm": 0.9453125, "learning_rate": 3.543002221516979e-06, "loss": 1.1648, "step": 13124 }, { "epoch": 4.123053968627913, "grad_norm": 0.9765625, "learning_rate": 3.5404633449698513e-06, "loss": 1.0477, "step": 13126 }, { "epoch": 4.123682195653454, "grad_norm": 0.9375, "learning_rate": 3.537924468422723e-06, "loss": 1.1372, "step": 13128 }, { "epoch": 4.124310422678995, "grad_norm": 0.92578125, "learning_rate": 3.5353855918755953e-06, "loss": 1.199, "step": 13130 }, { "epoch": 4.124938649704537, "grad_norm": 0.98046875, "learning_rate": 3.5328467153284675e-06, "loss": 1.219, "step": 13132 }, { "epoch": 4.125566876730078, "grad_norm": 0.890625, "learning_rate": 3.5303078387813394e-06, "loss": 1.1872, "step": 13134 }, { "epoch": 4.126195103755619, "grad_norm": 1.0546875, "learning_rate": 3.5277689622342116e-06, "loss": 1.1893, "step": 13136 }, { "epoch": 4.126823330781161, "grad_norm": 1.1328125, "learning_rate": 3.5252300856870834e-06, "loss": 1.1652, "step": 13138 }, { "epoch": 4.127451557806703, "grad_norm": 0.9609375, "learning_rate": 3.5226912091399556e-06, "loss": 1.1278, "step": 13140 }, { "epoch": 4.128079784832244, "grad_norm": 0.890625, "learning_rate": 3.520152332592828e-06, "loss": 1.2258, "step": 13142 }, { "epoch": 4.1287080118577855, "grad_norm": 0.9609375, "learning_rate": 3.5176134560456997e-06, "loss": 0.9841, "step": 13144 }, { "epoch": 4.129336238883327, "grad_norm": 1.0078125, "learning_rate": 3.515074579498572e-06, "loss": 1.097, "step": 13146 }, { "epoch": 4.129964465908868, "grad_norm": 0.92578125, "learning_rate": 3.5125357029514445e-06, "loss": 1.1676, "step": 13148 }, { "epoch": 4.1305926929344094, "grad_norm": 0.921875, "learning_rate": 3.5099968264043168e-06, "loss": 1.1999, "step": 13150 }, { "epoch": 4.131220919959951, "grad_norm": 0.9765625, "learning_rate": 3.5074579498571886e-06, "loss": 1.0872, "step": 13152 }, { "epoch": 4.131849146985492, "grad_norm": 0.97265625, "learning_rate": 3.504919073310061e-06, "loss": 1.2063, "step": 13154 }, { "epoch": 4.132477374011033, "grad_norm": 0.98828125, "learning_rate": 3.502380196762933e-06, "loss": 1.0338, "step": 13156 }, { "epoch": 4.133105601036575, "grad_norm": 1.0703125, "learning_rate": 3.499841320215805e-06, "loss": 1.0693, "step": 13158 }, { "epoch": 4.133733828062116, "grad_norm": 0.98828125, "learning_rate": 3.497302443668677e-06, "loss": 1.1831, "step": 13160 }, { "epoch": 4.134362055087657, "grad_norm": 0.93359375, "learning_rate": 3.494763567121549e-06, "loss": 1.0618, "step": 13162 }, { "epoch": 4.134990282113199, "grad_norm": 0.9375, "learning_rate": 3.492224690574421e-06, "loss": 1.0687, "step": 13164 }, { "epoch": 4.13561850913874, "grad_norm": 1.0078125, "learning_rate": 3.4896858140272933e-06, "loss": 1.103, "step": 13166 }, { "epoch": 4.136246736164281, "grad_norm": 0.9609375, "learning_rate": 3.487146937480165e-06, "loss": 1.2587, "step": 13168 }, { "epoch": 4.136874963189823, "grad_norm": 0.94921875, "learning_rate": 3.4846080609330374e-06, "loss": 1.1793, "step": 13170 }, { "epoch": 4.137503190215364, "grad_norm": 0.9375, "learning_rate": 3.4820691843859096e-06, "loss": 1.1986, "step": 13172 }, { "epoch": 4.138131417240905, "grad_norm": 0.890625, "learning_rate": 3.4795303078387814e-06, "loss": 1.1754, "step": 13174 }, { "epoch": 4.138759644266447, "grad_norm": 0.8984375, "learning_rate": 3.4769914312916536e-06, "loss": 0.9874, "step": 13176 }, { "epoch": 4.139387871291988, "grad_norm": 0.9765625, "learning_rate": 3.474452554744526e-06, "loss": 1.1154, "step": 13178 }, { "epoch": 4.140016098317529, "grad_norm": 0.87890625, "learning_rate": 3.4719136781973977e-06, "loss": 1.068, "step": 13180 }, { "epoch": 4.140644325343071, "grad_norm": 1.015625, "learning_rate": 3.46937480165027e-06, "loss": 1.0213, "step": 13182 }, { "epoch": 4.141272552368612, "grad_norm": 0.9609375, "learning_rate": 3.466835925103142e-06, "loss": 1.051, "step": 13184 }, { "epoch": 4.141900779394153, "grad_norm": 0.96484375, "learning_rate": 3.464297048556014e-06, "loss": 1.0408, "step": 13186 }, { "epoch": 4.142529006419695, "grad_norm": 1.0078125, "learning_rate": 3.461758172008886e-06, "loss": 0.9432, "step": 13188 }, { "epoch": 4.143157233445236, "grad_norm": 1.03125, "learning_rate": 3.459219295461759e-06, "loss": 1.0752, "step": 13190 }, { "epoch": 4.143785460470777, "grad_norm": 1.0625, "learning_rate": 3.4566804189146306e-06, "loss": 1.1965, "step": 13192 }, { "epoch": 4.144413687496319, "grad_norm": 0.921875, "learning_rate": 3.454141542367503e-06, "loss": 1.1253, "step": 13194 }, { "epoch": 4.14504191452186, "grad_norm": 0.9921875, "learning_rate": 3.451602665820375e-06, "loss": 1.0991, "step": 13196 }, { "epoch": 4.145670141547401, "grad_norm": 0.96484375, "learning_rate": 3.449063789273247e-06, "loss": 1.2669, "step": 13198 }, { "epoch": 4.146298368572943, "grad_norm": 0.89453125, "learning_rate": 3.446524912726119e-06, "loss": 1.2149, "step": 13200 }, { "epoch": 4.146926595598484, "grad_norm": 1.0625, "learning_rate": 3.4439860361789913e-06, "loss": 1.1671, "step": 13202 }, { "epoch": 4.147554822624025, "grad_norm": 1.0703125, "learning_rate": 3.441447159631863e-06, "loss": 1.1164, "step": 13204 }, { "epoch": 4.1481830496495675, "grad_norm": 0.95703125, "learning_rate": 3.4389082830847354e-06, "loss": 1.2815, "step": 13206 }, { "epoch": 4.148811276675109, "grad_norm": 1.0234375, "learning_rate": 3.4363694065376076e-06, "loss": 1.0553, "step": 13208 }, { "epoch": 4.14943950370065, "grad_norm": 0.9921875, "learning_rate": 3.4338305299904794e-06, "loss": 1.1001, "step": 13210 }, { "epoch": 4.1500677307261915, "grad_norm": 1.0234375, "learning_rate": 3.4312916534433516e-06, "loss": 1.1171, "step": 13212 }, { "epoch": 4.150695957751733, "grad_norm": 0.9140625, "learning_rate": 3.428752776896224e-06, "loss": 1.0604, "step": 13214 }, { "epoch": 4.151324184777274, "grad_norm": 1.03125, "learning_rate": 3.4262139003490957e-06, "loss": 1.0243, "step": 13216 }, { "epoch": 4.1519524118028155, "grad_norm": 1.0390625, "learning_rate": 3.423675023801968e-06, "loss": 1.102, "step": 13218 }, { "epoch": 4.152580638828357, "grad_norm": 0.99609375, "learning_rate": 3.4211361472548397e-06, "loss": 1.1887, "step": 13220 }, { "epoch": 4.153208865853898, "grad_norm": 0.94140625, "learning_rate": 3.418597270707712e-06, "loss": 1.2418, "step": 13222 }, { "epoch": 4.1538370928794395, "grad_norm": 1.03125, "learning_rate": 3.416058394160584e-06, "loss": 1.0751, "step": 13224 }, { "epoch": 4.154465319904981, "grad_norm": 0.9296875, "learning_rate": 3.413519517613456e-06, "loss": 1.2239, "step": 13226 }, { "epoch": 4.155093546930522, "grad_norm": 0.921875, "learning_rate": 3.410980641066328e-06, "loss": 0.9991, "step": 13228 }, { "epoch": 4.1557217739560635, "grad_norm": 0.953125, "learning_rate": 3.4084417645192004e-06, "loss": 1.2483, "step": 13230 }, { "epoch": 4.156350000981605, "grad_norm": 0.96484375, "learning_rate": 3.405902887972073e-06, "loss": 1.0796, "step": 13232 }, { "epoch": 4.156978228007146, "grad_norm": 0.9453125, "learning_rate": 3.403364011424945e-06, "loss": 1.1281, "step": 13234 }, { "epoch": 4.157606455032687, "grad_norm": 0.9296875, "learning_rate": 3.400825134877817e-06, "loss": 1.2788, "step": 13236 }, { "epoch": 4.158234682058229, "grad_norm": 0.98828125, "learning_rate": 3.3982862583306893e-06, "loss": 1.1226, "step": 13238 }, { "epoch": 4.15886290908377, "grad_norm": 0.9296875, "learning_rate": 3.395747381783561e-06, "loss": 1.1469, "step": 13240 }, { "epoch": 4.159491136109311, "grad_norm": 0.9140625, "learning_rate": 3.3932085052364334e-06, "loss": 1.1438, "step": 13242 }, { "epoch": 4.160119363134853, "grad_norm": 0.95703125, "learning_rate": 3.390669628689305e-06, "loss": 1.0702, "step": 13244 }, { "epoch": 4.160747590160394, "grad_norm": 0.96875, "learning_rate": 3.3881307521421774e-06, "loss": 1.0628, "step": 13246 }, { "epoch": 4.161375817185935, "grad_norm": 0.88671875, "learning_rate": 3.3855918755950496e-06, "loss": 1.09, "step": 13248 }, { "epoch": 4.162004044211477, "grad_norm": 1.0234375, "learning_rate": 3.3830529990479214e-06, "loss": 1.0034, "step": 13250 }, { "epoch": 4.162632271237018, "grad_norm": 1.0078125, "learning_rate": 3.3805141225007937e-06, "loss": 1.0981, "step": 13252 }, { "epoch": 4.163260498262559, "grad_norm": 0.94921875, "learning_rate": 3.377975245953666e-06, "loss": 1.1283, "step": 13254 }, { "epoch": 4.163888725288101, "grad_norm": 0.93359375, "learning_rate": 3.3754363694065377e-06, "loss": 1.1439, "step": 13256 }, { "epoch": 4.164516952313642, "grad_norm": 0.9375, "learning_rate": 3.37289749285941e-06, "loss": 1.0823, "step": 13258 }, { "epoch": 4.165145179339183, "grad_norm": 0.9296875, "learning_rate": 3.370358616312282e-06, "loss": 1.2133, "step": 13260 }, { "epoch": 4.165773406364725, "grad_norm": 1.015625, "learning_rate": 3.367819739765154e-06, "loss": 1.1793, "step": 13262 }, { "epoch": 4.166401633390266, "grad_norm": 1.0, "learning_rate": 3.365280863218026e-06, "loss": 1.1161, "step": 13264 }, { "epoch": 4.167029860415807, "grad_norm": 0.9921875, "learning_rate": 3.3627419866708984e-06, "loss": 1.0324, "step": 13266 }, { "epoch": 4.167658087441349, "grad_norm": 1.0234375, "learning_rate": 3.3602031101237702e-06, "loss": 1.071, "step": 13268 }, { "epoch": 4.16828631446689, "grad_norm": 0.91015625, "learning_rate": 3.3576642335766425e-06, "loss": 1.0944, "step": 13270 }, { "epoch": 4.168914541492432, "grad_norm": 1.0, "learning_rate": 3.3551253570295143e-06, "loss": 1.1548, "step": 13272 }, { "epoch": 4.169542768517974, "grad_norm": 0.95703125, "learning_rate": 3.3525864804823865e-06, "loss": 1.0755, "step": 13274 }, { "epoch": 4.170170995543515, "grad_norm": 0.90234375, "learning_rate": 3.350047603935259e-06, "loss": 1.1612, "step": 13276 }, { "epoch": 4.170799222569056, "grad_norm": 1.0078125, "learning_rate": 3.3475087273881314e-06, "loss": 1.1584, "step": 13278 }, { "epoch": 4.1714274495945975, "grad_norm": 0.890625, "learning_rate": 3.344969850841003e-06, "loss": 1.1381, "step": 13280 }, { "epoch": 4.172055676620139, "grad_norm": 0.94140625, "learning_rate": 3.3424309742938754e-06, "loss": 1.0934, "step": 13282 }, { "epoch": 4.17268390364568, "grad_norm": 1.0078125, "learning_rate": 3.3398920977467476e-06, "loss": 1.1372, "step": 13284 }, { "epoch": 4.1733121306712215, "grad_norm": 0.984375, "learning_rate": 3.3373532211996195e-06, "loss": 1.3176, "step": 13286 }, { "epoch": 4.173940357696763, "grad_norm": 0.96484375, "learning_rate": 3.3348143446524917e-06, "loss": 1.0293, "step": 13288 }, { "epoch": 4.174568584722304, "grad_norm": 0.91015625, "learning_rate": 3.332275468105364e-06, "loss": 1.1804, "step": 13290 }, { "epoch": 4.1751968117478455, "grad_norm": 1.0, "learning_rate": 3.3297365915582357e-06, "loss": 1.1198, "step": 13292 }, { "epoch": 4.175825038773387, "grad_norm": 0.984375, "learning_rate": 3.327197715011108e-06, "loss": 1.1353, "step": 13294 }, { "epoch": 4.176453265798928, "grad_norm": 0.9609375, "learning_rate": 3.3246588384639798e-06, "loss": 1.0919, "step": 13296 }, { "epoch": 4.1770814928244695, "grad_norm": 0.93359375, "learning_rate": 3.322119961916852e-06, "loss": 1.0492, "step": 13298 }, { "epoch": 4.177709719850011, "grad_norm": 0.96484375, "learning_rate": 3.3195810853697242e-06, "loss": 1.1439, "step": 13300 }, { "epoch": 4.178337946875552, "grad_norm": 0.96484375, "learning_rate": 3.317042208822596e-06, "loss": 1.3192, "step": 13302 }, { "epoch": 4.1789661739010935, "grad_norm": 0.91796875, "learning_rate": 3.3145033322754682e-06, "loss": 1.1151, "step": 13304 }, { "epoch": 4.179594400926635, "grad_norm": 0.94921875, "learning_rate": 3.3119644557283405e-06, "loss": 1.2668, "step": 13306 }, { "epoch": 4.180222627952176, "grad_norm": 0.94140625, "learning_rate": 3.3094255791812123e-06, "loss": 1.1162, "step": 13308 }, { "epoch": 4.1808508549777175, "grad_norm": 0.9375, "learning_rate": 3.3068867026340845e-06, "loss": 0.9888, "step": 13310 }, { "epoch": 4.181479082003259, "grad_norm": 0.8984375, "learning_rate": 3.3043478260869567e-06, "loss": 1.2565, "step": 13312 }, { "epoch": 4.1821073090288, "grad_norm": 0.90625, "learning_rate": 3.3018089495398285e-06, "loss": 1.2899, "step": 13314 }, { "epoch": 4.1827355360543415, "grad_norm": 1.0546875, "learning_rate": 3.2992700729927008e-06, "loss": 0.9787, "step": 13316 }, { "epoch": 4.183363763079883, "grad_norm": 0.89453125, "learning_rate": 3.2967311964455734e-06, "loss": 1.2491, "step": 13318 }, { "epoch": 4.183991990105424, "grad_norm": 1.0390625, "learning_rate": 3.2941923198984457e-06, "loss": 1.1314, "step": 13320 }, { "epoch": 4.184620217130965, "grad_norm": 0.89453125, "learning_rate": 3.2916534433513175e-06, "loss": 1.0971, "step": 13322 }, { "epoch": 4.185248444156507, "grad_norm": 0.9609375, "learning_rate": 3.2891145668041897e-06, "loss": 1.0568, "step": 13324 }, { "epoch": 4.185876671182048, "grad_norm": 0.9921875, "learning_rate": 3.2865756902570615e-06, "loss": 1.2299, "step": 13326 }, { "epoch": 4.186504898207589, "grad_norm": 1.0, "learning_rate": 3.2840368137099337e-06, "loss": 1.1155, "step": 13328 }, { "epoch": 4.187133125233131, "grad_norm": 0.96484375, "learning_rate": 3.281497937162806e-06, "loss": 1.1052, "step": 13330 }, { "epoch": 4.187761352258672, "grad_norm": 0.921875, "learning_rate": 3.2789590606156778e-06, "loss": 1.0977, "step": 13332 }, { "epoch": 4.188389579284213, "grad_norm": 1.125, "learning_rate": 3.27642018406855e-06, "loss": 1.2308, "step": 13334 }, { "epoch": 4.189017806309755, "grad_norm": 1.078125, "learning_rate": 3.2738813075214222e-06, "loss": 1.077, "step": 13336 }, { "epoch": 4.189646033335297, "grad_norm": 0.953125, "learning_rate": 3.271342430974294e-06, "loss": 1.2691, "step": 13338 }, { "epoch": 4.190274260360838, "grad_norm": 0.91796875, "learning_rate": 3.2688035544271663e-06, "loss": 1.1123, "step": 13340 }, { "epoch": 4.19090248738638, "grad_norm": 0.94140625, "learning_rate": 3.2662646778800385e-06, "loss": 1.0874, "step": 13342 }, { "epoch": 4.191530714411921, "grad_norm": 0.93359375, "learning_rate": 3.2637258013329103e-06, "loss": 1.1186, "step": 13344 }, { "epoch": 4.192158941437462, "grad_norm": 1.0859375, "learning_rate": 3.2611869247857825e-06, "loss": 1.1074, "step": 13346 }, { "epoch": 4.192787168463004, "grad_norm": 0.921875, "learning_rate": 3.2586480482386547e-06, "loss": 1.1609, "step": 13348 }, { "epoch": 4.193415395488545, "grad_norm": 0.98828125, "learning_rate": 3.2561091716915266e-06, "loss": 1.2608, "step": 13350 }, { "epoch": 4.194043622514086, "grad_norm": 1.0546875, "learning_rate": 3.2535702951443988e-06, "loss": 1.1254, "step": 13352 }, { "epoch": 4.194671849539628, "grad_norm": 0.9296875, "learning_rate": 3.2510314185972706e-06, "loss": 1.1664, "step": 13354 }, { "epoch": 4.195300076565169, "grad_norm": 0.96484375, "learning_rate": 3.248492542050143e-06, "loss": 1.0228, "step": 13356 }, { "epoch": 4.19592830359071, "grad_norm": 0.99609375, "learning_rate": 3.245953665503015e-06, "loss": 1.0682, "step": 13358 }, { "epoch": 4.196556530616252, "grad_norm": 0.9453125, "learning_rate": 3.243414788955887e-06, "loss": 1.1205, "step": 13360 }, { "epoch": 4.197184757641793, "grad_norm": 0.95703125, "learning_rate": 3.2408759124087595e-06, "loss": 1.1429, "step": 13362 }, { "epoch": 4.197812984667334, "grad_norm": 0.953125, "learning_rate": 3.2383370358616317e-06, "loss": 1.1568, "step": 13364 }, { "epoch": 4.1984412116928755, "grad_norm": 1.125, "learning_rate": 3.235798159314504e-06, "loss": 1.0898, "step": 13366 }, { "epoch": 4.199069438718417, "grad_norm": 0.99609375, "learning_rate": 3.2332592827673758e-06, "loss": 1.0845, "step": 13368 }, { "epoch": 4.199697665743958, "grad_norm": 0.953125, "learning_rate": 3.230720406220248e-06, "loss": 1.1888, "step": 13370 }, { "epoch": 4.2003258927694995, "grad_norm": 0.99609375, "learning_rate": 3.2281815296731202e-06, "loss": 1.119, "step": 13372 }, { "epoch": 4.200954119795041, "grad_norm": 0.91796875, "learning_rate": 3.225642653125992e-06, "loss": 1.2503, "step": 13374 }, { "epoch": 4.201582346820582, "grad_norm": 0.9140625, "learning_rate": 3.2231037765788643e-06, "loss": 1.1553, "step": 13376 }, { "epoch": 4.2022105738461235, "grad_norm": 0.96484375, "learning_rate": 3.220564900031736e-06, "loss": 1.1383, "step": 13378 }, { "epoch": 4.202838800871665, "grad_norm": 0.96484375, "learning_rate": 3.2180260234846083e-06, "loss": 1.2733, "step": 13380 }, { "epoch": 4.203467027897206, "grad_norm": 0.9765625, "learning_rate": 3.2154871469374805e-06, "loss": 1.0387, "step": 13382 }, { "epoch": 4.2040952549227475, "grad_norm": 0.97265625, "learning_rate": 3.2129482703903523e-06, "loss": 1.0777, "step": 13384 }, { "epoch": 4.204723481948289, "grad_norm": 0.97265625, "learning_rate": 3.2104093938432246e-06, "loss": 1.2113, "step": 13386 }, { "epoch": 4.20535170897383, "grad_norm": 0.875, "learning_rate": 3.207870517296097e-06, "loss": 1.1534, "step": 13388 }, { "epoch": 4.2059799359993715, "grad_norm": 1.0078125, "learning_rate": 3.2053316407489686e-06, "loss": 1.0804, "step": 13390 }, { "epoch": 4.206608163024913, "grad_norm": 1.0546875, "learning_rate": 3.202792764201841e-06, "loss": 1.2191, "step": 13392 }, { "epoch": 4.207236390050454, "grad_norm": 0.91015625, "learning_rate": 3.200253887654713e-06, "loss": 1.1824, "step": 13394 }, { "epoch": 4.2078646170759955, "grad_norm": 0.94921875, "learning_rate": 3.197715011107585e-06, "loss": 1.2805, "step": 13396 }, { "epoch": 4.208492844101537, "grad_norm": 0.88671875, "learning_rate": 3.195176134560457e-06, "loss": 1.2513, "step": 13398 }, { "epoch": 4.209121071127079, "grad_norm": 1.0390625, "learning_rate": 3.1926372580133293e-06, "loss": 1.1223, "step": 13400 }, { "epoch": 4.2097492981526194, "grad_norm": 0.9609375, "learning_rate": 3.190098381466201e-06, "loss": 1.2145, "step": 13402 }, { "epoch": 4.210377525178162, "grad_norm": 0.9453125, "learning_rate": 3.1875595049190738e-06, "loss": 1.081, "step": 13404 }, { "epoch": 4.211005752203703, "grad_norm": 0.99609375, "learning_rate": 3.185020628371946e-06, "loss": 1.147, "step": 13406 }, { "epoch": 4.211633979229244, "grad_norm": 0.98828125, "learning_rate": 3.182481751824818e-06, "loss": 1.1873, "step": 13408 }, { "epoch": 4.212262206254786, "grad_norm": 1.0546875, "learning_rate": 3.17994287527769e-06, "loss": 1.1968, "step": 13410 }, { "epoch": 4.212890433280327, "grad_norm": 0.96875, "learning_rate": 3.1774039987305623e-06, "loss": 1.1712, "step": 13412 }, { "epoch": 4.213518660305868, "grad_norm": 0.98046875, "learning_rate": 3.174865122183434e-06, "loss": 1.0781, "step": 13414 }, { "epoch": 4.21414688733141, "grad_norm": 0.95703125, "learning_rate": 3.1723262456363063e-06, "loss": 1.2744, "step": 13416 }, { "epoch": 4.214775114356951, "grad_norm": 0.953125, "learning_rate": 3.1697873690891785e-06, "loss": 1.2767, "step": 13418 }, { "epoch": 4.215403341382492, "grad_norm": 0.91015625, "learning_rate": 3.1672484925420503e-06, "loss": 1.2527, "step": 13420 }, { "epoch": 4.216031568408034, "grad_norm": 0.9375, "learning_rate": 3.1647096159949226e-06, "loss": 1.083, "step": 13422 }, { "epoch": 4.216659795433575, "grad_norm": 0.91796875, "learning_rate": 3.162170739447795e-06, "loss": 1.0102, "step": 13424 }, { "epoch": 4.217288022459116, "grad_norm": 0.9921875, "learning_rate": 3.1596318629006666e-06, "loss": 1.2045, "step": 13426 }, { "epoch": 4.217916249484658, "grad_norm": 0.96875, "learning_rate": 3.157092986353539e-06, "loss": 1.1392, "step": 13428 }, { "epoch": 4.218544476510199, "grad_norm": 0.91796875, "learning_rate": 3.154554109806411e-06, "loss": 1.0956, "step": 13430 }, { "epoch": 4.21917270353574, "grad_norm": 0.953125, "learning_rate": 3.152015233259283e-06, "loss": 1.0189, "step": 13432 }, { "epoch": 4.219800930561282, "grad_norm": 0.9375, "learning_rate": 3.149476356712155e-06, "loss": 1.2862, "step": 13434 }, { "epoch": 4.220429157586823, "grad_norm": 0.9921875, "learning_rate": 3.146937480165027e-06, "loss": 1.1364, "step": 13436 }, { "epoch": 4.221057384612364, "grad_norm": 0.9375, "learning_rate": 3.144398603617899e-06, "loss": 1.1584, "step": 13438 }, { "epoch": 4.221685611637906, "grad_norm": 1.03125, "learning_rate": 3.1418597270707714e-06, "loss": 1.0644, "step": 13440 }, { "epoch": 4.222313838663447, "grad_norm": 0.921875, "learning_rate": 3.139320850523643e-06, "loss": 1.2372, "step": 13442 }, { "epoch": 4.222942065688988, "grad_norm": 0.96484375, "learning_rate": 3.1367819739765154e-06, "loss": 1.009, "step": 13444 }, { "epoch": 4.2235702927145296, "grad_norm": 1.0078125, "learning_rate": 3.1342430974293876e-06, "loss": 1.1188, "step": 13446 }, { "epoch": 4.224198519740071, "grad_norm": 0.9453125, "learning_rate": 3.1317042208822603e-06, "loss": 1.0869, "step": 13448 }, { "epoch": 4.224826746765612, "grad_norm": 1.3046875, "learning_rate": 3.129165344335132e-06, "loss": 1.0426, "step": 13450 }, { "epoch": 4.2254549737911535, "grad_norm": 0.94921875, "learning_rate": 3.1266264677880043e-06, "loss": 1.1086, "step": 13452 }, { "epoch": 4.226083200816695, "grad_norm": 0.9375, "learning_rate": 3.1240875912408765e-06, "loss": 1.1355, "step": 13454 }, { "epoch": 4.226711427842236, "grad_norm": 0.890625, "learning_rate": 3.1215487146937483e-06, "loss": 1.2425, "step": 13456 }, { "epoch": 4.2273396548677775, "grad_norm": 0.9375, "learning_rate": 3.1190098381466206e-06, "loss": 1.2068, "step": 13458 }, { "epoch": 4.227967881893319, "grad_norm": 0.96484375, "learning_rate": 3.1164709615994924e-06, "loss": 1.1716, "step": 13460 }, { "epoch": 4.22859610891886, "grad_norm": 1.0, "learning_rate": 3.1139320850523646e-06, "loss": 1.0456, "step": 13462 }, { "epoch": 4.2292243359444015, "grad_norm": 0.95703125, "learning_rate": 3.111393208505237e-06, "loss": 1.1513, "step": 13464 }, { "epoch": 4.229852562969944, "grad_norm": 0.953125, "learning_rate": 3.1088543319581086e-06, "loss": 1.1656, "step": 13466 }, { "epoch": 4.230480789995485, "grad_norm": 1.0546875, "learning_rate": 3.106315455410981e-06, "loss": 1.0559, "step": 13468 }, { "epoch": 4.231109017021026, "grad_norm": 1.0390625, "learning_rate": 3.103776578863853e-06, "loss": 1.2135, "step": 13470 }, { "epoch": 4.231737244046568, "grad_norm": 0.90625, "learning_rate": 3.101237702316725e-06, "loss": 1.1644, "step": 13472 }, { "epoch": 4.232365471072109, "grad_norm": 0.97265625, "learning_rate": 3.098698825769597e-06, "loss": 1.2244, "step": 13474 }, { "epoch": 4.23299369809765, "grad_norm": 0.890625, "learning_rate": 3.0961599492224694e-06, "loss": 1.1854, "step": 13476 }, { "epoch": 4.233621925123192, "grad_norm": 0.97265625, "learning_rate": 3.093621072675341e-06, "loss": 1.2373, "step": 13478 }, { "epoch": 4.234250152148733, "grad_norm": 0.93359375, "learning_rate": 3.0910821961282134e-06, "loss": 1.1918, "step": 13480 }, { "epoch": 4.234878379174274, "grad_norm": 0.98828125, "learning_rate": 3.0885433195810856e-06, "loss": 1.0742, "step": 13482 }, { "epoch": 4.235506606199816, "grad_norm": 1.015625, "learning_rate": 3.0860044430339574e-06, "loss": 1.098, "step": 13484 }, { "epoch": 4.236134833225357, "grad_norm": 0.953125, "learning_rate": 3.0834655664868297e-06, "loss": 1.2629, "step": 13486 }, { "epoch": 4.236763060250898, "grad_norm": 1.03125, "learning_rate": 3.0809266899397015e-06, "loss": 1.093, "step": 13488 }, { "epoch": 4.23739128727644, "grad_norm": 1.0234375, "learning_rate": 3.078387813392574e-06, "loss": 1.1481, "step": 13490 }, { "epoch": 4.238019514301981, "grad_norm": 0.94140625, "learning_rate": 3.0758489368454464e-06, "loss": 1.2297, "step": 13492 }, { "epoch": 4.238647741327522, "grad_norm": 1.0, "learning_rate": 3.0733100602983186e-06, "loss": 1.1674, "step": 13494 }, { "epoch": 4.239275968353064, "grad_norm": 1.0, "learning_rate": 3.0707711837511904e-06, "loss": 1.1699, "step": 13496 }, { "epoch": 4.239904195378605, "grad_norm": 0.96484375, "learning_rate": 3.0682323072040626e-06, "loss": 0.9182, "step": 13498 }, { "epoch": 4.240532422404146, "grad_norm": 1.0078125, "learning_rate": 3.065693430656935e-06, "loss": 1.1024, "step": 13500 }, { "epoch": 4.241160649429688, "grad_norm": 1.0, "learning_rate": 3.0631545541098067e-06, "loss": 1.0213, "step": 13502 }, { "epoch": 4.241788876455229, "grad_norm": 1.0546875, "learning_rate": 3.060615677562679e-06, "loss": 1.1893, "step": 13504 }, { "epoch": 4.24241710348077, "grad_norm": 0.99609375, "learning_rate": 3.058076801015551e-06, "loss": 1.1344, "step": 13506 }, { "epoch": 4.243045330506312, "grad_norm": 0.94140625, "learning_rate": 3.055537924468423e-06, "loss": 1.4122, "step": 13508 }, { "epoch": 4.243673557531853, "grad_norm": 1.0234375, "learning_rate": 3.052999047921295e-06, "loss": 1.1822, "step": 13510 }, { "epoch": 4.244301784557394, "grad_norm": 1.046875, "learning_rate": 3.050460171374167e-06, "loss": 1.2055, "step": 13512 }, { "epoch": 4.244930011582936, "grad_norm": 0.97265625, "learning_rate": 3.047921294827039e-06, "loss": 1.0405, "step": 13514 }, { "epoch": 4.245558238608477, "grad_norm": 1.015625, "learning_rate": 3.0453824182799114e-06, "loss": 0.9959, "step": 13516 }, { "epoch": 4.246186465634018, "grad_norm": 0.9921875, "learning_rate": 3.0428435417327832e-06, "loss": 1.2703, "step": 13518 }, { "epoch": 4.24681469265956, "grad_norm": 0.96484375, "learning_rate": 3.0403046651856554e-06, "loss": 1.2516, "step": 13520 }, { "epoch": 4.247442919685101, "grad_norm": 1.0, "learning_rate": 3.0377657886385277e-06, "loss": 1.1878, "step": 13522 }, { "epoch": 4.248071146710642, "grad_norm": 0.94140625, "learning_rate": 3.0352269120913995e-06, "loss": 1.2498, "step": 13524 }, { "epoch": 4.248699373736184, "grad_norm": 1.0, "learning_rate": 3.0326880355442717e-06, "loss": 1.0533, "step": 13526 }, { "epoch": 4.249327600761725, "grad_norm": 0.9609375, "learning_rate": 3.030149158997144e-06, "loss": 1.1909, "step": 13528 }, { "epoch": 4.249955827787266, "grad_norm": 0.96484375, "learning_rate": 3.0276102824500157e-06, "loss": 1.187, "step": 13530 }, { "epoch": 4.250584054812808, "grad_norm": 0.95703125, "learning_rate": 3.025071405902888e-06, "loss": 1.1655, "step": 13532 }, { "epoch": 4.251212281838349, "grad_norm": 0.984375, "learning_rate": 3.0225325293557606e-06, "loss": 1.1706, "step": 13534 }, { "epoch": 4.251840508863891, "grad_norm": 0.953125, "learning_rate": 3.019993652808633e-06, "loss": 1.2641, "step": 13536 }, { "epoch": 4.252468735889432, "grad_norm": 0.9609375, "learning_rate": 3.0174547762615047e-06, "loss": 1.0268, "step": 13538 }, { "epoch": 4.253096962914974, "grad_norm": 0.9921875, "learning_rate": 3.014915899714377e-06, "loss": 0.9608, "step": 13540 }, { "epoch": 4.253725189940515, "grad_norm": 1.0703125, "learning_rate": 3.0123770231672487e-06, "loss": 1.0549, "step": 13542 }, { "epoch": 4.254353416966056, "grad_norm": 1.0703125, "learning_rate": 3.009838146620121e-06, "loss": 0.9804, "step": 13544 }, { "epoch": 4.254981643991598, "grad_norm": 0.984375, "learning_rate": 3.007299270072993e-06, "loss": 1.2008, "step": 13546 }, { "epoch": 4.255609871017139, "grad_norm": 1.0390625, "learning_rate": 3.004760393525865e-06, "loss": 1.1889, "step": 13548 }, { "epoch": 4.25623809804268, "grad_norm": 0.8828125, "learning_rate": 3.002221516978737e-06, "loss": 1.0427, "step": 13550 }, { "epoch": 4.256866325068222, "grad_norm": 0.94140625, "learning_rate": 2.9996826404316094e-06, "loss": 1.2546, "step": 13552 }, { "epoch": 4.257494552093763, "grad_norm": 0.91796875, "learning_rate": 2.9971437638844812e-06, "loss": 1.1948, "step": 13554 }, { "epoch": 4.258122779119304, "grad_norm": 1.0703125, "learning_rate": 2.9946048873373535e-06, "loss": 1.1694, "step": 13556 }, { "epoch": 4.258751006144846, "grad_norm": 1.015625, "learning_rate": 2.9920660107902257e-06, "loss": 1.0566, "step": 13558 }, { "epoch": 4.259379233170387, "grad_norm": 0.98046875, "learning_rate": 2.9895271342430975e-06, "loss": 1.2167, "step": 13560 }, { "epoch": 4.260007460195928, "grad_norm": 1.0078125, "learning_rate": 2.9869882576959697e-06, "loss": 1.2121, "step": 13562 }, { "epoch": 4.26063568722147, "grad_norm": 1.0, "learning_rate": 2.984449381148842e-06, "loss": 1.0269, "step": 13564 }, { "epoch": 4.261263914247011, "grad_norm": 0.9296875, "learning_rate": 2.9819105046017138e-06, "loss": 1.086, "step": 13566 }, { "epoch": 4.261892141272552, "grad_norm": 0.875, "learning_rate": 2.979371628054586e-06, "loss": 1.0571, "step": 13568 }, { "epoch": 4.262520368298094, "grad_norm": 0.95703125, "learning_rate": 2.976832751507458e-06, "loss": 1.201, "step": 13570 }, { "epoch": 4.263148595323635, "grad_norm": 1.015625, "learning_rate": 2.97429387496033e-06, "loss": 1.1162, "step": 13572 }, { "epoch": 4.263776822349176, "grad_norm": 0.984375, "learning_rate": 2.9717549984132022e-06, "loss": 1.0984, "step": 13574 }, { "epoch": 4.264405049374718, "grad_norm": 0.96875, "learning_rate": 2.969216121866075e-06, "loss": 1.0579, "step": 13576 }, { "epoch": 4.265033276400259, "grad_norm": 0.9140625, "learning_rate": 2.9666772453189467e-06, "loss": 1.2065, "step": 13578 }, { "epoch": 4.2656615034258, "grad_norm": 0.95703125, "learning_rate": 2.964138368771819e-06, "loss": 1.1145, "step": 13580 }, { "epoch": 4.266289730451342, "grad_norm": 1.03125, "learning_rate": 2.961599492224691e-06, "loss": 1.1297, "step": 13582 }, { "epoch": 4.266917957476883, "grad_norm": 0.96875, "learning_rate": 2.959060615677563e-06, "loss": 1.1875, "step": 13584 }, { "epoch": 4.267546184502424, "grad_norm": 0.9453125, "learning_rate": 2.956521739130435e-06, "loss": 1.3674, "step": 13586 }, { "epoch": 4.268174411527966, "grad_norm": 0.9765625, "learning_rate": 2.9539828625833074e-06, "loss": 1.1034, "step": 13588 }, { "epoch": 4.268802638553507, "grad_norm": 0.88671875, "learning_rate": 2.9514439860361792e-06, "loss": 1.1427, "step": 13590 }, { "epoch": 4.269430865579048, "grad_norm": 0.9609375, "learning_rate": 2.9489051094890515e-06, "loss": 1.0106, "step": 13592 }, { "epoch": 4.27005909260459, "grad_norm": 0.90625, "learning_rate": 2.9463662329419233e-06, "loss": 1.2435, "step": 13594 }, { "epoch": 4.270687319630131, "grad_norm": 0.9765625, "learning_rate": 2.9438273563947955e-06, "loss": 1.0462, "step": 13596 }, { "epoch": 4.271315546655673, "grad_norm": 0.94140625, "learning_rate": 2.9412884798476677e-06, "loss": 1.0533, "step": 13598 }, { "epoch": 4.271943773681214, "grad_norm": 0.953125, "learning_rate": 2.9387496033005395e-06, "loss": 1.143, "step": 13600 }, { "epoch": 4.272572000706756, "grad_norm": 1.0703125, "learning_rate": 2.9362107267534118e-06, "loss": 1.1671, "step": 13602 }, { "epoch": 4.273200227732297, "grad_norm": 0.96875, "learning_rate": 2.933671850206284e-06, "loss": 1.238, "step": 13604 }, { "epoch": 4.2738284547578385, "grad_norm": 0.95703125, "learning_rate": 2.931132973659156e-06, "loss": 1.1749, "step": 13606 }, { "epoch": 4.27445668178338, "grad_norm": 0.94921875, "learning_rate": 2.928594097112028e-06, "loss": 1.1214, "step": 13608 }, { "epoch": 4.275084908808921, "grad_norm": 1.046875, "learning_rate": 2.9260552205649003e-06, "loss": 1.1935, "step": 13610 }, { "epoch": 4.275713135834462, "grad_norm": 0.91015625, "learning_rate": 2.923516344017772e-06, "loss": 1.1772, "step": 13612 }, { "epoch": 4.276341362860004, "grad_norm": 0.99609375, "learning_rate": 2.9209774674706443e-06, "loss": 1.1523, "step": 13614 }, { "epoch": 4.276969589885545, "grad_norm": 0.93359375, "learning_rate": 2.9184385909235165e-06, "loss": 1.1316, "step": 13616 }, { "epoch": 4.277597816911086, "grad_norm": 1.0234375, "learning_rate": 2.9158997143763883e-06, "loss": 1.1698, "step": 13618 }, { "epoch": 4.278226043936628, "grad_norm": 0.9765625, "learning_rate": 2.913360837829261e-06, "loss": 1.0559, "step": 13620 }, { "epoch": 4.278854270962169, "grad_norm": 0.94921875, "learning_rate": 2.910821961282133e-06, "loss": 1.0771, "step": 13622 }, { "epoch": 4.27948249798771, "grad_norm": 0.9296875, "learning_rate": 2.908283084735005e-06, "loss": 1.2926, "step": 13624 }, { "epoch": 4.280110725013252, "grad_norm": 0.921875, "learning_rate": 2.9057442081878772e-06, "loss": 1.2266, "step": 13626 }, { "epoch": 4.280738952038793, "grad_norm": 1.015625, "learning_rate": 2.9032053316407495e-06, "loss": 1.1286, "step": 13628 }, { "epoch": 4.281367179064334, "grad_norm": 1.046875, "learning_rate": 2.9006664550936213e-06, "loss": 1.1101, "step": 13630 }, { "epoch": 4.281995406089876, "grad_norm": 0.8984375, "learning_rate": 2.8981275785464935e-06, "loss": 1.0859, "step": 13632 }, { "epoch": 4.282623633115417, "grad_norm": 1.0390625, "learning_rate": 2.8955887019993657e-06, "loss": 1.1893, "step": 13634 }, { "epoch": 4.283251860140958, "grad_norm": 0.9375, "learning_rate": 2.8930498254522375e-06, "loss": 1.1745, "step": 13636 }, { "epoch": 4.2838800871665, "grad_norm": 0.96484375, "learning_rate": 2.8905109489051098e-06, "loss": 1.234, "step": 13638 }, { "epoch": 4.284508314192041, "grad_norm": 1.0, "learning_rate": 2.887972072357982e-06, "loss": 1.175, "step": 13640 }, { "epoch": 4.285136541217582, "grad_norm": 0.9609375, "learning_rate": 2.885433195810854e-06, "loss": 1.2837, "step": 13642 }, { "epoch": 4.285764768243124, "grad_norm": 0.99609375, "learning_rate": 2.882894319263726e-06, "loss": 1.1425, "step": 13644 }, { "epoch": 4.286392995268665, "grad_norm": 1.0, "learning_rate": 2.8803554427165983e-06, "loss": 1.1498, "step": 13646 }, { "epoch": 4.287021222294206, "grad_norm": 0.98828125, "learning_rate": 2.87781656616947e-06, "loss": 1.1987, "step": 13648 }, { "epoch": 4.287649449319748, "grad_norm": 1.0390625, "learning_rate": 2.8752776896223423e-06, "loss": 1.0983, "step": 13650 }, { "epoch": 4.288277676345289, "grad_norm": 0.9921875, "learning_rate": 2.872738813075214e-06, "loss": 1.1283, "step": 13652 }, { "epoch": 4.28890590337083, "grad_norm": 1.0234375, "learning_rate": 2.8701999365280863e-06, "loss": 1.0676, "step": 13654 }, { "epoch": 4.289534130396372, "grad_norm": 0.93359375, "learning_rate": 2.8676610599809586e-06, "loss": 1.111, "step": 13656 }, { "epoch": 4.290162357421913, "grad_norm": 1.1484375, "learning_rate": 2.8651221834338304e-06, "loss": 1.3049, "step": 13658 }, { "epoch": 4.290790584447454, "grad_norm": 0.9609375, "learning_rate": 2.8625833068867026e-06, "loss": 1.1531, "step": 13660 }, { "epoch": 4.291418811472996, "grad_norm": 1.0078125, "learning_rate": 2.8600444303395753e-06, "loss": 1.151, "step": 13662 }, { "epoch": 4.292047038498538, "grad_norm": 0.9140625, "learning_rate": 2.8575055537924475e-06, "loss": 1.1488, "step": 13664 }, { "epoch": 4.292675265524078, "grad_norm": 1.0078125, "learning_rate": 2.8549666772453193e-06, "loss": 1.0546, "step": 13666 }, { "epoch": 4.2933034925496205, "grad_norm": 0.94921875, "learning_rate": 2.8524278006981915e-06, "loss": 1.2151, "step": 13668 }, { "epoch": 4.293931719575162, "grad_norm": 0.97265625, "learning_rate": 2.8498889241510637e-06, "loss": 1.1798, "step": 13670 }, { "epoch": 4.294559946600703, "grad_norm": 1.0234375, "learning_rate": 2.8473500476039356e-06, "loss": 1.1261, "step": 13672 }, { "epoch": 4.2951881736262445, "grad_norm": 0.84765625, "learning_rate": 2.8448111710568078e-06, "loss": 1.2101, "step": 13674 }, { "epoch": 4.295816400651786, "grad_norm": 1.046875, "learning_rate": 2.8422722945096796e-06, "loss": 1.1997, "step": 13676 }, { "epoch": 4.296444627677327, "grad_norm": 1.0078125, "learning_rate": 2.839733417962552e-06, "loss": 1.1194, "step": 13678 }, { "epoch": 4.2970728547028685, "grad_norm": 0.99609375, "learning_rate": 2.837194541415424e-06, "loss": 1.1607, "step": 13680 }, { "epoch": 4.29770108172841, "grad_norm": 0.90625, "learning_rate": 2.834655664868296e-06, "loss": 1.2652, "step": 13682 }, { "epoch": 4.298329308753951, "grad_norm": 0.89453125, "learning_rate": 2.832116788321168e-06, "loss": 1.1769, "step": 13684 }, { "epoch": 4.2989575357794925, "grad_norm": 0.953125, "learning_rate": 2.8295779117740403e-06, "loss": 1.2594, "step": 13686 }, { "epoch": 4.299585762805034, "grad_norm": 1.015625, "learning_rate": 2.827039035226912e-06, "loss": 1.0585, "step": 13688 }, { "epoch": 4.300213989830575, "grad_norm": 0.91796875, "learning_rate": 2.8245001586797843e-06, "loss": 1.1953, "step": 13690 }, { "epoch": 4.3008422168561165, "grad_norm": 0.99609375, "learning_rate": 2.8219612821326566e-06, "loss": 1.1865, "step": 13692 }, { "epoch": 4.301470443881658, "grad_norm": 0.99609375, "learning_rate": 2.8194224055855284e-06, "loss": 1.2221, "step": 13694 }, { "epoch": 4.302098670907199, "grad_norm": 0.9921875, "learning_rate": 2.8168835290384006e-06, "loss": 1.1206, "step": 13696 }, { "epoch": 4.30272689793274, "grad_norm": 0.91015625, "learning_rate": 2.814344652491273e-06, "loss": 1.188, "step": 13698 }, { "epoch": 4.303355124958282, "grad_norm": 0.93359375, "learning_rate": 2.8118057759441446e-06, "loss": 1.2963, "step": 13700 }, { "epoch": 4.303983351983823, "grad_norm": 0.90625, "learning_rate": 2.809266899397017e-06, "loss": 1.0639, "step": 13702 }, { "epoch": 4.304611579009364, "grad_norm": 0.9609375, "learning_rate": 2.8067280228498887e-06, "loss": 1.3112, "step": 13704 }, { "epoch": 4.305239806034906, "grad_norm": 0.96875, "learning_rate": 2.8041891463027613e-06, "loss": 1.0653, "step": 13706 }, { "epoch": 4.305868033060447, "grad_norm": 1.078125, "learning_rate": 2.8016502697556336e-06, "loss": 1.2647, "step": 13708 }, { "epoch": 4.306496260085988, "grad_norm": 1.0078125, "learning_rate": 2.799111393208506e-06, "loss": 1.2902, "step": 13710 }, { "epoch": 4.30712448711153, "grad_norm": 0.890625, "learning_rate": 2.7965725166613776e-06, "loss": 1.2115, "step": 13712 }, { "epoch": 4.307752714137071, "grad_norm": 0.953125, "learning_rate": 2.79403364011425e-06, "loss": 1.1643, "step": 13714 }, { "epoch": 4.308380941162612, "grad_norm": 0.96875, "learning_rate": 2.791494763567122e-06, "loss": 1.132, "step": 13716 }, { "epoch": 4.309009168188154, "grad_norm": 1.0, "learning_rate": 2.788955887019994e-06, "loss": 1.1157, "step": 13718 }, { "epoch": 4.309637395213695, "grad_norm": 0.9609375, "learning_rate": 2.786417010472866e-06, "loss": 1.0234, "step": 13720 }, { "epoch": 4.310265622239236, "grad_norm": 0.98046875, "learning_rate": 2.7838781339257383e-06, "loss": 1.109, "step": 13722 }, { "epoch": 4.310893849264778, "grad_norm": 1.0078125, "learning_rate": 2.78133925737861e-06, "loss": 1.0876, "step": 13724 }, { "epoch": 4.311522076290319, "grad_norm": 0.96875, "learning_rate": 2.7788003808314824e-06, "loss": 1.1413, "step": 13726 }, { "epoch": 4.31215030331586, "grad_norm": 0.98046875, "learning_rate": 2.7762615042843546e-06, "loss": 1.1749, "step": 13728 }, { "epoch": 4.312778530341403, "grad_norm": 0.953125, "learning_rate": 2.7737226277372264e-06, "loss": 1.2139, "step": 13730 }, { "epoch": 4.313406757366943, "grad_norm": 1.0546875, "learning_rate": 2.7711837511900986e-06, "loss": 1.051, "step": 13732 }, { "epoch": 4.314034984392485, "grad_norm": 0.93359375, "learning_rate": 2.7686448746429704e-06, "loss": 1.2775, "step": 13734 }, { "epoch": 4.314663211418027, "grad_norm": 0.88671875, "learning_rate": 2.7661059980958427e-06, "loss": 1.2677, "step": 13736 }, { "epoch": 4.315291438443568, "grad_norm": 1.0078125, "learning_rate": 2.763567121548715e-06, "loss": 1.1346, "step": 13738 }, { "epoch": 4.315919665469109, "grad_norm": 0.96484375, "learning_rate": 2.7610282450015867e-06, "loss": 1.1452, "step": 13740 }, { "epoch": 4.3165478924946505, "grad_norm": 0.9921875, "learning_rate": 2.758489368454459e-06, "loss": 1.0566, "step": 13742 }, { "epoch": 4.317176119520192, "grad_norm": 1.0234375, "learning_rate": 2.755950491907331e-06, "loss": 1.1328, "step": 13744 }, { "epoch": 4.317804346545733, "grad_norm": 1.0078125, "learning_rate": 2.753411615360203e-06, "loss": 1.1967, "step": 13746 }, { "epoch": 4.3184325735712745, "grad_norm": 0.96875, "learning_rate": 2.7508727388130756e-06, "loss": 1.1624, "step": 13748 }, { "epoch": 4.319060800596816, "grad_norm": 0.94140625, "learning_rate": 2.748333862265948e-06, "loss": 1.0194, "step": 13750 }, { "epoch": 4.319689027622357, "grad_norm": 0.8984375, "learning_rate": 2.74579498571882e-06, "loss": 1.1225, "step": 13752 }, { "epoch": 4.3203172546478985, "grad_norm": 0.9921875, "learning_rate": 2.743256109171692e-06, "loss": 1.1141, "step": 13754 }, { "epoch": 4.32094548167344, "grad_norm": 0.921875, "learning_rate": 2.740717232624564e-06, "loss": 1.0245, "step": 13756 }, { "epoch": 4.321573708698981, "grad_norm": 0.9140625, "learning_rate": 2.738178356077436e-06, "loss": 1.151, "step": 13758 }, { "epoch": 4.3222019357245225, "grad_norm": 0.95703125, "learning_rate": 2.735639479530308e-06, "loss": 1.0834, "step": 13760 }, { "epoch": 4.322830162750064, "grad_norm": 1.078125, "learning_rate": 2.7331006029831804e-06, "loss": 1.1382, "step": 13762 }, { "epoch": 4.323458389775605, "grad_norm": 1.0234375, "learning_rate": 2.730561726436052e-06, "loss": 1.0732, "step": 13764 }, { "epoch": 4.3240866168011465, "grad_norm": 0.89453125, "learning_rate": 2.7280228498889244e-06, "loss": 1.1561, "step": 13766 }, { "epoch": 4.324714843826688, "grad_norm": 1.1015625, "learning_rate": 2.7254839733417966e-06, "loss": 1.122, "step": 13768 }, { "epoch": 4.325343070852229, "grad_norm": 0.9609375, "learning_rate": 2.7229450967946684e-06, "loss": 1.1201, "step": 13770 }, { "epoch": 4.3259712978777705, "grad_norm": 0.96875, "learning_rate": 2.7204062202475407e-06, "loss": 1.1334, "step": 13772 }, { "epoch": 4.326599524903312, "grad_norm": 0.9140625, "learning_rate": 2.717867343700413e-06, "loss": 1.147, "step": 13774 }, { "epoch": 4.327227751928853, "grad_norm": 1.0078125, "learning_rate": 2.7153284671532847e-06, "loss": 1.1642, "step": 13776 }, { "epoch": 4.3278559789543944, "grad_norm": 1.1171875, "learning_rate": 2.712789590606157e-06, "loss": 1.1672, "step": 13778 }, { "epoch": 4.328484205979936, "grad_norm": 0.95703125, "learning_rate": 2.710250714059029e-06, "loss": 1.0505, "step": 13780 }, { "epoch": 4.329112433005477, "grad_norm": 1.03125, "learning_rate": 2.707711837511901e-06, "loss": 0.997, "step": 13782 }, { "epoch": 4.329740660031018, "grad_norm": 1.0546875, "learning_rate": 2.705172960964773e-06, "loss": 1.1018, "step": 13784 }, { "epoch": 4.33036888705656, "grad_norm": 1.0234375, "learning_rate": 2.702634084417645e-06, "loss": 1.0032, "step": 13786 }, { "epoch": 4.330997114082101, "grad_norm": 1.0390625, "learning_rate": 2.7000952078705172e-06, "loss": 1.181, "step": 13788 }, { "epoch": 4.331625341107642, "grad_norm": 0.96484375, "learning_rate": 2.6975563313233895e-06, "loss": 1.1166, "step": 13790 }, { "epoch": 4.332253568133184, "grad_norm": 0.94921875, "learning_rate": 2.695017454776262e-06, "loss": 1.1511, "step": 13792 }, { "epoch": 4.332881795158725, "grad_norm": 1.0703125, "learning_rate": 2.692478578229134e-06, "loss": 1.0772, "step": 13794 }, { "epoch": 4.333510022184267, "grad_norm": 1.0390625, "learning_rate": 2.689939701682006e-06, "loss": 1.041, "step": 13796 }, { "epoch": 4.334138249209808, "grad_norm": 0.91796875, "learning_rate": 2.6874008251348784e-06, "loss": 1.1861, "step": 13798 }, { "epoch": 4.33476647623535, "grad_norm": 1.203125, "learning_rate": 2.68486194858775e-06, "loss": 1.1599, "step": 13800 }, { "epoch": 4.335394703260891, "grad_norm": 0.98828125, "learning_rate": 2.6823230720406224e-06, "loss": 1.0589, "step": 13802 }, { "epoch": 4.336022930286433, "grad_norm": 0.9453125, "learning_rate": 2.6797841954934946e-06, "loss": 1.0535, "step": 13804 }, { "epoch": 4.336651157311974, "grad_norm": 0.9453125, "learning_rate": 2.6772453189463664e-06, "loss": 1.0986, "step": 13806 }, { "epoch": 4.337279384337515, "grad_norm": 0.99609375, "learning_rate": 2.6747064423992387e-06, "loss": 1.0398, "step": 13808 }, { "epoch": 4.337907611363057, "grad_norm": 0.953125, "learning_rate": 2.6721675658521105e-06, "loss": 1.0584, "step": 13810 }, { "epoch": 4.338535838388598, "grad_norm": 1.0234375, "learning_rate": 2.6696286893049827e-06, "loss": 1.064, "step": 13812 }, { "epoch": 4.339164065414139, "grad_norm": 0.9453125, "learning_rate": 2.667089812757855e-06, "loss": 1.1344, "step": 13814 }, { "epoch": 4.339792292439681, "grad_norm": 1.0078125, "learning_rate": 2.6645509362107267e-06, "loss": 1.141, "step": 13816 }, { "epoch": 4.340420519465222, "grad_norm": 0.9140625, "learning_rate": 2.662012059663599e-06, "loss": 1.1533, "step": 13818 }, { "epoch": 4.341048746490763, "grad_norm": 1.09375, "learning_rate": 2.659473183116471e-06, "loss": 1.0925, "step": 13820 }, { "epoch": 4.3416769735163045, "grad_norm": 1.0703125, "learning_rate": 2.656934306569343e-06, "loss": 1.1583, "step": 13822 }, { "epoch": 4.342305200541846, "grad_norm": 1.0234375, "learning_rate": 2.6543954300222152e-06, "loss": 1.1532, "step": 13824 }, { "epoch": 4.342933427567387, "grad_norm": 0.98046875, "learning_rate": 2.6518565534750875e-06, "loss": 1.1269, "step": 13826 }, { "epoch": 4.3435616545929285, "grad_norm": 1.0, "learning_rate": 2.6493176769279593e-06, "loss": 1.2179, "step": 13828 }, { "epoch": 4.34418988161847, "grad_norm": 0.9375, "learning_rate": 2.6467788003808315e-06, "loss": 1.143, "step": 13830 }, { "epoch": 4.344818108644011, "grad_norm": 0.9453125, "learning_rate": 2.6442399238337037e-06, "loss": 1.1222, "step": 13832 }, { "epoch": 4.3454463356695525, "grad_norm": 0.95703125, "learning_rate": 2.6417010472865764e-06, "loss": 1.3169, "step": 13834 }, { "epoch": 4.346074562695094, "grad_norm": 1.1328125, "learning_rate": 2.639162170739448e-06, "loss": 1.1246, "step": 13836 }, { "epoch": 4.346702789720635, "grad_norm": 0.89453125, "learning_rate": 2.6366232941923204e-06, "loss": 1.3627, "step": 13838 }, { "epoch": 4.3473310167461765, "grad_norm": 1.0390625, "learning_rate": 2.6340844176451922e-06, "loss": 1.1626, "step": 13840 }, { "epoch": 4.347959243771718, "grad_norm": 0.89453125, "learning_rate": 2.6315455410980644e-06, "loss": 1.2178, "step": 13842 }, { "epoch": 4.348587470797259, "grad_norm": 0.98046875, "learning_rate": 2.6290066645509367e-06, "loss": 1.1185, "step": 13844 }, { "epoch": 4.3492156978228005, "grad_norm": 0.95703125, "learning_rate": 2.6264677880038085e-06, "loss": 1.0583, "step": 13846 }, { "epoch": 4.349843924848342, "grad_norm": 1.046875, "learning_rate": 2.6239289114566807e-06, "loss": 1.1283, "step": 13848 }, { "epoch": 4.350472151873883, "grad_norm": 0.98828125, "learning_rate": 2.621390034909553e-06, "loss": 1.1563, "step": 13850 }, { "epoch": 4.3511003788994245, "grad_norm": 0.97265625, "learning_rate": 2.6188511583624247e-06, "loss": 1.2513, "step": 13852 }, { "epoch": 4.351728605924966, "grad_norm": 0.95703125, "learning_rate": 2.616312281815297e-06, "loss": 1.1722, "step": 13854 }, { "epoch": 4.352356832950507, "grad_norm": 1.046875, "learning_rate": 2.613773405268169e-06, "loss": 1.1084, "step": 13856 }, { "epoch": 4.3529850599760485, "grad_norm": 0.984375, "learning_rate": 2.611234528721041e-06, "loss": 1.197, "step": 13858 }, { "epoch": 4.35361328700159, "grad_norm": 0.98828125, "learning_rate": 2.6086956521739132e-06, "loss": 1.2266, "step": 13860 }, { "epoch": 4.354241514027132, "grad_norm": 0.984375, "learning_rate": 2.6061567756267855e-06, "loss": 1.259, "step": 13862 }, { "epoch": 4.354869741052672, "grad_norm": 0.953125, "learning_rate": 2.6036178990796573e-06, "loss": 1.0717, "step": 13864 }, { "epoch": 4.355497968078215, "grad_norm": 1.046875, "learning_rate": 2.6010790225325295e-06, "loss": 1.0232, "step": 13866 }, { "epoch": 4.356126195103756, "grad_norm": 1.0234375, "learning_rate": 2.5985401459854013e-06, "loss": 1.1978, "step": 13868 }, { "epoch": 4.356754422129297, "grad_norm": 1.0078125, "learning_rate": 2.5960012694382735e-06, "loss": 1.1756, "step": 13870 }, { "epoch": 4.357382649154839, "grad_norm": 0.94140625, "learning_rate": 2.5934623928911458e-06, "loss": 1.205, "step": 13872 }, { "epoch": 4.35801087618038, "grad_norm": 1.0078125, "learning_rate": 2.5909235163440176e-06, "loss": 1.162, "step": 13874 }, { "epoch": 4.358639103205921, "grad_norm": 1.0234375, "learning_rate": 2.5883846397968902e-06, "loss": 1.017, "step": 13876 }, { "epoch": 4.359267330231463, "grad_norm": 0.98046875, "learning_rate": 2.5858457632497625e-06, "loss": 1.2016, "step": 13878 }, { "epoch": 4.359895557257004, "grad_norm": 0.99609375, "learning_rate": 2.5833068867026347e-06, "loss": 1.1287, "step": 13880 }, { "epoch": 4.360523784282545, "grad_norm": 0.94921875, "learning_rate": 2.5807680101555065e-06, "loss": 0.997, "step": 13882 }, { "epoch": 4.361152011308087, "grad_norm": 1.03125, "learning_rate": 2.5782291336083787e-06, "loss": 1.1986, "step": 13884 }, { "epoch": 4.361780238333628, "grad_norm": 1.0546875, "learning_rate": 2.575690257061251e-06, "loss": 1.1599, "step": 13886 }, { "epoch": 4.362408465359169, "grad_norm": 0.9921875, "learning_rate": 2.5731513805141228e-06, "loss": 1.1845, "step": 13888 }, { "epoch": 4.363036692384711, "grad_norm": 0.93359375, "learning_rate": 2.570612503966995e-06, "loss": 1.0158, "step": 13890 }, { "epoch": 4.363664919410252, "grad_norm": 0.97265625, "learning_rate": 2.5680736274198668e-06, "loss": 1.2727, "step": 13892 }, { "epoch": 4.364293146435793, "grad_norm": 0.89453125, "learning_rate": 2.565534750872739e-06, "loss": 1.1136, "step": 13894 }, { "epoch": 4.364921373461335, "grad_norm": 0.96484375, "learning_rate": 2.5629958743256112e-06, "loss": 1.1715, "step": 13896 }, { "epoch": 4.365549600486876, "grad_norm": 0.9921875, "learning_rate": 2.560456997778483e-06, "loss": 1.1114, "step": 13898 }, { "epoch": 4.366177827512417, "grad_norm": 0.95703125, "learning_rate": 2.5579181212313553e-06, "loss": 1.0194, "step": 13900 }, { "epoch": 4.366806054537959, "grad_norm": 0.94140625, "learning_rate": 2.5553792446842275e-06, "loss": 1.1637, "step": 13902 }, { "epoch": 4.3674342815635, "grad_norm": 0.93359375, "learning_rate": 2.5528403681370993e-06, "loss": 1.1065, "step": 13904 }, { "epoch": 4.368062508589041, "grad_norm": 1.046875, "learning_rate": 2.5503014915899715e-06, "loss": 1.0804, "step": 13906 }, { "epoch": 4.3686907356145825, "grad_norm": 0.9375, "learning_rate": 2.5477626150428438e-06, "loss": 1.1293, "step": 13908 }, { "epoch": 4.369318962640124, "grad_norm": 0.97265625, "learning_rate": 2.5452237384957156e-06, "loss": 1.0105, "step": 13910 }, { "epoch": 4.369947189665665, "grad_norm": 0.91015625, "learning_rate": 2.542684861948588e-06, "loss": 1.1843, "step": 13912 }, { "epoch": 4.3705754166912065, "grad_norm": 1.0546875, "learning_rate": 2.54014598540146e-06, "loss": 1.1642, "step": 13914 }, { "epoch": 4.371203643716748, "grad_norm": 0.94921875, "learning_rate": 2.537607108854332e-06, "loss": 1.2179, "step": 13916 }, { "epoch": 4.371831870742289, "grad_norm": 0.8671875, "learning_rate": 2.535068232307204e-06, "loss": 1.2056, "step": 13918 }, { "epoch": 4.3724600977678305, "grad_norm": 1.0859375, "learning_rate": 2.5325293557600767e-06, "loss": 1.1705, "step": 13920 }, { "epoch": 4.373088324793372, "grad_norm": 1.0078125, "learning_rate": 2.5299904792129485e-06, "loss": 1.1438, "step": 13922 }, { "epoch": 4.373716551818913, "grad_norm": 0.9609375, "learning_rate": 2.5274516026658208e-06, "loss": 1.1428, "step": 13924 }, { "epoch": 4.3743447788444545, "grad_norm": 1.0390625, "learning_rate": 2.524912726118693e-06, "loss": 1.1333, "step": 13926 }, { "epoch": 4.374973005869997, "grad_norm": 0.92578125, "learning_rate": 2.522373849571565e-06, "loss": 1.1478, "step": 13928 }, { "epoch": 4.375601232895538, "grad_norm": 1.109375, "learning_rate": 2.519834973024437e-06, "loss": 1.0996, "step": 13930 }, { "epoch": 4.376229459921079, "grad_norm": 1.1015625, "learning_rate": 2.5172960964773093e-06, "loss": 1.0599, "step": 13932 }, { "epoch": 4.376857686946621, "grad_norm": 0.93359375, "learning_rate": 2.514757219930181e-06, "loss": 1.1547, "step": 13934 }, { "epoch": 4.377485913972162, "grad_norm": 1.0546875, "learning_rate": 2.5122183433830533e-06, "loss": 1.1514, "step": 13936 }, { "epoch": 4.378114140997703, "grad_norm": 1.03125, "learning_rate": 2.5096794668359255e-06, "loss": 1.1501, "step": 13938 }, { "epoch": 4.378742368023245, "grad_norm": 1.0078125, "learning_rate": 2.5071405902887973e-06, "loss": 1.0175, "step": 13940 }, { "epoch": 4.379370595048786, "grad_norm": 0.91796875, "learning_rate": 2.5046017137416696e-06, "loss": 1.0814, "step": 13942 }, { "epoch": 4.379998822074327, "grad_norm": 1.03125, "learning_rate": 2.5020628371945418e-06, "loss": 0.9903, "step": 13944 }, { "epoch": 4.380627049099869, "grad_norm": 0.97265625, "learning_rate": 2.4995239606474136e-06, "loss": 1.3295, "step": 13946 }, { "epoch": 4.38125527612541, "grad_norm": 1.0625, "learning_rate": 2.496985084100286e-06, "loss": 0.9996, "step": 13948 }, { "epoch": 4.381883503150951, "grad_norm": 1.0234375, "learning_rate": 2.4944462075531576e-06, "loss": 1.0417, "step": 13950 }, { "epoch": 4.382511730176493, "grad_norm": 0.9765625, "learning_rate": 2.4919073310060303e-06, "loss": 1.1107, "step": 13952 }, { "epoch": 4.383139957202034, "grad_norm": 0.9296875, "learning_rate": 2.489368454458902e-06, "loss": 1.1522, "step": 13954 }, { "epoch": 4.383768184227575, "grad_norm": 0.953125, "learning_rate": 2.4868295779117743e-06, "loss": 1.1662, "step": 13956 }, { "epoch": 4.384396411253117, "grad_norm": 0.984375, "learning_rate": 2.4842907013646465e-06, "loss": 1.0399, "step": 13958 }, { "epoch": 4.385024638278658, "grad_norm": 0.99609375, "learning_rate": 2.4817518248175183e-06, "loss": 1.2217, "step": 13960 }, { "epoch": 4.385652865304199, "grad_norm": 0.9296875, "learning_rate": 2.4792129482703906e-06, "loss": 1.1174, "step": 13962 }, { "epoch": 4.386281092329741, "grad_norm": 0.94140625, "learning_rate": 2.476674071723263e-06, "loss": 1.1867, "step": 13964 }, { "epoch": 4.386909319355282, "grad_norm": 0.95703125, "learning_rate": 2.4741351951761346e-06, "loss": 1.1859, "step": 13966 }, { "epoch": 4.387537546380823, "grad_norm": 0.9609375, "learning_rate": 2.471596318629007e-06, "loss": 1.0755, "step": 13968 }, { "epoch": 4.388165773406365, "grad_norm": 1.015625, "learning_rate": 2.469057442081879e-06, "loss": 1.1753, "step": 13970 }, { "epoch": 4.388794000431906, "grad_norm": 0.97265625, "learning_rate": 2.466518565534751e-06, "loss": 1.175, "step": 13972 }, { "epoch": 4.389422227457447, "grad_norm": 1.0234375, "learning_rate": 2.463979688987623e-06, "loss": 0.9921, "step": 13974 }, { "epoch": 4.390050454482989, "grad_norm": 0.9453125, "learning_rate": 2.4614408124404953e-06, "loss": 1.1352, "step": 13976 }, { "epoch": 4.39067868150853, "grad_norm": 0.96875, "learning_rate": 2.4589019358933676e-06, "loss": 1.0875, "step": 13978 }, { "epoch": 4.391306908534071, "grad_norm": 0.953125, "learning_rate": 2.4563630593462394e-06, "loss": 1.1851, "step": 13980 }, { "epoch": 4.391935135559613, "grad_norm": 1.046875, "learning_rate": 2.4538241827991116e-06, "loss": 1.0241, "step": 13982 }, { "epoch": 4.392563362585154, "grad_norm": 0.984375, "learning_rate": 2.451285306251984e-06, "loss": 1.0776, "step": 13984 }, { "epoch": 4.393191589610695, "grad_norm": 0.9296875, "learning_rate": 2.4487464297048556e-06, "loss": 1.0745, "step": 13986 }, { "epoch": 4.3938198166362366, "grad_norm": 0.99609375, "learning_rate": 2.446207553157728e-06, "loss": 1.1357, "step": 13988 }, { "epoch": 4.394448043661778, "grad_norm": 0.9609375, "learning_rate": 2.4436686766106e-06, "loss": 1.1415, "step": 13990 }, { "epoch": 4.395076270687319, "grad_norm": 0.90625, "learning_rate": 2.441129800063472e-06, "loss": 1.1502, "step": 13992 }, { "epoch": 4.395704497712861, "grad_norm": 0.9765625, "learning_rate": 2.4385909235163445e-06, "loss": 1.1558, "step": 13994 }, { "epoch": 4.396332724738403, "grad_norm": 0.94921875, "learning_rate": 2.4360520469692164e-06, "loss": 1.2328, "step": 13996 }, { "epoch": 4.396960951763944, "grad_norm": 0.921875, "learning_rate": 2.4335131704220886e-06, "loss": 1.0429, "step": 13998 }, { "epoch": 4.397589178789485, "grad_norm": 0.91796875, "learning_rate": 2.4309742938749604e-06, "loss": 1.1152, "step": 14000 }, { "epoch": 4.398217405815027, "grad_norm": 0.95703125, "learning_rate": 2.4284354173278326e-06, "loss": 1.0045, "step": 14002 }, { "epoch": 4.398845632840568, "grad_norm": 0.97265625, "learning_rate": 2.425896540780705e-06, "loss": 1.0019, "step": 14004 }, { "epoch": 4.399473859866109, "grad_norm": 1.0, "learning_rate": 2.4233576642335767e-06, "loss": 1.0773, "step": 14006 }, { "epoch": 4.400102086891651, "grad_norm": 0.94921875, "learning_rate": 2.420818787686449e-06, "loss": 1.3244, "step": 14008 }, { "epoch": 4.400730313917192, "grad_norm": 1.015625, "learning_rate": 2.418279911139321e-06, "loss": 1.0942, "step": 14010 }, { "epoch": 4.401358540942733, "grad_norm": 1.0, "learning_rate": 2.415741034592193e-06, "loss": 1.1705, "step": 14012 }, { "epoch": 4.401986767968275, "grad_norm": 1.046875, "learning_rate": 2.413202158045065e-06, "loss": 1.1926, "step": 14014 }, { "epoch": 4.402614994993816, "grad_norm": 0.93359375, "learning_rate": 2.4106632814979374e-06, "loss": 1.2677, "step": 14016 }, { "epoch": 4.403243222019357, "grad_norm": 0.921875, "learning_rate": 2.4081244049508096e-06, "loss": 1.2506, "step": 14018 }, { "epoch": 4.403871449044899, "grad_norm": 1.0078125, "learning_rate": 2.405585528403682e-06, "loss": 1.1136, "step": 14020 }, { "epoch": 4.40449967607044, "grad_norm": 1.109375, "learning_rate": 2.4030466518565536e-06, "loss": 1.1397, "step": 14022 }, { "epoch": 4.405127903095981, "grad_norm": 0.9375, "learning_rate": 2.400507775309426e-06, "loss": 1.0636, "step": 14024 }, { "epoch": 4.405756130121523, "grad_norm": 0.9140625, "learning_rate": 2.3979688987622977e-06, "loss": 1.1789, "step": 14026 }, { "epoch": 4.406384357147064, "grad_norm": 0.95703125, "learning_rate": 2.39543002221517e-06, "loss": 1.2216, "step": 14028 }, { "epoch": 4.407012584172605, "grad_norm": 0.9375, "learning_rate": 2.392891145668042e-06, "loss": 1.0575, "step": 14030 }, { "epoch": 4.407640811198147, "grad_norm": 1.0546875, "learning_rate": 2.390352269120914e-06, "loss": 1.043, "step": 14032 }, { "epoch": 4.408269038223688, "grad_norm": 1.03125, "learning_rate": 2.387813392573786e-06, "loss": 1.1382, "step": 14034 }, { "epoch": 4.408897265249229, "grad_norm": 1.078125, "learning_rate": 2.3852745160266584e-06, "loss": 1.2615, "step": 14036 }, { "epoch": 4.409525492274771, "grad_norm": 0.93359375, "learning_rate": 2.3827356394795306e-06, "loss": 1.1986, "step": 14038 }, { "epoch": 4.410153719300312, "grad_norm": 0.8828125, "learning_rate": 2.380196762932403e-06, "loss": 1.1959, "step": 14040 }, { "epoch": 4.410781946325853, "grad_norm": 0.96484375, "learning_rate": 2.3776578863852747e-06, "loss": 1.1313, "step": 14042 }, { "epoch": 4.411410173351395, "grad_norm": 1.0, "learning_rate": 2.375119009838147e-06, "loss": 1.0768, "step": 14044 }, { "epoch": 4.412038400376936, "grad_norm": 0.9609375, "learning_rate": 2.372580133291019e-06, "loss": 1.0712, "step": 14046 }, { "epoch": 4.412666627402477, "grad_norm": 0.91796875, "learning_rate": 2.370041256743891e-06, "loss": 1.2144, "step": 14048 }, { "epoch": 4.413294854428019, "grad_norm": 0.94140625, "learning_rate": 2.367502380196763e-06, "loss": 1.2625, "step": 14050 }, { "epoch": 4.41392308145356, "grad_norm": 1.03125, "learning_rate": 2.3649635036496354e-06, "loss": 1.1765, "step": 14052 }, { "epoch": 4.414551308479101, "grad_norm": 0.90625, "learning_rate": 2.362424627102507e-06, "loss": 1.1408, "step": 14054 }, { "epoch": 4.415179535504643, "grad_norm": 0.95703125, "learning_rate": 2.3598857505553794e-06, "loss": 1.2588, "step": 14056 }, { "epoch": 4.415807762530184, "grad_norm": 0.98046875, "learning_rate": 2.3573468740082512e-06, "loss": 1.0661, "step": 14058 }, { "epoch": 4.416435989555726, "grad_norm": 1.03125, "learning_rate": 2.354807997461124e-06, "loss": 1.1146, "step": 14060 }, { "epoch": 4.4170642165812675, "grad_norm": 0.90234375, "learning_rate": 2.3522691209139957e-06, "loss": 1.2269, "step": 14062 }, { "epoch": 4.417692443606809, "grad_norm": 0.9453125, "learning_rate": 2.349730244366868e-06, "loss": 1.0977, "step": 14064 }, { "epoch": 4.41832067063235, "grad_norm": 0.96875, "learning_rate": 2.34719136781974e-06, "loss": 1.1458, "step": 14066 }, { "epoch": 4.4189488976578915, "grad_norm": 1.0078125, "learning_rate": 2.344652491272612e-06, "loss": 1.1695, "step": 14068 }, { "epoch": 4.419577124683433, "grad_norm": 0.93359375, "learning_rate": 2.342113614725484e-06, "loss": 1.122, "step": 14070 }, { "epoch": 4.420205351708974, "grad_norm": 1.0078125, "learning_rate": 2.3395747381783564e-06, "loss": 1.1562, "step": 14072 }, { "epoch": 4.420833578734515, "grad_norm": 1.1328125, "learning_rate": 2.337035861631228e-06, "loss": 1.0295, "step": 14074 }, { "epoch": 4.421461805760057, "grad_norm": 0.8984375, "learning_rate": 2.3344969850841004e-06, "loss": 1.1139, "step": 14076 }, { "epoch": 4.422090032785598, "grad_norm": 1.0, "learning_rate": 2.3319581085369727e-06, "loss": 1.108, "step": 14078 }, { "epoch": 4.422718259811139, "grad_norm": 0.96484375, "learning_rate": 2.329419231989845e-06, "loss": 1.1267, "step": 14080 }, { "epoch": 4.423346486836681, "grad_norm": 0.921875, "learning_rate": 2.3268803554427167e-06, "loss": 1.1234, "step": 14082 }, { "epoch": 4.423974713862222, "grad_norm": 1.0, "learning_rate": 2.324341478895589e-06, "loss": 1.3207, "step": 14084 }, { "epoch": 4.424602940887763, "grad_norm": 0.9375, "learning_rate": 2.321802602348461e-06, "loss": 0.9436, "step": 14086 }, { "epoch": 4.425231167913305, "grad_norm": 1.0234375, "learning_rate": 2.319263725801333e-06, "loss": 1.0357, "step": 14088 }, { "epoch": 4.425859394938846, "grad_norm": 0.96484375, "learning_rate": 2.316724849254205e-06, "loss": 1.0878, "step": 14090 }, { "epoch": 4.426487621964387, "grad_norm": 0.89453125, "learning_rate": 2.3141859727070774e-06, "loss": 1.0636, "step": 14092 }, { "epoch": 4.427115848989929, "grad_norm": 0.96484375, "learning_rate": 2.3116470961599492e-06, "loss": 1.0275, "step": 14094 }, { "epoch": 4.42774407601547, "grad_norm": 0.9453125, "learning_rate": 2.3091082196128215e-06, "loss": 1.116, "step": 14096 }, { "epoch": 4.428372303041011, "grad_norm": 1.015625, "learning_rate": 2.3065693430656937e-06, "loss": 1.0996, "step": 14098 }, { "epoch": 4.429000530066553, "grad_norm": 0.94140625, "learning_rate": 2.3040304665185655e-06, "loss": 1.1268, "step": 14100 }, { "epoch": 4.429628757092094, "grad_norm": 0.96875, "learning_rate": 2.301491589971438e-06, "loss": 1.008, "step": 14102 }, { "epoch": 4.430256984117635, "grad_norm": 0.99609375, "learning_rate": 2.29895271342431e-06, "loss": 1.0854, "step": 14104 }, { "epoch": 4.430885211143177, "grad_norm": 0.88671875, "learning_rate": 2.296413836877182e-06, "loss": 1.221, "step": 14106 }, { "epoch": 4.431513438168718, "grad_norm": 0.9140625, "learning_rate": 2.293874960330054e-06, "loss": 1.1577, "step": 14108 }, { "epoch": 4.432141665194259, "grad_norm": 1.0546875, "learning_rate": 2.2913360837829262e-06, "loss": 1.1142, "step": 14110 }, { "epoch": 4.432769892219801, "grad_norm": 0.93359375, "learning_rate": 2.2887972072357984e-06, "loss": 1.1751, "step": 14112 }, { "epoch": 4.433398119245342, "grad_norm": 0.98046875, "learning_rate": 2.2862583306886703e-06, "loss": 1.2118, "step": 14114 }, { "epoch": 4.434026346270883, "grad_norm": 0.90625, "learning_rate": 2.2837194541415425e-06, "loss": 1.1006, "step": 14116 }, { "epoch": 4.434654573296425, "grad_norm": 0.93359375, "learning_rate": 2.2811805775944147e-06, "loss": 1.3168, "step": 14118 }, { "epoch": 4.435282800321966, "grad_norm": 1.015625, "learning_rate": 2.2786417010472865e-06, "loss": 1.2317, "step": 14120 }, { "epoch": 4.435911027347507, "grad_norm": 0.94140625, "learning_rate": 2.2761028245001587e-06, "loss": 1.128, "step": 14122 }, { "epoch": 4.436539254373049, "grad_norm": 0.9765625, "learning_rate": 2.273563947953031e-06, "loss": 1.1168, "step": 14124 }, { "epoch": 4.437167481398591, "grad_norm": 1.0625, "learning_rate": 2.271025071405903e-06, "loss": 1.0861, "step": 14126 }, { "epoch": 4.437795708424132, "grad_norm": 0.9921875, "learning_rate": 2.2684861948587754e-06, "loss": 1.0109, "step": 14128 }, { "epoch": 4.4384239354496735, "grad_norm": 1.015625, "learning_rate": 2.2659473183116472e-06, "loss": 1.1481, "step": 14130 }, { "epoch": 4.439052162475215, "grad_norm": 1.0859375, "learning_rate": 2.2634084417645195e-06, "loss": 1.3681, "step": 14132 }, { "epoch": 4.439680389500756, "grad_norm": 0.98046875, "learning_rate": 2.2608695652173913e-06, "loss": 1.1429, "step": 14134 }, { "epoch": 4.4403086165262975, "grad_norm": 1.0625, "learning_rate": 2.2583306886702635e-06, "loss": 1.0958, "step": 14136 }, { "epoch": 4.440936843551839, "grad_norm": 0.9375, "learning_rate": 2.2557918121231357e-06, "loss": 1.0978, "step": 14138 }, { "epoch": 4.44156507057738, "grad_norm": 0.95703125, "learning_rate": 2.2532529355760075e-06, "loss": 1.2154, "step": 14140 }, { "epoch": 4.4421932976029215, "grad_norm": 0.98828125, "learning_rate": 2.2507140590288798e-06, "loss": 1.1642, "step": 14142 }, { "epoch": 4.442821524628463, "grad_norm": 1.046875, "learning_rate": 2.248175182481752e-06, "loss": 1.0444, "step": 14144 }, { "epoch": 4.443449751654004, "grad_norm": 0.97265625, "learning_rate": 2.2456363059346242e-06, "loss": 1.2519, "step": 14146 }, { "epoch": 4.4440779786795455, "grad_norm": 0.953125, "learning_rate": 2.2430974293874965e-06, "loss": 1.2049, "step": 14148 }, { "epoch": 4.444706205705087, "grad_norm": 0.98046875, "learning_rate": 2.2405585528403683e-06, "loss": 1.1555, "step": 14150 }, { "epoch": 4.445334432730628, "grad_norm": 1.015625, "learning_rate": 2.2380196762932405e-06, "loss": 1.2642, "step": 14152 }, { "epoch": 4.445962659756169, "grad_norm": 0.953125, "learning_rate": 2.2354807997461127e-06, "loss": 1.2454, "step": 14154 }, { "epoch": 4.446590886781711, "grad_norm": 0.9453125, "learning_rate": 2.2329419231989845e-06, "loss": 1.1306, "step": 14156 }, { "epoch": 4.447219113807252, "grad_norm": 0.9765625, "learning_rate": 2.2304030466518568e-06, "loss": 1.2511, "step": 14158 }, { "epoch": 4.447847340832793, "grad_norm": 1.03125, "learning_rate": 2.227864170104729e-06, "loss": 1.176, "step": 14160 }, { "epoch": 4.448475567858335, "grad_norm": 0.9921875, "learning_rate": 2.225325293557601e-06, "loss": 1.1909, "step": 14162 }, { "epoch": 4.449103794883876, "grad_norm": 0.98046875, "learning_rate": 2.222786417010473e-06, "loss": 1.2183, "step": 14164 }, { "epoch": 4.449732021909417, "grad_norm": 0.96484375, "learning_rate": 2.2202475404633452e-06, "loss": 1.0752, "step": 14166 }, { "epoch": 4.450360248934959, "grad_norm": 1.0078125, "learning_rate": 2.2177086639162175e-06, "loss": 1.1448, "step": 14168 }, { "epoch": 4.4509884759605, "grad_norm": 0.99609375, "learning_rate": 2.2151697873690893e-06, "loss": 1.015, "step": 14170 }, { "epoch": 4.451616702986041, "grad_norm": 0.94921875, "learning_rate": 2.2126309108219615e-06, "loss": 1.1739, "step": 14172 }, { "epoch": 4.452244930011583, "grad_norm": 0.94921875, "learning_rate": 2.2100920342748337e-06, "loss": 1.1317, "step": 14174 }, { "epoch": 4.452873157037124, "grad_norm": 0.9453125, "learning_rate": 2.2075531577277055e-06, "loss": 1.2265, "step": 14176 }, { "epoch": 4.453501384062665, "grad_norm": 0.90625, "learning_rate": 2.2050142811805778e-06, "loss": 1.2055, "step": 14178 }, { "epoch": 4.454129611088207, "grad_norm": 0.96875, "learning_rate": 2.20247540463345e-06, "loss": 1.1325, "step": 14180 }, { "epoch": 4.454757838113748, "grad_norm": 1.0546875, "learning_rate": 2.199936528086322e-06, "loss": 0.9501, "step": 14182 }, { "epoch": 4.455386065139289, "grad_norm": 0.9609375, "learning_rate": 2.197397651539194e-06, "loss": 1.0794, "step": 14184 }, { "epoch": 4.456014292164831, "grad_norm": 1.015625, "learning_rate": 2.1948587749920663e-06, "loss": 1.1948, "step": 14186 }, { "epoch": 4.456642519190372, "grad_norm": 0.984375, "learning_rate": 2.1923198984449385e-06, "loss": 1.0927, "step": 14188 }, { "epoch": 4.457270746215913, "grad_norm": 0.953125, "learning_rate": 2.1897810218978103e-06, "loss": 1.158, "step": 14190 }, { "epoch": 4.457898973241456, "grad_norm": 0.94140625, "learning_rate": 2.1872421453506825e-06, "loss": 1.1658, "step": 14192 }, { "epoch": 4.458527200266997, "grad_norm": 0.9296875, "learning_rate": 2.1847032688035548e-06, "loss": 1.283, "step": 14194 }, { "epoch": 4.459155427292538, "grad_norm": 0.90625, "learning_rate": 2.1821643922564266e-06, "loss": 1.0913, "step": 14196 }, { "epoch": 4.4597836543180795, "grad_norm": 1.0078125, "learning_rate": 2.179625515709299e-06, "loss": 1.0829, "step": 14198 }, { "epoch": 4.460411881343621, "grad_norm": 0.9765625, "learning_rate": 2.177086639162171e-06, "loss": 1.258, "step": 14200 }, { "epoch": 4.461040108369162, "grad_norm": 0.984375, "learning_rate": 2.174547762615043e-06, "loss": 1.1295, "step": 14202 }, { "epoch": 4.4616683353947035, "grad_norm": 0.92578125, "learning_rate": 2.172008886067915e-06, "loss": 1.0668, "step": 14204 }, { "epoch": 4.462296562420245, "grad_norm": 1.0, "learning_rate": 2.1694700095207873e-06, "loss": 1.0042, "step": 14206 }, { "epoch": 4.462924789445786, "grad_norm": 1.0390625, "learning_rate": 2.166931132973659e-06, "loss": 1.0909, "step": 14208 }, { "epoch": 4.4635530164713275, "grad_norm": 1.0703125, "learning_rate": 2.1643922564265318e-06, "loss": 1.0797, "step": 14210 }, { "epoch": 4.464181243496869, "grad_norm": 0.94921875, "learning_rate": 2.1618533798794036e-06, "loss": 1.1016, "step": 14212 }, { "epoch": 4.46480947052241, "grad_norm": 0.9453125, "learning_rate": 2.1593145033322758e-06, "loss": 1.1244, "step": 14214 }, { "epoch": 4.4654376975479515, "grad_norm": 0.9140625, "learning_rate": 2.1567756267851476e-06, "loss": 1.1941, "step": 14216 }, { "epoch": 4.466065924573493, "grad_norm": 1.0703125, "learning_rate": 2.15423675023802e-06, "loss": 1.1098, "step": 14218 }, { "epoch": 4.466694151599034, "grad_norm": 1.0, "learning_rate": 2.151697873690892e-06, "loss": 1.1952, "step": 14220 }, { "epoch": 4.4673223786245755, "grad_norm": 0.953125, "learning_rate": 2.149158997143764e-06, "loss": 1.132, "step": 14222 }, { "epoch": 4.467950605650117, "grad_norm": 0.94921875, "learning_rate": 2.146620120596636e-06, "loss": 1.241, "step": 14224 }, { "epoch": 4.468578832675658, "grad_norm": 0.9765625, "learning_rate": 2.1440812440495083e-06, "loss": 1.1603, "step": 14226 }, { "epoch": 4.4692070597011995, "grad_norm": 1.015625, "learning_rate": 2.14154236750238e-06, "loss": 1.2364, "step": 14228 }, { "epoch": 4.469835286726741, "grad_norm": 1.015625, "learning_rate": 2.1390034909552523e-06, "loss": 1.1068, "step": 14230 }, { "epoch": 4.470463513752282, "grad_norm": 0.9609375, "learning_rate": 2.1364646144081246e-06, "loss": 1.2041, "step": 14232 }, { "epoch": 4.4710917407778235, "grad_norm": 0.984375, "learning_rate": 2.133925737860997e-06, "loss": 1.1681, "step": 14234 }, { "epoch": 4.471719967803365, "grad_norm": 0.984375, "learning_rate": 2.131386861313869e-06, "loss": 1.1262, "step": 14236 }, { "epoch": 4.472348194828906, "grad_norm": 1.046875, "learning_rate": 2.128847984766741e-06, "loss": 1.0849, "step": 14238 }, { "epoch": 4.472976421854447, "grad_norm": 0.9375, "learning_rate": 2.126309108219613e-06, "loss": 1.2483, "step": 14240 }, { "epoch": 4.473604648879989, "grad_norm": 1.015625, "learning_rate": 2.123770231672485e-06, "loss": 1.3059, "step": 14242 }, { "epoch": 4.47423287590553, "grad_norm": 1.078125, "learning_rate": 2.121231355125357e-06, "loss": 1.0465, "step": 14244 }, { "epoch": 4.474861102931071, "grad_norm": 1.015625, "learning_rate": 2.1186924785782293e-06, "loss": 1.017, "step": 14246 }, { "epoch": 4.475489329956613, "grad_norm": 0.91796875, "learning_rate": 2.116153602031101e-06, "loss": 1.1013, "step": 14248 }, { "epoch": 4.476117556982154, "grad_norm": 0.9296875, "learning_rate": 2.1136147254839734e-06, "loss": 1.2508, "step": 14250 }, { "epoch": 4.476745784007695, "grad_norm": 1.078125, "learning_rate": 2.1110758489368456e-06, "loss": 1.0445, "step": 14252 }, { "epoch": 4.477374011033237, "grad_norm": 0.98046875, "learning_rate": 2.108536972389718e-06, "loss": 1.1189, "step": 14254 }, { "epoch": 4.478002238058778, "grad_norm": 1.0390625, "learning_rate": 2.10599809584259e-06, "loss": 1.1378, "step": 14256 }, { "epoch": 4.47863046508432, "grad_norm": 1.0859375, "learning_rate": 2.103459219295462e-06, "loss": 0.99, "step": 14258 }, { "epoch": 4.479258692109862, "grad_norm": 1.0546875, "learning_rate": 2.100920342748334e-06, "loss": 1.0117, "step": 14260 }, { "epoch": 4.479886919135403, "grad_norm": 0.984375, "learning_rate": 2.0983814662012063e-06, "loss": 1.1814, "step": 14262 }, { "epoch": 4.480515146160944, "grad_norm": 0.90625, "learning_rate": 2.095842589654078e-06, "loss": 1.1661, "step": 14264 }, { "epoch": 4.481143373186486, "grad_norm": 0.9296875, "learning_rate": 2.0933037131069504e-06, "loss": 1.1667, "step": 14266 }, { "epoch": 4.481771600212027, "grad_norm": 0.97265625, "learning_rate": 2.0907648365598226e-06, "loss": 1.1508, "step": 14268 }, { "epoch": 4.482399827237568, "grad_norm": 0.91796875, "learning_rate": 2.0882259600126944e-06, "loss": 1.0723, "step": 14270 }, { "epoch": 4.48302805426311, "grad_norm": 0.953125, "learning_rate": 2.0856870834655666e-06, "loss": 1.0009, "step": 14272 }, { "epoch": 4.483656281288651, "grad_norm": 0.98046875, "learning_rate": 2.083148206918439e-06, "loss": 1.2293, "step": 14274 }, { "epoch": 4.484284508314192, "grad_norm": 0.9296875, "learning_rate": 2.080609330371311e-06, "loss": 1.2161, "step": 14276 }, { "epoch": 4.484912735339734, "grad_norm": 1.015625, "learning_rate": 2.078070453824183e-06, "loss": 1.0554, "step": 14278 }, { "epoch": 4.485540962365275, "grad_norm": 1.078125, "learning_rate": 2.075531577277055e-06, "loss": 0.9887, "step": 14280 }, { "epoch": 4.486169189390816, "grad_norm": 0.92578125, "learning_rate": 2.0729927007299273e-06, "loss": 1.0633, "step": 14282 }, { "epoch": 4.4867974164163575, "grad_norm": 0.93359375, "learning_rate": 2.070453824182799e-06, "loss": 1.101, "step": 14284 }, { "epoch": 4.487425643441899, "grad_norm": 0.95703125, "learning_rate": 2.0679149476356714e-06, "loss": 1.1001, "step": 14286 }, { "epoch": 4.48805387046744, "grad_norm": 0.91015625, "learning_rate": 2.0653760710885436e-06, "loss": 1.1801, "step": 14288 }, { "epoch": 4.4886820974929815, "grad_norm": 0.921875, "learning_rate": 2.0628371945414154e-06, "loss": 1.1311, "step": 14290 }, { "epoch": 4.489310324518523, "grad_norm": 0.94921875, "learning_rate": 2.0602983179942876e-06, "loss": 1.0663, "step": 14292 }, { "epoch": 4.489938551544064, "grad_norm": 0.9453125, "learning_rate": 2.05775944144716e-06, "loss": 1.2145, "step": 14294 }, { "epoch": 4.4905667785696055, "grad_norm": 1.0, "learning_rate": 2.055220564900032e-06, "loss": 1.1143, "step": 14296 }, { "epoch": 4.491195005595147, "grad_norm": 0.9921875, "learning_rate": 2.052681688352904e-06, "loss": 1.1424, "step": 14298 }, { "epoch": 4.491823232620688, "grad_norm": 0.9921875, "learning_rate": 2.050142811805776e-06, "loss": 1.1271, "step": 14300 }, { "epoch": 4.4924514596462295, "grad_norm": 1.0859375, "learning_rate": 2.0476039352586484e-06, "loss": 1.1047, "step": 14302 }, { "epoch": 4.493079686671771, "grad_norm": 0.91015625, "learning_rate": 2.04506505871152e-06, "loss": 1.0716, "step": 14304 }, { "epoch": 4.493707913697312, "grad_norm": 0.95703125, "learning_rate": 2.0425261821643924e-06, "loss": 1.1632, "step": 14306 }, { "epoch": 4.4943361407228535, "grad_norm": 1.0390625, "learning_rate": 2.0399873056172646e-06, "loss": 1.001, "step": 14308 }, { "epoch": 4.494964367748395, "grad_norm": 1.0625, "learning_rate": 2.0374484290701364e-06, "loss": 1.071, "step": 14310 }, { "epoch": 4.495592594773936, "grad_norm": 1.0078125, "learning_rate": 2.0349095525230087e-06, "loss": 1.0157, "step": 14312 }, { "epoch": 4.4962208217994775, "grad_norm": 1.1015625, "learning_rate": 2.032370675975881e-06, "loss": 1.1606, "step": 14314 }, { "epoch": 4.496849048825019, "grad_norm": 1.0546875, "learning_rate": 2.029831799428753e-06, "loss": 1.2019, "step": 14316 }, { "epoch": 4.49747727585056, "grad_norm": 0.98828125, "learning_rate": 2.0272929228816254e-06, "loss": 1.1373, "step": 14318 }, { "epoch": 4.4981055028761014, "grad_norm": 1.0078125, "learning_rate": 2.024754046334497e-06, "loss": 1.0998, "step": 14320 }, { "epoch": 4.498733729901643, "grad_norm": 0.9296875, "learning_rate": 2.0222151697873694e-06, "loss": 1.1939, "step": 14322 }, { "epoch": 4.499361956927185, "grad_norm": 1.0234375, "learning_rate": 2.019676293240241e-06, "loss": 1.0923, "step": 14324 }, { "epoch": 4.499990183952726, "grad_norm": 0.98828125, "learning_rate": 2.0171374166931134e-06, "loss": 1.1155, "step": 14326 }, { "epoch": 4.500618410978268, "grad_norm": 1.0078125, "learning_rate": 2.0145985401459857e-06, "loss": 1.0697, "step": 14328 }, { "epoch": 4.501246638003809, "grad_norm": 1.0078125, "learning_rate": 2.0120596635988575e-06, "loss": 1.0726, "step": 14330 }, { "epoch": 4.50187486502935, "grad_norm": 1.015625, "learning_rate": 2.0095207870517297e-06, "loss": 1.2866, "step": 14332 }, { "epoch": 4.502503092054892, "grad_norm": 1.03125, "learning_rate": 2.006981910504602e-06, "loss": 1.104, "step": 14334 }, { "epoch": 4.503131319080433, "grad_norm": 1.0546875, "learning_rate": 2.0044430339574737e-06, "loss": 1.0494, "step": 14336 }, { "epoch": 4.503759546105974, "grad_norm": 0.953125, "learning_rate": 2.0019041574103464e-06, "loss": 1.1536, "step": 14338 }, { "epoch": 4.504387773131516, "grad_norm": 1.0, "learning_rate": 1.999365280863218e-06, "loss": 1.219, "step": 14340 }, { "epoch": 4.505016000157057, "grad_norm": 0.9140625, "learning_rate": 1.9968264043160904e-06, "loss": 1.1244, "step": 14342 }, { "epoch": 4.505644227182598, "grad_norm": 0.93359375, "learning_rate": 1.9942875277689626e-06, "loss": 1.054, "step": 14344 }, { "epoch": 4.50627245420814, "grad_norm": 1.0078125, "learning_rate": 1.9917486512218344e-06, "loss": 1.1172, "step": 14346 }, { "epoch": 4.506900681233681, "grad_norm": 1.0625, "learning_rate": 1.9892097746747067e-06, "loss": 1.158, "step": 14348 }, { "epoch": 4.507528908259222, "grad_norm": 0.9921875, "learning_rate": 1.9866708981275785e-06, "loss": 1.2359, "step": 14350 }, { "epoch": 4.508157135284764, "grad_norm": 1.09375, "learning_rate": 1.9841320215804507e-06, "loss": 1.0165, "step": 14352 }, { "epoch": 4.508785362310305, "grad_norm": 0.984375, "learning_rate": 1.981593145033323e-06, "loss": 1.1011, "step": 14354 }, { "epoch": 4.509413589335846, "grad_norm": 1.046875, "learning_rate": 1.9790542684861947e-06, "loss": 1.1337, "step": 14356 }, { "epoch": 4.510041816361388, "grad_norm": 0.96484375, "learning_rate": 1.976515391939067e-06, "loss": 1.1449, "step": 14358 }, { "epoch": 4.510670043386929, "grad_norm": 0.984375, "learning_rate": 1.973976515391939e-06, "loss": 1.1035, "step": 14360 }, { "epoch": 4.51129827041247, "grad_norm": 0.87109375, "learning_rate": 1.9714376388448114e-06, "loss": 1.1715, "step": 14362 }, { "epoch": 4.5119264974380116, "grad_norm": 0.99609375, "learning_rate": 1.9688987622976837e-06, "loss": 1.2025, "step": 14364 }, { "epoch": 4.512554724463553, "grad_norm": 0.99609375, "learning_rate": 1.9663598857505555e-06, "loss": 1.1908, "step": 14366 }, { "epoch": 4.513182951489094, "grad_norm": 0.9140625, "learning_rate": 1.9638210092034277e-06, "loss": 1.1566, "step": 14368 }, { "epoch": 4.5138111785146355, "grad_norm": 0.8984375, "learning_rate": 1.9612821326563e-06, "loss": 1.1035, "step": 14370 }, { "epoch": 4.514439405540177, "grad_norm": 0.9453125, "learning_rate": 1.9587432561091717e-06, "loss": 1.2598, "step": 14372 }, { "epoch": 4.515067632565718, "grad_norm": 0.88671875, "learning_rate": 1.956204379562044e-06, "loss": 1.2299, "step": 14374 }, { "epoch": 4.5156958595912595, "grad_norm": 0.87890625, "learning_rate": 1.953665503014916e-06, "loss": 1.0409, "step": 14376 }, { "epoch": 4.516324086616801, "grad_norm": 0.9609375, "learning_rate": 1.951126626467788e-06, "loss": 1.1498, "step": 14378 }, { "epoch": 4.516952313642342, "grad_norm": 0.98046875, "learning_rate": 1.9485877499206602e-06, "loss": 1.1092, "step": 14380 }, { "epoch": 4.5175805406678835, "grad_norm": 0.93359375, "learning_rate": 1.9460488733735325e-06, "loss": 1.2638, "step": 14382 }, { "epoch": 4.518208767693425, "grad_norm": 0.9140625, "learning_rate": 1.9435099968264047e-06, "loss": 1.194, "step": 14384 }, { "epoch": 4.518836994718967, "grad_norm": 0.89453125, "learning_rate": 1.9409711202792765e-06, "loss": 1.2048, "step": 14386 }, { "epoch": 4.5194652217445075, "grad_norm": 0.97265625, "learning_rate": 1.9384322437321487e-06, "loss": 1.1453, "step": 14388 }, { "epoch": 4.52009344877005, "grad_norm": 0.9140625, "learning_rate": 1.935893367185021e-06, "loss": 1.1501, "step": 14390 }, { "epoch": 4.52072167579559, "grad_norm": 1.0234375, "learning_rate": 1.9333544906378927e-06, "loss": 1.0499, "step": 14392 }, { "epoch": 4.521349902821132, "grad_norm": 0.9765625, "learning_rate": 1.930815614090765e-06, "loss": 1.2874, "step": 14394 }, { "epoch": 4.521978129846674, "grad_norm": 1.046875, "learning_rate": 1.928276737543637e-06, "loss": 1.055, "step": 14396 }, { "epoch": 4.522606356872215, "grad_norm": 0.9140625, "learning_rate": 1.925737860996509e-06, "loss": 1.1819, "step": 14398 }, { "epoch": 4.523234583897756, "grad_norm": 0.94921875, "learning_rate": 1.9231989844493812e-06, "loss": 1.1957, "step": 14400 }, { "epoch": 4.523862810923298, "grad_norm": 1.015625, "learning_rate": 1.9206601079022535e-06, "loss": 1.1364, "step": 14402 }, { "epoch": 4.524491037948839, "grad_norm": 0.9765625, "learning_rate": 1.9181212313551257e-06, "loss": 1.1222, "step": 14404 }, { "epoch": 4.52511926497438, "grad_norm": 1.015625, "learning_rate": 1.9155823548079975e-06, "loss": 1.0754, "step": 14406 }, { "epoch": 4.525747491999922, "grad_norm": 0.9375, "learning_rate": 1.9130434782608697e-06, "loss": 1.2603, "step": 14408 }, { "epoch": 4.526375719025463, "grad_norm": 0.984375, "learning_rate": 1.910504601713742e-06, "loss": 1.0795, "step": 14410 }, { "epoch": 4.527003946051004, "grad_norm": 0.9453125, "learning_rate": 1.9079657251666138e-06, "loss": 1.2351, "step": 14412 }, { "epoch": 4.527632173076546, "grad_norm": 0.9765625, "learning_rate": 1.905426848619486e-06, "loss": 1.0907, "step": 14414 }, { "epoch": 4.528260400102087, "grad_norm": 0.953125, "learning_rate": 1.902887972072358e-06, "loss": 1.1108, "step": 14416 }, { "epoch": 4.528888627127628, "grad_norm": 0.95703125, "learning_rate": 1.9003490955252302e-06, "loss": 1.2744, "step": 14418 }, { "epoch": 4.52951685415317, "grad_norm": 0.9296875, "learning_rate": 1.8978102189781023e-06, "loss": 1.1969, "step": 14420 }, { "epoch": 4.530145081178711, "grad_norm": 0.8828125, "learning_rate": 1.8952713424309743e-06, "loss": 1.2237, "step": 14422 }, { "epoch": 4.530773308204252, "grad_norm": 0.9453125, "learning_rate": 1.8927324658838467e-06, "loss": 1.2232, "step": 14424 }, { "epoch": 4.531401535229794, "grad_norm": 0.98046875, "learning_rate": 1.8901935893367187e-06, "loss": 1.1935, "step": 14426 }, { "epoch": 4.532029762255335, "grad_norm": 0.91015625, "learning_rate": 1.8876547127895908e-06, "loss": 1.2256, "step": 14428 }, { "epoch": 4.532657989280876, "grad_norm": 0.95703125, "learning_rate": 1.885115836242463e-06, "loss": 1.1998, "step": 14430 }, { "epoch": 4.533286216306418, "grad_norm": 0.98046875, "learning_rate": 1.882576959695335e-06, "loss": 1.1356, "step": 14432 }, { "epoch": 4.533914443331959, "grad_norm": 0.9609375, "learning_rate": 1.880038083148207e-06, "loss": 1.0764, "step": 14434 }, { "epoch": 4.5345426703575, "grad_norm": 1.0546875, "learning_rate": 1.8774992066010793e-06, "loss": 1.0998, "step": 14436 }, { "epoch": 4.535170897383042, "grad_norm": 0.91796875, "learning_rate": 1.8749603300539513e-06, "loss": 1.0445, "step": 14438 }, { "epoch": 4.535799124408583, "grad_norm": 1.0390625, "learning_rate": 1.8724214535068233e-06, "loss": 1.1798, "step": 14440 }, { "epoch": 4.536427351434124, "grad_norm": 0.97265625, "learning_rate": 1.8698825769596953e-06, "loss": 1.2078, "step": 14442 }, { "epoch": 4.537055578459666, "grad_norm": 1.0234375, "learning_rate": 1.8673437004125675e-06, "loss": 1.1894, "step": 14444 }, { "epoch": 4.537683805485207, "grad_norm": 1.1171875, "learning_rate": 1.8648048238654398e-06, "loss": 1.079, "step": 14446 }, { "epoch": 4.538312032510748, "grad_norm": 0.94921875, "learning_rate": 1.862265947318312e-06, "loss": 1.1737, "step": 14448 }, { "epoch": 4.5389402595362895, "grad_norm": 0.94140625, "learning_rate": 1.859727070771184e-06, "loss": 1.1112, "step": 14450 }, { "epoch": 4.539568486561832, "grad_norm": 0.9609375, "learning_rate": 1.857188194224056e-06, "loss": 1.0558, "step": 14452 }, { "epoch": 4.540196713587372, "grad_norm": 0.9375, "learning_rate": 1.854649317676928e-06, "loss": 1.1549, "step": 14454 }, { "epoch": 4.540824940612914, "grad_norm": 1.03125, "learning_rate": 1.8521104411298003e-06, "loss": 1.0415, "step": 14456 }, { "epoch": 4.541453167638455, "grad_norm": 1.0078125, "learning_rate": 1.8495715645826723e-06, "loss": 1.1062, "step": 14458 }, { "epoch": 4.542081394663997, "grad_norm": 0.95703125, "learning_rate": 1.8470326880355443e-06, "loss": 1.262, "step": 14460 }, { "epoch": 4.542709621689538, "grad_norm": 0.91015625, "learning_rate": 1.8444938114884165e-06, "loss": 1.1078, "step": 14462 }, { "epoch": 4.54333784871508, "grad_norm": 0.984375, "learning_rate": 1.8419549349412886e-06, "loss": 1.1121, "step": 14464 }, { "epoch": 4.543966075740621, "grad_norm": 1.0390625, "learning_rate": 1.8394160583941606e-06, "loss": 1.1654, "step": 14466 }, { "epoch": 4.544594302766162, "grad_norm": 0.96875, "learning_rate": 1.836877181847033e-06, "loss": 1.1699, "step": 14468 }, { "epoch": 4.545222529791704, "grad_norm": 0.9375, "learning_rate": 1.834338305299905e-06, "loss": 1.1465, "step": 14470 }, { "epoch": 4.545850756817245, "grad_norm": 0.98828125, "learning_rate": 1.831799428752777e-06, "loss": 1.0751, "step": 14472 }, { "epoch": 4.546478983842786, "grad_norm": 1.0390625, "learning_rate": 1.8292605522056493e-06, "loss": 1.0948, "step": 14474 }, { "epoch": 4.547107210868328, "grad_norm": 1.0546875, "learning_rate": 1.8267216756585213e-06, "loss": 1.1492, "step": 14476 }, { "epoch": 4.547735437893869, "grad_norm": 0.99609375, "learning_rate": 1.8241827991113933e-06, "loss": 1.0863, "step": 14478 }, { "epoch": 4.54836366491941, "grad_norm": 0.9375, "learning_rate": 1.8216439225642653e-06, "loss": 1.1006, "step": 14480 }, { "epoch": 4.548991891944952, "grad_norm": 0.98046875, "learning_rate": 1.8191050460171376e-06, "loss": 1.1491, "step": 14482 }, { "epoch": 4.549620118970493, "grad_norm": 0.94140625, "learning_rate": 1.8165661694700096e-06, "loss": 1.0547, "step": 14484 }, { "epoch": 4.550248345996034, "grad_norm": 1.03125, "learning_rate": 1.8140272929228816e-06, "loss": 1.1896, "step": 14486 }, { "epoch": 4.550876573021576, "grad_norm": 0.96484375, "learning_rate": 1.811488416375754e-06, "loss": 1.1691, "step": 14488 }, { "epoch": 4.551504800047117, "grad_norm": 0.96875, "learning_rate": 1.808949539828626e-06, "loss": 1.1759, "step": 14490 }, { "epoch": 4.552133027072658, "grad_norm": 1.03125, "learning_rate": 1.806410663281498e-06, "loss": 1.2388, "step": 14492 }, { "epoch": 4.5527612540982, "grad_norm": 0.9765625, "learning_rate": 1.8038717867343703e-06, "loss": 1.1575, "step": 14494 }, { "epoch": 4.553389481123741, "grad_norm": 0.91796875, "learning_rate": 1.8013329101872423e-06, "loss": 1.1362, "step": 14496 }, { "epoch": 4.554017708149282, "grad_norm": 1.0859375, "learning_rate": 1.7987940336401143e-06, "loss": 0.9711, "step": 14498 }, { "epoch": 4.554645935174824, "grad_norm": 1.0078125, "learning_rate": 1.7962551570929866e-06, "loss": 1.2435, "step": 14500 }, { "epoch": 4.555274162200365, "grad_norm": 1.03125, "learning_rate": 1.7937162805458586e-06, "loss": 1.1746, "step": 14502 }, { "epoch": 4.555902389225906, "grad_norm": 0.97265625, "learning_rate": 1.7911774039987306e-06, "loss": 1.1546, "step": 14504 }, { "epoch": 4.556530616251448, "grad_norm": 1.0, "learning_rate": 1.7886385274516026e-06, "loss": 1.2252, "step": 14506 }, { "epoch": 4.557158843276989, "grad_norm": 1.0, "learning_rate": 1.7860996509044748e-06, "loss": 1.0106, "step": 14508 }, { "epoch": 4.55778707030253, "grad_norm": 0.98828125, "learning_rate": 1.783560774357347e-06, "loss": 1.0719, "step": 14510 }, { "epoch": 4.558415297328072, "grad_norm": 0.95703125, "learning_rate": 1.7810218978102193e-06, "loss": 1.2324, "step": 14512 }, { "epoch": 4.559043524353613, "grad_norm": 1.015625, "learning_rate": 1.7784830212630913e-06, "loss": 1.1843, "step": 14514 }, { "epoch": 4.559671751379154, "grad_norm": 1.0078125, "learning_rate": 1.7759441447159633e-06, "loss": 1.1637, "step": 14516 }, { "epoch": 4.5602999784046965, "grad_norm": 0.98828125, "learning_rate": 1.7734052681688354e-06, "loss": 1.1209, "step": 14518 }, { "epoch": 4.560928205430237, "grad_norm": 1.0234375, "learning_rate": 1.7708663916217076e-06, "loss": 1.0147, "step": 14520 }, { "epoch": 4.561556432455779, "grad_norm": 1.0390625, "learning_rate": 1.7683275150745796e-06, "loss": 0.9469, "step": 14522 }, { "epoch": 4.56218465948132, "grad_norm": 0.9765625, "learning_rate": 1.7657886385274516e-06, "loss": 1.2459, "step": 14524 }, { "epoch": 4.562812886506862, "grad_norm": 0.9921875, "learning_rate": 1.7632497619803238e-06, "loss": 1.082, "step": 14526 }, { "epoch": 4.563441113532403, "grad_norm": 0.96875, "learning_rate": 1.7607108854331959e-06, "loss": 1.1414, "step": 14528 }, { "epoch": 4.564069340557944, "grad_norm": 1.015625, "learning_rate": 1.7581720088860679e-06, "loss": 1.1879, "step": 14530 }, { "epoch": 4.564697567583486, "grad_norm": 1.0078125, "learning_rate": 1.7556331323389403e-06, "loss": 0.9941, "step": 14532 }, { "epoch": 4.565325794609027, "grad_norm": 1.125, "learning_rate": 1.7530942557918123e-06, "loss": 1.132, "step": 14534 }, { "epoch": 4.565954021634568, "grad_norm": 0.96875, "learning_rate": 1.7505553792446844e-06, "loss": 1.155, "step": 14536 }, { "epoch": 4.56658224866011, "grad_norm": 0.98046875, "learning_rate": 1.7480165026975566e-06, "loss": 1.0182, "step": 14538 }, { "epoch": 4.567210475685651, "grad_norm": 1.0625, "learning_rate": 1.7454776261504286e-06, "loss": 1.2896, "step": 14540 }, { "epoch": 4.567838702711192, "grad_norm": 0.9921875, "learning_rate": 1.7429387496033006e-06, "loss": 1.1056, "step": 14542 }, { "epoch": 4.568466929736734, "grad_norm": 0.98046875, "learning_rate": 1.7403998730561729e-06, "loss": 1.2316, "step": 14544 }, { "epoch": 4.569095156762275, "grad_norm": 1.0, "learning_rate": 1.7378609965090449e-06, "loss": 1.1941, "step": 14546 }, { "epoch": 4.569723383787816, "grad_norm": 0.9609375, "learning_rate": 1.7353221199619169e-06, "loss": 1.0145, "step": 14548 }, { "epoch": 4.570351610813358, "grad_norm": 1.015625, "learning_rate": 1.732783243414789e-06, "loss": 1.1975, "step": 14550 }, { "epoch": 4.570979837838899, "grad_norm": 0.984375, "learning_rate": 1.7302443668676613e-06, "loss": 1.0858, "step": 14552 }, { "epoch": 4.57160806486444, "grad_norm": 0.9921875, "learning_rate": 1.7277054903205334e-06, "loss": 1.0628, "step": 14554 }, { "epoch": 4.572236291889982, "grad_norm": 1.03125, "learning_rate": 1.7251666137734056e-06, "loss": 1.2128, "step": 14556 }, { "epoch": 4.572864518915523, "grad_norm": 0.9765625, "learning_rate": 1.7226277372262776e-06, "loss": 1.2294, "step": 14558 }, { "epoch": 4.573492745941064, "grad_norm": 1.015625, "learning_rate": 1.7200888606791496e-06, "loss": 1.0932, "step": 14560 }, { "epoch": 4.574120972966606, "grad_norm": 0.9921875, "learning_rate": 1.7175499841320216e-06, "loss": 1.0813, "step": 14562 }, { "epoch": 4.574749199992147, "grad_norm": 0.96875, "learning_rate": 1.7150111075848939e-06, "loss": 1.0235, "step": 14564 }, { "epoch": 4.575377427017688, "grad_norm": 1.0546875, "learning_rate": 1.7124722310377659e-06, "loss": 1.2288, "step": 14566 }, { "epoch": 4.57600565404323, "grad_norm": 1.03125, "learning_rate": 1.709933354490638e-06, "loss": 1.1042, "step": 14568 }, { "epoch": 4.576633881068771, "grad_norm": 1.046875, "learning_rate": 1.7073944779435101e-06, "loss": 1.1029, "step": 14570 }, { "epoch": 4.577262108094312, "grad_norm": 1.03125, "learning_rate": 1.7048556013963822e-06, "loss": 1.313, "step": 14572 }, { "epoch": 4.577890335119854, "grad_norm": 1.0, "learning_rate": 1.7023167248492544e-06, "loss": 1.2134, "step": 14574 }, { "epoch": 4.578518562145395, "grad_norm": 0.921875, "learning_rate": 1.6997778483021266e-06, "loss": 1.3339, "step": 14576 }, { "epoch": 4.579146789170936, "grad_norm": 0.953125, "learning_rate": 1.6972389717549986e-06, "loss": 1.0822, "step": 14578 }, { "epoch": 4.579775016196478, "grad_norm": 0.98046875, "learning_rate": 1.6947000952078706e-06, "loss": 1.1151, "step": 14580 }, { "epoch": 4.580403243222019, "grad_norm": 0.91796875, "learning_rate": 1.6921612186607429e-06, "loss": 1.1375, "step": 14582 }, { "epoch": 4.581031470247561, "grad_norm": 1.0703125, "learning_rate": 1.689622342113615e-06, "loss": 1.1016, "step": 14584 }, { "epoch": 4.581659697273102, "grad_norm": 0.93359375, "learning_rate": 1.687083465566487e-06, "loss": 1.1397, "step": 14586 }, { "epoch": 4.582287924298644, "grad_norm": 0.90234375, "learning_rate": 1.684544589019359e-06, "loss": 1.0905, "step": 14588 }, { "epoch": 4.582916151324184, "grad_norm": 0.98046875, "learning_rate": 1.6820057124722312e-06, "loss": 1.1155, "step": 14590 }, { "epoch": 4.5835443783497265, "grad_norm": 0.9375, "learning_rate": 1.6794668359251032e-06, "loss": 1.2123, "step": 14592 }, { "epoch": 4.584172605375268, "grad_norm": 1.0703125, "learning_rate": 1.6769279593779752e-06, "loss": 1.1227, "step": 14594 }, { "epoch": 4.584800832400809, "grad_norm": 0.9609375, "learning_rate": 1.6743890828308476e-06, "loss": 1.1416, "step": 14596 }, { "epoch": 4.5854290594263505, "grad_norm": 0.90625, "learning_rate": 1.6718502062837197e-06, "loss": 1.1229, "step": 14598 }, { "epoch": 4.586057286451892, "grad_norm": 0.96875, "learning_rate": 1.6693113297365917e-06, "loss": 1.0852, "step": 14600 }, { "epoch": 4.586685513477433, "grad_norm": 1.046875, "learning_rate": 1.666772453189464e-06, "loss": 1.1973, "step": 14602 }, { "epoch": 4.5873137405029745, "grad_norm": 0.921875, "learning_rate": 1.664233576642336e-06, "loss": 1.1013, "step": 14604 }, { "epoch": 4.587941967528516, "grad_norm": 0.9296875, "learning_rate": 1.661694700095208e-06, "loss": 1.1605, "step": 14606 }, { "epoch": 4.588570194554057, "grad_norm": 0.8828125, "learning_rate": 1.6591558235480802e-06, "loss": 1.1812, "step": 14608 }, { "epoch": 4.5891984215795985, "grad_norm": 0.98046875, "learning_rate": 1.6566169470009522e-06, "loss": 1.1051, "step": 14610 }, { "epoch": 4.58982664860514, "grad_norm": 0.9453125, "learning_rate": 1.6540780704538242e-06, "loss": 1.1445, "step": 14612 }, { "epoch": 4.590454875630681, "grad_norm": 0.95703125, "learning_rate": 1.6515391939066962e-06, "loss": 1.1992, "step": 14614 }, { "epoch": 4.591083102656222, "grad_norm": 1.015625, "learning_rate": 1.6490003173595684e-06, "loss": 1.0407, "step": 14616 }, { "epoch": 4.591711329681764, "grad_norm": 0.9375, "learning_rate": 1.6464614408124407e-06, "loss": 1.0998, "step": 14618 }, { "epoch": 4.592339556707305, "grad_norm": 1.0625, "learning_rate": 1.643922564265313e-06, "loss": 1.1715, "step": 14620 }, { "epoch": 4.592967783732846, "grad_norm": 0.96875, "learning_rate": 1.641383687718185e-06, "loss": 1.1326, "step": 14622 }, { "epoch": 4.593596010758388, "grad_norm": 1.109375, "learning_rate": 1.638844811171057e-06, "loss": 1.1062, "step": 14624 }, { "epoch": 4.594224237783929, "grad_norm": 0.953125, "learning_rate": 1.636305934623929e-06, "loss": 1.0433, "step": 14626 }, { "epoch": 4.59485246480947, "grad_norm": 0.9453125, "learning_rate": 1.6337670580768012e-06, "loss": 1.1942, "step": 14628 }, { "epoch": 4.595480691835012, "grad_norm": 0.95703125, "learning_rate": 1.6312281815296732e-06, "loss": 1.1766, "step": 14630 }, { "epoch": 4.596108918860553, "grad_norm": 0.98828125, "learning_rate": 1.6286893049825452e-06, "loss": 1.1056, "step": 14632 }, { "epoch": 4.596737145886094, "grad_norm": 0.98828125, "learning_rate": 1.6261504284354174e-06, "loss": 1.0755, "step": 14634 }, { "epoch": 4.597365372911636, "grad_norm": 1.0390625, "learning_rate": 1.6236115518882895e-06, "loss": 1.17, "step": 14636 }, { "epoch": 4.597993599937177, "grad_norm": 0.9140625, "learning_rate": 1.621072675341162e-06, "loss": 1.235, "step": 14638 }, { "epoch": 4.598621826962718, "grad_norm": 0.9765625, "learning_rate": 1.618533798794034e-06, "loss": 1.2114, "step": 14640 }, { "epoch": 4.59925005398826, "grad_norm": 0.9765625, "learning_rate": 1.615994922246906e-06, "loss": 1.0666, "step": 14642 }, { "epoch": 4.599878281013801, "grad_norm": 0.953125, "learning_rate": 1.613456045699778e-06, "loss": 1.2552, "step": 14644 }, { "epoch": 4.600506508039342, "grad_norm": 1.09375, "learning_rate": 1.6109171691526502e-06, "loss": 1.1146, "step": 14646 }, { "epoch": 4.601134735064884, "grad_norm": 0.96875, "learning_rate": 1.6083782926055222e-06, "loss": 1.2238, "step": 14648 }, { "epoch": 4.601762962090426, "grad_norm": 1.015625, "learning_rate": 1.6058394160583942e-06, "loss": 1.0408, "step": 14650 }, { "epoch": 4.602391189115966, "grad_norm": 1.0390625, "learning_rate": 1.6033005395112665e-06, "loss": 1.0531, "step": 14652 }, { "epoch": 4.603019416141509, "grad_norm": 1.0390625, "learning_rate": 1.6007616629641385e-06, "loss": 1.1254, "step": 14654 }, { "epoch": 4.603647643167049, "grad_norm": 1.0078125, "learning_rate": 1.5982227864170105e-06, "loss": 1.2678, "step": 14656 }, { "epoch": 4.604275870192591, "grad_norm": 1.046875, "learning_rate": 1.5956839098698825e-06, "loss": 1.1487, "step": 14658 }, { "epoch": 4.6049040972181325, "grad_norm": 1.0546875, "learning_rate": 1.593145033322755e-06, "loss": 1.1315, "step": 14660 }, { "epoch": 4.605532324243674, "grad_norm": 0.9296875, "learning_rate": 1.590606156775627e-06, "loss": 1.157, "step": 14662 }, { "epoch": 4.606160551269215, "grad_norm": 0.97265625, "learning_rate": 1.5880672802284992e-06, "loss": 1.1004, "step": 14664 }, { "epoch": 4.6067887782947565, "grad_norm": 0.92578125, "learning_rate": 1.5855284036813712e-06, "loss": 1.1403, "step": 14666 }, { "epoch": 4.607417005320298, "grad_norm": 0.98046875, "learning_rate": 1.5829895271342432e-06, "loss": 1.1566, "step": 14668 }, { "epoch": 4.608045232345839, "grad_norm": 1.015625, "learning_rate": 1.5804506505871152e-06, "loss": 1.0673, "step": 14670 }, { "epoch": 4.6086734593713805, "grad_norm": 0.98828125, "learning_rate": 1.5779117740399875e-06, "loss": 1.096, "step": 14672 }, { "epoch": 4.609301686396922, "grad_norm": 1.078125, "learning_rate": 1.5753728974928595e-06, "loss": 1.0527, "step": 14674 }, { "epoch": 4.609929913422463, "grad_norm": 0.9453125, "learning_rate": 1.5728340209457315e-06, "loss": 1.2231, "step": 14676 }, { "epoch": 4.6105581404480045, "grad_norm": 0.953125, "learning_rate": 1.5702951443986037e-06, "loss": 1.0617, "step": 14678 }, { "epoch": 4.611186367473546, "grad_norm": 0.953125, "learning_rate": 1.5677562678514758e-06, "loss": 1.126, "step": 14680 }, { "epoch": 4.611814594499087, "grad_norm": 1.1328125, "learning_rate": 1.565217391304348e-06, "loss": 1.1207, "step": 14682 }, { "epoch": 4.6124428215246285, "grad_norm": 1.0546875, "learning_rate": 1.5626785147572202e-06, "loss": 1.1055, "step": 14684 }, { "epoch": 4.61307104855017, "grad_norm": 0.95703125, "learning_rate": 1.5601396382100922e-06, "loss": 1.1396, "step": 14686 }, { "epoch": 4.613699275575711, "grad_norm": 1.015625, "learning_rate": 1.5576007616629642e-06, "loss": 1.2067, "step": 14688 }, { "epoch": 4.6143275026012525, "grad_norm": 1.046875, "learning_rate": 1.5550618851158365e-06, "loss": 1.0377, "step": 14690 }, { "epoch": 4.614955729626794, "grad_norm": 0.92578125, "learning_rate": 1.5525230085687085e-06, "loss": 1.2499, "step": 14692 }, { "epoch": 4.615583956652335, "grad_norm": 0.98046875, "learning_rate": 1.5499841320215805e-06, "loss": 1.2297, "step": 14694 }, { "epoch": 4.6162121836778764, "grad_norm": 0.94921875, "learning_rate": 1.5474452554744525e-06, "loss": 1.1028, "step": 14696 }, { "epoch": 4.616840410703418, "grad_norm": 0.93359375, "learning_rate": 1.5449063789273248e-06, "loss": 1.1262, "step": 14698 }, { "epoch": 4.617468637728959, "grad_norm": 1.0078125, "learning_rate": 1.5423675023801968e-06, "loss": 1.2461, "step": 14700 }, { "epoch": 4.6180968647545, "grad_norm": 1.0234375, "learning_rate": 1.5398286258330688e-06, "loss": 1.1864, "step": 14702 }, { "epoch": 4.618725091780042, "grad_norm": 0.90234375, "learning_rate": 1.5372897492859412e-06, "loss": 1.2635, "step": 14704 }, { "epoch": 4.619353318805583, "grad_norm": 0.984375, "learning_rate": 1.5347508727388133e-06, "loss": 1.1272, "step": 14706 }, { "epoch": 4.619981545831124, "grad_norm": 1.0078125, "learning_rate": 1.5322119961916853e-06, "loss": 1.1354, "step": 14708 }, { "epoch": 4.620609772856666, "grad_norm": 0.94140625, "learning_rate": 1.5296731196445575e-06, "loss": 1.1599, "step": 14710 }, { "epoch": 4.621237999882207, "grad_norm": 0.953125, "learning_rate": 1.5271342430974295e-06, "loss": 1.2292, "step": 14712 }, { "epoch": 4.621866226907748, "grad_norm": 0.87890625, "learning_rate": 1.5245953665503015e-06, "loss": 1.2026, "step": 14714 }, { "epoch": 4.622494453933291, "grad_norm": 0.92578125, "learning_rate": 1.5220564900031738e-06, "loss": 1.0925, "step": 14716 }, { "epoch": 4.623122680958831, "grad_norm": 1.015625, "learning_rate": 1.5195176134560458e-06, "loss": 1.2194, "step": 14718 }, { "epoch": 4.623750907984373, "grad_norm": 0.9921875, "learning_rate": 1.5169787369089178e-06, "loss": 1.1274, "step": 14720 }, { "epoch": 4.624379135009914, "grad_norm": 0.984375, "learning_rate": 1.5144398603617898e-06, "loss": 1.0479, "step": 14722 }, { "epoch": 4.625007362035456, "grad_norm": 0.9140625, "learning_rate": 1.5119009838146623e-06, "loss": 1.2108, "step": 14724 }, { "epoch": 4.625635589060997, "grad_norm": 1.078125, "learning_rate": 1.5093621072675343e-06, "loss": 1.0871, "step": 14726 }, { "epoch": 4.626263816086539, "grad_norm": 0.9609375, "learning_rate": 1.5068232307204065e-06, "loss": 1.2076, "step": 14728 }, { "epoch": 4.62689204311208, "grad_norm": 1.0390625, "learning_rate": 1.5042843541732785e-06, "loss": 1.1703, "step": 14730 }, { "epoch": 4.627520270137621, "grad_norm": 1.1484375, "learning_rate": 1.5017454776261505e-06, "loss": 1.0887, "step": 14732 }, { "epoch": 4.628148497163163, "grad_norm": 0.96875, "learning_rate": 1.4992066010790226e-06, "loss": 1.1768, "step": 14734 }, { "epoch": 4.628776724188704, "grad_norm": 0.9609375, "learning_rate": 1.4966677245318948e-06, "loss": 1.1627, "step": 14736 }, { "epoch": 4.629404951214245, "grad_norm": 0.94140625, "learning_rate": 1.4941288479847668e-06, "loss": 1.1482, "step": 14738 }, { "epoch": 4.6300331782397866, "grad_norm": 0.9375, "learning_rate": 1.4915899714376388e-06, "loss": 1.164, "step": 14740 }, { "epoch": 4.630661405265328, "grad_norm": 0.98046875, "learning_rate": 1.489051094890511e-06, "loss": 1.1114, "step": 14742 }, { "epoch": 4.631289632290869, "grad_norm": 0.92578125, "learning_rate": 1.486512218343383e-06, "loss": 1.1191, "step": 14744 }, { "epoch": 4.6319178593164105, "grad_norm": 0.96484375, "learning_rate": 1.4839733417962555e-06, "loss": 1.1818, "step": 14746 }, { "epoch": 4.632546086341952, "grad_norm": 0.8984375, "learning_rate": 1.4814344652491275e-06, "loss": 1.2687, "step": 14748 }, { "epoch": 4.633174313367493, "grad_norm": 1.1015625, "learning_rate": 1.4788955887019995e-06, "loss": 1.2206, "step": 14750 }, { "epoch": 4.6338025403930345, "grad_norm": 0.9375, "learning_rate": 1.4763567121548716e-06, "loss": 1.1332, "step": 14752 }, { "epoch": 4.634430767418576, "grad_norm": 1.1171875, "learning_rate": 1.4738178356077438e-06, "loss": 1.0737, "step": 14754 }, { "epoch": 4.635058994444117, "grad_norm": 0.984375, "learning_rate": 1.4712789590606158e-06, "loss": 1.2199, "step": 14756 }, { "epoch": 4.6356872214696585, "grad_norm": 0.92578125, "learning_rate": 1.4687400825134878e-06, "loss": 1.0712, "step": 14758 }, { "epoch": 4.6363154484952, "grad_norm": 1.078125, "learning_rate": 1.46620120596636e-06, "loss": 1.16, "step": 14760 }, { "epoch": 4.636943675520741, "grad_norm": 1.015625, "learning_rate": 1.463662329419232e-06, "loss": 1.173, "step": 14762 }, { "epoch": 4.6375719025462825, "grad_norm": 1.0703125, "learning_rate": 1.461123452872104e-06, "loss": 1.0255, "step": 14764 }, { "epoch": 4.638200129571824, "grad_norm": 1.0078125, "learning_rate": 1.4585845763249761e-06, "loss": 1.216, "step": 14766 }, { "epoch": 4.638828356597365, "grad_norm": 1.015625, "learning_rate": 1.4560456997778485e-06, "loss": 1.16, "step": 14768 }, { "epoch": 4.6394565836229065, "grad_norm": 1.0546875, "learning_rate": 1.4535068232307206e-06, "loss": 1.0849, "step": 14770 }, { "epoch": 4.640084810648448, "grad_norm": 0.9296875, "learning_rate": 1.4509679466835928e-06, "loss": 1.2067, "step": 14772 }, { "epoch": 4.640713037673989, "grad_norm": 1.015625, "learning_rate": 1.4484290701364648e-06, "loss": 1.1895, "step": 14774 }, { "epoch": 4.6413412646995305, "grad_norm": 0.9375, "learning_rate": 1.4458901935893368e-06, "loss": 1.08, "step": 14776 }, { "epoch": 4.641969491725072, "grad_norm": 1.015625, "learning_rate": 1.4433513170422088e-06, "loss": 1.2596, "step": 14778 }, { "epoch": 4.642597718750613, "grad_norm": 1.0703125, "learning_rate": 1.440812440495081e-06, "loss": 1.0715, "step": 14780 }, { "epoch": 4.643225945776155, "grad_norm": 0.9453125, "learning_rate": 1.438273563947953e-06, "loss": 1.1192, "step": 14782 }, { "epoch": 4.643854172801696, "grad_norm": 1.015625, "learning_rate": 1.4357346874008251e-06, "loss": 1.165, "step": 14784 }, { "epoch": 4.644482399827238, "grad_norm": 0.9609375, "learning_rate": 1.4331958108536973e-06, "loss": 1.2104, "step": 14786 }, { "epoch": 4.645110626852778, "grad_norm": 0.94140625, "learning_rate": 1.4306569343065694e-06, "loss": 1.2451, "step": 14788 }, { "epoch": 4.645738853878321, "grad_norm": 0.8828125, "learning_rate": 1.4281180577594416e-06, "loss": 1.0284, "step": 14790 }, { "epoch": 4.646367080903862, "grad_norm": 1.0, "learning_rate": 1.4255791812123138e-06, "loss": 1.1482, "step": 14792 }, { "epoch": 4.646995307929403, "grad_norm": 1.0234375, "learning_rate": 1.4230403046651858e-06, "loss": 1.2297, "step": 14794 }, { "epoch": 4.647623534954945, "grad_norm": 0.92578125, "learning_rate": 1.4205014281180579e-06, "loss": 1.1821, "step": 14796 }, { "epoch": 4.648251761980486, "grad_norm": 0.9765625, "learning_rate": 1.41796255157093e-06, "loss": 1.2428, "step": 14798 }, { "epoch": 4.648879989006027, "grad_norm": 0.90625, "learning_rate": 1.415423675023802e-06, "loss": 0.9956, "step": 14800 }, { "epoch": 4.649508216031569, "grad_norm": 0.90234375, "learning_rate": 1.4128847984766741e-06, "loss": 1.0666, "step": 14802 }, { "epoch": 4.65013644305711, "grad_norm": 1.0078125, "learning_rate": 1.4103459219295461e-06, "loss": 1.1463, "step": 14804 }, { "epoch": 4.650764670082651, "grad_norm": 1.0390625, "learning_rate": 1.4078070453824184e-06, "loss": 1.2698, "step": 14806 }, { "epoch": 4.651392897108193, "grad_norm": 1.0390625, "learning_rate": 1.4052681688352904e-06, "loss": 1.0824, "step": 14808 }, { "epoch": 4.652021124133734, "grad_norm": 0.9375, "learning_rate": 1.4027292922881628e-06, "loss": 1.2332, "step": 14810 }, { "epoch": 4.652649351159275, "grad_norm": 0.94140625, "learning_rate": 1.4001904157410348e-06, "loss": 1.2322, "step": 14812 }, { "epoch": 4.653277578184817, "grad_norm": 0.99609375, "learning_rate": 1.3976515391939069e-06, "loss": 1.0367, "step": 14814 }, { "epoch": 4.653905805210358, "grad_norm": 0.99609375, "learning_rate": 1.3951126626467789e-06, "loss": 1.1759, "step": 14816 }, { "epoch": 4.654534032235899, "grad_norm": 0.921875, "learning_rate": 1.392573786099651e-06, "loss": 1.1402, "step": 14818 }, { "epoch": 4.655162259261441, "grad_norm": 1.0390625, "learning_rate": 1.3900349095525231e-06, "loss": 1.2601, "step": 14820 }, { "epoch": 4.655790486286982, "grad_norm": 1.0703125, "learning_rate": 1.3874960330053951e-06, "loss": 1.1312, "step": 14822 }, { "epoch": 4.656418713312523, "grad_norm": 0.94921875, "learning_rate": 1.3849571564582674e-06, "loss": 1.1813, "step": 14824 }, { "epoch": 4.6570469403380645, "grad_norm": 0.8984375, "learning_rate": 1.3824182799111394e-06, "loss": 1.1741, "step": 14826 }, { "epoch": 4.657675167363606, "grad_norm": 0.99609375, "learning_rate": 1.3798794033640114e-06, "loss": 1.2107, "step": 14828 }, { "epoch": 4.658303394389147, "grad_norm": 0.94921875, "learning_rate": 1.3773405268168834e-06, "loss": 1.1582, "step": 14830 }, { "epoch": 4.6589316214146885, "grad_norm": 0.98828125, "learning_rate": 1.3748016502697559e-06, "loss": 1.2314, "step": 14832 }, { "epoch": 4.65955984844023, "grad_norm": 1.0078125, "learning_rate": 1.3722627737226279e-06, "loss": 1.1846, "step": 14834 }, { "epoch": 4.660188075465771, "grad_norm": 1.03125, "learning_rate": 1.3697238971755e-06, "loss": 1.1117, "step": 14836 }, { "epoch": 4.6608163024913125, "grad_norm": 0.9765625, "learning_rate": 1.3671850206283721e-06, "loss": 1.0011, "step": 14838 }, { "epoch": 4.661444529516854, "grad_norm": 0.94140625, "learning_rate": 1.3646461440812441e-06, "loss": 1.2025, "step": 14840 }, { "epoch": 4.662072756542395, "grad_norm": 0.9375, "learning_rate": 1.3621072675341162e-06, "loss": 1.1791, "step": 14842 }, { "epoch": 4.6627009835679365, "grad_norm": 0.9296875, "learning_rate": 1.3595683909869884e-06, "loss": 1.0684, "step": 14844 }, { "epoch": 4.663329210593478, "grad_norm": 0.9375, "learning_rate": 1.3570295144398604e-06, "loss": 1.1227, "step": 14846 }, { "epoch": 4.66395743761902, "grad_norm": 0.98828125, "learning_rate": 1.3544906378927324e-06, "loss": 1.1784, "step": 14848 }, { "epoch": 4.6645856646445605, "grad_norm": 1.0234375, "learning_rate": 1.3519517613456047e-06, "loss": 1.2425, "step": 14850 }, { "epoch": 4.665213891670103, "grad_norm": 0.99609375, "learning_rate": 1.3494128847984767e-06, "loss": 1.2007, "step": 14852 }, { "epoch": 4.665842118695644, "grad_norm": 0.90234375, "learning_rate": 1.3468740082513491e-06, "loss": 1.2107, "step": 14854 }, { "epoch": 4.666470345721185, "grad_norm": 0.93359375, "learning_rate": 1.3443351317042211e-06, "loss": 1.1543, "step": 14856 }, { "epoch": 4.667098572746727, "grad_norm": 0.91796875, "learning_rate": 1.3417962551570931e-06, "loss": 1.2725, "step": 14858 }, { "epoch": 4.667726799772268, "grad_norm": 0.96484375, "learning_rate": 1.3392573786099652e-06, "loss": 1.1985, "step": 14860 }, { "epoch": 4.668355026797809, "grad_norm": 1.0234375, "learning_rate": 1.3367185020628374e-06, "loss": 1.1632, "step": 14862 }, { "epoch": 4.668983253823351, "grad_norm": 0.97265625, "learning_rate": 1.3341796255157094e-06, "loss": 1.062, "step": 14864 }, { "epoch": 4.669611480848892, "grad_norm": 0.90625, "learning_rate": 1.3316407489685814e-06, "loss": 1.1362, "step": 14866 }, { "epoch": 4.670239707874433, "grad_norm": 0.95703125, "learning_rate": 1.3291018724214537e-06, "loss": 1.1849, "step": 14868 }, { "epoch": 4.670867934899975, "grad_norm": 0.984375, "learning_rate": 1.3265629958743257e-06, "loss": 1.2939, "step": 14870 }, { "epoch": 4.671496161925516, "grad_norm": 1.0390625, "learning_rate": 1.3240241193271977e-06, "loss": 1.0688, "step": 14872 }, { "epoch": 4.672124388951057, "grad_norm": 1.0625, "learning_rate": 1.3214852427800701e-06, "loss": 0.9568, "step": 14874 }, { "epoch": 4.672752615976599, "grad_norm": 1.015625, "learning_rate": 1.3189463662329421e-06, "loss": 1.0625, "step": 14876 }, { "epoch": 4.67338084300214, "grad_norm": 1.015625, "learning_rate": 1.3164074896858142e-06, "loss": 1.1576, "step": 14878 }, { "epoch": 4.674009070027681, "grad_norm": 0.953125, "learning_rate": 1.3138686131386864e-06, "loss": 1.0933, "step": 14880 }, { "epoch": 4.674637297053223, "grad_norm": 0.89453125, "learning_rate": 1.3113297365915584e-06, "loss": 1.2696, "step": 14882 }, { "epoch": 4.675265524078764, "grad_norm": 0.93359375, "learning_rate": 1.3087908600444304e-06, "loss": 1.188, "step": 14884 }, { "epoch": 4.675893751104305, "grad_norm": 0.8984375, "learning_rate": 1.3062519834973024e-06, "loss": 1.2054, "step": 14886 }, { "epoch": 4.676521978129847, "grad_norm": 0.9375, "learning_rate": 1.3037131069501747e-06, "loss": 1.0712, "step": 14888 }, { "epoch": 4.677150205155388, "grad_norm": 0.95703125, "learning_rate": 1.3011742304030467e-06, "loss": 1.358, "step": 14890 }, { "epoch": 4.677778432180929, "grad_norm": 0.98828125, "learning_rate": 1.2986353538559187e-06, "loss": 1.0714, "step": 14892 }, { "epoch": 4.678406659206471, "grad_norm": 1.046875, "learning_rate": 1.296096477308791e-06, "loss": 1.0999, "step": 14894 }, { "epoch": 4.679034886232012, "grad_norm": 0.91796875, "learning_rate": 1.2935576007616632e-06, "loss": 1.2003, "step": 14896 }, { "epoch": 4.679663113257553, "grad_norm": 0.94921875, "learning_rate": 1.2910187242145352e-06, "loss": 1.2454, "step": 14898 }, { "epoch": 4.680291340283095, "grad_norm": 0.9765625, "learning_rate": 1.2884798476674074e-06, "loss": 1.1633, "step": 14900 }, { "epoch": 4.680919567308636, "grad_norm": 0.99609375, "learning_rate": 1.2859409711202794e-06, "loss": 1.2118, "step": 14902 }, { "epoch": 4.681547794334177, "grad_norm": 1.0234375, "learning_rate": 1.2834020945731515e-06, "loss": 1.0957, "step": 14904 }, { "epoch": 4.6821760213597186, "grad_norm": 0.98046875, "learning_rate": 1.2808632180260237e-06, "loss": 1.2071, "step": 14906 }, { "epoch": 4.68280424838526, "grad_norm": 0.984375, "learning_rate": 1.2783243414788957e-06, "loss": 1.1693, "step": 14908 }, { "epoch": 4.683432475410801, "grad_norm": 0.85546875, "learning_rate": 1.2757854649317677e-06, "loss": 1.1092, "step": 14910 }, { "epoch": 4.6840607024363425, "grad_norm": 1.0625, "learning_rate": 1.2732465883846397e-06, "loss": 1.1041, "step": 14912 }, { "epoch": 4.684688929461885, "grad_norm": 0.984375, "learning_rate": 1.270707711837512e-06, "loss": 1.2108, "step": 14914 }, { "epoch": 4.685317156487425, "grad_norm": 1.0, "learning_rate": 1.268168835290384e-06, "loss": 0.9887, "step": 14916 }, { "epoch": 4.685945383512967, "grad_norm": 0.984375, "learning_rate": 1.2656299587432564e-06, "loss": 1.0636, "step": 14918 }, { "epoch": 4.686573610538509, "grad_norm": 0.98046875, "learning_rate": 1.2630910821961284e-06, "loss": 1.269, "step": 14920 }, { "epoch": 4.68720183756405, "grad_norm": 0.95703125, "learning_rate": 1.2605522056490005e-06, "loss": 1.1552, "step": 14922 }, { "epoch": 4.687830064589591, "grad_norm": 0.9140625, "learning_rate": 1.2580133291018725e-06, "loss": 1.0905, "step": 14924 }, { "epoch": 4.688458291615133, "grad_norm": 1.015625, "learning_rate": 1.2554744525547447e-06, "loss": 1.0357, "step": 14926 }, { "epoch": 4.689086518640674, "grad_norm": 0.97265625, "learning_rate": 1.2529355760076167e-06, "loss": 1.3681, "step": 14928 }, { "epoch": 4.689714745666215, "grad_norm": 0.9609375, "learning_rate": 1.2503966994604887e-06, "loss": 1.1213, "step": 14930 }, { "epoch": 4.690342972691757, "grad_norm": 1.015625, "learning_rate": 1.247857822913361e-06, "loss": 1.127, "step": 14932 }, { "epoch": 4.690971199717298, "grad_norm": 1.0234375, "learning_rate": 1.2453189463662332e-06, "loss": 1.0943, "step": 14934 }, { "epoch": 4.691599426742839, "grad_norm": 0.9921875, "learning_rate": 1.2427800698191052e-06, "loss": 1.076, "step": 14936 }, { "epoch": 4.692227653768381, "grad_norm": 1.0, "learning_rate": 1.2402411932719772e-06, "loss": 1.0147, "step": 14938 }, { "epoch": 4.692855880793922, "grad_norm": 1.0625, "learning_rate": 1.2377023167248492e-06, "loss": 1.046, "step": 14940 }, { "epoch": 4.693484107819463, "grad_norm": 0.9453125, "learning_rate": 1.2351634401777215e-06, "loss": 1.1478, "step": 14942 }, { "epoch": 4.694112334845005, "grad_norm": 1.046875, "learning_rate": 1.2326245636305937e-06, "loss": 1.0814, "step": 14944 }, { "epoch": 4.694740561870546, "grad_norm": 0.9765625, "learning_rate": 1.2300856870834657e-06, "loss": 1.0907, "step": 14946 }, { "epoch": 4.695368788896087, "grad_norm": 0.94921875, "learning_rate": 1.2275468105363377e-06, "loss": 1.24, "step": 14948 }, { "epoch": 4.695997015921629, "grad_norm": 0.98046875, "learning_rate": 1.2250079339892098e-06, "loss": 1.1675, "step": 14950 }, { "epoch": 4.69662524294717, "grad_norm": 0.86328125, "learning_rate": 1.222469057442082e-06, "loss": 1.0131, "step": 14952 }, { "epoch": 4.697253469972711, "grad_norm": 1.0078125, "learning_rate": 1.219930180894954e-06, "loss": 1.1332, "step": 14954 }, { "epoch": 4.697881696998253, "grad_norm": 0.984375, "learning_rate": 1.2173913043478262e-06, "loss": 1.2315, "step": 14956 }, { "epoch": 4.698509924023794, "grad_norm": 1.0078125, "learning_rate": 1.2148524278006983e-06, "loss": 1.0435, "step": 14958 }, { "epoch": 4.699138151049335, "grad_norm": 0.921875, "learning_rate": 1.2123135512535705e-06, "loss": 1.1157, "step": 14960 }, { "epoch": 4.699766378074877, "grad_norm": 0.94140625, "learning_rate": 1.2097746747064425e-06, "loss": 0.996, "step": 14962 }, { "epoch": 4.700394605100418, "grad_norm": 1.03125, "learning_rate": 1.2072357981593145e-06, "loss": 1.1547, "step": 14964 }, { "epoch": 4.701022832125959, "grad_norm": 0.9375, "learning_rate": 1.2046969216121867e-06, "loss": 1.0992, "step": 14966 }, { "epoch": 4.701651059151501, "grad_norm": 0.9453125, "learning_rate": 1.2021580450650588e-06, "loss": 1.0818, "step": 14968 }, { "epoch": 4.702279286177042, "grad_norm": 1.0234375, "learning_rate": 1.199619168517931e-06, "loss": 0.9574, "step": 14970 }, { "epoch": 4.702907513202583, "grad_norm": 0.9453125, "learning_rate": 1.197080291970803e-06, "loss": 1.1554, "step": 14972 }, { "epoch": 4.703535740228125, "grad_norm": 1.109375, "learning_rate": 1.194541415423675e-06, "loss": 1.1617, "step": 14974 }, { "epoch": 4.704163967253666, "grad_norm": 0.9921875, "learning_rate": 1.1920025388765473e-06, "loss": 1.0841, "step": 14976 }, { "epoch": 4.704792194279207, "grad_norm": 1.0546875, "learning_rate": 1.1894636623294193e-06, "loss": 1.0177, "step": 14978 }, { "epoch": 4.7054204213047495, "grad_norm": 0.9609375, "learning_rate": 1.1869247857822915e-06, "loss": 1.0366, "step": 14980 }, { "epoch": 4.70604864833029, "grad_norm": 0.94140625, "learning_rate": 1.1843859092351635e-06, "loss": 1.1261, "step": 14982 }, { "epoch": 4.706676875355832, "grad_norm": 1.0078125, "learning_rate": 1.1818470326880355e-06, "loss": 1.0877, "step": 14984 }, { "epoch": 4.7073051023813735, "grad_norm": 0.93359375, "learning_rate": 1.1793081561409078e-06, "loss": 1.067, "step": 14986 }, { "epoch": 4.707933329406915, "grad_norm": 0.9609375, "learning_rate": 1.17676927959378e-06, "loss": 1.1328, "step": 14988 }, { "epoch": 4.708561556432456, "grad_norm": 1.0, "learning_rate": 1.174230403046652e-06, "loss": 1.1884, "step": 14990 }, { "epoch": 4.709189783457997, "grad_norm": 1.09375, "learning_rate": 1.171691526499524e-06, "loss": 1.0338, "step": 14992 }, { "epoch": 4.709818010483539, "grad_norm": 1.0625, "learning_rate": 1.169152649952396e-06, "loss": 0.9574, "step": 14994 }, { "epoch": 4.71044623750908, "grad_norm": 0.94921875, "learning_rate": 1.1666137734052683e-06, "loss": 1.2099, "step": 14996 }, { "epoch": 4.711074464534621, "grad_norm": 0.93359375, "learning_rate": 1.1640748968581405e-06, "loss": 1.1958, "step": 14998 }, { "epoch": 4.711702691560163, "grad_norm": 1.0, "learning_rate": 1.1615360203110125e-06, "loss": 1.1558, "step": 15000 }, { "epoch": 4.712330918585704, "grad_norm": 0.984375, "learning_rate": 1.1589971437638845e-06, "loss": 1.1076, "step": 15002 }, { "epoch": 4.712959145611245, "grad_norm": 0.98828125, "learning_rate": 1.1564582672167566e-06, "loss": 1.1803, "step": 15004 }, { "epoch": 4.713587372636787, "grad_norm": 1.0078125, "learning_rate": 1.1539193906696288e-06, "loss": 1.0951, "step": 15006 }, { "epoch": 4.714215599662328, "grad_norm": 1.0703125, "learning_rate": 1.1513805141225008e-06, "loss": 1.1017, "step": 15008 }, { "epoch": 4.714843826687869, "grad_norm": 1.0546875, "learning_rate": 1.148841637575373e-06, "loss": 1.2274, "step": 15010 }, { "epoch": 4.715472053713411, "grad_norm": 1.171875, "learning_rate": 1.146302761028245e-06, "loss": 1.1824, "step": 15012 }, { "epoch": 4.716100280738952, "grad_norm": 0.91796875, "learning_rate": 1.1437638844811173e-06, "loss": 1.1938, "step": 15014 }, { "epoch": 4.716728507764493, "grad_norm": 0.875, "learning_rate": 1.1412250079339893e-06, "loss": 1.1027, "step": 15016 }, { "epoch": 4.717356734790035, "grad_norm": 1.0234375, "learning_rate": 1.1386861313868613e-06, "loss": 1.1364, "step": 15018 }, { "epoch": 4.717984961815576, "grad_norm": 0.953125, "learning_rate": 1.1361472548397335e-06, "loss": 1.0902, "step": 15020 }, { "epoch": 4.718613188841117, "grad_norm": 0.9921875, "learning_rate": 1.1336083782926056e-06, "loss": 1.1716, "step": 15022 }, { "epoch": 4.719241415866659, "grad_norm": 0.984375, "learning_rate": 1.1310695017454778e-06, "loss": 1.3718, "step": 15024 }, { "epoch": 4.7198696428922, "grad_norm": 0.93359375, "learning_rate": 1.1285306251983498e-06, "loss": 1.0631, "step": 15026 }, { "epoch": 4.720497869917741, "grad_norm": 1.0234375, "learning_rate": 1.1259917486512218e-06, "loss": 1.2672, "step": 15028 }, { "epoch": 4.721126096943283, "grad_norm": 0.94921875, "learning_rate": 1.123452872104094e-06, "loss": 1.1213, "step": 15030 }, { "epoch": 4.721754323968824, "grad_norm": 0.9375, "learning_rate": 1.120913995556966e-06, "loss": 1.0644, "step": 15032 }, { "epoch": 4.722382550994365, "grad_norm": 0.92578125, "learning_rate": 1.1183751190098383e-06, "loss": 1.1257, "step": 15034 }, { "epoch": 4.723010778019907, "grad_norm": 0.98828125, "learning_rate": 1.1158362424627103e-06, "loss": 1.2276, "step": 15036 }, { "epoch": 4.723639005045448, "grad_norm": 0.96875, "learning_rate": 1.1132973659155823e-06, "loss": 1.1354, "step": 15038 }, { "epoch": 4.724267232070989, "grad_norm": 0.9765625, "learning_rate": 1.1107584893684546e-06, "loss": 1.1053, "step": 15040 }, { "epoch": 4.724895459096531, "grad_norm": 0.9921875, "learning_rate": 1.1082196128213268e-06, "loss": 1.0892, "step": 15042 }, { "epoch": 4.725523686122072, "grad_norm": 0.91796875, "learning_rate": 1.1056807362741988e-06, "loss": 1.1426, "step": 15044 }, { "epoch": 4.726151913147614, "grad_norm": 0.9609375, "learning_rate": 1.1031418597270708e-06, "loss": 1.2707, "step": 15046 }, { "epoch": 4.726780140173155, "grad_norm": 0.94921875, "learning_rate": 1.1006029831799428e-06, "loss": 1.1114, "step": 15048 }, { "epoch": 4.727408367198697, "grad_norm": 1.0390625, "learning_rate": 1.098064106632815e-06, "loss": 1.117, "step": 15050 }, { "epoch": 4.728036594224238, "grad_norm": 0.9375, "learning_rate": 1.0955252300856873e-06, "loss": 1.2398, "step": 15052 }, { "epoch": 4.7286648212497795, "grad_norm": 1.0, "learning_rate": 1.0929863535385593e-06, "loss": 1.2362, "step": 15054 }, { "epoch": 4.729293048275321, "grad_norm": 1.046875, "learning_rate": 1.0904474769914313e-06, "loss": 1.0746, "step": 15056 }, { "epoch": 4.729921275300862, "grad_norm": 0.91796875, "learning_rate": 1.0879086004443034e-06, "loss": 1.2686, "step": 15058 }, { "epoch": 4.7305495023264035, "grad_norm": 0.95703125, "learning_rate": 1.0853697238971756e-06, "loss": 1.1266, "step": 15060 }, { "epoch": 4.731177729351945, "grad_norm": 0.95703125, "learning_rate": 1.0828308473500478e-06, "loss": 1.1795, "step": 15062 }, { "epoch": 4.731805956377486, "grad_norm": 0.95703125, "learning_rate": 1.0802919708029198e-06, "loss": 1.1361, "step": 15064 }, { "epoch": 4.7324341834030275, "grad_norm": 1.0625, "learning_rate": 1.0777530942557919e-06, "loss": 1.1068, "step": 15066 }, { "epoch": 4.733062410428569, "grad_norm": 0.98828125, "learning_rate": 1.075214217708664e-06, "loss": 1.222, "step": 15068 }, { "epoch": 4.73369063745411, "grad_norm": 1.0390625, "learning_rate": 1.072675341161536e-06, "loss": 1.078, "step": 15070 }, { "epoch": 4.7343188644796514, "grad_norm": 1.0, "learning_rate": 1.0701364646144081e-06, "loss": 1.0485, "step": 15072 }, { "epoch": 4.734947091505193, "grad_norm": 0.97265625, "learning_rate": 1.0675975880672803e-06, "loss": 1.0691, "step": 15074 }, { "epoch": 4.735575318530734, "grad_norm": 0.96484375, "learning_rate": 1.0650587115201524e-06, "loss": 1.2113, "step": 15076 }, { "epoch": 4.736203545556275, "grad_norm": 1.015625, "learning_rate": 1.0625198349730246e-06, "loss": 1.1526, "step": 15078 }, { "epoch": 4.736831772581817, "grad_norm": 0.94921875, "learning_rate": 1.0599809584258966e-06, "loss": 1.1037, "step": 15080 }, { "epoch": 4.737459999607358, "grad_norm": 1.03125, "learning_rate": 1.0574420818787686e-06, "loss": 1.0871, "step": 15082 }, { "epoch": 4.738088226632899, "grad_norm": 1.046875, "learning_rate": 1.0549032053316409e-06, "loss": 1.2796, "step": 15084 }, { "epoch": 4.738716453658441, "grad_norm": 0.9921875, "learning_rate": 1.0523643287845129e-06, "loss": 1.0627, "step": 15086 }, { "epoch": 4.739344680683982, "grad_norm": 0.94140625, "learning_rate": 1.049825452237385e-06, "loss": 1.125, "step": 15088 }, { "epoch": 4.739972907709523, "grad_norm": 1.015625, "learning_rate": 1.0472865756902571e-06, "loss": 1.1793, "step": 15090 }, { "epoch": 4.740601134735065, "grad_norm": 0.96484375, "learning_rate": 1.0447476991431291e-06, "loss": 1.1474, "step": 15092 }, { "epoch": 4.741229361760606, "grad_norm": 0.96875, "learning_rate": 1.0422088225960014e-06, "loss": 1.0737, "step": 15094 }, { "epoch": 4.741857588786147, "grad_norm": 0.93359375, "learning_rate": 1.0396699460488736e-06, "loss": 1.0972, "step": 15096 }, { "epoch": 4.742485815811689, "grad_norm": 0.8828125, "learning_rate": 1.0371310695017456e-06, "loss": 1.1673, "step": 15098 }, { "epoch": 4.74311404283723, "grad_norm": 0.96484375, "learning_rate": 1.0345921929546176e-06, "loss": 1.1071, "step": 15100 }, { "epoch": 4.743742269862771, "grad_norm": 1.046875, "learning_rate": 1.0320533164074896e-06, "loss": 1.0716, "step": 15102 }, { "epoch": 4.744370496888313, "grad_norm": 0.984375, "learning_rate": 1.0295144398603619e-06, "loss": 1.22, "step": 15104 }, { "epoch": 4.744998723913854, "grad_norm": 0.9453125, "learning_rate": 1.0269755633132341e-06, "loss": 1.2009, "step": 15106 }, { "epoch": 4.745626950939395, "grad_norm": 0.96484375, "learning_rate": 1.0244366867661061e-06, "loss": 1.2027, "step": 15108 }, { "epoch": 4.746255177964937, "grad_norm": 0.92578125, "learning_rate": 1.0218978102189781e-06, "loss": 1.162, "step": 15110 }, { "epoch": 4.746883404990479, "grad_norm": 0.953125, "learning_rate": 1.0193589336718502e-06, "loss": 1.1368, "step": 15112 }, { "epoch": 4.747511632016019, "grad_norm": 0.875, "learning_rate": 1.0168200571247224e-06, "loss": 1.1831, "step": 15114 }, { "epoch": 4.7481398590415616, "grad_norm": 1.0078125, "learning_rate": 1.0142811805775946e-06, "loss": 0.8592, "step": 15116 }, { "epoch": 4.748768086067103, "grad_norm": 0.96484375, "learning_rate": 1.0117423040304666e-06, "loss": 1.0103, "step": 15118 }, { "epoch": 4.749396313092644, "grad_norm": 0.984375, "learning_rate": 1.0092034274833387e-06, "loss": 1.2007, "step": 15120 }, { "epoch": 4.7500245401181855, "grad_norm": 0.9765625, "learning_rate": 1.0066645509362109e-06, "loss": 1.1683, "step": 15122 }, { "epoch": 4.750652767143727, "grad_norm": 1.0234375, "learning_rate": 1.004125674389083e-06, "loss": 1.0865, "step": 15124 }, { "epoch": 4.751280994169268, "grad_norm": 0.89453125, "learning_rate": 1.001586797841955e-06, "loss": 1.0646, "step": 15126 }, { "epoch": 4.7519092211948095, "grad_norm": 0.89453125, "learning_rate": 9.990479212948271e-07, "loss": 1.0584, "step": 15128 }, { "epoch": 4.752537448220351, "grad_norm": 1.1328125, "learning_rate": 9.965090447476992e-07, "loss": 1.0445, "step": 15130 }, { "epoch": 4.753165675245892, "grad_norm": 0.9375, "learning_rate": 9.939701682005714e-07, "loss": 1.1781, "step": 15132 }, { "epoch": 4.7537939022714335, "grad_norm": 0.953125, "learning_rate": 9.914312916534434e-07, "loss": 1.0818, "step": 15134 }, { "epoch": 4.754422129296975, "grad_norm": 0.8671875, "learning_rate": 9.888924151063154e-07, "loss": 1.0624, "step": 15136 }, { "epoch": 4.755050356322516, "grad_norm": 1.0, "learning_rate": 9.863535385591877e-07, "loss": 1.0675, "step": 15138 }, { "epoch": 4.7556785833480575, "grad_norm": 0.921875, "learning_rate": 9.838146620120597e-07, "loss": 1.1074, "step": 15140 }, { "epoch": 4.756306810373599, "grad_norm": 0.9765625, "learning_rate": 9.81275785464932e-07, "loss": 1.0704, "step": 15142 }, { "epoch": 4.75693503739914, "grad_norm": 0.96875, "learning_rate": 9.78736908917804e-07, "loss": 1.2322, "step": 15144 }, { "epoch": 4.7575632644246815, "grad_norm": 1.0625, "learning_rate": 9.76198032370676e-07, "loss": 1.1159, "step": 15146 }, { "epoch": 4.758191491450223, "grad_norm": 1.03125, "learning_rate": 9.736591558235482e-07, "loss": 1.1039, "step": 15148 }, { "epoch": 4.758819718475764, "grad_norm": 1.0625, "learning_rate": 9.711202792764204e-07, "loss": 1.0475, "step": 15150 }, { "epoch": 4.7594479455013055, "grad_norm": 0.96484375, "learning_rate": 9.685814027292924e-07, "loss": 1.1419, "step": 15152 }, { "epoch": 4.760076172526847, "grad_norm": 0.9609375, "learning_rate": 9.660425261821644e-07, "loss": 1.0875, "step": 15154 }, { "epoch": 4.760704399552388, "grad_norm": 1.0390625, "learning_rate": 9.635036496350364e-07, "loss": 1.1086, "step": 15156 }, { "epoch": 4.761332626577929, "grad_norm": 1.015625, "learning_rate": 9.609647730879087e-07, "loss": 1.1878, "step": 15158 }, { "epoch": 4.761960853603471, "grad_norm": 1.03125, "learning_rate": 9.58425896540781e-07, "loss": 1.0117, "step": 15160 }, { "epoch": 4.762589080629012, "grad_norm": 0.94921875, "learning_rate": 9.55887019993653e-07, "loss": 1.2361, "step": 15162 }, { "epoch": 4.763217307654553, "grad_norm": 1.015625, "learning_rate": 9.533481434465249e-07, "loss": 1.12, "step": 15164 }, { "epoch": 4.763845534680095, "grad_norm": 1.015625, "learning_rate": 9.508092668993971e-07, "loss": 1.162, "step": 15166 }, { "epoch": 4.764473761705636, "grad_norm": 1.109375, "learning_rate": 9.482703903522692e-07, "loss": 1.2341, "step": 15168 }, { "epoch": 4.765101988731177, "grad_norm": 0.98828125, "learning_rate": 9.457315138051413e-07, "loss": 1.216, "step": 15170 }, { "epoch": 4.765730215756719, "grad_norm": 1.015625, "learning_rate": 9.431926372580134e-07, "loss": 1.0951, "step": 15172 }, { "epoch": 4.76635844278226, "grad_norm": 0.921875, "learning_rate": 9.406537607108856e-07, "loss": 1.0707, "step": 15174 }, { "epoch": 4.766986669807801, "grad_norm": 0.9765625, "learning_rate": 9.381148841637576e-07, "loss": 1.1186, "step": 15176 }, { "epoch": 4.767614896833344, "grad_norm": 1.0078125, "learning_rate": 9.355760076166297e-07, "loss": 1.2066, "step": 15178 }, { "epoch": 4.768243123858884, "grad_norm": 0.9453125, "learning_rate": 9.330371310695019e-07, "loss": 1.1424, "step": 15180 }, { "epoch": 4.768871350884426, "grad_norm": 0.9375, "learning_rate": 9.304982545223739e-07, "loss": 1.1315, "step": 15182 }, { "epoch": 4.769499577909968, "grad_norm": 1.03125, "learning_rate": 9.279593779752461e-07, "loss": 1.0161, "step": 15184 }, { "epoch": 4.770127804935509, "grad_norm": 0.83203125, "learning_rate": 9.254205014281181e-07, "loss": 1.0353, "step": 15186 }, { "epoch": 4.77075603196105, "grad_norm": 0.953125, "learning_rate": 9.228816248809902e-07, "loss": 1.0458, "step": 15188 }, { "epoch": 4.771384258986592, "grad_norm": 1.015625, "learning_rate": 9.203427483338622e-07, "loss": 1.0897, "step": 15190 }, { "epoch": 4.772012486012133, "grad_norm": 0.95703125, "learning_rate": 9.178038717867345e-07, "loss": 1.1919, "step": 15192 }, { "epoch": 4.772640713037674, "grad_norm": 0.99609375, "learning_rate": 9.152649952396066e-07, "loss": 1.03, "step": 15194 }, { "epoch": 4.773268940063216, "grad_norm": 0.97265625, "learning_rate": 9.127261186924787e-07, "loss": 1.0816, "step": 15196 }, { "epoch": 4.773897167088757, "grad_norm": 0.99609375, "learning_rate": 9.101872421453507e-07, "loss": 1.1122, "step": 15198 }, { "epoch": 4.774525394114298, "grad_norm": 0.94140625, "learning_rate": 9.076483655982228e-07, "loss": 1.1394, "step": 15200 }, { "epoch": 4.7751536211398395, "grad_norm": 0.93359375, "learning_rate": 9.051094890510951e-07, "loss": 1.1396, "step": 15202 }, { "epoch": 4.775781848165381, "grad_norm": 0.94921875, "learning_rate": 9.025706125039671e-07, "loss": 1.0484, "step": 15204 }, { "epoch": 4.776410075190922, "grad_norm": 1.03125, "learning_rate": 9.000317359568392e-07, "loss": 1.1761, "step": 15206 }, { "epoch": 4.7770383022164635, "grad_norm": 1.0078125, "learning_rate": 8.974928594097112e-07, "loss": 1.1599, "step": 15208 }, { "epoch": 4.777666529242005, "grad_norm": 1.015625, "learning_rate": 8.949539828625834e-07, "loss": 1.1793, "step": 15210 }, { "epoch": 4.778294756267546, "grad_norm": 0.99609375, "learning_rate": 8.924151063154554e-07, "loss": 1.0814, "step": 15212 }, { "epoch": 4.7789229832930875, "grad_norm": 1.140625, "learning_rate": 8.898762297683276e-07, "loss": 1.2292, "step": 15214 }, { "epoch": 4.779551210318629, "grad_norm": 0.921875, "learning_rate": 8.873373532211997e-07, "loss": 1.0548, "step": 15216 }, { "epoch": 4.78017943734417, "grad_norm": 0.94140625, "learning_rate": 8.847984766740717e-07, "loss": 1.1244, "step": 15218 }, { "epoch": 4.7808076643697115, "grad_norm": 1.0, "learning_rate": 8.822596001269439e-07, "loss": 1.213, "step": 15220 }, { "epoch": 4.781435891395253, "grad_norm": 1.03125, "learning_rate": 8.79720723579816e-07, "loss": 1.2337, "step": 15222 }, { "epoch": 4.782064118420794, "grad_norm": 1.0234375, "learning_rate": 8.771818470326881e-07, "loss": 1.1363, "step": 15224 }, { "epoch": 4.7826923454463355, "grad_norm": 0.8828125, "learning_rate": 8.746429704855602e-07, "loss": 1.2033, "step": 15226 }, { "epoch": 4.783320572471877, "grad_norm": 0.92578125, "learning_rate": 8.721040939384324e-07, "loss": 1.1308, "step": 15228 }, { "epoch": 4.783948799497418, "grad_norm": 1.046875, "learning_rate": 8.695652173913044e-07, "loss": 1.1566, "step": 15230 }, { "epoch": 4.7845770265229595, "grad_norm": 0.97265625, "learning_rate": 8.670263408441765e-07, "loss": 1.101, "step": 15232 }, { "epoch": 4.785205253548501, "grad_norm": 1.1796875, "learning_rate": 8.644874642970487e-07, "loss": 1.1837, "step": 15234 }, { "epoch": 4.785833480574042, "grad_norm": 0.9921875, "learning_rate": 8.619485877499207e-07, "loss": 1.0324, "step": 15236 }, { "epoch": 4.7864617075995834, "grad_norm": 0.9921875, "learning_rate": 8.594097112027929e-07, "loss": 1.2409, "step": 15238 }, { "epoch": 4.787089934625125, "grad_norm": 1.0234375, "learning_rate": 8.568708346556649e-07, "loss": 1.1796, "step": 15240 }, { "epoch": 4.787718161650666, "grad_norm": 1.03125, "learning_rate": 8.54331958108537e-07, "loss": 1.128, "step": 15242 }, { "epoch": 4.788346388676208, "grad_norm": 0.96875, "learning_rate": 8.51793081561409e-07, "loss": 0.9777, "step": 15244 }, { "epoch": 4.788974615701749, "grad_norm": 1.0078125, "learning_rate": 8.492542050142813e-07, "loss": 1.0319, "step": 15246 }, { "epoch": 4.789602842727291, "grad_norm": 0.90234375, "learning_rate": 8.467153284671534e-07, "loss": 1.1685, "step": 15248 }, { "epoch": 4.790231069752832, "grad_norm": 1.0703125, "learning_rate": 8.441764519200255e-07, "loss": 1.1952, "step": 15250 }, { "epoch": 4.790859296778374, "grad_norm": 0.97265625, "learning_rate": 8.416375753728975e-07, "loss": 1.1665, "step": 15252 }, { "epoch": 4.791487523803915, "grad_norm": 0.94921875, "learning_rate": 8.390986988257696e-07, "loss": 1.1473, "step": 15254 }, { "epoch": 4.792115750829456, "grad_norm": 0.94921875, "learning_rate": 8.365598222786419e-07, "loss": 1.1008, "step": 15256 }, { "epoch": 4.792743977854998, "grad_norm": 0.9296875, "learning_rate": 8.340209457315139e-07, "loss": 1.1902, "step": 15258 }, { "epoch": 4.793372204880539, "grad_norm": 0.89453125, "learning_rate": 8.31482069184386e-07, "loss": 1.0476, "step": 15260 }, { "epoch": 4.79400043190608, "grad_norm": 0.98046875, "learning_rate": 8.28943192637258e-07, "loss": 1.1022, "step": 15262 }, { "epoch": 4.794628658931622, "grad_norm": 1.015625, "learning_rate": 8.264043160901302e-07, "loss": 1.2541, "step": 15264 }, { "epoch": 4.795256885957163, "grad_norm": 1.0546875, "learning_rate": 8.238654395430024e-07, "loss": 0.9961, "step": 15266 }, { "epoch": 4.795885112982704, "grad_norm": 0.93359375, "learning_rate": 8.213265629958744e-07, "loss": 1.0307, "step": 15268 }, { "epoch": 4.796513340008246, "grad_norm": 0.95703125, "learning_rate": 8.187876864487465e-07, "loss": 1.0687, "step": 15270 }, { "epoch": 4.797141567033787, "grad_norm": 0.95703125, "learning_rate": 8.162488099016185e-07, "loss": 1.0549, "step": 15272 }, { "epoch": 4.797769794059328, "grad_norm": 0.91796875, "learning_rate": 8.137099333544907e-07, "loss": 1.2061, "step": 15274 }, { "epoch": 4.79839802108487, "grad_norm": 0.92578125, "learning_rate": 8.111710568073628e-07, "loss": 1.1388, "step": 15276 }, { "epoch": 4.799026248110411, "grad_norm": 0.9921875, "learning_rate": 8.086321802602349e-07, "loss": 1.0928, "step": 15278 }, { "epoch": 4.799654475135952, "grad_norm": 0.9375, "learning_rate": 8.06093303713107e-07, "loss": 1.1677, "step": 15280 }, { "epoch": 4.8002827021614936, "grad_norm": 1.015625, "learning_rate": 8.035544271659792e-07, "loss": 1.1326, "step": 15282 }, { "epoch": 4.800910929187035, "grad_norm": 1.0390625, "learning_rate": 8.010155506188512e-07, "loss": 1.0607, "step": 15284 }, { "epoch": 4.801539156212576, "grad_norm": 1.0078125, "learning_rate": 7.984766740717233e-07, "loss": 1.2741, "step": 15286 }, { "epoch": 4.8021673832381175, "grad_norm": 1.0, "learning_rate": 7.959377975245955e-07, "loss": 1.1035, "step": 15288 }, { "epoch": 4.802795610263659, "grad_norm": 0.96484375, "learning_rate": 7.933989209774675e-07, "loss": 1.121, "step": 15290 }, { "epoch": 4.8034238372892, "grad_norm": 0.88671875, "learning_rate": 7.908600444303397e-07, "loss": 1.185, "step": 15292 }, { "epoch": 4.8040520643147415, "grad_norm": 1.0546875, "learning_rate": 7.883211678832117e-07, "loss": 1.3485, "step": 15294 }, { "epoch": 4.804680291340283, "grad_norm": 0.8515625, "learning_rate": 7.857822913360838e-07, "loss": 1.1646, "step": 15296 }, { "epoch": 4.805308518365824, "grad_norm": 1.015625, "learning_rate": 7.83243414788956e-07, "loss": 1.1089, "step": 15298 }, { "epoch": 4.8059367453913655, "grad_norm": 0.9140625, "learning_rate": 7.807045382418281e-07, "loss": 1.0439, "step": 15300 }, { "epoch": 4.806564972416907, "grad_norm": 0.91796875, "learning_rate": 7.781656616947002e-07, "loss": 1.1962, "step": 15302 }, { "epoch": 4.807193199442448, "grad_norm": 1.0390625, "learning_rate": 7.756267851475723e-07, "loss": 1.0854, "step": 15304 }, { "epoch": 4.8078214264679895, "grad_norm": 0.94921875, "learning_rate": 7.730879086004443e-07, "loss": 1.2096, "step": 15306 }, { "epoch": 4.808449653493531, "grad_norm": 0.96484375, "learning_rate": 7.705490320533164e-07, "loss": 1.0913, "step": 15308 }, { "epoch": 4.809077880519073, "grad_norm": 0.92578125, "learning_rate": 7.680101555061887e-07, "loss": 1.2873, "step": 15310 }, { "epoch": 4.8097061075446135, "grad_norm": 0.94140625, "learning_rate": 7.654712789590607e-07, "loss": 1.1575, "step": 15312 }, { "epoch": 4.810334334570156, "grad_norm": 0.953125, "learning_rate": 7.629324024119328e-07, "loss": 1.2673, "step": 15314 }, { "epoch": 4.810962561595697, "grad_norm": 1.1328125, "learning_rate": 7.603935258648048e-07, "loss": 1.1792, "step": 15316 }, { "epoch": 4.811590788621238, "grad_norm": 0.96875, "learning_rate": 7.57854649317677e-07, "loss": 1.1036, "step": 15318 }, { "epoch": 4.81221901564678, "grad_norm": 0.98046875, "learning_rate": 7.553157727705492e-07, "loss": 1.1504, "step": 15320 }, { "epoch": 4.812847242672321, "grad_norm": 0.9453125, "learning_rate": 7.527768962234212e-07, "loss": 1.0561, "step": 15322 }, { "epoch": 4.813475469697862, "grad_norm": 0.984375, "learning_rate": 7.502380196762933e-07, "loss": 1.145, "step": 15324 }, { "epoch": 4.814103696723404, "grad_norm": 0.99609375, "learning_rate": 7.476991431291653e-07, "loss": 1.2885, "step": 15326 }, { "epoch": 4.814731923748945, "grad_norm": 0.9140625, "learning_rate": 7.451602665820375e-07, "loss": 1.0843, "step": 15328 }, { "epoch": 4.815360150774486, "grad_norm": 0.96875, "learning_rate": 7.426213900349096e-07, "loss": 1.1195, "step": 15330 }, { "epoch": 4.815988377800028, "grad_norm": 0.8984375, "learning_rate": 7.400825134877817e-07, "loss": 1.0421, "step": 15332 }, { "epoch": 4.816616604825569, "grad_norm": 1.0703125, "learning_rate": 7.375436369406538e-07, "loss": 1.2204, "step": 15334 }, { "epoch": 4.81724483185111, "grad_norm": 0.98828125, "learning_rate": 7.35004760393526e-07, "loss": 1.2097, "step": 15336 }, { "epoch": 4.817873058876652, "grad_norm": 0.95703125, "learning_rate": 7.32465883846398e-07, "loss": 1.1886, "step": 15338 }, { "epoch": 4.818501285902193, "grad_norm": 1.0234375, "learning_rate": 7.299270072992701e-07, "loss": 0.9334, "step": 15340 }, { "epoch": 4.819129512927734, "grad_norm": 0.9140625, "learning_rate": 7.273881307521423e-07, "loss": 1.1412, "step": 15342 }, { "epoch": 4.819757739953276, "grad_norm": 0.921875, "learning_rate": 7.248492542050143e-07, "loss": 1.0974, "step": 15344 }, { "epoch": 4.820385966978817, "grad_norm": 1.0078125, "learning_rate": 7.223103776578865e-07, "loss": 1.1579, "step": 15346 }, { "epoch": 4.821014194004358, "grad_norm": 0.98046875, "learning_rate": 7.197715011107585e-07, "loss": 1.0568, "step": 15348 }, { "epoch": 4.8216424210299, "grad_norm": 0.95703125, "learning_rate": 7.172326245636306e-07, "loss": 1.1496, "step": 15350 }, { "epoch": 4.822270648055441, "grad_norm": 1.03125, "learning_rate": 7.146937480165028e-07, "loss": 1.0095, "step": 15352 }, { "epoch": 4.822898875080982, "grad_norm": 0.96875, "learning_rate": 7.121548714693749e-07, "loss": 1.1551, "step": 15354 }, { "epoch": 4.823527102106524, "grad_norm": 0.98828125, "learning_rate": 7.09615994922247e-07, "loss": 1.2925, "step": 15356 }, { "epoch": 4.824155329132065, "grad_norm": 0.97265625, "learning_rate": 7.070771183751191e-07, "loss": 1.0885, "step": 15358 }, { "epoch": 4.824783556157606, "grad_norm": 0.96875, "learning_rate": 7.045382418279911e-07, "loss": 1.103, "step": 15360 }, { "epoch": 4.825411783183148, "grad_norm": 1.0234375, "learning_rate": 7.019993652808632e-07, "loss": 1.3006, "step": 15362 }, { "epoch": 4.826040010208689, "grad_norm": 1.0703125, "learning_rate": 6.994604887337355e-07, "loss": 1.1423, "step": 15364 }, { "epoch": 4.82666823723423, "grad_norm": 0.984375, "learning_rate": 6.969216121866075e-07, "loss": 1.1031, "step": 15366 }, { "epoch": 4.8272964642597715, "grad_norm": 1.0234375, "learning_rate": 6.943827356394796e-07, "loss": 1.1256, "step": 15368 }, { "epoch": 4.827924691285313, "grad_norm": 0.96875, "learning_rate": 6.918438590923516e-07, "loss": 1.0703, "step": 15370 }, { "epoch": 4.828552918310854, "grad_norm": 0.984375, "learning_rate": 6.893049825452238e-07, "loss": 1.1624, "step": 15372 }, { "epoch": 4.8291811453363955, "grad_norm": 0.9921875, "learning_rate": 6.86766105998096e-07, "loss": 1.1149, "step": 15374 }, { "epoch": 4.829809372361938, "grad_norm": 1.015625, "learning_rate": 6.84227229450968e-07, "loss": 1.222, "step": 15376 }, { "epoch": 4.830437599387478, "grad_norm": 0.92578125, "learning_rate": 6.816883529038401e-07, "loss": 1.1078, "step": 15378 }, { "epoch": 4.83106582641302, "grad_norm": 0.9765625, "learning_rate": 6.791494763567121e-07, "loss": 1.2966, "step": 15380 }, { "epoch": 4.831694053438562, "grad_norm": 0.96875, "learning_rate": 6.766105998095843e-07, "loss": 1.1616, "step": 15382 }, { "epoch": 4.832322280464103, "grad_norm": 1.0078125, "learning_rate": 6.740717232624565e-07, "loss": 1.1618, "step": 15384 }, { "epoch": 4.832950507489644, "grad_norm": 0.9765625, "learning_rate": 6.715328467153285e-07, "loss": 1.0826, "step": 15386 }, { "epoch": 4.833578734515186, "grad_norm": 0.96875, "learning_rate": 6.689939701682006e-07, "loss": 1.1677, "step": 15388 }, { "epoch": 4.834206961540727, "grad_norm": 0.96875, "learning_rate": 6.664550936210728e-07, "loss": 1.105, "step": 15390 }, { "epoch": 4.834835188566268, "grad_norm": 0.96875, "learning_rate": 6.639162170739448e-07, "loss": 1.1729, "step": 15392 }, { "epoch": 4.83546341559181, "grad_norm": 0.98046875, "learning_rate": 6.613773405268169e-07, "loss": 1.2099, "step": 15394 }, { "epoch": 4.836091642617351, "grad_norm": 0.98046875, "learning_rate": 6.588384639796891e-07, "loss": 1.0558, "step": 15396 }, { "epoch": 4.836719869642892, "grad_norm": 1.0625, "learning_rate": 6.562995874325611e-07, "loss": 1.1532, "step": 15398 }, { "epoch": 4.837348096668434, "grad_norm": 0.91796875, "learning_rate": 6.537607108854333e-07, "loss": 1.239, "step": 15400 }, { "epoch": 4.837976323693975, "grad_norm": 1.078125, "learning_rate": 6.512218343383053e-07, "loss": 1.1685, "step": 15402 }, { "epoch": 4.838604550719516, "grad_norm": 1.0234375, "learning_rate": 6.486829577911774e-07, "loss": 1.2141, "step": 15404 }, { "epoch": 4.839232777745058, "grad_norm": 0.9453125, "learning_rate": 6.461440812440496e-07, "loss": 1.1006, "step": 15406 }, { "epoch": 4.839861004770599, "grad_norm": 0.95703125, "learning_rate": 6.436052046969217e-07, "loss": 1.1216, "step": 15408 }, { "epoch": 4.84048923179614, "grad_norm": 0.96484375, "learning_rate": 6.410663281497938e-07, "loss": 1.125, "step": 15410 }, { "epoch": 4.841117458821682, "grad_norm": 0.95703125, "learning_rate": 6.385274516026659e-07, "loss": 1.0791, "step": 15412 }, { "epoch": 4.841745685847223, "grad_norm": 0.98046875, "learning_rate": 6.359885750555379e-07, "loss": 1.1128, "step": 15414 }, { "epoch": 4.842373912872764, "grad_norm": 1.0859375, "learning_rate": 6.334496985084102e-07, "loss": 1.1676, "step": 15416 }, { "epoch": 4.843002139898306, "grad_norm": 0.97265625, "learning_rate": 6.309108219612823e-07, "loss": 1.2002, "step": 15418 }, { "epoch": 4.843630366923847, "grad_norm": 0.98828125, "learning_rate": 6.283719454141543e-07, "loss": 1.2326, "step": 15420 }, { "epoch": 4.844258593949388, "grad_norm": 1.078125, "learning_rate": 6.258330688670264e-07, "loss": 1.1495, "step": 15422 }, { "epoch": 4.84488682097493, "grad_norm": 0.95703125, "learning_rate": 6.232941923198985e-07, "loss": 1.0612, "step": 15424 }, { "epoch": 4.845515048000471, "grad_norm": 0.94140625, "learning_rate": 6.207553157727706e-07, "loss": 1.2042, "step": 15426 }, { "epoch": 4.846143275026012, "grad_norm": 1.0078125, "learning_rate": 6.182164392256427e-07, "loss": 1.2884, "step": 15428 }, { "epoch": 4.846771502051554, "grad_norm": 0.96484375, "learning_rate": 6.156775626785148e-07, "loss": 1.0661, "step": 15430 }, { "epoch": 4.847399729077095, "grad_norm": 0.95703125, "learning_rate": 6.131386861313869e-07, "loss": 1.1648, "step": 15432 }, { "epoch": 4.848027956102636, "grad_norm": 0.93359375, "learning_rate": 6.105998095842589e-07, "loss": 1.1297, "step": 15434 }, { "epoch": 4.848656183128178, "grad_norm": 0.98046875, "learning_rate": 6.080609330371312e-07, "loss": 1.1094, "step": 15436 }, { "epoch": 4.849284410153719, "grad_norm": 1.0078125, "learning_rate": 6.055220564900032e-07, "loss": 1.1158, "step": 15438 }, { "epoch": 4.84991263717926, "grad_norm": 1.0, "learning_rate": 6.029831799428753e-07, "loss": 1.23, "step": 15440 }, { "epoch": 4.8505408642048025, "grad_norm": 0.9921875, "learning_rate": 6.004443033957474e-07, "loss": 1.0939, "step": 15442 }, { "epoch": 4.851169091230343, "grad_norm": 0.921875, "learning_rate": 5.979054268486196e-07, "loss": 1.1581, "step": 15444 }, { "epoch": 4.851797318255885, "grad_norm": 0.9375, "learning_rate": 5.953665503014917e-07, "loss": 1.1005, "step": 15446 }, { "epoch": 4.8524255452814264, "grad_norm": 0.86328125, "learning_rate": 5.928276737543637e-07, "loss": 1.2146, "step": 15448 }, { "epoch": 4.853053772306968, "grad_norm": 0.94921875, "learning_rate": 5.902887972072358e-07, "loss": 1.1981, "step": 15450 }, { "epoch": 4.853681999332509, "grad_norm": 1.046875, "learning_rate": 5.87749920660108e-07, "loss": 1.1156, "step": 15452 }, { "epoch": 4.85431022635805, "grad_norm": 1.03125, "learning_rate": 5.852110441129801e-07, "loss": 1.1668, "step": 15454 }, { "epoch": 4.854938453383592, "grad_norm": 0.96484375, "learning_rate": 5.826721675658521e-07, "loss": 1.2054, "step": 15456 }, { "epoch": 4.855566680409133, "grad_norm": 0.9140625, "learning_rate": 5.801332910187243e-07, "loss": 1.0698, "step": 15458 }, { "epoch": 4.856194907434674, "grad_norm": 0.9453125, "learning_rate": 5.775944144715963e-07, "loss": 1.1868, "step": 15460 }, { "epoch": 4.856823134460216, "grad_norm": 1.140625, "learning_rate": 5.750555379244685e-07, "loss": 1.157, "step": 15462 }, { "epoch": 4.857451361485757, "grad_norm": 1.03125, "learning_rate": 5.725166613773406e-07, "loss": 1.0055, "step": 15464 }, { "epoch": 4.858079588511298, "grad_norm": 1.0234375, "learning_rate": 5.699777848302127e-07, "loss": 1.1044, "step": 15466 }, { "epoch": 4.85870781553684, "grad_norm": 0.9453125, "learning_rate": 5.674389082830848e-07, "loss": 1.1534, "step": 15468 }, { "epoch": 4.859336042562381, "grad_norm": 0.9453125, "learning_rate": 5.649000317359568e-07, "loss": 1.2203, "step": 15470 }, { "epoch": 4.859964269587922, "grad_norm": 1.0234375, "learning_rate": 5.62361155188829e-07, "loss": 1.1805, "step": 15472 }, { "epoch": 4.860592496613464, "grad_norm": 0.9921875, "learning_rate": 5.598222786417011e-07, "loss": 1.1463, "step": 15474 }, { "epoch": 4.861220723639005, "grad_norm": 1.03125, "learning_rate": 5.572834020945732e-07, "loss": 1.2043, "step": 15476 }, { "epoch": 4.861848950664546, "grad_norm": 1.0, "learning_rate": 5.547445255474453e-07, "loss": 0.9823, "step": 15478 }, { "epoch": 4.862477177690088, "grad_norm": 0.9765625, "learning_rate": 5.522056490003174e-07, "loss": 1.143, "step": 15480 }, { "epoch": 4.863105404715629, "grad_norm": 1.0, "learning_rate": 5.496667724531895e-07, "loss": 1.116, "step": 15482 }, { "epoch": 4.86373363174117, "grad_norm": 0.9453125, "learning_rate": 5.471278959060616e-07, "loss": 1.1341, "step": 15484 }, { "epoch": 4.864361858766712, "grad_norm": 0.96484375, "learning_rate": 5.445890193589337e-07, "loss": 0.9823, "step": 15486 }, { "epoch": 4.864990085792253, "grad_norm": 0.94921875, "learning_rate": 5.420501428118057e-07, "loss": 1.2684, "step": 15488 }, { "epoch": 4.865618312817794, "grad_norm": 0.875, "learning_rate": 5.39511266264678e-07, "loss": 1.2717, "step": 15490 }, { "epoch": 4.866246539843336, "grad_norm": 1.0546875, "learning_rate": 5.3697238971755e-07, "loss": 1.2203, "step": 15492 }, { "epoch": 4.866874766868877, "grad_norm": 0.92578125, "learning_rate": 5.344335131704221e-07, "loss": 1.0384, "step": 15494 }, { "epoch": 4.867502993894418, "grad_norm": 1.3359375, "learning_rate": 5.318946366232942e-07, "loss": 1.0571, "step": 15496 }, { "epoch": 4.86813122091996, "grad_norm": 1.0546875, "learning_rate": 5.293557600761664e-07, "loss": 1.0956, "step": 15498 }, { "epoch": 4.868759447945501, "grad_norm": 1.078125, "learning_rate": 5.268168835290385e-07, "loss": 0.9429, "step": 15500 }, { "epoch": 4.869387674971042, "grad_norm": 0.95703125, "learning_rate": 5.242780069819105e-07, "loss": 1.2119, "step": 15502 }, { "epoch": 4.870015901996584, "grad_norm": 0.9296875, "learning_rate": 5.217391304347826e-07, "loss": 1.1438, "step": 15504 }, { "epoch": 4.870644129022125, "grad_norm": 1.0078125, "learning_rate": 5.192002538876547e-07, "loss": 0.9958, "step": 15506 }, { "epoch": 4.871272356047667, "grad_norm": 0.95703125, "learning_rate": 5.166613773405269e-07, "loss": 1.1207, "step": 15508 }, { "epoch": 4.871900583073208, "grad_norm": 0.9921875, "learning_rate": 5.14122500793399e-07, "loss": 1.2872, "step": 15510 }, { "epoch": 4.87252881009875, "grad_norm": 0.94921875, "learning_rate": 5.115836242462711e-07, "loss": 1.1518, "step": 15512 }, { "epoch": 4.873157037124291, "grad_norm": 1.0859375, "learning_rate": 5.090447476991431e-07, "loss": 1.2147, "step": 15514 }, { "epoch": 4.8737852641498325, "grad_norm": 0.94140625, "learning_rate": 5.065058711520153e-07, "loss": 1.1525, "step": 15516 }, { "epoch": 4.874413491175374, "grad_norm": 0.99609375, "learning_rate": 5.039669946048874e-07, "loss": 0.9559, "step": 15518 }, { "epoch": 4.875041718200915, "grad_norm": 0.9296875, "learning_rate": 5.014281180577595e-07, "loss": 1.1099, "step": 15520 }, { "epoch": 4.8756699452264565, "grad_norm": 0.92578125, "learning_rate": 4.988892415106316e-07, "loss": 1.0986, "step": 15522 }, { "epoch": 4.876298172251998, "grad_norm": 0.9765625, "learning_rate": 4.963503649635036e-07, "loss": 1.1365, "step": 15524 }, { "epoch": 4.876926399277539, "grad_norm": 1.15625, "learning_rate": 4.938114884163759e-07, "loss": 1.2405, "step": 15526 }, { "epoch": 4.8775546263030805, "grad_norm": 0.953125, "learning_rate": 4.912726118692479e-07, "loss": 1.2016, "step": 15528 }, { "epoch": 4.878182853328622, "grad_norm": 0.953125, "learning_rate": 4.8873373532212e-07, "loss": 1.1354, "step": 15530 }, { "epoch": 4.878811080354163, "grad_norm": 1.03125, "learning_rate": 4.861948587749921e-07, "loss": 1.1367, "step": 15532 }, { "epoch": 4.879439307379704, "grad_norm": 0.984375, "learning_rate": 4.836559822278642e-07, "loss": 1.1055, "step": 15534 }, { "epoch": 4.880067534405246, "grad_norm": 0.98046875, "learning_rate": 4.811171056807363e-07, "loss": 1.1147, "step": 15536 }, { "epoch": 4.880695761430787, "grad_norm": 0.99609375, "learning_rate": 4.785782291336084e-07, "loss": 1.2137, "step": 15538 }, { "epoch": 4.881323988456328, "grad_norm": 0.8671875, "learning_rate": 4.7603935258648053e-07, "loss": 1.1573, "step": 15540 }, { "epoch": 4.88195221548187, "grad_norm": 1.0625, "learning_rate": 4.735004760393526e-07, "loss": 1.1616, "step": 15542 }, { "epoch": 4.882580442507411, "grad_norm": 0.9375, "learning_rate": 4.709615994922247e-07, "loss": 1.1615, "step": 15544 }, { "epoch": 4.883208669532952, "grad_norm": 0.921875, "learning_rate": 4.684227229450968e-07, "loss": 1.1547, "step": 15546 }, { "epoch": 4.883836896558494, "grad_norm": 1.015625, "learning_rate": 4.6588384639796897e-07, "loss": 1.1621, "step": 15548 }, { "epoch": 4.884465123584035, "grad_norm": 0.92578125, "learning_rate": 4.6334496985084104e-07, "loss": 1.0732, "step": 15550 }, { "epoch": 4.885093350609576, "grad_norm": 0.99609375, "learning_rate": 4.608060933037131e-07, "loss": 1.0923, "step": 15552 }, { "epoch": 4.885721577635118, "grad_norm": 0.99609375, "learning_rate": 4.582672167565853e-07, "loss": 1.2153, "step": 15554 }, { "epoch": 4.886349804660659, "grad_norm": 1.0546875, "learning_rate": 4.5572834020945736e-07, "loss": 1.1324, "step": 15556 }, { "epoch": 4.8869780316862, "grad_norm": 1.0625, "learning_rate": 4.5318946366232943e-07, "loss": 1.0355, "step": 15558 }, { "epoch": 4.887606258711742, "grad_norm": 0.9453125, "learning_rate": 4.5065058711520155e-07, "loss": 1.1292, "step": 15560 }, { "epoch": 4.888234485737283, "grad_norm": 1.0625, "learning_rate": 4.481117105680737e-07, "loss": 1.1332, "step": 15562 }, { "epoch": 4.888862712762824, "grad_norm": 0.953125, "learning_rate": 4.455728340209458e-07, "loss": 1.1076, "step": 15564 }, { "epoch": 4.889490939788366, "grad_norm": 1.046875, "learning_rate": 4.4303395747381787e-07, "loss": 1.183, "step": 15566 }, { "epoch": 4.890119166813907, "grad_norm": 0.9765625, "learning_rate": 4.4049508092668994e-07, "loss": 1.0437, "step": 15568 }, { "epoch": 4.890747393839448, "grad_norm": 0.91015625, "learning_rate": 4.379562043795621e-07, "loss": 1.177, "step": 15570 }, { "epoch": 4.89137562086499, "grad_norm": 0.94140625, "learning_rate": 4.354173278324342e-07, "loss": 1.1951, "step": 15572 }, { "epoch": 4.892003847890532, "grad_norm": 0.9609375, "learning_rate": 4.3287845128530625e-07, "loss": 1.1793, "step": 15574 }, { "epoch": 4.892632074916072, "grad_norm": 1.09375, "learning_rate": 4.303395747381784e-07, "loss": 1.0787, "step": 15576 }, { "epoch": 4.8932603019416145, "grad_norm": 0.9296875, "learning_rate": 4.278006981910505e-07, "loss": 1.166, "step": 15578 }, { "epoch": 4.893888528967156, "grad_norm": 1.0078125, "learning_rate": 4.252618216439226e-07, "loss": 1.1592, "step": 15580 }, { "epoch": 4.894516755992697, "grad_norm": 1.03125, "learning_rate": 4.227229450967947e-07, "loss": 1.1145, "step": 15582 }, { "epoch": 4.8951449830182385, "grad_norm": 0.94921875, "learning_rate": 4.2018406854966677e-07, "loss": 1.1746, "step": 15584 }, { "epoch": 4.89577321004378, "grad_norm": 0.984375, "learning_rate": 4.1764519200253894e-07, "loss": 1.0769, "step": 15586 }, { "epoch": 4.896401437069321, "grad_norm": 0.8984375, "learning_rate": 4.15106315455411e-07, "loss": 1.1921, "step": 15588 }, { "epoch": 4.8970296640948625, "grad_norm": 0.97265625, "learning_rate": 4.125674389082831e-07, "loss": 1.0834, "step": 15590 }, { "epoch": 4.897657891120404, "grad_norm": 1.015625, "learning_rate": 4.1002856236115526e-07, "loss": 1.077, "step": 15592 }, { "epoch": 4.898286118145945, "grad_norm": 0.953125, "learning_rate": 4.0748968581402733e-07, "loss": 1.0515, "step": 15594 }, { "epoch": 4.8989143451714865, "grad_norm": 1.0078125, "learning_rate": 4.0495080926689945e-07, "loss": 1.23, "step": 15596 }, { "epoch": 4.899542572197028, "grad_norm": 0.94140625, "learning_rate": 4.024119327197715e-07, "loss": 1.1117, "step": 15598 }, { "epoch": 4.900170799222569, "grad_norm": 1.0078125, "learning_rate": 3.998730561726436e-07, "loss": 1.1571, "step": 15600 }, { "epoch": 4.9007990262481105, "grad_norm": 1.0546875, "learning_rate": 3.9733417962551577e-07, "loss": 1.1769, "step": 15602 }, { "epoch": 4.901427253273652, "grad_norm": 0.8984375, "learning_rate": 3.9479530307838784e-07, "loss": 1.0755, "step": 15604 }, { "epoch": 4.902055480299193, "grad_norm": 1.0, "learning_rate": 3.922564265312599e-07, "loss": 1.2601, "step": 15606 }, { "epoch": 4.9026837073247345, "grad_norm": 0.9375, "learning_rate": 3.897175499841321e-07, "loss": 1.0475, "step": 15608 }, { "epoch": 4.903311934350276, "grad_norm": 0.90625, "learning_rate": 3.8717867343700416e-07, "loss": 1.2995, "step": 15610 }, { "epoch": 4.903940161375817, "grad_norm": 0.96875, "learning_rate": 3.846397968898763e-07, "loss": 1.1078, "step": 15612 }, { "epoch": 4.9045683884013584, "grad_norm": 0.91796875, "learning_rate": 3.8210092034274835e-07, "loss": 1.151, "step": 15614 }, { "epoch": 4.9051966154269, "grad_norm": 1.0078125, "learning_rate": 3.795620437956205e-07, "loss": 1.0583, "step": 15616 }, { "epoch": 4.905824842452441, "grad_norm": 0.91796875, "learning_rate": 3.770231672484926e-07, "loss": 1.1186, "step": 15618 }, { "epoch": 4.906453069477982, "grad_norm": 0.91015625, "learning_rate": 3.7448429070136467e-07, "loss": 1.3136, "step": 15620 }, { "epoch": 4.907081296503524, "grad_norm": 1.0, "learning_rate": 3.7194541415423674e-07, "loss": 1.1531, "step": 15622 }, { "epoch": 4.907709523529065, "grad_norm": 1.0078125, "learning_rate": 3.694065376071089e-07, "loss": 1.0372, "step": 15624 }, { "epoch": 4.908337750554606, "grad_norm": 1.171875, "learning_rate": 3.66867661059981e-07, "loss": 1.0852, "step": 15626 }, { "epoch": 4.908965977580148, "grad_norm": 0.91015625, "learning_rate": 3.643287845128531e-07, "loss": 1.1821, "step": 15628 }, { "epoch": 4.909594204605689, "grad_norm": 0.98046875, "learning_rate": 3.617899079657252e-07, "loss": 1.1464, "step": 15630 }, { "epoch": 4.91022243163123, "grad_norm": 0.9609375, "learning_rate": 3.592510314185973e-07, "loss": 1.0942, "step": 15632 }, { "epoch": 4.910850658656772, "grad_norm": 0.94921875, "learning_rate": 3.567121548714694e-07, "loss": 1.1216, "step": 15634 }, { "epoch": 4.911478885682314, "grad_norm": 1.015625, "learning_rate": 3.541732783243415e-07, "loss": 1.1378, "step": 15636 }, { "epoch": 4.912107112707854, "grad_norm": 0.97265625, "learning_rate": 3.5163440177721357e-07, "loss": 1.2263, "step": 15638 }, { "epoch": 4.912735339733397, "grad_norm": 1.0, "learning_rate": 3.4909552523008574e-07, "loss": 1.0909, "step": 15640 }, { "epoch": 4.913363566758937, "grad_norm": 0.9765625, "learning_rate": 3.465566486829578e-07, "loss": 1.1438, "step": 15642 }, { "epoch": 4.913991793784479, "grad_norm": 0.9453125, "learning_rate": 3.440177721358299e-07, "loss": 1.1795, "step": 15644 }, { "epoch": 4.914620020810021, "grad_norm": 0.93359375, "learning_rate": 3.4147889558870206e-07, "loss": 1.0438, "step": 15646 }, { "epoch": 4.915248247835562, "grad_norm": 0.984375, "learning_rate": 3.3894001904157413e-07, "loss": 1.2959, "step": 15648 }, { "epoch": 4.915876474861103, "grad_norm": 0.9375, "learning_rate": 3.3640114249444625e-07, "loss": 1.1562, "step": 15650 }, { "epoch": 4.916504701886645, "grad_norm": 0.984375, "learning_rate": 3.338622659473183e-07, "loss": 1.0537, "step": 15652 }, { "epoch": 4.917132928912186, "grad_norm": 1.09375, "learning_rate": 3.313233894001904e-07, "loss": 1.121, "step": 15654 }, { "epoch": 4.917761155937727, "grad_norm": 1.1015625, "learning_rate": 3.2878451285306257e-07, "loss": 1.1065, "step": 15656 }, { "epoch": 4.9183893829632686, "grad_norm": 0.9140625, "learning_rate": 3.2624563630593464e-07, "loss": 1.1478, "step": 15658 }, { "epoch": 4.91901760998881, "grad_norm": 1.0390625, "learning_rate": 3.237067597588067e-07, "loss": 1.0974, "step": 15660 }, { "epoch": 4.919645837014351, "grad_norm": 0.96875, "learning_rate": 3.211678832116789e-07, "loss": 1.0672, "step": 15662 }, { "epoch": 4.9202740640398925, "grad_norm": 1.0234375, "learning_rate": 3.1862900666455096e-07, "loss": 1.0362, "step": 15664 }, { "epoch": 4.920902291065434, "grad_norm": 1.0546875, "learning_rate": 3.160901301174231e-07, "loss": 1.1344, "step": 15666 }, { "epoch": 4.921530518090975, "grad_norm": 0.9609375, "learning_rate": 3.1355125357029515e-07, "loss": 1.1823, "step": 15668 }, { "epoch": 4.9221587451165165, "grad_norm": 0.96875, "learning_rate": 3.110123770231673e-07, "loss": 1.1117, "step": 15670 }, { "epoch": 4.922786972142058, "grad_norm": 0.94140625, "learning_rate": 3.0847350047603934e-07, "loss": 1.1236, "step": 15672 }, { "epoch": 4.923415199167599, "grad_norm": 1.078125, "learning_rate": 3.0593462392891147e-07, "loss": 1.0997, "step": 15674 }, { "epoch": 4.9240434261931405, "grad_norm": 0.9765625, "learning_rate": 3.033957473817836e-07, "loss": 1.147, "step": 15676 }, { "epoch": 4.924671653218682, "grad_norm": 0.98046875, "learning_rate": 3.008568708346557e-07, "loss": 1.1225, "step": 15678 }, { "epoch": 4.925299880244223, "grad_norm": 0.94140625, "learning_rate": 2.983179942875278e-07, "loss": 1.1739, "step": 15680 }, { "epoch": 4.9259281072697645, "grad_norm": 0.9375, "learning_rate": 2.957791177403999e-07, "loss": 1.2091, "step": 15682 }, { "epoch": 4.926556334295306, "grad_norm": 1.0, "learning_rate": 2.93240241193272e-07, "loss": 1.1108, "step": 15684 }, { "epoch": 4.927184561320847, "grad_norm": 0.953125, "learning_rate": 2.907013646461441e-07, "loss": 1.1798, "step": 15686 }, { "epoch": 4.9278127883463885, "grad_norm": 0.890625, "learning_rate": 2.8816248809901617e-07, "loss": 1.1468, "step": 15688 }, { "epoch": 4.92844101537193, "grad_norm": 0.9765625, "learning_rate": 2.856236115518883e-07, "loss": 1.2573, "step": 15690 }, { "epoch": 4.929069242397471, "grad_norm": 0.98828125, "learning_rate": 2.830847350047604e-07, "loss": 1.0389, "step": 15692 }, { "epoch": 4.9296974694230125, "grad_norm": 0.91796875, "learning_rate": 2.8054585845763254e-07, "loss": 1.3184, "step": 15694 }, { "epoch": 4.930325696448554, "grad_norm": 1.109375, "learning_rate": 2.780069819105046e-07, "loss": 1.1523, "step": 15696 }, { "epoch": 4.930953923474095, "grad_norm": 1.0, "learning_rate": 2.7546810536337674e-07, "loss": 1.1042, "step": 15698 }, { "epoch": 4.931582150499636, "grad_norm": 1.0546875, "learning_rate": 2.7292922881624886e-07, "loss": 1.1395, "step": 15700 }, { "epoch": 4.932210377525179, "grad_norm": 0.9375, "learning_rate": 2.7039035226912093e-07, "loss": 1.1962, "step": 15702 }, { "epoch": 4.932838604550719, "grad_norm": 1.0078125, "learning_rate": 2.6785147572199305e-07, "loss": 1.1329, "step": 15704 }, { "epoch": 4.933466831576261, "grad_norm": 1.0390625, "learning_rate": 2.653125991748651e-07, "loss": 1.1942, "step": 15706 }, { "epoch": 4.934095058601802, "grad_norm": 0.984375, "learning_rate": 2.6277372262773725e-07, "loss": 1.0449, "step": 15708 }, { "epoch": 4.934723285627344, "grad_norm": 0.9765625, "learning_rate": 2.6023484608060937e-07, "loss": 1.2482, "step": 15710 }, { "epoch": 4.935351512652885, "grad_norm": 0.96875, "learning_rate": 2.5769596953348144e-07, "loss": 0.9689, "step": 15712 }, { "epoch": 4.935979739678427, "grad_norm": 0.99609375, "learning_rate": 2.5515709298635356e-07, "loss": 1.1971, "step": 15714 }, { "epoch": 4.936607966703968, "grad_norm": 1.0390625, "learning_rate": 2.526182164392257e-07, "loss": 1.1679, "step": 15716 }, { "epoch": 4.937236193729509, "grad_norm": 0.9609375, "learning_rate": 2.5007933989209776e-07, "loss": 1.1772, "step": 15718 }, { "epoch": 4.937864420755051, "grad_norm": 0.97265625, "learning_rate": 2.475404633449699e-07, "loss": 1.0254, "step": 15720 }, { "epoch": 4.938492647780592, "grad_norm": 1.09375, "learning_rate": 2.4500158679784195e-07, "loss": 1.0295, "step": 15722 }, { "epoch": 4.939120874806133, "grad_norm": 0.890625, "learning_rate": 2.424627102507141e-07, "loss": 1.2547, "step": 15724 }, { "epoch": 4.939749101831675, "grad_norm": 0.94140625, "learning_rate": 2.399238337035862e-07, "loss": 1.1265, "step": 15726 }, { "epoch": 4.940377328857216, "grad_norm": 1.0546875, "learning_rate": 2.3738495715645827e-07, "loss": 1.0392, "step": 15728 }, { "epoch": 4.941005555882757, "grad_norm": 0.984375, "learning_rate": 2.348460806093304e-07, "loss": 1.2505, "step": 15730 }, { "epoch": 4.941633782908299, "grad_norm": 0.92578125, "learning_rate": 2.323072040622025e-07, "loss": 1.2492, "step": 15732 }, { "epoch": 4.94226200993384, "grad_norm": 0.9296875, "learning_rate": 2.297683275150746e-07, "loss": 1.1481, "step": 15734 }, { "epoch": 4.942890236959381, "grad_norm": 0.9375, "learning_rate": 2.2722945096794668e-07, "loss": 1.3197, "step": 15736 }, { "epoch": 4.943518463984923, "grad_norm": 0.9296875, "learning_rate": 2.246905744208188e-07, "loss": 1.2359, "step": 15738 }, { "epoch": 4.944146691010464, "grad_norm": 0.984375, "learning_rate": 2.2215169787369093e-07, "loss": 1.2038, "step": 15740 }, { "epoch": 4.944774918036005, "grad_norm": 1.0, "learning_rate": 2.1961282132656303e-07, "loss": 1.2568, "step": 15742 }, { "epoch": 4.9454031450615465, "grad_norm": 0.953125, "learning_rate": 2.170739447794351e-07, "loss": 1.1274, "step": 15744 }, { "epoch": 4.946031372087088, "grad_norm": 0.9765625, "learning_rate": 2.1453506823230722e-07, "loss": 1.0711, "step": 15746 }, { "epoch": 4.946659599112629, "grad_norm": 0.890625, "learning_rate": 2.1199619168517934e-07, "loss": 1.1793, "step": 15748 }, { "epoch": 4.9472878261381705, "grad_norm": 1.0078125, "learning_rate": 2.0945731513805144e-07, "loss": 1.1717, "step": 15750 }, { "epoch": 4.947916053163712, "grad_norm": 0.9296875, "learning_rate": 2.0691843859092354e-07, "loss": 1.1041, "step": 15752 }, { "epoch": 4.948544280189253, "grad_norm": 0.98046875, "learning_rate": 2.0437956204379563e-07, "loss": 1.1851, "step": 15754 }, { "epoch": 4.9491725072147945, "grad_norm": 1.0, "learning_rate": 2.0184068549666776e-07, "loss": 1.129, "step": 15756 }, { "epoch": 4.949800734240336, "grad_norm": 0.98046875, "learning_rate": 1.9930180894953983e-07, "loss": 1.1797, "step": 15758 }, { "epoch": 4.950428961265877, "grad_norm": 1.203125, "learning_rate": 1.9676293240241195e-07, "loss": 1.1474, "step": 15760 }, { "epoch": 4.9510571882914185, "grad_norm": 0.9453125, "learning_rate": 1.9422405585528405e-07, "loss": 1.2601, "step": 15762 }, { "epoch": 4.95168541531696, "grad_norm": 0.98046875, "learning_rate": 1.9168517930815617e-07, "loss": 1.1234, "step": 15764 }, { "epoch": 4.952313642342501, "grad_norm": 1.03125, "learning_rate": 1.8914630276102824e-07, "loss": 1.1157, "step": 15766 }, { "epoch": 4.952941869368043, "grad_norm": 0.95703125, "learning_rate": 1.8660742621390036e-07, "loss": 1.321, "step": 15768 }, { "epoch": 4.953570096393584, "grad_norm": 1.0390625, "learning_rate": 1.8406854966677246e-07, "loss": 0.9938, "step": 15770 }, { "epoch": 4.954198323419126, "grad_norm": 0.90625, "learning_rate": 1.8152967311964458e-07, "loss": 1.1066, "step": 15772 }, { "epoch": 4.9548265504446665, "grad_norm": 0.9765625, "learning_rate": 1.7899079657251666e-07, "loss": 1.0904, "step": 15774 }, { "epoch": 4.955454777470209, "grad_norm": 1.0234375, "learning_rate": 1.7645192002538878e-07, "loss": 1.073, "step": 15776 }, { "epoch": 4.95608300449575, "grad_norm": 0.94921875, "learning_rate": 1.7391304347826088e-07, "loss": 1.1854, "step": 15778 }, { "epoch": 4.956711231521291, "grad_norm": 0.96484375, "learning_rate": 1.71374166931133e-07, "loss": 1.1345, "step": 15780 }, { "epoch": 4.957339458546833, "grad_norm": 0.90625, "learning_rate": 1.6883529038400507e-07, "loss": 1.1082, "step": 15782 }, { "epoch": 4.957967685572374, "grad_norm": 1.0703125, "learning_rate": 1.662964138368772e-07, "loss": 1.1191, "step": 15784 }, { "epoch": 4.958595912597915, "grad_norm": 0.98046875, "learning_rate": 1.637575372897493e-07, "loss": 1.0461, "step": 15786 }, { "epoch": 4.959224139623457, "grad_norm": 0.890625, "learning_rate": 1.612186607426214e-07, "loss": 1.0615, "step": 15788 }, { "epoch": 4.959852366648998, "grad_norm": 1.0625, "learning_rate": 1.5867978419549348e-07, "loss": 1.1676, "step": 15790 }, { "epoch": 4.960480593674539, "grad_norm": 0.94921875, "learning_rate": 1.561409076483656e-07, "loss": 1.2532, "step": 15792 }, { "epoch": 4.961108820700081, "grad_norm": 0.95703125, "learning_rate": 1.5360203110123773e-07, "loss": 1.1877, "step": 15794 }, { "epoch": 4.961737047725622, "grad_norm": 1.0390625, "learning_rate": 1.5106315455410983e-07, "loss": 1.1942, "step": 15796 }, { "epoch": 4.962365274751163, "grad_norm": 0.953125, "learning_rate": 1.4852427800698192e-07, "loss": 1.0226, "step": 15798 }, { "epoch": 4.962993501776705, "grad_norm": 0.95703125, "learning_rate": 1.4598540145985402e-07, "loss": 1.0712, "step": 15800 }, { "epoch": 4.963621728802246, "grad_norm": 1.03125, "learning_rate": 1.4344652491272614e-07, "loss": 1.114, "step": 15802 }, { "epoch": 4.964249955827787, "grad_norm": 0.98046875, "learning_rate": 1.4090764836559824e-07, "loss": 1.0986, "step": 15804 }, { "epoch": 4.964878182853329, "grad_norm": 0.90625, "learning_rate": 1.3836877181847034e-07, "loss": 1.1095, "step": 15806 }, { "epoch": 4.96550640987887, "grad_norm": 1.140625, "learning_rate": 1.3582989527134243e-07, "loss": 1.0496, "step": 15808 }, { "epoch": 4.966134636904411, "grad_norm": 0.91015625, "learning_rate": 1.3329101872421456e-07, "loss": 1.1013, "step": 15810 }, { "epoch": 4.966762863929953, "grad_norm": 1.015625, "learning_rate": 1.3075214217708665e-07, "loss": 1.0962, "step": 15812 }, { "epoch": 4.967391090955494, "grad_norm": 1.03125, "learning_rate": 1.2821326562995875e-07, "loss": 1.1481, "step": 15814 }, { "epoch": 4.968019317981035, "grad_norm": 0.97265625, "learning_rate": 1.2567438908283085e-07, "loss": 1.2528, "step": 15816 }, { "epoch": 4.968647545006577, "grad_norm": 1.015625, "learning_rate": 1.2313551253570297e-07, "loss": 0.9616, "step": 15818 }, { "epoch": 4.969275772032118, "grad_norm": 0.890625, "learning_rate": 1.2059663598857507e-07, "loss": 1.2594, "step": 15820 }, { "epoch": 4.969903999057659, "grad_norm": 0.953125, "learning_rate": 1.1805775944144718e-07, "loss": 0.9568, "step": 15822 }, { "epoch": 4.9705322260832006, "grad_norm": 1.046875, "learning_rate": 1.1551888289431928e-07, "loss": 1.4042, "step": 15824 }, { "epoch": 4.971160453108742, "grad_norm": 1.15625, "learning_rate": 1.1298000634719139e-07, "loss": 1.1493, "step": 15826 }, { "epoch": 4.971788680134283, "grad_norm": 0.98828125, "learning_rate": 1.1044112980006348e-07, "loss": 1.173, "step": 15828 }, { "epoch": 4.9724169071598245, "grad_norm": 1.1484375, "learning_rate": 1.0790225325293559e-07, "loss": 0.9799, "step": 15830 }, { "epoch": 4.973045134185366, "grad_norm": 1.015625, "learning_rate": 1.0536337670580769e-07, "loss": 1.1843, "step": 15832 }, { "epoch": 4.973673361210908, "grad_norm": 0.95703125, "learning_rate": 1.028245001586798e-07, "loss": 1.0995, "step": 15834 }, { "epoch": 4.9743015882364485, "grad_norm": 0.92578125, "learning_rate": 1.002856236115519e-07, "loss": 1.1589, "step": 15836 }, { "epoch": 4.974929815261991, "grad_norm": 0.921875, "learning_rate": 9.7746747064424e-08, "loss": 1.1839, "step": 15838 }, { "epoch": 4.975558042287531, "grad_norm": 0.97265625, "learning_rate": 9.52078705172961e-08, "loss": 1.014, "step": 15840 }, { "epoch": 4.976186269313073, "grad_norm": 1.0234375, "learning_rate": 9.266899397016821e-08, "loss": 1.3049, "step": 15842 }, { "epoch": 4.976814496338615, "grad_norm": 0.9375, "learning_rate": 9.013011742304031e-08, "loss": 0.9668, "step": 15844 }, { "epoch": 4.977442723364156, "grad_norm": 1.09375, "learning_rate": 8.759124087591242e-08, "loss": 1.016, "step": 15846 }, { "epoch": 4.978070950389697, "grad_norm": 0.93359375, "learning_rate": 8.505236432878452e-08, "loss": 1.169, "step": 15848 }, { "epoch": 4.978699177415239, "grad_norm": 1.0390625, "learning_rate": 8.251348778165663e-08, "loss": 1.1211, "step": 15850 }, { "epoch": 4.97932740444078, "grad_norm": 0.96484375, "learning_rate": 7.997461123452872e-08, "loss": 1.1479, "step": 15852 }, { "epoch": 4.979955631466321, "grad_norm": 1.1328125, "learning_rate": 7.743573468740083e-08, "loss": 1.1483, "step": 15854 }, { "epoch": 4.980583858491863, "grad_norm": 1.03125, "learning_rate": 7.489685814027293e-08, "loss": 1.01, "step": 15856 }, { "epoch": 4.981212085517404, "grad_norm": 0.9609375, "learning_rate": 7.235798159314504e-08, "loss": 1.0365, "step": 15858 }, { "epoch": 4.981840312542945, "grad_norm": 1.046875, "learning_rate": 6.981910504601714e-08, "loss": 1.0191, "step": 15860 }, { "epoch": 4.982468539568487, "grad_norm": 0.96875, "learning_rate": 6.728022849888925e-08, "loss": 1.0633, "step": 15862 }, { "epoch": 4.983096766594028, "grad_norm": 1.046875, "learning_rate": 6.474135195176134e-08, "loss": 1.0263, "step": 15864 }, { "epoch": 4.983724993619569, "grad_norm": 1.0078125, "learning_rate": 6.220247540463345e-08, "loss": 1.0784, "step": 15866 }, { "epoch": 4.984353220645111, "grad_norm": 0.97265625, "learning_rate": 5.966359885750557e-08, "loss": 1.3673, "step": 15868 }, { "epoch": 4.984981447670652, "grad_norm": 1.0234375, "learning_rate": 5.712472231037766e-08, "loss": 1.2508, "step": 15870 }, { "epoch": 4.985609674696193, "grad_norm": 0.9765625, "learning_rate": 5.4585845763249765e-08, "loss": 1.1675, "step": 15872 }, { "epoch": 4.986237901721735, "grad_norm": 1.0234375, "learning_rate": 5.204696921612187e-08, "loss": 1.0231, "step": 15874 }, { "epoch": 4.986866128747276, "grad_norm": 0.9375, "learning_rate": 4.950809266899397e-08, "loss": 1.0987, "step": 15876 }, { "epoch": 4.987494355772817, "grad_norm": 0.953125, "learning_rate": 4.696921612186608e-08, "loss": 0.9856, "step": 15878 }, { "epoch": 4.988122582798359, "grad_norm": 1.0703125, "learning_rate": 4.4430339574738186e-08, "loss": 1.0779, "step": 15880 }, { "epoch": 4.9887508098239, "grad_norm": 0.90234375, "learning_rate": 4.189146302761029e-08, "loss": 1.2355, "step": 15882 }, { "epoch": 4.989379036849441, "grad_norm": 1.0, "learning_rate": 3.935258648048239e-08, "loss": 1.1763, "step": 15884 }, { "epoch": 4.990007263874983, "grad_norm": 0.98046875, "learning_rate": 3.6813709933354496e-08, "loss": 1.1829, "step": 15886 }, { "epoch": 4.990635490900524, "grad_norm": 0.953125, "learning_rate": 3.427483338622659e-08, "loss": 1.2277, "step": 15888 }, { "epoch": 4.991263717926065, "grad_norm": 0.96875, "learning_rate": 3.1735956839098697e-08, "loss": 1.103, "step": 15890 }, { "epoch": 4.991891944951607, "grad_norm": 0.9609375, "learning_rate": 2.9197080291970803e-08, "loss": 1.2308, "step": 15892 }, { "epoch": 4.992520171977148, "grad_norm": 0.95703125, "learning_rate": 2.6658203744842907e-08, "loss": 1.1982, "step": 15894 }, { "epoch": 4.993148399002689, "grad_norm": 0.94140625, "learning_rate": 2.411932719771501e-08, "loss": 1.2134, "step": 15896 }, { "epoch": 4.993776626028231, "grad_norm": 1.0625, "learning_rate": 2.1580450650587117e-08, "loss": 1.2294, "step": 15898 }, { "epoch": 4.994404853053773, "grad_norm": 0.9140625, "learning_rate": 1.904157410345922e-08, "loss": 1.1011, "step": 15900 } ], "logging_steps": 2, "max_steps": 15915, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.306349173411676e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }