|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 90473, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0011053021343384213, |
|
"grad_norm": 4.114652156829834, |
|
"learning_rate": 2.763957987838585e-07, |
|
"loss": 1.1961, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0022106042686768426, |
|
"grad_norm": 2.9325754642486572, |
|
"learning_rate": 5.52791597567717e-07, |
|
"loss": 1.0048, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0033159064030152644, |
|
"grad_norm": 3.6297736167907715, |
|
"learning_rate": 8.291873963515755e-07, |
|
"loss": 0.8568, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.004421208537353685, |
|
"grad_norm": 2.905796766281128, |
|
"learning_rate": 1.105583195135434e-06, |
|
"loss": 0.8767, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.005526510671692107, |
|
"grad_norm": 4.603543758392334, |
|
"learning_rate": 1.3819789939192927e-06, |
|
"loss": 0.7411, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.006631812806030529, |
|
"grad_norm": 3.2700424194335938, |
|
"learning_rate": 1.658374792703151e-06, |
|
"loss": 0.7773, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.00773711494036895, |
|
"grad_norm": 3.0455334186553955, |
|
"learning_rate": 1.9347705914870095e-06, |
|
"loss": 0.7291, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.00884241707470737, |
|
"grad_norm": 3.2247352600097656, |
|
"learning_rate": 2.211166390270868e-06, |
|
"loss": 0.6383, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.009947719209045794, |
|
"grad_norm": 3.087158441543579, |
|
"learning_rate": 2.4875621890547264e-06, |
|
"loss": 0.6445, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.011053021343384215, |
|
"grad_norm": 1.812444806098938, |
|
"learning_rate": 2.7639579878385854e-06, |
|
"loss": 0.631, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.012158323477722636, |
|
"grad_norm": 3.248868465423584, |
|
"learning_rate": 3.0403537866224434e-06, |
|
"loss": 0.6189, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.013263625612061057, |
|
"grad_norm": 1.8088688850402832, |
|
"learning_rate": 3.316749585406302e-06, |
|
"loss": 0.5809, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.014368927746399479, |
|
"grad_norm": 1.9592525959014893, |
|
"learning_rate": 3.5931453841901604e-06, |
|
"loss": 0.5885, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.0154742298807379, |
|
"grad_norm": 2.4960570335388184, |
|
"learning_rate": 3.869541182974019e-06, |
|
"loss": 0.5743, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01657953201507632, |
|
"grad_norm": 1.895150899887085, |
|
"learning_rate": 4.145936981757877e-06, |
|
"loss": 0.5897, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01768483414941474, |
|
"grad_norm": 2.611772060394287, |
|
"learning_rate": 4.422332780541736e-06, |
|
"loss": 0.5213, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.018790136283753162, |
|
"grad_norm": 3.9915366172790527, |
|
"learning_rate": 4.698728579325595e-06, |
|
"loss": 0.5928, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.019895438418091587, |
|
"grad_norm": 2.026409387588501, |
|
"learning_rate": 4.975124378109453e-06, |
|
"loss": 0.5726, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.02100074055243001, |
|
"grad_norm": 1.9394502639770508, |
|
"learning_rate": 4.999987004364365e-06, |
|
"loss": 0.5458, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.02210604268676843, |
|
"grad_norm": 2.1378285884857178, |
|
"learning_rate": 4.999942749379922e-06, |
|
"loss": 0.5452, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02321134482110685, |
|
"grad_norm": 2.2720561027526855, |
|
"learning_rate": 4.999867108486303e-06, |
|
"loss": 0.5195, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.024316646955445272, |
|
"grad_norm": 2.4831795692443848, |
|
"learning_rate": 4.99976008263315e-06, |
|
"loss": 0.5431, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.025421949089783694, |
|
"grad_norm": 3.52687668800354, |
|
"learning_rate": 4.999621673164139e-06, |
|
"loss": 0.5703, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.026527251224122115, |
|
"grad_norm": 2.1417176723480225, |
|
"learning_rate": 4.999451881816949e-06, |
|
"loss": 0.5549, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.027632553358460536, |
|
"grad_norm": 2.2087039947509766, |
|
"learning_rate": 4.999250710723255e-06, |
|
"loss": 0.5664, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.028737855492798958, |
|
"grad_norm": 2.0288796424865723, |
|
"learning_rate": 4.999018162408687e-06, |
|
"loss": 0.5864, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.02984315762713738, |
|
"grad_norm": 1.9152870178222656, |
|
"learning_rate": 4.998754239792809e-06, |
|
"loss": 0.5568, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.0309484597614758, |
|
"grad_norm": 1.9485653638839722, |
|
"learning_rate": 4.998458946189078e-06, |
|
"loss": 0.5706, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.03205376189581422, |
|
"grad_norm": 2.10481595993042, |
|
"learning_rate": 4.9981322853048e-06, |
|
"loss": 0.5501, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.03315906403015264, |
|
"grad_norm": 1.8621227741241455, |
|
"learning_rate": 4.9977742612410905e-06, |
|
"loss": 0.5394, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.034264366164491064, |
|
"grad_norm": 2.057615280151367, |
|
"learning_rate": 4.997384878492817e-06, |
|
"loss": 0.5078, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.03536966829882948, |
|
"grad_norm": 1.742665410041809, |
|
"learning_rate": 4.996964141948542e-06, |
|
"loss": 0.5584, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.03647497043316791, |
|
"grad_norm": 2.150362253189087, |
|
"learning_rate": 4.996512056890468e-06, |
|
"loss": 0.5264, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.037580272567506325, |
|
"grad_norm": 2.3525052070617676, |
|
"learning_rate": 4.996028628994365e-06, |
|
"loss": 0.5828, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.03868557470184475, |
|
"grad_norm": 1.6484140157699585, |
|
"learning_rate": 4.9955138643295e-06, |
|
"loss": 0.52, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.039790876836183174, |
|
"grad_norm": 3.176095724105835, |
|
"learning_rate": 4.994967769358565e-06, |
|
"loss": 0.557, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.04089617897052159, |
|
"grad_norm": 1.66346275806427, |
|
"learning_rate": 4.9943903509375926e-06, |
|
"loss": 0.5121, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.04200148110486002, |
|
"grad_norm": 2.594338893890381, |
|
"learning_rate": 4.9937816163158685e-06, |
|
"loss": 0.4962, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.043106783239198435, |
|
"grad_norm": 2.330629348754883, |
|
"learning_rate": 4.993141573135843e-06, |
|
"loss": 0.5217, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.04421208537353686, |
|
"grad_norm": 2.264955759048462, |
|
"learning_rate": 4.9924702294330375e-06, |
|
"loss": 0.5157, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.04531738750787528, |
|
"grad_norm": 1.9724615812301636, |
|
"learning_rate": 4.991767593635935e-06, |
|
"loss": 0.5294, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.0464226896422137, |
|
"grad_norm": 1.9894862174987793, |
|
"learning_rate": 4.991033674565885e-06, |
|
"loss": 0.5556, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.04752799177655212, |
|
"grad_norm": 1.9730507135391235, |
|
"learning_rate": 4.990268481436984e-06, |
|
"loss": 0.4888, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.048633293910890545, |
|
"grad_norm": 2.208463430404663, |
|
"learning_rate": 4.989472023855966e-06, |
|
"loss": 0.5387, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.04973859604522896, |
|
"grad_norm": 2.394077777862549, |
|
"learning_rate": 4.988644311822076e-06, |
|
"loss": 0.4932, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.05084389817956739, |
|
"grad_norm": 2.514061689376831, |
|
"learning_rate": 4.987785355726953e-06, |
|
"loss": 0.5254, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.051949200313905805, |
|
"grad_norm": 1.8961576223373413, |
|
"learning_rate": 4.9868951663544885e-06, |
|
"loss": 0.5145, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.05305450244824423, |
|
"grad_norm": 2.2813808917999268, |
|
"learning_rate": 4.9859737548807005e-06, |
|
"loss": 0.4982, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.05415980458258265, |
|
"grad_norm": 2.1236634254455566, |
|
"learning_rate": 4.98502113287359e-06, |
|
"loss": 0.5206, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.05526510671692107, |
|
"grad_norm": 2.573836326599121, |
|
"learning_rate": 4.984037312292992e-06, |
|
"loss": 0.4844, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.05637040885125949, |
|
"grad_norm": 1.2394871711730957, |
|
"learning_rate": 4.983022305490431e-06, |
|
"loss": 0.4921, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.057475710985597915, |
|
"grad_norm": 2.2655134201049805, |
|
"learning_rate": 4.9819761252089635e-06, |
|
"loss": 0.5278, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.05858101311993633, |
|
"grad_norm": 1.9459484815597534, |
|
"learning_rate": 4.980898784583019e-06, |
|
"loss": 0.5215, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.05968631525427476, |
|
"grad_norm": 2.574147939682007, |
|
"learning_rate": 4.979790297138232e-06, |
|
"loss": 0.5155, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.060791617388613176, |
|
"grad_norm": 2.5039682388305664, |
|
"learning_rate": 4.9786506767912775e-06, |
|
"loss": 0.5245, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.0618969195229516, |
|
"grad_norm": 2.6227054595947266, |
|
"learning_rate": 4.977479937849689e-06, |
|
"loss": 0.4843, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.06300222165729003, |
|
"grad_norm": 2.1595468521118164, |
|
"learning_rate": 4.9762780950116865e-06, |
|
"loss": 0.4863, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.06410752379162844, |
|
"grad_norm": 1.8619611263275146, |
|
"learning_rate": 4.975045163365989e-06, |
|
"loss": 0.5083, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.06521282592596686, |
|
"grad_norm": 2.270404100418091, |
|
"learning_rate": 4.973781158391621e-06, |
|
"loss": 0.5516, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.06631812806030528, |
|
"grad_norm": 1.9068191051483154, |
|
"learning_rate": 4.972486095957725e-06, |
|
"loss": 0.5058, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.06742343019464371, |
|
"grad_norm": 2.2948782444000244, |
|
"learning_rate": 4.971159992323359e-06, |
|
"loss": 0.5018, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.06852873232898213, |
|
"grad_norm": 3.0896589756011963, |
|
"learning_rate": 4.969802864137289e-06, |
|
"loss": 0.5062, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.06963403446332055, |
|
"grad_norm": 1.7098015546798706, |
|
"learning_rate": 4.96841472843779e-06, |
|
"loss": 0.5067, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.07073933659765896, |
|
"grad_norm": 2.6850175857543945, |
|
"learning_rate": 4.966995602652417e-06, |
|
"loss": 0.5287, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.0718446387319974, |
|
"grad_norm": 1.6628856658935547, |
|
"learning_rate": 4.965545504597802e-06, |
|
"loss": 0.5225, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.07294994086633581, |
|
"grad_norm": 2.279022693634033, |
|
"learning_rate": 4.9640644524794205e-06, |
|
"loss": 0.5026, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.07405524300067423, |
|
"grad_norm": 0.924898624420166, |
|
"learning_rate": 4.962552464891363e-06, |
|
"loss": 0.5354, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.07516054513501265, |
|
"grad_norm": 2.779557228088379, |
|
"learning_rate": 4.961009560816109e-06, |
|
"loss": 0.4776, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.07626584726935108, |
|
"grad_norm": 2.554727077484131, |
|
"learning_rate": 4.9594357596242795e-06, |
|
"loss": 0.4821, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.0773711494036895, |
|
"grad_norm": 1.730661153793335, |
|
"learning_rate": 4.957831081074398e-06, |
|
"loss": 0.4903, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.07847645153802792, |
|
"grad_norm": 2.198575735092163, |
|
"learning_rate": 4.956195545312647e-06, |
|
"loss": 0.4946, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.07958175367236635, |
|
"grad_norm": 1.3369964361190796, |
|
"learning_rate": 4.954529172872605e-06, |
|
"loss": 0.51, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.08068705580670477, |
|
"grad_norm": 2.4426262378692627, |
|
"learning_rate": 4.952831984674998e-06, |
|
"loss": 0.5108, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.08179235794104318, |
|
"grad_norm": 3.9186463356018066, |
|
"learning_rate": 4.951104002027432e-06, |
|
"loss": 0.5086, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.0828976600753816, |
|
"grad_norm": 1.9639850854873657, |
|
"learning_rate": 4.9493452466241254e-06, |
|
"loss": 0.4758, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.08400296220972003, |
|
"grad_norm": 1.2126818895339966, |
|
"learning_rate": 4.94755574054564e-06, |
|
"loss": 0.5017, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.08510826434405845, |
|
"grad_norm": 2.206359386444092, |
|
"learning_rate": 4.945735506258598e-06, |
|
"loss": 0.537, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.08621356647839687, |
|
"grad_norm": 1.7051986455917358, |
|
"learning_rate": 4.943884566615409e-06, |
|
"loss": 0.4835, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.08731886861273529, |
|
"grad_norm": 1.832702398300171, |
|
"learning_rate": 4.942002944853973e-06, |
|
"loss": 0.454, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.08842417074707372, |
|
"grad_norm": 1.8357278108596802, |
|
"learning_rate": 4.940090664597394e-06, |
|
"loss": 0.4972, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.08952947288141214, |
|
"grad_norm": 2.1181540489196777, |
|
"learning_rate": 4.938147749853685e-06, |
|
"loss": 0.5184, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.09063477501575055, |
|
"grad_norm": 1.7029916048049927, |
|
"learning_rate": 4.936174225015463e-06, |
|
"loss": 0.5324, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.09174007715008897, |
|
"grad_norm": 2.0932748317718506, |
|
"learning_rate": 4.934170114859643e-06, |
|
"loss": 0.4806, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.0928453792844274, |
|
"grad_norm": 2.3745322227478027, |
|
"learning_rate": 4.932135444547129e-06, |
|
"loss": 0.4869, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.09395068141876582, |
|
"grad_norm": 2.1215474605560303, |
|
"learning_rate": 4.930070239622498e-06, |
|
"loss": 0.4777, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.09505598355310424, |
|
"grad_norm": 1.7763068675994873, |
|
"learning_rate": 4.9279745260136756e-06, |
|
"loss": 0.478, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.09616128568744266, |
|
"grad_norm": 1.950086236000061, |
|
"learning_rate": 4.925848330031617e-06, |
|
"loss": 0.5048, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.09726658782178109, |
|
"grad_norm": 2.959291696548462, |
|
"learning_rate": 4.923691678369971e-06, |
|
"loss": 0.513, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.09837188995611951, |
|
"grad_norm": 2.3258442878723145, |
|
"learning_rate": 4.921504598104745e-06, |
|
"loss": 0.4896, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.09947719209045792, |
|
"grad_norm": 2.5175669193267822, |
|
"learning_rate": 4.9192871166939715e-06, |
|
"loss": 0.4783, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.10058249422479634, |
|
"grad_norm": 1.981148600578308, |
|
"learning_rate": 4.917039261977353e-06, |
|
"loss": 0.4906, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.10168779635913477, |
|
"grad_norm": 2.439974069595337, |
|
"learning_rate": 4.914761062175925e-06, |
|
"loss": 0.5007, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.10279309849347319, |
|
"grad_norm": 2.8156814575195312, |
|
"learning_rate": 4.912452545891689e-06, |
|
"loss": 0.5203, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.10389840062781161, |
|
"grad_norm": 2.4708168506622314, |
|
"learning_rate": 4.9101137421072605e-06, |
|
"loss": 0.4663, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.10500370276215003, |
|
"grad_norm": 2.4594314098358154, |
|
"learning_rate": 4.907744680185508e-06, |
|
"loss": 0.5027, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.10610900489648846, |
|
"grad_norm": 1.7548918724060059, |
|
"learning_rate": 4.905345389869176e-06, |
|
"loss": 0.4534, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.10721430703082688, |
|
"grad_norm": 1.6353791952133179, |
|
"learning_rate": 4.902915901280517e-06, |
|
"loss": 0.49, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.1083196091651653, |
|
"grad_norm": 3.52217698097229, |
|
"learning_rate": 4.9004562449209146e-06, |
|
"loss": 0.4935, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.10942491129950371, |
|
"grad_norm": 1.6542017459869385, |
|
"learning_rate": 4.897966451670495e-06, |
|
"loss": 0.5118, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.11053021343384214, |
|
"grad_norm": 2.575944185256958, |
|
"learning_rate": 4.895446552787744e-06, |
|
"loss": 0.4977, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.11163551556818056, |
|
"grad_norm": 2.081350088119507, |
|
"learning_rate": 4.8928965799091134e-06, |
|
"loss": 0.5261, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.11274081770251898, |
|
"grad_norm": 2.022676944732666, |
|
"learning_rate": 4.890316565048624e-06, |
|
"loss": 0.4889, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.1138461198368574, |
|
"grad_norm": 1.5808357000350952, |
|
"learning_rate": 4.887706540597461e-06, |
|
"loss": 0.4929, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.11495142197119583, |
|
"grad_norm": 2.1185178756713867, |
|
"learning_rate": 4.8850665393235716e-06, |
|
"loss": 0.4575, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.11605672410553425, |
|
"grad_norm": 2.5382957458496094, |
|
"learning_rate": 4.8823965943712505e-06, |
|
"loss": 0.4979, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.11716202623987267, |
|
"grad_norm": 2.045133590698242, |
|
"learning_rate": 4.879696739260726e-06, |
|
"loss": 0.5215, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.11826732837421108, |
|
"grad_norm": 2.119107484817505, |
|
"learning_rate": 4.876967007887737e-06, |
|
"loss": 0.4754, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.11937263050854952, |
|
"grad_norm": 2.549633502960205, |
|
"learning_rate": 4.8742074345231076e-06, |
|
"loss": 0.5051, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.12047793264288793, |
|
"grad_norm": 3.1271703243255615, |
|
"learning_rate": 4.8714180538123205e-06, |
|
"loss": 0.5036, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.12158323477722635, |
|
"grad_norm": 1.8725048303604126, |
|
"learning_rate": 4.868598900775076e-06, |
|
"loss": 0.4766, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.12268853691156478, |
|
"grad_norm": 1.3768223524093628, |
|
"learning_rate": 4.865750010804857e-06, |
|
"loss": 0.4821, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.1237938390459032, |
|
"grad_norm": 2.7702245712280273, |
|
"learning_rate": 4.8628714196684854e-06, |
|
"loss": 0.5154, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.12489914118024162, |
|
"grad_norm": 2.6272552013397217, |
|
"learning_rate": 4.859963163505668e-06, |
|
"loss": 0.4747, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.12600444331458005, |
|
"grad_norm": 1.649949312210083, |
|
"learning_rate": 4.857025278828545e-06, |
|
"loss": 0.4836, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.12710974544891845, |
|
"grad_norm": 2.358071804046631, |
|
"learning_rate": 4.854057802521234e-06, |
|
"loss": 0.5184, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.12821504758325689, |
|
"grad_norm": 2.5856614112854004, |
|
"learning_rate": 4.851060771839367e-06, |
|
"loss": 0.4818, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.12932034971759532, |
|
"grad_norm": 1.8580783605575562, |
|
"learning_rate": 4.848034224409616e-06, |
|
"loss": 0.4887, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.13042565185193372, |
|
"grad_norm": 2.2157649993896484, |
|
"learning_rate": 4.84497819822923e-06, |
|
"loss": 0.5045, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.13153095398627215, |
|
"grad_norm": 1.4233261346817017, |
|
"learning_rate": 4.841892731665552e-06, |
|
"loss": 0.5147, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.13263625612061056, |
|
"grad_norm": 1.6375737190246582, |
|
"learning_rate": 4.838777863455537e-06, |
|
"loss": 0.4651, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.133741558254949, |
|
"grad_norm": 1.2430723905563354, |
|
"learning_rate": 4.835633632705269e-06, |
|
"loss": 0.4737, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.13484686038928742, |
|
"grad_norm": 2.4360849857330322, |
|
"learning_rate": 4.83246007888947e-06, |
|
"loss": 0.4936, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.13595216252362582, |
|
"grad_norm": 1.9232250452041626, |
|
"learning_rate": 4.8292572418509995e-06, |
|
"loss": 0.4763, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.13705746465796426, |
|
"grad_norm": 2.343539237976074, |
|
"learning_rate": 4.82602516180036e-06, |
|
"loss": 0.4956, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.1381627667923027, |
|
"grad_norm": 1.493943691253662, |
|
"learning_rate": 4.8227638793151875e-06, |
|
"loss": 0.4653, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.1392680689266411, |
|
"grad_norm": 3.257138729095459, |
|
"learning_rate": 4.819473435339748e-06, |
|
"loss": 0.4564, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.14037337106097952, |
|
"grad_norm": 1.8864688873291016, |
|
"learning_rate": 4.816153871184418e-06, |
|
"loss": 0.4667, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.14147867319531793, |
|
"grad_norm": 2.1740174293518066, |
|
"learning_rate": 4.812805228525166e-06, |
|
"loss": 0.4499, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.14258397532965636, |
|
"grad_norm": 1.5121800899505615, |
|
"learning_rate": 4.809427549403033e-06, |
|
"loss": 0.4933, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.1436892774639948, |
|
"grad_norm": 1.604945182800293, |
|
"learning_rate": 4.8060208762236025e-06, |
|
"loss": 0.479, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.1447945795983332, |
|
"grad_norm": 1.933350682258606, |
|
"learning_rate": 4.802585251756468e-06, |
|
"loss": 0.5105, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.14589988173267163, |
|
"grad_norm": 2.8999829292297363, |
|
"learning_rate": 4.799120719134696e-06, |
|
"loss": 0.4689, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.14700518386701006, |
|
"grad_norm": 2.4011030197143555, |
|
"learning_rate": 4.795627321854283e-06, |
|
"loss": 0.4709, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.14811048600134846, |
|
"grad_norm": 2.080972671508789, |
|
"learning_rate": 4.792105103773618e-06, |
|
"loss": 0.4893, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.1492157881356869, |
|
"grad_norm": 2.4878017902374268, |
|
"learning_rate": 4.788554109112918e-06, |
|
"loss": 0.5236, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.1503210902700253, |
|
"grad_norm": 2.1215240955352783, |
|
"learning_rate": 4.78497438245368e-06, |
|
"loss": 0.4817, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.15142639240436373, |
|
"grad_norm": 1.5228586196899414, |
|
"learning_rate": 4.781365968738126e-06, |
|
"loss": 0.4895, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.15253169453870216, |
|
"grad_norm": 2.399446487426758, |
|
"learning_rate": 4.777728913268632e-06, |
|
"loss": 0.4731, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.15363699667304057, |
|
"grad_norm": 2.1382806301116943, |
|
"learning_rate": 4.774063261707158e-06, |
|
"loss": 0.4981, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.154742298807379, |
|
"grad_norm": 1.590667486190796, |
|
"learning_rate": 4.770369060074685e-06, |
|
"loss": 0.4599, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.15584760094171743, |
|
"grad_norm": 1.882934331893921, |
|
"learning_rate": 4.766646354750621e-06, |
|
"loss": 0.5039, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.15695290307605583, |
|
"grad_norm": 1.8898316621780396, |
|
"learning_rate": 4.762895192472235e-06, |
|
"loss": 0.4758, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.15805820521039426, |
|
"grad_norm": 1.6479010581970215, |
|
"learning_rate": 4.759115620334062e-06, |
|
"loss": 0.493, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.1591635073447327, |
|
"grad_norm": 2.28085994720459, |
|
"learning_rate": 4.755307685787312e-06, |
|
"loss": 0.5221, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.1602688094790711, |
|
"grad_norm": 2.697305202484131, |
|
"learning_rate": 4.751471436639271e-06, |
|
"loss": 0.5172, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.16137411161340953, |
|
"grad_norm": 1.897016167640686, |
|
"learning_rate": 4.7476069210527135e-06, |
|
"loss": 0.5284, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.16247941374774794, |
|
"grad_norm": 2.659196376800537, |
|
"learning_rate": 4.743714187545282e-06, |
|
"loss": 0.4776, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.16358471588208637, |
|
"grad_norm": 1.7990115880966187, |
|
"learning_rate": 4.739793284988889e-06, |
|
"loss": 0.4506, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.1646900180164248, |
|
"grad_norm": 2.136432409286499, |
|
"learning_rate": 4.735844262609096e-06, |
|
"loss": 0.4775, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.1657953201507632, |
|
"grad_norm": 1.8059773445129395, |
|
"learning_rate": 4.731867169984506e-06, |
|
"loss": 0.4847, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.16690062228510164, |
|
"grad_norm": 1.7475543022155762, |
|
"learning_rate": 4.727862057046125e-06, |
|
"loss": 0.5092, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.16800592441944007, |
|
"grad_norm": 1.7633237838745117, |
|
"learning_rate": 4.723828974076752e-06, |
|
"loss": 0.4776, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.16911122655377847, |
|
"grad_norm": 1.973683476448059, |
|
"learning_rate": 4.719767971710335e-06, |
|
"loss": 0.4866, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.1702165286881169, |
|
"grad_norm": 2.3195412158966064, |
|
"learning_rate": 4.715679100931343e-06, |
|
"loss": 0.4784, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.1713218308224553, |
|
"grad_norm": 2.262366533279419, |
|
"learning_rate": 4.711562413074122e-06, |
|
"loss": 0.4494, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.17242713295679374, |
|
"grad_norm": 2.2675039768218994, |
|
"learning_rate": 4.707417959822252e-06, |
|
"loss": 0.5182, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.17353243509113217, |
|
"grad_norm": 2.6644225120544434, |
|
"learning_rate": 4.703245793207898e-06, |
|
"loss": 0.4819, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.17463773722547057, |
|
"grad_norm": 1.4928964376449585, |
|
"learning_rate": 4.699045965611157e-06, |
|
"loss": 0.4542, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.175743039359809, |
|
"grad_norm": 1.7893882989883423, |
|
"learning_rate": 4.694818529759399e-06, |
|
"loss": 0.4836, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.17684834149414744, |
|
"grad_norm": 1.5968459844589233, |
|
"learning_rate": 4.690563538726606e-06, |
|
"loss": 0.4702, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.17795364362848584, |
|
"grad_norm": 2.2333779335021973, |
|
"learning_rate": 4.686281045932707e-06, |
|
"loss": 0.4912, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.17905894576282427, |
|
"grad_norm": 1.1746132373809814, |
|
"learning_rate": 4.681971105142905e-06, |
|
"loss": 0.4935, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.18016424789716268, |
|
"grad_norm": 1.5028539896011353, |
|
"learning_rate": 4.677633770467003e-06, |
|
"loss": 0.4908, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.1812695500315011, |
|
"grad_norm": 1.9890942573547363, |
|
"learning_rate": 4.6732690963587256e-06, |
|
"loss": 0.4651, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.18237485216583954, |
|
"grad_norm": 2.262347459793091, |
|
"learning_rate": 4.668877137615032e-06, |
|
"loss": 0.496, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.18348015430017794, |
|
"grad_norm": 2.2725613117218018, |
|
"learning_rate": 4.664457949375434e-06, |
|
"loss": 0.4707, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.18458545643451638, |
|
"grad_norm": 2.965789794921875, |
|
"learning_rate": 4.660011587121297e-06, |
|
"loss": 0.4969, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.1856907585688548, |
|
"grad_norm": 1.5919311046600342, |
|
"learning_rate": 4.655538106675149e-06, |
|
"loss": 0.4985, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.1867960607031932, |
|
"grad_norm": 2.4821956157684326, |
|
"learning_rate": 4.651037564199977e-06, |
|
"loss": 0.4878, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.18790136283753164, |
|
"grad_norm": 1.9851549863815308, |
|
"learning_rate": 4.646510016198521e-06, |
|
"loss": 0.4778, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.18900666497187005, |
|
"grad_norm": 1.9277724027633667, |
|
"learning_rate": 4.641955519512567e-06, |
|
"loss": 0.5302, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.19011196710620848, |
|
"grad_norm": 2.289950132369995, |
|
"learning_rate": 4.637374131322232e-06, |
|
"loss": 0.4646, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.1912172692405469, |
|
"grad_norm": 2.9119439125061035, |
|
"learning_rate": 4.632765909145247e-06, |
|
"loss": 0.5033, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.19232257137488531, |
|
"grad_norm": 1.9241691827774048, |
|
"learning_rate": 4.628130910836234e-06, |
|
"loss": 0.4879, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.19342787350922375, |
|
"grad_norm": 1.1978574991226196, |
|
"learning_rate": 4.623469194585979e-06, |
|
"loss": 0.4675, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.19453317564356218, |
|
"grad_norm": 1.6705842018127441, |
|
"learning_rate": 4.618780818920705e-06, |
|
"loss": 0.4605, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.19563847777790058, |
|
"grad_norm": 2.020331859588623, |
|
"learning_rate": 4.614065842701332e-06, |
|
"loss": 0.4974, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.19674377991223901, |
|
"grad_norm": 2.0887222290039062, |
|
"learning_rate": 4.609324325122743e-06, |
|
"loss": 0.4736, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.19784908204657745, |
|
"grad_norm": 2.283088445663452, |
|
"learning_rate": 4.604556325713035e-06, |
|
"loss": 0.4985, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.19895438418091585, |
|
"grad_norm": 2.186509132385254, |
|
"learning_rate": 4.599761904332778e-06, |
|
"loss": 0.4767, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.20005968631525428, |
|
"grad_norm": 2.262012243270874, |
|
"learning_rate": 4.594941121174262e-06, |
|
"loss": 0.4697, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.20116498844959269, |
|
"grad_norm": 1.634402871131897, |
|
"learning_rate": 4.590094036760736e-06, |
|
"loss": 0.4939, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.20227029058393112, |
|
"grad_norm": 1.883914589881897, |
|
"learning_rate": 4.5852207119456555e-06, |
|
"loss": 0.47, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.20337559271826955, |
|
"grad_norm": 2.231407880783081, |
|
"learning_rate": 4.580321207911912e-06, |
|
"loss": 0.4815, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.20448089485260795, |
|
"grad_norm": 2.605910539627075, |
|
"learning_rate": 4.57539558617107e-06, |
|
"loss": 0.5328, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.20558619698694638, |
|
"grad_norm": 1.1122691631317139, |
|
"learning_rate": 4.570443908562593e-06, |
|
"loss": 0.4606, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.20669149912128482, |
|
"grad_norm": 1.9738783836364746, |
|
"learning_rate": 4.565466237253066e-06, |
|
"loss": 0.4612, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.20779680125562322, |
|
"grad_norm": 3.1255314350128174, |
|
"learning_rate": 4.560462634735416e-06, |
|
"loss": 0.469, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.20890210338996165, |
|
"grad_norm": 2.3683340549468994, |
|
"learning_rate": 4.555433163828126e-06, |
|
"loss": 0.4997, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.21000740552430006, |
|
"grad_norm": 2.482985496520996, |
|
"learning_rate": 4.55037788767445e-06, |
|
"loss": 0.5105, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.2111127076586385, |
|
"grad_norm": 1.7868962287902832, |
|
"learning_rate": 4.545296869741616e-06, |
|
"loss": 0.4899, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.21221800979297692, |
|
"grad_norm": 1.6937700510025024, |
|
"learning_rate": 4.540190173820033e-06, |
|
"loss": 0.5029, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.21332331192731532, |
|
"grad_norm": 1.6983795166015625, |
|
"learning_rate": 4.535057864022486e-06, |
|
"loss": 0.5273, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.21442861406165376, |
|
"grad_norm": 1.446453332901001, |
|
"learning_rate": 4.529900004783334e-06, |
|
"loss": 0.4864, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.2155339161959922, |
|
"grad_norm": 2.247065305709839, |
|
"learning_rate": 4.524716660857701e-06, |
|
"loss": 0.4805, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.2166392183303306, |
|
"grad_norm": 1.6583445072174072, |
|
"learning_rate": 4.519507897320662e-06, |
|
"loss": 0.4631, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.21774452046466902, |
|
"grad_norm": 1.718631625175476, |
|
"learning_rate": 4.514273779566426e-06, |
|
"loss": 0.4893, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.21884982259900743, |
|
"grad_norm": 1.6608977317810059, |
|
"learning_rate": 4.509014373307515e-06, |
|
"loss": 0.483, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.21995512473334586, |
|
"grad_norm": 2.0695135593414307, |
|
"learning_rate": 4.503729744573943e-06, |
|
"loss": 0.5042, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.2210604268676843, |
|
"grad_norm": 1.75504469871521, |
|
"learning_rate": 4.498419959712376e-06, |
|
"loss": 0.4844, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.2221657290020227, |
|
"grad_norm": 3.0820794105529785, |
|
"learning_rate": 4.493085085385314e-06, |
|
"loss": 0.4775, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.22327103113636113, |
|
"grad_norm": 2.3822927474975586, |
|
"learning_rate": 4.487725188570241e-06, |
|
"loss": 0.4563, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.22437633327069956, |
|
"grad_norm": 2.8337135314941406, |
|
"learning_rate": 4.482340336558793e-06, |
|
"loss": 0.4712, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.22548163540503796, |
|
"grad_norm": 2.8210105895996094, |
|
"learning_rate": 4.476930596955909e-06, |
|
"loss": 0.5026, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.2265869375393764, |
|
"grad_norm": 2.012446165084839, |
|
"learning_rate": 4.471496037678982e-06, |
|
"loss": 0.4728, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.2276922396737148, |
|
"grad_norm": 2.477320432662964, |
|
"learning_rate": 4.466036726957008e-06, |
|
"loss": 0.5243, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.22879754180805323, |
|
"grad_norm": 2.1189372539520264, |
|
"learning_rate": 4.460552733329729e-06, |
|
"loss": 0.4414, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.22990284394239166, |
|
"grad_norm": 1.6811827421188354, |
|
"learning_rate": 4.455044125646773e-06, |
|
"loss": 0.4606, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.23100814607673006, |
|
"grad_norm": 1.8918300867080688, |
|
"learning_rate": 4.449510973066785e-06, |
|
"loss": 0.4587, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.2321134482110685, |
|
"grad_norm": 1.6469461917877197, |
|
"learning_rate": 4.44395334505657e-06, |
|
"loss": 0.4811, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.23321875034540693, |
|
"grad_norm": 1.0091384649276733, |
|
"learning_rate": 4.438371311390205e-06, |
|
"loss": 0.4469, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.23432405247974533, |
|
"grad_norm": 1.67509126663208, |
|
"learning_rate": 4.432764942148177e-06, |
|
"loss": 0.4812, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.23542935461408376, |
|
"grad_norm": 2.054719924926758, |
|
"learning_rate": 4.427134307716496e-06, |
|
"loss": 0.4343, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.23653465674842217, |
|
"grad_norm": 2.0753352642059326, |
|
"learning_rate": 4.421479478785814e-06, |
|
"loss": 0.4677, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.2376399588827606, |
|
"grad_norm": 1.5594350099563599, |
|
"learning_rate": 4.415800526350535e-06, |
|
"loss": 0.475, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.23874526101709903, |
|
"grad_norm": 2.458397626876831, |
|
"learning_rate": 4.410097521707926e-06, |
|
"loss": 0.4943, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.23985056315143743, |
|
"grad_norm": 2.180816888809204, |
|
"learning_rate": 4.404370536457221e-06, |
|
"loss": 0.4361, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.24095586528577587, |
|
"grad_norm": 2.4106123447418213, |
|
"learning_rate": 4.3986196424987216e-06, |
|
"loss": 0.5065, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.2420611674201143, |
|
"grad_norm": 2.228212833404541, |
|
"learning_rate": 4.392844912032896e-06, |
|
"loss": 0.4892, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.2431664695544527, |
|
"grad_norm": 2.2582526206970215, |
|
"learning_rate": 4.387046417559471e-06, |
|
"loss": 0.443, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.24427177168879113, |
|
"grad_norm": 3.1825761795043945, |
|
"learning_rate": 4.381224231876521e-06, |
|
"loss": 0.4607, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.24537707382312957, |
|
"grad_norm": 1.9606397151947021, |
|
"learning_rate": 4.375378428079557e-06, |
|
"loss": 0.4431, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.24648237595746797, |
|
"grad_norm": 1.9158498048782349, |
|
"learning_rate": 4.369509079560608e-06, |
|
"loss": 0.4923, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.2475876780918064, |
|
"grad_norm": 2.624380111694336, |
|
"learning_rate": 4.363616260007294e-06, |
|
"loss": 0.4632, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.2486929802261448, |
|
"grad_norm": 1.440521001815796, |
|
"learning_rate": 4.357700043401912e-06, |
|
"loss": 0.4798, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.24979828236048324, |
|
"grad_norm": 2.1393532752990723, |
|
"learning_rate": 4.351760504020496e-06, |
|
"loss": 0.459, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.25090358449482164, |
|
"grad_norm": 1.950707197189331, |
|
"learning_rate": 4.345797716431891e-06, |
|
"loss": 0.5176, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.2520088866291601, |
|
"grad_norm": 2.3011667728424072, |
|
"learning_rate": 4.339811755496817e-06, |
|
"loss": 0.4838, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.2531141887634985, |
|
"grad_norm": 1.6088446378707886, |
|
"learning_rate": 4.333802696366923e-06, |
|
"loss": 0.4588, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.2542194908978369, |
|
"grad_norm": 1.790541410446167, |
|
"learning_rate": 4.327770614483853e-06, |
|
"loss": 0.4824, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.25532479303217537, |
|
"grad_norm": 2.6423535346984863, |
|
"learning_rate": 4.321715585578289e-06, |
|
"loss": 0.4589, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.25643009516651377, |
|
"grad_norm": 1.4211223125457764, |
|
"learning_rate": 4.315637685669006e-06, |
|
"loss": 0.4483, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.2575353973008522, |
|
"grad_norm": 1.9869434833526611, |
|
"learning_rate": 4.30953699106192e-06, |
|
"loss": 0.4658, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.25864069943519064, |
|
"grad_norm": 1.8357223272323608, |
|
"learning_rate": 4.303413578349122e-06, |
|
"loss": 0.4697, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.25974600156952904, |
|
"grad_norm": 1.6129013299942017, |
|
"learning_rate": 4.2972675244079224e-06, |
|
"loss": 0.4612, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.26085130370386744, |
|
"grad_norm": 1.8021016120910645, |
|
"learning_rate": 4.291098906399885e-06, |
|
"loss": 0.4536, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.26195660583820585, |
|
"grad_norm": 1.4587496519088745, |
|
"learning_rate": 4.2849078017698565e-06, |
|
"loss": 0.4347, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.2630619079725443, |
|
"grad_norm": 2.1143853664398193, |
|
"learning_rate": 4.2786942882449965e-06, |
|
"loss": 0.4478, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.2641672101068827, |
|
"grad_norm": 1.9837020635604858, |
|
"learning_rate": 4.272458443833801e-06, |
|
"loss": 0.4586, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.2652725122412211, |
|
"grad_norm": 1.6629817485809326, |
|
"learning_rate": 4.266200346825119e-06, |
|
"loss": 0.4609, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.2663778143755596, |
|
"grad_norm": 2.2694997787475586, |
|
"learning_rate": 4.259920075787177e-06, |
|
"loss": 0.4506, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.267483116509898, |
|
"grad_norm": 2.3292577266693115, |
|
"learning_rate": 4.253617709566588e-06, |
|
"loss": 0.4517, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.2685884186442364, |
|
"grad_norm": 2.215757369995117, |
|
"learning_rate": 4.247293327287359e-06, |
|
"loss": 0.4598, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.26969372077857484, |
|
"grad_norm": 2.3665645122528076, |
|
"learning_rate": 4.240947008349905e-06, |
|
"loss": 0.4926, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.27079902291291325, |
|
"grad_norm": 2.2286605834960938, |
|
"learning_rate": 4.234578832430047e-06, |
|
"loss": 0.4665, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.27190432504725165, |
|
"grad_norm": 2.3083527088165283, |
|
"learning_rate": 4.228188879478011e-06, |
|
"loss": 0.4841, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.2730096271815901, |
|
"grad_norm": 1.8674919605255127, |
|
"learning_rate": 4.221777229717428e-06, |
|
"loss": 0.464, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.2741149293159285, |
|
"grad_norm": 2.442124605178833, |
|
"learning_rate": 4.215343963644324e-06, |
|
"loss": 0.4462, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.2752202314502669, |
|
"grad_norm": 1.761814832687378, |
|
"learning_rate": 4.2088891620261106e-06, |
|
"loss": 0.4811, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.2763255335846054, |
|
"grad_norm": 1.81318998336792, |
|
"learning_rate": 4.20241290590057e-06, |
|
"loss": 0.4819, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.2774308357189438, |
|
"grad_norm": 2.6324472427368164, |
|
"learning_rate": 4.1959152765748405e-06, |
|
"loss": 0.4942, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.2785361378532822, |
|
"grad_norm": 1.9197957515716553, |
|
"learning_rate": 4.189396355624389e-06, |
|
"loss": 0.4411, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.27964143998762064, |
|
"grad_norm": 2.736686944961548, |
|
"learning_rate": 4.182856224891997e-06, |
|
"loss": 0.4679, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.28074674212195905, |
|
"grad_norm": 1.2711482048034668, |
|
"learning_rate": 4.176294966486722e-06, |
|
"loss": 0.4621, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.28185204425629745, |
|
"grad_norm": 2.046609401702881, |
|
"learning_rate": 4.169712662782876e-06, |
|
"loss": 0.4733, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.28295734639063586, |
|
"grad_norm": 1.6701066493988037, |
|
"learning_rate": 4.163109396418986e-06, |
|
"loss": 0.4771, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.2840626485249743, |
|
"grad_norm": 1.8547199964523315, |
|
"learning_rate": 4.156485250296757e-06, |
|
"loss": 0.4596, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.2851679506593127, |
|
"grad_norm": 2.2946977615356445, |
|
"learning_rate": 4.149840307580033e-06, |
|
"loss": 0.4497, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.2862732527936511, |
|
"grad_norm": 2.6851511001586914, |
|
"learning_rate": 4.143174651693753e-06, |
|
"loss": 0.4497, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.2873785549279896, |
|
"grad_norm": 2.5896623134613037, |
|
"learning_rate": 4.1364883663229e-06, |
|
"loss": 0.4664, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.288483857062328, |
|
"grad_norm": 2.0162718296051025, |
|
"learning_rate": 4.129781535411456e-06, |
|
"loss": 0.4614, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.2895891591966664, |
|
"grad_norm": 2.3387439250946045, |
|
"learning_rate": 4.123054243161342e-06, |
|
"loss": 0.4867, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.29069446133100485, |
|
"grad_norm": 2.132131338119507, |
|
"learning_rate": 4.116306574031366e-06, |
|
"loss": 0.4741, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.29179976346534325, |
|
"grad_norm": 1.7863556146621704, |
|
"learning_rate": 4.109538612736161e-06, |
|
"loss": 0.4492, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.29290506559968166, |
|
"grad_norm": 2.3342113494873047, |
|
"learning_rate": 4.10275044424512e-06, |
|
"loss": 0.47, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.2940103677340201, |
|
"grad_norm": 2.0262320041656494, |
|
"learning_rate": 4.095942153781329e-06, |
|
"loss": 0.4635, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.2951156698683585, |
|
"grad_norm": 2.9538447856903076, |
|
"learning_rate": 4.0891138268205025e-06, |
|
"loss": 0.4477, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.2962209720026969, |
|
"grad_norm": 2.5609724521636963, |
|
"learning_rate": 4.082265549089902e-06, |
|
"loss": 0.4546, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.2973262741370354, |
|
"grad_norm": 2.4035484790802, |
|
"learning_rate": 4.075397406567265e-06, |
|
"loss": 0.494, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.2984315762713738, |
|
"grad_norm": 1.2948765754699707, |
|
"learning_rate": 4.068509485479726e-06, |
|
"loss": 0.485, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.2995368784057122, |
|
"grad_norm": 1.7401434183120728, |
|
"learning_rate": 4.061601872302732e-06, |
|
"loss": 0.4451, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.3006421805400506, |
|
"grad_norm": 1.718982219696045, |
|
"learning_rate": 4.054674653758956e-06, |
|
"loss": 0.4837, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.30174748267438906, |
|
"grad_norm": 2.159252166748047, |
|
"learning_rate": 4.047727916817211e-06, |
|
"loss": 0.4709, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.30285278480872746, |
|
"grad_norm": 1.9981988668441772, |
|
"learning_rate": 4.040761748691356e-06, |
|
"loss": 0.468, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.30395808694306586, |
|
"grad_norm": 2.0982799530029297, |
|
"learning_rate": 4.033776236839202e-06, |
|
"loss": 0.4637, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.3050633890774043, |
|
"grad_norm": 2.9962141513824463, |
|
"learning_rate": 4.0267714689614124e-06, |
|
"loss": 0.4695, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.3061686912117427, |
|
"grad_norm": 2.803635597229004, |
|
"learning_rate": 4.019747533000405e-06, |
|
"loss": 0.4771, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.30727399334608113, |
|
"grad_norm": 1.8022634983062744, |
|
"learning_rate": 4.012704517139248e-06, |
|
"loss": 0.4672, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.3083792954804196, |
|
"grad_norm": 1.9764262437820435, |
|
"learning_rate": 4.005642509800545e-06, |
|
"loss": 0.4842, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.309484597614758, |
|
"grad_norm": 2.3172965049743652, |
|
"learning_rate": 3.998561599645338e-06, |
|
"loss": 0.4747, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.3105898997490964, |
|
"grad_norm": 3.117851972579956, |
|
"learning_rate": 3.9914618755719816e-06, |
|
"loss": 0.4857, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.31169520188343486, |
|
"grad_norm": 2.1363372802734375, |
|
"learning_rate": 3.984343426715036e-06, |
|
"loss": 0.4405, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.31280050401777326, |
|
"grad_norm": 2.1967580318450928, |
|
"learning_rate": 3.977206342444144e-06, |
|
"loss": 0.4626, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.31390580615211167, |
|
"grad_norm": 1.6863844394683838, |
|
"learning_rate": 3.970050712362908e-06, |
|
"loss": 0.4505, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.3150111082864501, |
|
"grad_norm": 2.1374428272247314, |
|
"learning_rate": 3.962876626307769e-06, |
|
"loss": 0.4522, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.31611641042078853, |
|
"grad_norm": 2.230015754699707, |
|
"learning_rate": 3.955684174346872e-06, |
|
"loss": 0.4331, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.31722171255512693, |
|
"grad_norm": 2.7188756465911865, |
|
"learning_rate": 3.948473446778947e-06, |
|
"loss": 0.4788, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.3183270146894654, |
|
"grad_norm": 1.7964341640472412, |
|
"learning_rate": 3.94124453413216e-06, |
|
"loss": 0.4442, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.3194323168238038, |
|
"grad_norm": 1.4361404180526733, |
|
"learning_rate": 3.933997527162987e-06, |
|
"loss": 0.4868, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.3205376189581422, |
|
"grad_norm": 2.0563929080963135, |
|
"learning_rate": 3.926732516855075e-06, |
|
"loss": 0.4921, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.3216429210924806, |
|
"grad_norm": 1.55277419090271, |
|
"learning_rate": 3.919449594418094e-06, |
|
"loss": 0.4877, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.32274822322681906, |
|
"grad_norm": 2.299819231033325, |
|
"learning_rate": 3.912148851286593e-06, |
|
"loss": 0.468, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.32385352536115747, |
|
"grad_norm": 1.409555435180664, |
|
"learning_rate": 3.904830379118857e-06, |
|
"loss": 0.4279, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.32495882749549587, |
|
"grad_norm": 1.9166666269302368, |
|
"learning_rate": 3.89749426979575e-06, |
|
"loss": 0.4732, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.32606412962983433, |
|
"grad_norm": 2.2752537727355957, |
|
"learning_rate": 3.890140615419566e-06, |
|
"loss": 0.4605, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.32716943176417274, |
|
"grad_norm": 1.6896592378616333, |
|
"learning_rate": 3.882769508312871e-06, |
|
"loss": 0.4513, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.32827473389851114, |
|
"grad_norm": 1.8940850496292114, |
|
"learning_rate": 3.875381041017343e-06, |
|
"loss": 0.4665, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.3293800360328496, |
|
"grad_norm": 2.7840423583984375, |
|
"learning_rate": 3.867975306292612e-06, |
|
"loss": 0.472, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.330485338167188, |
|
"grad_norm": 1.7090684175491333, |
|
"learning_rate": 3.860552397115093e-06, |
|
"loss": 0.4239, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.3315906403015264, |
|
"grad_norm": 1.5519531965255737, |
|
"learning_rate": 3.853112406676823e-06, |
|
"loss": 0.4537, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.33269594243586487, |
|
"grad_norm": 2.7194883823394775, |
|
"learning_rate": 3.845655428384286e-06, |
|
"loss": 0.5102, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.33380124457020327, |
|
"grad_norm": 2.118680000305176, |
|
"learning_rate": 3.838181555857243e-06, |
|
"loss": 0.4915, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.3349065467045417, |
|
"grad_norm": 2.484039545059204, |
|
"learning_rate": 3.830690882927558e-06, |
|
"loss": 0.4603, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.33601184883888013, |
|
"grad_norm": 2.0341908931732178, |
|
"learning_rate": 3.823183503638014e-06, |
|
"loss": 0.4684, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.33711715097321854, |
|
"grad_norm": 0.9588632583618164, |
|
"learning_rate": 3.815659512241141e-06, |
|
"loss": 0.4963, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.33822245310755694, |
|
"grad_norm": 2.8853650093078613, |
|
"learning_rate": 3.8081190031980266e-06, |
|
"loss": 0.4801, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.33932775524189535, |
|
"grad_norm": 1.7053953409194946, |
|
"learning_rate": 3.8005620711771318e-06, |
|
"loss": 0.4591, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.3404330573762338, |
|
"grad_norm": 2.16013765335083, |
|
"learning_rate": 3.7929888110530998e-06, |
|
"loss": 0.4598, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.3415383595105722, |
|
"grad_norm": 2.3963918685913086, |
|
"learning_rate": 3.7853993179055724e-06, |
|
"loss": 0.4681, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.3426436616449106, |
|
"grad_norm": 3.2389566898345947, |
|
"learning_rate": 3.7777936870179873e-06, |
|
"loss": 0.4717, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.3437489637792491, |
|
"grad_norm": 2.17598032951355, |
|
"learning_rate": 3.7701720138763877e-06, |
|
"loss": 0.4573, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.3448542659135875, |
|
"grad_norm": 2.4974260330200195, |
|
"learning_rate": 3.7625343941682203e-06, |
|
"loss": 0.4681, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.3459595680479259, |
|
"grad_norm": 2.331465721130371, |
|
"learning_rate": 3.7548809237811378e-06, |
|
"loss": 0.4953, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.34706487018226434, |
|
"grad_norm": 1.782915711402893, |
|
"learning_rate": 3.7472116988017906e-06, |
|
"loss": 0.4257, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.34817017231660274, |
|
"grad_norm": 1.96134352684021, |
|
"learning_rate": 3.7395268155146232e-06, |
|
"loss": 0.4489, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.34927547445094115, |
|
"grad_norm": 1.6746424436569214, |
|
"learning_rate": 3.731826370400663e-06, |
|
"loss": 0.4748, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.3503807765852796, |
|
"grad_norm": 1.7693666219711304, |
|
"learning_rate": 3.7241104601363154e-06, |
|
"loss": 0.4783, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.351486078719618, |
|
"grad_norm": 1.4009222984313965, |
|
"learning_rate": 3.7163791815921394e-06, |
|
"loss": 0.4648, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.3525913808539564, |
|
"grad_norm": 2.408993721008301, |
|
"learning_rate": 3.708632631831643e-06, |
|
"loss": 0.4382, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.3536966829882949, |
|
"grad_norm": 1.713916540145874, |
|
"learning_rate": 3.7008709081100537e-06, |
|
"loss": 0.4258, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.3548019851226333, |
|
"grad_norm": 2.0615127086639404, |
|
"learning_rate": 3.6930941078731065e-06, |
|
"loss": 0.4874, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.3559072872569717, |
|
"grad_norm": 2.3877241611480713, |
|
"learning_rate": 3.685302328755815e-06, |
|
"loss": 0.507, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.35701258939131014, |
|
"grad_norm": 2.4597456455230713, |
|
"learning_rate": 3.6774956685812496e-06, |
|
"loss": 0.4513, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.35811789152564855, |
|
"grad_norm": 2.5451297760009766, |
|
"learning_rate": 3.6696742253593035e-06, |
|
"loss": 0.4419, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.35922319365998695, |
|
"grad_norm": 2.2447433471679688, |
|
"learning_rate": 3.6618380972854694e-06, |
|
"loss": 0.4669, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.36032849579432535, |
|
"grad_norm": 1.7082650661468506, |
|
"learning_rate": 3.6539873827396023e-06, |
|
"loss": 0.4352, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.3614337979286638, |
|
"grad_norm": 1.607082486152649, |
|
"learning_rate": 3.646122180284683e-06, |
|
"loss": 0.4595, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.3625391000630022, |
|
"grad_norm": 1.835105299949646, |
|
"learning_rate": 3.638242588665587e-06, |
|
"loss": 0.4674, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.3636444021973406, |
|
"grad_norm": 1.7002040147781372, |
|
"learning_rate": 3.630348706807836e-06, |
|
"loss": 0.4746, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.3647497043316791, |
|
"grad_norm": 2.184178590774536, |
|
"learning_rate": 3.622440633816366e-06, |
|
"loss": 0.4388, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.3658550064660175, |
|
"grad_norm": 2.1649866104125977, |
|
"learning_rate": 3.6145184689742716e-06, |
|
"loss": 0.4499, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.3669603086003559, |
|
"grad_norm": 1.3153752088546753, |
|
"learning_rate": 3.6065823117415716e-06, |
|
"loss": 0.4391, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.36806561073469435, |
|
"grad_norm": 1.944061279296875, |
|
"learning_rate": 3.5986322617539506e-06, |
|
"loss": 0.4833, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.36917091286903275, |
|
"grad_norm": 1.6162335872650146, |
|
"learning_rate": 3.590668418821513e-06, |
|
"loss": 0.4889, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.37027621500337116, |
|
"grad_norm": 1.623404622077942, |
|
"learning_rate": 3.5826908829275296e-06, |
|
"loss": 0.4698, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.3713815171377096, |
|
"grad_norm": 1.830082654953003, |
|
"learning_rate": 3.57469975422718e-06, |
|
"loss": 0.507, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.372486819272048, |
|
"grad_norm": 2.138823986053467, |
|
"learning_rate": 3.5666951330462972e-06, |
|
"loss": 0.4419, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.3735921214063864, |
|
"grad_norm": 2.455385208129883, |
|
"learning_rate": 3.558677119880109e-06, |
|
"loss": 0.4729, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.3746974235407249, |
|
"grad_norm": 3.052379846572876, |
|
"learning_rate": 3.550645815391973e-06, |
|
"loss": 0.447, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.3758027256750633, |
|
"grad_norm": 1.8502277135849, |
|
"learning_rate": 3.542601320412116e-06, |
|
"loss": 0.4545, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.3769080278094017, |
|
"grad_norm": 2.621030569076538, |
|
"learning_rate": 3.534543735936366e-06, |
|
"loss": 0.4832, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.3780133299437401, |
|
"grad_norm": 1.681999683380127, |
|
"learning_rate": 3.5264731631248867e-06, |
|
"loss": 0.4813, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.37911863207807855, |
|
"grad_norm": 1.8637994527816772, |
|
"learning_rate": 3.5183897033009018e-06, |
|
"loss": 0.5013, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.38022393421241696, |
|
"grad_norm": 1.9797747135162354, |
|
"learning_rate": 3.510293457949433e-06, |
|
"loss": 0.4473, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.38132923634675536, |
|
"grad_norm": 2.2267913818359375, |
|
"learning_rate": 3.502184528716013e-06, |
|
"loss": 0.455, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.3824345384810938, |
|
"grad_norm": 1.919852375984192, |
|
"learning_rate": 3.494063017405423e-06, |
|
"loss": 0.447, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.3835398406154322, |
|
"grad_norm": 2.838737964630127, |
|
"learning_rate": 3.485929025980402e-06, |
|
"loss": 0.4447, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.38464514274977063, |
|
"grad_norm": 1.7883715629577637, |
|
"learning_rate": 3.477782656560377e-06, |
|
"loss": 0.4897, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.3857504448841091, |
|
"grad_norm": 1.9990206956863403, |
|
"learning_rate": 3.469624011420173e-06, |
|
"loss": 0.4533, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.3868557470184475, |
|
"grad_norm": 3.673203706741333, |
|
"learning_rate": 3.461453192988734e-06, |
|
"loss": 0.4813, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.3879610491527859, |
|
"grad_norm": 1.820590853691101, |
|
"learning_rate": 3.4532703038478368e-06, |
|
"loss": 0.4582, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.38906635128712436, |
|
"grad_norm": 1.6964892148971558, |
|
"learning_rate": 3.445075446730798e-06, |
|
"loss": 0.4355, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.39017165342146276, |
|
"grad_norm": 2.7785258293151855, |
|
"learning_rate": 3.4368687245211914e-06, |
|
"loss": 0.4744, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.39127695555580116, |
|
"grad_norm": 2.661006212234497, |
|
"learning_rate": 3.4286502402515504e-06, |
|
"loss": 0.4512, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.3923822576901396, |
|
"grad_norm": 1.379711389541626, |
|
"learning_rate": 3.4204200971020796e-06, |
|
"loss": 0.4727, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.39348755982447803, |
|
"grad_norm": 2.01283860206604, |
|
"learning_rate": 3.412178398399355e-06, |
|
"loss": 0.4774, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.39459286195881643, |
|
"grad_norm": 1.920944094657898, |
|
"learning_rate": 3.4039252476150284e-06, |
|
"loss": 0.4775, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.3956981640931549, |
|
"grad_norm": 1.920350193977356, |
|
"learning_rate": 3.39566074836453e-06, |
|
"loss": 0.4526, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.3968034662274933, |
|
"grad_norm": 2.782977819442749, |
|
"learning_rate": 3.3873850044057633e-06, |
|
"loss": 0.4541, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.3979087683618317, |
|
"grad_norm": 2.4611635208129883, |
|
"learning_rate": 3.3790981196378086e-06, |
|
"loss": 0.4964, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.3990140704961701, |
|
"grad_norm": 1.8741673231124878, |
|
"learning_rate": 3.370800198099613e-06, |
|
"loss": 0.435, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.40011937263050856, |
|
"grad_norm": 1.919241189956665, |
|
"learning_rate": 3.362491343968687e-06, |
|
"loss": 0.4386, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.40122467476484697, |
|
"grad_norm": 2.52968168258667, |
|
"learning_rate": 3.3541716615597948e-06, |
|
"loss": 0.4545, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.40232997689918537, |
|
"grad_norm": 2.964994430541992, |
|
"learning_rate": 3.3458412553236475e-06, |
|
"loss": 0.4551, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.40343527903352383, |
|
"grad_norm": 2.7886335849761963, |
|
"learning_rate": 3.337500229845592e-06, |
|
"loss": 0.477, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.40454058116786223, |
|
"grad_norm": 1.9467898607254028, |
|
"learning_rate": 3.329148689844289e-06, |
|
"loss": 0.4546, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.40564588330220064, |
|
"grad_norm": 1.1720269918441772, |
|
"learning_rate": 3.320786740170414e-06, |
|
"loss": 0.4759, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.4067511854365391, |
|
"grad_norm": 2.1939995288848877, |
|
"learning_rate": 3.3124144858053252e-06, |
|
"loss": 0.4456, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.4078564875708775, |
|
"grad_norm": 2.350830078125, |
|
"learning_rate": 3.304032031859759e-06, |
|
"loss": 0.4683, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.4089617897052159, |
|
"grad_norm": 2.4557292461395264, |
|
"learning_rate": 3.295639483572498e-06, |
|
"loss": 0.4415, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.41006709183955437, |
|
"grad_norm": 1.3871397972106934, |
|
"learning_rate": 3.287236946309059e-06, |
|
"loss": 0.4635, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.41117239397389277, |
|
"grad_norm": 2.129850387573242, |
|
"learning_rate": 3.2788245255603675e-06, |
|
"loss": 0.4888, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.4122776961082312, |
|
"grad_norm": 1.527912974357605, |
|
"learning_rate": 3.2704023269414304e-06, |
|
"loss": 0.4848, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.41338299824256963, |
|
"grad_norm": 1.9338812828063965, |
|
"learning_rate": 3.261970456190014e-06, |
|
"loss": 0.5031, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.41448830037690804, |
|
"grad_norm": 1.9333993196487427, |
|
"learning_rate": 3.253529019165314e-06, |
|
"loss": 0.4533, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.41559360251124644, |
|
"grad_norm": 2.1915063858032227, |
|
"learning_rate": 3.2450781218466274e-06, |
|
"loss": 0.4508, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.41669890464558484, |
|
"grad_norm": 2.150376319885254, |
|
"learning_rate": 3.2366178703320232e-06, |
|
"loss": 0.4359, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.4178042067799233, |
|
"grad_norm": 2.5346415042877197, |
|
"learning_rate": 3.2281483708370074e-06, |
|
"loss": 0.474, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.4189095089142617, |
|
"grad_norm": 2.2632484436035156, |
|
"learning_rate": 3.2196697296931915e-06, |
|
"loss": 0.4317, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.4200148110486001, |
|
"grad_norm": 2.7014644145965576, |
|
"learning_rate": 3.2111820533469577e-06, |
|
"loss": 0.4493, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.42112011318293857, |
|
"grad_norm": 1.923828363418579, |
|
"learning_rate": 3.202685448358122e-06, |
|
"loss": 0.4884, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.422225415317277, |
|
"grad_norm": 2.4021315574645996, |
|
"learning_rate": 3.1941800213985964e-06, |
|
"loss": 0.4457, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.4233307174516154, |
|
"grad_norm": 1.7797712087631226, |
|
"learning_rate": 3.1856658792510485e-06, |
|
"loss": 0.4786, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.42443601958595384, |
|
"grad_norm": 2.1778018474578857, |
|
"learning_rate": 3.177143128807565e-06, |
|
"loss": 0.4695, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.42554132172029224, |
|
"grad_norm": 2.2871477603912354, |
|
"learning_rate": 3.168611877068302e-06, |
|
"loss": 0.4766, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.42664662385463065, |
|
"grad_norm": 3.016216993331909, |
|
"learning_rate": 3.1600722311401515e-06, |
|
"loss": 0.4544, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.4277519259889691, |
|
"grad_norm": 1.759264349937439, |
|
"learning_rate": 3.1515242982353876e-06, |
|
"loss": 0.4414, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.4288572281233075, |
|
"grad_norm": 2.0453083515167236, |
|
"learning_rate": 3.1429681856703287e-06, |
|
"loss": 0.4471, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.4299625302576459, |
|
"grad_norm": 1.5130780935287476, |
|
"learning_rate": 3.1344040008639797e-06, |
|
"loss": 0.4469, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.4310678323919844, |
|
"grad_norm": 1.812267541885376, |
|
"learning_rate": 3.1258318513366975e-06, |
|
"loss": 0.4754, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.4321731345263228, |
|
"grad_norm": 1.798132300376892, |
|
"learning_rate": 3.1172518447088264e-06, |
|
"loss": 0.4519, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.4332784366606612, |
|
"grad_norm": 2.252378463745117, |
|
"learning_rate": 3.108664088699358e-06, |
|
"loss": 0.4622, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.4343837387949996, |
|
"grad_norm": 1.2119619846343994, |
|
"learning_rate": 3.100068691124572e-06, |
|
"loss": 0.4541, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.43548904092933804, |
|
"grad_norm": 1.4428755044937134, |
|
"learning_rate": 3.091465759896688e-06, |
|
"loss": 0.4731, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.43659434306367645, |
|
"grad_norm": 1.7551451921463013, |
|
"learning_rate": 3.082855403022507e-06, |
|
"loss": 0.441, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.43769964519801485, |
|
"grad_norm": 1.55975341796875, |
|
"learning_rate": 3.0742377286020547e-06, |
|
"loss": 0.4249, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.4388049473323533, |
|
"grad_norm": 1.1946512460708618, |
|
"learning_rate": 3.0656128448272284e-06, |
|
"loss": 0.4709, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.4399102494666917, |
|
"grad_norm": 1.1257880926132202, |
|
"learning_rate": 3.0569808599804345e-06, |
|
"loss": 0.4307, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.4410155516010301, |
|
"grad_norm": 1.8002004623413086, |
|
"learning_rate": 3.048341882433232e-06, |
|
"loss": 0.4612, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.4421208537353686, |
|
"grad_norm": 2.031006097793579, |
|
"learning_rate": 3.039696020644972e-06, |
|
"loss": 0.4554, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.443226155869707, |
|
"grad_norm": 2.301436185836792, |
|
"learning_rate": 3.0310433831614307e-06, |
|
"loss": 0.4387, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.4443314580040454, |
|
"grad_norm": 1.4582908153533936, |
|
"learning_rate": 3.0223840786134553e-06, |
|
"loss": 0.455, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.44543676013838385, |
|
"grad_norm": 2.0824360847473145, |
|
"learning_rate": 3.013718215715593e-06, |
|
"loss": 0.4828, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.44654206227272225, |
|
"grad_norm": 2.2939536571502686, |
|
"learning_rate": 3.0050459032647306e-06, |
|
"loss": 0.457, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.44764736440706066, |
|
"grad_norm": 2.297245979309082, |
|
"learning_rate": 2.9963672501387247e-06, |
|
"loss": 0.4778, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.4487526665413991, |
|
"grad_norm": 1.8728293180465698, |
|
"learning_rate": 2.987682365295038e-06, |
|
"loss": 0.4448, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.4498579686757375, |
|
"grad_norm": 1.5255945920944214, |
|
"learning_rate": 2.978991357769371e-06, |
|
"loss": 0.4472, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.4509632708100759, |
|
"grad_norm": 2.7456576824188232, |
|
"learning_rate": 2.9702943366742915e-06, |
|
"loss": 0.4668, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.4520685729444144, |
|
"grad_norm": 2.2749907970428467, |
|
"learning_rate": 2.961591411197865e-06, |
|
"loss": 0.4483, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.4531738750787528, |
|
"grad_norm": 2.1402695178985596, |
|
"learning_rate": 2.9528826906022843e-06, |
|
"loss": 0.4487, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.4542791772130912, |
|
"grad_norm": 2.3826072216033936, |
|
"learning_rate": 2.944168284222502e-06, |
|
"loss": 0.4953, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.4553844793474296, |
|
"grad_norm": 2.2698001861572266, |
|
"learning_rate": 2.9354483014648463e-06, |
|
"loss": 0.484, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.45648978148176805, |
|
"grad_norm": 1.9907783269882202, |
|
"learning_rate": 2.926722851805661e-06, |
|
"loss": 0.4398, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.45759508361610646, |
|
"grad_norm": 1.5543720722198486, |
|
"learning_rate": 2.917992044789923e-06, |
|
"loss": 0.4363, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.45870038575044486, |
|
"grad_norm": 1.8793258666992188, |
|
"learning_rate": 2.909255990029869e-06, |
|
"loss": 0.4567, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.4598056878847833, |
|
"grad_norm": 2.4277260303497314, |
|
"learning_rate": 2.900514797203617e-06, |
|
"loss": 0.4491, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.4609109900191217, |
|
"grad_norm": 2.2503464221954346, |
|
"learning_rate": 2.891768576053797e-06, |
|
"loss": 0.4804, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.46201629215346013, |
|
"grad_norm": 1.4896454811096191, |
|
"learning_rate": 2.8830174363861635e-06, |
|
"loss": 0.4403, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.4631215942877986, |
|
"grad_norm": 2.510836601257324, |
|
"learning_rate": 2.874261488068221e-06, |
|
"loss": 0.451, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.464226896422137, |
|
"grad_norm": 1.5463513135910034, |
|
"learning_rate": 2.8655008410278482e-06, |
|
"loss": 0.4671, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.4653321985564754, |
|
"grad_norm": 2.300896167755127, |
|
"learning_rate": 2.856735605251912e-06, |
|
"loss": 0.4348, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.46643750069081386, |
|
"grad_norm": 2.3069446086883545, |
|
"learning_rate": 2.8479658907848893e-06, |
|
"loss": 0.4478, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.46754280282515226, |
|
"grad_norm": 2.1205623149871826, |
|
"learning_rate": 2.8391918077274873e-06, |
|
"loss": 0.4346, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.46864810495949066, |
|
"grad_norm": 1.638277292251587, |
|
"learning_rate": 2.830413466235258e-06, |
|
"loss": 0.4395, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.4697534070938291, |
|
"grad_norm": 2.0386252403259277, |
|
"learning_rate": 2.8216309765172156e-06, |
|
"loss": 0.4421, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.4708587092281675, |
|
"grad_norm": 2.241922378540039, |
|
"learning_rate": 2.8128444488344565e-06, |
|
"loss": 0.4518, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.47196401136250593, |
|
"grad_norm": 2.304940938949585, |
|
"learning_rate": 2.8040539934987697e-06, |
|
"loss": 0.4803, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.47306931349684433, |
|
"grad_norm": 2.377882480621338, |
|
"learning_rate": 2.795259720871256e-06, |
|
"loss": 0.4637, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.4741746156311828, |
|
"grad_norm": 1.9520049095153809, |
|
"learning_rate": 2.7864617413609414e-06, |
|
"loss": 0.452, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.4752799177655212, |
|
"grad_norm": 2.1737561225891113, |
|
"learning_rate": 2.777660165423388e-06, |
|
"loss": 0.4622, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.4763852198998596, |
|
"grad_norm": 1.6113853454589844, |
|
"learning_rate": 2.7688551035593125e-06, |
|
"loss": 0.448, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.47749052203419806, |
|
"grad_norm": 2.39670729637146, |
|
"learning_rate": 2.760046666313196e-06, |
|
"loss": 0.4512, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.47859582416853647, |
|
"grad_norm": 1.8168816566467285, |
|
"learning_rate": 2.7512349642718927e-06, |
|
"loss": 0.4712, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.47970112630287487, |
|
"grad_norm": 1.6397266387939453, |
|
"learning_rate": 2.7424201080632516e-06, |
|
"loss": 0.4569, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.48080642843721333, |
|
"grad_norm": 2.2524404525756836, |
|
"learning_rate": 2.7336022083547153e-06, |
|
"loss": 0.4882, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.48191173057155173, |
|
"grad_norm": 2.5701520442962646, |
|
"learning_rate": 2.72478137585194e-06, |
|
"loss": 0.4593, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.48301703270589014, |
|
"grad_norm": 1.691336989402771, |
|
"learning_rate": 2.7159577212973985e-06, |
|
"loss": 0.4743, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.4841223348402286, |
|
"grad_norm": 1.9625279903411865, |
|
"learning_rate": 2.7071313554689994e-06, |
|
"loss": 0.4834, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.485227636974567, |
|
"grad_norm": 1.4627450704574585, |
|
"learning_rate": 2.6983023891786835e-06, |
|
"loss": 0.4513, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.4863329391089054, |
|
"grad_norm": 2.0734519958496094, |
|
"learning_rate": 2.689470933271045e-06, |
|
"loss": 0.4611, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.48743824124324386, |
|
"grad_norm": 1.5627169609069824, |
|
"learning_rate": 2.6806370986219305e-06, |
|
"loss": 0.445, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.48854354337758227, |
|
"grad_norm": 2.4556682109832764, |
|
"learning_rate": 2.6718009961370544e-06, |
|
"loss": 0.4255, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.48964884551192067, |
|
"grad_norm": 1.817841649055481, |
|
"learning_rate": 2.6629627367505996e-06, |
|
"loss": 0.4725, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.49075414764625913, |
|
"grad_norm": 2.1898646354675293, |
|
"learning_rate": 2.6541224314238306e-06, |
|
"loss": 0.4321, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.49185944978059754, |
|
"grad_norm": 1.9783952236175537, |
|
"learning_rate": 2.645280191143697e-06, |
|
"loss": 0.473, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.49296475191493594, |
|
"grad_norm": 2.2066643238067627, |
|
"learning_rate": 2.6364361269214404e-06, |
|
"loss": 0.4388, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.49407005404927434, |
|
"grad_norm": 1.5500693321228027, |
|
"learning_rate": 2.627590349791203e-06, |
|
"loss": 0.4515, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.4951753561836128, |
|
"grad_norm": 1.9073359966278076, |
|
"learning_rate": 2.6187429708086304e-06, |
|
"loss": 0.4475, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.4962806583179512, |
|
"grad_norm": 1.692548394203186, |
|
"learning_rate": 2.6098941010494793e-06, |
|
"loss": 0.4116, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.4973859604522896, |
|
"grad_norm": 1.8653684854507446, |
|
"learning_rate": 2.6010438516082244e-06, |
|
"loss": 0.4462, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.49849126258662807, |
|
"grad_norm": 2.772581100463867, |
|
"learning_rate": 2.592192333596658e-06, |
|
"loss": 0.4465, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.4995965647209665, |
|
"grad_norm": 1.9330416917800903, |
|
"learning_rate": 2.583339658142503e-06, |
|
"loss": 0.4693, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.5007018668553049, |
|
"grad_norm": 1.846220850944519, |
|
"learning_rate": 2.574485936388011e-06, |
|
"loss": 0.4782, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.5018071689896433, |
|
"grad_norm": 1.9324105978012085, |
|
"learning_rate": 2.5656312794885696e-06, |
|
"loss": 0.476, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.5029124711239817, |
|
"grad_norm": 1.4215826988220215, |
|
"learning_rate": 2.5567757986113082e-06, |
|
"loss": 0.4404, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.5040177732583202, |
|
"grad_norm": 2.124636173248291, |
|
"learning_rate": 2.5479196049336994e-06, |
|
"loss": 0.4685, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.5051230753926585, |
|
"grad_norm": 2.1870932579040527, |
|
"learning_rate": 2.5390628096421675e-06, |
|
"loss": 0.4384, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.506228377526997, |
|
"grad_norm": 2.281766891479492, |
|
"learning_rate": 2.5302055239306857e-06, |
|
"loss": 0.4849, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.5073336796613355, |
|
"grad_norm": 2.991182804107666, |
|
"learning_rate": 2.5213478589993884e-06, |
|
"loss": 0.4585, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.5084389817956738, |
|
"grad_norm": 2.271472930908203, |
|
"learning_rate": 2.5124899260531667e-06, |
|
"loss": 0.4459, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.5095442839300123, |
|
"grad_norm": 1.7806503772735596, |
|
"learning_rate": 2.5036318363002816e-06, |
|
"loss": 0.4448, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.5106495860643507, |
|
"grad_norm": 2.3559248447418213, |
|
"learning_rate": 2.4947737009509577e-06, |
|
"loss": 0.4468, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.5117548881986891, |
|
"grad_norm": 2.1456425189971924, |
|
"learning_rate": 2.4859156312159945e-06, |
|
"loss": 0.4304, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.5128601903330275, |
|
"grad_norm": 2.4595870971679688, |
|
"learning_rate": 2.4770577383053695e-06, |
|
"loss": 0.4756, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.513965492467366, |
|
"grad_norm": 1.6186550855636597, |
|
"learning_rate": 2.4682001334268376e-06, |
|
"loss": 0.4246, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.5150707946017044, |
|
"grad_norm": 2.1293444633483887, |
|
"learning_rate": 2.4593429277845366e-06, |
|
"loss": 0.4373, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.5161760967360428, |
|
"grad_norm": 2.4468750953674316, |
|
"learning_rate": 2.450486232577596e-06, |
|
"loss": 0.4722, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.5172813988703813, |
|
"grad_norm": 1.3718825578689575, |
|
"learning_rate": 2.441630158998734e-06, |
|
"loss": 0.4625, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.5183867010047196, |
|
"grad_norm": 1.7043936252593994, |
|
"learning_rate": 2.432774818232865e-06, |
|
"loss": 0.4889, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.5194920031390581, |
|
"grad_norm": 1.942793607711792, |
|
"learning_rate": 2.4239203214557026e-06, |
|
"loss": 0.4539, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.5205973052733965, |
|
"grad_norm": 2.086621046066284, |
|
"learning_rate": 2.4150667798323664e-06, |
|
"loss": 0.4303, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.5217026074077349, |
|
"grad_norm": 2.2322304248809814, |
|
"learning_rate": 2.406214304515982e-06, |
|
"loss": 0.4616, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.5228079095420733, |
|
"grad_norm": 1.703951120376587, |
|
"learning_rate": 2.3973630066462895e-06, |
|
"loss": 0.4479, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.5239132116764117, |
|
"grad_norm": 1.6014420986175537, |
|
"learning_rate": 2.3885129973482475e-06, |
|
"loss": 0.4269, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.5250185138107502, |
|
"grad_norm": 2.385668992996216, |
|
"learning_rate": 2.379664387730634e-06, |
|
"loss": 0.4284, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.5261238159450886, |
|
"grad_norm": 2.08682918548584, |
|
"learning_rate": 2.370817288884656e-06, |
|
"loss": 0.4573, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.527229118079427, |
|
"grad_norm": 1.9396214485168457, |
|
"learning_rate": 2.3619718118825536e-06, |
|
"loss": 0.4701, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.5283344202137654, |
|
"grad_norm": 1.9038134813308716, |
|
"learning_rate": 2.3531280677762064e-06, |
|
"loss": 0.4437, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.5294397223481039, |
|
"grad_norm": 2.4148266315460205, |
|
"learning_rate": 2.3442861675957353e-06, |
|
"loss": 0.4264, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.5305450244824422, |
|
"grad_norm": 2.0972328186035156, |
|
"learning_rate": 2.3354462223481126e-06, |
|
"loss": 0.4461, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.5316503266167807, |
|
"grad_norm": 2.8991668224334717, |
|
"learning_rate": 2.326608343015769e-06, |
|
"loss": 0.4461, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.5327556287511191, |
|
"grad_norm": 1.24418306350708, |
|
"learning_rate": 2.3177726405551953e-06, |
|
"loss": 0.4329, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.5338609308854575, |
|
"grad_norm": 1.501638650894165, |
|
"learning_rate": 2.308939225895554e-06, |
|
"loss": 0.4252, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.534966233019796, |
|
"grad_norm": 1.7708169221878052, |
|
"learning_rate": 2.300108209937284e-06, |
|
"loss": 0.4492, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.5360715351541344, |
|
"grad_norm": 1.757341980934143, |
|
"learning_rate": 2.2912797035507118e-06, |
|
"loss": 0.4342, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.5371768372884728, |
|
"grad_norm": 1.7680574655532837, |
|
"learning_rate": 2.2824538175746554e-06, |
|
"loss": 0.4524, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.5382821394228112, |
|
"grad_norm": 2.0074987411499023, |
|
"learning_rate": 2.2736306628150322e-06, |
|
"loss": 0.436, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.5393874415571497, |
|
"grad_norm": 1.9048947095870972, |
|
"learning_rate": 2.2648103500434756e-06, |
|
"loss": 0.4189, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.540492743691488, |
|
"grad_norm": 2.519080638885498, |
|
"learning_rate": 2.255992989995934e-06, |
|
"loss": 0.4251, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.5415980458258265, |
|
"grad_norm": 2.2120232582092285, |
|
"learning_rate": 2.247178693371288e-06, |
|
"loss": 0.4933, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.542703347960165, |
|
"grad_norm": 1.7563016414642334, |
|
"learning_rate": 2.238367570829954e-06, |
|
"loss": 0.4602, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.5438086500945033, |
|
"grad_norm": 1.5373327732086182, |
|
"learning_rate": 2.229559732992507e-06, |
|
"loss": 0.4792, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.5449139522288418, |
|
"grad_norm": 2.573272228240967, |
|
"learning_rate": 2.220755290438275e-06, |
|
"loss": 0.4659, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.5460192543631802, |
|
"grad_norm": 1.7102992534637451, |
|
"learning_rate": 2.211954353703965e-06, |
|
"loss": 0.4553, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.5471245564975186, |
|
"grad_norm": 2.3353729248046875, |
|
"learning_rate": 2.203157033282265e-06, |
|
"loss": 0.4307, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.548229858631857, |
|
"grad_norm": 1.7641658782958984, |
|
"learning_rate": 2.194363439620468e-06, |
|
"loss": 0.4648, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.5493351607661955, |
|
"grad_norm": 1.2468318939208984, |
|
"learning_rate": 2.1855736831190723e-06, |
|
"loss": 0.4616, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.5504404629005338, |
|
"grad_norm": 2.137446880340576, |
|
"learning_rate": 2.1767878741304044e-06, |
|
"loss": 0.4671, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.5515457650348723, |
|
"grad_norm": 2.4773776531219482, |
|
"learning_rate": 2.1680061229572343e-06, |
|
"loss": 0.4737, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.5526510671692108, |
|
"grad_norm": 2.0055341720581055, |
|
"learning_rate": 2.1592285398513815e-06, |
|
"loss": 0.4533, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.5537563693035491, |
|
"grad_norm": 1.876347303390503, |
|
"learning_rate": 2.150455235012342e-06, |
|
"loss": 0.4208, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.5548616714378876, |
|
"grad_norm": 2.5351920127868652, |
|
"learning_rate": 2.1416863185858964e-06, |
|
"loss": 0.4404, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.555966973572226, |
|
"grad_norm": 1.0931345224380493, |
|
"learning_rate": 2.132921900662733e-06, |
|
"loss": 0.4465, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.5570722757065644, |
|
"grad_norm": 2.0798308849334717, |
|
"learning_rate": 2.1241620912770612e-06, |
|
"loss": 0.4152, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.5581775778409028, |
|
"grad_norm": 1.6538605690002441, |
|
"learning_rate": 2.115407000405231e-06, |
|
"loss": 0.4209, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.5592828799752413, |
|
"grad_norm": 2.1094820499420166, |
|
"learning_rate": 2.1066567379643557e-06, |
|
"loss": 0.4367, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.5603881821095796, |
|
"grad_norm": 2.1819286346435547, |
|
"learning_rate": 2.097911413810928e-06, |
|
"loss": 0.4525, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.5614934842439181, |
|
"grad_norm": 2.0643765926361084, |
|
"learning_rate": 2.089171137739441e-06, |
|
"loss": 0.4504, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.5625987863782564, |
|
"grad_norm": 1.5290354490280151, |
|
"learning_rate": 2.0804360194810117e-06, |
|
"loss": 0.4313, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.5637040885125949, |
|
"grad_norm": 1.9766910076141357, |
|
"learning_rate": 2.0717061687020047e-06, |
|
"loss": 0.4177, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.5648093906469334, |
|
"grad_norm": 1.1951794624328613, |
|
"learning_rate": 2.0629816950026505e-06, |
|
"loss": 0.5075, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.5659146927812717, |
|
"grad_norm": 2.3847384452819824, |
|
"learning_rate": 2.054262707915671e-06, |
|
"loss": 0.4196, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.5670199949156102, |
|
"grad_norm": 1.665724515914917, |
|
"learning_rate": 2.0455493169049115e-06, |
|
"loss": 0.4333, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.5681252970499486, |
|
"grad_norm": 1.6288607120513916, |
|
"learning_rate": 2.036841631363954e-06, |
|
"loss": 0.4853, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.569230599184287, |
|
"grad_norm": 2.2280824184417725, |
|
"learning_rate": 2.028139760614754e-06, |
|
"loss": 0.456, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.5703359013186254, |
|
"grad_norm": 2.9321858882904053, |
|
"learning_rate": 2.019443813906262e-06, |
|
"loss": 0.4694, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.5714412034529639, |
|
"grad_norm": 2.381856918334961, |
|
"learning_rate": 2.0107539004130577e-06, |
|
"loss": 0.4679, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.5725465055873022, |
|
"grad_norm": 2.0987162590026855, |
|
"learning_rate": 2.002070129233972e-06, |
|
"loss": 0.4611, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.5736518077216407, |
|
"grad_norm": 2.339217185974121, |
|
"learning_rate": 1.993392609390723e-06, |
|
"loss": 0.5007, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.5747571098559792, |
|
"grad_norm": 1.3680297136306763, |
|
"learning_rate": 1.984721449826547e-06, |
|
"loss": 0.4823, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.5758624119903175, |
|
"grad_norm": 1.494996190071106, |
|
"learning_rate": 1.976056759404827e-06, |
|
"loss": 0.4528, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.576967714124656, |
|
"grad_norm": 2.1765034198760986, |
|
"learning_rate": 1.967398646907728e-06, |
|
"loss": 0.4476, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.5780730162589944, |
|
"grad_norm": 1.8729513883590698, |
|
"learning_rate": 1.9587472210348318e-06, |
|
"loss": 0.4626, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.5791783183933328, |
|
"grad_norm": 1.8249151706695557, |
|
"learning_rate": 1.950102590401774e-06, |
|
"loss": 0.4488, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.5802836205276712, |
|
"grad_norm": 1.604670763015747, |
|
"learning_rate": 1.9414648635388765e-06, |
|
"loss": 0.4385, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.5813889226620097, |
|
"grad_norm": 1.7172939777374268, |
|
"learning_rate": 1.932834148889785e-06, |
|
"loss": 0.452, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.582494224796348, |
|
"grad_norm": 2.7707228660583496, |
|
"learning_rate": 1.924210554810114e-06, |
|
"loss": 0.4213, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.5835995269306865, |
|
"grad_norm": 1.858169436454773, |
|
"learning_rate": 1.9155941895660775e-06, |
|
"loss": 0.4422, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.584704829065025, |
|
"grad_norm": 2.9702155590057373, |
|
"learning_rate": 1.9069851613331363e-06, |
|
"loss": 0.4903, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.5858101311993633, |
|
"grad_norm": 1.5274828672409058, |
|
"learning_rate": 1.8983835781946355e-06, |
|
"loss": 0.4359, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.5869154333337018, |
|
"grad_norm": 1.5798296928405762, |
|
"learning_rate": 1.8897895481404523e-06, |
|
"loss": 0.4666, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.5880207354680402, |
|
"grad_norm": 2.6816885471343994, |
|
"learning_rate": 1.8812031790656365e-06, |
|
"loss": 0.4603, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.5891260376023786, |
|
"grad_norm": 2.24021577835083, |
|
"learning_rate": 1.8726245787690556e-06, |
|
"loss": 0.4434, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.590231339736717, |
|
"grad_norm": 2.0478105545043945, |
|
"learning_rate": 1.8640538549520432e-06, |
|
"loss": 0.4547, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.5913366418710555, |
|
"grad_norm": 2.7488420009613037, |
|
"learning_rate": 1.8554911152170491e-06, |
|
"loss": 0.401, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.5924419440053939, |
|
"grad_norm": 1.8583904504776, |
|
"learning_rate": 1.8469364670662838e-06, |
|
"loss": 0.4229, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.5935472461397323, |
|
"grad_norm": 2.7477619647979736, |
|
"learning_rate": 1.8383900179003678e-06, |
|
"loss": 0.453, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.5946525482740708, |
|
"grad_norm": 2.0758025646209717, |
|
"learning_rate": 1.829851875016993e-06, |
|
"loss": 0.4528, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.5957578504084091, |
|
"grad_norm": 1.2921638488769531, |
|
"learning_rate": 1.8213221456095626e-06, |
|
"loss": 0.45, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.5968631525427476, |
|
"grad_norm": 1.9033405780792236, |
|
"learning_rate": 1.812800936765855e-06, |
|
"loss": 0.4489, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.597968454677086, |
|
"grad_norm": 2.5085136890411377, |
|
"learning_rate": 1.8042883554666733e-06, |
|
"loss": 0.4501, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.5990737568114244, |
|
"grad_norm": 1.3407922983169556, |
|
"learning_rate": 1.7957845085845086e-06, |
|
"loss": 0.4581, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.6001790589457628, |
|
"grad_norm": 1.598039150238037, |
|
"learning_rate": 1.7872895028821902e-06, |
|
"loss": 0.4406, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.6012843610801012, |
|
"grad_norm": 1.5193266868591309, |
|
"learning_rate": 1.7788034450115522e-06, |
|
"loss": 0.4412, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.6023896632144397, |
|
"grad_norm": 2.39776611328125, |
|
"learning_rate": 1.7703264415120912e-06, |
|
"loss": 0.4446, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.6034949653487781, |
|
"grad_norm": 2.233445167541504, |
|
"learning_rate": 1.7618585988096292e-06, |
|
"loss": 0.4512, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.6046002674831165, |
|
"grad_norm": 3.316636323928833, |
|
"learning_rate": 1.7534000232149772e-06, |
|
"loss": 0.4617, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.6057055696174549, |
|
"grad_norm": 1.9188458919525146, |
|
"learning_rate": 1.7449508209226007e-06, |
|
"loss": 0.4551, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.6068108717517934, |
|
"grad_norm": 2.422166109085083, |
|
"learning_rate": 1.7365110980092886e-06, |
|
"loss": 0.4213, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.6079161738861317, |
|
"grad_norm": 1.886583685874939, |
|
"learning_rate": 1.7280809604328175e-06, |
|
"loss": 0.4424, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.6090214760204702, |
|
"grad_norm": 2.0250625610351562, |
|
"learning_rate": 1.7196605140306227e-06, |
|
"loss": 0.4474, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.6101267781548086, |
|
"grad_norm": 1.9184309244155884, |
|
"learning_rate": 1.7112498645184734e-06, |
|
"loss": 0.4483, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.611232080289147, |
|
"grad_norm": 1.7985000610351562, |
|
"learning_rate": 1.7028491174891395e-06, |
|
"loss": 0.4395, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.6123373824234855, |
|
"grad_norm": 2.2696986198425293, |
|
"learning_rate": 1.6944583784110702e-06, |
|
"loss": 0.46, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.6134426845578239, |
|
"grad_norm": 1.9761462211608887, |
|
"learning_rate": 1.6860777526270663e-06, |
|
"loss": 0.4514, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.6145479866921623, |
|
"grad_norm": 1.6298624277114868, |
|
"learning_rate": 1.6777073453529628e-06, |
|
"loss": 0.4339, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.6156532888265007, |
|
"grad_norm": 1.7984713315963745, |
|
"learning_rate": 1.6693472616763023e-06, |
|
"loss": 0.4364, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.6167585909608392, |
|
"grad_norm": 2.747307777404785, |
|
"learning_rate": 1.6609976065550188e-06, |
|
"loss": 0.4817, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.6178638930951775, |
|
"grad_norm": 2.802546739578247, |
|
"learning_rate": 1.6526584848161214e-06, |
|
"loss": 0.4566, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.618969195229516, |
|
"grad_norm": 1.783996820449829, |
|
"learning_rate": 1.644330001154373e-06, |
|
"loss": 0.4595, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.6200744973638544, |
|
"grad_norm": 2.170027494430542, |
|
"learning_rate": 1.6360122601309819e-06, |
|
"loss": 0.4608, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.6211797994981928, |
|
"grad_norm": 1.9390249252319336, |
|
"learning_rate": 1.6277053661722836e-06, |
|
"loss": 0.4632, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.6222851016325313, |
|
"grad_norm": 1.528578281402588, |
|
"learning_rate": 1.6194094235684363e-06, |
|
"loss": 0.4299, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.6233904037668697, |
|
"grad_norm": 2.1283223628997803, |
|
"learning_rate": 1.611124536472104e-06, |
|
"loss": 0.4758, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.6244957059012081, |
|
"grad_norm": 1.7181930541992188, |
|
"learning_rate": 1.6028508088971542e-06, |
|
"loss": 0.4408, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.6256010080355465, |
|
"grad_norm": 1.5925639867782593, |
|
"learning_rate": 1.5945883447173516e-06, |
|
"loss": 0.4125, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.626706310169885, |
|
"grad_norm": 2.1560404300689697, |
|
"learning_rate": 1.5863372476650518e-06, |
|
"loss": 0.4572, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.6278116123042233, |
|
"grad_norm": 1.5837538242340088, |
|
"learning_rate": 1.5780976213298987e-06, |
|
"loss": 0.4234, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.6289169144385618, |
|
"grad_norm": 1.7496099472045898, |
|
"learning_rate": 1.5698695691575278e-06, |
|
"loss": 0.4622, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.6300222165729003, |
|
"grad_norm": 1.950454592704773, |
|
"learning_rate": 1.5616531944482639e-06, |
|
"loss": 0.46, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.6311275187072386, |
|
"grad_norm": 1.873214840888977, |
|
"learning_rate": 1.5534486003558256e-06, |
|
"loss": 0.4349, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.6322328208415771, |
|
"grad_norm": 2.442535877227783, |
|
"learning_rate": 1.5452558898860289e-06, |
|
"loss": 0.4525, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.6333381229759155, |
|
"grad_norm": 2.4935104846954346, |
|
"learning_rate": 1.5370751658954962e-06, |
|
"loss": 0.4348, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.6344434251102539, |
|
"grad_norm": 2.2208077907562256, |
|
"learning_rate": 1.5289065310903642e-06, |
|
"loss": 0.4525, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.6355487272445923, |
|
"grad_norm": 2.645033121109009, |
|
"learning_rate": 1.5207500880249937e-06, |
|
"loss": 0.4303, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.6366540293789308, |
|
"grad_norm": 2.4756534099578857, |
|
"learning_rate": 1.5126059391006806e-06, |
|
"loss": 0.4273, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.6377593315132691, |
|
"grad_norm": 2.156022548675537, |
|
"learning_rate": 1.5044741865643752e-06, |
|
"loss": 0.4363, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.6388646336476076, |
|
"grad_norm": 1.1067718267440796, |
|
"learning_rate": 1.4963549325073937e-06, |
|
"loss": 0.477, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.6399699357819459, |
|
"grad_norm": 2.1002750396728516, |
|
"learning_rate": 1.488248278864139e-06, |
|
"loss": 0.4241, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.6410752379162844, |
|
"grad_norm": 2.1461567878723145, |
|
"learning_rate": 1.4801543274108182e-06, |
|
"loss": 0.461, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.6421805400506229, |
|
"grad_norm": 1.992863655090332, |
|
"learning_rate": 1.4720731797641701e-06, |
|
"loss": 0.4419, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.6432858421849612, |
|
"grad_norm": 1.8167692422866821, |
|
"learning_rate": 1.464004937380184e-06, |
|
"loss": 0.4239, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.6443911443192997, |
|
"grad_norm": 1.0601933002471924, |
|
"learning_rate": 1.4559497015528278e-06, |
|
"loss": 0.4534, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.6454964464536381, |
|
"grad_norm": 1.5626897811889648, |
|
"learning_rate": 1.4479075734127795e-06, |
|
"loss": 0.4109, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.6466017485879765, |
|
"grad_norm": 2.2622973918914795, |
|
"learning_rate": 1.4398786539261515e-06, |
|
"loss": 0.4546, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.6477070507223149, |
|
"grad_norm": 2.4710042476654053, |
|
"learning_rate": 1.4318630438932258e-06, |
|
"loss": 0.4442, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.6488123528566534, |
|
"grad_norm": 2.6686673164367676, |
|
"learning_rate": 1.4238608439471916e-06, |
|
"loss": 0.442, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.6499176549909917, |
|
"grad_norm": 1.9529846906661987, |
|
"learning_rate": 1.4158721545528786e-06, |
|
"loss": 0.4719, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.6510229571253302, |
|
"grad_norm": 1.6578528881072998, |
|
"learning_rate": 1.4078970760054952e-06, |
|
"loss": 0.4729, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.6521282592596687, |
|
"grad_norm": 1.7940270900726318, |
|
"learning_rate": 1.399935708429368e-06, |
|
"loss": 0.4512, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.653233561394007, |
|
"grad_norm": 1.85922372341156, |
|
"learning_rate": 1.3919881517766941e-06, |
|
"loss": 0.4402, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.6543388635283455, |
|
"grad_norm": 2.1098904609680176, |
|
"learning_rate": 1.3840545058262729e-06, |
|
"loss": 0.4497, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.6554441656626839, |
|
"grad_norm": 1.5995895862579346, |
|
"learning_rate": 1.376134870182262e-06, |
|
"loss": 0.4626, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.6565494677970223, |
|
"grad_norm": 1.8691281080245972, |
|
"learning_rate": 1.3682293442729217e-06, |
|
"loss": 0.4674, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.6576547699313607, |
|
"grad_norm": 2.0507023334503174, |
|
"learning_rate": 1.3603380273493769e-06, |
|
"loss": 0.4547, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.6587600720656992, |
|
"grad_norm": 1.5811275243759155, |
|
"learning_rate": 1.3524610184843567e-06, |
|
"loss": 0.4523, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.6598653742000375, |
|
"grad_norm": 1.8390048742294312, |
|
"learning_rate": 1.3445984165709586e-06, |
|
"loss": 0.436, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.660970676334376, |
|
"grad_norm": 2.165388345718384, |
|
"learning_rate": 1.3367503203214078e-06, |
|
"loss": 0.4259, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.6620759784687145, |
|
"grad_norm": 1.9885059595108032, |
|
"learning_rate": 1.3289168282658167e-06, |
|
"loss": 0.4394, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.6631812806030528, |
|
"grad_norm": 0.8709326386451721, |
|
"learning_rate": 1.3210980387509436e-06, |
|
"loss": 0.4507, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.6642865827373913, |
|
"grad_norm": 1.6904494762420654, |
|
"learning_rate": 1.3132940499389634e-06, |
|
"loss": 0.4469, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.6653918848717297, |
|
"grad_norm": 2.0872297286987305, |
|
"learning_rate": 1.3055049598062347e-06, |
|
"loss": 0.4256, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.6664971870060681, |
|
"grad_norm": 3.0559935569763184, |
|
"learning_rate": 1.2977308661420657e-06, |
|
"loss": 0.5023, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.6676024891404065, |
|
"grad_norm": 1.9940212965011597, |
|
"learning_rate": 1.2899718665474913e-06, |
|
"loss": 0.4416, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.668707791274745, |
|
"grad_norm": 1.7937722206115723, |
|
"learning_rate": 1.2822280584340458e-06, |
|
"loss": 0.4676, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.6698130934090833, |
|
"grad_norm": 3.7665975093841553, |
|
"learning_rate": 1.2744995390225378e-06, |
|
"loss": 0.4159, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.6709183955434218, |
|
"grad_norm": 2.6829941272735596, |
|
"learning_rate": 1.2667864053418316e-06, |
|
"loss": 0.4499, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.6720236976777603, |
|
"grad_norm": 3.8452253341674805, |
|
"learning_rate": 1.2590887542276314e-06, |
|
"loss": 0.4391, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.6731289998120986, |
|
"grad_norm": 2.4866082668304443, |
|
"learning_rate": 1.2514066823212623e-06, |
|
"loss": 0.4567, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.6742343019464371, |
|
"grad_norm": 1.9398912191390991, |
|
"learning_rate": 1.2437402860684566e-06, |
|
"loss": 0.479, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.6753396040807755, |
|
"grad_norm": 2.085367202758789, |
|
"learning_rate": 1.2360896617181442e-06, |
|
"loss": 0.441, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.6764449062151139, |
|
"grad_norm": 1.9988934993743896, |
|
"learning_rate": 1.2284549053212461e-06, |
|
"loss": 0.4435, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.6775502083494523, |
|
"grad_norm": 1.8229702711105347, |
|
"learning_rate": 1.2208361127294662e-06, |
|
"loss": 0.4682, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.6786555104837907, |
|
"grad_norm": 2.7625458240509033, |
|
"learning_rate": 1.2132333795940873e-06, |
|
"loss": 0.4731, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.6797608126181292, |
|
"grad_norm": 2.0298068523406982, |
|
"learning_rate": 1.2056468013647699e-06, |
|
"loss": 0.4599, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.6808661147524676, |
|
"grad_norm": 1.9047514200210571, |
|
"learning_rate": 1.1980764732883613e-06, |
|
"loss": 0.4431, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.681971416886806, |
|
"grad_norm": 2.681807041168213, |
|
"learning_rate": 1.1905224904076873e-06, |
|
"loss": 0.4317, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.6830767190211444, |
|
"grad_norm": 1.9497393369674683, |
|
"learning_rate": 1.1829849475603683e-06, |
|
"loss": 0.4383, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.6841820211554829, |
|
"grad_norm": 1.764805555343628, |
|
"learning_rate": 1.1754639393776238e-06, |
|
"loss": 0.4375, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.6852873232898212, |
|
"grad_norm": 1.5404030084609985, |
|
"learning_rate": 1.1679595602830913e-06, |
|
"loss": 0.4419, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.6863926254241597, |
|
"grad_norm": 1.7731199264526367, |
|
"learning_rate": 1.160471904491631e-06, |
|
"loss": 0.4104, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.6874979275584981, |
|
"grad_norm": 2.781113862991333, |
|
"learning_rate": 1.153001066008149e-06, |
|
"loss": 0.4098, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.6886032296928365, |
|
"grad_norm": 2.170764207839966, |
|
"learning_rate": 1.1455471386264164e-06, |
|
"loss": 0.4386, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.689708531827175, |
|
"grad_norm": 1.8785371780395508, |
|
"learning_rate": 1.138110215927893e-06, |
|
"loss": 0.4689, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.6908138339615134, |
|
"grad_norm": 3.2463815212249756, |
|
"learning_rate": 1.1306903912805483e-06, |
|
"loss": 0.5066, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.6919191360958518, |
|
"grad_norm": 1.5964540243148804, |
|
"learning_rate": 1.123287757837691e-06, |
|
"loss": 0.4719, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.6930244382301902, |
|
"grad_norm": 2.1385936737060547, |
|
"learning_rate": 1.1159024085368031e-06, |
|
"loss": 0.4397, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.6941297403645287, |
|
"grad_norm": 1.62234628200531, |
|
"learning_rate": 1.1085344360983696e-06, |
|
"loss": 0.4167, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.695235042498867, |
|
"grad_norm": 2.0470333099365234, |
|
"learning_rate": 1.1011839330247128e-06, |
|
"loss": 0.4526, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.6963403446332055, |
|
"grad_norm": 2.6171181201934814, |
|
"learning_rate": 1.0938509915988362e-06, |
|
"loss": 0.4793, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.697445646767544, |
|
"grad_norm": 2.3599164485931396, |
|
"learning_rate": 1.08653570388326e-06, |
|
"loss": 0.4159, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.6985509489018823, |
|
"grad_norm": 2.1658973693847656, |
|
"learning_rate": 1.079238161718871e-06, |
|
"loss": 0.4399, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.6996562510362208, |
|
"grad_norm": 2.165238618850708, |
|
"learning_rate": 1.0719584567237646e-06, |
|
"loss": 0.4545, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.7007615531705592, |
|
"grad_norm": 1.8751685619354248, |
|
"learning_rate": 1.0646966802920986e-06, |
|
"loss": 0.4699, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.7018668553048976, |
|
"grad_norm": 2.2241878509521484, |
|
"learning_rate": 1.0574529235929424e-06, |
|
"loss": 0.418, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.702972157439236, |
|
"grad_norm": 2.227008104324341, |
|
"learning_rate": 1.050227277569133e-06, |
|
"loss": 0.4435, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.7040774595735745, |
|
"grad_norm": 2.7472541332244873, |
|
"learning_rate": 1.043019832936139e-06, |
|
"loss": 0.48, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.7051827617079128, |
|
"grad_norm": 1.427216649055481, |
|
"learning_rate": 1.0358306801809123e-06, |
|
"loss": 0.4621, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.7062880638422513, |
|
"grad_norm": 2.6720409393310547, |
|
"learning_rate": 1.0286599095607576e-06, |
|
"loss": 0.4494, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.7073933659765897, |
|
"grad_norm": 2.212963342666626, |
|
"learning_rate": 1.021507611102197e-06, |
|
"loss": 0.4605, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.7084986681109281, |
|
"grad_norm": 1.640894889831543, |
|
"learning_rate": 1.014373874599846e-06, |
|
"loss": 0.4313, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.7096039702452666, |
|
"grad_norm": 1.8810545206069946, |
|
"learning_rate": 1.0072587896152769e-06, |
|
"loss": 0.4316, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.710709272379605, |
|
"grad_norm": 2.1144118309020996, |
|
"learning_rate": 1.0001624454758983e-06, |
|
"loss": 0.4435, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.7118145745139434, |
|
"grad_norm": 1.9362212419509888, |
|
"learning_rate": 9.930849312738366e-07, |
|
"loss": 0.4532, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.7129198766482818, |
|
"grad_norm": 2.598273277282715, |
|
"learning_rate": 9.860263358648146e-07, |
|
"loss": 0.4611, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.7140251787826203, |
|
"grad_norm": 2.244027614593506, |
|
"learning_rate": 9.789867478670345e-07, |
|
"loss": 0.4351, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.7151304809169586, |
|
"grad_norm": 2.007619619369507, |
|
"learning_rate": 9.719662556600672e-07, |
|
"loss": 0.4419, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.7162357830512971, |
|
"grad_norm": 2.03373122215271, |
|
"learning_rate": 9.649649473837448e-07, |
|
"loss": 0.4056, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.7173410851856354, |
|
"grad_norm": 2.0532867908477783, |
|
"learning_rate": 9.579829109370506e-07, |
|
"loss": 0.4215, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.7184463873199739, |
|
"grad_norm": 2.224346876144409, |
|
"learning_rate": 9.510202339770164e-07, |
|
"loss": 0.4431, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.7195516894543124, |
|
"grad_norm": 2.053011894226074, |
|
"learning_rate": 9.440770039176212e-07, |
|
"loss": 0.4496, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.7206569915886507, |
|
"grad_norm": 2.328004837036133, |
|
"learning_rate": 9.371533079286976e-07, |
|
"loss": 0.443, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.7217622937229892, |
|
"grad_norm": 1.9584163427352905, |
|
"learning_rate": 9.302492329348348e-07, |
|
"loss": 0.4411, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.7228675958573276, |
|
"grad_norm": 1.6421287059783936, |
|
"learning_rate": 9.233648656142838e-07, |
|
"loss": 0.447, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.723972897991666, |
|
"grad_norm": 2.134143590927124, |
|
"learning_rate": 9.165002923978769e-07, |
|
"loss": 0.4494, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.7250782001260044, |
|
"grad_norm": 2.2968268394470215, |
|
"learning_rate": 9.096555994679346e-07, |
|
"loss": 0.4537, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.7261835022603429, |
|
"grad_norm": 1.4631460905075073, |
|
"learning_rate": 9.028308727571905e-07, |
|
"loss": 0.4112, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.7272888043946812, |
|
"grad_norm": 3.258443593978882, |
|
"learning_rate": 8.960261979477061e-07, |
|
"loss": 0.4292, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.7283941065290197, |
|
"grad_norm": 2.0727250576019287, |
|
"learning_rate": 8.892416604698021e-07, |
|
"loss": 0.4337, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.7294994086633582, |
|
"grad_norm": 2.1423141956329346, |
|
"learning_rate": 8.824773455009777e-07, |
|
"loss": 0.4304, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.7306047107976965, |
|
"grad_norm": 1.4535356760025024, |
|
"learning_rate": 8.757333379648491e-07, |
|
"loss": 0.405, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.731710012932035, |
|
"grad_norm": 1.9360605478286743, |
|
"learning_rate": 8.690097225300789e-07, |
|
"loss": 0.4434, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.7328153150663734, |
|
"grad_norm": 2.19547700881958, |
|
"learning_rate": 8.623065836093131e-07, |
|
"loss": 0.4207, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.7339206172007118, |
|
"grad_norm": 2.0186522006988525, |
|
"learning_rate": 8.556240053581222e-07, |
|
"loss": 0.4634, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.7350259193350502, |
|
"grad_norm": 1.845166563987732, |
|
"learning_rate": 8.489620716739436e-07, |
|
"loss": 0.4466, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.7361312214693887, |
|
"grad_norm": 2.228302001953125, |
|
"learning_rate": 8.423208661950342e-07, |
|
"loss": 0.4612, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.737236523603727, |
|
"grad_norm": 2.429689884185791, |
|
"learning_rate": 8.357004722994105e-07, |
|
"loss": 0.4108, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.7383418257380655, |
|
"grad_norm": 3.2977466583251953, |
|
"learning_rate": 8.291009731038078e-07, |
|
"loss": 0.4497, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.739447127872404, |
|
"grad_norm": 2.6713201999664307, |
|
"learning_rate": 8.22522451462637e-07, |
|
"loss": 0.4041, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.7405524300067423, |
|
"grad_norm": 1.5487697124481201, |
|
"learning_rate": 8.159649899669436e-07, |
|
"loss": 0.4521, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.7416577321410808, |
|
"grad_norm": 2.303757429122925, |
|
"learning_rate": 8.094286709433683e-07, |
|
"loss": 0.444, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.7427630342754192, |
|
"grad_norm": 2.1915831565856934, |
|
"learning_rate": 8.029135764531157e-07, |
|
"loss": 0.4364, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.7438683364097576, |
|
"grad_norm": 1.9223788976669312, |
|
"learning_rate": 7.964197882909252e-07, |
|
"loss": 0.444, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.744973638544096, |
|
"grad_norm": 2.2881598472595215, |
|
"learning_rate": 7.899473879840431e-07, |
|
"loss": 0.4276, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.7460789406784345, |
|
"grad_norm": 1.8012919425964355, |
|
"learning_rate": 7.834964567911956e-07, |
|
"loss": 0.4057, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.7471842428127728, |
|
"grad_norm": 1.6279646158218384, |
|
"learning_rate": 7.770670757015752e-07, |
|
"loss": 0.4643, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.7482895449471113, |
|
"grad_norm": 2.4971320629119873, |
|
"learning_rate": 7.706593254338174e-07, |
|
"loss": 0.4609, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.7493948470814498, |
|
"grad_norm": 1.2119097709655762, |
|
"learning_rate": 7.642732864349927e-07, |
|
"loss": 0.484, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.7505001492157881, |
|
"grad_norm": 1.7218291759490967, |
|
"learning_rate": 7.579090388795923e-07, |
|
"loss": 0.4322, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.7516054513501266, |
|
"grad_norm": 1.814095139503479, |
|
"learning_rate": 7.51566662668525e-07, |
|
"loss": 0.4391, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.752710753484465, |
|
"grad_norm": 1.9664380550384521, |
|
"learning_rate": 7.452462374281111e-07, |
|
"loss": 0.4384, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.7538160556188034, |
|
"grad_norm": 1.8115942478179932, |
|
"learning_rate": 7.389478425090845e-07, |
|
"loss": 0.4358, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.7549213577531418, |
|
"grad_norm": 2.317274570465088, |
|
"learning_rate": 7.326715569855983e-07, |
|
"loss": 0.4266, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.7560266598874802, |
|
"grad_norm": 1.416651964187622, |
|
"learning_rate": 7.264174596542262e-07, |
|
"loss": 0.4613, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.7571319620218186, |
|
"grad_norm": 2.0251598358154297, |
|
"learning_rate": 7.201856290329781e-07, |
|
"loss": 0.4353, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.7582372641561571, |
|
"grad_norm": 2.7883288860321045, |
|
"learning_rate": 7.139761433603148e-07, |
|
"loss": 0.4728, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.7593425662904955, |
|
"grad_norm": 2.3883168697357178, |
|
"learning_rate": 7.077890805941631e-07, |
|
"loss": 0.4496, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.7604478684248339, |
|
"grad_norm": 1.9152491092681885, |
|
"learning_rate": 7.016245184109374e-07, |
|
"loss": 0.4222, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.7615531705591724, |
|
"grad_norm": 1.967631459236145, |
|
"learning_rate": 6.954825342045648e-07, |
|
"loss": 0.4551, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.7626584726935107, |
|
"grad_norm": 5.269169330596924, |
|
"learning_rate": 6.893632050855153e-07, |
|
"loss": 0.4473, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.7637637748278492, |
|
"grad_norm": 2.2106597423553467, |
|
"learning_rate": 6.832666078798319e-07, |
|
"loss": 0.4272, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.7648690769621876, |
|
"grad_norm": 1.336655855178833, |
|
"learning_rate": 6.771928191281657e-07, |
|
"loss": 0.4363, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.765974379096526, |
|
"grad_norm": 2.259783983230591, |
|
"learning_rate": 6.711419150848142e-07, |
|
"loss": 0.4753, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.7670796812308645, |
|
"grad_norm": 2.4219510555267334, |
|
"learning_rate": 6.651139717167684e-07, |
|
"loss": 0.4387, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.7681849833652029, |
|
"grad_norm": 1.4461395740509033, |
|
"learning_rate": 6.591090647027551e-07, |
|
"loss": 0.4333, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.7692902854995413, |
|
"grad_norm": 2.112628221511841, |
|
"learning_rate": 6.531272694322865e-07, |
|
"loss": 0.4432, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.7703955876338797, |
|
"grad_norm": 2.398404121398926, |
|
"learning_rate": 6.471686610047149e-07, |
|
"loss": 0.4178, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.7715008897682182, |
|
"grad_norm": 1.9381033182144165, |
|
"learning_rate": 6.412333142282912e-07, |
|
"loss": 0.4319, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.7726061919025565, |
|
"grad_norm": 2.338209390640259, |
|
"learning_rate": 6.353213036192244e-07, |
|
"loss": 0.4392, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.773711494036895, |
|
"grad_norm": 2.6548027992248535, |
|
"learning_rate": 6.294327034007444e-07, |
|
"loss": 0.46, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.7748167961712334, |
|
"grad_norm": 1.4142146110534668, |
|
"learning_rate": 6.235675875021741e-07, |
|
"loss": 0.4779, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.7759220983055718, |
|
"grad_norm": 2.0672521591186523, |
|
"learning_rate": 6.177260295579962e-07, |
|
"loss": 0.438, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.7770274004399103, |
|
"grad_norm": 2.526472806930542, |
|
"learning_rate": 6.119081029069346e-07, |
|
"loss": 0.4127, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.7781327025742487, |
|
"grad_norm": 1.7942878007888794, |
|
"learning_rate": 6.061138805910272e-07, |
|
"loss": 0.4384, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.7792380047085871, |
|
"grad_norm": 3.063554286956787, |
|
"learning_rate": 6.003434353547158e-07, |
|
"loss": 0.3919, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.7803433068429255, |
|
"grad_norm": 2.0761284828186035, |
|
"learning_rate": 5.945968396439262e-07, |
|
"loss": 0.42, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.781448608977264, |
|
"grad_norm": 2.193068504333496, |
|
"learning_rate": 5.88874165605163e-07, |
|
"loss": 0.4547, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.7825539111116023, |
|
"grad_norm": 1.3570361137390137, |
|
"learning_rate": 5.831754850846039e-07, |
|
"loss": 0.4401, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.7836592132459408, |
|
"grad_norm": 1.9479831457138062, |
|
"learning_rate": 5.775008696271942e-07, |
|
"loss": 0.4558, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.7847645153802792, |
|
"grad_norm": 1.4606367349624634, |
|
"learning_rate": 5.718503904757503e-07, |
|
"loss": 0.4485, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.7858698175146176, |
|
"grad_norm": 1.7804583311080933, |
|
"learning_rate": 5.662241185700684e-07, |
|
"loss": 0.3965, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.7869751196489561, |
|
"grad_norm": 1.787216067314148, |
|
"learning_rate": 5.606221245460297e-07, |
|
"loss": 0.4349, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.7880804217832945, |
|
"grad_norm": 2.5382983684539795, |
|
"learning_rate": 5.550444787347148e-07, |
|
"loss": 0.4296, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.7891857239176329, |
|
"grad_norm": 2.524690866470337, |
|
"learning_rate": 5.494912511615205e-07, |
|
"loss": 0.4599, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.7902910260519713, |
|
"grad_norm": 1.3965719938278198, |
|
"learning_rate": 5.439625115452824e-07, |
|
"loss": 0.4503, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.7913963281863098, |
|
"grad_norm": 1.595763921737671, |
|
"learning_rate": 5.384583292973985e-07, |
|
"loss": 0.4615, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.7925016303206481, |
|
"grad_norm": 1.5032540559768677, |
|
"learning_rate": 5.329787735209566e-07, |
|
"loss": 0.4287, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.7936069324549866, |
|
"grad_norm": 1.8847301006317139, |
|
"learning_rate": 5.275239130098669e-07, |
|
"loss": 0.446, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.7947122345893249, |
|
"grad_norm": 1.5111511945724487, |
|
"learning_rate": 5.220938162480014e-07, |
|
"loss": 0.435, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.7958175367236634, |
|
"grad_norm": 2.1808974742889404, |
|
"learning_rate": 5.166885514083311e-07, |
|
"loss": 0.4365, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.7969228388580019, |
|
"grad_norm": 1.921736240386963, |
|
"learning_rate": 5.113081863520697e-07, |
|
"loss": 0.4746, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.7980281409923402, |
|
"grad_norm": 2.0888705253601074, |
|
"learning_rate": 5.059527886278246e-07, |
|
"loss": 0.4435, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.7991334431266787, |
|
"grad_norm": 2.90547776222229, |
|
"learning_rate": 5.006224254707448e-07, |
|
"loss": 0.464, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.8002387452610171, |
|
"grad_norm": 1.6634081602096558, |
|
"learning_rate": 4.953171638016821e-07, |
|
"loss": 0.4243, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.8013440473953555, |
|
"grad_norm": 1.630812644958496, |
|
"learning_rate": 4.900370702263443e-07, |
|
"loss": 0.3898, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.8024493495296939, |
|
"grad_norm": 2.4027256965637207, |
|
"learning_rate": 4.847822110344664e-07, |
|
"loss": 0.4398, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.8035546516640324, |
|
"grad_norm": 1.9806816577911377, |
|
"learning_rate": 4.795526521989705e-07, |
|
"loss": 0.475, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.8046599537983707, |
|
"grad_norm": 2.0573477745056152, |
|
"learning_rate": 4.743484593751446e-07, |
|
"loss": 0.4239, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.8057652559327092, |
|
"grad_norm": 2.6847050189971924, |
|
"learning_rate": 4.6916969789981477e-07, |
|
"loss": 0.4509, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.8068705580670477, |
|
"grad_norm": 2.843912124633789, |
|
"learning_rate": 4.6401643279052444e-07, |
|
"loss": 0.4605, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.807975860201386, |
|
"grad_norm": 2.673027276992798, |
|
"learning_rate": 4.588887287447188e-07, |
|
"loss": 0.4139, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.8090811623357245, |
|
"grad_norm": 1.7096991539001465, |
|
"learning_rate": 4.5378665013893375e-07, |
|
"loss": 0.4527, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.8101864644700629, |
|
"grad_norm": 1.959112286567688, |
|
"learning_rate": 4.4871026102798755e-07, |
|
"loss": 0.4437, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.8112917666044013, |
|
"grad_norm": 1.4862419366836548, |
|
"learning_rate": 4.436596251441738e-07, |
|
"loss": 0.4287, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.8123970687387397, |
|
"grad_norm": 2.291743278503418, |
|
"learning_rate": 4.3863480589646374e-07, |
|
"loss": 0.4279, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.8135023708730782, |
|
"grad_norm": 2.421630620956421, |
|
"learning_rate": 4.336358663697107e-07, |
|
"loss": 0.4497, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.8146076730074165, |
|
"grad_norm": 2.3377912044525146, |
|
"learning_rate": 4.286628693238576e-07, |
|
"loss": 0.4474, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.815712975141755, |
|
"grad_norm": 2.160400390625, |
|
"learning_rate": 4.237158771931468e-07, |
|
"loss": 0.4472, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.8168182772760935, |
|
"grad_norm": 2.32997465133667, |
|
"learning_rate": 4.187949520853382e-07, |
|
"loss": 0.446, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.8179235794104318, |
|
"grad_norm": 2.2677996158599854, |
|
"learning_rate": 4.139001557809308e-07, |
|
"loss": 0.4408, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.8190288815447703, |
|
"grad_norm": 1.791791558265686, |
|
"learning_rate": 4.090315497323852e-07, |
|
"loss": 0.4721, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.8201341836791087, |
|
"grad_norm": 1.590136170387268, |
|
"learning_rate": 4.041891950633514e-07, |
|
"loss": 0.4389, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.8212394858134471, |
|
"grad_norm": 1.7760423421859741, |
|
"learning_rate": 3.993731525679029e-07, |
|
"loss": 0.4682, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.8223447879477855, |
|
"grad_norm": 1.8399248123168945, |
|
"learning_rate": 3.945834827097736e-07, |
|
"loss": 0.4345, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.823450090082124, |
|
"grad_norm": 2.691328763961792, |
|
"learning_rate": 3.8982024562159854e-07, |
|
"loss": 0.4865, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.8245553922164623, |
|
"grad_norm": 2.113375425338745, |
|
"learning_rate": 3.8508350110415646e-07, |
|
"loss": 0.4288, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.8256606943508008, |
|
"grad_norm": 1.4317853450775146, |
|
"learning_rate": 3.8037330862562393e-07, |
|
"loss": 0.4465, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.8267659964851393, |
|
"grad_norm": 1.591933012008667, |
|
"learning_rate": 3.7568972732082295e-07, |
|
"loss": 0.4131, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.8278712986194776, |
|
"grad_norm": 1.7374714612960815, |
|
"learning_rate": 3.710328159904844e-07, |
|
"loss": 0.4011, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.8289766007538161, |
|
"grad_norm": 2.0382604598999023, |
|
"learning_rate": 3.664026331005044e-07, |
|
"loss": 0.4176, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.8300819028881545, |
|
"grad_norm": 2.3857359886169434, |
|
"learning_rate": 3.6179923678121537e-07, |
|
"loss": 0.4921, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.8311872050224929, |
|
"grad_norm": 2.013730764389038, |
|
"learning_rate": 3.5722268482665107e-07, |
|
"loss": 0.4365, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.8322925071568313, |
|
"grad_norm": 1.714146375656128, |
|
"learning_rate": 3.5267303469382506e-07, |
|
"loss": 0.4353, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.8333978092911697, |
|
"grad_norm": 1.6847208738327026, |
|
"learning_rate": 3.4815034350200893e-07, |
|
"loss": 0.4585, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.8345031114255081, |
|
"grad_norm": 2.0972464084625244, |
|
"learning_rate": 3.4365466803201216e-07, |
|
"loss": 0.441, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.8356084135598466, |
|
"grad_norm": 1.8610143661499023, |
|
"learning_rate": 3.3918606472547136e-07, |
|
"loss": 0.4351, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.836713715694185, |
|
"grad_norm": 2.597923755645752, |
|
"learning_rate": 3.347445896841428e-07, |
|
"loss": 0.4196, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.8378190178285234, |
|
"grad_norm": 1.8498742580413818, |
|
"learning_rate": 3.30330298669197e-07, |
|
"loss": 0.4526, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.8389243199628619, |
|
"grad_norm": 1.8387874364852905, |
|
"learning_rate": 3.259432471005175e-07, |
|
"loss": 0.4287, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.8400296220972002, |
|
"grad_norm": 2.75079083442688, |
|
"learning_rate": 3.215834900560055e-07, |
|
"loss": 0.4486, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.8411349242315387, |
|
"grad_norm": 1.793381690979004, |
|
"learning_rate": 3.1725108227089074e-07, |
|
"loss": 0.4602, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.8422402263658771, |
|
"grad_norm": 1.3438163995742798, |
|
"learning_rate": 3.129460781370422e-07, |
|
"loss": 0.4441, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.8433455285002155, |
|
"grad_norm": 2.8206710815429688, |
|
"learning_rate": 3.0866853170228443e-07, |
|
"loss": 0.3989, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.844450830634554, |
|
"grad_norm": 1.9363433122634888, |
|
"learning_rate": 3.044184966697203e-07, |
|
"loss": 0.4252, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.8455561327688924, |
|
"grad_norm": 2.5586061477661133, |
|
"learning_rate": 3.001960263970577e-07, |
|
"loss": 0.4957, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.8466614349032308, |
|
"grad_norm": 1.9901615381240845, |
|
"learning_rate": 2.960011738959387e-07, |
|
"loss": 0.4629, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.8477667370375692, |
|
"grad_norm": 1.8617513179779053, |
|
"learning_rate": 2.918339918312718e-07, |
|
"loss": 0.4515, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.8488720391719077, |
|
"grad_norm": 1.8503713607788086, |
|
"learning_rate": 2.876945325205754e-07, |
|
"loss": 0.4614, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.849977341306246, |
|
"grad_norm": 2.3590264320373535, |
|
"learning_rate": 2.835828479333164e-07, |
|
"loss": 0.4517, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.8510826434405845, |
|
"grad_norm": 1.9208427667617798, |
|
"learning_rate": 2.7949898969026114e-07, |
|
"loss": 0.4694, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.852187945574923, |
|
"grad_norm": 2.673845052719116, |
|
"learning_rate": 2.754430090628243e-07, |
|
"loss": 0.4379, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.8532932477092613, |
|
"grad_norm": 2.1295111179351807, |
|
"learning_rate": 2.714149569724295e-07, |
|
"loss": 0.4654, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.8543985498435998, |
|
"grad_norm": 2.3107078075408936, |
|
"learning_rate": 2.6741488398986384e-07, |
|
"loss": 0.4267, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.8555038519779382, |
|
"grad_norm": 2.0932328701019287, |
|
"learning_rate": 2.6344284033464976e-07, |
|
"loss": 0.4141, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.8566091541122766, |
|
"grad_norm": 1.246630072593689, |
|
"learning_rate": 2.594988758744088e-07, |
|
"loss": 0.4597, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.857714456246615, |
|
"grad_norm": 1.999973177909851, |
|
"learning_rate": 2.5558304012423954e-07, |
|
"loss": 0.4488, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.8588197583809535, |
|
"grad_norm": 1.827642798423767, |
|
"learning_rate": 2.516953822460935e-07, |
|
"loss": 0.473, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.8599250605152918, |
|
"grad_norm": 2.323723793029785, |
|
"learning_rate": 2.4783595104815954e-07, |
|
"loss": 0.4138, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.8610303626496303, |
|
"grad_norm": 2.2066116333007812, |
|
"learning_rate": 2.440047949842506e-07, |
|
"loss": 0.4466, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.8621356647839687, |
|
"grad_norm": 1.8978465795516968, |
|
"learning_rate": 2.402019621531937e-07, |
|
"loss": 0.4597, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.8632409669183071, |
|
"grad_norm": 1.499747395515442, |
|
"learning_rate": 2.364275002982286e-07, |
|
"loss": 0.4103, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.8643462690526456, |
|
"grad_norm": 1.766528606414795, |
|
"learning_rate": 2.3268145680640758e-07, |
|
"loss": 0.4416, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.865451571186984, |
|
"grad_norm": 2.050598621368408, |
|
"learning_rate": 2.2896387870800034e-07, |
|
"loss": 0.4238, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.8665568733213224, |
|
"grad_norm": 3.147510290145874, |
|
"learning_rate": 2.2527481267590274e-07, |
|
"loss": 0.4561, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.8676621754556608, |
|
"grad_norm": 2.1303939819335938, |
|
"learning_rate": 2.2161430502505133e-07, |
|
"loss": 0.4525, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.8687674775899992, |
|
"grad_norm": 2.706810235977173, |
|
"learning_rate": 2.179824017118437e-07, |
|
"loss": 0.4467, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.8698727797243376, |
|
"grad_norm": 2.3057336807250977, |
|
"learning_rate": 2.1437914833355887e-07, |
|
"loss": 0.4511, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.8709780818586761, |
|
"grad_norm": 2.322817087173462, |
|
"learning_rate": 2.1080459012778636e-07, |
|
"loss": 0.4705, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.8720833839930144, |
|
"grad_norm": 1.8831989765167236, |
|
"learning_rate": 2.0725877197185663e-07, |
|
"loss": 0.445, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.8731886861273529, |
|
"grad_norm": 2.8571081161499023, |
|
"learning_rate": 2.0374173838228013e-07, |
|
"loss": 0.4772, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.8742939882616914, |
|
"grad_norm": 2.4051854610443115, |
|
"learning_rate": 2.0025353351418753e-07, |
|
"loss": 0.4557, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.8753992903960297, |
|
"grad_norm": 1.7439450025558472, |
|
"learning_rate": 1.967942011607732e-07, |
|
"loss": 0.4421, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.8765045925303682, |
|
"grad_norm": 2.514841318130493, |
|
"learning_rate": 1.9336378475274865e-07, |
|
"loss": 0.4508, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.8776098946647066, |
|
"grad_norm": 1.8946666717529297, |
|
"learning_rate": 1.8996232735779496e-07, |
|
"loss": 0.4509, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.878715196799045, |
|
"grad_norm": 1.5840513706207275, |
|
"learning_rate": 1.865898716800238e-07, |
|
"loss": 0.4557, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.8798204989333834, |
|
"grad_norm": 1.824873924255371, |
|
"learning_rate": 1.8324646005943913e-07, |
|
"loss": 0.4662, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.8809258010677219, |
|
"grad_norm": 2.0272133350372314, |
|
"learning_rate": 1.7993213447140807e-07, |
|
"loss": 0.4291, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.8820311032020602, |
|
"grad_norm": 1.1004635095596313, |
|
"learning_rate": 1.766469365261317e-07, |
|
"loss": 0.3973, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.8831364053363987, |
|
"grad_norm": 2.014890193939209, |
|
"learning_rate": 1.7339090746812449e-07, |
|
"loss": 0.45, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.8842417074707372, |
|
"grad_norm": 2.0376179218292236, |
|
"learning_rate": 1.7016408817569606e-07, |
|
"loss": 0.4381, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.8853470096050755, |
|
"grad_norm": 1.6137086153030396, |
|
"learning_rate": 1.6696651916043666e-07, |
|
"loss": 0.4361, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.886452311739414, |
|
"grad_norm": 1.7986013889312744, |
|
"learning_rate": 1.6379824056670934e-07, |
|
"loss": 0.4719, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.8875576138737524, |
|
"grad_norm": 1.8301312923431396, |
|
"learning_rate": 1.6065929217114696e-07, |
|
"loss": 0.4262, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.8886629160080908, |
|
"grad_norm": 2.35886287689209, |
|
"learning_rate": 1.575497133821524e-07, |
|
"loss": 0.4535, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.8897682181424292, |
|
"grad_norm": 1.7016726732254028, |
|
"learning_rate": 1.5446954323940223e-07, |
|
"loss": 0.4294, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.8908735202767677, |
|
"grad_norm": 1.589161992073059, |
|
"learning_rate": 1.5141882041335737e-07, |
|
"loss": 0.4309, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.891978822411106, |
|
"grad_norm": 2.3803720474243164, |
|
"learning_rate": 1.4839758320477958e-07, |
|
"loss": 0.4318, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.8930841245454445, |
|
"grad_norm": 2.638575315475464, |
|
"learning_rate": 1.454058695442484e-07, |
|
"loss": 0.4678, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.894189426679783, |
|
"grad_norm": 1.9479451179504395, |
|
"learning_rate": 1.4244371699168453e-07, |
|
"loss": 0.4264, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.8952947288141213, |
|
"grad_norm": 1.9173952341079712, |
|
"learning_rate": 1.3951116273588e-07, |
|
"loss": 0.4507, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.8964000309484598, |
|
"grad_norm": 1.8866360187530518, |
|
"learning_rate": 1.3660824359403107e-07, |
|
"loss": 0.4359, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.8975053330827982, |
|
"grad_norm": 2.116718053817749, |
|
"learning_rate": 1.3373499601127466e-07, |
|
"loss": 0.4451, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.8986106352171366, |
|
"grad_norm": 2.32564377784729, |
|
"learning_rate": 1.308914560602323e-07, |
|
"loss": 0.4198, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.899715937351475, |
|
"grad_norm": 2.0888161659240723, |
|
"learning_rate": 1.2807765944055528e-07, |
|
"loss": 0.4543, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.9008212394858135, |
|
"grad_norm": 2.4812674522399902, |
|
"learning_rate": 1.2529364147847918e-07, |
|
"loss": 0.4323, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.9019265416201518, |
|
"grad_norm": 1.4540350437164307, |
|
"learning_rate": 1.2253943712637883e-07, |
|
"loss": 0.4429, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.9030318437544903, |
|
"grad_norm": 2.2741010189056396, |
|
"learning_rate": 1.198150809623283e-07, |
|
"loss": 0.4087, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.9041371458888288, |
|
"grad_norm": 1.891856074333191, |
|
"learning_rate": 1.1712060718966967e-07, |
|
"loss": 0.4314, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.9052424480231671, |
|
"grad_norm": 2.013892412185669, |
|
"learning_rate": 1.1445604963658041e-07, |
|
"loss": 0.413, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.9063477501575056, |
|
"grad_norm": 1.5470303297042847, |
|
"learning_rate": 1.1182144175565207e-07, |
|
"loss": 0.4102, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.9074530522918439, |
|
"grad_norm": 2.09853196144104, |
|
"learning_rate": 1.0921681662346695e-07, |
|
"loss": 0.4228, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.9085583544261824, |
|
"grad_norm": 1.8821436166763306, |
|
"learning_rate": 1.0664220694018512e-07, |
|
"loss": 0.4499, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.9096636565605208, |
|
"grad_norm": 2.268958568572998, |
|
"learning_rate": 1.0409764502913311e-07, |
|
"loss": 0.457, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.9107689586948592, |
|
"grad_norm": 2.286543607711792, |
|
"learning_rate": 1.0158316283639807e-07, |
|
"loss": 0.4531, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.9118742608291976, |
|
"grad_norm": 1.7463018894195557, |
|
"learning_rate": 9.909879193042731e-08, |
|
"loss": 0.4182, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.9129795629635361, |
|
"grad_norm": 1.9405850172042847, |
|
"learning_rate": 9.664456350163055e-08, |
|
"loss": 0.4074, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.9140848650978745, |
|
"grad_norm": 1.8400213718414307, |
|
"learning_rate": 9.422050836198904e-08, |
|
"loss": 0.4281, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.9151901672322129, |
|
"grad_norm": 2.0934810638427734, |
|
"learning_rate": 9.182665694467019e-08, |
|
"loss": 0.4394, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.9162954693665514, |
|
"grad_norm": 1.6910539865493774, |
|
"learning_rate": 8.946303930364386e-08, |
|
"loss": 0.4511, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.9174007715008897, |
|
"grad_norm": 1.2215235233306885, |
|
"learning_rate": 8.712968511330439e-08, |
|
"loss": 0.4427, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.9185060736352282, |
|
"grad_norm": 1.4822089672088623, |
|
"learning_rate": 8.482662366809947e-08, |
|
"loss": 0.4029, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.9196113757695666, |
|
"grad_norm": 1.9130114316940308, |
|
"learning_rate": 8.255388388216267e-08, |
|
"loss": 0.4471, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.920716677903905, |
|
"grad_norm": 1.6017576456069946, |
|
"learning_rate": 8.031149428894936e-08, |
|
"loss": 0.449, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.9218219800382434, |
|
"grad_norm": 1.9857609272003174, |
|
"learning_rate": 7.80994830408785e-08, |
|
"loss": 0.4505, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.9229272821725819, |
|
"grad_norm": 1.8383105993270874, |
|
"learning_rate": 7.59178779089792e-08, |
|
"loss": 0.4387, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.9240325843069203, |
|
"grad_norm": 1.5734336376190186, |
|
"learning_rate": 7.376670628254368e-08, |
|
"loss": 0.4456, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.9251378864412587, |
|
"grad_norm": 1.7729212045669556, |
|
"learning_rate": 7.16459951687809e-08, |
|
"loss": 0.4252, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.9262431885755972, |
|
"grad_norm": 2.0925188064575195, |
|
"learning_rate": 6.955577119247909e-08, |
|
"loss": 0.4397, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.9273484907099355, |
|
"grad_norm": 1.6489801406860352, |
|
"learning_rate": 6.749606059567177e-08, |
|
"loss": 0.4241, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.928453792844274, |
|
"grad_norm": 2.122025728225708, |
|
"learning_rate": 6.546688923730587e-08, |
|
"loss": 0.4509, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.9295590949786124, |
|
"grad_norm": 1.5546257495880127, |
|
"learning_rate": 6.346828259292114e-08, |
|
"loss": 0.4283, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.9306643971129508, |
|
"grad_norm": 1.4313548803329468, |
|
"learning_rate": 6.150026575432622e-08, |
|
"loss": 0.4315, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.9317696992472893, |
|
"grad_norm": 2.144721269607544, |
|
"learning_rate": 5.956286342928608e-08, |
|
"loss": 0.4046, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.9328750013816277, |
|
"grad_norm": 1.9185172319412231, |
|
"learning_rate": 5.7656099941210966e-08, |
|
"loss": 0.4762, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.9339803035159661, |
|
"grad_norm": 2.0306639671325684, |
|
"learning_rate": 5.577999922885158e-08, |
|
"loss": 0.4347, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.9350856056503045, |
|
"grad_norm": 2.1696221828460693, |
|
"learning_rate": 5.393458484599823e-08, |
|
"loss": 0.4654, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.936190907784643, |
|
"grad_norm": 3.1747541427612305, |
|
"learning_rate": 5.2119879961184114e-08, |
|
"loss": 0.4361, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.9372962099189813, |
|
"grad_norm": 2.4681639671325684, |
|
"learning_rate": 5.033590735739641e-08, |
|
"loss": 0.4064, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.9384015120533198, |
|
"grad_norm": 2.34089732170105, |
|
"learning_rate": 4.858268943178868e-08, |
|
"loss": 0.4839, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.9395068141876582, |
|
"grad_norm": 2.1032681465148926, |
|
"learning_rate": 4.686024819540058e-08, |
|
"loss": 0.4256, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.9406121163219966, |
|
"grad_norm": 2.1643483638763428, |
|
"learning_rate": 4.5168605272881414e-08, |
|
"loss": 0.4503, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.941717418456335, |
|
"grad_norm": 1.97984778881073, |
|
"learning_rate": 4.350778190221699e-08, |
|
"loss": 0.424, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.9428227205906735, |
|
"grad_norm": 2.1957056522369385, |
|
"learning_rate": 4.187779893446597e-08, |
|
"loss": 0.4226, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.9439280227250119, |
|
"grad_norm": 2.0904030799865723, |
|
"learning_rate": 4.027867683349618e-08, |
|
"loss": 0.4394, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.9450333248593503, |
|
"grad_norm": 1.8033450841903687, |
|
"learning_rate": 3.87104356757273e-08, |
|
"loss": 0.4806, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.9461386269936887, |
|
"grad_norm": 1.6405876874923706, |
|
"learning_rate": 3.717309514988027e-08, |
|
"loss": 0.4618, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.9472439291280271, |
|
"grad_norm": 2.6198575496673584, |
|
"learning_rate": 3.566667455672912e-08, |
|
"loss": 0.4313, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.9483492312623656, |
|
"grad_norm": 1.9811842441558838, |
|
"learning_rate": 3.4191192808858966e-08, |
|
"loss": 0.4057, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.9494545333967039, |
|
"grad_norm": 1.8522582054138184, |
|
"learning_rate": 3.27466684304284e-08, |
|
"loss": 0.4433, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.9505598355310424, |
|
"grad_norm": 2.586599826812744, |
|
"learning_rate": 3.133311955693691e-08, |
|
"loss": 0.414, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.9516651376653809, |
|
"grad_norm": 1.4519222974777222, |
|
"learning_rate": 2.995056393499757e-08, |
|
"loss": 0.4333, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.9527704397997192, |
|
"grad_norm": 2.7613425254821777, |
|
"learning_rate": 2.859901892211442e-08, |
|
"loss": 0.4776, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.9538757419340577, |
|
"grad_norm": 1.8202546834945679, |
|
"learning_rate": 2.7278501486463216e-08, |
|
"loss": 0.4269, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.9549810440683961, |
|
"grad_norm": 2.257310390472412, |
|
"learning_rate": 2.598902820667992e-08, |
|
"loss": 0.4069, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.9560863462027345, |
|
"grad_norm": 2.6993019580841064, |
|
"learning_rate": 2.4730615271651716e-08, |
|
"loss": 0.413, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.9571916483370729, |
|
"grad_norm": 1.3624522686004639, |
|
"learning_rate": 2.3503278480313806e-08, |
|
"loss": 0.4277, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.9582969504714114, |
|
"grad_norm": 1.7707417011260986, |
|
"learning_rate": 2.230703324145156e-08, |
|
"loss": 0.4512, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.9594022526057497, |
|
"grad_norm": 1.9722903966903687, |
|
"learning_rate": 2.1141894573507014e-08, |
|
"loss": 0.4333, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.9605075547400882, |
|
"grad_norm": 2.174100399017334, |
|
"learning_rate": 2.000787710438934e-08, |
|
"loss": 0.473, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.9616128568744267, |
|
"grad_norm": 2.9068939685821533, |
|
"learning_rate": 1.8904995071292455e-08, |
|
"loss": 0.4919, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.962718159008765, |
|
"grad_norm": 2.1874163150787354, |
|
"learning_rate": 1.7833262320515744e-08, |
|
"loss": 0.4501, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.9638234611431035, |
|
"grad_norm": 2.9089388847351074, |
|
"learning_rate": 1.6792692307289747e-08, |
|
"loss": 0.4295, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.9649287632774419, |
|
"grad_norm": 2.2932639122009277, |
|
"learning_rate": 1.578329809560797e-08, |
|
"loss": 0.4246, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 0.9660340654117803, |
|
"grad_norm": 1.8238743543624878, |
|
"learning_rate": 1.4805092358062822e-08, |
|
"loss": 0.4535, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.9671393675461187, |
|
"grad_norm": 2.673421859741211, |
|
"learning_rate": 1.3858087375686335e-08, |
|
"loss": 0.4606, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.9682446696804572, |
|
"grad_norm": 2.192293405532837, |
|
"learning_rate": 1.2942295037795261e-08, |
|
"loss": 0.4632, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.9693499718147955, |
|
"grad_norm": 2.283832550048828, |
|
"learning_rate": 1.2057726841842865e-08, |
|
"loss": 0.444, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 0.970455273949134, |
|
"grad_norm": 1.8313320875167847, |
|
"learning_rate": 1.1204393893274878e-08, |
|
"loss": 0.415, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.9715605760834725, |
|
"grad_norm": 1.7791038751602173, |
|
"learning_rate": 1.0382306905388495e-08, |
|
"loss": 0.4252, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 0.9726658782178108, |
|
"grad_norm": 2.295269250869751, |
|
"learning_rate": 9.591476199199146e-09, |
|
"loss": 0.4614, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.9737711803521493, |
|
"grad_norm": 3.02966046333313, |
|
"learning_rate": 8.831911703310047e-09, |
|
"loss": 0.4545, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 0.9748764824864877, |
|
"grad_norm": 2.004098653793335, |
|
"learning_rate": 8.103622953789247e-09, |
|
"loss": 0.4399, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.9759817846208261, |
|
"grad_norm": 2.338454008102417, |
|
"learning_rate": 7.406619094047496e-09, |
|
"loss": 0.4413, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 0.9770870867551645, |
|
"grad_norm": 1.626102089881897, |
|
"learning_rate": 6.740908874725005e-09, |
|
"loss": 0.4362, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.978192388889503, |
|
"grad_norm": 1.9277746677398682, |
|
"learning_rate": 6.106500653581815e-09, |
|
"loss": 0.4365, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.9792976910238413, |
|
"grad_norm": 2.415738105773926, |
|
"learning_rate": 5.503402395391489e-09, |
|
"loss": 0.4642, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.9804029931581798, |
|
"grad_norm": 1.5694254636764526, |
|
"learning_rate": 4.931621671842301e-09, |
|
"loss": 0.441, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 0.9815082952925183, |
|
"grad_norm": 1.3973413705825806, |
|
"learning_rate": 4.391165661442043e-09, |
|
"loss": 0.4311, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.9826135974268566, |
|
"grad_norm": 1.9460673332214355, |
|
"learning_rate": 3.882041149427251e-09, |
|
"loss": 0.4422, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 0.9837188995611951, |
|
"grad_norm": 1.6558293104171753, |
|
"learning_rate": 3.404254527678286e-09, |
|
"loss": 0.423, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.9848242016955334, |
|
"grad_norm": 1.9977773427963257, |
|
"learning_rate": 2.957811794639942e-09, |
|
"loss": 0.456, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 0.9859295038298719, |
|
"grad_norm": 2.308818817138672, |
|
"learning_rate": 2.5427185552448496e-09, |
|
"loss": 0.4504, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.9870348059642103, |
|
"grad_norm": 3.172938108444214, |
|
"learning_rate": 2.158980020843804e-09, |
|
"loss": 0.476, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 0.9881401080985487, |
|
"grad_norm": 1.7046418190002441, |
|
"learning_rate": 1.8066010091402631e-09, |
|
"loss": 0.4402, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.9892454102328871, |
|
"grad_norm": 2.2551283836364746, |
|
"learning_rate": 1.485585944129564e-09, |
|
"loss": 0.4364, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.9903507123672256, |
|
"grad_norm": 2.2183637619018555, |
|
"learning_rate": 1.1959388560445207e-09, |
|
"loss": 0.4685, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.991456014501564, |
|
"grad_norm": 1.7988057136535645, |
|
"learning_rate": 9.376633813026891e-10, |
|
"loss": 0.445, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 0.9925613166359024, |
|
"grad_norm": 2.6220619678497314, |
|
"learning_rate": 7.107627624627911e-10, |
|
"loss": 0.4628, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.9936666187702409, |
|
"grad_norm": 2.123908519744873, |
|
"learning_rate": 5.152398481828025e-10, |
|
"loss": 0.4388, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 0.9947719209045792, |
|
"grad_norm": 2.005134344100952, |
|
"learning_rate": 3.510970931849822e-10, |
|
"loss": 0.4415, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.9958772230389177, |
|
"grad_norm": 2.5748660564422607, |
|
"learning_rate": 2.1833655822423027e-10, |
|
"loss": 0.4225, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 0.9969825251732561, |
|
"grad_norm": 1.9123564958572388, |
|
"learning_rate": 1.169599100625529e-10, |
|
"loss": 0.4542, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.9980878273075945, |
|
"grad_norm": 2.8671979904174805, |
|
"learning_rate": 4.6968421448523313e-11, |
|
"loss": 0.467, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 0.999193129441933, |
|
"grad_norm": 1.6871434450149536, |
|
"learning_rate": 8.362971101183448e-12, |
|
"loss": 0.437, |
|
"step": 90400 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 90473, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.968302960287416e+17, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|