Upload trainer_state.json with huggingface_hub (commit 778ce41)
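The raw trainer_state.json contents follow. As a minimal sketch (assuming the file has been downloaded locally as "trainer_state.json" and that matplotlib is installed; neither is stated by the upload itself), the training loss logged in log_history can be read and plotted like so:

# Minimal sketch: load trainer_state.json (local path assumed) and plot training loss vs. step.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:  # assumed local copy of the file shown below
    state = json.load(f)

# log_history holds one dict per logging event (epoch, grad_norm, learning_rate, loss, step).
logged = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logged]
losses = [e["loss"] for e in logged]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"global_step={state['global_step']}, epoch={state['epoch']}")
plt.show()
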
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 90473,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0011053021343384213,
"grad_norm": 4.114652156829834,
"learning_rate": 2.763957987838585e-07,
"loss": 1.1961,
"step": 100
},
{
"epoch": 0.0022106042686768426,
"grad_norm": 2.9325754642486572,
"learning_rate": 5.52791597567717e-07,
"loss": 1.0048,
"step": 200
},
{
"epoch": 0.0033159064030152644,
"grad_norm": 3.6297736167907715,
"learning_rate": 8.291873963515755e-07,
"loss": 0.8568,
"step": 300
},
{
"epoch": 0.004421208537353685,
"grad_norm": 2.905796766281128,
"learning_rate": 1.105583195135434e-06,
"loss": 0.8767,
"step": 400
},
{
"epoch": 0.005526510671692107,
"grad_norm": 4.603543758392334,
"learning_rate": 1.3819789939192927e-06,
"loss": 0.7411,
"step": 500
},
{
"epoch": 0.006631812806030529,
"grad_norm": 3.2700424194335938,
"learning_rate": 1.658374792703151e-06,
"loss": 0.7773,
"step": 600
},
{
"epoch": 0.00773711494036895,
"grad_norm": 3.0455334186553955,
"learning_rate": 1.9347705914870095e-06,
"loss": 0.7291,
"step": 700
},
{
"epoch": 0.00884241707470737,
"grad_norm": 3.2247352600097656,
"learning_rate": 2.211166390270868e-06,
"loss": 0.6383,
"step": 800
},
{
"epoch": 0.009947719209045794,
"grad_norm": 3.087158441543579,
"learning_rate": 2.4875621890547264e-06,
"loss": 0.6445,
"step": 900
},
{
"epoch": 0.011053021343384215,
"grad_norm": 1.812444806098938,
"learning_rate": 2.7639579878385854e-06,
"loss": 0.631,
"step": 1000
},
{
"epoch": 0.012158323477722636,
"grad_norm": 3.248868465423584,
"learning_rate": 3.0403537866224434e-06,
"loss": 0.6189,
"step": 1100
},
{
"epoch": 0.013263625612061057,
"grad_norm": 1.8088688850402832,
"learning_rate": 3.316749585406302e-06,
"loss": 0.5809,
"step": 1200
},
{
"epoch": 0.014368927746399479,
"grad_norm": 1.9592525959014893,
"learning_rate": 3.5931453841901604e-06,
"loss": 0.5885,
"step": 1300
},
{
"epoch": 0.0154742298807379,
"grad_norm": 2.4960570335388184,
"learning_rate": 3.869541182974019e-06,
"loss": 0.5743,
"step": 1400
},
{
"epoch": 0.01657953201507632,
"grad_norm": 1.895150899887085,
"learning_rate": 4.145936981757877e-06,
"loss": 0.5897,
"step": 1500
},
{
"epoch": 0.01768483414941474,
"grad_norm": 2.611772060394287,
"learning_rate": 4.422332780541736e-06,
"loss": 0.5213,
"step": 1600
},
{
"epoch": 0.018790136283753162,
"grad_norm": 3.9915366172790527,
"learning_rate": 4.698728579325595e-06,
"loss": 0.5928,
"step": 1700
},
{
"epoch": 0.019895438418091587,
"grad_norm": 2.026409387588501,
"learning_rate": 4.975124378109453e-06,
"loss": 0.5726,
"step": 1800
},
{
"epoch": 0.02100074055243001,
"grad_norm": 1.9394502639770508,
"learning_rate": 4.999987004364365e-06,
"loss": 0.5458,
"step": 1900
},
{
"epoch": 0.02210604268676843,
"grad_norm": 2.1378285884857178,
"learning_rate": 4.999942749379922e-06,
"loss": 0.5452,
"step": 2000
},
{
"epoch": 0.02321134482110685,
"grad_norm": 2.2720561027526855,
"learning_rate": 4.999867108486303e-06,
"loss": 0.5195,
"step": 2100
},
{
"epoch": 0.024316646955445272,
"grad_norm": 2.4831795692443848,
"learning_rate": 4.99976008263315e-06,
"loss": 0.5431,
"step": 2200
},
{
"epoch": 0.025421949089783694,
"grad_norm": 3.52687668800354,
"learning_rate": 4.999621673164139e-06,
"loss": 0.5703,
"step": 2300
},
{
"epoch": 0.026527251224122115,
"grad_norm": 2.1417176723480225,
"learning_rate": 4.999451881816949e-06,
"loss": 0.5549,
"step": 2400
},
{
"epoch": 0.027632553358460536,
"grad_norm": 2.2087039947509766,
"learning_rate": 4.999250710723255e-06,
"loss": 0.5664,
"step": 2500
},
{
"epoch": 0.028737855492798958,
"grad_norm": 2.0288796424865723,
"learning_rate": 4.999018162408687e-06,
"loss": 0.5864,
"step": 2600
},
{
"epoch": 0.02984315762713738,
"grad_norm": 1.9152870178222656,
"learning_rate": 4.998754239792809e-06,
"loss": 0.5568,
"step": 2700
},
{
"epoch": 0.0309484597614758,
"grad_norm": 1.9485653638839722,
"learning_rate": 4.998458946189078e-06,
"loss": 0.5706,
"step": 2800
},
{
"epoch": 0.03205376189581422,
"grad_norm": 2.10481595993042,
"learning_rate": 4.9981322853048e-06,
"loss": 0.5501,
"step": 2900
},
{
"epoch": 0.03315906403015264,
"grad_norm": 1.8621227741241455,
"learning_rate": 4.9977742612410905e-06,
"loss": 0.5394,
"step": 3000
},
{
"epoch": 0.034264366164491064,
"grad_norm": 2.057615280151367,
"learning_rate": 4.997384878492817e-06,
"loss": 0.5078,
"step": 3100
},
{
"epoch": 0.03536966829882948,
"grad_norm": 1.742665410041809,
"learning_rate": 4.996964141948542e-06,
"loss": 0.5584,
"step": 3200
},
{
"epoch": 0.03647497043316791,
"grad_norm": 2.150362253189087,
"learning_rate": 4.996512056890468e-06,
"loss": 0.5264,
"step": 3300
},
{
"epoch": 0.037580272567506325,
"grad_norm": 2.3525052070617676,
"learning_rate": 4.996028628994365e-06,
"loss": 0.5828,
"step": 3400
},
{
"epoch": 0.03868557470184475,
"grad_norm": 1.6484140157699585,
"learning_rate": 4.9955138643295e-06,
"loss": 0.52,
"step": 3500
},
{
"epoch": 0.039790876836183174,
"grad_norm": 3.176095724105835,
"learning_rate": 4.994967769358565e-06,
"loss": 0.557,
"step": 3600
},
{
"epoch": 0.04089617897052159,
"grad_norm": 1.66346275806427,
"learning_rate": 4.9943903509375926e-06,
"loss": 0.5121,
"step": 3700
},
{
"epoch": 0.04200148110486002,
"grad_norm": 2.594338893890381,
"learning_rate": 4.9937816163158685e-06,
"loss": 0.4962,
"step": 3800
},
{
"epoch": 0.043106783239198435,
"grad_norm": 2.330629348754883,
"learning_rate": 4.993141573135843e-06,
"loss": 0.5217,
"step": 3900
},
{
"epoch": 0.04421208537353686,
"grad_norm": 2.264955759048462,
"learning_rate": 4.9924702294330375e-06,
"loss": 0.5157,
"step": 4000
},
{
"epoch": 0.04531738750787528,
"grad_norm": 1.9724615812301636,
"learning_rate": 4.991767593635935e-06,
"loss": 0.5294,
"step": 4100
},
{
"epoch": 0.0464226896422137,
"grad_norm": 1.9894862174987793,
"learning_rate": 4.991033674565885e-06,
"loss": 0.5556,
"step": 4200
},
{
"epoch": 0.04752799177655212,
"grad_norm": 1.9730507135391235,
"learning_rate": 4.990268481436984e-06,
"loss": 0.4888,
"step": 4300
},
{
"epoch": 0.048633293910890545,
"grad_norm": 2.208463430404663,
"learning_rate": 4.989472023855966e-06,
"loss": 0.5387,
"step": 4400
},
{
"epoch": 0.04973859604522896,
"grad_norm": 2.394077777862549,
"learning_rate": 4.988644311822076e-06,
"loss": 0.4932,
"step": 4500
},
{
"epoch": 0.05084389817956739,
"grad_norm": 2.514061689376831,
"learning_rate": 4.987785355726953e-06,
"loss": 0.5254,
"step": 4600
},
{
"epoch": 0.051949200313905805,
"grad_norm": 1.8961576223373413,
"learning_rate": 4.9868951663544885e-06,
"loss": 0.5145,
"step": 4700
},
{
"epoch": 0.05305450244824423,
"grad_norm": 2.2813808917999268,
"learning_rate": 4.9859737548807005e-06,
"loss": 0.4982,
"step": 4800
},
{
"epoch": 0.05415980458258265,
"grad_norm": 2.1236634254455566,
"learning_rate": 4.98502113287359e-06,
"loss": 0.5206,
"step": 4900
},
{
"epoch": 0.05526510671692107,
"grad_norm": 2.573836326599121,
"learning_rate": 4.984037312292992e-06,
"loss": 0.4844,
"step": 5000
},
{
"epoch": 0.05637040885125949,
"grad_norm": 1.2394871711730957,
"learning_rate": 4.983022305490431e-06,
"loss": 0.4921,
"step": 5100
},
{
"epoch": 0.057475710985597915,
"grad_norm": 2.2655134201049805,
"learning_rate": 4.9819761252089635e-06,
"loss": 0.5278,
"step": 5200
},
{
"epoch": 0.05858101311993633,
"grad_norm": 1.9459484815597534,
"learning_rate": 4.980898784583019e-06,
"loss": 0.5215,
"step": 5300
},
{
"epoch": 0.05968631525427476,
"grad_norm": 2.574147939682007,
"learning_rate": 4.979790297138232e-06,
"loss": 0.5155,
"step": 5400
},
{
"epoch": 0.060791617388613176,
"grad_norm": 2.5039682388305664,
"learning_rate": 4.9786506767912775e-06,
"loss": 0.5245,
"step": 5500
},
{
"epoch": 0.0618969195229516,
"grad_norm": 2.6227054595947266,
"learning_rate": 4.977479937849689e-06,
"loss": 0.4843,
"step": 5600
},
{
"epoch": 0.06300222165729003,
"grad_norm": 2.1595468521118164,
"learning_rate": 4.9762780950116865e-06,
"loss": 0.4863,
"step": 5700
},
{
"epoch": 0.06410752379162844,
"grad_norm": 1.8619611263275146,
"learning_rate": 4.975045163365989e-06,
"loss": 0.5083,
"step": 5800
},
{
"epoch": 0.06521282592596686,
"grad_norm": 2.270404100418091,
"learning_rate": 4.973781158391621e-06,
"loss": 0.5516,
"step": 5900
},
{
"epoch": 0.06631812806030528,
"grad_norm": 1.9068191051483154,
"learning_rate": 4.972486095957725e-06,
"loss": 0.5058,
"step": 6000
},
{
"epoch": 0.06742343019464371,
"grad_norm": 2.2948782444000244,
"learning_rate": 4.971159992323359e-06,
"loss": 0.5018,
"step": 6100
},
{
"epoch": 0.06852873232898213,
"grad_norm": 3.0896589756011963,
"learning_rate": 4.969802864137289e-06,
"loss": 0.5062,
"step": 6200
},
{
"epoch": 0.06963403446332055,
"grad_norm": 1.7098015546798706,
"learning_rate": 4.96841472843779e-06,
"loss": 0.5067,
"step": 6300
},
{
"epoch": 0.07073933659765896,
"grad_norm": 2.6850175857543945,
"learning_rate": 4.966995602652417e-06,
"loss": 0.5287,
"step": 6400
},
{
"epoch": 0.0718446387319974,
"grad_norm": 1.6628856658935547,
"learning_rate": 4.965545504597802e-06,
"loss": 0.5225,
"step": 6500
},
{
"epoch": 0.07294994086633581,
"grad_norm": 2.279022693634033,
"learning_rate": 4.9640644524794205e-06,
"loss": 0.5026,
"step": 6600
},
{
"epoch": 0.07405524300067423,
"grad_norm": 0.924898624420166,
"learning_rate": 4.962552464891363e-06,
"loss": 0.5354,
"step": 6700
},
{
"epoch": 0.07516054513501265,
"grad_norm": 2.779557228088379,
"learning_rate": 4.961009560816109e-06,
"loss": 0.4776,
"step": 6800
},
{
"epoch": 0.07626584726935108,
"grad_norm": 2.554727077484131,
"learning_rate": 4.9594357596242795e-06,
"loss": 0.4821,
"step": 6900
},
{
"epoch": 0.0773711494036895,
"grad_norm": 1.730661153793335,
"learning_rate": 4.957831081074398e-06,
"loss": 0.4903,
"step": 7000
},
{
"epoch": 0.07847645153802792,
"grad_norm": 2.198575735092163,
"learning_rate": 4.956195545312647e-06,
"loss": 0.4946,
"step": 7100
},
{
"epoch": 0.07958175367236635,
"grad_norm": 1.3369964361190796,
"learning_rate": 4.954529172872605e-06,
"loss": 0.51,
"step": 7200
},
{
"epoch": 0.08068705580670477,
"grad_norm": 2.4426262378692627,
"learning_rate": 4.952831984674998e-06,
"loss": 0.5108,
"step": 7300
},
{
"epoch": 0.08179235794104318,
"grad_norm": 3.9186463356018066,
"learning_rate": 4.951104002027432e-06,
"loss": 0.5086,
"step": 7400
},
{
"epoch": 0.0828976600753816,
"grad_norm": 1.9639850854873657,
"learning_rate": 4.9493452466241254e-06,
"loss": 0.4758,
"step": 7500
},
{
"epoch": 0.08400296220972003,
"grad_norm": 1.2126818895339966,
"learning_rate": 4.94755574054564e-06,
"loss": 0.5017,
"step": 7600
},
{
"epoch": 0.08510826434405845,
"grad_norm": 2.206359386444092,
"learning_rate": 4.945735506258598e-06,
"loss": 0.537,
"step": 7700
},
{
"epoch": 0.08621356647839687,
"grad_norm": 1.7051986455917358,
"learning_rate": 4.943884566615409e-06,
"loss": 0.4835,
"step": 7800
},
{
"epoch": 0.08731886861273529,
"grad_norm": 1.832702398300171,
"learning_rate": 4.942002944853973e-06,
"loss": 0.454,
"step": 7900
},
{
"epoch": 0.08842417074707372,
"grad_norm": 1.8357278108596802,
"learning_rate": 4.940090664597394e-06,
"loss": 0.4972,
"step": 8000
},
{
"epoch": 0.08952947288141214,
"grad_norm": 2.1181540489196777,
"learning_rate": 4.938147749853685e-06,
"loss": 0.5184,
"step": 8100
},
{
"epoch": 0.09063477501575055,
"grad_norm": 1.7029916048049927,
"learning_rate": 4.936174225015463e-06,
"loss": 0.5324,
"step": 8200
},
{
"epoch": 0.09174007715008897,
"grad_norm": 2.0932748317718506,
"learning_rate": 4.934170114859643e-06,
"loss": 0.4806,
"step": 8300
},
{
"epoch": 0.0928453792844274,
"grad_norm": 2.3745322227478027,
"learning_rate": 4.932135444547129e-06,
"loss": 0.4869,
"step": 8400
},
{
"epoch": 0.09395068141876582,
"grad_norm": 2.1215474605560303,
"learning_rate": 4.930070239622498e-06,
"loss": 0.4777,
"step": 8500
},
{
"epoch": 0.09505598355310424,
"grad_norm": 1.7763068675994873,
"learning_rate": 4.9279745260136756e-06,
"loss": 0.478,
"step": 8600
},
{
"epoch": 0.09616128568744266,
"grad_norm": 1.950086236000061,
"learning_rate": 4.925848330031617e-06,
"loss": 0.5048,
"step": 8700
},
{
"epoch": 0.09726658782178109,
"grad_norm": 2.959291696548462,
"learning_rate": 4.923691678369971e-06,
"loss": 0.513,
"step": 8800
},
{
"epoch": 0.09837188995611951,
"grad_norm": 2.3258442878723145,
"learning_rate": 4.921504598104745e-06,
"loss": 0.4896,
"step": 8900
},
{
"epoch": 0.09947719209045792,
"grad_norm": 2.5175669193267822,
"learning_rate": 4.9192871166939715e-06,
"loss": 0.4783,
"step": 9000
},
{
"epoch": 0.10058249422479634,
"grad_norm": 1.981148600578308,
"learning_rate": 4.917039261977353e-06,
"loss": 0.4906,
"step": 9100
},
{
"epoch": 0.10168779635913477,
"grad_norm": 2.439974069595337,
"learning_rate": 4.914761062175925e-06,
"loss": 0.5007,
"step": 9200
},
{
"epoch": 0.10279309849347319,
"grad_norm": 2.8156814575195312,
"learning_rate": 4.912452545891689e-06,
"loss": 0.5203,
"step": 9300
},
{
"epoch": 0.10389840062781161,
"grad_norm": 2.4708168506622314,
"learning_rate": 4.9101137421072605e-06,
"loss": 0.4663,
"step": 9400
},
{
"epoch": 0.10500370276215003,
"grad_norm": 2.4594314098358154,
"learning_rate": 4.907744680185508e-06,
"loss": 0.5027,
"step": 9500
},
{
"epoch": 0.10610900489648846,
"grad_norm": 1.7548918724060059,
"learning_rate": 4.905345389869176e-06,
"loss": 0.4534,
"step": 9600
},
{
"epoch": 0.10721430703082688,
"grad_norm": 1.6353791952133179,
"learning_rate": 4.902915901280517e-06,
"loss": 0.49,
"step": 9700
},
{
"epoch": 0.1083196091651653,
"grad_norm": 3.52217698097229,
"learning_rate": 4.9004562449209146e-06,
"loss": 0.4935,
"step": 9800
},
{
"epoch": 0.10942491129950371,
"grad_norm": 1.6542017459869385,
"learning_rate": 4.897966451670495e-06,
"loss": 0.5118,
"step": 9900
},
{
"epoch": 0.11053021343384214,
"grad_norm": 2.575944185256958,
"learning_rate": 4.895446552787744e-06,
"loss": 0.4977,
"step": 10000
},
{
"epoch": 0.11163551556818056,
"grad_norm": 2.081350088119507,
"learning_rate": 4.8928965799091134e-06,
"loss": 0.5261,
"step": 10100
},
{
"epoch": 0.11274081770251898,
"grad_norm": 2.022676944732666,
"learning_rate": 4.890316565048624e-06,
"loss": 0.4889,
"step": 10200
},
{
"epoch": 0.1138461198368574,
"grad_norm": 1.5808357000350952,
"learning_rate": 4.887706540597461e-06,
"loss": 0.4929,
"step": 10300
},
{
"epoch": 0.11495142197119583,
"grad_norm": 2.1185178756713867,
"learning_rate": 4.8850665393235716e-06,
"loss": 0.4575,
"step": 10400
},
{
"epoch": 0.11605672410553425,
"grad_norm": 2.5382957458496094,
"learning_rate": 4.8823965943712505e-06,
"loss": 0.4979,
"step": 10500
},
{
"epoch": 0.11716202623987267,
"grad_norm": 2.045133590698242,
"learning_rate": 4.879696739260726e-06,
"loss": 0.5215,
"step": 10600
},
{
"epoch": 0.11826732837421108,
"grad_norm": 2.119107484817505,
"learning_rate": 4.876967007887737e-06,
"loss": 0.4754,
"step": 10700
},
{
"epoch": 0.11937263050854952,
"grad_norm": 2.549633502960205,
"learning_rate": 4.8742074345231076e-06,
"loss": 0.5051,
"step": 10800
},
{
"epoch": 0.12047793264288793,
"grad_norm": 3.1271703243255615,
"learning_rate": 4.8714180538123205e-06,
"loss": 0.5036,
"step": 10900
},
{
"epoch": 0.12158323477722635,
"grad_norm": 1.8725048303604126,
"learning_rate": 4.868598900775076e-06,
"loss": 0.4766,
"step": 11000
},
{
"epoch": 0.12268853691156478,
"grad_norm": 1.3768223524093628,
"learning_rate": 4.865750010804857e-06,
"loss": 0.4821,
"step": 11100
},
{
"epoch": 0.1237938390459032,
"grad_norm": 2.7702245712280273,
"learning_rate": 4.8628714196684854e-06,
"loss": 0.5154,
"step": 11200
},
{
"epoch": 0.12489914118024162,
"grad_norm": 2.6272552013397217,
"learning_rate": 4.859963163505668e-06,
"loss": 0.4747,
"step": 11300
},
{
"epoch": 0.12600444331458005,
"grad_norm": 1.649949312210083,
"learning_rate": 4.857025278828545e-06,
"loss": 0.4836,
"step": 11400
},
{
"epoch": 0.12710974544891845,
"grad_norm": 2.358071804046631,
"learning_rate": 4.854057802521234e-06,
"loss": 0.5184,
"step": 11500
},
{
"epoch": 0.12821504758325689,
"grad_norm": 2.5856614112854004,
"learning_rate": 4.851060771839367e-06,
"loss": 0.4818,
"step": 11600
},
{
"epoch": 0.12932034971759532,
"grad_norm": 1.8580783605575562,
"learning_rate": 4.848034224409616e-06,
"loss": 0.4887,
"step": 11700
},
{
"epoch": 0.13042565185193372,
"grad_norm": 2.2157649993896484,
"learning_rate": 4.84497819822923e-06,
"loss": 0.5045,
"step": 11800
},
{
"epoch": 0.13153095398627215,
"grad_norm": 1.4233261346817017,
"learning_rate": 4.841892731665552e-06,
"loss": 0.5147,
"step": 11900
},
{
"epoch": 0.13263625612061056,
"grad_norm": 1.6375737190246582,
"learning_rate": 4.838777863455537e-06,
"loss": 0.4651,
"step": 12000
},
{
"epoch": 0.133741558254949,
"grad_norm": 1.2430723905563354,
"learning_rate": 4.835633632705269e-06,
"loss": 0.4737,
"step": 12100
},
{
"epoch": 0.13484686038928742,
"grad_norm": 2.4360849857330322,
"learning_rate": 4.83246007888947e-06,
"loss": 0.4936,
"step": 12200
},
{
"epoch": 0.13595216252362582,
"grad_norm": 1.9232250452041626,
"learning_rate": 4.8292572418509995e-06,
"loss": 0.4763,
"step": 12300
},
{
"epoch": 0.13705746465796426,
"grad_norm": 2.343539237976074,
"learning_rate": 4.82602516180036e-06,
"loss": 0.4956,
"step": 12400
},
{
"epoch": 0.1381627667923027,
"grad_norm": 1.493943691253662,
"learning_rate": 4.8227638793151875e-06,
"loss": 0.4653,
"step": 12500
},
{
"epoch": 0.1392680689266411,
"grad_norm": 3.257138729095459,
"learning_rate": 4.819473435339748e-06,
"loss": 0.4564,
"step": 12600
},
{
"epoch": 0.14037337106097952,
"grad_norm": 1.8864688873291016,
"learning_rate": 4.816153871184418e-06,
"loss": 0.4667,
"step": 12700
},
{
"epoch": 0.14147867319531793,
"grad_norm": 2.1740174293518066,
"learning_rate": 4.812805228525166e-06,
"loss": 0.4499,
"step": 12800
},
{
"epoch": 0.14258397532965636,
"grad_norm": 1.5121800899505615,
"learning_rate": 4.809427549403033e-06,
"loss": 0.4933,
"step": 12900
},
{
"epoch": 0.1436892774639948,
"grad_norm": 1.604945182800293,
"learning_rate": 4.8060208762236025e-06,
"loss": 0.479,
"step": 13000
},
{
"epoch": 0.1447945795983332,
"grad_norm": 1.933350682258606,
"learning_rate": 4.802585251756468e-06,
"loss": 0.5105,
"step": 13100
},
{
"epoch": 0.14589988173267163,
"grad_norm": 2.8999829292297363,
"learning_rate": 4.799120719134696e-06,
"loss": 0.4689,
"step": 13200
},
{
"epoch": 0.14700518386701006,
"grad_norm": 2.4011030197143555,
"learning_rate": 4.795627321854283e-06,
"loss": 0.4709,
"step": 13300
},
{
"epoch": 0.14811048600134846,
"grad_norm": 2.080972671508789,
"learning_rate": 4.792105103773618e-06,
"loss": 0.4893,
"step": 13400
},
{
"epoch": 0.1492157881356869,
"grad_norm": 2.4878017902374268,
"learning_rate": 4.788554109112918e-06,
"loss": 0.5236,
"step": 13500
},
{
"epoch": 0.1503210902700253,
"grad_norm": 2.1215240955352783,
"learning_rate": 4.78497438245368e-06,
"loss": 0.4817,
"step": 13600
},
{
"epoch": 0.15142639240436373,
"grad_norm": 1.5228586196899414,
"learning_rate": 4.781365968738126e-06,
"loss": 0.4895,
"step": 13700
},
{
"epoch": 0.15253169453870216,
"grad_norm": 2.399446487426758,
"learning_rate": 4.777728913268632e-06,
"loss": 0.4731,
"step": 13800
},
{
"epoch": 0.15363699667304057,
"grad_norm": 2.1382806301116943,
"learning_rate": 4.774063261707158e-06,
"loss": 0.4981,
"step": 13900
},
{
"epoch": 0.154742298807379,
"grad_norm": 1.590667486190796,
"learning_rate": 4.770369060074685e-06,
"loss": 0.4599,
"step": 14000
},
{
"epoch": 0.15584760094171743,
"grad_norm": 1.882934331893921,
"learning_rate": 4.766646354750621e-06,
"loss": 0.5039,
"step": 14100
},
{
"epoch": 0.15695290307605583,
"grad_norm": 1.8898316621780396,
"learning_rate": 4.762895192472235e-06,
"loss": 0.4758,
"step": 14200
},
{
"epoch": 0.15805820521039426,
"grad_norm": 1.6479010581970215,
"learning_rate": 4.759115620334062e-06,
"loss": 0.493,
"step": 14300
},
{
"epoch": 0.1591635073447327,
"grad_norm": 2.28085994720459,
"learning_rate": 4.755307685787312e-06,
"loss": 0.5221,
"step": 14400
},
{
"epoch": 0.1602688094790711,
"grad_norm": 2.697305202484131,
"learning_rate": 4.751471436639271e-06,
"loss": 0.5172,
"step": 14500
},
{
"epoch": 0.16137411161340953,
"grad_norm": 1.897016167640686,
"learning_rate": 4.7476069210527135e-06,
"loss": 0.5284,
"step": 14600
},
{
"epoch": 0.16247941374774794,
"grad_norm": 2.659196376800537,
"learning_rate": 4.743714187545282e-06,
"loss": 0.4776,
"step": 14700
},
{
"epoch": 0.16358471588208637,
"grad_norm": 1.7990115880966187,
"learning_rate": 4.739793284988889e-06,
"loss": 0.4506,
"step": 14800
},
{
"epoch": 0.1646900180164248,
"grad_norm": 2.136432409286499,
"learning_rate": 4.735844262609096e-06,
"loss": 0.4775,
"step": 14900
},
{
"epoch": 0.1657953201507632,
"grad_norm": 1.8059773445129395,
"learning_rate": 4.731867169984506e-06,
"loss": 0.4847,
"step": 15000
},
{
"epoch": 0.16690062228510164,
"grad_norm": 1.7475543022155762,
"learning_rate": 4.727862057046125e-06,
"loss": 0.5092,
"step": 15100
},
{
"epoch": 0.16800592441944007,
"grad_norm": 1.7633237838745117,
"learning_rate": 4.723828974076752e-06,
"loss": 0.4776,
"step": 15200
},
{
"epoch": 0.16911122655377847,
"grad_norm": 1.973683476448059,
"learning_rate": 4.719767971710335e-06,
"loss": 0.4866,
"step": 15300
},
{
"epoch": 0.1702165286881169,
"grad_norm": 2.3195412158966064,
"learning_rate": 4.715679100931343e-06,
"loss": 0.4784,
"step": 15400
},
{
"epoch": 0.1713218308224553,
"grad_norm": 2.262366533279419,
"learning_rate": 4.711562413074122e-06,
"loss": 0.4494,
"step": 15500
},
{
"epoch": 0.17242713295679374,
"grad_norm": 2.2675039768218994,
"learning_rate": 4.707417959822252e-06,
"loss": 0.5182,
"step": 15600
},
{
"epoch": 0.17353243509113217,
"grad_norm": 2.6644225120544434,
"learning_rate": 4.703245793207898e-06,
"loss": 0.4819,
"step": 15700
},
{
"epoch": 0.17463773722547057,
"grad_norm": 1.4928964376449585,
"learning_rate": 4.699045965611157e-06,
"loss": 0.4542,
"step": 15800
},
{
"epoch": 0.175743039359809,
"grad_norm": 1.7893882989883423,
"learning_rate": 4.694818529759399e-06,
"loss": 0.4836,
"step": 15900
},
{
"epoch": 0.17684834149414744,
"grad_norm": 1.5968459844589233,
"learning_rate": 4.690563538726606e-06,
"loss": 0.4702,
"step": 16000
},
{
"epoch": 0.17795364362848584,
"grad_norm": 2.2333779335021973,
"learning_rate": 4.686281045932707e-06,
"loss": 0.4912,
"step": 16100
},
{
"epoch": 0.17905894576282427,
"grad_norm": 1.1746132373809814,
"learning_rate": 4.681971105142905e-06,
"loss": 0.4935,
"step": 16200
},
{
"epoch": 0.18016424789716268,
"grad_norm": 1.5028539896011353,
"learning_rate": 4.677633770467003e-06,
"loss": 0.4908,
"step": 16300
},
{
"epoch": 0.1812695500315011,
"grad_norm": 1.9890942573547363,
"learning_rate": 4.6732690963587256e-06,
"loss": 0.4651,
"step": 16400
},
{
"epoch": 0.18237485216583954,
"grad_norm": 2.262347459793091,
"learning_rate": 4.668877137615032e-06,
"loss": 0.496,
"step": 16500
},
{
"epoch": 0.18348015430017794,
"grad_norm": 2.2725613117218018,
"learning_rate": 4.664457949375434e-06,
"loss": 0.4707,
"step": 16600
},
{
"epoch": 0.18458545643451638,
"grad_norm": 2.965789794921875,
"learning_rate": 4.660011587121297e-06,
"loss": 0.4969,
"step": 16700
},
{
"epoch": 0.1856907585688548,
"grad_norm": 1.5919311046600342,
"learning_rate": 4.655538106675149e-06,
"loss": 0.4985,
"step": 16800
},
{
"epoch": 0.1867960607031932,
"grad_norm": 2.4821956157684326,
"learning_rate": 4.651037564199977e-06,
"loss": 0.4878,
"step": 16900
},
{
"epoch": 0.18790136283753164,
"grad_norm": 1.9851549863815308,
"learning_rate": 4.646510016198521e-06,
"loss": 0.4778,
"step": 17000
},
{
"epoch": 0.18900666497187005,
"grad_norm": 1.9277724027633667,
"learning_rate": 4.641955519512567e-06,
"loss": 0.5302,
"step": 17100
},
{
"epoch": 0.19011196710620848,
"grad_norm": 2.289950132369995,
"learning_rate": 4.637374131322232e-06,
"loss": 0.4646,
"step": 17200
},
{
"epoch": 0.1912172692405469,
"grad_norm": 2.9119439125061035,
"learning_rate": 4.632765909145247e-06,
"loss": 0.5033,
"step": 17300
},
{
"epoch": 0.19232257137488531,
"grad_norm": 1.9241691827774048,
"learning_rate": 4.628130910836234e-06,
"loss": 0.4879,
"step": 17400
},
{
"epoch": 0.19342787350922375,
"grad_norm": 1.1978574991226196,
"learning_rate": 4.623469194585979e-06,
"loss": 0.4675,
"step": 17500
},
{
"epoch": 0.19453317564356218,
"grad_norm": 1.6705842018127441,
"learning_rate": 4.618780818920705e-06,
"loss": 0.4605,
"step": 17600
},
{
"epoch": 0.19563847777790058,
"grad_norm": 2.020331859588623,
"learning_rate": 4.614065842701332e-06,
"loss": 0.4974,
"step": 17700
},
{
"epoch": 0.19674377991223901,
"grad_norm": 2.0887222290039062,
"learning_rate": 4.609324325122743e-06,
"loss": 0.4736,
"step": 17800
},
{
"epoch": 0.19784908204657745,
"grad_norm": 2.283088445663452,
"learning_rate": 4.604556325713035e-06,
"loss": 0.4985,
"step": 17900
},
{
"epoch": 0.19895438418091585,
"grad_norm": 2.186509132385254,
"learning_rate": 4.599761904332778e-06,
"loss": 0.4767,
"step": 18000
},
{
"epoch": 0.20005968631525428,
"grad_norm": 2.262012243270874,
"learning_rate": 4.594941121174262e-06,
"loss": 0.4697,
"step": 18100
},
{
"epoch": 0.20116498844959269,
"grad_norm": 1.634402871131897,
"learning_rate": 4.590094036760736e-06,
"loss": 0.4939,
"step": 18200
},
{
"epoch": 0.20227029058393112,
"grad_norm": 1.883914589881897,
"learning_rate": 4.5852207119456555e-06,
"loss": 0.47,
"step": 18300
},
{
"epoch": 0.20337559271826955,
"grad_norm": 2.231407880783081,
"learning_rate": 4.580321207911912e-06,
"loss": 0.4815,
"step": 18400
},
{
"epoch": 0.20448089485260795,
"grad_norm": 2.605910539627075,
"learning_rate": 4.57539558617107e-06,
"loss": 0.5328,
"step": 18500
},
{
"epoch": 0.20558619698694638,
"grad_norm": 1.1122691631317139,
"learning_rate": 4.570443908562593e-06,
"loss": 0.4606,
"step": 18600
},
{
"epoch": 0.20669149912128482,
"grad_norm": 1.9738783836364746,
"learning_rate": 4.565466237253066e-06,
"loss": 0.4612,
"step": 18700
},
{
"epoch": 0.20779680125562322,
"grad_norm": 3.1255314350128174,
"learning_rate": 4.560462634735416e-06,
"loss": 0.469,
"step": 18800
},
{
"epoch": 0.20890210338996165,
"grad_norm": 2.3683340549468994,
"learning_rate": 4.555433163828126e-06,
"loss": 0.4997,
"step": 18900
},
{
"epoch": 0.21000740552430006,
"grad_norm": 2.482985496520996,
"learning_rate": 4.55037788767445e-06,
"loss": 0.5105,
"step": 19000
},
{
"epoch": 0.2111127076586385,
"grad_norm": 1.7868962287902832,
"learning_rate": 4.545296869741616e-06,
"loss": 0.4899,
"step": 19100
},
{
"epoch": 0.21221800979297692,
"grad_norm": 1.6937700510025024,
"learning_rate": 4.540190173820033e-06,
"loss": 0.5029,
"step": 19200
},
{
"epoch": 0.21332331192731532,
"grad_norm": 1.6983795166015625,
"learning_rate": 4.535057864022486e-06,
"loss": 0.5273,
"step": 19300
},
{
"epoch": 0.21442861406165376,
"grad_norm": 1.446453332901001,
"learning_rate": 4.529900004783334e-06,
"loss": 0.4864,
"step": 19400
},
{
"epoch": 0.2155339161959922,
"grad_norm": 2.247065305709839,
"learning_rate": 4.524716660857701e-06,
"loss": 0.4805,
"step": 19500
},
{
"epoch": 0.2166392183303306,
"grad_norm": 1.6583445072174072,
"learning_rate": 4.519507897320662e-06,
"loss": 0.4631,
"step": 19600
},
{
"epoch": 0.21774452046466902,
"grad_norm": 1.718631625175476,
"learning_rate": 4.514273779566426e-06,
"loss": 0.4893,
"step": 19700
},
{
"epoch": 0.21884982259900743,
"grad_norm": 1.6608977317810059,
"learning_rate": 4.509014373307515e-06,
"loss": 0.483,
"step": 19800
},
{
"epoch": 0.21995512473334586,
"grad_norm": 2.0695135593414307,
"learning_rate": 4.503729744573943e-06,
"loss": 0.5042,
"step": 19900
},
{
"epoch": 0.2210604268676843,
"grad_norm": 1.75504469871521,
"learning_rate": 4.498419959712376e-06,
"loss": 0.4844,
"step": 20000
},
{
"epoch": 0.2221657290020227,
"grad_norm": 3.0820794105529785,
"learning_rate": 4.493085085385314e-06,
"loss": 0.4775,
"step": 20100
},
{
"epoch": 0.22327103113636113,
"grad_norm": 2.3822927474975586,
"learning_rate": 4.487725188570241e-06,
"loss": 0.4563,
"step": 20200
},
{
"epoch": 0.22437633327069956,
"grad_norm": 2.8337135314941406,
"learning_rate": 4.482340336558793e-06,
"loss": 0.4712,
"step": 20300
},
{
"epoch": 0.22548163540503796,
"grad_norm": 2.8210105895996094,
"learning_rate": 4.476930596955909e-06,
"loss": 0.5026,
"step": 20400
},
{
"epoch": 0.2265869375393764,
"grad_norm": 2.012446165084839,
"learning_rate": 4.471496037678982e-06,
"loss": 0.4728,
"step": 20500
},
{
"epoch": 0.2276922396737148,
"grad_norm": 2.477320432662964,
"learning_rate": 4.466036726957008e-06,
"loss": 0.5243,
"step": 20600
},
{
"epoch": 0.22879754180805323,
"grad_norm": 2.1189372539520264,
"learning_rate": 4.460552733329729e-06,
"loss": 0.4414,
"step": 20700
},
{
"epoch": 0.22990284394239166,
"grad_norm": 1.6811827421188354,
"learning_rate": 4.455044125646773e-06,
"loss": 0.4606,
"step": 20800
},
{
"epoch": 0.23100814607673006,
"grad_norm": 1.8918300867080688,
"learning_rate": 4.449510973066785e-06,
"loss": 0.4587,
"step": 20900
},
{
"epoch": 0.2321134482110685,
"grad_norm": 1.6469461917877197,
"learning_rate": 4.44395334505657e-06,
"loss": 0.4811,
"step": 21000
},
{
"epoch": 0.23321875034540693,
"grad_norm": 1.0091384649276733,
"learning_rate": 4.438371311390205e-06,
"loss": 0.4469,
"step": 21100
},
{
"epoch": 0.23432405247974533,
"grad_norm": 1.67509126663208,
"learning_rate": 4.432764942148177e-06,
"loss": 0.4812,
"step": 21200
},
{
"epoch": 0.23542935461408376,
"grad_norm": 2.054719924926758,
"learning_rate": 4.427134307716496e-06,
"loss": 0.4343,
"step": 21300
},
{
"epoch": 0.23653465674842217,
"grad_norm": 2.0753352642059326,
"learning_rate": 4.421479478785814e-06,
"loss": 0.4677,
"step": 21400
},
{
"epoch": 0.2376399588827606,
"grad_norm": 1.5594350099563599,
"learning_rate": 4.415800526350535e-06,
"loss": 0.475,
"step": 21500
},
{
"epoch": 0.23874526101709903,
"grad_norm": 2.458397626876831,
"learning_rate": 4.410097521707926e-06,
"loss": 0.4943,
"step": 21600
},
{
"epoch": 0.23985056315143743,
"grad_norm": 2.180816888809204,
"learning_rate": 4.404370536457221e-06,
"loss": 0.4361,
"step": 21700
},
{
"epoch": 0.24095586528577587,
"grad_norm": 2.4106123447418213,
"learning_rate": 4.3986196424987216e-06,
"loss": 0.5065,
"step": 21800
},
{
"epoch": 0.2420611674201143,
"grad_norm": 2.228212833404541,
"learning_rate": 4.392844912032896e-06,
"loss": 0.4892,
"step": 21900
},
{
"epoch": 0.2431664695544527,
"grad_norm": 2.2582526206970215,
"learning_rate": 4.387046417559471e-06,
"loss": 0.443,
"step": 22000
},
{
"epoch": 0.24427177168879113,
"grad_norm": 3.1825761795043945,
"learning_rate": 4.381224231876521e-06,
"loss": 0.4607,
"step": 22100
},
{
"epoch": 0.24537707382312957,
"grad_norm": 1.9606397151947021,
"learning_rate": 4.375378428079557e-06,
"loss": 0.4431,
"step": 22200
},
{
"epoch": 0.24648237595746797,
"grad_norm": 1.9158498048782349,
"learning_rate": 4.369509079560608e-06,
"loss": 0.4923,
"step": 22300
},
{
"epoch": 0.2475876780918064,
"grad_norm": 2.624380111694336,
"learning_rate": 4.363616260007294e-06,
"loss": 0.4632,
"step": 22400
},
{
"epoch": 0.2486929802261448,
"grad_norm": 1.440521001815796,
"learning_rate": 4.357700043401912e-06,
"loss": 0.4798,
"step": 22500
},
{
"epoch": 0.24979828236048324,
"grad_norm": 2.1393532752990723,
"learning_rate": 4.351760504020496e-06,
"loss": 0.459,
"step": 22600
},
{
"epoch": 0.25090358449482164,
"grad_norm": 1.950707197189331,
"learning_rate": 4.345797716431891e-06,
"loss": 0.5176,
"step": 22700
},
{
"epoch": 0.2520088866291601,
"grad_norm": 2.3011667728424072,
"learning_rate": 4.339811755496817e-06,
"loss": 0.4838,
"step": 22800
},
{
"epoch": 0.2531141887634985,
"grad_norm": 1.6088446378707886,
"learning_rate": 4.333802696366923e-06,
"loss": 0.4588,
"step": 22900
},
{
"epoch": 0.2542194908978369,
"grad_norm": 1.790541410446167,
"learning_rate": 4.327770614483853e-06,
"loss": 0.4824,
"step": 23000
},
{
"epoch": 0.25532479303217537,
"grad_norm": 2.6423535346984863,
"learning_rate": 4.321715585578289e-06,
"loss": 0.4589,
"step": 23100
},
{
"epoch": 0.25643009516651377,
"grad_norm": 1.4211223125457764,
"learning_rate": 4.315637685669006e-06,
"loss": 0.4483,
"step": 23200
},
{
"epoch": 0.2575353973008522,
"grad_norm": 1.9869434833526611,
"learning_rate": 4.30953699106192e-06,
"loss": 0.4658,
"step": 23300
},
{
"epoch": 0.25864069943519064,
"grad_norm": 1.8357223272323608,
"learning_rate": 4.303413578349122e-06,
"loss": 0.4697,
"step": 23400
},
{
"epoch": 0.25974600156952904,
"grad_norm": 1.6129013299942017,
"learning_rate": 4.2972675244079224e-06,
"loss": 0.4612,
"step": 23500
},
{
"epoch": 0.26085130370386744,
"grad_norm": 1.8021016120910645,
"learning_rate": 4.291098906399885e-06,
"loss": 0.4536,
"step": 23600
},
{
"epoch": 0.26195660583820585,
"grad_norm": 1.4587496519088745,
"learning_rate": 4.2849078017698565e-06,
"loss": 0.4347,
"step": 23700
},
{
"epoch": 0.2630619079725443,
"grad_norm": 2.1143853664398193,
"learning_rate": 4.2786942882449965e-06,
"loss": 0.4478,
"step": 23800
},
{
"epoch": 0.2641672101068827,
"grad_norm": 1.9837020635604858,
"learning_rate": 4.272458443833801e-06,
"loss": 0.4586,
"step": 23900
},
{
"epoch": 0.2652725122412211,
"grad_norm": 1.6629817485809326,
"learning_rate": 4.266200346825119e-06,
"loss": 0.4609,
"step": 24000
},
{
"epoch": 0.2663778143755596,
"grad_norm": 2.2694997787475586,
"learning_rate": 4.259920075787177e-06,
"loss": 0.4506,
"step": 24100
},
{
"epoch": 0.267483116509898,
"grad_norm": 2.3292577266693115,
"learning_rate": 4.253617709566588e-06,
"loss": 0.4517,
"step": 24200
},
{
"epoch": 0.2685884186442364,
"grad_norm": 2.215757369995117,
"learning_rate": 4.247293327287359e-06,
"loss": 0.4598,
"step": 24300
},
{
"epoch": 0.26969372077857484,
"grad_norm": 2.3665645122528076,
"learning_rate": 4.240947008349905e-06,
"loss": 0.4926,
"step": 24400
},
{
"epoch": 0.27079902291291325,
"grad_norm": 2.2286605834960938,
"learning_rate": 4.234578832430047e-06,
"loss": 0.4665,
"step": 24500
},
{
"epoch": 0.27190432504725165,
"grad_norm": 2.3083527088165283,
"learning_rate": 4.228188879478011e-06,
"loss": 0.4841,
"step": 24600
},
{
"epoch": 0.2730096271815901,
"grad_norm": 1.8674919605255127,
"learning_rate": 4.221777229717428e-06,
"loss": 0.464,
"step": 24700
},
{
"epoch": 0.2741149293159285,
"grad_norm": 2.442124605178833,
"learning_rate": 4.215343963644324e-06,
"loss": 0.4462,
"step": 24800
},
{
"epoch": 0.2752202314502669,
"grad_norm": 1.761814832687378,
"learning_rate": 4.2088891620261106e-06,
"loss": 0.4811,
"step": 24900
},
{
"epoch": 0.2763255335846054,
"grad_norm": 1.81318998336792,
"learning_rate": 4.20241290590057e-06,
"loss": 0.4819,
"step": 25000
},
{
"epoch": 0.2774308357189438,
"grad_norm": 2.6324472427368164,
"learning_rate": 4.1959152765748405e-06,
"loss": 0.4942,
"step": 25100
},
{
"epoch": 0.2785361378532822,
"grad_norm": 1.9197957515716553,
"learning_rate": 4.189396355624389e-06,
"loss": 0.4411,
"step": 25200
},
{
"epoch": 0.27964143998762064,
"grad_norm": 2.736686944961548,
"learning_rate": 4.182856224891997e-06,
"loss": 0.4679,
"step": 25300
},
{
"epoch": 0.28074674212195905,
"grad_norm": 1.2711482048034668,
"learning_rate": 4.176294966486722e-06,
"loss": 0.4621,
"step": 25400
},
{
"epoch": 0.28185204425629745,
"grad_norm": 2.046609401702881,
"learning_rate": 4.169712662782876e-06,
"loss": 0.4733,
"step": 25500
},
{
"epoch": 0.28295734639063586,
"grad_norm": 1.6701066493988037,
"learning_rate": 4.163109396418986e-06,
"loss": 0.4771,
"step": 25600
},
{
"epoch": 0.2840626485249743,
"grad_norm": 1.8547199964523315,
"learning_rate": 4.156485250296757e-06,
"loss": 0.4596,
"step": 25700
},
{
"epoch": 0.2851679506593127,
"grad_norm": 2.2946977615356445,
"learning_rate": 4.149840307580033e-06,
"loss": 0.4497,
"step": 25800
},
{
"epoch": 0.2862732527936511,
"grad_norm": 2.6851511001586914,
"learning_rate": 4.143174651693753e-06,
"loss": 0.4497,
"step": 25900
},
{
"epoch": 0.2873785549279896,
"grad_norm": 2.5896623134613037,
"learning_rate": 4.1364883663229e-06,
"loss": 0.4664,
"step": 26000
},
{
"epoch": 0.288483857062328,
"grad_norm": 2.0162718296051025,
"learning_rate": 4.129781535411456e-06,
"loss": 0.4614,
"step": 26100
},
{
"epoch": 0.2895891591966664,
"grad_norm": 2.3387439250946045,
"learning_rate": 4.123054243161342e-06,
"loss": 0.4867,
"step": 26200
},
{
"epoch": 0.29069446133100485,
"grad_norm": 2.132131338119507,
"learning_rate": 4.116306574031366e-06,
"loss": 0.4741,
"step": 26300
},
{
"epoch": 0.29179976346534325,
"grad_norm": 1.7863556146621704,
"learning_rate": 4.109538612736161e-06,
"loss": 0.4492,
"step": 26400
},
{
"epoch": 0.29290506559968166,
"grad_norm": 2.3342113494873047,
"learning_rate": 4.10275044424512e-06,
"loss": 0.47,
"step": 26500
},
{
"epoch": 0.2940103677340201,
"grad_norm": 2.0262320041656494,
"learning_rate": 4.095942153781329e-06,
"loss": 0.4635,
"step": 26600
},
{
"epoch": 0.2951156698683585,
"grad_norm": 2.9538447856903076,
"learning_rate": 4.0891138268205025e-06,
"loss": 0.4477,
"step": 26700
},
{
"epoch": 0.2962209720026969,
"grad_norm": 2.5609724521636963,
"learning_rate": 4.082265549089902e-06,
"loss": 0.4546,
"step": 26800
},
{
"epoch": 0.2973262741370354,
"grad_norm": 2.4035484790802,
"learning_rate": 4.075397406567265e-06,
"loss": 0.494,
"step": 26900
},
{
"epoch": 0.2984315762713738,
"grad_norm": 1.2948765754699707,
"learning_rate": 4.068509485479726e-06,
"loss": 0.485,
"step": 27000
},
{
"epoch": 0.2995368784057122,
"grad_norm": 1.7401434183120728,
"learning_rate": 4.061601872302732e-06,
"loss": 0.4451,
"step": 27100
},
{
"epoch": 0.3006421805400506,
"grad_norm": 1.718982219696045,
"learning_rate": 4.054674653758956e-06,
"loss": 0.4837,
"step": 27200
},
{
"epoch": 0.30174748267438906,
"grad_norm": 2.159252166748047,
"learning_rate": 4.047727916817211e-06,
"loss": 0.4709,
"step": 27300
},
{
"epoch": 0.30285278480872746,
"grad_norm": 1.9981988668441772,
"learning_rate": 4.040761748691356e-06,
"loss": 0.468,
"step": 27400
},
{
"epoch": 0.30395808694306586,
"grad_norm": 2.0982799530029297,
"learning_rate": 4.033776236839202e-06,
"loss": 0.4637,
"step": 27500
},
{
"epoch": 0.3050633890774043,
"grad_norm": 2.9962141513824463,
"learning_rate": 4.0267714689614124e-06,
"loss": 0.4695,
"step": 27600
},
{
"epoch": 0.3061686912117427,
"grad_norm": 2.803635597229004,
"learning_rate": 4.019747533000405e-06,
"loss": 0.4771,
"step": 27700
},
{
"epoch": 0.30727399334608113,
"grad_norm": 1.8022634983062744,
"learning_rate": 4.012704517139248e-06,
"loss": 0.4672,
"step": 27800
},
{
"epoch": 0.3083792954804196,
"grad_norm": 1.9764262437820435,
"learning_rate": 4.005642509800545e-06,
"loss": 0.4842,
"step": 27900
},
{
"epoch": 0.309484597614758,
"grad_norm": 2.3172965049743652,
"learning_rate": 3.998561599645338e-06,
"loss": 0.4747,
"step": 28000
},
{
"epoch": 0.3105898997490964,
"grad_norm": 3.117851972579956,
"learning_rate": 3.9914618755719816e-06,
"loss": 0.4857,
"step": 28100
},
{
"epoch": 0.31169520188343486,
"grad_norm": 2.1363372802734375,
"learning_rate": 3.984343426715036e-06,
"loss": 0.4405,
"step": 28200
},
{
"epoch": 0.31280050401777326,
"grad_norm": 2.1967580318450928,
"learning_rate": 3.977206342444144e-06,
"loss": 0.4626,
"step": 28300
},
{
"epoch": 0.31390580615211167,
"grad_norm": 1.6863844394683838,
"learning_rate": 3.970050712362908e-06,
"loss": 0.4505,
"step": 28400
},
{
"epoch": 0.3150111082864501,
"grad_norm": 2.1374428272247314,
"learning_rate": 3.962876626307769e-06,
"loss": 0.4522,
"step": 28500
},
{
"epoch": 0.31611641042078853,
"grad_norm": 2.230015754699707,
"learning_rate": 3.955684174346872e-06,
"loss": 0.4331,
"step": 28600
},
{
"epoch": 0.31722171255512693,
"grad_norm": 2.7188756465911865,
"learning_rate": 3.948473446778947e-06,
"loss": 0.4788,
"step": 28700
},
{
"epoch": 0.3183270146894654,
"grad_norm": 1.7964341640472412,
"learning_rate": 3.94124453413216e-06,
"loss": 0.4442,
"step": 28800
},
{
"epoch": 0.3194323168238038,
"grad_norm": 1.4361404180526733,
"learning_rate": 3.933997527162987e-06,
"loss": 0.4868,
"step": 28900
},
{
"epoch": 0.3205376189581422,
"grad_norm": 2.0563929080963135,
"learning_rate": 3.926732516855075e-06,
"loss": 0.4921,
"step": 29000
},
{
"epoch": 0.3216429210924806,
"grad_norm": 1.55277419090271,
"learning_rate": 3.919449594418094e-06,
"loss": 0.4877,
"step": 29100
},
{
"epoch": 0.32274822322681906,
"grad_norm": 2.299819231033325,
"learning_rate": 3.912148851286593e-06,
"loss": 0.468,
"step": 29200
},
{
"epoch": 0.32385352536115747,
"grad_norm": 1.409555435180664,
"learning_rate": 3.904830379118857e-06,
"loss": 0.4279,
"step": 29300
},
{
"epoch": 0.32495882749549587,
"grad_norm": 1.9166666269302368,
"learning_rate": 3.89749426979575e-06,
"loss": 0.4732,
"step": 29400
},
{
"epoch": 0.32606412962983433,
"grad_norm": 2.2752537727355957,
"learning_rate": 3.890140615419566e-06,
"loss": 0.4605,
"step": 29500
},
{
"epoch": 0.32716943176417274,
"grad_norm": 1.6896592378616333,
"learning_rate": 3.882769508312871e-06,
"loss": 0.4513,
"step": 29600
},
{
"epoch": 0.32827473389851114,
"grad_norm": 1.8940850496292114,
"learning_rate": 3.875381041017343e-06,
"loss": 0.4665,
"step": 29700
},
{
"epoch": 0.3293800360328496,
"grad_norm": 2.7840423583984375,
"learning_rate": 3.867975306292612e-06,
"loss": 0.472,
"step": 29800
},
{
"epoch": 0.330485338167188,
"grad_norm": 1.7090684175491333,
"learning_rate": 3.860552397115093e-06,
"loss": 0.4239,
"step": 29900
},
{
"epoch": 0.3315906403015264,
"grad_norm": 1.5519531965255737,
"learning_rate": 3.853112406676823e-06,
"loss": 0.4537,
"step": 30000
},
{
"epoch": 0.33269594243586487,
"grad_norm": 2.7194883823394775,
"learning_rate": 3.845655428384286e-06,
"loss": 0.5102,
"step": 30100
},
{
"epoch": 0.33380124457020327,
"grad_norm": 2.118680000305176,
"learning_rate": 3.838181555857243e-06,
"loss": 0.4915,
"step": 30200
},
{
"epoch": 0.3349065467045417,
"grad_norm": 2.484039545059204,
"learning_rate": 3.830690882927558e-06,
"loss": 0.4603,
"step": 30300
},
{
"epoch": 0.33601184883888013,
"grad_norm": 2.0341908931732178,
"learning_rate": 3.823183503638014e-06,
"loss": 0.4684,
"step": 30400
},
{
"epoch": 0.33711715097321854,
"grad_norm": 0.9588632583618164,
"learning_rate": 3.815659512241141e-06,
"loss": 0.4963,
"step": 30500
},
{
"epoch": 0.33822245310755694,
"grad_norm": 2.8853650093078613,
"learning_rate": 3.8081190031980266e-06,
"loss": 0.4801,
"step": 30600
},
{
"epoch": 0.33932775524189535,
"grad_norm": 1.7053953409194946,
"learning_rate": 3.8005620711771318e-06,
"loss": 0.4591,
"step": 30700
},
{
"epoch": 0.3404330573762338,
"grad_norm": 2.16013765335083,
"learning_rate": 3.7929888110530998e-06,
"loss": 0.4598,
"step": 30800
},
{
"epoch": 0.3415383595105722,
"grad_norm": 2.3963918685913086,
"learning_rate": 3.7853993179055724e-06,
"loss": 0.4681,
"step": 30900
},
{
"epoch": 0.3426436616449106,
"grad_norm": 3.2389566898345947,
"learning_rate": 3.7777936870179873e-06,
"loss": 0.4717,
"step": 31000
},
{
"epoch": 0.3437489637792491,
"grad_norm": 2.17598032951355,
"learning_rate": 3.7701720138763877e-06,
"loss": 0.4573,
"step": 31100
},
{
"epoch": 0.3448542659135875,
"grad_norm": 2.4974260330200195,
"learning_rate": 3.7625343941682203e-06,
"loss": 0.4681,
"step": 31200
},
{
"epoch": 0.3459595680479259,
"grad_norm": 2.331465721130371,
"learning_rate": 3.7548809237811378e-06,
"loss": 0.4953,
"step": 31300
},
{
"epoch": 0.34706487018226434,
"grad_norm": 1.782915711402893,
"learning_rate": 3.7472116988017906e-06,
"loss": 0.4257,
"step": 31400
},
{
"epoch": 0.34817017231660274,
"grad_norm": 1.96134352684021,
"learning_rate": 3.7395268155146232e-06,
"loss": 0.4489,
"step": 31500
},
{
"epoch": 0.34927547445094115,
"grad_norm": 1.6746424436569214,
"learning_rate": 3.731826370400663e-06,
"loss": 0.4748,
"step": 31600
},
{
"epoch": 0.3503807765852796,
"grad_norm": 1.7693666219711304,
"learning_rate": 3.7241104601363154e-06,
"loss": 0.4783,
"step": 31700
},
{
"epoch": 0.351486078719618,
"grad_norm": 1.4009222984313965,
"learning_rate": 3.7163791815921394e-06,
"loss": 0.4648,
"step": 31800
},
{
"epoch": 0.3525913808539564,
"grad_norm": 2.408993721008301,
"learning_rate": 3.708632631831643e-06,
"loss": 0.4382,
"step": 31900
},
{
"epoch": 0.3536966829882949,
"grad_norm": 1.713916540145874,
"learning_rate": 3.7008709081100537e-06,
"loss": 0.4258,
"step": 32000
},
{
"epoch": 0.3548019851226333,
"grad_norm": 2.0615127086639404,
"learning_rate": 3.6930941078731065e-06,
"loss": 0.4874,
"step": 32100
},
{
"epoch": 0.3559072872569717,
"grad_norm": 2.3877241611480713,
"learning_rate": 3.685302328755815e-06,
"loss": 0.507,
"step": 32200
},
{
"epoch": 0.35701258939131014,
"grad_norm": 2.4597456455230713,
"learning_rate": 3.6774956685812496e-06,
"loss": 0.4513,
"step": 32300
},
{
"epoch": 0.35811789152564855,
"grad_norm": 2.5451297760009766,
"learning_rate": 3.6696742253593035e-06,
"loss": 0.4419,
"step": 32400
},
{
"epoch": 0.35922319365998695,
"grad_norm": 2.2447433471679688,
"learning_rate": 3.6618380972854694e-06,
"loss": 0.4669,
"step": 32500
},
{
"epoch": 0.36032849579432535,
"grad_norm": 1.7082650661468506,
"learning_rate": 3.6539873827396023e-06,
"loss": 0.4352,
"step": 32600
},
{
"epoch": 0.3614337979286638,
"grad_norm": 1.607082486152649,
"learning_rate": 3.646122180284683e-06,
"loss": 0.4595,
"step": 32700
},
{
"epoch": 0.3625391000630022,
"grad_norm": 1.835105299949646,
"learning_rate": 3.638242588665587e-06,
"loss": 0.4674,
"step": 32800
},
{
"epoch": 0.3636444021973406,
"grad_norm": 1.7002040147781372,
"learning_rate": 3.630348706807836e-06,
"loss": 0.4746,
"step": 32900
},
{
"epoch": 0.3647497043316791,
"grad_norm": 2.184178590774536,
"learning_rate": 3.622440633816366e-06,
"loss": 0.4388,
"step": 33000
},
{
"epoch": 0.3658550064660175,
"grad_norm": 2.1649866104125977,
"learning_rate": 3.6145184689742716e-06,
"loss": 0.4499,
"step": 33100
},
{
"epoch": 0.3669603086003559,
"grad_norm": 1.3153752088546753,
"learning_rate": 3.6065823117415716e-06,
"loss": 0.4391,
"step": 33200
},
{
"epoch": 0.36806561073469435,
"grad_norm": 1.944061279296875,
"learning_rate": 3.5986322617539506e-06,
"loss": 0.4833,
"step": 33300
},
{
"epoch": 0.36917091286903275,
"grad_norm": 1.6162335872650146,
"learning_rate": 3.590668418821513e-06,
"loss": 0.4889,
"step": 33400
},
{
"epoch": 0.37027621500337116,
"grad_norm": 1.623404622077942,
"learning_rate": 3.5826908829275296e-06,
"loss": 0.4698,
"step": 33500
},
{
"epoch": 0.3713815171377096,
"grad_norm": 1.830082654953003,
"learning_rate": 3.57469975422718e-06,
"loss": 0.507,
"step": 33600
},
{
"epoch": 0.372486819272048,
"grad_norm": 2.138823986053467,
"learning_rate": 3.5666951330462972e-06,
"loss": 0.4419,
"step": 33700
},
{
"epoch": 0.3735921214063864,
"grad_norm": 2.455385208129883,
"learning_rate": 3.558677119880109e-06,
"loss": 0.4729,
"step": 33800
},
{
"epoch": 0.3746974235407249,
"grad_norm": 3.052379846572876,
"learning_rate": 3.550645815391973e-06,
"loss": 0.447,
"step": 33900
},
{
"epoch": 0.3758027256750633,
"grad_norm": 1.8502277135849,
"learning_rate": 3.542601320412116e-06,
"loss": 0.4545,
"step": 34000
},
{
"epoch": 0.3769080278094017,
"grad_norm": 2.621030569076538,
"learning_rate": 3.534543735936366e-06,
"loss": 0.4832,
"step": 34100
},
{
"epoch": 0.3780133299437401,
"grad_norm": 1.681999683380127,
"learning_rate": 3.5264731631248867e-06,
"loss": 0.4813,
"step": 34200
},
{
"epoch": 0.37911863207807855,
"grad_norm": 1.8637994527816772,
"learning_rate": 3.5183897033009018e-06,
"loss": 0.5013,
"step": 34300
},
{
"epoch": 0.38022393421241696,
"grad_norm": 1.9797747135162354,
"learning_rate": 3.510293457949433e-06,
"loss": 0.4473,
"step": 34400
},
{
"epoch": 0.38132923634675536,
"grad_norm": 2.2267913818359375,
"learning_rate": 3.502184528716013e-06,
"loss": 0.455,
"step": 34500
},
{
"epoch": 0.3824345384810938,
"grad_norm": 1.919852375984192,
"learning_rate": 3.494063017405423e-06,
"loss": 0.447,
"step": 34600
},
{
"epoch": 0.3835398406154322,
"grad_norm": 2.838737964630127,
"learning_rate": 3.485929025980402e-06,
"loss": 0.4447,
"step": 34700
},
{
"epoch": 0.38464514274977063,
"grad_norm": 1.7883715629577637,
"learning_rate": 3.477782656560377e-06,
"loss": 0.4897,
"step": 34800
},
{
"epoch": 0.3857504448841091,
"grad_norm": 1.9990206956863403,
"learning_rate": 3.469624011420173e-06,
"loss": 0.4533,
"step": 34900
},
{
"epoch": 0.3868557470184475,
"grad_norm": 3.673203706741333,
"learning_rate": 3.461453192988734e-06,
"loss": 0.4813,
"step": 35000
},
{
"epoch": 0.3879610491527859,
"grad_norm": 1.820590853691101,
"learning_rate": 3.4532703038478368e-06,
"loss": 0.4582,
"step": 35100
},
{
"epoch": 0.38906635128712436,
"grad_norm": 1.6964892148971558,
"learning_rate": 3.445075446730798e-06,
"loss": 0.4355,
"step": 35200
},
{
"epoch": 0.39017165342146276,
"grad_norm": 2.7785258293151855,
"learning_rate": 3.4368687245211914e-06,
"loss": 0.4744,
"step": 35300
},
{
"epoch": 0.39127695555580116,
"grad_norm": 2.661006212234497,
"learning_rate": 3.4286502402515504e-06,
"loss": 0.4512,
"step": 35400
},
{
"epoch": 0.3923822576901396,
"grad_norm": 1.379711389541626,
"learning_rate": 3.4204200971020796e-06,
"loss": 0.4727,
"step": 35500
},
{
"epoch": 0.39348755982447803,
"grad_norm": 2.01283860206604,
"learning_rate": 3.412178398399355e-06,
"loss": 0.4774,
"step": 35600
},
{
"epoch": 0.39459286195881643,
"grad_norm": 1.920944094657898,
"learning_rate": 3.4039252476150284e-06,
"loss": 0.4775,
"step": 35700
},
{
"epoch": 0.3956981640931549,
"grad_norm": 1.920350193977356,
"learning_rate": 3.39566074836453e-06,
"loss": 0.4526,
"step": 35800
},
{
"epoch": 0.3968034662274933,
"grad_norm": 2.782977819442749,
"learning_rate": 3.3873850044057633e-06,
"loss": 0.4541,
"step": 35900
},
{
"epoch": 0.3979087683618317,
"grad_norm": 2.4611635208129883,
"learning_rate": 3.3790981196378086e-06,
"loss": 0.4964,
"step": 36000
},
{
"epoch": 0.3990140704961701,
"grad_norm": 1.8741673231124878,
"learning_rate": 3.370800198099613e-06,
"loss": 0.435,
"step": 36100
},
{
"epoch": 0.40011937263050856,
"grad_norm": 1.919241189956665,
"learning_rate": 3.362491343968687e-06,
"loss": 0.4386,
"step": 36200
},
{
"epoch": 0.40122467476484697,
"grad_norm": 2.52968168258667,
"learning_rate": 3.3541716615597948e-06,
"loss": 0.4545,
"step": 36300
},
{
"epoch": 0.40232997689918537,
"grad_norm": 2.964994430541992,
"learning_rate": 3.3458412553236475e-06,
"loss": 0.4551,
"step": 36400
},
{
"epoch": 0.40343527903352383,
"grad_norm": 2.7886335849761963,
"learning_rate": 3.337500229845592e-06,
"loss": 0.477,
"step": 36500
},
{
"epoch": 0.40454058116786223,
"grad_norm": 1.9467898607254028,
"learning_rate": 3.329148689844289e-06,
"loss": 0.4546,
"step": 36600
},
{
"epoch": 0.40564588330220064,
"grad_norm": 1.1720269918441772,
"learning_rate": 3.320786740170414e-06,
"loss": 0.4759,
"step": 36700
},
{
"epoch": 0.4067511854365391,
"grad_norm": 2.1939995288848877,
"learning_rate": 3.3124144858053252e-06,
"loss": 0.4456,
"step": 36800
},
{
"epoch": 0.4078564875708775,
"grad_norm": 2.350830078125,
"learning_rate": 3.304032031859759e-06,
"loss": 0.4683,
"step": 36900
},
{
"epoch": 0.4089617897052159,
"grad_norm": 2.4557292461395264,
"learning_rate": 3.295639483572498e-06,
"loss": 0.4415,
"step": 37000
},
{
"epoch": 0.41006709183955437,
"grad_norm": 1.3871397972106934,
"learning_rate": 3.287236946309059e-06,
"loss": 0.4635,
"step": 37100
},
{
"epoch": 0.41117239397389277,
"grad_norm": 2.129850387573242,
"learning_rate": 3.2788245255603675e-06,
"loss": 0.4888,
"step": 37200
},
{
"epoch": 0.4122776961082312,
"grad_norm": 1.527912974357605,
"learning_rate": 3.2704023269414304e-06,
"loss": 0.4848,
"step": 37300
},
{
"epoch": 0.41338299824256963,
"grad_norm": 1.9338812828063965,
"learning_rate": 3.261970456190014e-06,
"loss": 0.5031,
"step": 37400
},
{
"epoch": 0.41448830037690804,
"grad_norm": 1.9333993196487427,
"learning_rate": 3.253529019165314e-06,
"loss": 0.4533,
"step": 37500
},
{
"epoch": 0.41559360251124644,
"grad_norm": 2.1915063858032227,
"learning_rate": 3.2450781218466274e-06,
"loss": 0.4508,
"step": 37600
},
{
"epoch": 0.41669890464558484,
"grad_norm": 2.150376319885254,
"learning_rate": 3.2366178703320232e-06,
"loss": 0.4359,
"step": 37700
},
{
"epoch": 0.4178042067799233,
"grad_norm": 2.5346415042877197,
"learning_rate": 3.2281483708370074e-06,
"loss": 0.474,
"step": 37800
},
{
"epoch": 0.4189095089142617,
"grad_norm": 2.2632484436035156,
"learning_rate": 3.2196697296931915e-06,
"loss": 0.4317,
"step": 37900
},
{
"epoch": 0.4200148110486001,
"grad_norm": 2.7014644145965576,
"learning_rate": 3.2111820533469577e-06,
"loss": 0.4493,
"step": 38000
},
{
"epoch": 0.42112011318293857,
"grad_norm": 1.923828363418579,
"learning_rate": 3.202685448358122e-06,
"loss": 0.4884,
"step": 38100
},
{
"epoch": 0.422225415317277,
"grad_norm": 2.4021315574645996,
"learning_rate": 3.1941800213985964e-06,
"loss": 0.4457,
"step": 38200
},
{
"epoch": 0.4233307174516154,
"grad_norm": 1.7797712087631226,
"learning_rate": 3.1856658792510485e-06,
"loss": 0.4786,
"step": 38300
},
{
"epoch": 0.42443601958595384,
"grad_norm": 2.1778018474578857,
"learning_rate": 3.177143128807565e-06,
"loss": 0.4695,
"step": 38400
},
{
"epoch": 0.42554132172029224,
"grad_norm": 2.2871477603912354,
"learning_rate": 3.168611877068302e-06,
"loss": 0.4766,
"step": 38500
},
{
"epoch": 0.42664662385463065,
"grad_norm": 3.016216993331909,
"learning_rate": 3.1600722311401515e-06,
"loss": 0.4544,
"step": 38600
},
{
"epoch": 0.4277519259889691,
"grad_norm": 1.759264349937439,
"learning_rate": 3.1515242982353876e-06,
"loss": 0.4414,
"step": 38700
},
{
"epoch": 0.4288572281233075,
"grad_norm": 2.0453083515167236,
"learning_rate": 3.1429681856703287e-06,
"loss": 0.4471,
"step": 38800
},
{
"epoch": 0.4299625302576459,
"grad_norm": 1.5130780935287476,
"learning_rate": 3.1344040008639797e-06,
"loss": 0.4469,
"step": 38900
},
{
"epoch": 0.4310678323919844,
"grad_norm": 1.812267541885376,
"learning_rate": 3.1258318513366975e-06,
"loss": 0.4754,
"step": 39000
},
{
"epoch": 0.4321731345263228,
"grad_norm": 1.798132300376892,
"learning_rate": 3.1172518447088264e-06,
"loss": 0.4519,
"step": 39100
},
{
"epoch": 0.4332784366606612,
"grad_norm": 2.252378463745117,
"learning_rate": 3.108664088699358e-06,
"loss": 0.4622,
"step": 39200
},
{
"epoch": 0.4343837387949996,
"grad_norm": 1.2119619846343994,
"learning_rate": 3.100068691124572e-06,
"loss": 0.4541,
"step": 39300
},
{
"epoch": 0.43548904092933804,
"grad_norm": 1.4428755044937134,
"learning_rate": 3.091465759896688e-06,
"loss": 0.4731,
"step": 39400
},
{
"epoch": 0.43659434306367645,
"grad_norm": 1.7551451921463013,
"learning_rate": 3.082855403022507e-06,
"loss": 0.441,
"step": 39500
},
{
"epoch": 0.43769964519801485,
"grad_norm": 1.55975341796875,
"learning_rate": 3.0742377286020547e-06,
"loss": 0.4249,
"step": 39600
},
{
"epoch": 0.4388049473323533,
"grad_norm": 1.1946512460708618,
"learning_rate": 3.0656128448272284e-06,
"loss": 0.4709,
"step": 39700
},
{
"epoch": 0.4399102494666917,
"grad_norm": 1.1257880926132202,
"learning_rate": 3.0569808599804345e-06,
"loss": 0.4307,
"step": 39800
},
{
"epoch": 0.4410155516010301,
"grad_norm": 1.8002004623413086,
"learning_rate": 3.048341882433232e-06,
"loss": 0.4612,
"step": 39900
},
{
"epoch": 0.4421208537353686,
"grad_norm": 2.031006097793579,
"learning_rate": 3.039696020644972e-06,
"loss": 0.4554,
"step": 40000
},
{
"epoch": 0.443226155869707,
"grad_norm": 2.301436185836792,
"learning_rate": 3.0310433831614307e-06,
"loss": 0.4387,
"step": 40100
},
{
"epoch": 0.4443314580040454,
"grad_norm": 1.4582908153533936,
"learning_rate": 3.0223840786134553e-06,
"loss": 0.455,
"step": 40200
},
{
"epoch": 0.44543676013838385,
"grad_norm": 2.0824360847473145,
"learning_rate": 3.013718215715593e-06,
"loss": 0.4828,
"step": 40300
},
{
"epoch": 0.44654206227272225,
"grad_norm": 2.2939536571502686,
"learning_rate": 3.0050459032647306e-06,
"loss": 0.457,
"step": 40400
},
{
"epoch": 0.44764736440706066,
"grad_norm": 2.297245979309082,
"learning_rate": 2.9963672501387247e-06,
"loss": 0.4778,
"step": 40500
},
{
"epoch": 0.4487526665413991,
"grad_norm": 1.8728293180465698,
"learning_rate": 2.987682365295038e-06,
"loss": 0.4448,
"step": 40600
},
{
"epoch": 0.4498579686757375,
"grad_norm": 1.5255945920944214,
"learning_rate": 2.978991357769371e-06,
"loss": 0.4472,
"step": 40700
},
{
"epoch": 0.4509632708100759,
"grad_norm": 2.7456576824188232,
"learning_rate": 2.9702943366742915e-06,
"loss": 0.4668,
"step": 40800
},
{
"epoch": 0.4520685729444144,
"grad_norm": 2.2749907970428467,
"learning_rate": 2.961591411197865e-06,
"loss": 0.4483,
"step": 40900
},
{
"epoch": 0.4531738750787528,
"grad_norm": 2.1402695178985596,
"learning_rate": 2.9528826906022843e-06,
"loss": 0.4487,
"step": 41000
},
{
"epoch": 0.4542791772130912,
"grad_norm": 2.3826072216033936,
"learning_rate": 2.944168284222502e-06,
"loss": 0.4953,
"step": 41100
},
{
"epoch": 0.4553844793474296,
"grad_norm": 2.2698001861572266,
"learning_rate": 2.9354483014648463e-06,
"loss": 0.484,
"step": 41200
},
{
"epoch": 0.45648978148176805,
"grad_norm": 1.9907783269882202,
"learning_rate": 2.926722851805661e-06,
"loss": 0.4398,
"step": 41300
},
{
"epoch": 0.45759508361610646,
"grad_norm": 1.5543720722198486,
"learning_rate": 2.917992044789923e-06,
"loss": 0.4363,
"step": 41400
},
{
"epoch": 0.45870038575044486,
"grad_norm": 1.8793258666992188,
"learning_rate": 2.909255990029869e-06,
"loss": 0.4567,
"step": 41500
},
{
"epoch": 0.4598056878847833,
"grad_norm": 2.4277260303497314,
"learning_rate": 2.900514797203617e-06,
"loss": 0.4491,
"step": 41600
},
{
"epoch": 0.4609109900191217,
"grad_norm": 2.2503464221954346,
"learning_rate": 2.891768576053797e-06,
"loss": 0.4804,
"step": 41700
},
{
"epoch": 0.46201629215346013,
"grad_norm": 1.4896454811096191,
"learning_rate": 2.8830174363861635e-06,
"loss": 0.4403,
"step": 41800
},
{
"epoch": 0.4631215942877986,
"grad_norm": 2.510836601257324,
"learning_rate": 2.874261488068221e-06,
"loss": 0.451,
"step": 41900
},
{
"epoch": 0.464226896422137,
"grad_norm": 1.5463513135910034,
"learning_rate": 2.8655008410278482e-06,
"loss": 0.4671,
"step": 42000
},
{
"epoch": 0.4653321985564754,
"grad_norm": 2.300896167755127,
"learning_rate": 2.856735605251912e-06,
"loss": 0.4348,
"step": 42100
},
{
"epoch": 0.46643750069081386,
"grad_norm": 2.3069446086883545,
"learning_rate": 2.8479658907848893e-06,
"loss": 0.4478,
"step": 42200
},
{
"epoch": 0.46754280282515226,
"grad_norm": 2.1205623149871826,
"learning_rate": 2.8391918077274873e-06,
"loss": 0.4346,
"step": 42300
},
{
"epoch": 0.46864810495949066,
"grad_norm": 1.638277292251587,
"learning_rate": 2.830413466235258e-06,
"loss": 0.4395,
"step": 42400
},
{
"epoch": 0.4697534070938291,
"grad_norm": 2.0386252403259277,
"learning_rate": 2.8216309765172156e-06,
"loss": 0.4421,
"step": 42500
},
{
"epoch": 0.4708587092281675,
"grad_norm": 2.241922378540039,
"learning_rate": 2.8128444488344565e-06,
"loss": 0.4518,
"step": 42600
},
{
"epoch": 0.47196401136250593,
"grad_norm": 2.304940938949585,
"learning_rate": 2.8040539934987697e-06,
"loss": 0.4803,
"step": 42700
},
{
"epoch": 0.47306931349684433,
"grad_norm": 2.377882480621338,
"learning_rate": 2.795259720871256e-06,
"loss": 0.4637,
"step": 42800
},
{
"epoch": 0.4741746156311828,
"grad_norm": 1.9520049095153809,
"learning_rate": 2.7864617413609414e-06,
"loss": 0.452,
"step": 42900
},
{
"epoch": 0.4752799177655212,
"grad_norm": 2.1737561225891113,
"learning_rate": 2.777660165423388e-06,
"loss": 0.4622,
"step": 43000
},
{
"epoch": 0.4763852198998596,
"grad_norm": 1.6113853454589844,
"learning_rate": 2.7688551035593125e-06,
"loss": 0.448,
"step": 43100
},
{
"epoch": 0.47749052203419806,
"grad_norm": 2.39670729637146,
"learning_rate": 2.760046666313196e-06,
"loss": 0.4512,
"step": 43200
},
{
"epoch": 0.47859582416853647,
"grad_norm": 1.8168816566467285,
"learning_rate": 2.7512349642718927e-06,
"loss": 0.4712,
"step": 43300
},
{
"epoch": 0.47970112630287487,
"grad_norm": 1.6397266387939453,
"learning_rate": 2.7424201080632516e-06,
"loss": 0.4569,
"step": 43400
},
{
"epoch": 0.48080642843721333,
"grad_norm": 2.2524404525756836,
"learning_rate": 2.7336022083547153e-06,
"loss": 0.4882,
"step": 43500
},
{
"epoch": 0.48191173057155173,
"grad_norm": 2.5701520442962646,
"learning_rate": 2.72478137585194e-06,
"loss": 0.4593,
"step": 43600
},
{
"epoch": 0.48301703270589014,
"grad_norm": 1.691336989402771,
"learning_rate": 2.7159577212973985e-06,
"loss": 0.4743,
"step": 43700
},
{
"epoch": 0.4841223348402286,
"grad_norm": 1.9625279903411865,
"learning_rate": 2.7071313554689994e-06,
"loss": 0.4834,
"step": 43800
},
{
"epoch": 0.485227636974567,
"grad_norm": 1.4627450704574585,
"learning_rate": 2.6983023891786835e-06,
"loss": 0.4513,
"step": 43900
},
{
"epoch": 0.4863329391089054,
"grad_norm": 2.0734519958496094,
"learning_rate": 2.689470933271045e-06,
"loss": 0.4611,
"step": 44000
},
{
"epoch": 0.48743824124324386,
"grad_norm": 1.5627169609069824,
"learning_rate": 2.6806370986219305e-06,
"loss": 0.445,
"step": 44100
},
{
"epoch": 0.48854354337758227,
"grad_norm": 2.4556682109832764,
"learning_rate": 2.6718009961370544e-06,
"loss": 0.4255,
"step": 44200
},
{
"epoch": 0.48964884551192067,
"grad_norm": 1.817841649055481,
"learning_rate": 2.6629627367505996e-06,
"loss": 0.4725,
"step": 44300
},
{
"epoch": 0.49075414764625913,
"grad_norm": 2.1898646354675293,
"learning_rate": 2.6541224314238306e-06,
"loss": 0.4321,
"step": 44400
},
{
"epoch": 0.49185944978059754,
"grad_norm": 1.9783952236175537,
"learning_rate": 2.645280191143697e-06,
"loss": 0.473,
"step": 44500
},
{
"epoch": 0.49296475191493594,
"grad_norm": 2.2066643238067627,
"learning_rate": 2.6364361269214404e-06,
"loss": 0.4388,
"step": 44600
},
{
"epoch": 0.49407005404927434,
"grad_norm": 1.5500693321228027,
"learning_rate": 2.627590349791203e-06,
"loss": 0.4515,
"step": 44700
},
{
"epoch": 0.4951753561836128,
"grad_norm": 1.9073359966278076,
"learning_rate": 2.6187429708086304e-06,
"loss": 0.4475,
"step": 44800
},
{
"epoch": 0.4962806583179512,
"grad_norm": 1.692548394203186,
"learning_rate": 2.6098941010494793e-06,
"loss": 0.4116,
"step": 44900
},
{
"epoch": 0.4973859604522896,
"grad_norm": 1.8653684854507446,
"learning_rate": 2.6010438516082244e-06,
"loss": 0.4462,
"step": 45000
},
{
"epoch": 0.49849126258662807,
"grad_norm": 2.772581100463867,
"learning_rate": 2.592192333596658e-06,
"loss": 0.4465,
"step": 45100
},
{
"epoch": 0.4995965647209665,
"grad_norm": 1.9330416917800903,
"learning_rate": 2.583339658142503e-06,
"loss": 0.4693,
"step": 45200
},
{
"epoch": 0.5007018668553049,
"grad_norm": 1.846220850944519,
"learning_rate": 2.574485936388011e-06,
"loss": 0.4782,
"step": 45300
},
{
"epoch": 0.5018071689896433,
"grad_norm": 1.9324105978012085,
"learning_rate": 2.5656312794885696e-06,
"loss": 0.476,
"step": 45400
},
{
"epoch": 0.5029124711239817,
"grad_norm": 1.4215826988220215,
"learning_rate": 2.5567757986113082e-06,
"loss": 0.4404,
"step": 45500
},
{
"epoch": 0.5040177732583202,
"grad_norm": 2.124636173248291,
"learning_rate": 2.5479196049336994e-06,
"loss": 0.4685,
"step": 45600
},
{
"epoch": 0.5051230753926585,
"grad_norm": 2.1870932579040527,
"learning_rate": 2.5390628096421675e-06,
"loss": 0.4384,
"step": 45700
},
{
"epoch": 0.506228377526997,
"grad_norm": 2.281766891479492,
"learning_rate": 2.5302055239306857e-06,
"loss": 0.4849,
"step": 45800
},
{
"epoch": 0.5073336796613355,
"grad_norm": 2.991182804107666,
"learning_rate": 2.5213478589993884e-06,
"loss": 0.4585,
"step": 45900
},
{
"epoch": 0.5084389817956738,
"grad_norm": 2.271472930908203,
"learning_rate": 2.5124899260531667e-06,
"loss": 0.4459,
"step": 46000
},
{
"epoch": 0.5095442839300123,
"grad_norm": 1.7806503772735596,
"learning_rate": 2.5036318363002816e-06,
"loss": 0.4448,
"step": 46100
},
{
"epoch": 0.5106495860643507,
"grad_norm": 2.3559248447418213,
"learning_rate": 2.4947737009509577e-06,
"loss": 0.4468,
"step": 46200
},
{
"epoch": 0.5117548881986891,
"grad_norm": 2.1456425189971924,
"learning_rate": 2.4859156312159945e-06,
"loss": 0.4304,
"step": 46300
},
{
"epoch": 0.5128601903330275,
"grad_norm": 2.4595870971679688,
"learning_rate": 2.4770577383053695e-06,
"loss": 0.4756,
"step": 46400
},
{
"epoch": 0.513965492467366,
"grad_norm": 1.6186550855636597,
"learning_rate": 2.4682001334268376e-06,
"loss": 0.4246,
"step": 46500
},
{
"epoch": 0.5150707946017044,
"grad_norm": 2.1293444633483887,
"learning_rate": 2.4593429277845366e-06,
"loss": 0.4373,
"step": 46600
},
{
"epoch": 0.5161760967360428,
"grad_norm": 2.4468750953674316,
"learning_rate": 2.450486232577596e-06,
"loss": 0.4722,
"step": 46700
},
{
"epoch": 0.5172813988703813,
"grad_norm": 1.3718825578689575,
"learning_rate": 2.441630158998734e-06,
"loss": 0.4625,
"step": 46800
},
{
"epoch": 0.5183867010047196,
"grad_norm": 1.7043936252593994,
"learning_rate": 2.432774818232865e-06,
"loss": 0.4889,
"step": 46900
},
{
"epoch": 0.5194920031390581,
"grad_norm": 1.942793607711792,
"learning_rate": 2.4239203214557026e-06,
"loss": 0.4539,
"step": 47000
},
{
"epoch": 0.5205973052733965,
"grad_norm": 2.086621046066284,
"learning_rate": 2.4150667798323664e-06,
"loss": 0.4303,
"step": 47100
},
{
"epoch": 0.5217026074077349,
"grad_norm": 2.2322304248809814,
"learning_rate": 2.406214304515982e-06,
"loss": 0.4616,
"step": 47200
},
{
"epoch": 0.5228079095420733,
"grad_norm": 1.703951120376587,
"learning_rate": 2.3973630066462895e-06,
"loss": 0.4479,
"step": 47300
},
{
"epoch": 0.5239132116764117,
"grad_norm": 1.6014420986175537,
"learning_rate": 2.3885129973482475e-06,
"loss": 0.4269,
"step": 47400
},
{
"epoch": 0.5250185138107502,
"grad_norm": 2.385668992996216,
"learning_rate": 2.379664387730634e-06,
"loss": 0.4284,
"step": 47500
},
{
"epoch": 0.5261238159450886,
"grad_norm": 2.08682918548584,
"learning_rate": 2.370817288884656e-06,
"loss": 0.4573,
"step": 47600
},
{
"epoch": 0.527229118079427,
"grad_norm": 1.9396214485168457,
"learning_rate": 2.3619718118825536e-06,
"loss": 0.4701,
"step": 47700
},
{
"epoch": 0.5283344202137654,
"grad_norm": 1.9038134813308716,
"learning_rate": 2.3531280677762064e-06,
"loss": 0.4437,
"step": 47800
},
{
"epoch": 0.5294397223481039,
"grad_norm": 2.4148266315460205,
"learning_rate": 2.3442861675957353e-06,
"loss": 0.4264,
"step": 47900
},
{
"epoch": 0.5305450244824422,
"grad_norm": 2.0972328186035156,
"learning_rate": 2.3354462223481126e-06,
"loss": 0.4461,
"step": 48000
},
{
"epoch": 0.5316503266167807,
"grad_norm": 2.8991668224334717,
"learning_rate": 2.326608343015769e-06,
"loss": 0.4461,
"step": 48100
},
{
"epoch": 0.5327556287511191,
"grad_norm": 1.24418306350708,
"learning_rate": 2.3177726405551953e-06,
"loss": 0.4329,
"step": 48200
},
{
"epoch": 0.5338609308854575,
"grad_norm": 1.501638650894165,
"learning_rate": 2.308939225895554e-06,
"loss": 0.4252,
"step": 48300
},
{
"epoch": 0.534966233019796,
"grad_norm": 1.7708169221878052,
"learning_rate": 2.300108209937284e-06,
"loss": 0.4492,
"step": 48400
},
{
"epoch": 0.5360715351541344,
"grad_norm": 1.757341980934143,
"learning_rate": 2.2912797035507118e-06,
"loss": 0.4342,
"step": 48500
},
{
"epoch": 0.5371768372884728,
"grad_norm": 1.7680574655532837,
"learning_rate": 2.2824538175746554e-06,
"loss": 0.4524,
"step": 48600
},
{
"epoch": 0.5382821394228112,
"grad_norm": 2.0074987411499023,
"learning_rate": 2.2736306628150322e-06,
"loss": 0.436,
"step": 48700
},
{
"epoch": 0.5393874415571497,
"grad_norm": 1.9048947095870972,
"learning_rate": 2.2648103500434756e-06,
"loss": 0.4189,
"step": 48800
},
{
"epoch": 0.540492743691488,
"grad_norm": 2.519080638885498,
"learning_rate": 2.255992989995934e-06,
"loss": 0.4251,
"step": 48900
},
{
"epoch": 0.5415980458258265,
"grad_norm": 2.2120232582092285,
"learning_rate": 2.247178693371288e-06,
"loss": 0.4933,
"step": 49000
},
{
"epoch": 0.542703347960165,
"grad_norm": 1.7563016414642334,
"learning_rate": 2.238367570829954e-06,
"loss": 0.4602,
"step": 49100
},
{
"epoch": 0.5438086500945033,
"grad_norm": 1.5373327732086182,
"learning_rate": 2.229559732992507e-06,
"loss": 0.4792,
"step": 49200
},
{
"epoch": 0.5449139522288418,
"grad_norm": 2.573272228240967,
"learning_rate": 2.220755290438275e-06,
"loss": 0.4659,
"step": 49300
},
{
"epoch": 0.5460192543631802,
"grad_norm": 1.7102992534637451,
"learning_rate": 2.211954353703965e-06,
"loss": 0.4553,
"step": 49400
},
{
"epoch": 0.5471245564975186,
"grad_norm": 2.3353729248046875,
"learning_rate": 2.203157033282265e-06,
"loss": 0.4307,
"step": 49500
},
{
"epoch": 0.548229858631857,
"grad_norm": 1.7641658782958984,
"learning_rate": 2.194363439620468e-06,
"loss": 0.4648,
"step": 49600
},
{
"epoch": 0.5493351607661955,
"grad_norm": 1.2468318939208984,
"learning_rate": 2.1855736831190723e-06,
"loss": 0.4616,
"step": 49700
},
{
"epoch": 0.5504404629005338,
"grad_norm": 2.137446880340576,
"learning_rate": 2.1767878741304044e-06,
"loss": 0.4671,
"step": 49800
},
{
"epoch": 0.5515457650348723,
"grad_norm": 2.4773776531219482,
"learning_rate": 2.1680061229572343e-06,
"loss": 0.4737,
"step": 49900
},
{
"epoch": 0.5526510671692108,
"grad_norm": 2.0055341720581055,
"learning_rate": 2.1592285398513815e-06,
"loss": 0.4533,
"step": 50000
},
{
"epoch": 0.5537563693035491,
"grad_norm": 1.876347303390503,
"learning_rate": 2.150455235012342e-06,
"loss": 0.4208,
"step": 50100
},
{
"epoch": 0.5548616714378876,
"grad_norm": 2.5351920127868652,
"learning_rate": 2.1416863185858964e-06,
"loss": 0.4404,
"step": 50200
},
{
"epoch": 0.555966973572226,
"grad_norm": 1.0931345224380493,
"learning_rate": 2.132921900662733e-06,
"loss": 0.4465,
"step": 50300
},
{
"epoch": 0.5570722757065644,
"grad_norm": 2.0798308849334717,
"learning_rate": 2.1241620912770612e-06,
"loss": 0.4152,
"step": 50400
},
{
"epoch": 0.5581775778409028,
"grad_norm": 1.6538605690002441,
"learning_rate": 2.115407000405231e-06,
"loss": 0.4209,
"step": 50500
},
{
"epoch": 0.5592828799752413,
"grad_norm": 2.1094820499420166,
"learning_rate": 2.1066567379643557e-06,
"loss": 0.4367,
"step": 50600
},
{
"epoch": 0.5603881821095796,
"grad_norm": 2.1819286346435547,
"learning_rate": 2.097911413810928e-06,
"loss": 0.4525,
"step": 50700
},
{
"epoch": 0.5614934842439181,
"grad_norm": 2.0643765926361084,
"learning_rate": 2.089171137739441e-06,
"loss": 0.4504,
"step": 50800
},
{
"epoch": 0.5625987863782564,
"grad_norm": 1.5290354490280151,
"learning_rate": 2.0804360194810117e-06,
"loss": 0.4313,
"step": 50900
},
{
"epoch": 0.5637040885125949,
"grad_norm": 1.9766910076141357,
"learning_rate": 2.0717061687020047e-06,
"loss": 0.4177,
"step": 51000
},
{
"epoch": 0.5648093906469334,
"grad_norm": 1.1951794624328613,
"learning_rate": 2.0629816950026505e-06,
"loss": 0.5075,
"step": 51100
},
{
"epoch": 0.5659146927812717,
"grad_norm": 2.3847384452819824,
"learning_rate": 2.054262707915671e-06,
"loss": 0.4196,
"step": 51200
},
{
"epoch": 0.5670199949156102,
"grad_norm": 1.665724515914917,
"learning_rate": 2.0455493169049115e-06,
"loss": 0.4333,
"step": 51300
},
{
"epoch": 0.5681252970499486,
"grad_norm": 1.6288607120513916,
"learning_rate": 2.036841631363954e-06,
"loss": 0.4853,
"step": 51400
},
{
"epoch": 0.569230599184287,
"grad_norm": 2.2280824184417725,
"learning_rate": 2.028139760614754e-06,
"loss": 0.456,
"step": 51500
},
{
"epoch": 0.5703359013186254,
"grad_norm": 2.9321858882904053,
"learning_rate": 2.019443813906262e-06,
"loss": 0.4694,
"step": 51600
},
{
"epoch": 0.5714412034529639,
"grad_norm": 2.381856918334961,
"learning_rate": 2.0107539004130577e-06,
"loss": 0.4679,
"step": 51700
},
{
"epoch": 0.5725465055873022,
"grad_norm": 2.0987162590026855,
"learning_rate": 2.002070129233972e-06,
"loss": 0.4611,
"step": 51800
},
{
"epoch": 0.5736518077216407,
"grad_norm": 2.339217185974121,
"learning_rate": 1.993392609390723e-06,
"loss": 0.5007,
"step": 51900
},
{
"epoch": 0.5747571098559792,
"grad_norm": 1.3680297136306763,
"learning_rate": 1.984721449826547e-06,
"loss": 0.4823,
"step": 52000
},
{
"epoch": 0.5758624119903175,
"grad_norm": 1.494996190071106,
"learning_rate": 1.976056759404827e-06,
"loss": 0.4528,
"step": 52100
},
{
"epoch": 0.576967714124656,
"grad_norm": 2.1765034198760986,
"learning_rate": 1.967398646907728e-06,
"loss": 0.4476,
"step": 52200
},
{
"epoch": 0.5780730162589944,
"grad_norm": 1.8729513883590698,
"learning_rate": 1.9587472210348318e-06,
"loss": 0.4626,
"step": 52300
},
{
"epoch": 0.5791783183933328,
"grad_norm": 1.8249151706695557,
"learning_rate": 1.950102590401774e-06,
"loss": 0.4488,
"step": 52400
},
{
"epoch": 0.5802836205276712,
"grad_norm": 1.604670763015747,
"learning_rate": 1.9414648635388765e-06,
"loss": 0.4385,
"step": 52500
},
{
"epoch": 0.5813889226620097,
"grad_norm": 1.7172939777374268,
"learning_rate": 1.932834148889785e-06,
"loss": 0.452,
"step": 52600
},
{
"epoch": 0.582494224796348,
"grad_norm": 2.7707228660583496,
"learning_rate": 1.924210554810114e-06,
"loss": 0.4213,
"step": 52700
},
{
"epoch": 0.5835995269306865,
"grad_norm": 1.858169436454773,
"learning_rate": 1.9155941895660775e-06,
"loss": 0.4422,
"step": 52800
},
{
"epoch": 0.584704829065025,
"grad_norm": 2.9702155590057373,
"learning_rate": 1.9069851613331363e-06,
"loss": 0.4903,
"step": 52900
},
{
"epoch": 0.5858101311993633,
"grad_norm": 1.5274828672409058,
"learning_rate": 1.8983835781946355e-06,
"loss": 0.4359,
"step": 53000
},
{
"epoch": 0.5869154333337018,
"grad_norm": 1.5798296928405762,
"learning_rate": 1.8897895481404523e-06,
"loss": 0.4666,
"step": 53100
},
{
"epoch": 0.5880207354680402,
"grad_norm": 2.6816885471343994,
"learning_rate": 1.8812031790656365e-06,
"loss": 0.4603,
"step": 53200
},
{
"epoch": 0.5891260376023786,
"grad_norm": 2.24021577835083,
"learning_rate": 1.8726245787690556e-06,
"loss": 0.4434,
"step": 53300
},
{
"epoch": 0.590231339736717,
"grad_norm": 2.0478105545043945,
"learning_rate": 1.8640538549520432e-06,
"loss": 0.4547,
"step": 53400
},
{
"epoch": 0.5913366418710555,
"grad_norm": 2.7488420009613037,
"learning_rate": 1.8554911152170491e-06,
"loss": 0.401,
"step": 53500
},
{
"epoch": 0.5924419440053939,
"grad_norm": 1.8583904504776,
"learning_rate": 1.8469364670662838e-06,
"loss": 0.4229,
"step": 53600
},
{
"epoch": 0.5935472461397323,
"grad_norm": 2.7477619647979736,
"learning_rate": 1.8383900179003678e-06,
"loss": 0.453,
"step": 53700
},
{
"epoch": 0.5946525482740708,
"grad_norm": 2.0758025646209717,
"learning_rate": 1.829851875016993e-06,
"loss": 0.4528,
"step": 53800
},
{
"epoch": 0.5957578504084091,
"grad_norm": 1.2921638488769531,
"learning_rate": 1.8213221456095626e-06,
"loss": 0.45,
"step": 53900
},
{
"epoch": 0.5968631525427476,
"grad_norm": 1.9033405780792236,
"learning_rate": 1.812800936765855e-06,
"loss": 0.4489,
"step": 54000
},
{
"epoch": 0.597968454677086,
"grad_norm": 2.5085136890411377,
"learning_rate": 1.8042883554666733e-06,
"loss": 0.4501,
"step": 54100
},
{
"epoch": 0.5990737568114244,
"grad_norm": 1.3407922983169556,
"learning_rate": 1.7957845085845086e-06,
"loss": 0.4581,
"step": 54200
},
{
"epoch": 0.6001790589457628,
"grad_norm": 1.598039150238037,
"learning_rate": 1.7872895028821902e-06,
"loss": 0.4406,
"step": 54300
},
{
"epoch": 0.6012843610801012,
"grad_norm": 1.5193266868591309,
"learning_rate": 1.7788034450115522e-06,
"loss": 0.4412,
"step": 54400
},
{
"epoch": 0.6023896632144397,
"grad_norm": 2.39776611328125,
"learning_rate": 1.7703264415120912e-06,
"loss": 0.4446,
"step": 54500
},
{
"epoch": 0.6034949653487781,
"grad_norm": 2.233445167541504,
"learning_rate": 1.7618585988096292e-06,
"loss": 0.4512,
"step": 54600
},
{
"epoch": 0.6046002674831165,
"grad_norm": 3.316636323928833,
"learning_rate": 1.7534000232149772e-06,
"loss": 0.4617,
"step": 54700
},
{
"epoch": 0.6057055696174549,
"grad_norm": 1.9188458919525146,
"learning_rate": 1.7449508209226007e-06,
"loss": 0.4551,
"step": 54800
},
{
"epoch": 0.6068108717517934,
"grad_norm": 2.422166109085083,
"learning_rate": 1.7365110980092886e-06,
"loss": 0.4213,
"step": 54900
},
{
"epoch": 0.6079161738861317,
"grad_norm": 1.886583685874939,
"learning_rate": 1.7280809604328175e-06,
"loss": 0.4424,
"step": 55000
},
{
"epoch": 0.6090214760204702,
"grad_norm": 2.0250625610351562,
"learning_rate": 1.7196605140306227e-06,
"loss": 0.4474,
"step": 55100
},
{
"epoch": 0.6101267781548086,
"grad_norm": 1.9184309244155884,
"learning_rate": 1.7112498645184734e-06,
"loss": 0.4483,
"step": 55200
},
{
"epoch": 0.611232080289147,
"grad_norm": 1.7985000610351562,
"learning_rate": 1.7028491174891395e-06,
"loss": 0.4395,
"step": 55300
},
{
"epoch": 0.6123373824234855,
"grad_norm": 2.2696986198425293,
"learning_rate": 1.6944583784110702e-06,
"loss": 0.46,
"step": 55400
},
{
"epoch": 0.6134426845578239,
"grad_norm": 1.9761462211608887,
"learning_rate": 1.6860777526270663e-06,
"loss": 0.4514,
"step": 55500
},
{
"epoch": 0.6145479866921623,
"grad_norm": 1.6298624277114868,
"learning_rate": 1.6777073453529628e-06,
"loss": 0.4339,
"step": 55600
},
{
"epoch": 0.6156532888265007,
"grad_norm": 1.7984713315963745,
"learning_rate": 1.6693472616763023e-06,
"loss": 0.4364,
"step": 55700
},
{
"epoch": 0.6167585909608392,
"grad_norm": 2.747307777404785,
"learning_rate": 1.6609976065550188e-06,
"loss": 0.4817,
"step": 55800
},
{
"epoch": 0.6178638930951775,
"grad_norm": 2.802546739578247,
"learning_rate": 1.6526584848161214e-06,
"loss": 0.4566,
"step": 55900
},
{
"epoch": 0.618969195229516,
"grad_norm": 1.783996820449829,
"learning_rate": 1.644330001154373e-06,
"loss": 0.4595,
"step": 56000
},
{
"epoch": 0.6200744973638544,
"grad_norm": 2.170027494430542,
"learning_rate": 1.6360122601309819e-06,
"loss": 0.4608,
"step": 56100
},
{
"epoch": 0.6211797994981928,
"grad_norm": 1.9390249252319336,
"learning_rate": 1.6277053661722836e-06,
"loss": 0.4632,
"step": 56200
},
{
"epoch": 0.6222851016325313,
"grad_norm": 1.528578281402588,
"learning_rate": 1.6194094235684363e-06,
"loss": 0.4299,
"step": 56300
},
{
"epoch": 0.6233904037668697,
"grad_norm": 2.1283223628997803,
"learning_rate": 1.611124536472104e-06,
"loss": 0.4758,
"step": 56400
},
{
"epoch": 0.6244957059012081,
"grad_norm": 1.7181930541992188,
"learning_rate": 1.6028508088971542e-06,
"loss": 0.4408,
"step": 56500
},
{
"epoch": 0.6256010080355465,
"grad_norm": 1.5925639867782593,
"learning_rate": 1.5945883447173516e-06,
"loss": 0.4125,
"step": 56600
},
{
"epoch": 0.626706310169885,
"grad_norm": 2.1560404300689697,
"learning_rate": 1.5863372476650518e-06,
"loss": 0.4572,
"step": 56700
},
{
"epoch": 0.6278116123042233,
"grad_norm": 1.5837538242340088,
"learning_rate": 1.5780976213298987e-06,
"loss": 0.4234,
"step": 56800
},
{
"epoch": 0.6289169144385618,
"grad_norm": 1.7496099472045898,
"learning_rate": 1.5698695691575278e-06,
"loss": 0.4622,
"step": 56900
},
{
"epoch": 0.6300222165729003,
"grad_norm": 1.950454592704773,
"learning_rate": 1.5616531944482639e-06,
"loss": 0.46,
"step": 57000
},
{
"epoch": 0.6311275187072386,
"grad_norm": 1.873214840888977,
"learning_rate": 1.5534486003558256e-06,
"loss": 0.4349,
"step": 57100
},
{
"epoch": 0.6322328208415771,
"grad_norm": 2.442535877227783,
"learning_rate": 1.5452558898860289e-06,
"loss": 0.4525,
"step": 57200
},
{
"epoch": 0.6333381229759155,
"grad_norm": 2.4935104846954346,
"learning_rate": 1.5370751658954962e-06,
"loss": 0.4348,
"step": 57300
},
{
"epoch": 0.6344434251102539,
"grad_norm": 2.2208077907562256,
"learning_rate": 1.5289065310903642e-06,
"loss": 0.4525,
"step": 57400
},
{
"epoch": 0.6355487272445923,
"grad_norm": 2.645033121109009,
"learning_rate": 1.5207500880249937e-06,
"loss": 0.4303,
"step": 57500
},
{
"epoch": 0.6366540293789308,
"grad_norm": 2.4756534099578857,
"learning_rate": 1.5126059391006806e-06,
"loss": 0.4273,
"step": 57600
},
{
"epoch": 0.6377593315132691,
"grad_norm": 2.156022548675537,
"learning_rate": 1.5044741865643752e-06,
"loss": 0.4363,
"step": 57700
},
{
"epoch": 0.6388646336476076,
"grad_norm": 1.1067718267440796,
"learning_rate": 1.4963549325073937e-06,
"loss": 0.477,
"step": 57800
},
{
"epoch": 0.6399699357819459,
"grad_norm": 2.1002750396728516,
"learning_rate": 1.488248278864139e-06,
"loss": 0.4241,
"step": 57900
},
{
"epoch": 0.6410752379162844,
"grad_norm": 2.1461567878723145,
"learning_rate": 1.4801543274108182e-06,
"loss": 0.461,
"step": 58000
},
{
"epoch": 0.6421805400506229,
"grad_norm": 1.992863655090332,
"learning_rate": 1.4720731797641701e-06,
"loss": 0.4419,
"step": 58100
},
{
"epoch": 0.6432858421849612,
"grad_norm": 1.8167692422866821,
"learning_rate": 1.464004937380184e-06,
"loss": 0.4239,
"step": 58200
},
{
"epoch": 0.6443911443192997,
"grad_norm": 1.0601933002471924,
"learning_rate": 1.4559497015528278e-06,
"loss": 0.4534,
"step": 58300
},
{
"epoch": 0.6454964464536381,
"grad_norm": 1.5626897811889648,
"learning_rate": 1.4479075734127795e-06,
"loss": 0.4109,
"step": 58400
},
{
"epoch": 0.6466017485879765,
"grad_norm": 2.2622973918914795,
"learning_rate": 1.4398786539261515e-06,
"loss": 0.4546,
"step": 58500
},
{
"epoch": 0.6477070507223149,
"grad_norm": 2.4710042476654053,
"learning_rate": 1.4318630438932258e-06,
"loss": 0.4442,
"step": 58600
},
{
"epoch": 0.6488123528566534,
"grad_norm": 2.6686673164367676,
"learning_rate": 1.4238608439471916e-06,
"loss": 0.442,
"step": 58700
},
{
"epoch": 0.6499176549909917,
"grad_norm": 1.9529846906661987,
"learning_rate": 1.4158721545528786e-06,
"loss": 0.4719,
"step": 58800
},
{
"epoch": 0.6510229571253302,
"grad_norm": 1.6578528881072998,
"learning_rate": 1.4078970760054952e-06,
"loss": 0.4729,
"step": 58900
},
{
"epoch": 0.6521282592596687,
"grad_norm": 1.7940270900726318,
"learning_rate": 1.399935708429368e-06,
"loss": 0.4512,
"step": 59000
},
{
"epoch": 0.653233561394007,
"grad_norm": 1.85922372341156,
"learning_rate": 1.3919881517766941e-06,
"loss": 0.4402,
"step": 59100
},
{
"epoch": 0.6543388635283455,
"grad_norm": 2.1098904609680176,
"learning_rate": 1.3840545058262729e-06,
"loss": 0.4497,
"step": 59200
},
{
"epoch": 0.6554441656626839,
"grad_norm": 1.5995895862579346,
"learning_rate": 1.376134870182262e-06,
"loss": 0.4626,
"step": 59300
},
{
"epoch": 0.6565494677970223,
"grad_norm": 1.8691281080245972,
"learning_rate": 1.3682293442729217e-06,
"loss": 0.4674,
"step": 59400
},
{
"epoch": 0.6576547699313607,
"grad_norm": 2.0507023334503174,
"learning_rate": 1.3603380273493769e-06,
"loss": 0.4547,
"step": 59500
},
{
"epoch": 0.6587600720656992,
"grad_norm": 1.5811275243759155,
"learning_rate": 1.3524610184843567e-06,
"loss": 0.4523,
"step": 59600
},
{
"epoch": 0.6598653742000375,
"grad_norm": 1.8390048742294312,
"learning_rate": 1.3445984165709586e-06,
"loss": 0.436,
"step": 59700
},
{
"epoch": 0.660970676334376,
"grad_norm": 2.165388345718384,
"learning_rate": 1.3367503203214078e-06,
"loss": 0.4259,
"step": 59800
},
{
"epoch": 0.6620759784687145,
"grad_norm": 1.9885059595108032,
"learning_rate": 1.3289168282658167e-06,
"loss": 0.4394,
"step": 59900
},
{
"epoch": 0.6631812806030528,
"grad_norm": 0.8709326386451721,
"learning_rate": 1.3210980387509436e-06,
"loss": 0.4507,
"step": 60000
},
{
"epoch": 0.6642865827373913,
"grad_norm": 1.6904494762420654,
"learning_rate": 1.3132940499389634e-06,
"loss": 0.4469,
"step": 60100
},
{
"epoch": 0.6653918848717297,
"grad_norm": 2.0872297286987305,
"learning_rate": 1.3055049598062347e-06,
"loss": 0.4256,
"step": 60200
},
{
"epoch": 0.6664971870060681,
"grad_norm": 3.0559935569763184,
"learning_rate": 1.2977308661420657e-06,
"loss": 0.5023,
"step": 60300
},
{
"epoch": 0.6676024891404065,
"grad_norm": 1.9940212965011597,
"learning_rate": 1.2899718665474913e-06,
"loss": 0.4416,
"step": 60400
},
{
"epoch": 0.668707791274745,
"grad_norm": 1.7937722206115723,
"learning_rate": 1.2822280584340458e-06,
"loss": 0.4676,
"step": 60500
},
{
"epoch": 0.6698130934090833,
"grad_norm": 3.7665975093841553,
"learning_rate": 1.2744995390225378e-06,
"loss": 0.4159,
"step": 60600
},
{
"epoch": 0.6709183955434218,
"grad_norm": 2.6829941272735596,
"learning_rate": 1.2667864053418316e-06,
"loss": 0.4499,
"step": 60700
},
{
"epoch": 0.6720236976777603,
"grad_norm": 3.8452253341674805,
"learning_rate": 1.2590887542276314e-06,
"loss": 0.4391,
"step": 60800
},
{
"epoch": 0.6731289998120986,
"grad_norm": 2.4866082668304443,
"learning_rate": 1.2514066823212623e-06,
"loss": 0.4567,
"step": 60900
},
{
"epoch": 0.6742343019464371,
"grad_norm": 1.9398912191390991,
"learning_rate": 1.2437402860684566e-06,
"loss": 0.479,
"step": 61000
},
{
"epoch": 0.6753396040807755,
"grad_norm": 2.085367202758789,
"learning_rate": 1.2360896617181442e-06,
"loss": 0.441,
"step": 61100
},
{
"epoch": 0.6764449062151139,
"grad_norm": 1.9988934993743896,
"learning_rate": 1.2284549053212461e-06,
"loss": 0.4435,
"step": 61200
},
{
"epoch": 0.6775502083494523,
"grad_norm": 1.8229702711105347,
"learning_rate": 1.2208361127294662e-06,
"loss": 0.4682,
"step": 61300
},
{
"epoch": 0.6786555104837907,
"grad_norm": 2.7625458240509033,
"learning_rate": 1.2132333795940873e-06,
"loss": 0.4731,
"step": 61400
},
{
"epoch": 0.6797608126181292,
"grad_norm": 2.0298068523406982,
"learning_rate": 1.2056468013647699e-06,
"loss": 0.4599,
"step": 61500
},
{
"epoch": 0.6808661147524676,
"grad_norm": 1.9047514200210571,
"learning_rate": 1.1980764732883613e-06,
"loss": 0.4431,
"step": 61600
},
{
"epoch": 0.681971416886806,
"grad_norm": 2.681807041168213,
"learning_rate": 1.1905224904076873e-06,
"loss": 0.4317,
"step": 61700
},
{
"epoch": 0.6830767190211444,
"grad_norm": 1.9497393369674683,
"learning_rate": 1.1829849475603683e-06,
"loss": 0.4383,
"step": 61800
},
{
"epoch": 0.6841820211554829,
"grad_norm": 1.764805555343628,
"learning_rate": 1.1754639393776238e-06,
"loss": 0.4375,
"step": 61900
},
{
"epoch": 0.6852873232898212,
"grad_norm": 1.5404030084609985,
"learning_rate": 1.1679595602830913e-06,
"loss": 0.4419,
"step": 62000
},
{
"epoch": 0.6863926254241597,
"grad_norm": 1.7731199264526367,
"learning_rate": 1.160471904491631e-06,
"loss": 0.4104,
"step": 62100
},
{
"epoch": 0.6874979275584981,
"grad_norm": 2.781113862991333,
"learning_rate": 1.153001066008149e-06,
"loss": 0.4098,
"step": 62200
},
{
"epoch": 0.6886032296928365,
"grad_norm": 2.170764207839966,
"learning_rate": 1.1455471386264164e-06,
"loss": 0.4386,
"step": 62300
},
{
"epoch": 0.689708531827175,
"grad_norm": 1.8785371780395508,
"learning_rate": 1.138110215927893e-06,
"loss": 0.4689,
"step": 62400
},
{
"epoch": 0.6908138339615134,
"grad_norm": 3.2463815212249756,
"learning_rate": 1.1306903912805483e-06,
"loss": 0.5066,
"step": 62500
},
{
"epoch": 0.6919191360958518,
"grad_norm": 1.5964540243148804,
"learning_rate": 1.123287757837691e-06,
"loss": 0.4719,
"step": 62600
},
{
"epoch": 0.6930244382301902,
"grad_norm": 2.1385936737060547,
"learning_rate": 1.1159024085368031e-06,
"loss": 0.4397,
"step": 62700
},
{
"epoch": 0.6941297403645287,
"grad_norm": 1.62234628200531,
"learning_rate": 1.1085344360983696e-06,
"loss": 0.4167,
"step": 62800
},
{
"epoch": 0.695235042498867,
"grad_norm": 2.0470333099365234,
"learning_rate": 1.1011839330247128e-06,
"loss": 0.4526,
"step": 62900
},
{
"epoch": 0.6963403446332055,
"grad_norm": 2.6171181201934814,
"learning_rate": 1.0938509915988362e-06,
"loss": 0.4793,
"step": 63000
},
{
"epoch": 0.697445646767544,
"grad_norm": 2.3599164485931396,
"learning_rate": 1.08653570388326e-06,
"loss": 0.4159,
"step": 63100
},
{
"epoch": 0.6985509489018823,
"grad_norm": 2.1658973693847656,
"learning_rate": 1.079238161718871e-06,
"loss": 0.4399,
"step": 63200
},
{
"epoch": 0.6996562510362208,
"grad_norm": 2.165238618850708,
"learning_rate": 1.0719584567237646e-06,
"loss": 0.4545,
"step": 63300
},
{
"epoch": 0.7007615531705592,
"grad_norm": 1.8751685619354248,
"learning_rate": 1.0646966802920986e-06,
"loss": 0.4699,
"step": 63400
},
{
"epoch": 0.7018668553048976,
"grad_norm": 2.2241878509521484,
"learning_rate": 1.0574529235929424e-06,
"loss": 0.418,
"step": 63500
},
{
"epoch": 0.702972157439236,
"grad_norm": 2.227008104324341,
"learning_rate": 1.050227277569133e-06,
"loss": 0.4435,
"step": 63600
},
{
"epoch": 0.7040774595735745,
"grad_norm": 2.7472541332244873,
"learning_rate": 1.043019832936139e-06,
"loss": 0.48,
"step": 63700
},
{
"epoch": 0.7051827617079128,
"grad_norm": 1.427216649055481,
"learning_rate": 1.0358306801809123e-06,
"loss": 0.4621,
"step": 63800
},
{
"epoch": 0.7062880638422513,
"grad_norm": 2.6720409393310547,
"learning_rate": 1.0286599095607576e-06,
"loss": 0.4494,
"step": 63900
},
{
"epoch": 0.7073933659765897,
"grad_norm": 2.212963342666626,
"learning_rate": 1.021507611102197e-06,
"loss": 0.4605,
"step": 64000
},
{
"epoch": 0.7084986681109281,
"grad_norm": 1.640894889831543,
"learning_rate": 1.014373874599846e-06,
"loss": 0.4313,
"step": 64100
},
{
"epoch": 0.7096039702452666,
"grad_norm": 1.8810545206069946,
"learning_rate": 1.0072587896152769e-06,
"loss": 0.4316,
"step": 64200
},
{
"epoch": 0.710709272379605,
"grad_norm": 2.1144118309020996,
"learning_rate": 1.0001624454758983e-06,
"loss": 0.4435,
"step": 64300
},
{
"epoch": 0.7118145745139434,
"grad_norm": 1.9362212419509888,
"learning_rate": 9.930849312738366e-07,
"loss": 0.4532,
"step": 64400
},
{
"epoch": 0.7129198766482818,
"grad_norm": 2.598273277282715,
"learning_rate": 9.860263358648146e-07,
"loss": 0.4611,
"step": 64500
},
{
"epoch": 0.7140251787826203,
"grad_norm": 2.244027614593506,
"learning_rate": 9.789867478670345e-07,
"loss": 0.4351,
"step": 64600
},
{
"epoch": 0.7151304809169586,
"grad_norm": 2.007619619369507,
"learning_rate": 9.719662556600672e-07,
"loss": 0.4419,
"step": 64700
},
{
"epoch": 0.7162357830512971,
"grad_norm": 2.03373122215271,
"learning_rate": 9.649649473837448e-07,
"loss": 0.4056,
"step": 64800
},
{
"epoch": 0.7173410851856354,
"grad_norm": 2.0532867908477783,
"learning_rate": 9.579829109370506e-07,
"loss": 0.4215,
"step": 64900
},
{
"epoch": 0.7184463873199739,
"grad_norm": 2.224346876144409,
"learning_rate": 9.510202339770164e-07,
"loss": 0.4431,
"step": 65000
},
{
"epoch": 0.7195516894543124,
"grad_norm": 2.053011894226074,
"learning_rate": 9.440770039176212e-07,
"loss": 0.4496,
"step": 65100
},
{
"epoch": 0.7206569915886507,
"grad_norm": 2.328004837036133,
"learning_rate": 9.371533079286976e-07,
"loss": 0.443,
"step": 65200
},
{
"epoch": 0.7217622937229892,
"grad_norm": 1.9584163427352905,
"learning_rate": 9.302492329348348e-07,
"loss": 0.4411,
"step": 65300
},
{
"epoch": 0.7228675958573276,
"grad_norm": 1.6421287059783936,
"learning_rate": 9.233648656142838e-07,
"loss": 0.447,
"step": 65400
},
{
"epoch": 0.723972897991666,
"grad_norm": 2.134143590927124,
"learning_rate": 9.165002923978769e-07,
"loss": 0.4494,
"step": 65500
},
{
"epoch": 0.7250782001260044,
"grad_norm": 2.2968268394470215,
"learning_rate": 9.096555994679346e-07,
"loss": 0.4537,
"step": 65600
},
{
"epoch": 0.7261835022603429,
"grad_norm": 1.4631460905075073,
"learning_rate": 9.028308727571905e-07,
"loss": 0.4112,
"step": 65700
},
{
"epoch": 0.7272888043946812,
"grad_norm": 3.258443593978882,
"learning_rate": 8.960261979477061e-07,
"loss": 0.4292,
"step": 65800
},
{
"epoch": 0.7283941065290197,
"grad_norm": 2.0727250576019287,
"learning_rate": 8.892416604698021e-07,
"loss": 0.4337,
"step": 65900
},
{
"epoch": 0.7294994086633582,
"grad_norm": 2.1423141956329346,
"learning_rate": 8.824773455009777e-07,
"loss": 0.4304,
"step": 66000
},
{
"epoch": 0.7306047107976965,
"grad_norm": 1.4535356760025024,
"learning_rate": 8.757333379648491e-07,
"loss": 0.405,
"step": 66100
},
{
"epoch": 0.731710012932035,
"grad_norm": 1.9360605478286743,
"learning_rate": 8.690097225300789e-07,
"loss": 0.4434,
"step": 66200
},
{
"epoch": 0.7328153150663734,
"grad_norm": 2.19547700881958,
"learning_rate": 8.623065836093131e-07,
"loss": 0.4207,
"step": 66300
},
{
"epoch": 0.7339206172007118,
"grad_norm": 2.0186522006988525,
"learning_rate": 8.556240053581222e-07,
"loss": 0.4634,
"step": 66400
},
{
"epoch": 0.7350259193350502,
"grad_norm": 1.845166563987732,
"learning_rate": 8.489620716739436e-07,
"loss": 0.4466,
"step": 66500
},
{
"epoch": 0.7361312214693887,
"grad_norm": 2.228302001953125,
"learning_rate": 8.423208661950342e-07,
"loss": 0.4612,
"step": 66600
},
{
"epoch": 0.737236523603727,
"grad_norm": 2.429689884185791,
"learning_rate": 8.357004722994105e-07,
"loss": 0.4108,
"step": 66700
},
{
"epoch": 0.7383418257380655,
"grad_norm": 3.2977466583251953,
"learning_rate": 8.291009731038078e-07,
"loss": 0.4497,
"step": 66800
},
{
"epoch": 0.739447127872404,
"grad_norm": 2.6713201999664307,
"learning_rate": 8.22522451462637e-07,
"loss": 0.4041,
"step": 66900
},
{
"epoch": 0.7405524300067423,
"grad_norm": 1.5487697124481201,
"learning_rate": 8.159649899669436e-07,
"loss": 0.4521,
"step": 67000
},
{
"epoch": 0.7416577321410808,
"grad_norm": 2.303757429122925,
"learning_rate": 8.094286709433683e-07,
"loss": 0.444,
"step": 67100
},
{
"epoch": 0.7427630342754192,
"grad_norm": 2.1915831565856934,
"learning_rate": 8.029135764531157e-07,
"loss": 0.4364,
"step": 67200
},
{
"epoch": 0.7438683364097576,
"grad_norm": 1.9223788976669312,
"learning_rate": 7.964197882909252e-07,
"loss": 0.444,
"step": 67300
},
{
"epoch": 0.744973638544096,
"grad_norm": 2.2881598472595215,
"learning_rate": 7.899473879840431e-07,
"loss": 0.4276,
"step": 67400
},
{
"epoch": 0.7460789406784345,
"grad_norm": 1.8012919425964355,
"learning_rate": 7.834964567911956e-07,
"loss": 0.4057,
"step": 67500
},
{
"epoch": 0.7471842428127728,
"grad_norm": 1.6279646158218384,
"learning_rate": 7.770670757015752e-07,
"loss": 0.4643,
"step": 67600
},
{
"epoch": 0.7482895449471113,
"grad_norm": 2.4971320629119873,
"learning_rate": 7.706593254338174e-07,
"loss": 0.4609,
"step": 67700
},
{
"epoch": 0.7493948470814498,
"grad_norm": 1.2119097709655762,
"learning_rate": 7.642732864349927e-07,
"loss": 0.484,
"step": 67800
},
{
"epoch": 0.7505001492157881,
"grad_norm": 1.7218291759490967,
"learning_rate": 7.579090388795923e-07,
"loss": 0.4322,
"step": 67900
},
{
"epoch": 0.7516054513501266,
"grad_norm": 1.814095139503479,
"learning_rate": 7.51566662668525e-07,
"loss": 0.4391,
"step": 68000
},
{
"epoch": 0.752710753484465,
"grad_norm": 1.9664380550384521,
"learning_rate": 7.452462374281111e-07,
"loss": 0.4384,
"step": 68100
},
{
"epoch": 0.7538160556188034,
"grad_norm": 1.8115942478179932,
"learning_rate": 7.389478425090845e-07,
"loss": 0.4358,
"step": 68200
},
{
"epoch": 0.7549213577531418,
"grad_norm": 2.317274570465088,
"learning_rate": 7.326715569855983e-07,
"loss": 0.4266,
"step": 68300
},
{
"epoch": 0.7560266598874802,
"grad_norm": 1.416651964187622,
"learning_rate": 7.264174596542262e-07,
"loss": 0.4613,
"step": 68400
},
{
"epoch": 0.7571319620218186,
"grad_norm": 2.0251598358154297,
"learning_rate": 7.201856290329781e-07,
"loss": 0.4353,
"step": 68500
},
{
"epoch": 0.7582372641561571,
"grad_norm": 2.7883288860321045,
"learning_rate": 7.139761433603148e-07,
"loss": 0.4728,
"step": 68600
},
{
"epoch": 0.7593425662904955,
"grad_norm": 2.3883168697357178,
"learning_rate": 7.077890805941631e-07,
"loss": 0.4496,
"step": 68700
},
{
"epoch": 0.7604478684248339,
"grad_norm": 1.9152491092681885,
"learning_rate": 7.016245184109374e-07,
"loss": 0.4222,
"step": 68800
},
{
"epoch": 0.7615531705591724,
"grad_norm": 1.967631459236145,
"learning_rate": 6.954825342045648e-07,
"loss": 0.4551,
"step": 68900
},
{
"epoch": 0.7626584726935107,
"grad_norm": 5.269169330596924,
"learning_rate": 6.893632050855153e-07,
"loss": 0.4473,
"step": 69000
},
{
"epoch": 0.7637637748278492,
"grad_norm": 2.2106597423553467,
"learning_rate": 6.832666078798319e-07,
"loss": 0.4272,
"step": 69100
},
{
"epoch": 0.7648690769621876,
"grad_norm": 1.336655855178833,
"learning_rate": 6.771928191281657e-07,
"loss": 0.4363,
"step": 69200
},
{
"epoch": 0.765974379096526,
"grad_norm": 2.259783983230591,
"learning_rate": 6.711419150848142e-07,
"loss": 0.4753,
"step": 69300
},
{
"epoch": 0.7670796812308645,
"grad_norm": 2.4219510555267334,
"learning_rate": 6.651139717167684e-07,
"loss": 0.4387,
"step": 69400
},
{
"epoch": 0.7681849833652029,
"grad_norm": 1.4461395740509033,
"learning_rate": 6.591090647027551e-07,
"loss": 0.4333,
"step": 69500
},
{
"epoch": 0.7692902854995413,
"grad_norm": 2.112628221511841,
"learning_rate": 6.531272694322865e-07,
"loss": 0.4432,
"step": 69600
},
{
"epoch": 0.7703955876338797,
"grad_norm": 2.398404121398926,
"learning_rate": 6.471686610047149e-07,
"loss": 0.4178,
"step": 69700
},
{
"epoch": 0.7715008897682182,
"grad_norm": 1.9381033182144165,
"learning_rate": 6.412333142282912e-07,
"loss": 0.4319,
"step": 69800
},
{
"epoch": 0.7726061919025565,
"grad_norm": 2.338209390640259,
"learning_rate": 6.353213036192244e-07,
"loss": 0.4392,
"step": 69900
},
{
"epoch": 0.773711494036895,
"grad_norm": 2.6548027992248535,
"learning_rate": 6.294327034007444e-07,
"loss": 0.46,
"step": 70000
},
{
"epoch": 0.7748167961712334,
"grad_norm": 1.4142146110534668,
"learning_rate": 6.235675875021741e-07,
"loss": 0.4779,
"step": 70100
},
{
"epoch": 0.7759220983055718,
"grad_norm": 2.0672521591186523,
"learning_rate": 6.177260295579962e-07,
"loss": 0.438,
"step": 70200
},
{
"epoch": 0.7770274004399103,
"grad_norm": 2.526472806930542,
"learning_rate": 6.119081029069346e-07,
"loss": 0.4127,
"step": 70300
},
{
"epoch": 0.7781327025742487,
"grad_norm": 1.7942878007888794,
"learning_rate": 6.061138805910272e-07,
"loss": 0.4384,
"step": 70400
},
{
"epoch": 0.7792380047085871,
"grad_norm": 3.063554286956787,
"learning_rate": 6.003434353547158e-07,
"loss": 0.3919,
"step": 70500
},
{
"epoch": 0.7803433068429255,
"grad_norm": 2.0761284828186035,
"learning_rate": 5.945968396439262e-07,
"loss": 0.42,
"step": 70600
},
{
"epoch": 0.781448608977264,
"grad_norm": 2.193068504333496,
"learning_rate": 5.88874165605163e-07,
"loss": 0.4547,
"step": 70700
},
{
"epoch": 0.7825539111116023,
"grad_norm": 1.3570361137390137,
"learning_rate": 5.831754850846039e-07,
"loss": 0.4401,
"step": 70800
},
{
"epoch": 0.7836592132459408,
"grad_norm": 1.9479831457138062,
"learning_rate": 5.775008696271942e-07,
"loss": 0.4558,
"step": 70900
},
{
"epoch": 0.7847645153802792,
"grad_norm": 1.4606367349624634,
"learning_rate": 5.718503904757503e-07,
"loss": 0.4485,
"step": 71000
},
{
"epoch": 0.7858698175146176,
"grad_norm": 1.7804583311080933,
"learning_rate": 5.662241185700684e-07,
"loss": 0.3965,
"step": 71100
},
{
"epoch": 0.7869751196489561,
"grad_norm": 1.787216067314148,
"learning_rate": 5.606221245460297e-07,
"loss": 0.4349,
"step": 71200
},
{
"epoch": 0.7880804217832945,
"grad_norm": 2.5382983684539795,
"learning_rate": 5.550444787347148e-07,
"loss": 0.4296,
"step": 71300
},
{
"epoch": 0.7891857239176329,
"grad_norm": 2.524690866470337,
"learning_rate": 5.494912511615205e-07,
"loss": 0.4599,
"step": 71400
},
{
"epoch": 0.7902910260519713,
"grad_norm": 1.3965719938278198,
"learning_rate": 5.439625115452824e-07,
"loss": 0.4503,
"step": 71500
},
{
"epoch": 0.7913963281863098,
"grad_norm": 1.595763921737671,
"learning_rate": 5.384583292973985e-07,
"loss": 0.4615,
"step": 71600
},
{
"epoch": 0.7925016303206481,
"grad_norm": 1.5032540559768677,
"learning_rate": 5.329787735209566e-07,
"loss": 0.4287,
"step": 71700
},
{
"epoch": 0.7936069324549866,
"grad_norm": 1.8847301006317139,
"learning_rate": 5.275239130098669e-07,
"loss": 0.446,
"step": 71800
},
{
"epoch": 0.7947122345893249,
"grad_norm": 1.5111511945724487,
"learning_rate": 5.220938162480014e-07,
"loss": 0.435,
"step": 71900
},
{
"epoch": 0.7958175367236634,
"grad_norm": 2.1808974742889404,
"learning_rate": 5.166885514083311e-07,
"loss": 0.4365,
"step": 72000
},
{
"epoch": 0.7969228388580019,
"grad_norm": 1.921736240386963,
"learning_rate": 5.113081863520697e-07,
"loss": 0.4746,
"step": 72100
},
{
"epoch": 0.7980281409923402,
"grad_norm": 2.0888705253601074,
"learning_rate": 5.059527886278246e-07,
"loss": 0.4435,
"step": 72200
},
{
"epoch": 0.7991334431266787,
"grad_norm": 2.90547776222229,
"learning_rate": 5.006224254707448e-07,
"loss": 0.464,
"step": 72300
},
{
"epoch": 0.8002387452610171,
"grad_norm": 1.6634081602096558,
"learning_rate": 4.953171638016821e-07,
"loss": 0.4243,
"step": 72400
},
{
"epoch": 0.8013440473953555,
"grad_norm": 1.630812644958496,
"learning_rate": 4.900370702263443e-07,
"loss": 0.3898,
"step": 72500
},
{
"epoch": 0.8024493495296939,
"grad_norm": 2.4027256965637207,
"learning_rate": 4.847822110344664e-07,
"loss": 0.4398,
"step": 72600
},
{
"epoch": 0.8035546516640324,
"grad_norm": 1.9806816577911377,
"learning_rate": 4.795526521989705e-07,
"loss": 0.475,
"step": 72700
},
{
"epoch": 0.8046599537983707,
"grad_norm": 2.0573477745056152,
"learning_rate": 4.743484593751446e-07,
"loss": 0.4239,
"step": 72800
},
{
"epoch": 0.8057652559327092,
"grad_norm": 2.6847050189971924,
"learning_rate": 4.6916969789981477e-07,
"loss": 0.4509,
"step": 72900
},
{
"epoch": 0.8068705580670477,
"grad_norm": 2.843912124633789,
"learning_rate": 4.6401643279052444e-07,
"loss": 0.4605,
"step": 73000
},
{
"epoch": 0.807975860201386,
"grad_norm": 2.673027276992798,
"learning_rate": 4.588887287447188e-07,
"loss": 0.4139,
"step": 73100
},
{
"epoch": 0.8090811623357245,
"grad_norm": 1.7096991539001465,
"learning_rate": 4.5378665013893375e-07,
"loss": 0.4527,
"step": 73200
},
{
"epoch": 0.8101864644700629,
"grad_norm": 1.959112286567688,
"learning_rate": 4.4871026102798755e-07,
"loss": 0.4437,
"step": 73300
},
{
"epoch": 0.8112917666044013,
"grad_norm": 1.4862419366836548,
"learning_rate": 4.436596251441738e-07,
"loss": 0.4287,
"step": 73400
},
{
"epoch": 0.8123970687387397,
"grad_norm": 2.291743278503418,
"learning_rate": 4.3863480589646374e-07,
"loss": 0.4279,
"step": 73500
},
{
"epoch": 0.8135023708730782,
"grad_norm": 2.421630620956421,
"learning_rate": 4.336358663697107e-07,
"loss": 0.4497,
"step": 73600
},
{
"epoch": 0.8146076730074165,
"grad_norm": 2.3377912044525146,
"learning_rate": 4.286628693238576e-07,
"loss": 0.4474,
"step": 73700
},
{
"epoch": 0.815712975141755,
"grad_norm": 2.160400390625,
"learning_rate": 4.237158771931468e-07,
"loss": 0.4472,
"step": 73800
},
{
"epoch": 0.8168182772760935,
"grad_norm": 2.32997465133667,
"learning_rate": 4.187949520853382e-07,
"loss": 0.446,
"step": 73900
},
{
"epoch": 0.8179235794104318,
"grad_norm": 2.2677996158599854,
"learning_rate": 4.139001557809308e-07,
"loss": 0.4408,
"step": 74000
},
{
"epoch": 0.8190288815447703,
"grad_norm": 1.791791558265686,
"learning_rate": 4.090315497323852e-07,
"loss": 0.4721,
"step": 74100
},
{
"epoch": 0.8201341836791087,
"grad_norm": 1.590136170387268,
"learning_rate": 4.041891950633514e-07,
"loss": 0.4389,
"step": 74200
},
{
"epoch": 0.8212394858134471,
"grad_norm": 1.7760423421859741,
"learning_rate": 3.993731525679029e-07,
"loss": 0.4682,
"step": 74300
},
{
"epoch": 0.8223447879477855,
"grad_norm": 1.8399248123168945,
"learning_rate": 3.945834827097736e-07,
"loss": 0.4345,
"step": 74400
},
{
"epoch": 0.823450090082124,
"grad_norm": 2.691328763961792,
"learning_rate": 3.8982024562159854e-07,
"loss": 0.4865,
"step": 74500
},
{
"epoch": 0.8245553922164623,
"grad_norm": 2.113375425338745,
"learning_rate": 3.8508350110415646e-07,
"loss": 0.4288,
"step": 74600
},
{
"epoch": 0.8256606943508008,
"grad_norm": 1.4317853450775146,
"learning_rate": 3.8037330862562393e-07,
"loss": 0.4465,
"step": 74700
},
{
"epoch": 0.8267659964851393,
"grad_norm": 1.591933012008667,
"learning_rate": 3.7568972732082295e-07,
"loss": 0.4131,
"step": 74800
},
{
"epoch": 0.8278712986194776,
"grad_norm": 1.7374714612960815,
"learning_rate": 3.710328159904844e-07,
"loss": 0.4011,
"step": 74900
},
{
"epoch": 0.8289766007538161,
"grad_norm": 2.0382604598999023,
"learning_rate": 3.664026331005044e-07,
"loss": 0.4176,
"step": 75000
},
{
"epoch": 0.8300819028881545,
"grad_norm": 2.3857359886169434,
"learning_rate": 3.6179923678121537e-07,
"loss": 0.4921,
"step": 75100
},
{
"epoch": 0.8311872050224929,
"grad_norm": 2.013730764389038,
"learning_rate": 3.5722268482665107e-07,
"loss": 0.4365,
"step": 75200
},
{
"epoch": 0.8322925071568313,
"grad_norm": 1.714146375656128,
"learning_rate": 3.5267303469382506e-07,
"loss": 0.4353,
"step": 75300
},
{
"epoch": 0.8333978092911697,
"grad_norm": 1.6847208738327026,
"learning_rate": 3.4815034350200893e-07,
"loss": 0.4585,
"step": 75400
},
{
"epoch": 0.8345031114255081,
"grad_norm": 2.0972464084625244,
"learning_rate": 3.4365466803201216e-07,
"loss": 0.441,
"step": 75500
},
{
"epoch": 0.8356084135598466,
"grad_norm": 1.8610143661499023,
"learning_rate": 3.3918606472547136e-07,
"loss": 0.4351,
"step": 75600
},
{
"epoch": 0.836713715694185,
"grad_norm": 2.597923755645752,
"learning_rate": 3.347445896841428e-07,
"loss": 0.4196,
"step": 75700
},
{
"epoch": 0.8378190178285234,
"grad_norm": 1.8498742580413818,
"learning_rate": 3.30330298669197e-07,
"loss": 0.4526,
"step": 75800
},
{
"epoch": 0.8389243199628619,
"grad_norm": 1.8387874364852905,
"learning_rate": 3.259432471005175e-07,
"loss": 0.4287,
"step": 75900
},
{
"epoch": 0.8400296220972002,
"grad_norm": 2.75079083442688,
"learning_rate": 3.215834900560055e-07,
"loss": 0.4486,
"step": 76000
},
{
"epoch": 0.8411349242315387,
"grad_norm": 1.793381690979004,
"learning_rate": 3.1725108227089074e-07,
"loss": 0.4602,
"step": 76100
},
{
"epoch": 0.8422402263658771,
"grad_norm": 1.3438163995742798,
"learning_rate": 3.129460781370422e-07,
"loss": 0.4441,
"step": 76200
},
{
"epoch": 0.8433455285002155,
"grad_norm": 2.8206710815429688,
"learning_rate": 3.0866853170228443e-07,
"loss": 0.3989,
"step": 76300
},
{
"epoch": 0.844450830634554,
"grad_norm": 1.9363433122634888,
"learning_rate": 3.044184966697203e-07,
"loss": 0.4252,
"step": 76400
},
{
"epoch": 0.8455561327688924,
"grad_norm": 2.5586061477661133,
"learning_rate": 3.001960263970577e-07,
"loss": 0.4957,
"step": 76500
},
{
"epoch": 0.8466614349032308,
"grad_norm": 1.9901615381240845,
"learning_rate": 2.960011738959387e-07,
"loss": 0.4629,
"step": 76600
},
{
"epoch": 0.8477667370375692,
"grad_norm": 1.8617513179779053,
"learning_rate": 2.918339918312718e-07,
"loss": 0.4515,
"step": 76700
},
{
"epoch": 0.8488720391719077,
"grad_norm": 1.8503713607788086,
"learning_rate": 2.876945325205754e-07,
"loss": 0.4614,
"step": 76800
},
{
"epoch": 0.849977341306246,
"grad_norm": 2.3590264320373535,
"learning_rate": 2.835828479333164e-07,
"loss": 0.4517,
"step": 76900
},
{
"epoch": 0.8510826434405845,
"grad_norm": 1.9208427667617798,
"learning_rate": 2.7949898969026114e-07,
"loss": 0.4694,
"step": 77000
},
{
"epoch": 0.852187945574923,
"grad_norm": 2.673845052719116,
"learning_rate": 2.754430090628243e-07,
"loss": 0.4379,
"step": 77100
},
{
"epoch": 0.8532932477092613,
"grad_norm": 2.1295111179351807,
"learning_rate": 2.714149569724295e-07,
"loss": 0.4654,
"step": 77200
},
{
"epoch": 0.8543985498435998,
"grad_norm": 2.3107078075408936,
"learning_rate": 2.6741488398986384e-07,
"loss": 0.4267,
"step": 77300
},
{
"epoch": 0.8555038519779382,
"grad_norm": 2.0932328701019287,
"learning_rate": 2.6344284033464976e-07,
"loss": 0.4141,
"step": 77400
},
{
"epoch": 0.8566091541122766,
"grad_norm": 1.246630072593689,
"learning_rate": 2.594988758744088e-07,
"loss": 0.4597,
"step": 77500
},
{
"epoch": 0.857714456246615,
"grad_norm": 1.999973177909851,
"learning_rate": 2.5558304012423954e-07,
"loss": 0.4488,
"step": 77600
},
{
"epoch": 0.8588197583809535,
"grad_norm": 1.827642798423767,
"learning_rate": 2.516953822460935e-07,
"loss": 0.473,
"step": 77700
},
{
"epoch": 0.8599250605152918,
"grad_norm": 2.323723793029785,
"learning_rate": 2.4783595104815954e-07,
"loss": 0.4138,
"step": 77800
},
{
"epoch": 0.8610303626496303,
"grad_norm": 2.2066116333007812,
"learning_rate": 2.440047949842506e-07,
"loss": 0.4466,
"step": 77900
},
{
"epoch": 0.8621356647839687,
"grad_norm": 1.8978465795516968,
"learning_rate": 2.402019621531937e-07,
"loss": 0.4597,
"step": 78000
},
{
"epoch": 0.8632409669183071,
"grad_norm": 1.499747395515442,
"learning_rate": 2.364275002982286e-07,
"loss": 0.4103,
"step": 78100
},
{
"epoch": 0.8643462690526456,
"grad_norm": 1.766528606414795,
"learning_rate": 2.3268145680640758e-07,
"loss": 0.4416,
"step": 78200
},
{
"epoch": 0.865451571186984,
"grad_norm": 2.050598621368408,
"learning_rate": 2.2896387870800034e-07,
"loss": 0.4238,
"step": 78300
},
{
"epoch": 0.8665568733213224,
"grad_norm": 3.147510290145874,
"learning_rate": 2.2527481267590274e-07,
"loss": 0.4561,
"step": 78400
},
{
"epoch": 0.8676621754556608,
"grad_norm": 2.1303939819335938,
"learning_rate": 2.2161430502505133e-07,
"loss": 0.4525,
"step": 78500
},
{
"epoch": 0.8687674775899992,
"grad_norm": 2.706810235977173,
"learning_rate": 2.179824017118437e-07,
"loss": 0.4467,
"step": 78600
},
{
"epoch": 0.8698727797243376,
"grad_norm": 2.3057336807250977,
"learning_rate": 2.1437914833355887e-07,
"loss": 0.4511,
"step": 78700
},
{
"epoch": 0.8709780818586761,
"grad_norm": 2.322817087173462,
"learning_rate": 2.1080459012778636e-07,
"loss": 0.4705,
"step": 78800
},
{
"epoch": 0.8720833839930144,
"grad_norm": 1.8831989765167236,
"learning_rate": 2.0725877197185663e-07,
"loss": 0.445,
"step": 78900
},
{
"epoch": 0.8731886861273529,
"grad_norm": 2.8571081161499023,
"learning_rate": 2.0374173838228013e-07,
"loss": 0.4772,
"step": 79000
},
{
"epoch": 0.8742939882616914,
"grad_norm": 2.4051854610443115,
"learning_rate": 2.0025353351418753e-07,
"loss": 0.4557,
"step": 79100
},
{
"epoch": 0.8753992903960297,
"grad_norm": 1.7439450025558472,
"learning_rate": 1.967942011607732e-07,
"loss": 0.4421,
"step": 79200
},
{
"epoch": 0.8765045925303682,
"grad_norm": 2.514841318130493,
"learning_rate": 1.9336378475274865e-07,
"loss": 0.4508,
"step": 79300
},
{
"epoch": 0.8776098946647066,
"grad_norm": 1.8946666717529297,
"learning_rate": 1.8996232735779496e-07,
"loss": 0.4509,
"step": 79400
},
{
"epoch": 0.878715196799045,
"grad_norm": 1.5840513706207275,
"learning_rate": 1.865898716800238e-07,
"loss": 0.4557,
"step": 79500
},
{
"epoch": 0.8798204989333834,
"grad_norm": 1.824873924255371,
"learning_rate": 1.8324646005943913e-07,
"loss": 0.4662,
"step": 79600
},
{
"epoch": 0.8809258010677219,
"grad_norm": 2.0272133350372314,
"learning_rate": 1.7993213447140807e-07,
"loss": 0.4291,
"step": 79700
},
{
"epoch": 0.8820311032020602,
"grad_norm": 1.1004635095596313,
"learning_rate": 1.766469365261317e-07,
"loss": 0.3973,
"step": 79800
},
{
"epoch": 0.8831364053363987,
"grad_norm": 2.014890193939209,
"learning_rate": 1.7339090746812449e-07,
"loss": 0.45,
"step": 79900
},
{
"epoch": 0.8842417074707372,
"grad_norm": 2.0376179218292236,
"learning_rate": 1.7016408817569606e-07,
"loss": 0.4381,
"step": 80000
},
{
"epoch": 0.8853470096050755,
"grad_norm": 1.6137086153030396,
"learning_rate": 1.6696651916043666e-07,
"loss": 0.4361,
"step": 80100
},
{
"epoch": 0.886452311739414,
"grad_norm": 1.7986013889312744,
"learning_rate": 1.6379824056670934e-07,
"loss": 0.4719,
"step": 80200
},
{
"epoch": 0.8875576138737524,
"grad_norm": 1.8301312923431396,
"learning_rate": 1.6065929217114696e-07,
"loss": 0.4262,
"step": 80300
},
{
"epoch": 0.8886629160080908,
"grad_norm": 2.35886287689209,
"learning_rate": 1.575497133821524e-07,
"loss": 0.4535,
"step": 80400
},
{
"epoch": 0.8897682181424292,
"grad_norm": 1.7016726732254028,
"learning_rate": 1.5446954323940223e-07,
"loss": 0.4294,
"step": 80500
},
{
"epoch": 0.8908735202767677,
"grad_norm": 1.589161992073059,
"learning_rate": 1.5141882041335737e-07,
"loss": 0.4309,
"step": 80600
},
{
"epoch": 0.891978822411106,
"grad_norm": 2.3803720474243164,
"learning_rate": 1.4839758320477958e-07,
"loss": 0.4318,
"step": 80700
},
{
"epoch": 0.8930841245454445,
"grad_norm": 2.638575315475464,
"learning_rate": 1.454058695442484e-07,
"loss": 0.4678,
"step": 80800
},
{
"epoch": 0.894189426679783,
"grad_norm": 1.9479451179504395,
"learning_rate": 1.4244371699168453e-07,
"loss": 0.4264,
"step": 80900
},
{
"epoch": 0.8952947288141213,
"grad_norm": 1.9173952341079712,
"learning_rate": 1.3951116273588e-07,
"loss": 0.4507,
"step": 81000
},
{
"epoch": 0.8964000309484598,
"grad_norm": 1.8866360187530518,
"learning_rate": 1.3660824359403107e-07,
"loss": 0.4359,
"step": 81100
},
{
"epoch": 0.8975053330827982,
"grad_norm": 2.116718053817749,
"learning_rate": 1.3373499601127466e-07,
"loss": 0.4451,
"step": 81200
},
{
"epoch": 0.8986106352171366,
"grad_norm": 2.32564377784729,
"learning_rate": 1.308914560602323e-07,
"loss": 0.4198,
"step": 81300
},
{
"epoch": 0.899715937351475,
"grad_norm": 2.0888161659240723,
"learning_rate": 1.2807765944055528e-07,
"loss": 0.4543,
"step": 81400
},
{
"epoch": 0.9008212394858135,
"grad_norm": 2.4812674522399902,
"learning_rate": 1.2529364147847918e-07,
"loss": 0.4323,
"step": 81500
},
{
"epoch": 0.9019265416201518,
"grad_norm": 1.4540350437164307,
"learning_rate": 1.2253943712637883e-07,
"loss": 0.4429,
"step": 81600
},
{
"epoch": 0.9030318437544903,
"grad_norm": 2.2741010189056396,
"learning_rate": 1.198150809623283e-07,
"loss": 0.4087,
"step": 81700
},
{
"epoch": 0.9041371458888288,
"grad_norm": 1.891856074333191,
"learning_rate": 1.1712060718966967e-07,
"loss": 0.4314,
"step": 81800
},
{
"epoch": 0.9052424480231671,
"grad_norm": 2.013892412185669,
"learning_rate": 1.1445604963658041e-07,
"loss": 0.413,
"step": 81900
},
{
"epoch": 0.9063477501575056,
"grad_norm": 1.5470303297042847,
"learning_rate": 1.1182144175565207e-07,
"loss": 0.4102,
"step": 82000
},
{
"epoch": 0.9074530522918439,
"grad_norm": 2.09853196144104,
"learning_rate": 1.0921681662346695e-07,
"loss": 0.4228,
"step": 82100
},
{
"epoch": 0.9085583544261824,
"grad_norm": 1.8821436166763306,
"learning_rate": 1.0664220694018512e-07,
"loss": 0.4499,
"step": 82200
},
{
"epoch": 0.9096636565605208,
"grad_norm": 2.268958568572998,
"learning_rate": 1.0409764502913311e-07,
"loss": 0.457,
"step": 82300
},
{
"epoch": 0.9107689586948592,
"grad_norm": 2.286543607711792,
"learning_rate": 1.0158316283639807e-07,
"loss": 0.4531,
"step": 82400
},
{
"epoch": 0.9118742608291976,
"grad_norm": 1.7463018894195557,
"learning_rate": 9.909879193042731e-08,
"loss": 0.4182,
"step": 82500
},
{
"epoch": 0.9129795629635361,
"grad_norm": 1.9405850172042847,
"learning_rate": 9.664456350163055e-08,
"loss": 0.4074,
"step": 82600
},
{
"epoch": 0.9140848650978745,
"grad_norm": 1.8400213718414307,
"learning_rate": 9.422050836198904e-08,
"loss": 0.4281,
"step": 82700
},
{
"epoch": 0.9151901672322129,
"grad_norm": 2.0934810638427734,
"learning_rate": 9.182665694467019e-08,
"loss": 0.4394,
"step": 82800
},
{
"epoch": 0.9162954693665514,
"grad_norm": 1.6910539865493774,
"learning_rate": 8.946303930364386e-08,
"loss": 0.4511,
"step": 82900
},
{
"epoch": 0.9174007715008897,
"grad_norm": 1.2215235233306885,
"learning_rate": 8.712968511330439e-08,
"loss": 0.4427,
"step": 83000
},
{
"epoch": 0.9185060736352282,
"grad_norm": 1.4822089672088623,
"learning_rate": 8.482662366809947e-08,
"loss": 0.4029,
"step": 83100
},
{
"epoch": 0.9196113757695666,
"grad_norm": 1.9130114316940308,
"learning_rate": 8.255388388216267e-08,
"loss": 0.4471,
"step": 83200
},
{
"epoch": 0.920716677903905,
"grad_norm": 1.6017576456069946,
"learning_rate": 8.031149428894936e-08,
"loss": 0.449,
"step": 83300
},
{
"epoch": 0.9218219800382434,
"grad_norm": 1.9857609272003174,
"learning_rate": 7.80994830408785e-08,
"loss": 0.4505,
"step": 83400
},
{
"epoch": 0.9229272821725819,
"grad_norm": 1.8383105993270874,
"learning_rate": 7.59178779089792e-08,
"loss": 0.4387,
"step": 83500
},
{
"epoch": 0.9240325843069203,
"grad_norm": 1.5734336376190186,
"learning_rate": 7.376670628254368e-08,
"loss": 0.4456,
"step": 83600
},
{
"epoch": 0.9251378864412587,
"grad_norm": 1.7729212045669556,
"learning_rate": 7.16459951687809e-08,
"loss": 0.4252,
"step": 83700
},
{
"epoch": 0.9262431885755972,
"grad_norm": 2.0925188064575195,
"learning_rate": 6.955577119247909e-08,
"loss": 0.4397,
"step": 83800
},
{
"epoch": 0.9273484907099355,
"grad_norm": 1.6489801406860352,
"learning_rate": 6.749606059567177e-08,
"loss": 0.4241,
"step": 83900
},
{
"epoch": 0.928453792844274,
"grad_norm": 2.122025728225708,
"learning_rate": 6.546688923730587e-08,
"loss": 0.4509,
"step": 84000
},
{
"epoch": 0.9295590949786124,
"grad_norm": 1.5546257495880127,
"learning_rate": 6.346828259292114e-08,
"loss": 0.4283,
"step": 84100
},
{
"epoch": 0.9306643971129508,
"grad_norm": 1.4313548803329468,
"learning_rate": 6.150026575432622e-08,
"loss": 0.4315,
"step": 84200
},
{
"epoch": 0.9317696992472893,
"grad_norm": 2.144721269607544,
"learning_rate": 5.956286342928608e-08,
"loss": 0.4046,
"step": 84300
},
{
"epoch": 0.9328750013816277,
"grad_norm": 1.9185172319412231,
"learning_rate": 5.7656099941210966e-08,
"loss": 0.4762,
"step": 84400
},
{
"epoch": 0.9339803035159661,
"grad_norm": 2.0306639671325684,
"learning_rate": 5.577999922885158e-08,
"loss": 0.4347,
"step": 84500
},
{
"epoch": 0.9350856056503045,
"grad_norm": 2.1696221828460693,
"learning_rate": 5.393458484599823e-08,
"loss": 0.4654,
"step": 84600
},
{
"epoch": 0.936190907784643,
"grad_norm": 3.1747541427612305,
"learning_rate": 5.2119879961184114e-08,
"loss": 0.4361,
"step": 84700
},
{
"epoch": 0.9372962099189813,
"grad_norm": 2.4681639671325684,
"learning_rate": 5.033590735739641e-08,
"loss": 0.4064,
"step": 84800
},
{
"epoch": 0.9384015120533198,
"grad_norm": 2.34089732170105,
"learning_rate": 4.858268943178868e-08,
"loss": 0.4839,
"step": 84900
},
{
"epoch": 0.9395068141876582,
"grad_norm": 2.1032681465148926,
"learning_rate": 4.686024819540058e-08,
"loss": 0.4256,
"step": 85000
},
{
"epoch": 0.9406121163219966,
"grad_norm": 2.1643483638763428,
"learning_rate": 4.5168605272881414e-08,
"loss": 0.4503,
"step": 85100
},
{
"epoch": 0.941717418456335,
"grad_norm": 1.97984778881073,
"learning_rate": 4.350778190221699e-08,
"loss": 0.424,
"step": 85200
},
{
"epoch": 0.9428227205906735,
"grad_norm": 2.1957056522369385,
"learning_rate": 4.187779893446597e-08,
"loss": 0.4226,
"step": 85300
},
{
"epoch": 0.9439280227250119,
"grad_norm": 2.0904030799865723,
"learning_rate": 4.027867683349618e-08,
"loss": 0.4394,
"step": 85400
},
{
"epoch": 0.9450333248593503,
"grad_norm": 1.8033450841903687,
"learning_rate": 3.87104356757273e-08,
"loss": 0.4806,
"step": 85500
},
{
"epoch": 0.9461386269936887,
"grad_norm": 1.6405876874923706,
"learning_rate": 3.717309514988027e-08,
"loss": 0.4618,
"step": 85600
},
{
"epoch": 0.9472439291280271,
"grad_norm": 2.6198575496673584,
"learning_rate": 3.566667455672912e-08,
"loss": 0.4313,
"step": 85700
},
{
"epoch": 0.9483492312623656,
"grad_norm": 1.9811842441558838,
"learning_rate": 3.4191192808858966e-08,
"loss": 0.4057,
"step": 85800
},
{
"epoch": 0.9494545333967039,
"grad_norm": 1.8522582054138184,
"learning_rate": 3.27466684304284e-08,
"loss": 0.4433,
"step": 85900
},
{
"epoch": 0.9505598355310424,
"grad_norm": 2.586599826812744,
"learning_rate": 3.133311955693691e-08,
"loss": 0.414,
"step": 86000
},
{
"epoch": 0.9516651376653809,
"grad_norm": 1.4519222974777222,
"learning_rate": 2.995056393499757e-08,
"loss": 0.4333,
"step": 86100
},
{
"epoch": 0.9527704397997192,
"grad_norm": 2.7613425254821777,
"learning_rate": 2.859901892211442e-08,
"loss": 0.4776,
"step": 86200
},
{
"epoch": 0.9538757419340577,
"grad_norm": 1.8202546834945679,
"learning_rate": 2.7278501486463216e-08,
"loss": 0.4269,
"step": 86300
},
{
"epoch": 0.9549810440683961,
"grad_norm": 2.257310390472412,
"learning_rate": 2.598902820667992e-08,
"loss": 0.4069,
"step": 86400
},
{
"epoch": 0.9560863462027345,
"grad_norm": 2.6993019580841064,
"learning_rate": 2.4730615271651716e-08,
"loss": 0.413,
"step": 86500
},
{
"epoch": 0.9571916483370729,
"grad_norm": 1.3624522686004639,
"learning_rate": 2.3503278480313806e-08,
"loss": 0.4277,
"step": 86600
},
{
"epoch": 0.9582969504714114,
"grad_norm": 1.7707417011260986,
"learning_rate": 2.230703324145156e-08,
"loss": 0.4512,
"step": 86700
},
{
"epoch": 0.9594022526057497,
"grad_norm": 1.9722903966903687,
"learning_rate": 2.1141894573507014e-08,
"loss": 0.4333,
"step": 86800
},
{
"epoch": 0.9605075547400882,
"grad_norm": 2.174100399017334,
"learning_rate": 2.000787710438934e-08,
"loss": 0.473,
"step": 86900
},
{
"epoch": 0.9616128568744267,
"grad_norm": 2.9068939685821533,
"learning_rate": 1.8904995071292455e-08,
"loss": 0.4919,
"step": 87000
},
{
"epoch": 0.962718159008765,
"grad_norm": 2.1874163150787354,
"learning_rate": 1.7833262320515744e-08,
"loss": 0.4501,
"step": 87100
},
{
"epoch": 0.9638234611431035,
"grad_norm": 2.9089388847351074,
"learning_rate": 1.6792692307289747e-08,
"loss": 0.4295,
"step": 87200
},
{
"epoch": 0.9649287632774419,
"grad_norm": 2.2932639122009277,
"learning_rate": 1.578329809560797e-08,
"loss": 0.4246,
"step": 87300
},
{
"epoch": 0.9660340654117803,
"grad_norm": 1.8238743543624878,
"learning_rate": 1.4805092358062822e-08,
"loss": 0.4535,
"step": 87400
},
{
"epoch": 0.9671393675461187,
"grad_norm": 2.673421859741211,
"learning_rate": 1.3858087375686335e-08,
"loss": 0.4606,
"step": 87500
},
{
"epoch": 0.9682446696804572,
"grad_norm": 2.192293405532837,
"learning_rate": 1.2942295037795261e-08,
"loss": 0.4632,
"step": 87600
},
{
"epoch": 0.9693499718147955,
"grad_norm": 2.283832550048828,
"learning_rate": 1.2057726841842865e-08,
"loss": 0.444,
"step": 87700
},
{
"epoch": 0.970455273949134,
"grad_norm": 1.8313320875167847,
"learning_rate": 1.1204393893274878e-08,
"loss": 0.415,
"step": 87800
},
{
"epoch": 0.9715605760834725,
"grad_norm": 1.7791038751602173,
"learning_rate": 1.0382306905388495e-08,
"loss": 0.4252,
"step": 87900
},
{
"epoch": 0.9726658782178108,
"grad_norm": 2.295269250869751,
"learning_rate": 9.591476199199146e-09,
"loss": 0.4614,
"step": 88000
},
{
"epoch": 0.9737711803521493,
"grad_norm": 3.02966046333313,
"learning_rate": 8.831911703310047e-09,
"loss": 0.4545,
"step": 88100
},
{
"epoch": 0.9748764824864877,
"grad_norm": 2.004098653793335,
"learning_rate": 8.103622953789247e-09,
"loss": 0.4399,
"step": 88200
},
{
"epoch": 0.9759817846208261,
"grad_norm": 2.338454008102417,
"learning_rate": 7.406619094047496e-09,
"loss": 0.4413,
"step": 88300
},
{
"epoch": 0.9770870867551645,
"grad_norm": 1.626102089881897,
"learning_rate": 6.740908874725005e-09,
"loss": 0.4362,
"step": 88400
},
{
"epoch": 0.978192388889503,
"grad_norm": 1.9277746677398682,
"learning_rate": 6.106500653581815e-09,
"loss": 0.4365,
"step": 88500
},
{
"epoch": 0.9792976910238413,
"grad_norm": 2.415738105773926,
"learning_rate": 5.503402395391489e-09,
"loss": 0.4642,
"step": 88600
},
{
"epoch": 0.9804029931581798,
"grad_norm": 1.5694254636764526,
"learning_rate": 4.931621671842301e-09,
"loss": 0.441,
"step": 88700
},
{
"epoch": 0.9815082952925183,
"grad_norm": 1.3973413705825806,
"learning_rate": 4.391165661442043e-09,
"loss": 0.4311,
"step": 88800
},
{
"epoch": 0.9826135974268566,
"grad_norm": 1.9460673332214355,
"learning_rate": 3.882041149427251e-09,
"loss": 0.4422,
"step": 88900
},
{
"epoch": 0.9837188995611951,
"grad_norm": 1.6558293104171753,
"learning_rate": 3.404254527678286e-09,
"loss": 0.423,
"step": 89000
},
{
"epoch": 0.9848242016955334,
"grad_norm": 1.9977773427963257,
"learning_rate": 2.957811794639942e-09,
"loss": 0.456,
"step": 89100
},
{
"epoch": 0.9859295038298719,
"grad_norm": 2.308818817138672,
"learning_rate": 2.5427185552448496e-09,
"loss": 0.4504,
"step": 89200
},
{
"epoch": 0.9870348059642103,
"grad_norm": 3.172938108444214,
"learning_rate": 2.158980020843804e-09,
"loss": 0.476,
"step": 89300
},
{
"epoch": 0.9881401080985487,
"grad_norm": 1.7046418190002441,
"learning_rate": 1.8066010091402631e-09,
"loss": 0.4402,
"step": 89400
},
{
"epoch": 0.9892454102328871,
"grad_norm": 2.2551283836364746,
"learning_rate": 1.485585944129564e-09,
"loss": 0.4364,
"step": 89500
},
{
"epoch": 0.9903507123672256,
"grad_norm": 2.2183637619018555,
"learning_rate": 1.1959388560445207e-09,
"loss": 0.4685,
"step": 89600
},
{
"epoch": 0.991456014501564,
"grad_norm": 1.7988057136535645,
"learning_rate": 9.376633813026891e-10,
"loss": 0.445,
"step": 89700
},
{
"epoch": 0.9925613166359024,
"grad_norm": 2.6220619678497314,
"learning_rate": 7.107627624627911e-10,
"loss": 0.4628,
"step": 89800
},
{
"epoch": 0.9936666187702409,
"grad_norm": 2.123908519744873,
"learning_rate": 5.152398481828025e-10,
"loss": 0.4388,
"step": 89900
},
{
"epoch": 0.9947719209045792,
"grad_norm": 2.005134344100952,
"learning_rate": 3.510970931849822e-10,
"loss": 0.4415,
"step": 90000
},
{
"epoch": 0.9958772230389177,
"grad_norm": 2.5748660564422607,
"learning_rate": 2.1833655822423027e-10,
"loss": 0.4225,
"step": 90100
},
{
"epoch": 0.9969825251732561,
"grad_norm": 1.9123564958572388,
"learning_rate": 1.169599100625529e-10,
"loss": 0.4542,
"step": 90200
},
{
"epoch": 0.9980878273075945,
"grad_norm": 2.8671979904174805,
"learning_rate": 4.6968421448523313e-11,
"loss": 0.467,
"step": 90300
},
{
"epoch": 0.999193129441933,
"grad_norm": 1.6871434450149536,
"learning_rate": 8.362971101183448e-12,
"loss": 0.437,
"step": 90400
}
],
"logging_steps": 100,
"max_steps": 90473,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.968302960287416e+17,
"train_batch_size": 3,
"trial_name": null,
"trial_params": null
}