gliclass-llama-1.3B-v1.0 / trainer_state.json
Ihor's picture
Upload folder using huggingface_hub
9f43e8d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0788133053640996,
"eval_steps": 500,
"global_step": 18000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005993407252022775,
"grad_norm": 0.8397009372711182,
"learning_rate": 5.991611743559017e-07,
"loss": 0.0366,
"step": 100
},
{
"epoch": 0.01198681450404555,
"grad_norm": 0.7360026240348816,
"learning_rate": 1.1983223487118035e-06,
"loss": 0.0143,
"step": 200
},
{
"epoch": 0.017980221756068324,
"grad_norm": 0.26396483182907104,
"learning_rate": 1.7974835230677055e-06,
"loss": 0.0091,
"step": 300
},
{
"epoch": 0.0239736290080911,
"grad_norm": 0.08779273182153702,
"learning_rate": 2.396644697423607e-06,
"loss": 0.0059,
"step": 400
},
{
"epoch": 0.029967036260113874,
"grad_norm": 0.5255675911903381,
"learning_rate": 2.995805871779509e-06,
"loss": 0.0059,
"step": 500
},
{
"epoch": 0.03596044351213665,
"grad_norm": 0.2772226929664612,
"learning_rate": 3.594967046135411e-06,
"loss": 0.005,
"step": 600
},
{
"epoch": 0.041953850764159424,
"grad_norm": 0.29560720920562744,
"learning_rate": 4.194128220491313e-06,
"loss": 0.0041,
"step": 700
},
{
"epoch": 0.0479472580161822,
"grad_norm": 0.4243590235710144,
"learning_rate": 4.793289394847214e-06,
"loss": 0.0038,
"step": 800
},
{
"epoch": 0.05394066526820498,
"grad_norm": 0.12234604358673096,
"learning_rate": 5.392450569203116e-06,
"loss": 0.0033,
"step": 900
},
{
"epoch": 0.05993407252022775,
"grad_norm": 0.17332005500793457,
"learning_rate": 5.991611743559018e-06,
"loss": 0.0045,
"step": 1000
},
{
"epoch": 0.06592747977225052,
"grad_norm": 0.017084548249840736,
"learning_rate": 6.59077291791492e-06,
"loss": 0.0031,
"step": 1100
},
{
"epoch": 0.0719208870242733,
"grad_norm": 0.04909040033817291,
"learning_rate": 7.189934092270822e-06,
"loss": 0.0034,
"step": 1200
},
{
"epoch": 0.07791429427629608,
"grad_norm": 0.03835730627179146,
"learning_rate": 7.789095266626723e-06,
"loss": 0.0028,
"step": 1300
},
{
"epoch": 0.08390770152831885,
"grad_norm": 0.04889771714806557,
"learning_rate": 8.388256440982625e-06,
"loss": 0.0028,
"step": 1400
},
{
"epoch": 0.08990110878034162,
"grad_norm": 0.1031421571969986,
"learning_rate": 8.987417615338527e-06,
"loss": 0.003,
"step": 1500
},
{
"epoch": 0.0958945160323644,
"grad_norm": 0.11215908825397491,
"learning_rate": 9.586578789694428e-06,
"loss": 0.0027,
"step": 1600
},
{
"epoch": 0.10188792328438717,
"grad_norm": 0.1708650439977646,
"learning_rate": 9.99022112867102e-06,
"loss": 0.0025,
"step": 1700
},
{
"epoch": 0.10788133053640996,
"grad_norm": 0.01850762963294983,
"learning_rate": 9.958676382448504e-06,
"loss": 0.0025,
"step": 1800
},
{
"epoch": 0.11387473778843273,
"grad_norm": 0.10600468516349792,
"learning_rate": 9.927131636225988e-06,
"loss": 0.0025,
"step": 1900
},
{
"epoch": 0.1198681450404555,
"grad_norm": 0.16077758371829987,
"learning_rate": 9.895586890003471e-06,
"loss": 0.0027,
"step": 2000
},
{
"epoch": 0.12586155229247828,
"grad_norm": 0.3142828047275543,
"learning_rate": 9.864042143780953e-06,
"loss": 0.0022,
"step": 2100
},
{
"epoch": 0.13185495954450105,
"grad_norm": 0.18406708538532257,
"learning_rate": 9.832497397558437e-06,
"loss": 0.0025,
"step": 2200
},
{
"epoch": 0.13784836679652382,
"grad_norm": 0.10599557310342789,
"learning_rate": 9.80095265133592e-06,
"loss": 0.0027,
"step": 2300
},
{
"epoch": 0.1438417740485466,
"grad_norm": 0.041681960225105286,
"learning_rate": 9.769407905113404e-06,
"loss": 0.0027,
"step": 2400
},
{
"epoch": 0.1498351813005694,
"grad_norm": 0.26586103439331055,
"learning_rate": 9.737863158890888e-06,
"loss": 0.0031,
"step": 2500
},
{
"epoch": 0.15582858855259216,
"grad_norm": 0.1568969488143921,
"learning_rate": 9.70631841266837e-06,
"loss": 0.0025,
"step": 2600
},
{
"epoch": 0.16182199580461493,
"grad_norm": 0.09259970486164093,
"learning_rate": 9.674773666445855e-06,
"loss": 0.0023,
"step": 2700
},
{
"epoch": 0.1678154030566377,
"grad_norm": 0.03380216658115387,
"learning_rate": 9.643228920223337e-06,
"loss": 0.0022,
"step": 2800
},
{
"epoch": 0.17380881030866047,
"grad_norm": 0.18946796655654907,
"learning_rate": 9.611684174000821e-06,
"loss": 0.0025,
"step": 2900
},
{
"epoch": 0.17980221756068324,
"grad_norm": 0.3344770073890686,
"learning_rate": 9.580139427778305e-06,
"loss": 0.0021,
"step": 3000
},
{
"epoch": 0.18579562481270603,
"grad_norm": 0.04218849539756775,
"learning_rate": 9.548594681555787e-06,
"loss": 0.0024,
"step": 3100
},
{
"epoch": 0.1917890320647288,
"grad_norm": 0.0481434129178524,
"learning_rate": 9.517049935333272e-06,
"loss": 0.0027,
"step": 3200
},
{
"epoch": 0.19778243931675157,
"grad_norm": 0.32030656933784485,
"learning_rate": 9.485505189110754e-06,
"loss": 0.0025,
"step": 3300
},
{
"epoch": 0.20377584656877434,
"grad_norm": 0.19509385526180267,
"learning_rate": 9.453960442888238e-06,
"loss": 0.0022,
"step": 3400
},
{
"epoch": 0.2097692538207971,
"grad_norm": 0.08745113760232925,
"learning_rate": 9.422415696665721e-06,
"loss": 0.0026,
"step": 3500
},
{
"epoch": 0.2157626610728199,
"grad_norm": 0.11743105947971344,
"learning_rate": 9.390870950443205e-06,
"loss": 0.0021,
"step": 3600
},
{
"epoch": 0.22175606832484268,
"grad_norm": 0.1497587114572525,
"learning_rate": 9.359326204220689e-06,
"loss": 0.0026,
"step": 3700
},
{
"epoch": 0.22774947557686545,
"grad_norm": 0.07227639853954315,
"learning_rate": 9.32778145799817e-06,
"loss": 0.0024,
"step": 3800
},
{
"epoch": 0.23374288282888822,
"grad_norm": 0.022099023684859276,
"learning_rate": 9.296236711775654e-06,
"loss": 0.0019,
"step": 3900
},
{
"epoch": 0.239736290080911,
"grad_norm": 0.09603813290596008,
"learning_rate": 9.264691965553138e-06,
"loss": 0.0019,
"step": 4000
},
{
"epoch": 0.24572969733293376,
"grad_norm": 0.09311718493700027,
"learning_rate": 9.233147219330622e-06,
"loss": 0.002,
"step": 4100
},
{
"epoch": 0.25172310458495656,
"grad_norm": 0.06892485171556473,
"learning_rate": 9.201602473108105e-06,
"loss": 0.0022,
"step": 4200
},
{
"epoch": 0.2577165118369793,
"grad_norm": 0.2696809470653534,
"learning_rate": 9.170057726885589e-06,
"loss": 0.0024,
"step": 4300
},
{
"epoch": 0.2637099190890021,
"grad_norm": 0.12481023371219635,
"learning_rate": 9.138512980663071e-06,
"loss": 0.0021,
"step": 4400
},
{
"epoch": 0.2697033263410249,
"grad_norm": 0.029085570946335793,
"learning_rate": 9.106968234440555e-06,
"loss": 0.0025,
"step": 4500
},
{
"epoch": 0.27569673359304764,
"grad_norm": 0.16772325336933136,
"learning_rate": 9.075423488218038e-06,
"loss": 0.0019,
"step": 4600
},
{
"epoch": 0.28169014084507044,
"grad_norm": 0.25038984417915344,
"learning_rate": 9.04387874199552e-06,
"loss": 0.0022,
"step": 4700
},
{
"epoch": 0.2876835480970932,
"grad_norm": 0.009772785007953644,
"learning_rate": 9.012333995773006e-06,
"loss": 0.002,
"step": 4800
},
{
"epoch": 0.293676955349116,
"grad_norm": 0.10010802745819092,
"learning_rate": 8.980789249550487e-06,
"loss": 0.0021,
"step": 4900
},
{
"epoch": 0.2996703626011388,
"grad_norm": 0.019169898703694344,
"learning_rate": 8.949244503327971e-06,
"loss": 0.0024,
"step": 5000
},
{
"epoch": 0.3056637698531615,
"grad_norm": 0.039739012718200684,
"learning_rate": 8.917699757105455e-06,
"loss": 0.0022,
"step": 5100
},
{
"epoch": 0.3116571771051843,
"grad_norm": 0.20961305499076843,
"learning_rate": 8.886155010882938e-06,
"loss": 0.0021,
"step": 5200
},
{
"epoch": 0.31765058435720706,
"grad_norm": 0.07605484127998352,
"learning_rate": 8.854610264660422e-06,
"loss": 0.002,
"step": 5300
},
{
"epoch": 0.32364399160922985,
"grad_norm": 0.01589258573949337,
"learning_rate": 8.823065518437904e-06,
"loss": 0.0022,
"step": 5400
},
{
"epoch": 0.3296373988612526,
"grad_norm": 0.10248999297618866,
"learning_rate": 8.791520772215388e-06,
"loss": 0.0023,
"step": 5500
},
{
"epoch": 0.3356308061132754,
"grad_norm": 0.09167122095823288,
"learning_rate": 8.759976025992871e-06,
"loss": 0.002,
"step": 5600
},
{
"epoch": 0.3416242133652982,
"grad_norm": 0.23392055928707123,
"learning_rate": 8.728431279770355e-06,
"loss": 0.0021,
"step": 5700
},
{
"epoch": 0.34761762061732093,
"grad_norm": 0.040714360773563385,
"learning_rate": 8.696886533547839e-06,
"loss": 0.0025,
"step": 5800
},
{
"epoch": 0.35361102786934373,
"grad_norm": 0.184820294380188,
"learning_rate": 8.665341787325322e-06,
"loss": 0.0021,
"step": 5900
},
{
"epoch": 0.3596044351213665,
"grad_norm": 0.04772236570715904,
"learning_rate": 8.633797041102804e-06,
"loss": 0.0022,
"step": 6000
},
{
"epoch": 0.36559784237338927,
"grad_norm": 0.12407626956701279,
"learning_rate": 8.60225229488029e-06,
"loss": 0.0018,
"step": 6100
},
{
"epoch": 0.37159124962541207,
"grad_norm": 0.1552393138408661,
"learning_rate": 8.570707548657772e-06,
"loss": 0.0024,
"step": 6200
},
{
"epoch": 0.3775846568774348,
"grad_norm": 0.005017109680920839,
"learning_rate": 8.539162802435255e-06,
"loss": 0.0022,
"step": 6300
},
{
"epoch": 0.3835780641294576,
"grad_norm": 0.00316947465762496,
"learning_rate": 8.507618056212739e-06,
"loss": 0.0021,
"step": 6400
},
{
"epoch": 0.38957147138148035,
"grad_norm": 0.08644753694534302,
"learning_rate": 8.476073309990221e-06,
"loss": 0.0016,
"step": 6500
},
{
"epoch": 0.39556487863350315,
"grad_norm": 0.23877011239528656,
"learning_rate": 8.444528563767705e-06,
"loss": 0.0023,
"step": 6600
},
{
"epoch": 0.40155828588552595,
"grad_norm": 0.12397243827581406,
"learning_rate": 8.412983817545188e-06,
"loss": 0.002,
"step": 6700
},
{
"epoch": 0.4075516931375487,
"grad_norm": 0.08488207310438156,
"learning_rate": 8.381439071322672e-06,
"loss": 0.002,
"step": 6800
},
{
"epoch": 0.4135451003895715,
"grad_norm": 0.15658150613307953,
"learning_rate": 8.349894325100156e-06,
"loss": 0.0021,
"step": 6900
},
{
"epoch": 0.4195385076415942,
"grad_norm": 0.09054456651210785,
"learning_rate": 8.31834957887764e-06,
"loss": 0.0022,
"step": 7000
},
{
"epoch": 0.425531914893617,
"grad_norm": 0.1383715718984604,
"learning_rate": 8.286804832655121e-06,
"loss": 0.0019,
"step": 7100
},
{
"epoch": 0.4315253221456398,
"grad_norm": 0.23421403765678406,
"learning_rate": 8.255260086432605e-06,
"loss": 0.0021,
"step": 7200
},
{
"epoch": 0.43751872939766256,
"grad_norm": 0.07612959295511246,
"learning_rate": 8.223715340210089e-06,
"loss": 0.0018,
"step": 7300
},
{
"epoch": 0.44351213664968536,
"grad_norm": 0.08813223987817764,
"learning_rate": 8.192170593987572e-06,
"loss": 0.0028,
"step": 7400
},
{
"epoch": 0.4495055439017081,
"grad_norm": 0.11603320389986038,
"learning_rate": 8.160625847765056e-06,
"loss": 0.0021,
"step": 7500
},
{
"epoch": 0.4554989511537309,
"grad_norm": 0.06462118774652481,
"learning_rate": 8.129081101542538e-06,
"loss": 0.0021,
"step": 7600
},
{
"epoch": 0.46149235840575364,
"grad_norm": 0.08253411203622818,
"learning_rate": 8.097536355320023e-06,
"loss": 0.0019,
"step": 7700
},
{
"epoch": 0.46748576565777644,
"grad_norm": 0.017711922526359558,
"learning_rate": 8.065991609097505e-06,
"loss": 0.0018,
"step": 7800
},
{
"epoch": 0.47347917290979924,
"grad_norm": 0.16423271596431732,
"learning_rate": 8.034446862874989e-06,
"loss": 0.0021,
"step": 7900
},
{
"epoch": 0.479472580161822,
"grad_norm": 0.17104622721672058,
"learning_rate": 8.002902116652473e-06,
"loss": 0.0022,
"step": 8000
},
{
"epoch": 0.4854659874138448,
"grad_norm": 0.11236003786325455,
"learning_rate": 7.971357370429955e-06,
"loss": 0.002,
"step": 8100
},
{
"epoch": 0.4914593946658675,
"grad_norm": 0.004910625517368317,
"learning_rate": 7.93981262420744e-06,
"loss": 0.0017,
"step": 8200
},
{
"epoch": 0.4974528019178903,
"grad_norm": 0.015166868455708027,
"learning_rate": 7.908267877984922e-06,
"loss": 0.0016,
"step": 8300
},
{
"epoch": 0.5034462091699131,
"grad_norm": 0.04219336435198784,
"learning_rate": 7.876723131762406e-06,
"loss": 0.0019,
"step": 8400
},
{
"epoch": 0.5094396164219359,
"grad_norm": 0.08096965402364731,
"learning_rate": 7.84517838553989e-06,
"loss": 0.002,
"step": 8500
},
{
"epoch": 0.5154330236739586,
"grad_norm": 0.27304044365882874,
"learning_rate": 7.813633639317373e-06,
"loss": 0.002,
"step": 8600
},
{
"epoch": 0.5214264309259814,
"grad_norm": 0.023843977600336075,
"learning_rate": 7.782088893094857e-06,
"loss": 0.0021,
"step": 8700
},
{
"epoch": 0.5274198381780042,
"grad_norm": 0.06996838003396988,
"learning_rate": 7.750544146872338e-06,
"loss": 0.0016,
"step": 8800
},
{
"epoch": 0.533413245430027,
"grad_norm": 0.09238845109939575,
"learning_rate": 7.718999400649822e-06,
"loss": 0.0017,
"step": 8900
},
{
"epoch": 0.5394066526820498,
"grad_norm": 0.031245483085513115,
"learning_rate": 7.687454654427306e-06,
"loss": 0.0019,
"step": 9000
},
{
"epoch": 0.5454000599340725,
"grad_norm": 0.02232646569609642,
"learning_rate": 7.65590990820479e-06,
"loss": 0.0022,
"step": 9100
},
{
"epoch": 0.5513934671860953,
"grad_norm": 0.18810293078422546,
"learning_rate": 7.624365161982272e-06,
"loss": 0.002,
"step": 9200
},
{
"epoch": 0.5573868744381181,
"grad_norm": 0.04845254495739937,
"learning_rate": 7.592820415759756e-06,
"loss": 0.0021,
"step": 9300
},
{
"epoch": 0.5633802816901409,
"grad_norm": 0.12073975801467896,
"learning_rate": 7.561275669537239e-06,
"loss": 0.0021,
"step": 9400
},
{
"epoch": 0.5693736889421637,
"grad_norm": 0.03330647572875023,
"learning_rate": 7.529730923314722e-06,
"loss": 0.002,
"step": 9500
},
{
"epoch": 0.5753670961941864,
"grad_norm": 0.23292703926563263,
"learning_rate": 7.498186177092206e-06,
"loss": 0.0018,
"step": 9600
},
{
"epoch": 0.5813605034462092,
"grad_norm": 0.3227817118167877,
"learning_rate": 7.466641430869689e-06,
"loss": 0.0017,
"step": 9700
},
{
"epoch": 0.587353910698232,
"grad_norm": 0.03530238941311836,
"learning_rate": 7.4350966846471726e-06,
"loss": 0.0023,
"step": 9800
},
{
"epoch": 0.5933473179502547,
"grad_norm": 0.1631837785243988,
"learning_rate": 7.403551938424655e-06,
"loss": 0.0019,
"step": 9900
},
{
"epoch": 0.5993407252022775,
"grad_norm": 0.11341429501771927,
"learning_rate": 7.37200719220214e-06,
"loss": 0.0018,
"step": 10000
},
{
"epoch": 0.6053341324543002,
"grad_norm": 0.19524067640304565,
"learning_rate": 7.340462445979623e-06,
"loss": 0.0021,
"step": 10100
},
{
"epoch": 0.611327539706323,
"grad_norm": 0.058198366314172745,
"learning_rate": 7.308917699757106e-06,
"loss": 0.0018,
"step": 10200
},
{
"epoch": 0.6173209469583458,
"grad_norm": 0.02788078971207142,
"learning_rate": 7.277372953534589e-06,
"loss": 0.0014,
"step": 10300
},
{
"epoch": 0.6233143542103686,
"grad_norm": 0.07168685644865036,
"learning_rate": 7.245828207312072e-06,
"loss": 0.0017,
"step": 10400
},
{
"epoch": 0.6293077614623914,
"grad_norm": 0.07542666047811508,
"learning_rate": 7.2142834610895565e-06,
"loss": 0.0022,
"step": 10500
},
{
"epoch": 0.6353011687144141,
"grad_norm": 0.1050957664847374,
"learning_rate": 7.182738714867039e-06,
"loss": 0.0017,
"step": 10600
},
{
"epoch": 0.6412945759664369,
"grad_norm": 0.02330237440764904,
"learning_rate": 7.151193968644523e-06,
"loss": 0.002,
"step": 10700
},
{
"epoch": 0.6472879832184597,
"grad_norm": 0.019814783707261086,
"learning_rate": 7.119649222422006e-06,
"loss": 0.0019,
"step": 10800
},
{
"epoch": 0.6532813904704825,
"grad_norm": 0.041212160140275955,
"learning_rate": 7.08810447619949e-06,
"loss": 0.0022,
"step": 10900
},
{
"epoch": 0.6592747977225052,
"grad_norm": 0.104148730635643,
"learning_rate": 7.056559729976973e-06,
"loss": 0.0017,
"step": 11000
},
{
"epoch": 0.665268204974528,
"grad_norm": 0.060578759759664536,
"learning_rate": 7.025014983754457e-06,
"loss": 0.0019,
"step": 11100
},
{
"epoch": 0.6712616122265508,
"grad_norm": 0.014108662493526936,
"learning_rate": 6.99347023753194e-06,
"loss": 0.002,
"step": 11200
},
{
"epoch": 0.6772550194785736,
"grad_norm": 0.06860730797052383,
"learning_rate": 6.9619254913094224e-06,
"loss": 0.0018,
"step": 11300
},
{
"epoch": 0.6832484267305964,
"grad_norm": 0.2818455696105957,
"learning_rate": 6.930380745086906e-06,
"loss": 0.0015,
"step": 11400
},
{
"epoch": 0.6892418339826191,
"grad_norm": 0.09976188093423843,
"learning_rate": 6.89883599886439e-06,
"loss": 0.0017,
"step": 11500
},
{
"epoch": 0.6952352412346419,
"grad_norm": 0.04988027364015579,
"learning_rate": 6.8672912526418734e-06,
"loss": 0.0016,
"step": 11600
},
{
"epoch": 0.7012286484866647,
"grad_norm": 0.061295535415410995,
"learning_rate": 6.835746506419356e-06,
"loss": 0.0016,
"step": 11700
},
{
"epoch": 0.7072220557386875,
"grad_norm": 0.04820416495203972,
"learning_rate": 6.80420176019684e-06,
"loss": 0.0017,
"step": 11800
},
{
"epoch": 0.7132154629907103,
"grad_norm": 0.08933009207248688,
"learning_rate": 6.772657013974323e-06,
"loss": 0.002,
"step": 11900
},
{
"epoch": 0.719208870242733,
"grad_norm": 0.057753268629312515,
"learning_rate": 6.7411122677518055e-06,
"loss": 0.0018,
"step": 12000
},
{
"epoch": 0.7252022774947557,
"grad_norm": 0.020321357995271683,
"learning_rate": 6.70956752152929e-06,
"loss": 0.0017,
"step": 12100
},
{
"epoch": 0.7311956847467785,
"grad_norm": 0.258957177400589,
"learning_rate": 6.678022775306773e-06,
"loss": 0.0019,
"step": 12200
},
{
"epoch": 0.7371890919988013,
"grad_norm": 0.1562880277633667,
"learning_rate": 6.6464780290842565e-06,
"loss": 0.002,
"step": 12300
},
{
"epoch": 0.7431824992508241,
"grad_norm": 0.0703672245144844,
"learning_rate": 6.614933282861739e-06,
"loss": 0.0018,
"step": 12400
},
{
"epoch": 0.7491759065028468,
"grad_norm": 0.015919741243124008,
"learning_rate": 6.583388536639224e-06,
"loss": 0.0018,
"step": 12500
},
{
"epoch": 0.7551693137548696,
"grad_norm": 0.06606917828321457,
"learning_rate": 6.551843790416707e-06,
"loss": 0.0022,
"step": 12600
},
{
"epoch": 0.7611627210068924,
"grad_norm": 0.1327201873064041,
"learning_rate": 6.52029904419419e-06,
"loss": 0.002,
"step": 12700
},
{
"epoch": 0.7671561282589152,
"grad_norm": 0.10167068988084793,
"learning_rate": 6.488754297971673e-06,
"loss": 0.0018,
"step": 12800
},
{
"epoch": 0.773149535510938,
"grad_norm": 0.20014306902885437,
"learning_rate": 6.457209551749156e-06,
"loss": 0.0019,
"step": 12900
},
{
"epoch": 0.7791429427629607,
"grad_norm": 0.10611408203840256,
"learning_rate": 6.4256648055266405e-06,
"loss": 0.0016,
"step": 13000
},
{
"epoch": 0.7851363500149835,
"grad_norm": 0.004227208439260721,
"learning_rate": 6.394120059304123e-06,
"loss": 0.0018,
"step": 13100
},
{
"epoch": 0.7911297572670063,
"grad_norm": 0.04251255840063095,
"learning_rate": 6.362575313081607e-06,
"loss": 0.0022,
"step": 13200
},
{
"epoch": 0.7971231645190291,
"grad_norm": 0.09611974656581879,
"learning_rate": 6.33103056685909e-06,
"loss": 0.0019,
"step": 13300
},
{
"epoch": 0.8031165717710519,
"grad_norm": 0.060009848326444626,
"learning_rate": 6.299485820636574e-06,
"loss": 0.0019,
"step": 13400
},
{
"epoch": 0.8091099790230746,
"grad_norm": 0.027135098353028297,
"learning_rate": 6.267941074414057e-06,
"loss": 0.0016,
"step": 13500
},
{
"epoch": 0.8151033862750974,
"grad_norm": 0.09115968644618988,
"learning_rate": 6.236396328191541e-06,
"loss": 0.0017,
"step": 13600
},
{
"epoch": 0.8210967935271202,
"grad_norm": 0.3819001317024231,
"learning_rate": 6.204851581969024e-06,
"loss": 0.0019,
"step": 13700
},
{
"epoch": 0.827090200779143,
"grad_norm": 0.07268409430980682,
"learning_rate": 6.173306835746506e-06,
"loss": 0.002,
"step": 13800
},
{
"epoch": 0.8330836080311658,
"grad_norm": 0.1490897685289383,
"learning_rate": 6.14176208952399e-06,
"loss": 0.0015,
"step": 13900
},
{
"epoch": 0.8390770152831885,
"grad_norm": 0.07468798011541367,
"learning_rate": 6.110217343301474e-06,
"loss": 0.0017,
"step": 14000
},
{
"epoch": 0.8450704225352113,
"grad_norm": 0.045000866055488586,
"learning_rate": 6.078672597078957e-06,
"loss": 0.0019,
"step": 14100
},
{
"epoch": 0.851063829787234,
"grad_norm": 0.22245222330093384,
"learning_rate": 6.04712785085644e-06,
"loss": 0.0015,
"step": 14200
},
{
"epoch": 0.8570572370392568,
"grad_norm": 0.09135129302740097,
"learning_rate": 6.015583104633924e-06,
"loss": 0.002,
"step": 14300
},
{
"epoch": 0.8630506442912796,
"grad_norm": 0.043701499700546265,
"learning_rate": 5.984038358411407e-06,
"loss": 0.0017,
"step": 14400
},
{
"epoch": 0.8690440515433023,
"grad_norm": 0.1364869773387909,
"learning_rate": 5.9524936121888895e-06,
"loss": 0.0019,
"step": 14500
},
{
"epoch": 0.8750374587953251,
"grad_norm": 0.08669265359640121,
"learning_rate": 5.920948865966374e-06,
"loss": 0.002,
"step": 14600
},
{
"epoch": 0.8810308660473479,
"grad_norm": 0.00844608899205923,
"learning_rate": 5.889404119743857e-06,
"loss": 0.0016,
"step": 14700
},
{
"epoch": 0.8870242732993707,
"grad_norm": 0.027935262769460678,
"learning_rate": 5.8578593735213405e-06,
"loss": 0.0018,
"step": 14800
},
{
"epoch": 0.8930176805513935,
"grad_norm": 0.0481196753680706,
"learning_rate": 5.826314627298823e-06,
"loss": 0.0019,
"step": 14900
},
{
"epoch": 0.8990110878034162,
"grad_norm": 0.021947329863905907,
"learning_rate": 5.794769881076308e-06,
"loss": 0.0015,
"step": 15000
},
{
"epoch": 0.905004495055439,
"grad_norm": 0.08527759462594986,
"learning_rate": 5.763225134853791e-06,
"loss": 0.0017,
"step": 15100
},
{
"epoch": 0.9109979023074618,
"grad_norm": 0.021068023517727852,
"learning_rate": 5.731680388631274e-06,
"loss": 0.0018,
"step": 15200
},
{
"epoch": 0.9169913095594846,
"grad_norm": 0.08113428950309753,
"learning_rate": 5.700135642408757e-06,
"loss": 0.0017,
"step": 15300
},
{
"epoch": 0.9229847168115073,
"grad_norm": 0.10709325969219208,
"learning_rate": 5.66859089618624e-06,
"loss": 0.0015,
"step": 15400
},
{
"epoch": 0.9289781240635301,
"grad_norm": 0.08009694516658783,
"learning_rate": 5.6370461499637244e-06,
"loss": 0.0016,
"step": 15500
},
{
"epoch": 0.9349715313155529,
"grad_norm": 0.03613545373082161,
"learning_rate": 5.605501403741207e-06,
"loss": 0.0017,
"step": 15600
},
{
"epoch": 0.9409649385675757,
"grad_norm": 0.06710252165794373,
"learning_rate": 5.573956657518691e-06,
"loss": 0.0018,
"step": 15700
},
{
"epoch": 0.9469583458195985,
"grad_norm": 0.09847810864448547,
"learning_rate": 5.542411911296174e-06,
"loss": 0.0014,
"step": 15800
},
{
"epoch": 0.9529517530716212,
"grad_norm": 0.011624569073319435,
"learning_rate": 5.510867165073658e-06,
"loss": 0.0016,
"step": 15900
},
{
"epoch": 0.958945160323644,
"grad_norm": 0.06741365045309067,
"learning_rate": 5.479322418851141e-06,
"loss": 0.0015,
"step": 16000
},
{
"epoch": 0.9649385675756668,
"grad_norm": 0.021546615287661552,
"learning_rate": 5.447777672628625e-06,
"loss": 0.0017,
"step": 16100
},
{
"epoch": 0.9709319748276896,
"grad_norm": 0.1303360015153885,
"learning_rate": 5.4162329264061075e-06,
"loss": 0.0018,
"step": 16200
},
{
"epoch": 0.9769253820797124,
"grad_norm": 0.10070718824863434,
"learning_rate": 5.38468818018359e-06,
"loss": 0.0018,
"step": 16300
},
{
"epoch": 0.982918789331735,
"grad_norm": 0.08305861055850983,
"learning_rate": 5.353143433961074e-06,
"loss": 0.0016,
"step": 16400
},
{
"epoch": 0.9889121965837578,
"grad_norm": 0.007656518369913101,
"learning_rate": 5.321598687738557e-06,
"loss": 0.0017,
"step": 16500
},
{
"epoch": 0.9949056038357806,
"grad_norm": 0.0743492990732193,
"learning_rate": 5.290053941516041e-06,
"loss": 0.0015,
"step": 16600
},
{
"epoch": 1.0,
"eval_accuracy": 0.8271744263468347,
"eval_f1": 0.7498195656860883,
"eval_loss": 0.001594877801835537,
"eval_precision": 0.6861185445920746,
"eval_recall": 0.8271744263468347,
"eval_runtime": 1686.0917,
"eval_samples_per_second": 8.796,
"eval_steps_per_second": 1.1,
"step": 16685
},
{
"epoch": 1.0008990110878033,
"grad_norm": 0.05216585099697113,
"learning_rate": 5.258509195293524e-06,
"loss": 0.0015,
"step": 16700
},
{
"epoch": 1.0068924183398262,
"grad_norm": 0.12606635689735413,
"learning_rate": 5.226964449071008e-06,
"loss": 0.0011,
"step": 16800
},
{
"epoch": 1.012885825591849,
"grad_norm": 0.0004606186121236533,
"learning_rate": 5.195419702848491e-06,
"loss": 0.001,
"step": 16900
},
{
"epoch": 1.0188792328438718,
"grad_norm": 0.000365409447113052,
"learning_rate": 5.1638749566259735e-06,
"loss": 0.001,
"step": 17000
},
{
"epoch": 1.0248726400958945,
"grad_norm": 0.031485725194215775,
"learning_rate": 5.132330210403458e-06,
"loss": 0.0012,
"step": 17100
},
{
"epoch": 1.0308660473479172,
"grad_norm": 0.0031660550739616156,
"learning_rate": 5.100785464180941e-06,
"loss": 0.0011,
"step": 17200
},
{
"epoch": 1.0368594545999401,
"grad_norm": 0.04788443446159363,
"learning_rate": 5.0692407179584244e-06,
"loss": 0.001,
"step": 17300
},
{
"epoch": 1.0428528618519628,
"grad_norm": 0.07966958731412888,
"learning_rate": 5.037695971735907e-06,
"loss": 0.001,
"step": 17400
},
{
"epoch": 1.0488462691039857,
"grad_norm": 0.2937103807926178,
"learning_rate": 5.006151225513392e-06,
"loss": 0.0007,
"step": 17500
},
{
"epoch": 1.0548396763560084,
"grad_norm": 0.0027551730163395405,
"learning_rate": 4.974606479290875e-06,
"loss": 0.0007,
"step": 17600
},
{
"epoch": 1.060833083608031,
"grad_norm": 0.08430271595716476,
"learning_rate": 4.943061733068357e-06,
"loss": 0.0008,
"step": 17700
},
{
"epoch": 1.066826490860054,
"grad_norm": 0.24536843597888947,
"learning_rate": 4.911516986845841e-06,
"loss": 0.0006,
"step": 17800
},
{
"epoch": 1.0728198981120767,
"grad_norm": 0.040876179933547974,
"learning_rate": 4.879972240623325e-06,
"loss": 0.0009,
"step": 17900
},
{
"epoch": 1.0788133053640996,
"grad_norm": 0.0515579879283905,
"learning_rate": 4.848427494400808e-06,
"loss": 0.0007,
"step": 18000
}
],
"logging_steps": 100,
"max_steps": 33370,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"total_flos": 7.597573697465206e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}