{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9990732159406858,
  "eval_steps": 500,
  "global_step": 539,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018535681186283596,
      "grad_norm": 26.35410018433926,
      "learning_rate": 5.555555555555555e-06,
      "loss": 1.6809,
      "step": 1
    },
    {
      "epoch": 0.009267840593141797,
      "grad_norm": 39.94533926724377,
      "learning_rate": 2.7777777777777772e-05,
      "loss": 1.343,
      "step": 5
    },
    {
      "epoch": 0.018535681186283594,
      "grad_norm": 4.5543620999927485,
      "learning_rate": 5.5555555555555545e-05,
      "loss": 0.8749,
      "step": 10
    },
    {
      "epoch": 0.027803521779425393,
      "grad_norm": 4.24495034141875,
      "learning_rate": 8.333333333333333e-05,
      "loss": 0.8704,
      "step": 15
    },
    {
      "epoch": 0.03707136237256719,
      "grad_norm": 39.42543160126464,
      "learning_rate": 0.00011111111111111109,
      "loss": 0.8733,
      "step": 20
    },
    {
      "epoch": 0.04633920296570899,
      "grad_norm": 11.962483325702362,
      "learning_rate": 0.0001388888888888889,
      "loss": 1.0553,
      "step": 25
    },
    {
      "epoch": 0.05560704355885079,
      "grad_norm": 14.195688905715766,
      "learning_rate": 0.00016666666666666666,
      "loss": 1.0623,
      "step": 30
    },
    {
      "epoch": 0.06487488415199258,
      "grad_norm": 4.164105779251046,
      "learning_rate": 0.00019444444444444443,
      "loss": 0.8718,
      "step": 35
    },
    {
      "epoch": 0.07414272474513438,
      "grad_norm": 6.991113945747281,
      "learning_rate": 0.00022222222222222218,
      "loss": 0.8619,
      "step": 40
    },
    {
      "epoch": 0.08341056533827618,
      "grad_norm": 631.4969668103296,
      "learning_rate": 0.00025,
      "loss": 1.8292,
      "step": 45
    },
    {
      "epoch": 0.09267840593141798,
      "grad_norm": 51.99035558508526,
      "learning_rate": 0.0002777777777777778,
      "loss": 1.6128,
      "step": 50
    },
    {
      "epoch": 0.10194624652455977,
      "grad_norm": 5.615188207831111,
      "learning_rate": 0.0002999968531502098,
      "loss": 1.1458,
      "step": 55
    },
    {
      "epoch": 0.11121408711770157,
      "grad_norm": 162.57564664314097,
      "learning_rate": 0.0002998867272706619,
      "loss": 1.7169,
      "step": 60
    },
    {
      "epoch": 0.12048192771084337,
      "grad_norm": 27.819505339583905,
      "learning_rate": 0.0002996193909122197,
      "loss": 2.1196,
      "step": 65
    },
    {
      "epoch": 0.12974976830398516,
      "grad_norm": 12.017979123334465,
      "learning_rate": 0.00029919512447380625,
      "loss": 1.3348,
      "step": 70
    },
    {
      "epoch": 0.13901760889712697,
      "grad_norm": 529.5307829885717,
      "learning_rate": 0.0002986143729523282,
      "loss": 1.0105,
      "step": 75
    },
    {
      "epoch": 0.14828544949026876,
      "grad_norm": 2.511805320667406,
      "learning_rate": 0.000297877745475935,
      "loss": 1.0586,
      "step": 80
    },
    {
      "epoch": 0.15755329008341057,
      "grad_norm": 4.022429995067504,
      "learning_rate": 0.0002969860146651276,
      "loss": 0.9055,
      "step": 85
    },
    {
      "epoch": 0.16682113067655235,
      "grad_norm": 512.3998660297882,
      "learning_rate": 0.0002959401158223867,
      "loss": 5.3655,
      "step": 90
    },
    {
      "epoch": 0.17608897126969417,
      "grad_norm": 131.99417407337796,
      "learning_rate": 0.00029474114595116896,
      "loss": 3.1238,
      "step": 95
    },
    {
      "epoch": 0.18535681186283595,
      "grad_norm": 19.953838142892188,
      "learning_rate": 0.0002933903626053024,
      "loss": 1.9603,
      "step": 100
    },
    {
      "epoch": 0.19462465245597776,
      "grad_norm": 7.472797510255509,
      "learning_rate": 0.00029188918256998564,
      "loss": 1.4801,
      "step": 105
    },
    {
      "epoch": 0.20389249304911955,
      "grad_norm": 9.70552960980865,
      "learning_rate": 0.00029023918037577635,
      "loss": 1.3544,
      "step": 110
    },
    {
      "epoch": 0.21316033364226136,
      "grad_norm": 17.11916105312361,
      "learning_rate": 0.00028844208664712575,
      "loss": 1.0681,
      "step": 115
    },
    {
      "epoch": 0.22242817423540315,
      "grad_norm": 22.28855318123011,
      "learning_rate": 0.00028649978628719254,
      "loss": 1.2611,
      "step": 120
    },
    {
      "epoch": 0.23169601482854496,
      "grad_norm": 34.76579789562846,
      "learning_rate": 0.00028441431650084016,
      "loss": 1.6181,
      "step": 125
    },
    {
      "epoch": 0.24096385542168675,
      "grad_norm": 15.11803328233237,
      "learning_rate": 0.0002821878646578898,
      "loss": 1.1601,
      "step": 130
    },
    {
      "epoch": 0.25023169601482853,
      "grad_norm": 10.594239787348334,
      "learning_rate": 0.0002798227659988717,
      "loss": 1.0309,
      "step": 135
    },
    {
      "epoch": 0.2594995366079703,
      "grad_norm": 3.881883182518721,
      "learning_rate": 0.00027732150118568017,
      "loss": 1.5651,
      "step": 140
    },
    {
      "epoch": 0.26876737720111216,
      "grad_norm": 3.036719624432077,
      "learning_rate": 0.00027468669369970207,
      "loss": 1.1445,
      "step": 145
    },
    {
      "epoch": 0.27803521779425394,
      "grad_norm": 14.352629655607771,
      "learning_rate": 0.00027192110709014697,
      "loss": 0.9305,
      "step": 150
    },
    {
      "epoch": 0.2873030583873957,
      "grad_norm": 2.933168446331413,
      "learning_rate": 0.0002690276420754655,
      "loss": 0.9324,
      "step": 155
    },
    {
      "epoch": 0.2965708989805375,
      "grad_norm": 4.661928504358525,
      "learning_rate": 0.00026600933350089654,
      "loss": 0.9491,
      "step": 160
    },
    {
      "epoch": 0.30583873957367935,
      "grad_norm": 5.782107803222577,
      "learning_rate": 0.0002628693471553335,
      "loss": 0.8689,
      "step": 165
    },
    {
      "epoch": 0.31510658016682114,
      "grad_norm": 4.287118434199453,
      "learning_rate": 0.00025961097645084885,
      "loss": 1.112,
      "step": 170
    },
    {
      "epoch": 0.3243744207599629,
      "grad_norm": 170.8246279898043,
      "learning_rate": 0.0002562376389683599,
      "loss": 2.2669,
      "step": 175
    },
    {
      "epoch": 0.3336422613531047,
      "grad_norm": 243.40178580373365,
      "learning_rate": 0.00025275287287305814,
      "loss": 3.2917,
      "step": 180
    },
    {
      "epoch": 0.34291010194624655,
      "grad_norm": 160.38147120987801,
      "learning_rate": 0.00024916033320336264,
      "loss": 3.067,
      "step": 185
    },
    {
      "epoch": 0.35217794253938833,
      "grad_norm": 30.070071532273786,
      "learning_rate": 0.0002454637880372892,
      "loss": 3.1301,
      "step": 190
    },
    {
      "epoch": 0.3614457831325301,
      "grad_norm": 48.47525499191394,
      "learning_rate": 0.0002416671145402575,
      "loss": 2.6178,
      "step": 195
    },
    {
      "epoch": 0.3707136237256719,
      "grad_norm": 48.37664262960068,
      "learning_rate": 0.00023777429489847934,
      "loss": 1.441,
      "step": 200
    },
    {
      "epoch": 0.3799814643188137,
      "grad_norm": 3.8665655676681245,
      "learning_rate": 0.0002337894121421954,
      "loss": 1.2978,
      "step": 205
    },
    {
      "epoch": 0.38924930491195553,
      "grad_norm": 2.9273559971935055,
      "learning_rate": 0.00022971664586314054,
      "loss": 1.0238,
      "step": 210
    },
    {
      "epoch": 0.3985171455050973,
      "grad_norm": 20.165413444524287,
      "learning_rate": 0.00022556026783072895,
      "loss": 0.8787,
      "step": 215
    },
    {
      "epoch": 0.4077849860982391,
      "grad_norm": 2.3870488681045203,
      "learning_rate": 0.00022132463751155812,
      "loss": 0.9467,
      "step": 220
    },
    {
      "epoch": 0.4170528266913809,
      "grad_norm": 4.038604860534557,
      "learning_rate": 0.00021701419749693034,
      "loss": 0.9708,
      "step": 225
    },
    {
      "epoch": 0.4263206672845227,
      "grad_norm": 2.7240106071190535,
      "learning_rate": 0.00021263346884318777,
      "loss": 1.0564,
      "step": 230
    },
    {
      "epoch": 0.4355885078776645,
      "grad_norm": 1.6377519083501402,
      "learning_rate": 0.00020818704632974896,
      "loss": 0.7724,
      "step": 235
    },
    {
      "epoch": 0.4448563484708063,
      "grad_norm": 1.0433662853439323,
      "learning_rate": 0.00020367959363981936,
      "loss": 0.8052,
      "step": 240
    },
    {
      "epoch": 0.4541241890639481,
      "grad_norm": 1.5351301360333338,
      "learning_rate": 0.00019911583846883197,
      "loss": 0.8407,
      "step": 245
    },
    {
      "epoch": 0.4633920296570899,
      "grad_norm": 1.2741936551633426,
      "learning_rate": 0.0001945005675657475,
      "loss": 0.8344,
      "step": 250
    },
    {
      "epoch": 0.4726598702502317,
      "grad_norm": 2.4652187839401467,
      "learning_rate": 0.00018983862171241577,
      "loss": 0.9688,
      "step": 255
    },
    {
      "epoch": 0.4819277108433735,
      "grad_norm": 1.1552631433359035,
      "learning_rate": 0.00018513489064626398,
      "loss": 0.8647,
      "step": 260
    },
    {
      "epoch": 0.4911955514365153,
      "grad_norm": 1.113328634852908,
      "learning_rate": 0.00018039430793163753,
      "loss": 0.8514,
      "step": 265
    },
    {
      "epoch": 0.5004633920296571,
      "grad_norm": 0.995245914359209,
      "learning_rate": 0.00017562184578517203,
      "loss": 0.8845,
      "step": 270
    },
    {
      "epoch": 0.5097312326227988,
      "grad_norm": 1.5488835001137233,
      "learning_rate": 0.00017082250986062502,
      "loss": 0.7809,
      "step": 275
    },
    {
      "epoch": 0.5189990732159406,
      "grad_norm": 1.5756484423065427,
      "learning_rate": 0.00016600133399863594,
      "loss": 0.8326,
      "step": 280
    },
    {
      "epoch": 0.5282669138090825,
      "grad_norm": 206.03580929049917,
      "learning_rate": 0.0001611633749469231,
      "loss": 2.2034,
      "step": 285
    },
    {
      "epoch": 0.5375347544022243,
      "grad_norm": 4.169210184616496,
      "learning_rate": 0.0001563137070564528,
      "loss": 1.9396,
      "step": 290
    },
    {
      "epoch": 0.5468025949953661,
      "grad_norm": 7.018651154352153,
      "learning_rate": 0.0001514574169591466,
      "loss": 1.4702,
      "step": 295
    },
    {
      "epoch": 0.5560704355885079,
      "grad_norm": 1.7304196471580864,
      "learning_rate": 0.0001465995982327065,
      "loss": 0.9425,
      "step": 300
    },
    {
      "epoch": 0.5653382761816497,
      "grad_norm": 2.241775914452003,
      "learning_rate": 0.00014174534605815525,
      "loss": 0.7961,
      "step": 305
    },
    {
      "epoch": 0.5746061167747915,
      "grad_norm": 3.456242970621782,
      "learning_rate": 0.0001368997518756954,
      "loss": 0.834,
      "step": 310
    },
    {
      "epoch": 0.5838739573679332,
      "grad_norm": 5.072902047355265,
      "learning_rate": 0.00013206789804449116,
      "loss": 0.99,
      "step": 315
    },
    {
      "epoch": 0.593141797961075,
      "grad_norm": 6.231760889035276,
      "learning_rate": 0.0001272548525119758,
      "loss": 0.8985,
      "step": 320
    },
    {
      "epoch": 0.6024096385542169,
      "grad_norm": 0.8710278075890822,
      "learning_rate": 0.0001224656634982746,
      "loss": 0.7725,
      "step": 325
    },
    {
      "epoch": 0.6116774791473587,
      "grad_norm": 1.799368795567863,
      "learning_rate": 0.00011770535420131876,
      "loss": 0.7621,
      "step": 330
    },
    {
      "epoch": 0.6209453197405005,
      "grad_norm": 0.9742992413928552,
      "learning_rate": 0.00011297891752820484,
      "loss": 0.8327,
      "step": 335
    },
    {
      "epoch": 0.6302131603336423,
      "grad_norm": 0.85009149768552,
      "learning_rate": 0.0001082913108583245,
      "loss": 0.6863,
      "step": 340
    },
    {
      "epoch": 0.6394810009267841,
      "grad_norm": 0.8181844995131707,
      "learning_rate": 0.0001036474508437579,
      "loss": 0.6779,
      "step": 345
    },
    {
      "epoch": 0.6487488415199258,
      "grad_norm": 0.9546399988989202,
      "learning_rate": 9.905220825238491e-05,
      "loss": 0.6872,
      "step": 350
    },
    {
      "epoch": 0.6580166821130676,
      "grad_norm": 0.6995327936772554,
      "learning_rate": 9.45104028591222e-05,
      "loss": 0.7025,
      "step": 355
    },
    {
      "epoch": 0.6672845227062094,
      "grad_norm": 0.7195227820211112,
      "learning_rate": 9.002679839064463e-05,
      "loss": 0.6807,
      "step": 360
    },
    {
      "epoch": 0.6765523632993512,
      "grad_norm": 0.6283846451216366,
      "learning_rate": 8.560609752889412e-05,
      "loss": 0.6888,
      "step": 365
    },
    {
      "epoch": 0.6858202038924931,
      "grad_norm": 0.7394400936445648,
      "learning_rate": 8.125293697861548e-05,
      "loss": 0.6542,
      "step": 370
    },
    {
      "epoch": 0.6950880444856349,
      "grad_norm": 0.6824620929581083,
      "learning_rate": 7.697188260409356e-05,
      "loss": 0.671,
      "step": 375
    },
    {
      "epoch": 0.7043558850787767,
      "grad_norm": 0.6116075298871171,
      "learning_rate": 7.276742464019198e-05,
      "loss": 0.6729,
      "step": 380
    },
    {
      "epoch": 0.7136237256719185,
      "grad_norm": 0.6453928151826652,
      "learning_rate": 6.864397298271699e-05,
      "loss": 0.6626,
      "step": 385
    },
    {
      "epoch": 0.7228915662650602,
      "grad_norm": 0.6716078881868212,
      "learning_rate": 6.460585256304559e-05,
      "loss": 0.6851,
      "step": 390
    },
    {
      "epoch": 0.732159406858202,
      "grad_norm": 0.6801638199214254,
      "learning_rate": 6.065729881186982e-05,
      "loss": 0.6168,
      "step": 395
    },
    {
      "epoch": 0.7414272474513438,
      "grad_norm": 0.4794662188577268,
      "learning_rate": 5.680245321681471e-05,
      "loss": 0.6531,
      "step": 400
    },
    {
      "epoch": 0.7506950880444856,
      "grad_norm": 1.029961404045439,
      "learning_rate": 5.304535897858999e-05,
      "loss": 0.6295,
      "step": 405
    },
    {
      "epoch": 0.7599629286376274,
      "grad_norm": 0.5284338458030478,
      "learning_rate": 4.938995677023054e-05,
      "loss": 0.6201,
      "step": 410
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.4516339145236783,
      "learning_rate": 4.584008060387455e-05,
      "loss": 0.6083,
      "step": 415
    },
    {
      "epoch": 0.7784986098239111,
      "grad_norm": 0.5727589086366411,
      "learning_rate": 4.239945380941461e-05,
      "loss": 0.6021,
      "step": 420
    },
    {
      "epoch": 0.7877664504170528,
      "grad_norm": 0.4804742186453503,
      "learning_rate": 3.907168512923842e-05,
      "loss": 0.5933,
      "step": 425
    },
    {
      "epoch": 0.7970342910101946,
      "grad_norm": 0.45776606255626884,
      "learning_rate": 3.5860264933156324e-05,
      "loss": 0.5774,
      "step": 430
    },
    {
      "epoch": 0.8063021316033364,
      "grad_norm": 1.2302657626291176,
      "learning_rate": 3.276856155748584e-05,
      "loss": 0.5908,
      "step": 435
    },
    {
      "epoch": 0.8155699721964782,
      "grad_norm": 0.9348743887035583,
      "learning_rate": 2.9799817772131516e-05,
      "loss": 0.592,
      "step": 440
    },
    {
      "epoch": 0.82483781278962,
      "grad_norm": 0.4027186043623421,
      "learning_rate": 2.6957147379367217e-05,
      "loss": 0.5798,
      "step": 445
    },
    {
      "epoch": 0.8341056533827618,
      "grad_norm": 0.5240634600695104,
      "learning_rate": 2.4243531947887802e-05,
      "loss": 0.5805,
      "step": 450
    },
    {
      "epoch": 0.8433734939759037,
      "grad_norm": 0.5416112282286268,
      "learning_rate": 2.1661817685554833e-05,
      "loss": 0.6067,
      "step": 455
    },
    {
      "epoch": 0.8526413345690455,
      "grad_norm": 0.47266676591358336,
      "learning_rate": 1.921471245411794e-05,
      "loss": 0.5962,
      "step": 460
    },
    {
      "epoch": 0.8619091751621872,
      "grad_norm": 0.6382596120183166,
      "learning_rate": 1.6904782929041693e-05,
      "loss": 0.5791,
      "step": 465
    },
    {
      "epoch": 0.871177015755329,
      "grad_norm": 0.49306751144827193,
      "learning_rate": 1.4734451907417255e-05,
      "loss": 0.6182,
      "step": 470
    },
    {
      "epoch": 0.8804448563484708,
      "grad_norm": 0.49635162924553927,
      "learning_rate": 1.2705995766783079e-05,
      "loss": 0.5521,
      "step": 475
    },
    {
      "epoch": 0.8897126969416126,
      "grad_norm": 0.49868563464480065,
      "learning_rate": 1.0821542077519169e-05,
      "loss": 0.5579,
      "step": 480
    },
    {
      "epoch": 0.8989805375347544,
      "grad_norm": 0.384294096915848,
      "learning_rate": 9.083067371319324e-06,
      "loss": 0.5532,
      "step": 485
    },
    {
      "epoch": 0.9082483781278962,
      "grad_norm": 0.6584770004045724,
      "learning_rate": 7.492395068082619e-06,
      "loss": 0.544,
      "step": 490
    },
    {
      "epoch": 0.917516218721038,
      "grad_norm": 0.5198655418147264,
      "learning_rate": 6.051193563397599e-06,
      "loss": 0.555,
      "step": 495
    },
    {
      "epoch": 0.9267840593141798,
      "grad_norm": 0.46652880015863485,
      "learning_rate": 4.760974478625634e-06,
      "loss": 0.5437,
      "step": 500
    },
    {
      "epoch": 0.9360518999073216,
      "grad_norm": 1.0569529661255628,
      "learning_rate": 3.623091075418977e-06,
      "loss": 0.5573,
      "step": 505
    },
    {
      "epoch": 0.9453197405004634,
      "grad_norm": 0.4373874071284105,
      "learning_rate": 2.638736836336158e-06,
      "loss": 0.5312,
      "step": 510
    },
    {
      "epoch": 0.9545875810936052,
      "grad_norm": 0.46826046078794514,
      "learning_rate": 1.8089442130434061e-06,
      "loss": 0.5648,
      "step": 515
    },
    {
      "epoch": 0.963855421686747,
      "grad_norm": 0.40638149868456475,
      "learning_rate": 1.1345835434156736e-06,
      "loss": 0.5417,
      "step": 520
    },
    {
      "epoch": 0.9731232622798888,
      "grad_norm": 0.4274920327067933,
      "learning_rate": 6.163621386722218e-07,
      "loss": 0.5528,
      "step": 525
    },
    {
      "epoch": 0.9823911028730306,
      "grad_norm": 0.41954954402917344,
      "learning_rate": 2.5482354150493935e-07,
      "loss": 0.549,
      "step": 530
    },
    {
      "epoch": 0.9916589434661723,
      "grad_norm": 0.46407841167410513,
      "learning_rate": 5.0346955976976467e-08,
      "loss": 0.5499,
      "step": 535
    },
    {
      "epoch": 0.9990732159406858,
      "eval_loss": 2.2535195350646973,
      "eval_runtime": 2.3617,
      "eval_samples_per_second": 1.694,
      "eval_steps_per_second": 0.423,
      "step": 539
    },
    {
      "epoch": 0.9990732159406858,
      "step": 539,
      "total_flos": 28187736145920.0,
      "train_loss": 1.0674916249259283,
      "train_runtime": 10422.8572,
      "train_samples_per_second": 1.655,
      "train_steps_per_second": 0.052
    }
  ],
  "logging_steps": 5,
  "max_steps": 539,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 28187736145920.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}