Initial commit of fine-tuned model using the omi-health/medical-dialogue-to-soap-summary dataset
b72b816
verified
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9993928354584092,
  "eval_steps": 500,
  "global_step": 823,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006071645415907711,
      "grad_norm": 0.24457845688569232,
      "learning_rate": 9.999089317673432e-05,
      "loss": 1.5749,
      "step": 5
    },
    {
      "epoch": 0.012143290831815421,
      "grad_norm": 0.26391371346280523,
      "learning_rate": 9.996357602430646e-05,
      "loss": 1.5727,
      "step": 10
    },
    {
      "epoch": 0.018214936247723135,
      "grad_norm": 0.27050405369105746,
      "learning_rate": 9.991805849361562e-05,
      "loss": 1.4543,
      "step": 15
    },
    {
      "epoch": 0.024286581663630843,
      "grad_norm": 0.39705249849447666,
      "learning_rate": 9.985435716546608e-05,
      "loss": 1.3521,
      "step": 20
    },
    {
      "epoch": 0.030358227079538554,
      "grad_norm": 0.34482550859107886,
      "learning_rate": 9.977249524452732e-05,
      "loss": 1.2841,
      "step": 25
    },
    {
      "epoch": 0.03642987249544627,
      "grad_norm": 0.38424241285585997,
      "learning_rate": 9.96725025508812e-05,
      "loss": 1.1925,
      "step": 30
    },
    {
      "epoch": 0.042501517911353974,
      "grad_norm": 0.2991869711584357,
      "learning_rate": 9.95544155091593e-05,
      "loss": 1.1573,
      "step": 35
    },
    {
      "epoch": 0.048573163327261686,
      "grad_norm": 0.29091768319491834,
      "learning_rate": 9.941827713527434e-05,
      "loss": 1.1386,
      "step": 40
    },
    {
      "epoch": 0.0546448087431694,
      "grad_norm": 0.2701117441828494,
      "learning_rate": 9.926413702075075e-05,
      "loss": 1.0848,
      "step": 45
    },
    {
      "epoch": 0.06071645415907711,
      "grad_norm": 0.2660514207123453,
      "learning_rate": 9.909205131465979e-05,
      "loss": 1.0918,
      "step": 50
    },
    {
      "epoch": 0.06678809957498483,
      "grad_norm": 0.24936524134232213,
      "learning_rate": 9.890208270316594e-05,
      "loss": 1.1005,
      "step": 55
    },
    {
      "epoch": 0.07285974499089254,
      "grad_norm": 0.2453116793576235,
      "learning_rate": 9.869430038669202e-05,
      "loss": 1.0775,
      "step": 60
    },
    {
      "epoch": 0.07893139040680024,
      "grad_norm": 0.27766127791240464,
      "learning_rate": 9.846878005471138e-05,
      "loss": 1.0932,
      "step": 65
    },
    {
      "epoch": 0.08500303582270795,
      "grad_norm": 0.2706580007666611,
      "learning_rate": 9.82256038581763e-05,
      "loss": 1.077,
      "step": 70
    },
    {
      "epoch": 0.09107468123861566,
      "grad_norm": 0.2622352686116729,
      "learning_rate": 9.796486037959252e-05,
      "loss": 1.0645,
      "step": 75
    },
    {
      "epoch": 0.09714632665452337,
      "grad_norm": 0.2915416298324084,
      "learning_rate": 9.768664460075113e-05,
      "loss": 1.0584,
      "step": 80
    },
    {
      "epoch": 0.10321797207043108,
      "grad_norm": 0.28210266400278944,
      "learning_rate": 9.739105786812924e-05,
      "loss": 1.0043,
      "step": 85
    },
    {
      "epoch": 0.1092896174863388,
      "grad_norm": 0.2834916737475469,
      "learning_rate": 9.707820785597219e-05,
      "loss": 1.0909,
      "step": 90
    },
    {
      "epoch": 0.1153612629022465,
      "grad_norm": 0.28178909138002517,
      "learning_rate": 9.674820852707076e-05,
      "loss": 1.0605,
      "step": 95
    },
    {
      "epoch": 0.12143290831815422,
      "grad_norm": 0.2899825819853286,
      "learning_rate": 9.64011800912476e-05,
      "loss": 0.9849,
      "step": 100
    },
    {
      "epoch": 0.12750455373406194,
      "grad_norm": 0.26579632159231636,
      "learning_rate": 9.603724896156805e-05,
      "loss": 1.0159,
      "step": 105
    },
    {
      "epoch": 0.13357619914996965,
      "grad_norm": 0.2905253394887582,
      "learning_rate": 9.565654770829123e-05,
      "loss": 1.0435,
      "step": 110
    },
    {
      "epoch": 0.13964784456587737,
      "grad_norm": 0.2853787242613106,
      "learning_rate": 9.52592150105784e-05,
      "loss": 1.0113,
      "step": 115
    },
    {
      "epoch": 0.14571948998178508,
      "grad_norm": 0.2908783611322731,
      "learning_rate": 9.484539560597576e-05,
      "loss": 1.0053,
      "step": 120
    },
    {
      "epoch": 0.15179113539769276,
      "grad_norm": 0.2884438105008898,
      "learning_rate": 9.441524023769058e-05,
      "loss": 1.0028,
      "step": 125
    },
    {
      "epoch": 0.15786278081360047,
      "grad_norm": 0.3429769385996274,
      "learning_rate": 9.396890559967951e-05,
      "loss": 1.0139,
      "step": 130
    },
    {
      "epoch": 0.16393442622950818,
      "grad_norm": 0.3056010970713378,
      "learning_rate": 9.350655427956918e-05,
      "loss": 1.0315,
      "step": 135
    },
    {
      "epoch": 0.1700060716454159,
      "grad_norm": 0.3048115308528829,
      "learning_rate": 9.302835469942992e-05,
      "loss": 1.0359,
      "step": 140
    },
    {
      "epoch": 0.1760777170613236,
      "grad_norm": 0.30341692290041095,
      "learning_rate": 9.253448105442422e-05,
      "loss": 1.0297,
      "step": 145
    },
    {
      "epoch": 0.18214936247723132,
      "grad_norm": 0.31063335948647086,
      "learning_rate": 9.202511324935213e-05,
      "loss": 1.03,
      "step": 150
    },
    {
      "epoch": 0.18822100789313903,
      "grad_norm": 0.32897710252989215,
      "learning_rate": 9.150043683311673e-05,
      "loss": 1.0367,
      "step": 155
    },
    {
      "epoch": 0.19429265330904674,
      "grad_norm": 0.28758174259282,
      "learning_rate": 9.096064293113382e-05,
      "loss": 1.0204,
      "step": 160
    },
    {
      "epoch": 0.20036429872495445,
      "grad_norm": 0.3022100238885681,
      "learning_rate": 9.040592817571001e-05,
      "loss": 1.025,
      "step": 165
    },
    {
      "epoch": 0.20643594414086217,
      "grad_norm": 0.31328298710681657,
      "learning_rate": 8.983649463441493e-05,
      "loss": 1.0103,
      "step": 170
    },
    {
      "epoch": 0.21250758955676988,
      "grad_norm": 0.3056230604848522,
      "learning_rate": 8.925254973647343e-05,
      "loss": 1.0119,
      "step": 175
    },
    {
      "epoch": 0.2185792349726776,
      "grad_norm": 0.2932278475986119,
      "learning_rate": 8.865430619720483e-05,
      "loss": 1.0125,
      "step": 180
    },
    {
      "epoch": 0.2246508803885853,
      "grad_norm": 0.28707514291349817,
      "learning_rate": 8.804198194053642e-05,
      "loss": 0.9675,
      "step": 185
    },
    {
      "epoch": 0.230722525804493,
      "grad_norm": 0.31551606190516496,
      "learning_rate": 8.741580001961966e-05,
      "loss": 0.9934,
      "step": 190
    },
    {
      "epoch": 0.23679417122040072,
      "grad_norm": 0.3081186246259813,
      "learning_rate": 8.677598853557797e-05,
      "loss": 1.0446,
      "step": 195
    },
    {
      "epoch": 0.24286581663630843,
      "grad_norm": 0.32635829071646627,
      "learning_rate": 8.612278055441573e-05,
      "loss": 0.9928,
      "step": 200
    },
    {
      "epoch": 0.24893746205221615,
      "grad_norm": 0.3056664443473449,
      "learning_rate": 8.54564140221185e-05,
      "loss": 0.998,
      "step": 205
    },
    {
      "epoch": 0.2550091074681239,
      "grad_norm": 0.31526198269540723,
      "learning_rate": 8.477713167797591e-05,
      "loss": 1.0053,
      "step": 210
    },
    {
      "epoch": 0.2610807528840316,
      "grad_norm": 0.3222176583928717,
      "learning_rate": 8.408518096615817e-05,
      "loss": 1.0034,
      "step": 215
    },
    {
      "epoch": 0.2671523982999393,
      "grad_norm": 0.3169713895071339,
      "learning_rate": 8.338081394557892e-05,
      "loss": 0.9812,
      "step": 220
    },
    {
      "epoch": 0.273224043715847,
      "grad_norm": 0.32407653940571435,
      "learning_rate": 8.2664287198077e-05,
      "loss": 1.0051,
      "step": 225
    },
    {
      "epoch": 0.27929568913175473,
      "grad_norm": 0.3295876290581108,
      "learning_rate": 8.193586173495056e-05,
      "loss": 0.9978,
      "step": 230
    },
    {
      "epoch": 0.28536733454766244,
      "grad_norm": 0.3188765761682836,
      "learning_rate": 8.119580290187783e-05,
      "loss": 1.0001,
      "step": 235
    },
    {
      "epoch": 0.29143897996357016,
      "grad_norm": 0.3387778443057323,
      "learning_rate": 8.044438028225879e-05,
      "loss": 1.0466,
      "step": 240
    },
    {
      "epoch": 0.2975106253794778,
      "grad_norm": 0.32002789503741597,
      "learning_rate": 7.968186759901315e-05,
      "loss": 1.0313,
      "step": 245
    },
    {
      "epoch": 0.3035822707953855,
      "grad_norm": 0.3216979529684869,
      "learning_rate": 7.890854261487074e-05,
      "loss": 0.9845,
      "step": 250
    },
    {
      "epoch": 0.30965391621129323,
      "grad_norm": 0.3222228418533532,
      "learning_rate": 7.812468703118985e-05,
      "loss": 0.9985,
      "step": 255
    },
    {
      "epoch": 0.31572556162720095,
      "grad_norm": 0.32349662017935793,
      "learning_rate": 7.733058638534113e-05,
      "loss": 0.9589,
      "step": 260
    },
    {
      "epoch": 0.32179720704310866,
      "grad_norm": 0.31939553229363304,
      "learning_rate": 7.652652994669406e-05,
      "loss": 1.0175,
      "step": 265
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 0.311734388011201,
      "learning_rate": 7.571281061124394e-05,
      "loss": 0.9874,
      "step": 270
    },
    {
      "epoch": 0.3339404978749241,
      "grad_norm": 0.3322440364094201,
      "learning_rate": 7.488972479491778e-05,
      "loss": 1.001,
      "step": 275
    },
    {
      "epoch": 0.3400121432908318,
      "grad_norm": 0.3293068128055922,
      "learning_rate": 7.405757232559807e-05,
      "loss": 0.9384,
      "step": 280
    },
    {
      "epoch": 0.3460837887067395,
      "grad_norm": 0.34714958730180595,
      "learning_rate": 7.321665633390355e-05,
      "loss": 0.9812,
      "step": 285
    },
    {
      "epoch": 0.3521554341226472,
      "grad_norm": 0.3341860479562958,
      "learning_rate": 7.236728314276692e-05,
      "loss": 1.0073,
      "step": 290
    },
    {
      "epoch": 0.3582270795385549,
      "grad_norm": 0.3477899939169161,
      "learning_rate": 7.150976215584967e-05,
      "loss": 0.9846,
      "step": 295
    },
    {
      "epoch": 0.36429872495446264,
      "grad_norm": 0.3435321427630364,
      "learning_rate": 7.064440574483482e-05,
      "loss": 0.961,
      "step": 300
    },
    {
      "epoch": 0.37037037037037035,
      "grad_norm": 0.3571408920594794,
      "learning_rate": 6.977152913563825e-05,
      "loss": 0.9517,
      "step": 305
    },
    {
      "epoch": 0.37644201578627806,
      "grad_norm": 0.3378465695868974,
      "learning_rate": 6.889145029358046e-05,
      "loss": 0.9717,
      "step": 310
    },
    {
      "epoch": 0.3825136612021858,
      "grad_norm": 0.34744636886470215,
      "learning_rate": 6.800448980756042e-05,
      "loss": 1.0026,
      "step": 315
    },
    {
      "epoch": 0.3885853066180935,
      "grad_norm": 0.3279610405694282,
      "learning_rate": 6.711097077327372e-05,
      "loss": 0.9726,
      "step": 320
    },
    {
      "epoch": 0.3946569520340012,
      "grad_norm": 0.3417715632798864,
      "learning_rate": 6.621121867551759e-05,
      "loss": 0.9958,
      "step": 325
    },
    {
      "epoch": 0.4007285974499089,
      "grad_norm": 0.33073846039850685,
      "learning_rate": 6.530556126962545e-05,
      "loss": 0.9771,
      "step": 330
    },
    {
      "epoch": 0.4068002428658166,
      "grad_norm": 0.37065478796323925,
      "learning_rate": 6.439432846207475e-05,
      "loss": 0.9954,
      "step": 335
    },
    {
      "epoch": 0.41287188828172433,
      "grad_norm": 0.35459118756191277,
      "learning_rate": 6.347785219031077e-05,
      "loss": 0.9812,
      "step": 340
    },
    {
      "epoch": 0.41894353369763204,
      "grad_norm": 0.33241922586525224,
      "learning_rate": 6.255646630183082e-05,
      "loss": 0.9793,
      "step": 345
    },
    {
      "epoch": 0.42501517911353975,
      "grad_norm": 0.3392477463965851,
      "learning_rate": 6.163050643257282e-05,
      "loss": 1.0413,
      "step": 350
    },
    {
      "epoch": 0.43108682452944747,
      "grad_norm": 0.36159716352209215,
      "learning_rate": 6.070030988465192e-05,
      "loss": 0.9742,
      "step": 355
    },
    {
      "epoch": 0.4371584699453552,
      "grad_norm": 0.36382042578767604,
      "learning_rate": 5.976621550349072e-05,
      "loss": 0.9664,
      "step": 360
    },
    {
      "epoch": 0.4432301153612629,
      "grad_norm": 0.34643585549602424,
      "learning_rate": 5.8828563554386954e-05,
      "loss": 0.9871,
      "step": 365
    },
    {
      "epoch": 0.4493017607771706,
      "grad_norm": 0.36541385782952596,
      "learning_rate": 5.7887695598563975e-05,
      "loss": 1.0445,
      "step": 370
    },
    {
      "epoch": 0.4553734061930783,
      "grad_norm": 0.3621329521595355,
      "learning_rate": 5.694395436874942e-05,
      "loss": 0.9847,
      "step": 375
    },
    {
      "epoch": 0.461445051608986,
      "grad_norm": 0.35444381061319274,
      "learning_rate": 5.5997683644326804e-05,
      "loss": 0.9952,
      "step": 380
    },
    {
      "epoch": 0.46751669702489373,
      "grad_norm": 0.3393936558293067,
      "learning_rate": 5.50492281261061e-05,
      "loss": 1.0073,
      "step": 385
    },
    {
      "epoch": 0.47358834244080145,
      "grad_norm": 0.35553712556373906,
      "learning_rate": 5.40989333107585e-05,
      "loss": 1.0,
      "step": 390
    },
    {
      "epoch": 0.47965998785670916,
      "grad_norm": 0.3508589267110535,
      "learning_rate": 5.314714536496135e-05,
      "loss": 1.0051,
      "step": 395
    },
    {
      "epoch": 0.48573163327261687,
      "grad_norm": 0.3688927085936352,
      "learning_rate": 5.219421099929899e-05,
      "loss": 1.028,
      "step": 400
    },
    {
      "epoch": 0.4918032786885246,
      "grad_norm": 0.3478126659800513,
      "learning_rate": 5.1240477341965485e-05,
      "loss": 0.9681,
      "step": 405
    },
    {
      "epoch": 0.4978749241044323,
      "grad_norm": 0.3403963388182524,
      "learning_rate": 5.028629181231526e-05,
      "loss": 0.9781,
      "step": 410
    },
    {
      "epoch": 0.50394656952034,
      "grad_norm": 0.3706190417343179,
      "learning_rate": 4.933200199430754e-05,
      "loss": 0.9824,
      "step": 415
    },
    {
      "epoch": 0.5100182149362478,
      "grad_norm": 0.3340345296512503,
      "learning_rate": 4.837795550989101e-05,
      "loss": 0.9414,
      "step": 420
    },
    {
      "epoch": 0.5160898603521554,
      "grad_norm": 0.35075966838432565,
      "learning_rate": 4.74244998923745e-05,
      "loss": 1.0231,
      "step": 425
    },
    {
      "epoch": 0.5221615057680632,
      "grad_norm": 0.3500843432014112,
      "learning_rate": 4.647198245983005e-05,
      "loss": 0.9868,
      "step": 430
    },
    {
      "epoch": 0.5282331511839709,
      "grad_norm": 0.35747081745898696,
      "learning_rate": 4.552075018857423e-05,
      "loss": 0.9591,
      "step": 435
    },
    {
      "epoch": 0.5343047965998786,
      "grad_norm": 0.3462424877362867,
      "learning_rate": 4.457114958677424e-05,
      "loss": 0.9705,
      "step": 440
    },
    {
      "epoch": 0.5403764420157863,
      "grad_norm": 0.3329658938355871,
      "learning_rate": 4.362352656822422e-05,
      "loss": 0.9658,
      "step": 445
    },
    {
      "epoch": 0.546448087431694,
      "grad_norm": 0.3465063916450208,
      "learning_rate": 4.2678226326338246e-05,
      "loss": 0.9821,
      "step": 450
    },
    {
      "epoch": 0.5525197328476017,
      "grad_norm": 0.36950370087466716,
      "learning_rate": 4.173559320840579e-05,
      "loss": 0.975,
      "step": 455
    },
    {
      "epoch": 0.5585913782635095,
      "grad_norm": 0.38755895845945404,
      "learning_rate": 4.079597059015518e-05,
      "loss": 0.9871,
      "step": 460
    },
    {
      "epoch": 0.5646630236794171,
      "grad_norm": 0.36167893272457224,
      "learning_rate": 3.9859700750671275e-05,
      "loss": 0.9539,
      "step": 465
    },
    {
      "epoch": 0.5707346690953249,
      "grad_norm": 0.3393106959735491,
      "learning_rate": 3.892712474771237e-05,
      "loss": 0.9642,
      "step": 470
    },
    {
      "epoch": 0.5768063145112325,
      "grad_norm": 0.36327402527885383,
      "learning_rate": 3.7998582293472084e-05,
      "loss": 0.9541,
      "step": 475
    },
    {
      "epoch": 0.5828779599271403,
      "grad_norm": 0.35803017903945483,
      "learning_rate": 3.707441163083146e-05,
      "loss": 0.9581,
      "step": 480
    },
    {
      "epoch": 0.588949605343048,
      "grad_norm": 0.35636927377775207,
      "learning_rate": 3.6154949410146136e-05,
      "loss": 0.9915,
      "step": 485
    },
    {
      "epoch": 0.5950212507589556,
      "grad_norm": 0.3478523328672183,
      "learning_rate": 3.524053056661385e-05,
      "loss": 0.9658,
      "step": 490
    },
    {
      "epoch": 0.6010928961748634,
      "grad_norm": 0.3622729352136925,
      "learning_rate": 3.4331488198266576e-05,
      "loss": 0.9662,
      "step": 495
    },
    {
      "epoch": 0.607164541590771,
      "grad_norm": 0.36926496729541863,
      "learning_rate": 3.34281534446319e-05,
      "loss": 0.9308,
      "step": 500
    },
    {
      "epoch": 0.6132361870066788,
      "grad_norm": 0.3725805831457224,
      "learning_rate": 3.253085536610786e-05,
      "loss": 1.0059,
      "step": 505
    },
    {
      "epoch": 0.6193078324225865,
      "grad_norm": 0.356438235198244,
      "learning_rate": 3.163992082409515e-05,
      "loss": 0.9844,
      "step": 510
    },
    {
      "epoch": 0.6253794778384942,
      "grad_norm": 0.3674543124679783,
      "learning_rate": 3.0755674361930385e-05,
      "loss": 0.9734,
      "step": 515
    },
    {
      "epoch": 0.6314511232544019,
      "grad_norm": 0.3667296471323237,
      "learning_rate": 2.987843808666375e-05,
      "loss": 0.945,
      "step": 520
    },
    {
      "epoch": 0.6375227686703097,
      "grad_norm": 0.35853673283205795,
      "learning_rate": 2.9008531551724095e-05,
      "loss": 0.9305,
      "step": 525
    },
    {
      "epoch": 0.6435944140862173,
      "grad_norm": 0.3612263024162725,
      "learning_rate": 2.814627164051429e-05,
      "loss": 0.9662,
      "step": 530
    },
    {
      "epoch": 0.6496660595021251,
      "grad_norm": 0.36442156113628843,
      "learning_rate": 2.729197245097908e-05,
      "loss": 0.961,
      "step": 535
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 0.37491981923510603,
      "learning_rate": 2.6445945181187947e-05,
      "loss": 0.9942,
      "step": 540
    },
    {
      "epoch": 0.6618093503339405,
      "grad_norm": 0.37088299839099026,
      "learning_rate": 2.5608498015973813e-05,
      "loss": 0.9711,
      "step": 545
    },
    {
      "epoch": 0.6678809957498482,
      "grad_norm": 0.36485018535725894,
      "learning_rate": 2.4779936014669792e-05,
      "loss": 0.9841,
      "step": 550
    },
    {
      "epoch": 0.6739526411657559,
      "grad_norm": 0.37286440050209124,
      "learning_rate": 2.396056099998435e-05,
      "loss": 0.9781,
      "step": 555
    },
    {
      "epoch": 0.6800242865816636,
      "grad_norm": 0.38374404754220387,
      "learning_rate": 2.31506714480553e-05,
      "loss": 0.9712,
      "step": 560
    },
    {
      "epoch": 0.6860959319975714,
      "grad_norm": 0.34919826336650855,
      "learning_rate": 2.2350562379723263e-05,
      "loss": 0.9345,
      "step": 565
    },
    {
      "epoch": 0.692167577413479,
      "grad_norm": 0.37159419535563243,
      "learning_rate": 2.1560525253063358e-05,
      "loss": 0.9537,
      "step": 570
    },
    {
      "epoch": 0.6982392228293868,
      "grad_norm": 0.35402600110835986,
      "learning_rate": 2.0780847857215006e-05,
      "loss": 0.9961,
      "step": 575
    },
    {
      "epoch": 0.7043108682452944,
      "grad_norm": 0.36907420983715,
      "learning_rate": 2.001181420754819e-05,
      "loss": 0.9739,
      "step": 580
    },
    {
      "epoch": 0.7103825136612022,
      "grad_norm": 0.35691133344150777,
      "learning_rate": 1.925370444220415e-05,
      "loss": 0.961,
      "step": 585
    },
    {
      "epoch": 0.7164541590771099,
      "grad_norm": 0.3654680752856991,
      "learning_rate": 1.8506794720048902e-05,
      "loss": 0.9554,
      "step": 590
    },
    {
      "epoch": 0.7225258044930176,
      "grad_norm": 0.36670809620950345,
      "learning_rate": 1.777135712007583e-05,
      "loss": 0.9968,
      "step": 595
    },
    {
      "epoch": 0.7285974499089253,
      "grad_norm": 0.37074108181866644,
      "learning_rate": 1.704765954229476e-05,
      "loss": 0.9916,
      "step": 600
    },
    {
      "epoch": 0.734669095324833,
      "grad_norm": 0.3699841020680654,
      "learning_rate": 1.6335965610143272e-05,
      "loss": 0.9822,
      "step": 605
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 0.34824224824135186,
      "learning_rate": 1.5636534574455686e-05,
      "loss": 0.9515,
      "step": 610
    },
    {
      "epoch": 0.7468123861566485,
      "grad_norm": 0.35545971051734354,
      "learning_rate": 1.4949621219025194e-05,
      "loss": 0.979,
      "step": 615
    },
    {
      "epoch": 0.7528840315725561,
      "grad_norm": 0.36939000283625245,
      "learning_rate": 1.4275475767792845e-05,
      "loss": 0.9447,
      "step": 620
    },
    {
      "epoch": 0.7589556769884639,
      "grad_norm": 0.37722833600145106,
      "learning_rate": 1.361434379369783e-05,
      "loss": 0.9825,
      "step": 625
    },
    {
      "epoch": 0.7650273224043715,
      "grad_norm": 0.3737568094142613,
      "learning_rate": 1.2966466129221883e-05,
      "loss": 0.9594,
      "step": 630
    },
    {
      "epoch": 0.7710989678202793,
      "grad_norm": 0.38063677046968875,
      "learning_rate": 1.2332078778660517e-05,
      "loss": 0.9536,
      "step": 635
    },
    {
      "epoch": 0.777170613236187,
      "grad_norm": 0.36470282230989215,
      "learning_rate": 1.1711412832153101e-05,
      "loss": 0.9604,
      "step": 640
    },
    {
      "epoch": 0.7832422586520947,
      "grad_norm": 0.36050493110455295,
      "learning_rate": 1.11046943815029e-05,
      "loss": 0.9661,
      "step": 645
    },
    {
      "epoch": 0.7893139040680024,
      "grad_norm": 0.36561374914578487,
      "learning_rate": 1.0512144437817994e-05,
      "loss": 0.981,
      "step": 650
    },
    {
      "epoch": 0.7953855494839102,
      "grad_norm": 0.378396003181358,
      "learning_rate": 9.933978851002913e-06,
      "loss": 0.9655,
      "step": 655
    },
    {
      "epoch": 0.8014571948998178,
      "grad_norm": 0.37891923107554043,
      "learning_rate": 9.370408231130346e-06,
      "loss": 0.9359,
      "step": 660
    },
    {
      "epoch": 0.8075288403157256,
      "grad_norm": 0.3668948751622055,
      "learning_rate": 8.821637871721621e-06,
      "loss": 0.9567,
      "step": 665
    },
    {
      "epoch": 0.8136004857316332,
      "grad_norm": 0.3908476341217553,
      "learning_rate": 8.287867674963807e-06,
      "loss": 0.9694,
      "step": 670
    },
    {
      "epoch": 0.819672131147541,
      "grad_norm": 0.35662541569917056,
      "learning_rate": 7.769292078890745e-06,
      "loss": 0.9424,
      "step": 675
    },
    {
      "epoch": 0.8257437765634487,
      "grad_norm": 0.3651570402984319,
      "learning_rate": 7.266099986554576e-06,
      "loss": 0.9955,
      "step": 680
    },
    {
      "epoch": 0.8318154219793564,
      "grad_norm": 0.3661244231382078,
      "learning_rate": 6.778474697213427e-06,
      "loss": 0.9677,
      "step": 685
    },
    {
      "epoch": 0.8378870673952641,
      "grad_norm": 0.36996265866155903,
      "learning_rate": 6.306593839560521e-06,
      "loss": 0.9487,
      "step": 690
    },
    {
      "epoch": 0.8439587128111719,
      "grad_norm": 0.3621026556302149,
      "learning_rate": 5.850629307018768e-06,
      "loss": 0.9315,
      "step": 695
    },
    {
      "epoch": 0.8500303582270795,
      "grad_norm": 0.362727406662448,
      "learning_rate": 5.410747195124704e-06,
      "loss": 0.9498,
      "step": 700
    },
    {
      "epoch": 0.8561020036429873,
      "grad_norm": 0.370792614501865,
      "learning_rate": 4.98710774102435e-06,
      "loss": 0.9726,
      "step": 705
    },
    {
      "epoch": 0.8621736490588949,
      "grad_norm": 0.3803856942579128,
      "learning_rate": 4.5798652651031835e-06,
      "loss": 1.007,
      "step": 710
    },
    {
      "epoch": 0.8682452944748027,
      "grad_norm": 0.35999841674151944,
      "learning_rate": 4.189168114771391e-06,
      "loss": 0.9331,
      "step": 715
    },
    {
      "epoch": 0.8743169398907104,
      "grad_norm": 0.3602664439468411,
      "learning_rate": 3.815158610424896e-06,
      "loss": 0.9597,
      "step": 720
    },
    {
      "epoch": 0.8803885853066181,
      "grad_norm": 0.35149568085349064,
      "learning_rate": 3.4579729936019655e-06,
      "loss": 0.97,
      "step": 725
    },
    {
      "epoch": 0.8864602307225258,
      "grad_norm": 0.3624842659189334,
      "learning_rate": 3.1177413773539775e-06,
      "loss": 0.9351,
      "step": 730
    },
    {
      "epoch": 0.8925318761384335,
      "grad_norm": 0.36379142989482044,
      "learning_rate": 2.7945876988488883e-06,
      "loss": 0.9501,
      "step": 735
    },
    {
      "epoch": 0.8986035215543412,
      "grad_norm": 0.37684310590658127,
      "learning_rate": 2.488629674224213e-06,
      "loss": 0.9701,
      "step": 740
    },
    {
      "epoch": 0.904675166970249,
      "grad_norm": 0.3587826562170923,
      "learning_rate": 2.199978755706228e-06,
      "loss": 0.9387,
      "step": 745
    },
    {
      "epoch": 0.9107468123861566,
      "grad_norm": 0.36521231530865467,
      "learning_rate": 1.928740091010961e-06,
      "loss": 0.9637,
      "step": 750
    },
    {
      "epoch": 0.9168184578020644,
      "grad_norm": 0.35686456437969416,
      "learning_rate": 1.6750124850416826e-06,
      "loss": 0.9706,
      "step": 755
    },
    {
      "epoch": 0.922890103217972,
      "grad_norm": 0.362462586295161,
      "learning_rate": 1.4388883638970063e-06,
      "loss": 0.9857,
      "step": 760
    },
    {
      "epoch": 0.9289617486338798,
      "grad_norm": 0.37168300236487517,
      "learning_rate": 1.220453741202543e-06,
      "loss": 0.9939,
      "step": 765
    },
    {
      "epoch": 0.9350333940497875,
      "grad_norm": 0.3892333517564668,
      "learning_rate": 1.0197881867784365e-06,
      "loss": 0.9534,
      "step": 770
    },
    {
      "epoch": 0.9411050394656952,
      "grad_norm": 0.3800794703218157,
      "learning_rate": 8.369647976542883e-07,
      "loss": 0.9638,
      "step": 775
    },
    {
      "epoch": 0.9471766848816029,
      "grad_norm": 0.37279762749246226,
      "learning_rate": 6.720501714418237e-07,
      "loss": 0.9872,
      "step": 780
    },
    {
      "epoch": 0.9532483302975107,
      "grad_norm": 0.3678126186430571,
      "learning_rate": 5.251043820752532e-07,
      "loss": 0.9706,
      "step": 785
    },
    {
      "epoch": 0.9593199757134183,
      "grad_norm": 0.36745295930354865,
      "learning_rate": 3.9618095792790524e-07,
      "loss": 0.9686,
      "step": 790
    },
    {
      "epoch": 0.9653916211293261,
      "grad_norm": 0.3545247585982039,
      "learning_rate": 2.853268623133232e-07,
      "loss": 0.8937,
      "step": 795
    },
    {
      "epoch": 0.9714632665452337,
      "grad_norm": 0.3562774345464157,
      "learning_rate": 1.9258247637778392e-07,
      "loss": 0.982,
      "step": 800
    },
    {
      "epoch": 0.9775349119611415,
      "grad_norm": 0.3912418038591769,
      "learning_rate": 1.179815843905585e-07,
      "loss": 0.9705,
      "step": 805
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 0.36095941098323664,
      "learning_rate": 6.155136143718987e-08,
      "loss": 0.9478,
      "step": 810
    },
    {
      "epoch": 0.9896782027929569,
      "grad_norm": 0.39365635306638663,
      "learning_rate": 2.3312363520378023e-08,
      "loss": 0.9949,
      "step": 815
    },
    {
      "epoch": 0.9957498482088646,
      "grad_norm": 0.36179997068618486,
      "learning_rate": 3.2785200719476216e-09,
      "loss": 0.9421,
      "step": 820
    },
    {
      "epoch": 0.9993928354584092,
      "eval_loss": 1.1136568784713745,
      "eval_runtime": 17.0161,
      "eval_samples_per_second": 8.991,
      "eval_steps_per_second": 2.292,
      "step": 823
    }
  ],
  "logging_steps": 5,
  "max_steps": 823,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 517472256000000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}