|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997038791827065, |
|
"eval_steps": 500, |
|
"global_step": 1688, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005922416345869114, |
|
"grad_norm": 26.875, |
|
"learning_rate": 2.9585798816568044e-08, |
|
"log_odds_chosen": -0.4994420111179352, |
|
"log_odds_ratio": -1.0620524883270264, |
|
"logits/chosen": -2.227687358856201, |
|
"logits/rejected": -2.213762044906616, |
|
"logps/chosen": -0.7160366773605347, |
|
"logps/rejected": -0.47193747758865356, |
|
"loss": 1.3693, |
|
"nll_loss": 1.2856990098953247, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.0716036707162857, |
|
"rewards/margins": -0.02440992370247841, |
|
"rewards/rejected": -0.047193750739097595, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.011844832691738229, |
|
"grad_norm": 26.5, |
|
"learning_rate": 5.917159763313609e-08, |
|
"log_odds_chosen": -0.6077697277069092, |
|
"log_odds_ratio": -1.154677152633667, |
|
"logits/chosen": -2.1866495609283447, |
|
"logits/rejected": -2.1631338596343994, |
|
"logps/chosen": -0.8245598077774048, |
|
"logps/rejected": -0.4715619683265686, |
|
"loss": 1.3378, |
|
"nll_loss": 1.228305459022522, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.08245597779750824, |
|
"rewards/margins": -0.03529978543519974, |
|
"rewards/rejected": -0.0471561960875988, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.017767249037607343, |
|
"grad_norm": 29.125, |
|
"learning_rate": 8.875739644970414e-08, |
|
"log_odds_chosen": -0.5950562357902527, |
|
"log_odds_ratio": -1.171638011932373, |
|
"logits/chosen": -2.152902126312256, |
|
"logits/rejected": -2.1443581581115723, |
|
"logps/chosen": -0.854525089263916, |
|
"logps/rejected": -0.49298763275146484, |
|
"loss": 1.3488, |
|
"nll_loss": 1.3134263753890991, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.08545249700546265, |
|
"rewards/margins": -0.03615374490618706, |
|
"rewards/rejected": -0.049298763275146484, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.023689665383476458, |
|
"grad_norm": 31.25, |
|
"learning_rate": 1.1834319526627217e-07, |
|
"log_odds_chosen": -0.5344940423965454, |
|
"log_odds_ratio": -1.0923480987548828, |
|
"logits/chosen": -2.219038486480713, |
|
"logits/rejected": -2.2063724994659424, |
|
"logps/chosen": -0.7574710845947266, |
|
"logps/rejected": -0.4638025760650635, |
|
"loss": 1.3817, |
|
"nll_loss": 1.2359822988510132, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.07574710994958878, |
|
"rewards/margins": -0.02936685085296631, |
|
"rewards/rejected": -0.04638025909662247, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.029612081729345572, |
|
"grad_norm": 28.0, |
|
"learning_rate": 1.4792899408284022e-07, |
|
"log_odds_chosen": -0.4542032778263092, |
|
"log_odds_ratio": -1.0256363153457642, |
|
"logits/chosen": -2.1617987155914307, |
|
"logits/rejected": -2.146223545074463, |
|
"logps/chosen": -0.7006078958511353, |
|
"logps/rejected": -0.47175368666648865, |
|
"loss": 1.3127, |
|
"nll_loss": 1.2409818172454834, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.07006079703569412, |
|
"rewards/margins": -0.022885426878929138, |
|
"rewards/rejected": -0.047175366431474686, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.035534498075214686, |
|
"grad_norm": 23.125, |
|
"learning_rate": 1.7751479289940827e-07, |
|
"log_odds_chosen": -0.6588231921195984, |
|
"log_odds_ratio": -1.230991005897522, |
|
"logits/chosen": -2.2183756828308105, |
|
"logits/rejected": -2.187129259109497, |
|
"logps/chosen": -0.8897625207901001, |
|
"logps/rejected": -0.4612082540988922, |
|
"loss": 1.3568, |
|
"nll_loss": 1.2310936450958252, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.08897626399993896, |
|
"rewards/margins": -0.04285542666912079, |
|
"rewards/rejected": -0.04612082242965698, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.041456914421083804, |
|
"grad_norm": 30.75, |
|
"learning_rate": 2.0710059171597633e-07, |
|
"log_odds_chosen": -0.5367478132247925, |
|
"log_odds_ratio": -1.1100060939788818, |
|
"logits/chosen": -2.232348918914795, |
|
"logits/rejected": -2.1998302936553955, |
|
"logps/chosen": -0.7972711324691772, |
|
"logps/rejected": -0.4634431302547455, |
|
"loss": 1.3614, |
|
"nll_loss": 1.2567493915557861, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.07972709834575653, |
|
"rewards/margins": -0.03338279575109482, |
|
"rewards/rejected": -0.04634431377053261, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.047379330766952915, |
|
"grad_norm": 28.875, |
|
"learning_rate": 2.3668639053254435e-07, |
|
"log_odds_chosen": -0.5755403637886047, |
|
"log_odds_ratio": -1.156178593635559, |
|
"logits/chosen": -2.197105884552002, |
|
"logits/rejected": -2.186234474182129, |
|
"logps/chosen": -0.7956789135932922, |
|
"logps/rejected": -0.4599471688270569, |
|
"loss": 1.3327, |
|
"nll_loss": 1.219543695449829, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.07956788688898087, |
|
"rewards/margins": -0.033573172986507416, |
|
"rewards/rejected": -0.04599471390247345, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05330174711282203, |
|
"grad_norm": 25.25, |
|
"learning_rate": 2.662721893491124e-07, |
|
"log_odds_chosen": -0.5014861226081848, |
|
"log_odds_ratio": -1.0729024410247803, |
|
"logits/chosen": -2.1807546615600586, |
|
"logits/rejected": -2.1571853160858154, |
|
"logps/chosen": -0.7198914289474487, |
|
"logps/rejected": -0.46573418378829956, |
|
"loss": 1.3113, |
|
"nll_loss": 1.224487066268921, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.07198914140462875, |
|
"rewards/margins": -0.025415724143385887, |
|
"rewards/rejected": -0.046573419123888016, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.059224163458691144, |
|
"grad_norm": 25.125, |
|
"learning_rate": 2.9585798816568045e-07, |
|
"log_odds_chosen": -0.4174951910972595, |
|
"log_odds_ratio": -0.9966305494308472, |
|
"logits/chosen": -2.2450003623962402, |
|
"logits/rejected": -2.199430465698242, |
|
"logps/chosen": -0.6903594732284546, |
|
"logps/rejected": -0.4920008182525635, |
|
"loss": 1.2864, |
|
"nll_loss": 1.2207610607147217, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.06903595477342606, |
|
"rewards/margins": -0.01983586512506008, |
|
"rewards/rejected": -0.04920008033514023, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06514657980456026, |
|
"grad_norm": 19.375, |
|
"learning_rate": 3.254437869822485e-07, |
|
"log_odds_chosen": -0.4817837178707123, |
|
"log_odds_ratio": -1.0484408140182495, |
|
"logits/chosen": -2.195328950881958, |
|
"logits/rejected": -2.172029972076416, |
|
"logps/chosen": -0.7407166361808777, |
|
"logps/rejected": -0.4809334874153137, |
|
"loss": 1.2292, |
|
"nll_loss": 1.1185578107833862, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.07407166808843613, |
|
"rewards/margins": -0.025978317484259605, |
|
"rewards/rejected": -0.04809335619211197, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07106899615042937, |
|
"grad_norm": 26.25, |
|
"learning_rate": 3.5502958579881655e-07, |
|
"log_odds_chosen": -0.5086492300033569, |
|
"log_odds_ratio": -1.073943018913269, |
|
"logits/chosen": -2.2213022708892822, |
|
"logits/rejected": -2.210648536682129, |
|
"logps/chosen": -0.7544690370559692, |
|
"logps/rejected": -0.47401171922683716, |
|
"loss": 1.2703, |
|
"nll_loss": 1.1549344062805176, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.07544689625501633, |
|
"rewards/margins": -0.02804572507739067, |
|
"rewards/rejected": -0.04740116745233536, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07699141249629848, |
|
"grad_norm": 121.5, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"log_odds_chosen": -0.6346783638000488, |
|
"log_odds_ratio": -1.208389401435852, |
|
"logits/chosen": -2.205939292907715, |
|
"logits/rejected": -2.1982388496398926, |
|
"logps/chosen": -0.8659466505050659, |
|
"logps/rejected": -0.4501543939113617, |
|
"loss": 1.3049, |
|
"nll_loss": 1.1621254682540894, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.08659467846155167, |
|
"rewards/margins": -0.0415792390704155, |
|
"rewards/rejected": -0.04501544311642647, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08291382884216761, |
|
"grad_norm": 18.625, |
|
"learning_rate": 4.1420118343195265e-07, |
|
"log_odds_chosen": -0.4532869756221771, |
|
"log_odds_ratio": -1.0120022296905518, |
|
"logits/chosen": -2.2359938621520996, |
|
"logits/rejected": -2.2115871906280518, |
|
"logps/chosen": -0.6659095287322998, |
|
"logps/rejected": -0.4467584490776062, |
|
"loss": 1.2027, |
|
"nll_loss": 1.0797432661056519, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.06659095734357834, |
|
"rewards/margins": -0.02191510982811451, |
|
"rewards/rejected": -0.04467584565281868, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08883624518803672, |
|
"grad_norm": 29.75, |
|
"learning_rate": 4.437869822485207e-07, |
|
"log_odds_chosen": -0.46737051010131836, |
|
"log_odds_ratio": -1.0146253108978271, |
|
"logits/chosen": -2.16318941116333, |
|
"logits/rejected": -2.1556496620178223, |
|
"logps/chosen": -0.7067540287971497, |
|
"logps/rejected": -0.47525158524513245, |
|
"loss": 1.217, |
|
"nll_loss": 1.1824976205825806, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.07067539542913437, |
|
"rewards/margins": -0.023150241002440453, |
|
"rewards/rejected": -0.047525160014629364, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09475866153390583, |
|
"grad_norm": 20.75, |
|
"learning_rate": 4.733727810650887e-07, |
|
"log_odds_chosen": -0.31778836250305176, |
|
"log_odds_ratio": -0.9325827360153198, |
|
"logits/chosen": -2.2458879947662354, |
|
"logits/rejected": -2.2277491092681885, |
|
"logps/chosen": -0.6050869822502136, |
|
"logps/rejected": -0.4580734372138977, |
|
"loss": 1.2157, |
|
"nll_loss": 1.0979220867156982, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.06050870940089226, |
|
"rewards/margins": -0.014701364561915398, |
|
"rewards/rejected": -0.04580734297633171, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.10068107787977496, |
|
"grad_norm": 14.3125, |
|
"learning_rate": 4.999994653198566e-07, |
|
"log_odds_chosen": -0.44623684883117676, |
|
"log_odds_ratio": -1.0507714748382568, |
|
"logits/chosen": -2.273740530014038, |
|
"logits/rejected": -2.248004198074341, |
|
"logps/chosen": -0.744641900062561, |
|
"logps/rejected": -0.4939740300178528, |
|
"loss": 1.2442, |
|
"nll_loss": 1.0892422199249268, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.07446418702602386, |
|
"rewards/margins": -0.0250667966902256, |
|
"rewards/rejected": -0.04939739406108856, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.10660349422564407, |
|
"grad_norm": 12.5625, |
|
"learning_rate": 4.999353064699471e-07, |
|
"log_odds_chosen": -0.5144436955451965, |
|
"log_odds_ratio": -1.1169707775115967, |
|
"logits/chosen": -2.2361178398132324, |
|
"logits/rejected": -2.2026758193969727, |
|
"logps/chosen": -0.8099610209465027, |
|
"logps/rejected": -0.49819788336753845, |
|
"loss": 1.1022, |
|
"nll_loss": 1.0261476039886475, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.08099609613418579, |
|
"rewards/margins": -0.031176313757896423, |
|
"rewards/rejected": -0.049819789826869965, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11252591057151318, |
|
"grad_norm": 12.875, |
|
"learning_rate": 4.99764243036258e-07, |
|
"log_odds_chosen": -0.4125841557979584, |
|
"log_odds_ratio": -0.991108775138855, |
|
"logits/chosen": -2.268022298812866, |
|
"logits/rejected": -2.240299701690674, |
|
"logps/chosen": -0.6463659405708313, |
|
"logps/rejected": -0.4517286717891693, |
|
"loss": 1.1318, |
|
"nll_loss": 1.0371661186218262, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.06463660299777985, |
|
"rewards/margins": -0.01946372725069523, |
|
"rewards/rejected": -0.04517286270856857, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11844832691738229, |
|
"grad_norm": 12.0, |
|
"learning_rate": 4.994863481875841e-07, |
|
"log_odds_chosen": -0.38528627157211304, |
|
"log_odds_ratio": -0.9595619440078735, |
|
"logits/chosen": -2.217349052429199, |
|
"logits/rejected": -2.1852166652679443, |
|
"logps/chosen": -0.6334083676338196, |
|
"logps/rejected": -0.4435149133205414, |
|
"loss": 1.1246, |
|
"nll_loss": 0.9835959672927856, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.06334083527326584, |
|
"rewards/margins": -0.01898934319615364, |
|
"rewards/rejected": -0.044351495802402496, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12437074326325141, |
|
"grad_norm": 11.5, |
|
"learning_rate": 4.991017407876165e-07, |
|
"log_odds_chosen": -0.429326593875885, |
|
"log_odds_ratio": -1.002436876296997, |
|
"logits/chosen": -2.224944591522217, |
|
"logits/rejected": -2.1807491779327393, |
|
"logps/chosen": -0.7087312936782837, |
|
"logps/rejected": -0.49742716550827026, |
|
"loss": 1.0953, |
|
"nll_loss": 1.0195242166519165, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.07087312638759613, |
|
"rewards/margins": -0.021130409091711044, |
|
"rewards/rejected": -0.049742721021175385, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13029315960912052, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 4.98610585344102e-07, |
|
"log_odds_chosen": -0.2424849271774292, |
|
"log_odds_ratio": -0.9048135876655579, |
|
"logits/chosen": -2.2507550716400146, |
|
"logits/rejected": -2.217257499694824, |
|
"logps/chosen": -0.6068475246429443, |
|
"logps/rejected": -0.4904823899269104, |
|
"loss": 1.1278, |
|
"nll_loss": 1.0603684186935425, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.06068475916981697, |
|
"rewards/margins": -0.011636516079306602, |
|
"rewards/rejected": -0.04904823377728462, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13621557595498965, |
|
"grad_norm": 9.875, |
|
"learning_rate": 4.980130919384768e-07, |
|
"log_odds_chosen": -0.5562174916267395, |
|
"log_odds_ratio": -1.0973405838012695, |
|
"logits/chosen": -2.246185779571533, |
|
"logits/rejected": -2.2379026412963867, |
|
"logps/chosen": -0.7477759122848511, |
|
"logps/rejected": -0.4505345821380615, |
|
"loss": 1.1333, |
|
"nll_loss": 1.0181388854980469, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.07477758824825287, |
|
"rewards/margins": -0.029724130406975746, |
|
"rewards/rejected": -0.04505345970392227, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14213799230085875, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 4.973095161360105e-07, |
|
"log_odds_chosen": -0.425253689289093, |
|
"log_odds_ratio": -1.0029823780059814, |
|
"logits/chosen": -2.242088794708252, |
|
"logits/rejected": -2.2122817039489746, |
|
"logps/chosen": -0.68077552318573, |
|
"logps/rejected": -0.48119717836380005, |
|
"loss": 1.1443, |
|
"nll_loss": 1.063909649848938, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.06807754933834076, |
|
"rewards/margins": -0.01995784044265747, |
|
"rewards/rejected": -0.048119716346263885, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14806040864672787, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 4.965001588764913e-07, |
|
"log_odds_chosen": -0.4351120889186859, |
|
"log_odds_ratio": -1.013584852218628, |
|
"logits/chosen": -2.2702879905700684, |
|
"logits/rejected": -2.2400031089782715, |
|
"logps/chosen": -0.6880632638931274, |
|
"logps/rejected": -0.4528827667236328, |
|
"loss": 1.1299, |
|
"nll_loss": 1.0191699266433716, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.06880633533000946, |
|
"rewards/margins": -0.023518051952123642, |
|
"rewards/rejected": -0.04528827592730522, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15398282499259697, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 4.955853663455072e-07, |
|
"log_odds_chosen": -0.30220693349838257, |
|
"log_odds_ratio": -0.9368545413017273, |
|
"logits/chosen": -2.257448673248291, |
|
"logits/rejected": -2.227647542953491, |
|
"logps/chosen": -0.6458665728569031, |
|
"logps/rejected": -0.4764745235443115, |
|
"loss": 1.0645, |
|
"nll_loss": 0.9644678235054016, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.06458665430545807, |
|
"rewards/margins": -0.016939211636781693, |
|
"rewards/rejected": -0.04764745384454727, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1599052413384661, |
|
"grad_norm": 10.875, |
|
"learning_rate": 4.945655298263713e-07, |
|
"log_odds_chosen": -0.41390785574913025, |
|
"log_odds_ratio": -0.9837135076522827, |
|
"logits/chosen": -2.20629620552063, |
|
"logits/rejected": -2.1831986904144287, |
|
"logps/chosen": -0.6674059629440308, |
|
"logps/rejected": -0.46569353342056274, |
|
"loss": 1.1528, |
|
"nll_loss": 1.0888841152191162, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.06674060225486755, |
|
"rewards/margins": -0.02017124928534031, |
|
"rewards/rejected": -0.046569354832172394, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16582765768433522, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 4.934410855327585e-07, |
|
"log_odds_chosen": -0.3461267352104187, |
|
"log_odds_ratio": -0.9425566792488098, |
|
"logits/chosen": -2.2884914875030518, |
|
"logits/rejected": -2.27152943611145, |
|
"logps/chosen": -0.6492639780044556, |
|
"logps/rejected": -0.46900925040245056, |
|
"loss": 1.0682, |
|
"nll_loss": 1.0291364192962646, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.0649264007806778, |
|
"rewards/margins": -0.018025478348135948, |
|
"rewards/rejected": -0.0469009205698967, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1717500740302043, |
|
"grad_norm": 11.0, |
|
"learning_rate": 4.922125144221252e-07, |
|
"log_odds_chosen": -0.38331133127212524, |
|
"log_odds_ratio": -0.9734469652175903, |
|
"logits/chosen": -2.2513084411621094, |
|
"logits/rejected": -2.199239492416382, |
|
"logps/chosen": -0.6518736481666565, |
|
"logps/rejected": -0.4689255356788635, |
|
"loss": 1.1269, |
|
"nll_loss": 1.0506547689437866, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.06518735736608505, |
|
"rewards/margins": -0.018294811248779297, |
|
"rewards/rejected": -0.04689255356788635, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17767249037607344, |
|
"grad_norm": 10.625, |
|
"learning_rate": 4.90880341989989e-07, |
|
"log_odds_chosen": -0.295235276222229, |
|
"log_odds_ratio": -0.9132793545722961, |
|
"logits/chosen": -2.255086660385132, |
|
"logits/rejected": -2.2318952083587646, |
|
"logps/chosen": -0.6402678489685059, |
|
"logps/rejected": -0.48136910796165466, |
|
"loss": 1.0909, |
|
"nll_loss": 1.0022283792495728, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.06402678042650223, |
|
"rewards/margins": -0.015889868140220642, |
|
"rewards/rejected": -0.048136912286281586, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18359490672194256, |
|
"grad_norm": 9.875, |
|
"learning_rate": 4.894451380451589e-07, |
|
"log_odds_chosen": -0.4930775761604309, |
|
"log_odds_ratio": -1.0459508895874023, |
|
"logits/chosen": -2.2340633869171143, |
|
"logits/rejected": -2.2229130268096924, |
|
"logps/chosen": -0.7189785242080688, |
|
"logps/rejected": -0.46092820167541504, |
|
"loss": 1.116, |
|
"nll_loss": 1.0077855587005615, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.07189784944057465, |
|
"rewards/margins": -0.02580503560602665, |
|
"rewards/rejected": -0.046092819422483444, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.18951732306781166, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 4.879075164660124e-07, |
|
"log_odds_chosen": -0.29097312688827515, |
|
"log_odds_ratio": -0.9061079025268555, |
|
"logits/chosen": -2.238163471221924, |
|
"logits/rejected": -2.2072105407714844, |
|
"logps/chosen": -0.6175664067268372, |
|
"logps/rejected": -0.47239384055137634, |
|
"loss": 1.0495, |
|
"nll_loss": 0.9289931058883667, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.06175662949681282, |
|
"rewards/margins": -0.014517253264784813, |
|
"rewards/rejected": -0.047239381819963455, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19543973941368079, |
|
"grad_norm": 12.25, |
|
"learning_rate": 4.862681349379212e-07, |
|
"log_odds_chosen": -0.33382827043533325, |
|
"log_odds_ratio": -0.939583420753479, |
|
"logits/chosen": -2.244995594024658, |
|
"logits/rejected": -2.1931443214416504, |
|
"logps/chosen": -0.6333972811698914, |
|
"logps/rejected": -0.4775928556919098, |
|
"loss": 1.1124, |
|
"nll_loss": 1.0409491062164307, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.0633397176861763, |
|
"rewards/margins": -0.01558043621480465, |
|
"rewards/rejected": -0.0477592833340168, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2013621557595499, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.8452769467194e-07, |
|
"log_odds_chosen": -0.3502793610095978, |
|
"log_odds_ratio": -0.9458521604537964, |
|
"logits/chosen": -2.2533793449401855, |
|
"logits/rejected": -2.231985092163086, |
|
"logps/chosen": -0.6348416209220886, |
|
"logps/rejected": -0.46244215965270996, |
|
"loss": 1.0872, |
|
"nll_loss": 0.965823769569397, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.06348416954278946, |
|
"rewards/margins": -0.017239956185221672, |
|
"rewards/rejected": -0.04624421149492264, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.207284572105419, |
|
"grad_norm": 9.625, |
|
"learning_rate": 4.82686940104879e-07, |
|
"log_odds_chosen": -0.37014713883399963, |
|
"log_odds_ratio": -0.9843534231185913, |
|
"logits/chosen": -2.296128511428833, |
|
"logits/rejected": -2.267141103744507, |
|
"logps/chosen": -0.6616524457931519, |
|
"logps/rejected": -0.4461567997932434, |
|
"loss": 1.0383, |
|
"nll_loss": 0.9294153451919556, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.0661652460694313, |
|
"rewards/margins": -0.02154957316815853, |
|
"rewards/rejected": -0.04461567848920822, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21320698845128813, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.807466585808856e-07, |
|
"log_odds_chosen": -0.2995724380016327, |
|
"log_odds_ratio": -0.9168221354484558, |
|
"logits/chosen": -2.274096727371216, |
|
"logits/rejected": -2.2658305168151855, |
|
"logps/chosen": -0.5940972566604614, |
|
"logps/rejected": -0.46015462279319763, |
|
"loss": 1.0942, |
|
"nll_loss": 0.9911165237426758, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.0594097301363945, |
|
"rewards/margins": -0.013394266366958618, |
|
"rewards/rejected": -0.04601546376943588, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.21912940479715723, |
|
"grad_norm": 13.5625, |
|
"learning_rate": 4.787076800146752e-07, |
|
"log_odds_chosen": -0.27963608503341675, |
|
"log_odds_ratio": -0.9352908134460449, |
|
"logits/chosen": -2.2542636394500732, |
|
"logits/rejected": -2.2058660984039307, |
|
"logps/chosen": -0.6458699107170105, |
|
"logps/rejected": -0.468344509601593, |
|
"loss": 1.0125, |
|
"nll_loss": 0.9038776159286499, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.06458699703216553, |
|
"rewards/margins": -0.017752548679709435, |
|
"rewards/rejected": -0.046834446489810944, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22505182114302635, |
|
"grad_norm": 10.0, |
|
"learning_rate": 4.765708765365526e-07, |
|
"log_odds_chosen": -0.2566812038421631, |
|
"log_odds_ratio": -0.9025079011917114, |
|
"logits/chosen": -2.2573628425598145, |
|
"logits/rejected": -2.2479588985443115, |
|
"logps/chosen": -0.5893818140029907, |
|
"logps/rejected": -0.4597233235836029, |
|
"loss": 1.1093, |
|
"nll_loss": 0.9725319147109985, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.05893818661570549, |
|
"rewards/margins": -0.012965850532054901, |
|
"rewards/rejected": -0.04597233235836029, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.23097423748889548, |
|
"grad_norm": 12.25, |
|
"learning_rate": 4.7433716211937587e-07, |
|
"log_odds_chosen": -0.4499928057193756, |
|
"log_odds_ratio": -1.0134861469268799, |
|
"logits/chosen": -2.3190252780914307, |
|
"logits/rejected": -2.297466516494751, |
|
"logps/chosen": -0.655422568321228, |
|
"logps/rejected": -0.43357038497924805, |
|
"loss": 1.0471, |
|
"nll_loss": 1.008756399154663, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.06554224342107773, |
|
"rewards/margins": -0.02218521013855934, |
|
"rewards/rejected": -0.043357037007808685, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23689665383476458, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 4.720074921876245e-07, |
|
"log_odds_chosen": -0.3851686120033264, |
|
"log_odds_ratio": -0.9778718948364258, |
|
"logits/chosen": -2.325918674468994, |
|
"logits/rejected": -2.2813212871551514, |
|
"logps/chosen": -0.6249781847000122, |
|
"logps/rejected": -0.45036381483078003, |
|
"loss": 1.0507, |
|
"nll_loss": 0.9533747434616089, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.0624978169798851, |
|
"rewards/margins": -0.01746143028140068, |
|
"rewards/rejected": -0.04503639414906502, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2428190701806337, |
|
"grad_norm": 10.5625, |
|
"learning_rate": 4.6958286320873593e-07, |
|
"log_odds_chosen": -0.38822251558303833, |
|
"log_odds_ratio": -0.9542675018310547, |
|
"logits/chosen": -2.2724270820617676, |
|
"logits/rejected": -2.27009916305542, |
|
"logps/chosen": -0.6122742891311646, |
|
"logps/rejected": -0.4287818372249603, |
|
"loss": 1.0679, |
|
"nll_loss": 1.0051120519638062, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.06122744083404541, |
|
"rewards/margins": -0.018349256366491318, |
|
"rewards/rejected": -0.04287818819284439, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24874148652650283, |
|
"grad_norm": 10.25, |
|
"learning_rate": 4.6706431226688804e-07, |
|
"log_odds_chosen": -0.30081695318222046, |
|
"log_odds_ratio": -0.921572208404541, |
|
"logits/chosen": -2.2560360431671143, |
|
"logits/rejected": -2.2262086868286133, |
|
"logps/chosen": -0.6127408742904663, |
|
"logps/rejected": -0.4595797061920166, |
|
"loss": 1.0784, |
|
"nll_loss": 0.9788911938667297, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.06127409264445305, |
|
"rewards/margins": -0.015316121280193329, |
|
"rewards/rejected": -0.04595796763896942, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25466390287237195, |
|
"grad_norm": 9.375, |
|
"learning_rate": 4.6445291661940777e-07, |
|
"log_odds_chosen": -0.2526037096977234, |
|
"log_odds_ratio": -0.8853398561477661, |
|
"logits/chosen": -2.274932861328125, |
|
"logits/rejected": -2.2737860679626465, |
|
"logps/chosen": -0.5831697583198547, |
|
"logps/rejected": -0.4629867672920227, |
|
"loss": 1.0351, |
|
"nll_loss": 0.9002013206481934, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.05831696838140488, |
|
"rewards/margins": -0.012018295004963875, |
|
"rewards/rejected": -0.04629867523908615, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.26058631921824105, |
|
"grad_norm": 13.0, |
|
"learning_rate": 4.6174979323599715e-07, |
|
"log_odds_chosen": -0.4437042772769928, |
|
"log_odds_ratio": -1.0250940322875977, |
|
"logits/chosen": -2.2592310905456543, |
|
"logits/rejected": -2.2114596366882324, |
|
"logps/chosen": -0.7022743821144104, |
|
"logps/rejected": -0.4603559374809265, |
|
"loss": 1.0967, |
|
"nll_loss": 1.0961658954620361, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.07022743672132492, |
|
"rewards/margins": -0.02419184148311615, |
|
"rewards/rejected": -0.04603559896349907, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26650873556411014, |
|
"grad_norm": 9.5, |
|
"learning_rate": 4.5895609832097277e-07, |
|
"log_odds_chosen": -0.3050179183483124, |
|
"log_odds_ratio": -0.9421980977058411, |
|
"logits/chosen": -2.2684884071350098, |
|
"logits/rejected": -2.2559661865234375, |
|
"logps/chosen": -0.6401418447494507, |
|
"logps/rejected": -0.46939319372177124, |
|
"loss": 1.0745, |
|
"nll_loss": 0.96502685546875, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.06401418894529343, |
|
"rewards/margins": -0.01707487180829048, |
|
"rewards/rejected": -0.04693932086229324, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2724311519099793, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 4.560730268187236e-07, |
|
"log_odds_chosen": -0.26763516664505005, |
|
"log_odds_ratio": -0.8960734605789185, |
|
"logits/chosen": -2.266759157180786, |
|
"logits/rejected": -2.230344533920288, |
|
"logps/chosen": -0.57380610704422, |
|
"logps/rejected": -0.45090922713279724, |
|
"loss": 1.0544, |
|
"nll_loss": 0.9469722509384155, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.05738060921430588, |
|
"rewards/margins": -0.012289688922464848, |
|
"rewards/rejected": -0.0450909249484539, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2783535682558484, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 4.531018119025989e-07, |
|
"log_odds_chosen": -0.19471798837184906, |
|
"log_odds_ratio": -0.8877772092819214, |
|
"logits/chosen": -2.325700283050537, |
|
"logits/rejected": -2.3014023303985596, |
|
"logps/chosen": -0.5948117971420288, |
|
"logps/rejected": -0.5260331630706787, |
|
"loss": 1.0872, |
|
"nll_loss": 1.042905569076538, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.059481192380189896, |
|
"rewards/margins": -0.006877871695905924, |
|
"rewards/rejected": -0.05260331556200981, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2842759846017175, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 4.5004372444744376e-07, |
|
"log_odds_chosen": -0.20854365825653076, |
|
"log_odds_ratio": -0.8700854182243347, |
|
"logits/chosen": -2.267329454421997, |
|
"logits/rejected": -2.2475056648254395, |
|
"logps/chosen": -0.610100269317627, |
|
"logps/rejected": -0.49854737520217896, |
|
"loss": 1.0582, |
|
"nll_loss": 0.982585608959198, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.061010025441646576, |
|
"rewards/margins": -0.011155293323099613, |
|
"rewards/rejected": -0.04985473304986954, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2901984009475866, |
|
"grad_norm": 11.125, |
|
"learning_rate": 4.4690007248600967e-07, |
|
"log_odds_chosen": -0.30316418409347534, |
|
"log_odds_ratio": -0.9258543848991394, |
|
"logits/chosen": -2.260499954223633, |
|
"logits/rejected": -2.2460737228393555, |
|
"logps/chosen": -0.6183134913444519, |
|
"logps/rejected": -0.4603392481803894, |
|
"loss": 1.0569, |
|
"nll_loss": 0.9751143455505371, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.06183135509490967, |
|
"rewards/margins": -0.01579742692410946, |
|
"rewards/rejected": -0.04603392630815506, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.29612081729345574, |
|
"grad_norm": 9.25, |
|
"learning_rate": 4.436722006494701e-07, |
|
"log_odds_chosen": -0.4622948169708252, |
|
"log_odds_ratio": -1.0724523067474365, |
|
"logits/chosen": -2.2528557777404785, |
|
"logits/rejected": -2.2317535877227783, |
|
"logps/chosen": -0.7585560083389282, |
|
"logps/rejected": -0.4601530134677887, |
|
"loss": 1.0779, |
|
"nll_loss": 1.0056917667388916, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.07585560530424118, |
|
"rewards/margins": -0.02984030917286873, |
|
"rewards/rejected": -0.04601530730724335, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.30204323363932484, |
|
"grad_norm": 10.125, |
|
"learning_rate": 4.4036148959228356e-07, |
|
"log_odds_chosen": -0.37729692459106445, |
|
"log_odds_ratio": -0.9907791018486023, |
|
"logits/chosen": -2.285222291946411, |
|
"logits/rejected": -2.2465076446533203, |
|
"logps/chosen": -0.6608995199203491, |
|
"logps/rejected": -0.44408687949180603, |
|
"loss": 1.0854, |
|
"nll_loss": 0.9470478892326355, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0660899430513382, |
|
"rewards/margins": -0.02168126031756401, |
|
"rewards/rejected": -0.04440869390964508, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.30796564998519393, |
|
"grad_norm": 12.9375, |
|
"learning_rate": 4.3696935540164705e-07, |
|
"log_odds_chosen": -0.3114868998527527, |
|
"log_odds_ratio": -0.9284585118293762, |
|
"logits/chosen": -2.2520318031311035, |
|
"logits/rejected": -2.2336666584014893, |
|
"logps/chosen": -0.6092923879623413, |
|
"logps/rejected": -0.4560086727142334, |
|
"loss": 1.0234, |
|
"nll_loss": 0.954501748085022, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.06092924624681473, |
|
"rewards/margins": -0.01532837562263012, |
|
"rewards/rejected": -0.04560086503624916, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3138880663310631, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 4.334972489917947e-07, |
|
"log_odds_chosen": -0.22460684180259705, |
|
"log_odds_ratio": -0.88166743516922, |
|
"logits/chosen": -2.313957691192627, |
|
"logits/rejected": -2.2588186264038086, |
|
"logps/chosen": -0.6013073325157166, |
|
"logps/rejected": -0.47843700647354126, |
|
"loss": 1.0456, |
|
"nll_loss": 0.9358353614807129, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.060130733996629715, |
|
"rewards/margins": -0.012287032790482044, |
|
"rewards/rejected": -0.047843702137470245, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3198104826769322, |
|
"grad_norm": 11.25, |
|
"learning_rate": 4.299466554833997e-07, |
|
"log_odds_chosen": -0.33192509412765503, |
|
"log_odds_ratio": -0.94036465883255, |
|
"logits/chosen": -2.2912707328796387, |
|
"logits/rejected": -2.2435359954833984, |
|
"logps/chosen": -0.5902704000473022, |
|
"logps/rejected": -0.44104498624801636, |
|
"loss": 1.0515, |
|
"nll_loss": 0.9315252304077148, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.059027038514614105, |
|
"rewards/margins": -0.014922534115612507, |
|
"rewards/rejected": -0.044104501605033875, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3257328990228013, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 4.263190935683449e-07, |
|
"log_odds_chosen": -0.25842440128326416, |
|
"log_odds_ratio": -0.893360435962677, |
|
"logits/chosen": -2.2691588401794434, |
|
"logits/rejected": -2.2356011867523193, |
|
"logps/chosen": -0.5605894327163696, |
|
"logps/rejected": -0.43656760454177856, |
|
"loss": 0.9862, |
|
"nll_loss": 0.8704695701599121, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.05605894327163696, |
|
"rewards/margins": -0.012402191758155823, |
|
"rewards/rejected": -0.04365675523877144, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.33165531536867043, |
|
"grad_norm": 10.875, |
|
"learning_rate": 4.2261611486013437e-07, |
|
"log_odds_chosen": -0.3279554545879364, |
|
"log_odds_ratio": -0.9397815465927124, |
|
"logits/chosen": -2.3104796409606934, |
|
"logits/rejected": -2.275190830230713, |
|
"logps/chosen": -0.6270398497581482, |
|
"logps/rejected": -0.4670359194278717, |
|
"loss": 1.0697, |
|
"nll_loss": 0.977874755859375, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.06270398944616318, |
|
"rewards/margins": -0.01600039377808571, |
|
"rewards/rejected": -0.04670359194278717, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.33757773171453953, |
|
"grad_norm": 11.125, |
|
"learning_rate": 4.188393032302233e-07, |
|
"log_odds_chosen": -0.14010918140411377, |
|
"log_odds_ratio": -0.8429776430130005, |
|
"logits/chosen": -2.2512803077697754, |
|
"logits/rejected": -2.1937472820281982, |
|
"logps/chosen": -0.5634902715682983, |
|
"logps/rejected": -0.5150736570358276, |
|
"loss": 1.0249, |
|
"nll_loss": 0.931064248085022, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.056349027901887894, |
|
"rewards/margins": -0.0048416657373309135, |
|
"rewards/rejected": -0.051507361233234406, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3435001480604086, |
|
"grad_norm": 12.75, |
|
"learning_rate": 4.1499027413055e-07, |
|
"log_odds_chosen": -0.33234935998916626, |
|
"log_odds_ratio": -0.9407118558883667, |
|
"logits/chosen": -2.258405923843384, |
|
"logits/rejected": -2.232956647872925, |
|
"logps/chosen": -0.6220130920410156, |
|
"logps/rejected": -0.4592718482017517, |
|
"loss": 1.0413, |
|
"nll_loss": 0.9290376901626587, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.06220130994915962, |
|
"rewards/margins": -0.016274118795990944, |
|
"rewards/rejected": -0.04592718556523323, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3494225644062778, |
|
"grad_norm": 13.875, |
|
"learning_rate": 4.1107067390256056e-07, |
|
"log_odds_chosen": -0.35427385568618774, |
|
"log_odds_ratio": -0.9841470718383789, |
|
"logits/chosen": -2.305126428604126, |
|
"logits/rejected": -2.280172824859619, |
|
"logps/chosen": -0.696389377117157, |
|
"logps/rejected": -0.4881146550178528, |
|
"loss": 1.0718, |
|
"nll_loss": 1.0334848165512085, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0696389377117157, |
|
"rewards/margins": -0.02082747593522072, |
|
"rewards/rejected": -0.04881146177649498, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3553449807521469, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 4.0708217907302047e-07, |
|
"log_odds_chosen": -0.3386622369289398, |
|
"log_odds_ratio": -0.9444282650947571, |
|
"logits/chosen": -2.2589573860168457, |
|
"logits/rejected": -2.2278530597686768, |
|
"logps/chosen": -0.6211683750152588, |
|
"logps/rejected": -0.46438631415367126, |
|
"loss": 1.0621, |
|
"nll_loss": 0.9823211431503296, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.062116838991642, |
|
"rewards/margins": -0.01567821204662323, |
|
"rewards/rejected": -0.04643862694501877, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.361267397098016, |
|
"grad_norm": 14.4375, |
|
"learning_rate": 4.030264956369157e-07, |
|
"log_odds_chosen": -0.32127273082733154, |
|
"log_odds_ratio": -0.929902195930481, |
|
"logits/chosen": -2.297096014022827, |
|
"logits/rejected": -2.259603977203369, |
|
"logps/chosen": -0.591595470905304, |
|
"logps/rejected": -0.4399223327636719, |
|
"loss": 1.0497, |
|
"nll_loss": 0.9886807203292847, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.0591595396399498, |
|
"rewards/margins": -0.01516731083393097, |
|
"rewards/rejected": -0.04399223253130913, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3671898134438851, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 3.989053583277492e-07, |
|
"log_odds_chosen": -0.42405062913894653, |
|
"log_odds_ratio": -1.0016412734985352, |
|
"logits/chosen": -2.3095479011535645, |
|
"logits/rejected": -2.2935452461242676, |
|
"logps/chosen": -0.6750982403755188, |
|
"logps/rejected": -0.45489102602005005, |
|
"loss": 1.0537, |
|
"nll_loss": 0.9710051417350769, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.06750981509685516, |
|
"rewards/margins": -0.022020723670721054, |
|
"rewards/rejected": -0.04548909515142441, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3731122297897542, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 3.947205298755447e-07, |
|
"log_odds_chosen": -0.25669050216674805, |
|
"log_odds_ratio": -0.9015368223190308, |
|
"logits/chosen": -2.2679405212402344, |
|
"logits/rejected": -2.2386162281036377, |
|
"logps/chosen": -0.6160240173339844, |
|
"logps/rejected": -0.48336100578308105, |
|
"loss": 1.0648, |
|
"nll_loss": 0.9532335996627808, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.061602406203746796, |
|
"rewards/margins": -0.013266305439174175, |
|
"rewards/rejected": -0.04833609610795975, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3790346461356233, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 3.9047380025287634e-07, |
|
"log_odds_chosen": -0.24768850207328796, |
|
"log_odds_ratio": -0.891069769859314, |
|
"logits/chosen": -2.275651216506958, |
|
"logits/rejected": -2.247177839279175, |
|
"logps/chosen": -0.5877569913864136, |
|
"logps/rejected": -0.4681660532951355, |
|
"loss": 1.0549, |
|
"nll_loss": 0.9463118314743042, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.058775704354047775, |
|
"rewards/margins": -0.011959095485508442, |
|
"rewards/rejected": -0.04681660607457161, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3849570624814925, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 3.8616698590924523e-07, |
|
"log_odds_chosen": -0.2891980707645416, |
|
"log_odds_ratio": -0.9127435684204102, |
|
"logits/chosen": -2.296032428741455, |
|
"logits/rejected": -2.2514827251434326, |
|
"logps/chosen": -0.6284441351890564, |
|
"logps/rejected": -0.4775362014770508, |
|
"loss": 1.0297, |
|
"nll_loss": 0.9506929516792297, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.062844417989254, |
|
"rewards/margins": -0.01509079895913601, |
|
"rewards/rejected": -0.04775362089276314, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.39087947882736157, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 3.8180192899413123e-07, |
|
"log_odds_chosen": -0.3009001314640045, |
|
"log_odds_ratio": -0.9173041582107544, |
|
"logits/chosen": -2.292931079864502, |
|
"logits/rejected": -2.2850821018218994, |
|
"logps/chosen": -0.5977297425270081, |
|
"logps/rejected": -0.4498085081577301, |
|
"loss": 1.066, |
|
"nll_loss": 0.9441615343093872, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.059772975742816925, |
|
"rewards/margins": -0.014792119152843952, |
|
"rewards/rejected": -0.04498085752129555, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.39680189517323067, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 3.7738049656905225e-07, |
|
"log_odds_chosen": -0.2274588793516159, |
|
"log_odds_ratio": -0.871192455291748, |
|
"logits/chosen": -2.2281768321990967, |
|
"logits/rejected": -2.1852290630340576, |
|
"logps/chosen": -0.5783167481422424, |
|
"logps/rejected": -0.472917377948761, |
|
"loss": 1.0607, |
|
"nll_loss": 0.9557689428329468, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.05783168226480484, |
|
"rewards/margins": -0.010539938695728779, |
|
"rewards/rejected": -0.04729173332452774, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4027243115190998, |
|
"grad_norm": 10.375, |
|
"learning_rate": 3.7290457980896787e-07, |
|
"log_odds_chosen": -0.1645122915506363, |
|
"log_odds_ratio": -0.8458727598190308, |
|
"logits/chosen": -2.2992634773254395, |
|
"logits/rejected": -2.270430564880371, |
|
"logps/chosen": -0.5671563148498535, |
|
"logps/rejected": -0.4864569306373596, |
|
"loss": 1.0284, |
|
"nll_loss": 0.9164050817489624, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.05671562999486923, |
|
"rewards/margins": -0.00806993618607521, |
|
"rewards/rejected": -0.04864569753408432, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4086467278649689, |
|
"grad_norm": 10.125, |
|
"learning_rate": 3.68376093193369e-07, |
|
"log_odds_chosen": -0.2814542353153229, |
|
"log_odds_ratio": -0.9015814661979675, |
|
"logits/chosen": -2.3065972328186035, |
|
"logits/rejected": -2.2681093215942383, |
|
"logps/chosen": -0.5637949109077454, |
|
"logps/rejected": -0.4352457523345947, |
|
"loss": 1.0214, |
|
"nll_loss": 0.91374272108078, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.05637948960065842, |
|
"rewards/margins": -0.012854918837547302, |
|
"rewards/rejected": -0.043524570763111115, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.414569144210838, |
|
"grad_norm": 58.5, |
|
"learning_rate": 3.637969736873992e-07, |
|
"log_odds_chosen": -0.21553269028663635, |
|
"log_odds_ratio": -0.8870409727096558, |
|
"logits/chosen": -2.2836763858795166, |
|
"logits/rejected": -2.252403736114502, |
|
"logps/chosen": -0.5681829452514648, |
|
"logps/rejected": -0.465969979763031, |
|
"loss": 1.0701, |
|
"nll_loss": 0.9871380925178528, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.056818295270204544, |
|
"rewards/margins": -0.010221302509307861, |
|
"rewards/rejected": -0.04659699648618698, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4204915605567071, |
|
"grad_norm": 10.5, |
|
"learning_rate": 3.591691799133587e-07, |
|
"log_odds_chosen": -0.19581297039985657, |
|
"log_odds_ratio": -0.8488709330558777, |
|
"logits/chosen": -2.3274245262145996, |
|
"logits/rejected": -2.2992606163024902, |
|
"logps/chosen": -0.5645796060562134, |
|
"logps/rejected": -0.4613499641418457, |
|
"loss": 1.0495, |
|
"nll_loss": 0.9565572738647461, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.05645795539021492, |
|
"rewards/margins": -0.010322963818907738, |
|
"rewards/rejected": -0.04613499343395233, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.42641397690257626, |
|
"grad_norm": 9.75, |
|
"learning_rate": 3.5449469131294476e-07, |
|
"log_odds_chosen": -0.22600612044334412, |
|
"log_odds_ratio": -0.8781830668449402, |
|
"logits/chosen": -2.2927708625793457, |
|
"logits/rejected": -2.2485132217407227, |
|
"logps/chosen": -0.5577629804611206, |
|
"logps/rejected": -0.44653376936912537, |
|
"loss": 1.0248, |
|
"nll_loss": 0.9297264814376831, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.055776309221982956, |
|
"rewards/margins": -0.0111229307949543, |
|
"rewards/rejected": -0.044653378427028656, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.43233639324844536, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 3.497755073005868e-07, |
|
"log_odds_chosen": -0.09444288164377213, |
|
"log_odds_ratio": -0.8072474598884583, |
|
"logits/chosen": -2.290067672729492, |
|
"logits/rejected": -2.257514238357544, |
|
"logps/chosen": -0.5471974611282349, |
|
"logps/rejected": -0.47366800904273987, |
|
"loss": 1.0112, |
|
"nll_loss": 0.8891817927360535, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.054719746112823486, |
|
"rewards/margins": -0.007352945860475302, |
|
"rewards/rejected": -0.04736679792404175, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.43825880959431446, |
|
"grad_norm": 16.75, |
|
"learning_rate": 3.4501364640823926e-07, |
|
"log_odds_chosen": -0.3251793384552002, |
|
"log_odds_ratio": -0.9317482709884644, |
|
"logits/chosen": -2.2995355129241943, |
|
"logits/rejected": -2.2732508182525635, |
|
"logps/chosen": -0.6547442674636841, |
|
"logps/rejected": -0.4866989254951477, |
|
"loss": 1.0482, |
|
"nll_loss": 0.9714682698249817, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.06547442078590393, |
|
"rewards/margins": -0.016804538667201996, |
|
"rewards/rejected": -0.04866989329457283, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4441812259401836, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 3.402111454219966e-07, |
|
"log_odds_chosen": -0.17538635432720184, |
|
"log_odds_ratio": -0.8506783246994019, |
|
"logits/chosen": -2.3090875148773193, |
|
"logits/rejected": -2.26053786277771, |
|
"logps/chosen": -0.5713698863983154, |
|
"logps/rejected": -0.47184181213378906, |
|
"loss": 1.0275, |
|
"nll_loss": 0.954795241355896, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05713699012994766, |
|
"rewards/margins": -0.009952803142368793, |
|
"rewards/rejected": -0.047184187918901443, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4501036422860527, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 3.353700585109005e-07, |
|
"log_odds_chosen": -0.19826039671897888, |
|
"log_odds_ratio": -0.8637887835502625, |
|
"logits/chosen": -2.302405834197998, |
|
"logits/rejected": -2.27463698387146, |
|
"logps/chosen": -0.5740953683853149, |
|
"logps/rejected": -0.4722967743873596, |
|
"loss": 1.0239, |
|
"nll_loss": 0.963403582572937, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.057409536093473434, |
|
"rewards/margins": -0.010179854929447174, |
|
"rewards/rejected": -0.04722967743873596, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4560260586319218, |
|
"grad_norm": 9.875, |
|
"learning_rate": 3.304924563483129e-07, |
|
"log_odds_chosen": -0.22836697101593018, |
|
"log_odds_ratio": -0.895135760307312, |
|
"logits/chosen": -2.315516948699951, |
|
"logits/rejected": -2.3024649620056152, |
|
"logps/chosen": -0.6285193562507629, |
|
"logps/rejected": -0.48862919211387634, |
|
"loss": 1.073, |
|
"nll_loss": 1.0095432996749878, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0628519356250763, |
|
"rewards/margins": -0.013989018276333809, |
|
"rewards/rejected": -0.04886292293667793, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.46194847497779096, |
|
"grad_norm": 11.125, |
|
"learning_rate": 3.255804252262283e-07, |
|
"log_odds_chosen": -0.19756431877613068, |
|
"log_odds_ratio": -0.856968104839325, |
|
"logits/chosen": -2.255115032196045, |
|
"logits/rejected": -2.226313352584839, |
|
"logps/chosen": -0.551701545715332, |
|
"logps/rejected": -0.45012766122817993, |
|
"loss": 1.0499, |
|
"nll_loss": 0.9961403608322144, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.05517015606164932, |
|
"rewards/margins": -0.010157393291592598, |
|
"rewards/rejected": -0.045012760907411575, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.46787089132366005, |
|
"grad_norm": 8.375, |
|
"learning_rate": 3.2063606616290626e-07, |
|
"log_odds_chosen": -0.3132410943508148, |
|
"log_odds_ratio": -0.9298326373100281, |
|
"logits/chosen": -2.2360429763793945, |
|
"logits/rejected": -2.1973369121551514, |
|
"logps/chosen": -0.5941890478134155, |
|
"logps/rejected": -0.44506731629371643, |
|
"loss": 0.9654, |
|
"nll_loss": 0.8383496999740601, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.05941891670227051, |
|
"rewards/margins": -0.01491218339651823, |
|
"rewards/rejected": -0.044506728649139404, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.47379330766952915, |
|
"grad_norm": 15.0625, |
|
"learning_rate": 3.1566149400420523e-07, |
|
"log_odds_chosen": -0.26251059770584106, |
|
"log_odds_ratio": -0.8918318748474121, |
|
"logits/chosen": -2.2902214527130127, |
|
"logits/rejected": -2.2795047760009766, |
|
"logps/chosen": -0.6117950081825256, |
|
"logps/rejected": -0.4801320433616638, |
|
"loss": 1.0567, |
|
"nll_loss": 0.9525865316390991, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.0611795075237751, |
|
"rewards/margins": -0.013166295364499092, |
|
"rewards/rejected": -0.04801321029663086, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4797157240153983, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 3.1065883651900087e-07, |
|
"log_odds_chosen": -0.2203420102596283, |
|
"log_odds_ratio": -0.8829119801521301, |
|
"logits/chosen": -2.2788829803466797, |
|
"logits/rejected": -2.2381834983825684, |
|
"logps/chosen": -0.5892807841300964, |
|
"logps/rejected": -0.48378220200538635, |
|
"loss": 1.0678, |
|
"nll_loss": 0.9220091104507446, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.05892808362841606, |
|
"rewards/margins": -0.01054986473172903, |
|
"rewards/rejected": -0.048378217965364456, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4856381403612674, |
|
"grad_norm": 9.875, |
|
"learning_rate": 3.056302334890786e-07, |
|
"log_odds_chosen": -0.30824679136276245, |
|
"log_odds_ratio": -0.9259847402572632, |
|
"logits/chosen": -2.288405179977417, |
|
"logits/rejected": -2.2682487964630127, |
|
"logps/chosen": -0.6053352355957031, |
|
"logps/rejected": -0.4507838189601898, |
|
"loss": 1.0098, |
|
"nll_loss": 0.9126564860343933, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.06053352355957031, |
|
"rewards/margins": -0.01545514166355133, |
|
"rewards/rejected": -0.04507838934659958, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4915605567071365, |
|
"grad_norm": 12.6875, |
|
"learning_rate": 3.0057783579388586e-07, |
|
"log_odds_chosen": -0.15970291197299957, |
|
"log_odds_ratio": -0.8330586552619934, |
|
"logits/chosen": -2.2909493446350098, |
|
"logits/rejected": -2.2521986961364746, |
|
"logps/chosen": -0.5571908950805664, |
|
"logps/rejected": -0.4815686345100403, |
|
"loss": 1.0258, |
|
"nll_loss": 0.9384473562240601, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.05571908876299858, |
|
"rewards/margins": -0.007562229875475168, |
|
"rewards/rejected": -0.04815686494112015, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.49748297305300565, |
|
"grad_norm": 11.75, |
|
"learning_rate": 2.9550380449053907e-07, |
|
"log_odds_chosen": -0.18619410693645477, |
|
"log_odds_ratio": -0.8525155782699585, |
|
"logits/chosen": -2.2423572540283203, |
|
"logits/rejected": -2.221928596496582, |
|
"logps/chosen": -0.5615742206573486, |
|
"logps/rejected": -0.4591636657714844, |
|
"loss": 1.0133, |
|
"nll_loss": 0.8223134279251099, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.056157421320676804, |
|
"rewards/margins": -0.010241055861115456, |
|
"rewards/rejected": -0.0459163673222065, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5034053893988747, |
|
"grad_norm": 8.625, |
|
"learning_rate": 2.904103098894767e-07, |
|
"log_odds_chosen": -0.22144293785095215, |
|
"log_odds_ratio": -0.8922742009162903, |
|
"logits/chosen": -2.280796527862549, |
|
"logits/rejected": -2.2380261421203613, |
|
"logps/chosen": -0.5996569991111755, |
|
"logps/rejected": -0.4632148742675781, |
|
"loss": 1.0102, |
|
"nll_loss": 0.9282135963439941, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.059965699911117554, |
|
"rewards/margins": -0.013644215650856495, |
|
"rewards/rejected": -0.04632148891687393, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5093278057447439, |
|
"grad_norm": 12.625, |
|
"learning_rate": 2.852995306261545e-07, |
|
"log_odds_chosen": -0.1986076533794403, |
|
"log_odds_ratio": -0.8607484698295593, |
|
"logits/chosen": -2.306536912918091, |
|
"logits/rejected": -2.2707247734069824, |
|
"logps/chosen": -0.575395405292511, |
|
"logps/rejected": -0.4835848808288574, |
|
"loss": 1.074, |
|
"nll_loss": 1.0040955543518066, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.05753953382372856, |
|
"rewards/margins": -0.009181044064462185, |
|
"rewards/rejected": -0.0483584925532341, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.515250222090613, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 2.801736527291797e-07, |
|
"log_odds_chosen": -0.26449286937713623, |
|
"log_odds_ratio": -0.9028227925300598, |
|
"logits/chosen": -2.275608777999878, |
|
"logits/rejected": -2.233181953430176, |
|
"logps/chosen": -0.61722731590271, |
|
"logps/rejected": -0.4729304313659668, |
|
"loss": 1.042, |
|
"nll_loss": 0.908827006816864, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.061722736805677414, |
|
"rewards/margins": -0.014429694041609764, |
|
"rewards/rejected": -0.0472930371761322, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5211726384364821, |
|
"grad_norm": 10.875, |
|
"learning_rate": 2.750348686852836e-07, |
|
"log_odds_chosen": -0.31994161009788513, |
|
"log_odds_ratio": -0.9219182729721069, |
|
"logits/chosen": -2.329312324523926, |
|
"logits/rejected": -2.2651875019073486, |
|
"logps/chosen": -0.6155102252960205, |
|
"logps/rejected": -0.4632096290588379, |
|
"loss": 1.0724, |
|
"nll_loss": 1.0065295696258545, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.06155102327466011, |
|
"rewards/margins": -0.015230064280331135, |
|
"rewards/rejected": -0.04632095992565155, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5270950547823512, |
|
"grad_norm": 11.875, |
|
"learning_rate": 2.69885376501531e-07, |
|
"log_odds_chosen": -0.23163005709648132, |
|
"log_odds_ratio": -0.8846963047981262, |
|
"logits/chosen": -2.261355400085449, |
|
"logits/rejected": -2.2470784187316895, |
|
"logps/chosen": -0.6110343933105469, |
|
"logps/rejected": -0.4852830767631531, |
|
"loss": 1.0546, |
|
"nll_loss": 0.9538838267326355, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.06110344082117081, |
|
"rewards/margins": -0.012575129978358746, |
|
"rewards/rejected": -0.04852830991148949, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5330174711282203, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 2.647273787651687e-07, |
|
"log_odds_chosen": -0.18702737987041473, |
|
"log_odds_ratio": -0.8396440744400024, |
|
"logits/chosen": -2.2948384284973145, |
|
"logits/rejected": -2.2751121520996094, |
|
"logps/chosen": -0.5671176910400391, |
|
"logps/rejected": -0.47453179955482483, |
|
"loss": 1.0288, |
|
"nll_loss": 0.9608666300773621, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.056711774319410324, |
|
"rewards/margins": -0.009258597157895565, |
|
"rewards/rejected": -0.047453176230192184, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5389398874740894, |
|
"grad_norm": 16.75, |
|
"learning_rate": 2.5956308170151526e-07, |
|
"log_odds_chosen": -0.40357428789138794, |
|
"log_odds_ratio": -1.0180401802062988, |
|
"logits/chosen": -2.260730504989624, |
|
"logits/rejected": -2.2328133583068848, |
|
"logps/chosen": -0.7037028670310974, |
|
"logps/rejected": -0.4578544497489929, |
|
"loss": 1.1183, |
|
"nll_loss": 0.9839082956314087, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.07037027925252914, |
|
"rewards/margins": -0.02458484098315239, |
|
"rewards/rejected": -0.04578544571995735, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5448623038199586, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 2.543946942302944e-07, |
|
"log_odds_chosen": -0.21979165077209473, |
|
"log_odds_ratio": -0.8726961016654968, |
|
"logits/chosen": -2.2551956176757812, |
|
"logits/rejected": -2.2191715240478516, |
|
"logps/chosen": -0.5772194862365723, |
|
"logps/rejected": -0.45551061630249023, |
|
"loss": 1.0123, |
|
"nll_loss": 0.9414900541305542, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.057721953839063644, |
|
"rewards/margins": -0.012170888483524323, |
|
"rewards/rejected": -0.04555106535553932, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5507847201658277, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 2.492244270208158e-07, |
|
"log_odds_chosen": -0.1632816195487976, |
|
"log_odds_ratio": -0.8366379737854004, |
|
"logits/chosen": -2.2645580768585205, |
|
"logits/rejected": -2.2385404109954834, |
|
"logps/chosen": -0.5705746412277222, |
|
"logps/rejected": -0.48298463225364685, |
|
"loss": 0.9953, |
|
"nll_loss": 0.9456483721733093, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.05705747753381729, |
|
"rewards/margins": -0.008759009651839733, |
|
"rewards/rejected": -0.048298463225364685, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5567071365116968, |
|
"grad_norm": 9.5, |
|
"learning_rate": 2.440544915464078e-07, |
|
"log_odds_chosen": -0.2142259180545807, |
|
"log_odds_ratio": -0.8674869537353516, |
|
"logits/chosen": -2.294877290725708, |
|
"logits/rejected": -2.2555816173553467, |
|
"logps/chosen": -0.5593573451042175, |
|
"logps/rejected": -0.45421138405799866, |
|
"loss": 1.0237, |
|
"nll_loss": 0.9162901043891907, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.055935733020305634, |
|
"rewards/margins": -0.01051459088921547, |
|
"rewards/rejected": -0.045421142131090164, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5626295528575659, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 2.3888709913850593e-07, |
|
"log_odds_chosen": -0.21557164192199707, |
|
"log_odds_ratio": -0.8706417083740234, |
|
"logits/chosen": -2.3428778648376465, |
|
"logits/rejected": -2.3050456047058105, |
|
"logps/chosen": -0.5851597785949707, |
|
"logps/rejected": -0.4760478436946869, |
|
"loss": 1.0825, |
|
"nll_loss": 0.9482911825180054, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.05851597711443901, |
|
"rewards/margins": -0.010911193676292896, |
|
"rewards/rejected": -0.04760478436946869, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.568551969203435, |
|
"grad_norm": 9.75, |
|
"learning_rate": 2.337244600408025e-07, |
|
"log_odds_chosen": -0.30868110060691833, |
|
"log_odds_ratio": -0.9379078149795532, |
|
"logits/chosen": -2.3101601600646973, |
|
"logits/rejected": -2.2805612087249756, |
|
"logps/chosen": -0.6376503109931946, |
|
"logps/rejected": -0.4714363217353821, |
|
"loss": 1.051, |
|
"nll_loss": 0.983268141746521, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.0637650191783905, |
|
"rewards/margins": -0.016621392220258713, |
|
"rewards/rejected": -0.04714363440871239, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5744743855493041, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 2.2856878246386085e-07, |
|
"log_odds_chosen": -0.20517487823963165, |
|
"log_odds_ratio": -0.8652151226997375, |
|
"logits/chosen": -2.306201457977295, |
|
"logits/rejected": -2.283665180206299, |
|
"logps/chosen": -0.5846830606460571, |
|
"logps/rejected": -0.4740404486656189, |
|
"loss": 1.0953, |
|
"nll_loss": 1.0276809930801392, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.058468304574489594, |
|
"rewards/margins": -0.011064260266721249, |
|
"rewards/rejected": -0.04740404710173607, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5803968018951732, |
|
"grad_norm": 13.0, |
|
"learning_rate": 2.2342227164060035e-07, |
|
"log_odds_chosen": -0.2963787019252777, |
|
"log_odds_ratio": -0.9264262318611145, |
|
"logits/chosen": -2.2660953998565674, |
|
"logits/rejected": -2.211947441101074, |
|
"logps/chosen": -0.6310227513313293, |
|
"logps/rejected": -0.4772140085697174, |
|
"loss": 1.0355, |
|
"nll_loss": 0.916420578956604, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.06310227513313293, |
|
"rewards/margins": -0.015380874276161194, |
|
"rewards/rejected": -0.04772140458226204, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5863192182410424, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 2.182871288830533e-07, |
|
"log_odds_chosen": -0.3251541554927826, |
|
"log_odds_ratio": -0.941790759563446, |
|
"logits/chosen": -2.293196439743042, |
|
"logits/rejected": -2.232034206390381, |
|
"logps/chosen": -0.6307833790779114, |
|
"logps/rejected": -0.4696255624294281, |
|
"loss": 1.0677, |
|
"nll_loss": 0.967657208442688, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.06307834386825562, |
|
"rewards/margins": -0.016115780919790268, |
|
"rewards/rejected": -0.04696255922317505, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5922416345869115, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 2.131655506408007e-07, |
|
"log_odds_chosen": -0.22425034642219543, |
|
"log_odds_ratio": -0.8798470497131348, |
|
"logits/chosen": -2.2940893173217773, |
|
"logits/rejected": -2.254329204559326, |
|
"logps/chosen": -0.5970818400382996, |
|
"logps/rejected": -0.48467540740966797, |
|
"loss": 1.0208, |
|
"nll_loss": 0.9316588640213013, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.059708189219236374, |
|
"rewards/margins": -0.011240655556321144, |
|
"rewards/rejected": -0.04846753552556038, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5981640509327806, |
|
"grad_norm": 9.5, |
|
"learning_rate": 2.0805972756148643e-07, |
|
"log_odds_chosen": -0.3093208074569702, |
|
"log_odds_ratio": -0.9420243501663208, |
|
"logits/chosen": -2.2883636951446533, |
|
"logits/rejected": -2.275327682495117, |
|
"logps/chosen": -0.6675941348075867, |
|
"logps/rejected": -0.47907954454421997, |
|
"loss": 1.0708, |
|
"nll_loss": 1.0012794733047485, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.06675940752029419, |
|
"rewards/margins": -0.01885146275162697, |
|
"rewards/rejected": -0.047907955944538116, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6040864672786497, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 2.0297184355381432e-07, |
|
"log_odds_chosen": -0.2639048993587494, |
|
"log_odds_ratio": -0.89494389295578, |
|
"logits/chosen": -2.304008722305298, |
|
"logits/rejected": -2.265723705291748, |
|
"logps/chosen": -0.5768560767173767, |
|
"logps/rejected": -0.4624248445034027, |
|
"loss": 1.0328, |
|
"nll_loss": 0.9577334523200989, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.05768561363220215, |
|
"rewards/margins": -0.011443129740655422, |
|
"rewards/rejected": -0.04624248296022415, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6100088836245188, |
|
"grad_norm": 9.125, |
|
"learning_rate": 1.9790407485342638e-07, |
|
"log_odds_chosen": -0.3557616174221039, |
|
"log_odds_ratio": -0.9650157690048218, |
|
"logits/chosen": -2.327831268310547, |
|
"logits/rejected": -2.2884087562561035, |
|
"logps/chosen": -0.6429619193077087, |
|
"logps/rejected": -0.4408210217952728, |
|
"loss": 1.0091, |
|
"nll_loss": 0.9397379755973816, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.06429620087146759, |
|
"rewards/margins": -0.02021409198641777, |
|
"rewards/rejected": -0.04408210515975952, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6159312999703879, |
|
"grad_norm": 10.75, |
|
"learning_rate": 1.928585890920641e-07, |
|
"log_odds_chosen": -0.1900234967470169, |
|
"log_odds_ratio": -0.8621436953544617, |
|
"logits/chosen": -2.2921512126922607, |
|
"logits/rejected": -2.2576987743377686, |
|
"logps/chosen": -0.5736020803451538, |
|
"logps/rejected": -0.46828731894493103, |
|
"loss": 1.0474, |
|
"nll_loss": 0.9162224531173706, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.0573602095246315, |
|
"rewards/margins": -0.010531473904848099, |
|
"rewards/rejected": -0.046828728169202805, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6218537163162571, |
|
"grad_norm": 11.875, |
|
"learning_rate": 1.8783754437040902e-07, |
|
"log_odds_chosen": -0.26852238178253174, |
|
"log_odds_ratio": -0.9126049280166626, |
|
"logits/chosen": -2.275580883026123, |
|
"logits/rejected": -2.2431647777557373, |
|
"logps/chosen": -0.5689065456390381, |
|
"logps/rejected": -0.44645556807518005, |
|
"loss": 1.0095, |
|
"nll_loss": 0.9046837091445923, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.05689065903425217, |
|
"rewards/margins": -0.012245100922882557, |
|
"rewards/rejected": -0.044645555317401886, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6277761326621262, |
|
"grad_norm": 9.25, |
|
"learning_rate": 1.8284308833500118e-07, |
|
"log_odds_chosen": -0.2125154435634613, |
|
"log_odds_ratio": -0.8751262426376343, |
|
"logits/chosen": -2.277667760848999, |
|
"logits/rejected": -2.253131866455078, |
|
"logps/chosen": -0.5812402963638306, |
|
"logps/rejected": -0.47419658303260803, |
|
"loss": 1.0476, |
|
"nll_loss": 0.93915194272995, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.05812402814626694, |
|
"rewards/margins": -0.010704366490244865, |
|
"rewards/rejected": -0.04741965979337692, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6336985490079953, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 1.7787735725962756e-07, |
|
"log_odds_chosen": -0.27183157205581665, |
|
"log_odds_ratio": -0.9005556106567383, |
|
"logits/chosen": -2.2851767539978027, |
|
"logits/rejected": -2.2494091987609863, |
|
"logps/chosen": -0.613685667514801, |
|
"logps/rejected": -0.47953805327415466, |
|
"loss": 1.0919, |
|
"nll_loss": 0.9954058527946472, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.06136856600642204, |
|
"rewards/margins": -0.013414761051535606, |
|
"rewards/rejected": -0.047953806817531586, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6396209653538644, |
|
"grad_norm": 9.375, |
|
"learning_rate": 1.7294247513157616e-07, |
|
"log_odds_chosen": -0.22400331497192383, |
|
"log_odds_ratio": -0.8672366142272949, |
|
"logits/chosen": -2.3089351654052734, |
|
"logits/rejected": -2.2596447467803955, |
|
"logps/chosen": -0.5711158514022827, |
|
"logps/rejected": -0.46820420026779175, |
|
"loss": 1.0251, |
|
"nll_loss": 0.960826575756073, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.05711158365011215, |
|
"rewards/margins": -0.010291163809597492, |
|
"rewards/rejected": -0.046820417046546936, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6455433816997335, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 1.6804055274314494e-07, |
|
"log_odds_chosen": -0.19274529814720154, |
|
"log_odds_ratio": -0.8532935380935669, |
|
"logits/chosen": -2.270355224609375, |
|
"logits/rejected": -2.248356342315674, |
|
"logps/chosen": -0.5621662735939026, |
|
"logps/rejected": -0.47271862626075745, |
|
"loss": 1.0217, |
|
"nll_loss": 0.9073405265808105, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.056216634809970856, |
|
"rewards/margins": -0.008944764733314514, |
|
"rewards/rejected": -0.04727186635136604, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6514657980456026, |
|
"grad_norm": 10.25, |
|
"learning_rate": 1.6317368678879496e-07, |
|
"log_odds_chosen": -0.20030847191810608, |
|
"log_odds_ratio": -0.8486258387565613, |
|
"logits/chosen": -2.3088138103485107, |
|
"logits/rejected": -2.27048659324646, |
|
"logps/chosen": -0.5797799825668335, |
|
"logps/rejected": -0.4854944348335266, |
|
"loss": 1.0725, |
|
"nll_loss": 0.9621385335922241, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.05797800421714783, |
|
"rewards/margins": -0.009428557008504868, |
|
"rewards/rejected": -0.04854945093393326, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6573882143914718, |
|
"grad_norm": 12.125, |
|
"learning_rate": 1.5834395896833281e-07, |
|
"log_odds_chosen": -0.3109692335128784, |
|
"log_odds_ratio": -0.9263485670089722, |
|
"logits/chosen": -2.3202879428863525, |
|
"logits/rejected": -2.265725612640381, |
|
"logps/chosen": -0.6155823469161987, |
|
"logps/rejected": -0.4582076966762543, |
|
"loss": 1.0499, |
|
"nll_loss": 0.9659247398376465, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.06155823543667793, |
|
"rewards/margins": -0.015737462788820267, |
|
"rewards/rejected": -0.045820772647857666, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6633106307373409, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 1.535534350965075e-07, |
|
"log_odds_chosen": -0.25020501017570496, |
|
"log_odds_ratio": -0.8859984278678894, |
|
"logits/chosen": -2.3179831504821777, |
|
"logits/rejected": -2.3054070472717285, |
|
"logps/chosen": -0.5626355409622192, |
|
"logps/rejected": -0.434339702129364, |
|
"loss": 1.0081, |
|
"nll_loss": 0.9209376573562622, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.056263554841279984, |
|
"rewards/margins": -0.012829584069550037, |
|
"rewards/rejected": -0.04343396797776222, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66923304708321, |
|
"grad_norm": 15.625, |
|
"learning_rate": 1.4880416421940154e-07, |
|
"log_odds_chosen": -0.23923833668231964, |
|
"log_odds_ratio": -0.8853415250778198, |
|
"logits/chosen": -2.26355242729187, |
|
"logits/rejected": -2.240990161895752, |
|
"logps/chosen": -0.6214331388473511, |
|
"logps/rejected": -0.4842914938926697, |
|
"loss": 1.1113, |
|
"nll_loss": 1.0326354503631592, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.062143318355083466, |
|
"rewards/margins": -0.01371416263282299, |
|
"rewards/rejected": -0.048429153859615326, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6751554634290791, |
|
"grad_norm": 12.75, |
|
"learning_rate": 1.4409817773799459e-07, |
|
"log_odds_chosen": -0.23250596225261688, |
|
"log_odds_ratio": -0.8853020668029785, |
|
"logits/chosen": -2.288491725921631, |
|
"logits/rejected": -2.24708890914917, |
|
"logps/chosen": -0.6100078225135803, |
|
"logps/rejected": -0.4815722405910492, |
|
"loss": 1.0552, |
|
"nll_loss": 0.9337055087089539, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.061000775545835495, |
|
"rewards/margins": -0.01284355204552412, |
|
"rewards/rejected": -0.0481572225689888, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6810778797749482, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 1.3943748853927385e-07, |
|
"log_odds_chosen": -0.3103570342063904, |
|
"log_odds_ratio": -0.9324914216995239, |
|
"logits/chosen": -2.28434419631958, |
|
"logits/rejected": -2.277893543243408, |
|
"logps/chosen": -0.64482182264328, |
|
"logps/rejected": -0.46989989280700684, |
|
"loss": 1.035, |
|
"nll_loss": 0.934810996055603, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.064482182264328, |
|
"rewards/margins": -0.017492195591330528, |
|
"rewards/rejected": -0.046989988535642624, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6870002961208173, |
|
"grad_norm": 11.375, |
|
"learning_rate": 1.3482409013526436e-07, |
|
"log_odds_chosen": -0.3323788642883301, |
|
"log_odds_ratio": -0.9415693283081055, |
|
"logits/chosen": -2.272247791290283, |
|
"logits/rejected": -2.2672269344329834, |
|
"logps/chosen": -0.6134747862815857, |
|
"logps/rejected": -0.4571937918663025, |
|
"loss": 1.0638, |
|
"nll_loss": 0.9829813241958618, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.06134747713804245, |
|
"rewards/margins": -0.01562810130417347, |
|
"rewards/rejected": -0.04571938142180443, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6929227124666865, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.302599558103456e-07, |
|
"log_odds_chosen": -0.23517660796642303, |
|
"log_odds_ratio": -0.8992069363594055, |
|
"logits/chosen": -2.3287193775177, |
|
"logits/rejected": -2.293454885482788, |
|
"logps/chosen": -0.6200941801071167, |
|
"logps/rejected": -0.4878036081790924, |
|
"loss": 1.0413, |
|
"nll_loss": 0.9660770297050476, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.06200941652059555, |
|
"rewards/margins": -0.013229051604866982, |
|
"rewards/rejected": -0.04878035932779312, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6988451288125556, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 1.257470377772214e-07, |
|
"log_odds_chosen": -0.27837398648262024, |
|
"log_odds_ratio": -0.9113019704818726, |
|
"logits/chosen": -2.3072619438171387, |
|
"logits/rejected": -2.282047748565674, |
|
"logps/chosen": -0.5952633023262024, |
|
"logps/rejected": -0.4496152400970459, |
|
"loss": 1.0661, |
|
"nll_loss": 0.9518778920173645, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.05952633172273636, |
|
"rewards/margins": -0.014564801938831806, |
|
"rewards/rejected": -0.04496152698993683, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7047675451584247, |
|
"grad_norm": 13.5, |
|
"learning_rate": 1.2128726634190046e-07, |
|
"log_odds_chosen": -0.26337355375289917, |
|
"log_odds_ratio": -0.8862990140914917, |
|
"logits/chosen": -2.3180294036865234, |
|
"logits/rejected": -2.274146556854248, |
|
"logps/chosen": -0.5859608054161072, |
|
"logps/rejected": -0.44980812072753906, |
|
"loss": 1.0204, |
|
"nll_loss": 0.91375333070755, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.05859608203172684, |
|
"rewards/margins": -0.01361527293920517, |
|
"rewards/rejected": -0.044980812817811966, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7106899615042938, |
|
"grad_norm": 13.3125, |
|
"learning_rate": 1.1688254907804992e-07, |
|
"log_odds_chosen": -0.2645830512046814, |
|
"log_odds_ratio": -0.9049927592277527, |
|
"logits/chosen": -2.2710115909576416, |
|
"logits/rejected": -2.2327637672424316, |
|
"logps/chosen": -0.6210035085678101, |
|
"logps/rejected": -0.48435431718826294, |
|
"loss": 1.0683, |
|
"nll_loss": 0.9852622747421265, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.06210034340620041, |
|
"rewards/margins": -0.01366492174565792, |
|
"rewards/rejected": -0.048435427248477936, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7166123778501629, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.1253477001106956e-07, |
|
"log_odds_chosen": -0.18010739982128143, |
|
"log_odds_ratio": -0.848807156085968, |
|
"logits/chosen": -2.2503340244293213, |
|
"logits/rejected": -2.214433431625366, |
|
"logps/chosen": -0.5777139663696289, |
|
"logps/rejected": -0.48649734258651733, |
|
"loss": 1.0408, |
|
"nll_loss": 0.9145431518554688, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.05777139216661453, |
|
"rewards/margins": -0.009121658280491829, |
|
"rewards/rejected": -0.04864973947405815, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.722534794196032, |
|
"grad_norm": 14.1875, |
|
"learning_rate": 1.0824578881224065e-07, |
|
"log_odds_chosen": -0.14203877747058868, |
|
"log_odds_ratio": -0.8198834657669067, |
|
"logits/chosen": -2.323948621749878, |
|
"logits/rejected": -2.3119778633117676, |
|
"logps/chosen": -0.5389841794967651, |
|
"logps/rejected": -0.4636968672275543, |
|
"loss": 0.9852, |
|
"nll_loss": 0.871512770652771, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.053898416459560394, |
|
"rewards/margins": -0.007528733462095261, |
|
"rewards/rejected": -0.046369682997465134, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.728457210541901, |
|
"grad_norm": 10.25, |
|
"learning_rate": 1.0401744000328918e-07, |
|
"log_odds_chosen": -0.19983641803264618, |
|
"log_odds_ratio": -0.8725547790527344, |
|
"logits/chosen": -2.268932342529297, |
|
"logits/rejected": -2.2664635181427, |
|
"logps/chosen": -0.5955653786659241, |
|
"logps/rejected": -0.4910568296909332, |
|
"loss": 1.0167, |
|
"nll_loss": 0.9245740175247192, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.059556543827056885, |
|
"rewards/margins": -0.010450851172208786, |
|
"rewards/rejected": -0.0491056926548481, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7343796268877703, |
|
"grad_norm": 11.25, |
|
"learning_rate": 9.985153217170902e-08, |
|
"log_odds_chosen": -0.27591392397880554, |
|
"log_odds_ratio": -0.9048240780830383, |
|
"logits/chosen": -2.3324825763702393, |
|
"logits/rejected": -2.3199105262756348, |
|
"logps/chosen": -0.613168478012085, |
|
"logps/rejected": -0.47140389680862427, |
|
"loss": 1.1081, |
|
"nll_loss": 1.0194193124771118, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.061316847801208496, |
|
"rewards/margins": -0.01417645812034607, |
|
"rewards/rejected": -0.047140393406152725, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7403020432336394, |
|
"grad_norm": 12.25, |
|
"learning_rate": 9.574984719717553e-08, |
|
"log_odds_chosen": -0.24321213364601135, |
|
"log_odds_ratio": -0.89483243227005, |
|
"logits/chosen": -2.3112952709198, |
|
"logits/rejected": -2.2951555252075195, |
|
"logps/chosen": -0.5895348191261292, |
|
"logps/rejected": -0.4669637680053711, |
|
"loss": 1.0306, |
|
"nll_loss": 0.9830119013786316, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.05895348638296127, |
|
"rewards/margins": -0.012257112190127373, |
|
"rewards/rejected": -0.04669637233018875, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7462244595795084, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 9.171413948938459e-08, |
|
"log_odds_chosen": -0.2236686646938324, |
|
"log_odds_ratio": -0.879412829875946, |
|
"logits/chosen": -2.3061726093292236, |
|
"logits/rejected": -2.254133701324463, |
|
"logps/chosen": -0.6122428178787231, |
|
"logps/rejected": -0.49692878127098083, |
|
"loss": 1.0596, |
|
"nll_loss": 0.9902396202087402, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.061224281787872314, |
|
"rewards/margins": -0.011531401425600052, |
|
"rewards/rejected": -0.04969288408756256, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7521468759253775, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 8.774613523764049e-08, |
|
"log_odds_chosen": -0.26704955101013184, |
|
"log_odds_ratio": -0.8915314674377441, |
|
"logits/chosen": -2.2866809368133545, |
|
"logits/rejected": -2.239720582962036, |
|
"logps/chosen": -0.5904482007026672, |
|
"logps/rejected": -0.4593755304813385, |
|
"loss": 1.0287, |
|
"nll_loss": 0.9099699854850769, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.059044819325208664, |
|
"rewards/margins": -0.013107270002365112, |
|
"rewards/rejected": -0.04593754559755325, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.7580692922712466, |
|
"grad_norm": 11.125, |
|
"learning_rate": 8.384753167251412e-08, |
|
"log_odds_chosen": -0.2359321415424347, |
|
"log_odds_ratio": -0.8834274411201477, |
|
"logits/chosen": -2.241650104522705, |
|
"logits/rejected": -2.2175180912017822, |
|
"logps/chosen": -0.5696910619735718, |
|
"logps/rejected": -0.4511106610298157, |
|
"loss": 0.9877, |
|
"nll_loss": 0.8763992190361023, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.05696910619735718, |
|
"rewards/margins": -0.01185804232954979, |
|
"rewards/rejected": -0.045111071318387985, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.7639917086171157, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 8.001999633988942e-08, |
|
"log_odds_chosen": -0.26344627141952515, |
|
"log_odds_ratio": -0.8965330123901367, |
|
"logits/chosen": -2.317347764968872, |
|
"logits/rejected": -2.2693257331848145, |
|
"logps/chosen": -0.5864616632461548, |
|
"logps/rejected": -0.45855003595352173, |
|
"loss": 0.9993, |
|
"nll_loss": 0.9034452438354492, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.058646153658628464, |
|
"rewards/margins": -0.012791156768798828, |
|
"rewards/rejected": -0.04585500434041023, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.769914124962985, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 7.62651663877042e-08, |
|
"log_odds_chosen": -0.17867620289325714, |
|
"log_odds_ratio": -0.8561042547225952, |
|
"logits/chosen": -2.2582385540008545, |
|
"logits/rejected": -2.232391357421875, |
|
"logps/chosen": -0.5790480971336365, |
|
"logps/rejected": -0.4850679337978363, |
|
"loss": 1.0993, |
|
"nll_loss": 0.9781789779663086, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.057904817163944244, |
|
"rewards/margins": -0.009398018009960651, |
|
"rewards/rejected": -0.04850679263472557, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.775836541308854, |
|
"grad_norm": 9.0, |
|
"learning_rate": 7.258464786569549e-08, |
|
"log_odds_chosen": -0.2144562005996704, |
|
"log_odds_ratio": -0.8685463070869446, |
|
"logits/chosen": -2.322035551071167, |
|
"logits/rejected": -2.2717068195343018, |
|
"logps/chosen": -0.5770824551582336, |
|
"logps/rejected": -0.47164034843444824, |
|
"loss": 1.0633, |
|
"nll_loss": 0.9638098478317261, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.057708241045475006, |
|
"rewards/margins": -0.010544205084443092, |
|
"rewards/rejected": -0.04716403782367706, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7817589576547231, |
|
"grad_norm": 12.375, |
|
"learning_rate": 6.898001503844483e-08, |
|
"log_odds_chosen": -0.3992167115211487, |
|
"log_odds_ratio": -1.0115876197814941, |
|
"logits/chosen": -2.3506951332092285, |
|
"logits/rejected": -2.3084568977355957, |
|
"logps/chosen": -0.7245315313339233, |
|
"logps/rejected": -0.4761766493320465, |
|
"loss": 1.0509, |
|
"nll_loss": 1.0009998083114624, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.07245315611362457, |
|
"rewards/margins": -0.0248354934155941, |
|
"rewards/rejected": -0.04761766642332077, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7876813740005922, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 6.545280971202014e-08, |
|
"log_odds_chosen": -0.17274455726146698, |
|
"log_odds_ratio": -0.8463727831840515, |
|
"logits/chosen": -2.310338020324707, |
|
"logits/rejected": -2.2806801795959473, |
|
"logps/chosen": -0.5623282194137573, |
|
"logps/rejected": -0.46932634711265564, |
|
"loss": 1.0128, |
|
"nll_loss": 0.9555832147598267, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.05623283237218857, |
|
"rewards/margins": -0.009300192818045616, |
|
"rewards/rejected": -0.046932633966207504, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7936037903464613, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 6.200454057450022e-08, |
|
"log_odds_chosen": -0.2566189169883728, |
|
"log_odds_ratio": -0.8830870389938354, |
|
"logits/chosen": -2.2640976905822754, |
|
"logits/rejected": -2.2190680503845215, |
|
"logps/chosen": -0.6031737327575684, |
|
"logps/rejected": -0.4697316586971283, |
|
"loss": 1.0756, |
|
"nll_loss": 0.9159650802612305, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.060317374765872955, |
|
"rewards/margins": -0.01334420870989561, |
|
"rewards/rejected": -0.04697316139936447, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7995262066923304, |
|
"grad_norm": 12.0, |
|
"learning_rate": 5.863668255066492e-08, |
|
"log_odds_chosen": -0.2177290916442871, |
|
"log_odds_ratio": -0.8585535287857056, |
|
"logits/chosen": -2.262441396713257, |
|
"logits/rejected": -2.231968402862549, |
|
"logps/chosen": -0.5860260128974915, |
|
"logps/rejected": -0.47981762886047363, |
|
"loss": 1.0081, |
|
"nll_loss": 0.9461213946342468, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -0.05860259383916855, |
|
"rewards/margins": -0.010620838031172752, |
|
"rewards/rejected": -0.047981761395931244, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8054486230381996, |
|
"grad_norm": 9.625, |
|
"learning_rate": 5.53506761711274e-08, |
|
"log_odds_chosen": -0.21258850395679474, |
|
"log_odds_ratio": -0.8654868006706238, |
|
"logits/chosen": -2.2940022945404053, |
|
"logits/rejected": -2.264361619949341, |
|
"logps/chosen": -0.5948741436004639, |
|
"logps/rejected": -0.48127132654190063, |
|
"loss": 1.0435, |
|
"nll_loss": 1.0004308223724365, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.059487421065568924, |
|
"rewards/margins": -0.0113602876663208, |
|
"rewards/rejected": -0.04812713339924812, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8113710393840687, |
|
"grad_norm": 14.375, |
|
"learning_rate": 5.2147926956177174e-08, |
|
"log_odds_chosen": -0.3361436724662781, |
|
"log_odds_ratio": -0.9543386697769165, |
|
"logits/chosen": -2.2842912673950195, |
|
"logits/rejected": -2.2753098011016846, |
|
"logps/chosen": -0.6304486989974976, |
|
"logps/rejected": -0.4559609293937683, |
|
"loss": 1.0422, |
|
"nll_loss": 0.9697739481925964, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.06304488331079483, |
|
"rewards/margins": -0.0174487866461277, |
|
"rewards/rejected": -0.04559609293937683, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8172934557299378, |
|
"grad_norm": 22.625, |
|
"learning_rate": 4.902980481459834e-08, |
|
"log_odds_chosen": -0.18400034308433533, |
|
"log_odds_ratio": -0.8533352017402649, |
|
"logits/chosen": -2.267984390258789, |
|
"logits/rejected": -2.240002155303955, |
|
"logps/chosen": -0.5833351016044617, |
|
"logps/rejected": -0.4882822632789612, |
|
"loss": 1.0013, |
|
"nll_loss": 0.9279516935348511, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.058333516120910645, |
|
"rewards/margins": -0.009505288675427437, |
|
"rewards/rejected": -0.04882822558283806, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.8232158720758069, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 4.5997643457719646e-08, |
|
"log_odds_chosen": -0.2714422643184662, |
|
"log_odds_ratio": -0.8982048034667969, |
|
"logits/chosen": -2.2855401039123535, |
|
"logits/rejected": -2.2796995639801025, |
|
"logps/chosen": -0.5933629274368286, |
|
"logps/rejected": -0.45899391174316406, |
|
"loss": 0.9938, |
|
"nll_loss": 0.9157652854919434, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.05933629721403122, |
|
"rewards/margins": -0.01343690324574709, |
|
"rewards/rejected": -0.045899391174316406, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.829138288421676, |
|
"grad_norm": 11.125, |
|
"learning_rate": 4.305273982894772e-08, |
|
"log_odds_chosen": -0.24461349844932556, |
|
"log_odds_ratio": -0.8896273374557495, |
|
"logits/chosen": -2.3211405277252197, |
|
"logits/rejected": -2.279554843902588, |
|
"logps/chosen": -0.6189180612564087, |
|
"logps/rejected": -0.4841720461845398, |
|
"loss": 1.041, |
|
"nll_loss": 0.9456349611282349, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.06189180538058281, |
|
"rewards/margins": -0.013474604114890099, |
|
"rewards/rejected": -0.04841720312833786, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8350607047675451, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 4.0196353549026786e-08, |
|
"log_odds_chosen": -0.1991504579782486, |
|
"log_odds_ratio": -0.8548718690872192, |
|
"logits/chosen": -2.288534641265869, |
|
"logits/rejected": -2.2532122135162354, |
|
"logps/chosen": -0.5849851965904236, |
|
"logps/rejected": -0.48299694061279297, |
|
"loss": 1.0681, |
|
"nll_loss": 1.0149555206298828, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.05849852040410042, |
|
"rewards/margins": -0.010198831558227539, |
|
"rewards/rejected": -0.04829969257116318, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8409831211134142, |
|
"grad_norm": 9.625, |
|
"learning_rate": 3.742970637726181e-08, |
|
"log_odds_chosen": -0.09389691054821014, |
|
"log_odds_ratio": -0.8085994720458984, |
|
"logits/chosen": -2.3118512630462646, |
|
"logits/rejected": -2.2662172317504883, |
|
"logps/chosen": -0.5374116897583008, |
|
"logps/rejected": -0.4831947386264801, |
|
"loss": 1.0166, |
|
"nll_loss": 0.9142959713935852, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.05374116823077202, |
|
"rewards/margins": -0.005421696230769157, |
|
"rewards/rejected": -0.04831947013735771, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.8469055374592834, |
|
"grad_norm": 13.0, |
|
"learning_rate": 3.4753981688937284e-08, |
|
"log_odds_chosen": -0.23033122718334198, |
|
"log_odds_ratio": -0.8797691464424133, |
|
"logits/chosen": -2.2840065956115723, |
|
"logits/rejected": -2.2577414512634277, |
|
"logps/chosen": -0.5791336297988892, |
|
"logps/rejected": -0.46595969796180725, |
|
"loss": 1.0562, |
|
"nll_loss": 0.9663812518119812, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.05791335552930832, |
|
"rewards/margins": -0.011317392811179161, |
|
"rewards/rejected": -0.046595968306064606, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.8528279538051525, |
|
"grad_norm": 16.5, |
|
"learning_rate": 3.217032396915265e-08, |
|
"log_odds_chosen": -0.28934675455093384, |
|
"log_odds_ratio": -0.925268292427063, |
|
"logits/chosen": -2.294243335723877, |
|
"logits/rejected": -2.26255464553833, |
|
"logps/chosen": -0.6588538885116577, |
|
"logps/rejected": -0.4810880720615387, |
|
"loss": 1.0625, |
|
"nll_loss": 0.9974772334098816, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.06588538736104965, |
|
"rewards/margins": -0.01777658611536026, |
|
"rewards/rejected": -0.04810880497097969, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.8587503701510216, |
|
"grad_norm": 13.5625, |
|
"learning_rate": 2.9679838323293404e-08, |
|
"log_odds_chosen": -0.30326423048973083, |
|
"log_odds_ratio": -0.9402921795845032, |
|
"logits/chosen": -2.285403251647949, |
|
"logits/rejected": -2.2570960521698, |
|
"logps/chosen": -0.6499019265174866, |
|
"logps/rejected": -0.485442578792572, |
|
"loss": 1.0074, |
|
"nll_loss": 0.9335571527481079, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.06499020010232925, |
|
"rewards/margins": -0.016445934772491455, |
|
"rewards/rejected": -0.0485442578792572, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8646727864968907, |
|
"grad_norm": 11.875, |
|
"learning_rate": 2.728359000434488e-08, |
|
"log_odds_chosen": -0.25829392671585083, |
|
"log_odds_ratio": -0.8930153846740723, |
|
"logits/chosen": -2.316516399383545, |
|
"logits/rejected": -2.283731460571289, |
|
"logps/chosen": -0.5545108318328857, |
|
"logps/rejected": -0.4498627185821533, |
|
"loss": 1.048, |
|
"nll_loss": 0.9053192138671875, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.055451083928346634, |
|
"rewards/margins": -0.010464807972311974, |
|
"rewards/rejected": -0.04498627781867981, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.8705952028427598, |
|
"grad_norm": 10.75, |
|
"learning_rate": 2.498260395725302e-08, |
|
"log_odds_chosen": -0.25851163268089294, |
|
"log_odds_ratio": -0.8944876790046692, |
|
"logits/chosen": -2.281040906906128, |
|
"logits/rejected": -2.26870059967041, |
|
"logps/chosen": -0.6054626703262329, |
|
"logps/rejected": -0.48731446266174316, |
|
"loss": 1.0483, |
|
"nll_loss": 0.9450349807739258, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.060546260327100754, |
|
"rewards/margins": -0.011814813129603863, |
|
"rewards/rejected": -0.048731446266174316, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8765176191886289, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 2.2777864380525426e-08, |
|
"log_odds_chosen": -0.20190663635730743, |
|
"log_odds_ratio": -0.8694218397140503, |
|
"logits/chosen": -2.288378953933716, |
|
"logits/rejected": -2.2683846950531006, |
|
"logps/chosen": -0.5955201387405396, |
|
"logps/rejected": -0.4755355417728424, |
|
"loss": 1.0093, |
|
"nll_loss": 0.8863022923469543, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.059552013874053955, |
|
"rewards/margins": -0.01199845876544714, |
|
"rewards/rejected": -0.04755355045199394, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.8824400355344981, |
|
"grad_norm": 11.125, |
|
"learning_rate": 2.0670314305261423e-08, |
|
"log_odds_chosen": -0.21881277859210968, |
|
"log_odds_ratio": -0.8681440353393555, |
|
"logits/chosen": -2.3011648654937744, |
|
"logits/rejected": -2.2739992141723633, |
|
"logps/chosen": -0.5647403597831726, |
|
"logps/rejected": -0.46096763014793396, |
|
"loss": 0.9903, |
|
"nll_loss": 0.9155017733573914, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.05647404119372368, |
|
"rewards/margins": -0.010377271100878716, |
|
"rewards/rejected": -0.046096768230199814, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8883624518803672, |
|
"grad_norm": 15.4375, |
|
"learning_rate": 1.866085519178995e-08, |
|
"log_odds_chosen": -0.21367135643959045, |
|
"log_odds_ratio": -0.8818863034248352, |
|
"logits/chosen": -2.283823013305664, |
|
"logits/rejected": -2.262935161590576, |
|
"logps/chosen": -0.6190184354782104, |
|
"logps/rejected": -0.518616259098053, |
|
"loss": 1.0823, |
|
"nll_loss": 1.0067201852798462, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.06190184876322746, |
|
"rewards/margins": -0.010040223598480225, |
|
"rewards/rejected": -0.05186162516474724, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8942848682262363, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 1.675034654408894e-08, |
|
"log_odds_chosen": -0.2969823479652405, |
|
"log_odds_ratio": -0.9049533605575562, |
|
"logits/chosen": -2.3211445808410645, |
|
"logits/rejected": -2.293593168258667, |
|
"logps/chosen": -0.5710967779159546, |
|
"logps/rejected": -0.44247856736183167, |
|
"loss": 1.0186, |
|
"nll_loss": 0.9544011354446411, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.057109683752059937, |
|
"rewards/margins": -0.012861823663115501, |
|
"rewards/rejected": -0.044247858226299286, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.9002072845721054, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 1.4939605542150595e-08, |
|
"log_odds_chosen": -0.20066659152507782, |
|
"log_odds_ratio": -0.880477249622345, |
|
"logits/chosen": -2.306097984313965, |
|
"logits/rejected": -2.2691056728363037, |
|
"logps/chosen": -0.630598247051239, |
|
"logps/rejected": -0.5075589418411255, |
|
"loss": 1.0954, |
|
"nll_loss": 0.9971143007278442, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.06305982172489166, |
|
"rewards/margins": -0.01230393536388874, |
|
"rewards/rejected": -0.05075589567422867, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.9061297009179745, |
|
"grad_norm": 11.0, |
|
"learning_rate": 1.3229406692449791e-08, |
|
"log_odds_chosen": -0.14233054220676422, |
|
"log_odds_ratio": -0.8427847623825073, |
|
"logits/chosen": -2.2426674365997314, |
|
"logits/rejected": -2.2174274921417236, |
|
"logps/chosen": -0.5756295919418335, |
|
"logps/rejected": -0.4939804971218109, |
|
"loss": 1.0628, |
|
"nll_loss": 0.9542373418807983, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.05756296589970589, |
|
"rewards/margins": -0.008164914324879646, |
|
"rewards/rejected": -0.04939804598689079, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9120521172638436, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 1.162048149666503e-08, |
|
"log_odds_chosen": -0.209940105676651, |
|
"log_odds_ratio": -0.8944632411003113, |
|
"logits/chosen": -2.2973880767822266, |
|
"logits/rejected": -2.255645990371704, |
|
"logps/chosen": -0.6111503839492798, |
|
"logps/rejected": -0.49909108877182007, |
|
"loss": 1.0425, |
|
"nll_loss": 0.9487366676330566, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.061115045100450516, |
|
"rewards/margins": -0.011205929331481457, |
|
"rewards/rejected": -0.049909114837646484, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9179745336097128, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 1.0113518138794047e-08, |
|
"log_odds_chosen": -0.25878992676734924, |
|
"log_odds_ratio": -0.899122416973114, |
|
"logits/chosen": -2.2492969036102295, |
|
"logits/rejected": -2.2273764610290527, |
|
"logps/chosen": -0.5986303091049194, |
|
"logps/rejected": -0.47187572717666626, |
|
"loss": 1.0612, |
|
"nll_loss": 0.9412651062011719, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.059863023459911346, |
|
"rewards/margins": -0.012675456702709198, |
|
"rewards/rejected": -0.04718757048249245, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9238969499555819, |
|
"grad_norm": 10.375, |
|
"learning_rate": 8.709161190797565e-09, |
|
"log_odds_chosen": -0.14045746624469757, |
|
"log_odds_ratio": -0.8357732892036438, |
|
"logits/chosen": -2.3169333934783936, |
|
"logits/rejected": -2.2879374027252197, |
|
"logps/chosen": -0.5618830919265747, |
|
"logps/rejected": -0.4810701012611389, |
|
"loss": 1.031, |
|
"nll_loss": 0.9191296696662903, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.05618830770254135, |
|
"rewards/margins": -0.008081300184130669, |
|
"rewards/rejected": -0.04810700938105583, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.929819366301451, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 7.408011336897141e-09, |
|
"log_odds_chosen": -0.323073148727417, |
|
"log_odds_ratio": -0.9851021766662598, |
|
"logits/chosen": -2.3374483585357666, |
|
"logits/rejected": -2.3257203102111816, |
|
"logps/chosen": -0.7131141424179077, |
|
"logps/rejected": -0.4996616244316101, |
|
"loss": 1.0776, |
|
"nll_loss": 1.01613450050354, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.07131141424179077, |
|
"rewards/margins": -0.02134525403380394, |
|
"rewards/rejected": -0.04996616020798683, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.9357417826473201, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 6.210625116645135e-09, |
|
"log_odds_chosen": -0.32444125413894653, |
|
"log_odds_ratio": -0.9329547882080078, |
|
"logits/chosen": -2.342031955718994, |
|
"logits/rejected": -2.3026318550109863, |
|
"logps/chosen": -0.6195459365844727, |
|
"logps/rejected": -0.45777615904808044, |
|
"loss": 1.0033, |
|
"nll_loss": 0.8760407567024231, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.061954595148563385, |
|
"rewards/margins": -0.0161769799888134, |
|
"rewards/rejected": -0.04577761888504028, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.9416641989931892, |
|
"grad_norm": 10.625, |
|
"learning_rate": 5.117514686876378e-09, |
|
"log_odds_chosen": -0.20949645340442657, |
|
"log_odds_ratio": -0.8756229281425476, |
|
"logits/chosen": -2.30104398727417, |
|
"logits/rejected": -2.2671799659729004, |
|
"logps/chosen": -0.5797516703605652, |
|
"logps/rejected": -0.4763546586036682, |
|
"loss": 1.0455, |
|
"nll_loss": 0.9568120837211609, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -0.057975172996520996, |
|
"rewards/margins": -0.010339704342186451, |
|
"rewards/rejected": -0.04763546586036682, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.9475866153390583, |
|
"grad_norm": 10.0, |
|
"learning_rate": 4.1291476026441565e-09, |
|
"log_odds_chosen": -0.14046767354011536, |
|
"log_odds_ratio": -0.8268812894821167, |
|
"logits/chosen": -2.2659006118774414, |
|
"logits/rejected": -2.245576858520508, |
|
"logps/chosen": -0.5697029829025269, |
|
"logps/rejected": -0.4846652150154114, |
|
"loss": 0.9915, |
|
"nll_loss": 0.8766274452209473, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.056970298290252686, |
|
"rewards/margins": -0.00850378442555666, |
|
"rewards/rejected": -0.0484665185213089, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9535090316849274, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 3.2459466172331253e-09, |
|
"log_odds_chosen": -0.25180304050445557, |
|
"log_odds_ratio": -0.9306501150131226, |
|
"logits/chosen": -2.274780035018921, |
|
"logits/rejected": -2.255272626876831, |
|
"logps/chosen": -0.6529628038406372, |
|
"logps/rejected": -0.48409169912338257, |
|
"loss": 1.0873, |
|
"nll_loss": 0.9862693548202515, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.06529629230499268, |
|
"rewards/margins": -0.016887117177248, |
|
"rewards/rejected": -0.048409171402454376, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.9594314480307966, |
|
"grad_norm": 13.125, |
|
"learning_rate": 2.4682895013354854e-09, |
|
"log_odds_chosen": -0.230398491024971, |
|
"log_odds_ratio": -0.8930587768554688, |
|
"logits/chosen": -2.2783544063568115, |
|
"logits/rejected": -2.2587246894836426, |
|
"logps/chosen": -0.6128379702568054, |
|
"logps/rejected": -0.46944743394851685, |
|
"loss": 1.0177, |
|
"nll_loss": 0.9610903859138489, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.06128380447626114, |
|
"rewards/margins": -0.014339059591293335, |
|
"rewards/rejected": -0.0469447486102581, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.9653538643766657, |
|
"grad_norm": 25.0, |
|
"learning_rate": 1.7965088814675677e-09, |
|
"log_odds_chosen": -0.3568347692489624, |
|
"log_odds_ratio": -0.9671844244003296, |
|
"logits/chosen": -2.2762491703033447, |
|
"logits/rejected": -2.2589855194091797, |
|
"logps/chosen": -0.6517866253852844, |
|
"logps/rejected": -0.4649588167667389, |
|
"loss": 1.037, |
|
"nll_loss": 0.9754410982131958, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.06517866253852844, |
|
"rewards/margins": -0.018682777881622314, |
|
"rewards/rejected": -0.04649588465690613, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.9712762807225348, |
|
"grad_norm": 10.8125, |
|
"learning_rate": 1.2308920976958348e-09, |
|
"log_odds_chosen": -0.1785418540239334, |
|
"log_odds_ratio": -0.8583124876022339, |
|
"logits/chosen": -2.2591869831085205, |
|
"logits/rejected": -2.2376914024353027, |
|
"logps/chosen": -0.5955510139465332, |
|
"logps/rejected": -0.4926881790161133, |
|
"loss": 1.0026, |
|
"nll_loss": 0.9147430658340454, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.05955510213971138, |
|
"rewards/margins": -0.010286283679306507, |
|
"rewards/rejected": -0.04926881566643715, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.9771986970684039, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 7.716810807330276e-10, |
|
"log_odds_chosen": -0.30667099356651306, |
|
"log_odds_ratio": -0.9143903851509094, |
|
"logits/chosen": -2.2759385108947754, |
|
"logits/rejected": -2.2378878593444824, |
|
"logps/chosen": -0.6037041544914246, |
|
"logps/rejected": -0.45009493827819824, |
|
"loss": 1.0344, |
|
"nll_loss": 0.93921959400177, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.06037042289972305, |
|
"rewards/margins": -0.015360923483967781, |
|
"rewards/rejected": -0.045009493827819824, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.983121113414273, |
|
"grad_norm": 17.25, |
|
"learning_rate": 4.190722484575804e-10, |
|
"log_odds_chosen": -0.24070534110069275, |
|
"log_odds_ratio": -0.9141713976860046, |
|
"logits/chosen": -2.285658597946167, |
|
"logits/rejected": -2.2573189735412598, |
|
"logps/chosen": -0.6545957326889038, |
|
"logps/rejected": -0.4952670931816101, |
|
"loss": 1.0545, |
|
"nll_loss": 0.9895190000534058, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.06545957177877426, |
|
"rewards/margins": -0.01593286357820034, |
|
"rewards/rejected": -0.04952671006321907, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.9890435297601421, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 1.732164218998522e-10, |
|
"log_odds_chosen": -0.2650103271007538, |
|
"log_odds_ratio": -0.8960719108581543, |
|
"logits/chosen": -2.2581698894500732, |
|
"logits/rejected": -2.2162814140319824, |
|
"logps/chosen": -0.6056646704673767, |
|
"logps/rejected": -0.4769059717655182, |
|
"loss": 1.0107, |
|
"nll_loss": 0.9156764149665833, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.06056647375226021, |
|
"rewards/margins": -0.0128758754581213, |
|
"rewards/rejected": -0.04769059270620346, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.9949659461060113, |
|
"grad_norm": 12.0, |
|
"learning_rate": 3.4218760731730136e-11, |
|
"log_odds_chosen": -0.21042411029338837, |
|
"log_odds_ratio": -0.8711256980895996, |
|
"logits/chosen": -2.333160638809204, |
|
"logits/rejected": -2.2931103706359863, |
|
"logps/chosen": -0.5873175859451294, |
|
"logps/rejected": -0.47885292768478394, |
|
"loss": 1.066, |
|
"nll_loss": 0.9840106964111328, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.05873175337910652, |
|
"rewards/margins": -0.010846461169421673, |
|
"rewards/rejected": -0.047885291278362274, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9997038791827065, |
|
"step": 1688, |
|
"total_flos": 0.0, |
|
"train_loss": 1.076995034918401, |
|
"train_runtime": 25716.0251, |
|
"train_samples_per_second": 2.101, |
|
"train_steps_per_second": 0.066 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1688, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|