|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998751404669747, |
|
"eval_steps": 1000, |
|
"global_step": 2002, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000499438132101386, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 2.4875621890547265e-08, |
|
"logits/chosen": -0.3009346127510071, |
|
"logits/rejected": -0.224898099899292, |
|
"logps/chosen": -43.235816955566406, |
|
"logps/rejected": -65.95542907714844, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00499438132101386, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 2.4875621890547267e-07, |
|
"logits/chosen": -0.4162670373916626, |
|
"logits/rejected": -0.31764352321624756, |
|
"logps/chosen": -43.73904037475586, |
|
"logps/rejected": -88.3354263305664, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.0004759904695674777, |
|
"rewards/margins": 0.0009994357824325562, |
|
"rewards/rejected": -0.0005234453710727394, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00998876264202772, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 4.975124378109453e-07, |
|
"logits/chosen": -0.41128048300743103, |
|
"logits/rejected": -0.3287343382835388, |
|
"logps/chosen": -43.18193054199219, |
|
"logps/rejected": -69.37371063232422, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 8.649445953778923e-05, |
|
"rewards/margins": 0.000692047062329948, |
|
"rewards/rejected": -0.0006055526318959892, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.014983143963041578, |
|
"grad_norm": 0.25, |
|
"learning_rate": 7.462686567164179e-07, |
|
"logits/chosen": -0.4024788439273834, |
|
"logits/rejected": -0.3096240162849426, |
|
"logps/chosen": -42.980751037597656, |
|
"logps/rejected": -73.10075378417969, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0004937038174830377, |
|
"rewards/margins": 0.0007297725533135235, |
|
"rewards/rejected": -0.00023606869217474014, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01997752528405544, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 9.950248756218907e-07, |
|
"logits/chosen": -0.41356319189071655, |
|
"logits/rejected": -0.34054869413375854, |
|
"logps/chosen": -43.257789611816406, |
|
"logps/rejected": -69.32649230957031, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0002575941034592688, |
|
"rewards/margins": 0.0018243074882775545, |
|
"rewards/rejected": -0.0015667133266106248, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.024971906605069295, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 1.2437810945273632e-06, |
|
"logits/chosen": -0.4217616021633148, |
|
"logits/rejected": -0.3440130352973938, |
|
"logps/chosen": -44.67601776123047, |
|
"logps/rejected": -78.74809265136719, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.0004624166467692703, |
|
"rewards/margins": 0.002841049339622259, |
|
"rewards/rejected": -0.00237863278016448, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.029966287926083156, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 1.4925373134328358e-06, |
|
"logits/chosen": -0.4335503578186035, |
|
"logits/rejected": -0.3408567011356354, |
|
"logps/chosen": -43.363746643066406, |
|
"logps/rejected": -77.2335433959961, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0020871085580438375, |
|
"rewards/margins": 0.0050177304074168205, |
|
"rewards/rejected": -0.0029306220822036266, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.034960669247097016, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 1.7412935323383088e-06, |
|
"logits/chosen": -0.4507155418395996, |
|
"logits/rejected": -0.35845330357551575, |
|
"logps/chosen": -42.748069763183594, |
|
"logps/rejected": -73.00779724121094, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.004088289104402065, |
|
"rewards/margins": 0.009887892752885818, |
|
"rewards/rejected": -0.005799603182822466, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03995505056811088, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 1.9900497512437813e-06, |
|
"logits/chosen": -0.41265735030174255, |
|
"logits/rejected": -0.32930153608322144, |
|
"logps/chosen": -42.023521423339844, |
|
"logps/rejected": -82.02639770507812, |
|
"loss": 0.497, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.008388923481106758, |
|
"rewards/margins": 0.015186095610260963, |
|
"rewards/rejected": -0.006797172129154205, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04494943188912474, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 2.238805970149254e-06, |
|
"logits/chosen": -0.42406344413757324, |
|
"logits/rejected": -0.32654517889022827, |
|
"logps/chosen": -43.199241638183594, |
|
"logps/rejected": -79.2525405883789, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.015419301576912403, |
|
"rewards/margins": 0.025097712874412537, |
|
"rewards/rejected": -0.009678413160145283, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04994381321013859, |
|
"grad_norm": 0.232421875, |
|
"learning_rate": 2.4875621890547264e-06, |
|
"logits/chosen": -0.4186275601387024, |
|
"logits/rejected": -0.31876617670059204, |
|
"logps/chosen": -41.526851654052734, |
|
"logps/rejected": -73.03739929199219, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.020998705178499222, |
|
"rewards/margins": 0.03922630846500397, |
|
"rewards/rejected": -0.018227603286504745, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05493819453115245, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 2.736318407960199e-06, |
|
"logits/chosen": -0.3820754289627075, |
|
"logits/rejected": -0.3049188256263733, |
|
"logps/chosen": -40.626625061035156, |
|
"logps/rejected": -77.20478057861328, |
|
"loss": 0.4909, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.026516741141676903, |
|
"rewards/margins": 0.0446377769112587, |
|
"rewards/rejected": -0.018121037632226944, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05993257585216631, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 2.9850746268656716e-06, |
|
"logits/chosen": -0.4118029475212097, |
|
"logits/rejected": -0.3379635214805603, |
|
"logps/chosen": -40.367244720458984, |
|
"logps/rejected": -72.1778564453125, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.034462034702301025, |
|
"rewards/margins": 0.07104991376399994, |
|
"rewards/rejected": -0.036587879061698914, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06492695717318017, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 3.233830845771145e-06, |
|
"logits/chosen": -0.38510891795158386, |
|
"logits/rejected": -0.2871672511100769, |
|
"logps/chosen": -39.84120559692383, |
|
"logps/rejected": -77.8514175415039, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.033992547541856766, |
|
"rewards/margins": 0.09460695832967758, |
|
"rewards/rejected": -0.06061442568898201, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06992133849419403, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 3.4825870646766175e-06, |
|
"logits/chosen": -0.3648582696914673, |
|
"logits/rejected": -0.2659669816493988, |
|
"logps/chosen": -40.6865234375, |
|
"logps/rejected": -84.98823547363281, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04041652753949165, |
|
"rewards/margins": 0.11247670650482178, |
|
"rewards/rejected": -0.07206018269062042, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07491571981520789, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 3.73134328358209e-06, |
|
"logits/chosen": -0.4185262620449066, |
|
"logits/rejected": -0.31970107555389404, |
|
"logps/chosen": -40.132545471191406, |
|
"logps/rejected": -80.09419250488281, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04452138394117355, |
|
"rewards/margins": 0.12835349142551422, |
|
"rewards/rejected": -0.08383210748434067, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07991010113622175, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 3.980099502487563e-06, |
|
"logits/chosen": -0.37403732538223267, |
|
"logits/rejected": -0.27564138174057007, |
|
"logps/chosen": -39.31542205810547, |
|
"logps/rejected": -90.21852111816406, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.044510699808597565, |
|
"rewards/margins": 0.15536533296108246, |
|
"rewards/rejected": -0.11085464060306549, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08490448245723561, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 4.228855721393035e-06, |
|
"logits/chosen": -0.35230112075805664, |
|
"logits/rejected": -0.2606234848499298, |
|
"logps/chosen": -38.46342086791992, |
|
"logps/rejected": -85.07556915283203, |
|
"loss": 0.4689, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04909727722406387, |
|
"rewards/margins": 0.20509441196918488, |
|
"rewards/rejected": -0.1559971272945404, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08989886377824947, |
|
"grad_norm": 0.2421875, |
|
"learning_rate": 4.477611940298508e-06, |
|
"logits/chosen": -0.3421555161476135, |
|
"logits/rejected": -0.2503698766231537, |
|
"logps/chosen": -39.706153869628906, |
|
"logps/rejected": -85.4745101928711, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0517905056476593, |
|
"rewards/margins": 0.2295013666152954, |
|
"rewards/rejected": -0.1777108609676361, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09489324509926333, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 4.72636815920398e-06, |
|
"logits/chosen": -0.2977878451347351, |
|
"logits/rejected": -0.17351695895195007, |
|
"logps/chosen": -38.165069580078125, |
|
"logps/rejected": -104.08354187011719, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0498540997505188, |
|
"rewards/margins": 0.33853739500045776, |
|
"rewards/rejected": -0.28868329524993896, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.09988762642027718, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 4.975124378109453e-06, |
|
"logits/chosen": -0.2946663498878479, |
|
"logits/rejected": -0.17343321442604065, |
|
"logps/chosen": -37.89108657836914, |
|
"logps/rejected": -119.83811950683594, |
|
"loss": 0.4244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06007291004061699, |
|
"rewards/margins": 0.5597599148750305, |
|
"rewards/rejected": -0.49968695640563965, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10488200774129104, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 4.999691923599309e-06, |
|
"logits/chosen": -0.24224761128425598, |
|
"logits/rejected": -0.10646134614944458, |
|
"logps/chosen": -38.592735290527344, |
|
"logps/rejected": -158.98190307617188, |
|
"loss": 0.3761, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.052448518574237823, |
|
"rewards/margins": 0.9627677202224731, |
|
"rewards/rejected": -0.9103191494941711, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1098763890623049, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 4.998627065620946e-06, |
|
"logits/chosen": -0.20557060837745667, |
|
"logits/rejected": -0.019889693707227707, |
|
"logps/chosen": -39.04503631591797, |
|
"logps/rejected": -309.40240478515625, |
|
"loss": 0.3162, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05903823301196098, |
|
"rewards/margins": 2.2989630699157715, |
|
"rewards/rejected": -2.239924907684326, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11487077038331876, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 4.996801946581365e-06, |
|
"logits/chosen": -0.08062759786844254, |
|
"logits/rejected": 0.12686052918434143, |
|
"logps/chosen": -38.807472229003906, |
|
"logps/rejected": -391.716552734375, |
|
"loss": 0.2979, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03988034278154373, |
|
"rewards/margins": 3.2561073303222656, |
|
"rewards/rejected": -3.216226577758789, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.11986515170433262, |
|
"grad_norm": 0.173828125, |
|
"learning_rate": 4.99421712181231e-06, |
|
"logits/chosen": -0.09656897932291031, |
|
"logits/rejected": 0.20183369517326355, |
|
"logps/chosen": -40.211158752441406, |
|
"logps/rejected": -563.9237060546875, |
|
"loss": 0.2787, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03454852104187012, |
|
"rewards/margins": 4.958992958068848, |
|
"rewards/rejected": -4.924445152282715, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12485953302534648, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 4.990873377802351e-06, |
|
"logits/chosen": -0.04213310405611992, |
|
"logits/rejected": 0.26729267835617065, |
|
"logps/chosen": -38.64299011230469, |
|
"logps/rejected": -672.4085693359375, |
|
"loss": 0.2738, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04478804022073746, |
|
"rewards/margins": 6.028790473937988, |
|
"rewards/rejected": -5.984002113342285, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12985391434636034, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 4.986771731957569e-06, |
|
"logits/chosen": -0.013924488797783852, |
|
"logits/rejected": 0.32576116919517517, |
|
"logps/chosen": -38.04896926879883, |
|
"logps/rejected": -677.6256713867188, |
|
"loss": 0.269, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04615269601345062, |
|
"rewards/margins": 6.120830535888672, |
|
"rewards/rejected": -6.07467794418335, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1348482956673742, |
|
"grad_norm": 0.1240234375, |
|
"learning_rate": 4.981913432291989e-06, |
|
"logits/chosen": -0.0022221256513148546, |
|
"logits/rejected": 0.3353291451931, |
|
"logps/chosen": -36.26408004760742, |
|
"logps/rejected": -657.3780517578125, |
|
"loss": 0.2603, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07836371660232544, |
|
"rewards/margins": 5.838679790496826, |
|
"rewards/rejected": -5.760315895080566, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.13984267698838806, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 4.976299957047846e-06, |
|
"logits/chosen": -0.008776476606726646, |
|
"logits/rejected": 0.35888582468032837, |
|
"logps/chosen": -34.863895416259766, |
|
"logps/rejected": -787.3309326171875, |
|
"loss": 0.2619, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0779188945889473, |
|
"rewards/margins": 7.239710330963135, |
|
"rewards/rejected": -7.1617913246154785, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1448370583094019, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 4.9699330142458e-06, |
|
"logits/chosen": -0.005239410791546106, |
|
"logits/rejected": 0.3836653232574463, |
|
"logps/chosen": -30.54348373413086, |
|
"logps/rejected": -689.7301635742188, |
|
"loss": 0.2517, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1276303231716156, |
|
"rewards/margins": 6.348529815673828, |
|
"rewards/rejected": -6.2208991050720215, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.14983143963041579, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 4.96281454116523e-06, |
|
"logits/chosen": -0.018074408173561096, |
|
"logits/rejected": 0.34884509444236755, |
|
"logps/chosen": -19.059213638305664, |
|
"logps/rejected": -759.7054443359375, |
|
"loss": 0.2355, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.24109096825122833, |
|
"rewards/margins": 7.104301452636719, |
|
"rewards/rejected": -6.863211154937744, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15482582095142963, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 4.954946703754777e-06, |
|
"logits/chosen": -0.022661946713924408, |
|
"logits/rejected": 0.36587223410606384, |
|
"logps/chosen": -14.688285827636719, |
|
"logps/rejected": -652.6716918945312, |
|
"loss": 0.2303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2812207341194153, |
|
"rewards/margins": 6.1413116455078125, |
|
"rewards/rejected": -5.860090255737305, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1598202022724435, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 4.946331895973308e-06, |
|
"logits/chosen": 0.027700275182724, |
|
"logits/rejected": 0.4779927134513855, |
|
"logps/chosen": -13.264841079711914, |
|
"logps/rejected": -853.7429809570312, |
|
"loss": 0.2306, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29832005500793457, |
|
"rewards/margins": 7.9851837158203125, |
|
"rewards/rejected": -7.686862945556641, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.16481458359345735, |
|
"grad_norm": 0.1298828125, |
|
"learning_rate": 4.936972739061503e-06, |
|
"logits/chosen": 0.028876056894659996, |
|
"logits/rejected": 0.4520367980003357, |
|
"logps/chosen": -14.747647285461426, |
|
"logps/rejected": -819.2418212890625, |
|
"loss": 0.2308, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29899871349334717, |
|
"rewards/margins": 7.518294334411621, |
|
"rewards/rejected": -7.219296455383301, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.16980896491447123, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 4.926872080744284e-06, |
|
"logits/chosen": 0.09099732339382172, |
|
"logits/rejected": 0.6329769492149353, |
|
"logps/chosen": -14.6112699508667, |
|
"logps/rejected": -978.19482421875, |
|
"loss": 0.2205, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.290539413690567, |
|
"rewards/margins": 9.353235244750977, |
|
"rewards/rejected": -9.06269645690918, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.17480334623548507, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 4.9160329943643335e-06, |
|
"logits/chosen": 0.10238673537969589, |
|
"logits/rejected": 0.6276119947433472, |
|
"logps/chosen": -13.135534286499023, |
|
"logps/rejected": -862.1193237304688, |
|
"loss": 0.2233, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2938804030418396, |
|
"rewards/margins": 8.220571517944336, |
|
"rewards/rejected": -7.92669153213501, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17979772755649895, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 4.904458777946967e-06, |
|
"logits/chosen": 0.023329418152570724, |
|
"logits/rejected": 0.6091981530189514, |
|
"logps/chosen": -13.777295112609863, |
|
"logps/rejected": -1096.3214111328125, |
|
"loss": 0.221, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2997492849826813, |
|
"rewards/margins": 10.409059524536133, |
|
"rewards/rejected": -10.1093111038208, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1847921088775128, |
|
"grad_norm": 0.1201171875, |
|
"learning_rate": 4.892152953196633e-06, |
|
"logits/chosen": 0.029097210615873337, |
|
"logits/rejected": 0.650887131690979, |
|
"logps/chosen": -13.820713996887207, |
|
"logps/rejected": -1171.0504150390625, |
|
"loss": 0.2235, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2969459295272827, |
|
"rewards/margins": 11.168619155883789, |
|
"rewards/rejected": -10.871672630310059, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.18978649019852667, |
|
"grad_norm": 0.06640625, |
|
"learning_rate": 4.879119264425366e-06, |
|
"logits/chosen": 0.11170516163110733, |
|
"logits/rejected": 0.7552271485328674, |
|
"logps/chosen": -13.031651496887207, |
|
"logps/rejected": -990.927734375, |
|
"loss": 0.2191, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30227065086364746, |
|
"rewards/margins": 9.518648147583008, |
|
"rewards/rejected": -9.216377258300781, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.19478087151954052, |
|
"grad_norm": 0.0272216796875, |
|
"learning_rate": 4.865361677413489e-06, |
|
"logits/chosen": 0.10295484960079193, |
|
"logits/rejected": 0.6912266612052917, |
|
"logps/chosen": -14.148368835449219, |
|
"logps/rejected": -973.99365234375, |
|
"loss": 0.2236, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30019253492355347, |
|
"rewards/margins": 9.242959022521973, |
|
"rewards/rejected": -8.942765235900879, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.19977525284055436, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 4.850884378202947e-06, |
|
"logits/chosen": 0.12218449264764786, |
|
"logits/rejected": 0.7848892211914062, |
|
"logps/chosen": -13.857281684875488, |
|
"logps/rejected": -1093.4356689453125, |
|
"loss": 0.2224, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30381980538368225, |
|
"rewards/margins": 10.443190574645996, |
|
"rewards/rejected": -10.139370918273926, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.20476963416156824, |
|
"grad_norm": 0.10888671875, |
|
"learning_rate": 4.8356917718236125e-06, |
|
"logits/chosen": 0.16129298508167267, |
|
"logits/rejected": 0.83033287525177, |
|
"logps/chosen": -13.215472221374512, |
|
"logps/rejected": -1056.891357421875, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30162861943244934, |
|
"rewards/margins": 10.163104057312012, |
|
"rewards/rejected": -9.861475944519043, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.20976401548258208, |
|
"grad_norm": 0.05712890625, |
|
"learning_rate": 4.8197884809529575e-06, |
|
"logits/chosen": 0.18466398119926453, |
|
"logits/rejected": 0.8971255421638489, |
|
"logps/chosen": -14.178210258483887, |
|
"logps/rejected": -1095.867431640625, |
|
"loss": 0.2192, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29163575172424316, |
|
"rewards/margins": 10.56762409210205, |
|
"rewards/rejected": -10.275988578796387, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.21475839680359596, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 4.803179344509505e-06, |
|
"logits/chosen": 0.17180819809436798, |
|
"logits/rejected": 0.9859398603439331, |
|
"logps/chosen": -14.1710844039917, |
|
"logps/rejected": -1132.571044921875, |
|
"loss": 0.2206, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2903401851654053, |
|
"rewards/margins": 10.91908073425293, |
|
"rewards/rejected": -10.628740310668945, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2197527781246098, |
|
"grad_norm": 0.0341796875, |
|
"learning_rate": 4.785869416180489e-06, |
|
"logits/chosen": 0.18128976225852966, |
|
"logits/rejected": 0.9951160550117493, |
|
"logps/chosen": -13.373617172241211, |
|
"logps/rejected": -1239.9248046875, |
|
"loss": 0.2185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3014771342277527, |
|
"rewards/margins": 11.977083206176758, |
|
"rewards/rejected": -11.675604820251465, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.22474715944562368, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 4.767863962884156e-06, |
|
"logits/chosen": 0.19665592908859253, |
|
"logits/rejected": 1.0053622722625732, |
|
"logps/chosen": -12.768911361694336, |
|
"logps/rejected": -1196.0384521484375, |
|
"loss": 0.217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30236831307411194, |
|
"rewards/margins": 11.53145694732666, |
|
"rewards/rejected": -11.229089736938477, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.22974154076663753, |
|
"grad_norm": 0.06982421875, |
|
"learning_rate": 4.74916846316719e-06, |
|
"logits/chosen": 0.2026137411594391, |
|
"logits/rejected": 0.9967263340950012, |
|
"logps/chosen": -14.13359546661377, |
|
"logps/rejected": -1080.839111328125, |
|
"loss": 0.2196, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29357805848121643, |
|
"rewards/margins": 10.319085121154785, |
|
"rewards/rejected": -10.025506973266602, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2347359220876514, |
|
"grad_norm": 0.047607421875, |
|
"learning_rate": 4.7297886055377525e-06, |
|
"logits/chosen": 0.22456324100494385, |
|
"logits/rejected": 0.9802171587944031, |
|
"logps/chosen": -13.012743949890137, |
|
"logps/rejected": -1017.5540161132812, |
|
"loss": 0.2208, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29690033197402954, |
|
"rewards/margins": 9.80536937713623, |
|
"rewards/rejected": -9.508468627929688, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.23973030340866525, |
|
"grad_norm": 0.02734375, |
|
"learning_rate": 4.709730286734631e-06, |
|
"logits/chosen": 0.2183937281370163, |
|
"logits/rejected": 1.0708736181259155, |
|
"logps/chosen": -12.663009643554688, |
|
"logps/rejected": -1249.983642578125, |
|
"loss": 0.2169, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3058857023715973, |
|
"rewards/margins": 12.078222274780273, |
|
"rewards/rejected": -11.772336959838867, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.24472468472967912, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 4.688999609933023e-06, |
|
"logits/chosen": 0.22988371551036835, |
|
"logits/rejected": 1.0844902992248535, |
|
"logps/chosen": -12.956899642944336, |
|
"logps/rejected": -1156.6395263671875, |
|
"loss": 0.2176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30138763785362244, |
|
"rewards/margins": 11.153468132019043, |
|
"rewards/rejected": -10.852079391479492, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.24971906605069297, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 4.6676028828875195e-06, |
|
"logits/chosen": 0.19053277373313904, |
|
"logits/rejected": 1.1232895851135254, |
|
"logps/chosen": -13.526689529418945, |
|
"logps/rejected": -1328.9798583984375, |
|
"loss": 0.2173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2967537045478821, |
|
"rewards/margins": 12.862408638000488, |
|
"rewards/rejected": -12.565653800964355, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2547134473717068, |
|
"grad_norm": 0.00811767578125, |
|
"learning_rate": 4.645546616012835e-06, |
|
"logits/chosen": 0.19936171174049377, |
|
"logits/rejected": 1.1598930358886719, |
|
"logps/chosen": -13.963285446166992, |
|
"logps/rejected": -1301.6494140625, |
|
"loss": 0.217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29725441336631775, |
|
"rewards/margins": 12.456524848937988, |
|
"rewards/rejected": -12.159271240234375, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2597078286927207, |
|
"grad_norm": 0.022216796875, |
|
"learning_rate": 4.622837520402869e-06, |
|
"logits/chosen": 0.2132669985294342, |
|
"logits/rejected": 1.1716349124908447, |
|
"logps/chosen": -13.427679061889648, |
|
"logps/rejected": -1347.979248046875, |
|
"loss": 0.216, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30607202649116516, |
|
"rewards/margins": 13.056879043579102, |
|
"rewards/rejected": -12.75080680847168, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.26470221001373456, |
|
"grad_norm": 0.0654296875, |
|
"learning_rate": 4.599482505788715e-06, |
|
"logits/chosen": 0.1745399534702301, |
|
"logits/rejected": 1.1154874563217163, |
|
"logps/chosen": -13.649249076843262, |
|
"logps/rejected": -1377.6204833984375, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2969611883163452, |
|
"rewards/margins": 13.256256103515625, |
|
"rewards/rejected": -12.959295272827148, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2696965913347484, |
|
"grad_norm": 0.040771484375, |
|
"learning_rate": 4.575488678436228e-06, |
|
"logits/chosen": 0.20975852012634277, |
|
"logits/rejected": 1.2858575582504272, |
|
"logps/chosen": -13.548286437988281, |
|
"logps/rejected": -1526.4365234375, |
|
"loss": 0.2161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30410271883010864, |
|
"rewards/margins": 14.82691764831543, |
|
"rewards/rejected": -14.522814750671387, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.27469097265576226, |
|
"grad_norm": 0.0235595703125, |
|
"learning_rate": 4.550863338983784e-06, |
|
"logits/chosen": 0.23238572478294373, |
|
"logits/rejected": 1.2929937839508057, |
|
"logps/chosen": -12.818222045898438, |
|
"logps/rejected": -1398.010009765625, |
|
"loss": 0.2146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3006662130355835, |
|
"rewards/margins": 13.579294204711914, |
|
"rewards/rejected": -13.2786283493042, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.27968535397677613, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 4.525613980220909e-06, |
|
"logits/chosen": 0.21401552855968475, |
|
"logits/rejected": 1.2280786037445068, |
|
"logps/chosen": -13.640890121459961, |
|
"logps/rejected": -1425.2213134765625, |
|
"loss": 0.2165, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30365556478500366, |
|
"rewards/margins": 13.8203706741333, |
|
"rewards/rejected": -13.516714096069336, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.28467973529779, |
|
"grad_norm": 0.031494140625, |
|
"learning_rate": 4.499748284808433e-06, |
|
"logits/chosen": 0.2350139617919922, |
|
"logits/rejected": 1.237275242805481, |
|
"logps/chosen": -13.027705192565918, |
|
"logps/rejected": -1249.4954833984375, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3124990463256836, |
|
"rewards/margins": 11.985966682434082, |
|
"rewards/rejected": -11.673466682434082, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2896741166188038, |
|
"grad_norm": 0.037109375, |
|
"learning_rate": 4.473274122940879e-06, |
|
"logits/chosen": 0.26041245460510254, |
|
"logits/rejected": 1.2588506937026978, |
|
"logps/chosen": -12.92595100402832, |
|
"logps/rejected": -1345.543212890625, |
|
"loss": 0.2145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3006027638912201, |
|
"rewards/margins": 13.026535034179688, |
|
"rewards/rejected": -12.725933074951172, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.2946684979398177, |
|
"grad_norm": 0.023193359375, |
|
"learning_rate": 4.446199549951782e-06, |
|
"logits/chosen": 0.2726953327655792, |
|
"logits/rejected": 1.2741527557373047, |
|
"logps/chosen": -13.607648849487305, |
|
"logps/rejected": -1350.020263671875, |
|
"loss": 0.2164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2966735064983368, |
|
"rewards/margins": 13.099435806274414, |
|
"rewards/rejected": -12.802760124206543, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.29966287926083157, |
|
"grad_norm": 0.060302734375, |
|
"learning_rate": 4.418532803862684e-06, |
|
"logits/chosen": 0.24927139282226562, |
|
"logits/rejected": 1.2164738178253174, |
|
"logps/chosen": -13.667009353637695, |
|
"logps/rejected": -1226.8724365234375, |
|
"loss": 0.2161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3081030249595642, |
|
"rewards/margins": 11.710563659667969, |
|
"rewards/rejected": -11.402461051940918, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.30465726058184545, |
|
"grad_norm": 0.01220703125, |
|
"learning_rate": 4.39028230287654e-06, |
|
"logits/chosen": 0.24915924668312073, |
|
"logits/rejected": 1.2275068759918213, |
|
"logps/chosen": -14.152711868286133, |
|
"logps/rejected": -1383.346923828125, |
|
"loss": 0.2153, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.301180899143219, |
|
"rewards/margins": 13.357465744018555, |
|
"rewards/rejected": -13.05628490447998, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.30965164190285926, |
|
"grad_norm": 0.056884765625, |
|
"learning_rate": 4.361456642816292e-06, |
|
"logits/chosen": 0.18370430171489716, |
|
"logits/rejected": 1.187785267829895, |
|
"logps/chosen": -14.063751220703125, |
|
"logps/rejected": -1465.3687744140625, |
|
"loss": 0.2156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3011600375175476, |
|
"rewards/margins": 14.13781452178955, |
|
"rewards/rejected": -13.836652755737305, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.31464602322387314, |
|
"grad_norm": 0.036376953125, |
|
"learning_rate": 4.332064594509413e-06, |
|
"logits/chosen": 0.19446897506713867, |
|
"logits/rejected": 1.427197813987732, |
|
"logps/chosen": -14.258028030395508, |
|
"logps/rejected": -1825.0166015625, |
|
"loss": 0.2145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2957887053489685, |
|
"rewards/margins": 17.80067253112793, |
|
"rewards/rejected": -17.504884719848633, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.319640404544887, |
|
"grad_norm": 0.025634765625, |
|
"learning_rate": 4.302115101119186e-06, |
|
"logits/chosen": 0.19377607107162476, |
|
"logits/rejected": 1.1977471113204956, |
|
"logps/chosen": -13.28663158416748, |
|
"logps/rejected": -1551.3272705078125, |
|
"loss": 0.2146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2949761748313904, |
|
"rewards/margins": 14.840034484863281, |
|
"rewards/rejected": -14.545059204101562, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3246347858659009, |
|
"grad_norm": 0.028076171875, |
|
"learning_rate": 4.271617275423564e-06, |
|
"logits/chosen": 0.18471740186214447, |
|
"logits/rejected": 1.2049682140350342, |
|
"logps/chosen": -14.22096061706543, |
|
"logps/rejected": -1509.560791015625, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2953301966190338, |
|
"rewards/margins": 14.56297779083252, |
|
"rewards/rejected": -14.267648696899414, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3296291671869147, |
|
"grad_norm": 0.031494140625, |
|
"learning_rate": 4.2405803970423995e-06, |
|
"logits/chosen": 0.21741405129432678, |
|
"logits/rejected": 1.3314052820205688, |
|
"logps/chosen": -13.835968017578125, |
|
"logps/rejected": -1617.0235595703125, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29379233717918396, |
|
"rewards/margins": 15.649670600891113, |
|
"rewards/rejected": -15.355878829956055, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3346235485079286, |
|
"grad_norm": 0.11572265625, |
|
"learning_rate": 4.2090139096139306e-06, |
|
"logits/chosen": 0.16212065517902374, |
|
"logits/rejected": 1.2855770587921143, |
|
"logps/chosen": -14.017046928405762, |
|
"logps/rejected": -1740.139404296875, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3004634976387024, |
|
"rewards/margins": 16.887771606445312, |
|
"rewards/rejected": -16.58730697631836, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.33961792982894246, |
|
"grad_norm": 0.043701171875, |
|
"learning_rate": 4.176927417921343e-06, |
|
"logits/chosen": 0.326777845621109, |
|
"logits/rejected": 1.3592358827590942, |
|
"logps/chosen": -13.120327949523926, |
|
"logps/rejected": -1251.674560546875, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30190396308898926, |
|
"rewards/margins": 12.16067123413086, |
|
"rewards/rejected": -11.858766555786133, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3446123111499563, |
|
"grad_norm": 0.03173828125, |
|
"learning_rate": 4.144330684970314e-06, |
|
"logits/chosen": 0.22485598921775818, |
|
"logits/rejected": 1.238599181175232, |
|
"logps/chosen": -14.03515625, |
|
"logps/rejected": -1432.62060546875, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30531880259513855, |
|
"rewards/margins": 13.757417678833008, |
|
"rewards/rejected": -13.45209789276123, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.34960669247097015, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 4.111233629018404e-06, |
|
"logits/chosen": 0.2409452497959137, |
|
"logits/rejected": 1.3199043273925781, |
|
"logps/chosen": -13.525833129882812, |
|
"logps/rejected": -1432.090087890625, |
|
"loss": 0.2146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29940056800842285, |
|
"rewards/margins": 13.923855781555176, |
|
"rewards/rejected": -13.624455451965332, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.354601073791984, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 4.077646320557215e-06, |
|
"logits/chosen": 0.25844550132751465, |
|
"logits/rejected": 1.4347895383834839, |
|
"logps/chosen": -13.414407730102539, |
|
"logps/rejected": -1587.720947265625, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29702991247177124, |
|
"rewards/margins": 15.390164375305176, |
|
"rewards/rejected": -15.093134880065918, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3595954551129979, |
|
"grad_norm": 0.017822265625, |
|
"learning_rate": 4.043578979248228e-06, |
|
"logits/chosen": 0.24548295140266418, |
|
"logits/rejected": 1.3877404928207397, |
|
"logps/chosen": -12.786032676696777, |
|
"logps/rejected": -1540.37939453125, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3044242262840271, |
|
"rewards/margins": 14.986343383789062, |
|
"rewards/rejected": -14.681918144226074, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3645898364340117, |
|
"grad_norm": 0.0203857421875, |
|
"learning_rate": 4.009041970813247e-06, |
|
"logits/chosen": 0.2618701457977295, |
|
"logits/rejected": 1.432408332824707, |
|
"logps/chosen": -12.812631607055664, |
|
"logps/rejected": -1661.7718505859375, |
|
"loss": 0.2146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29122787714004517, |
|
"rewards/margins": 16.198129653930664, |
|
"rewards/rejected": -15.906901359558105, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3695842177550256, |
|
"grad_norm": 0.030029296875, |
|
"learning_rate": 3.9740458038804075e-06, |
|
"logits/chosen": 0.25733712315559387, |
|
"logits/rejected": 1.3133214712142944, |
|
"logps/chosen": -14.166203498840332, |
|
"logps/rejected": -1473.091552734375, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3017811179161072, |
|
"rewards/margins": 14.06616497039795, |
|
"rewards/rejected": -13.764383316040039, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.37457859907603946, |
|
"grad_norm": 0.02880859375, |
|
"learning_rate": 3.938601126786702e-06, |
|
"logits/chosen": 0.28963789343833923, |
|
"logits/rejected": 1.4084501266479492, |
|
"logps/chosen": -12.976341247558594, |
|
"logps/rejected": -1537.215087890625, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2965225577354431, |
|
"rewards/margins": 14.963714599609375, |
|
"rewards/rejected": -14.667192459106445, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.37957298039705334, |
|
"grad_norm": 0.0306396484375, |
|
"learning_rate": 3.902718724337993e-06, |
|
"logits/chosen": 0.22370409965515137, |
|
"logits/rejected": 1.3502318859100342, |
|
"logps/chosen": -13.021102905273438, |
|
"logps/rejected": -1571.427978515625, |
|
"loss": 0.2141, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3001527190208435, |
|
"rewards/margins": 15.25025463104248, |
|
"rewards/rejected": -14.950100898742676, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.38456736171806716, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 3.8664095145274995e-06, |
|
"logits/chosen": 0.26876306533813477, |
|
"logits/rejected": 1.432448387145996, |
|
"logps/chosen": -13.371549606323242, |
|
"logps/rejected": -1552.1263427734375, |
|
"loss": 0.2136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30356094241142273, |
|
"rewards/margins": 15.13947582244873, |
|
"rewards/rejected": -14.835916519165039, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.38956174303908103, |
|
"grad_norm": 0.034912109375, |
|
"learning_rate": 3.829684545213768e-06, |
|
"logits/chosen": 0.23094145953655243, |
|
"logits/rejected": 1.379480242729187, |
|
"logps/chosen": -13.367365837097168, |
|
"logps/rejected": -1540.384765625, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30820637941360474, |
|
"rewards/margins": 14.93891716003418, |
|
"rewards/rejected": -14.630711555480957, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3945561243600949, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 3.7925549907591252e-06, |
|
"logits/chosen": 0.17974331974983215, |
|
"logits/rejected": 1.3995566368103027, |
|
"logps/chosen": -13.248870849609375, |
|
"logps/rejected": -1851.462646484375, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3005455434322357, |
|
"rewards/margins": 18.06133460998535, |
|
"rewards/rejected": -17.760787963867188, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.3995505056811087, |
|
"grad_norm": 0.0272216796875, |
|
"learning_rate": 3.7550321486296303e-06, |
|
"logits/chosen": 0.1997009515762329, |
|
"logits/rejected": 1.2776936292648315, |
|
"logps/chosen": -13.081275939941406, |
|
"logps/rejected": -1519.75537109375, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29953616857528687, |
|
"rewards/margins": 14.774045944213867, |
|
"rewards/rejected": -14.474508285522461, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4045448870021226, |
|
"grad_norm": 0.0252685546875, |
|
"learning_rate": 3.717127435957583e-06, |
|
"logits/chosen": 0.22182372212409973, |
|
"logits/rejected": 1.330664873123169, |
|
"logps/chosen": -13.001907348632812, |
|
"logps/rejected": -1577.052978515625, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30547088384628296, |
|
"rewards/margins": 15.365854263305664, |
|
"rewards/rejected": -15.060381889343262, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4095392683231365, |
|
"grad_norm": 0.011474609375, |
|
"learning_rate": 3.6788523860676156e-06, |
|
"logits/chosen": 0.23856505751609802, |
|
"logits/rejected": 1.3909879922866821, |
|
"logps/chosen": -13.222851753234863, |
|
"logps/rejected": -1567.79931640625, |
|
"loss": 0.2142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3013755977153778, |
|
"rewards/margins": 15.270452499389648, |
|
"rewards/rejected": -14.969076156616211, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.41453364964415035, |
|
"grad_norm": 0.0267333984375, |
|
"learning_rate": 3.640218644967429e-06, |
|
"logits/chosen": 0.2593843638896942, |
|
"logits/rejected": 1.4300034046173096, |
|
"logps/chosen": -12.888224601745605, |
|
"logps/rejected": -1659.7015380859375, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.295782208442688, |
|
"rewards/margins": 16.122600555419922, |
|
"rewards/rejected": -15.826817512512207, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.41952803096516417, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 3.601237967804245e-06, |
|
"logits/chosen": 0.264489084482193, |
|
"logits/rejected": 1.4015864133834839, |
|
"logps/chosen": -12.973742485046387, |
|
"logps/rejected": -1561.237548828125, |
|
"loss": 0.2142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2937301695346832, |
|
"rewards/margins": 15.21537971496582, |
|
"rewards/rejected": -14.921648979187012, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.42452241228617804, |
|
"grad_norm": 0.034912109375, |
|
"learning_rate": 3.5619222152880488e-06, |
|
"logits/chosen": 0.26485809683799744, |
|
"logits/rejected": 1.4641757011413574, |
|
"logps/chosen": -12.745534896850586, |
|
"logps/rejected": -1704.2015380859375, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29673147201538086, |
|
"rewards/margins": 16.65966033935547, |
|
"rewards/rejected": -16.362926483154297, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4295167936071919, |
|
"grad_norm": 0.03662109375, |
|
"learning_rate": 3.522283350082713e-06, |
|
"logits/chosen": 0.27674156427383423, |
|
"logits/rejected": 1.3279974460601807, |
|
"logps/chosen": -13.220677375793457, |
|
"logps/rejected": -1268.2398681640625, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30168816447257996, |
|
"rewards/margins": 12.249058723449707, |
|
"rewards/rejected": -11.947370529174805, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4345111749282058, |
|
"grad_norm": 0.0303955078125, |
|
"learning_rate": 3.482333433166101e-06, |
|
"logits/chosen": 0.2209288775920868, |
|
"logits/rejected": 1.239816427230835, |
|
"logps/chosen": -13.64061450958252, |
|
"logps/rejected": -1329.226806640625, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.32054653763771057, |
|
"rewards/margins": 12.724926948547363, |
|
"rewards/rejected": -12.404378890991211, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4395055562492196, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 3.442084620160255e-06, |
|
"logits/chosen": 0.2859000563621521, |
|
"logits/rejected": 1.3617407083511353, |
|
"logps/chosen": -13.033666610717773, |
|
"logps/rejected": -1480.4652099609375, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3004864454269409, |
|
"rewards/margins": 14.408884048461914, |
|
"rewards/rejected": -14.1083984375, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.4444999375702335, |
|
"grad_norm": 0.033203125, |
|
"learning_rate": 3.4015491576327813e-06, |
|
"logits/chosen": 0.2019362449645996, |
|
"logits/rejected": 1.4212459325790405, |
|
"logps/chosen": -13.03289794921875, |
|
"logps/rejected": -1749.9088134765625, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2995554804801941, |
|
"rewards/margins": 17.093223571777344, |
|
"rewards/rejected": -16.793670654296875, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.44949431889124736, |
|
"grad_norm": 0.043701171875, |
|
"learning_rate": 3.3607393793705774e-06, |
|
"logits/chosen": 0.18301896750926971, |
|
"logits/rejected": 1.3750990629196167, |
|
"logps/chosen": -13.328268051147461, |
|
"logps/rejected": -1911.580322265625, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3058582544326782, |
|
"rewards/margins": 18.632659912109375, |
|
"rewards/rejected": -18.32680320739746, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.45448870021226123, |
|
"grad_norm": 0.026611328125, |
|
"learning_rate": 3.319667702627004e-06, |
|
"logits/chosen": 0.251764714717865, |
|
"logits/rejected": 1.379320502281189, |
|
"logps/chosen": -13.333532333374023, |
|
"logps/rejected": -1562.4708251953125, |
|
"loss": 0.2145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29535120725631714, |
|
"rewards/margins": 15.228157043457031, |
|
"rewards/rejected": -14.932805061340332, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.45948308153327505, |
|
"grad_norm": 0.0301513671875, |
|
"learning_rate": 3.2783466243436728e-06, |
|
"logits/chosen": 0.2565325200557709, |
|
"logits/rejected": 1.3139139413833618, |
|
"logps/chosen": -12.679740905761719, |
|
"logps/rejected": -1520.677490234375, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29702773690223694, |
|
"rewards/margins": 14.843530654907227, |
|
"rewards/rejected": -14.546501159667969, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4644774628542889, |
|
"grad_norm": 0.032958984375, |
|
"learning_rate": 3.23678871734798e-06, |
|
"logits/chosen": 0.25534436106681824, |
|
"logits/rejected": 1.4121118783950806, |
|
"logps/chosen": -13.0289945602417, |
|
"logps/rejected": -1636.283447265625, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2977290153503418, |
|
"rewards/margins": 15.95555591583252, |
|
"rewards/rejected": -15.657827377319336, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4694718441753028, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 3.1950066265275563e-06, |
|
"logits/chosen": 0.22841012477874756, |
|
"logits/rejected": 1.4126774072647095, |
|
"logps/chosen": -13.12025260925293, |
|
"logps/rejected": -1663.779541015625, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2916621267795563, |
|
"rewards/margins": 16.221864700317383, |
|
"rewards/rejected": -15.930200576782227, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4744662254963166, |
|
"grad_norm": 0.0255126953125, |
|
"learning_rate": 3.1530130649827866e-06, |
|
"logits/chosen": 0.22560763359069824, |
|
"logits/rejected": 1.3270902633666992, |
|
"logps/chosen": -12.889676094055176, |
|
"logps/rejected": -1550.6907958984375, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2999538779258728, |
|
"rewards/margins": 15.000715255737305, |
|
"rewards/rejected": -14.700759887695312, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4794606068173305, |
|
"grad_norm": 0.033203125, |
|
"learning_rate": 3.1108208101585737e-06, |
|
"logits/chosen": 0.2439723014831543, |
|
"logits/rejected": 1.362210988998413, |
|
"logps/chosen": -13.089398384094238, |
|
"logps/rejected": -1693.647216796875, |
|
"loss": 0.2136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29442083835601807, |
|
"rewards/margins": 16.52579116821289, |
|
"rewards/rejected": -16.231369018554688, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.48445498813834437, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 3.068442699956526e-06, |
|
"logits/chosen": 0.2077961266040802, |
|
"logits/rejected": 1.3753139972686768, |
|
"logps/chosen": -14.569076538085938, |
|
"logps/rejected": -1670.797119140625, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30582067370414734, |
|
"rewards/margins": 16.229455947875977, |
|
"rewards/rejected": -15.92363452911377, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.48944936945935824, |
|
"grad_norm": 0.05224609375, |
|
"learning_rate": 3.025891628828754e-06, |
|
"logits/chosen": 0.1842622458934784, |
|
"logits/rejected": 1.2995259761810303, |
|
"logps/chosen": -14.32885456085205, |
|
"logps/rejected": -1653.052001953125, |
|
"loss": 0.2126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3057419955730438, |
|
"rewards/margins": 16.055688858032227, |
|
"rewards/rejected": -15.749944686889648, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.49444375078037206, |
|
"grad_norm": 0.0269775390625, |
|
"learning_rate": 2.983180543854449e-06, |
|
"logits/chosen": 0.19390757381916046, |
|
"logits/rejected": 1.3017512559890747, |
|
"logps/chosen": -13.34800910949707, |
|
"logps/rejected": -1712.0159912109375, |
|
"loss": 0.2135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30465009808540344, |
|
"rewards/margins": 16.635692596435547, |
|
"rewards/rejected": -16.331043243408203, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.49943813210138593, |
|
"grad_norm": 0.0223388671875, |
|
"learning_rate": 2.9403224408004607e-06, |
|
"logits/chosen": 0.23906031250953674, |
|
"logits/rejected": 1.400268316268921, |
|
"logps/chosen": -13.12585735321045, |
|
"logps/rejected": -1704.7308349609375, |
|
"loss": 0.2129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3049536347389221, |
|
"rewards/margins": 16.624217987060547, |
|
"rewards/rejected": -16.319265365600586, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49943813210138593, |
|
"eval_logits/chosen": 0.20361633598804474, |
|
"eval_logits/rejected": 1.1080797910690308, |
|
"eval_logps/chosen": -12.131524085998535, |
|
"eval_logps/rejected": -998.1762084960938, |
|
"eval_loss": 0.21242494881153107, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.32524025440216064, |
|
"eval_rewards/margins": 9.676619529724121, |
|
"eval_rewards/rejected": -9.351378440856934, |
|
"eval_runtime": 0.4258, |
|
"eval_samples_per_second": 11.742, |
|
"eval_steps_per_second": 7.045, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5044325134223998, |
|
"grad_norm": 0.0205078125, |
|
"learning_rate": 2.8973303601670537e-06, |
|
"logits/chosen": 0.23553326725959778, |
|
"logits/rejected": 1.356740117073059, |
|
"logps/chosen": -13.091280937194824, |
|
"logps/rejected": -1667.8570556640625, |
|
"loss": 0.2138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3002287745475769, |
|
"rewards/margins": 16.279354095458984, |
|
"rewards/rejected": -15.979124069213867, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5094268947434136, |
|
"grad_norm": 0.04833984375, |
|
"learning_rate": 2.8542173832200547e-06, |
|
"logits/chosen": 0.1925448775291443, |
|
"logits/rejected": 1.3025437593460083, |
|
"logps/chosen": -14.501489639282227, |
|
"logps/rejected": -1580.2572021484375, |
|
"loss": 0.2144, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2990257143974304, |
|
"rewards/margins": 15.303033828735352, |
|
"rewards/rejected": -15.004008293151855, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5144212760644276, |
|
"grad_norm": 0.02587890625, |
|
"learning_rate": 2.810996628010594e-06, |
|
"logits/chosen": 0.2747485637664795, |
|
"logits/rejected": 1.341671109199524, |
|
"logps/chosen": -13.159135818481445, |
|
"logps/rejected": -1436.7039794921875, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30046582221984863, |
|
"rewards/margins": 14.00048542022705, |
|
"rewards/rejected": -13.700021743774414, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5194156573854414, |
|
"grad_norm": 0.0380859375, |
|
"learning_rate": 2.7676812453836617e-06, |
|
"logits/chosen": 0.2172623872756958, |
|
"logits/rejected": 1.389795184135437, |
|
"logps/chosen": -13.409383773803711, |
|
"logps/rejected": -1729.2418212890625, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29702064394950867, |
|
"rewards/margins": 16.85504722595215, |
|
"rewards/rejected": -16.558027267456055, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5244100387064552, |
|
"grad_norm": 0.059814453125, |
|
"learning_rate": 2.724284414976672e-06, |
|
"logits/chosen": 0.194356769323349, |
|
"logits/rejected": 1.3346575498580933, |
|
"logps/chosen": -13.100080490112305, |
|
"logps/rejected": -1775.0599365234375, |
|
"loss": 0.2141, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3071358799934387, |
|
"rewards/margins": 17.33095359802246, |
|
"rewards/rejected": -17.023818969726562, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5294044200274691, |
|
"grad_norm": 0.0322265625, |
|
"learning_rate": 2.6808193412092823e-06, |
|
"logits/chosen": 0.27043357491493225, |
|
"logits/rejected": 1.2958372831344604, |
|
"logps/chosen": -13.240577697753906, |
|
"logps/rejected": -1308.380615234375, |
|
"loss": 0.212, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3128414452075958, |
|
"rewards/margins": 12.567632675170898, |
|
"rewards/rejected": -12.254792213439941, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5343988013484829, |
|
"grad_norm": 0.018310546875, |
|
"learning_rate": 2.637299249265659e-06, |
|
"logits/chosen": 0.24779090285301208, |
|
"logits/rejected": 1.3237859010696411, |
|
"logps/chosen": -13.30639934539795, |
|
"logps/rejected": -1560.58740234375, |
|
"loss": 0.2129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3085622787475586, |
|
"rewards/margins": 15.089482307434082, |
|
"rewards/rejected": -14.780920028686523, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5393931826694968, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 2.5937373810704352e-06, |
|
"logits/chosen": 0.20865114033222198, |
|
"logits/rejected": 1.3283964395523071, |
|
"logps/chosen": -12.944803237915039, |
|
"logps/rejected": -1628.0531005859375, |
|
"loss": 0.213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3075375556945801, |
|
"rewards/margins": 15.755208969116211, |
|
"rewards/rejected": -15.447671890258789, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5443875639905107, |
|
"grad_norm": 0.0247802734375, |
|
"learning_rate": 2.550146991259565e-06, |
|
"logits/chosen": 0.2642405331134796, |
|
"logits/rejected": 1.330570936203003, |
|
"logps/chosen": -12.811630249023438, |
|
"logps/rejected": -1518.474609375, |
|
"loss": 0.213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.309398889541626, |
|
"rewards/margins": 14.794235229492188, |
|
"rewards/rejected": -14.484835624694824, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5493819453115245, |
|
"grad_norm": 0.0252685546875, |
|
"learning_rate": 2.5065413431473196e-06, |
|
"logits/chosen": 0.22386522591114044, |
|
"logits/rejected": 1.3922302722930908, |
|
"logps/chosen": -13.141294479370117, |
|
"logps/rejected": -1650.009765625, |
|
"loss": 0.2129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30252230167388916, |
|
"rewards/margins": 16.12454605102539, |
|
"rewards/rejected": -15.822023391723633, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5543763266325384, |
|
"grad_norm": 0.017822265625, |
|
"learning_rate": 2.462933704690635e-06, |
|
"logits/chosen": 0.23435406386852264, |
|
"logits/rejected": 1.2522070407867432, |
|
"logps/chosen": -13.335357666015625, |
|
"logps/rejected": -1444.448974609375, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3048686385154724, |
|
"rewards/margins": 13.933290481567383, |
|
"rewards/rejected": -13.62842082977295, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5593707079535523, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 2.4193373444520558e-06, |
|
"logits/chosen": 0.23952054977416992, |
|
"logits/rejected": 1.466230869293213, |
|
"logps/chosen": -13.09483528137207, |
|
"logps/rejected": -1737.9251708984375, |
|
"loss": 0.213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3013126254081726, |
|
"rewards/margins": 16.979856491088867, |
|
"rewards/rejected": -16.67854118347168, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5643650892745661, |
|
"grad_norm": 0.0277099609375, |
|
"learning_rate": 2.3757655275624826e-06, |
|
"logits/chosen": 0.20145916938781738, |
|
"logits/rejected": 1.4154694080352783, |
|
"logps/chosen": -12.834500312805176, |
|
"logps/rejected": -1683.4241943359375, |
|
"loss": 0.2142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29736918210983276, |
|
"rewards/margins": 16.437068939208984, |
|
"rewards/rejected": -16.13970184326172, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.56935947059558, |
|
"grad_norm": 0.0235595703125, |
|
"learning_rate": 2.3322315116849747e-06, |
|
"logits/chosen": 0.18402531743049622, |
|
"logits/rejected": 1.3126466274261475, |
|
"logps/chosen": -13.314038276672363, |
|
"logps/rejected": -1694.0882568359375, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3090851306915283, |
|
"rewards/margins": 16.457656860351562, |
|
"rewards/rejected": -16.14857292175293, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5743538519165938, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 2.2887485429808213e-06, |
|
"logits/chosen": 0.24247586727142334, |
|
"logits/rejected": 1.3415312767028809, |
|
"logps/chosen": -13.364187240600586, |
|
"logps/rejected": -1531.287353515625, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31205669045448303, |
|
"rewards/margins": 14.862896919250488, |
|
"rewards/rejected": -14.5508394241333, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.5793482332376076, |
|
"grad_norm": 0.0257568359375, |
|
"learning_rate": 2.245329852079109e-06, |
|
"logits/chosen": 0.2564612329006195, |
|
"logits/rejected": 1.3275741338729858, |
|
"logps/chosen": -12.749259948730469, |
|
"logps/rejected": -1410.6912841796875, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3032976984977722, |
|
"rewards/margins": 13.754470825195312, |
|
"rewards/rejected": -13.451173782348633, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5843426145586216, |
|
"grad_norm": 0.02099609375, |
|
"learning_rate": 2.2019886500510197e-06, |
|
"logits/chosen": 0.234290212392807, |
|
"logits/rejected": 1.3947325944900513, |
|
"logps/chosen": -12.72685432434082, |
|
"logps/rejected": -1736.8687744140625, |
|
"loss": 0.2138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2989721894264221, |
|
"rewards/margins": 16.979671478271484, |
|
"rewards/rejected": -16.68069839477539, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5893369958796354, |
|
"grad_norm": 0.037353515625, |
|
"learning_rate": 2.1587381243900777e-06, |
|
"logits/chosen": 0.26597389578819275, |
|
"logits/rejected": 1.3139584064483643, |
|
"logps/chosen": -14.154109001159668, |
|
"logps/rejected": -1483.2686767578125, |
|
"loss": 0.2125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3113531470298767, |
|
"rewards/margins": 14.383076667785645, |
|
"rewards/rejected": -14.071722030639648, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.5943313772006493, |
|
"grad_norm": 0.0233154296875, |
|
"learning_rate": 2.115591434999573e-06, |
|
"logits/chosen": 0.277686208486557, |
|
"logits/rejected": 1.401039719581604, |
|
"logps/chosen": -12.76582145690918, |
|
"logps/rejected": -1506.6636962890625, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30175262689590454, |
|
"rewards/margins": 14.693206787109375, |
|
"rewards/rejected": -14.391454696655273, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5993257585216631, |
|
"grad_norm": 0.0294189453125, |
|
"learning_rate": 2.0725617101883726e-06, |
|
"logits/chosen": 0.25775861740112305, |
|
"logits/rejected": 1.345365285873413, |
|
"logps/chosen": -12.69609260559082, |
|
"logps/rejected": -1601.963623046875, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2992851138114929, |
|
"rewards/margins": 15.617083549499512, |
|
"rewards/rejected": -15.317797660827637, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.604320139842677, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 2.0296620426763545e-06, |
|
"logits/chosen": 0.14509257674217224, |
|
"logits/rejected": 1.3586305379867554, |
|
"logps/chosen": -13.326802253723145, |
|
"logps/rejected": -1893.167724609375, |
|
"loss": 0.2128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31738612055778503, |
|
"rewards/margins": 18.294166564941406, |
|
"rewards/rejected": -17.97677993774414, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6093145211636909, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 1.9869054856106628e-06, |
|
"logits/chosen": 0.2093639373779297, |
|
"logits/rejected": 1.3707678318023682, |
|
"logps/chosen": -13.017126083374023, |
|
"logps/rejected": -1714.967529296875, |
|
"loss": 0.213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.305789053440094, |
|
"rewards/margins": 16.755239486694336, |
|
"rewards/rejected": -16.449451446533203, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6143089024847047, |
|
"grad_norm": 0.027587890625, |
|
"learning_rate": 1.9443050485940118e-06, |
|
"logits/chosen": 0.29796817898750305, |
|
"logits/rejected": 1.4095687866210938, |
|
"logps/chosen": -13.055410385131836, |
|
"logps/rejected": -1501.1917724609375, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30659452080726624, |
|
"rewards/margins": 14.630342483520508, |
|
"rewards/rejected": -14.323748588562012, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6193032838057185, |
|
"grad_norm": 0.0205078125, |
|
"learning_rate": 1.9018736937262271e-06, |
|
"logits/chosen": 0.20551720261573792, |
|
"logits/rejected": 1.3722885847091675, |
|
"logps/chosen": -13.263589859008789, |
|
"logps/rejected": -1644.015380859375, |
|
"loss": 0.213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3152340054512024, |
|
"rewards/margins": 15.940661430358887, |
|
"rewards/rejected": -15.62542724609375, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6242976651267325, |
|
"grad_norm": 0.025146484375, |
|
"learning_rate": 1.859624331660253e-06, |
|
"logits/chosen": 0.19998934864997864, |
|
"logits/rejected": 1.380974531173706, |
|
"logps/chosen": -12.843725204467773, |
|
"logps/rejected": -1903.5989990234375, |
|
"loss": 0.2149, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.293190598487854, |
|
"rewards/margins": 18.591859817504883, |
|
"rewards/rejected": -18.298667907714844, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6292920464477463, |
|
"grad_norm": 0.0184326171875, |
|
"learning_rate": 1.817569817673806e-06, |
|
"logits/chosen": 0.20200982689857483, |
|
"logits/rejected": 1.3313050270080566, |
|
"logps/chosen": -13.815347671508789, |
|
"logps/rejected": -1682.6148681640625, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3007175624370575, |
|
"rewards/margins": 16.28915786743164, |
|
"rewards/rejected": -15.988439559936523, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6342864277687601, |
|
"grad_norm": 0.0264892578125, |
|
"learning_rate": 1.7757229477578824e-06, |
|
"logits/chosen": 0.2238602340221405, |
|
"logits/rejected": 1.3182499408721924, |
|
"logps/chosen": -13.298685073852539, |
|
"logps/rejected": -1689.486328125, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31393638253211975, |
|
"rewards/margins": 16.413013458251953, |
|
"rewards/rejected": -16.099077224731445, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.639280809089774, |
|
"grad_norm": 0.0120849609375, |
|
"learning_rate": 1.7340964547232993e-06, |
|
"logits/chosen": 0.23566928505897522, |
|
"logits/rejected": 1.3794082403182983, |
|
"logps/chosen": -12.66304874420166, |
|
"logps/rejected": -1540.5882568359375, |
|
"loss": 0.2122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30794182419776917, |
|
"rewards/margins": 15.042073249816895, |
|
"rewards/rejected": -14.734130859375, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6442751904107878, |
|
"grad_norm": 0.0233154296875, |
|
"learning_rate": 1.6927030043264656e-06, |
|
"logits/chosen": 0.29966339468955994, |
|
"logits/rejected": 1.3575140237808228, |
|
"logps/chosen": -12.536134719848633, |
|
"logps/rejected": -1450.3671875, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3003261685371399, |
|
"rewards/margins": 14.137969970703125, |
|
"rewards/rejected": -13.83764362335205, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6492695717318018, |
|
"grad_norm": 0.01904296875, |
|
"learning_rate": 1.6515551914155522e-06, |
|
"logits/chosen": 0.21864613890647888, |
|
"logits/rejected": 1.2960518598556519, |
|
"logps/chosen": -13.931121826171875, |
|
"logps/rejected": -1731.0433349609375, |
|
"loss": 0.2135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29954832792282104, |
|
"rewards/margins": 16.782882690429688, |
|
"rewards/rejected": -16.483333587646484, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6542639530528156, |
|
"grad_norm": 0.027587890625, |
|
"learning_rate": 1.6106655360982376e-06, |
|
"logits/chosen": 0.11391136795282364, |
|
"logits/rejected": 1.1829859018325806, |
|
"logps/chosen": -13.162788391113281, |
|
"logps/rejected": -1799.5191650390625, |
|
"loss": 0.2121, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3114808201789856, |
|
"rewards/margins": 17.426467895507812, |
|
"rewards/rejected": -17.114986419677734, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6592583343738294, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 1.570046479932196e-06, |
|
"logits/chosen": 0.25235381722450256, |
|
"logits/rejected": 1.2799979448318481, |
|
"logps/chosen": -13.60442066192627, |
|
"logps/rejected": -1395.318115234375, |
|
"loss": 0.213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3034665286540985, |
|
"rewards/margins": 13.48466968536377, |
|
"rewards/rejected": -13.18120288848877, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6642527156948433, |
|
"grad_norm": 0.0306396484375, |
|
"learning_rate": 1.5297103821394876e-06, |
|
"logits/chosen": 0.2604614198207855, |
|
"logits/rejected": 1.4426438808441162, |
|
"logps/chosen": -12.831866264343262, |
|
"logps/rejected": -1725.9541015625, |
|
"loss": 0.2136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30009177327156067, |
|
"rewards/margins": 16.85196876525879, |
|
"rewards/rejected": -16.551877975463867, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.6692470970158572, |
|
"grad_norm": 0.0291748046875, |
|
"learning_rate": 1.489669515845995e-06, |
|
"logits/chosen": 0.19801196455955505, |
|
"logits/rejected": 1.246242642402649, |
|
"logps/chosen": -12.573989868164062, |
|
"logps/rejected": -1484.0341796875, |
|
"loss": 0.2136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3015730679035187, |
|
"rewards/margins": 14.43646240234375, |
|
"rewards/rejected": -14.134889602661133, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.674241478336871, |
|
"grad_norm": 0.020751953125, |
|
"learning_rate": 1.449936064347065e-06, |
|
"logits/chosen": 0.24275144934654236, |
|
"logits/rejected": 1.3506710529327393, |
|
"logps/chosen": -12.805200576782227, |
|
"logps/rejected": -1675.633544921875, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3000423312187195, |
|
"rewards/margins": 16.352081298828125, |
|
"rewards/rejected": -16.052040100097656, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6792358596578849, |
|
"grad_norm": 0.01483154296875, |
|
"learning_rate": 1.4105221174004771e-06, |
|
"logits/chosen": 0.18348607420921326, |
|
"logits/rejected": 1.3507370948791504, |
|
"logps/chosen": -13.73768424987793, |
|
"logps/rejected": -1898.157470703125, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.309729665517807, |
|
"rewards/margins": 18.553356170654297, |
|
"rewards/rejected": -18.24362564086914, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6842302409788987, |
|
"grad_norm": 0.03515625, |
|
"learning_rate": 1.3714396675478714e-06, |
|
"logits/chosen": 0.29044079780578613, |
|
"logits/rejected": 1.34650719165802, |
|
"logps/chosen": -12.877673149108887, |
|
"logps/rejected": -1523.441162109375, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2951946556568146, |
|
"rewards/margins": 14.83436107635498, |
|
"rewards/rejected": -14.539166450500488, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6892246222999125, |
|
"grad_norm": 0.0115966796875, |
|
"learning_rate": 1.332700606465766e-06, |
|
"logits/chosen": 0.18918542563915253, |
|
"logits/rejected": 1.3702523708343506, |
|
"logps/chosen": -13.452165603637695, |
|
"logps/rejected": -1564.6370849609375, |
|
"loss": 0.2125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3073347210884094, |
|
"rewards/margins": 15.239773750305176, |
|
"rewards/rejected": -14.9324369430542, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.6942190036209265, |
|
"grad_norm": 0.034423828125, |
|
"learning_rate": 1.294316721347254e-06, |
|
"logits/chosen": 0.23732297122478485, |
|
"logits/rejected": 1.3234728574752808, |
|
"logps/chosen": -13.713842391967773, |
|
"logps/rejected": -1537.1632080078125, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30026909708976746, |
|
"rewards/margins": 14.980550765991211, |
|
"rewards/rejected": -14.680282592773438, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6992133849419403, |
|
"grad_norm": 0.07080078125, |
|
"learning_rate": 1.2562996913154952e-06, |
|
"logits/chosen": 0.150472030043602, |
|
"logits/rejected": 1.4298745393753052, |
|
"logps/chosen": -12.643513679504395, |
|
"logps/rejected": -2120.697509765625, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30036434531211853, |
|
"rewards/margins": 20.737525939941406, |
|
"rewards/rejected": -20.43716049194336, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7042077662629542, |
|
"grad_norm": 0.034423828125, |
|
"learning_rate": 1.2186610838700958e-06, |
|
"logits/chosen": 0.30068179965019226, |
|
"logits/rejected": 1.335126280784607, |
|
"logps/chosen": -12.845601081848145, |
|
"logps/rejected": -1345.099365234375, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2946023643016815, |
|
"rewards/margins": 13.083051681518555, |
|
"rewards/rejected": -12.788450241088867, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.709202147583968, |
|
"grad_norm": 0.033447265625, |
|
"learning_rate": 1.1814123513674465e-06, |
|
"logits/chosen": 0.18157488107681274, |
|
"logits/rejected": 1.380326509475708, |
|
"logps/chosen": -13.374841690063477, |
|
"logps/rejected": -1741.3206787109375, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30905359983444214, |
|
"rewards/margins": 16.993389129638672, |
|
"rewards/rejected": -16.684337615966797, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7141965289049819, |
|
"grad_norm": 0.036865234375, |
|
"learning_rate": 1.1445648275360925e-06, |
|
"logits/chosen": 0.19126050174236298, |
|
"logits/rejected": 1.4904680252075195, |
|
"logps/chosen": -13.341898918151855, |
|
"logps/rejected": -1982.956298828125, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30447131395339966, |
|
"rewards/margins": 19.409332275390625, |
|
"rewards/rejected": -19.104862213134766, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7191909102259958, |
|
"grad_norm": 0.03955078125, |
|
"learning_rate": 1.1081297240282077e-06, |
|
"logits/chosen": 0.2438248097896576, |
|
"logits/rejected": 1.3988474607467651, |
|
"logps/chosen": -13.187724113464355, |
|
"logps/rejected": -1571.8028564453125, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29910221695899963, |
|
"rewards/margins": 15.328478813171387, |
|
"rewards/rejected": -15.029376029968262, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7241852915470096, |
|
"grad_norm": 0.0235595703125, |
|
"learning_rate": 1.0721181270082061e-06, |
|
"logits/chosen": 0.20241305232048035, |
|
"logits/rejected": 1.308318018913269, |
|
"logps/chosen": -12.698432922363281, |
|
"logps/rejected": -1740.4573974609375, |
|
"loss": 0.2125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.313620924949646, |
|
"rewards/margins": 16.896198272705078, |
|
"rewards/rejected": -16.582576751708984, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7291796728680234, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 1.0365409937795385e-06, |
|
"logits/chosen": 0.20683518052101135, |
|
"logits/rejected": 1.2756832838058472, |
|
"logps/chosen": -13.767419815063477, |
|
"logps/rejected": -1547.2147216796875, |
|
"loss": 0.2129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3099841773509979, |
|
"rewards/margins": 14.946782112121582, |
|
"rewards/rejected": -14.636796951293945, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7341740541890374, |
|
"grad_norm": 0.0263671875, |
|
"learning_rate": 1.0014091494506962e-06, |
|
"logits/chosen": 0.17677463591098785, |
|
"logits/rejected": 1.3411346673965454, |
|
"logps/chosen": -13.297311782836914, |
|
"logps/rejected": -1913.2984619140625, |
|
"loss": 0.2128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3021236062049866, |
|
"rewards/margins": 18.699064254760742, |
|
"rewards/rejected": -18.396942138671875, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7391684355100512, |
|
"grad_norm": 0.033203125, |
|
"learning_rate": 9.667332836414368e-07, |
|
"logits/chosen": 0.15931569039821625, |
|
"logits/rejected": 1.2274577617645264, |
|
"logps/chosen": -13.343734741210938, |
|
"logps/rejected": -1609.459228515625, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3135462999343872, |
|
"rewards/margins": 15.410505294799805, |
|
"rewards/rejected": -15.096961975097656, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.744162816831065, |
|
"grad_norm": 0.013427734375, |
|
"learning_rate": 9.325239472302422e-07, |
|
"logits/chosen": 0.25666847825050354, |
|
"logits/rejected": 1.4118614196777344, |
|
"logps/chosen": -13.085573196411133, |
|
"logps/rejected": -1693.067626953125, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30241408944129944, |
|
"rewards/margins": 16.529926300048828, |
|
"rewards/rejected": -16.227514266967773, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7491571981520789, |
|
"grad_norm": 0.032958984375, |
|
"learning_rate": 8.987915491439844e-07, |
|
"logits/chosen": 0.2501397132873535, |
|
"logits/rejected": 1.383455514907837, |
|
"logps/chosen": -12.903341293334961, |
|
"logps/rejected": -1736.0384521484375, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3011986017227173, |
|
"rewards/margins": 16.931396484375, |
|
"rewards/rejected": -16.630199432373047, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7541515794730927, |
|
"grad_norm": 0.021240234375, |
|
"learning_rate": 8.655463531907823e-07, |
|
"logits/chosen": 0.2224547117948532, |
|
"logits/rejected": 1.3173682689666748, |
|
"logps/chosen": -13.032022476196289, |
|
"logps/rejected": -1771.166259765625, |
|
"loss": 0.2132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29998722672462463, |
|
"rewards/margins": 17.284591674804688, |
|
"rewards/rejected": -16.984607696533203, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7591459607941067, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 8.327984749370227e-07, |
|
"logits/chosen": 0.2447456419467926, |
|
"logits/rejected": 1.344362497329712, |
|
"logps/chosen": -12.957304000854492, |
|
"logps/rejected": -1557.052978515625, |
|
"loss": 0.2142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29349425435066223, |
|
"rewards/margins": 15.1736478805542, |
|
"rewards/rejected": -14.880154609680176, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.7641403421151205, |
|
"grad_norm": 0.0218505859375, |
|
"learning_rate": 8.005578786294782e-07, |
|
"logits/chosen": 0.1744759976863861, |
|
"logits/rejected": 1.3996227979660034, |
|
"logps/chosen": -13.042366027832031, |
|
"logps/rejected": -1864.2431640625, |
|
"loss": 0.2122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30568575859069824, |
|
"rewards/margins": 18.201250076293945, |
|
"rewards/rejected": -17.89556312561035, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.7691347234361343, |
|
"grad_norm": 0.012939453125, |
|
"learning_rate": 7.688343741634702e-07, |
|
"logits/chosen": 0.22156497836112976, |
|
"logits/rejected": 1.3012608289718628, |
|
"logps/chosen": -12.66821575164795, |
|
"logps/rejected": -1628.963134765625, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2943740785121918, |
|
"rewards/margins": 15.8633394241333, |
|
"rewards/rejected": -15.568964958190918, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.7741291047571482, |
|
"grad_norm": 0.03662109375, |
|
"learning_rate": 7.376376140980001e-07, |
|
"logits/chosen": 0.1970866173505783, |
|
"logits/rejected": 1.2925320863723755, |
|
"logps/chosen": -12.667881965637207, |
|
"logps/rejected": -1566.008056640625, |
|
"loss": 0.2128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3171369433403015, |
|
"rewards/margins": 15.188334465026855, |
|
"rewards/rejected": -14.871198654174805, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7791234860781621, |
|
"grad_norm": 0.0625, |
|
"learning_rate": 7.069770907187465e-07, |
|
"logits/chosen": 0.20419040322303772, |
|
"logits/rejected": 1.306980013847351, |
|
"logps/chosen": -13.543539047241211, |
|
"logps/rejected": -1506.096923828125, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3025529384613037, |
|
"rewards/margins": 14.58825397491455, |
|
"rewards/rejected": -14.285697937011719, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.7841178673991759, |
|
"grad_norm": 0.01483154296875, |
|
"learning_rate": 6.768621331498371e-07, |
|
"logits/chosen": 0.22659845650196075, |
|
"logits/rejected": 1.3488702774047852, |
|
"logps/chosen": -12.837379455566406, |
|
"logps/rejected": -1573.134033203125, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29855865240097046, |
|
"rewards/margins": 15.335705757141113, |
|
"rewards/rejected": -15.037145614624023, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.7891122487201898, |
|
"grad_norm": 0.049560546875, |
|
"learning_rate": 6.473019045152593e-07, |
|
"logits/chosen": 0.22067594528198242, |
|
"logits/rejected": 1.39100980758667, |
|
"logps/chosen": -12.964533805847168, |
|
"logps/rejected": -1819.2757568359375, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3059755563735962, |
|
"rewards/margins": 17.78072738647461, |
|
"rewards/rejected": -17.474750518798828, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.7941066300412036, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 6.183053991507818e-07, |
|
"logits/chosen": 0.18515101075172424, |
|
"logits/rejected": 1.2657784223556519, |
|
"logps/chosen": -13.701101303100586, |
|
"logps/rejected": -1632.0228271484375, |
|
"loss": 0.2135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30029112100601196, |
|
"rewards/margins": 15.897903442382812, |
|
"rewards/rejected": -15.597612380981445, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.7991010113622175, |
|
"grad_norm": 0.035400390625, |
|
"learning_rate": 5.898814398672376e-07, |
|
"logits/chosen": 0.2673841416835785, |
|
"logits/rejected": 1.404524326324463, |
|
"logps/chosen": -12.89787769317627, |
|
"logps/rejected": -1488.13818359375, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2999451160430908, |
|
"rewards/margins": 14.503445625305176, |
|
"rewards/rejected": -14.203500747680664, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8040953926832314, |
|
"grad_norm": 0.028564453125, |
|
"learning_rate": 5.620386752659912e-07, |
|
"logits/chosen": 0.20275497436523438, |
|
"logits/rejected": 1.2962656021118164, |
|
"logps/chosen": -14.060361862182617, |
|
"logps/rejected": -1563.873046875, |
|
"loss": 0.2123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3067697286605835, |
|
"rewards/margins": 15.22687816619873, |
|
"rewards/rejected": -14.920109748840332, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8090897740042452, |
|
"grad_norm": 0.031494140625, |
|
"learning_rate": 5.347855771074157e-07, |
|
"logits/chosen": 0.22789278626441956, |
|
"logits/rejected": 1.4333293437957764, |
|
"logps/chosen": -12.939372062683105, |
|
"logps/rejected": -1751.370361328125, |
|
"loss": 0.2136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2999444007873535, |
|
"rewards/margins": 17.115280151367188, |
|
"rewards/rejected": -16.81533432006836, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8140841553252591, |
|
"grad_norm": 0.031005859375, |
|
"learning_rate": 5.081304377331786e-07, |
|
"logits/chosen": 0.27506810426712036, |
|
"logits/rejected": 1.2999309301376343, |
|
"logps/chosen": -13.067828178405762, |
|
"logps/rejected": -1445.669677734375, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30137357115745544, |
|
"rewards/margins": 14.080915451049805, |
|
"rewards/rejected": -13.779541015625, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.819078536646273, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 4.820813675431186e-07, |
|
"logits/chosen": 0.15463611483573914, |
|
"logits/rejected": 1.3889650106430054, |
|
"logps/chosen": -13.349421501159668, |
|
"logps/rejected": -1812.4769287109375, |
|
"loss": 0.2136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3092433512210846, |
|
"rewards/margins": 17.648366928100586, |
|
"rewards/rejected": -17.33912467956543, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8240729179672868, |
|
"grad_norm": 0.0242919921875, |
|
"learning_rate": 4.5664629252747865e-07, |
|
"logits/chosen": 0.21123354136943817, |
|
"logits/rejected": 1.3822557926177979, |
|
"logps/chosen": -12.89905834197998, |
|
"logps/rejected": -1771.5474853515625, |
|
"loss": 0.213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29551658034324646, |
|
"rewards/margins": 17.291709899902344, |
|
"rewards/rejected": -16.99619483947754, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8290672992883007, |
|
"grad_norm": 0.021484375, |
|
"learning_rate": 4.3183295185525746e-07, |
|
"logits/chosen": 0.17760224640369415, |
|
"logits/rejected": 1.320516586303711, |
|
"logps/chosen": -12.863115310668945, |
|
"logps/rejected": -1791.6168212890625, |
|
"loss": 0.2129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3116030693054199, |
|
"rewards/margins": 17.404626846313477, |
|
"rewards/rejected": -17.09302520751953, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8340616806093145, |
|
"grad_norm": 0.0274658203125, |
|
"learning_rate": 4.0764889551939773e-07, |
|
"logits/chosen": 0.19689543545246124, |
|
"logits/rejected": 1.3633372783660889, |
|
"logps/chosen": -12.999624252319336, |
|
"logps/rejected": -1783.937255859375, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30732038617134094, |
|
"rewards/margins": 17.336694717407227, |
|
"rewards/rejected": -17.029375076293945, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8390560619303283, |
|
"grad_norm": 0.02392578125, |
|
"learning_rate": 3.8410148203953916e-07, |
|
"logits/chosen": 0.20565366744995117, |
|
"logits/rejected": 1.2732315063476562, |
|
"logps/chosen": -13.205337524414062, |
|
"logps/rejected": -1707.686279296875, |
|
"loss": 0.2141, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29469722509384155, |
|
"rewards/margins": 16.513946533203125, |
|
"rewards/rejected": -16.219249725341797, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8440504432513423, |
|
"grad_norm": 0.01904296875, |
|
"learning_rate": 3.611978762230306e-07, |
|
"logits/chosen": 0.2300119698047638, |
|
"logits/rejected": 1.3706471920013428, |
|
"logps/chosen": -12.709399223327637, |
|
"logps/rejected": -1650.5601806640625, |
|
"loss": 0.2126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30553561449050903, |
|
"rewards/margins": 16.097576141357422, |
|
"rewards/rejected": -15.79203987121582, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8490448245723561, |
|
"grad_norm": 0.031982421875, |
|
"learning_rate": 3.389450469848821e-07, |
|
"logits/chosen": 0.26923322677612305, |
|
"logits/rejected": 1.3886299133300781, |
|
"logps/chosen": -12.681255340576172, |
|
"logps/rejected": -1633.20361328125, |
|
"loss": 0.2138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3012009263038635, |
|
"rewards/margins": 15.944944381713867, |
|
"rewards/rejected": -15.643745422363281, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.85403920589337, |
|
"grad_norm": 0.015869140625, |
|
"learning_rate": 3.173497652273241e-07, |
|
"logits/chosen": 0.22611021995544434, |
|
"logits/rejected": 1.4598513841629028, |
|
"logps/chosen": -13.163076400756836, |
|
"logps/rejected": -1702.979736328125, |
|
"loss": 0.2126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3090182840824127, |
|
"rewards/margins": 16.64162826538086, |
|
"rewards/rejected": -16.332609176635742, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.8590335872143838, |
|
"grad_norm": 0.0257568359375, |
|
"learning_rate": 2.964186017796153e-07, |
|
"logits/chosen": 0.23432429134845734, |
|
"logits/rejected": 1.3954203128814697, |
|
"logps/chosen": -12.975980758666992, |
|
"logps/rejected": -1669.3160400390625, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29741281270980835, |
|
"rewards/margins": 16.287899017333984, |
|
"rewards/rejected": -15.990484237670898, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.8640279685353977, |
|
"grad_norm": 0.020263671875, |
|
"learning_rate": 2.761579253987226e-07, |
|
"logits/chosen": 0.24720034003257751, |
|
"logits/rejected": 1.284588098526001, |
|
"logps/chosen": -13.461567878723145, |
|
"logps/rejected": -1476.7037353515625, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3023452162742615, |
|
"rewards/margins": 14.363668441772461, |
|
"rewards/rejected": -14.061323165893555, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.8690223498564116, |
|
"grad_norm": 0.04541015625, |
|
"learning_rate": 2.565739008314944e-07, |
|
"logits/chosen": 0.25941091775894165, |
|
"logits/rejected": 1.3293951749801636, |
|
"logps/chosen": -12.768040657043457, |
|
"logps/rejected": -1509.4613037109375, |
|
"loss": 0.2144, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2932388186454773, |
|
"rewards/margins": 14.711868286132812, |
|
"rewards/rejected": -14.418627738952637, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.8740167311774254, |
|
"grad_norm": 0.0240478515625, |
|
"learning_rate": 2.3767248693890106e-07, |
|
"logits/chosen": 0.2348676472902298, |
|
"logits/rejected": 1.3176116943359375, |
|
"logps/chosen": -13.70958137512207, |
|
"logps/rejected": -1628.524658203125, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3072062134742737, |
|
"rewards/margins": 15.790201187133789, |
|
"rewards/rejected": -15.482992172241211, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.8790111124984392, |
|
"grad_norm": 0.0849609375, |
|
"learning_rate": 2.1945943488292265e-07, |
|
"logits/chosen": 0.14879265427589417, |
|
"logits/rejected": 1.2918832302093506, |
|
"logps/chosen": -14.206727981567383, |
|
"logps/rejected": -1858.029052734375, |
|
"loss": 0.2125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3139779269695282, |
|
"rewards/margins": 18.043270111083984, |
|
"rewards/rejected": -17.729291915893555, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.8840054938194531, |
|
"grad_norm": 0.019775390625, |
|
"learning_rate": 2.0194028637663733e-07, |
|
"logits/chosen": 0.2688780426979065, |
|
"logits/rejected": 1.2982268333435059, |
|
"logps/chosen": -13.01366901397705, |
|
"logps/rejected": -1392.9710693359375, |
|
"loss": 0.213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3066830039024353, |
|
"rewards/margins": 13.560384750366211, |
|
"rewards/rejected": -13.253702163696289, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.888999875140467, |
|
"grad_norm": 0.0380859375, |
|
"learning_rate": 1.851203719980324e-07, |
|
"logits/chosen": 0.10393796861171722, |
|
"logits/rejected": 1.3042396306991577, |
|
"logps/chosen": -13.014623641967773, |
|
"logps/rejected": -1894.3724365234375, |
|
"loss": 0.2117, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3259292542934418, |
|
"rewards/margins": 18.341753005981445, |
|
"rewards/rejected": -18.015825271606445, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.8939942564614808, |
|
"grad_norm": 0.0250244140625, |
|
"learning_rate": 1.6900480956806214e-07, |
|
"logits/chosen": 0.14930710196495056, |
|
"logits/rejected": 1.24697744846344, |
|
"logps/chosen": -13.011159896850586, |
|
"logps/rejected": -1752.00390625, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30487823486328125, |
|
"rewards/margins": 17.018848419189453, |
|
"rewards/rejected": -16.713970184326172, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.8989886377824947, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 1.5359850259344223e-07, |
|
"logits/chosen": 0.19253353774547577, |
|
"logits/rejected": 1.2840015888214111, |
|
"logps/chosen": -13.470372200012207, |
|
"logps/rejected": -1626.4345703125, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3020302951335907, |
|
"rewards/margins": 15.777534484863281, |
|
"rewards/rejected": -15.475504875183105, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9039830191035085, |
|
"grad_norm": 0.0238037109375, |
|
"learning_rate": 1.3890613877465127e-07, |
|
"logits/chosen": 0.236587792634964, |
|
"logits/rejected": 1.3434031009674072, |
|
"logps/chosen": -13.017538070678711, |
|
"logps/rejected": -1619.8177490234375, |
|
"loss": 0.2148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29720932245254517, |
|
"rewards/margins": 15.795916557312012, |
|
"rewards/rejected": -15.498708724975586, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9089774004245225, |
|
"grad_norm": 0.064453125, |
|
"learning_rate": 1.249321885795954e-07, |
|
"logits/chosen": 0.23312029242515564, |
|
"logits/rejected": 1.218126893043518, |
|
"logps/chosen": -13.364558219909668, |
|
"logps/rejected": -1396.6041259765625, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30306655168533325, |
|
"rewards/margins": 13.558195114135742, |
|
"rewards/rejected": -13.255128860473633, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9139717817455363, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 1.1168090388337577e-07, |
|
"logits/chosen": 0.289134681224823, |
|
"logits/rejected": 1.3270288705825806, |
|
"logps/chosen": -12.729695320129395, |
|
"logps/rejected": -1422.671142578125, |
|
"loss": 0.2137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3039693832397461, |
|
"rewards/margins": 13.834306716918945, |
|
"rewards/rejected": -13.5303373336792, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9189661630665501, |
|
"grad_norm": 0.0257568359375, |
|
"learning_rate": 9.915631667455989e-08, |
|
"logits/chosen": 0.23302344977855682, |
|
"logits/rejected": 1.3944863080978394, |
|
"logps/chosen": -13.118586540222168, |
|
"logps/rejected": -1715.78515625, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29517239332199097, |
|
"rewards/margins": 16.75180435180664, |
|
"rewards/rejected": -16.45663070678711, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.923960544387564, |
|
"grad_norm": 0.025634765625, |
|
"learning_rate": 8.736223782836589e-08, |
|
"logits/chosen": 0.1992538869380951, |
|
"logits/rejected": 1.3024797439575195, |
|
"logps/chosen": -12.67003059387207, |
|
"logps/rejected": -1692.8782958984375, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3054552972316742, |
|
"rewards/margins": 16.512258529663086, |
|
"rewards/rejected": -16.206802368164062, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9289549257085778, |
|
"grad_norm": 0.01226806640625, |
|
"learning_rate": 7.63022559471202e-08, |
|
"logits/chosen": 0.23122599720954895, |
|
"logits/rejected": 1.37257981300354, |
|
"logps/chosen": -12.650789260864258, |
|
"logps/rejected": -1592.7677001953125, |
|
"loss": 0.2138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2996312975883484, |
|
"rewards/margins": 15.537762641906738, |
|
"rewards/rejected": -15.238128662109375, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9339493070295917, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 6.597973626834759e-08, |
|
"logits/chosen": 0.21128106117248535, |
|
"logits/rejected": 1.4774492979049683, |
|
"logps/chosen": -13.167986869812012, |
|
"logps/rejected": -1897.197998046875, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30018025636672974, |
|
"rewards/margins": 18.570720672607422, |
|
"rewards/rejected": -18.27054214477539, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9389436883506056, |
|
"grad_norm": 0.0264892578125, |
|
"learning_rate": 5.639781964082547e-08, |
|
"logits/chosen": 0.27233806252479553, |
|
"logits/rejected": 1.4588285684585571, |
|
"logps/chosen": -13.405789375305176, |
|
"logps/rejected": -1711.1881103515625, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29316192865371704, |
|
"rewards/margins": 16.696842193603516, |
|
"rewards/rejected": -16.40367889404297, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9439380696716194, |
|
"grad_norm": 0.033203125, |
|
"learning_rate": 4.755942156891458e-08, |
|
"logits/chosen": 0.23750165104866028, |
|
"logits/rejected": 1.4071403741836548, |
|
"logps/chosen": -12.829435348510742, |
|
"logps/rejected": -1591.2479248046875, |
|
"loss": 0.2133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3001479208469391, |
|
"rewards/margins": 15.529951095581055, |
|
"rewards/rejected": -15.229803085327148, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9489324509926332, |
|
"grad_norm": 0.0203857421875, |
|
"learning_rate": 3.946723132545155e-08, |
|
"logits/chosen": 0.18308812379837036, |
|
"logits/rejected": 1.3526315689086914, |
|
"logps/chosen": -13.234288215637207, |
|
"logps/rejected": -1674.6246337890625, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3094561696052551, |
|
"rewards/margins": 16.279918670654297, |
|
"rewards/rejected": -15.970464706420898, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9539268323136472, |
|
"grad_norm": 0.02197265625, |
|
"learning_rate": 3.212371113348156e-08, |
|
"logits/chosen": 0.2626166343688965, |
|
"logits/rejected": 1.3081789016723633, |
|
"logps/chosen": -12.9055757522583, |
|
"logps/rejected": -1487.3057861328125, |
|
"loss": 0.2143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29553088545799255, |
|
"rewards/margins": 14.46813678741455, |
|
"rewards/rejected": -14.172607421875, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.958921213634661, |
|
"grad_norm": 0.033935546875, |
|
"learning_rate": 2.5531095417073437e-08, |
|
"logits/chosen": 0.2499731481075287, |
|
"logits/rejected": 1.3821344375610352, |
|
"logps/chosen": -13.182432174682617, |
|
"logps/rejected": -1497.8023681640625, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29157382249832153, |
|
"rewards/margins": 14.601869583129883, |
|
"rewards/rejected": -14.310295104980469, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.9639155949556749, |
|
"grad_norm": 0.0218505859375, |
|
"learning_rate": 1.969139012144822e-08, |
|
"logits/chosen": 0.27068477869033813, |
|
"logits/rejected": 1.3521279096603394, |
|
"logps/chosen": -13.608545303344727, |
|
"logps/rejected": -1491.3599853515625, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3026901185512543, |
|
"rewards/margins": 14.520078659057617, |
|
"rewards/rejected": -14.217389106750488, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.9689099762766887, |
|
"grad_norm": 0.049072265625, |
|
"learning_rate": 1.4606372102626277e-08, |
|
"logits/chosen": 0.19163861870765686, |
|
"logits/rejected": 1.3109387159347534, |
|
"logps/chosen": -13.46105670928955, |
|
"logps/rejected": -1731.31640625, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3083009421825409, |
|
"rewards/margins": 16.883403778076172, |
|
"rewards/rejected": -16.575105667114258, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.9739043575977026, |
|
"grad_norm": 0.0322265625, |
|
"learning_rate": 1.0277588586781463e-08, |
|
"logits/chosen": 0.19613580405712128, |
|
"logits/rejected": 1.228542685508728, |
|
"logps/chosen": -13.699869155883789, |
|
"logps/rejected": -1520.361572265625, |
|
"loss": 0.2129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3030748963356018, |
|
"rewards/margins": 14.732551574707031, |
|
"rewards/rejected": -14.429475784301758, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.9788987389187165, |
|
"grad_norm": 0.025146484375, |
|
"learning_rate": 6.7063566994651775e-09, |
|
"logits/chosen": 0.21389658749103546, |
|
"logits/rejected": 1.3907114267349243, |
|
"logps/chosen": -13.105180740356445, |
|
"logps/rejected": -1654.1494140625, |
|
"loss": 0.2134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30368101596832275, |
|
"rewards/margins": 16.11886978149414, |
|
"rewards/rejected": -15.81518840789795, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.9838931202397303, |
|
"grad_norm": 0.02490234375, |
|
"learning_rate": 3.893763064840295e-09, |
|
"logits/chosen": 0.18713845312595367, |
|
"logits/rejected": 1.294327974319458, |
|
"logps/chosen": -12.946528434753418, |
|
"logps/rejected": -1592.420654296875, |
|
"loss": 0.2118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31735336780548096, |
|
"rewards/margins": 15.468029975891113, |
|
"rewards/rejected": -15.150675773620605, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.9888875015607441, |
|
"grad_norm": 0.0172119140625, |
|
"learning_rate": 1.840663475053961e-09, |
|
"logits/chosen": 0.23091156780719757, |
|
"logits/rejected": 1.4303152561187744, |
|
"logps/chosen": -13.524391174316406, |
|
"logps/rejected": -1685.2825927734375, |
|
"loss": 0.2139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30492401123046875, |
|
"rewards/margins": 16.39162826538086, |
|
"rewards/rejected": -16.08670425415039, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.993881882881758, |
|
"grad_norm": 0.01446533203125, |
|
"learning_rate": 5.476826298439486e-10, |
|
"logits/chosen": 0.18041366338729858, |
|
"logits/rejected": 1.3766809701919556, |
|
"logps/chosen": -12.73656940460205, |
|
"logps/rejected": -1975.449462890625, |
|
"loss": 0.2138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2982867360115051, |
|
"rewards/margins": 19.314851760864258, |
|
"rewards/rejected": -19.016565322875977, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.9988762642027719, |
|
"grad_norm": 0.029052734375, |
|
"learning_rate": 1.521394646070151e-11, |
|
"logits/chosen": 0.212058424949646, |
|
"logits/rejected": 1.3283421993255615, |
|
"logps/chosen": -12.978428840637207, |
|
"logps/rejected": -1706.1982421875, |
|
"loss": 0.2118, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3162993788719177, |
|
"rewards/margins": 16.5406551361084, |
|
"rewards/rejected": -16.224355697631836, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9988762642027719, |
|
"eval_logits/chosen": 0.19315297901630402, |
|
"eval_logits/rejected": 1.0988490581512451, |
|
"eval_logps/chosen": -12.085772514343262, |
|
"eval_logps/rejected": -1018.5144653320312, |
|
"eval_loss": 0.2121797353029251, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 0.32569777965545654, |
|
"eval_rewards/margins": 9.880459785461426, |
|
"eval_rewards/rejected": -9.554760932922363, |
|
"eval_runtime": 0.4236, |
|
"eval_samples_per_second": 11.805, |
|
"eval_steps_per_second": 7.083, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9998751404669747, |
|
"step": 2002, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2449444185841929, |
|
"train_runtime": 3711.3289, |
|
"train_samples_per_second": 4.316, |
|
"train_steps_per_second": 0.539 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2002, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|