|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3179, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00031456432840515884, |
|
"grad_norm": 0.0479649193584919, |
|
"learning_rate": 1.5723270440251573e-08, |
|
"logits/chosen": -1.9399988651275635, |
|
"logits/rejected": -1.95430588722229, |
|
"logps/chosen": -37.35533905029297, |
|
"logps/rejected": -35.944679260253906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0031456432840515887, |
|
"grad_norm": 0.04902639612555504, |
|
"learning_rate": 1.5723270440251575e-07, |
|
"logits/chosen": -1.8399639129638672, |
|
"logits/rejected": -1.9113829135894775, |
|
"logps/chosen": -33.15123748779297, |
|
"logps/rejected": -34.97999572753906, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4097222089767456, |
|
"rewards/chosen": -0.00022805675689596683, |
|
"rewards/margins": -0.00019479618640616536, |
|
"rewards/rejected": -3.326057776575908e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0062912865681031774, |
|
"grad_norm": 0.04345833510160446, |
|
"learning_rate": 3.144654088050315e-07, |
|
"logits/chosen": -1.8433700799942017, |
|
"logits/rejected": -1.8701088428497314, |
|
"logps/chosen": -32.37605667114258, |
|
"logps/rejected": -35.17049026489258, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -0.0001617725647520274, |
|
"rewards/margins": -0.00015225948300212622, |
|
"rewards/rejected": -9.513064469501842e-06, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.009436929852154765, |
|
"grad_norm": 0.04357537627220154, |
|
"learning_rate": 4.716981132075472e-07, |
|
"logits/chosen": -1.8218624591827393, |
|
"logits/rejected": -1.8491008281707764, |
|
"logps/chosen": -32.927635192871094, |
|
"logps/rejected": -34.23331069946289, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 1.8031179934041575e-05, |
|
"rewards/margins": 0.00020070603932254016, |
|
"rewards/rejected": -0.00018267489213030785, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.012582573136206355, |
|
"grad_norm": 0.04584033414721489, |
|
"learning_rate": 6.28930817610063e-07, |
|
"logits/chosen": -1.8482955694198608, |
|
"logits/rejected": -1.844745397567749, |
|
"logps/chosen": -33.68798828125, |
|
"logps/rejected": -37.151004791259766, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0003600932250265032, |
|
"rewards/margins": -7.291980000445619e-05, |
|
"rewards/rejected": -0.00028717340319417417, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.015728216420257943, |
|
"grad_norm": 0.04268745705485344, |
|
"learning_rate": 7.861635220125787e-07, |
|
"logits/chosen": -1.8830944299697876, |
|
"logits/rejected": -1.912956953048706, |
|
"logps/chosen": -33.78385925292969, |
|
"logps/rejected": -34.29187774658203, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0002359933714615181, |
|
"rewards/margins": 0.000701805402059108, |
|
"rewards/rejected": -0.0009377988171763718, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01887385970430953, |
|
"grad_norm": 0.04893243685364723, |
|
"learning_rate": 9.433962264150944e-07, |
|
"logits/chosen": -1.7625595331192017, |
|
"logits/rejected": -1.8228267431259155, |
|
"logps/chosen": -33.46589279174805, |
|
"logps/rejected": -36.136573791503906, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0001076062471838668, |
|
"rewards/margins": 0.001043324125930667, |
|
"rewards/rejected": -0.0009357180679216981, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02201950298836112, |
|
"grad_norm": 0.046153198927640915, |
|
"learning_rate": 1.1006289308176102e-06, |
|
"logits/chosen": -1.7840734720230103, |
|
"logits/rejected": -1.8304617404937744, |
|
"logps/chosen": -33.84288787841797, |
|
"logps/rejected": -36.21353530883789, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.00046643256791867316, |
|
"rewards/margins": 0.0013198342639952898, |
|
"rewards/rejected": -0.0017862668028101325, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02516514627241271, |
|
"grad_norm": 0.05236299708485603, |
|
"learning_rate": 1.257861635220126e-06, |
|
"logits/chosen": -1.7575123310089111, |
|
"logits/rejected": -1.7971513271331787, |
|
"logps/chosen": -32.27585220336914, |
|
"logps/rejected": -34.206329345703125, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.00034783725277520716, |
|
"rewards/margins": 0.0015550373354926705, |
|
"rewards/rejected": -0.001902874791994691, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.028310789556464298, |
|
"grad_norm": 0.05319148302078247, |
|
"learning_rate": 1.4150943396226415e-06, |
|
"logits/chosen": -1.8102385997772217, |
|
"logits/rejected": -1.8448301553726196, |
|
"logps/chosen": -31.791019439697266, |
|
"logps/rejected": -34.05156707763672, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.001236448297277093, |
|
"rewards/margins": 0.001894423388876021, |
|
"rewards/rejected": -0.0031308718025684357, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.031456432840515886, |
|
"grad_norm": 0.054018791764974594, |
|
"learning_rate": 1.5723270440251573e-06, |
|
"logits/chosen": -1.8018757104873657, |
|
"logits/rejected": -1.843665361404419, |
|
"logps/chosen": -34.18579864501953, |
|
"logps/rejected": -35.275360107421875, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.001453344477340579, |
|
"rewards/margins": 0.004754130728542805, |
|
"rewards/rejected": -0.003300786716863513, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.031456432840515886, |
|
"eval_logits/chosen": -1.6366691589355469, |
|
"eval_logits/rejected": -1.6841371059417725, |
|
"eval_logps/chosen": -32.712493896484375, |
|
"eval_logps/rejected": -36.258174896240234, |
|
"eval_loss": 0.6911582946777344, |
|
"eval_rewards/accuracies": 0.6339552402496338, |
|
"eval_rewards/chosen": 0.0005853726179338992, |
|
"eval_rewards/margins": 0.0041807363741099834, |
|
"eval_rewards/rejected": -0.0035953635815531015, |
|
"eval_runtime": 219.2745, |
|
"eval_samples_per_second": 97.672, |
|
"eval_steps_per_second": 1.528, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03460207612456748, |
|
"grad_norm": 0.05021243169903755, |
|
"learning_rate": 1.7295597484276729e-06, |
|
"logits/chosen": -1.800484299659729, |
|
"logits/rejected": -1.8205846548080444, |
|
"logps/chosen": -33.69269943237305, |
|
"logps/rejected": -37.17658233642578, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0005980390124022961, |
|
"rewards/margins": 0.0038381360936909914, |
|
"rewards/rejected": -0.004436175338923931, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03774771940861906, |
|
"grad_norm": 0.05514535307884216, |
|
"learning_rate": 1.8867924528301889e-06, |
|
"logits/chosen": -1.799552321434021, |
|
"logits/rejected": -1.8297306299209595, |
|
"logps/chosen": -32.277740478515625, |
|
"logps/rejected": -34.390567779541016, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.001782993203960359, |
|
"rewards/margins": 0.008893580175936222, |
|
"rewards/rejected": -0.007110586855560541, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04089336269267065, |
|
"grad_norm": 0.05100173130631447, |
|
"learning_rate": 2.044025157232705e-06, |
|
"logits/chosen": -1.8046401739120483, |
|
"logits/rejected": -1.817368507385254, |
|
"logps/chosen": -32.66090393066406, |
|
"logps/rejected": -35.909088134765625, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.003670961130410433, |
|
"rewards/margins": 0.011476712301373482, |
|
"rewards/rejected": -0.007805750705301762, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04403900597672224, |
|
"grad_norm": 0.057663802057504654, |
|
"learning_rate": 2.2012578616352204e-06, |
|
"logits/chosen": -1.7695449590682983, |
|
"logits/rejected": -1.8342878818511963, |
|
"logps/chosen": -32.138465881347656, |
|
"logps/rejected": -38.76416778564453, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.00406468054279685, |
|
"rewards/margins": 0.016144271939992905, |
|
"rewards/rejected": -0.012079590931534767, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04718464926077383, |
|
"grad_norm": 0.06243397668004036, |
|
"learning_rate": 2.358490566037736e-06, |
|
"logits/chosen": -1.760240912437439, |
|
"logits/rejected": -1.7993634939193726, |
|
"logps/chosen": -34.989463806152344, |
|
"logps/rejected": -36.670860290527344, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0018432287033647299, |
|
"rewards/margins": 0.01047598011791706, |
|
"rewards/rejected": -0.012319209054112434, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05033029254482542, |
|
"grad_norm": 0.0660770907998085, |
|
"learning_rate": 2.515723270440252e-06, |
|
"logits/chosen": -1.732052206993103, |
|
"logits/rejected": -1.7964776754379272, |
|
"logps/chosen": -30.869558334350586, |
|
"logps/rejected": -36.11768341064453, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.00874429289251566, |
|
"rewards/margins": 0.01892954111099243, |
|
"rewards/rejected": -0.010185247287154198, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.053475935828877004, |
|
"grad_norm": 0.07039070129394531, |
|
"learning_rate": 2.6729559748427675e-06, |
|
"logits/chosen": -1.7254928350448608, |
|
"logits/rejected": -1.7313286066055298, |
|
"logps/chosen": -31.875972747802734, |
|
"logps/rejected": -36.539859771728516, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.012786077335476875, |
|
"rewards/margins": 0.02013152278959751, |
|
"rewards/rejected": -0.007345445454120636, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.056621579112928595, |
|
"grad_norm": 0.06682829558849335, |
|
"learning_rate": 2.830188679245283e-06, |
|
"logits/chosen": -1.7438074350357056, |
|
"logits/rejected": -1.761460542678833, |
|
"logps/chosen": -31.37579345703125, |
|
"logps/rejected": -37.373321533203125, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.014985946007072926, |
|
"rewards/margins": 0.030825484544038773, |
|
"rewards/rejected": -0.015839537605643272, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05976722239698018, |
|
"grad_norm": 0.08220777660608292, |
|
"learning_rate": 2.987421383647799e-06, |
|
"logits/chosen": -1.7134368419647217, |
|
"logits/rejected": -1.7409776449203491, |
|
"logps/chosen": -30.14202308654785, |
|
"logps/rejected": -38.0638542175293, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.008612741716206074, |
|
"rewards/margins": 0.040046971291303635, |
|
"rewards/rejected": -0.031434230506420135, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06291286568103177, |
|
"grad_norm": 0.08102578669786453, |
|
"learning_rate": 3.1446540880503146e-06, |
|
"logits/chosen": -1.6293474435806274, |
|
"logits/rejected": -1.715287208557129, |
|
"logps/chosen": -27.261890411376953, |
|
"logps/rejected": -36.432151794433594, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.0217665433883667, |
|
"rewards/margins": 0.03480926901102066, |
|
"rewards/rejected": -0.013042723760008812, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06291286568103177, |
|
"eval_logits/chosen": -1.4648962020874023, |
|
"eval_logits/rejected": -1.5154403448104858, |
|
"eval_logps/chosen": -32.85725784301758, |
|
"eval_logps/rejected": -40.52320098876953, |
|
"eval_loss": 0.6753061413764954, |
|
"eval_rewards/accuracies": 0.6320895552635193, |
|
"eval_rewards/chosen": -0.0008622497553005815, |
|
"eval_rewards/margins": 0.04538334161043167, |
|
"eval_rewards/rejected": -0.04624559357762337, |
|
"eval_runtime": 214.8723, |
|
"eval_samples_per_second": 99.673, |
|
"eval_steps_per_second": 1.559, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06605850896508336, |
|
"grad_norm": 0.08751504868268967, |
|
"learning_rate": 3.30188679245283e-06, |
|
"logits/chosen": -1.6138765811920166, |
|
"logits/rejected": -1.6505515575408936, |
|
"logps/chosen": -36.41826629638672, |
|
"logps/rejected": -38.340946197509766, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.016763882711529732, |
|
"rewards/margins": 0.015965834259986877, |
|
"rewards/rejected": -0.03272971510887146, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06920415224913495, |
|
"grad_norm": 0.10140910744667053, |
|
"learning_rate": 3.4591194968553458e-06, |
|
"logits/chosen": -1.572196364402771, |
|
"logits/rejected": -1.608331322669983, |
|
"logps/chosen": -35.66874694824219, |
|
"logps/rejected": -40.756690979003906, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.025554969906806946, |
|
"rewards/margins": 0.03226945921778679, |
|
"rewards/rejected": -0.057824425399303436, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07234979553318653, |
|
"grad_norm": 0.11134755611419678, |
|
"learning_rate": 3.6163522012578618e-06, |
|
"logits/chosen": -1.6366207599639893, |
|
"logits/rejected": -1.6505063772201538, |
|
"logps/chosen": -37.33563232421875, |
|
"logps/rejected": -43.3948974609375, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.045298900455236435, |
|
"rewards/margins": 0.026216819882392883, |
|
"rewards/rejected": -0.07151572406291962, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07549543881723812, |
|
"grad_norm": 0.1564127802848816, |
|
"learning_rate": 3.7735849056603777e-06, |
|
"logits/chosen": -1.5884507894515991, |
|
"logits/rejected": -1.6262544393539429, |
|
"logps/chosen": -37.20269775390625, |
|
"logps/rejected": -45.897308349609375, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.03480329364538193, |
|
"rewards/margins": 0.06560282409191132, |
|
"rewards/rejected": -0.10040611028671265, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07864108210128971, |
|
"grad_norm": 0.15535807609558105, |
|
"learning_rate": 3.930817610062894e-06, |
|
"logits/chosen": -1.6724646091461182, |
|
"logits/rejected": -1.6687465906143188, |
|
"logps/chosen": -43.27534484863281, |
|
"logps/rejected": -45.803104400634766, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.10215433686971664, |
|
"rewards/margins": 0.013999072834849358, |
|
"rewards/rejected": -0.11615340411663055, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0817867253853413, |
|
"grad_norm": 0.18862789869308472, |
|
"learning_rate": 4.08805031446541e-06, |
|
"logits/chosen": -1.624436378479004, |
|
"logits/rejected": -1.695433259010315, |
|
"logps/chosen": -44.69486999511719, |
|
"logps/rejected": -51.40407180786133, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11470717191696167, |
|
"rewards/margins": 0.04582948237657547, |
|
"rewards/rejected": -0.16053664684295654, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0849323686693929, |
|
"grad_norm": 0.19437934458255768, |
|
"learning_rate": 4.245283018867925e-06, |
|
"logits/chosen": -1.4597517251968384, |
|
"logits/rejected": -1.5478241443634033, |
|
"logps/chosen": -43.16393280029297, |
|
"logps/rejected": -59.01338577270508, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.09754286706447601, |
|
"rewards/margins": 0.1444810926914215, |
|
"rewards/rejected": -0.24202391505241394, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08807801195344447, |
|
"grad_norm": 0.2875131070613861, |
|
"learning_rate": 4.402515723270441e-06, |
|
"logits/chosen": -1.4205321073532104, |
|
"logits/rejected": -1.4631447792053223, |
|
"logps/chosen": -54.769493103027344, |
|
"logps/rejected": -60.56072998046875, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.21598923206329346, |
|
"rewards/margins": 0.04702833294868469, |
|
"rewards/rejected": -0.26301756501197815, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09122365523749607, |
|
"grad_norm": 0.5252009034156799, |
|
"learning_rate": 4.559748427672957e-06, |
|
"logits/chosen": -1.124021053314209, |
|
"logits/rejected": -1.2152128219604492, |
|
"logps/chosen": -69.76283264160156, |
|
"logps/rejected": -94.03047943115234, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.37234413623809814, |
|
"rewards/margins": 0.22571516036987305, |
|
"rewards/rejected": -0.5980592966079712, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09436929852154766, |
|
"grad_norm": 0.5838403105735779, |
|
"learning_rate": 4.716981132075472e-06, |
|
"logits/chosen": -0.8352106809616089, |
|
"logits/rejected": -0.8499029278755188, |
|
"logps/chosen": -92.40535736083984, |
|
"logps/rejected": -115.4690933227539, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.5981942415237427, |
|
"rewards/margins": 0.19615033268928528, |
|
"rewards/rejected": -0.7943445444107056, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09436929852154766, |
|
"eval_logits/chosen": -0.4325442612171173, |
|
"eval_logits/rejected": -0.5166311264038086, |
|
"eval_logps/chosen": -82.86697387695312, |
|
"eval_logps/rejected": -119.5517578125, |
|
"eval_loss": 0.5905027389526367, |
|
"eval_rewards/accuracies": 0.6630597114562988, |
|
"eval_rewards/chosen": -0.5009594559669495, |
|
"eval_rewards/margins": 0.33557161688804626, |
|
"eval_rewards/rejected": -0.8365311026573181, |
|
"eval_runtime": 215.0864, |
|
"eval_samples_per_second": 99.574, |
|
"eval_steps_per_second": 1.558, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09751494180559925, |
|
"grad_norm": 0.7127551436424255, |
|
"learning_rate": 4.874213836477988e-06, |
|
"logits/chosen": -0.6658456921577454, |
|
"logits/rejected": -0.7291407585144043, |
|
"logps/chosen": -92.9823226928711, |
|
"logps/rejected": -138.02853393554688, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6104769706726074, |
|
"rewards/margins": 0.4045206904411316, |
|
"rewards/rejected": -1.0149977207183838, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.10066058508965084, |
|
"grad_norm": 0.8495454788208008, |
|
"learning_rate": 4.999993971158594e-06, |
|
"logits/chosen": -0.7028144598007202, |
|
"logits/rejected": -0.7583300471305847, |
|
"logps/chosen": -128.24002075195312, |
|
"logps/rejected": -170.99911499023438, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9406751394271851, |
|
"rewards/margins": 0.40160757303237915, |
|
"rewards/rejected": -1.3422826528549194, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.10380622837370242, |
|
"grad_norm": 0.7416212558746338, |
|
"learning_rate": 4.9997829647624885e-06, |
|
"logits/chosen": -0.5096332430839539, |
|
"logits/rejected": -0.5753281712532043, |
|
"logps/chosen": -136.32276916503906, |
|
"logps/rejected": -190.13681030273438, |
|
"loss": 0.5714, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0311200618743896, |
|
"rewards/margins": 0.5132460594177246, |
|
"rewards/rejected": -1.5443661212921143, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.10695187165775401, |
|
"grad_norm": 0.8752725720405579, |
|
"learning_rate": 4.999270545372964e-06, |
|
"logits/chosen": -0.7201881408691406, |
|
"logits/rejected": -0.8150702714920044, |
|
"logps/chosen": -124.40911865234375, |
|
"logps/rejected": -179.231201171875, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9127928018569946, |
|
"rewards/margins": 0.5067313313484192, |
|
"rewards/rejected": -1.419524073600769, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1100975149418056, |
|
"grad_norm": 1.2527039051055908, |
|
"learning_rate": 4.998456774775329e-06, |
|
"logits/chosen": -0.7363126277923584, |
|
"logits/rejected": -0.7730661630630493, |
|
"logps/chosen": -173.3235321044922, |
|
"logps/rejected": -224.24801635742188, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3973206281661987, |
|
"rewards/margins": 0.46879178285598755, |
|
"rewards/rejected": -1.8661121129989624, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11324315822585719, |
|
"grad_norm": 0.9629844427108765, |
|
"learning_rate": 4.997341751090515e-06, |
|
"logits/chosen": -0.8685577511787415, |
|
"logits/rejected": -0.9733338356018066, |
|
"logps/chosen": -174.112060546875, |
|
"logps/rejected": -247.428466796875, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.4022471904754639, |
|
"rewards/margins": 0.7104827761650085, |
|
"rewards/rejected": -2.112730026245117, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.11638880150990878, |
|
"grad_norm": 1.126010775566101, |
|
"learning_rate": 4.995925608763244e-06, |
|
"logits/chosen": -1.114950180053711, |
|
"logits/rejected": -1.1976020336151123, |
|
"logps/chosen": -169.6937255859375, |
|
"logps/rejected": -253.0226593017578, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3709325790405273, |
|
"rewards/margins": 0.8127344250679016, |
|
"rewards/rejected": -2.183666944503784, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11953444479396036, |
|
"grad_norm": 1.2758018970489502, |
|
"learning_rate": 4.994208518545819e-06, |
|
"logits/chosen": -1.1732008457183838, |
|
"logits/rejected": -1.3207279443740845, |
|
"logps/chosen": -182.38046264648438, |
|
"logps/rejected": -262.43511962890625, |
|
"loss": 0.4701, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4985719919204712, |
|
"rewards/margins": 0.7734432816505432, |
|
"rewards/rejected": -2.27201509475708, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12268008807801195, |
|
"grad_norm": 1.0300933122634888, |
|
"learning_rate": 4.992190687477535e-06, |
|
"logits/chosen": -1.2033292055130005, |
|
"logits/rejected": -1.3128149509429932, |
|
"logps/chosen": -187.9945526123047, |
|
"logps/rejected": -290.4914245605469, |
|
"loss": 0.4584, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.5572750568389893, |
|
"rewards/margins": 0.9893418550491333, |
|
"rewards/rejected": -2.546616792678833, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.12582573136206354, |
|
"grad_norm": 1.2100883722305298, |
|
"learning_rate": 4.989872358859716e-06, |
|
"logits/chosen": -0.8574434518814087, |
|
"logits/rejected": -1.0649316310882568, |
|
"logps/chosen": -218.46463012695312, |
|
"logps/rejected": -331.96588134765625, |
|
"loss": 0.4477, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8527570962905884, |
|
"rewards/margins": 1.106737732887268, |
|
"rewards/rejected": -2.9594950675964355, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12582573136206354, |
|
"eval_logits/chosen": -0.3494086265563965, |
|
"eval_logits/rejected": -0.5023281574249268, |
|
"eval_logps/chosen": -225.44276428222656, |
|
"eval_logps/rejected": -344.3971862792969, |
|
"eval_loss": 0.40257528424263, |
|
"eval_rewards/accuracies": 0.7201492786407471, |
|
"eval_rewards/chosen": -1.9267174005508423, |
|
"eval_rewards/margins": 1.1582682132720947, |
|
"eval_rewards/rejected": -3.0849857330322266, |
|
"eval_runtime": 215.0471, |
|
"eval_samples_per_second": 99.592, |
|
"eval_steps_per_second": 1.558, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12897137464611513, |
|
"grad_norm": 1.2617757320404053, |
|
"learning_rate": 4.987253812226373e-06, |
|
"logits/chosen": -0.9884117245674133, |
|
"logits/rejected": -1.162603735923767, |
|
"logps/chosen": -231.59756469726562, |
|
"logps/rejected": -362.80120849609375, |
|
"loss": 0.4239, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.992637276649475, |
|
"rewards/margins": 1.289119005203247, |
|
"rewards/rejected": -3.281756639480591, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.13211701793016672, |
|
"grad_norm": 1.5599896907806396, |
|
"learning_rate": 4.984335363310513e-06, |
|
"logits/chosen": -0.8311988115310669, |
|
"logits/rejected": -0.9811903238296509, |
|
"logps/chosen": -213.430908203125, |
|
"logps/rejected": -332.53253173828125, |
|
"loss": 0.4375, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.8192352056503296, |
|
"rewards/margins": 1.1662169694900513, |
|
"rewards/rejected": -2.985452175140381, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.13526266121421832, |
|
"grad_norm": 1.3952319622039795, |
|
"learning_rate": 4.9811173640060516e-06, |
|
"logits/chosen": -0.8797961473464966, |
|
"logits/rejected": -0.876280665397644, |
|
"logps/chosen": -261.06622314453125, |
|
"logps/rejected": -368.8479309082031, |
|
"loss": 0.4203, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.2755675315856934, |
|
"rewards/margins": 1.0598723888397217, |
|
"rewards/rejected": -3.335440158843994, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1384083044982699, |
|
"grad_norm": 1.354478359222412, |
|
"learning_rate": 4.977600202325396e-06, |
|
"logits/chosen": -0.9967167973518372, |
|
"logits/rejected": -1.0930196046829224, |
|
"logps/chosen": -235.45474243164062, |
|
"logps/rejected": -352.99542236328125, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.014740467071533, |
|
"rewards/margins": 1.1414014101028442, |
|
"rewards/rejected": -3.156141757965088, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.14155394778232147, |
|
"grad_norm": 1.211749792098999, |
|
"learning_rate": 4.973784302352654e-06, |
|
"logits/chosen": -0.7338708639144897, |
|
"logits/rejected": -0.8988674283027649, |
|
"logps/chosen": -251.37356567382812, |
|
"logps/rejected": -366.8519592285156, |
|
"loss": 0.4014, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.1910181045532227, |
|
"rewards/margins": 1.1455790996551514, |
|
"rewards/rejected": -3.336597442626953, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14469959106637306, |
|
"grad_norm": 1.581602692604065, |
|
"learning_rate": 4.969670124192504e-06, |
|
"logits/chosen": -0.4252908229827881, |
|
"logits/rejected": -0.5512481927871704, |
|
"logps/chosen": -241.5343780517578, |
|
"logps/rejected": -368.2862243652344, |
|
"loss": 0.3892, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.076319694519043, |
|
"rewards/margins": 1.2542181015014648, |
|
"rewards/rejected": -3.330537796020508, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.14784523435042465, |
|
"grad_norm": 1.7681437730789185, |
|
"learning_rate": 4.965258163914713e-06, |
|
"logits/chosen": -0.5902543067932129, |
|
"logits/rejected": -0.5962556600570679, |
|
"logps/chosen": -286.5151062011719, |
|
"logps/rejected": -404.4659729003906, |
|
"loss": 0.383, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.528414249420166, |
|
"rewards/margins": 1.1718274354934692, |
|
"rewards/rejected": -3.7002415657043457, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.15099087763447624, |
|
"grad_norm": 1.6546568870544434, |
|
"learning_rate": 4.960548953494325e-06, |
|
"logits/chosen": -0.7005417943000793, |
|
"logits/rejected": -0.7231167554855347, |
|
"logps/chosen": -279.2669372558594, |
|
"logps/rejected": -405.3627014160156, |
|
"loss": 0.3813, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.462043523788452, |
|
"rewards/margins": 1.2482386827468872, |
|
"rewards/rejected": -3.7102818489074707, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15413652091852784, |
|
"grad_norm": 1.8185548782348633, |
|
"learning_rate": 4.9555430607475194e-06, |
|
"logits/chosen": -0.3447544276714325, |
|
"logits/rejected": -0.43131861090660095, |
|
"logps/chosen": -268.96234130859375, |
|
"logps/rejected": -403.81939697265625, |
|
"loss": 0.3635, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.3665878772735596, |
|
"rewards/margins": 1.3263323307037354, |
|
"rewards/rejected": -3.692920684814453, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.15728216420257943, |
|
"grad_norm": 1.902738332748413, |
|
"learning_rate": 4.9502410892631426e-06, |
|
"logits/chosen": -0.28083157539367676, |
|
"logits/rejected": -0.32724082469940186, |
|
"logps/chosen": -280.0065002441406, |
|
"logps/rejected": -418.2118225097656, |
|
"loss": 0.3583, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4585459232330322, |
|
"rewards/margins": 1.3503986597061157, |
|
"rewards/rejected": -3.8089442253112793, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15728216420257943, |
|
"eval_logits/chosen": 0.47167521715164185, |
|
"eval_logits/rejected": 0.31242406368255615, |
|
"eval_logps/chosen": -281.4605407714844, |
|
"eval_logps/rejected": -449.56976318359375, |
|
"eval_loss": 0.3062981069087982, |
|
"eval_rewards/accuracies": 0.7645522356033325, |
|
"eval_rewards/chosen": -2.4868950843811035, |
|
"eval_rewards/margins": 1.6498165130615234, |
|
"eval_rewards/rejected": -4.136711597442627, |
|
"eval_runtime": 215.1599, |
|
"eval_samples_per_second": 99.54, |
|
"eval_steps_per_second": 1.557, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16042780748663102, |
|
"grad_norm": 2.6762752532958984, |
|
"learning_rate": 4.9446436783299315e-06, |
|
"logits/chosen": -0.21420426666736603, |
|
"logits/rejected": -0.31396135687828064, |
|
"logps/chosen": -295.9054870605469, |
|
"logps/rejected": -435.9479064941406, |
|
"loss": 0.3801, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6259713172912598, |
|
"rewards/margins": 1.3752977848052979, |
|
"rewards/rejected": -4.001269340515137, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1635734507706826, |
|
"grad_norm": 1.6177635192871094, |
|
"learning_rate": 4.938751502859433e-06, |
|
"logits/chosen": -0.3818402886390686, |
|
"logits/rejected": -0.4854033589363098, |
|
"logps/chosen": -307.38507080078125, |
|
"logps/rejected": -450.1014709472656, |
|
"loss": 0.3959, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.744161367416382, |
|
"rewards/margins": 1.3961646556854248, |
|
"rewards/rejected": -4.140326499938965, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1667190940547342, |
|
"grad_norm": 1.5721980333328247, |
|
"learning_rate": 4.932565273304623e-06, |
|
"logits/chosen": -0.30099183320999146, |
|
"logits/rejected": -0.31828540563583374, |
|
"logps/chosen": -302.99053955078125, |
|
"logps/rejected": -423.03729248046875, |
|
"loss": 0.3326, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.6925716400146484, |
|
"rewards/margins": 1.1963403224945068, |
|
"rewards/rejected": -3.888911724090576, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1698647373387858, |
|
"grad_norm": 2.8931405544281006, |
|
"learning_rate": 4.926085735574244e-06, |
|
"logits/chosen": -0.07565931975841522, |
|
"logits/rejected": -0.26495999097824097, |
|
"logps/chosen": -333.55584716796875, |
|
"logps/rejected": -520.9510498046875, |
|
"loss": 0.3571, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.99123215675354, |
|
"rewards/margins": 1.855472207069397, |
|
"rewards/rejected": -4.846704483032227, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.17301038062283736, |
|
"grad_norm": 2.259403705596924, |
|
"learning_rate": 4.9193136709428666e-06, |
|
"logits/chosen": -0.05604839324951172, |
|
"logits/rejected": -0.09852688759565353, |
|
"logps/chosen": -319.73895263671875, |
|
"logps/rejected": -462.59490966796875, |
|
"loss": 0.3559, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.861781597137451, |
|
"rewards/margins": 1.3996176719665527, |
|
"rewards/rejected": -4.261399269104004, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.17615602390688895, |
|
"grad_norm": 2.100226640701294, |
|
"learning_rate": 4.912249895956687e-06, |
|
"logits/chosen": 0.10179214179515839, |
|
"logits/rejected": -0.02760564163327217, |
|
"logps/chosen": -284.9942626953125, |
|
"logps/rejected": -476.2345275878906, |
|
"loss": 0.3428, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.523716449737549, |
|
"rewards/margins": 1.8726260662078857, |
|
"rewards/rejected": -4.3963422775268555, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.17930166719094054, |
|
"grad_norm": 2.5998682975769043, |
|
"learning_rate": 4.904895262335072e-06, |
|
"logits/chosen": 0.22700171172618866, |
|
"logits/rejected": 0.09037125110626221, |
|
"logps/chosen": -315.6595764160156, |
|
"logps/rejected": -516.5098876953125, |
|
"loss": 0.3242, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.8289144039154053, |
|
"rewards/margins": 1.9916881322860718, |
|
"rewards/rejected": -4.8206024169921875, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.18244731047499213, |
|
"grad_norm": 2.4419095516204834, |
|
"learning_rate": 4.897250656867863e-06, |
|
"logits/chosen": 0.32371488213539124, |
|
"logits/rejected": 0.17300409078598022, |
|
"logps/chosen": -358.229248046875, |
|
"logps/rejected": -524.1956176757812, |
|
"loss": 0.3043, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.2328643798828125, |
|
"rewards/margins": 1.6491279602050781, |
|
"rewards/rejected": -4.881992340087891, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.18559295375904372, |
|
"grad_norm": 2.5324759483337402, |
|
"learning_rate": 4.889317001308447e-06, |
|
"logits/chosen": 0.3359132707118988, |
|
"logits/rejected": 0.19779124855995178, |
|
"logps/chosen": -359.0341491699219, |
|
"logps/rejected": -536.9963989257812, |
|
"loss": 0.3334, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -3.257742404937744, |
|
"rewards/margins": 1.7748934030532837, |
|
"rewards/rejected": -5.032635688781738, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1887385970430953, |
|
"grad_norm": 2.23077392578125, |
|
"learning_rate": 4.881095252262619e-06, |
|
"logits/chosen": 0.21948488056659698, |
|
"logits/rejected": 0.23094649612903595, |
|
"logps/chosen": -353.52838134765625, |
|
"logps/rejected": -525.2556762695312, |
|
"loss": 0.3041, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.2166409492492676, |
|
"rewards/margins": 1.6981933116912842, |
|
"rewards/rejected": -4.914834499359131, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1887385970430953, |
|
"eval_logits/chosen": 1.1113409996032715, |
|
"eval_logits/rejected": 0.964361846446991, |
|
"eval_logps/chosen": -323.46649169921875, |
|
"eval_logps/rejected": -533.2188720703125, |
|
"eval_loss": 0.24049775302410126, |
|
"eval_rewards/accuracies": 0.7917910218238831, |
|
"eval_rewards/chosen": -2.906954765319824, |
|
"eval_rewards/margins": 2.0662477016448975, |
|
"eval_rewards/rejected": -4.973201751708984, |
|
"eval_runtime": 215.0565, |
|
"eval_samples_per_second": 99.588, |
|
"eval_steps_per_second": 1.558, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1918842403271469, |
|
"grad_norm": 1.878097653388977, |
|
"learning_rate": 4.872586401073238e-06, |
|
"logits/chosen": 0.34480124711990356, |
|
"logits/rejected": 0.36164969205856323, |
|
"logps/chosen": -299.5352478027344, |
|
"logps/rejected": -532.43701171875, |
|
"loss": 0.3015, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.655369520187378, |
|
"rewards/margins": 2.305147409439087, |
|
"rewards/rejected": -4.960516452789307, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1950298836111985, |
|
"grad_norm": 2.0585927963256836, |
|
"learning_rate": 4.863791473700695e-06, |
|
"logits/chosen": 0.30810683965682983, |
|
"logits/rejected": 0.3117186427116394, |
|
"logps/chosen": -315.50518798828125, |
|
"logps/rejected": -502.65057373046875, |
|
"loss": 0.3237, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.825150966644287, |
|
"rewards/margins": 1.8449840545654297, |
|
"rewards/rejected": -4.670135021209717, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1981755268952501, |
|
"grad_norm": 2.595250129699707, |
|
"learning_rate": 4.854711530599207e-06, |
|
"logits/chosen": 0.17247377336025238, |
|
"logits/rejected": 0.185434028506279, |
|
"logps/chosen": -350.2391662597656, |
|
"logps/rejected": -565.4362182617188, |
|
"loss": 0.3099, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -3.168469190597534, |
|
"rewards/margins": 2.1261777877807617, |
|
"rewards/rejected": -5.294647216796875, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.20132117017930168, |
|
"grad_norm": 2.5605061054229736, |
|
"learning_rate": 4.845347666588952e-06, |
|
"logits/chosen": 0.36663442850112915, |
|
"logits/rejected": 0.3369132876396179, |
|
"logps/chosen": -355.25396728515625, |
|
"logps/rejected": -588.9056396484375, |
|
"loss": 0.3052, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.2272791862487793, |
|
"rewards/margins": 2.284496545791626, |
|
"rewards/rejected": -5.511775970458984, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.20446681346335324, |
|
"grad_norm": 2.2802627086639404, |
|
"learning_rate": 4.835701010724061e-06, |
|
"logits/chosen": 0.32938310503959656, |
|
"logits/rejected": 0.1386842280626297, |
|
"logps/chosen": -336.86077880859375, |
|
"logps/rejected": -585.5232543945312, |
|
"loss": 0.2897, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.0445141792297363, |
|
"rewards/margins": 2.4435367584228516, |
|
"rewards/rejected": -5.488050937652588, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.20761245674740483, |
|
"grad_norm": 2.4260733127593994, |
|
"learning_rate": 4.825772726156479e-06, |
|
"logits/chosen": 0.5395032167434692, |
|
"logits/rejected": 0.4053524434566498, |
|
"logps/chosen": -383.67041015625, |
|
"logps/rejected": -576.008056640625, |
|
"loss": 0.2512, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -3.505573272705078, |
|
"rewards/margins": 1.9211244583129883, |
|
"rewards/rejected": -5.426698207855225, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.21075810003145642, |
|
"grad_norm": 2.286827325820923, |
|
"learning_rate": 4.8155640099957206e-06, |
|
"logits/chosen": 0.4676589071750641, |
|
"logits/rejected": 0.3794856071472168, |
|
"logps/chosen": -355.155517578125, |
|
"logps/rejected": -585.6317138671875, |
|
"loss": 0.2664, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.2314720153808594, |
|
"rewards/margins": 2.29856538772583, |
|
"rewards/rejected": -5.5300374031066895, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.21390374331550802, |
|
"grad_norm": 2.673321008682251, |
|
"learning_rate": 4.805076093164527e-06, |
|
"logits/chosen": 0.4926396310329437, |
|
"logits/rejected": 0.39895009994506836, |
|
"logps/chosen": -373.0772705078125, |
|
"logps/rejected": -613.6629638671875, |
|
"loss": 0.2543, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.402231216430664, |
|
"rewards/margins": 2.3745970726013184, |
|
"rewards/rejected": -5.776828289031982, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2170493865995596, |
|
"grad_norm": 2.462273359298706, |
|
"learning_rate": 4.794310240250444e-06, |
|
"logits/chosen": 0.47542086243629456, |
|
"logits/rejected": 0.526648223400116, |
|
"logps/chosen": -397.76947021484375, |
|
"logps/rejected": -614.8250732421875, |
|
"loss": 0.2645, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.626183271408081, |
|
"rewards/margins": 2.162733554840088, |
|
"rewards/rejected": -5.78891658782959, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.2201950298836112, |
|
"grad_norm": 3.2976531982421875, |
|
"learning_rate": 4.783267749353346e-06, |
|
"logits/chosen": 0.8178389668464661, |
|
"logits/rejected": 0.6482642889022827, |
|
"logps/chosen": -347.7313232421875, |
|
"logps/rejected": -585.7216186523438, |
|
"loss": 0.2487, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.1436398029327393, |
|
"rewards/margins": 2.362039089202881, |
|
"rewards/rejected": -5.505678653717041, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2201950298836112, |
|
"eval_logits/chosen": 1.2932829856872559, |
|
"eval_logits/rejected": 1.1342921257019043, |
|
"eval_logps/chosen": -373.9985046386719, |
|
"eval_logps/rejected": -617.6231079101562, |
|
"eval_loss": 0.1963878720998764, |
|
"eval_rewards/accuracies": 0.8208954930305481, |
|
"eval_rewards/chosen": -3.4122743606567383, |
|
"eval_rewards/margins": 2.4049696922302246, |
|
"eval_rewards/rejected": -5.817244529724121, |
|
"eval_runtime": 214.689, |
|
"eval_samples_per_second": 99.758, |
|
"eval_steps_per_second": 1.56, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2233406731676628, |
|
"grad_norm": 2.3596227169036865, |
|
"learning_rate": 4.771949951928918e-06, |
|
"logits/chosen": 0.4875836968421936, |
|
"logits/rejected": 0.3758041262626648, |
|
"logps/chosen": -386.7015380859375, |
|
"logps/rejected": -659.1187744140625, |
|
"loss": 0.2576, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.5298709869384766, |
|
"rewards/margins": 2.6897921562194824, |
|
"rewards/rejected": -6.219663619995117, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.22648631645171438, |
|
"grad_norm": 2.243406295776367, |
|
"learning_rate": 4.76035821262811e-06, |
|
"logits/chosen": 0.7452244162559509, |
|
"logits/rejected": 0.5989497900009155, |
|
"logps/chosen": -355.85284423828125, |
|
"logps/rejected": -639.9850463867188, |
|
"loss": 0.2265, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.235308885574341, |
|
"rewards/margins": 2.7962398529052734, |
|
"rewards/rejected": -6.031548976898193, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.22963195973576597, |
|
"grad_norm": 3.3983983993530273, |
|
"learning_rate": 4.748493929132599e-06, |
|
"logits/chosen": 0.8924552798271179, |
|
"logits/rejected": 0.6032952070236206, |
|
"logps/chosen": -359.5245056152344, |
|
"logps/rejected": -623.7483520507812, |
|
"loss": 0.268, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.2640464305877686, |
|
"rewards/margins": 2.6267881393432617, |
|
"rewards/rejected": -5.890834331512451, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.23277760301981756, |
|
"grad_norm": 2.4341931343078613, |
|
"learning_rate": 4.7363585319862535e-06, |
|
"logits/chosen": 0.8101499676704407, |
|
"logits/rejected": 0.7433942556381226, |
|
"logps/chosen": -332.5082702636719, |
|
"logps/rejected": -583.41650390625, |
|
"loss": 0.2235, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9981846809387207, |
|
"rewards/margins": 2.4985713958740234, |
|
"rewards/rejected": -5.496755599975586, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.23592324630386913, |
|
"grad_norm": 1.8014791011810303, |
|
"learning_rate": 4.7239534844226595e-06, |
|
"logits/chosen": 0.9954848289489746, |
|
"logits/rejected": 0.9288710355758667, |
|
"logps/chosen": -386.96783447265625, |
|
"logps/rejected": -663.5922241210938, |
|
"loss": 0.2359, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.52067232131958, |
|
"rewards/margins": 2.7473063468933105, |
|
"rewards/rejected": -6.267977714538574, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.23906888958792072, |
|
"grad_norm": 2.4365100860595703, |
|
"learning_rate": 4.711280282188674e-06, |
|
"logits/chosen": 1.1280491352081299, |
|
"logits/rejected": 0.9587424397468567, |
|
"logps/chosen": -416.06427001953125, |
|
"logps/rejected": -652.4948120117188, |
|
"loss": 0.245, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -3.8194878101348877, |
|
"rewards/margins": 2.3404040336608887, |
|
"rewards/rejected": -6.159891128540039, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2422145328719723, |
|
"grad_norm": 2.6265649795532227, |
|
"learning_rate": 4.698340453364087e-06, |
|
"logits/chosen": 1.0067460536956787, |
|
"logits/rejected": 0.7757904529571533, |
|
"logps/chosen": -403.1853942871094, |
|
"logps/rejected": -655.3627319335938, |
|
"loss": 0.2227, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.702298641204834, |
|
"rewards/margins": 2.4899773597717285, |
|
"rewards/rejected": -6.192275524139404, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2453601761560239, |
|
"grad_norm": 3.4549660682678223, |
|
"learning_rate": 4.685135558177361e-06, |
|
"logits/chosen": 0.8524907827377319, |
|
"logits/rejected": 0.7574479579925537, |
|
"logps/chosen": -394.0924987792969, |
|
"logps/rejected": -652.1280517578125, |
|
"loss": 0.2464, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.604322910308838, |
|
"rewards/margins": 2.561311721801758, |
|
"rewards/rejected": -6.165635108947754, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2485058194400755, |
|
"grad_norm": 2.0555849075317383, |
|
"learning_rate": 4.671667188817516e-06, |
|
"logits/chosen": 0.7523924112319946, |
|
"logits/rejected": 0.791477382183075, |
|
"logps/chosen": -409.0593566894531, |
|
"logps/rejected": -673.9398193359375, |
|
"loss": 0.2144, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.750147581100464, |
|
"rewards/margins": 2.626615047454834, |
|
"rewards/rejected": -6.376762866973877, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2516514627241271, |
|
"grad_norm": 3.2837741374969482, |
|
"learning_rate": 4.657936969242146e-06, |
|
"logits/chosen": 0.8540847897529602, |
|
"logits/rejected": 0.8441425561904907, |
|
"logps/chosen": -394.26251220703125, |
|
"logps/rejected": -657.2059326171875, |
|
"loss": 0.218, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.610989809036255, |
|
"rewards/margins": 2.6211609840393066, |
|
"rewards/rejected": -6.232151031494141, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2516514627241271, |
|
"eval_logits/chosen": 1.7290374040603638, |
|
"eval_logits/rejected": 1.5710214376449585, |
|
"eval_logps/chosen": -400.4794921875, |
|
"eval_logps/rejected": -698.409423828125, |
|
"eval_loss": 0.15468811988830566, |
|
"eval_rewards/accuracies": 0.8335821032524109, |
|
"eval_rewards/chosen": -3.67708420753479, |
|
"eval_rewards/margins": 2.9480228424072266, |
|
"eval_rewards/rejected": -6.625107288360596, |
|
"eval_runtime": 215.1039, |
|
"eval_samples_per_second": 99.566, |
|
"eval_steps_per_second": 1.557, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2547971060081787, |
|
"grad_norm": 2.1503520011901855, |
|
"learning_rate": 4.643946554981607e-06, |
|
"logits/chosen": 1.1990272998809814, |
|
"logits/rejected": 1.1896309852600098, |
|
"logps/chosen": -422.6075744628906, |
|
"logps/rejected": -712.6632080078125, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.8861594200134277, |
|
"rewards/margins": 2.8886961936950684, |
|
"rewards/rejected": -6.774855613708496, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.25794274929223027, |
|
"grad_norm": 2.367893934249878, |
|
"learning_rate": 4.629697632939402e-06, |
|
"logits/chosen": 0.8573128581047058, |
|
"logits/rejected": 0.8127277493476868, |
|
"logps/chosen": -448.3675231933594, |
|
"logps/rejected": -742.9185180664062, |
|
"loss": 0.2021, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.144953727722168, |
|
"rewards/margins": 2.9193053245544434, |
|
"rewards/rejected": -7.064258575439453, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.26108839257628186, |
|
"grad_norm": 2.6760129928588867, |
|
"learning_rate": 4.615191921188782e-06, |
|
"logits/chosen": 1.047659993171692, |
|
"logits/rejected": 0.8862239122390747, |
|
"logps/chosen": -430.19622802734375, |
|
"logps/rejected": -718.34033203125, |
|
"loss": 0.1965, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.9595940113067627, |
|
"rewards/margins": 2.861283779144287, |
|
"rewards/rejected": -6.8208770751953125, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.26423403586033345, |
|
"grad_norm": 3.5069732666015625, |
|
"learning_rate": 4.600431168765588e-06, |
|
"logits/chosen": 1.041211724281311, |
|
"logits/rejected": 1.060903549194336, |
|
"logps/chosen": -402.40496826171875, |
|
"logps/rejected": -657.80859375, |
|
"loss": 0.2057, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.7000668048858643, |
|
"rewards/margins": 2.5350348949432373, |
|
"rewards/rejected": -6.235101222991943, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.26737967914438504, |
|
"grad_norm": 2.710855484008789, |
|
"learning_rate": 4.58541715545736e-06, |
|
"logits/chosen": 1.0758662223815918, |
|
"logits/rejected": 1.0488075017929077, |
|
"logps/chosen": -438.24676513671875, |
|
"logps/rejected": -723.1414184570312, |
|
"loss": 0.213, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -4.031523704528809, |
|
"rewards/margins": 2.824887752532959, |
|
"rewards/rejected": -6.856411933898926, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.27052532242843663, |
|
"grad_norm": 2.7011239528656006, |
|
"learning_rate": 4.570151691588739e-06, |
|
"logits/chosen": 1.2816145420074463, |
|
"logits/rejected": 1.1929352283477783, |
|
"logps/chosen": -421.4190368652344, |
|
"logps/rejected": -684.8802490234375, |
|
"loss": 0.2145, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.8643555641174316, |
|
"rewards/margins": 2.6349289417266846, |
|
"rewards/rejected": -6.499284267425537, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2736709657124882, |
|
"grad_norm": 2.7440969944000244, |
|
"learning_rate": 4.554636617803182e-06, |
|
"logits/chosen": 1.342369556427002, |
|
"logits/rejected": 1.008675217628479, |
|
"logps/chosen": -391.8333435058594, |
|
"logps/rejected": -700.033203125, |
|
"loss": 0.1982, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.576009750366211, |
|
"rewards/margins": 3.0503413677215576, |
|
"rewards/rejected": -6.626351833343506, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2768166089965398, |
|
"grad_norm": 2.3672075271606445, |
|
"learning_rate": 4.538873804841028e-06, |
|
"logits/chosen": 1.249786615371704, |
|
"logits/rejected": 1.0435155630111694, |
|
"logps/chosen": -476.52423095703125, |
|
"logps/rejected": -823.1788330078125, |
|
"loss": 0.1808, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.4410505294799805, |
|
"rewards/margins": 3.4214394092559814, |
|
"rewards/rejected": -7.862489223480225, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2799622522805914, |
|
"grad_norm": 2.971510648727417, |
|
"learning_rate": 4.522865153313932e-06, |
|
"logits/chosen": 1.4465210437774658, |
|
"logits/rejected": 1.3960046768188477, |
|
"logps/chosen": -461.5182189941406, |
|
"logps/rejected": -782.6722412109375, |
|
"loss": 0.2097, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.27487850189209, |
|
"rewards/margins": 3.2032265663146973, |
|
"rewards/rejected": -7.4781060218811035, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.28310789556464294, |
|
"grad_norm": 3.615593910217285, |
|
"learning_rate": 4.506612593475701e-06, |
|
"logits/chosen": 1.137704610824585, |
|
"logits/rejected": 1.0405908823013306, |
|
"logps/chosen": -378.880859375, |
|
"logps/rejected": -688.6226806640625, |
|
"loss": 0.1858, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.4608230590820312, |
|
"rewards/margins": 3.0565378665924072, |
|
"rewards/rejected": -6.517360687255859, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.28310789556464294, |
|
"eval_logits/chosen": 1.8630539178848267, |
|
"eval_logits/rejected": 1.6987581253051758, |
|
"eval_logps/chosen": -387.6122741699219, |
|
"eval_logps/rejected": -703.9799194335938, |
|
"eval_loss": 0.139369398355484, |
|
"eval_rewards/accuracies": 0.8485074639320374, |
|
"eval_rewards/chosen": -3.5484120845794678, |
|
"eval_rewards/margins": 3.1324002742767334, |
|
"eval_rewards/rejected": -6.680812835693359, |
|
"eval_runtime": 215.0603, |
|
"eval_samples_per_second": 99.586, |
|
"eval_steps_per_second": 1.558, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.28625353884869453, |
|
"grad_norm": 2.4971940517425537, |
|
"learning_rate": 4.490118084989544e-06, |
|
"logits/chosen": 1.0084787607192993, |
|
"logits/rejected": 1.003114938735962, |
|
"logps/chosen": -420.396484375, |
|
"logps/rejected": -712.1617431640625, |
|
"loss": 0.2153, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.869481325149536, |
|
"rewards/margins": 2.8973114490509033, |
|
"rewards/rejected": -6.766793727874756, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2893991821327461, |
|
"grad_norm": 2.809347629547119, |
|
"learning_rate": 4.473383616691792e-06, |
|
"logits/chosen": 1.2141597270965576, |
|
"logits/rejected": 1.092398762702942, |
|
"logps/chosen": -439.409912109375, |
|
"logps/rejected": -735.4099731445312, |
|
"loss": 0.201, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.070156097412109, |
|
"rewards/margins": 2.93392276763916, |
|
"rewards/rejected": -7.0040788650512695, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2925448254167977, |
|
"grad_norm": 2.333805561065674, |
|
"learning_rate": 4.456411206352088e-06, |
|
"logits/chosen": 0.8967952728271484, |
|
"logits/rejected": 0.8582611083984375, |
|
"logps/chosen": -407.14739990234375, |
|
"logps/rejected": -692.2764892578125, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.749049425125122, |
|
"rewards/margins": 2.817676067352295, |
|
"rewards/rejected": -6.566725730895996, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2956904687008493, |
|
"grad_norm": 3.1295571327209473, |
|
"learning_rate": 4.439202900430098e-06, |
|
"logits/chosen": 1.0088117122650146, |
|
"logits/rejected": 0.8580893278121948, |
|
"logps/chosen": -401.88525390625, |
|
"logps/rejected": -715.9742431640625, |
|
"loss": 0.2032, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -3.6845085620880127, |
|
"rewards/margins": 3.113250970840454, |
|
"rewards/rejected": -6.797759056091309, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2988361119849009, |
|
"grad_norm": 3.3793976306915283, |
|
"learning_rate": 4.421760773828749e-06, |
|
"logits/chosen": 0.9543240666389465, |
|
"logits/rejected": 0.8352963328361511, |
|
"logps/chosen": -427.1727600097656, |
|
"logps/rejected": -727.7288208007812, |
|
"loss": 0.2395, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.9340949058532715, |
|
"rewards/margins": 3.0020058155059814, |
|
"rewards/rejected": -6.936100006103516, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3019817552689525, |
|
"grad_norm": 2.7082152366638184, |
|
"learning_rate": 4.4040869296440595e-06, |
|
"logits/chosen": 0.8623906970024109, |
|
"logits/rejected": 0.835192859172821, |
|
"logps/chosen": -421.047607421875, |
|
"logps/rejected": -760.1714477539062, |
|
"loss": 0.1768, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.8686699867248535, |
|
"rewards/margins": 3.3722710609436035, |
|
"rewards/rejected": -7.240941047668457, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3051273985530041, |
|
"grad_norm": 3.3928000926971436, |
|
"learning_rate": 4.3861834989115435e-06, |
|
"logits/chosen": 0.8181372880935669, |
|
"logits/rejected": 0.8542720079421997, |
|
"logps/chosen": -388.176513671875, |
|
"logps/rejected": -731.9312744140625, |
|
"loss": 0.1659, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.5451221466064453, |
|
"rewards/margins": 3.4101932048797607, |
|
"rewards/rejected": -6.955314636230469, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3082730418370557, |
|
"grad_norm": 3.535090446472168, |
|
"learning_rate": 4.368052640349269e-06, |
|
"logits/chosen": 1.3438990116119385, |
|
"logits/rejected": 1.2530713081359863, |
|
"logps/chosen": -406.4024658203125, |
|
"logps/rejected": -737.8275146484375, |
|
"loss": 0.1641, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.737173557281494, |
|
"rewards/margins": 3.2970378398895264, |
|
"rewards/rejected": -7.034211158752441, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.31141868512110726, |
|
"grad_norm": 3.4729607105255127, |
|
"learning_rate": 4.349696540097564e-06, |
|
"logits/chosen": 1.3546165227890015, |
|
"logits/rejected": 1.2128336429595947, |
|
"logps/chosen": -454.58148193359375, |
|
"logps/rejected": -771.8201293945312, |
|
"loss": 0.1636, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -4.219397068023682, |
|
"rewards/margins": 3.1577281951904297, |
|
"rewards/rejected": -7.3771257400512695, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.31456432840515886, |
|
"grad_norm": 2.7894742488861084, |
|
"learning_rate": 4.331117411455425e-06, |
|
"logits/chosen": 1.3166215419769287, |
|
"logits/rejected": 1.3613556623458862, |
|
"logps/chosen": -435.62359619140625, |
|
"logps/rejected": -732.1107177734375, |
|
"loss": 0.173, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.024202823638916, |
|
"rewards/margins": 2.9517178535461426, |
|
"rewards/rejected": -6.9759202003479, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31456432840515886, |
|
"eval_logits/chosen": 1.9776256084442139, |
|
"eval_logits/rejected": 1.8189852237701416, |
|
"eval_logps/chosen": -381.0118408203125, |
|
"eval_logps/rejected": -712.953125, |
|
"eval_loss": 0.11758408695459366, |
|
"eval_rewards/accuracies": 0.8649253845214844, |
|
"eval_rewards/chosen": -3.482407569885254, |
|
"eval_rewards/margins": 3.2881364822387695, |
|
"eval_rewards/rejected": -6.770544052124023, |
|
"eval_runtime": 214.5734, |
|
"eval_samples_per_second": 99.812, |
|
"eval_steps_per_second": 1.561, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31770997168921045, |
|
"grad_norm": 2.152683734893799, |
|
"learning_rate": 4.312317494613642e-06, |
|
"logits/chosen": 1.2920253276824951, |
|
"logits/rejected": 1.169878602027893, |
|
"logps/chosen": -423.5650939941406, |
|
"logps/rejected": -764.8018798828125, |
|
"loss": 0.1572, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.894944429397583, |
|
"rewards/margins": 3.4041748046875, |
|
"rewards/rejected": -7.299118995666504, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.32085561497326204, |
|
"grad_norm": 2.5282208919525146, |
|
"learning_rate": 4.293299056384692e-06, |
|
"logits/chosen": 1.3552110195159912, |
|
"logits/rejected": 1.1271814107894897, |
|
"logps/chosen": -462.6878356933594, |
|
"logps/rejected": -754.1079711914062, |
|
"loss": 0.1873, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -4.283886909484863, |
|
"rewards/margins": 2.8990678787231445, |
|
"rewards/rejected": -7.18295431137085, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.32400125825731363, |
|
"grad_norm": 6.489924430847168, |
|
"learning_rate": 4.274064389929412e-06, |
|
"logits/chosen": 1.4782826900482178, |
|
"logits/rejected": 1.2579948902130127, |
|
"logps/chosen": -387.65777587890625, |
|
"logps/rejected": -723.7617797851562, |
|
"loss": 0.1682, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.5566749572753906, |
|
"rewards/margins": 3.3422751426696777, |
|
"rewards/rejected": -6.898950099945068, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3271469015413652, |
|
"grad_norm": 3.615981340408325, |
|
"learning_rate": 4.254615814480501e-06, |
|
"logits/chosen": 1.3795908689498901, |
|
"logits/rejected": 1.2748284339904785, |
|
"logps/chosen": -461.3394470214844, |
|
"logps/rejected": -786.0704956054688, |
|
"loss": 0.1716, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.288393974304199, |
|
"rewards/margins": 3.223513126373291, |
|
"rewards/rejected": -7.51190710067749, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3302925448254168, |
|
"grad_norm": 2.9559316635131836, |
|
"learning_rate": 4.234955675062881e-06, |
|
"logits/chosen": 1.324539303779602, |
|
"logits/rejected": 1.2243735790252686, |
|
"logps/chosen": -457.6017150878906, |
|
"logps/rejected": -793.406005859375, |
|
"loss": 0.1757, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.254838466644287, |
|
"rewards/margins": 3.335017681121826, |
|
"rewards/rejected": -7.5898566246032715, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3334381881094684, |
|
"grad_norm": 2.599055051803589, |
|
"learning_rate": 4.215086342210932e-06, |
|
"logits/chosen": 1.4138332605361938, |
|
"logits/rejected": 1.2221533060073853, |
|
"logps/chosen": -416.51092529296875, |
|
"logps/rejected": -771.9613037109375, |
|
"loss": 0.1549, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.84822416305542, |
|
"rewards/margins": 3.504741668701172, |
|
"rewards/rejected": -7.352965354919434, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.33658383139352, |
|
"grad_norm": 2.722844362258911, |
|
"learning_rate": 4.19501021168268e-06, |
|
"logits/chosen": 1.339404821395874, |
|
"logits/rejected": 1.2510805130004883, |
|
"logps/chosen": -417.2064514160156, |
|
"logps/rejected": -769.0526123046875, |
|
"loss": 0.182, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.8489937782287598, |
|
"rewards/margins": 3.4992358684539795, |
|
"rewards/rejected": -7.348229885101318, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.3397294746775716, |
|
"grad_norm": 3.07171630859375, |
|
"learning_rate": 4.174729704170914e-06, |
|
"logits/chosen": 1.3331737518310547, |
|
"logits/rejected": 1.3105535507202148, |
|
"logps/chosen": -443.6220703125, |
|
"logps/rejected": -835.4161987304688, |
|
"loss": 0.1646, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.104954719543457, |
|
"rewards/margins": 3.9018218517303467, |
|
"rewards/rejected": -8.006775856018066, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3428751179616232, |
|
"grad_norm": 3.2593963146209717, |
|
"learning_rate": 4.154247265011313e-06, |
|
"logits/chosen": 1.2752745151519775, |
|
"logits/rejected": 1.0605169534683228, |
|
"logps/chosen": -459.13067626953125, |
|
"logps/rejected": -868.1017456054688, |
|
"loss": 0.1369, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.2618088722229, |
|
"rewards/margins": 4.040172576904297, |
|
"rewards/rejected": -8.301980972290039, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3460207612456747, |
|
"grad_norm": 2.6058199405670166, |
|
"learning_rate": 4.133565363887602e-06, |
|
"logits/chosen": 1.3846371173858643, |
|
"logits/rejected": 1.2146029472351074, |
|
"logps/chosen": -433.3853454589844, |
|
"logps/rejected": -802.4357299804688, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.011342525482178, |
|
"rewards/margins": 3.647768497467041, |
|
"rewards/rejected": -7.659111022949219, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3460207612456747, |
|
"eval_logits/chosen": 1.9864978790283203, |
|
"eval_logits/rejected": 1.8179283142089844, |
|
"eval_logps/chosen": -412.18609619140625, |
|
"eval_logps/rejected": -781.1856689453125, |
|
"eval_loss": 0.09792975336313248, |
|
"eval_rewards/accuracies": 0.871268630027771, |
|
"eval_rewards/chosen": -3.7941508293151855, |
|
"eval_rewards/margins": 3.658719778060913, |
|
"eval_rewards/rejected": -7.452869892120361, |
|
"eval_runtime": 215.0997, |
|
"eval_samples_per_second": 99.568, |
|
"eval_steps_per_second": 1.557, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3491664045297263, |
|
"grad_norm": 3.2654693126678467, |
|
"learning_rate": 4.112686494533762e-06, |
|
"logits/chosen": 1.497064232826233, |
|
"logits/rejected": 1.313239336013794, |
|
"logps/chosen": -445.63079833984375, |
|
"logps/rejected": -807.7423095703125, |
|
"loss": 0.1143, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.1313910484313965, |
|
"rewards/margins": 3.5983211994171143, |
|
"rewards/rejected": -7.729712009429932, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3523120478137779, |
|
"grad_norm": 3.974165201187134, |
|
"learning_rate": 4.091613174433351e-06, |
|
"logits/chosen": 1.1193442344665527, |
|
"logits/rejected": 1.1738699674606323, |
|
"logps/chosen": -500.4566345214844, |
|
"logps/rejected": -867.6866455078125, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -4.670734405517578, |
|
"rewards/margins": 3.6689980030059814, |
|
"rewards/rejected": -8.33973217010498, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3554576910978295, |
|
"grad_norm": 3.4348835945129395, |
|
"learning_rate": 4.070347944515955e-06, |
|
"logits/chosen": 1.054678201675415, |
|
"logits/rejected": 0.9517561197280884, |
|
"logps/chosen": -465.5830078125, |
|
"logps/rejected": -858.0778198242188, |
|
"loss": 0.1553, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.322167873382568, |
|
"rewards/margins": 3.9099960327148438, |
|
"rewards/rejected": -8.232163429260254, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3586033343818811, |
|
"grad_norm": 3.4094433784484863, |
|
"learning_rate": 4.048893368850812e-06, |
|
"logits/chosen": 1.299513816833496, |
|
"logits/rejected": 1.1844545602798462, |
|
"logps/chosen": -450.3697204589844, |
|
"logps/rejected": -804.0281982421875, |
|
"loss": 0.1617, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.180676460266113, |
|
"rewards/margins": 3.5175251960754395, |
|
"rewards/rejected": -7.698201656341553, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.36174897766593267, |
|
"grad_norm": 3.1470396518707275, |
|
"learning_rate": 4.027252034337653e-06, |
|
"logits/chosen": 1.1882685422897339, |
|
"logits/rejected": 1.0305453538894653, |
|
"logps/chosen": -461.49322509765625, |
|
"logps/rejected": -830.1395263671875, |
|
"loss": 0.1468, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.267642021179199, |
|
"rewards/margins": 3.6701016426086426, |
|
"rewards/rejected": -7.937744140625, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.36489462094998426, |
|
"grad_norm": 2.149296998977661, |
|
"learning_rate": 4.005426550394777e-06, |
|
"logits/chosen": 1.0792266130447388, |
|
"logits/rejected": 1.1189695596694946, |
|
"logps/chosen": -465.95587158203125, |
|
"logps/rejected": -866.3806762695312, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.327679634094238, |
|
"rewards/margins": 4.006646633148193, |
|
"rewards/rejected": -8.334325790405273, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.36804026423403585, |
|
"grad_norm": 2.5954060554504395, |
|
"learning_rate": 3.983419548644427e-06, |
|
"logits/chosen": 1.2456753253936768, |
|
"logits/rejected": 1.0422345399856567, |
|
"logps/chosen": -433.5536193847656, |
|
"logps/rejected": -783.9739990234375, |
|
"loss": 0.1501, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.011656284332275, |
|
"rewards/margins": 3.4877192974090576, |
|
"rewards/rejected": -7.499375820159912, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.37118590751808744, |
|
"grad_norm": 2.630852460861206, |
|
"learning_rate": 3.961233682595474e-06, |
|
"logits/chosen": 1.4556474685668945, |
|
"logits/rejected": 1.3100624084472656, |
|
"logps/chosen": -450.36737060546875, |
|
"logps/rejected": -859.49658203125, |
|
"loss": 0.1678, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.17067813873291, |
|
"rewards/margins": 4.0663228034973145, |
|
"rewards/rejected": -8.237001419067383, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.37433155080213903, |
|
"grad_norm": 1.8810055255889893, |
|
"learning_rate": 3.93887162732347e-06, |
|
"logits/chosen": 1.1926202774047852, |
|
"logits/rejected": 1.159652829170227, |
|
"logps/chosen": -428.0986328125, |
|
"logps/rejected": -843.1412963867188, |
|
"loss": 0.1476, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.963353395462036, |
|
"rewards/margins": 4.120386123657227, |
|
"rewards/rejected": -8.083739280700684, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3774771940861906, |
|
"grad_norm": 3.1499431133270264, |
|
"learning_rate": 3.916336079148102e-06, |
|
"logits/chosen": 1.6126463413238525, |
|
"logits/rejected": 1.4319192171096802, |
|
"logps/chosen": -460.66455078125, |
|
"logps/rejected": -844.0821533203125, |
|
"loss": 0.149, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.291562080383301, |
|
"rewards/margins": 3.7794106006622314, |
|
"rewards/rejected": -8.070972442626953, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3774771940861906, |
|
"eval_logits/chosen": 2.0580649375915527, |
|
"eval_logits/rejected": 1.8714509010314941, |
|
"eval_logps/chosen": -451.3316345214844, |
|
"eval_logps/rejected": -860.935546875, |
|
"eval_loss": 0.08171828836202621, |
|
"eval_rewards/accuracies": 0.8843283653259277, |
|
"eval_rewards/chosen": -4.185605049133301, |
|
"eval_rewards/margins": 4.064764022827148, |
|
"eval_rewards/rejected": -8.250368118286133, |
|
"eval_runtime": 215.1027, |
|
"eval_samples_per_second": 99.566, |
|
"eval_steps_per_second": 1.557, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3806228373702422, |
|
"grad_norm": 2.8907980918884277, |
|
"learning_rate": 3.893629755308078e-06, |
|
"logits/chosen": 1.040919303894043, |
|
"logits/rejected": 1.0302915573120117, |
|
"logps/chosen": -447.6219787597656, |
|
"logps/rejected": -785.0813598632812, |
|
"loss": 0.135, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.135832786560059, |
|
"rewards/margins": 3.3801684379577637, |
|
"rewards/rejected": -7.516000270843506, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3837684806542938, |
|
"grad_norm": 3.385078191757202, |
|
"learning_rate": 3.870755393633495e-06, |
|
"logits/chosen": 1.291372299194336, |
|
"logits/rejected": 1.1527048349380493, |
|
"logps/chosen": -461.098388671875, |
|
"logps/rejected": -850.8663940429688, |
|
"loss": 0.1317, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -4.281379699707031, |
|
"rewards/margins": 3.8819034099578857, |
|
"rewards/rejected": -8.163283348083496, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3869141239383454, |
|
"grad_norm": 2.8103888034820557, |
|
"learning_rate": 3.847715752215725e-06, |
|
"logits/chosen": 1.3119373321533203, |
|
"logits/rejected": 1.25798499584198, |
|
"logps/chosen": -471.1314392089844, |
|
"logps/rejected": -876.8768310546875, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.381020545959473, |
|
"rewards/margins": 4.039222240447998, |
|
"rewards/rejected": -8.420242309570312, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.390059767222397, |
|
"grad_norm": 3.6371328830718994, |
|
"learning_rate": 3.824513609074853e-06, |
|
"logits/chosen": 1.3880940675735474, |
|
"logits/rejected": 1.2948577404022217, |
|
"logps/chosen": -476.92022705078125, |
|
"logps/rejected": -872.46240234375, |
|
"loss": 0.131, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.443149089813232, |
|
"rewards/margins": 3.925715684890747, |
|
"rewards/rejected": -8.368864059448242, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3932054105064486, |
|
"grad_norm": 2.8011200428009033, |
|
"learning_rate": 3.8011517618247208e-06, |
|
"logits/chosen": 1.4773666858673096, |
|
"logits/rejected": 1.3155330419540405, |
|
"logps/chosen": -496.269287109375, |
|
"logps/rejected": -857.8162841796875, |
|
"loss": 0.1128, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.630643367767334, |
|
"rewards/margins": 3.5995922088623047, |
|
"rewards/rejected": -8.230236053466797, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3963510537905002, |
|
"grad_norm": 3.5925869941711426, |
|
"learning_rate": 3.777633027335594e-06, |
|
"logits/chosen": 1.2616320848464966, |
|
"logits/rejected": 1.1137524843215942, |
|
"logps/chosen": -487.0738220214844, |
|
"logps/rejected": -863.6187744140625, |
|
"loss": 0.1402, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -4.545956134796143, |
|
"rewards/margins": 3.731048107147217, |
|
"rewards/rejected": -8.277003288269043, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.39949669707455177, |
|
"grad_norm": 2.241239547729492, |
|
"learning_rate": 3.7539602413945264e-06, |
|
"logits/chosen": 1.1234735250473022, |
|
"logits/rejected": 0.9519070386886597, |
|
"logps/chosen": -431.80340576171875, |
|
"logps/rejected": -828.6780395507812, |
|
"loss": 0.108, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.9965758323669434, |
|
"rewards/margins": 3.9541754722595215, |
|
"rewards/rejected": -7.950751304626465, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.40264234035860336, |
|
"grad_norm": 3.996866464614868, |
|
"learning_rate": 3.7301362583634255e-06, |
|
"logits/chosen": 1.1398568153381348, |
|
"logits/rejected": 1.0180408954620361, |
|
"logps/chosen": -462.87225341796875, |
|
"logps/rejected": -890.3841552734375, |
|
"loss": 0.1281, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.300458908081055, |
|
"rewards/margins": 4.238921165466309, |
|
"rewards/rejected": -8.539380073547363, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.40578798364265495, |
|
"grad_norm": 2.932255744934082, |
|
"learning_rate": 3.7061639508348883e-06, |
|
"logits/chosen": 0.9352201223373413, |
|
"logits/rejected": 0.8519388437271118, |
|
"logps/chosen": -481.8533630371094, |
|
"logps/rejected": -956.1497802734375, |
|
"loss": 0.1091, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.485152721405029, |
|
"rewards/margins": 4.709257125854492, |
|
"rewards/rejected": -9.19441032409668, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.4089336269267065, |
|
"grad_norm": 4.746089935302734, |
|
"learning_rate": 3.6820462092858388e-06, |
|
"logits/chosen": 1.1464670896530151, |
|
"logits/rejected": 0.9618524312973022, |
|
"logps/chosen": -500.5083923339844, |
|
"logps/rejected": -943.3963012695312, |
|
"loss": 0.1143, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.674715995788574, |
|
"rewards/margins": 4.405007839202881, |
|
"rewards/rejected": -9.079724311828613, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4089336269267065, |
|
"eval_logits/chosen": 1.9770227670669556, |
|
"eval_logits/rejected": 1.7765424251556396, |
|
"eval_logps/chosen": -457.2140808105469, |
|
"eval_logps/rejected": -897.443115234375, |
|
"eval_loss": 0.0702316164970398, |
|
"eval_rewards/accuracies": 0.8884328603744507, |
|
"eval_rewards/chosen": -4.2444305419921875, |
|
"eval_rewards/margins": 4.371013641357422, |
|
"eval_rewards/rejected": -8.615446090698242, |
|
"eval_runtime": 214.7353, |
|
"eval_samples_per_second": 99.737, |
|
"eval_steps_per_second": 1.56, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4120792702107581, |
|
"grad_norm": 1.9351997375488281, |
|
"learning_rate": 3.6577859417290036e-06, |
|
"logits/chosen": 1.0681344270706177, |
|
"logits/rejected": 1.032234787940979, |
|
"logps/chosen": -490.95538330078125, |
|
"logps/rejected": -917.3497924804688, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.577587127685547, |
|
"rewards/margins": 4.240847587585449, |
|
"rewards/rejected": -8.818434715270996, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.41522491349480967, |
|
"grad_norm": 3.2459261417388916, |
|
"learning_rate": 3.633386073362275e-06, |
|
"logits/chosen": 1.3883543014526367, |
|
"logits/rejected": 1.1711828708648682, |
|
"logps/chosen": -445.54901123046875, |
|
"logps/rejected": -872.5389404296875, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.128580570220947, |
|
"rewards/margins": 4.237695693969727, |
|
"rewards/rejected": -8.366275787353516, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.41837055677886126, |
|
"grad_norm": 3.151752471923828, |
|
"learning_rate": 3.6088495462160108e-06, |
|
"logits/chosen": 1.3057953119277954, |
|
"logits/rejected": 1.2494308948516846, |
|
"logps/chosen": -468.8291931152344, |
|
"logps/rejected": -929.8936767578125, |
|
"loss": 0.1145, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.374839782714844, |
|
"rewards/margins": 4.57546854019165, |
|
"rewards/rejected": -8.950307846069336, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.42151620006291285, |
|
"grad_norm": 2.9136953353881836, |
|
"learning_rate": 3.584179318798287e-06, |
|
"logits/chosen": 1.4748225212097168, |
|
"logits/rejected": 1.2122485637664795, |
|
"logps/chosen": -491.70416259765625, |
|
"logps/rejected": -948.0320434570312, |
|
"loss": 0.1099, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.592446804046631, |
|
"rewards/margins": 4.535719871520996, |
|
"rewards/rejected": -9.128166198730469, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.42466184334696444, |
|
"grad_norm": 2.980229616165161, |
|
"learning_rate": 3.5593783657381832e-06, |
|
"logits/chosen": 1.3782281875610352, |
|
"logits/rejected": 1.2022063732147217, |
|
"logps/chosen": -457.11029052734375, |
|
"logps/rejected": -905.9327392578125, |
|
"loss": 0.1076, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.2324628829956055, |
|
"rewards/margins": 4.458249568939209, |
|
"rewards/rejected": -8.690712928771973, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.42780748663101603, |
|
"grad_norm": 2.3310577869415283, |
|
"learning_rate": 3.534449677427106e-06, |
|
"logits/chosen": 1.275436520576477, |
|
"logits/rejected": 1.0545412302017212, |
|
"logps/chosen": -419.954833984375, |
|
"logps/rejected": -886.1671752929688, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.8796966075897217, |
|
"rewards/margins": 4.618420124053955, |
|
"rewards/rejected": -8.498116493225098, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4309531299150676, |
|
"grad_norm": 2.4203007221221924, |
|
"learning_rate": 3.5093962596582288e-06, |
|
"logits/chosen": 1.4109416007995605, |
|
"logits/rejected": 1.346010446548462, |
|
"logps/chosen": -479.3482360839844, |
|
"logps/rejected": -935.4396362304688, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.468480587005615, |
|
"rewards/margins": 4.542850971221924, |
|
"rewards/rejected": -9.011331558227539, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.4340987731991192, |
|
"grad_norm": 2.9505016803741455, |
|
"learning_rate": 3.4842211332640595e-06, |
|
"logits/chosen": 1.4074004888534546, |
|
"logits/rejected": 1.0855344533920288, |
|
"logps/chosen": -525.0509033203125, |
|
"logps/rejected": -1000.765625, |
|
"loss": 0.125, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.915558815002441, |
|
"rewards/margins": 4.711212158203125, |
|
"rewards/rejected": -9.626770973205566, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4372444164831708, |
|
"grad_norm": 3.437758207321167, |
|
"learning_rate": 3.4589273337522055e-06, |
|
"logits/chosen": 1.7364375591278076, |
|
"logits/rejected": 1.437404990196228, |
|
"logps/chosen": -464.2837829589844, |
|
"logps/rejected": -936.4107666015625, |
|
"loss": 0.1151, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.3078765869140625, |
|
"rewards/margins": 4.697639465332031, |
|
"rewards/rejected": -9.005515098571777, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.4403900597672224, |
|
"grad_norm": 3.1491661071777344, |
|
"learning_rate": 3.433517910939364e-06, |
|
"logits/chosen": 1.6241645812988281, |
|
"logits/rejected": 1.4199696779251099, |
|
"logps/chosen": -478.7039489746094, |
|
"logps/rejected": -865.2511596679688, |
|
"loss": 0.1204, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.474257469177246, |
|
"rewards/margins": 3.8194992542266846, |
|
"rewards/rejected": -8.293757438659668, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4403900597672224, |
|
"eval_logits/chosen": 2.373414993286133, |
|
"eval_logits/rejected": 2.199601411819458, |
|
"eval_logps/chosen": -447.18634033203125, |
|
"eval_logps/rejected": -897.015380859375, |
|
"eval_loss": 0.06420407444238663, |
|
"eval_rewards/accuracies": 0.8966417908668518, |
|
"eval_rewards/chosen": -4.144153118133545, |
|
"eval_rewards/margins": 4.467014789581299, |
|
"eval_rewards/rejected": -8.611167907714844, |
|
"eval_runtime": 214.6217, |
|
"eval_samples_per_second": 99.79, |
|
"eval_steps_per_second": 1.561, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.443535703051274, |
|
"grad_norm": 3.9951677322387695, |
|
"learning_rate": 3.4079959285835895e-06, |
|
"logits/chosen": 1.426998496055603, |
|
"logits/rejected": 1.4089171886444092, |
|
"logps/chosen": -486.6236267089844, |
|
"logps/rejected": -924.7525634765625, |
|
"loss": 0.116, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.531942844390869, |
|
"rewards/margins": 4.369163990020752, |
|
"rewards/rejected": -8.901106834411621, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.4466813463353256, |
|
"grad_norm": 2.6183512210845947, |
|
"learning_rate": 3.3823644640148767e-06, |
|
"logits/chosen": 1.2935250997543335, |
|
"logits/rejected": 1.0634428262710571, |
|
"logps/chosen": -477.60626220703125, |
|
"logps/rejected": -1014.9510498046875, |
|
"loss": 0.1088, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.449921607971191, |
|
"rewards/margins": 5.347407341003418, |
|
"rewards/rejected": -9.79732894897461, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.44982698961937717, |
|
"grad_norm": 4.819204330444336, |
|
"learning_rate": 3.356626607764113e-06, |
|
"logits/chosen": 1.4216114282608032, |
|
"logits/rejected": 1.3206603527069092, |
|
"logps/chosen": -439.0201110839844, |
|
"logps/rejected": -846.8821411132812, |
|
"loss": 0.1089, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.05989933013916, |
|
"rewards/margins": 4.05983829498291, |
|
"rewards/rejected": -8.11973762512207, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.45297263290342876, |
|
"grad_norm": 4.423831462860107, |
|
"learning_rate": 3.3307854631904315e-06, |
|
"logits/chosen": 1.1363990306854248, |
|
"logits/rejected": 1.0369417667388916, |
|
"logps/chosen": -541.7600708007812, |
|
"logps/rejected": -1006.2171020507812, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.079786777496338, |
|
"rewards/margins": 4.622787952423096, |
|
"rewards/rejected": -9.702574729919434, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.45611827618748035, |
|
"grad_norm": 1.959601640701294, |
|
"learning_rate": 3.3048441461070234e-06, |
|
"logits/chosen": 1.2691389322280884, |
|
"logits/rejected": 1.111587405204773, |
|
"logps/chosen": -476.6605529785156, |
|
"logps/rejected": -884.9557495117188, |
|
"loss": 0.1146, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.442279815673828, |
|
"rewards/margins": 4.060351848602295, |
|
"rewards/rejected": -8.502632141113281, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.45926391947153195, |
|
"grad_norm": 2.6528480052948, |
|
"learning_rate": 3.278805784405451e-06, |
|
"logits/chosen": 1.3994677066802979, |
|
"logits/rejected": 1.1729563474655151, |
|
"logps/chosen": -460.7957458496094, |
|
"logps/rejected": -922.0712890625, |
|
"loss": 0.1055, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.265277862548828, |
|
"rewards/margins": 4.591836452484131, |
|
"rewards/rejected": -8.857114791870117, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.46240956275558354, |
|
"grad_norm": 3.501101493835449, |
|
"learning_rate": 3.2526735176784897e-06, |
|
"logits/chosen": 1.8661353588104248, |
|
"logits/rejected": 1.6564010381698608, |
|
"logps/chosen": -528.0916748046875, |
|
"logps/rejected": -949.0935668945312, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.956791400909424, |
|
"rewards/margins": 4.165668964385986, |
|
"rewards/rejected": -9.12246036529541, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.4655552060396351, |
|
"grad_norm": 3.2202489376068115, |
|
"learning_rate": 3.2264504968415805e-06, |
|
"logits/chosen": 1.4454188346862793, |
|
"logits/rejected": 1.239365816116333, |
|
"logps/chosen": -503.5089416503906, |
|
"logps/rejected": -947.4742431640625, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.705296993255615, |
|
"rewards/margins": 4.416085720062256, |
|
"rewards/rejected": -9.121381759643555, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4687008493236867, |
|
"grad_norm": 2.5105979442596436, |
|
"learning_rate": 3.2001398837529e-06, |
|
"logits/chosen": 1.4767264127731323, |
|
"logits/rejected": 1.294965147972107, |
|
"logps/chosen": -509.91497802734375, |
|
"logps/rejected": -995.5406494140625, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.786181449890137, |
|
"rewards/margins": 4.818673610687256, |
|
"rewards/rejected": -9.604853630065918, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.47184649260773825, |
|
"grad_norm": 3.5266494750976562, |
|
"learning_rate": 3.1737448508321176e-06, |
|
"logits/chosen": 1.5441019535064697, |
|
"logits/rejected": 1.2486565113067627, |
|
"logps/chosen": -518.9967041015625, |
|
"logps/rejected": -971.5304565429688, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.873408317565918, |
|
"rewards/margins": 4.476175785064697, |
|
"rewards/rejected": -9.349583625793457, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.47184649260773825, |
|
"eval_logits/chosen": 2.136415958404541, |
|
"eval_logits/rejected": 1.951367974281311, |
|
"eval_logps/chosen": -483.0837707519531, |
|
"eval_logps/rejected": -947.4904174804688, |
|
"eval_loss": 0.058037880808115005, |
|
"eval_rewards/accuracies": 0.8951492309570312, |
|
"eval_rewards/chosen": -4.503126621246338, |
|
"eval_rewards/margins": 4.612789630889893, |
|
"eval_rewards/rejected": -9.11591625213623, |
|
"eval_runtime": 214.8935, |
|
"eval_samples_per_second": 99.663, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.47499213589178985, |
|
"grad_norm": 2.5726158618927, |
|
"learning_rate": 3.1472685806778837e-06, |
|
"logits/chosen": 1.462704062461853, |
|
"logits/rejected": 1.1601091623306274, |
|
"logps/chosen": -491.4873046875, |
|
"logps/rejected": -966.7628784179688, |
|
"loss": 0.1067, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.586657524108887, |
|
"rewards/margins": 4.717507362365723, |
|
"rewards/rejected": -9.30416488647461, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.47813777917584144, |
|
"grad_norm": 1.8397867679595947, |
|
"learning_rate": 3.1207142656840782e-06, |
|
"logits/chosen": 1.4628472328186035, |
|
"logits/rejected": 1.2875310182571411, |
|
"logps/chosen": -484.8026428222656, |
|
"logps/rejected": -923.8834228515625, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -4.506746292114258, |
|
"rewards/margins": 4.361083984375, |
|
"rewards/rejected": -8.867830276489258, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.48128342245989303, |
|
"grad_norm": 3.018728494644165, |
|
"learning_rate": 3.094085107654891e-06, |
|
"logits/chosen": 1.3588612079620361, |
|
"logits/rejected": 1.0993680953979492, |
|
"logps/chosen": -544.7242431640625, |
|
"logps/rejected": -1018.6455078125, |
|
"loss": 0.1048, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.132107734680176, |
|
"rewards/margins": 4.696852684020996, |
|
"rewards/rejected": -9.828960418701172, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4844290657439446, |
|
"grad_norm": 2.7776284217834473, |
|
"learning_rate": 3.067384317418761e-06, |
|
"logits/chosen": 1.5641913414001465, |
|
"logits/rejected": 1.4478683471679688, |
|
"logps/chosen": -493.83746337890625, |
|
"logps/rejected": -903.9118041992188, |
|
"loss": 0.116, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.606075286865234, |
|
"rewards/margins": 4.075374126434326, |
|
"rewards/rejected": -8.681449890136719, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4875747090279962, |
|
"grad_norm": 4.471944808959961, |
|
"learning_rate": 3.0406151144412277e-06, |
|
"logits/chosen": 1.447131633758545, |
|
"logits/rejected": 1.3758630752563477, |
|
"logps/chosen": -481.29736328125, |
|
"logps/rejected": -913.4244384765625, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.4858551025390625, |
|
"rewards/margins": 4.2920660972595215, |
|
"rewards/rejected": -8.777920722961426, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4907203523120478, |
|
"grad_norm": 1.814293622970581, |
|
"learning_rate": 3.013780726436743e-06, |
|
"logits/chosen": 1.3209139108657837, |
|
"logits/rejected": 1.036739468574524, |
|
"logps/chosen": -497.64080810546875, |
|
"logps/rejected": -983.0264892578125, |
|
"loss": 0.0891, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.66217565536499, |
|
"rewards/margins": 4.808836936950684, |
|
"rewards/rejected": -9.471014022827148, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.4938659955960994, |
|
"grad_norm": 2.357700824737549, |
|
"learning_rate": 2.9868843889794867e-06, |
|
"logits/chosen": 1.507373571395874, |
|
"logits/rejected": 1.33405339717865, |
|
"logps/chosen": -556.9375610351562, |
|
"logps/rejected": -1055.0340576171875, |
|
"loss": 0.0958, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.234933376312256, |
|
"rewards/margins": 4.970101833343506, |
|
"rewards/rejected": -10.205035209655762, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.497011638880151, |
|
"grad_norm": 2.6845498085021973, |
|
"learning_rate": 2.9599293451132338e-06, |
|
"logits/chosen": 1.3129870891571045, |
|
"logits/rejected": 1.0617408752441406, |
|
"logps/chosen": -516.5895385742188, |
|
"logps/rejected": -1013.9332885742188, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.83015775680542, |
|
"rewards/margins": 4.950501441955566, |
|
"rewards/rejected": -9.780659675598145, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5001572821642026, |
|
"grad_norm": 3.2871477603912354, |
|
"learning_rate": 2.9329188449603245e-06, |
|
"logits/chosen": 1.3259484767913818, |
|
"logits/rejected": 1.3023512363433838, |
|
"logps/chosen": -524.0896606445312, |
|
"logps/rejected": -990.5101318359375, |
|
"loss": 0.0905, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.907674312591553, |
|
"rewards/margins": 4.64675760269165, |
|
"rewards/rejected": -9.55443286895752, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5033029254482542, |
|
"grad_norm": 3.381878137588501, |
|
"learning_rate": 2.9058561453297783e-06, |
|
"logits/chosen": 1.392225980758667, |
|
"logits/rejected": 1.2445684671401978, |
|
"logps/chosen": -441.95501708984375, |
|
"logps/rejected": -929.5980224609375, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.106937408447266, |
|
"rewards/margins": 4.853115081787109, |
|
"rewards/rejected": -8.960052490234375, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5033029254482542, |
|
"eval_logits/chosen": 2.1102631092071533, |
|
"eval_logits/rejected": 1.9239301681518555, |
|
"eval_logps/chosen": -436.5010681152344, |
|
"eval_logps/rejected": -893.6846313476562, |
|
"eval_loss": 0.05671687051653862, |
|
"eval_rewards/accuracies": 0.9067164063453674, |
|
"eval_rewards/chosen": -4.0373005867004395, |
|
"eval_rewards/margins": 4.540557861328125, |
|
"eval_rewards/rejected": -8.577857971191406, |
|
"eval_runtime": 215.1071, |
|
"eval_samples_per_second": 99.564, |
|
"eval_steps_per_second": 1.557, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5064485687323057, |
|
"grad_norm": 3.025057315826416, |
|
"learning_rate": 2.8787445093246004e-06, |
|
"logits/chosen": 1.3364161252975464, |
|
"logits/rejected": 1.176685094833374, |
|
"logps/chosen": -451.01513671875, |
|
"logps/rejected": -895.0511474609375, |
|
"loss": 0.105, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.202619552612305, |
|
"rewards/margins": 4.399237155914307, |
|
"rewards/rejected": -8.60185718536377, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5095942120163574, |
|
"grad_norm": 3.637864112854004, |
|
"learning_rate": 2.8515872059483326e-06, |
|
"logits/chosen": 1.2735852003097534, |
|
"logits/rejected": 1.112821340560913, |
|
"logps/chosen": -489.1302795410156, |
|
"logps/rejected": -982.361328125, |
|
"loss": 0.1066, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.53579568862915, |
|
"rewards/margins": 4.906703948974609, |
|
"rewards/rejected": -9.442499160766602, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5127398553004089, |
|
"grad_norm": 1.9579370021820068, |
|
"learning_rate": 2.8243875097108897e-06, |
|
"logits/chosen": 1.3890975713729858, |
|
"logits/rejected": 1.2870466709136963, |
|
"logps/chosen": -498.91650390625, |
|
"logps/rejected": -1006.8635864257812, |
|
"loss": 0.0956, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.648484230041504, |
|
"rewards/margins": 5.069706916809082, |
|
"rewards/rejected": -9.718191146850586, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5158854985844605, |
|
"grad_norm": 2.1294662952423096, |
|
"learning_rate": 2.7971487002337344e-06, |
|
"logits/chosen": 1.551054835319519, |
|
"logits/rejected": 1.4010810852050781, |
|
"logps/chosen": -553.7382202148438, |
|
"logps/rejected": -1012.7103271484375, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.215034008026123, |
|
"rewards/margins": 4.574709415435791, |
|
"rewards/rejected": -9.78974437713623, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5190311418685121, |
|
"grad_norm": 2.6101462841033936, |
|
"learning_rate": 2.769874061854434e-06, |
|
"logits/chosen": 1.5008209943771362, |
|
"logits/rejected": 1.2978509664535522, |
|
"logps/chosen": -475.5283203125, |
|
"logps/rejected": -943.5218505859375, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.424406051635742, |
|
"rewards/margins": 4.653243064880371, |
|
"rewards/rejected": -9.07765007019043, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5221767851525637, |
|
"grad_norm": 3.2625982761383057, |
|
"learning_rate": 2.74256688323065e-06, |
|
"logits/chosen": 1.244457483291626, |
|
"logits/rejected": 1.1406192779541016, |
|
"logps/chosen": -540.4188232421875, |
|
"logps/rejected": -989.83154296875, |
|
"loss": 0.0979, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -5.072657108306885, |
|
"rewards/margins": 4.476801872253418, |
|
"rewards/rejected": -9.549459457397461, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5253224284366153, |
|
"grad_norm": 2.4055895805358887, |
|
"learning_rate": 2.7152304569436055e-06, |
|
"logits/chosen": 1.3721317052841187, |
|
"logits/rejected": 1.1766210794448853, |
|
"logps/chosen": -486.52117919921875, |
|
"logps/rejected": -998.5260009765625, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.543233394622803, |
|
"rewards/margins": 5.094179630279541, |
|
"rewards/rejected": -9.637413024902344, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5284680717206669, |
|
"grad_norm": 4.099717140197754, |
|
"learning_rate": 2.6878680791010786e-06, |
|
"logits/chosen": 1.48918879032135, |
|
"logits/rejected": 1.4536645412445068, |
|
"logps/chosen": -565.0792236328125, |
|
"logps/rejected": -1010.3558349609375, |
|
"loss": 0.0941, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -5.319018840789795, |
|
"rewards/margins": 4.435922145843506, |
|
"rewards/rejected": -9.7549409866333, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5316137150047184, |
|
"grad_norm": 4.480926513671875, |
|
"learning_rate": 2.6604830489399763e-06, |
|
"logits/chosen": 1.392275333404541, |
|
"logits/rejected": 1.226122260093689, |
|
"logps/chosen": -508.6419982910156, |
|
"logps/rejected": -1001.2962646484375, |
|
"loss": 0.1079, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.7513346672058105, |
|
"rewards/margins": 4.9047369956970215, |
|
"rewards/rejected": -9.656072616577148, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5347593582887701, |
|
"grad_norm": 3.0424184799194336, |
|
"learning_rate": 2.6330786684285203e-06, |
|
"logits/chosen": 1.5600693225860596, |
|
"logits/rejected": 1.2706520557403564, |
|
"logps/chosen": -478.228515625, |
|
"logps/rejected": -1058.522705078125, |
|
"loss": 0.0853, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.449268341064453, |
|
"rewards/margins": 5.78544282913208, |
|
"rewards/rejected": -10.234711647033691, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5347593582887701, |
|
"eval_logits/chosen": 2.233569383621216, |
|
"eval_logits/rejected": 2.0647757053375244, |
|
"eval_logps/chosen": -463.9637145996094, |
|
"eval_logps/rejected": -965.1707763671875, |
|
"eval_loss": 0.04823598265647888, |
|
"eval_rewards/accuracies": 0.9067164063453674, |
|
"eval_rewards/chosen": -4.31192684173584, |
|
"eval_rewards/margins": 4.9807939529418945, |
|
"eval_rewards/rejected": -9.292719841003418, |
|
"eval_runtime": 214.9777, |
|
"eval_samples_per_second": 99.624, |
|
"eval_steps_per_second": 1.558, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5379050015728216, |
|
"grad_norm": 2.249830484390259, |
|
"learning_rate": 2.6056582418681164e-06, |
|
"logits/chosen": 1.4642279148101807, |
|
"logits/rejected": 1.204160451889038, |
|
"logps/chosen": -478.66937255859375, |
|
"logps/rejected": -993.77392578125, |
|
"loss": 0.0807, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.444479465484619, |
|
"rewards/margins": 5.1373610496521, |
|
"rewards/rejected": -9.581840515136719, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5410506448568733, |
|
"grad_norm": 3.891451358795166, |
|
"learning_rate": 2.5782250754949334e-06, |
|
"logits/chosen": 1.3919012546539307, |
|
"logits/rejected": 1.2075676918029785, |
|
"logps/chosen": -528.3414306640625, |
|
"logps/rejected": -1053.069091796875, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.955795764923096, |
|
"rewards/margins": 5.23477029800415, |
|
"rewards/rejected": -10.19056510925293, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5441962881409248, |
|
"grad_norm": 2.48587965965271, |
|
"learning_rate": 2.55078247708125e-06, |
|
"logits/chosen": 1.679917573928833, |
|
"logits/rejected": 1.483515977859497, |
|
"logps/chosen": -483.80340576171875, |
|
"logps/rejected": -970.2359619140625, |
|
"loss": 0.0823, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.530341148376465, |
|
"rewards/margins": 4.841682434082031, |
|
"rewards/rejected": -9.37202262878418, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5473419314249764, |
|
"grad_norm": 4.296637058258057, |
|
"learning_rate": 2.5233337555366206e-06, |
|
"logits/chosen": 1.3564870357513428, |
|
"logits/rejected": 1.3211233615875244, |
|
"logps/chosen": -497.56915283203125, |
|
"logps/rejected": -997.6647338867188, |
|
"loss": 0.1098, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.646389007568359, |
|
"rewards/margins": 4.965304851531982, |
|
"rewards/rejected": -9.6116943359375, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.550487574709028, |
|
"grad_norm": 3.6422853469848633, |
|
"learning_rate": 2.4958822205089e-06, |
|
"logits/chosen": 1.2528626918792725, |
|
"logits/rejected": 1.1883177757263184, |
|
"logps/chosen": -489.92425537109375, |
|
"logps/rejected": -988.5632934570312, |
|
"loss": 0.0915, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.553713798522949, |
|
"rewards/margins": 4.979607582092285, |
|
"rewards/rejected": -9.533323287963867, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5536332179930796, |
|
"grad_norm": 4.168347358703613, |
|
"learning_rate": 2.468431181985179e-06, |
|
"logits/chosen": 1.3860187530517578, |
|
"logits/rejected": 1.1525763273239136, |
|
"logps/chosen": -503.0482482910156, |
|
"logps/rejected": -1023.8341674804688, |
|
"loss": 0.0837, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.700275421142578, |
|
"rewards/margins": 5.1787824630737305, |
|
"rewards/rejected": -9.879056930541992, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5567788612771312, |
|
"grad_norm": 2.398576498031616, |
|
"learning_rate": 2.4409839498926848e-06, |
|
"logits/chosen": 1.508630394935608, |
|
"logits/rejected": 1.2709838151931763, |
|
"logps/chosen": -478.9554138183594, |
|
"logps/rejected": -994.9898681640625, |
|
"loss": 0.1004, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.457327842712402, |
|
"rewards/margins": 5.13339900970459, |
|
"rewards/rejected": -9.590726852416992, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5599245045611828, |
|
"grad_norm": 1.8458563089370728, |
|
"learning_rate": 2.41354383369968e-06, |
|
"logits/chosen": 1.2807908058166504, |
|
"logits/rejected": 1.176458716392517, |
|
"logps/chosen": -518.6404418945312, |
|
"logps/rejected": -1035.277587890625, |
|
"loss": 0.0897, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.8516459465026855, |
|
"rewards/margins": 5.121449947357178, |
|
"rewards/rejected": -9.973095893859863, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5630701478452343, |
|
"grad_norm": 2.2971677780151367, |
|
"learning_rate": 2.3861141420164246e-06, |
|
"logits/chosen": 1.4796741008758545, |
|
"logits/rejected": 1.2651772499084473, |
|
"logps/chosen": -477.72747802734375, |
|
"logps/rejected": -1049.280029296875, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.469423294067383, |
|
"rewards/margins": 5.668953895568848, |
|
"rewards/rejected": -10.13837718963623, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.5662157911292859, |
|
"grad_norm": 2.9305918216705322, |
|
"learning_rate": 2.3586981821962325e-06, |
|
"logits/chosen": 1.2771943807601929, |
|
"logits/rejected": 1.146514654159546, |
|
"logps/chosen": -525.7647705078125, |
|
"logps/rejected": -1003.0315551757812, |
|
"loss": 0.0897, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -4.923129558563232, |
|
"rewards/margins": 4.7543439865112305, |
|
"rewards/rejected": -9.677474975585938, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5662157911292859, |
|
"eval_logits/chosen": 2.0822219848632812, |
|
"eval_logits/rejected": 1.9037203788757324, |
|
"eval_logps/chosen": -462.9552307128906, |
|
"eval_logps/rejected": -978.6490478515625, |
|
"eval_loss": 0.04494684934616089, |
|
"eval_rewards/accuracies": 0.9100746512413025, |
|
"eval_rewards/chosen": -4.301841735839844, |
|
"eval_rewards/margins": 5.1256632804870605, |
|
"eval_rewards/rejected": -9.427504539489746, |
|
"eval_runtime": 215.1799, |
|
"eval_samples_per_second": 99.531, |
|
"eval_steps_per_second": 1.557, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5693614344133375, |
|
"grad_norm": 3.8161098957061768, |
|
"learning_rate": 2.3312992599366922e-06, |
|
"logits/chosen": 1.2443963289260864, |
|
"logits/rejected": 1.1431701183319092, |
|
"logps/chosen": -506.3291015625, |
|
"logps/rejected": -968.5440673828125, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.7178635597229, |
|
"rewards/margins": 4.596076011657715, |
|
"rewards/rejected": -9.313939094543457, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5725070776973891, |
|
"grad_norm": 2.8469791412353516, |
|
"learning_rate": 2.3039206788810772e-06, |
|
"logits/chosen": 1.3020217418670654, |
|
"logits/rejected": 1.1260459423065186, |
|
"logps/chosen": -498.827392578125, |
|
"logps/rejected": -1060.462158203125, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.652395248413086, |
|
"rewards/margins": 5.58463716506958, |
|
"rewards/rejected": -10.237032890319824, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5756527209814407, |
|
"grad_norm": 2.866737127304077, |
|
"learning_rate": 2.276565740220006e-06, |
|
"logits/chosen": 1.2295194864273071, |
|
"logits/rejected": 1.1037569046020508, |
|
"logps/chosen": -521.6785888671875, |
|
"logps/rejected": -1030.312255859375, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.875964641571045, |
|
"rewards/margins": 5.085350036621094, |
|
"rewards/rejected": -9.96131420135498, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5787983642654922, |
|
"grad_norm": 3.850404739379883, |
|
"learning_rate": 2.249237742293399e-06, |
|
"logits/chosen": 1.5953633785247803, |
|
"logits/rejected": 1.3363173007965088, |
|
"logps/chosen": -480.15625, |
|
"logps/rejected": -995.4392700195312, |
|
"loss": 0.0825, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.456434726715088, |
|
"rewards/margins": 5.125838756561279, |
|
"rewards/rejected": -9.582273483276367, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5819440075495439, |
|
"grad_norm": 3.528315544128418, |
|
"learning_rate": 2.2219399801927818e-06, |
|
"logits/chosen": 1.3022558689117432, |
|
"logits/rejected": 1.3659191131591797, |
|
"logps/chosen": -491.4390563964844, |
|
"logps/rejected": -1003.6394653320312, |
|
"loss": 0.0754, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.590095043182373, |
|
"rewards/margins": 5.104881286621094, |
|
"rewards/rejected": -9.694976806640625, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5850896508335954, |
|
"grad_norm": 1.6058087348937988, |
|
"learning_rate": 2.194675745363971e-06, |
|
"logits/chosen": 1.203044056892395, |
|
"logits/rejected": 1.063595175743103, |
|
"logps/chosen": -501.7749938964844, |
|
"logps/rejected": -1017.3069458007812, |
|
"loss": 0.0582, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.695984840393066, |
|
"rewards/margins": 5.130289077758789, |
|
"rewards/rejected": -9.826274871826172, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 3.549255847930908, |
|
"learning_rate": 2.167448325210214e-06, |
|
"logits/chosen": 1.3493187427520752, |
|
"logits/rejected": 1.2300829887390137, |
|
"logps/chosen": -551.4163208007812, |
|
"logps/rejected": -1054.571533203125, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.191308498382568, |
|
"rewards/margins": 5.013763904571533, |
|
"rewards/rejected": -10.205072402954102, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5913809374016986, |
|
"grad_norm": 3.082181930541992, |
|
"learning_rate": 2.140261002695804e-06, |
|
"logits/chosen": 1.4283784627914429, |
|
"logits/rejected": 1.312314748764038, |
|
"logps/chosen": -510.80908203125, |
|
"logps/rejected": -1003.2232666015625, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.792733669281006, |
|
"rewards/margins": 4.895735740661621, |
|
"rewards/rejected": -9.688468933105469, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5945265806857503, |
|
"grad_norm": 3.06274676322937, |
|
"learning_rate": 2.1131170559502328e-06, |
|
"logits/chosen": 1.4798619747161865, |
|
"logits/rejected": 1.2996398210525513, |
|
"logps/chosen": -499.54010009765625, |
|
"logps/rejected": -1004.2786865234375, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.648800373077393, |
|
"rewards/margins": 5.034187316894531, |
|
"rewards/rejected": -9.682988166809082, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5976722239698018, |
|
"grad_norm": 2.7769219875335693, |
|
"learning_rate": 2.0860197578729306e-06, |
|
"logits/chosen": 1.4442288875579834, |
|
"logits/rejected": 1.2996704578399658, |
|
"logps/chosen": -501.84149169921875, |
|
"logps/rejected": -1037.2816162109375, |
|
"loss": 0.0717, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.711190700531006, |
|
"rewards/margins": 5.312428951263428, |
|
"rewards/rejected": -10.023618698120117, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5976722239698018, |
|
"eval_logits/chosen": 2.174854278564453, |
|
"eval_logits/rejected": 2.000274181365967, |
|
"eval_logps/chosen": -476.6779479980469, |
|
"eval_logps/rejected": -1019.844482421875, |
|
"eval_loss": 0.040205687284469604, |
|
"eval_rewards/accuracies": 0.9111940264701843, |
|
"eval_rewards/chosen": -4.4390692710876465, |
|
"eval_rewards/margins": 5.400389194488525, |
|
"eval_rewards/rejected": -9.839457511901855, |
|
"eval_runtime": 215.3019, |
|
"eval_samples_per_second": 99.474, |
|
"eval_steps_per_second": 1.556, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6008178672538534, |
|
"grad_norm": 2.0829498767852783, |
|
"learning_rate": 2.058972375738635e-06, |
|
"logits/chosen": 1.4589773416519165, |
|
"logits/rejected": 1.2779289484024048, |
|
"logps/chosen": -474.3269958496094, |
|
"logps/rejected": -990.4747924804688, |
|
"loss": 0.0856, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.426661491394043, |
|
"rewards/margins": 5.121007442474365, |
|
"rewards/rejected": -9.547670364379883, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.603963510537905, |
|
"grad_norm": 2.426499605178833, |
|
"learning_rate": 2.031978170803433e-06, |
|
"logits/chosen": 1.4309927225112915, |
|
"logits/rejected": 1.1588290929794312, |
|
"logps/chosen": -473.109130859375, |
|
"logps/rejected": -1036.1728515625, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.395280361175537, |
|
"rewards/margins": 5.615933418273926, |
|
"rewards/rejected": -10.011213302612305, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.6071091538219566, |
|
"grad_norm": 4.0052947998046875, |
|
"learning_rate": 2.0050403979115372e-06, |
|
"logits/chosen": 1.4139277935028076, |
|
"logits/rejected": 1.32688307762146, |
|
"logps/chosen": -508.6914978027344, |
|
"logps/rejected": -1031.7740478515625, |
|
"loss": 0.0887, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.769944667816162, |
|
"rewards/margins": 5.1961283683776855, |
|
"rewards/rejected": -9.966073036193848, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6102547971060082, |
|
"grad_norm": 2.5407373905181885, |
|
"learning_rate": 1.978162305102828e-06, |
|
"logits/chosen": 1.255937099456787, |
|
"logits/rejected": 1.1275227069854736, |
|
"logps/chosen": -508.431884765625, |
|
"logps/rejected": -1019.8643798828125, |
|
"loss": 0.0771, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.759140491485596, |
|
"rewards/margins": 5.087557792663574, |
|
"rewards/rejected": -9.846698760986328, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6134004403900598, |
|
"grad_norm": 1.74916410446167, |
|
"learning_rate": 1.9513471332212218e-06, |
|
"logits/chosen": 1.3682215213775635, |
|
"logits/rejected": 1.1232274770736694, |
|
"logps/chosen": -526.3414306640625, |
|
"logps/rejected": -1046.4849853515625, |
|
"loss": 0.0651, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.9356513023376465, |
|
"rewards/margins": 5.17080020904541, |
|
"rewards/rejected": -10.106451988220215, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6165460836741113, |
|
"grad_norm": 3.125661611557007, |
|
"learning_rate": 1.9245981155239003e-06, |
|
"logits/chosen": 1.0954793691635132, |
|
"logits/rejected": 0.8896375894546509, |
|
"logps/chosen": -468.3290100097656, |
|
"logps/rejected": -1034.557861328125, |
|
"loss": 0.0803, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.36580753326416, |
|
"rewards/margins": 5.616608619689941, |
|
"rewards/rejected": -9.982416152954102, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.619691726958163, |
|
"grad_norm": 1.5080751180648804, |
|
"learning_rate": 1.8979184772914626e-06, |
|
"logits/chosen": 1.0934078693389893, |
|
"logits/rejected": 1.0470936298370361, |
|
"logps/chosen": -469.5868225097656, |
|
"logps/rejected": -1005.32763671875, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.377876281738281, |
|
"rewards/margins": 5.329991340637207, |
|
"rewards/rejected": -9.707868576049805, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6228373702422145, |
|
"grad_norm": 2.835935592651367, |
|
"learning_rate": 1.8713114354390302e-06, |
|
"logits/chosen": 1.0925285816192627, |
|
"logits/rejected": 0.8882206678390503, |
|
"logps/chosen": -486.85260009765625, |
|
"logps/rejected": -1009.4669799804688, |
|
"loss": 0.0776, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.53892183303833, |
|
"rewards/margins": 5.20424222946167, |
|
"rewards/rejected": -9.7431640625, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6259830135262662, |
|
"grad_norm": 3.386470317840576, |
|
"learning_rate": 1.8447801981283692e-06, |
|
"logits/chosen": 1.1058335304260254, |
|
"logits/rejected": 0.9235653877258301, |
|
"logps/chosen": -508.955810546875, |
|
"logps/rejected": -1028.66162109375, |
|
"loss": 0.0806, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.748799800872803, |
|
"rewards/margins": 5.186470031738281, |
|
"rewards/rejected": -9.935269355773926, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6291286568103177, |
|
"grad_norm": 2.4048423767089844, |
|
"learning_rate": 1.818327964381063e-06, |
|
"logits/chosen": 1.3770169019699097, |
|
"logits/rejected": 1.08171808719635, |
|
"logps/chosen": -558.8704833984375, |
|
"logps/rejected": -1182.05517578125, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.268151760101318, |
|
"rewards/margins": 6.191213607788086, |
|
"rewards/rejected": -11.459364891052246, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6291286568103177, |
|
"eval_logits/chosen": 2.1422410011291504, |
|
"eval_logits/rejected": 1.9354875087738037, |
|
"eval_logps/chosen": -580.0486450195312, |
|
"eval_logps/rejected": -1167.69677734375, |
|
"eval_loss": 0.03678546100854874, |
|
"eval_rewards/accuracies": 0.907835841178894, |
|
"eval_rewards/chosen": -5.472775936126709, |
|
"eval_rewards/margins": 5.845205307006836, |
|
"eval_rewards/rejected": -11.317980766296387, |
|
"eval_runtime": 215.2983, |
|
"eval_samples_per_second": 99.476, |
|
"eval_steps_per_second": 1.556, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6322743000943692, |
|
"grad_norm": 4.648438453674316, |
|
"learning_rate": 1.7919579236927873e-06, |
|
"logits/chosen": 1.4500977993011475, |
|
"logits/rejected": 1.4014136791229248, |
|
"logps/chosen": -587.9722290039062, |
|
"logps/rejected": -1136.3968505859375, |
|
"loss": 0.0804, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.550187110900879, |
|
"rewards/margins": 5.465216636657715, |
|
"rewards/rejected": -11.015403747558594, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6354199433784209, |
|
"grad_norm": 3.089294672012329, |
|
"learning_rate": 1.7656732556487349e-06, |
|
"logits/chosen": 1.5375685691833496, |
|
"logits/rejected": 1.2935359477996826, |
|
"logps/chosen": -569.8836059570312, |
|
"logps/rejected": -1086.203125, |
|
"loss": 0.0853, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.359100818634033, |
|
"rewards/margins": 5.148070335388184, |
|
"rewards/rejected": -10.507171630859375, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6385655866624724, |
|
"grad_norm": 1.5491887331008911, |
|
"learning_rate": 1.7394771295402357e-06, |
|
"logits/chosen": 1.234811544418335, |
|
"logits/rejected": 1.1473228931427002, |
|
"logps/chosen": -460.8042907714844, |
|
"logps/rejected": -970.8651123046875, |
|
"loss": 0.0637, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.264889717102051, |
|
"rewards/margins": 5.0752058029174805, |
|
"rewards/rejected": -9.340095520019531, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6417112299465241, |
|
"grad_norm": 1.6754862070083618, |
|
"learning_rate": 1.713372703982616e-06, |
|
"logits/chosen": 1.405088186264038, |
|
"logits/rejected": 1.1589716672897339, |
|
"logps/chosen": -482.781494140625, |
|
"logps/rejected": -1041.592041015625, |
|
"loss": 0.0658, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.50002384185791, |
|
"rewards/margins": 5.562270164489746, |
|
"rewards/rejected": -10.06229305267334, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.6448568732305756, |
|
"grad_norm": 2.908865451812744, |
|
"learning_rate": 1.6873631265343482e-06, |
|
"logits/chosen": 1.2295814752578735, |
|
"logits/rejected": 1.182570219039917, |
|
"logps/chosen": -506.02899169921875, |
|
"logps/rejected": -1007.8961181640625, |
|
"loss": 0.0886, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.720425605773926, |
|
"rewards/margins": 5.0006914138793945, |
|
"rewards/rejected": -9.72111701965332, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6480025165146273, |
|
"grad_norm": 2.9250617027282715, |
|
"learning_rate": 1.6614515333175301e-06, |
|
"logits/chosen": 1.2245099544525146, |
|
"logits/rejected": 1.1675890684127808, |
|
"logps/chosen": -485.50726318359375, |
|
"logps/rejected": -1001.1533203125, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.507925033569336, |
|
"rewards/margins": 5.16204309463501, |
|
"rewards/rejected": -9.669966697692871, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6511481597986788, |
|
"grad_norm": 1.8200067281723022, |
|
"learning_rate": 1.6356410486397465e-06, |
|
"logits/chosen": 1.4917027950286865, |
|
"logits/rejected": 1.1800551414489746, |
|
"logps/chosen": -441.74755859375, |
|
"logps/rejected": -1085.5457763671875, |
|
"loss": 0.0871, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.092404365539551, |
|
"rewards/margins": 6.399931907653809, |
|
"rewards/rejected": -10.492338180541992, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6542938030827304, |
|
"grad_norm": 3.119096517562866, |
|
"learning_rate": 1.6099347846173515e-06, |
|
"logits/chosen": 1.573908805847168, |
|
"logits/rejected": 1.341700792312622, |
|
"logps/chosen": -462.58782958984375, |
|
"logps/rejected": -1023.86328125, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.3063201904296875, |
|
"rewards/margins": 5.589145660400391, |
|
"rewards/rejected": -9.895464897155762, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.657439446366782, |
|
"grad_norm": 3.447984218597412, |
|
"learning_rate": 1.5843358408002263e-06, |
|
"logits/chosen": 1.4889028072357178, |
|
"logits/rejected": 1.2970954179763794, |
|
"logps/chosen": -526.8050537109375, |
|
"logps/rejected": -1041.9156494140625, |
|
"loss": 0.0806, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.942746162414551, |
|
"rewards/margins": 5.119975566864014, |
|
"rewards/rejected": -10.062723159790039, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.6605850896508336, |
|
"grad_norm": 2.026029109954834, |
|
"learning_rate": 1.5588473037980448e-06, |
|
"logits/chosen": 1.607642412185669, |
|
"logits/rejected": 1.3940250873565674, |
|
"logps/chosen": -501.61932373046875, |
|
"logps/rejected": -1060.6180419921875, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.699268817901611, |
|
"rewards/margins": 5.546324253082275, |
|
"rewards/rejected": -10.24559211730957, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6605850896508336, |
|
"eval_logits/chosen": 2.205811023712158, |
|
"eval_logits/rejected": 2.02057147026062, |
|
"eval_logps/chosen": -500.12677001953125, |
|
"eval_logps/rejected": -1064.2464599609375, |
|
"eval_loss": 0.03558611497282982, |
|
"eval_rewards/accuracies": 0.9190298318862915, |
|
"eval_rewards/chosen": -4.673556804656982, |
|
"eval_rewards/margins": 5.609921455383301, |
|
"eval_rewards/rejected": -10.283478736877441, |
|
"eval_runtime": 214.4369, |
|
"eval_samples_per_second": 99.876, |
|
"eval_steps_per_second": 1.562, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6637307329348852, |
|
"grad_norm": 4.1672210693359375, |
|
"learning_rate": 1.5334722469081071e-06, |
|
"logits/chosen": 1.6063897609710693, |
|
"logits/rejected": 1.4115921258926392, |
|
"logps/chosen": -531.2935791015625, |
|
"logps/rejected": -1066.810302734375, |
|
"loss": 0.08, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.991600036621094, |
|
"rewards/margins": 5.322088241577148, |
|
"rewards/rejected": -10.313688278198242, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6668763762189368, |
|
"grad_norm": 2.3815789222717285, |
|
"learning_rate": 1.508213729744773e-06, |
|
"logits/chosen": 1.546438217163086, |
|
"logits/rejected": 1.2946199178695679, |
|
"logps/chosen": -535.8787231445312, |
|
"logps/rejected": -1102.216064453125, |
|
"loss": 0.0778, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.021573066711426, |
|
"rewards/margins": 5.643277645111084, |
|
"rewards/rejected": -10.664851188659668, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6700220195029883, |
|
"grad_norm": 2.239928960800171, |
|
"learning_rate": 1.483074797870547e-06, |
|
"logits/chosen": 1.2132775783538818, |
|
"logits/rejected": 1.0260388851165771, |
|
"logps/chosen": -521.3165893554688, |
|
"logps/rejected": -1100.88037109375, |
|
"loss": 0.0682, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.873823642730713, |
|
"rewards/margins": 5.776439189910889, |
|
"rewards/rejected": -10.650262832641602, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.67316766278704, |
|
"grad_norm": 4.122522354125977, |
|
"learning_rate": 1.4580584824288585e-06, |
|
"logits/chosen": 1.3960988521575928, |
|
"logits/rejected": 1.2553833723068237, |
|
"logps/chosen": -499.489990234375, |
|
"logps/rejected": -1097.9930419921875, |
|
"loss": 0.0649, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.652958393096924, |
|
"rewards/margins": 5.967487335205078, |
|
"rewards/rejected": -10.620445251464844, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6763133060710915, |
|
"grad_norm": 3.405120372772217, |
|
"learning_rate": 1.4331677997785786e-06, |
|
"logits/chosen": 1.1937223672866821, |
|
"logits/rejected": 1.0959510803222656, |
|
"logps/chosen": -533.7369384765625, |
|
"logps/rejected": -1047.7427978515625, |
|
"loss": 0.0817, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.0067901611328125, |
|
"rewards/margins": 5.136543273925781, |
|
"rewards/rejected": -10.143333435058594, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6794589493551432, |
|
"grad_norm": 3.386723279953003, |
|
"learning_rate": 1.4084057511303212e-06, |
|
"logits/chosen": 1.2363038063049316, |
|
"logits/rejected": 1.009194254875183, |
|
"logps/chosen": -475.3011779785156, |
|
"logps/rejected": -1005.6472778320312, |
|
"loss": 0.077, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.424704551696777, |
|
"rewards/margins": 5.272775650024414, |
|
"rewards/rejected": -9.697481155395508, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6826045926391947, |
|
"grad_norm": 1.0529998540878296, |
|
"learning_rate": 1.383775322184569e-06, |
|
"logits/chosen": 1.3098864555358887, |
|
"logits/rejected": 1.1490665674209595, |
|
"logps/chosen": -485.99853515625, |
|
"logps/rejected": -971.4075927734375, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.52325439453125, |
|
"rewards/margins": 4.847430229187012, |
|
"rewards/rejected": -9.370684623718262, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6857502359232464, |
|
"grad_norm": 3.592379093170166, |
|
"learning_rate": 1.3592794827716726e-06, |
|
"logits/chosen": 1.4308536052703857, |
|
"logits/rejected": 1.1930121183395386, |
|
"logps/chosen": -522.5880126953125, |
|
"logps/rejected": -1014.5653076171875, |
|
"loss": 0.0782, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.89089298248291, |
|
"rewards/margins": 4.9056267738342285, |
|
"rewards/rejected": -9.79651927947998, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.6888958792072979, |
|
"grad_norm": 3.138232946395874, |
|
"learning_rate": 1.33492118649376e-06, |
|
"logits/chosen": 1.2991418838500977, |
|
"logits/rejected": 1.0811710357666016, |
|
"logps/chosen": -516.8934936523438, |
|
"logps/rejected": -1030.9951171875, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.8288469314575195, |
|
"rewards/margins": 5.133552551269531, |
|
"rewards/rejected": -9.96239948272705, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.6920415224913494, |
|
"grad_norm": 1.4153366088867188, |
|
"learning_rate": 1.310703370368605e-06, |
|
"logits/chosen": 1.3389747142791748, |
|
"logits/rejected": 1.2751328945159912, |
|
"logps/chosen": -496.916015625, |
|
"logps/rejected": -1069.6923828125, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.62496280670166, |
|
"rewards/margins": 5.712726593017578, |
|
"rewards/rejected": -10.337689399719238, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6920415224913494, |
|
"eval_logits/chosen": 2.127232551574707, |
|
"eval_logits/rejected": 1.9361519813537598, |
|
"eval_logps/chosen": -493.0187072753906, |
|
"eval_logps/rejected": -1058.18115234375, |
|
"eval_loss": 0.03407713398337364, |
|
"eval_rewards/accuracies": 0.920895516872406, |
|
"eval_rewards/chosen": -4.602477073669434, |
|
"eval_rewards/margins": 5.620346546173096, |
|
"eval_rewards/rejected": -10.222823143005371, |
|
"eval_runtime": 214.8052, |
|
"eval_samples_per_second": 99.704, |
|
"eval_steps_per_second": 1.56, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6951871657754011, |
|
"grad_norm": 2.530432939529419, |
|
"learning_rate": 1.28662895447549e-06, |
|
"logits/chosen": 1.5015747547149658, |
|
"logits/rejected": 1.3566521406173706, |
|
"logps/chosen": -486.2564392089844, |
|
"logps/rejected": -1018.2689208984375, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.528347969055176, |
|
"rewards/margins": 5.29823112487793, |
|
"rewards/rejected": -9.826578140258789, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.6983328090594526, |
|
"grad_norm": 2.539612293243408, |
|
"learning_rate": 1.2627008416031234e-06, |
|
"logits/chosen": 1.268513560295105, |
|
"logits/rejected": 1.1765294075012207, |
|
"logps/chosen": -509.4791564941406, |
|
"logps/rejected": -1032.7933349609375, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.755896091461182, |
|
"rewards/margins": 5.238858699798584, |
|
"rewards/rejected": -9.994754791259766, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7014784523435043, |
|
"grad_norm": 2.578758716583252, |
|
"learning_rate": 1.2389219168996275e-06, |
|
"logits/chosen": 1.171112298965454, |
|
"logits/rejected": 0.9224494099617004, |
|
"logps/chosen": -523.6820068359375, |
|
"logps/rejected": -1086.1273193359375, |
|
"loss": 0.058, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.909883975982666, |
|
"rewards/margins": 5.608704566955566, |
|
"rewards/rejected": -10.51858901977539, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.7046240956275558, |
|
"grad_norm": 3.774636745452881, |
|
"learning_rate": 1.2152950475246621e-06, |
|
"logits/chosen": 1.2603710889816284, |
|
"logits/rejected": 1.075377106666565, |
|
"logps/chosen": -540.7611083984375, |
|
"logps/rejected": -1098.763427734375, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.084263801574707, |
|
"rewards/margins": 5.547011375427246, |
|
"rewards/rejected": -10.631277084350586, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.7077697389116074, |
|
"grad_norm": 4.932303428649902, |
|
"learning_rate": 1.191823082303715e-06, |
|
"logits/chosen": 1.3602168560028076, |
|
"logits/rejected": 1.2510731220245361, |
|
"logps/chosen": -512.5650634765625, |
|
"logps/rejected": -1079.0947265625, |
|
"loss": 0.0691, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.783736228942871, |
|
"rewards/margins": 5.657622337341309, |
|
"rewards/rejected": -10.44135856628418, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.710915382195659, |
|
"grad_norm": 3.490607500076294, |
|
"learning_rate": 1.1685088513846022e-06, |
|
"logits/chosen": 1.2267606258392334, |
|
"logits/rejected": 1.1843498945236206, |
|
"logps/chosen": -507.78106689453125, |
|
"logps/rejected": -1016.5057373046875, |
|
"loss": 0.0721, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -4.733147144317627, |
|
"rewards/margins": 5.068860054016113, |
|
"rewards/rejected": -9.802007675170898, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.7140610254797106, |
|
"grad_norm": 3.0085694789886475, |
|
"learning_rate": 1.1453551658962216e-06, |
|
"logits/chosen": 1.3816957473754883, |
|
"logits/rejected": 1.13775634765625, |
|
"logps/chosen": -510.34344482421875, |
|
"logps/rejected": -1073.6854248046875, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.783577919006348, |
|
"rewards/margins": 5.602017402648926, |
|
"rewards/rejected": -10.385595321655273, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7172066687637622, |
|
"grad_norm": 1.4133647680282593, |
|
"learning_rate": 1.1223648176095992e-06, |
|
"logits/chosen": 0.987162709236145, |
|
"logits/rejected": 0.9267969131469727, |
|
"logps/chosen": -526.0758666992188, |
|
"logps/rejected": -1094.768798828125, |
|
"loss": 0.0554, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.929390907287598, |
|
"rewards/margins": 5.669344902038574, |
|
"rewards/rejected": -10.598735809326172, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.7203523120478138, |
|
"grad_norm": 2.9754834175109863, |
|
"learning_rate": 1.0995405786012687e-06, |
|
"logits/chosen": 1.2644927501678467, |
|
"logits/rejected": 1.032185435295105, |
|
"logps/chosen": -539.8465576171875, |
|
"logps/rejected": -1119.389892578125, |
|
"loss": 0.0728, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.067166805267334, |
|
"rewards/margins": 5.777620315551758, |
|
"rewards/rejected": -10.84478759765625, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.7234979553318653, |
|
"grad_norm": 3.943408489227295, |
|
"learning_rate": 1.0768852009190275e-06, |
|
"logits/chosen": 1.2131414413452148, |
|
"logits/rejected": 1.1425590515136719, |
|
"logps/chosen": -527.9764404296875, |
|
"logps/rejected": -1111.0107421875, |
|
"loss": 0.0623, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.94549036026001, |
|
"rewards/margins": 5.832481384277344, |
|
"rewards/rejected": -10.777971267700195, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7234979553318653, |
|
"eval_logits/chosen": 2.0326879024505615, |
|
"eval_logits/rejected": 1.8240412473678589, |
|
"eval_logps/chosen": -526.7490844726562, |
|
"eval_logps/rejected": -1106.5096435546875, |
|
"eval_loss": 0.032569337636232376, |
|
"eval_rewards/accuracies": 0.9212686419487, |
|
"eval_rewards/chosen": -4.939780235290527, |
|
"eval_rewards/margins": 5.766328811645508, |
|
"eval_rewards/rejected": -10.706109046936035, |
|
"eval_runtime": 215.216, |
|
"eval_samples_per_second": 99.514, |
|
"eval_steps_per_second": 1.557, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.726643598615917, |
|
"grad_norm": 1.9171903133392334, |
|
"learning_rate": 1.0544014162501065e-06, |
|
"logits/chosen": 1.2516294717788696, |
|
"logits/rejected": 1.073210597038269, |
|
"logps/chosen": -539.9628295898438, |
|
"logps/rejected": -1080.7843017578125, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.059046745300293, |
|
"rewards/margins": 5.38003396987915, |
|
"rewards/rejected": -10.439081192016602, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7297892418999685, |
|
"grad_norm": 3.3843212127685547, |
|
"learning_rate": 1.0320919355917951e-06, |
|
"logits/chosen": 1.2604172229766846, |
|
"logits/rejected": 1.1014560461044312, |
|
"logps/chosen": -511.25421142578125, |
|
"logps/rejected": -1042.747314453125, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.782188415527344, |
|
"rewards/margins": 5.282374382019043, |
|
"rewards/rejected": -10.06456184387207, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7329348851840202, |
|
"grad_norm": 4.020650386810303, |
|
"learning_rate": 1.0099594489245593e-06, |
|
"logits/chosen": 1.5181647539138794, |
|
"logits/rejected": 1.3572300672531128, |
|
"logps/chosen": -520.1868896484375, |
|
"logps/rejected": -1071.7554931640625, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.876954078674316, |
|
"rewards/margins": 5.484519004821777, |
|
"rewards/rejected": -10.361473083496094, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7360805284680717, |
|
"grad_norm": 3.158212184906006, |
|
"learning_rate": 9.880066248876977e-07, |
|
"logits/chosen": 1.3198108673095703, |
|
"logits/rejected": 1.1537165641784668, |
|
"logps/chosen": -531.109375, |
|
"logps/rejected": -1087.0430908203125, |
|
"loss": 0.0556, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.994508743286133, |
|
"rewards/margins": 5.531711578369141, |
|
"rewards/rejected": -10.526220321655273, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7392261717521234, |
|
"grad_norm": 2.2422516345977783, |
|
"learning_rate": 9.662361104575688e-07, |
|
"logits/chosen": 1.4852499961853027, |
|
"logits/rejected": 1.3730026483535767, |
|
"logps/chosen": -504.0821228027344, |
|
"logps/rejected": -1052.568603515625, |
|
"loss": 0.0707, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.705484390258789, |
|
"rewards/margins": 5.476336479187012, |
|
"rewards/rejected": -10.1818208694458, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7423718150361749, |
|
"grad_norm": 2.419581174850464, |
|
"learning_rate": 9.446505306284276e-07, |
|
"logits/chosen": 1.228144884109497, |
|
"logits/rejected": 1.058624029159546, |
|
"logps/chosen": -529.3380126953125, |
|
"logps/rejected": -1101.996337890625, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.964212894439697, |
|
"rewards/margins": 5.716494083404541, |
|
"rewards/rejected": -10.680707931518555, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7455174583202265, |
|
"grad_norm": 1.8092293739318848, |
|
"learning_rate": 9.232524880959173e-07, |
|
"logits/chosen": 1.497022032737732, |
|
"logits/rejected": 1.2356172800064087, |
|
"logps/chosen": -563.4901123046875, |
|
"logps/rejected": -1064.003173828125, |
|
"loss": 0.0721, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.283173561096191, |
|
"rewards/margins": 5.0053606033325195, |
|
"rewards/rejected": -10.288534164428711, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7486631016042781, |
|
"grad_norm": 2.928718090057373, |
|
"learning_rate": 9.02044562943247e-07, |
|
"logits/chosen": 1.3512821197509766, |
|
"logits/rejected": 1.1803138256072998, |
|
"logps/chosen": -544.3297119140625, |
|
"logps/rejected": -1103.0115966796875, |
|
"loss": 0.0649, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.104948043823242, |
|
"rewards/margins": 5.569328784942627, |
|
"rewards/rejected": -10.674277305603027, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.7518087448883297, |
|
"grad_norm": 5.057467937469482, |
|
"learning_rate": 8.810293123300956e-07, |
|
"logits/chosen": 1.2008953094482422, |
|
"logits/rejected": 1.0849316120147705, |
|
"logps/chosen": -523.2420654296875, |
|
"logps/rejected": -1096.0482177734375, |
|
"loss": 0.0727, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.887139320373535, |
|
"rewards/margins": 5.702400207519531, |
|
"rewards/rejected": -10.589539527893066, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7549543881723813, |
|
"grad_norm": 2.2187750339508057, |
|
"learning_rate": 8.602092701842821e-07, |
|
"logits/chosen": 1.3591426610946655, |
|
"logits/rejected": 1.168972134590149, |
|
"logps/chosen": -502.78167724609375, |
|
"logps/rejected": -1061.560546875, |
|
"loss": 0.0693, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.693524360656738, |
|
"rewards/margins": 5.574719429016113, |
|
"rewards/rejected": -10.268243789672852, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7549543881723813, |
|
"eval_logits/chosen": 2.0582876205444336, |
|
"eval_logits/rejected": 1.8580067157745361, |
|
"eval_logps/chosen": -513.0094604492188, |
|
"eval_logps/rejected": -1098.9998779296875, |
|
"eval_loss": 0.03128579631447792, |
|
"eval_rewards/accuracies": 0.9231343269348145, |
|
"eval_rewards/chosen": -4.802384376525879, |
|
"eval_rewards/margins": 5.828627109527588, |
|
"eval_rewards/rejected": -10.631011962890625, |
|
"eval_runtime": 214.497, |
|
"eval_samples_per_second": 99.848, |
|
"eval_steps_per_second": 1.562, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7581000314564328, |
|
"grad_norm": 2.5894553661346436, |
|
"learning_rate": 8.395869468962337e-07, |
|
"logits/chosen": 1.2950252294540405, |
|
"logits/rejected": 1.0707935094833374, |
|
"logps/chosen": -528.3794555664062, |
|
"logps/rejected": -1150.237548828125, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.965851306915283, |
|
"rewards/margins": 6.168177604675293, |
|
"rewards/rejected": -11.134028434753418, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.7612456747404844, |
|
"grad_norm": 1.3667354583740234, |
|
"learning_rate": 8.191648290162957e-07, |
|
"logits/chosen": 1.440059781074524, |
|
"logits/rejected": 1.308544635772705, |
|
"logps/chosen": -504.36151123046875, |
|
"logps/rejected": -1091.216552734375, |
|
"loss": 0.0615, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.736184597015381, |
|
"rewards/margins": 5.828288555145264, |
|
"rewards/rejected": -10.564473152160645, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.764391318024536, |
|
"grad_norm": 3.833573579788208, |
|
"learning_rate": 7.989453789549131e-07, |
|
"logits/chosen": 1.3757915496826172, |
|
"logits/rejected": 1.328730583190918, |
|
"logps/chosen": -516.4542236328125, |
|
"logps/rejected": -1068.0494384765625, |
|
"loss": 0.083, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.81516170501709, |
|
"rewards/margins": 5.511561393737793, |
|
"rewards/rejected": -10.326723098754883, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.7675369613085876, |
|
"grad_norm": 1.203484296798706, |
|
"learning_rate": 7.789310346857243e-07, |
|
"logits/chosen": 1.573310136795044, |
|
"logits/rejected": 1.4590044021606445, |
|
"logps/chosen": -496.33001708984375, |
|
"logps/rejected": -956.9088134765625, |
|
"loss": 0.0658, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.645215034484863, |
|
"rewards/margins": 4.573522090911865, |
|
"rewards/rejected": -9.21873664855957, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.7706826045926392, |
|
"grad_norm": 3.1549971103668213, |
|
"learning_rate": 7.591242094515983e-07, |
|
"logits/chosen": 1.3946812152862549, |
|
"logits/rejected": 1.2821216583251953, |
|
"logps/chosen": -507.58221435546875, |
|
"logps/rejected": -976.2921752929688, |
|
"loss": 0.0655, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.728119850158691, |
|
"rewards/margins": 4.666321754455566, |
|
"rewards/rejected": -9.394440650939941, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7738282478766908, |
|
"grad_norm": 2.5392165184020996, |
|
"learning_rate": 7.395272914736604e-07, |
|
"logits/chosen": 1.22986900806427, |
|
"logits/rejected": 1.043039083480835, |
|
"logps/chosen": -506.740478515625, |
|
"logps/rejected": -1101.1920166015625, |
|
"loss": 0.0592, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.742389678955078, |
|
"rewards/margins": 5.9132208824157715, |
|
"rewards/rejected": -10.655611038208008, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7769738911607423, |
|
"grad_norm": 3.4939658641815186, |
|
"learning_rate": 7.201426436633289e-07, |
|
"logits/chosen": 1.1813576221466064, |
|
"logits/rejected": 1.0866069793701172, |
|
"logps/chosen": -519.2979125976562, |
|
"logps/rejected": -1120.2783203125, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.859357833862305, |
|
"rewards/margins": 5.996047019958496, |
|
"rewards/rejected": -10.8554048538208, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.780119534444794, |
|
"grad_norm": 2.185563325881958, |
|
"learning_rate": 7.009726033374045e-07, |
|
"logits/chosen": 1.399010419845581, |
|
"logits/rejected": 1.1917028427124023, |
|
"logps/chosen": -552.6378173828125, |
|
"logps/rejected": -1109.709716796875, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.179744243621826, |
|
"rewards/margins": 5.563017845153809, |
|
"rewards/rejected": -10.742761611938477, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7832651777288455, |
|
"grad_norm": 3.3408279418945312, |
|
"learning_rate": 6.820194819362477e-07, |
|
"logits/chosen": 1.303276777267456, |
|
"logits/rejected": 1.1878306865692139, |
|
"logps/chosen": -515.8385009765625, |
|
"logps/rejected": -1112.105224609375, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.832343101501465, |
|
"rewards/margins": 5.937832832336426, |
|
"rewards/rejected": -10.770174980163574, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7864108210128972, |
|
"grad_norm": 2.98215651512146, |
|
"learning_rate": 6.632855647450784e-07, |
|
"logits/chosen": 1.2269493341445923, |
|
"logits/rejected": 1.1103036403656006, |
|
"logps/chosen": -512.5769653320312, |
|
"logps/rejected": -1073.9197998046875, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.790786266326904, |
|
"rewards/margins": 5.6085734367370605, |
|
"rewards/rejected": -10.399358749389648, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7864108210128972, |
|
"eval_logits/chosen": 2.147142171859741, |
|
"eval_logits/rejected": 1.9534480571746826, |
|
"eval_logps/chosen": -514.0873413085938, |
|
"eval_logps/rejected": -1109.419921875, |
|
"eval_loss": 0.03031795844435692, |
|
"eval_rewards/accuracies": 0.9227612018585205, |
|
"eval_rewards/chosen": -4.8131632804870605, |
|
"eval_rewards/margins": 5.922050476074219, |
|
"eval_rewards/rejected": -10.735214233398438, |
|
"eval_runtime": 214.5706, |
|
"eval_samples_per_second": 99.813, |
|
"eval_steps_per_second": 1.561, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7895564642969487, |
|
"grad_norm": 3.696200132369995, |
|
"learning_rate": 6.447731106184183e-07, |
|
"logits/chosen": 1.3392512798309326, |
|
"logits/rejected": 1.1863142251968384, |
|
"logps/chosen": -556.5394287109375, |
|
"logps/rejected": -1091.25537109375, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.229084014892578, |
|
"rewards/margins": 5.346131324768066, |
|
"rewards/rejected": -10.575216293334961, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.7927021075810003, |
|
"grad_norm": 3.2612533569335938, |
|
"learning_rate": 6.264843517077355e-07, |
|
"logits/chosen": 1.5268758535385132, |
|
"logits/rejected": 1.4993855953216553, |
|
"logps/chosen": -488.5655212402344, |
|
"logps/rejected": -1056.704345703125, |
|
"loss": 0.0712, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.5665812492370605, |
|
"rewards/margins": 5.6401848793029785, |
|
"rewards/rejected": -10.206766128540039, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.7958477508650519, |
|
"grad_norm": 1.6281901597976685, |
|
"learning_rate": 6.084214931922988e-07, |
|
"logits/chosen": 1.544272780418396, |
|
"logits/rejected": 1.394097924232483, |
|
"logps/chosen": -487.400146484375, |
|
"logps/rejected": -1142.82470703125, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.553741455078125, |
|
"rewards/margins": 6.4883294105529785, |
|
"rewards/rejected": -11.042070388793945, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.7989933941491035, |
|
"grad_norm": 1.7609894275665283, |
|
"learning_rate": 5.905867130132858e-07, |
|
"logits/chosen": 1.3884477615356445, |
|
"logits/rejected": 1.174084186553955, |
|
"logps/chosen": -524.9644775390625, |
|
"logps/rejected": -1094.303466796875, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.912240028381348, |
|
"rewards/margins": 5.668644428253174, |
|
"rewards/rejected": -10.58088493347168, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.8021390374331551, |
|
"grad_norm": 1.901530385017395, |
|
"learning_rate": 5.729821616111777e-07, |
|
"logits/chosen": 1.2051628828048706, |
|
"logits/rejected": 1.0570333003997803, |
|
"logps/chosen": -522.4967651367188, |
|
"logps/rejected": -1080.4366455078125, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.89337158203125, |
|
"rewards/margins": 5.575320243835449, |
|
"rewards/rejected": -10.4686918258667, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.8052846807172067, |
|
"grad_norm": 2.6274139881134033, |
|
"learning_rate": 5.556099616664678e-07, |
|
"logits/chosen": 1.457098126411438, |
|
"logits/rejected": 1.270530343055725, |
|
"logps/chosen": -524.5520629882812, |
|
"logps/rejected": -1138.426025390625, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.922528266906738, |
|
"rewards/margins": 6.103795051574707, |
|
"rewards/rejected": -11.026323318481445, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.8084303240012582, |
|
"grad_norm": 3.4322991371154785, |
|
"learning_rate": 5.384722078437163e-07, |
|
"logits/chosen": 1.168784499168396, |
|
"logits/rejected": 0.9237147569656372, |
|
"logps/chosen": -503.53948974609375, |
|
"logps/rejected": -1102.985107421875, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.709792137145996, |
|
"rewards/margins": 5.9735107421875, |
|
"rewards/rejected": -10.68330192565918, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.8115759672853099, |
|
"grad_norm": 2.3039779663085938, |
|
"learning_rate": 5.215709665389884e-07, |
|
"logits/chosen": 1.5015685558319092, |
|
"logits/rejected": 1.1338298320770264, |
|
"logps/chosen": -495.9610900878906, |
|
"logps/rejected": -1086.0147705078125, |
|
"loss": 0.0628, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.62796688079834, |
|
"rewards/margins": 5.865346431732178, |
|
"rewards/rejected": -10.493314743041992, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.8147216105693614, |
|
"grad_norm": 3.216778039932251, |
|
"learning_rate": 5.049082756306933e-07, |
|
"logits/chosen": 1.1515804529190063, |
|
"logits/rejected": 1.0844125747680664, |
|
"logps/chosen": -494.03338623046875, |
|
"logps/rejected": -1051.1014404296875, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.619012832641602, |
|
"rewards/margins": 5.5344648361206055, |
|
"rewards/rejected": -10.153477668762207, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.817867253853413, |
|
"grad_norm": 1.8903234004974365, |
|
"learning_rate": 4.884861442338703e-07, |
|
"logits/chosen": 1.355169415473938, |
|
"logits/rejected": 1.2924784421920776, |
|
"logps/chosen": -503.74908447265625, |
|
"logps/rejected": -1109.2928466796875, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.697711944580078, |
|
"rewards/margins": 6.013389587402344, |
|
"rewards/rejected": -10.711101531982422, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.817867253853413, |
|
"eval_logits/chosen": 2.135688543319702, |
|
"eval_logits/rejected": 1.939805507659912, |
|
"eval_logps/chosen": -505.2809753417969, |
|
"eval_logps/rejected": -1092.1619873046875, |
|
"eval_loss": 0.0300795566290617, |
|
"eval_rewards/accuracies": 0.9261193871498108, |
|
"eval_rewards/chosen": -4.725099086761475, |
|
"eval_rewards/margins": 5.837533473968506, |
|
"eval_rewards/rejected": -10.56263256072998, |
|
"eval_runtime": 215.0481, |
|
"eval_samples_per_second": 99.592, |
|
"eval_steps_per_second": 1.558, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8210128971374646, |
|
"grad_norm": 1.426253318786621, |
|
"learning_rate": 4.7230655245793286e-07, |
|
"logits/chosen": 1.430915117263794, |
|
"logits/rejected": 1.2984659671783447, |
|
"logps/chosen": -506.103271484375, |
|
"logps/rejected": -1104.470458984375, |
|
"loss": 0.0576, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.7343974113464355, |
|
"rewards/margins": 5.964964866638184, |
|
"rewards/rejected": -10.699361801147461, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8241585404215162, |
|
"grad_norm": 4.4854736328125, |
|
"learning_rate": 4.563714511679201e-07, |
|
"logits/chosen": 1.1772897243499756, |
|
"logits/rejected": 0.9327267408370972, |
|
"logps/chosen": -552.3887939453125, |
|
"logps/rejected": -1113.9022216796875, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.1871819496154785, |
|
"rewards/margins": 5.609848976135254, |
|
"rewards/rejected": -10.797030448913574, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8273041837055678, |
|
"grad_norm": 2.612297296524048, |
|
"learning_rate": 4.4068276174926624e-07, |
|
"logits/chosen": 1.4450725317001343, |
|
"logits/rejected": 1.308194875717163, |
|
"logps/chosen": -527.3743896484375, |
|
"logps/rejected": -1082.718994140625, |
|
"loss": 0.0691, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.931950569152832, |
|
"rewards/margins": 5.522625923156738, |
|
"rewards/rejected": -10.45457649230957, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.8304498269896193, |
|
"grad_norm": 2.8371591567993164, |
|
"learning_rate": 4.25242375876132e-07, |
|
"logits/chosen": 1.6938962936401367, |
|
"logits/rejected": 1.4360706806182861, |
|
"logps/chosen": -514.8795166015625, |
|
"logps/rejected": -1132.747802734375, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.795286655426025, |
|
"rewards/margins": 6.1563005447387695, |
|
"rewards/rejected": -10.95158576965332, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.833595470273671, |
|
"grad_norm": 2.8870389461517334, |
|
"learning_rate": 4.1005215528331254e-07, |
|
"logits/chosen": 1.656299352645874, |
|
"logits/rejected": 1.4690709114074707, |
|
"logps/chosen": -529.0469360351562, |
|
"logps/rejected": -1109.736083984375, |
|
"loss": 0.0657, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.954121112823486, |
|
"rewards/margins": 5.782765865325928, |
|
"rewards/rejected": -10.736886978149414, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8367411135577225, |
|
"grad_norm": 1.9303276538848877, |
|
"learning_rate": 3.9511393154175795e-07, |
|
"logits/chosen": 1.6134449243545532, |
|
"logits/rejected": 1.3297247886657715, |
|
"logps/chosen": -525.5284423828125, |
|
"logps/rejected": -1118.3677978515625, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.909844398498535, |
|
"rewards/margins": 5.91357421875, |
|
"rewards/rejected": -10.823419570922852, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8398867568417742, |
|
"grad_norm": 3.3604700565338135, |
|
"learning_rate": 3.8042950583773054e-07, |
|
"logits/chosen": 1.5226154327392578, |
|
"logits/rejected": 1.2554523944854736, |
|
"logps/chosen": -510.0711975097656, |
|
"logps/rejected": -1055.9796142578125, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.7616868019104, |
|
"rewards/margins": 5.4259772300720215, |
|
"rewards/rejected": -10.187662124633789, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8430324001258257, |
|
"grad_norm": 2.3669848442077637, |
|
"learning_rate": 3.660006487556245e-07, |
|
"logits/chosen": 1.499861478805542, |
|
"logits/rejected": 1.3318068981170654, |
|
"logps/chosen": -489.2085876464844, |
|
"logps/rejected": -1086.85546875, |
|
"loss": 0.0561, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.581131935119629, |
|
"rewards/margins": 5.949007987976074, |
|
"rewards/rejected": -10.530141830444336, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8461780434098773, |
|
"grad_norm": 1.5873863697052002, |
|
"learning_rate": 3.5182910006447775e-07, |
|
"logits/chosen": 1.3309674263000488, |
|
"logits/rejected": 1.1546790599822998, |
|
"logps/chosen": -509.28302001953125, |
|
"logps/rejected": -1080.261962890625, |
|
"loss": 0.0488, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.774194717407227, |
|
"rewards/margins": 5.679512023925781, |
|
"rewards/rejected": -10.453706741333008, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8493236866939289, |
|
"grad_norm": 2.22021484375, |
|
"learning_rate": 3.3791656850819975e-07, |
|
"logits/chosen": 1.4363747835159302, |
|
"logits/rejected": 1.231518030166626, |
|
"logps/chosen": -512.7792358398438, |
|
"logps/rejected": -1080.0721435546875, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.79265022277832, |
|
"rewards/margins": 5.652270793914795, |
|
"rewards/rejected": -10.444921493530273, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8493236866939289, |
|
"eval_logits/chosen": 2.194664716720581, |
|
"eval_logits/rejected": 2.000288724899292, |
|
"eval_logps/chosen": -502.0693664550781, |
|
"eval_logps/rejected": -1098.9693603515625, |
|
"eval_loss": 0.029430242255330086, |
|
"eval_rewards/accuracies": 0.9261193871498108, |
|
"eval_rewards/chosen": -4.692983627319336, |
|
"eval_rewards/margins": 5.937723636627197, |
|
"eval_rewards/rejected": -10.630707740783691, |
|
"eval_runtime": 215.131, |
|
"eval_samples_per_second": 99.553, |
|
"eval_steps_per_second": 1.557, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8524693299779805, |
|
"grad_norm": 3.6423380374908447, |
|
"learning_rate": 3.2426473159953455e-07, |
|
"logits/chosen": 1.4169188737869263, |
|
"logits/rejected": 1.2496435642242432, |
|
"logps/chosen": -513.0960693359375, |
|
"logps/rejected": -1108.312255859375, |
|
"loss": 0.062, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.786744117736816, |
|
"rewards/margins": 5.93210506439209, |
|
"rewards/rejected": -10.71884822845459, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.8556149732620321, |
|
"grad_norm": 3.5630664825439453, |
|
"learning_rate": 3.108752354177963e-07, |
|
"logits/chosen": 1.552750825881958, |
|
"logits/rejected": 1.3104605674743652, |
|
"logps/chosen": -508.5628967285156, |
|
"logps/rejected": -1070.4361572265625, |
|
"loss": 0.0549, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.747432231903076, |
|
"rewards/margins": 5.604626178741455, |
|
"rewards/rejected": -10.352057456970215, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.8587606165460837, |
|
"grad_norm": 4.022861957550049, |
|
"learning_rate": 2.9774969441039247e-07, |
|
"logits/chosen": 1.208437204360962, |
|
"logits/rejected": 1.0336554050445557, |
|
"logps/chosen": -515.5606689453125, |
|
"logps/rejected": -1061.59375, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.836429595947266, |
|
"rewards/margins": 5.419116020202637, |
|
"rewards/rejected": -10.255544662475586, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.8619062598301352, |
|
"grad_norm": 2.5656957626342773, |
|
"learning_rate": 2.848896911981575e-07, |
|
"logits/chosen": 1.585086703300476, |
|
"logits/rejected": 1.302536964416504, |
|
"logps/chosen": -475.01495361328125, |
|
"logps/rejected": -1078.8958740234375, |
|
"loss": 0.067, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.43586540222168, |
|
"rewards/margins": 6.01693058013916, |
|
"rewards/rejected": -10.452796936035156, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.8650519031141869, |
|
"grad_norm": 1.7859119176864624, |
|
"learning_rate": 2.722967763845316e-07, |
|
"logits/chosen": 1.4026825428009033, |
|
"logits/rejected": 1.267093539237976, |
|
"logps/chosen": -501.826171875, |
|
"logps/rejected": -1080.474365234375, |
|
"loss": 0.048, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.700644016265869, |
|
"rewards/margins": 5.758360862731934, |
|
"rewards/rejected": -10.459004402160645, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8681975463982384, |
|
"grad_norm": 2.9971580505371094, |
|
"learning_rate": 2.5997246836859335e-07, |
|
"logits/chosen": 1.5809880495071411, |
|
"logits/rejected": 1.286370038986206, |
|
"logps/chosen": -566.8330688476562, |
|
"logps/rejected": -1177.3800048828125, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.326024532318115, |
|
"rewards/margins": 6.073489189147949, |
|
"rewards/rejected": -11.399515151977539, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8713431896822901, |
|
"grad_norm": 3.9024531841278076, |
|
"learning_rate": 2.479182531619778e-07, |
|
"logits/chosen": 1.4867026805877686, |
|
"logits/rejected": 1.2765355110168457, |
|
"logps/chosen": -556.2525634765625, |
|
"logps/rejected": -1071.0406494140625, |
|
"loss": 0.0591, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.220080375671387, |
|
"rewards/margins": 5.128342628479004, |
|
"rewards/rejected": -10.348422050476074, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8744888329663416, |
|
"grad_norm": 3.937291383743286, |
|
"learning_rate": 2.3613558420969988e-07, |
|
"logits/chosen": 1.4205322265625, |
|
"logits/rejected": 1.195991039276123, |
|
"logps/chosen": -532.1976318359375, |
|
"logps/rejected": -1121.0986328125, |
|
"loss": 0.0611, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -4.998320579528809, |
|
"rewards/margins": 5.846045017242432, |
|
"rewards/rejected": -10.844365119934082, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.8776344762503933, |
|
"grad_norm": 3.083104133605957, |
|
"learning_rate": 2.2462588221490445e-07, |
|
"logits/chosen": 1.5729047060012817, |
|
"logits/rejected": 1.2844527959823608, |
|
"logps/chosen": -530.4925537109375, |
|
"logps/rejected": -1121.37939453125, |
|
"loss": 0.0561, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.973997592926025, |
|
"rewards/margins": 5.868443489074707, |
|
"rewards/rejected": -10.842439651489258, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.8807801195344448, |
|
"grad_norm": 2.435751438140869, |
|
"learning_rate": 2.1339053496756413e-07, |
|
"logits/chosen": 1.7648521661758423, |
|
"logits/rejected": 1.4746003150939941, |
|
"logps/chosen": -520.1046142578125, |
|
"logps/rejected": -1067.98486328125, |
|
"loss": 0.0546, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.869875907897949, |
|
"rewards/margins": 5.440675258636475, |
|
"rewards/rejected": -10.310551643371582, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8807801195344448, |
|
"eval_logits/chosen": 2.1606998443603516, |
|
"eval_logits/rejected": 1.9595870971679688, |
|
"eval_logps/chosen": -513.6257934570312, |
|
"eval_logps/rejected": -1117.5887451171875, |
|
"eval_loss": 0.02870255894958973, |
|
"eval_rewards/accuracies": 0.925000011920929, |
|
"eval_rewards/chosen": -4.808547019958496, |
|
"eval_rewards/margins": 6.008353233337402, |
|
"eval_rewards/rejected": -10.816901206970215, |
|
"eval_runtime": 215.3283, |
|
"eval_samples_per_second": 99.462, |
|
"eval_steps_per_second": 1.556, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8839257628184963, |
|
"grad_norm": 4.853071212768555, |
|
"learning_rate": 2.0243089717714465e-07, |
|
"logits/chosen": 1.6696542501449585, |
|
"logits/rejected": 1.3343422412872314, |
|
"logps/chosen": -513.935546875, |
|
"logps/rejected": -1139.2392578125, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.819055080413818, |
|
"rewards/margins": 6.217933654785156, |
|
"rewards/rejected": -11.036989212036133, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.887071406102548, |
|
"grad_norm": 3.8749752044677734, |
|
"learning_rate": 1.9174829030926157e-07, |
|
"logits/chosen": 1.3861050605773926, |
|
"logits/rejected": 1.3259289264678955, |
|
"logps/chosen": -569.0123901367188, |
|
"logps/rejected": -1085.4583740234375, |
|
"loss": 0.0498, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.3545613288879395, |
|
"rewards/margins": 5.1358819007873535, |
|
"rewards/rejected": -10.490442276000977, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.8902170493865995, |
|
"grad_norm": 1.685403823852539, |
|
"learning_rate": 1.8134400242634214e-07, |
|
"logits/chosen": 1.3578906059265137, |
|
"logits/rejected": 1.2923089265823364, |
|
"logps/chosen": -559.1904296875, |
|
"logps/rejected": -1093.1944580078125, |
|
"loss": 0.066, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.261031150817871, |
|
"rewards/margins": 5.333484649658203, |
|
"rewards/rejected": -10.59451675415039, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.8933626926706512, |
|
"grad_norm": 3.112783670425415, |
|
"learning_rate": 1.7121928803231714e-07, |
|
"logits/chosen": 1.6115779876708984, |
|
"logits/rejected": 1.3464069366455078, |
|
"logps/chosen": -504.38385009765625, |
|
"logps/rejected": -1135.041259765625, |
|
"loss": 0.054, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.714786052703857, |
|
"rewards/margins": 6.264063835144043, |
|
"rewards/rejected": -10.978850364685059, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8965083359547027, |
|
"grad_norm": 1.6297857761383057, |
|
"learning_rate": 1.613753679213581e-07, |
|
"logits/chosen": 1.4668216705322266, |
|
"logits/rejected": 1.3703067302703857, |
|
"logps/chosen": -546.5037841796875, |
|
"logps/rejected": -1063.0972900390625, |
|
"loss": 0.0533, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.121490478515625, |
|
"rewards/margins": 5.16286563873291, |
|
"rewards/rejected": -10.284356117248535, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.8996539792387543, |
|
"grad_norm": 0.9355494379997253, |
|
"learning_rate": 1.5181342903067803e-07, |
|
"logits/chosen": 1.5894020795822144, |
|
"logits/rejected": 1.4288842678070068, |
|
"logps/chosen": -475.189453125, |
|
"logps/rejected": -1045.166259765625, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.432465553283691, |
|
"rewards/margins": 5.66586446762085, |
|
"rewards/rejected": -10.098329544067383, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.9027996225228059, |
|
"grad_norm": 3.7816286087036133, |
|
"learning_rate": 1.4253462429741877e-07, |
|
"logits/chosen": 1.426163673400879, |
|
"logits/rejected": 1.2298691272735596, |
|
"logps/chosen": -501.76739501953125, |
|
"logps/rejected": -1122.6923828125, |
|
"loss": 0.0571, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.693778038024902, |
|
"rewards/margins": 6.179869651794434, |
|
"rewards/rejected": -10.873647689819336, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.9059452658068575, |
|
"grad_norm": 3.62336802482605, |
|
"learning_rate": 1.335400725196309e-07, |
|
"logits/chosen": 1.4734015464782715, |
|
"logits/rejected": 1.3798444271087646, |
|
"logps/chosen": -544.997314453125, |
|
"logps/rejected": -1142.9996337890625, |
|
"loss": 0.0782, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.112950325012207, |
|
"rewards/margins": 5.9460601806640625, |
|
"rewards/rejected": -11.05901050567627, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 3.0453686714172363, |
|
"learning_rate": 1.2483085822137752e-07, |
|
"logits/chosen": 1.3678343296051025, |
|
"logits/rejected": 1.3175466060638428, |
|
"logps/chosen": -511.88134765625, |
|
"logps/rejected": -1121.2176513671875, |
|
"loss": 0.0579, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.770578861236572, |
|
"rewards/margins": 6.093011379241943, |
|
"rewards/rejected": -10.863590240478516, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.9122365523749607, |
|
"grad_norm": 2.422604560852051, |
|
"learning_rate": 1.16408031521964e-07, |
|
"logits/chosen": 1.3921037912368774, |
|
"logits/rejected": 1.2343624830245972, |
|
"logps/chosen": -517.9652099609375, |
|
"logps/rejected": -1084.047119140625, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.85400390625, |
|
"rewards/margins": 5.625793933868408, |
|
"rewards/rejected": -10.479796409606934, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9122365523749607, |
|
"eval_logits/chosen": 2.1647133827209473, |
|
"eval_logits/rejected": 1.969600796699524, |
|
"eval_logps/chosen": -502.4717712402344, |
|
"eval_logps/rejected": -1104.9371337890625, |
|
"eval_loss": 0.028775138780474663, |
|
"eval_rewards/accuracies": 0.9242537021636963, |
|
"eval_rewards/chosen": -4.697007656097412, |
|
"eval_rewards/margins": 5.993377208709717, |
|
"eval_rewards/rejected": -10.690384864807129, |
|
"eval_runtime": 215.1474, |
|
"eval_samples_per_second": 99.546, |
|
"eval_steps_per_second": 1.557, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.9153821956590122, |
|
"grad_norm": 2.0521950721740723, |
|
"learning_rate": 1.0827260800932132e-07, |
|
"logits/chosen": 1.2516539096832275, |
|
"logits/rejected": 1.1419090032577515, |
|
"logps/chosen": -541.690185546875, |
|
"logps/rejected": -1094.5318603515625, |
|
"loss": 0.0505, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.085227012634277, |
|
"rewards/margins": 5.53399658203125, |
|
"rewards/rejected": -10.619223594665527, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.9185278389430639, |
|
"grad_norm": 1.5800129175186157, |
|
"learning_rate": 1.0042556861754981e-07, |
|
"logits/chosen": 1.3491766452789307, |
|
"logits/rejected": 1.2649667263031006, |
|
"logps/chosen": -520.3859252929688, |
|
"logps/rejected": -1101.0565185546875, |
|
"loss": 0.0559, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.897294998168945, |
|
"rewards/margins": 5.766488075256348, |
|
"rewards/rejected": -10.663783073425293, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9216734822271154, |
|
"grad_norm": 4.585078239440918, |
|
"learning_rate": 9.286785950864297e-08, |
|
"logits/chosen": 1.4685800075531006, |
|
"logits/rejected": 1.3016550540924072, |
|
"logps/chosen": -514.4509887695312, |
|
"logps/rejected": -1111.2257080078125, |
|
"loss": 0.0676, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.8128156661987305, |
|
"rewards/margins": 5.959506034851074, |
|
"rewards/rejected": -10.772321701049805, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9248191255111671, |
|
"grad_norm": 2.5994062423706055, |
|
"learning_rate": 8.560039195840226e-08, |
|
"logits/chosen": 1.4919440746307373, |
|
"logits/rejected": 1.1589787006378174, |
|
"logps/chosen": -523.2716674804688, |
|
"logps/rejected": -1082.82275390625, |
|
"loss": 0.0663, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.89348840713501, |
|
"rewards/margins": 5.579774379730225, |
|
"rewards/rejected": -10.473262786865234, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.9279647687952186, |
|
"grad_norm": 1.1528620719909668, |
|
"learning_rate": 7.86240422465609e-08, |
|
"logits/chosen": 1.3870362043380737, |
|
"logits/rejected": 1.323654294013977, |
|
"logps/chosen": -536.1802978515625, |
|
"logps/rejected": -1112.9925537109375, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.029669761657715, |
|
"rewards/margins": 5.733022212982178, |
|
"rewards/rejected": -10.762693405151367, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9311104120792703, |
|
"grad_norm": 2.2496023178100586, |
|
"learning_rate": 7.193965155112475e-08, |
|
"logits/chosen": 1.3429162502288818, |
|
"logits/rejected": 1.1649806499481201, |
|
"logps/chosen": -537.1536254882812, |
|
"logps/rejected": -1076.180908203125, |
|
"loss": 0.0602, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.0520339012146, |
|
"rewards/margins": 5.354105472564697, |
|
"rewards/rejected": -10.406140327453613, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.9342560553633218, |
|
"grad_norm": 1.6158769130706787, |
|
"learning_rate": 6.554802584694791e-08, |
|
"logits/chosen": 1.508937954902649, |
|
"logits/rejected": 1.3928695917129517, |
|
"logps/chosen": -454.51019287109375, |
|
"logps/rejected": -1097.5311279296875, |
|
"loss": 0.0531, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.223744869232178, |
|
"rewards/margins": 6.399988651275635, |
|
"rewards/rejected": -10.623734474182129, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.9374016986473734, |
|
"grad_norm": 4.760510444641113, |
|
"learning_rate": 5.9449935808549576e-08, |
|
"logits/chosen": 1.424422025680542, |
|
"logits/rejected": 1.188518762588501, |
|
"logps/chosen": -521.4863891601562, |
|
"logps/rejected": -1131.7774658203125, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.8670854568481445, |
|
"rewards/margins": 6.077136039733887, |
|
"rewards/rejected": -10.944222450256348, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.940547341931425, |
|
"grad_norm": 1.8301982879638672, |
|
"learning_rate": 5.3646116717191723e-08, |
|
"logits/chosen": 1.7392746210098267, |
|
"logits/rejected": 1.4524823427200317, |
|
"logps/chosen": -491.2704162597656, |
|
"logps/rejected": -1077.2618408203125, |
|
"loss": 0.0505, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.597735404968262, |
|
"rewards/margins": 5.812699794769287, |
|
"rewards/rejected": -10.41043472290039, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.9436929852154765, |
|
"grad_norm": 1.969045877456665, |
|
"learning_rate": 4.813726837222116e-08, |
|
"logits/chosen": 1.5177466869354248, |
|
"logits/rejected": 1.5681421756744385, |
|
"logps/chosen": -530.2708129882812, |
|
"logps/rejected": -1052.5723876953125, |
|
"loss": 0.0623, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.96749210357666, |
|
"rewards/margins": 5.2094526290893555, |
|
"rewards/rejected": -10.176944732666016, |
|
"step": 3000 |
|
}, |
|
    {
      "epoch": 0.9436929852154765,
      "eval_logits/chosen": 2.143676996231079,
      "eval_logits/rejected": 1.9440104961395264,
      "eval_logps/chosen": -503.750732421875,
      "eval_logps/rejected": -1103.3302001953125,
      "eval_loss": 0.0286338459700346,
      "eval_rewards/accuracies": 0.9268656969070435,
      "eval_rewards/chosen": -4.709796905517578,
      "eval_rewards/margins": 5.964517116546631,
      "eval_rewards/rejected": -10.674314498901367,
      "eval_runtime": 215.2202,
      "eval_samples_per_second": 99.512,
      "eval_steps_per_second": 1.557,
      "step": 3000
    },
    {
      "epoch": 0.9468386284995282,
      "grad_norm": 4.614864826202393,
      "learning_rate": 4.292405500669061e-08,
      "logits/chosen": 1.4414303302764893,
      "logits/rejected": 1.157649278640747,
      "logps/chosen": -522.2523193359375,
      "logps/rejected": -1145.5069580078125,
      "loss": 0.0761,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -4.891526699066162,
      "rewards/margins": 6.2130513191223145,
      "rewards/rejected": -11.104578018188477,
      "step": 3010
    },
    {
      "epoch": 0.9499842717835797,
      "grad_norm": 2.128798246383667,
      "learning_rate": 3.8007105207268355e-08,
      "logits/chosen": 1.5095900297164917,
      "logits/rejected": 1.2341318130493164,
      "logps/chosen": -525.769775390625,
      "logps/rejected": -1126.2525634765625,
      "loss": 0.059,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -4.912005424499512,
      "rewards/margins": 5.982682228088379,
      "rewards/rejected": -10.894686698913574,
      "step": 3020
    },
    {
      "epoch": 0.9531299150676313,
      "grad_norm": 3.470418930053711,
      "learning_rate": 3.338701183844689e-08,
      "logits/chosen": 1.2871098518371582,
      "logits/rejected": 1.203657865524292,
      "logps/chosen": -469.0284118652344,
      "logps/rejected": -1091.468994140625,
      "loss": 0.0596,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -4.372101783752441,
      "rewards/margins": 6.188841819763184,
      "rewards/rejected": -10.560943603515625,
      "step": 3030
    },
    {
      "epoch": 0.9562755583516829,
      "grad_norm": 2.421786308288574,
      "learning_rate": 2.9064331971056515e-08,
      "logits/chosen": 1.432936668395996,
      "logits/rejected": 1.4242966175079346,
      "logps/chosen": -510.3072204589844,
      "logps/rejected": -1054.8065185546875,
      "loss": 0.0593,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -4.7712721824646,
      "rewards/margins": 5.450275421142578,
      "rewards/rejected": -10.221548080444336,
      "step": 3040
    },
    {
      "epoch": 0.9594212016357345,
      "grad_norm": 3.0846569538116455,
      "learning_rate": 2.503958681509683e-08,
      "logits/chosen": 1.566232681274414,
      "logits/rejected": 1.3942068815231323,
      "logps/chosen": -524.6204833984375,
      "logps/rejected": -1089.9156494140625,
      "loss": 0.0568,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -4.901736736297607,
      "rewards/margins": 5.649755954742432,
      "rewards/rejected": -10.551492691040039,
      "step": 3050
    },
    {
      "epoch": 0.9625668449197861,
      "grad_norm": 4.23090934753418,
      "learning_rate": 2.1313261656891737e-08,
      "logits/chosen": 1.4874424934387207,
      "logits/rejected": 1.4000155925750732,
      "logps/chosen": -557.3499145507812,
      "logps/rejected": -1110.4896240234375,
      "loss": 0.0655,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -5.248296737670898,
      "rewards/margins": 5.522107124328613,
      "rewards/rejected": -10.770402908325195,
      "step": 3060
    },
    {
      "epoch": 0.9657124882038377,
      "grad_norm": 1.998992681503296,
      "learning_rate": 1.788580580057514e-08,
      "logits/chosen": 1.43699049949646,
      "logits/rejected": 1.2418830394744873,
      "logps/chosen": -558.634765625,
      "logps/rejected": -1058.3809814453125,
      "loss": 0.0662,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -5.254110336303711,
      "rewards/margins": 4.97817850112915,
      "rewards/rejected": -10.23228931427002,
      "step": 3070
    },
    {
      "epoch": 0.9688581314878892,
      "grad_norm": 3.4238898754119873,
      "learning_rate": 1.4757632513916764e-08,
      "logits/chosen": 1.4360311031341553,
      "logits/rejected": 1.2258044481277466,
      "logps/chosen": -472.9986267089844,
      "logps/rejected": -1088.400634765625,
      "loss": 0.0615,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -4.3953752517700195,
      "rewards/margins": 6.134710788726807,
      "rewards/rejected": -10.530085563659668,
      "step": 3080
    },
    {
      "epoch": 0.9720037747719409,
      "grad_norm": 2.3061940670013428,
      "learning_rate": 1.1929118978490361e-08,
      "logits/chosen": 1.4735639095306396,
      "logits/rejected": 1.3414032459259033,
      "logps/chosen": -499.18719482421875,
      "logps/rejected": -1091.6337890625,
      "loss": 0.0589,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -4.663874626159668,
      "rewards/margins": 5.900238990783691,
      "rewards/rejected": -10.564112663269043,
      "step": 3090
    },
    {
      "epoch": 0.9751494180559924,
      "grad_norm": 3.2079808712005615,
      "learning_rate": 9.400606244196753e-09,
      "logits/chosen": 1.5242373943328857,
      "logits/rejected": 1.161348581314087,
      "logps/chosen": -513.7073364257812,
      "logps/rejected": -1092.349365234375,
      "loss": 0.0593,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -4.811236381530762,
      "rewards/margins": 5.742733955383301,
      "rewards/rejected": -10.553971290588379,
      "step": 3100
    },
    {
      "epoch": 0.9751494180559924,
      "eval_logits/chosen": 2.146430015563965,
      "eval_logits/rejected": 1.9468775987625122,
      "eval_logps/chosen": -502.61627197265625,
      "eval_logps/rejected": -1101.212158203125,
      "eval_loss": 0.02869391068816185,
      "eval_rewards/accuracies": 0.9276119470596313,
      "eval_rewards/chosen": -4.698452949523926,
      "eval_rewards/margins": 5.95468282699585,
      "eval_rewards/rejected": -10.653135299682617,
      "eval_runtime": 215.2746,
      "eval_samples_per_second": 99.487,
      "eval_steps_per_second": 1.556,
      "step": 3100
    },
    {
      "epoch": 0.9782950613400441,
      "grad_norm": 3.9912333488464355,
      "learning_rate": 7.172399188140611e-09,
      "logits/chosen": 1.643225908279419,
      "logits/rejected": 1.293428659439087,
      "logps/chosen": -533.8253784179688,
      "logps/rejected": -1149.5816650390625,
      "loss": 0.0602,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -5.007025718688965,
      "rewards/margins": 6.1268744468688965,
      "rewards/rejected": -11.13390064239502,
      "step": 3110
    },
    {
      "epoch": 0.9814407046240956,
      "grad_norm": 1.301156997680664,
      "learning_rate": 5.244766477869034e-09,
      "logits/chosen": 1.4164081811904907,
      "logits/rejected": 1.1432323455810547,
      "logps/chosen": -517.7205200195312,
      "logps/rejected": -1175.121826171875,
      "loss": 0.0563,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -4.851759910583496,
      "rewards/margins": 6.547132968902588,
      "rewards/rejected": -11.398893356323242,
      "step": 3120
    },
    {
      "epoch": 0.9845863479081473,
      "grad_norm": 3.761977195739746,
      "learning_rate": 3.617940538978848e-09,
      "logits/chosen": 1.3819186687469482,
      "logits/rejected": 1.1908156871795654,
      "logps/chosen": -517.9227294921875,
      "logps/rejected": -1027.6142578125,
      "loss": 0.0452,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -4.846681594848633,
      "rewards/margins": 5.071451187133789,
      "rewards/rejected": -9.918134689331055,
      "step": 3130
    },
    {
      "epoch": 0.9877319911921988,
      "grad_norm": 2.8583881855010986,
      "learning_rate": 2.2921175270890217e-09,
      "logits/chosen": 1.5854167938232422,
      "logits/rejected": 1.247761845588684,
      "logps/chosen": -521.75,
      "logps/rejected": -1083.730224609375,
      "loss": 0.0674,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -4.886361122131348,
      "rewards/margins": 5.578226089477539,
      "rewards/rejected": -10.464587211608887,
      "step": 3140
    },
    {
      "epoch": 0.9908776344762504,
      "grad_norm": 1.9092971086502075,
      "learning_rate": 1.2674573041909776e-09,
      "logits/chosen": 1.2872904539108276,
      "logits/rejected": 1.259684443473816,
      "logps/chosen": -520.8709106445312,
      "logps/rejected": -1122.4615478515625,
      "loss": 0.0591,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -4.865385055541992,
      "rewards/margins": 6.030797958374023,
      "rewards/rejected": -10.896183967590332,
      "step": 3150
    },
    {
      "epoch": 0.994023277760302,
      "grad_norm": 4.959075927734375,
      "learning_rate": 5.440834193726208e-10,
      "logits/chosen": 1.6272627115249634,
      "logits/rejected": 1.4106671810150146,
      "logps/chosen": -505.4927673339844,
      "logps/rejected": -1114.03173828125,
      "loss": 0.0573,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -4.746209144592285,
      "rewards/margins": 6.043259620666504,
      "rewards/rejected": -10.789468765258789,
      "step": 3160
    },
    {
      "epoch": 0.9971689210443536,
      "grad_norm": 3.2813379764556885,
      "learning_rate": 1.2208309392081064e-10,
      "logits/chosen": 1.2191925048828125,
      "logits/rejected": 1.0304396152496338,
      "logps/chosen": -555.1168212890625,
      "logps/rejected": -1115.899169921875,
      "loss": 0.0633,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -5.212588310241699,
      "rewards/margins": 5.577348709106445,
      "rewards/rejected": -10.789937973022461,
      "step": 3170
    },
    {
      "epoch": 1.0,
      "step": 3179,
      "total_flos": 0.0,
      "train_loss": 0.19036180805619987,
      "train_runtime": 15997.0818,
      "train_samples_per_second": 25.436,
      "train_steps_per_second": 0.199
    }
  ],
  "logging_steps": 10,
  "max_steps": 3179,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}