|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997172745264349, |
|
"eval_steps": 500, |
|
"global_step": 442, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0022618037885213456, |
|
"grad_norm": 131.355215075171, |
|
"learning_rate": 1.7777777777777777e-08, |
|
"logits/chosen": -11.149957656860352, |
|
"logits/rejected": -11.106039047241211, |
|
"logps/chosen": -0.4639046788215637, |
|
"logps/rejected": -0.459951788187027, |
|
"loss": 4.9394, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -4.639046669006348, |
|
"rewards/margins": -0.039528995752334595, |
|
"rewards/rejected": -4.599517822265625, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004523607577042691, |
|
"grad_norm": 68.75410369813167, |
|
"learning_rate": 3.5555555555555554e-08, |
|
"logits/chosen": -10.890952110290527, |
|
"logits/rejected": -10.69871711730957, |
|
"logps/chosen": -0.5820316672325134, |
|
"logps/rejected": -0.5644893646240234, |
|
"loss": 5.3979, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -5.820316314697266, |
|
"rewards/margins": -0.17542320489883423, |
|
"rewards/rejected": -5.644893646240234, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006785411365564037, |
|
"grad_norm": 98.10181248017503, |
|
"learning_rate": 5.333333333333333e-08, |
|
"logits/chosen": -10.386991500854492, |
|
"logits/rejected": -10.3389892578125, |
|
"logps/chosen": -0.7467580437660217, |
|
"logps/rejected": -0.7350905537605286, |
|
"loss": 5.3645, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -7.467580318450928, |
|
"rewards/margins": -0.11667439341545105, |
|
"rewards/rejected": -7.350905418395996, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.009047215154085382, |
|
"grad_norm": 85.65000023027866, |
|
"learning_rate": 7.111111111111111e-08, |
|
"logits/chosen": -10.73530387878418, |
|
"logits/rejected": -10.564247131347656, |
|
"logps/chosen": -0.5459556579589844, |
|
"logps/rejected": -0.4875364899635315, |
|
"loss": 5.1509, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -5.459556579589844, |
|
"rewards/margins": -0.584191620349884, |
|
"rewards/rejected": -4.875364780426025, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01130901894260673, |
|
"grad_norm": 74.9791226991399, |
|
"learning_rate": 8.888888888888888e-08, |
|
"logits/chosen": -10.528997421264648, |
|
"logits/rejected": -9.97251033782959, |
|
"logps/chosen": -0.6336177587509155, |
|
"logps/rejected": -0.6410748958587646, |
|
"loss": 5.2521, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -6.336178302764893, |
|
"rewards/margins": 0.07457125931978226, |
|
"rewards/rejected": -6.410749435424805, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.013570822731128074, |
|
"grad_norm": 85.29375010182012, |
|
"learning_rate": 1.0666666666666666e-07, |
|
"logits/chosen": -10.442211151123047, |
|
"logits/rejected": -10.494767189025879, |
|
"logps/chosen": -0.5163740515708923, |
|
"logps/rejected": -0.5633006691932678, |
|
"loss": 5.3068, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -5.163741111755371, |
|
"rewards/margins": 0.46926558017730713, |
|
"rewards/rejected": -5.6330060958862305, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01583262651964942, |
|
"grad_norm": 72.34857602748069, |
|
"learning_rate": 1.2444444444444443e-07, |
|
"logits/chosen": -10.570417404174805, |
|
"logits/rejected": -10.182153701782227, |
|
"logps/chosen": -0.49319958686828613, |
|
"logps/rejected": -0.5546016693115234, |
|
"loss": 4.7171, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -4.931995868682861, |
|
"rewards/margins": 0.6140204668045044, |
|
"rewards/rejected": -5.546016693115234, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.018094430308170765, |
|
"grad_norm": 96.60351151716078, |
|
"learning_rate": 1.4222222222222222e-07, |
|
"logits/chosen": -11.686549186706543, |
|
"logits/rejected": -11.599323272705078, |
|
"logps/chosen": -0.654168963432312, |
|
"logps/rejected": -0.7179521322250366, |
|
"loss": 5.2239, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -6.541690349578857, |
|
"rewards/margins": 0.6378321051597595, |
|
"rewards/rejected": -7.179522514343262, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.020356234096692113, |
|
"grad_norm": 69.08528348151097, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -10.81006908416748, |
|
"logits/rejected": -10.815924644470215, |
|
"logps/chosen": -0.6012924313545227, |
|
"logps/rejected": -0.6476706266403198, |
|
"loss": 5.1255, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -6.0129241943359375, |
|
"rewards/margins": 0.46378093957901, |
|
"rewards/rejected": -6.476705074310303, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02261803788521346, |
|
"grad_norm": 98.9910788377654, |
|
"learning_rate": 1.7777777777777776e-07, |
|
"logits/chosen": -11.031579971313477, |
|
"logits/rejected": -10.416993141174316, |
|
"logps/chosen": -0.534875750541687, |
|
"logps/rejected": -0.5605251789093018, |
|
"loss": 5.0563, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.348757743835449, |
|
"rewards/margins": 0.25649386644363403, |
|
"rewards/rejected": -5.605251789093018, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024879841673734804, |
|
"grad_norm": 92.47375199683604, |
|
"learning_rate": 1.9555555555555555e-07, |
|
"logits/chosen": -11.211597442626953, |
|
"logits/rejected": -10.974644660949707, |
|
"logps/chosen": -0.5139535665512085, |
|
"logps/rejected": -0.5967007279396057, |
|
"loss": 4.9212, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -5.139535903930664, |
|
"rewards/margins": 0.8274715542793274, |
|
"rewards/rejected": -5.967007637023926, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02714164546225615, |
|
"grad_norm": 109.2539243326917, |
|
"learning_rate": 2.133333333333333e-07, |
|
"logits/chosen": -10.3906831741333, |
|
"logits/rejected": -10.407954216003418, |
|
"logps/chosen": -0.5846738815307617, |
|
"logps/rejected": -0.5586296319961548, |
|
"loss": 5.2254, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -5.846738338470459, |
|
"rewards/margins": -0.26044273376464844, |
|
"rewards/rejected": -5.586296081542969, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.029403449250777494, |
|
"grad_norm": 93.26949602496981, |
|
"learning_rate": 2.3111111111111107e-07, |
|
"logits/chosen": -11.434279441833496, |
|
"logits/rejected": -11.00756549835205, |
|
"logps/chosen": -0.57530277967453, |
|
"logps/rejected": -0.5521742105484009, |
|
"loss": 5.3681, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -5.753026962280273, |
|
"rewards/margins": -0.23128610849380493, |
|
"rewards/rejected": -5.52174186706543, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03166525303929884, |
|
"grad_norm": 54.76880243693634, |
|
"learning_rate": 2.4888888888888886e-07, |
|
"logits/chosen": -11.06928825378418, |
|
"logits/rejected": -10.667929649353027, |
|
"logps/chosen": -0.49921348690986633, |
|
"logps/rejected": -0.5616152882575989, |
|
"loss": 4.7488, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.992135047912598, |
|
"rewards/margins": 0.6240180134773254, |
|
"rewards/rejected": -5.616153240203857, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.033927056827820185, |
|
"grad_norm": 71.5182395693498, |
|
"learning_rate": 2.666666666666666e-07, |
|
"logits/chosen": -11.895730972290039, |
|
"logits/rejected": -11.64004135131836, |
|
"logps/chosen": -0.49031415581703186, |
|
"logps/rejected": -0.5405735373497009, |
|
"loss": 4.9109, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.903141498565674, |
|
"rewards/margins": 0.5025936961174011, |
|
"rewards/rejected": -5.405735015869141, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03618886061634153, |
|
"grad_norm": 79.7034591294439, |
|
"learning_rate": 2.8444444444444443e-07, |
|
"logits/chosen": -10.60659122467041, |
|
"logits/rejected": -10.282760620117188, |
|
"logps/chosen": -0.6062531471252441, |
|
"logps/rejected": -0.5618928670883179, |
|
"loss": 5.1619, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -6.062531471252441, |
|
"rewards/margins": -0.44360262155532837, |
|
"rewards/rejected": -5.618928909301758, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.038450664404862875, |
|
"grad_norm": 87.84593204498888, |
|
"learning_rate": 3.022222222222222e-07, |
|
"logits/chosen": -12.490971565246582, |
|
"logits/rejected": -12.19153881072998, |
|
"logps/chosen": -0.41767269372940063, |
|
"logps/rejected": -0.40474578738212585, |
|
"loss": 5.3782, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -4.176727294921875, |
|
"rewards/margins": -0.12926939129829407, |
|
"rewards/rejected": -4.047458171844482, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04071246819338423, |
|
"grad_norm": 110.95316335628588, |
|
"learning_rate": 3.2e-07, |
|
"logits/chosen": -11.399320602416992, |
|
"logits/rejected": -11.420541763305664, |
|
"logps/chosen": -0.6453328728675842, |
|
"logps/rejected": -0.6450071334838867, |
|
"loss": 5.0468, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -6.453329086303711, |
|
"rewards/margins": -0.0032582059502601624, |
|
"rewards/rejected": -6.450070381164551, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04297427198190557, |
|
"grad_norm": 86.46973948808649, |
|
"learning_rate": 3.3777777777777777e-07, |
|
"logits/chosen": -12.149145126342773, |
|
"logits/rejected": -12.085639953613281, |
|
"logps/chosen": -0.49516329169273376, |
|
"logps/rejected": -0.41885480284690857, |
|
"loss": 5.052, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -4.951632499694824, |
|
"rewards/margins": -0.7630849480628967, |
|
"rewards/rejected": -4.188547611236572, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04523607577042692, |
|
"grad_norm": 71.36928249846149, |
|
"learning_rate": 3.5555555555555553e-07, |
|
"logits/chosen": -10.735100746154785, |
|
"logits/rejected": -10.853598594665527, |
|
"logps/chosen": -0.5569137930870056, |
|
"logps/rejected": -0.5804443955421448, |
|
"loss": 4.6835, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -5.5691375732421875, |
|
"rewards/margins": 0.2353065013885498, |
|
"rewards/rejected": -5.804443836212158, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04749787955894826, |
|
"grad_norm": 61.042781292962395, |
|
"learning_rate": 3.7333333333333334e-07, |
|
"logits/chosen": -11.995122909545898, |
|
"logits/rejected": -11.363445281982422, |
|
"logps/chosen": -0.4467337131500244, |
|
"logps/rejected": -0.4861924648284912, |
|
"loss": 5.0424, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.467337131500244, |
|
"rewards/margins": 0.3945879340171814, |
|
"rewards/rejected": -4.86192512512207, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04975968334746961, |
|
"grad_norm": 89.07868369848414, |
|
"learning_rate": 3.911111111111111e-07, |
|
"logits/chosen": -11.574544906616211, |
|
"logits/rejected": -11.458039283752441, |
|
"logps/chosen": -0.5010548233985901, |
|
"logps/rejected": -0.5385463833808899, |
|
"loss": 5.0061, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -5.010547637939453, |
|
"rewards/margins": 0.37491610646247864, |
|
"rewards/rejected": -5.385463714599609, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05202148713599095, |
|
"grad_norm": 169.3436229453717, |
|
"learning_rate": 4.0888888888888886e-07, |
|
"logits/chosen": -10.919013977050781, |
|
"logits/rejected": -10.827438354492188, |
|
"logps/chosen": -0.5667375326156616, |
|
"logps/rejected": -0.5423346161842346, |
|
"loss": 4.978, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.667375564575195, |
|
"rewards/margins": -0.2440294474363327, |
|
"rewards/rejected": -5.423345565795898, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0542832909245123, |
|
"grad_norm": 58.12255640669961, |
|
"learning_rate": 4.266666666666666e-07, |
|
"logits/chosen": -12.290349960327148, |
|
"logits/rejected": -12.1292142868042, |
|
"logps/chosen": -0.3345947861671448, |
|
"logps/rejected": -0.37946847081184387, |
|
"loss": 4.7173, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -3.3459479808807373, |
|
"rewards/margins": 0.44873636960983276, |
|
"rewards/rejected": -3.794684410095215, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05654509471303364, |
|
"grad_norm": 93.15623268640158, |
|
"learning_rate": 4.4444444444444444e-07, |
|
"logits/chosen": -11.1387939453125, |
|
"logits/rejected": -10.918662071228027, |
|
"logps/chosen": -0.48852428793907166, |
|
"logps/rejected": -0.5354989767074585, |
|
"loss": 4.8657, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -4.8852434158325195, |
|
"rewards/margins": 0.46974682807922363, |
|
"rewards/rejected": -5.354990005493164, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05880689850155499, |
|
"grad_norm": 64.22760084086403, |
|
"learning_rate": 4.6222222222222214e-07, |
|
"logits/chosen": -11.181513786315918, |
|
"logits/rejected": -10.725030899047852, |
|
"logps/chosen": -0.43731987476348877, |
|
"logps/rejected": -0.48887380957603455, |
|
"loss": 4.7432, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -4.373198986053467, |
|
"rewards/margins": 0.5155391097068787, |
|
"rewards/rejected": -4.88873815536499, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.061068702290076333, |
|
"grad_norm": 86.04795850026115, |
|
"learning_rate": 4.8e-07, |
|
"logits/chosen": -10.508721351623535, |
|
"logits/rejected": -10.471704483032227, |
|
"logps/chosen": -0.4588507413864136, |
|
"logps/rejected": -0.5172092914581299, |
|
"loss": 4.8766, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.588507175445557, |
|
"rewards/margins": 0.5835859775543213, |
|
"rewards/rejected": -5.172093391418457, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06333050607859768, |
|
"grad_norm": 74.38153566770265, |
|
"learning_rate": 4.977777777777777e-07, |
|
"logits/chosen": -10.403984069824219, |
|
"logits/rejected": -10.611076354980469, |
|
"logps/chosen": -0.47997862100601196, |
|
"logps/rejected": -0.4639643728733063, |
|
"loss": 4.9471, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -4.79978609085083, |
|
"rewards/margins": -0.16014233231544495, |
|
"rewards/rejected": -4.639643669128418, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06559230986711903, |
|
"grad_norm": 71.67345566558542, |
|
"learning_rate": 5.155555555555556e-07, |
|
"logits/chosen": -11.623980522155762, |
|
"logits/rejected": -11.187301635742188, |
|
"logps/chosen": -0.47259610891342163, |
|
"logps/rejected": -0.4480085074901581, |
|
"loss": 4.9723, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -4.725960731506348, |
|
"rewards/margins": -0.24587592482566833, |
|
"rewards/rejected": -4.480085372924805, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06785411365564037, |
|
"grad_norm": 67.06827734638706, |
|
"learning_rate": 5.333333333333332e-07, |
|
"logits/chosen": -11.207446098327637, |
|
"logits/rejected": -10.807114601135254, |
|
"logps/chosen": -0.37271052598953247, |
|
"logps/rejected": -0.4592744708061218, |
|
"loss": 4.7976, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -3.7271053791046143, |
|
"rewards/margins": 0.8656396865844727, |
|
"rewards/rejected": -4.592744827270508, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07011591744416172, |
|
"grad_norm": 72.20873132579756, |
|
"learning_rate": 5.511111111111111e-07, |
|
"logits/chosen": -10.57562255859375, |
|
"logits/rejected": -10.428007125854492, |
|
"logps/chosen": -0.43888184428215027, |
|
"logps/rejected": -0.464484840631485, |
|
"loss": 4.8315, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -4.388818264007568, |
|
"rewards/margins": 0.25603026151657104, |
|
"rewards/rejected": -4.644848823547363, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07237772123268306, |
|
"grad_norm": 65.66194705402224, |
|
"learning_rate": 5.688888888888889e-07, |
|
"logits/chosen": -10.86208724975586, |
|
"logits/rejected": -10.499285697937012, |
|
"logps/chosen": -0.4337802529335022, |
|
"logps/rejected": -0.4744107127189636, |
|
"loss": 5.0868, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -4.337802886962891, |
|
"rewards/margins": 0.4063045084476471, |
|
"rewards/rejected": -4.744107246398926, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07463952502120441, |
|
"grad_norm": 69.10527972763198, |
|
"learning_rate": 5.866666666666666e-07, |
|
"logits/chosen": -10.774530410766602, |
|
"logits/rejected": -10.699942588806152, |
|
"logps/chosen": -0.4071800112724304, |
|
"logps/rejected": -0.4233216643333435, |
|
"loss": 4.9605, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -4.0717997550964355, |
|
"rewards/margins": 0.1614171266555786, |
|
"rewards/rejected": -4.233217239379883, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07690132880972575, |
|
"grad_norm": 64.69922555278133, |
|
"learning_rate": 6.044444444444444e-07, |
|
"logits/chosen": -10.982305526733398, |
|
"logits/rejected": -10.901609420776367, |
|
"logps/chosen": -0.37619659304618835, |
|
"logps/rejected": -0.4010980725288391, |
|
"loss": 4.7433, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.7619662284851074, |
|
"rewards/margins": 0.2490149885416031, |
|
"rewards/rejected": -4.01098108291626, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0791631325982471, |
|
"grad_norm": 64.60258006742473, |
|
"learning_rate": 6.222222222222223e-07, |
|
"logits/chosen": -10.168670654296875, |
|
"logits/rejected": -10.236058235168457, |
|
"logps/chosen": -0.46076497435569763, |
|
"logps/rejected": -0.4832019805908203, |
|
"loss": 4.6019, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -4.607649803161621, |
|
"rewards/margins": 0.22437021136283875, |
|
"rewards/rejected": -4.832019805908203, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08142493638676845, |
|
"grad_norm": 66.16525403071286, |
|
"learning_rate": 6.4e-07, |
|
"logits/chosen": -10.984823226928711, |
|
"logits/rejected": -10.869633674621582, |
|
"logps/chosen": -0.4266025125980377, |
|
"logps/rejected": -0.44316184520721436, |
|
"loss": 4.6435, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -4.266025066375732, |
|
"rewards/margins": 0.16559378802776337, |
|
"rewards/rejected": -4.431619167327881, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08368674017528979, |
|
"grad_norm": 62.62054800184612, |
|
"learning_rate": 6.577777777777777e-07, |
|
"logits/chosen": -11.466747283935547, |
|
"logits/rejected": -10.947083473205566, |
|
"logps/chosen": -0.4133344292640686, |
|
"logps/rejected": -0.46087807416915894, |
|
"loss": 4.6937, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -4.1333441734313965, |
|
"rewards/margins": 0.47543561458587646, |
|
"rewards/rejected": -4.608780384063721, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.08594854396381114, |
|
"grad_norm": 68.71159791998826, |
|
"learning_rate": 6.755555555555555e-07, |
|
"logits/chosen": -10.58063793182373, |
|
"logits/rejected": -10.66105842590332, |
|
"logps/chosen": -0.4248150587081909, |
|
"logps/rejected": -0.4461151957511902, |
|
"loss": 4.6967, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -4.248150825500488, |
|
"rewards/margins": 0.2130012959241867, |
|
"rewards/rejected": -4.461152076721191, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08821034775233248, |
|
"grad_norm": 70.5941913597691, |
|
"learning_rate": 6.933333333333333e-07, |
|
"logits/chosen": -11.376758575439453, |
|
"logits/rejected": -11.398736953735352, |
|
"logps/chosen": -0.47147077322006226, |
|
"logps/rejected": -0.4626140296459198, |
|
"loss": 4.6912, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -4.714707374572754, |
|
"rewards/margins": -0.08856695890426636, |
|
"rewards/rejected": -4.626140594482422, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09047215154085383, |
|
"grad_norm": 68.46057335163108, |
|
"learning_rate": 7.111111111111111e-07, |
|
"logits/chosen": -11.606950759887695, |
|
"logits/rejected": -11.105400085449219, |
|
"logps/chosen": -0.39962151646614075, |
|
"logps/rejected": -0.4578825831413269, |
|
"loss": 4.5643, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -3.9962148666381836, |
|
"rewards/margins": 0.5826107263565063, |
|
"rewards/rejected": -4.578825950622559, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09273395532937517, |
|
"grad_norm": 48.972841115050805, |
|
"learning_rate": 7.288888888888888e-07, |
|
"logits/chosen": -11.43770980834961, |
|
"logits/rejected": -11.56243896484375, |
|
"logps/chosen": -0.41414573788642883, |
|
"logps/rejected": -0.4131737947463989, |
|
"loss": 4.6472, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -4.141457557678223, |
|
"rewards/margins": -0.009719468653202057, |
|
"rewards/rejected": -4.13173770904541, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.09499575911789652, |
|
"grad_norm": 143.02756863226023, |
|
"learning_rate": 7.466666666666667e-07, |
|
"logits/chosen": -11.282739639282227, |
|
"logits/rejected": -10.897704124450684, |
|
"logps/chosen": -0.4278091490268707, |
|
"logps/rejected": -0.4735082983970642, |
|
"loss": 4.8335, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.278091907501221, |
|
"rewards/margins": 0.4569913148880005, |
|
"rewards/rejected": -4.735082626342773, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09725756290641786, |
|
"grad_norm": 74.71400905339502, |
|
"learning_rate": 7.644444444444444e-07, |
|
"logits/chosen": -10.040319442749023, |
|
"logits/rejected": -9.910164833068848, |
|
"logps/chosen": -0.5288741588592529, |
|
"logps/rejected": -0.5330761671066284, |
|
"loss": 4.9162, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.2887420654296875, |
|
"rewards/margins": 0.04202008247375488, |
|
"rewards/rejected": -5.330761909484863, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09951936669493922, |
|
"grad_norm": 81.89498458784372, |
|
"learning_rate": 7.822222222222222e-07, |
|
"logits/chosen": -11.844489097595215, |
|
"logits/rejected": -11.597496032714844, |
|
"logps/chosen": -0.3373556435108185, |
|
"logps/rejected": -0.42685818672180176, |
|
"loss": 4.4339, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -3.373556613922119, |
|
"rewards/margins": 0.8950251340866089, |
|
"rewards/rejected": -4.268581390380859, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.10178117048346055, |
|
"grad_norm": 70.48086533057375, |
|
"learning_rate": 8e-07, |
|
"logits/chosen": -10.962928771972656, |
|
"logits/rejected": -10.9669771194458, |
|
"logps/chosen": -0.40894240140914917, |
|
"logps/rejected": -0.47473400831222534, |
|
"loss": 4.6343, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.089423656463623, |
|
"rewards/margins": 0.657916784286499, |
|
"rewards/rejected": -4.747340679168701, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1040429742719819, |
|
"grad_norm": 68.45670029697006, |
|
"learning_rate": 7.999874759018868e-07, |
|
"logits/chosen": -10.595868110656738, |
|
"logits/rejected": -10.306282043457031, |
|
"logps/chosen": -0.463877409696579, |
|
"logps/rejected": -0.5967152118682861, |
|
"loss": 4.6017, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -4.6387739181518555, |
|
"rewards/margins": 1.3283770084381104, |
|
"rewards/rejected": -5.967151165008545, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.10630477806050326, |
|
"grad_norm": 48.95212367492393, |
|
"learning_rate": 7.999499043918123e-07, |
|
"logits/chosen": -12.154573440551758, |
|
"logits/rejected": -12.19536304473877, |
|
"logps/chosen": -0.45791739225387573, |
|
"logps/rejected": -0.5772292017936707, |
|
"loss": 4.7494, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -4.579174041748047, |
|
"rewards/margins": 1.1931182146072388, |
|
"rewards/rejected": -5.772292137145996, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.1085665818490246, |
|
"grad_norm": 78.70326465142523, |
|
"learning_rate": 7.998872878225228e-07, |
|
"logits/chosen": -11.652605056762695, |
|
"logits/rejected": -11.418684005737305, |
|
"logps/chosen": -0.4871661365032196, |
|
"logps/rejected": -0.5542778372764587, |
|
"loss": 4.8017, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -4.871661186218262, |
|
"rewards/margins": 0.6711173057556152, |
|
"rewards/rejected": -5.542778491973877, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.11082838563754595, |
|
"grad_norm": 52.90074896212443, |
|
"learning_rate": 7.997996301150987e-07, |
|
"logits/chosen": -12.08781623840332, |
|
"logits/rejected": -11.528596878051758, |
|
"logps/chosen": -0.4144824743270874, |
|
"logps/rejected": -0.5010120868682861, |
|
"loss": 4.6588, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.144824981689453, |
|
"rewards/margins": 0.8652949929237366, |
|
"rewards/rejected": -5.010120391845703, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.11309018942606729, |
|
"grad_norm": 94.26111558590127, |
|
"learning_rate": 7.996869367587088e-07, |
|
"logits/chosen": -11.582418441772461, |
|
"logits/rejected": -10.963386535644531, |
|
"logps/chosen": -0.4467932879924774, |
|
"logps/rejected": -0.4817226529121399, |
|
"loss": 4.7057, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.46793270111084, |
|
"rewards/margins": 0.34929385781288147, |
|
"rewards/rejected": -4.817226409912109, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11535199321458864, |
|
"grad_norm": 49.386260226236786, |
|
"learning_rate": 7.99549214810266e-07, |
|
"logits/chosen": -10.763232231140137, |
|
"logits/rejected": -10.5509672164917, |
|
"logps/chosen": -0.5276934504508972, |
|
"logps/rejected": -0.5668250322341919, |
|
"loss": 4.505, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -5.276934623718262, |
|
"rewards/margins": 0.39131537079811096, |
|
"rewards/rejected": -5.668249607086182, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11761379700310998, |
|
"grad_norm": 75.77039924958046, |
|
"learning_rate": 7.993864728939867e-07, |
|
"logits/chosen": -10.996638298034668, |
|
"logits/rejected": -11.125421524047852, |
|
"logps/chosen": -0.4168677031993866, |
|
"logps/rejected": -0.4338124394416809, |
|
"loss": 4.8877, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.16867733001709, |
|
"rewards/margins": 0.1694469451904297, |
|
"rewards/rejected": -4.3381242752075195, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11987560079163133, |
|
"grad_norm": 128.1604406398008, |
|
"learning_rate": 7.991987212008491e-07, |
|
"logits/chosen": -10.845922470092773, |
|
"logits/rejected": -10.981675148010254, |
|
"logps/chosen": -0.5582807660102844, |
|
"logps/rejected": -0.5974184274673462, |
|
"loss": 4.4079, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -5.582807540893555, |
|
"rewards/margins": 0.3913762867450714, |
|
"rewards/rejected": -5.974184036254883, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.12213740458015267, |
|
"grad_norm": 58.15165845926177, |
|
"learning_rate": 7.989859714879565e-07, |
|
"logits/chosen": -10.547262191772461, |
|
"logits/rejected": -10.480108261108398, |
|
"logps/chosen": -0.5517194271087646, |
|
"logps/rejected": -0.6646666526794434, |
|
"loss": 4.9804, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -5.5171942710876465, |
|
"rewards/margins": 1.1294726133346558, |
|
"rewards/rejected": -6.646667003631592, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.12439920836867402, |
|
"grad_norm": 48.309919673308315, |
|
"learning_rate": 7.987482370778005e-07, |
|
"logits/chosen": -11.610102653503418, |
|
"logits/rejected": -11.80359935760498, |
|
"logps/chosen": -0.5112394690513611, |
|
"logps/rejected": -0.5051109790802002, |
|
"loss": 4.7434, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -5.112394332885742, |
|
"rewards/margins": -0.061284855008125305, |
|
"rewards/rejected": -5.051109790802002, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12666101215719536, |
|
"grad_norm": 92.15090562707765, |
|
"learning_rate": 7.984855328574262e-07, |
|
"logits/chosen": -11.098040580749512, |
|
"logits/rejected": -10.789083480834961, |
|
"logps/chosen": -0.489580363035202, |
|
"logps/rejected": -0.5100796818733215, |
|
"loss": 4.5609, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -4.895802974700928, |
|
"rewards/margins": 0.20499347150325775, |
|
"rewards/rejected": -5.100796699523926, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.1289228159457167, |
|
"grad_norm": 94.24070870623638, |
|
"learning_rate": 7.981978752775009e-07, |
|
"logits/chosen": -9.92190933227539, |
|
"logits/rejected": -9.928149223327637, |
|
"logps/chosen": -0.6262676119804382, |
|
"logps/rejected": -0.6750127077102661, |
|
"loss": 4.5092, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -6.262676239013672, |
|
"rewards/margins": 0.48745113611221313, |
|
"rewards/rejected": -6.750126838684082, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.13118461973423806, |
|
"grad_norm": 73.68507158036604, |
|
"learning_rate": 7.978852823512833e-07, |
|
"logits/chosen": -10.95576000213623, |
|
"logits/rejected": -10.358962059020996, |
|
"logps/chosen": -0.4652557671070099, |
|
"logps/rejected": -0.4836958050727844, |
|
"loss": 4.8084, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -4.652557849884033, |
|
"rewards/margins": 0.1843997836112976, |
|
"rewards/rejected": -4.8369574546813965, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.1334464235227594, |
|
"grad_norm": 66.30964256222111, |
|
"learning_rate": 7.975477736534957e-07, |
|
"logits/chosen": -12.005413055419922, |
|
"logits/rejected": -11.653824806213379, |
|
"logps/chosen": -0.46185585856437683, |
|
"logps/rejected": -0.5776143670082092, |
|
"loss": 4.5199, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -4.618558883666992, |
|
"rewards/margins": 1.1575853824615479, |
|
"rewards/rejected": -5.776144027709961, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.13570822731128074, |
|
"grad_norm": 101.38755828571541, |
|
"learning_rate": 7.971853703190986e-07, |
|
"logits/chosen": -11.413613319396973, |
|
"logits/rejected": -10.73826789855957, |
|
"logps/chosen": -0.5611809492111206, |
|
"logps/rejected": -0.6577370762825012, |
|
"loss": 4.6643, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.611808776855469, |
|
"rewards/margins": 0.9655615091323853, |
|
"rewards/rejected": -6.577370643615723, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1379700310998021, |
|
"grad_norm": 68.86595519869128, |
|
"learning_rate": 7.967980950419664e-07, |
|
"logits/chosen": -11.096137046813965, |
|
"logits/rejected": -10.685264587402344, |
|
"logps/chosen": -0.4981518089771271, |
|
"logps/rejected": -0.665467381477356, |
|
"loss": 4.4761, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.981517791748047, |
|
"rewards/margins": 1.673156499862671, |
|
"rewards/rejected": -6.654675006866455, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.14023183488832344, |
|
"grad_norm": 65.33178558436228, |
|
"learning_rate": 7.963859720734669e-07, |
|
"logits/chosen": -12.070573806762695, |
|
"logits/rejected": -11.637935638427734, |
|
"logps/chosen": -0.38139575719833374, |
|
"logps/rejected": -0.45192593336105347, |
|
"loss": 4.5535, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -3.813957691192627, |
|
"rewards/margins": 0.7053009867668152, |
|
"rewards/rejected": -4.519258975982666, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.14249363867684478, |
|
"grad_norm": 83.62029217364055, |
|
"learning_rate": 7.959490272209427e-07, |
|
"logits/chosen": -10.89778995513916, |
|
"logits/rejected": -10.380571365356445, |
|
"logps/chosen": -0.4739726185798645, |
|
"logps/rejected": -0.6313707232475281, |
|
"loss": 4.5111, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.7397260665893555, |
|
"rewards/margins": 1.5739809274673462, |
|
"rewards/rejected": -6.31370735168457, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.14475544246536612, |
|
"grad_norm": 54.013979661163816, |
|
"learning_rate": 7.954872878460946e-07, |
|
"logits/chosen": -11.172213554382324, |
|
"logits/rejected": -10.982388496398926, |
|
"logps/chosen": -0.4742993414402008, |
|
"logps/rejected": -0.6619201898574829, |
|
"loss": 4.3085, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.742993354797363, |
|
"rewards/margins": 1.8762080669403076, |
|
"rewards/rejected": -6.619201183319092, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.14701724625388748, |
|
"grad_norm": 81.96626905055028, |
|
"learning_rate": 7.950007828632691e-07, |
|
"logits/chosen": -10.859444618225098, |
|
"logits/rejected": -10.706047058105469, |
|
"logps/chosen": -0.5983306169509888, |
|
"logps/rejected": -0.6700727939605713, |
|
"loss": 4.3127, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -5.98330545425415, |
|
"rewards/margins": 0.717422366142273, |
|
"rewards/rejected": -6.700727462768555, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14927905004240882, |
|
"grad_norm": 58.0945518868609, |
|
"learning_rate": 7.944895427376465e-07, |
|
"logits/chosen": -10.645411491394043, |
|
"logits/rejected": -10.482881546020508, |
|
"logps/chosen": -0.5315589904785156, |
|
"logps/rejected": -0.7207262516021729, |
|
"loss": 4.238, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -5.315589904785156, |
|
"rewards/margins": 1.8916726112365723, |
|
"rewards/rejected": -7.2072625160217285, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.15154085383093016, |
|
"grad_norm": 40.98461722424557, |
|
"learning_rate": 7.939535994833345e-07, |
|
"logits/chosen": -12.032543182373047, |
|
"logits/rejected": -11.655169486999512, |
|
"logps/chosen": -0.40713435411453247, |
|
"logps/rejected": -0.5286428928375244, |
|
"loss": 4.1796, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -4.071343421936035, |
|
"rewards/margins": 1.215085744857788, |
|
"rewards/rejected": -5.286429405212402, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.1538026576194515, |
|
"grad_norm": 58.64617780662404, |
|
"learning_rate": 7.933929866613628e-07, |
|
"logits/chosen": -11.718114852905273, |
|
"logits/rejected": -11.243300437927246, |
|
"logps/chosen": -0.5240508317947388, |
|
"logps/rejected": -0.5563682317733765, |
|
"loss": 4.6826, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -5.240508556365967, |
|
"rewards/margins": 0.3231736421585083, |
|
"rewards/rejected": -5.5636820793151855, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.15606446140797287, |
|
"grad_norm": 70.31268006854283, |
|
"learning_rate": 7.928077393775808e-07, |
|
"logits/chosen": -11.418298721313477, |
|
"logits/rejected": -11.400525093078613, |
|
"logps/chosen": -0.5047922730445862, |
|
"logps/rejected": -0.6909648776054382, |
|
"loss": 3.9852, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -5.0479230880737305, |
|
"rewards/margins": 1.8617255687713623, |
|
"rewards/rejected": -6.909648418426514, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1583262651964942, |
|
"grad_norm": 80.21299453365818, |
|
"learning_rate": 7.921978942804609e-07, |
|
"logits/chosen": -10.426657676696777, |
|
"logits/rejected": -10.646503448486328, |
|
"logps/chosen": -0.5763324499130249, |
|
"logps/rejected": -0.6343460083007812, |
|
"loss": 4.159, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.7633256912231445, |
|
"rewards/margins": 0.5801345705986023, |
|
"rewards/rejected": -6.3434600830078125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16058806898501554, |
|
"grad_norm": 80.79055167712902, |
|
"learning_rate": 7.915634895588021e-07, |
|
"logits/chosen": -11.959595680236816, |
|
"logits/rejected": -12.10846996307373, |
|
"logps/chosen": -0.5684102177619934, |
|
"logps/rejected": -0.5796740055084229, |
|
"loss": 4.8753, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -5.684101581573486, |
|
"rewards/margins": 0.11263775080442429, |
|
"rewards/rejected": -5.796739101409912, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1628498727735369, |
|
"grad_norm": 75.73711259124929, |
|
"learning_rate": 7.909045649393394e-07, |
|
"logits/chosen": -12.076671600341797, |
|
"logits/rejected": -11.380012512207031, |
|
"logps/chosen": -0.5402446389198303, |
|
"logps/rejected": -0.5312026739120483, |
|
"loss": 4.8356, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -5.402446269989014, |
|
"rewards/margins": -0.09041957557201385, |
|
"rewards/rejected": -5.3120269775390625, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.16511167656205825, |
|
"grad_norm": 75.75288370918786, |
|
"learning_rate": 7.902211616842556e-07, |
|
"logits/chosen": -10.804548263549805, |
|
"logits/rejected": -10.961880683898926, |
|
"logps/chosen": -0.5909055471420288, |
|
"logps/rejected": -0.673595666885376, |
|
"loss": 4.3771, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -5.909054756164551, |
|
"rewards/margins": 0.8269017934799194, |
|
"rewards/rejected": -6.735957145690918, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.16737348035057958, |
|
"grad_norm": 62.67088862221453, |
|
"learning_rate": 7.89513322588598e-07, |
|
"logits/chosen": -12.931127548217773, |
|
"logits/rejected": -12.251081466674805, |
|
"logps/chosen": -0.4514605700969696, |
|
"logps/rejected": -0.5236379504203796, |
|
"loss": 4.3108, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.51460599899292, |
|
"rewards/margins": 0.7217735648155212, |
|
"rewards/rejected": -5.236379623413086, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16963528413910092, |
|
"grad_norm": 80.95433872904526, |
|
"learning_rate": 7.887810919775976e-07, |
|
"logits/chosen": -11.493197441101074, |
|
"logits/rejected": -11.485479354858398, |
|
"logps/chosen": -0.6065416932106018, |
|
"logps/rejected": -0.6815317273139954, |
|
"loss": 4.3937, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -6.065417289733887, |
|
"rewards/margins": 0.7499004006385803, |
|
"rewards/rejected": -6.815317630767822, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1718970879276223, |
|
"grad_norm": 46.993337340785516, |
|
"learning_rate": 7.880245157038949e-07, |
|
"logits/chosen": -11.63713264465332, |
|
"logits/rejected": -11.74251651763916, |
|
"logps/chosen": -0.5179169178009033, |
|
"logps/rejected": -0.5965338945388794, |
|
"loss": 4.3272, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.179169178009033, |
|
"rewards/margins": 0.7861694693565369, |
|
"rewards/rejected": -5.965338706970215, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.17415889171614363, |
|
"grad_norm": 87.9689908780646, |
|
"learning_rate": 7.872436411446671e-07, |
|
"logits/chosen": -12.063104629516602, |
|
"logits/rejected": -11.92426872253418, |
|
"logps/chosen": -0.5937929749488831, |
|
"logps/rejected": -0.7317577004432678, |
|
"loss": 4.6281, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -5.937929153442383, |
|
"rewards/margins": 1.3796474933624268, |
|
"rewards/rejected": -7.3175764083862305, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.17642069550466496, |
|
"grad_norm": 58.552629911827445, |
|
"learning_rate": 7.86438517198662e-07, |
|
"logits/chosen": -11.951095581054688, |
|
"logits/rejected": -11.894678115844727, |
|
"logps/chosen": -0.6496031880378723, |
|
"logps/rejected": -0.7183038592338562, |
|
"loss": 4.4127, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -6.496031761169434, |
|
"rewards/margins": 0.6870064735412598, |
|
"rewards/rejected": -7.183038711547852, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1786824992931863, |
|
"grad_norm": 50.220014654534005, |
|
"learning_rate": 7.856091942831366e-07, |
|
"logits/chosen": -12.49548625946045, |
|
"logits/rejected": -12.11488151550293, |
|
"logps/chosen": -0.5599091649055481, |
|
"logps/rejected": -0.6432383060455322, |
|
"loss": 4.5361, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.599091529846191, |
|
"rewards/margins": 0.83329176902771, |
|
"rewards/rejected": -6.432382583618164, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.18094430308170767, |
|
"grad_norm": 66.70412693533851, |
|
"learning_rate": 7.847557243306982e-07, |
|
"logits/chosen": -11.657049179077148, |
|
"logits/rejected": -11.260804176330566, |
|
"logps/chosen": -0.5219194293022156, |
|
"logps/rejected": -0.6938945055007935, |
|
"loss": 4.3537, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.219193458557129, |
|
"rewards/margins": 1.719750165939331, |
|
"rewards/rejected": -6.9389448165893555, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.183206106870229, |
|
"grad_norm": 63.80924880121896, |
|
"learning_rate": 7.838781607860541e-07, |
|
"logits/chosen": -12.70258903503418, |
|
"logits/rejected": -12.420878410339355, |
|
"logps/chosen": -0.625399112701416, |
|
"logps/rejected": -0.7799273133277893, |
|
"loss": 4.2639, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -6.25399112701416, |
|
"rewards/margins": 1.5452824831008911, |
|
"rewards/rejected": -7.7992730140686035, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.18546791065875035, |
|
"grad_norm": 57.78934306013499, |
|
"learning_rate": 7.82976558602664e-07, |
|
"logits/chosen": -11.984509468078613, |
|
"logits/rejected": -12.11713695526123, |
|
"logps/chosen": -0.5547804832458496, |
|
"logps/rejected": -0.6996307373046875, |
|
"loss": 4.4859, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -5.547804832458496, |
|
"rewards/margins": 1.4485028982162476, |
|
"rewards/rejected": -6.996307373046875, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.1877297144472717, |
|
"grad_norm": 74.92592513817463, |
|
"learning_rate": 7.820509742392988e-07, |
|
"logits/chosen": -12.603782653808594, |
|
"logits/rejected": -12.160541534423828, |
|
"logps/chosen": -0.6248946189880371, |
|
"logps/rejected": -0.6692970991134644, |
|
"loss": 4.2867, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -6.248946189880371, |
|
"rewards/margins": 0.444024920463562, |
|
"rewards/rejected": -6.692971229553223, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.18999151823579305, |
|
"grad_norm": 88.28010047139081, |
|
"learning_rate": 7.811014656565054e-07, |
|
"logits/chosen": -12.70538330078125, |
|
"logits/rejected": -12.26504898071289, |
|
"logps/chosen": -0.5449544191360474, |
|
"logps/rejected": -0.7343254685401917, |
|
"loss": 4.002, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -5.4495439529418945, |
|
"rewards/margins": 1.8937102556228638, |
|
"rewards/rejected": -7.343254566192627, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.1922533220243144, |
|
"grad_norm": 91.0049552486995, |
|
"learning_rate": 7.801280923129773e-07, |
|
"logits/chosen": -11.466194152832031, |
|
"logits/rejected": -11.01245403289795, |
|
"logps/chosen": -0.6134600043296814, |
|
"logps/rejected": -0.7226859927177429, |
|
"loss": 4.7861, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -6.134600639343262, |
|
"rewards/margins": 1.0922595262527466, |
|
"rewards/rejected": -7.226860046386719, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19451512581283573, |
|
"grad_norm": 80.46205455624374, |
|
"learning_rate": 7.791309151618305e-07, |
|
"logits/chosen": -12.178560256958008, |
|
"logits/rejected": -12.0822114944458, |
|
"logps/chosen": -0.589695930480957, |
|
"logps/rejected": -0.6354808211326599, |
|
"loss": 4.5683, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -5.8969597816467285, |
|
"rewards/margins": 0.45784902572631836, |
|
"rewards/rejected": -6.354808330535889, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.1967769296013571, |
|
"grad_norm": 65.70377095012364, |
|
"learning_rate": 7.781099966467874e-07, |
|
"logits/chosen": -14.043821334838867, |
|
"logits/rejected": -13.850515365600586, |
|
"logps/chosen": -0.5184203386306763, |
|
"logps/rejected": -0.5960665345191956, |
|
"loss": 4.4942, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -5.184203147888184, |
|
"rewards/margins": 0.7764618396759033, |
|
"rewards/rejected": -5.960664749145508, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.19903873338987843, |
|
"grad_norm": 85.44105817952351, |
|
"learning_rate": 7.770654006982664e-07, |
|
"logits/chosen": -11.956082344055176, |
|
"logits/rejected": -11.715299606323242, |
|
"logps/chosen": -0.7433941960334778, |
|
"logps/rejected": -0.8704244494438171, |
|
"loss": 4.6953, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -7.4339423179626465, |
|
"rewards/margins": 1.2703025341033936, |
|
"rewards/rejected": -8.704244613647461, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.20130053717839977, |
|
"grad_norm": 66.70633497431983, |
|
"learning_rate": 7.759971927293781e-07, |
|
"logits/chosen": -12.565323829650879, |
|
"logits/rejected": -12.145037651062012, |
|
"logps/chosen": -0.5749909281730652, |
|
"logps/rejected": -0.7184647917747498, |
|
"loss": 4.1798, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -5.749909400939941, |
|
"rewards/margins": 1.4347392320632935, |
|
"rewards/rejected": -7.184648513793945, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2035623409669211, |
|
"grad_norm": 61.091515513789844, |
|
"learning_rate": 7.749054396318297e-07, |
|
"logits/chosen": -11.981965065002441, |
|
"logits/rejected": -11.925725936889648, |
|
"logps/chosen": -0.6095532178878784, |
|
"logps/rejected": -0.7097649574279785, |
|
"loss": 4.5833, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -6.095531463623047, |
|
"rewards/margins": 1.00211763381958, |
|
"rewards/rejected": -7.097649097442627, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20582414475544247, |
|
"grad_norm": 98.35174892551164, |
|
"learning_rate": 7.737902097717356e-07, |
|
"logits/chosen": -12.858875274658203, |
|
"logits/rejected": -12.816228866577148, |
|
"logps/chosen": -0.5555391907691956, |
|
"logps/rejected": -0.6473320722579956, |
|
"loss": 4.3657, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -5.555391788482666, |
|
"rewards/margins": 0.9179282784461975, |
|
"rewards/rejected": -6.473320960998535, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2080859485439638, |
|
"grad_norm": 71.84927934229238, |
|
"learning_rate": 7.726515729853367e-07, |
|
"logits/chosen": -11.215812683105469, |
|
"logits/rejected": -10.828777313232422, |
|
"logps/chosen": -0.6313311457633972, |
|
"logps/rejected": -0.8224250674247742, |
|
"loss": 4.3914, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -6.313311576843262, |
|
"rewards/margins": 1.9109392166137695, |
|
"rewards/rejected": -8.224250793457031, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.21034775233248515, |
|
"grad_norm": 63.14236456247472, |
|
"learning_rate": 7.714896005746272e-07, |
|
"logits/chosen": -12.176814079284668, |
|
"logits/rejected": -11.898388862609863, |
|
"logps/chosen": -0.5294336080551147, |
|
"logps/rejected": -0.6646890044212341, |
|
"loss": 4.0327, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -5.294336318969727, |
|
"rewards/margins": 1.3525540828704834, |
|
"rewards/rejected": -6.646890163421631, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.21260955612100652, |
|
"grad_norm": 121.71357306492177, |
|
"learning_rate": 7.703043653028896e-07, |
|
"logits/chosen": -12.20483684539795, |
|
"logits/rejected": -11.81241226196289, |
|
"logps/chosen": -0.6999309659004211, |
|
"logps/rejected": -0.8097646236419678, |
|
"loss": 4.668, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -6.999309539794922, |
|
"rewards/margins": 1.0983363389968872, |
|
"rewards/rejected": -8.09764575958252, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.21487135990952785, |
|
"grad_norm": 104.68871191294573, |
|
"learning_rate": 7.690959413901379e-07, |
|
"logits/chosen": -13.26455307006836, |
|
"logits/rejected": -13.093438148498535, |
|
"logps/chosen": -0.6004514694213867, |
|
"logps/rejected": -0.7420926690101624, |
|
"loss": 4.5244, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -6.004515171051025, |
|
"rewards/margins": 1.4164113998413086, |
|
"rewards/rejected": -7.420926570892334, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2171331636980492, |
|
"grad_norm": 96.3754544871051, |
|
"learning_rate": 7.678644045084704e-07, |
|
"logits/chosen": -13.176921844482422, |
|
"logits/rejected": -12.706074714660645, |
|
"logps/chosen": -0.5092126727104187, |
|
"logps/rejected": -0.673244833946228, |
|
"loss": 4.126, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -5.09212589263916, |
|
"rewards/margins": 1.6403214931488037, |
|
"rewards/rejected": -6.732447624206543, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.21939496748657053, |
|
"grad_norm": 64.90730762680234, |
|
"learning_rate": 7.666098317773308e-07, |
|
"logits/chosen": -12.79755687713623, |
|
"logits/rejected": -12.852503776550293, |
|
"logps/chosen": -0.730464518070221, |
|
"logps/rejected": -0.8265626430511475, |
|
"loss": 4.1382, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -7.30464506149292, |
|
"rewards/margins": 0.9609812498092651, |
|
"rewards/rejected": -8.265625953674316, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2216567712750919, |
|
"grad_norm": 61.06704486908139, |
|
"learning_rate": 7.653323017586789e-07, |
|
"logits/chosen": -13.87999153137207, |
|
"logits/rejected": -13.832286834716797, |
|
"logps/chosen": -0.629042387008667, |
|
"logps/rejected": -0.607448399066925, |
|
"loss": 4.3219, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -6.290423393249512, |
|
"rewards/margins": -0.21593987941741943, |
|
"rewards/rejected": -6.074484348297119, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.22391857506361323, |
|
"grad_norm": 68.27361430232956, |
|
"learning_rate": 7.640318944520711e-07, |
|
"logits/chosen": -12.233078956604004, |
|
"logits/rejected": -11.775206565856934, |
|
"logps/chosen": -0.7409114241600037, |
|
"logps/rejected": -0.9343410134315491, |
|
"loss": 4.2391, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -7.409113883972168, |
|
"rewards/margins": 1.9342964887619019, |
|
"rewards/rejected": -9.343409538269043, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.22618037885213457, |
|
"grad_norm": 101.95762268614794, |
|
"learning_rate": 7.627086912896511e-07, |
|
"logits/chosen": -13.06617546081543, |
|
"logits/rejected": -12.822061538696289, |
|
"logps/chosen": -0.6488937139511108, |
|
"logps/rejected": -0.6966894268989563, |
|
"loss": 4.3114, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -6.4889373779296875, |
|
"rewards/margins": 0.47795701026916504, |
|
"rewards/rejected": -6.966893672943115, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2284421826406559, |
|
"grad_norm": 62.41706596840518, |
|
"learning_rate": 7.613627751310499e-07, |
|
"logits/chosen": -13.649486541748047, |
|
"logits/rejected": -13.323113441467285, |
|
"logps/chosen": -0.5429686307907104, |
|
"logps/rejected": -0.7514999508857727, |
|
"loss": 4.1066, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -5.429686546325684, |
|
"rewards/margins": 2.085312604904175, |
|
"rewards/rejected": -7.5149993896484375, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.23070398642917728, |
|
"grad_norm": 98.8807647714299, |
|
"learning_rate": 7.599942302581977e-07, |
|
"logits/chosen": -13.609407424926758, |
|
"logits/rejected": -13.286027908325195, |
|
"logps/chosen": -0.6267982721328735, |
|
"logps/rejected": -0.8196748495101929, |
|
"loss": 4.085, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.267982006072998, |
|
"rewards/margins": 1.9287660121917725, |
|
"rewards/rejected": -8.196748733520508, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.23296579021769862, |
|
"grad_norm": 69.62248080154978, |
|
"learning_rate": 7.586031423700457e-07, |
|
"logits/chosen": -13.66258430480957, |
|
"logits/rejected": -13.500720024108887, |
|
"logps/chosen": -0.67485111951828, |
|
"logps/rejected": -0.7922409772872925, |
|
"loss": 4.2884, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -6.74851131439209, |
|
"rewards/margins": 1.1738990545272827, |
|
"rewards/rejected": -7.92241096496582, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.23522759400621995, |
|
"grad_norm": 128.9243427342601, |
|
"learning_rate": 7.571895985772e-07, |
|
"logits/chosen": -13.242142677307129, |
|
"logits/rejected": -13.256977081298828, |
|
"logps/chosen": -0.6713986396789551, |
|
"logps/rejected": -0.823998749256134, |
|
"loss": 4.4093, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -6.713986873626709, |
|
"rewards/margins": 1.526000738143921, |
|
"rewards/rejected": -8.239988327026367, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.23748939779474132, |
|
"grad_norm": 90.13452963738787, |
|
"learning_rate": 7.557536873964661e-07, |
|
"logits/chosen": -13.565170288085938, |
|
"logits/rejected": -13.13564682006836, |
|
"logps/chosen": -0.6910791993141174, |
|
"logps/rejected": -0.9325417280197144, |
|
"loss": 4.4509, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -6.910791873931885, |
|
"rewards/margins": 2.4146251678466797, |
|
"rewards/rejected": -9.325417518615723, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23975120158326266, |
|
"grad_norm": 87.0306411773028, |
|
"learning_rate": 7.542954987453069e-07, |
|
"logits/chosen": -14.550992012023926, |
|
"logits/rejected": -14.176923751831055, |
|
"logps/chosen": -0.6862035393714905, |
|
"logps/rejected": -0.8450896143913269, |
|
"loss": 3.9713, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -6.862035751342773, |
|
"rewards/margins": 1.5888599157333374, |
|
"rewards/rejected": -8.450895309448242, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.242013005371784, |
|
"grad_norm": 79.22884465246462, |
|
"learning_rate": 7.528151239362108e-07, |
|
"logits/chosen": -14.102907180786133, |
|
"logits/rejected": -13.666712760925293, |
|
"logps/chosen": -0.6612896919250488, |
|
"logps/rejected": -0.831270694732666, |
|
"loss": 4.1818, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -6.6128973960876465, |
|
"rewards/margins": 1.6998090744018555, |
|
"rewards/rejected": -8.312705993652344, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.24427480916030533, |
|
"grad_norm": 127.69113358221105, |
|
"learning_rate": 7.513126556709748e-07, |
|
"logits/chosen": -11.86813735961914, |
|
"logits/rejected": -11.855137825012207, |
|
"logps/chosen": -0.6619610786437988, |
|
"logps/rejected": -0.9614608287811279, |
|
"loss": 3.5632, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -6.6196112632751465, |
|
"rewards/margins": 2.9949963092803955, |
|
"rewards/rejected": -9.614606857299805, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2465366129488267, |
|
"grad_norm": 68.99980133964193, |
|
"learning_rate": 7.497881880348984e-07, |
|
"logits/chosen": -14.216558456420898, |
|
"logits/rejected": -13.699520111083984, |
|
"logps/chosen": -0.6432782411575317, |
|
"logps/rejected": -0.8865514397621155, |
|
"loss": 3.6564, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -6.432782173156738, |
|
"rewards/margins": 2.432731866836548, |
|
"rewards/rejected": -8.865514755249023, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.24879841673734804, |
|
"grad_norm": 94.37373909884498, |
|
"learning_rate": 7.482418164908931e-07, |
|
"logits/chosen": -13.685918807983398, |
|
"logits/rejected": -13.722275733947754, |
|
"logps/chosen": -0.7590062618255615, |
|
"logps/rejected": -0.8727726936340332, |
|
"loss": 4.4461, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -7.590063095092773, |
|
"rewards/margins": 1.1376643180847168, |
|
"rewards/rejected": -8.727726936340332, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2510602205258694, |
|
"grad_norm": 104.73538958533439, |
|
"learning_rate": 7.466736378735035e-07, |
|
"logits/chosen": -13.90713882446289, |
|
"logits/rejected": -13.833703994750977, |
|
"logps/chosen": -0.9833253622055054, |
|
"logps/rejected": -1.1174235343933105, |
|
"loss": 4.0684, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -9.833253860473633, |
|
"rewards/margins": 1.340980887413025, |
|
"rewards/rejected": -11.174234390258789, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2533220243143907, |
|
"grad_norm": 85.78487702365295, |
|
"learning_rate": 7.450837503828439e-07, |
|
"logits/chosen": -14.123536109924316, |
|
"logits/rejected": -14.122791290283203, |
|
"logps/chosen": -0.7747003436088562, |
|
"logps/rejected": -0.9367392063140869, |
|
"loss": 3.7847, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -7.747003555297852, |
|
"rewards/margins": 1.6203885078430176, |
|
"rewards/rejected": -9.367391586303711, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2555838281029121, |
|
"grad_norm": 79.39379626286185, |
|
"learning_rate": 7.43472253578449e-07, |
|
"logits/chosen": -15.111526489257812, |
|
"logits/rejected": -15.17776870727539, |
|
"logps/chosen": -0.6799838542938232, |
|
"logps/rejected": -0.7487653493881226, |
|
"loss": 4.1428, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -6.799839019775391, |
|
"rewards/margins": 0.6878141760826111, |
|
"rewards/rejected": -7.487652778625488, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2578456318914334, |
|
"grad_norm": 95.27260892673486, |
|
"learning_rate": 7.418392483730389e-07, |
|
"logits/chosen": -15.093989372253418, |
|
"logits/rejected": -14.798469543457031, |
|
"logps/chosen": -0.611186683177948, |
|
"logps/rejected": -0.733193039894104, |
|
"loss": 3.9567, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -6.111865997314453, |
|
"rewards/margins": 1.2200640439987183, |
|
"rewards/rejected": -7.331930637359619, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.26010743567995476, |
|
"grad_norm": 81.84816803138266, |
|
"learning_rate": 7.401848370262012e-07, |
|
"logits/chosen": -16.052608489990234, |
|
"logits/rejected": -15.86906623840332, |
|
"logps/chosen": -0.7116187810897827, |
|
"logps/rejected": -0.8240950107574463, |
|
"loss": 4.2147, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -7.116188049316406, |
|
"rewards/margins": 1.1247621774673462, |
|
"rewards/rejected": -8.240950584411621, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2623692394684761, |
|
"grad_norm": 86.40835196804031, |
|
"learning_rate": 7.385091231379856e-07, |
|
"logits/chosen": -15.110920906066895, |
|
"logits/rejected": -15.024141311645508, |
|
"logps/chosen": -0.7939636707305908, |
|
"logps/rejected": -0.9955480098724365, |
|
"loss": 4.0034, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -7.93963623046875, |
|
"rewards/margins": 2.0158443450927734, |
|
"rewards/rejected": -9.955480575561523, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.26463104325699743, |
|
"grad_norm": 96.7390682646137, |
|
"learning_rate": 7.368122116424182e-07, |
|
"logits/chosen": -13.677536964416504, |
|
"logits/rejected": -13.632445335388184, |
|
"logps/chosen": -0.8173962235450745, |
|
"logps/rejected": -0.8863806128501892, |
|
"loss": 4.2779, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -8.173962593078613, |
|
"rewards/margins": 0.6898432970046997, |
|
"rewards/rejected": -8.863805770874023, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2668928470455188, |
|
"grad_norm": 114.76228974717415, |
|
"learning_rate": 7.350942088009289e-07, |
|
"logits/chosen": -16.132448196411133, |
|
"logits/rejected": -15.948546409606934, |
|
"logps/chosen": -0.8236314058303833, |
|
"logps/rejected": -0.9717513918876648, |
|
"loss": 3.7875, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -8.236313819885254, |
|
"rewards/margins": 1.4811999797821045, |
|
"rewards/rejected": -9.717514038085938, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.26915465083404017, |
|
"grad_norm": 124.74702715605902, |
|
"learning_rate": 7.333552221956986e-07, |
|
"logits/chosen": -14.226578712463379, |
|
"logits/rejected": -13.749677658081055, |
|
"logps/chosen": -0.9559565782546997, |
|
"logps/rejected": -1.1939551830291748, |
|
"loss": 3.7927, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -9.559566497802734, |
|
"rewards/margins": 2.37998628616333, |
|
"rewards/rejected": -11.939552307128906, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2714164546225615, |
|
"grad_norm": 139.60375866242782, |
|
"learning_rate": 7.315953607229217e-07, |
|
"logits/chosen": -15.55072021484375, |
|
"logits/rejected": -15.846210479736328, |
|
"logps/chosen": -0.9729312658309937, |
|
"logps/rejected": -1.1994317770004272, |
|
"loss": 4.0626, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -9.729312896728516, |
|
"rewards/margins": 2.265005588531494, |
|
"rewards/rejected": -11.994318962097168, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27367825841108284, |
|
"grad_norm": 90.79159608076576, |
|
"learning_rate": 7.298147345859869e-07, |
|
"logits/chosen": -15.140702247619629, |
|
"logits/rejected": -14.700098037719727, |
|
"logps/chosen": -0.8421116471290588, |
|
"logps/rejected": -1.0816903114318848, |
|
"loss": 4.0231, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -8.421116828918457, |
|
"rewards/margins": 2.3957865238189697, |
|
"rewards/rejected": -10.816903114318848, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.2759400621996042, |
|
"grad_norm": 100.537933010007, |
|
"learning_rate": 7.280134552885762e-07, |
|
"logits/chosen": -16.38404083251953, |
|
"logits/rejected": -15.996622085571289, |
|
"logps/chosen": -0.7793571949005127, |
|
"logps/rejected": -0.9447546005249023, |
|
"loss": 4.1454, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -7.793571949005127, |
|
"rewards/margins": 1.6539742946624756, |
|
"rewards/rejected": -9.44754695892334, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2782018659881255, |
|
"grad_norm": 92.3597151880949, |
|
"learning_rate": 7.261916356276831e-07, |
|
"logits/chosen": -16.811389923095703, |
|
"logits/rejected": -16.297218322753906, |
|
"logps/chosen": -1.1431193351745605, |
|
"logps/rejected": -1.4224827289581299, |
|
"loss": 3.5578, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.431194305419922, |
|
"rewards/margins": 2.7936320304870605, |
|
"rewards/rejected": -14.22482681274414, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.2804636697766469, |
|
"grad_norm": 85.41024914421368, |
|
"learning_rate": 7.243493896865486e-07, |
|
"logits/chosen": -16.567768096923828, |
|
"logits/rejected": -16.550174713134766, |
|
"logps/chosen": -0.7305294275283813, |
|
"logps/rejected": -0.8769953846931458, |
|
"loss": 3.8388, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -7.305294990539551, |
|
"rewards/margins": 1.4646586179733276, |
|
"rewards/rejected": -8.769953727722168, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2827254735651682, |
|
"grad_norm": 116.2256817022879, |
|
"learning_rate": 7.224868328275169e-07, |
|
"logits/chosen": -15.456303596496582, |
|
"logits/rejected": -15.146892547607422, |
|
"logps/chosen": -0.8332209587097168, |
|
"logps/rejected": -1.0618098974227905, |
|
"loss": 3.8379, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -8.332210540771484, |
|
"rewards/margins": 2.285888433456421, |
|
"rewards/rejected": -10.618098258972168, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.28498727735368956, |
|
"grad_norm": 171.39099454139836, |
|
"learning_rate": 7.206040816848126e-07, |
|
"logits/chosen": -13.179584503173828, |
|
"logits/rejected": -13.4354887008667, |
|
"logps/chosen": -0.7763444781303406, |
|
"logps/rejected": -1.0508021116256714, |
|
"loss": 4.0712, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -7.763444423675537, |
|
"rewards/margins": 2.7445759773254395, |
|
"rewards/rejected": -10.508020401000977, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2872490811422109, |
|
"grad_norm": 113.59593931675269, |
|
"learning_rate": 7.187012541572356e-07, |
|
"logits/chosen": -16.92714500427246, |
|
"logits/rejected": -16.797697067260742, |
|
"logps/chosen": -0.8984054923057556, |
|
"logps/rejected": -1.2400306463241577, |
|
"loss": 3.9837, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -8.984055519104004, |
|
"rewards/margins": 3.416250467300415, |
|
"rewards/rejected": -12.400304794311523, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.28951088493073224, |
|
"grad_norm": 106.75878513488918, |
|
"learning_rate": 7.167784694007791e-07, |
|
"logits/chosen": -17.011579513549805, |
|
"logits/rejected": -16.66845703125, |
|
"logps/chosen": -0.8532888889312744, |
|
"logps/rejected": -1.0494173765182495, |
|
"loss": 3.6795, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -8.532888412475586, |
|
"rewards/margins": 1.9612853527069092, |
|
"rewards/rejected": -10.494174003601074, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.2917726887192536, |
|
"grad_norm": 100.82906895906213, |
|
"learning_rate": 7.148358478211682e-07, |
|
"logits/chosen": -17.22789764404297, |
|
"logits/rejected": -16.70311737060547, |
|
"logps/chosen": -1.0011430978775024, |
|
"logps/rejected": -1.2051304578781128, |
|
"loss": 3.8174, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -10.011430740356445, |
|
"rewards/margins": 2.039872646331787, |
|
"rewards/rejected": -12.05130386352539, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.29403449250777497, |
|
"grad_norm": 70.96241822690538, |
|
"learning_rate": 7.128735110663187e-07, |
|
"logits/chosen": -16.649568557739258, |
|
"logits/rejected": -16.656688690185547, |
|
"logps/chosen": -0.9126195907592773, |
|
"logps/rejected": -1.2946593761444092, |
|
"loss": 3.4187, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -9.12619686126709, |
|
"rewards/margins": 3.8203978538513184, |
|
"rewards/rejected": -12.946593284606934, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 101.80550264504929, |
|
"learning_rate": 7.108915820187211e-07, |
|
"logits/chosen": -14.773153305053711, |
|
"logits/rejected": -14.596695899963379, |
|
"logps/chosen": -0.9897314310073853, |
|
"logps/rejected": -1.3485132455825806, |
|
"loss": 3.2847, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -9.89731502532959, |
|
"rewards/margins": 3.587818145751953, |
|
"rewards/rejected": -13.48513126373291, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.29855810008481765, |
|
"grad_norm": 118.07277179415749, |
|
"learning_rate": 7.088901847877447e-07, |
|
"logits/chosen": -15.380992889404297, |
|
"logits/rejected": -15.349335670471191, |
|
"logps/chosen": -0.9675842523574829, |
|
"logps/rejected": -1.2969377040863037, |
|
"loss": 4.5014, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -9.67584228515625, |
|
"rewards/margins": 3.293534755706787, |
|
"rewards/rejected": -12.969377517700195, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.300819903873339, |
|
"grad_norm": 126.2514417468051, |
|
"learning_rate": 7.068694447018658e-07, |
|
"logits/chosen": -16.715206146240234, |
|
"logits/rejected": -16.719194412231445, |
|
"logps/chosen": -0.8660197854042053, |
|
"logps/rejected": -0.9733752608299255, |
|
"loss": 3.7002, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -8.660198211669922, |
|
"rewards/margins": 1.073554515838623, |
|
"rewards/rejected": -9.733752250671387, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.3030817076618603, |
|
"grad_norm": 115.59595983452179, |
|
"learning_rate": 7.048294883008199e-07, |
|
"logits/chosen": -17.525606155395508, |
|
"logits/rejected": -17.24049186706543, |
|
"logps/chosen": -0.9643100500106812, |
|
"logps/rejected": -1.2070279121398926, |
|
"loss": 3.5993, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -9.64310073852539, |
|
"rewards/margins": 2.4271788597106934, |
|
"rewards/rejected": -12.070280075073242, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3053435114503817, |
|
"grad_norm": 135.03715755778867, |
|
"learning_rate": 7.027704433276776e-07, |
|
"logits/chosen": -18.083145141601562, |
|
"logits/rejected": -17.37511444091797, |
|
"logps/chosen": -0.9572893381118774, |
|
"logps/rejected": -1.3515632152557373, |
|
"loss": 3.6303, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -9.572893142700195, |
|
"rewards/margins": 3.9427390098571777, |
|
"rewards/rejected": -13.515631675720215, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.307605315238903, |
|
"grad_norm": 123.3145878922545, |
|
"learning_rate": 7.006924387208452e-07, |
|
"logits/chosen": -16.337995529174805, |
|
"logits/rejected": -16.18612289428711, |
|
"logps/chosen": -0.7492311596870422, |
|
"logps/rejected": -0.9364847540855408, |
|
"loss": 3.8638, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -7.492311000823975, |
|
"rewards/margins": 1.8725361824035645, |
|
"rewards/rejected": -9.364849090576172, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.30986711902742436, |
|
"grad_norm": 110.47458672576012, |
|
"learning_rate": 6.985956046059904e-07, |
|
"logits/chosen": -15.281987190246582, |
|
"logits/rejected": -15.222082138061523, |
|
"logps/chosen": -0.8718824982643127, |
|
"logps/rejected": -1.2887756824493408, |
|
"loss": 3.8853, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -8.718825340270996, |
|
"rewards/margins": 4.168931007385254, |
|
"rewards/rejected": -12.887757301330566, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.31212892281594573, |
|
"grad_norm": 93.15535964190208, |
|
"learning_rate": 6.964800722878945e-07, |
|
"logits/chosen": -16.852909088134766, |
|
"logits/rejected": -16.60708999633789, |
|
"logps/chosen": -0.8743470907211304, |
|
"logps/rejected": -1.0895050764083862, |
|
"loss": 3.3061, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -8.743470191955566, |
|
"rewards/margins": 2.151580810546875, |
|
"rewards/rejected": -10.895051002502441, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.31439072660446704, |
|
"grad_norm": 116.25516493400698, |
|
"learning_rate": 6.943459742422287e-07, |
|
"logits/chosen": -16.162519454956055, |
|
"logits/rejected": -15.761104583740234, |
|
"logps/chosen": -1.1220320463180542, |
|
"logps/rejected": -1.5008246898651123, |
|
"loss": 3.8586, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -11.220320701599121, |
|
"rewards/margins": 3.7879250049591064, |
|
"rewards/rejected": -15.008245468139648, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3166525303929884, |
|
"grad_norm": 115.45171227467374, |
|
"learning_rate": 6.921934441072597e-07, |
|
"logits/chosen": -17.508764266967773, |
|
"logits/rejected": -17.509645462036133, |
|
"logps/chosen": -1.0907777547836304, |
|
"logps/rejected": -1.355396032333374, |
|
"loss": 3.8912, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -10.90777587890625, |
|
"rewards/margins": 2.646184206008911, |
|
"rewards/rejected": -13.553960800170898, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3189143341815098, |
|
"grad_norm": 145.15074217143749, |
|
"learning_rate": 6.900226166754807e-07, |
|
"logits/chosen": -16.634740829467773, |
|
"logits/rejected": -16.976970672607422, |
|
"logps/chosen": -1.3348404169082642, |
|
"logps/rejected": -1.4817439317703247, |
|
"loss": 4.6661, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -13.348404884338379, |
|
"rewards/margins": 1.469035029411316, |
|
"rewards/rejected": -14.817439079284668, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3211761379700311, |
|
"grad_norm": 108.45634524328419, |
|
"learning_rate": 6.8783362788517e-07, |
|
"logits/chosen": -16.96560287475586, |
|
"logits/rejected": -16.87692642211914, |
|
"logps/chosen": -1.285522699356079, |
|
"logps/rejected": -1.6592097282409668, |
|
"loss": 3.4579, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -12.855226516723633, |
|
"rewards/margins": 3.7368712425231934, |
|
"rewards/rejected": -16.592098236083984, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.32343794175855245, |
|
"grad_norm": 108.50675097342489, |
|
"learning_rate": 6.856266148118796e-07, |
|
"logits/chosen": -16.803754806518555, |
|
"logits/rejected": -17.20581817626953, |
|
"logps/chosen": -1.0886934995651245, |
|
"logps/rejected": -1.5176775455474854, |
|
"loss": 3.3977, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -10.886935234069824, |
|
"rewards/margins": 4.2898406982421875, |
|
"rewards/rejected": -15.176775932312012, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3256997455470738, |
|
"grad_norm": 135.30929548671452, |
|
"learning_rate": 6.834017156598512e-07, |
|
"logits/chosen": -17.159934997558594, |
|
"logits/rejected": -16.915081024169922, |
|
"logps/chosen": -1.0447005033493042, |
|
"logps/rejected": -1.5800331830978394, |
|
"loss": 3.5652, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -10.447005271911621, |
|
"rewards/margins": 5.353327751159668, |
|
"rewards/rejected": -15.800333023071289, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3279615493355951, |
|
"grad_norm": 92.79875046989964, |
|
"learning_rate": 6.811590697533607e-07, |
|
"logits/chosen": -18.941160202026367, |
|
"logits/rejected": -18.880695343017578, |
|
"logps/chosen": -1.2164652347564697, |
|
"logps/rejected": -1.3915354013442993, |
|
"loss": 3.8421, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -12.164650917053223, |
|
"rewards/margins": 1.7507033348083496, |
|
"rewards/rejected": -13.915353775024414, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3302233531241165, |
|
"grad_norm": 152.87216949016505, |
|
"learning_rate": 6.788988175279951e-07, |
|
"logits/chosen": -17.32018280029297, |
|
"logits/rejected": -17.33584976196289, |
|
"logps/chosen": -1.1823511123657227, |
|
"logps/rejected": -1.55608069896698, |
|
"loss": 3.9364, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -11.823511123657227, |
|
"rewards/margins": 3.7372941970825195, |
|
"rewards/rejected": -15.560805320739746, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3324851569126378, |
|
"grad_norm": 112.74886884591804, |
|
"learning_rate": 6.766211005218577e-07, |
|
"logits/chosen": -17.034011840820312, |
|
"logits/rejected": -16.896516799926758, |
|
"logps/chosen": -1.0992332696914673, |
|
"logps/rejected": -1.6042219400405884, |
|
"loss": 3.2047, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -10.992332458496094, |
|
"rewards/margins": 5.049887180328369, |
|
"rewards/rejected": -16.042219161987305, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.33474696070115917, |
|
"grad_norm": 102.37494798795927, |
|
"learning_rate": 6.743260613667047e-07, |
|
"logits/chosen": -20.16105079650879, |
|
"logits/rejected": -20.097129821777344, |
|
"logps/chosen": -1.3072175979614258, |
|
"logps/rejected": -1.7791482210159302, |
|
"loss": 3.6159, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -13.072174072265625, |
|
"rewards/margins": 4.719306945800781, |
|
"rewards/rejected": -17.79148292541504, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.33700876448968053, |
|
"grad_norm": 92.8889637457248, |
|
"learning_rate": 6.720138437790139e-07, |
|
"logits/chosen": -18.851449966430664, |
|
"logits/rejected": -19.006174087524414, |
|
"logps/chosen": -1.1900596618652344, |
|
"logps/rejected": -1.6059695482254028, |
|
"loss": 3.0262, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -11.900595664978027, |
|
"rewards/margins": 4.1590986251831055, |
|
"rewards/rejected": -16.059694290161133, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.33927056827820185, |
|
"grad_norm": 128.41082842918968, |
|
"learning_rate": 6.696845925509848e-07, |
|
"logits/chosen": -17.904184341430664, |
|
"logits/rejected": -17.44746208190918, |
|
"logps/chosen": -1.3113856315612793, |
|
"logps/rejected": -1.5605354309082031, |
|
"loss": 3.9659, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -13.113856315612793, |
|
"rewards/margins": 2.491497755050659, |
|
"rewards/rejected": -15.605354309082031, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3415323720667232, |
|
"grad_norm": 120.26296110157324, |
|
"learning_rate": 6.673384535414718e-07, |
|
"logits/chosen": -18.292760848999023, |
|
"logits/rejected": -18.07993507385254, |
|
"logps/chosen": -1.2153687477111816, |
|
"logps/rejected": -1.442406415939331, |
|
"loss": 4.4772, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -12.153687477111816, |
|
"rewards/margins": 2.270376205444336, |
|
"rewards/rejected": -14.424064636230469, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3437941758552446, |
|
"grad_norm": 108.61106396576423, |
|
"learning_rate": 6.649755736668511e-07, |
|
"logits/chosen": -16.841121673583984, |
|
"logits/rejected": -16.501068115234375, |
|
"logps/chosen": -1.1176737546920776, |
|
"logps/rejected": -1.6219063997268677, |
|
"loss": 2.9905, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -11.176735877990723, |
|
"rewards/margins": 5.042326927185059, |
|
"rewards/rejected": -16.21906280517578, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3460559796437659, |
|
"grad_norm": 128.2647564085794, |
|
"learning_rate": 6.625961008918192e-07, |
|
"logits/chosen": -18.715444564819336, |
|
"logits/rejected": -18.511932373046875, |
|
"logps/chosen": -1.2932363748550415, |
|
"logps/rejected": -1.488201379776001, |
|
"loss": 3.3092, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -12.932364463806152, |
|
"rewards/margins": 1.9496493339538574, |
|
"rewards/rejected": -14.882014274597168, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.34831778343228725, |
|
"grad_norm": 119.57670893186517, |
|
"learning_rate": 6.602001842201289e-07, |
|
"logits/chosen": -16.945566177368164, |
|
"logits/rejected": -17.048381805419922, |
|
"logps/chosen": -1.1972765922546387, |
|
"logps/rejected": -1.4838308095932007, |
|
"loss": 3.6387, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.972765922546387, |
|
"rewards/margins": 2.8655428886413574, |
|
"rewards/rejected": -14.838308334350586, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3505795872208086, |
|
"grad_norm": 133.44404947093932, |
|
"learning_rate": 6.577879736852571e-07, |
|
"logits/chosen": -17.318836212158203, |
|
"logits/rejected": -17.2701473236084, |
|
"logps/chosen": -1.3334428071975708, |
|
"logps/rejected": -1.554374098777771, |
|
"loss": 3.9227, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -13.334427833557129, |
|
"rewards/margins": 2.2093122005462646, |
|
"rewards/rejected": -15.543739318847656, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.35284139100932993, |
|
"grad_norm": 108.58327489587623, |
|
"learning_rate": 6.553596203410112e-07, |
|
"logits/chosen": -16.674957275390625, |
|
"logits/rejected": -16.426877975463867, |
|
"logps/chosen": -1.0741811990737915, |
|
"logps/rejected": -1.548369288444519, |
|
"loss": 3.0135, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -10.741811752319336, |
|
"rewards/margins": 4.741880893707275, |
|
"rewards/rejected": -15.483692169189453, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.3551031947978513, |
|
"grad_norm": 114.21004986072782, |
|
"learning_rate": 6.529152762520688e-07, |
|
"logits/chosen": -18.453733444213867, |
|
"logits/rejected": -18.391489028930664, |
|
"logps/chosen": -1.3322038650512695, |
|
"logps/rejected": -1.5323253870010376, |
|
"loss": 3.8828, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -13.322039604187012, |
|
"rewards/margins": 2.0012147426605225, |
|
"rewards/rejected": -15.323253631591797, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3573649985863726, |
|
"grad_norm": 140.31074175994047, |
|
"learning_rate": 6.504550944844558e-07, |
|
"logits/chosen": -16.778514862060547, |
|
"logits/rejected": -16.645111083984375, |
|
"logps/chosen": -1.3454078435897827, |
|
"logps/rejected": -1.8546462059020996, |
|
"loss": 3.6056, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -13.454076766967773, |
|
"rewards/margins": 5.092383861541748, |
|
"rewards/rejected": -18.546463012695312, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.359626802374894, |
|
"grad_norm": 137.59691231656896, |
|
"learning_rate": 6.479792290959613e-07, |
|
"logits/chosen": -16.80532455444336, |
|
"logits/rejected": -16.812410354614258, |
|
"logps/chosen": -1.4085781574249268, |
|
"logps/rejected": -1.807809591293335, |
|
"loss": 3.3238, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.085782051086426, |
|
"rewards/margins": 3.9923133850097656, |
|
"rewards/rejected": -18.078096389770508, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.36188860616341534, |
|
"grad_norm": 122.31193582134684, |
|
"learning_rate": 6.454878351264906e-07, |
|
"logits/chosen": -17.32520866394043, |
|
"logits/rejected": -17.327320098876953, |
|
"logps/chosen": -1.2008891105651855, |
|
"logps/rejected": -1.3473906517028809, |
|
"loss": 3.8266, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -12.008890151977539, |
|
"rewards/margins": 1.4650166034698486, |
|
"rewards/rejected": -13.473907470703125, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36415040995193665, |
|
"grad_norm": 131.3884611405496, |
|
"learning_rate": 6.429810685883565e-07, |
|
"logits/chosen": -16.57655143737793, |
|
"logits/rejected": -16.592082977294922, |
|
"logps/chosen": -1.295839786529541, |
|
"logps/rejected": -1.7100400924682617, |
|
"loss": 3.3158, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -12.958398818969727, |
|
"rewards/margins": 4.142002582550049, |
|
"rewards/rejected": -17.100400924682617, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.366412213740458, |
|
"grad_norm": 131.23411742145908, |
|
"learning_rate": 6.404590864565088e-07, |
|
"logits/chosen": -17.949878692626953, |
|
"logits/rejected": -18.036605834960938, |
|
"logps/chosen": -1.171650767326355, |
|
"logps/rejected": -1.3193507194519043, |
|
"loss": 3.9769, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -11.716507911682129, |
|
"rewards/margins": 1.4770005941390991, |
|
"rewards/rejected": -13.19350814819336, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.3686740175289794, |
|
"grad_norm": 118.65175820446706, |
|
"learning_rate": 6.379220466587063e-07, |
|
"logits/chosen": -19.885251998901367, |
|
"logits/rejected": -19.330791473388672, |
|
"logps/chosen": -1.2915012836456299, |
|
"logps/rejected": -1.452092170715332, |
|
"loss": 3.4104, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -12.91501235961914, |
|
"rewards/margins": 1.6059094667434692, |
|
"rewards/rejected": -14.520920753479004, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.3709358213175007, |
|
"grad_norm": 115.53093238478114, |
|
"learning_rate": 6.353701080656254e-07, |
|
"logits/chosen": -18.273351669311523, |
|
"logits/rejected": -18.36724090576172, |
|
"logps/chosen": -1.3921793699264526, |
|
"logps/rejected": -1.6417995691299438, |
|
"loss": 3.3543, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -13.921795845031738, |
|
"rewards/margins": 2.496201276779175, |
|
"rewards/rejected": -16.41799545288086, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.37319762510602206, |
|
"grad_norm": 124.5033686839827, |
|
"learning_rate": 6.32803430480913e-07, |
|
"logits/chosen": -18.828781127929688, |
|
"logits/rejected": -18.2823429107666, |
|
"logps/chosen": -1.3336889743804932, |
|
"logps/rejected": -1.6634752750396729, |
|
"loss": 3.7914, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -13.336889266967773, |
|
"rewards/margins": 3.297863721847534, |
|
"rewards/rejected": -16.63475227355957, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3754594288945434, |
|
"grad_norm": 147.97213663568766, |
|
"learning_rate": 6.302221746311782e-07, |
|
"logits/chosen": -16.221012115478516, |
|
"logits/rejected": -15.638816833496094, |
|
"logps/chosen": -1.24800443649292, |
|
"logps/rejected": -1.5752636194229126, |
|
"loss": 3.9877, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -12.480045318603516, |
|
"rewards/margins": 3.272590398788452, |
|
"rewards/rejected": -15.752635955810547, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.37772123268306473, |
|
"grad_norm": 117.72793765671894, |
|
"learning_rate": 6.276265021559288e-07, |
|
"logits/chosen": -17.692129135131836, |
|
"logits/rejected": -17.678510665893555, |
|
"logps/chosen": -1.4085066318511963, |
|
"logps/rejected": -1.5413269996643066, |
|
"loss": 3.7453, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -14.085065841674805, |
|
"rewards/margins": 1.3282032012939453, |
|
"rewards/rejected": -15.41326904296875, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3799830364715861, |
|
"grad_norm": 123.6576549431064, |
|
"learning_rate": 6.250165755974487e-07, |
|
"logits/chosen": -18.768051147460938, |
|
"logits/rejected": -18.692035675048828, |
|
"logps/chosen": -1.2646162509918213, |
|
"logps/rejected": -1.4007298946380615, |
|
"loss": 3.4565, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -12.646160125732422, |
|
"rewards/margins": 1.361138939857483, |
|
"rewards/rejected": -14.00730037689209, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.3822448402601074, |
|
"grad_norm": 111.39581567132997, |
|
"learning_rate": 6.223925583906192e-07, |
|
"logits/chosen": -18.353418350219727, |
|
"logits/rejected": -17.735267639160156, |
|
"logps/chosen": -1.327247142791748, |
|
"logps/rejected": -1.6749954223632812, |
|
"loss": 3.351, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -13.272473335266113, |
|
"rewards/margins": 3.477482318878174, |
|
"rewards/rejected": -16.749956130981445, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.3845066440486288, |
|
"grad_norm": 115.58098332357874, |
|
"learning_rate": 6.19754614852685e-07, |
|
"logits/chosen": -17.524866104125977, |
|
"logits/rejected": -17.590625762939453, |
|
"logps/chosen": -1.1806279420852661, |
|
"logps/rejected": -1.5952324867248535, |
|
"loss": 3.6054, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.806280136108398, |
|
"rewards/margins": 4.146044731140137, |
|
"rewards/rejected": -15.952325820922852, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.38676844783715014, |
|
"grad_norm": 107.33538738128671, |
|
"learning_rate": 6.171029101729644e-07, |
|
"logits/chosen": -17.01272964477539, |
|
"logits/rejected": -17.01720428466797, |
|
"logps/chosen": -1.2651264667510986, |
|
"logps/rejected": -1.5476595163345337, |
|
"loss": 3.5012, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -12.651265144348145, |
|
"rewards/margins": 2.8253297805786133, |
|
"rewards/rejected": -15.476594924926758, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.38903025162567145, |
|
"grad_norm": 125.94524393171766, |
|
"learning_rate": 6.144376104025055e-07, |
|
"logits/chosen": -16.98217010498047, |
|
"logits/rejected": -16.957050323486328, |
|
"logps/chosen": -1.1778655052185059, |
|
"logps/rejected": -1.4646776914596558, |
|
"loss": 3.2286, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -11.778654098510742, |
|
"rewards/margins": 2.8681228160858154, |
|
"rewards/rejected": -14.64677619934082, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3912920554141928, |
|
"grad_norm": 115.30228389834534, |
|
"learning_rate": 6.117588824436873e-07, |
|
"logits/chosen": -17.979293823242188, |
|
"logits/rejected": -17.900651931762695, |
|
"logps/chosen": -1.158996820449829, |
|
"logps/rejected": -1.3833591938018799, |
|
"loss": 3.6577, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -11.589967727661133, |
|
"rewards/margins": 2.243624448776245, |
|
"rewards/rejected": -13.83359146118164, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3935538592027142, |
|
"grad_norm": 136.92412625465224, |
|
"learning_rate": 6.090668940397688e-07, |
|
"logits/chosen": -17.41019058227539, |
|
"logits/rejected": -17.082996368408203, |
|
"logps/chosen": -1.200378179550171, |
|
"logps/rejected": -1.5303970575332642, |
|
"loss": 3.3817, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -12.003782272338867, |
|
"rewards/margins": 3.3001890182495117, |
|
"rewards/rejected": -15.303971290588379, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.3958156629912355, |
|
"grad_norm": 97.25464380215708, |
|
"learning_rate": 6.063618137643844e-07, |
|
"logits/chosen": -17.9305419921875, |
|
"logits/rejected": -17.675626754760742, |
|
"logps/chosen": -1.0930852890014648, |
|
"logps/rejected": -1.3468796014785767, |
|
"loss": 3.0673, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -10.930851936340332, |
|
"rewards/margins": 2.5379440784454346, |
|
"rewards/rejected": -13.468796730041504, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.39807746677975686, |
|
"grad_norm": 109.33547647771188, |
|
"learning_rate": 6.03643811010988e-07, |
|
"logits/chosen": -18.435937881469727, |
|
"logits/rejected": -18.244789123535156, |
|
"logps/chosen": -1.421694278717041, |
|
"logps/rejected": -1.6195881366729736, |
|
"loss": 3.4374, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -14.216943740844727, |
|
"rewards/margins": 1.9789376258850098, |
|
"rewards/rejected": -16.195880889892578, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4003392705682782, |
|
"grad_norm": 124.54512017176646, |
|
"learning_rate": 6.009130559822453e-07, |
|
"logits/chosen": -18.292753219604492, |
|
"logits/rejected": -18.013795852661133, |
|
"logps/chosen": -1.2830564975738525, |
|
"logps/rejected": -1.6480597257614136, |
|
"loss": 3.5887, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -12.830564498901367, |
|
"rewards/margins": 3.6500320434570312, |
|
"rewards/rejected": -16.4805965423584, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.40260107435679954, |
|
"grad_norm": 117.11523823927375, |
|
"learning_rate": 5.981697196793758e-07, |
|
"logits/chosen": -17.92546844482422, |
|
"logits/rejected": -17.709861755371094, |
|
"logps/chosen": -1.328133225440979, |
|
"logps/rejected": -1.6237107515335083, |
|
"loss": 3.5286, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -13.281332015991211, |
|
"rewards/margins": 2.9557762145996094, |
|
"rewards/rejected": -16.23710823059082, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4048628781453209, |
|
"grad_norm": 135.21248374147646, |
|
"learning_rate": 5.954139738914446e-07, |
|
"logits/chosen": -16.00653648376465, |
|
"logits/rejected": -16.205612182617188, |
|
"logps/chosen": -1.5146088600158691, |
|
"logps/rejected": -1.722532868385315, |
|
"loss": 3.3813, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -15.146087646484375, |
|
"rewards/margins": 2.079242467880249, |
|
"rewards/rejected": -17.225330352783203, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4071246819338422, |
|
"grad_norm": 118.75409568384903, |
|
"learning_rate": 5.92645991184605e-07, |
|
"logits/chosen": -18.836807250976562, |
|
"logits/rejected": -18.096431732177734, |
|
"logps/chosen": -1.328464388847351, |
|
"logps/rejected": -1.7455822229385376, |
|
"loss": 3.0959, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -13.284643173217773, |
|
"rewards/margins": 4.171177387237549, |
|
"rewards/rejected": -17.455821990966797, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4093864857223636, |
|
"grad_norm": 149.61294643110665, |
|
"learning_rate": 5.898659448912917e-07, |
|
"logits/chosen": -19.704021453857422, |
|
"logits/rejected": -19.59141731262207, |
|
"logps/chosen": -1.322415828704834, |
|
"logps/rejected": -1.6818199157714844, |
|
"loss": 3.99, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -13.224160194396973, |
|
"rewards/margins": 3.5940399169921875, |
|
"rewards/rejected": -16.818199157714844, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.41164828951088495, |
|
"grad_norm": 122.65957791889335, |
|
"learning_rate": 5.870740090993676e-07, |
|
"logits/chosen": -18.265836715698242, |
|
"logits/rejected": -18.42105484008789, |
|
"logps/chosen": -1.5347073078155518, |
|
"logps/rejected": -1.7209949493408203, |
|
"loss": 3.171, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -15.347073554992676, |
|
"rewards/margins": 1.8628755807876587, |
|
"rewards/rejected": -17.209949493408203, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.41391009329940626, |
|
"grad_norm": 135.01368423233092, |
|
"learning_rate": 5.842703586412214e-07, |
|
"logits/chosen": -18.84733009338379, |
|
"logits/rejected": -18.886695861816406, |
|
"logps/chosen": -1.4349242448806763, |
|
"logps/rejected": -1.703200340270996, |
|
"loss": 3.7337, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.3492431640625, |
|
"rewards/margins": 2.6827609539031982, |
|
"rewards/rejected": -17.03200340270996, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4161718970879276, |
|
"grad_norm": 106.57687497552105, |
|
"learning_rate": 5.814551690828203e-07, |
|
"logits/chosen": -18.91258430480957, |
|
"logits/rejected": -18.3468017578125, |
|
"logps/chosen": -1.2373554706573486, |
|
"logps/rejected": -1.6082764863967896, |
|
"loss": 3.0833, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -12.373554229736328, |
|
"rewards/margins": 3.709210157394409, |
|
"rewards/rejected": -16.082765579223633, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.418433700876449, |
|
"grad_norm": 139.70919536594133, |
|
"learning_rate": 5.786286167127155e-07, |
|
"logits/chosen": -18.484155654907227, |
|
"logits/rejected": -18.344846725463867, |
|
"logps/chosen": -1.4353551864624023, |
|
"logps/rejected": -1.933018684387207, |
|
"loss": 3.4633, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -14.353551864624023, |
|
"rewards/margins": 4.976635932922363, |
|
"rewards/rejected": -19.33018684387207, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4206955046649703, |
|
"grad_norm": 128.37055319468598, |
|
"learning_rate": 5.757908785310031e-07, |
|
"logits/chosen": -17.390743255615234, |
|
"logits/rejected": -17.32730484008789, |
|
"logps/chosen": -1.4048118591308594, |
|
"logps/rejected": -1.8442890644073486, |
|
"loss": 3.7353, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.048118591308594, |
|
"rewards/margins": 4.394770622253418, |
|
"rewards/rejected": -18.442888259887695, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.42295730845349166, |
|
"grad_norm": 125.14297515296605, |
|
"learning_rate": 5.729421322382399e-07, |
|
"logits/chosen": -16.7410888671875, |
|
"logits/rejected": -16.759994506835938, |
|
"logps/chosen": -1.1190869808197021, |
|
"logps/rejected": -1.3602699041366577, |
|
"loss": 3.4939, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -11.19087028503418, |
|
"rewards/margins": 2.4118287563323975, |
|
"rewards/rejected": -13.602697372436523, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.42521911224201303, |
|
"grad_norm": 121.636291085123, |
|
"learning_rate": 5.700825562243163e-07, |
|
"logits/chosen": -17.965713500976562, |
|
"logits/rejected": -17.82717514038086, |
|
"logps/chosen": -1.3181742429733276, |
|
"logps/rejected": -1.5977309942245483, |
|
"loss": 3.2506, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -13.181741714477539, |
|
"rewards/margins": 2.7955687046051025, |
|
"rewards/rejected": -15.977310180664062, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.42748091603053434, |
|
"grad_norm": 115.58317098156645, |
|
"learning_rate": 5.672123295572854e-07, |
|
"logits/chosen": -16.11078453063965, |
|
"logits/rejected": -15.989376068115234, |
|
"logps/chosen": -1.318684458732605, |
|
"logps/rejected": -1.7406071424484253, |
|
"loss": 3.2275, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -13.186845779418945, |
|
"rewards/margins": 4.219226837158203, |
|
"rewards/rejected": -17.40607261657715, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.4297427198190557, |
|
"grad_norm": 110.76706668825072, |
|
"learning_rate": 5.643316319721487e-07, |
|
"logits/chosen": -21.12446403503418, |
|
"logits/rejected": -21.084503173828125, |
|
"logps/chosen": -1.7526358366012573, |
|
"logps/rejected": -1.8315396308898926, |
|
"loss": 3.7724, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -17.52635955810547, |
|
"rewards/margins": 0.789039134979248, |
|
"rewards/rejected": -18.315397262573242, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.432004523607577, |
|
"grad_norm": 128.02917137545202, |
|
"learning_rate": 5.614406438596026e-07, |
|
"logits/chosen": -18.617406845092773, |
|
"logits/rejected": -18.170780181884766, |
|
"logps/chosen": -1.6118402481079102, |
|
"logps/rejected": -1.8639785051345825, |
|
"loss": 3.8619, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -16.1184024810791, |
|
"rewards/margins": 2.521383285522461, |
|
"rewards/rejected": -18.639785766601562, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.4342663273960984, |
|
"grad_norm": 128.62006828713473, |
|
"learning_rate": 5.585395462547406e-07, |
|
"logits/chosen": -17.90593719482422, |
|
"logits/rejected": -18.07216453552246, |
|
"logps/chosen": -1.6061056852340698, |
|
"logps/rejected": -1.8645837306976318, |
|
"loss": 3.6755, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -16.06105613708496, |
|
"rewards/margins": 2.5847792625427246, |
|
"rewards/rejected": -18.645837783813477, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.43652813118461975, |
|
"grad_norm": 115.59247682163482, |
|
"learning_rate": 5.55628520825718e-07, |
|
"logits/chosen": -17.22564125061035, |
|
"logits/rejected": -17.25448989868164, |
|
"logps/chosen": -1.4208124876022339, |
|
"logps/rejected": -1.9312503337860107, |
|
"loss": 3.3126, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -14.208124160766602, |
|
"rewards/margins": 5.104379177093506, |
|
"rewards/rejected": -19.312503814697266, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.43878993497314106, |
|
"grad_norm": 125.18412016900433, |
|
"learning_rate": 5.527077498623752e-07, |
|
"logits/chosen": -16.629623413085938, |
|
"logits/rejected": -16.644580841064453, |
|
"logps/chosen": -1.4364906549453735, |
|
"logps/rejected": -1.7086538076400757, |
|
"loss": 3.5968, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -14.364906311035156, |
|
"rewards/margins": 2.7216320037841797, |
|
"rewards/rejected": -17.086536407470703, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4410517387616624, |
|
"grad_norm": 95.71934222963769, |
|
"learning_rate": 5.497774162648228e-07, |
|
"logits/chosen": -17.9370059967041, |
|
"logits/rejected": -17.442899703979492, |
|
"logps/chosen": -1.521501064300537, |
|
"logps/rejected": -2.050462484359741, |
|
"loss": 2.8302, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.215010643005371, |
|
"rewards/margins": 5.289614677429199, |
|
"rewards/rejected": -20.50462532043457, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4433135425501838, |
|
"grad_norm": 118.37601502769598, |
|
"learning_rate": 5.468377035319882e-07, |
|
"logits/chosen": -18.176965713500977, |
|
"logits/rejected": -17.80450439453125, |
|
"logps/chosen": -1.5505520105361938, |
|
"logps/rejected": -2.088866949081421, |
|
"loss": 3.251, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -15.505517959594727, |
|
"rewards/margins": 5.383152008056641, |
|
"rewards/rejected": -20.888671875, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.4455753463387051, |
|
"grad_norm": 114.61725176586181, |
|
"learning_rate": 5.438887957501248e-07, |
|
"logits/chosen": -18.173625946044922, |
|
"logits/rejected": -18.10104751586914, |
|
"logps/chosen": -1.6463440656661987, |
|
"logps/rejected": -1.9076793193817139, |
|
"loss": 3.6095, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -16.46343994140625, |
|
"rewards/margins": 2.6133527755737305, |
|
"rewards/rejected": -19.076791763305664, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.44783715012722647, |
|
"grad_norm": 131.97444146862202, |
|
"learning_rate": 5.409308775812844e-07, |
|
"logits/chosen": -17.92854118347168, |
|
"logits/rejected": -18.174327850341797, |
|
"logps/chosen": -1.7027937173843384, |
|
"logps/rejected": -1.9399760961532593, |
|
"loss": 3.6652, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.027938842773438, |
|
"rewards/margins": 2.3718223571777344, |
|
"rewards/rejected": -19.39975929260254, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.45009895391574783, |
|
"grad_norm": 112.00245200831505, |
|
"learning_rate": 5.379641342517541e-07, |
|
"logits/chosen": -17.90815544128418, |
|
"logits/rejected": -17.855358123779297, |
|
"logps/chosen": -1.369492530822754, |
|
"logps/rejected": -1.7647912502288818, |
|
"loss": 3.6049, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -13.694926261901855, |
|
"rewards/margins": 3.9529881477355957, |
|
"rewards/rejected": -17.64791488647461, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.45236075770426915, |
|
"grad_norm": 110.39676860636777, |
|
"learning_rate": 5.349887515404564e-07, |
|
"logits/chosen": -19.14180564880371, |
|
"logits/rejected": -18.728225708007812, |
|
"logps/chosen": -1.567470669746399, |
|
"logps/rejected": -1.8445343971252441, |
|
"loss": 3.0688, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -15.674705505371094, |
|
"rewards/margins": 2.7706375122070312, |
|
"rewards/rejected": -18.445341110229492, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4546225614927905, |
|
"grad_norm": 112.66708655546361, |
|
"learning_rate": 5.320049157673163e-07, |
|
"logits/chosen": -19.58551597595215, |
|
"logits/rejected": -19.09307289123535, |
|
"logps/chosen": -1.4883310794830322, |
|
"logps/rejected": -1.705352783203125, |
|
"loss": 3.2351, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -14.88331127166748, |
|
"rewards/margins": 2.1702170372009277, |
|
"rewards/rejected": -17.05352783203125, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.4568843652813118, |
|
"grad_norm": 140.2319323230872, |
|
"learning_rate": 5.290128137815938e-07, |
|
"logits/chosen": -18.30789566040039, |
|
"logits/rejected": -18.139724731445312, |
|
"logps/chosen": -1.4757699966430664, |
|
"logps/rejected": -1.8749364614486694, |
|
"loss": 3.259, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.75770092010498, |
|
"rewards/margins": 3.991664409637451, |
|
"rewards/rejected": -18.749366760253906, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.4591461690698332, |
|
"grad_norm": 124.01739804659297, |
|
"learning_rate": 5.260126329501828e-07, |
|
"logits/chosen": -18.476545333862305, |
|
"logits/rejected": -18.295202255249023, |
|
"logps/chosen": -1.3175721168518066, |
|
"logps/rejected": -1.8804268836975098, |
|
"loss": 2.8801, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -13.175721168518066, |
|
"rewards/margins": 5.628549098968506, |
|
"rewards/rejected": -18.804269790649414, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.46140797285835455, |
|
"grad_norm": 110.8800697134872, |
|
"learning_rate": 5.230045611458789e-07, |
|
"logits/chosen": -19.596027374267578, |
|
"logits/rejected": -19.41700553894043, |
|
"logps/chosen": -1.2929422855377197, |
|
"logps/rejected": -1.7962149381637573, |
|
"loss": 3.0127, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -12.929424285888672, |
|
"rewards/margins": 5.032725811004639, |
|
"rewards/rejected": -17.962148666381836, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.46366977664687586, |
|
"grad_norm": 130.9926070201208, |
|
"learning_rate": 5.199887867356143e-07, |
|
"logits/chosen": -18.125207901000977, |
|
"logits/rejected": -18.14871597290039, |
|
"logps/chosen": -1.4900728464126587, |
|
"logps/rejected": -1.9675356149673462, |
|
"loss": 3.1097, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.900728225708008, |
|
"rewards/margins": 4.774627685546875, |
|
"rewards/rejected": -19.675355911254883, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.46593158043539723, |
|
"grad_norm": 120.13166100619164, |
|
"learning_rate": 5.16965498568662e-07, |
|
"logits/chosen": -18.744281768798828, |
|
"logits/rejected": -18.2139835357666, |
|
"logps/chosen": -1.6062824726104736, |
|
"logps/rejected": -2.2554736137390137, |
|
"loss": 3.0776, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.062824249267578, |
|
"rewards/margins": 6.491910934448242, |
|
"rewards/rejected": -22.554733276367188, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4681933842239186, |
|
"grad_norm": 120.48417226561662, |
|
"learning_rate": 5.139348859648098e-07, |
|
"logits/chosen": -18.755203247070312, |
|
"logits/rejected": -18.599624633789062, |
|
"logps/chosen": -1.2531086206436157, |
|
"logps/rejected": -1.658897042274475, |
|
"loss": 3.4743, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -12.531085968017578, |
|
"rewards/margins": 4.05788516998291, |
|
"rewards/rejected": -16.588970184326172, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.4704551880124399, |
|
"grad_norm": 108.6230474054679, |
|
"learning_rate": 5.10897138702506e-07, |
|
"logits/chosen": -19.518808364868164, |
|
"logits/rejected": -19.44478416442871, |
|
"logps/chosen": -1.500132441520691, |
|
"logps/rejected": -1.9223947525024414, |
|
"loss": 2.9948, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -15.001323699951172, |
|
"rewards/margins": 4.2226243019104, |
|
"rewards/rejected": -19.223947525024414, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4727169918009613, |
|
"grad_norm": 137.90239347147582, |
|
"learning_rate": 5.078524470069743e-07, |
|
"logits/chosen": -20.06089973449707, |
|
"logits/rejected": -19.945919036865234, |
|
"logps/chosen": -1.5609779357910156, |
|
"logps/rejected": -2.011448860168457, |
|
"loss": 3.3272, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -15.609781265258789, |
|
"rewards/margins": 4.504709720611572, |
|
"rewards/rejected": -20.114490509033203, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.47497879558948264, |
|
"grad_norm": 122.82261924752474, |
|
"learning_rate": 5.048010015383021e-07, |
|
"logits/chosen": -20.49317169189453, |
|
"logits/rejected": -20.084096908569336, |
|
"logps/chosen": -1.8195672035217285, |
|
"logps/rejected": -2.4391162395477295, |
|
"loss": 3.0204, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -18.1956729888916, |
|
"rewards/margins": 6.195489406585693, |
|
"rewards/rejected": -24.39116096496582, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.47724059937800395, |
|
"grad_norm": 133.40914227876348, |
|
"learning_rate": 5.01742993379502e-07, |
|
"logits/chosen": -20.487716674804688, |
|
"logits/rejected": -20.366500854492188, |
|
"logps/chosen": -1.6833590269088745, |
|
"logps/rejected": -1.998462200164795, |
|
"loss": 3.3079, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.833589553833008, |
|
"rewards/margins": 3.151031017303467, |
|
"rewards/rejected": -19.984619140625, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.4795024031665253, |
|
"grad_norm": 121.21962647812443, |
|
"learning_rate": 4.986786140245446e-07, |
|
"logits/chosen": -18.02095603942871, |
|
"logits/rejected": -18.15108299255371, |
|
"logps/chosen": -1.5042206048965454, |
|
"logps/rejected": -1.7882376909255981, |
|
"loss": 3.3782, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.042205810546875, |
|
"rewards/margins": 2.8401715755462646, |
|
"rewards/rejected": -17.88237762451172, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.4817642069550466, |
|
"grad_norm": 138.8873467278285, |
|
"learning_rate": 4.956080553663687e-07, |
|
"logits/chosen": -19.02423095703125, |
|
"logits/rejected": -18.977191925048828, |
|
"logps/chosen": -1.8565832376480103, |
|
"logps/rejected": -2.2188644409179688, |
|
"loss": 3.2943, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.565834045410156, |
|
"rewards/margins": 3.622810125350952, |
|
"rewards/rejected": -22.188644409179688, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.484026010743568, |
|
"grad_norm": 120.38379558424705, |
|
"learning_rate": 4.925315096848636e-07, |
|
"logits/chosen": -17.092893600463867, |
|
"logits/rejected": -17.469482421875, |
|
"logps/chosen": -1.5928668975830078, |
|
"logps/rejected": -2.1185383796691895, |
|
"loss": 3.2942, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -15.928670883178711, |
|
"rewards/margins": 5.256712913513184, |
|
"rewards/rejected": -21.185382843017578, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.48628781453208936, |
|
"grad_norm": 141.47624101645354, |
|
"learning_rate": 4.894491696348293e-07, |
|
"logits/chosen": -18.64133644104004, |
|
"logits/rejected": -18.508480072021484, |
|
"logps/chosen": -1.7584447860717773, |
|
"logps/rejected": -1.9836417436599731, |
|
"loss": 3.8231, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.58444595336914, |
|
"rewards/margins": 2.2519688606262207, |
|
"rewards/rejected": -19.836416244506836, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.48854961832061067, |
|
"grad_norm": 100.6834936019963, |
|
"learning_rate": 4.863612282339116e-07, |
|
"logits/chosen": -18.912193298339844, |
|
"logits/rejected": -18.51605224609375, |
|
"logps/chosen": -1.4050565958023071, |
|
"logps/rejected": -1.7702962160110474, |
|
"loss": 3.2087, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -14.050565719604492, |
|
"rewards/margins": 3.6523966789245605, |
|
"rewards/rejected": -17.70296287536621, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.49081142210913203, |
|
"grad_norm": 126.90139310026458, |
|
"learning_rate": 4.832678788505161e-07, |
|
"logits/chosen": -20.156646728515625, |
|
"logits/rejected": -20.06357192993164, |
|
"logps/chosen": -1.8053876161575317, |
|
"logps/rejected": -2.161924362182617, |
|
"loss": 3.2957, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.053876876831055, |
|
"rewards/margins": 3.565363883972168, |
|
"rewards/rejected": -21.619239807128906, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.4930732258976534, |
|
"grad_norm": 112.75413161542235, |
|
"learning_rate": 4.801693151916985e-07, |
|
"logits/chosen": -18.10401153564453, |
|
"logits/rejected": -18.311925888061523, |
|
"logps/chosen": -1.7536580562591553, |
|
"logps/rejected": -2.2070531845092773, |
|
"loss": 3.1204, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -17.53658103942871, |
|
"rewards/margins": 4.533949375152588, |
|
"rewards/rejected": -22.070531845092773, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.4953350296861747, |
|
"grad_norm": 120.36363917960784, |
|
"learning_rate": 4.770657312910354e-07, |
|
"logits/chosen": -19.36819839477539, |
|
"logits/rejected": -19.35448455810547, |
|
"logps/chosen": -1.7574162483215332, |
|
"logps/rejected": -2.383183002471924, |
|
"loss": 3.6944, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.57416343688965, |
|
"rewards/margins": 6.257665634155273, |
|
"rewards/rejected": -23.831829071044922, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.4975968334746961, |
|
"grad_norm": 123.44879094752798, |
|
"learning_rate": 4.739573214964729e-07, |
|
"logits/chosen": -18.070960998535156, |
|
"logits/rejected": -17.84283447265625, |
|
"logps/chosen": -1.280721664428711, |
|
"logps/rejected": -1.6026414632797241, |
|
"loss": 2.9387, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -12.80721664428711, |
|
"rewards/margins": 3.2191972732543945, |
|
"rewards/rejected": -16.02641487121582, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.49985863726321744, |
|
"grad_norm": 170.10224468225707, |
|
"learning_rate": 4.7084428045815733e-07, |
|
"logits/chosen": -19.83563995361328, |
|
"logits/rejected": -19.78580665588379, |
|
"logps/chosen": -1.7471987009048462, |
|
"logps/rejected": -2.135683059692383, |
|
"loss": 3.8038, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.471986770629883, |
|
"rewards/margins": 3.884843349456787, |
|
"rewards/rejected": -21.356828689575195, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5021204410517388, |
|
"grad_norm": 133.7068988108329, |
|
"learning_rate": 4.677268031162457e-07, |
|
"logits/chosen": -18.73598861694336, |
|
"logits/rejected": -18.53685188293457, |
|
"logps/chosen": -1.8922369480133057, |
|
"logps/rejected": -2.408550977706909, |
|
"loss": 3.5777, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -18.9223690032959, |
|
"rewards/margins": 5.163141250610352, |
|
"rewards/rejected": -24.085508346557617, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5043822448402601, |
|
"grad_norm": 124.33610987958505, |
|
"learning_rate": 4.646050846886985e-07, |
|
"logits/chosen": -17.20172882080078, |
|
"logits/rejected": -17.508880615234375, |
|
"logps/chosen": -1.3590439558029175, |
|
"logps/rejected": -1.6888562440872192, |
|
"loss": 3.4386, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -13.59044075012207, |
|
"rewards/margins": 3.2981221675872803, |
|
"rewards/rejected": -16.88856315612793, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5066440486287814, |
|
"grad_norm": 142.45176890673545, |
|
"learning_rate": 4.6147932065905494e-07, |
|
"logits/chosen": -18.200490951538086, |
|
"logits/rejected": -17.826982498168945, |
|
"logps/chosen": -1.4690287113189697, |
|
"logps/rejected": -2.0389232635498047, |
|
"loss": 3.4706, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -14.690287590026855, |
|
"rewards/margins": 5.698945999145508, |
|
"rewards/rejected": -20.38923454284668, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5089058524173028, |
|
"grad_norm": 122.4324863969919, |
|
"learning_rate": 4.5834970676419214e-07, |
|
"logits/chosen": -18.388614654541016, |
|
"logits/rejected": -18.511974334716797, |
|
"logps/chosen": -1.8286144733428955, |
|
"logps/rejected": -2.2092440128326416, |
|
"loss": 3.3465, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -18.28614616394043, |
|
"rewards/margins": 3.8062963485717773, |
|
"rewards/rejected": -22.09244155883789, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5111676562058242, |
|
"grad_norm": 129.2266776919472, |
|
"learning_rate": 4.552164389820673e-07, |
|
"logits/chosen": -19.732501983642578, |
|
"logits/rejected": -19.419715881347656, |
|
"logps/chosen": -1.65671706199646, |
|
"logps/rejected": -1.9091390371322632, |
|
"loss": 3.195, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -16.567171096801758, |
|
"rewards/margins": 2.5242207050323486, |
|
"rewards/rejected": -19.09139060974121, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5134294599943455, |
|
"grad_norm": 133.10160051050664, |
|
"learning_rate": 4.5207971351944605e-07, |
|
"logits/chosen": -18.20813751220703, |
|
"logits/rejected": -17.936553955078125, |
|
"logps/chosen": -1.5020235776901245, |
|
"logps/rejected": -2.2087697982788086, |
|
"loss": 3.6883, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -15.02023696899414, |
|
"rewards/margins": 7.067460536956787, |
|
"rewards/rejected": -22.087697982788086, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5156912637828668, |
|
"grad_norm": 115.75597435267949, |
|
"learning_rate": 4.489397267996157e-07, |
|
"logits/chosen": -18.27444839477539, |
|
"logits/rejected": -18.018796920776367, |
|
"logps/chosen": -1.620775580406189, |
|
"logps/rejected": -2.2751684188842773, |
|
"loss": 3.0586, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.2077579498291, |
|
"rewards/margins": 6.543926239013672, |
|
"rewards/rejected": -22.75168228149414, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5179530675713881, |
|
"grad_norm": 119.59564297014721, |
|
"learning_rate": 4.45796675450085e-07, |
|
"logits/chosen": -19.229650497436523, |
|
"logits/rejected": -19.270090103149414, |
|
"logps/chosen": -1.7479207515716553, |
|
"logps/rejected": -1.9961845874786377, |
|
"loss": 3.46, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.47920799255371, |
|
"rewards/margins": 2.482638120651245, |
|
"rewards/rejected": -19.96184730529785, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5202148713599095, |
|
"grad_norm": 123.01980120189968, |
|
"learning_rate": 4.4265075629027126e-07, |
|
"logits/chosen": -20.439476013183594, |
|
"logits/rejected": -20.24441146850586, |
|
"logps/chosen": -1.8146342039108276, |
|
"logps/rejected": -2.263535976409912, |
|
"loss": 3.4224, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -18.146343231201172, |
|
"rewards/margins": 4.489017486572266, |
|
"rewards/rejected": -22.635360717773438, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5224766751484309, |
|
"grad_norm": 107.58798664449722, |
|
"learning_rate": 4.3950216631917563e-07, |
|
"logits/chosen": -19.177587509155273, |
|
"logits/rejected": -18.99146270751953, |
|
"logps/chosen": -1.755948543548584, |
|
"logps/rejected": -2.040555477142334, |
|
"loss": 2.9987, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -17.559486389160156, |
|
"rewards/margins": 2.846068859100342, |
|
"rewards/rejected": -20.405555725097656, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.5247384789369522, |
|
"grad_norm": 109.78374887710692, |
|
"learning_rate": 4.3635110270304676e-07, |
|
"logits/chosen": -18.429826736450195, |
|
"logits/rejected": -18.861085891723633, |
|
"logps/chosen": -1.559441328048706, |
|
"logps/rejected": -1.994086742401123, |
|
"loss": 2.6686, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -15.59441089630127, |
|
"rewards/margins": 4.3464555740356445, |
|
"rewards/rejected": -19.940868377685547, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5270002827254736, |
|
"grad_norm": 116.99222466276613, |
|
"learning_rate": 4.331977627630339e-07, |
|
"logits/chosen": -17.902793884277344, |
|
"logits/rejected": -18.096345901489258, |
|
"logps/chosen": -1.382148265838623, |
|
"logps/rejected": -1.986084222793579, |
|
"loss": 2.6777, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -13.82148265838623, |
|
"rewards/margins": 6.039360046386719, |
|
"rewards/rejected": -19.860841751098633, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5292620865139949, |
|
"grad_norm": 111.82478148043788, |
|
"learning_rate": 4.300423439628313e-07, |
|
"logits/chosen": -18.652141571044922, |
|
"logits/rejected": -18.740739822387695, |
|
"logps/chosen": -1.7629899978637695, |
|
"logps/rejected": -2.2177019119262695, |
|
"loss": 2.6484, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -17.629901885986328, |
|
"rewards/margins": 4.547117233276367, |
|
"rewards/rejected": -22.177017211914062, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5315238903025162, |
|
"grad_norm": 124.7698836200117, |
|
"learning_rate": 4.268850438963118e-07, |
|
"logits/chosen": -19.93507957458496, |
|
"logits/rejected": -19.908302307128906, |
|
"logps/chosen": -1.6999324560165405, |
|
"logps/rejected": -2.0379884243011475, |
|
"loss": 3.3517, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.999324798583984, |
|
"rewards/margins": 3.380560874938965, |
|
"rewards/rejected": -20.379884719848633, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5337856940910376, |
|
"grad_norm": 130.0202969035073, |
|
"learning_rate": 4.2372606027515463e-07, |
|
"logits/chosen": -16.75927734375, |
|
"logits/rejected": -16.6347713470459, |
|
"logps/chosen": -1.7100436687469482, |
|
"logps/rejected": -2.0675430297851562, |
|
"loss": 3.3574, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.100439071655273, |
|
"rewards/margins": 3.574993133544922, |
|
"rewards/rejected": -20.675430297851562, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.536047497879559, |
|
"grad_norm": 142.3393610446344, |
|
"learning_rate": 4.2056559091646387e-07, |
|
"logits/chosen": -19.50155258178711, |
|
"logits/rejected": -19.233562469482422, |
|
"logps/chosen": -1.7303612232208252, |
|
"logps/rejected": -1.9693727493286133, |
|
"loss": 3.8961, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -17.303613662719727, |
|
"rewards/margins": 2.390113353729248, |
|
"rewards/rejected": -19.693727493286133, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.5383093016680803, |
|
"grad_norm": 132.7580435084033, |
|
"learning_rate": 4.1740383373038116e-07, |
|
"logits/chosen": -19.230268478393555, |
|
"logits/rejected": -19.026260375976562, |
|
"logps/chosen": -1.7516860961914062, |
|
"logps/rejected": -2.302055835723877, |
|
"loss": 2.958, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -17.516862869262695, |
|
"rewards/margins": 5.503696441650391, |
|
"rewards/rejected": -23.020557403564453, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5405711054566016, |
|
"grad_norm": 146.2605729576581, |
|
"learning_rate": 4.1424098670769255e-07, |
|
"logits/chosen": -16.876083374023438, |
|
"logits/rejected": -16.858041763305664, |
|
"logps/chosen": -1.4845470190048218, |
|
"logps/rejected": -1.819509506225586, |
|
"loss": 3.4692, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.845470428466797, |
|
"rewards/margins": 3.349626302719116, |
|
"rewards/rejected": -18.195096969604492, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.542832909245123, |
|
"grad_norm": 103.3850975548694, |
|
"learning_rate": 4.1107724790743007e-07, |
|
"logits/chosen": -19.218101501464844, |
|
"logits/rejected": -19.07253646850586, |
|
"logps/chosen": -1.6440057754516602, |
|
"logps/rejected": -2.002814531326294, |
|
"loss": 2.7882, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.4400577545166, |
|
"rewards/margins": 3.5880849361419678, |
|
"rewards/rejected": -20.028141021728516, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5450947130336443, |
|
"grad_norm": 108.59641705360205, |
|
"learning_rate": 4.0791281544446947e-07, |
|
"logits/chosen": -18.29979705810547, |
|
"logits/rejected": -18.103771209716797, |
|
"logps/chosen": -1.5138269662857056, |
|
"logps/rejected": -1.9497716426849365, |
|
"loss": 2.4937, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.138269424438477, |
|
"rewards/margins": 4.3594465255737305, |
|
"rewards/rejected": -19.49771499633789, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.5473565168221657, |
|
"grad_norm": 118.47748367490442, |
|
"learning_rate": 4.0474788747712416e-07, |
|
"logits/chosen": -16.5893611907959, |
|
"logits/rejected": -16.470932006835938, |
|
"logps/chosen": -1.4450711011886597, |
|
"logps/rejected": -1.7044168710708618, |
|
"loss": 3.6487, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -14.450712203979492, |
|
"rewards/margins": 2.5934557914733887, |
|
"rewards/rejected": -17.04416847229004, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.549618320610687, |
|
"grad_norm": 127.18536302103081, |
|
"learning_rate": 4.0158266219473573e-07, |
|
"logits/chosen": -19.53125762939453, |
|
"logits/rejected": -19.866533279418945, |
|
"logps/chosen": -1.3576165437698364, |
|
"logps/rejected": -1.6340572834014893, |
|
"loss": 2.8172, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -13.576166152954102, |
|
"rewards/margins": 2.7644076347351074, |
|
"rewards/rejected": -16.340572357177734, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.5518801243992084, |
|
"grad_norm": 144.24875011893795, |
|
"learning_rate": 3.984173378052643e-07, |
|
"logits/chosen": -17.787691116333008, |
|
"logits/rejected": -17.381996154785156, |
|
"logps/chosen": -1.4772697687149048, |
|
"logps/rejected": -1.9439443349838257, |
|
"loss": 2.7811, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.772696495056152, |
|
"rewards/margins": 4.666746139526367, |
|
"rewards/rejected": -19.439443588256836, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.5541419281877297, |
|
"grad_norm": 130.02624554472013, |
|
"learning_rate": 3.9525211252287585e-07, |
|
"logits/chosen": -17.32337760925293, |
|
"logits/rejected": -17.11507797241211, |
|
"logps/chosen": -1.8103289604187012, |
|
"logps/rejected": -2.3584718704223633, |
|
"loss": 2.8682, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.103288650512695, |
|
"rewards/margins": 5.4814324378967285, |
|
"rewards/rejected": -23.584720611572266, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.556403731976251, |
|
"grad_norm": 112.27330200229477, |
|
"learning_rate": 3.920871845555305e-07, |
|
"logits/chosen": -19.526695251464844, |
|
"logits/rejected": -19.684539794921875, |
|
"logps/chosen": -1.6799914836883545, |
|
"logps/rejected": -2.0144991874694824, |
|
"loss": 3.2575, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.799915313720703, |
|
"rewards/margins": 3.3450779914855957, |
|
"rewards/rejected": -20.14499282836914, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5586655357647724, |
|
"grad_norm": 123.49123400492937, |
|
"learning_rate": 3.8892275209256984e-07, |
|
"logits/chosen": -17.89940643310547, |
|
"logits/rejected": -18.42051124572754, |
|
"logps/chosen": -1.7134385108947754, |
|
"logps/rejected": -2.140021324157715, |
|
"loss": 3.0396, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -17.134387969970703, |
|
"rewards/margins": 4.26582670211792, |
|
"rewards/rejected": -21.40021324157715, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5609273395532938, |
|
"grad_norm": 135.4244307593356, |
|
"learning_rate": 3.8575901329230747e-07, |
|
"logits/chosen": -19.93168067932129, |
|
"logits/rejected": -19.66005516052246, |
|
"logps/chosen": -2.0851705074310303, |
|
"logps/rejected": -2.714930534362793, |
|
"loss": 3.4958, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -20.85170555114746, |
|
"rewards/margins": 6.297600746154785, |
|
"rewards/rejected": -27.14930534362793, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.5631891433418151, |
|
"grad_norm": 126.1113881766967, |
|
"learning_rate": 3.8259616626961886e-07, |
|
"logits/chosen": -19.275150299072266, |
|
"logits/rejected": -19.156465530395508, |
|
"logps/chosen": -1.4787800312042236, |
|
"logps/rejected": -1.7590197324752808, |
|
"loss": 3.2882, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.787800788879395, |
|
"rewards/margins": 2.8023955821990967, |
|
"rewards/rejected": -17.59019660949707, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5654509471303364, |
|
"grad_norm": 143.11865197858194, |
|
"learning_rate": 3.794344090835362e-07, |
|
"logits/chosen": -19.26443099975586, |
|
"logits/rejected": -18.946765899658203, |
|
"logps/chosen": -1.713568925857544, |
|
"logps/rejected": -2.3817105293273926, |
|
"loss": 3.9405, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.13568878173828, |
|
"rewards/margins": 6.681418418884277, |
|
"rewards/rejected": -23.817108154296875, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5677127509188578, |
|
"grad_norm": 135.07580917666817, |
|
"learning_rate": 3.7627393972484534e-07, |
|
"logits/chosen": -19.917835235595703, |
|
"logits/rejected": -19.873958587646484, |
|
"logps/chosen": -1.6692298650741577, |
|
"logps/rejected": -1.9403576850891113, |
|
"loss": 3.9729, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -16.692298889160156, |
|
"rewards/margins": 2.711277961730957, |
|
"rewards/rejected": -19.40357780456543, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5699745547073791, |
|
"grad_norm": 121.70724549497747, |
|
"learning_rate": 3.7311495610368823e-07, |
|
"logits/chosen": -19.70743179321289, |
|
"logits/rejected": -19.559131622314453, |
|
"logps/chosen": -1.725498914718628, |
|
"logps/rejected": -1.9476670026779175, |
|
"loss": 3.2505, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -17.254987716674805, |
|
"rewards/margins": 2.221681833267212, |
|
"rewards/rejected": -19.476669311523438, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5722363584959005, |
|
"grad_norm": 92.60358297039429, |
|
"learning_rate": 3.699576560371689e-07, |
|
"logits/chosen": -19.507673263549805, |
|
"logits/rejected": -19.240455627441406, |
|
"logps/chosen": -1.6785533428192139, |
|
"logps/rejected": -2.16404128074646, |
|
"loss": 2.668, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.785533905029297, |
|
"rewards/margins": 4.854878902435303, |
|
"rewards/rejected": -21.640413284301758, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5744981622844219, |
|
"grad_norm": 117.8494457797105, |
|
"learning_rate": 3.66802237236966e-07, |
|
"logits/chosen": -17.930335998535156, |
|
"logits/rejected": -17.775684356689453, |
|
"logps/chosen": -1.4922497272491455, |
|
"logps/rejected": -1.9657816886901855, |
|
"loss": 2.9119, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -14.92249584197998, |
|
"rewards/margins": 4.735319137573242, |
|
"rewards/rejected": -19.65781593322754, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5767599660729432, |
|
"grad_norm": 128.0769904494541, |
|
"learning_rate": 3.636488972969532e-07, |
|
"logits/chosen": -18.133464813232422, |
|
"logits/rejected": -18.1317195892334, |
|
"logps/chosen": -1.7718310356140137, |
|
"logps/rejected": -2.1774401664733887, |
|
"loss": 3.32, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.71830940246582, |
|
"rewards/margins": 4.056089878082275, |
|
"rewards/rejected": -21.77440071105957, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5790217698614645, |
|
"grad_norm": 109.14078747974439, |
|
"learning_rate": 3.604978336808244e-07, |
|
"logits/chosen": -18.034282684326172, |
|
"logits/rejected": -17.809715270996094, |
|
"logps/chosen": -1.6046905517578125, |
|
"logps/rejected": -2.003675937652588, |
|
"loss": 3.0045, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.046907424926758, |
|
"rewards/margins": 3.9898502826690674, |
|
"rewards/rejected": -20.03675651550293, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5812835736499858, |
|
"grad_norm": 143.3661499166921, |
|
"learning_rate": 3.5734924370972876e-07, |
|
"logits/chosen": -18.07189178466797, |
|
"logits/rejected": -17.913497924804688, |
|
"logps/chosen": -1.4232511520385742, |
|
"logps/rejected": -1.7245614528656006, |
|
"loss": 3.0709, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.232512474060059, |
|
"rewards/margins": 3.0131046772003174, |
|
"rewards/rejected": -17.245615005493164, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5835453774385072, |
|
"grad_norm": 115.02007432023626, |
|
"learning_rate": 3.5420332454991504e-07, |
|
"logits/chosen": -18.96527862548828, |
|
"logits/rejected": -18.798969268798828, |
|
"logps/chosen": -1.7819788455963135, |
|
"logps/rejected": -2.11525559425354, |
|
"loss": 3.3907, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -17.819787979125977, |
|
"rewards/margins": 3.3327670097351074, |
|
"rewards/rejected": -21.152557373046875, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5858071812270286, |
|
"grad_norm": 116.46242436554796, |
|
"learning_rate": 3.510602732003843e-07, |
|
"logits/chosen": -18.923112869262695, |
|
"logits/rejected": -19.192468643188477, |
|
"logps/chosen": -1.7312378883361816, |
|
"logps/rejected": -2.397825002670288, |
|
"loss": 2.9306, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -17.312379837036133, |
|
"rewards/margins": 6.665870189666748, |
|
"rewards/rejected": -23.978248596191406, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5880689850155499, |
|
"grad_norm": 130.25971956929465, |
|
"learning_rate": 3.4792028648055396e-07, |
|
"logits/chosen": -18.882343292236328, |
|
"logits/rejected": -18.996551513671875, |
|
"logps/chosen": -1.6002156734466553, |
|
"logps/rejected": -2.0233724117279053, |
|
"loss": 2.9946, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.00215721130371, |
|
"rewards/margins": 4.231566905975342, |
|
"rewards/rejected": -20.23372459411621, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5903307888040712, |
|
"grad_norm": 116.58608387387143, |
|
"learning_rate": 3.447835610179327e-07, |
|
"logits/chosen": -18.31661033630371, |
|
"logits/rejected": -18.64508819580078, |
|
"logps/chosen": -1.852226972579956, |
|
"logps/rejected": -2.575822591781616, |
|
"loss": 2.6653, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -18.52227020263672, |
|
"rewards/margins": 7.235957145690918, |
|
"rewards/rejected": -25.75822639465332, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 131.22286636501386, |
|
"learning_rate": 3.416502932358079e-07, |
|
"logits/chosen": -19.86322021484375, |
|
"logits/rejected": -19.80363655090332, |
|
"logps/chosen": -1.677018642425537, |
|
"logps/rejected": -1.9988960027694702, |
|
"loss": 3.2105, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.770187377929688, |
|
"rewards/margins": 3.2187743186950684, |
|
"rewards/rejected": -19.98896026611328, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5948543963811139, |
|
"grad_norm": 113.95397777389032, |
|
"learning_rate": 3.385206793409451e-07, |
|
"logits/chosen": -16.9749813079834, |
|
"logits/rejected": -16.70389175415039, |
|
"logps/chosen": -1.5683423280715942, |
|
"logps/rejected": -1.973022699356079, |
|
"loss": 2.895, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.68342399597168, |
|
"rewards/margins": 4.046802520751953, |
|
"rewards/rejected": -19.730226516723633, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5971162001696353, |
|
"grad_norm": 136.6059459741964, |
|
"learning_rate": 3.3539491531130163e-07, |
|
"logits/chosen": -17.935535430908203, |
|
"logits/rejected": -17.720043182373047, |
|
"logps/chosen": -1.4503566026687622, |
|
"logps/rejected": -1.7065664529800415, |
|
"loss": 3.1485, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -14.50356674194336, |
|
"rewards/margins": 2.5620980262756348, |
|
"rewards/rejected": -17.065662384033203, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5993780039581567, |
|
"grad_norm": 137.85024895532698, |
|
"learning_rate": 3.3227319688375426e-07, |
|
"logits/chosen": -19.27477264404297, |
|
"logits/rejected": -19.29145050048828, |
|
"logps/chosen": -1.9958823919296265, |
|
"logps/rejected": -2.369366407394409, |
|
"loss": 3.4478, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -19.958826065063477, |
|
"rewards/margins": 3.7348380088806152, |
|
"rewards/rejected": -23.693662643432617, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.601639807746678, |
|
"grad_norm": 123.86696879933385, |
|
"learning_rate": 3.291557195418427e-07, |
|
"logits/chosen": -18.97182273864746, |
|
"logits/rejected": -18.623531341552734, |
|
"logps/chosen": -1.6371110677719116, |
|
"logps/rejected": -2.1235477924346924, |
|
"loss": 3.124, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -16.371112823486328, |
|
"rewards/margins": 4.864367485046387, |
|
"rewards/rejected": -21.235477447509766, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6039016115351993, |
|
"grad_norm": 160.30929493983072, |
|
"learning_rate": 3.260426785035272e-07, |
|
"logits/chosen": -18.151859283447266, |
|
"logits/rejected": -18.189985275268555, |
|
"logps/chosen": -1.5115103721618652, |
|
"logps/rejected": -1.9283788204193115, |
|
"loss": 3.4577, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -15.115103721618652, |
|
"rewards/margins": 4.168684959411621, |
|
"rewards/rejected": -19.283788681030273, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6061634153237206, |
|
"grad_norm": 118.85038786779553, |
|
"learning_rate": 3.229342687089646e-07, |
|
"logits/chosen": -17.767433166503906, |
|
"logits/rejected": -17.30542755126953, |
|
"logps/chosen": -1.7307448387145996, |
|
"logps/rejected": -2.2132887840270996, |
|
"loss": 3.2675, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.30744743347168, |
|
"rewards/margins": 4.825439453125, |
|
"rewards/rejected": -22.132884979248047, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.608425219112242, |
|
"grad_norm": 135.72304791395987, |
|
"learning_rate": 3.1983068480830143e-07, |
|
"logits/chosen": -17.994487762451172, |
|
"logits/rejected": -17.969486236572266, |
|
"logps/chosen": -1.7408254146575928, |
|
"logps/rejected": -2.30222225189209, |
|
"loss": 3.0847, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -17.408254623413086, |
|
"rewards/margins": 5.613969326019287, |
|
"rewards/rejected": -23.0222225189209, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.6106870229007634, |
|
"grad_norm": 136.8409835051581, |
|
"learning_rate": 3.1673212114948387e-07, |
|
"logits/chosen": -18.464635848999023, |
|
"logits/rejected": -18.186416625976562, |
|
"logps/chosen": -1.8000985383987427, |
|
"logps/rejected": -2.355130672454834, |
|
"loss": 2.2817, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -18.000986099243164, |
|
"rewards/margins": 5.550319671630859, |
|
"rewards/rejected": -23.551307678222656, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6129488266892847, |
|
"grad_norm": 128.71421710643776, |
|
"learning_rate": 3.1363877176608845e-07, |
|
"logits/chosen": -18.273387908935547, |
|
"logits/rejected": -18.52509117126465, |
|
"logps/chosen": -1.7283263206481934, |
|
"logps/rejected": -2.170729637145996, |
|
"loss": 2.919, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.28326416015625, |
|
"rewards/margins": 4.424034118652344, |
|
"rewards/rejected": -21.707298278808594, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.615210630477806, |
|
"grad_norm": 130.4882843458114, |
|
"learning_rate": 3.1055083036517076e-07, |
|
"logits/chosen": -18.288068771362305, |
|
"logits/rejected": -17.75768280029297, |
|
"logps/chosen": -1.6948351860046387, |
|
"logps/rejected": -2.2144925594329834, |
|
"loss": 3.3302, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -16.948348999023438, |
|
"rewards/margins": 5.196574687957764, |
|
"rewards/rejected": -22.14492416381836, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.6174724342663274, |
|
"grad_norm": 119.37105820236062, |
|
"learning_rate": 3.074684903151364e-07, |
|
"logits/chosen": -17.694923400878906, |
|
"logits/rejected": -17.45270538330078, |
|
"logps/chosen": -1.4144465923309326, |
|
"logps/rejected": -1.7332209348678589, |
|
"loss": 3.1823, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -14.144466400146484, |
|
"rewards/margins": 3.1877427101135254, |
|
"rewards/rejected": -17.33220863342285, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.6197342380548487, |
|
"grad_norm": 126.9873723041469, |
|
"learning_rate": 3.0439194463363136e-07, |
|
"logits/chosen": -19.154897689819336, |
|
"logits/rejected": -19.074947357177734, |
|
"logps/chosen": -1.6273291110992432, |
|
"logps/rejected": -2.1156094074249268, |
|
"loss": 3.1957, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -16.27328872680664, |
|
"rewards/margins": 4.882803916931152, |
|
"rewards/rejected": -21.156095504760742, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.6219960418433701, |
|
"grad_norm": 98.82421414344171, |
|
"learning_rate": 3.0132138597545537e-07, |
|
"logits/chosen": -18.89469337463379, |
|
"logits/rejected": -18.92743492126465, |
|
"logps/chosen": -1.8614561557769775, |
|
"logps/rejected": -2.298145294189453, |
|
"loss": 2.4871, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.614561080932617, |
|
"rewards/margins": 4.366891860961914, |
|
"rewards/rejected": -22.9814510345459, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6242578456318915, |
|
"grad_norm": 116.98836789151454, |
|
"learning_rate": 2.982570066204981e-07, |
|
"logits/chosen": -17.621952056884766, |
|
"logits/rejected": -17.41912841796875, |
|
"logps/chosen": -1.7095118761062622, |
|
"logps/rejected": -2.267876625061035, |
|
"loss": 2.8961, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -17.09511947631836, |
|
"rewards/margins": 5.583648204803467, |
|
"rewards/rejected": -22.678768157958984, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.6265196494204128, |
|
"grad_norm": 139.7828630658467, |
|
"learning_rate": 2.951989984616979e-07, |
|
"logits/chosen": -18.495176315307617, |
|
"logits/rejected": -18.713180541992188, |
|
"logps/chosen": -1.794584035873413, |
|
"logps/rejected": -2.701646327972412, |
|
"loss": 3.3014, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -17.94584083557129, |
|
"rewards/margins": 9.070621490478516, |
|
"rewards/rejected": -27.016462326049805, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.6287814532089341, |
|
"grad_norm": 104.32477594909946, |
|
"learning_rate": 2.9214755299302584e-07, |
|
"logits/chosen": -18.10324478149414, |
|
"logits/rejected": -18.533466339111328, |
|
"logps/chosen": -1.4521610736846924, |
|
"logps/rejected": -2.136770486831665, |
|
"loss": 2.7298, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.521611213684082, |
|
"rewards/margins": 6.84609317779541, |
|
"rewards/rejected": -21.367706298828125, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.6310432569974554, |
|
"grad_norm": 129.12312580227652, |
|
"learning_rate": 2.89102861297494e-07, |
|
"logits/chosen": -16.307287216186523, |
|
"logits/rejected": -16.62302589416504, |
|
"logps/chosen": -1.5291308164596558, |
|
"logps/rejected": -1.9218378067016602, |
|
"loss": 3.2843, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -15.29130744934082, |
|
"rewards/margins": 3.9270708560943604, |
|
"rewards/rejected": -19.218379974365234, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.6333050607859768, |
|
"grad_norm": 119.86450692791884, |
|
"learning_rate": 2.860651140351902e-07, |
|
"logits/chosen": -17.81388282775879, |
|
"logits/rejected": -17.59682273864746, |
|
"logps/chosen": -1.4970345497131348, |
|
"logps/rejected": -2.2067017555236816, |
|
"loss": 3.0318, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -14.970344543457031, |
|
"rewards/margins": 7.096673011779785, |
|
"rewards/rejected": -22.0670166015625, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6355668645744982, |
|
"grad_norm": 138.33272392046018, |
|
"learning_rate": 2.830345014313381e-07, |
|
"logits/chosen": -18.549711227416992, |
|
"logits/rejected": -18.178396224975586, |
|
"logps/chosen": -1.5726195573806763, |
|
"logps/rejected": -2.220799684524536, |
|
"loss": 3.0664, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -15.726195335388184, |
|
"rewards/margins": 6.481801986694336, |
|
"rewards/rejected": -22.20799446105957, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.6378286683630195, |
|
"grad_norm": 121.49306068303021, |
|
"learning_rate": 2.800112132643856e-07, |
|
"logits/chosen": -18.666532516479492, |
|
"logits/rejected": -18.698705673217773, |
|
"logps/chosen": -1.9378407001495361, |
|
"logps/rejected": -2.5402820110321045, |
|
"loss": 3.0881, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -19.378408432006836, |
|
"rewards/margins": 6.024411678314209, |
|
"rewards/rejected": -25.40281867980957, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6400904721515408, |
|
"grad_norm": 114.50153480696909, |
|
"learning_rate": 2.7699543885412105e-07, |
|
"logits/chosen": -18.842344284057617, |
|
"logits/rejected": -19.002525329589844, |
|
"logps/chosen": -1.7454712390899658, |
|
"logps/rejected": -2.214015483856201, |
|
"loss": 2.7145, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -17.454710006713867, |
|
"rewards/margins": 4.685445308685303, |
|
"rewards/rejected": -22.14015769958496, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.6423522759400622, |
|
"grad_norm": 126.9731178541442, |
|
"learning_rate": 2.7398736704981725e-07, |
|
"logits/chosen": -17.94224739074707, |
|
"logits/rejected": -18.106706619262695, |
|
"logps/chosen": -1.8006949424743652, |
|
"logps/rejected": -2.4084813594818115, |
|
"loss": 2.7514, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -18.00695037841797, |
|
"rewards/margins": 6.077863693237305, |
|
"rewards/rejected": -24.084814071655273, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.6446140797285835, |
|
"grad_norm": 121.79509986844188, |
|
"learning_rate": 2.709871862184063e-07, |
|
"logits/chosen": -16.98878288269043, |
|
"logits/rejected": -17.01874542236328, |
|
"logps/chosen": -1.8407750129699707, |
|
"logps/rejected": -2.2451648712158203, |
|
"loss": 3.3275, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -18.407751083374023, |
|
"rewards/margins": 4.0438995361328125, |
|
"rewards/rejected": -22.451650619506836, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6468758835171049, |
|
"grad_norm": 108.87797931776414, |
|
"learning_rate": 2.679950842326837e-07, |
|
"logits/chosen": -18.95654296875, |
|
"logits/rejected": -18.801700592041016, |
|
"logps/chosen": -1.6954306364059448, |
|
"logps/rejected": -2.5442655086517334, |
|
"loss": 2.5918, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -16.95430564880371, |
|
"rewards/margins": 8.488348960876465, |
|
"rewards/rejected": -25.442655563354492, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6491376873056263, |
|
"grad_norm": 111.41714747114163, |
|
"learning_rate": 2.6501124845954363e-07, |
|
"logits/chosen": -16.922765731811523, |
|
"logits/rejected": -16.570079803466797, |
|
"logps/chosen": -1.5942519903182983, |
|
"logps/rejected": -2.0841176509857178, |
|
"loss": 2.7103, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -15.942520141601562, |
|
"rewards/margins": 4.898656368255615, |
|
"rewards/rejected": -20.841175079345703, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.6513994910941476, |
|
"grad_norm": 111.54831143350387, |
|
"learning_rate": 2.62035865748246e-07, |
|
"logits/chosen": -19.410310745239258, |
|
"logits/rejected": -19.599136352539062, |
|
"logps/chosen": -1.7219384908676147, |
|
"logps/rejected": -2.0403764247894287, |
|
"loss": 3.257, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -17.219385147094727, |
|
"rewards/margins": 3.184377670288086, |
|
"rewards/rejected": -20.403764724731445, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.6536612948826689, |
|
"grad_norm": 129.1650835585718, |
|
"learning_rate": 2.5906912241871554e-07, |
|
"logits/chosen": -19.173494338989258, |
|
"logits/rejected": -19.192523956298828, |
|
"logps/chosen": -1.6308451890945435, |
|
"logps/rejected": -2.0142531394958496, |
|
"loss": 3.5351, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.308452606201172, |
|
"rewards/margins": 3.8340790271759033, |
|
"rewards/rejected": -20.142528533935547, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.6559230986711903, |
|
"grad_norm": 114.97395819625001, |
|
"learning_rate": 2.561112042498753e-07, |
|
"logits/chosen": -17.663278579711914, |
|
"logits/rejected": -17.458215713500977, |
|
"logps/chosen": -1.433032751083374, |
|
"logps/rejected": -1.9120677709579468, |
|
"loss": 3.1725, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.330328941345215, |
|
"rewards/margins": 4.79034948348999, |
|
"rewards/rejected": -19.12067985534668, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6581849024597116, |
|
"grad_norm": 118.80435385837328, |
|
"learning_rate": 2.5316229646801195e-07, |
|
"logits/chosen": -19.93079948425293, |
|
"logits/rejected": -19.657909393310547, |
|
"logps/chosen": -1.6756254434585571, |
|
"logps/rejected": -2.254075288772583, |
|
"loss": 2.7255, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -16.756254196166992, |
|
"rewards/margins": 5.7845001220703125, |
|
"rewards/rejected": -22.540752410888672, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.660446706248233, |
|
"grad_norm": 120.26763355250853, |
|
"learning_rate": 2.5022258373517714e-07, |
|
"logits/chosen": -18.864389419555664, |
|
"logits/rejected": -18.524669647216797, |
|
"logps/chosen": -1.6191332340240479, |
|
"logps/rejected": -2.0536766052246094, |
|
"loss": 2.8735, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -16.191333770751953, |
|
"rewards/margins": 4.345433235168457, |
|
"rewards/rejected": -20.536766052246094, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.6627085100367544, |
|
"grad_norm": 147.0811073810554, |
|
"learning_rate": 2.4729225013762474e-07, |
|
"logits/chosen": -18.751914978027344, |
|
"logits/rejected": -18.83761215209961, |
|
"logps/chosen": -1.7884494066238403, |
|
"logps/rejected": -2.187746524810791, |
|
"loss": 3.8995, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.884492874145508, |
|
"rewards/margins": 3.9929721355438232, |
|
"rewards/rejected": -21.877464294433594, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.6649703138252756, |
|
"grad_norm": 148.90534716218426, |
|
"learning_rate": 2.4437147917428203e-07, |
|
"logits/chosen": -18.826107025146484, |
|
"logits/rejected": -18.503259658813477, |
|
"logps/chosen": -1.7195425033569336, |
|
"logps/rejected": -2.187434434890747, |
|
"loss": 3.0299, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -17.195425033569336, |
|
"rewards/margins": 4.678918838500977, |
|
"rewards/rejected": -21.87434196472168, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.667232117613797, |
|
"grad_norm": 127.90476462202498, |
|
"learning_rate": 2.414604537452595e-07, |
|
"logits/chosen": -18.674943923950195, |
|
"logits/rejected": -18.60759735107422, |
|
"logps/chosen": -1.7201087474822998, |
|
"logps/rejected": -2.014519214630127, |
|
"loss": 3.128, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -17.201087951660156, |
|
"rewards/margins": 2.9441049098968506, |
|
"rewards/rejected": -20.145193099975586, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6694939214023183, |
|
"grad_norm": 108.9145846232443, |
|
"learning_rate": 2.385593561403974e-07, |
|
"logits/chosen": -19.400646209716797, |
|
"logits/rejected": -19.273517608642578, |
|
"logps/chosen": -1.726077914237976, |
|
"logps/rejected": -2.1319644451141357, |
|
"loss": 2.882, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.260780334472656, |
|
"rewards/margins": 4.058864593505859, |
|
"rewards/rejected": -21.319643020629883, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6717557251908397, |
|
"grad_norm": 112.86761120489062, |
|
"learning_rate": 2.3566836802785119e-07, |
|
"logits/chosen": -18.81859016418457, |
|
"logits/rejected": -18.859493255615234, |
|
"logps/chosen": -1.9398648738861084, |
|
"logps/rejected": -2.3005058765411377, |
|
"loss": 2.8161, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -19.39864730834961, |
|
"rewards/margins": 3.60640811920166, |
|
"rewards/rejected": -23.005056381225586, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6740175289793611, |
|
"grad_norm": 118.40226934077113, |
|
"learning_rate": 2.327876704427146e-07, |
|
"logits/chosen": -18.128990173339844, |
|
"logits/rejected": -18.05478858947754, |
|
"logps/chosen": -1.7885990142822266, |
|
"logps/rejected": -2.1967928409576416, |
|
"loss": 3.3258, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -17.885990142822266, |
|
"rewards/margins": 4.08193826675415, |
|
"rewards/rejected": -21.96792984008789, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6762793327678824, |
|
"grad_norm": 153.7897090271479, |
|
"learning_rate": 2.2991744377568358e-07, |
|
"logits/chosen": -17.88959312438965, |
|
"logits/rejected": -17.185943603515625, |
|
"logps/chosen": -1.6429543495178223, |
|
"logps/rejected": -2.067078113555908, |
|
"loss": 3.74, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.429542541503906, |
|
"rewards/margins": 4.241240501403809, |
|
"rewards/rejected": -20.67078399658203, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6785411365564037, |
|
"grad_norm": 133.58644267056656, |
|
"learning_rate": 2.270578677617601e-07, |
|
"logits/chosen": -18.508695602416992, |
|
"logits/rejected": -18.557147979736328, |
|
"logps/chosen": -1.617353916168213, |
|
"logps/rejected": -2.0458405017852783, |
|
"loss": 3.4581, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -16.173538208007812, |
|
"rewards/margins": 4.2848663330078125, |
|
"rewards/rejected": -20.458406448364258, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6808029403449251, |
|
"grad_norm": 116.99592631499642, |
|
"learning_rate": 2.242091214689971e-07, |
|
"logits/chosen": -18.887380599975586, |
|
"logits/rejected": -18.278608322143555, |
|
"logps/chosen": -1.7732751369476318, |
|
"logps/rejected": -2.1223254203796387, |
|
"loss": 2.8134, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -17.732751846313477, |
|
"rewards/margins": 3.4905025959014893, |
|
"rewards/rejected": -21.223255157470703, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6830647441334464, |
|
"grad_norm": 129.92140054225982, |
|
"learning_rate": 2.2137138328728456e-07, |
|
"logits/chosen": -18.269765853881836, |
|
"logits/rejected": -17.92385482788086, |
|
"logps/chosen": -1.8314377069473267, |
|
"logps/rejected": -2.02689266204834, |
|
"loss": 3.2214, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.314376831054688, |
|
"rewards/margins": 1.9545530080795288, |
|
"rewards/rejected": -20.26892852783203, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.6853265479219678, |
|
"grad_norm": 120.44838708614284, |
|
"learning_rate": 2.1854483091717974e-07, |
|
"logits/chosen": -17.881437301635742, |
|
"logits/rejected": -17.71358299255371, |
|
"logps/chosen": -1.6855335235595703, |
|
"logps/rejected": -2.2010655403137207, |
|
"loss": 2.7716, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.855335235595703, |
|
"rewards/margins": 5.155317783355713, |
|
"rewards/rejected": -22.010652542114258, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.6875883517104892, |
|
"grad_norm": 142.94697133093283, |
|
"learning_rate": 2.1572964135877863e-07, |
|
"logits/chosen": -17.533218383789062, |
|
"logits/rejected": -17.355274200439453, |
|
"logps/chosen": -1.5027070045471191, |
|
"logps/rejected": -2.0504214763641357, |
|
"loss": 3.376, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -15.027069091796875, |
|
"rewards/margins": 5.477144241333008, |
|
"rewards/rejected": -20.504213333129883, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6898501554990104, |
|
"grad_norm": 114.71461725743285, |
|
"learning_rate": 2.1292599090063245e-07, |
|
"logits/chosen": -18.869152069091797, |
|
"logits/rejected": -18.81059455871582, |
|
"logps/chosen": -1.6714305877685547, |
|
"logps/rejected": -2.1169075965881348, |
|
"loss": 2.7332, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -16.714305877685547, |
|
"rewards/margins": 4.454771518707275, |
|
"rewards/rejected": -21.169076919555664, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6921119592875318, |
|
"grad_norm": 111.93201937391214, |
|
"learning_rate": 2.1013405510870824e-07, |
|
"logits/chosen": -18.295650482177734, |
|
"logits/rejected": -18.45261573791504, |
|
"logps/chosen": -1.8770661354064941, |
|
"logps/rejected": -2.236387252807617, |
|
"loss": 3.3382, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.770662307739258, |
|
"rewards/margins": 3.593210220336914, |
|
"rewards/rejected": -22.36387062072754, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6943737630760531, |
|
"grad_norm": 129.49294218978284, |
|
"learning_rate": 2.0735400881539494e-07, |
|
"logits/chosen": -20.06885528564453, |
|
"logits/rejected": -20.67595672607422, |
|
"logps/chosen": -1.699569821357727, |
|
"logps/rejected": -2.1293015480041504, |
|
"loss": 3.2719, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -16.995698928833008, |
|
"rewards/margins": 4.29731559753418, |
|
"rewards/rejected": -21.293014526367188, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6966355668645745, |
|
"grad_norm": 126.68283700786048, |
|
"learning_rate": 2.0458602610855536e-07, |
|
"logits/chosen": -16.85354995727539, |
|
"logits/rejected": -17.0955753326416, |
|
"logps/chosen": -1.633847713470459, |
|
"logps/rejected": -2.1871325969696045, |
|
"loss": 2.746, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.338476181030273, |
|
"rewards/margins": 5.532848834991455, |
|
"rewards/rejected": -21.87132453918457, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6988973706530959, |
|
"grad_norm": 121.46567102959813, |
|
"learning_rate": 2.0183028032062422e-07, |
|
"logits/chosen": -18.197134017944336, |
|
"logits/rejected": -18.313335418701172, |
|
"logps/chosen": -1.7123744487762451, |
|
"logps/rejected": -2.3391215801239014, |
|
"loss": 3.301, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -17.123743057250977, |
|
"rewards/margins": 6.267471790313721, |
|
"rewards/rejected": -23.391216278076172, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.7011591744416172, |
|
"grad_norm": 124.40646143719266, |
|
"learning_rate": 1.9908694401775473e-07, |
|
"logits/chosen": -19.83294677734375, |
|
"logits/rejected": -20.086868286132812, |
|
"logps/chosen": -2.0055930614471436, |
|
"logps/rejected": -2.405197858810425, |
|
"loss": 3.0917, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -20.055932998657227, |
|
"rewards/margins": 3.996046781539917, |
|
"rewards/rejected": -24.051979064941406, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7034209782301385, |
|
"grad_norm": 126.95244540601655, |
|
"learning_rate": 1.9635618898901196e-07, |
|
"logits/chosen": -19.060583114624023, |
|
"logits/rejected": -19.149402618408203, |
|
"logps/chosen": -1.9011938571929932, |
|
"logps/rejected": -2.284623146057129, |
|
"loss": 3.0781, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -19.011938095092773, |
|
"rewards/margins": 3.834294080734253, |
|
"rewards/rejected": -22.846233367919922, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.7056827820186599, |
|
"grad_norm": 131.1304953783553, |
|
"learning_rate": 1.9363818623561565e-07, |
|
"logits/chosen": -18.06791114807129, |
|
"logits/rejected": -17.94767189025879, |
|
"logps/chosen": -1.763685941696167, |
|
"logps/rejected": -2.0710325241088867, |
|
"loss": 3.5313, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -17.636857986450195, |
|
"rewards/margins": 3.0734646320343018, |
|
"rewards/rejected": -20.710325241088867, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.7079445858071812, |
|
"grad_norm": 116.35057290069598, |
|
"learning_rate": 1.9093310596023108e-07, |
|
"logits/chosen": -18.00191307067871, |
|
"logits/rejected": -18.05375099182129, |
|
"logps/chosen": -1.9328045845031738, |
|
"logps/rejected": -2.434842348098755, |
|
"loss": 2.5712, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -19.328044891357422, |
|
"rewards/margins": 5.020379543304443, |
|
"rewards/rejected": -24.348424911499023, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.7102063895957026, |
|
"grad_norm": 107.72415682056965, |
|
"learning_rate": 1.8824111755631274e-07, |
|
"logits/chosen": -17.74974250793457, |
|
"logits/rejected": -17.714256286621094, |
|
"logps/chosen": -1.6889841556549072, |
|
"logps/rejected": -2.1979172229766846, |
|
"loss": 3.324, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.889841079711914, |
|
"rewards/margins": 5.089331150054932, |
|
"rewards/rejected": -21.97917366027832, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.712468193384224, |
|
"grad_norm": 175.2751395769359, |
|
"learning_rate": 1.8556238959749457e-07, |
|
"logits/chosen": -20.16362762451172, |
|
"logits/rejected": -20.45577049255371, |
|
"logps/chosen": -1.9660546779632568, |
|
"logps/rejected": -2.3499526977539062, |
|
"loss": 3.5857, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -19.660547256469727, |
|
"rewards/margins": 3.8389804363250732, |
|
"rewards/rejected": -23.49952507019043, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7147299971727452, |
|
"grad_norm": 119.01395158521336, |
|
"learning_rate": 1.8289708982703562e-07, |
|
"logits/chosen": -18.191469192504883, |
|
"logits/rejected": -18.05630111694336, |
|
"logps/chosen": -1.6164871454238892, |
|
"logps/rejected": -2.0442566871643066, |
|
"loss": 3.1091, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -16.164871215820312, |
|
"rewards/margins": 4.277695655822754, |
|
"rewards/rejected": -20.442567825317383, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.7169918009612666, |
|
"grad_norm": 110.39790952014322, |
|
"learning_rate": 1.802453851473151e-07, |
|
"logits/chosen": -18.25019073486328, |
|
"logits/rejected": -18.184785842895508, |
|
"logps/chosen": -1.9268181324005127, |
|
"logps/rejected": -2.5494563579559326, |
|
"loss": 2.5819, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -19.26818084716797, |
|
"rewards/margins": 6.226382732391357, |
|
"rewards/rejected": -25.494564056396484, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.719253604749788, |
|
"grad_norm": 111.59669789809674, |
|
"learning_rate": 1.7760744160938093e-07, |
|
"logits/chosen": -19.184326171875, |
|
"logits/rejected": -19.069150924682617, |
|
"logps/chosen": -1.9187712669372559, |
|
"logps/rejected": -2.417238473892212, |
|
"loss": 2.5089, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -19.187713623046875, |
|
"rewards/margins": 4.984671592712402, |
|
"rewards/rejected": -24.172386169433594, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.7215154085383093, |
|
"grad_norm": 111.40164812220848, |
|
"learning_rate": 1.7498342440255135e-07, |
|
"logits/chosen": -17.5487060546875, |
|
"logits/rejected": -17.807178497314453, |
|
"logps/chosen": -1.8459084033966064, |
|
"logps/rejected": -2.48807430267334, |
|
"loss": 3.5431, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -18.459083557128906, |
|
"rewards/margins": 6.42165994644165, |
|
"rewards/rejected": -24.88074493408203, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.7237772123268307, |
|
"grad_norm": 124.12971999012613, |
|
"learning_rate": 1.7237349784407115e-07, |
|
"logits/chosen": -17.986967086791992, |
|
"logits/rejected": -18.200471878051758, |
|
"logps/chosen": -2.0284173488616943, |
|
"logps/rejected": -2.5237107276916504, |
|
"loss": 3.7015, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -20.2841739654541, |
|
"rewards/margins": 4.952933311462402, |
|
"rewards/rejected": -25.237106323242188, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.726039016115352, |
|
"grad_norm": 141.09967606925343, |
|
"learning_rate": 1.6977782536882178e-07, |
|
"logits/chosen": -16.887096405029297, |
|
"logits/rejected": -16.802282333374023, |
|
"logps/chosen": -1.782692790031433, |
|
"logps/rejected": -2.303356170654297, |
|
"loss": 3.1257, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -17.82692527770996, |
|
"rewards/margins": 5.206636428833008, |
|
"rewards/rejected": -23.0335636138916, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.7283008199038733, |
|
"grad_norm": 116.38070332785638, |
|
"learning_rate": 1.6719656951908708e-07, |
|
"logits/chosen": -17.198162078857422, |
|
"logits/rejected": -16.910192489624023, |
|
"logps/chosen": -1.3144806623458862, |
|
"logps/rejected": -1.8561556339263916, |
|
"loss": 2.7638, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -13.144807815551758, |
|
"rewards/margins": 5.416749954223633, |
|
"rewards/rejected": -18.561553955078125, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.7305626236923947, |
|
"grad_norm": 114.11746852210314, |
|
"learning_rate": 1.6462989193437453e-07, |
|
"logits/chosen": -17.512184143066406, |
|
"logits/rejected": -17.920053482055664, |
|
"logps/chosen": -1.9415867328643799, |
|
"logps/rejected": -2.1908974647521973, |
|
"loss": 3.6178, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -19.415868759155273, |
|
"rewards/margins": 2.493105888366699, |
|
"rewards/rejected": -21.908973693847656, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.732824427480916, |
|
"grad_norm": 113.99856611080652, |
|
"learning_rate": 1.6207795334129365e-07, |
|
"logits/chosen": -19.32732582092285, |
|
"logits/rejected": -19.14191246032715, |
|
"logps/chosen": -1.6971518993377686, |
|
"logps/rejected": -2.295579433441162, |
|
"loss": 2.8077, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.971519470214844, |
|
"rewards/margins": 5.98427677154541, |
|
"rewards/rejected": -22.955795288085938, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.7350862312694374, |
|
"grad_norm": 111.98084422068199, |
|
"learning_rate": 1.5954091354349121e-07, |
|
"logits/chosen": -17.98455238342285, |
|
"logits/rejected": -17.884262084960938, |
|
"logps/chosen": -1.7183001041412354, |
|
"logps/rejected": -2.17026424407959, |
|
"loss": 2.8751, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.183000564575195, |
|
"rewards/margins": 4.519641876220703, |
|
"rewards/rejected": -21.7026424407959, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7373480350579588, |
|
"grad_norm": 232.39770149735068, |
|
"learning_rate": 1.5701893141164364e-07, |
|
"logits/chosen": -18.812040328979492, |
|
"logits/rejected": -18.633085250854492, |
|
"logps/chosen": -1.676187515258789, |
|
"logps/rejected": -2.3979151248931885, |
|
"loss": 3.0272, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.761873245239258, |
|
"rewards/margins": 7.217278480529785, |
|
"rewards/rejected": -23.979150772094727, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.73960983884648, |
|
"grad_norm": 120.7387881104363, |
|
"learning_rate": 1.545121648735093e-07, |
|
"logits/chosen": -18.34151840209961, |
|
"logits/rejected": -18.516704559326172, |
|
"logps/chosen": -1.6959328651428223, |
|
"logps/rejected": -2.0101141929626465, |
|
"loss": 3.0935, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.95932960510254, |
|
"rewards/margins": 3.1418118476867676, |
|
"rewards/rejected": -20.10114097595215, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.7418716426350014, |
|
"grad_norm": 116.46893556878314, |
|
"learning_rate": 1.5202077090403863e-07, |
|
"logits/chosen": -16.94781494140625, |
|
"logits/rejected": -16.98765754699707, |
|
"logps/chosen": -1.6322197914123535, |
|
"logps/rejected": -2.0489912033081055, |
|
"loss": 2.9396, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.32219696044922, |
|
"rewards/margins": 4.1677141189575195, |
|
"rewards/rejected": -20.489913940429688, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.7441334464235227, |
|
"grad_norm": 146.64902887166238, |
|
"learning_rate": 1.495449055155443e-07, |
|
"logits/chosen": -16.194108963012695, |
|
"logits/rejected": -16.39000701904297, |
|
"logps/chosen": -1.4700114727020264, |
|
"logps/rejected": -1.9279454946517944, |
|
"loss": 3.1195, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.700116157531738, |
|
"rewards/margins": 4.579338550567627, |
|
"rewards/rejected": -19.279455184936523, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.7463952502120441, |
|
"grad_norm": 128.77629572500186, |
|
"learning_rate": 1.4708472374793112e-07, |
|
"logits/chosen": -18.751955032348633, |
|
"logits/rejected": -18.25170135498047, |
|
"logps/chosen": -1.6594680547714233, |
|
"logps/rejected": -2.1272830963134766, |
|
"loss": 3.568, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.594682693481445, |
|
"rewards/margins": 4.67814826965332, |
|
"rewards/rejected": -21.272830963134766, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7486570540005655, |
|
"grad_norm": 116.85830667937259, |
|
"learning_rate": 1.4464037965898878e-07, |
|
"logits/chosen": -19.034826278686523, |
|
"logits/rejected": -18.4996395111084, |
|
"logps/chosen": -1.6908671855926514, |
|
"logps/rejected": -2.253139019012451, |
|
"loss": 3.1892, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.908668518066406, |
|
"rewards/margins": 5.622718811035156, |
|
"rewards/rejected": -22.531389236450195, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.7509188577890868, |
|
"grad_norm": 128.75404728697518, |
|
"learning_rate": 1.4221202631474282e-07, |
|
"logits/chosen": -18.098434448242188, |
|
"logits/rejected": -18.39754867553711, |
|
"logps/chosen": -1.7314308881759644, |
|
"logps/rejected": -2.248021125793457, |
|
"loss": 3.2902, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -17.314308166503906, |
|
"rewards/margins": 5.165902137756348, |
|
"rewards/rejected": -22.480209350585938, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.7531806615776081, |
|
"grad_norm": 112.23640069941514, |
|
"learning_rate": 1.3979981577987113e-07, |
|
"logits/chosen": -17.052106857299805, |
|
"logits/rejected": -17.000144958496094, |
|
"logps/chosen": -1.8308027982711792, |
|
"logps/rejected": -2.2044148445129395, |
|
"loss": 2.8652, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.308027267456055, |
|
"rewards/margins": 3.73612117767334, |
|
"rewards/rejected": -22.04414939880371, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.7554424653661295, |
|
"grad_norm": 120.8440355705056, |
|
"learning_rate": 1.374038991081807e-07, |
|
"logits/chosen": -17.621667861938477, |
|
"logits/rejected": -17.636934280395508, |
|
"logps/chosen": -1.6944687366485596, |
|
"logps/rejected": -2.1847054958343506, |
|
"loss": 2.8811, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.944686889648438, |
|
"rewards/margins": 4.902366638183594, |
|
"rewards/rejected": -21.84705352783203, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.7577042691546508, |
|
"grad_norm": 126.66430839817343, |
|
"learning_rate": 1.3502442633314882e-07, |
|
"logits/chosen": -16.78680992126465, |
|
"logits/rejected": -16.919919967651367, |
|
"logps/chosen": -1.619330883026123, |
|
"logps/rejected": -2.069342851638794, |
|
"loss": 2.8007, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.193309783935547, |
|
"rewards/margins": 4.500118255615234, |
|
"rewards/rejected": -20.69342803955078, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7599660729431722, |
|
"grad_norm": 107.65531245820142, |
|
"learning_rate": 1.3266154645852815e-07, |
|
"logits/chosen": -18.672462463378906, |
|
"logits/rejected": -18.989612579345703, |
|
"logps/chosen": -1.6392340660095215, |
|
"logps/rejected": -2.228325366973877, |
|
"loss": 2.9037, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.39234161376953, |
|
"rewards/margins": 5.890913009643555, |
|
"rewards/rejected": -22.283252716064453, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.7622278767316936, |
|
"grad_norm": 104.66828130875484, |
|
"learning_rate": 1.303154074490152e-07, |
|
"logits/chosen": -17.055776596069336, |
|
"logits/rejected": -16.77735137939453, |
|
"logps/chosen": -1.4908720254898071, |
|
"logps/rejected": -1.799816608428955, |
|
"loss": 2.7556, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.908721923828125, |
|
"rewards/margins": 3.089444875717163, |
|
"rewards/rejected": -17.998165130615234, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.7644896805202148, |
|
"grad_norm": 117.88462800241341, |
|
"learning_rate": 1.2798615622098616e-07, |
|
"logits/chosen": -17.4500732421875, |
|
"logits/rejected": -17.197757720947266, |
|
"logps/chosen": -1.6963038444519043, |
|
"logps/rejected": -2.2104175090789795, |
|
"loss": 2.8171, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -16.96303939819336, |
|
"rewards/margins": 5.141136646270752, |
|
"rewards/rejected": -22.104175567626953, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.7667514843087362, |
|
"grad_norm": 106.90568181230161, |
|
"learning_rate": 1.2567393863329523e-07, |
|
"logits/chosen": -18.870460510253906, |
|
"logits/rejected": -18.87204360961914, |
|
"logps/chosen": -1.8440241813659668, |
|
"logps/rejected": -2.3904788494110107, |
|
"loss": 2.8668, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -18.44024085998535, |
|
"rewards/margins": 5.464546203613281, |
|
"rewards/rejected": -23.904788970947266, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.7690132880972576, |
|
"grad_norm": 135.41776419808772, |
|
"learning_rate": 1.233788994781423e-07, |
|
"logits/chosen": -17.011192321777344, |
|
"logits/rejected": -17.013751983642578, |
|
"logps/chosen": -1.4503196477890015, |
|
"logps/rejected": -2.010632276535034, |
|
"loss": 3.3367, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.50319766998291, |
|
"rewards/margins": 5.603124618530273, |
|
"rewards/rejected": -20.106321334838867, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7712750918857789, |
|
"grad_norm": 136.82224989040978, |
|
"learning_rate": 1.2110118247200468e-07, |
|
"logits/chosen": -18.286027908325195, |
|
"logits/rejected": -18.17170524597168, |
|
"logps/chosen": -1.6681314706802368, |
|
"logps/rejected": -2.0645222663879395, |
|
"loss": 2.9115, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.68131446838379, |
|
"rewards/margins": 3.963907241821289, |
|
"rewards/rejected": -20.645221710205078, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.7735368956743003, |
|
"grad_norm": 116.57287182904345, |
|
"learning_rate": 1.1884093024663933e-07, |
|
"logits/chosen": -16.591590881347656, |
|
"logits/rejected": -16.540773391723633, |
|
"logps/chosen": -1.636472463607788, |
|
"logps/rejected": -2.1073007583618164, |
|
"loss": 2.8411, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -16.36472511291504, |
|
"rewards/margins": 4.708281517028809, |
|
"rewards/rejected": -21.07300567626953, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.7757986994628217, |
|
"grad_norm": 118.5480993837639, |
|
"learning_rate": 1.1659828434014886e-07, |
|
"logits/chosen": -17.95990562438965, |
|
"logits/rejected": -17.720420837402344, |
|
"logps/chosen": -1.635303020477295, |
|
"logps/rejected": -2.0385963916778564, |
|
"loss": 3.0565, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -16.353031158447266, |
|
"rewards/margins": 4.032935619354248, |
|
"rewards/rejected": -20.38596534729004, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.7780605032513429, |
|
"grad_norm": 134.89450131828505, |
|
"learning_rate": 1.143733851881203e-07, |
|
"logits/chosen": -19.509780883789062, |
|
"logits/rejected": -19.24722671508789, |
|
"logps/chosen": -1.545256495475769, |
|
"logps/rejected": -2.114616632461548, |
|
"loss": 2.6658, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.452564239501953, |
|
"rewards/margins": 5.693601608276367, |
|
"rewards/rejected": -21.146167755126953, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7803223070398643, |
|
"grad_norm": 123.25488488665061, |
|
"learning_rate": 1.1216637211483005e-07, |
|
"logits/chosen": -18.04479217529297, |
|
"logits/rejected": -18.232608795166016, |
|
"logps/chosen": -1.771346092224121, |
|
"logps/rejected": -2.349630355834961, |
|
"loss": 3.2986, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -17.71346092224121, |
|
"rewards/margins": 5.782842636108398, |
|
"rewards/rejected": -23.49630355834961, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7825841108283856, |
|
"grad_norm": 114.24822425854607, |
|
"learning_rate": 1.0997738332451936e-07, |
|
"logits/chosen": -18.91605567932129, |
|
"logits/rejected": -18.952014923095703, |
|
"logps/chosen": -1.9931975603103638, |
|
"logps/rejected": -2.6114182472229004, |
|
"loss": 2.7017, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -19.931976318359375, |
|
"rewards/margins": 6.182207107543945, |
|
"rewards/rejected": -26.11418342590332, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.784845914616907, |
|
"grad_norm": 101.71447993811626, |
|
"learning_rate": 1.0780655589274031e-07, |
|
"logits/chosen": -19.68770408630371, |
|
"logits/rejected": -19.459531784057617, |
|
"logps/chosen": -1.9268380403518677, |
|
"logps/rejected": -2.4731695652008057, |
|
"loss": 3.0762, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -19.26837921142578, |
|
"rewards/margins": 5.463315010070801, |
|
"rewards/rejected": -24.731693267822266, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.7871077184054284, |
|
"grad_norm": 144.33485472555742, |
|
"learning_rate": 1.056540257577712e-07, |
|
"logits/chosen": -19.237892150878906, |
|
"logits/rejected": -19.197168350219727, |
|
"logps/chosen": -2.0048787593841553, |
|
"logps/rejected": -2.5931644439697266, |
|
"loss": 2.7076, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -20.048786163330078, |
|
"rewards/margins": 5.882862091064453, |
|
"rewards/rejected": -25.93164825439453, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7893695221939496, |
|
"grad_norm": 112.5748530177231, |
|
"learning_rate": 1.0351992771210554e-07, |
|
"logits/chosen": -18.623769760131836, |
|
"logits/rejected": -19.13688850402832, |
|
"logps/chosen": -1.8369648456573486, |
|
"logps/rejected": -2.3405067920684814, |
|
"loss": 3.2742, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -18.369647979736328, |
|
"rewards/margins": 5.035419464111328, |
|
"rewards/rejected": -23.405067443847656, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.791631325982471, |
|
"grad_norm": 132.898356490048, |
|
"learning_rate": 1.0140439539400953e-07, |
|
"logits/chosen": -18.275182723999023, |
|
"logits/rejected": -18.35052490234375, |
|
"logps/chosen": -2.0253753662109375, |
|
"logps/rejected": -2.4307503700256348, |
|
"loss": 3.2702, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -20.253755569458008, |
|
"rewards/margins": 4.053745746612549, |
|
"rewards/rejected": -24.307498931884766, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7938931297709924, |
|
"grad_norm": 119.8096502311399, |
|
"learning_rate": 9.930756127915488e-08, |
|
"logits/chosen": -20.30059051513672, |
|
"logits/rejected": -20.41552734375, |
|
"logps/chosen": -1.89390230178833, |
|
"logps/rejected": -2.320417642593384, |
|
"loss": 2.8442, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.939023971557617, |
|
"rewards/margins": 4.265152454376221, |
|
"rewards/rejected": -23.204177856445312, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.7961549335595137, |
|
"grad_norm": 126.98574065425882, |
|
"learning_rate": 9.722955667232242e-08, |
|
"logits/chosen": -16.467742919921875, |
|
"logits/rejected": -16.47926902770996, |
|
"logps/chosen": -1.5406644344329834, |
|
"logps/rejected": -2.026040554046631, |
|
"loss": 3.5477, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -15.406644821166992, |
|
"rewards/margins": 4.853761672973633, |
|
"rewards/rejected": -20.260406494140625, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.7984167373480351, |
|
"grad_norm": 136.73209013329856, |
|
"learning_rate": 9.517051169918016e-08, |
|
"logits/chosen": -17.076210021972656, |
|
"logits/rejected": -17.026386260986328, |
|
"logps/chosen": -1.750929832458496, |
|
"logps/rejected": -2.2710447311401367, |
|
"loss": 3.3139, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.509296417236328, |
|
"rewards/margins": 5.2011494636535645, |
|
"rewards/rejected": -22.71044921875, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.8006785411365565, |
|
"grad_norm": 130.1019327736892, |
|
"learning_rate": 9.313055529813412e-08, |
|
"logits/chosen": -18.05898666381836, |
|
"logits/rejected": -18.257951736450195, |
|
"logps/chosen": -1.6652144193649292, |
|
"logps/rejected": -1.9794695377349854, |
|
"loss": 3.2462, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -16.652145385742188, |
|
"rewards/margins": 3.142549991607666, |
|
"rewards/rejected": -19.794694900512695, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.8029403449250777, |
|
"grad_norm": 135.42727334061703, |
|
"learning_rate": 9.110981521225532e-08, |
|
"logits/chosen": -17.604793548583984, |
|
"logits/rejected": -17.597965240478516, |
|
"logps/chosen": -1.6499242782592773, |
|
"logps/rejected": -2.056095600128174, |
|
"loss": 3.3403, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -16.499242782592773, |
|
"rewards/margins": 4.061714172363281, |
|
"rewards/rejected": -20.560955047607422, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.8052021487135991, |
|
"grad_norm": 119.05199787120601, |
|
"learning_rate": 8.910841798127884e-08, |
|
"logits/chosen": -17.721969604492188, |
|
"logits/rejected": -18.05399513244629, |
|
"logps/chosen": -1.4811893701553345, |
|
"logps/rejected": -1.954594612121582, |
|
"loss": 3.4896, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -14.811893463134766, |
|
"rewards/margins": 4.734054088592529, |
|
"rewards/rejected": -19.545948028564453, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.8074639525021204, |
|
"grad_norm": 115.38476702241651, |
|
"learning_rate": 8.712648893368139e-08, |
|
"logits/chosen": -18.29002571105957, |
|
"logits/rejected": -18.369548797607422, |
|
"logps/chosen": -2.055358409881592, |
|
"logps/rejected": -2.6065731048583984, |
|
"loss": 3.0386, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -20.553585052490234, |
|
"rewards/margins": 5.512145042419434, |
|
"rewards/rejected": -26.065731048583984, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.8097257562906418, |
|
"grad_norm": 136.37770427632142, |
|
"learning_rate": 8.516415217883186e-08, |
|
"logits/chosen": -20.281574249267578, |
|
"logits/rejected": -20.5699405670166, |
|
"logps/chosen": -1.7435851097106934, |
|
"logps/rejected": -2.0679283142089844, |
|
"loss": 3.2075, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.435850143432617, |
|
"rewards/margins": 3.2434327602386475, |
|
"rewards/rejected": -20.679283142089844, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.8119875600791632, |
|
"grad_norm": 135.87264464627583, |
|
"learning_rate": 8.32215305992209e-08, |
|
"logits/chosen": -18.004587173461914, |
|
"logits/rejected": -17.975828170776367, |
|
"logps/chosen": -1.6789181232452393, |
|
"logps/rejected": -2.0826172828674316, |
|
"loss": 3.5249, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -16.789180755615234, |
|
"rewards/margins": 4.036990165710449, |
|
"rewards/rejected": -20.826171875, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.8142493638676844, |
|
"grad_norm": 102.16900938424125, |
|
"learning_rate": 8.129874584276448e-08, |
|
"logits/chosen": -19.01247787475586, |
|
"logits/rejected": -18.749008178710938, |
|
"logps/chosen": -1.819298505783081, |
|
"logps/rejected": -2.235302209854126, |
|
"loss": 2.6286, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.19298553466797, |
|
"rewards/margins": 4.160037517547607, |
|
"rewards/rejected": -22.353023529052734, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8165111676562058, |
|
"grad_norm": 118.97964507525137, |
|
"learning_rate": 7.939591831518746e-08, |
|
"logits/chosen": -18.50431251525879, |
|
"logits/rejected": -18.590957641601562, |
|
"logps/chosen": -1.4700491428375244, |
|
"logps/rejected": -1.7618913650512695, |
|
"loss": 3.4084, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.700489044189453, |
|
"rewards/margins": 2.9184250831604004, |
|
"rewards/rejected": -17.618913650512695, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.8187729714447272, |
|
"grad_norm": 116.66310673813236, |
|
"learning_rate": 7.751316717248304e-08, |
|
"logits/chosen": -17.54795265197754, |
|
"logits/rejected": -17.81288719177246, |
|
"logps/chosen": -1.8244284391403198, |
|
"logps/rejected": -2.45658016204834, |
|
"loss": 2.5685, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -18.244285583496094, |
|
"rewards/margins": 6.321516990661621, |
|
"rewards/rejected": -24.565799713134766, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.8210347752332485, |
|
"grad_norm": 135.45249586464112, |
|
"learning_rate": 7.565061031345142e-08, |
|
"logits/chosen": -17.44479751586914, |
|
"logits/rejected": -17.825634002685547, |
|
"logps/chosen": -1.5290935039520264, |
|
"logps/rejected": -2.0649290084838867, |
|
"loss": 2.6049, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -15.290933609008789, |
|
"rewards/margins": 5.358358860015869, |
|
"rewards/rejected": -20.6492919921875, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.8232965790217699, |
|
"grad_norm": 156.30830120318217, |
|
"learning_rate": 7.380836437231686e-08, |
|
"logits/chosen": -17.245084762573242, |
|
"logits/rejected": -17.161640167236328, |
|
"logps/chosen": -1.7969930171966553, |
|
"logps/rejected": -2.382586717605591, |
|
"loss": 3.0618, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -17.969932556152344, |
|
"rewards/margins": 5.8559346199035645, |
|
"rewards/rejected": -23.82586669921875, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.8255583828102913, |
|
"grad_norm": 102.01777871530572, |
|
"learning_rate": 7.198654471142371e-08, |
|
"logits/chosen": -15.612630844116211, |
|
"logits/rejected": -15.740878105163574, |
|
"logps/chosen": -1.6975904703140259, |
|
"logps/rejected": -2.106090545654297, |
|
"loss": 2.2693, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -16.97590446472168, |
|
"rewards/margins": 4.084999084472656, |
|
"rewards/rejected": -21.06090545654297, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.8278201865988125, |
|
"grad_norm": 143.32145816638297, |
|
"learning_rate": 7.01852654140132e-08, |
|
"logits/chosen": -16.77008056640625, |
|
"logits/rejected": -16.502208709716797, |
|
"logps/chosen": -1.930238962173462, |
|
"logps/rejected": -2.095974922180176, |
|
"loss": 3.3324, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -19.302391052246094, |
|
"rewards/margins": 1.6573582887649536, |
|
"rewards/rejected": -20.959747314453125, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.8300819903873339, |
|
"grad_norm": 104.17056966782296, |
|
"learning_rate": 6.840463927707833e-08, |
|
"logits/chosen": -19.091691970825195, |
|
"logits/rejected": -18.677135467529297, |
|
"logps/chosen": -1.845261573791504, |
|
"logps/rejected": -2.5466208457946777, |
|
"loss": 2.7259, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.45261573791504, |
|
"rewards/margins": 7.0135955810546875, |
|
"rewards/rejected": -25.46621322631836, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.8323437941758552, |
|
"grad_norm": 108.38820652434924, |
|
"learning_rate": 6.664477780430138e-08, |
|
"logits/chosen": -18.874038696289062, |
|
"logits/rejected": -18.718944549560547, |
|
"logps/chosen": -1.7501500844955444, |
|
"logps/rejected": -2.1758456230163574, |
|
"loss": 3.0257, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -17.50149917602539, |
|
"rewards/margins": 4.256957530975342, |
|
"rewards/rejected": -21.758455276489258, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.8346055979643766, |
|
"grad_norm": 132.12208838073096, |
|
"learning_rate": 6.49057911990711e-08, |
|
"logits/chosen": -20.289962768554688, |
|
"logits/rejected": -20.444843292236328, |
|
"logps/chosen": -1.7201656103134155, |
|
"logps/rejected": -2.0844554901123047, |
|
"loss": 3.4713, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.201656341552734, |
|
"rewards/margins": 3.642897605895996, |
|
"rewards/rejected": -20.84455108642578, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.836867401752898, |
|
"grad_norm": 109.87950428664602, |
|
"learning_rate": 6.318778835758189e-08, |
|
"logits/chosen": -19.79219627380371, |
|
"logits/rejected": -19.581737518310547, |
|
"logps/chosen": -1.884903907775879, |
|
"logps/rejected": -2.484591007232666, |
|
"loss": 2.5576, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -18.84903907775879, |
|
"rewards/margins": 5.996870994567871, |
|
"rewards/rejected": -24.845909118652344, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8391292055414192, |
|
"grad_norm": 132.43862675142938, |
|
"learning_rate": 6.149087686201433e-08, |
|
"logits/chosen": -17.08002471923828, |
|
"logits/rejected": -17.242450714111328, |
|
"logps/chosen": -1.42905592918396, |
|
"logps/rejected": -1.849015712738037, |
|
"loss": 3.3921, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.290557861328125, |
|
"rewards/margins": 4.199598789215088, |
|
"rewards/rejected": -18.490156173706055, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.8413910093299406, |
|
"grad_norm": 125.15917954045561, |
|
"learning_rate": 5.98151629737988e-08, |
|
"logits/chosen": -18.34781265258789, |
|
"logits/rejected": -18.730987548828125, |
|
"logps/chosen": -1.8118157386779785, |
|
"logps/rejected": -2.531665802001953, |
|
"loss": 2.9388, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -18.1181583404541, |
|
"rewards/margins": 7.19849967956543, |
|
"rewards/rejected": -25.31665802001953, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.843652813118462, |
|
"grad_norm": 103.52444879535686, |
|
"learning_rate": 5.816075162696097e-08, |
|
"logits/chosen": -17.558019638061523, |
|
"logits/rejected": -17.425859451293945, |
|
"logps/chosen": -1.3931989669799805, |
|
"logps/rejected": -2.0169644355773926, |
|
"loss": 2.5297, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -13.931989669799805, |
|
"rewards/margins": 6.237652778625488, |
|
"rewards/rejected": -20.16964340209961, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.8459146169069833, |
|
"grad_norm": 96.8291597215048, |
|
"learning_rate": 5.6527746421551046e-08, |
|
"logits/chosen": -19.054040908813477, |
|
"logits/rejected": -18.775854110717773, |
|
"logps/chosen": -1.6858869791030884, |
|
"logps/rejected": -2.047640800476074, |
|
"loss": 3.141, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -16.858869552612305, |
|
"rewards/margins": 3.617537260055542, |
|
"rewards/rejected": -20.47640609741211, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8481764206955047, |
|
"grad_norm": 111.02813607168243, |
|
"learning_rate": 5.4916249617156064e-08, |
|
"logits/chosen": -18.283065795898438, |
|
"logits/rejected": -17.85355567932129, |
|
"logps/chosen": -1.6720712184906006, |
|
"logps/rejected": -2.2261600494384766, |
|
"loss": 3.0392, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -16.720712661743164, |
|
"rewards/margins": 5.540886402130127, |
|
"rewards/rejected": -22.261598587036133, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8504382244840261, |
|
"grad_norm": 113.59608952374487, |
|
"learning_rate": 5.332636212649646e-08, |
|
"logits/chosen": -17.41098976135254, |
|
"logits/rejected": -17.46200180053711, |
|
"logps/chosen": -1.504152774810791, |
|
"logps/rejected": -1.9162389039993286, |
|
"loss": 3.0122, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -15.041528701782227, |
|
"rewards/margins": 4.120862007141113, |
|
"rewards/rejected": -19.162389755249023, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8527000282725473, |
|
"grad_norm": 113.17297439644031, |
|
"learning_rate": 5.17581835091069e-08, |
|
"logits/chosen": -18.82924461364746, |
|
"logits/rejected": -19.1933536529541, |
|
"logps/chosen": -1.8666414022445679, |
|
"logps/rejected": -2.4097957611083984, |
|
"loss": 3.0043, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -18.666414260864258, |
|
"rewards/margins": 5.431545257568359, |
|
"rewards/rejected": -24.097959518432617, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.8549618320610687, |
|
"grad_norm": 127.65744686159971, |
|
"learning_rate": 5.02118119651016e-08, |
|
"logits/chosen": -15.968871116638184, |
|
"logits/rejected": -16.066356658935547, |
|
"logps/chosen": -1.7414379119873047, |
|
"logps/rejected": -2.270836353302002, |
|
"loss": 2.8791, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -17.41438102722168, |
|
"rewards/margins": 5.293981075286865, |
|
"rewards/rejected": -22.708358764648438, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.85722363584959, |
|
"grad_norm": 128.0416423580519, |
|
"learning_rate": 4.868734432902526e-08, |
|
"logits/chosen": -15.94872760772705, |
|
"logits/rejected": -15.913581848144531, |
|
"logps/chosen": -1.6559503078460693, |
|
"logps/rejected": -2.2543857097625732, |
|
"loss": 3.2796, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.55950355529785, |
|
"rewards/margins": 5.984354496002197, |
|
"rewards/rejected": -22.543859481811523, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.8594854396381114, |
|
"grad_norm": 137.44029074786243, |
|
"learning_rate": 4.7184876063789134e-08, |
|
"logits/chosen": -16.0161075592041, |
|
"logits/rejected": -16.125635147094727, |
|
"logps/chosen": -1.7773343324661255, |
|
"logps/rejected": -2.296335458755493, |
|
"loss": 3.0373, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -17.773344039916992, |
|
"rewards/margins": 5.190011024475098, |
|
"rewards/rejected": -22.963354110717773, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8617472434266328, |
|
"grad_norm": 97.95472528249057, |
|
"learning_rate": 4.570450125469314e-08, |
|
"logits/chosen": -18.46808624267578, |
|
"logits/rejected": -18.239410400390625, |
|
"logps/chosen": -1.8006447553634644, |
|
"logps/rejected": -2.444537878036499, |
|
"loss": 2.4475, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.006446838378906, |
|
"rewards/margins": 6.4389328956604, |
|
"rewards/rejected": -24.44538116455078, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.864009047215154, |
|
"grad_norm": 122.46444272567702, |
|
"learning_rate": 4.424631260353378e-08, |
|
"logits/chosen": -16.532258987426758, |
|
"logits/rejected": -16.949682235717773, |
|
"logps/chosen": -1.4457993507385254, |
|
"logps/rejected": -1.9387412071228027, |
|
"loss": 3.2723, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -14.457992553710938, |
|
"rewards/margins": 4.929420471191406, |
|
"rewards/rejected": -19.387413024902344, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8662708510036754, |
|
"grad_norm": 134.6321015186439, |
|
"learning_rate": 4.281040142280008e-08, |
|
"logits/chosen": -17.987564086914062, |
|
"logits/rejected": -17.679292678833008, |
|
"logps/chosen": -1.4789788722991943, |
|
"logps/rejected": -1.9366073608398438, |
|
"loss": 2.4358, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -14.789788246154785, |
|
"rewards/margins": 4.5762858390808105, |
|
"rewards/rejected": -19.366071701049805, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8685326547921968, |
|
"grad_norm": 145.23607203406814, |
|
"learning_rate": 4.1396857629954286e-08, |
|
"logits/chosen": -19.37828254699707, |
|
"logits/rejected": -19.550512313842773, |
|
"logps/chosen": -2.0876235961914062, |
|
"logps/rejected": -2.755703926086426, |
|
"loss": 3.0501, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -20.876237869262695, |
|
"rewards/margins": 6.6808037757873535, |
|
"rewards/rejected": -27.55704116821289, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.8707944585807181, |
|
"grad_norm": 95.46804083715921, |
|
"learning_rate": 4.000576974180232e-08, |
|
"logits/chosen": -17.347396850585938, |
|
"logits/rejected": -17.552875518798828, |
|
"logps/chosen": -1.7520135641098022, |
|
"logps/rejected": -2.1543803215026855, |
|
"loss": 2.6766, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -17.5201358795166, |
|
"rewards/margins": 4.023664474487305, |
|
"rewards/rejected": -21.543800354003906, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8730562623692395, |
|
"grad_norm": 104.95835540317567, |
|
"learning_rate": 3.8637224868950066e-08, |
|
"logits/chosen": -18.283233642578125, |
|
"logits/rejected": -18.088119506835938, |
|
"logps/chosen": -1.747474193572998, |
|
"logps/rejected": -2.1311683654785156, |
|
"loss": 2.9522, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -17.474742889404297, |
|
"rewards/margins": 3.8369407653808594, |
|
"rewards/rejected": -21.311681747436523, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.8753180661577609, |
|
"grad_norm": 109.46707890564362, |
|
"learning_rate": 3.729130871034885e-08, |
|
"logits/chosen": -17.69164276123047, |
|
"logits/rejected": -17.34153938293457, |
|
"logps/chosen": -1.60804283618927, |
|
"logps/rejected": -2.0555553436279297, |
|
"loss": 2.7504, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.080427169799805, |
|
"rewards/margins": 4.475124835968018, |
|
"rewards/rejected": -20.555551528930664, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.8775798699462821, |
|
"grad_norm": 124.79737385782774, |
|
"learning_rate": 3.596810554792888e-08, |
|
"logits/chosen": -19.268070220947266, |
|
"logits/rejected": -19.46062469482422, |
|
"logps/chosen": -2.0554943084716797, |
|
"logps/rejected": -2.616269826889038, |
|
"loss": 3.2245, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -20.554943084716797, |
|
"rewards/margins": 5.607754230499268, |
|
"rewards/rejected": -26.162696838378906, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.8798416737348035, |
|
"grad_norm": 122.92689968096272, |
|
"learning_rate": 3.466769824132116e-08, |
|
"logits/chosen": -19.075674057006836, |
|
"logits/rejected": -19.091054916381836, |
|
"logps/chosen": -1.9592108726501465, |
|
"logps/rejected": -2.5127036571502686, |
|
"loss": 2.9941, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -19.59210968017578, |
|
"rewards/margins": 5.534926414489746, |
|
"rewards/rejected": -25.12703514099121, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8821034775233249, |
|
"grad_norm": 129.9790697640605, |
|
"learning_rate": 3.339016822266925e-08, |
|
"logits/chosen": -17.7946834564209, |
|
"logits/rejected": -17.704071044921875, |
|
"logps/chosen": -1.8595997095108032, |
|
"logps/rejected": -2.2788615226745605, |
|
"loss": 2.2966, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -18.595996856689453, |
|
"rewards/margins": 4.192615985870361, |
|
"rewards/rejected": -22.788612365722656, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8843652813118462, |
|
"grad_norm": 165.30674333822296, |
|
"learning_rate": 3.213559549152958e-08, |
|
"logits/chosen": -17.560834884643555, |
|
"logits/rejected": -17.52518081665039, |
|
"logps/chosen": -1.4146690368652344, |
|
"logps/rejected": -2.0134286880493164, |
|
"loss": 3.5443, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -14.146690368652344, |
|
"rewards/margins": 5.987596035003662, |
|
"rewards/rejected": -20.134286880493164, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8866270851003676, |
|
"grad_norm": 125.85896431798938, |
|
"learning_rate": 3.090405860986203e-08, |
|
"logits/chosen": -19.027587890625, |
|
"logits/rejected": -19.275127410888672, |
|
"logps/chosen": -2.2725133895874023, |
|
"logps/rejected": -2.906642436981201, |
|
"loss": 2.978, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -22.72513198852539, |
|
"rewards/margins": 6.3412885665893555, |
|
"rewards/rejected": -29.066425323486328, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 128.26387031170609, |
|
"learning_rate": 2.9695634697110315e-08, |
|
"logits/chosen": -17.81490135192871, |
|
"logits/rejected": -17.856733322143555, |
|
"logps/chosen": -1.7137458324432373, |
|
"logps/rejected": -2.1935040950775146, |
|
"loss": 3.3315, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.13745880126953, |
|
"rewards/margins": 4.797582149505615, |
|
"rewards/rejected": -21.935041427612305, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8911506926774102, |
|
"grad_norm": 133.54866529087667, |
|
"learning_rate": 2.8510399425372766e-08, |
|
"logits/chosen": -17.018762588500977, |
|
"logits/rejected": -17.156879425048828, |
|
"logps/chosen": -1.6060205698013306, |
|
"logps/rejected": -2.3053719997406006, |
|
"loss": 2.6726, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -16.060205459594727, |
|
"rewards/margins": 6.993513584136963, |
|
"rewards/rejected": -23.05371856689453, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8934124964659316, |
|
"grad_norm": 133.68043982311156, |
|
"learning_rate": 2.734842701466329e-08, |
|
"logits/chosen": -19.136150360107422, |
|
"logits/rejected": -19.161407470703125, |
|
"logps/chosen": -1.6094989776611328, |
|
"logps/rejected": -1.981292486190796, |
|
"loss": 3.0683, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.094987869262695, |
|
"rewards/margins": 3.7179362773895264, |
|
"rewards/rejected": -19.812923431396484, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8956743002544529, |
|
"grad_norm": 135.95939370288835, |
|
"learning_rate": 2.6209790228264438e-08, |
|
"logits/chosen": -17.601152420043945, |
|
"logits/rejected": -17.81585693359375, |
|
"logps/chosen": -2.0195772647857666, |
|
"logps/rejected": -2.464128017425537, |
|
"loss": 3.0865, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -20.19577407836914, |
|
"rewards/margins": 4.445508003234863, |
|
"rewards/rejected": -24.64128303527832, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8979361040429743, |
|
"grad_norm": 113.2954374562523, |
|
"learning_rate": 2.5094560368170305e-08, |
|
"logits/chosen": -17.47164535522461, |
|
"logits/rejected": -17.385438919067383, |
|
"logps/chosen": -1.6676338911056519, |
|
"logps/rejected": -2.1376919746398926, |
|
"loss": 2.9399, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -16.676340103149414, |
|
"rewards/margins": 4.7005791664123535, |
|
"rewards/rejected": -21.376916885375977, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.9001979078314957, |
|
"grad_norm": 109.04886917574666, |
|
"learning_rate": 2.4002807270621893e-08, |
|
"logits/chosen": -19.112560272216797, |
|
"logits/rejected": -19.00596046447754, |
|
"logps/chosen": -1.6903434991836548, |
|
"logps/rejected": -2.2326083183288574, |
|
"loss": 2.8406, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.90343475341797, |
|
"rewards/margins": 5.422649383544922, |
|
"rewards/rejected": -22.32608413696289, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9024597116200169, |
|
"grad_norm": 118.17958359886174, |
|
"learning_rate": 2.293459930173354e-08, |
|
"logits/chosen": -19.049705505371094, |
|
"logits/rejected": -19.230878829956055, |
|
"logps/chosen": -1.889772891998291, |
|
"logps/rejected": -2.3055403232574463, |
|
"loss": 3.2104, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -18.897727966308594, |
|
"rewards/margins": 4.157675743103027, |
|
"rewards/rejected": -23.055404663085938, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.9047215154085383, |
|
"grad_norm": 118.64459034636026, |
|
"learning_rate": 2.189000335321256e-08, |
|
"logits/chosen": -16.958843231201172, |
|
"logits/rejected": -16.984729766845703, |
|
"logps/chosen": -1.7274019718170166, |
|
"logps/rejected": -2.112072467803955, |
|
"loss": 3.2286, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.274019241333008, |
|
"rewards/margins": 3.8467049598693848, |
|
"rewards/rejected": -21.120725631713867, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9069833191970597, |
|
"grad_norm": 136.85158094391608, |
|
"learning_rate": 2.086908483816954e-08, |
|
"logits/chosen": -18.37076187133789, |
|
"logits/rejected": -18.361103057861328, |
|
"logps/chosen": -2.0497868061065674, |
|
"logps/rejected": -2.2769107818603516, |
|
"loss": 3.0689, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -20.497867584228516, |
|
"rewards/margins": 2.2712390422821045, |
|
"rewards/rejected": -22.769105911254883, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.909245122985581, |
|
"grad_norm": 123.9239261032842, |
|
"learning_rate": 1.9871907687022717e-08, |
|
"logits/chosen": -16.64785385131836, |
|
"logits/rejected": -16.638736724853516, |
|
"logps/chosen": -1.5310957431793213, |
|
"logps/rejected": -2.1431174278259277, |
|
"loss": 3.429, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.310956954956055, |
|
"rewards/margins": 6.120217323303223, |
|
"rewards/rejected": -21.43117332458496, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9115069267741024, |
|
"grad_norm": 112.34416737954284, |
|
"learning_rate": 1.889853434349451e-08, |
|
"logits/chosen": -18.59053611755371, |
|
"logits/rejected": -18.618457794189453, |
|
"logps/chosen": -1.6284946203231812, |
|
"logps/rejected": -2.1159729957580566, |
|
"loss": 3.0198, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -16.28494644165039, |
|
"rewards/margins": 4.874783992767334, |
|
"rewards/rejected": -21.159730911254883, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.9137687305626236, |
|
"grad_norm": 124.44884198830263, |
|
"learning_rate": 1.7949025760701164e-08, |
|
"logits/chosen": -18.346927642822266, |
|
"logits/rejected": -17.982345581054688, |
|
"logps/chosen": -1.8247474431991577, |
|
"logps/rejected": -2.060636043548584, |
|
"loss": 3.2447, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -18.247474670410156, |
|
"rewards/margins": 2.358887195587158, |
|
"rewards/rejected": -20.606359481811523, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.916030534351145, |
|
"grad_norm": 98.85780040911116, |
|
"learning_rate": 1.7023441397336023e-08, |
|
"logits/chosen": -16.48066520690918, |
|
"logits/rejected": -16.472976684570312, |
|
"logps/chosen": -1.3015496730804443, |
|
"logps/rejected": -2.006908416748047, |
|
"loss": 3.0345, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -13.015497207641602, |
|
"rewards/margins": 7.053586006164551, |
|
"rewards/rejected": -20.069082260131836, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9182923381396664, |
|
"grad_norm": 155.0460005854357, |
|
"learning_rate": 1.6121839213945854e-08, |
|
"logits/chosen": -19.387483596801758, |
|
"logits/rejected": -19.041393280029297, |
|
"logps/chosen": -1.9884074926376343, |
|
"logps/rejected": -2.7867484092712402, |
|
"loss": 2.8585, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -19.884077072143555, |
|
"rewards/margins": 7.983407974243164, |
|
"rewards/rejected": -27.867483139038086, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.9205541419281877, |
|
"grad_norm": 115.17173619747005, |
|
"learning_rate": 1.5244275669301777e-08, |
|
"logits/chosen": -18.749929428100586, |
|
"logits/rejected": -18.742963790893555, |
|
"logps/chosen": -1.801578164100647, |
|
"logps/rejected": -2.3081681728363037, |
|
"loss": 2.7902, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -18.015783309936523, |
|
"rewards/margins": 5.06589937210083, |
|
"rewards/rejected": -23.081682205200195, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.9228159457167091, |
|
"grad_norm": 128.6499296049485, |
|
"learning_rate": 1.4390805716863398e-08, |
|
"logits/chosen": -15.303824424743652, |
|
"logits/rejected": -15.512129783630371, |
|
"logps/chosen": -1.629596471786499, |
|
"logps/rejected": -2.028701066970825, |
|
"loss": 3.3744, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.29596519470215, |
|
"rewards/margins": 3.991044521331787, |
|
"rewards/rejected": -20.287010192871094, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.9250777495052305, |
|
"grad_norm": 119.27282282658015, |
|
"learning_rate": 1.3561482801337908e-08, |
|
"logits/chosen": -20.444072723388672, |
|
"logits/rejected": -20.429006576538086, |
|
"logps/chosen": -1.7733900547027588, |
|
"logps/rejected": -2.1421661376953125, |
|
"loss": 2.9589, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -17.73390007019043, |
|
"rewards/margins": 3.687760353088379, |
|
"rewards/rejected": -21.421661376953125, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.9273395532937517, |
|
"grad_norm": 128.38615627616883, |
|
"learning_rate": 1.2756358855332904e-08, |
|
"logits/chosen": -19.65103530883789, |
|
"logits/rejected": -19.755090713500977, |
|
"logps/chosen": -2.041804552078247, |
|
"logps/rejected": -2.587003231048584, |
|
"loss": 3.5126, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -20.418046951293945, |
|
"rewards/margins": 5.45198917388916, |
|
"rewards/rejected": -25.87003517150879, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9296013570822731, |
|
"grad_norm": 124.92752994294887, |
|
"learning_rate": 1.1975484296105154e-08, |
|
"logits/chosen": -18.58397102355957, |
|
"logits/rejected": -18.57633399963379, |
|
"logps/chosen": -1.8586208820343018, |
|
"logps/rejected": -2.2781119346618652, |
|
"loss": 2.6842, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -18.58620834350586, |
|
"rewards/margins": 4.194911956787109, |
|
"rewards/rejected": -22.78112030029297, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.9318631608707945, |
|
"grad_norm": 125.7273927211634, |
|
"learning_rate": 1.1218908022402374e-08, |
|
"logits/chosen": -17.77056121826172, |
|
"logits/rejected": -17.610393524169922, |
|
"logps/chosen": -1.4044468402862549, |
|
"logps/rejected": -1.957669973373413, |
|
"loss": 2.8821, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -14.04446792602539, |
|
"rewards/margins": 5.532229900360107, |
|
"rewards/rejected": -19.576698303222656, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.9341249646593158, |
|
"grad_norm": 126.86404645931887, |
|
"learning_rate": 1.0486677411402079e-08, |
|
"logits/chosen": -18.397602081298828, |
|
"logits/rejected": -18.570650100708008, |
|
"logps/chosen": -1.7708725929260254, |
|
"logps/rejected": -2.3376171588897705, |
|
"loss": 3.1541, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -17.708724975585938, |
|
"rewards/margins": 5.66744327545166, |
|
"rewards/rejected": -23.376169204711914, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.9363867684478372, |
|
"grad_norm": 124.4303497435617, |
|
"learning_rate": 9.778838315744353e-09, |
|
"logits/chosen": -18.644079208374023, |
|
"logits/rejected": -18.6198673248291, |
|
"logps/chosen": -1.7886202335357666, |
|
"logps/rejected": -2.106006383895874, |
|
"loss": 3.3825, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.88620376586914, |
|
"rewards/margins": 3.1738624572753906, |
|
"rewards/rejected": -21.06006622314453, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.9386485722363584, |
|
"grad_norm": 118.54959081850222, |
|
"learning_rate": 9.095435060660595e-09, |
|
"logits/chosen": -18.39704132080078, |
|
"logits/rejected": -18.45808982849121, |
|
"logps/chosen": -1.6519393920898438, |
|
"logps/rejected": -1.9818757772445679, |
|
"loss": 3.0169, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -16.519393920898438, |
|
"rewards/margins": 3.299362897872925, |
|
"rewards/rejected": -19.818758010864258, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9409103760248798, |
|
"grad_norm": 115.40228572150829, |
|
"learning_rate": 8.436510441197864e-09, |
|
"logits/chosen": -20.38088607788086, |
|
"logits/rejected": -20.029354095458984, |
|
"logps/chosen": -1.6879628896713257, |
|
"logps/rejected": -1.8838168382644653, |
|
"loss": 3.2592, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -16.879629135131836, |
|
"rewards/margins": 1.9585394859313965, |
|
"rewards/rejected": -18.83816909790039, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9431721798134012, |
|
"grad_norm": 167.10931134069494, |
|
"learning_rate": 7.802105719539076e-09, |
|
"logits/chosen": -18.494495391845703, |
|
"logits/rejected": -18.538818359375, |
|
"logps/chosen": -1.9037325382232666, |
|
"logps/rejected": -2.5261826515197754, |
|
"loss": 3.6266, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -19.037324905395508, |
|
"rewards/margins": 6.224499225616455, |
|
"rewards/rejected": -25.261825561523438, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.9454339836019225, |
|
"grad_norm": 119.62949752586249, |
|
"learning_rate": 7.1922606224192e-09, |
|
"logits/chosen": -18.683055877685547, |
|
"logits/rejected": -18.973587036132812, |
|
"logps/chosen": -1.7631547451019287, |
|
"logps/rejected": -2.3043551445007324, |
|
"loss": 2.9354, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -17.631547927856445, |
|
"rewards/margins": 5.4120049476623535, |
|
"rewards/rejected": -23.043554306030273, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9476957873904439, |
|
"grad_norm": 110.69406219801077, |
|
"learning_rate": 6.6070133386372906e-09, |
|
"logits/chosen": -16.96223258972168, |
|
"logits/rejected": -17.232624053955078, |
|
"logps/chosen": -1.7214587926864624, |
|
"logps/rejected": -2.0780019760131836, |
|
"loss": 3.1999, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.214588165283203, |
|
"rewards/margins": 3.5654308795928955, |
|
"rewards/rejected": -20.780017852783203, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.9499575911789653, |
|
"grad_norm": 133.93900888371826, |
|
"learning_rate": 6.046400516665384e-09, |
|
"logits/chosen": -18.921737670898438, |
|
"logits/rejected": -19.057086944580078, |
|
"logps/chosen": -1.8862426280975342, |
|
"logps/rejected": -2.456373453140259, |
|
"loss": 3.1087, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -18.8624267578125, |
|
"rewards/margins": 5.70130729675293, |
|
"rewards/rejected": -24.56373405456543, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9522193949674865, |
|
"grad_norm": 116.67059208076354, |
|
"learning_rate": 5.510457262353396e-09, |
|
"logits/chosen": -18.74356460571289, |
|
"logits/rejected": -18.647714614868164, |
|
"logps/chosen": -1.5839942693710327, |
|
"logps/rejected": -2.068876028060913, |
|
"loss": 3.0213, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -15.839942932128906, |
|
"rewards/margins": 4.848816871643066, |
|
"rewards/rejected": -20.68876075744629, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9544811987560079, |
|
"grad_norm": 137.6503435987508, |
|
"learning_rate": 4.9992171367309265e-09, |
|
"logits/chosen": -17.830699920654297, |
|
"logits/rejected": -17.30995750427246, |
|
"logps/chosen": -1.6017370223999023, |
|
"logps/rejected": -2.2726082801818848, |
|
"loss": 2.7492, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.017372131347656, |
|
"rewards/margins": 6.708712577819824, |
|
"rewards/rejected": -22.726083755493164, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9567430025445293, |
|
"grad_norm": 122.50350701504888, |
|
"learning_rate": 4.5127121539052955e-09, |
|
"logits/chosen": -18.987272262573242, |
|
"logits/rejected": -18.7191162109375, |
|
"logps/chosen": -1.7801018953323364, |
|
"logps/rejected": -2.5131754875183105, |
|
"loss": 2.6308, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -17.80101776123047, |
|
"rewards/margins": 7.330737590789795, |
|
"rewards/rejected": -25.131757736206055, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9590048063330506, |
|
"grad_norm": 105.12784722468204, |
|
"learning_rate": 4.050972779057327e-09, |
|
"logits/chosen": -17.278427124023438, |
|
"logits/rejected": -17.121200561523438, |
|
"logps/chosen": -1.702017903327942, |
|
"logps/rejected": -2.172736883163452, |
|
"loss": 2.6915, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.020179748535156, |
|
"rewards/margins": 4.707189559936523, |
|
"rewards/rejected": -21.727367401123047, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.961266610121572, |
|
"grad_norm": 122.22803042526128, |
|
"learning_rate": 3.6140279265330477e-09, |
|
"logits/chosen": -18.193286895751953, |
|
"logits/rejected": -17.90346908569336, |
|
"logps/chosen": -1.8119601011276245, |
|
"logps/rejected": -2.272505283355713, |
|
"loss": 2.935, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.119600296020508, |
|
"rewards/margins": 4.605450630187988, |
|
"rewards/rejected": -22.725051879882812, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9635284139100933, |
|
"grad_norm": 140.15479399613614, |
|
"learning_rate": 3.2019049580335853e-09, |
|
"logits/chosen": -17.40700340270996, |
|
"logits/rejected": -17.39166259765625, |
|
"logps/chosen": -1.8650894165039062, |
|
"logps/rejected": -2.274355888366699, |
|
"loss": 3.5647, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -18.65089225769043, |
|
"rewards/margins": 4.092666149139404, |
|
"rewards/rejected": -22.743558883666992, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9657902176986146, |
|
"grad_norm": 102.93156958129578, |
|
"learning_rate": 2.814629680901337e-09, |
|
"logits/chosen": -19.251096725463867, |
|
"logits/rejected": -19.292316436767578, |
|
"logps/chosen": -1.6867254972457886, |
|
"logps/rejected": -2.0900285243988037, |
|
"loss": 2.4974, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.86725425720215, |
|
"rewards/margins": 4.0330305099487305, |
|
"rewards/rejected": -20.900283813476562, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.968052021487136, |
|
"grad_norm": 111.34250544518655, |
|
"learning_rate": 2.4522263465041937e-09, |
|
"logits/chosen": -19.024517059326172, |
|
"logits/rejected": -18.74802017211914, |
|
"logps/chosen": -2.0575406551361084, |
|
"logps/rejected": -2.7982211112976074, |
|
"loss": 2.5955, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -20.575408935546875, |
|
"rewards/margins": 7.406803131103516, |
|
"rewards/rejected": -27.982210159301758, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.9703138252756573, |
|
"grad_norm": 98.51128989688017, |
|
"learning_rate": 2.114717648716713e-09, |
|
"logits/chosen": -16.984386444091797, |
|
"logits/rejected": -16.8139591217041, |
|
"logps/chosen": -1.8296539783477783, |
|
"logps/rejected": -2.573110818862915, |
|
"loss": 3.0284, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -18.296539306640625, |
|
"rewards/margins": 7.434567451477051, |
|
"rewards/rejected": -25.731107711791992, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.9725756290641787, |
|
"grad_norm": 124.4940263604112, |
|
"learning_rate": 1.802124722499121e-09, |
|
"logits/chosen": -18.865802764892578, |
|
"logits/rejected": -18.73249626159668, |
|
"logps/chosen": -1.7756928205490112, |
|
"logps/rejected": -2.6194663047790527, |
|
"loss": 2.5879, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -17.75693130493164, |
|
"rewards/margins": 8.437736511230469, |
|
"rewards/rejected": -26.194665908813477, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9748374328527001, |
|
"grad_norm": 119.13158902037044, |
|
"learning_rate": 1.5144671425737499e-09, |
|
"logits/chosen": -17.51629638671875, |
|
"logits/rejected": -17.642141342163086, |
|
"logps/chosen": -1.799952745437622, |
|
"logps/rejected": -2.451775550842285, |
|
"loss": 3.0634, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.999526977539062, |
|
"rewards/margins": 6.518229007720947, |
|
"rewards/rejected": -24.517757415771484, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.9770992366412213, |
|
"grad_norm": 100.6745496186136, |
|
"learning_rate": 1.251762922199484e-09, |
|
"logits/chosen": -18.572729110717773, |
|
"logits/rejected": -19.301191329956055, |
|
"logps/chosen": -1.8852096796035767, |
|
"logps/rejected": -2.454303503036499, |
|
"loss": 2.4105, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -18.852096557617188, |
|
"rewards/margins": 5.690939903259277, |
|
"rewards/rejected": -24.54303550720215, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.9793610404297427, |
|
"grad_norm": 119.30288980428828, |
|
"learning_rate": 1.0140285120433744e-09, |
|
"logits/chosen": -18.9143009185791, |
|
"logits/rejected": -18.95807456970215, |
|
"logps/chosen": -1.8828755617141724, |
|
"logps/rejected": -2.437493085861206, |
|
"loss": 3.3873, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.828754425048828, |
|
"rewards/margins": 5.546175003051758, |
|
"rewards/rejected": -24.374929428100586, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.9816228442182641, |
|
"grad_norm": 119.93045050852022, |
|
"learning_rate": 8.012787991508396e-10, |
|
"logits/chosen": -18.035734176635742, |
|
"logits/rejected": -17.416671752929688, |
|
"logps/chosen": -1.7183349132537842, |
|
"logps/rejected": -2.451599359512329, |
|
"loss": 2.4103, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -17.183349609375, |
|
"rewards/margins": 7.332643508911133, |
|
"rewards/rejected": -24.5159912109375, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.9838846480067854, |
|
"grad_norm": 127.1741271306872, |
|
"learning_rate": 6.135271060133007e-10, |
|
"logits/chosen": -17.5001277923584, |
|
"logits/rejected": -17.65492057800293, |
|
"logps/chosen": -1.74495530128479, |
|
"logps/rejected": -2.323106527328491, |
|
"loss": 3.0668, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -17.449552536010742, |
|
"rewards/margins": 5.781513214111328, |
|
"rewards/rejected": -23.23106575012207, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9861464517953068, |
|
"grad_norm": 115.3718910974279, |
|
"learning_rate": 4.50785189733871e-10, |
|
"logits/chosen": -17.362075805664062, |
|
"logits/rejected": -17.160686492919922, |
|
"logps/chosen": -1.3833808898925781, |
|
"logps/rejected": -1.7379635572433472, |
|
"loss": 2.7748, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -13.833809852600098, |
|
"rewards/margins": 3.5458261966705322, |
|
"rewards/rejected": -17.379636764526367, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.988408255583828, |
|
"grad_norm": 110.49410393455729, |
|
"learning_rate": 3.1306324129118935e-10, |
|
"logits/chosen": -17.78763198852539, |
|
"logits/rejected": -17.5814151763916, |
|
"logps/chosen": -1.6376947164535522, |
|
"logps/rejected": -2.1998562812805176, |
|
"loss": 3.0113, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.3769474029541, |
|
"rewards/margins": 5.621615886688232, |
|
"rewards/rejected": -21.99856185913086, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9906700593723494, |
|
"grad_norm": 142.4763338483451, |
|
"learning_rate": 2.003698849011748e-10, |
|
"logits/chosen": -19.646331787109375, |
|
"logits/rejected": -19.66240119934082, |
|
"logps/chosen": -2.0467026233673096, |
|
"logps/rejected": -2.477294921875, |
|
"loss": 3.3739, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -20.467025756835938, |
|
"rewards/margins": 4.305922985076904, |
|
"rewards/rejected": -24.772947311401367, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9929318631608708, |
|
"grad_norm": 130.7419382757851, |
|
"learning_rate": 1.1271217747714779e-10, |
|
"logits/chosen": -17.93435287475586, |
|
"logits/rejected": -17.90981674194336, |
|
"logps/chosen": -1.883331298828125, |
|
"logps/rejected": -2.1619515419006348, |
|
"loss": 3.3682, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -18.83331298828125, |
|
"rewards/margins": 2.786202907562256, |
|
"rewards/rejected": -21.619516372680664, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.9951936669493922, |
|
"grad_norm": 125.07489041195862, |
|
"learning_rate": 5.0095608187739055e-11, |
|
"logits/chosen": -19.032190322875977, |
|
"logits/rejected": -19.182344436645508, |
|
"logps/chosen": -1.578109622001648, |
|
"logps/rejected": -1.948418378829956, |
|
"loss": 2.7569, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.781095504760742, |
|
"rewards/margins": 3.703087329864502, |
|
"rewards/rejected": -19.48418426513672, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9974554707379135, |
|
"grad_norm": 119.11218694159568, |
|
"learning_rate": 1.2524098113209092e-11, |
|
"logits/chosen": -16.846660614013672, |
|
"logits/rejected": -17.356082916259766, |
|
"logps/chosen": -1.736297845840454, |
|
"logps/rejected": -2.1138105392456055, |
|
"loss": 3.4049, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -17.36298179626465, |
|
"rewards/margins": 3.7751266956329346, |
|
"rewards/rejected": -21.138107299804688, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"grad_norm": 120.08290315715726, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -18.770984649658203, |
|
"logits/rejected": -18.760494232177734, |
|
"logps/chosen": -1.659979224205017, |
|
"logps/rejected": -2.181823492050171, |
|
"loss": 2.8512, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -16.59979248046875, |
|
"rewards/margins": 5.218443393707275, |
|
"rewards/rejected": -21.818235397338867, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9997172745264349, |
|
"step": 442, |
|
"total_flos": 227674672136192.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 1.6273, |
|
"train_samples_per_second": 34774.982, |
|
"train_steps_per_second": 271.612 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 442, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 227674672136192.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|