|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 2907, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.7645790576934814, |
|
"logits/rejected": -2.8125059604644775, |
|
"logps/chosen": -113.67314910888672, |
|
"logps/rejected": -132.0498504638672, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -2.9990971088409424, |
|
"logits/rejected": -3.0227837562561035, |
|
"logps/chosen": -281.044921875, |
|
"logps/rejected": -247.3936309814453, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.004584211856126785, |
|
"rewards/margins": 0.00850469246506691, |
|
"rewards/rejected": -0.0039204806089401245, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.88598895072937, |
|
"logits/rejected": -2.917177200317383, |
|
"logps/chosen": -359.26177978515625, |
|
"logps/rejected": -298.42877197265625, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.05283154919743538, |
|
"rewards/margins": 0.018162177875638008, |
|
"rewards/rejected": 0.03466937318444252, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -2.9570868015289307, |
|
"logits/rejected": -2.9609949588775635, |
|
"logps/chosen": -326.2544860839844, |
|
"logps/rejected": -289.9393615722656, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.1515582799911499, |
|
"rewards/margins": 0.05830109864473343, |
|
"rewards/rejected": 0.09325718879699707, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -2.9531846046447754, |
|
"logits/rejected": -2.955566883087158, |
|
"logps/chosen": -376.5739440917969, |
|
"logps/rejected": -331.3490295410156, |
|
"loss": 0.6444, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3529122769832611, |
|
"rewards/margins": 0.1296483278274536, |
|
"rewards/rejected": 0.2232639044523239, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -2.8878796100616455, |
|
"logits/rejected": -2.9229512214660645, |
|
"logps/chosen": -427.5284118652344, |
|
"logps/rejected": -266.94415283203125, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.5041142702102661, |
|
"rewards/margins": 0.29676300287246704, |
|
"rewards/rejected": 0.20735123753547668, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.92777943611145, |
|
"logits/rejected": -2.9371728897094727, |
|
"logps/chosen": -318.0414123535156, |
|
"logps/rejected": -265.1334228515625, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.38802462816238403, |
|
"rewards/margins": 0.31535086035728455, |
|
"rewards/rejected": 0.0726737454533577, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -2.9851737022399902, |
|
"logits/rejected": -3.0005269050598145, |
|
"logps/chosen": -404.5143127441406, |
|
"logps/rejected": -300.8736572265625, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5573440790176392, |
|
"rewards/margins": 0.41277560591697693, |
|
"rewards/rejected": 0.14456847310066223, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -2.96441912651062, |
|
"logits/rejected": -2.968987464904785, |
|
"logps/chosen": -314.7437438964844, |
|
"logps/rejected": -254.7586669921875, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.45461219549179077, |
|
"rewards/margins": 0.5670984983444214, |
|
"rewards/rejected": -0.1124863252043724, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -3.021219491958618, |
|
"logits/rejected": -3.0178027153015137, |
|
"logps/chosen": -308.31585693359375, |
|
"logps/rejected": -257.63250732421875, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.5257282257080078, |
|
"rewards/margins": 0.7057730555534363, |
|
"rewards/rejected": -0.18004484474658966, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -2.8890886306762695, |
|
"logits/rejected": -2.896449327468872, |
|
"logps/chosen": -375.84564208984375, |
|
"logps/rejected": -241.34219360351562, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.6213200092315674, |
|
"rewards/margins": 0.8095133900642395, |
|
"rewards/rejected": -0.18819323182106018, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -2.936641216278076, |
|
"eval_logits/rejected": -2.935973882675171, |
|
"eval_logps/chosen": -361.9043884277344, |
|
"eval_logps/rejected": -293.7761535644531, |
|
"eval_loss": 0.5406630635261536, |
|
"eval_rewards/accuracies": 0.7579365372657776, |
|
"eval_rewards/chosen": 0.5287383794784546, |
|
"eval_rewards/margins": 0.7097563743591309, |
|
"eval_rewards/rejected": -0.18101799488067627, |
|
"eval_runtime": 163.7175, |
|
"eval_samples_per_second": 12.216, |
|
"eval_steps_per_second": 0.385, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -2.917739152908325, |
|
"logits/rejected": -2.8890061378479004, |
|
"logps/chosen": -334.1250305175781, |
|
"logps/rejected": -331.29571533203125, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4206802248954773, |
|
"rewards/margins": 0.6734089851379395, |
|
"rewards/rejected": -0.25272876024246216, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.9665865898132324, |
|
"logits/rejected": -2.970818519592285, |
|
"logps/chosen": -386.2568664550781, |
|
"logps/rejected": -280.7279357910156, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.3308308720588684, |
|
"rewards/margins": 0.6611676216125488, |
|
"rewards/rejected": -0.33033671975135803, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -3.012545347213745, |
|
"logits/rejected": -2.9925591945648193, |
|
"logps/chosen": -353.75469970703125, |
|
"logps/rejected": -290.1478576660156, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.47041910886764526, |
|
"rewards/margins": 0.7254467010498047, |
|
"rewards/rejected": -0.2550275921821594, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.9241251945495605, |
|
"logits/rejected": -2.9576869010925293, |
|
"logps/chosen": -329.7611389160156, |
|
"logps/rejected": -265.63006591796875, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.4418914318084717, |
|
"rewards/margins": 0.7908871173858643, |
|
"rewards/rejected": -0.3489956259727478, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.9687321186065674, |
|
"logits/rejected": -2.9832406044006348, |
|
"logps/chosen": -331.42669677734375, |
|
"logps/rejected": -269.3779296875, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.38882407546043396, |
|
"rewards/margins": 0.8327142000198364, |
|
"rewards/rejected": -0.4438902735710144, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -2.9920172691345215, |
|
"logits/rejected": -3.013425827026367, |
|
"logps/chosen": -383.51934814453125, |
|
"logps/rejected": -297.9476318359375, |
|
"loss": 0.5083, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.6878620982170105, |
|
"rewards/margins": 1.0685365200042725, |
|
"rewards/rejected": -0.3806745111942291, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.9233837127685547, |
|
"logits/rejected": -2.9321510791778564, |
|
"logps/chosen": -339.95745849609375, |
|
"logps/rejected": -280.793701171875, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5272036194801331, |
|
"rewards/margins": 0.8315987586975098, |
|
"rewards/rejected": -0.3043951690196991, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -2.9920477867126465, |
|
"logits/rejected": -2.9811954498291016, |
|
"logps/chosen": -265.2094421386719, |
|
"logps/rejected": -254.6926727294922, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.177840456366539, |
|
"rewards/margins": 0.8774341344833374, |
|
"rewards/rejected": -0.699593722820282, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -3.0027570724487305, |
|
"logits/rejected": -2.987896203994751, |
|
"logps/chosen": -330.7102966308594, |
|
"logps/rejected": -239.6572723388672, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5821124315261841, |
|
"rewards/margins": 1.141722559928894, |
|
"rewards/rejected": -0.5596100687980652, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -3.048879384994507, |
|
"logits/rejected": -2.9993340969085693, |
|
"logps/chosen": -266.72430419921875, |
|
"logps/rejected": -187.27467346191406, |
|
"loss": 0.541, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.2961394786834717, |
|
"rewards/margins": 0.8773609399795532, |
|
"rewards/rejected": -0.5812214612960815, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -2.980220317840576, |
|
"eval_logits/rejected": -2.9785656929016113, |
|
"eval_logps/chosen": -360.50030517578125, |
|
"eval_logps/rejected": -297.53515625, |
|
"eval_loss": 0.5220658779144287, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": 0.6691505908966064, |
|
"eval_rewards/margins": 1.2260682582855225, |
|
"eval_rewards/rejected": -0.5569177269935608, |
|
"eval_runtime": 163.6147, |
|
"eval_samples_per_second": 12.224, |
|
"eval_steps_per_second": 0.385, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.9797844886779785, |
|
"logits/rejected": -2.9449918270111084, |
|
"logps/chosen": -364.26287841796875, |
|
"logps/rejected": -251.58901977539062, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.5583639740943909, |
|
"rewards/margins": 1.3936102390289307, |
|
"rewards/rejected": -0.835246205329895, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.9557044506073, |
|
"logits/rejected": -2.9637341499328613, |
|
"logps/chosen": -261.53216552734375, |
|
"logps/rejected": -271.5208740234375, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.367598295211792, |
|
"rewards/margins": 1.1545054912567139, |
|
"rewards/rejected": -0.7869071960449219, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -3.0034899711608887, |
|
"logits/rejected": -2.991698980331421, |
|
"logps/chosen": -308.8106689453125, |
|
"logps/rejected": -278.55950927734375, |
|
"loss": 0.5827, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.4888441562652588, |
|
"rewards/margins": 1.2474777698516846, |
|
"rewards/rejected": -0.7586336731910706, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -3.0844597816467285, |
|
"logits/rejected": -3.0464837551116943, |
|
"logps/chosen": -385.8021545410156, |
|
"logps/rejected": -253.19869995117188, |
|
"loss": 0.4898, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5026682019233704, |
|
"rewards/margins": 1.1020526885986328, |
|
"rewards/rejected": -0.5993844270706177, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -3.0472395420074463, |
|
"logits/rejected": -3.0599236488342285, |
|
"logps/chosen": -341.8814697265625, |
|
"logps/rejected": -295.29437255859375, |
|
"loss": 0.5395, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.24311120808124542, |
|
"rewards/margins": 0.9385444521903992, |
|
"rewards/rejected": -0.6954333186149597, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -3.0285518169403076, |
|
"logits/rejected": -3.0690500736236572, |
|
"logps/chosen": -353.20074462890625, |
|
"logps/rejected": -244.77041625976562, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.22555696964263916, |
|
"rewards/margins": 0.8821722269058228, |
|
"rewards/rejected": -1.107729196548462, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -3.0879526138305664, |
|
"logits/rejected": -3.0506978034973145, |
|
"logps/chosen": -354.5426025390625, |
|
"logps/rejected": -279.86773681640625, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.26413029432296753, |
|
"rewards/margins": 0.9725528955459595, |
|
"rewards/rejected": -0.7084226012229919, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -3.0588438510894775, |
|
"logits/rejected": -3.0481762886047363, |
|
"logps/chosen": -339.85675048828125, |
|
"logps/rejected": -285.8063049316406, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.30532822012901306, |
|
"rewards/margins": 1.10079026222229, |
|
"rewards/rejected": -0.7954620122909546, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -3.0889270305633545, |
|
"logits/rejected": -3.0666940212249756, |
|
"logps/chosen": -335.5870666503906, |
|
"logps/rejected": -258.51641845703125, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.7204562425613403, |
|
"rewards/margins": 1.0493910312652588, |
|
"rewards/rejected": -0.32893460988998413, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982798165137615e-07, |
|
"logits/chosen": -3.0340983867645264, |
|
"logits/rejected": -3.0090713500976562, |
|
"logps/chosen": -281.38751220703125, |
|
"logps/rejected": -289.4985656738281, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.268043577671051, |
|
"rewards/margins": 0.5827728509902954, |
|
"rewards/rejected": -0.3147292733192444, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -3.035973072052002, |
|
"eval_logits/rejected": -3.0234200954437256, |
|
"eval_logps/chosen": -359.8170166015625, |
|
"eval_logps/rejected": -296.5441589355469, |
|
"eval_loss": 0.5459412932395935, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": 0.737476110458374, |
|
"eval_rewards/margins": 1.1952924728393555, |
|
"eval_rewards/rejected": -0.45781639218330383, |
|
"eval_runtime": 164.3219, |
|
"eval_samples_per_second": 12.171, |
|
"eval_steps_per_second": 0.383, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963685015290519e-07, |
|
"logits/chosen": -3.1333563327789307, |
|
"logits/rejected": -3.0529465675354004, |
|
"logps/chosen": -394.2475280761719, |
|
"logps/rejected": -328.84796142578125, |
|
"loss": 0.5995, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5251134634017944, |
|
"rewards/margins": 0.8031543493270874, |
|
"rewards/rejected": -0.27804094552993774, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944571865443424e-07, |
|
"logits/chosen": -3.115387201309204, |
|
"logits/rejected": -3.104794502258301, |
|
"logps/chosen": -299.5379943847656, |
|
"logps/rejected": -227.14413452148438, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.5498681664466858, |
|
"rewards/margins": 1.550806999206543, |
|
"rewards/rejected": -1.0009387731552124, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.92545871559633e-07, |
|
"logits/chosen": -3.1059436798095703, |
|
"logits/rejected": -3.110661029815674, |
|
"logps/chosen": -405.8400573730469, |
|
"logps/rejected": -290.01934814453125, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5642995834350586, |
|
"rewards/margins": 1.1246757507324219, |
|
"rewards/rejected": -0.5603762269020081, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906345565749235e-07, |
|
"logits/chosen": -3.0694103240966797, |
|
"logits/rejected": -3.075610876083374, |
|
"logps/chosen": -301.7900695800781, |
|
"logps/rejected": -273.09100341796875, |
|
"loss": 0.6496, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.26571425795555115, |
|
"rewards/margins": 0.967176079750061, |
|
"rewards/rejected": -0.701461672782898, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.88723241590214e-07, |
|
"logits/chosen": -3.078815460205078, |
|
"logits/rejected": -3.097691059112549, |
|
"logps/chosen": -373.6755065917969, |
|
"logps/rejected": -278.1918640136719, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5501624941825867, |
|
"rewards/margins": 1.1470292806625366, |
|
"rewards/rejected": -0.5968667268753052, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.868119266055046e-07, |
|
"logits/chosen": -3.101353168487549, |
|
"logits/rejected": -3.1290316581726074, |
|
"logps/chosen": -370.21112060546875, |
|
"logps/rejected": -328.2227783203125, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.4693407118320465, |
|
"rewards/margins": 1.2798802852630615, |
|
"rewards/rejected": -0.8105396032333374, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.849006116207951e-07, |
|
"logits/chosen": -3.108405113220215, |
|
"logits/rejected": -3.108668804168701, |
|
"logps/chosen": -357.5787048339844, |
|
"logps/rejected": -308.5846252441406, |
|
"loss": 0.5781, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.5667105317115784, |
|
"rewards/margins": 1.561586618423462, |
|
"rewards/rejected": -0.9948760271072388, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.829892966360856e-07, |
|
"logits/chosen": -3.1076834201812744, |
|
"logits/rejected": -3.139901638031006, |
|
"logps/chosen": -372.7229919433594, |
|
"logps/rejected": -321.50347900390625, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.595112681388855, |
|
"rewards/margins": 1.7188622951507568, |
|
"rewards/rejected": -1.1237497329711914, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810779816513762e-07, |
|
"logits/chosen": -3.0231597423553467, |
|
"logits/rejected": -3.055475950241089, |
|
"logps/chosen": -308.81109619140625, |
|
"logps/rejected": -280.67572021484375, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.022154245525598526, |
|
"rewards/margins": 1.1715147495269775, |
|
"rewards/rejected": -1.1493604183197021, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791666666666667e-07, |
|
"logits/chosen": -2.9821434020996094, |
|
"logits/rejected": -2.990657329559326, |
|
"logps/chosen": -350.4073791503906, |
|
"logps/rejected": -234.08291625976562, |
|
"loss": 0.5944, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.6456303000450134, |
|
"rewards/margins": 1.8281257152557373, |
|
"rewards/rejected": -1.182495355606079, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/chosen": -2.96209979057312, |
|
"eval_logits/rejected": -2.963911294937134, |
|
"eval_logps/chosen": -362.2125549316406, |
|
"eval_logps/rejected": -300.90362548828125, |
|
"eval_loss": 0.5573462247848511, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": 0.49792128801345825, |
|
"eval_rewards/margins": 1.391687273979187, |
|
"eval_rewards/rejected": -0.8937660455703735, |
|
"eval_runtime": 163.7646, |
|
"eval_samples_per_second": 12.213, |
|
"eval_steps_per_second": 0.385, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772553516819572e-07, |
|
"logits/chosen": -2.9685988426208496, |
|
"logits/rejected": -2.9469170570373535, |
|
"logps/chosen": -359.9443054199219, |
|
"logps/rejected": -339.13482666015625, |
|
"loss": 0.7753, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5800348520278931, |
|
"rewards/margins": 1.4965015649795532, |
|
"rewards/rejected": -0.9164667129516602, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753440366972477e-07, |
|
"logits/chosen": -3.039097785949707, |
|
"logits/rejected": -3.0352489948272705, |
|
"logps/chosen": -279.19451904296875, |
|
"logps/rejected": -275.61077880859375, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.08171078562736511, |
|
"rewards/margins": 0.8535135388374329, |
|
"rewards/rejected": -0.7718027234077454, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7343272171253825e-07, |
|
"logits/chosen": -3.0542099475860596, |
|
"logits/rejected": -3.048107624053955, |
|
"logps/chosen": -304.2041015625, |
|
"logps/rejected": -275.24664306640625, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.6358417272567749, |
|
"rewards/margins": 1.4337527751922607, |
|
"rewards/rejected": -0.7979112863540649, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.715214067278288e-07, |
|
"logits/chosen": -2.9832911491394043, |
|
"logits/rejected": -2.9696083068847656, |
|
"logps/chosen": -351.0896911621094, |
|
"logps/rejected": -278.2879333496094, |
|
"loss": 0.5257, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.4420256018638611, |
|
"rewards/margins": 1.773047685623169, |
|
"rewards/rejected": -1.331022024154663, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.696100917431192e-07, |
|
"logits/chosen": -3.115874767303467, |
|
"logits/rejected": -3.0773837566375732, |
|
"logps/chosen": -392.2452392578125, |
|
"logps/rejected": -324.62640380859375, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2861310839653015, |
|
"rewards/margins": 0.8551927804946899, |
|
"rewards/rejected": -0.5690616369247437, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.6769877675840974e-07, |
|
"logits/chosen": -3.0585522651672363, |
|
"logits/rejected": -3.089534282684326, |
|
"logps/chosen": -310.84967041015625, |
|
"logps/rejected": -287.9058532714844, |
|
"loss": 0.5614, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.2514137625694275, |
|
"rewards/margins": 1.2147700786590576, |
|
"rewards/rejected": -0.9633563160896301, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6578746177370027e-07, |
|
"logits/chosen": -3.0050368309020996, |
|
"logits/rejected": -3.0113613605499268, |
|
"logps/chosen": -243.838623046875, |
|
"logps/rejected": -224.61404418945312, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.10031839460134506, |
|
"rewards/margins": 1.2319433689117432, |
|
"rewards/rejected": -1.1316249370574951, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.638761467889908e-07, |
|
"logits/chosen": -3.01200795173645, |
|
"logits/rejected": -2.9829325675964355, |
|
"logps/chosen": -353.6679992675781, |
|
"logps/rejected": -299.7701416015625, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.6005850434303284, |
|
"rewards/margins": 2.408433437347412, |
|
"rewards/rejected": -1.807848334312439, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6196483180428133e-07, |
|
"logits/chosen": -3.038440227508545, |
|
"logits/rejected": -3.0429458618164062, |
|
"logps/chosen": -330.0135192871094, |
|
"logps/rejected": -262.1318359375, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.4290197491645813, |
|
"rewards/margins": 1.5280876159667969, |
|
"rewards/rejected": -1.0990678071975708, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.600535168195718e-07, |
|
"logits/chosen": -3.0223565101623535, |
|
"logits/rejected": -3.0170624256134033, |
|
"logps/chosen": -259.1560363769531, |
|
"logps/rejected": -268.68365478515625, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.08852599561214447, |
|
"rewards/margins": 1.076027750968933, |
|
"rewards/rejected": -0.9875017404556274, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -3.0406343936920166, |
|
"eval_logits/rejected": -3.0485074520111084, |
|
"eval_logps/chosen": -362.83642578125, |
|
"eval_logps/rejected": -302.1329650878906, |
|
"eval_loss": 0.5256651043891907, |
|
"eval_rewards/accuracies": 0.7579365372657776, |
|
"eval_rewards/chosen": 0.4355368912220001, |
|
"eval_rewards/margins": 1.452234148979187, |
|
"eval_rewards/rejected": -1.0166972875595093, |
|
"eval_runtime": 164.1914, |
|
"eval_samples_per_second": 12.181, |
|
"eval_steps_per_second": 0.384, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5814220183486234e-07, |
|
"logits/chosen": -2.971991777420044, |
|
"logits/rejected": -2.9626731872558594, |
|
"logps/chosen": -387.75872802734375, |
|
"logps/rejected": -341.24224853515625, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.0966944545507431, |
|
"rewards/margins": 0.9770743250846863, |
|
"rewards/rejected": -1.0737688541412354, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.562308868501529e-07, |
|
"logits/chosen": -2.97809100151062, |
|
"logits/rejected": -3.0156943798065186, |
|
"logps/chosen": -325.83837890625, |
|
"logps/rejected": -321.0384826660156, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.27487924695014954, |
|
"rewards/margins": 1.646512746810913, |
|
"rewards/rejected": -1.371633529663086, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.543195718654434e-07, |
|
"logits/chosen": -3.0082881450653076, |
|
"logits/rejected": -3.003408193588257, |
|
"logps/chosen": -274.6020812988281, |
|
"logps/rejected": -240.13998413085938, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0502743124961853, |
|
"rewards/margins": 0.5772665739059448, |
|
"rewards/rejected": -0.6275408864021301, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5240825688073394e-07, |
|
"logits/chosen": -3.0475857257843018, |
|
"logits/rejected": -3.0587058067321777, |
|
"logps/chosen": -345.28802490234375, |
|
"logps/rejected": -276.25018310546875, |
|
"loss": 0.559, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6526178121566772, |
|
"rewards/margins": 1.6864182949066162, |
|
"rewards/rejected": -1.033800482749939, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.504969418960244e-07, |
|
"logits/chosen": -2.9780993461608887, |
|
"logits/rejected": -3.0339550971984863, |
|
"logps/chosen": -318.60699462890625, |
|
"logps/rejected": -363.83966064453125, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3249278664588928, |
|
"rewards/margins": 1.6138547658920288, |
|
"rewards/rejected": -1.2889269590377808, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4858562691131495e-07, |
|
"logits/chosen": -3.0293617248535156, |
|
"logits/rejected": -3.0541815757751465, |
|
"logps/chosen": -355.3965759277344, |
|
"logps/rejected": -341.19097900390625, |
|
"loss": 0.5655, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.15244658291339874, |
|
"rewards/margins": 1.1955846548080444, |
|
"rewards/rejected": -1.043138027191162, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.466743119266055e-07, |
|
"logits/chosen": -2.955909252166748, |
|
"logits/rejected": -2.966557502746582, |
|
"logps/chosen": -339.918701171875, |
|
"logps/rejected": -312.85992431640625, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.24385514855384827, |
|
"rewards/margins": 1.2561490535736084, |
|
"rewards/rejected": -1.5000044107437134, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.44762996941896e-07, |
|
"logits/chosen": -2.9457859992980957, |
|
"logits/rejected": -2.921659231185913, |
|
"logps/chosen": -361.46905517578125, |
|
"logps/rejected": -314.6666259765625, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.31199535727500916, |
|
"rewards/margins": 1.5148388147354126, |
|
"rewards/rejected": -1.202843427658081, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4285168195718655e-07, |
|
"logits/chosen": -2.9674103260040283, |
|
"logits/rejected": -2.9832658767700195, |
|
"logps/chosen": -279.5147705078125, |
|
"logps/rejected": -283.4952697753906, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.18109655380249023, |
|
"rewards/margins": 1.081386685371399, |
|
"rewards/rejected": -1.2624832391738892, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.40940366972477e-07, |
|
"logits/chosen": -3.038327932357788, |
|
"logits/rejected": -3.081512928009033, |
|
"logps/chosen": -282.9052429199219, |
|
"logps/rejected": -260.5687255859375, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0032925487030297518, |
|
"rewards/margins": 0.903986930847168, |
|
"rewards/rejected": -0.9006943702697754, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -2.9869041442871094, |
|
"eval_logits/rejected": -2.991122007369995, |
|
"eval_logps/chosen": -362.4848327636719, |
|
"eval_logps/rejected": -301.2572021484375, |
|
"eval_loss": 0.5287741422653198, |
|
"eval_rewards/accuracies": 0.7579365372657776, |
|
"eval_rewards/chosen": 0.47069627046585083, |
|
"eval_rewards/margins": 1.3998188972473145, |
|
"eval_rewards/rejected": -0.9291225075721741, |
|
"eval_runtime": 164.0279, |
|
"eval_samples_per_second": 12.193, |
|
"eval_steps_per_second": 0.384, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3902905198776756e-07, |
|
"logits/chosen": -3.0266683101654053, |
|
"logits/rejected": -3.0426414012908936, |
|
"logps/chosen": -345.2246398925781, |
|
"logps/rejected": -280.60711669921875, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.3867380917072296, |
|
"rewards/margins": 1.5311682224273682, |
|
"rewards/rejected": -1.14443039894104, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.371177370030581e-07, |
|
"logits/chosen": -3.0024008750915527, |
|
"logits/rejected": -3.0336501598358154, |
|
"logps/chosen": -340.01483154296875, |
|
"logps/rejected": -288.4037170410156, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.09759467095136642, |
|
"rewards/margins": 1.4280248880386353, |
|
"rewards/rejected": -1.330430030822754, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.352064220183486e-07, |
|
"logits/chosen": -3.073171377182007, |
|
"logits/rejected": -3.0693984031677246, |
|
"logps/chosen": -268.47442626953125, |
|
"logps/rejected": -253.87173461914062, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08323542028665543, |
|
"rewards/margins": 1.263319969177246, |
|
"rewards/rejected": -1.180084466934204, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3329510703363915e-07, |
|
"logits/chosen": -3.1394124031066895, |
|
"logits/rejected": -3.147449493408203, |
|
"logps/chosen": -316.50323486328125, |
|
"logps/rejected": -256.6443786621094, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.0077073900029063225, |
|
"rewards/margins": 1.4150127172470093, |
|
"rewards/rejected": -1.407305359840393, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313837920489297e-07, |
|
"logits/chosen": -3.015110731124878, |
|
"logits/rejected": -3.0439746379852295, |
|
"logps/chosen": -309.4215087890625, |
|
"logps/rejected": -278.88934326171875, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.3628634512424469, |
|
"rewards/margins": 1.4821045398712158, |
|
"rewards/rejected": -1.1192409992218018, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2947247706422016e-07, |
|
"logits/chosen": -3.0345845222473145, |
|
"logits/rejected": -2.997607469558716, |
|
"logps/chosen": -311.18719482421875, |
|
"logps/rejected": -289.7060852050781, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4103531241416931, |
|
"rewards/margins": 1.0822376012802124, |
|
"rewards/rejected": -1.4925907850265503, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.275611620795107e-07, |
|
"logits/chosen": -3.0080935955047607, |
|
"logits/rejected": -3.015535593032837, |
|
"logps/chosen": -377.9685974121094, |
|
"logps/rejected": -297.92169189453125, |
|
"loss": 0.5683, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.14489376544952393, |
|
"rewards/margins": 1.1130046844482422, |
|
"rewards/rejected": -0.9681110382080078, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2564984709480123e-07, |
|
"logits/chosen": -3.030597448348999, |
|
"logits/rejected": -3.059508800506592, |
|
"logps/chosen": -368.32635498046875, |
|
"logps/rejected": -274.53619384765625, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3344075083732605, |
|
"rewards/margins": 1.3792588710784912, |
|
"rewards/rejected": -1.0448510646820068, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2373853211009176e-07, |
|
"logits/chosen": -3.0355846881866455, |
|
"logits/rejected": -3.0617101192474365, |
|
"logps/chosen": -310.24530029296875, |
|
"logps/rejected": -280.7437438964844, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.31564414501190186, |
|
"rewards/margins": 1.532622218132019, |
|
"rewards/rejected": -1.2169779539108276, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2182721712538224e-07, |
|
"logits/chosen": -3.029533863067627, |
|
"logits/rejected": -3.05369234085083, |
|
"logps/chosen": -370.49945068359375, |
|
"logps/rejected": -285.1793212890625, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.3230968117713928, |
|
"rewards/margins": 1.0617311000823975, |
|
"rewards/rejected": -0.7386342287063599, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -3.041776180267334, |
|
"eval_logits/rejected": -3.0563852787017822, |
|
"eval_logps/chosen": -366.7193603515625, |
|
"eval_logps/rejected": -301.1505432128906, |
|
"eval_loss": 0.585310697555542, |
|
"eval_rewards/accuracies": 0.7460317611694336, |
|
"eval_rewards/chosen": 0.047242674976587296, |
|
"eval_rewards/margins": 0.9657005667686462, |
|
"eval_rewards/rejected": -0.9184578657150269, |
|
"eval_runtime": 164.258, |
|
"eval_samples_per_second": 12.176, |
|
"eval_steps_per_second": 0.384, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.199159021406727e-07, |
|
"logits/chosen": -2.963630199432373, |
|
"logits/rejected": -3.031212329864502, |
|
"logps/chosen": -297.58990478515625, |
|
"logps/rejected": -283.17572021484375, |
|
"loss": 0.6067, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.13306304812431335, |
|
"rewards/margins": 1.0036863088607788, |
|
"rewards/rejected": -0.8706234097480774, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1800458715596325e-07, |
|
"logits/chosen": -3.0382869243621826, |
|
"logits/rejected": -3.0224924087524414, |
|
"logps/chosen": -373.01947021484375, |
|
"logps/rejected": -315.932861328125, |
|
"loss": 0.6166, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.028559958562254906, |
|
"rewards/margins": 0.9609702825546265, |
|
"rewards/rejected": -0.9324103593826294, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.160932721712538e-07, |
|
"logits/chosen": -3.0072388648986816, |
|
"logits/rejected": -3.0005228519439697, |
|
"logps/chosen": -340.4766540527344, |
|
"logps/rejected": -306.3741149902344, |
|
"loss": 0.6079, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.09662823379039764, |
|
"rewards/margins": 1.1530828475952148, |
|
"rewards/rejected": -1.0564546585083008, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.141819571865443e-07, |
|
"logits/chosen": -2.9518847465515137, |
|
"logits/rejected": -2.9550204277038574, |
|
"logps/chosen": -325.9070739746094, |
|
"logps/rejected": -244.12588500976562, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.1386883705854416, |
|
"rewards/margins": 1.7188549041748047, |
|
"rewards/rejected": -1.5801665782928467, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1227064220183485e-07, |
|
"logits/chosen": -2.9738943576812744, |
|
"logits/rejected": -3.009288787841797, |
|
"logps/chosen": -306.73614501953125, |
|
"logps/rejected": -284.35089111328125, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.16552898287773132, |
|
"rewards/margins": 1.8094953298568726, |
|
"rewards/rejected": -1.6439664363861084, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.103593272171253e-07, |
|
"logits/chosen": -2.9576098918914795, |
|
"logits/rejected": -2.9751369953155518, |
|
"logps/chosen": -336.5853576660156, |
|
"logps/rejected": -326.5455017089844, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.07825515419244766, |
|
"rewards/margins": 1.1398742198944092, |
|
"rewards/rejected": -1.0616191625595093, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0844801223241586e-07, |
|
"logits/chosen": -3.0122196674346924, |
|
"logits/rejected": -2.9879307746887207, |
|
"logps/chosen": -350.8817138671875, |
|
"logps/rejected": -298.84307861328125, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03767753392457962, |
|
"rewards/margins": 1.09770929813385, |
|
"rewards/rejected": -1.0600318908691406, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.065366972477064e-07, |
|
"logits/chosen": -2.9043805599212646, |
|
"logits/rejected": -2.9711837768554688, |
|
"logps/chosen": -379.1385803222656, |
|
"logps/rejected": -296.9505920410156, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3532111942768097, |
|
"rewards/margins": 1.7610466480255127, |
|
"rewards/rejected": -1.407835602760315, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.046253822629969e-07, |
|
"logits/chosen": -2.9510416984558105, |
|
"logits/rejected": -2.961275100708008, |
|
"logps/chosen": -330.33673095703125, |
|
"logps/rejected": -288.71173095703125, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.301142156124115, |
|
"rewards/margins": 1.2936238050460815, |
|
"rewards/rejected": -0.9924817085266113, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0271406727828745e-07, |
|
"logits/chosen": -2.991361141204834, |
|
"logits/rejected": -2.974353790283203, |
|
"logps/chosen": -322.8855895996094, |
|
"logps/rejected": -271.7654113769531, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06158037111163139, |
|
"rewards/margins": 1.1184431314468384, |
|
"rewards/rejected": -1.056862711906433, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -2.963681936264038, |
|
"eval_logits/rejected": -2.966184139251709, |
|
"eval_logps/chosen": -364.9457702636719, |
|
"eval_logps/rejected": -303.87957763671875, |
|
"eval_loss": 0.5150811076164246, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": 0.22460374236106873, |
|
"eval_rewards/margins": 1.4159626960754395, |
|
"eval_rewards/rejected": -1.191359043121338, |
|
"eval_runtime": 163.931, |
|
"eval_samples_per_second": 12.2, |
|
"eval_steps_per_second": 0.384, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.00802752293578e-07, |
|
"logits/chosen": -2.974116802215576, |
|
"logits/rejected": -2.9998645782470703, |
|
"logps/chosen": -330.64910888671875, |
|
"logps/rejected": -294.6690368652344, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.18794824182987213, |
|
"rewards/margins": 1.2928552627563477, |
|
"rewards/rejected": -1.1049071550369263, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9889143730886847e-07, |
|
"logits/chosen": -2.9930388927459717, |
|
"logits/rejected": -2.983773946762085, |
|
"logps/chosen": -376.36212158203125, |
|
"logps/rejected": -305.14111328125, |
|
"loss": 0.5357, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.27564454078674316, |
|
"rewards/margins": 1.5530188083648682, |
|
"rewards/rejected": -1.277374267578125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.96980122324159e-07, |
|
"logits/chosen": -3.0382747650146484, |
|
"logits/rejected": -3.0700857639312744, |
|
"logps/chosen": -313.2106018066406, |
|
"logps/rejected": -256.5130310058594, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.03291673585772514, |
|
"rewards/margins": 1.2974836826324463, |
|
"rewards/rejected": -1.3304002285003662, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9506880733944953e-07, |
|
"logits/chosen": -3.0589098930358887, |
|
"logits/rejected": -3.058842897415161, |
|
"logps/chosen": -304.68658447265625, |
|
"logps/rejected": -276.25177001953125, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.22373457252979279, |
|
"rewards/margins": 1.489611268043518, |
|
"rewards/rejected": -1.7133458852767944, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9315749235474006e-07, |
|
"logits/chosen": -3.037079334259033, |
|
"logits/rejected": -3.0386836528778076, |
|
"logps/chosen": -347.38897705078125, |
|
"logps/rejected": -334.3331298828125, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.15273378789424896, |
|
"rewards/margins": 1.5688612461090088, |
|
"rewards/rejected": -1.7215951681137085, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.912461773700306e-07, |
|
"logits/chosen": -2.9914333820343018, |
|
"logits/rejected": -3.013286828994751, |
|
"logps/chosen": -361.6410217285156, |
|
"logps/rejected": -342.3985900878906, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03575097769498825, |
|
"rewards/margins": 1.2531265020370483, |
|
"rewards/rejected": -1.2888776063919067, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8933486238532107e-07, |
|
"logits/chosen": -2.9541945457458496, |
|
"logits/rejected": -2.979830265045166, |
|
"logps/chosen": -424.258544921875, |
|
"logps/rejected": -299.7648620605469, |
|
"loss": 0.581, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.013498688116669655, |
|
"rewards/margins": 1.3451616764068604, |
|
"rewards/rejected": -1.331662893295288, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.874235474006116e-07, |
|
"logits/chosen": -2.9790916442871094, |
|
"logits/rejected": -2.987037181854248, |
|
"logps/chosen": -364.68048095703125, |
|
"logps/rejected": -290.4891052246094, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.007329714484512806, |
|
"rewards/margins": 1.4278209209442139, |
|
"rewards/rejected": -1.4351506233215332, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8551223241590214e-07, |
|
"logits/chosen": -2.986210823059082, |
|
"logits/rejected": -2.9739222526550293, |
|
"logps/chosen": -300.7494812011719, |
|
"logps/rejected": -278.2732849121094, |
|
"loss": 0.5741, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04843021556735039, |
|
"rewards/margins": 1.3019744157791138, |
|
"rewards/rejected": -1.3504045009613037, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.8360091743119267e-07, |
|
"logits/chosen": -2.9838929176330566, |
|
"logits/rejected": -2.9902117252349854, |
|
"logps/chosen": -306.20025634765625, |
|
"logps/rejected": -289.5735168457031, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.058762937784194946, |
|
"rewards/margins": 1.2602014541625977, |
|
"rewards/rejected": -1.2014386653900146, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_logits/chosen": -2.9907381534576416, |
|
"eval_logits/rejected": -2.982360363006592, |
|
"eval_logps/chosen": -364.6807556152344, |
|
"eval_logps/rejected": -302.8385314941406, |
|
"eval_loss": 0.5133689641952515, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": 0.25110283493995667, |
|
"eval_rewards/margins": 1.338356614112854, |
|
"eval_rewards/rejected": -1.0872538089752197, |
|
"eval_runtime": 164.3114, |
|
"eval_samples_per_second": 12.172, |
|
"eval_steps_per_second": 0.383, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.816896024464832e-07, |
|
"logits/chosen": -3.0253748893737793, |
|
"logits/rejected": -2.9562289714813232, |
|
"logps/chosen": -281.73016357421875, |
|
"logps/rejected": -248.2506866455078, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.28038138151168823, |
|
"rewards/margins": 1.607208251953125, |
|
"rewards/rejected": -1.326826810836792, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.797782874617737e-07, |
|
"logits/chosen": -2.9797048568725586, |
|
"logits/rejected": -2.932326555252075, |
|
"logps/chosen": -333.2131042480469, |
|
"logps/rejected": -267.63128662109375, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.26507893204689026, |
|
"rewards/margins": 1.226858139038086, |
|
"rewards/rejected": -0.9617794156074524, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.778669724770642e-07, |
|
"logits/chosen": -2.9677836894989014, |
|
"logits/rejected": -2.9711012840270996, |
|
"logps/chosen": -301.1932067871094, |
|
"logps/rejected": -239.915771484375, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.06536471843719482, |
|
"rewards/margins": 1.3896596431732178, |
|
"rewards/rejected": -1.3242948055267334, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7595565749235474e-07, |
|
"logits/chosen": -3.003399133682251, |
|
"logits/rejected": -2.9879281520843506, |
|
"logps/chosen": -351.9979553222656, |
|
"logps/rejected": -264.519775390625, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.3593365252017975, |
|
"rewards/margins": 1.1577335596084595, |
|
"rewards/rejected": -0.7983969449996948, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.740443425076452e-07, |
|
"logits/chosen": -2.9658942222595215, |
|
"logits/rejected": -2.982341766357422, |
|
"logps/chosen": -336.6238708496094, |
|
"logps/rejected": -299.588134765625, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.2032889872789383, |
|
"rewards/margins": 1.3534172773361206, |
|
"rewards/rejected": -1.1501282453536987, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7213302752293575e-07, |
|
"logits/chosen": -2.951019763946533, |
|
"logits/rejected": -2.985151767730713, |
|
"logps/chosen": -338.8179931640625, |
|
"logps/rejected": -283.80328369140625, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.049173761159181595, |
|
"rewards/margins": 0.8662222623825073, |
|
"rewards/rejected": -0.8170484304428101, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.702217125382263e-07, |
|
"logits/chosen": -2.9252991676330566, |
|
"logits/rejected": -2.937505006790161, |
|
"logps/chosen": -354.7286682128906, |
|
"logps/rejected": -312.35333251953125, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.2835424542427063, |
|
"rewards/margins": 1.7692314386367798, |
|
"rewards/rejected": -1.4856891632080078, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.6831039755351677e-07, |
|
"logits/chosen": -2.975984573364258, |
|
"logits/rejected": -2.9734318256378174, |
|
"logps/chosen": -319.2844543457031, |
|
"logps/rejected": -303.3651428222656, |
|
"loss": 0.1133, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.7135050296783447, |
|
"rewards/margins": 4.7504682540893555, |
|
"rewards/rejected": -3.0369625091552734, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.663990825688073e-07, |
|
"logits/chosen": -2.899402141571045, |
|
"logits/rejected": -2.89802885055542, |
|
"logps/chosen": -288.45123291015625, |
|
"logps/rejected": -316.5885314941406, |
|
"loss": 0.1405, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.7678325176239014, |
|
"rewards/margins": 5.483719348907471, |
|
"rewards/rejected": -3.715886354446411, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6448776758409783e-07, |
|
"logits/chosen": -2.8675971031188965, |
|
"logits/rejected": -2.89615797996521, |
|
"logps/chosen": -325.03863525390625, |
|
"logps/rejected": -333.436767578125, |
|
"loss": 0.1034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.79555344581604, |
|
"rewards/margins": 4.759924411773682, |
|
"rewards/rejected": -2.9643709659576416, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_logits/chosen": -2.92020583152771, |
|
"eval_logits/rejected": -2.9095799922943115, |
|
"eval_logps/chosen": -364.1185302734375, |
|
"eval_logps/rejected": -306.2866516113281, |
|
"eval_loss": 0.5107486248016357, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": 0.307327002286911, |
|
"eval_rewards/margins": 1.739391803741455, |
|
"eval_rewards/rejected": -1.4320647716522217, |
|
"eval_runtime": 164.3142, |
|
"eval_samples_per_second": 12.172, |
|
"eval_steps_per_second": 0.383, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6257645259938836e-07, |
|
"logits/chosen": -2.8848228454589844, |
|
"logits/rejected": -2.9434664249420166, |
|
"logps/chosen": -304.1281433105469, |
|
"logps/rejected": -323.9388732910156, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.5237512588500977, |
|
"rewards/margins": 5.300021171569824, |
|
"rewards/rejected": -3.7762699127197266, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.606651376146789e-07, |
|
"logits/chosen": -2.818145275115967, |
|
"logits/rejected": -2.773864269256592, |
|
"logps/chosen": -315.73687744140625, |
|
"logps/rejected": -252.3991241455078, |
|
"loss": 0.1072, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.2391575574874878, |
|
"rewards/margins": 4.052863121032715, |
|
"rewards/rejected": -2.8137052059173584, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5875382262996937e-07, |
|
"logits/chosen": -2.8956587314605713, |
|
"logits/rejected": -2.88509202003479, |
|
"logps/chosen": -332.889404296875, |
|
"logps/rejected": -375.0550231933594, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.7886276245117188, |
|
"rewards/margins": 4.777144432067871, |
|
"rewards/rejected": -2.9885172843933105, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.568425076452599e-07, |
|
"logits/chosen": -2.9382426738739014, |
|
"logits/rejected": -2.9390716552734375, |
|
"logps/chosen": -339.12451171875, |
|
"logps/rejected": -315.15625, |
|
"loss": 0.099, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.3939841985702515, |
|
"rewards/margins": 4.764640808105469, |
|
"rewards/rejected": -3.370656967163086, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5493119266055044e-07, |
|
"logits/chosen": -2.8407671451568604, |
|
"logits/rejected": -2.821763753890991, |
|
"logps/chosen": -336.37298583984375, |
|
"logps/rejected": -257.6861267089844, |
|
"loss": 0.1132, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.3384716510772705, |
|
"rewards/margins": 4.965681076049805, |
|
"rewards/rejected": -3.627209424972534, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5301987767584097e-07, |
|
"logits/chosen": -2.8167824745178223, |
|
"logits/rejected": -2.810854434967041, |
|
"logps/chosen": -323.439208984375, |
|
"logps/rejected": -342.47991943359375, |
|
"loss": 0.2041, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.7954254150390625, |
|
"rewards/margins": 4.722014427185059, |
|
"rewards/rejected": -3.9265894889831543, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.511085626911315e-07, |
|
"logits/chosen": -2.940957546234131, |
|
"logits/rejected": -3.0021321773529053, |
|
"logps/chosen": -366.2899475097656, |
|
"logps/rejected": -343.2218933105469, |
|
"loss": 0.3299, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.4886529445648193, |
|
"rewards/margins": 5.359461307525635, |
|
"rewards/rejected": -3.870807647705078, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.49197247706422e-07, |
|
"logits/chosen": -2.933786392211914, |
|
"logits/rejected": -2.906247615814209, |
|
"logps/chosen": -261.5579833984375, |
|
"logps/rejected": -276.83026123046875, |
|
"loss": 0.149, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.48736995458602905, |
|
"rewards/margins": 4.608451843261719, |
|
"rewards/rejected": -4.121081829071045, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.472859327217125e-07, |
|
"logits/chosen": -2.8349316120147705, |
|
"logits/rejected": -2.9043667316436768, |
|
"logps/chosen": -364.6941833496094, |
|
"logps/rejected": -376.5315856933594, |
|
"loss": 0.1092, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2117881774902344, |
|
"rewards/margins": 5.32895565032959, |
|
"rewards/rejected": -4.117166996002197, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4537461773700304e-07, |
|
"logits/chosen": -2.985729694366455, |
|
"logits/rejected": -2.8761606216430664, |
|
"logps/chosen": -258.05841064453125, |
|
"logps/rejected": -241.39053344726562, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.6233962774276733, |
|
"rewards/margins": 4.293813228607178, |
|
"rewards/rejected": -3.6704165935516357, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_logits/chosen": -2.9666378498077393, |
|
"eval_logits/rejected": -2.95609450340271, |
|
"eval_logps/chosen": -365.8598327636719, |
|
"eval_logps/rejected": -310.414794921875, |
|
"eval_loss": 0.534447431564331, |
|
"eval_rewards/accuracies": 0.7460317611694336, |
|
"eval_rewards/chosen": 0.1331927627325058, |
|
"eval_rewards/margins": 1.9780747890472412, |
|
"eval_rewards/rejected": -1.8448821306228638, |
|
"eval_runtime": 164.1399, |
|
"eval_samples_per_second": 12.185, |
|
"eval_steps_per_second": 0.384, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.434633027522936e-07, |
|
"logits/chosen": -2.9507124423980713, |
|
"logits/rejected": -2.9483211040496826, |
|
"logps/chosen": -338.0868835449219, |
|
"logps/rejected": -325.01483154296875, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.6764055490493774, |
|
"rewards/margins": 5.684920310974121, |
|
"rewards/rejected": -4.008514404296875, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.415519877675841e-07, |
|
"logits/chosen": -2.976590156555176, |
|
"logits/rejected": -3.025784730911255, |
|
"logps/chosen": -277.34710693359375, |
|
"logps/rejected": -323.576171875, |
|
"loss": 0.1131, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.841607928276062, |
|
"rewards/margins": 4.658609867095947, |
|
"rewards/rejected": -3.8170018196105957, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3964067278287464e-07, |
|
"logits/chosen": -3.0445570945739746, |
|
"logits/rejected": -3.0413312911987305, |
|
"logps/chosen": -337.9605407714844, |
|
"logps/rejected": -288.26666259765625, |
|
"loss": 0.1463, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.5894230604171753, |
|
"rewards/margins": 5.3310723304748535, |
|
"rewards/rejected": -3.7416489124298096, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.377293577981651e-07, |
|
"logits/chosen": -2.9471421241760254, |
|
"logits/rejected": -2.9865708351135254, |
|
"logps/chosen": -288.2189025878906, |
|
"logps/rejected": -309.2388610839844, |
|
"loss": 0.0934, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.4304211139678955, |
|
"rewards/margins": 5.832246780395508, |
|
"rewards/rejected": -4.401825428009033, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3581804281345565e-07, |
|
"logits/chosen": -2.9803059101104736, |
|
"logits/rejected": -2.9711978435516357, |
|
"logps/chosen": -337.70697021484375, |
|
"logps/rejected": -298.4077453613281, |
|
"loss": 0.0967, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.4117494821548462, |
|
"rewards/margins": 5.474527359008789, |
|
"rewards/rejected": -4.062777519226074, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.339067278287462e-07, |
|
"logits/chosen": -2.8603241443634033, |
|
"logits/rejected": -2.8709046840667725, |
|
"logps/chosen": -312.73504638671875, |
|
"logps/rejected": -306.9026794433594, |
|
"loss": 0.0785, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3839662075042725, |
|
"rewards/margins": 5.900813102722168, |
|
"rewards/rejected": -4.516847133636475, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.319954128440367e-07, |
|
"logits/chosen": -2.975525379180908, |
|
"logits/rejected": -2.9611260890960693, |
|
"logps/chosen": -325.86163330078125, |
|
"logps/rejected": -285.2755432128906, |
|
"loss": 0.0992, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.0602104663848877, |
|
"rewards/margins": 4.849926948547363, |
|
"rewards/rejected": -3.7897167205810547, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.3008409785932725e-07, |
|
"logits/chosen": -2.877586841583252, |
|
"logits/rejected": -2.821748971939087, |
|
"logps/chosen": -324.6281433105469, |
|
"logps/rejected": -323.02301025390625, |
|
"loss": 0.0892, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.1957648992538452, |
|
"rewards/margins": 5.952631950378418, |
|
"rewards/rejected": -4.756867408752441, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2817278287461773e-07, |
|
"logits/chosen": -2.9451098442077637, |
|
"logits/rejected": -2.9684863090515137, |
|
"logps/chosen": -279.90216064453125, |
|
"logps/rejected": -338.3842468261719, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.2665460109710693, |
|
"rewards/margins": 5.084068298339844, |
|
"rewards/rejected": -3.8175220489501953, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.262614678899082e-07, |
|
"logits/chosen": -2.889819383621216, |
|
"logits/rejected": -2.9235751628875732, |
|
"logps/chosen": -303.02838134765625, |
|
"logps/rejected": -356.177734375, |
|
"loss": 0.1338, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.8252049684524536, |
|
"rewards/margins": 5.507418155670166, |
|
"rewards/rejected": -4.6822123527526855, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_logits/chosen": -2.9508416652679443, |
|
"eval_logits/rejected": -2.9459922313690186, |
|
"eval_logps/chosen": -368.0057678222656, |
|
"eval_logps/rejected": -313.3835144042969, |
|
"eval_loss": 0.534950315952301, |
|
"eval_rewards/accuracies": 0.773809552192688, |
|
"eval_rewards/chosen": -0.0813969075679779, |
|
"eval_rewards/margins": 2.0603599548339844, |
|
"eval_rewards/rejected": -2.141756772994995, |
|
"eval_runtime": 164.0736, |
|
"eval_samples_per_second": 12.19, |
|
"eval_steps_per_second": 0.384, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2435015290519874e-07, |
|
"logits/chosen": -2.916611671447754, |
|
"logits/rejected": -2.927777051925659, |
|
"logps/chosen": -283.2217712402344, |
|
"logps/rejected": -297.02850341796875, |
|
"loss": 0.0893, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.0804212093353271, |
|
"rewards/margins": 5.549715518951416, |
|
"rewards/rejected": -4.469293594360352, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2243883792048927e-07, |
|
"logits/chosen": -2.977875232696533, |
|
"logits/rejected": -2.986704111099243, |
|
"logps/chosen": -335.274658203125, |
|
"logps/rejected": -380.4412536621094, |
|
"loss": 0.1303, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.4366910457611084, |
|
"rewards/margins": 5.666425704956055, |
|
"rewards/rejected": -4.229735374450684, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.205275229357798e-07, |
|
"logits/chosen": -2.868638753890991, |
|
"logits/rejected": -2.8948395252227783, |
|
"logps/chosen": -387.9947204589844, |
|
"logps/rejected": -389.3511657714844, |
|
"loss": 0.1117, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3278411626815796, |
|
"rewards/margins": 5.698910236358643, |
|
"rewards/rejected": -4.371068954467773, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.186162079510703e-07, |
|
"logits/chosen": -2.9128642082214355, |
|
"logits/rejected": -2.91692852973938, |
|
"logps/chosen": -351.5616149902344, |
|
"logps/rejected": -373.9852600097656, |
|
"loss": 0.1466, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.3745101690292358, |
|
"rewards/margins": 5.025930404663086, |
|
"rewards/rejected": -3.6514201164245605, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.167048929663608e-07, |
|
"logits/chosen": -2.955967426300049, |
|
"logits/rejected": -2.923954486846924, |
|
"logps/chosen": -278.7707824707031, |
|
"logps/rejected": -281.9942321777344, |
|
"loss": 0.1003, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.0149275064468384, |
|
"rewards/margins": 5.51505184173584, |
|
"rewards/rejected": -4.500124931335449, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.1479357798165134e-07, |
|
"logits/chosen": -2.9661002159118652, |
|
"logits/rejected": -2.948564052581787, |
|
"logps/chosen": -339.5476989746094, |
|
"logps/rejected": -321.3616638183594, |
|
"loss": 0.0984, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.038356065750122, |
|
"rewards/margins": 5.16934061050415, |
|
"rewards/rejected": -4.130984306335449, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.128822629969419e-07, |
|
"logits/chosen": -2.862750291824341, |
|
"logits/rejected": -2.8853306770324707, |
|
"logps/chosen": -350.9757995605469, |
|
"logps/rejected": -333.6067199707031, |
|
"loss": 0.1195, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.147101640701294, |
|
"rewards/margins": 5.550149917602539, |
|
"rewards/rejected": -4.403048038482666, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.109709480122324e-07, |
|
"logits/chosen": -2.9103734493255615, |
|
"logits/rejected": -2.9115426540374756, |
|
"logps/chosen": -286.4703063964844, |
|
"logps/rejected": -298.028076171875, |
|
"loss": 0.1039, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.1303044557571411, |
|
"rewards/margins": 5.467093467712402, |
|
"rewards/rejected": -4.336789131164551, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0905963302752294e-07, |
|
"logits/chosen": -2.7934536933898926, |
|
"logits/rejected": -2.880432605743408, |
|
"logps/chosen": -312.3811340332031, |
|
"logps/rejected": -316.52215576171875, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.9917739629745483, |
|
"rewards/margins": 5.529503345489502, |
|
"rewards/rejected": -4.537729263305664, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.071483180428134e-07, |
|
"logits/chosen": -2.9836788177490234, |
|
"logits/rejected": -2.9340128898620605, |
|
"logps/chosen": -382.5011291503906, |
|
"logps/rejected": -295.3705749511719, |
|
"loss": 0.0979, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5801069736480713, |
|
"rewards/margins": 5.381975173950195, |
|
"rewards/rejected": -3.801867723464966, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_logits/chosen": -2.9200918674468994, |
|
"eval_logits/rejected": -2.9171833992004395, |
|
"eval_logps/chosen": -368.1370849609375, |
|
"eval_logps/rejected": -314.4656982421875, |
|
"eval_loss": 0.5474238991737366, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": -0.09453116357326508, |
|
"eval_rewards/margins": 2.1554412841796875, |
|
"eval_rewards/rejected": -2.249972343444824, |
|
"eval_runtime": 164.7724, |
|
"eval_samples_per_second": 12.138, |
|
"eval_steps_per_second": 0.382, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0523700305810395e-07, |
|
"logits/chosen": -2.9245269298553467, |
|
"logits/rejected": -2.9436841011047363, |
|
"logps/chosen": -330.574951171875, |
|
"logps/rejected": -342.8641662597656, |
|
"loss": 0.1043, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3053131103515625, |
|
"rewards/margins": 5.542339324951172, |
|
"rewards/rejected": -4.237026214599609, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.033256880733945e-07, |
|
"logits/chosen": -2.9248242378234863, |
|
"logits/rejected": -2.935176372528076, |
|
"logps/chosen": -286.57171630859375, |
|
"logps/rejected": -278.96746826171875, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.7352786660194397, |
|
"rewards/margins": 5.138430118560791, |
|
"rewards/rejected": -4.403151035308838, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.01414373088685e-07, |
|
"logits/chosen": -2.8516454696655273, |
|
"logits/rejected": -2.7985987663269043, |
|
"logps/chosen": -344.3554382324219, |
|
"logps/rejected": -341.85986328125, |
|
"loss": 0.1138, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.1177431344985962, |
|
"rewards/margins": 5.806307792663574, |
|
"rewards/rejected": -4.688565254211426, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9950305810397555e-07, |
|
"logits/chosen": -2.906580686569214, |
|
"logits/rejected": -2.97481369972229, |
|
"logps/chosen": -335.2439880371094, |
|
"logps/rejected": -320.96929931640625, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.8055500984191895, |
|
"rewards/margins": 6.211544990539551, |
|
"rewards/rejected": -5.405994892120361, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9759174311926603e-07, |
|
"logits/chosen": -2.9623026847839355, |
|
"logits/rejected": -2.9445879459381104, |
|
"logps/chosen": -323.3135070800781, |
|
"logps/rejected": -329.90496826171875, |
|
"loss": 0.1101, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.9566561579704285, |
|
"rewards/margins": 7.040016174316406, |
|
"rewards/rejected": -6.083359718322754, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9568042813455656e-07, |
|
"logits/chosen": -2.8847999572753906, |
|
"logits/rejected": -2.8803889751434326, |
|
"logps/chosen": -342.5070495605469, |
|
"logps/rejected": -269.67431640625, |
|
"loss": 0.1087, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.4027617573738098, |
|
"rewards/margins": 4.224934101104736, |
|
"rewards/rejected": -3.8221726417541504, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.937691131498471e-07, |
|
"logits/chosen": -2.949441909790039, |
|
"logits/rejected": -2.9045028686523438, |
|
"logps/chosen": -338.4786376953125, |
|
"logps/rejected": -334.69189453125, |
|
"loss": 0.1096, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.9296592473983765, |
|
"rewards/margins": 5.884530067443848, |
|
"rewards/rejected": -4.954870700836182, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.918577981651376e-07, |
|
"logits/chosen": -2.932290554046631, |
|
"logits/rejected": -2.9427378177642822, |
|
"logps/chosen": -280.7291564941406, |
|
"logps/rejected": -314.51953125, |
|
"loss": 0.1177, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.7406275868415833, |
|
"rewards/margins": 5.4294962882995605, |
|
"rewards/rejected": -4.688868522644043, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.8994648318042816e-07, |
|
"logits/chosen": -2.9206976890563965, |
|
"logits/rejected": -2.9712460041046143, |
|
"logps/chosen": -358.7654113769531, |
|
"logps/rejected": -313.7131652832031, |
|
"loss": 0.112, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.8611236810684204, |
|
"rewards/margins": 5.0603203773498535, |
|
"rewards/rejected": -4.199196815490723, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8803516819571863e-07, |
|
"logits/chosen": -2.979775905609131, |
|
"logits/rejected": -2.9886953830718994, |
|
"logps/chosen": -379.1478576660156, |
|
"logps/rejected": -358.28179931640625, |
|
"loss": 0.1366, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.4315125942230225, |
|
"rewards/margins": 6.2576189041137695, |
|
"rewards/rejected": -4.826106071472168, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_logits/chosen": -2.9143617153167725, |
|
"eval_logits/rejected": -2.9134304523468018, |
|
"eval_logps/chosen": -371.9402770996094, |
|
"eval_logps/rejected": -315.933837890625, |
|
"eval_loss": 0.5439518094062805, |
|
"eval_rewards/accuracies": 0.7579365372657776, |
|
"eval_rewards/chosen": -0.47485068440437317, |
|
"eval_rewards/margins": 1.921934962272644, |
|
"eval_rewards/rejected": -2.3967857360839844, |
|
"eval_runtime": 165.1605, |
|
"eval_samples_per_second": 12.109, |
|
"eval_steps_per_second": 0.381, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8612385321100917e-07, |
|
"logits/chosen": -2.8306632041931152, |
|
"logits/rejected": -2.9071240425109863, |
|
"logps/chosen": -294.634033203125, |
|
"logps/rejected": -327.87896728515625, |
|
"loss": 0.1281, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.9478427171707153, |
|
"rewards/margins": 5.661940097808838, |
|
"rewards/rejected": -4.714097499847412, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.842125382262997e-07, |
|
"logits/chosen": -2.9503073692321777, |
|
"logits/rejected": -2.9379420280456543, |
|
"logps/chosen": -308.3216247558594, |
|
"logps/rejected": -308.57574462890625, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.2659790515899658, |
|
"rewards/margins": 5.583965301513672, |
|
"rewards/rejected": -4.317985534667969, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8230122324159023e-07, |
|
"logits/chosen": -2.8611526489257812, |
|
"logits/rejected": -2.9008944034576416, |
|
"logps/chosen": -375.9707946777344, |
|
"logps/rejected": -374.29913330078125, |
|
"loss": 0.1194, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3691354990005493, |
|
"rewards/margins": 6.1049299240112305, |
|
"rewards/rejected": -4.735795021057129, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.8038990825688076e-07, |
|
"logits/chosen": -2.994868516921997, |
|
"logits/rejected": -2.9603443145751953, |
|
"logps/chosen": -263.36474609375, |
|
"logps/rejected": -250.1201934814453, |
|
"loss": 0.1098, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.8430646657943726, |
|
"rewards/margins": 4.541081428527832, |
|
"rewards/rejected": -3.698017120361328, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.784785932721712e-07, |
|
"logits/chosen": -2.9225330352783203, |
|
"logits/rejected": -2.925787925720215, |
|
"logps/chosen": -319.07574462890625, |
|
"logps/rejected": -327.4895324707031, |
|
"loss": 0.1336, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.1656725406646729, |
|
"rewards/margins": 5.621832847595215, |
|
"rewards/rejected": -4.456160068511963, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.765672782874617e-07, |
|
"logits/chosen": -2.896669864654541, |
|
"logits/rejected": -2.895914316177368, |
|
"logps/chosen": -331.0616760253906, |
|
"logps/rejected": -286.6056213378906, |
|
"loss": 0.1367, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3613170385360718, |
|
"rewards/margins": 5.62969970703125, |
|
"rewards/rejected": -4.268383026123047, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7465596330275225e-07, |
|
"logits/chosen": -2.955178737640381, |
|
"logits/rejected": -2.9608724117279053, |
|
"logps/chosen": -350.20703125, |
|
"logps/rejected": -255.1401824951172, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.233802080154419, |
|
"rewards/margins": 5.684638023376465, |
|
"rewards/rejected": -4.450836658477783, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.727446483180428e-07, |
|
"logits/chosen": -2.9283223152160645, |
|
"logits/rejected": -2.952641010284424, |
|
"logps/chosen": -313.20306396484375, |
|
"logps/rejected": -316.35333251953125, |
|
"loss": 0.1125, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.9838566780090332, |
|
"rewards/margins": 5.377806186676025, |
|
"rewards/rejected": -4.393948554992676, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.708333333333333e-07, |
|
"logits/chosen": -2.850559949874878, |
|
"logits/rejected": -2.833322048187256, |
|
"logps/chosen": -341.3831481933594, |
|
"logps/rejected": -314.9398498535156, |
|
"loss": 0.0943, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.7250410914421082, |
|
"rewards/margins": 5.533560276031494, |
|
"rewards/rejected": -4.808518409729004, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6892201834862385e-07, |
|
"logits/chosen": -2.937903642654419, |
|
"logits/rejected": -2.9050183296203613, |
|
"logps/chosen": -326.52691650390625, |
|
"logps/rejected": -302.30694580078125, |
|
"loss": 0.1042, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.6643081903457642, |
|
"rewards/margins": 5.094948768615723, |
|
"rewards/rejected": -4.43064022064209, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_logits/chosen": -2.930583953857422, |
|
"eval_logits/rejected": -2.936053991317749, |
|
"eval_logps/chosen": -372.2054138183594, |
|
"eval_logps/rejected": -318.7686462402344, |
|
"eval_loss": 0.552377462387085, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": -0.5013648867607117, |
|
"eval_rewards/margins": 2.178898334503174, |
|
"eval_rewards/rejected": -2.6802632808685303, |
|
"eval_runtime": 167.7329, |
|
"eval_samples_per_second": 11.924, |
|
"eval_steps_per_second": 0.376, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6701070336391433e-07, |
|
"logits/chosen": -2.90950345993042, |
|
"logits/rejected": -2.87695050239563, |
|
"logps/chosen": -378.1885070800781, |
|
"logps/rejected": -322.77337646484375, |
|
"loss": 0.0924, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.4327830076217651, |
|
"rewards/margins": 5.619394779205322, |
|
"rewards/rejected": -4.186612129211426, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6509938837920486e-07, |
|
"logits/chosen": -2.849907398223877, |
|
"logits/rejected": -2.8833765983581543, |
|
"logps/chosen": -289.51605224609375, |
|
"logps/rejected": -320.0068054199219, |
|
"loss": 0.0975, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.8192381858825684, |
|
"rewards/margins": 4.99267053604126, |
|
"rewards/rejected": -4.173432350158691, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.631880733944954e-07, |
|
"logits/chosen": -2.964118719100952, |
|
"logits/rejected": -2.984459400177002, |
|
"logps/chosen": -303.44866943359375, |
|
"logps/rejected": -299.94482421875, |
|
"loss": 0.105, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.37635737657546997, |
|
"rewards/margins": 4.797235488891602, |
|
"rewards/rejected": -4.420877933502197, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.612767584097859e-07, |
|
"logits/chosen": -2.9242002964019775, |
|
"logits/rejected": -2.9575366973876953, |
|
"logps/chosen": -308.75616455078125, |
|
"logps/rejected": -282.21380615234375, |
|
"loss": 0.1016, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.4492263793945312, |
|
"rewards/margins": 5.21218204498291, |
|
"rewards/rejected": -3.7629554271698, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5936544342507646e-07, |
|
"logits/chosen": -2.902669668197632, |
|
"logits/rejected": -2.932953357696533, |
|
"logps/chosen": -306.1797790527344, |
|
"logps/rejected": -315.36700439453125, |
|
"loss": 0.1412, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.0359132289886475, |
|
"rewards/margins": 5.480694770812988, |
|
"rewards/rejected": -4.444781303405762, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5745412844036693e-07, |
|
"logits/chosen": -2.862687110900879, |
|
"logits/rejected": -2.9322876930236816, |
|
"logps/chosen": -361.41583251953125, |
|
"logps/rejected": -309.0520935058594, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3139979839324951, |
|
"rewards/margins": 5.389029026031494, |
|
"rewards/rejected": -4.07503080368042, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5554281345565747e-07, |
|
"logits/chosen": -2.937886953353882, |
|
"logits/rejected": -2.9431166648864746, |
|
"logps/chosen": -312.0148620605469, |
|
"logps/rejected": -329.90863037109375, |
|
"loss": 0.1005, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.7804427146911621, |
|
"rewards/margins": 5.692571640014648, |
|
"rewards/rejected": -4.912128925323486, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.53631498470948e-07, |
|
"logits/chosen": -2.972658634185791, |
|
"logits/rejected": -2.9699690341949463, |
|
"logps/chosen": -347.6422424316406, |
|
"logps/rejected": -312.10858154296875, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3729878664016724, |
|
"rewards/margins": 5.7400736808776855, |
|
"rewards/rejected": -4.367085933685303, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5172018348623853e-07, |
|
"logits/chosen": -2.8885016441345215, |
|
"logits/rejected": -2.9170756340026855, |
|
"logps/chosen": -355.39813232421875, |
|
"logps/rejected": -336.2825927734375, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.1573688983917236, |
|
"rewards/margins": 5.676226615905762, |
|
"rewards/rejected": -4.518857479095459, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.4980886850152906e-07, |
|
"logits/chosen": -2.895519256591797, |
|
"logits/rejected": -2.851107120513916, |
|
"logps/chosen": -341.70904541015625, |
|
"logps/rejected": -305.86480712890625, |
|
"loss": 0.1313, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.0455483198165894, |
|
"rewards/margins": 5.084899425506592, |
|
"rewards/rejected": -4.039351463317871, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_logits/chosen": -2.8998661041259766, |
|
"eval_logits/rejected": -2.9059910774230957, |
|
"eval_logps/chosen": -369.42547607421875, |
|
"eval_logps/rejected": -313.8333435058594, |
|
"eval_loss": 0.5333446264266968, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.22337232530117035, |
|
"eval_rewards/margins": 1.9633642435073853, |
|
"eval_rewards/rejected": -2.1867363452911377, |
|
"eval_runtime": 167.9925, |
|
"eval_samples_per_second": 11.905, |
|
"eval_steps_per_second": 0.375, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.478975535168196e-07, |
|
"logits/chosen": -2.892448663711548, |
|
"logits/rejected": -2.8929343223571777, |
|
"logps/chosen": -341.67431640625, |
|
"logps/rejected": -287.61383056640625, |
|
"loss": 0.1044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8163628578186035, |
|
"rewards/margins": 4.475451469421387, |
|
"rewards/rejected": -3.659088611602783, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.459862385321101e-07, |
|
"logits/chosen": -2.9265084266662598, |
|
"logits/rejected": -2.9548892974853516, |
|
"logps/chosen": -382.5340576171875, |
|
"logps/rejected": -347.4888916015625, |
|
"loss": 0.1018, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.6915686130523682, |
|
"rewards/margins": 6.067580223083496, |
|
"rewards/rejected": -4.376010894775391, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.440749235474006e-07, |
|
"logits/chosen": -2.8851680755615234, |
|
"logits/rejected": -2.903552532196045, |
|
"logps/chosen": -342.8496398925781, |
|
"logps/rejected": -317.72845458984375, |
|
"loss": 0.1005, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.0220366716384888, |
|
"rewards/margins": 5.634666442871094, |
|
"rewards/rejected": -4.6126298904418945, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.421636085626911e-07, |
|
"logits/chosen": -2.8142755031585693, |
|
"logits/rejected": -2.8399770259857178, |
|
"logps/chosen": -344.5146484375, |
|
"logps/rejected": -301.95928955078125, |
|
"loss": 0.0981, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.6624820232391357, |
|
"rewards/margins": 6.2719035148620605, |
|
"rewards/rejected": -4.6094207763671875, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.402522935779816e-07, |
|
"logits/chosen": -2.8734793663024902, |
|
"logits/rejected": -2.876209259033203, |
|
"logps/chosen": -339.1289367675781, |
|
"logps/rejected": -351.3002014160156, |
|
"loss": 0.1207, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.1800963878631592, |
|
"rewards/margins": 5.746790409088135, |
|
"rewards/rejected": -4.566694736480713, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3834097859327215e-07, |
|
"logits/chosen": -2.922632932662964, |
|
"logits/rejected": -2.973679780960083, |
|
"logps/chosen": -285.7434997558594, |
|
"logps/rejected": -304.81536865234375, |
|
"loss": 0.1184, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.9965537786483765, |
|
"rewards/margins": 5.077877998352051, |
|
"rewards/rejected": -4.081325054168701, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3642966360856268e-07, |
|
"logits/chosen": -2.9541144371032715, |
|
"logits/rejected": -2.929344654083252, |
|
"logps/chosen": -326.902587890625, |
|
"logps/rejected": -306.6372985839844, |
|
"loss": 0.0972, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.7346267700195312, |
|
"rewards/margins": 5.666862964630127, |
|
"rewards/rejected": -3.9322357177734375, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.345183486238532e-07, |
|
"logits/chosen": -2.857109785079956, |
|
"logits/rejected": -2.8801960945129395, |
|
"logps/chosen": -308.47369384765625, |
|
"logps/rejected": -375.78692626953125, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.088503122329712, |
|
"rewards/margins": 5.432967185974121, |
|
"rewards/rejected": -4.344464302062988, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3260703363914372e-07, |
|
"logits/chosen": -2.9087703227996826, |
|
"logits/rejected": -2.9551265239715576, |
|
"logps/chosen": -381.01959228515625, |
|
"logps/rejected": -338.1856994628906, |
|
"loss": 0.105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9901115894317627, |
|
"rewards/margins": 5.308182716369629, |
|
"rewards/rejected": -3.318070888519287, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3069571865443425e-07, |
|
"logits/chosen": -2.9682905673980713, |
|
"logits/rejected": -2.9819796085357666, |
|
"logps/chosen": -277.3031921386719, |
|
"logps/rejected": -325.71649169921875, |
|
"loss": 0.1629, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.5376863479614258, |
|
"rewards/margins": 5.757152557373047, |
|
"rewards/rejected": -5.219466209411621, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_logits/chosen": -2.909576654434204, |
|
"eval_logits/rejected": -2.9181904792785645, |
|
"eval_logps/chosen": -371.09588623046875, |
|
"eval_logps/rejected": -319.5571594238281, |
|
"eval_loss": 0.5655122399330139, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.3904118835926056, |
|
"eval_rewards/margins": 2.3687071800231934, |
|
"eval_rewards/rejected": -2.7591187953948975, |
|
"eval_runtime": 164.0305, |
|
"eval_samples_per_second": 12.193, |
|
"eval_steps_per_second": 0.384, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2878440366972476e-07, |
|
"logits/chosen": -2.9808902740478516, |
|
"logits/rejected": -2.9869067668914795, |
|
"logps/chosen": -326.5906677246094, |
|
"logps/rejected": -384.11944580078125, |
|
"loss": 0.0867, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.5069032311439514, |
|
"rewards/margins": 5.799986362457275, |
|
"rewards/rejected": -5.2930827140808105, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.268730886850153e-07, |
|
"logits/chosen": -2.8385868072509766, |
|
"logits/rejected": -2.9057114124298096, |
|
"logps/chosen": -325.4120178222656, |
|
"logps/rejected": -309.59136962890625, |
|
"loss": 0.0989, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.1605224609375, |
|
"rewards/margins": 5.591654300689697, |
|
"rewards/rejected": -4.431131362915039, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.249617737003058e-07, |
|
"logits/chosen": -2.8839237689971924, |
|
"logits/rejected": -2.8796088695526123, |
|
"logps/chosen": -352.14886474609375, |
|
"logps/rejected": -371.3978576660156, |
|
"loss": 0.1089, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.0480362176895142, |
|
"rewards/margins": 5.741724967956543, |
|
"rewards/rejected": -4.693687915802002, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2305045871559633e-07, |
|
"logits/chosen": -2.883430242538452, |
|
"logits/rejected": -2.8605690002441406, |
|
"logps/chosen": -345.23272705078125, |
|
"logps/rejected": -331.52325439453125, |
|
"loss": 0.1311, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.180057168006897, |
|
"rewards/margins": 5.705449104309082, |
|
"rewards/rejected": -4.525391578674316, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2113914373088686e-07, |
|
"logits/chosen": -2.962857484817505, |
|
"logits/rejected": -2.9696333408355713, |
|
"logps/chosen": -377.6351623535156, |
|
"logps/rejected": -362.8825378417969, |
|
"loss": 0.1067, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.8095831871032715, |
|
"rewards/margins": 5.408170700073242, |
|
"rewards/rejected": -4.598587512969971, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1922782874617736e-07, |
|
"logits/chosen": -2.9029316902160645, |
|
"logits/rejected": -2.9539952278137207, |
|
"logps/chosen": -289.267822265625, |
|
"logps/rejected": -366.2077941894531, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.47237950563430786, |
|
"rewards/margins": 6.199611186981201, |
|
"rewards/rejected": -5.727231502532959, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1731651376146787e-07, |
|
"logits/chosen": -2.8582608699798584, |
|
"logits/rejected": -2.8988289833068848, |
|
"logps/chosen": -277.98406982421875, |
|
"logps/rejected": -328.0066833496094, |
|
"loss": 0.1048, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.7530109286308289, |
|
"rewards/margins": 5.7497334480285645, |
|
"rewards/rejected": -4.99672269821167, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.154051987767584e-07, |
|
"logits/chosen": -2.9023542404174805, |
|
"logits/rejected": -2.9242827892303467, |
|
"logps/chosen": -314.0538330078125, |
|
"logps/rejected": -299.74420166015625, |
|
"loss": 0.12, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.7526110410690308, |
|
"rewards/margins": 5.213059425354004, |
|
"rewards/rejected": -4.460447311401367, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.134938837920489e-07, |
|
"logits/chosen": -2.8436591625213623, |
|
"logits/rejected": -2.8463809490203857, |
|
"logps/chosen": -255.3565673828125, |
|
"logps/rejected": -273.94464111328125, |
|
"loss": 0.0818, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.06976697593927383, |
|
"rewards/margins": 4.849638938903809, |
|
"rewards/rejected": -4.919405937194824, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1158256880733944e-07, |
|
"logits/chosen": -2.9030632972717285, |
|
"logits/rejected": -2.8941729068756104, |
|
"logps/chosen": -367.6248779296875, |
|
"logps/rejected": -302.12249755859375, |
|
"loss": 0.0993, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.4400938153266907, |
|
"rewards/margins": 5.627293586730957, |
|
"rewards/rejected": -5.187199592590332, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_logits/chosen": -2.8477160930633545, |
|
"eval_logits/rejected": -2.8601999282836914, |
|
"eval_logps/chosen": -374.3083801269531, |
|
"eval_logps/rejected": -321.6667785644531, |
|
"eval_loss": 0.5605445504188538, |
|
"eval_rewards/accuracies": 0.7460317611694336, |
|
"eval_rewards/chosen": -0.711660623550415, |
|
"eval_rewards/margins": 2.2584221363067627, |
|
"eval_rewards/rejected": -2.9700827598571777, |
|
"eval_runtime": 164.7388, |
|
"eval_samples_per_second": 12.14, |
|
"eval_steps_per_second": 0.382, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0967125382262994e-07, |
|
"logits/chosen": -2.8688273429870605, |
|
"logits/rejected": -2.868739366531372, |
|
"logps/chosen": -337.7546081542969, |
|
"logps/rejected": -312.27569580078125, |
|
"loss": 0.1163, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.7751233577728271, |
|
"rewards/margins": 5.873146057128906, |
|
"rewards/rejected": -5.098022937774658, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0775993883792048e-07, |
|
"logits/chosen": -2.8374381065368652, |
|
"logits/rejected": -2.8085215091705322, |
|
"logps/chosen": -352.53192138671875, |
|
"logps/rejected": -316.3230895996094, |
|
"loss": 0.0933, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.9547752141952515, |
|
"rewards/margins": 5.203994274139404, |
|
"rewards/rejected": -4.249218940734863, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.05848623853211e-07, |
|
"logits/chosen": -2.874891757965088, |
|
"logits/rejected": -2.839573621749878, |
|
"logps/chosen": -366.4833679199219, |
|
"logps/rejected": -319.9959411621094, |
|
"loss": 0.0966, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.212294101715088, |
|
"rewards/margins": 5.914790630340576, |
|
"rewards/rejected": -4.702496528625488, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0393730886850151e-07, |
|
"logits/chosen": -2.8277204036712646, |
|
"logits/rejected": -2.878105640411377, |
|
"logps/chosen": -378.3955383300781, |
|
"logps/rejected": -314.2088623046875, |
|
"loss": 0.0863, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.7747803926467896, |
|
"rewards/margins": 5.967954158782959, |
|
"rewards/rejected": -5.193174362182617, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0202599388379205e-07, |
|
"logits/chosen": -2.8658251762390137, |
|
"logits/rejected": -2.8985071182250977, |
|
"logps/chosen": -339.0852355957031, |
|
"logps/rejected": -384.46112060546875, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.9408707618713379, |
|
"rewards/margins": 5.965841770172119, |
|
"rewards/rejected": -5.024971008300781, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0011467889908258e-07, |
|
"logits/chosen": -2.829246997833252, |
|
"logits/rejected": -2.8732194900512695, |
|
"logps/chosen": -381.65655517578125, |
|
"logps/rejected": -284.0471496582031, |
|
"loss": 0.0945, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.5882245898246765, |
|
"rewards/margins": 5.461816787719727, |
|
"rewards/rejected": -4.873592376708984, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9820336391437308e-07, |
|
"logits/chosen": -2.8398656845092773, |
|
"logits/rejected": -2.8620615005493164, |
|
"logps/chosen": -309.2004089355469, |
|
"logps/rejected": -296.1297302246094, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.8088359832763672, |
|
"rewards/margins": 5.4316887855529785, |
|
"rewards/rejected": -4.6228532791137695, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9629204892966362e-07, |
|
"logits/chosen": -2.941847324371338, |
|
"logits/rejected": -2.950911283493042, |
|
"logps/chosen": -329.76617431640625, |
|
"logps/rejected": -295.0538635253906, |
|
"loss": 0.1113, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.2022342681884766, |
|
"rewards/margins": 5.600251197814941, |
|
"rewards/rejected": -4.398016929626465, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.943807339449541e-07, |
|
"logits/chosen": -2.8697052001953125, |
|
"logits/rejected": -2.901094913482666, |
|
"logps/chosen": -311.559326171875, |
|
"logps/rejected": -333.4175720214844, |
|
"loss": 0.0948, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.7903985977172852, |
|
"rewards/margins": 5.597433090209961, |
|
"rewards/rejected": -4.807034015655518, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9246941896024463e-07, |
|
"logits/chosen": -2.939120054244995, |
|
"logits/rejected": -2.9861233234405518, |
|
"logps/chosen": -320.5481262207031, |
|
"logps/rejected": -347.7875061035156, |
|
"loss": 0.1116, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.6647524833679199, |
|
"rewards/margins": 6.036587238311768, |
|
"rewards/rejected": -5.3718342781066895, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_logits/chosen": -2.914954900741577, |
|
"eval_logits/rejected": -2.927724599838257, |
|
"eval_logps/chosen": -373.57073974609375, |
|
"eval_logps/rejected": -319.2250061035156, |
|
"eval_loss": 0.5649252533912659, |
|
"eval_rewards/accuracies": 0.7539682388305664, |
|
"eval_rewards/chosen": -0.6378985047340393, |
|
"eval_rewards/margins": 2.088006019592285, |
|
"eval_rewards/rejected": -2.7259042263031006, |
|
"eval_runtime": 164.2377, |
|
"eval_samples_per_second": 12.177, |
|
"eval_steps_per_second": 0.384, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9055810397553516e-07, |
|
"logits/chosen": -2.9238085746765137, |
|
"logits/rejected": -2.9308090209960938, |
|
"logps/chosen": -313.63665771484375, |
|
"logps/rejected": -304.2153625488281, |
|
"loss": 0.1214, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.6549821496009827, |
|
"rewards/margins": 4.487866401672363, |
|
"rewards/rejected": -3.8328843116760254, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8864678899082566e-07, |
|
"logits/chosen": -2.8484818935394287, |
|
"logits/rejected": -2.866534948348999, |
|
"logps/chosen": -347.75689697265625, |
|
"logps/rejected": -279.4710693359375, |
|
"loss": 0.1082, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.3496394753456116, |
|
"rewards/margins": 4.312170505523682, |
|
"rewards/rejected": -3.9625308513641357, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.867354740061162e-07, |
|
"logits/chosen": -2.9492716789245605, |
|
"logits/rejected": -2.956796169281006, |
|
"logps/chosen": -307.85845947265625, |
|
"logps/rejected": -332.1622619628906, |
|
"loss": 0.1061, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.4475575387477875, |
|
"rewards/margins": 5.942025184631348, |
|
"rewards/rejected": -5.494467735290527, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8482415902140673e-07, |
|
"logits/chosen": -2.923053503036499, |
|
"logits/rejected": -2.920959949493408, |
|
"logps/chosen": -331.311767578125, |
|
"logps/rejected": -320.19586181640625, |
|
"loss": 0.0801, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.0848143100738525, |
|
"rewards/margins": 5.605216979980469, |
|
"rewards/rejected": -4.520401954650879, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8291284403669723e-07, |
|
"logits/chosen": -2.881058692932129, |
|
"logits/rejected": -2.93363618850708, |
|
"logps/chosen": -309.11212158203125, |
|
"logps/rejected": -322.50665283203125, |
|
"loss": 0.0254, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.7099040746688843, |
|
"rewards/margins": 6.735787868499756, |
|
"rewards/rejected": -5.025883674621582, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8100152905198777e-07, |
|
"logits/chosen": -2.7668607234954834, |
|
"logits/rejected": -2.7822773456573486, |
|
"logps/chosen": -351.9031677246094, |
|
"logps/rejected": -415.9180603027344, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.090543508529663, |
|
"rewards/margins": 7.8234100341796875, |
|
"rewards/rejected": -6.7328667640686035, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7909021406727827e-07, |
|
"logits/chosen": -2.9600331783294678, |
|
"logits/rejected": -2.8843834400177, |
|
"logps/chosen": -309.39642333984375, |
|
"logps/rejected": -306.4966735839844, |
|
"loss": 0.0282, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.9620206952095032, |
|
"rewards/margins": 6.899697303771973, |
|
"rewards/rejected": -5.937676429748535, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.771788990825688e-07, |
|
"logits/chosen": -2.923687696456909, |
|
"logits/rejected": -2.9661598205566406, |
|
"logps/chosen": -330.7653503417969, |
|
"logps/rejected": -352.5653076171875, |
|
"loss": 0.0243, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7405881881713867, |
|
"rewards/margins": 8.00406265258789, |
|
"rewards/rejected": -6.263474941253662, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7526758409785934e-07, |
|
"logits/chosen": -2.9299581050872803, |
|
"logits/rejected": -2.8949360847473145, |
|
"logps/chosen": -362.6274719238281, |
|
"logps/rejected": -363.09149169921875, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0967247486114502, |
|
"rewards/margins": 7.466977119445801, |
|
"rewards/rejected": -6.37025260925293, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7335626911314984e-07, |
|
"logits/chosen": -2.845986843109131, |
|
"logits/rejected": -2.8671188354492188, |
|
"logps/chosen": -274.60870361328125, |
|
"logps/rejected": -295.59478759765625, |
|
"loss": 0.0193, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.014614641666412354, |
|
"rewards/margins": 6.743406772613525, |
|
"rewards/rejected": -6.758021354675293, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_logits/chosen": -2.882474422454834, |
|
"eval_logits/rejected": -2.8919453620910645, |
|
"eval_logps/chosen": -376.60406494140625, |
|
"eval_logps/rejected": -329.82745361328125, |
|
"eval_loss": 0.6121558547019958, |
|
"eval_rewards/accuracies": 0.761904776096344, |
|
"eval_rewards/chosen": -0.941230058670044, |
|
"eval_rewards/margins": 2.8449153900146484, |
|
"eval_rewards/rejected": -3.7861454486846924, |
|
"eval_runtime": 164.9655, |
|
"eval_samples_per_second": 12.124, |
|
"eval_steps_per_second": 0.382, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7144495412844037e-07, |
|
"logits/chosen": -2.9446756839752197, |
|
"logits/rejected": -2.953831911087036, |
|
"logps/chosen": -353.67376708984375, |
|
"logps/rejected": -347.7017822265625, |
|
"loss": 0.0214, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.4430414140224457, |
|
"rewards/margins": 7.570870399475098, |
|
"rewards/rejected": -7.127829074859619, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6953363914373088e-07, |
|
"logits/chosen": -2.940734386444092, |
|
"logits/rejected": -2.9746463298797607, |
|
"logps/chosen": -348.05328369140625, |
|
"logps/rejected": -333.2148742675781, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2261013984680176, |
|
"rewards/margins": 8.234363555908203, |
|
"rewards/rejected": -7.008261680603027, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6762232415902138e-07, |
|
"logits/chosen": -2.875319719314575, |
|
"logits/rejected": -2.855180263519287, |
|
"logps/chosen": -306.70050048828125, |
|
"logps/rejected": -349.5177917480469, |
|
"loss": 0.0203, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.3287349343299866, |
|
"rewards/margins": 7.007230281829834, |
|
"rewards/rejected": -6.678494930267334, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6571100917431192e-07, |
|
"logits/chosen": -2.9315755367279053, |
|
"logits/rejected": -2.930187702178955, |
|
"logps/chosen": -306.041259765625, |
|
"logps/rejected": -305.6824951171875, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5696347951889038, |
|
"rewards/margins": 7.535808563232422, |
|
"rewards/rejected": -6.9661736488342285, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6379969418960242e-07, |
|
"logits/chosen": -2.8848538398742676, |
|
"logits/rejected": -2.905867338180542, |
|
"logps/chosen": -389.7286682128906, |
|
"logps/rejected": -386.9409484863281, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3652772903442383, |
|
"rewards/margins": 8.069811820983887, |
|
"rewards/rejected": -6.704535484313965, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6188837920489295e-07, |
|
"logits/chosen": -2.8828773498535156, |
|
"logits/rejected": -2.8783280849456787, |
|
"logps/chosen": -359.57666015625, |
|
"logps/rejected": -339.35345458984375, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8870366811752319, |
|
"rewards/margins": 8.021839141845703, |
|
"rewards/rejected": -7.13480281829834, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5997706422018349e-07, |
|
"logits/chosen": -2.88275408744812, |
|
"logits/rejected": -2.929903984069824, |
|
"logps/chosen": -322.4759216308594, |
|
"logps/rejected": -423.30682373046875, |
|
"loss": 0.0171, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5736116170883179, |
|
"rewards/margins": 8.667892456054688, |
|
"rewards/rejected": -7.0942816734313965, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.58065749235474e-07, |
|
"logits/chosen": -2.90217661857605, |
|
"logits/rejected": -2.8910233974456787, |
|
"logps/chosen": -413.54522705078125, |
|
"logps/rejected": -385.43341064453125, |
|
"loss": 0.0127, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.9447764158248901, |
|
"rewards/margins": 7.688788414001465, |
|
"rewards/rejected": -6.744012355804443, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5615443425076452e-07, |
|
"logits/chosen": -2.7480947971343994, |
|
"logits/rejected": -2.747185230255127, |
|
"logps/chosen": -354.87493896484375, |
|
"logps/rejected": -351.3457946777344, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.513919472694397, |
|
"rewards/margins": 8.175249099731445, |
|
"rewards/rejected": -7.661329746246338, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5424311926605506e-07, |
|
"logits/chosen": -2.8367042541503906, |
|
"logits/rejected": -2.8408215045928955, |
|
"logps/chosen": -298.4134826660156, |
|
"logps/rejected": -300.4717712402344, |
|
"loss": 0.0175, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.2804573178291321, |
|
"rewards/margins": 7.449028968811035, |
|
"rewards/rejected": -7.729485511779785, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_logits/chosen": -2.839301586151123, |
|
"eval_logits/rejected": -2.8474462032318115, |
|
"eval_logps/chosen": -383.2186279296875, |
|
"eval_logps/rejected": -338.7977294921875, |
|
"eval_loss": 0.6523212790489197, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": -1.6026798486709595, |
|
"eval_rewards/margins": 3.080495834350586, |
|
"eval_rewards/rejected": -4.683175563812256, |
|
"eval_runtime": 165.5125, |
|
"eval_samples_per_second": 12.084, |
|
"eval_steps_per_second": 0.381, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5233180428134556e-07, |
|
"logits/chosen": -2.9054439067840576, |
|
"logits/rejected": -2.913278102874756, |
|
"logps/chosen": -323.6388244628906, |
|
"logps/rejected": -323.73419189453125, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.8671594858169556, |
|
"rewards/margins": 7.387481689453125, |
|
"rewards/rejected": -6.520320892333984, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.504204892966361e-07, |
|
"logits/chosen": -2.800830364227295, |
|
"logits/rejected": -2.8197312355041504, |
|
"logps/chosen": -359.3259582519531, |
|
"logps/rejected": -394.8112487792969, |
|
"loss": 0.0173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4949137568473816, |
|
"rewards/margins": 9.693056106567383, |
|
"rewards/rejected": -9.198141098022461, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.485091743119266e-07, |
|
"logits/chosen": -2.890476942062378, |
|
"logits/rejected": -2.925356388092041, |
|
"logps/chosen": -315.17742919921875, |
|
"logps/rejected": -378.8518371582031, |
|
"loss": 0.0188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.27211472392082214, |
|
"rewards/margins": 8.301239967346191, |
|
"rewards/rejected": -8.02912425994873, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.465978593272171e-07, |
|
"logits/chosen": -2.820862054824829, |
|
"logits/rejected": -2.8192131519317627, |
|
"logps/chosen": -233.12344360351562, |
|
"logps/rejected": -238.68014526367188, |
|
"loss": 0.0187, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23832440376281738, |
|
"rewards/margins": 7.234049320220947, |
|
"rewards/rejected": -7.472373008728027, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4468654434250764e-07, |
|
"logits/chosen": -2.7812180519104004, |
|
"logits/rejected": -2.839566946029663, |
|
"logps/chosen": -400.56396484375, |
|
"logps/rejected": -418.9078063964844, |
|
"loss": 0.0183, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.306671380996704, |
|
"rewards/margins": 9.309846878051758, |
|
"rewards/rejected": -8.003175735473633, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4277522935779814e-07, |
|
"logits/chosen": -2.857119083404541, |
|
"logits/rejected": -2.8069121837615967, |
|
"logps/chosen": -346.87091064453125, |
|
"logps/rejected": -364.4837341308594, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.42278409004211426, |
|
"rewards/margins": 7.95212459564209, |
|
"rewards/rejected": -7.5293402671813965, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4086391437308867e-07, |
|
"logits/chosen": -2.828322172164917, |
|
"logits/rejected": -2.872556209564209, |
|
"logps/chosen": -371.8916015625, |
|
"logps/rejected": -404.73162841796875, |
|
"loss": 0.0183, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.5918793082237244, |
|
"rewards/margins": 8.069284439086914, |
|
"rewards/rejected": -7.477405548095703, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.389525993883792e-07, |
|
"logits/chosen": -2.8225388526916504, |
|
"logits/rejected": -2.8491692543029785, |
|
"logps/chosen": -293.30047607421875, |
|
"logps/rejected": -313.1904296875, |
|
"loss": 0.018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3963487148284912, |
|
"rewards/margins": 7.192727565765381, |
|
"rewards/rejected": -7.589076042175293, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.370412844036697e-07, |
|
"logits/chosen": -2.8727283477783203, |
|
"logits/rejected": -2.850238561630249, |
|
"logps/chosen": -306.93695068359375, |
|
"logps/rejected": -345.2283020019531, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09160284698009491, |
|
"rewards/margins": 7.823256492614746, |
|
"rewards/rejected": -7.731653690338135, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3512996941896024e-07, |
|
"logits/chosen": -2.8500583171844482, |
|
"logits/rejected": -2.8594961166381836, |
|
"logps/chosen": -283.15771484375, |
|
"logps/rejected": -311.3097839355469, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03272407129406929, |
|
"rewards/margins": 8.4783353805542, |
|
"rewards/rejected": -8.445611953735352, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_logits/chosen": -2.806851387023926, |
|
"eval_logits/rejected": -2.812812328338623, |
|
"eval_logps/chosen": -386.09039306640625, |
|
"eval_logps/rejected": -342.2704162597656, |
|
"eval_loss": 0.6702452898025513, |
|
"eval_rewards/accuracies": 0.7420634627342224, |
|
"eval_rewards/chosen": -1.8898613452911377, |
|
"eval_rewards/margins": 3.1405844688415527, |
|
"eval_rewards/rejected": -5.0304460525512695, |
|
"eval_runtime": 165.1336, |
|
"eval_samples_per_second": 12.111, |
|
"eval_steps_per_second": 0.382, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3321865443425075e-07, |
|
"logits/chosen": -2.7931952476501465, |
|
"logits/rejected": -2.8073198795318604, |
|
"logps/chosen": -338.2393493652344, |
|
"logps/rejected": -352.142333984375, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13923540711402893, |
|
"rewards/margins": 8.090972900390625, |
|
"rewards/rejected": -8.230208396911621, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3130733944954128e-07, |
|
"logits/chosen": -2.74983811378479, |
|
"logits/rejected": -2.748617649078369, |
|
"logps/chosen": -358.42401123046875, |
|
"logps/rejected": -402.30328369140625, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.6026802062988281, |
|
"rewards/margins": 9.088810920715332, |
|
"rewards/rejected": -7.4861297607421875, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.293960244648318e-07, |
|
"logits/chosen": -2.8457603454589844, |
|
"logits/rejected": -2.8344614505767822, |
|
"logps/chosen": -365.7544860839844, |
|
"logps/rejected": -347.2682189941406, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.001628613448701799, |
|
"rewards/margins": 8.202213287353516, |
|
"rewards/rejected": -8.203841209411621, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2748470948012232e-07, |
|
"logits/chosen": -2.8093724250793457, |
|
"logits/rejected": -2.81803822517395, |
|
"logps/chosen": -340.55352783203125, |
|
"logps/rejected": -407.7304992675781, |
|
"loss": 0.0119, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47526517510414124, |
|
"rewards/margins": 8.480849266052246, |
|
"rewards/rejected": -8.005583763122559, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2557339449541285e-07, |
|
"logits/chosen": -2.8672242164611816, |
|
"logits/rejected": -2.855675220489502, |
|
"logps/chosen": -343.7786865234375, |
|
"logps/rejected": -365.4543151855469, |
|
"loss": 0.0212, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.35550713539123535, |
|
"rewards/margins": 8.239429473876953, |
|
"rewards/rejected": -7.8839240074157715, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2366207951070336e-07, |
|
"logits/chosen": -2.8647074699401855, |
|
"logits/rejected": -2.8598999977111816, |
|
"logps/chosen": -335.4911193847656, |
|
"logps/rejected": -369.7025146484375, |
|
"loss": 0.0148, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1035120040178299, |
|
"rewards/margins": 8.297709465026855, |
|
"rewards/rejected": -8.401222229003906, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.217507645259939e-07, |
|
"logits/chosen": -2.8038744926452637, |
|
"logits/rejected": -2.8534445762634277, |
|
"logps/chosen": -327.49005126953125, |
|
"logps/rejected": -348.63116455078125, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.261627733707428, |
|
"rewards/margins": 8.056116104125977, |
|
"rewards/rejected": -7.794488430023193, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.198394495412844e-07, |
|
"logits/chosen": -2.879183053970337, |
|
"logits/rejected": -2.9233028888702393, |
|
"logps/chosen": -337.91790771484375, |
|
"logps/rejected": -346.1882019042969, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.12253670394420624, |
|
"rewards/margins": 7.606545925140381, |
|
"rewards/rejected": -7.4840087890625, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1792813455657493e-07, |
|
"logits/chosen": -2.7606253623962402, |
|
"logits/rejected": -2.8114898204803467, |
|
"logps/chosen": -337.6861877441406, |
|
"logps/rejected": -345.7854309082031, |
|
"loss": 0.0258, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.3526005446910858, |
|
"rewards/margins": 8.258612632751465, |
|
"rewards/rejected": -7.906012058258057, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1601681957186543e-07, |
|
"logits/chosen": -2.8984854221343994, |
|
"logits/rejected": -2.912468910217285, |
|
"logps/chosen": -334.9092102050781, |
|
"logps/rejected": -334.67669677734375, |
|
"loss": 0.0243, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7173303961753845, |
|
"rewards/margins": 7.802558898925781, |
|
"rewards/rejected": -7.085227966308594, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_logits/chosen": -2.8489737510681152, |
|
"eval_logits/rejected": -2.854724407196045, |
|
"eval_logps/chosen": -383.90655517578125, |
|
"eval_logps/rejected": -339.3347473144531, |
|
"eval_loss": 0.6559089422225952, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": -1.6714773178100586, |
|
"eval_rewards/margins": 3.0653984546661377, |
|
"eval_rewards/rejected": -4.736875534057617, |
|
"eval_runtime": 164.8339, |
|
"eval_samples_per_second": 12.133, |
|
"eval_steps_per_second": 0.382, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1410550458715595e-07, |
|
"logits/chosen": -2.8347411155700684, |
|
"logits/rejected": -2.851090908050537, |
|
"logps/chosen": -329.1361999511719, |
|
"logps/rejected": -359.9030456542969, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.2210700958967209, |
|
"rewards/margins": 7.436942100524902, |
|
"rewards/rejected": -7.215872287750244, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1219418960244648e-07, |
|
"logits/chosen": -2.8228423595428467, |
|
"logits/rejected": -2.841404438018799, |
|
"logps/chosen": -282.3636169433594, |
|
"logps/rejected": -409.47979736328125, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2855607867240906, |
|
"rewards/margins": 8.16025161743164, |
|
"rewards/rejected": -8.445813179016113, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.10282874617737e-07, |
|
"logits/chosen": -2.8471851348876953, |
|
"logits/rejected": -2.8798093795776367, |
|
"logps/chosen": -295.41900634765625, |
|
"logps/rejected": -340.5544738769531, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3259337544441223, |
|
"rewards/margins": 8.084188461303711, |
|
"rewards/rejected": -8.410122871398926, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0837155963302752e-07, |
|
"logits/chosen": -2.636784076690674, |
|
"logits/rejected": -2.740302562713623, |
|
"logps/chosen": -287.13702392578125, |
|
"logps/rejected": -391.1552429199219, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.528149425983429, |
|
"rewards/margins": 9.060527801513672, |
|
"rewards/rejected": -8.532378196716309, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0646024464831804e-07, |
|
"logits/chosen": -2.753213882446289, |
|
"logits/rejected": -2.822252035140991, |
|
"logps/chosen": -369.473388671875, |
|
"logps/rejected": -360.38983154296875, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.3261668086051941, |
|
"rewards/margins": 8.502967834472656, |
|
"rewards/rejected": -8.829133033752441, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0454892966360856e-07, |
|
"logits/chosen": -2.782691717147827, |
|
"logits/rejected": -2.868027448654175, |
|
"logps/chosen": -333.0803527832031, |
|
"logps/rejected": -355.0961608886719, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5834169983863831, |
|
"rewards/margins": 8.312009811401367, |
|
"rewards/rejected": -8.895425796508789, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0263761467889908e-07, |
|
"logits/chosen": -2.770711898803711, |
|
"logits/rejected": -2.796137809753418, |
|
"logps/chosen": -336.739990234375, |
|
"logps/rejected": -372.0965576171875, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.35953274369239807, |
|
"rewards/margins": 8.809865951538086, |
|
"rewards/rejected": -8.450332641601562, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.007262996941896e-07, |
|
"logits/chosen": -2.7547390460968018, |
|
"logits/rejected": -2.7793593406677246, |
|
"logps/chosen": -335.936279296875, |
|
"logps/rejected": -330.6647033691406, |
|
"loss": 0.0167, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.02794322930276394, |
|
"rewards/margins": 8.719170570373535, |
|
"rewards/rejected": -8.747113227844238, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.881498470948011e-08, |
|
"logits/chosen": -2.846524477005005, |
|
"logits/rejected": -2.799567222595215, |
|
"logps/chosen": -343.198486328125, |
|
"logps/rejected": -335.6533508300781, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.013063406571745872, |
|
"rewards/margins": 8.952108383178711, |
|
"rewards/rejected": -8.965171813964844, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.690366972477065e-08, |
|
"logits/chosen": -2.85577392578125, |
|
"logits/rejected": -2.8093135356903076, |
|
"logps/chosen": -333.2208251953125, |
|
"logps/rejected": -358.0810241699219, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.41089487075805664, |
|
"rewards/margins": 8.724878311157227, |
|
"rewards/rejected": -9.135773658752441, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_logits/chosen": -2.835172653198242, |
|
"eval_logits/rejected": -2.839359760284424, |
|
"eval_logps/chosen": -386.6546936035156, |
|
"eval_logps/rejected": -343.19000244140625, |
|
"eval_loss": 0.6733575463294983, |
|
"eval_rewards/accuracies": 0.7579365372657776, |
|
"eval_rewards/chosen": -1.946290373802185, |
|
"eval_rewards/margins": 3.1761116981506348, |
|
"eval_rewards/rejected": -5.122402191162109, |
|
"eval_runtime": 165.3843, |
|
"eval_samples_per_second": 12.093, |
|
"eval_steps_per_second": 0.381, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.499235474006116e-08, |
|
"logits/chosen": -2.846043109893799, |
|
"logits/rejected": -2.8555102348327637, |
|
"logps/chosen": -376.3670349121094, |
|
"logps/rejected": -341.2032470703125, |
|
"loss": 0.0246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03427610173821449, |
|
"rewards/margins": 8.654411315917969, |
|
"rewards/rejected": -8.688688278198242, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.308103975535168e-08, |
|
"logits/chosen": -2.8411316871643066, |
|
"logits/rejected": -2.8570432662963867, |
|
"logps/chosen": -373.59844970703125, |
|
"logps/rejected": -401.067138671875, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0549451112747192, |
|
"rewards/margins": 8.842530250549316, |
|
"rewards/rejected": -7.787585258483887, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.116972477064219e-08, |
|
"logits/chosen": -2.895292282104492, |
|
"logits/rejected": -2.854443073272705, |
|
"logps/chosen": -345.359375, |
|
"logps/rejected": -408.4029846191406, |
|
"loss": 0.0169, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.30679136514663696, |
|
"rewards/margins": 7.920645713806152, |
|
"rewards/rejected": -8.227437019348145, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.925840978593272e-08, |
|
"logits/chosen": -2.835501194000244, |
|
"logits/rejected": -2.896915912628174, |
|
"logps/chosen": -264.5487365722656, |
|
"logps/rejected": -387.1824951171875, |
|
"loss": 0.019, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.3389735221862793, |
|
"rewards/margins": 9.347002029418945, |
|
"rewards/rejected": -9.008028030395508, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.734709480122324e-08, |
|
"logits/chosen": -2.806790828704834, |
|
"logits/rejected": -2.8148555755615234, |
|
"logps/chosen": -308.4158630371094, |
|
"logps/rejected": -376.0751953125, |
|
"loss": 0.0166, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.38033682107925415, |
|
"rewards/margins": 8.450287818908691, |
|
"rewards/rejected": -8.8306245803833, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.543577981651376e-08, |
|
"logits/chosen": -2.7967381477355957, |
|
"logits/rejected": -2.792023181915283, |
|
"logps/chosen": -455.0721740722656, |
|
"logps/rejected": -405.89501953125, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7061554789543152, |
|
"rewards/margins": 9.382705688476562, |
|
"rewards/rejected": -8.676549911499023, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.352446483180428e-08, |
|
"logits/chosen": -2.8607754707336426, |
|
"logits/rejected": -2.8268520832061768, |
|
"logps/chosen": -331.96820068359375, |
|
"logps/rejected": -321.39422607421875, |
|
"loss": 0.0236, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4299864172935486, |
|
"rewards/margins": 8.559895515441895, |
|
"rewards/rejected": -8.129908561706543, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.161314984709481e-08, |
|
"logits/chosen": -2.8822827339172363, |
|
"logits/rejected": -2.893578052520752, |
|
"logps/chosen": -339.42449951171875, |
|
"logps/rejected": -356.1263427734375, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.2752775549888611, |
|
"rewards/margins": 8.704290390014648, |
|
"rewards/rejected": -8.4290132522583, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.970183486238531e-08, |
|
"logits/chosen": -2.828721284866333, |
|
"logits/rejected": -2.833087205886841, |
|
"logps/chosen": -328.60418701171875, |
|
"logps/rejected": -360.6470642089844, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.30725544691085815, |
|
"rewards/margins": 8.530462265014648, |
|
"rewards/rejected": -8.223207473754883, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.779051987767583e-08, |
|
"logits/chosen": -2.8711142539978027, |
|
"logits/rejected": -2.892519950866699, |
|
"logps/chosen": -370.29339599609375, |
|
"logps/rejected": -355.296875, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0712406188249588, |
|
"rewards/margins": 8.155640602111816, |
|
"rewards/rejected": -8.084399223327637, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_logits/chosen": -2.8333258628845215, |
|
"eval_logits/rejected": -2.8368897438049316, |
|
"eval_logps/chosen": -388.3058776855469, |
|
"eval_logps/rejected": -347.57440185546875, |
|
"eval_loss": 0.6890397667884827, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": -2.1114044189453125, |
|
"eval_rewards/margins": 3.4494407176971436, |
|
"eval_rewards/rejected": -5.560845375061035, |
|
"eval_runtime": 164.7492, |
|
"eval_samples_per_second": 12.14, |
|
"eval_steps_per_second": 0.382, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.587920489296635e-08, |
|
"logits/chosen": -2.855881690979004, |
|
"logits/rejected": -2.8854427337646484, |
|
"logps/chosen": -351.69769287109375, |
|
"logps/rejected": -358.4553527832031, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.2860868573188782, |
|
"rewards/margins": 8.237478256225586, |
|
"rewards/rejected": -8.523565292358398, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.396788990825688e-08, |
|
"logits/chosen": -2.8813681602478027, |
|
"logits/rejected": -2.9079108238220215, |
|
"logps/chosen": -322.7754821777344, |
|
"logps/rejected": -327.5832824707031, |
|
"loss": 0.0137, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06775089353322983, |
|
"rewards/margins": 8.004460334777832, |
|
"rewards/rejected": -8.072211265563965, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.20565749235474e-08, |
|
"logits/chosen": -2.810084819793701, |
|
"logits/rejected": -2.815389394760132, |
|
"logps/chosen": -325.9468688964844, |
|
"logps/rejected": -330.6631164550781, |
|
"loss": 0.0133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12425418943166733, |
|
"rewards/margins": 8.769124984741211, |
|
"rewards/rejected": -8.893379211425781, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.014525993883792e-08, |
|
"logits/chosen": -2.7919540405273438, |
|
"logits/rejected": -2.7934675216674805, |
|
"logps/chosen": -353.1927185058594, |
|
"logps/rejected": -365.3847351074219, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2072305679321289, |
|
"rewards/margins": 8.390886306762695, |
|
"rewards/rejected": -8.18365478515625, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.823394495412843e-08, |
|
"logits/chosen": -2.7678780555725098, |
|
"logits/rejected": -2.765697479248047, |
|
"logps/chosen": -358.8880615234375, |
|
"logps/rejected": -376.55706787109375, |
|
"loss": 0.02, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11311036348342896, |
|
"rewards/margins": 9.944357872009277, |
|
"rewards/rejected": -9.83124828338623, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.632262996941895e-08, |
|
"logits/chosen": -2.834345817565918, |
|
"logits/rejected": -2.7858288288116455, |
|
"logps/chosen": -336.33648681640625, |
|
"logps/rejected": -366.84991455078125, |
|
"loss": 0.0164, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12319626659154892, |
|
"rewards/margins": 9.35567569732666, |
|
"rewards/rejected": -9.478872299194336, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.441131498470948e-08, |
|
"logits/chosen": -2.847996950149536, |
|
"logits/rejected": -2.863615036010742, |
|
"logps/chosen": -304.58502197265625, |
|
"logps/rejected": -352.5277404785156, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.37131237983703613, |
|
"rewards/margins": 8.316202163696289, |
|
"rewards/rejected": -8.687514305114746, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.8685457706451416, |
|
"logits/rejected": -2.876739501953125, |
|
"logps/chosen": -394.1883850097656, |
|
"logps/rejected": -382.19287109375, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5528956055641174, |
|
"rewards/margins": 9.068865776062012, |
|
"rewards/rejected": -8.515970230102539, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.058868501529052e-08, |
|
"logits/chosen": -2.9075653553009033, |
|
"logits/rejected": -2.8715763092041016, |
|
"logps/chosen": -366.0291442871094, |
|
"logps/rejected": -358.59381103515625, |
|
"loss": 0.0202, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.14853370189666748, |
|
"rewards/margins": 8.612794876098633, |
|
"rewards/rejected": -8.464262008666992, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.8677370030581035e-08, |
|
"logits/chosen": -2.797910213470459, |
|
"logits/rejected": -2.840148687362671, |
|
"logps/chosen": -331.3750305175781, |
|
"logps/rejected": -344.34332275390625, |
|
"loss": 0.011, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.22872698307037354, |
|
"rewards/margins": 8.751152038574219, |
|
"rewards/rejected": -8.979879379272461, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_logits/chosen": -2.8258047103881836, |
|
"eval_logits/rejected": -2.8298983573913574, |
|
"eval_logps/chosen": -390.2113952636719, |
|
"eval_logps/rejected": -350.03887939453125, |
|
"eval_loss": 0.6998910307884216, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": -2.301961660385132, |
|
"eval_rewards/margins": 3.5053274631500244, |
|
"eval_rewards/rejected": -5.807290077209473, |
|
"eval_runtime": 164.7101, |
|
"eval_samples_per_second": 12.143, |
|
"eval_steps_per_second": 0.382, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6766055045871554e-08, |
|
"logits/chosen": -2.837218761444092, |
|
"logits/rejected": -2.8603646755218506, |
|
"logps/chosen": -325.1515197753906, |
|
"logps/rejected": -377.93707275390625, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.7425957918167114, |
|
"rewards/margins": 9.372137069702148, |
|
"rewards/rejected": -8.629541397094727, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.485474006116208e-08, |
|
"logits/chosen": -2.859614372253418, |
|
"logits/rejected": -2.907731294631958, |
|
"logps/chosen": -323.7126159667969, |
|
"logps/rejected": -337.7955627441406, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9508829116821289, |
|
"rewards/margins": 9.375367164611816, |
|
"rewards/rejected": -8.424482345581055, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.294342507645259e-08, |
|
"logits/chosen": -2.8355846405029297, |
|
"logits/rejected": -2.8445563316345215, |
|
"logps/chosen": -387.78021240234375, |
|
"logps/rejected": -341.34332275390625, |
|
"loss": 0.0122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06057599186897278, |
|
"rewards/margins": 8.586808204650879, |
|
"rewards/rejected": -8.647383689880371, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.1032110091743117e-08, |
|
"logits/chosen": -2.8416004180908203, |
|
"logits/rejected": -2.8135132789611816, |
|
"logps/chosen": -294.2474670410156, |
|
"logps/rejected": -342.1490173339844, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0905003547668457, |
|
"rewards/margins": 7.913638114929199, |
|
"rewards/rejected": -9.004137992858887, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.9120795107033635e-08, |
|
"logits/chosen": -2.808621406555176, |
|
"logits/rejected": -2.8129184246063232, |
|
"logps/chosen": -361.94146728515625, |
|
"logps/rejected": -372.2251892089844, |
|
"loss": 0.0108, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.09845595061779022, |
|
"rewards/margins": 9.0397367477417, |
|
"rewards/rejected": -9.138191223144531, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.7209480122324154e-08, |
|
"logits/chosen": -2.857626438140869, |
|
"logits/rejected": -2.854701042175293, |
|
"logps/chosen": -334.3382568359375, |
|
"logps/rejected": -397.60504150390625, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4237436354160309, |
|
"rewards/margins": 8.734308242797852, |
|
"rewards/rejected": -9.158050537109375, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.529816513761467e-08, |
|
"logits/chosen": -2.833742618560791, |
|
"logits/rejected": -2.848910093307495, |
|
"logps/chosen": -376.8042297363281, |
|
"logps/rejected": -431.098388671875, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.09423612058162689, |
|
"rewards/margins": 8.842924118041992, |
|
"rewards/rejected": -8.937159538269043, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.33868501529052e-08, |
|
"logits/chosen": -2.8585665225982666, |
|
"logits/rejected": -2.888023614883423, |
|
"logps/chosen": -299.4255065917969, |
|
"logps/rejected": -347.65032958984375, |
|
"loss": 0.0227, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.39737778902053833, |
|
"rewards/margins": 8.897387504577637, |
|
"rewards/rejected": -9.294764518737793, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.147553516819572e-08, |
|
"logits/chosen": -2.7752485275268555, |
|
"logits/rejected": -2.7679455280303955, |
|
"logps/chosen": -288.93524169921875, |
|
"logps/rejected": -362.3262634277344, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.21344709396362305, |
|
"rewards/margins": 8.969237327575684, |
|
"rewards/rejected": -9.182684898376465, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.9564220183486236e-08, |
|
"logits/chosen": -2.771638870239258, |
|
"logits/rejected": -2.7894372940063477, |
|
"logps/chosen": -358.27276611328125, |
|
"logps/rejected": -337.4700012207031, |
|
"loss": 0.0114, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.08803452551364899, |
|
"rewards/margins": 8.622003555297852, |
|
"rewards/rejected": -8.710036277770996, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_logits/chosen": -2.817159414291382, |
|
"eval_logits/rejected": -2.820690393447876, |
|
"eval_logps/chosen": -389.5738525390625, |
|
"eval_logps/rejected": -348.8511962890625, |
|
"eval_loss": 0.6951248645782471, |
|
"eval_rewards/accuracies": 0.7698412537574768, |
|
"eval_rewards/chosen": -2.238208055496216, |
|
"eval_rewards/margins": 3.4503118991851807, |
|
"eval_rewards/rejected": -5.6885199546813965, |
|
"eval_runtime": 164.1407, |
|
"eval_samples_per_second": 12.185, |
|
"eval_steps_per_second": 0.384, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.7652905198776755e-08, |
|
"logits/chosen": -2.8291115760803223, |
|
"logits/rejected": -2.812997817993164, |
|
"logps/chosen": -361.16973876953125, |
|
"logps/rejected": -371.3473205566406, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.2673804759979248, |
|
"rewards/margins": 8.214799880981445, |
|
"rewards/rejected": -8.482179641723633, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.574159021406728e-08, |
|
"logits/chosen": -2.8169431686401367, |
|
"logits/rejected": -2.780579090118408, |
|
"logps/chosen": -340.25567626953125, |
|
"logps/rejected": -452.1532287597656, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4266482889652252, |
|
"rewards/margins": 8.901152610778809, |
|
"rewards/rejected": -9.327801704406738, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.383027522935779e-08, |
|
"logits/chosen": -2.852733850479126, |
|
"logits/rejected": -2.8627407550811768, |
|
"logps/chosen": -345.10504150390625, |
|
"logps/rejected": -381.2701416015625, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.6709401607513428, |
|
"rewards/margins": 9.368196487426758, |
|
"rewards/rejected": -8.697256088256836, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.191896024464832e-08, |
|
"logits/chosen": -2.847033977508545, |
|
"logits/rejected": -2.880303382873535, |
|
"logps/chosen": -344.7592468261719, |
|
"logps/rejected": -366.505615234375, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2561042606830597, |
|
"rewards/margins": 9.626019477844238, |
|
"rewards/rejected": -9.369915008544922, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.0007645259938836e-08, |
|
"logits/chosen": -2.829150676727295, |
|
"logits/rejected": -2.8306522369384766, |
|
"logps/chosen": -299.6269836425781, |
|
"logps/rejected": -362.5341796875, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.312942773103714, |
|
"rewards/margins": 10.283103942871094, |
|
"rewards/rejected": -9.970161437988281, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.809633027522936e-08, |
|
"logits/chosen": -2.815882444381714, |
|
"logits/rejected": -2.7824299335479736, |
|
"logps/chosen": -315.24993896484375, |
|
"logps/rejected": -347.3058776855469, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.3008100688457489, |
|
"rewards/margins": 8.677629470825195, |
|
"rewards/rejected": -8.978440284729004, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.6185015290519877e-08, |
|
"logits/chosen": -2.8008246421813965, |
|
"logits/rejected": -2.7953882217407227, |
|
"logps/chosen": -333.69329833984375, |
|
"logps/rejected": -373.4231872558594, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08695399761199951, |
|
"rewards/margins": 7.9680304527282715, |
|
"rewards/rejected": -8.054986000061035, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.4273700305810396e-08, |
|
"logits/chosen": -2.790097951889038, |
|
"logits/rejected": -2.827036142349243, |
|
"logps/chosen": -378.98236083984375, |
|
"logps/rejected": -420.4288635253906, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.11630038917064667, |
|
"rewards/margins": 10.843367576599121, |
|
"rewards/rejected": -10.727069854736328, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.2362385321100918e-08, |
|
"logits/chosen": -2.7879481315612793, |
|
"logits/rejected": -2.7845988273620605, |
|
"logps/chosen": -350.80572509765625, |
|
"logps/rejected": -345.9007873535156, |
|
"loss": 0.018, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.001116895698942244, |
|
"rewards/margins": 8.821355819702148, |
|
"rewards/rejected": -8.822473526000977, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.0451070336391437e-08, |
|
"logits/chosen": -2.7627055644989014, |
|
"logits/rejected": -2.7335832118988037, |
|
"logps/chosen": -341.35662841796875, |
|
"logps/rejected": -318.8877258300781, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1377846747636795, |
|
"rewards/margins": 8.672611236572266, |
|
"rewards/rejected": -8.534826278686523, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_logits/chosen": -2.8117332458496094, |
|
"eval_logits/rejected": -2.8151025772094727, |
|
"eval_logps/chosen": -389.4859924316406, |
|
"eval_logps/rejected": -348.1217346191406, |
|
"eval_loss": 0.6910788416862488, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": -2.229426622390747, |
|
"eval_rewards/margins": 3.3861491680145264, |
|
"eval_rewards/rejected": -5.615575313568115, |
|
"eval_runtime": 165.138, |
|
"eval_samples_per_second": 12.111, |
|
"eval_steps_per_second": 0.381, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8539755351681956e-08, |
|
"logits/chosen": -2.7365012168884277, |
|
"logits/rejected": -2.788407325744629, |
|
"logps/chosen": -330.33197021484375, |
|
"logps/rejected": -380.91168212890625, |
|
"loss": 0.0163, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.0645025223493576, |
|
"rewards/margins": 8.317387580871582, |
|
"rewards/rejected": -8.381890296936035, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6628440366972478e-08, |
|
"logits/chosen": -2.8033618927001953, |
|
"logits/rejected": -2.8255763053894043, |
|
"logps/chosen": -373.3817443847656, |
|
"logps/rejected": -359.75421142578125, |
|
"loss": 0.0142, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.008309101685881615, |
|
"rewards/margins": 8.045055389404297, |
|
"rewards/rejected": -8.03674602508545, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4717125382262997e-08, |
|
"logits/chosen": -2.854548692703247, |
|
"logits/rejected": -2.8665812015533447, |
|
"logps/chosen": -339.0101318359375, |
|
"logps/rejected": -377.62847900390625, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.6132253408432007, |
|
"rewards/margins": 8.178349494934082, |
|
"rewards/rejected": -8.791574478149414, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2805810397553517e-08, |
|
"logits/chosen": -2.8801310062408447, |
|
"logits/rejected": -2.826385021209717, |
|
"logps/chosen": -346.5010681152344, |
|
"logps/rejected": -360.3970031738281, |
|
"loss": 0.026, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.022508572787046432, |
|
"rewards/margins": 8.316872596740723, |
|
"rewards/rejected": -8.33938217163086, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0894495412844038e-08, |
|
"logits/chosen": -2.758545160293579, |
|
"logits/rejected": -2.7856967449188232, |
|
"logps/chosen": -326.37896728515625, |
|
"logps/rejected": -359.3761291503906, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21567471325397491, |
|
"rewards/margins": 8.950045585632324, |
|
"rewards/rejected": -8.734369277954102, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.983180428134555e-09, |
|
"logits/chosen": -2.8466389179229736, |
|
"logits/rejected": -2.8278822898864746, |
|
"logps/chosen": -327.270751953125, |
|
"logps/rejected": -307.3416748046875, |
|
"loss": 0.0158, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5247852802276611, |
|
"rewards/margins": 8.214715957641602, |
|
"rewards/rejected": -8.739501953125, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.071865443425076e-09, |
|
"logits/chosen": -2.7788777351379395, |
|
"logits/rejected": -2.7975292205810547, |
|
"logps/chosen": -361.37384033203125, |
|
"logps/rejected": -391.8774719238281, |
|
"loss": 0.0134, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.35571426153182983, |
|
"rewards/margins": 8.981501579284668, |
|
"rewards/rejected": -9.337217330932617, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.1605504587155965e-09, |
|
"logits/chosen": -2.8489837646484375, |
|
"logits/rejected": -2.7892398834228516, |
|
"logps/chosen": -342.7001953125, |
|
"logps/rejected": -358.37469482421875, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04198342561721802, |
|
"rewards/margins": 8.752424240112305, |
|
"rewards/rejected": -8.710439682006836, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.249235474006116e-09, |
|
"logits/chosen": -2.8391404151916504, |
|
"logits/rejected": -2.862032175064087, |
|
"logps/chosen": -332.72100830078125, |
|
"logps/rejected": -362.6523132324219, |
|
"loss": 0.0282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.023767167702317238, |
|
"rewards/margins": 8.220497131347656, |
|
"rewards/rejected": -8.19672966003418, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.3379204892966359e-09, |
|
"logits/chosen": -2.854654550552368, |
|
"logits/rejected": -2.8116354942321777, |
|
"logps/chosen": -330.1148986816406, |
|
"logps/rejected": -358.40155029296875, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3439770042896271, |
|
"rewards/margins": 8.63255500793457, |
|
"rewards/rejected": -8.976531028747559, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_logits/chosen": -2.814802408218384, |
|
"eval_logits/rejected": -2.818735122680664, |
|
"eval_logps/chosen": -389.9677429199219, |
|
"eval_logps/rejected": -348.89801025390625, |
|
"eval_loss": 0.6909257769584656, |
|
"eval_rewards/accuracies": 0.7658730149269104, |
|
"eval_rewards/chosen": -2.2775967121124268, |
|
"eval_rewards/margins": 3.415607452392578, |
|
"eval_rewards/rejected": -5.693204402923584, |
|
"eval_runtime": 164.8452, |
|
"eval_samples_per_second": 12.133, |
|
"eval_steps_per_second": 0.382, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2907, |
|
"total_flos": 0.0, |
|
"train_loss": 0.23139607249449978, |
|
"train_runtime": 34004.0578, |
|
"train_samples_per_second": 5.467, |
|
"train_steps_per_second": 0.085 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2907, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|