|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.24, |
|
"eval_steps": 500, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.846153846153846e-06, |
|
"logits/chosen": 1.3807897567749023, |
|
"logits/rejected": 1.1952139139175415, |
|
"logps/chosen": -589.1343994140625, |
|
"logps/rejected": -494.7060241699219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.692307692307692e-06, |
|
"logits/chosen": 1.2665337324142456, |
|
"logits/rejected": 1.1713109016418457, |
|
"logps/chosen": -559.9566650390625, |
|
"logps/rejected": -549.1146850585938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.1538461538461538e-05, |
|
"logits/chosen": 1.3811347484588623, |
|
"logits/rejected": 1.216629981994629, |
|
"logps/chosen": -559.24951171875, |
|
"logps/rejected": -481.5151672363281, |
|
"loss": 0.6956, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.002765989163890481, |
|
"rewards/margins": -0.004863501060754061, |
|
"rewards/rejected": 0.002097511198371649, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.5384615384615384e-05, |
|
"logits/chosen": 1.3156853914260864, |
|
"logits/rejected": 1.2506608963012695, |
|
"logps/chosen": -554.9755249023438, |
|
"logps/rejected": -558.6537475585938, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.005945444107055664, |
|
"rewards/margins": 0.003013968002051115, |
|
"rewards/rejected": 0.0029314758721739054, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9230769230769228e-05, |
|
"logits/chosen": 1.3257012367248535, |
|
"logits/rejected": 1.223274827003479, |
|
"logps/chosen": -523.537109375, |
|
"logps/rejected": -585.207763671875, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0114411236718297, |
|
"rewards/margins": 0.005982697010040283, |
|
"rewards/rejected": 0.005458426661789417, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.3076923076923076e-05, |
|
"logits/chosen": 1.2537128925323486, |
|
"logits/rejected": 1.1953891515731812, |
|
"logps/chosen": -545.0478515625, |
|
"logps/rejected": -489.9993896484375, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.015848731622099876, |
|
"rewards/margins": 0.01023783627897501, |
|
"rewards/rejected": 0.0056108953431248665, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.692307692307692e-05, |
|
"logits/chosen": 1.312497854232788, |
|
"logits/rejected": 1.2055679559707642, |
|
"logps/chosen": -545.1070556640625, |
|
"logps/rejected": -509.77001953125, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.02675754949450493, |
|
"rewards/margins": 0.013801073655486107, |
|
"rewards/rejected": 0.012956475839018822, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.076923076923077e-05, |
|
"logits/chosen": 1.2600666284561157, |
|
"logits/rejected": 1.1770424842834473, |
|
"logps/chosen": -539.1686401367188, |
|
"logps/rejected": -483.5211181640625, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.033090125769376755, |
|
"rewards/margins": 0.012454044073820114, |
|
"rewards/rejected": 0.02063608169555664, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.461538461538461e-05, |
|
"logits/chosen": 1.3227227926254272, |
|
"logits/rejected": 1.2356113195419312, |
|
"logps/chosen": -565.9663696289062, |
|
"logps/rejected": -622.9007568359375, |
|
"loss": 0.6839, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.04537828266620636, |
|
"rewards/margins": 0.019218124449253082, |
|
"rewards/rejected": 0.026160158216953278, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.8461538461538456e-05, |
|
"logits/chosen": 1.3510740995407104, |
|
"logits/rejected": 1.2296315431594849, |
|
"logps/chosen": -593.7393798828125, |
|
"logps/rejected": -594.9727172851562, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.07614221423864365, |
|
"rewards/margins": 0.0457853302359581, |
|
"rewards/rejected": 0.030356884002685547, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.23076923076923e-05, |
|
"logits/chosen": 1.2242865562438965, |
|
"logits/rejected": 1.2467900514602661, |
|
"logps/chosen": -508.84783935546875, |
|
"logps/rejected": -598.6978759765625, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.08538543432950974, |
|
"rewards/margins": 0.05763913318514824, |
|
"rewards/rejected": 0.027746297419071198, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.615384615384615e-05, |
|
"logits/chosen": 1.3490934371948242, |
|
"logits/rejected": 1.301114559173584, |
|
"logps/chosen": -570.9324340820312, |
|
"logps/rejected": -578.1993408203125, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.09820537269115448, |
|
"rewards/margins": 0.06798899918794632, |
|
"rewards/rejected": 0.030216386541724205, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9999999999999996e-05, |
|
"logits/chosen": 1.31833016872406, |
|
"logits/rejected": 1.2298263311386108, |
|
"logps/chosen": -498.8169860839844, |
|
"logps/rejected": -478.9023742675781, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.11317408084869385, |
|
"rewards/margins": 0.06994115561246872, |
|
"rewards/rejected": 0.043232932686805725, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 5.384615384615384e-05, |
|
"logits/chosen": 1.3441507816314697, |
|
"logits/rejected": 1.2419227361679077, |
|
"logps/chosen": -570.9219970703125, |
|
"logps/rejected": -512.0221557617188, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.13483530282974243, |
|
"rewards/margins": 0.17350149154663086, |
|
"rewards/rejected": -0.03866620361804962, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.769230769230769e-05, |
|
"logits/chosen": 1.301425814628601, |
|
"logits/rejected": 1.2630890607833862, |
|
"logps/chosen": -525.2069091796875, |
|
"logps/rejected": -515.6624145507812, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.14281943440437317, |
|
"rewards/margins": 0.1536417305469513, |
|
"rewards/rejected": -0.010822296142578125, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.153846153846154e-05, |
|
"logits/chosen": 1.3221077919006348, |
|
"logits/rejected": 1.2412704229354858, |
|
"logps/chosen": -522.0698852539062, |
|
"logps/rejected": -479.1268615722656, |
|
"loss": 0.6041, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.18193425238132477, |
|
"rewards/margins": 0.20868375897407532, |
|
"rewards/rejected": -0.026749493554234505, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.538461538461539e-05, |
|
"logits/chosen": 1.2305197715759277, |
|
"logits/rejected": 1.1909728050231934, |
|
"logps/chosen": -591.547607421875, |
|
"logps/rejected": -501.61956787109375, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.22359740734100342, |
|
"rewards/margins": 0.16370725631713867, |
|
"rewards/rejected": 0.059890177100896835, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.923076923076922e-05, |
|
"logits/chosen": 1.2290271520614624, |
|
"logits/rejected": 1.2503975629806519, |
|
"logps/chosen": -583.2138671875, |
|
"logps/rejected": -551.908447265625, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.25082916021347046, |
|
"rewards/margins": 0.29117509722709656, |
|
"rewards/rejected": -0.04034590348601341, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 7.307692307692307e-05, |
|
"logits/chosen": 1.2445173263549805, |
|
"logits/rejected": 1.274112582206726, |
|
"logps/chosen": -476.1685791015625, |
|
"logps/rejected": -558.716796875, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.2891727685928345, |
|
"rewards/margins": 0.18064022064208984, |
|
"rewards/rejected": 0.10853258520364761, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 7.692307692307691e-05, |
|
"logits/chosen": 1.3747544288635254, |
|
"logits/rejected": 1.1747362613677979, |
|
"logps/chosen": -604.9407348632812, |
|
"logps/rejected": -521.9097900390625, |
|
"loss": 0.5778, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3479543924331665, |
|
"rewards/margins": 0.2904755175113678, |
|
"rewards/rejected": 0.05747886002063751, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.076923076923076e-05, |
|
"logits/chosen": 1.215498447418213, |
|
"logits/rejected": 1.1988316774368286, |
|
"logps/chosen": -508.58685302734375, |
|
"logps/rejected": -480.9490051269531, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.3200737535953522, |
|
"rewards/margins": 0.22501060366630554, |
|
"rewards/rejected": 0.09506310522556305, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.46153846153846e-05, |
|
"logits/chosen": 1.2625510692596436, |
|
"logits/rejected": 1.2751553058624268, |
|
"logps/chosen": -464.3768615722656, |
|
"logps/rejected": -548.1248779296875, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2795701324939728, |
|
"rewards/margins": 0.18192264437675476, |
|
"rewards/rejected": 0.09764745086431503, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.846153846153845e-05, |
|
"logits/chosen": 1.228266716003418, |
|
"logits/rejected": 1.1854723691940308, |
|
"logps/chosen": -542.0804443359375, |
|
"logps/rejected": -593.2991943359375, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.5573378801345825, |
|
"rewards/margins": 0.559643566608429, |
|
"rewards/rejected": -0.0023057162761688232, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.23076923076923e-05, |
|
"logits/chosen": 1.3595786094665527, |
|
"logits/rejected": 1.299391746520996, |
|
"logps/chosen": -598.842041015625, |
|
"logps/rejected": -521.9869384765625, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3911605179309845, |
|
"rewards/margins": 0.45501241087913513, |
|
"rewards/rejected": -0.06385190039873123, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.615384615384615e-05, |
|
"logits/chosen": 1.244804859161377, |
|
"logits/rejected": 1.2789154052734375, |
|
"logps/chosen": -527.1282958984375, |
|
"logps/rejected": -562.9415283203125, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.3064769506454468, |
|
"rewards/margins": 0.4528440833091736, |
|
"rewards/rejected": -0.1463671177625656, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.999999999999999e-05, |
|
"logits/chosen": 1.2605584859848022, |
|
"logits/rejected": 1.257567048072815, |
|
"logps/chosen": -518.2035522460938, |
|
"logps/rejected": -553.550537109375, |
|
"loss": 0.5622, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.3198173940181732, |
|
"rewards/margins": 0.533422589302063, |
|
"rewards/rejected": -0.21360518038272858, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00010384615384615383, |
|
"logits/chosen": 1.2905932664871216, |
|
"logits/rejected": 1.252805233001709, |
|
"logps/chosen": -502.51318359375, |
|
"logps/rejected": -492.2623596191406, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.2552236318588257, |
|
"rewards/margins": 0.4563944339752197, |
|
"rewards/rejected": -0.20117078721523285, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00010769230769230768, |
|
"logits/chosen": 1.360573172569275, |
|
"logits/rejected": 1.246628999710083, |
|
"logps/chosen": -593.9207153320312, |
|
"logps/rejected": -521.7044677734375, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.24957714974880219, |
|
"rewards/margins": 0.6095183491706848, |
|
"rewards/rejected": -0.35994118452072144, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00011153846153846153, |
|
"logits/chosen": 1.3290568590164185, |
|
"logits/rejected": 1.1086769104003906, |
|
"logps/chosen": -588.37451171875, |
|
"logps/rejected": -555.8126220703125, |
|
"loss": 0.4523, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.22330154478549957, |
|
"rewards/margins": 0.8921126127243042, |
|
"rewards/rejected": -0.6688110828399658, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00011538461538461538, |
|
"logits/chosen": 1.283523678779602, |
|
"logits/rejected": 1.2930572032928467, |
|
"logps/chosen": -533.7445678710938, |
|
"logps/rejected": -590.4415283203125, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.2506100535392761, |
|
"rewards/margins": 0.8535110950469971, |
|
"rewards/rejected": -0.6029011011123657, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00011923076923076922, |
|
"logits/chosen": 1.1903033256530762, |
|
"logits/rejected": 1.2316640615463257, |
|
"logps/chosen": -544.7385864257812, |
|
"logps/rejected": -559.547607421875, |
|
"loss": 0.453, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.16801050305366516, |
|
"rewards/margins": 0.6988197565078735, |
|
"rewards/rejected": -0.530809223651886, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00012307692307692307, |
|
"logits/chosen": 1.2974334955215454, |
|
"logits/rejected": 1.2153936624526978, |
|
"logps/chosen": -569.9619140625, |
|
"logps/rejected": -532.5298461914062, |
|
"loss": 0.3596, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.41700971126556396, |
|
"rewards/margins": 1.2314927577972412, |
|
"rewards/rejected": -0.814483106136322, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0001269230769230769, |
|
"logits/chosen": 1.20902681350708, |
|
"logits/rejected": 1.1872200965881348, |
|
"logps/chosen": -517.0770263671875, |
|
"logps/rejected": -496.25469970703125, |
|
"loss": 0.3913, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.6890252828598022, |
|
"rewards/margins": 1.2427911758422852, |
|
"rewards/rejected": -0.5537658929824829, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00013076923076923077, |
|
"logits/chosen": 1.1348516941070557, |
|
"logits/rejected": 1.2495156526565552, |
|
"logps/chosen": -473.9527893066406, |
|
"logps/rejected": -583.898193359375, |
|
"loss": 0.3421, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.3356397747993469, |
|
"rewards/margins": 1.326012134552002, |
|
"rewards/rejected": -0.9903723001480103, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0001346153846153846, |
|
"logits/chosen": 1.199057698249817, |
|
"logits/rejected": 1.2061611413955688, |
|
"logps/chosen": -508.8526611328125, |
|
"logps/rejected": -528.3182983398438, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.07617084681987762, |
|
"rewards/margins": 0.7057029008865356, |
|
"rewards/rejected": -0.6295321583747864, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00013846153846153845, |
|
"logits/chosen": 1.324285864830017, |
|
"logits/rejected": 1.1698598861694336, |
|
"logps/chosen": -636.7570190429688, |
|
"logps/rejected": -569.9312744140625, |
|
"loss": 0.3545, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5845645666122437, |
|
"rewards/margins": 1.4404823780059814, |
|
"rewards/rejected": -0.855917751789093, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00014230769230769228, |
|
"logits/chosen": 1.3375169038772583, |
|
"logits/rejected": 1.226860761642456, |
|
"logps/chosen": -550.1502685546875, |
|
"logps/rejected": -568.7561645507812, |
|
"loss": 0.2948, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.7275359034538269, |
|
"rewards/margins": 1.519990086555481, |
|
"rewards/rejected": -0.7924542427062988, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00014615384615384615, |
|
"logits/chosen": 1.3178166151046753, |
|
"logits/rejected": 1.3225042819976807, |
|
"logps/chosen": -583.7266235351562, |
|
"logps/rejected": -640.8178100585938, |
|
"loss": 0.2679, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.0492963790893555, |
|
"rewards/margins": 1.9212793111801147, |
|
"rewards/rejected": -0.8719831109046936, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015, |
|
"logits/chosen": 1.424353837966919, |
|
"logits/rejected": 1.2278132438659668, |
|
"logps/chosen": -576.828125, |
|
"logps/rejected": -532.112060546875, |
|
"loss": 0.409, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.294251561164856, |
|
"rewards/margins": 1.8459293842315674, |
|
"rewards/rejected": -0.5516780018806458, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00015384615384615382, |
|
"logits/chosen": 1.3266693353652954, |
|
"logits/rejected": 1.4058163166046143, |
|
"logps/chosen": -523.482421875, |
|
"logps/rejected": -534.4713134765625, |
|
"loss": 0.2549, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.9553894996643066, |
|
"rewards/margins": 1.72726571559906, |
|
"rewards/rejected": -0.7718762159347534, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001576923076923077, |
|
"logits/chosen": 1.2602558135986328, |
|
"logits/rejected": 1.2625583410263062, |
|
"logps/chosen": -522.94287109375, |
|
"logps/rejected": -609.2876586914062, |
|
"loss": 0.181, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.3178558349609375, |
|
"rewards/margins": 2.475386381149292, |
|
"rewards/rejected": -1.1575307846069336, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00016153846153846153, |
|
"logits/chosen": 1.2560456991195679, |
|
"logits/rejected": 1.3124988079071045, |
|
"logps/chosen": -557.4879150390625, |
|
"logps/rejected": -604.8274536132812, |
|
"loss": 0.2405, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.4575282335281372, |
|
"rewards/margins": 2.372474193572998, |
|
"rewards/rejected": -0.9149457812309265, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001653846153846154, |
|
"logits/chosen": 1.2838218212127686, |
|
"logits/rejected": 1.0645534992218018, |
|
"logps/chosen": -572.41357421875, |
|
"logps/rejected": -516.2637939453125, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6796542406082153, |
|
"rewards/margins": 2.779283285140991, |
|
"rewards/rejected": -1.0996291637420654, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001692307692307692, |
|
"logits/chosen": 1.1537644863128662, |
|
"logits/rejected": 1.1177821159362793, |
|
"logps/chosen": -455.26904296875, |
|
"logps/rejected": -498.28900146484375, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.2386869192123413, |
|
"rewards/margins": 2.298076868057251, |
|
"rewards/rejected": -1.0593899488449097, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00017307692307692304, |
|
"logits/chosen": 1.2277421951293945, |
|
"logits/rejected": 1.1039767265319824, |
|
"logps/chosen": -510.9812927246094, |
|
"logps/rejected": -503.68829345703125, |
|
"loss": 0.2239, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.2717504501342773, |
|
"rewards/margins": 2.59214186668396, |
|
"rewards/rejected": -1.3203915357589722, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001769230769230769, |
|
"logits/chosen": 1.2631361484527588, |
|
"logits/rejected": 1.221813440322876, |
|
"logps/chosen": -528.2451171875, |
|
"logps/rejected": -565.52783203125, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7057870626449585, |
|
"rewards/margins": 2.7309272289276123, |
|
"rewards/rejected": -1.025140404701233, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018076923076923074, |
|
"logits/chosen": 1.1752557754516602, |
|
"logits/rejected": 1.2416189908981323, |
|
"logps/chosen": -501.43609619140625, |
|
"logps/rejected": -574.4725341796875, |
|
"loss": 0.2208, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.349045753479004, |
|
"rewards/margins": 2.619417190551758, |
|
"rewards/rejected": -1.2703715562820435, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001846153846153846, |
|
"logits/chosen": 1.2255234718322754, |
|
"logits/rejected": 1.1879360675811768, |
|
"logps/chosen": -533.389404296875, |
|
"logps/rejected": -574.5704956054688, |
|
"loss": 0.2814, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.4104804992675781, |
|
"rewards/margins": 3.444915294647217, |
|
"rewards/rejected": -2.0344350337982178, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00018846153846153844, |
|
"logits/chosen": 1.3353238105773926, |
|
"logits/rejected": 1.133821964263916, |
|
"logps/chosen": -516.5598754882812, |
|
"logps/rejected": -498.43603515625, |
|
"loss": 0.1799, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.9517850875854492, |
|
"rewards/margins": 3.3812241554260254, |
|
"rewards/rejected": -1.4294389486312866, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001923076923076923, |
|
"logits/chosen": 1.4486721754074097, |
|
"logits/rejected": 1.2504708766937256, |
|
"logps/chosen": -577.9517822265625, |
|
"logps/rejected": -578.573974609375, |
|
"loss": 0.1502, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.9723610877990723, |
|
"rewards/margins": 3.7397069931030273, |
|
"rewards/rejected": -1.767345666885376, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00019615384615384615, |
|
"logits/chosen": 1.156247854232788, |
|
"logits/rejected": 1.1714026927947998, |
|
"logps/chosen": -544.3547973632812, |
|
"logps/rejected": -576.8724365234375, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.1724624633789062, |
|
"rewards/margins": 2.6890792846679688, |
|
"rewards/rejected": -1.5166168212890625, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00019999999999999998, |
|
"logits/chosen": 1.144045352935791, |
|
"logits/rejected": 1.1430819034576416, |
|
"logps/chosen": -508.1917724609375, |
|
"logps/rejected": -617.259521484375, |
|
"loss": 0.1589, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.5832880735397339, |
|
"rewards/margins": 4.405728816986084, |
|
"rewards/rejected": -2.8224408626556396, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00020384615384615385, |
|
"logits/chosen": 1.2338566780090332, |
|
"logits/rejected": 1.1481688022613525, |
|
"logps/chosen": -531.331298828125, |
|
"logps/rejected": -501.3189697265625, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.7195085287094116, |
|
"rewards/margins": 3.0136146545410156, |
|
"rewards/rejected": -2.2941062450408936, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00020769230769230766, |
|
"logits/chosen": 1.2266112565994263, |
|
"logits/rejected": 1.192571997642517, |
|
"logps/chosen": -549.5713500976562, |
|
"logps/rejected": -617.6051025390625, |
|
"loss": 0.2518, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.4716801345348358, |
|
"rewards/margins": 4.073496341705322, |
|
"rewards/rejected": -3.6018166542053223, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00021153846153846152, |
|
"logits/chosen": 1.1858152151107788, |
|
"logits/rejected": 1.1164907217025757, |
|
"logps/chosen": -556.048583984375, |
|
"logps/rejected": -599.2432250976562, |
|
"loss": 0.0601, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6191356182098389, |
|
"rewards/margins": 4.675748825073242, |
|
"rewards/rejected": -4.056613445281982, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00021538461538461536, |
|
"logits/chosen": 1.3085862398147583, |
|
"logits/rejected": 1.105547547340393, |
|
"logps/chosen": -603.694580078125, |
|
"logps/rejected": -638.7554931640625, |
|
"loss": 0.1046, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.4260401725769043, |
|
"rewards/margins": 4.424009799957275, |
|
"rewards/rejected": -3.997969150543213, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002192307692307692, |
|
"logits/chosen": 1.0858182907104492, |
|
"logits/rejected": 1.0118762254714966, |
|
"logps/chosen": -547.671875, |
|
"logps/rejected": -617.9951782226562, |
|
"loss": 0.222, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.33517026901245117, |
|
"rewards/margins": 3.591820001602173, |
|
"rewards/rejected": -3.2566497325897217, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00022307692307692306, |
|
"logits/chosen": 1.0582268238067627, |
|
"logits/rejected": 1.1294262409210205, |
|
"logps/chosen": -482.9873962402344, |
|
"logps/rejected": -656.79736328125, |
|
"loss": 0.0781, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.6922726035118103, |
|
"rewards/margins": 4.7888617515563965, |
|
"rewards/rejected": -4.0965895652771, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0002269230769230769, |
|
"logits/chosen": 1.1849119663238525, |
|
"logits/rejected": 0.989042341709137, |
|
"logps/chosen": -579.6500854492188, |
|
"logps/rejected": -521.362060546875, |
|
"loss": 0.3419, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.1423124074935913, |
|
"rewards/margins": 2.5491743087768555, |
|
"rewards/rejected": -2.6914870738983154, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00023076923076923076, |
|
"logits/chosen": 1.2060493230819702, |
|
"logits/rejected": 1.0908496379852295, |
|
"logps/chosen": -503.88165283203125, |
|
"logps/rejected": -486.8570251464844, |
|
"loss": 0.2709, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.47895151376724243, |
|
"rewards/margins": 3.0553998947143555, |
|
"rewards/rejected": -2.576448917388916, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0002346153846153846, |
|
"logits/chosen": 1.331544041633606, |
|
"logits/rejected": 1.2061303853988647, |
|
"logps/chosen": -567.0886840820312, |
|
"logps/rejected": -555.7452392578125, |
|
"loss": 0.1176, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.0240767002105713, |
|
"rewards/margins": 3.8347549438476562, |
|
"rewards/rejected": -1.810678243637085, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00023846153846153844, |
|
"logits/chosen": 1.4309509992599487, |
|
"logits/rejected": 1.2126150131225586, |
|
"logps/chosen": -569.264892578125, |
|
"logps/rejected": -555.525390625, |
|
"loss": 0.1622, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.7101458311080933, |
|
"rewards/margins": 3.121368885040283, |
|
"rewards/rejected": -1.4112231731414795, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0002423076923076923, |
|
"logits/chosen": 1.4115931987762451, |
|
"logits/rejected": 1.3143726587295532, |
|
"logps/chosen": -567.4575805664062, |
|
"logps/rejected": -551.8060302734375, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.250972270965576, |
|
"rewards/margins": 3.4197261333465576, |
|
"rewards/rejected": -1.168753981590271, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00024615384615384614, |
|
"logits/chosen": 1.4023628234863281, |
|
"logits/rejected": 1.4432225227355957, |
|
"logps/chosen": -561.3869018554688, |
|
"logps/rejected": -625.6553344726562, |
|
"loss": 0.137, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.7209982872009277, |
|
"rewards/margins": 3.365004777908325, |
|
"rewards/rejected": -0.6440060138702393, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00025, |
|
"logits/chosen": 1.403619647026062, |
|
"logits/rejected": 1.3114700317382812, |
|
"logps/chosen": -557.41796875, |
|
"logps/rejected": -585.562744140625, |
|
"loss": 0.0987, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.93491268157959, |
|
"rewards/margins": 3.9586920738220215, |
|
"rewards/rejected": -1.0237791538238525, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0002538461538461538, |
|
"logits/chosen": 1.2774848937988281, |
|
"logits/rejected": 1.391822338104248, |
|
"logps/chosen": -501.67236328125, |
|
"logps/rejected": -607.6801147460938, |
|
"loss": 0.1317, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.4067182540893555, |
|
"rewards/margins": 3.5049643516540527, |
|
"rewards/rejected": -1.0982458591461182, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0002576923076923077, |
|
"logits/chosen": 1.2345640659332275, |
|
"logits/rejected": 1.3340271711349487, |
|
"logps/chosen": -502.60589599609375, |
|
"logps/rejected": -594.7681884765625, |
|
"loss": 0.1882, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.6566812992095947, |
|
"rewards/margins": 3.0219056606292725, |
|
"rewards/rejected": -1.3652244806289673, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00026153846153846154, |
|
"logits/chosen": 1.3136622905731201, |
|
"logits/rejected": 1.1759097576141357, |
|
"logps/chosen": -554.1909790039062, |
|
"logps/rejected": -477.70074462890625, |
|
"loss": 0.1877, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 1.4338759183883667, |
|
"rewards/margins": 3.288071393966675, |
|
"rewards/rejected": -1.854195237159729, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00026538461538461536, |
|
"logits/chosen": 1.3607081174850464, |
|
"logits/rejected": 1.173765778541565, |
|
"logps/chosen": -581.3910522460938, |
|
"logps/rejected": -505.4391784667969, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.8081064224243164, |
|
"rewards/margins": 4.335569858551025, |
|
"rewards/rejected": -2.527463436126709, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0002692307692307692, |
|
"logits/chosen": 1.3691301345825195, |
|
"logits/rejected": 1.3659199476242065, |
|
"logps/chosen": -583.293212890625, |
|
"logps/rejected": -593.8999633789062, |
|
"loss": 0.1304, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.1818904876708984, |
|
"rewards/margins": 3.6441099643707275, |
|
"rewards/rejected": -2.46221923828125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00027307692307692303, |
|
"logits/chosen": 1.1499935388565063, |
|
"logits/rejected": 1.1766197681427002, |
|
"logps/chosen": -482.293701171875, |
|
"logps/rejected": -570.5851440429688, |
|
"loss": 0.0857, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.165338158607483, |
|
"rewards/margins": 3.8725168704986572, |
|
"rewards/rejected": -2.7071785926818848, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0002769230769230769, |
|
"logits/chosen": 1.325207233428955, |
|
"logits/rejected": 1.1687246561050415, |
|
"logps/chosen": -571.1265869140625, |
|
"logps/rejected": -600.61865234375, |
|
"loss": 0.1814, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.56050443649292, |
|
"rewards/margins": 3.7676329612731934, |
|
"rewards/rejected": -2.2071282863616943, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00028076923076923076, |
|
"logits/chosen": 1.3450841903686523, |
|
"logits/rejected": 1.151707410812378, |
|
"logps/chosen": -538.774658203125, |
|
"logps/rejected": -446.6949157714844, |
|
"loss": 0.091, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.3523597717285156, |
|
"rewards/margins": 3.761559247970581, |
|
"rewards/rejected": -2.4091997146606445, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00028461538461538457, |
|
"logits/chosen": 1.3139710426330566, |
|
"logits/rejected": 1.1975148916244507, |
|
"logps/chosen": -544.6610107421875, |
|
"logps/rejected": -459.76275634765625, |
|
"loss": 0.2291, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.633323311805725, |
|
"rewards/margins": 3.0829625129699707, |
|
"rewards/rejected": -1.4496394395828247, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00028846153846153843, |
|
"logits/chosen": 1.4247934818267822, |
|
"logits/rejected": 1.3758761882781982, |
|
"logps/chosen": -518.5802612304688, |
|
"logps/rejected": -527.0255737304688, |
|
"loss": 0.1177, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 2.010237216949463, |
|
"rewards/margins": 3.8918490409851074, |
|
"rewards/rejected": -1.8816115856170654, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0002923076923076923, |
|
"logits/chosen": 1.3634833097457886, |
|
"logits/rejected": 1.2164397239685059, |
|
"logps/chosen": -534.68505859375, |
|
"logps/rejected": -506.5450744628906, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.536834239959717, |
|
"rewards/margins": 3.897109031677246, |
|
"rewards/rejected": -1.3602746725082397, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00029615384615384616, |
|
"logits/chosen": 1.3006478548049927, |
|
"logits/rejected": 1.3856381177902222, |
|
"logps/chosen": -469.55450439453125, |
|
"logps/rejected": -642.709716796875, |
|
"loss": 0.148, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.724798560142517, |
|
"rewards/margins": 3.511629343032837, |
|
"rewards/rejected": -1.7868304252624512, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0003, |
|
"logits/chosen": 1.328688621520996, |
|
"logits/rejected": 1.1962082386016846, |
|
"logps/chosen": -516.2116088867188, |
|
"logps/rejected": -504.8653259277344, |
|
"loss": 0.0937, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.6297885179519653, |
|
"rewards/margins": 3.997544288635254, |
|
"rewards/rejected": -2.36775541305542, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00029957264957264953, |
|
"logits/chosen": 1.3884762525558472, |
|
"logits/rejected": 1.2696239948272705, |
|
"logps/chosen": -575.4322509765625, |
|
"logps/rejected": -583.3463745117188, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6055470705032349, |
|
"rewards/margins": 4.734403133392334, |
|
"rewards/rejected": -3.1288557052612305, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00029914529914529915, |
|
"logits/chosen": 1.2306060791015625, |
|
"logits/rejected": 1.201812982559204, |
|
"logps/chosen": -526.1033935546875, |
|
"logps/rejected": -625.164306640625, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.5637297034263611, |
|
"rewards/margins": 4.7755279541015625, |
|
"rewards/rejected": -4.211798191070557, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0002987179487179487, |
|
"logits/chosen": 1.2625794410705566, |
|
"logits/rejected": 1.1102485656738281, |
|
"logps/chosen": -581.4782104492188, |
|
"logps/rejected": -620.8692626953125, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47795385122299194, |
|
"rewards/margins": 5.5137505531311035, |
|
"rewards/rejected": -5.035797119140625, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00029829059829059826, |
|
"logits/chosen": 1.193795919418335, |
|
"logits/rejected": 1.13469660282135, |
|
"logps/chosen": -534.1414794921875, |
|
"logps/rejected": -540.5661010742188, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0871587023139, |
|
"rewards/margins": 4.834710121154785, |
|
"rewards/rejected": -4.747550964355469, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.0002978632478632478, |
|
"logits/chosen": 1.245851755142212, |
|
"logits/rejected": 1.0414592027664185, |
|
"logps/chosen": -603.68212890625, |
|
"logps/rejected": -521.8074340820312, |
|
"loss": 0.046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3754722476005554, |
|
"rewards/margins": 4.623739719390869, |
|
"rewards/rejected": -4.248267650604248, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00029743589743589743, |
|
"logits/chosen": 1.0934433937072754, |
|
"logits/rejected": 1.0990025997161865, |
|
"logps/chosen": -511.38897705078125, |
|
"logps/rejected": -607.8187255859375, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.49227023124694824, |
|
"rewards/margins": 5.553871154785156, |
|
"rewards/rejected": -6.046142101287842, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.000297008547008547, |
|
"logits/chosen": 1.1143027544021606, |
|
"logits/rejected": 1.1277693510055542, |
|
"logps/chosen": -501.1644287109375, |
|
"logps/rejected": -627.3696899414062, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9094219207763672, |
|
"rewards/margins": 5.872971534729004, |
|
"rewards/rejected": -4.9635491371154785, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00029658119658119655, |
|
"logits/chosen": 1.2900817394256592, |
|
"logits/rejected": 1.258035659790039, |
|
"logps/chosen": -482.3966369628906, |
|
"logps/rejected": -613.1928100585938, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0523629188537598, |
|
"rewards/margins": 6.690260887145996, |
|
"rewards/rejected": -5.6378984451293945, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.00029615384615384616, |
|
"logits/chosen": 1.2223701477050781, |
|
"logits/rejected": 1.1854349374771118, |
|
"logps/chosen": -504.41375732421875, |
|
"logps/rejected": -530.4273071289062, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.9830568432807922, |
|
"rewards/margins": 5.67386531829834, |
|
"rewards/rejected": -4.6908087730407715, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0002957264957264957, |
|
"logits/chosen": 1.3008583784103394, |
|
"logits/rejected": 1.2168331146240234, |
|
"logps/chosen": -580.7012939453125, |
|
"logps/rejected": -591.1567993164062, |
|
"loss": 0.028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.010232925415039, |
|
"rewards/margins": 5.921438694000244, |
|
"rewards/rejected": -4.911205768585205, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.0002952991452991453, |
|
"logits/chosen": 1.4120073318481445, |
|
"logits/rejected": 1.3543351888656616, |
|
"logps/chosen": -551.34130859375, |
|
"logps/rejected": -607.3863525390625, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7222862243652344, |
|
"rewards/margins": 6.246278762817383, |
|
"rewards/rejected": -4.523993492126465, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00029487179487179484, |
|
"logits/chosen": 1.3433583974838257, |
|
"logits/rejected": 1.2706623077392578, |
|
"logps/chosen": -550.4794921875, |
|
"logps/rejected": -574.7581787109375, |
|
"loss": 0.0251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3056535720825195, |
|
"rewards/margins": 6.369759559631348, |
|
"rewards/rejected": -4.064105033874512, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.00029444444444444445, |
|
"logits/chosen": 1.4810067415237427, |
|
"logits/rejected": 1.3233308792114258, |
|
"logps/chosen": -586.744384765625, |
|
"logps/rejected": -591.3648071289062, |
|
"loss": 0.0176, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4182064533233643, |
|
"rewards/margins": 6.562655448913574, |
|
"rewards/rejected": -4.144449710845947, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.000294017094017094, |
|
"logits/chosen": 1.4654786586761475, |
|
"logits/rejected": 1.3706129789352417, |
|
"logps/chosen": -496.12701416015625, |
|
"logps/rejected": -567.374755859375, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9136104583740234, |
|
"rewards/margins": 5.71716833114624, |
|
"rewards/rejected": -3.8035576343536377, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00029358974358974357, |
|
"logits/chosen": 1.4996682405471802, |
|
"logits/rejected": 1.4585434198379517, |
|
"logps/chosen": -568.8134155273438, |
|
"logps/rejected": -659.0804443359375, |
|
"loss": 0.0161, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9488861560821533, |
|
"rewards/margins": 6.1611528396606445, |
|
"rewards/rejected": -4.2122673988342285, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00029316239316239313, |
|
"logits/chosen": 1.433556318283081, |
|
"logits/rejected": 1.4805834293365479, |
|
"logps/chosen": -551.3663940429688, |
|
"logps/rejected": -583.002685546875, |
|
"loss": 0.039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.608030080795288, |
|
"rewards/margins": 5.825406551361084, |
|
"rewards/rejected": -3.217377185821533, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.0002927350427350427, |
|
"logits/chosen": 1.5423190593719482, |
|
"logits/rejected": 1.3645411729812622, |
|
"logps/chosen": -541.7529296875, |
|
"logps/rejected": -540.1168212890625, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.6581199169158936, |
|
"rewards/margins": 6.796147346496582, |
|
"rewards/rejected": -4.138028144836426, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0002923076923076923, |
|
"logits/chosen": 1.3319764137268066, |
|
"logits/rejected": 1.3734033107757568, |
|
"logps/chosen": -505.6252136230469, |
|
"logps/rejected": -595.2819213867188, |
|
"loss": 0.0207, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1945226192474365, |
|
"rewards/margins": 5.589131832122803, |
|
"rewards/rejected": -4.3946099281311035, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.00029188034188034186, |
|
"logits/chosen": 1.326178789138794, |
|
"logits/rejected": 1.4406118392944336, |
|
"logps/chosen": -514.8160400390625, |
|
"logps/rejected": -575.283447265625, |
|
"loss": 0.0135, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6229735612869263, |
|
"rewards/margins": 6.596070766448975, |
|
"rewards/rejected": -4.973097324371338, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.0002914529914529914, |
|
"logits/chosen": 1.408813714981079, |
|
"logits/rejected": 1.3960859775543213, |
|
"logps/chosen": -542.9385375976562, |
|
"logps/rejected": -647.4051513671875, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.39411336183547974, |
|
"rewards/margins": 6.164831161499023, |
|
"rewards/rejected": -5.770717620849609, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.000291025641025641, |
|
"logits/chosen": 1.2414624691009521, |
|
"logits/rejected": 1.2821427583694458, |
|
"logps/chosen": -500.63568115234375, |
|
"logps/rejected": -617.734375, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.0469615459442139, |
|
"rewards/margins": 6.8481831550598145, |
|
"rewards/rejected": -5.8012213706970215, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0002905982905982906, |
|
"logits/chosen": 1.3173812627792358, |
|
"logits/rejected": 1.1461554765701294, |
|
"logps/chosen": -596.7745971679688, |
|
"logps/rejected": -591.0558471679688, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.7747964262962341, |
|
"rewards/margins": 5.85588264465332, |
|
"rewards/rejected": -5.081086158752441, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00029017094017094015, |
|
"logits/chosen": 1.1509640216827393, |
|
"logits/rejected": 1.1098980903625488, |
|
"logps/chosen": -474.10675048828125, |
|
"logps/rejected": -535.8497924804688, |
|
"loss": 0.031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1605219841003418, |
|
"rewards/margins": 6.742035865783691, |
|
"rewards/rejected": -5.58151388168335, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.0002897435897435897, |
|
"logits/chosen": 1.1271547079086304, |
|
"logits/rejected": 0.9869892001152039, |
|
"logps/chosen": -542.78662109375, |
|
"logps/rejected": -566.442626953125, |
|
"loss": 0.2008, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.9951988458633423, |
|
"rewards/margins": 6.587618827819824, |
|
"rewards/rejected": -5.5924201011657715, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.00028931623931623926, |
|
"logits/chosen": 1.2000806331634521, |
|
"logits/rejected": 1.0094119310379028, |
|
"logps/chosen": -550.8510131835938, |
|
"logps/rejected": -531.4238891601562, |
|
"loss": 0.0239, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3359670639038086, |
|
"rewards/margins": 6.495780944824219, |
|
"rewards/rejected": -5.159814834594727, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0002888888888888888, |
|
"logits/chosen": 1.102707862854004, |
|
"logits/rejected": 1.2245404720306396, |
|
"logps/chosen": -528.5265502929688, |
|
"logps/rejected": -665.08544921875, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7339667081832886, |
|
"rewards/margins": 7.117589473724365, |
|
"rewards/rejected": -6.383623123168945, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00028846153846153843, |
|
"logits/chosen": 1.1308355331420898, |
|
"logits/rejected": 0.9417912364006042, |
|
"logps/chosen": -553.8656005859375, |
|
"logps/rejected": -555.5968627929688, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9540010690689087, |
|
"rewards/margins": 6.364682197570801, |
|
"rewards/rejected": -4.410680770874023, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.000288034188034188, |
|
"logits/chosen": 1.2123243808746338, |
|
"logits/rejected": 0.9968570470809937, |
|
"logps/chosen": -567.16748046875, |
|
"logps/rejected": -521.2568359375, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0697741508483887, |
|
"rewards/margins": 6.565426349639893, |
|
"rewards/rejected": -4.495651721954346, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00028760683760683755, |
|
"logits/chosen": 1.1736996173858643, |
|
"logits/rejected": 1.147781252861023, |
|
"logps/chosen": -542.6943359375, |
|
"logps/rejected": -622.96826171875, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.328216075897217, |
|
"rewards/margins": 7.282103538513184, |
|
"rewards/rejected": -4.953887939453125, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.00028717948717948716, |
|
"logits/chosen": 1.060608983039856, |
|
"logits/rejected": 1.2086546421051025, |
|
"logps/chosen": -499.45989990234375, |
|
"logps/rejected": -617.087646484375, |
|
"loss": 0.0268, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5071005821228027, |
|
"rewards/margins": 5.79173469543457, |
|
"rewards/rejected": -3.2846338748931885, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0002867521367521367, |
|
"logits/chosen": 1.180185079574585, |
|
"logits/rejected": 0.9707103967666626, |
|
"logps/chosen": -530.9793701171875, |
|
"logps/rejected": -531.7672119140625, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.693030595779419, |
|
"rewards/margins": 5.776254653930664, |
|
"rewards/rejected": -3.083223819732666, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.0002863247863247863, |
|
"logits/chosen": 1.2219749689102173, |
|
"logits/rejected": 1.1085822582244873, |
|
"logps/chosen": -579.6038818359375, |
|
"logps/rejected": -576.358642578125, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.139183521270752, |
|
"rewards/margins": 5.98101806640625, |
|
"rewards/rejected": -2.84183406829834, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.00028589743589743584, |
|
"logits/chosen": 1.190647006034851, |
|
"logits/rejected": 1.1908663511276245, |
|
"logps/chosen": -514.8892211914062, |
|
"logps/rejected": -611.48046875, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.977041006088257, |
|
"rewards/margins": 6.328646659851074, |
|
"rewards/rejected": -3.3516054153442383, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00028547008547008545, |
|
"logits/chosen": 1.1982405185699463, |
|
"logits/rejected": 1.1012368202209473, |
|
"logps/chosen": -496.9002685546875, |
|
"logps/rejected": -544.1046142578125, |
|
"loss": 0.0649, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 3.5147013664245605, |
|
"rewards/margins": 6.30265998840332, |
|
"rewards/rejected": -2.787958860397339, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.000285042735042735, |
|
"logits/chosen": 1.1536628007888794, |
|
"logits/rejected": 1.0870661735534668, |
|
"logps/chosen": -475.55023193359375, |
|
"logps/rejected": -584.7400512695312, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.8786869049072266, |
|
"rewards/margins": 6.310683250427246, |
|
"rewards/rejected": -3.4319963455200195, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.00028461538461538457, |
|
"logits/chosen": 1.129310965538025, |
|
"logits/rejected": 1.0381569862365723, |
|
"logps/chosen": -506.8164978027344, |
|
"logps/rejected": -490.2034606933594, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.066460609436035, |
|
"rewards/margins": 5.390075206756592, |
|
"rewards/rejected": -2.3236145973205566, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.0002841880341880342, |
|
"logits/chosen": 1.2719249725341797, |
|
"logits/rejected": 1.0853713750839233, |
|
"logps/chosen": -580.2399291992188, |
|
"logps/rejected": -545.1849975585938, |
|
"loss": 0.0081, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.0230274200439453, |
|
"rewards/margins": 7.315037727355957, |
|
"rewards/rejected": -4.292009353637695, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.00028376068376068374, |
|
"logits/chosen": 1.2354512214660645, |
|
"logits/rejected": 1.1801047325134277, |
|
"logps/chosen": -482.8843994140625, |
|
"logps/rejected": -646.3005981445312, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2153114080429077, |
|
"rewards/margins": 6.708934307098389, |
|
"rewards/rejected": -5.493622779846191, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0002833333333333333, |
|
"logits/chosen": 0.9910479784011841, |
|
"logits/rejected": 1.020785927772522, |
|
"logps/chosen": -481.9186706542969, |
|
"logps/rejected": -603.8145141601562, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.37189701199531555, |
|
"rewards/margins": 6.756865978240967, |
|
"rewards/rejected": -6.384969711303711, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.0002829059829059829, |
|
"logits/chosen": 1.066985845565796, |
|
"logits/rejected": 1.0346630811691284, |
|
"logps/chosen": -550.504638671875, |
|
"logps/rejected": -601.9063720703125, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5437355041503906, |
|
"rewards/margins": 7.08186674118042, |
|
"rewards/rejected": -6.538131237030029, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.00028247863247863247, |
|
"logits/chosen": 1.1769685745239258, |
|
"logits/rejected": 1.0711925029754639, |
|
"logps/chosen": -572.137451171875, |
|
"logps/rejected": -591.6756591796875, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4036594033241272, |
|
"rewards/margins": 7.711120128631592, |
|
"rewards/rejected": -7.307460784912109, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.00028205128205128203, |
|
"logits/chosen": 1.1407585144042969, |
|
"logits/rejected": 1.0219597816467285, |
|
"logps/chosen": -486.6500244140625, |
|
"logps/rejected": -650.4639282226562, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8903399705886841, |
|
"rewards/margins": 8.448041915893555, |
|
"rewards/rejected": -7.5577006340026855, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.0002816239316239316, |
|
"logits/chosen": 1.146453619003296, |
|
"logits/rejected": 1.0628845691680908, |
|
"logps/chosen": -531.998046875, |
|
"logps/rejected": -557.2601928710938, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3119817972183228, |
|
"rewards/margins": 7.6520514488220215, |
|
"rewards/rejected": -6.340068817138672, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0002811965811965812, |
|
"logits/chosen": 1.0393480062484741, |
|
"logits/rejected": 1.0280386209487915, |
|
"logps/chosen": -470.7844543457031, |
|
"logps/rejected": -552.33935546875, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4702866077423096, |
|
"rewards/margins": 7.459576606750488, |
|
"rewards/rejected": -5.9892897605896, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.00028076923076923076, |
|
"logits/chosen": 1.1644623279571533, |
|
"logits/rejected": 1.0770840644836426, |
|
"logps/chosen": -546.8715209960938, |
|
"logps/rejected": -534.8888549804688, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.770728349685669, |
|
"rewards/margins": 7.339654922485352, |
|
"rewards/rejected": -5.5689263343811035, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.0002803418803418803, |
|
"logits/chosen": 1.0946919918060303, |
|
"logits/rejected": 1.1111879348754883, |
|
"logps/chosen": -448.84698486328125, |
|
"logps/rejected": -623.4598388671875, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.276505470275879, |
|
"rewards/margins": 7.733612060546875, |
|
"rewards/rejected": -5.457107067108154, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00027991452991452993, |
|
"logits/chosen": 1.083165168762207, |
|
"logits/rejected": 1.1361708641052246, |
|
"logps/chosen": -491.6983947753906, |
|
"logps/rejected": -597.7027587890625, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.072481632232666, |
|
"rewards/margins": 8.625251770019531, |
|
"rewards/rejected": -6.552770137786865, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.0002794871794871795, |
|
"logits/chosen": 1.2724071741104126, |
|
"logits/rejected": 1.118057370185852, |
|
"logps/chosen": -525.2489013671875, |
|
"logps/rejected": -523.6296997070312, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.201770544052124, |
|
"rewards/margins": 6.9069504737854, |
|
"rewards/rejected": -4.7051801681518555, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.00027905982905982905, |
|
"logits/chosen": 1.2534475326538086, |
|
"logits/rejected": 1.2043638229370117, |
|
"logps/chosen": -574.5486450195312, |
|
"logps/rejected": -598.8119506835938, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.390216112136841, |
|
"rewards/margins": 8.325881004333496, |
|
"rewards/rejected": -5.935665130615234, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.0002786324786324786, |
|
"logits/chosen": 1.2600340843200684, |
|
"logits/rejected": 1.2670692205429077, |
|
"logps/chosen": -539.1239624023438, |
|
"logps/rejected": -605.8324584960938, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.585294485092163, |
|
"rewards/margins": 8.082437515258789, |
|
"rewards/rejected": -5.497143268585205, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00027820512820512816, |
|
"logits/chosen": 1.2851054668426514, |
|
"logits/rejected": 1.1849486827850342, |
|
"logps/chosen": -571.027587890625, |
|
"logps/rejected": -601.7526245117188, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7168142795562744, |
|
"rewards/margins": 7.749199867248535, |
|
"rewards/rejected": -5.03238582611084, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.0002777777777777778, |
|
"logits/chosen": 1.3977611064910889, |
|
"logits/rejected": 1.210925817489624, |
|
"logps/chosen": -533.6973266601562, |
|
"logps/rejected": -598.4097900390625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4912829399108887, |
|
"rewards/margins": 8.09334945678711, |
|
"rewards/rejected": -5.602066516876221, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.00027735042735042734, |
|
"logits/chosen": 1.3112545013427734, |
|
"logits/rejected": 1.2536931037902832, |
|
"logps/chosen": -516.1342163085938, |
|
"logps/rejected": -645.7244873046875, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.322323799133301, |
|
"rewards/margins": 8.349126815795898, |
|
"rewards/rejected": -6.026803493499756, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.0002769230769230769, |
|
"logits/chosen": 1.3068538904190063, |
|
"logits/rejected": 1.3506109714508057, |
|
"logps/chosen": -503.5260925292969, |
|
"logps/rejected": -658.4425659179688, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.650004506111145, |
|
"rewards/margins": 7.3509368896484375, |
|
"rewards/rejected": -5.700932025909424, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.00027649572649572645, |
|
"logits/chosen": 1.336862325668335, |
|
"logits/rejected": 1.2281874418258667, |
|
"logps/chosen": -541.9310302734375, |
|
"logps/rejected": -600.86474609375, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8469295501708984, |
|
"rewards/margins": 7.918344974517822, |
|
"rewards/rejected": -6.071415901184082, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.00027606837606837607, |
|
"logits/chosen": 1.3074719905853271, |
|
"logits/rejected": 1.3130128383636475, |
|
"logps/chosen": -484.4212646484375, |
|
"logps/rejected": -639.7544555664062, |
|
"loss": 0.0861, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.4394211769104004, |
|
"rewards/margins": 7.3523383140563965, |
|
"rewards/rejected": -4.912917613983154, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.0002756410256410256, |
|
"logits/chosen": 1.3250826597213745, |
|
"logits/rejected": 1.2685434818267822, |
|
"logps/chosen": -496.5311279296875, |
|
"logps/rejected": -562.654541015625, |
|
"loss": 0.09, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.504718065261841, |
|
"rewards/margins": 7.967668056488037, |
|
"rewards/rejected": -5.462949752807617, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0002752136752136752, |
|
"logits/chosen": 1.4271235466003418, |
|
"logits/rejected": 1.2630449533462524, |
|
"logps/chosen": -561.2323608398438, |
|
"logps/rejected": -642.5577392578125, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4116668701171875, |
|
"rewards/margins": 8.215126037597656, |
|
"rewards/rejected": -5.803459167480469, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.00027478632478632474, |
|
"logits/chosen": 1.3756340742111206, |
|
"logits/rejected": 1.4128466844558716, |
|
"logps/chosen": -559.175048828125, |
|
"logps/rejected": -650.5951538085938, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.0436666011810303, |
|
"rewards/margins": 7.851646900177002, |
|
"rewards/rejected": -4.807980537414551, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.0002743589743589743, |
|
"logits/chosen": 1.3762015104293823, |
|
"logits/rejected": 1.2889195680618286, |
|
"logps/chosen": -538.1447143554688, |
|
"logps/rejected": -631.197998046875, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.8010246753692627, |
|
"rewards/margins": 7.74313497543335, |
|
"rewards/rejected": -4.942111015319824, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.0002739316239316239, |
|
"logits/chosen": 1.4248151779174805, |
|
"logits/rejected": 1.335301399230957, |
|
"logps/chosen": -473.7148132324219, |
|
"logps/rejected": -559.2073974609375, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.2720112800598145, |
|
"rewards/margins": 7.730058670043945, |
|
"rewards/rejected": -4.458047389984131, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.00027350427350427347, |
|
"logits/chosen": 1.3754342794418335, |
|
"logits/rejected": 1.3729346990585327, |
|
"logps/chosen": -519.9334106445312, |
|
"logps/rejected": -599.0367431640625, |
|
"loss": 0.0136, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.4481189250946045, |
|
"rewards/margins": 8.235373497009277, |
|
"rewards/rejected": -4.787254810333252, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.00027307692307692303, |
|
"logits/chosen": 1.3112382888793945, |
|
"logits/rejected": 1.2730636596679688, |
|
"logps/chosen": -535.9412231445312, |
|
"logps/rejected": -491.223388671875, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4259979724884033, |
|
"rewards/margins": 7.016923904418945, |
|
"rewards/rejected": -4.590925693511963, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.0002726495726495726, |
|
"logits/chosen": 1.4348691701889038, |
|
"logits/rejected": 1.2340961694717407, |
|
"logps/chosen": -533.677978515625, |
|
"logps/rejected": -525.3228149414062, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7481579780578613, |
|
"rewards/margins": 7.346179008483887, |
|
"rewards/rejected": -4.598021030426025, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0002722222222222222, |
|
"logits/chosen": 1.4624712467193604, |
|
"logits/rejected": 1.4238498210906982, |
|
"logps/chosen": -521.18310546875, |
|
"logps/rejected": -628.2357788085938, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.531442880630493, |
|
"rewards/margins": 7.304967880249023, |
|
"rewards/rejected": -4.773524761199951, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.00027179487179487176, |
|
"logits/chosen": 1.3994379043579102, |
|
"logits/rejected": 1.3517390489578247, |
|
"logps/chosen": -487.254150390625, |
|
"logps/rejected": -562.578369140625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.351099729537964, |
|
"rewards/margins": 8.097498893737793, |
|
"rewards/rejected": -4.746399402618408, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 0.0002713675213675213, |
|
"logits/chosen": 1.4121302366256714, |
|
"logits/rejected": 1.4421744346618652, |
|
"logps/chosen": -520.80322265625, |
|
"logps/rejected": -645.4122314453125, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2977559566497803, |
|
"rewards/margins": 7.4320292472839355, |
|
"rewards/rejected": -5.134273529052734, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.00027094017094017093, |
|
"logits/chosen": 1.4837563037872314, |
|
"logits/rejected": 1.379111886024475, |
|
"logps/chosen": -601.663818359375, |
|
"logps/rejected": -564.5067138671875, |
|
"loss": 0.1342, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.5174392461776733, |
|
"rewards/margins": 6.973749160766602, |
|
"rewards/rejected": -5.456309795379639, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.0002705128205128205, |
|
"logits/chosen": 1.3307445049285889, |
|
"logits/rejected": 1.201188564300537, |
|
"logps/chosen": -529.9027099609375, |
|
"logps/rejected": -573.0830078125, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5938829183578491, |
|
"rewards/margins": 8.63363265991211, |
|
"rewards/rejected": -7.039750099182129, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.00027008547008547005, |
|
"logits/chosen": 1.2792227268218994, |
|
"logits/rejected": 1.2923702001571655, |
|
"logps/chosen": -547.6593017578125, |
|
"logps/rejected": -678.7649536132812, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6561351418495178, |
|
"rewards/margins": 6.722534656524658, |
|
"rewards/rejected": -6.066399574279785, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0002696581196581196, |
|
"logits/chosen": 1.4008458852767944, |
|
"logits/rejected": 1.1927311420440674, |
|
"logps/chosen": -628.3024291992188, |
|
"logps/rejected": -578.4089965820312, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8446162939071655, |
|
"rewards/margins": 8.044957160949707, |
|
"rewards/rejected": -7.200340747833252, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0002692307692307692, |
|
"logits/chosen": 1.1845999956130981, |
|
"logits/rejected": 1.0530712604522705, |
|
"logps/chosen": -499.4281005859375, |
|
"logps/rejected": -573.0311889648438, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9514065980911255, |
|
"rewards/margins": 8.918203353881836, |
|
"rewards/rejected": -7.966796875, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.0002688034188034188, |
|
"logits/chosen": 1.1802606582641602, |
|
"logits/rejected": 1.1234092712402344, |
|
"logps/chosen": -572.6054077148438, |
|
"logps/rejected": -626.6286010742188, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8314933180809021, |
|
"rewards/margins": 9.328904151916504, |
|
"rewards/rejected": -8.497410774230957, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.00026837606837606834, |
|
"logits/chosen": 1.1486611366271973, |
|
"logits/rejected": 1.0562578439712524, |
|
"logps/chosen": -509.203125, |
|
"logps/rejected": -584.0765991210938, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5767253637313843, |
|
"rewards/margins": 8.173746109008789, |
|
"rewards/rejected": -7.597021102905273, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.00026794871794871795, |
|
"logits/chosen": 1.2486861944198608, |
|
"logits/rejected": 1.123504400253296, |
|
"logps/chosen": -586.3121337890625, |
|
"logps/rejected": -666.2870483398438, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11837495118379593, |
|
"rewards/margins": 8.070638656616211, |
|
"rewards/rejected": -7.952263355255127, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.0002675213675213675, |
|
"logits/chosen": 1.0615603923797607, |
|
"logits/rejected": 1.0381104946136475, |
|
"logps/chosen": -477.157470703125, |
|
"logps/rejected": -577.363525390625, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2932237982749939, |
|
"rewards/margins": 8.428733825683594, |
|
"rewards/rejected": -8.13551139831543, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.00026709401709401707, |
|
"logits/chosen": 1.0924714803695679, |
|
"logits/rejected": 1.0560393333435059, |
|
"logps/chosen": -589.655517578125, |
|
"logps/rejected": -621.06201171875, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.8137645125389099, |
|
"rewards/margins": 8.433317184448242, |
|
"rewards/rejected": -9.247081756591797, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0002666666666666666, |
|
"logits/chosen": 1.1305707693099976, |
|
"logits/rejected": 1.071329951286316, |
|
"logps/chosen": -582.5031127929688, |
|
"logps/rejected": -615.1607666015625, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6762839555740356, |
|
"rewards/margins": 8.447378158569336, |
|
"rewards/rejected": -7.77109432220459, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.00026623931623931624, |
|
"logits/chosen": 1.101088285446167, |
|
"logits/rejected": 1.0743204355239868, |
|
"logps/chosen": -503.9051208496094, |
|
"logps/rejected": -705.6373291015625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4264540672302246, |
|
"rewards/margins": 9.59773063659668, |
|
"rewards/rejected": -8.171276092529297, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.0002658119658119658, |
|
"logits/chosen": 1.021052598953247, |
|
"logits/rejected": 1.0671635866165161, |
|
"logps/chosen": -466.4878845214844, |
|
"logps/rejected": -629.9095458984375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2989165782928467, |
|
"rewards/margins": 9.68484878540039, |
|
"rewards/rejected": -8.385932922363281, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.00026538461538461536, |
|
"logits/chosen": 1.1435658931732178, |
|
"logits/rejected": 1.1837159395217896, |
|
"logps/chosen": -534.5050048828125, |
|
"logps/rejected": -611.9518432617188, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3590706586837769, |
|
"rewards/margins": 8.476469039916992, |
|
"rewards/rejected": -7.117398262023926, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.00026495726495726497, |
|
"logits/chosen": 1.146314263343811, |
|
"logits/rejected": 1.1262887716293335, |
|
"logps/chosen": -476.71453857421875, |
|
"logps/rejected": -590.7471923828125, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9563323259353638, |
|
"rewards/margins": 9.145315170288086, |
|
"rewards/rejected": -7.188984394073486, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 0.0002645299145299145, |
|
"logits/chosen": 1.16382896900177, |
|
"logits/rejected": 1.1415654420852661, |
|
"logps/chosen": -532.4939575195312, |
|
"logps/rejected": -640.6329345703125, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2008843421936035, |
|
"rewards/margins": 8.335280418395996, |
|
"rewards/rejected": -6.134396076202393, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.0002641025641025641, |
|
"logits/chosen": 1.0377854108810425, |
|
"logits/rejected": 1.0735015869140625, |
|
"logps/chosen": -482.3515625, |
|
"logps/rejected": -620.0985717773438, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8429818153381348, |
|
"rewards/margins": 8.701183319091797, |
|
"rewards/rejected": -6.8582000732421875, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.00026367521367521364, |
|
"logits/chosen": 1.2598010301589966, |
|
"logits/rejected": 1.2114201784133911, |
|
"logps/chosen": -497.3212890625, |
|
"logps/rejected": -609.7880249023438, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7050861120224, |
|
"rewards/margins": 8.18321418762207, |
|
"rewards/rejected": -6.478128433227539, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.00026324786324786326, |
|
"logits/chosen": 1.2148414850234985, |
|
"logits/rejected": 1.0941295623779297, |
|
"logps/chosen": -575.4998779296875, |
|
"logps/rejected": -580.6967163085938, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.006260395050049, |
|
"rewards/margins": 8.852553367614746, |
|
"rewards/rejected": -6.846292972564697, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0002628205128205128, |
|
"logits/chosen": 1.2730900049209595, |
|
"logits/rejected": 1.1539727449417114, |
|
"logps/chosen": -587.6873779296875, |
|
"logps/rejected": -640.8407592773438, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5435640811920166, |
|
"rewards/margins": 8.264644622802734, |
|
"rewards/rejected": -5.721080303192139, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0002623931623931624, |
|
"logits/chosen": 1.1310749053955078, |
|
"logits/rejected": 1.074840784072876, |
|
"logps/chosen": -539.7301025390625, |
|
"logps/rejected": -588.243408203125, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.725675344467163, |
|
"rewards/margins": 8.484735488891602, |
|
"rewards/rejected": -6.759060382843018, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.00026196581196581193, |
|
"logits/chosen": 1.1921124458312988, |
|
"logits/rejected": 1.1464745998382568, |
|
"logps/chosen": -531.4241333007812, |
|
"logps/rejected": -650.033447265625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.208735704421997, |
|
"rewards/margins": 8.580331802368164, |
|
"rewards/rejected": -6.371595859527588, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00026153846153846154, |
|
"logits/chosen": 1.2101249694824219, |
|
"logits/rejected": 1.18596613407135, |
|
"logps/chosen": -563.632568359375, |
|
"logps/rejected": -715.3417358398438, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0189900398254395, |
|
"rewards/margins": 9.561189651489258, |
|
"rewards/rejected": -7.542199611663818, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.0002611111111111111, |
|
"logits/chosen": 1.2898643016815186, |
|
"logits/rejected": 1.1050164699554443, |
|
"logps/chosen": -568.4158325195312, |
|
"logps/rejected": -607.685302734375, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8665090799331665, |
|
"rewards/margins": 8.093311309814453, |
|
"rewards/rejected": -6.226801872253418, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.00026068376068376066, |
|
"logits/chosen": 1.1432087421417236, |
|
"logits/rejected": 1.1123594045639038, |
|
"logps/chosen": -532.8350219726562, |
|
"logps/rejected": -636.5218505859375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5014150142669678, |
|
"rewards/margins": 9.985570907592773, |
|
"rewards/rejected": -7.48415470123291, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.0002602564102564102, |
|
"logits/chosen": 1.0670151710510254, |
|
"logits/rejected": 0.9835186004638672, |
|
"logps/chosen": -525.0418090820312, |
|
"logps/rejected": -603.8453369140625, |
|
"loss": 0.0303, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 1.609043836593628, |
|
"rewards/margins": 8.916692733764648, |
|
"rewards/rejected": -7.307648658752441, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0002598290598290598, |
|
"logits/chosen": 1.1664202213287354, |
|
"logits/rejected": 1.0911628007888794, |
|
"logps/chosen": -546.672607421875, |
|
"logps/rejected": -600.7014770507812, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.016690254211426, |
|
"rewards/margins": 8.022310256958008, |
|
"rewards/rejected": -6.005620002746582, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.0002594017094017094, |
|
"logits/chosen": 1.089374303817749, |
|
"logits/rejected": 1.1528961658477783, |
|
"logps/chosen": -515.8948364257812, |
|
"logps/rejected": -608.8614501953125, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.6085548400878906, |
|
"rewards/margins": 9.069319725036621, |
|
"rewards/rejected": -6.4607648849487305, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.00025897435897435895, |
|
"logits/chosen": 1.2652084827423096, |
|
"logits/rejected": 1.0107694864273071, |
|
"logps/chosen": -570.3291015625, |
|
"logps/rejected": -542.94384765625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.6129395961761475, |
|
"rewards/margins": 8.992415428161621, |
|
"rewards/rejected": -5.3794755935668945, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.0002585470085470085, |
|
"logits/chosen": 1.2760734558105469, |
|
"logits/rejected": 1.1731884479522705, |
|
"logps/chosen": -581.9400634765625, |
|
"logps/rejected": -597.0192260742188, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4896321296691895, |
|
"rewards/margins": 8.106904983520508, |
|
"rewards/rejected": -5.617273330688477, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.00025811965811965807, |
|
"logits/chosen": 1.262199878692627, |
|
"logits/rejected": 1.0435302257537842, |
|
"logps/chosen": -577.8585205078125, |
|
"logps/rejected": -599.6145629882812, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.0292744636535645, |
|
"rewards/margins": 9.197108268737793, |
|
"rewards/rejected": -5.1678338050842285, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 0.0002576923076923077, |
|
"logits/chosen": 1.0869362354278564, |
|
"logits/rejected": 1.062146544456482, |
|
"logps/chosen": -486.3373107910156, |
|
"logps/rejected": -604.74609375, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.719869613647461, |
|
"rewards/margins": 9.07932186126709, |
|
"rewards/rejected": -6.359452724456787, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.00025726495726495724, |
|
"logits/chosen": 1.1699590682983398, |
|
"logits/rejected": 1.0801599025726318, |
|
"logps/chosen": -498.7552490234375, |
|
"logps/rejected": -508.74609375, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.849763870239258, |
|
"rewards/margins": 8.253904342651367, |
|
"rewards/rejected": -5.404139995574951, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.0002568376068376068, |
|
"logits/chosen": 1.1467467546463013, |
|
"logits/rejected": 1.1259756088256836, |
|
"logps/chosen": -504.258544921875, |
|
"logps/rejected": -571.6417846679688, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.4761364459991455, |
|
"rewards/margins": 9.168277740478516, |
|
"rewards/rejected": -5.692141056060791, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.00025641025641025636, |
|
"logits/chosen": 1.1694316864013672, |
|
"logits/rejected": 1.1096751689910889, |
|
"logps/chosen": -521.1782836914062, |
|
"logps/rejected": -578.571533203125, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.831768035888672, |
|
"rewards/margins": 8.435105323791504, |
|
"rewards/rejected": -5.603337287902832, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.00025598290598290597, |
|
"logits/chosen": 1.2126814126968384, |
|
"logits/rejected": 1.0483447313308716, |
|
"logps/chosen": -514.5982055664062, |
|
"logps/rejected": -559.2354125976562, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.6389248371124268, |
|
"rewards/margins": 8.253149032592773, |
|
"rewards/rejected": -5.614223957061768, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.00025555555555555553, |
|
"logits/chosen": 1.1626149415969849, |
|
"logits/rejected": 1.023645281791687, |
|
"logps/chosen": -539.7805786132812, |
|
"logps/rejected": -583.5364379882812, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.5426697731018066, |
|
"rewards/margins": 9.294103622436523, |
|
"rewards/rejected": -5.751433372497559, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 0.0002551282051282051, |
|
"logits/chosen": 1.2193539142608643, |
|
"logits/rejected": 1.0917335748672485, |
|
"logps/chosen": -482.9999694824219, |
|
"logps/rejected": -574.2794189453125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.089658737182617, |
|
"rewards/margins": 8.874006271362305, |
|
"rewards/rejected": -5.784348011016846, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.0002547008547008547, |
|
"logits/chosen": 1.110312819480896, |
|
"logits/rejected": 1.136667013168335, |
|
"logps/chosen": -463.33062744140625, |
|
"logps/rejected": -540.6802978515625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5352556705474854, |
|
"rewards/margins": 9.688406944274902, |
|
"rewards/rejected": -7.1531524658203125, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.00025427350427350426, |
|
"logits/chosen": 1.2557671070098877, |
|
"logits/rejected": 1.1719632148742676, |
|
"logps/chosen": -529.3836669921875, |
|
"logps/rejected": -604.2679443359375, |
|
"loss": 0.0017, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.274686098098755, |
|
"rewards/margins": 8.664352416992188, |
|
"rewards/rejected": -5.389666557312012, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.0002538461538461538, |
|
"logits/chosen": 1.2389734983444214, |
|
"logits/rejected": 1.084290623664856, |
|
"logps/chosen": -544.71142578125, |
|
"logps/rejected": -621.462158203125, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2781314849853516, |
|
"rewards/margins": 8.4774808883667, |
|
"rewards/rejected": -6.199349403381348, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.0002534188034188034, |
|
"logits/chosen": 1.1379337310791016, |
|
"logits/rejected": 1.1488574743270874, |
|
"logps/chosen": -505.5488586425781, |
|
"logps/rejected": -620.3075561523438, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.200209140777588, |
|
"rewards/margins": 7.998414516448975, |
|
"rewards/rejected": -5.798205375671387, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 0.000252991452991453, |
|
"logits/chosen": 1.1832406520843506, |
|
"logits/rejected": 1.1894774436950684, |
|
"logps/chosen": -518.3919677734375, |
|
"logps/rejected": -603.9647216796875, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.2990570068359375, |
|
"rewards/margins": 8.94902515411377, |
|
"rewards/rejected": -5.649968147277832, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.00025256410256410255, |
|
"logits/chosen": 1.1613863706588745, |
|
"logits/rejected": 1.0867745876312256, |
|
"logps/chosen": -509.91754150390625, |
|
"logps/rejected": -581.9103393554688, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3365347385406494, |
|
"rewards/margins": 8.27928352355957, |
|
"rewards/rejected": -5.942748069763184, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 0.0002521367521367521, |
|
"logits/chosen": 1.1696518659591675, |
|
"logits/rejected": 1.0967986583709717, |
|
"logps/chosen": -512.4561767578125, |
|
"logps/rejected": -571.2943725585938, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.8717398643493652, |
|
"rewards/margins": 9.033393859863281, |
|
"rewards/rejected": -6.161653995513916, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.0002517094017094017, |
|
"logits/chosen": 1.211112141609192, |
|
"logits/rejected": 0.9915167093276978, |
|
"logps/chosen": -592.27294921875, |
|
"logps/rejected": -545.3093872070312, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7333691120147705, |
|
"rewards/margins": 8.252148628234863, |
|
"rewards/rejected": -5.518779754638672, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.0002512820512820513, |
|
"logits/chosen": 1.1865314245224, |
|
"logits/rejected": 1.083636999130249, |
|
"logps/chosen": -500.9031982421875, |
|
"logps/rejected": -634.9488525390625, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.7689785957336426, |
|
"rewards/margins": 9.048822402954102, |
|
"rewards/rejected": -6.279844760894775, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.00025085470085470083, |
|
"logits/chosen": 1.308659553527832, |
|
"logits/rejected": 1.1389790773391724, |
|
"logps/chosen": -530.4187622070312, |
|
"logps/rejected": -617.7844848632812, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9889013767242432, |
|
"rewards/margins": 7.278841018676758, |
|
"rewards/rejected": -5.2899394035339355, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0002504273504273504, |
|
"logits/chosen": 1.1351438760757446, |
|
"logits/rejected": 1.0995919704437256, |
|
"logps/chosen": -537.9273071289062, |
|
"logps/rejected": -580.5325927734375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.307258129119873, |
|
"rewards/margins": 8.442761421203613, |
|
"rewards/rejected": -6.135504245758057, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.00025, |
|
"logits/chosen": 1.1220672130584717, |
|
"logits/rejected": 1.0801559686660767, |
|
"logps/chosen": -520.331787109375, |
|
"logps/rejected": -620.8114013671875, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.103241205215454, |
|
"rewards/margins": 8.628799438476562, |
|
"rewards/rejected": -6.5255584716796875, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.00024957264957264956, |
|
"logits/chosen": 1.2094953060150146, |
|
"logits/rejected": 1.1247830390930176, |
|
"logps/chosen": -482.1205749511719, |
|
"logps/rejected": -588.2861328125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7330946922302246, |
|
"rewards/margins": 8.031623840332031, |
|
"rewards/rejected": -5.298529148101807, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.0002491452991452991, |
|
"logits/chosen": 1.1430044174194336, |
|
"logits/rejected": 1.0147483348846436, |
|
"logps/chosen": -540.6754760742188, |
|
"logps/rejected": -578.9623413085938, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.216123104095459, |
|
"rewards/margins": 8.867217063903809, |
|
"rewards/rejected": -5.651093482971191, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.00024871794871794874, |
|
"logits/chosen": 1.1910125017166138, |
|
"logits/rejected": 1.0428566932678223, |
|
"logps/chosen": -577.4218139648438, |
|
"logps/rejected": -615.4971313476562, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.6112916469573975, |
|
"rewards/margins": 8.347122192382812, |
|
"rewards/rejected": -5.735829830169678, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0002482905982905983, |
|
"logits/chosen": 1.189084768295288, |
|
"logits/rejected": 1.023719310760498, |
|
"logps/chosen": -505.85125732421875, |
|
"logps/rejected": -607.1080322265625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.267016649246216, |
|
"rewards/margins": 8.4349946975708, |
|
"rewards/rejected": -6.1679768562316895, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.00024786324786324785, |
|
"logits/chosen": 1.1190298795700073, |
|
"logits/rejected": 1.0712878704071045, |
|
"logps/chosen": -522.3341064453125, |
|
"logps/rejected": -650.10107421875, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4759145975112915, |
|
"rewards/margins": 8.425509452819824, |
|
"rewards/rejected": -6.949594020843506, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.0002474358974358974, |
|
"logits/chosen": 1.1855789422988892, |
|
"logits/rejected": 1.1534652709960938, |
|
"logps/chosen": -530.4508666992188, |
|
"logps/rejected": -620.44873046875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7457334995269775, |
|
"rewards/margins": 8.301987648010254, |
|
"rewards/rejected": -5.5562543869018555, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.000247008547008547, |
|
"logits/chosen": 1.2083404064178467, |
|
"logits/rejected": 1.0335760116577148, |
|
"logps/chosen": -544.8739624023438, |
|
"logps/rejected": -582.562744140625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7809946537017822, |
|
"rewards/margins": 8.799093246459961, |
|
"rewards/rejected": -6.018097877502441, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0002465811965811966, |
|
"logits/chosen": 1.1989009380340576, |
|
"logits/rejected": 1.1295719146728516, |
|
"logps/chosen": -480.67724609375, |
|
"logps/rejected": -609.4390258789062, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5975394248962402, |
|
"rewards/margins": 8.314066886901855, |
|
"rewards/rejected": -5.716527938842773, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 0.00024615384615384614, |
|
"logits/chosen": 1.1273399591445923, |
|
"logits/rejected": 0.9690557718276978, |
|
"logps/chosen": -563.5905151367188, |
|
"logps/rejected": -566.3074951171875, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.03564715385437, |
|
"rewards/margins": 8.314454078674316, |
|
"rewards/rejected": -6.278806686401367, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.0002457264957264957, |
|
"logits/chosen": 1.0755057334899902, |
|
"logits/rejected": 1.1226409673690796, |
|
"logps/chosen": -515.4642333984375, |
|
"logps/rejected": -682.0799560546875, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6836296319961548, |
|
"rewards/margins": 8.099983215332031, |
|
"rewards/rejected": -6.416353702545166, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 0.00024529914529914526, |
|
"logits/chosen": 1.1585733890533447, |
|
"logits/rejected": 1.1376292705535889, |
|
"logps/chosen": -489.0839538574219, |
|
"logps/rejected": -560.7153930664062, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.028048276901245, |
|
"rewards/margins": 7.495326995849609, |
|
"rewards/rejected": -5.467278957366943, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.00024487179487179487, |
|
"logits/chosen": 1.1711719036102295, |
|
"logits/rejected": 1.2047771215438843, |
|
"logps/chosen": -554.0427856445312, |
|
"logps/rejected": -657.2614135742188, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.6054043769836426, |
|
"rewards/margins": 8.226700782775879, |
|
"rewards/rejected": -5.621296405792236, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.00024444444444444443, |
|
"logits/chosen": 1.1655393838882446, |
|
"logits/rejected": 1.1469833850860596, |
|
"logps/chosen": -568.0317993164062, |
|
"logps/rejected": -558.951171875, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.950896739959717, |
|
"rewards/margins": 8.18274974822998, |
|
"rewards/rejected": -5.231853008270264, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.00024401709401709401, |
|
"logits/chosen": 1.188609004020691, |
|
"logits/rejected": 1.1547000408172607, |
|
"logps/chosen": -582.5511474609375, |
|
"logps/rejected": -659.006591796875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.448970317840576, |
|
"rewards/margins": 9.596809387207031, |
|
"rewards/rejected": -7.147839069366455, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00024358974358974357, |
|
"logits/chosen": 1.1199637651443481, |
|
"logits/rejected": 1.0847599506378174, |
|
"logps/chosen": -521.1385498046875, |
|
"logps/rejected": -580.2830810546875, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.533172845840454, |
|
"rewards/margins": 8.593878746032715, |
|
"rewards/rejected": -6.060705184936523, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.00024316239316239313, |
|
"logits/chosen": 1.1465306282043457, |
|
"logits/rejected": 1.1031239032745361, |
|
"logps/chosen": -483.8526611328125, |
|
"logps/rejected": -559.978759765625, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.983487844467163, |
|
"rewards/margins": 8.161898612976074, |
|
"rewards/rejected": -6.178411483764648, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.00024273504273504272, |
|
"logits/chosen": 1.13920259475708, |
|
"logits/rejected": 1.1220027208328247, |
|
"logps/chosen": -512.029052734375, |
|
"logps/rejected": -572.7222900390625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.37949538230896, |
|
"rewards/margins": 9.245460510253906, |
|
"rewards/rejected": -6.865965843200684, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0002423076923076923, |
|
"logits/chosen": 1.1790162324905396, |
|
"logits/rejected": 1.032013177871704, |
|
"logps/chosen": -550.051025390625, |
|
"logps/rejected": -632.330810546875, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.7981252670288086, |
|
"rewards/margins": 9.512584686279297, |
|
"rewards/rejected": -6.714459419250488, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.00024188034188034186, |
|
"logits/chosen": 1.299055814743042, |
|
"logits/rejected": 1.2317571640014648, |
|
"logps/chosen": -517.1921997070312, |
|
"logps/rejected": -619.0784301757812, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4681472778320312, |
|
"rewards/margins": 8.650612831115723, |
|
"rewards/rejected": -7.182465076446533, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.00024145299145299142, |
|
"logits/chosen": 1.1555142402648926, |
|
"logits/rejected": 1.1552680730819702, |
|
"logps/chosen": -493.82232666015625, |
|
"logps/rejected": -553.5524291992188, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.282465934753418, |
|
"rewards/margins": 7.990402698516846, |
|
"rewards/rejected": -5.707936763763428, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.000241025641025641, |
|
"logits/chosen": 1.2630378007888794, |
|
"logits/rejected": 1.145714282989502, |
|
"logps/chosen": -566.5864868164062, |
|
"logps/rejected": -555.4298095703125, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5014400482177734, |
|
"rewards/margins": 9.682706832885742, |
|
"rewards/rejected": -7.1812663078308105, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.00024059829059829056, |
|
"logits/chosen": 1.275704026222229, |
|
"logits/rejected": 1.1247011423110962, |
|
"logps/chosen": -565.8131713867188, |
|
"logps/rejected": -633.22509765625, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3491557836532593, |
|
"rewards/margins": 8.835249900817871, |
|
"rewards/rejected": -7.486093997955322, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 0.00024017094017094015, |
|
"logits/chosen": 1.24131178855896, |
|
"logits/rejected": 1.1392734050750732, |
|
"logps/chosen": -515.2080078125, |
|
"logps/rejected": -557.8899536132812, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5449414253234863, |
|
"rewards/margins": 8.077924728393555, |
|
"rewards/rejected": -6.532983303070068, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.00023974358974358974, |
|
"logits/chosen": 1.2522388696670532, |
|
"logits/rejected": 1.049080729484558, |
|
"logps/chosen": -607.8526611328125, |
|
"logps/rejected": -644.0594482421875, |
|
"loss": 0.1498, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.073587656021118, |
|
"rewards/margins": 9.496702194213867, |
|
"rewards/rejected": -7.42311429977417, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 0.0002393162393162393, |
|
"logits/chosen": 1.1726680994033813, |
|
"logits/rejected": 1.0583220720291138, |
|
"logps/chosen": -537.9254150390625, |
|
"logps/rejected": -589.2078857421875, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3364086151123047, |
|
"rewards/margins": 9.462315559387207, |
|
"rewards/rejected": -8.125906944274902, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.00023888888888888885, |
|
"logits/chosen": 1.2105443477630615, |
|
"logits/rejected": 1.0398310422897339, |
|
"logps/chosen": -553.4381713867188, |
|
"logps/rejected": -617.5463256835938, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.809446096420288, |
|
"rewards/margins": 9.608116149902344, |
|
"rewards/rejected": -7.798670291900635, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.00023846153846153844, |
|
"logits/chosen": 1.0151175260543823, |
|
"logits/rejected": 1.137940764427185, |
|
"logps/chosen": -474.15582275390625, |
|
"logps/rejected": -601.0347900390625, |
|
"loss": 0.0025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9911149740219116, |
|
"rewards/margins": 8.517168045043945, |
|
"rewards/rejected": -7.526054382324219, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.00023803418803418802, |
|
"logits/chosen": 1.1788804531097412, |
|
"logits/rejected": 1.0858978033065796, |
|
"logps/chosen": -538.7382202148438, |
|
"logps/rejected": -580.581787109375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0802903175354004, |
|
"rewards/margins": 8.940625190734863, |
|
"rewards/rejected": -7.860335350036621, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 0.00023760683760683758, |
|
"logits/chosen": 1.209570288658142, |
|
"logits/rejected": 1.1490302085876465, |
|
"logps/chosen": -497.189697265625, |
|
"logps/rejected": -623.16455078125, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9234257936477661, |
|
"rewards/margins": 9.893648147583008, |
|
"rewards/rejected": -7.970221996307373, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.00023717948717948714, |
|
"logits/chosen": 1.1075451374053955, |
|
"logits/rejected": 1.0870707035064697, |
|
"logps/chosen": -555.2899169921875, |
|
"logps/rejected": -560.2510375976562, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8097199201583862, |
|
"rewards/margins": 8.50051498413086, |
|
"rewards/rejected": -7.690794467926025, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.00023675213675213675, |
|
"logits/chosen": 1.1817216873168945, |
|
"logits/rejected": 1.018075942993164, |
|
"logps/chosen": -529.1445922851562, |
|
"logps/rejected": -607.93505859375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8409253358840942, |
|
"rewards/margins": 9.230034828186035, |
|
"rewards/rejected": -7.389110088348389, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.0002363247863247863, |
|
"logits/chosen": 1.146735429763794, |
|
"logits/rejected": 1.082593321800232, |
|
"logps/chosen": -557.0680541992188, |
|
"logps/rejected": -634.6128540039062, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9302033185958862, |
|
"rewards/margins": 9.59379768371582, |
|
"rewards/rejected": -8.663594245910645, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.00023589743589743587, |
|
"logits/chosen": 1.0844825506210327, |
|
"logits/rejected": 1.050144076347351, |
|
"logps/chosen": -458.246337890625, |
|
"logps/rejected": -643.5533447265625, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.983664333820343, |
|
"rewards/margins": 9.005660057067871, |
|
"rewards/rejected": -8.021997451782227, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00023547008547008543, |
|
"logits/chosen": 1.1802949905395508, |
|
"logits/rejected": 1.2055476903915405, |
|
"logps/chosen": -538.8391723632812, |
|
"logps/rejected": -667.3803100585938, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4791993796825409, |
|
"rewards/margins": 8.043685913085938, |
|
"rewards/rejected": -7.564486026763916, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00023504273504273504, |
|
"logits/chosen": 1.217112421989441, |
|
"logits/rejected": 1.0857105255126953, |
|
"logps/chosen": -547.7744140625, |
|
"logps/rejected": -650.827880859375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2695170640945435, |
|
"rewards/margins": 10.668280601501465, |
|
"rewards/rejected": -9.398763656616211, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.0002346153846153846, |
|
"logits/chosen": 1.106930136680603, |
|
"logits/rejected": 1.1642612218856812, |
|
"logps/chosen": -534.817626953125, |
|
"logps/rejected": -647.9102783203125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.33475053310394287, |
|
"rewards/margins": 9.073336601257324, |
|
"rewards/rejected": -8.73858642578125, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.00023418803418803416, |
|
"logits/chosen": 1.1831903457641602, |
|
"logits/rejected": 1.1884675025939941, |
|
"logps/chosen": -560.828857421875, |
|
"logps/rejected": -663.0516357421875, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9667414426803589, |
|
"rewards/margins": 10.761907577514648, |
|
"rewards/rejected": -8.795166969299316, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.00023376068376068375, |
|
"logits/chosen": 1.092282772064209, |
|
"logits/rejected": 0.9970771670341492, |
|
"logps/chosen": -529.67822265625, |
|
"logps/rejected": -641.421142578125, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.46319279074668884, |
|
"rewards/margins": 10.089384078979492, |
|
"rewards/rejected": -9.626192092895508, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0002333333333333333, |
|
"logits/chosen": 1.1541541814804077, |
|
"logits/rejected": 0.9784144759178162, |
|
"logps/chosen": -562.6727294921875, |
|
"logps/rejected": -608.5543823242188, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5622472763061523, |
|
"rewards/margins": 9.451787948608398, |
|
"rewards/rejected": -8.889540672302246, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.0002329059829059829, |
|
"logits/chosen": 1.0728470087051392, |
|
"logits/rejected": 1.0160434246063232, |
|
"logps/chosen": -605.6416625976562, |
|
"logps/rejected": -620.4939575195312, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3293549120426178, |
|
"rewards/margins": 9.99177360534668, |
|
"rewards/rejected": -9.662418365478516, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00023247863247863245, |
|
"logits/chosen": 1.0738935470581055, |
|
"logits/rejected": 1.0548124313354492, |
|
"logps/chosen": -494.51788330078125, |
|
"logps/rejected": -601.1412353515625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6489882469177246, |
|
"rewards/margins": 9.996894836425781, |
|
"rewards/rejected": -9.347906112670898, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.00023205128205128203, |
|
"logits/chosen": 1.1992632150650024, |
|
"logits/rejected": 1.1126775741577148, |
|
"logps/chosen": -581.875244140625, |
|
"logps/rejected": -654.9214477539062, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1056530475616455, |
|
"rewards/margins": 9.1024169921875, |
|
"rewards/rejected": -10.208070755004883, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.0002316239316239316, |
|
"logits/chosen": 1.024402379989624, |
|
"logits/rejected": 1.0234527587890625, |
|
"logps/chosen": -527.988037109375, |
|
"logps/rejected": -600.8884887695312, |
|
"loss": 0.0015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.40097522735595703, |
|
"rewards/margins": 10.08630084991455, |
|
"rewards/rejected": -9.685325622558594, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00023119658119658118, |
|
"logits/chosen": 1.0920895338058472, |
|
"logits/rejected": 0.925986647605896, |
|
"logps/chosen": -526.6175537109375, |
|
"logps/rejected": -593.0648803710938, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.35913902521133423, |
|
"rewards/margins": 10.041351318359375, |
|
"rewards/rejected": -9.682212829589844, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.00023076923076923076, |
|
"logits/chosen": 1.12273108959198, |
|
"logits/rejected": 0.9582171440124512, |
|
"logps/chosen": -566.5948486328125, |
|
"logps/rejected": -640.344482421875, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10319514572620392, |
|
"rewards/margins": 10.264111518859863, |
|
"rewards/rejected": -10.36730670928955, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00023034188034188032, |
|
"logits/chosen": 1.125780463218689, |
|
"logits/rejected": 0.8733446598052979, |
|
"logps/chosen": -502.71685791015625, |
|
"logps/rejected": -526.7401123046875, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7334610819816589, |
|
"rewards/margins": 8.98703384399414, |
|
"rewards/rejected": -8.253572463989258, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.00022991452991452988, |
|
"logits/chosen": 1.007986307144165, |
|
"logits/rejected": 0.9879658818244934, |
|
"logps/chosen": -493.781982421875, |
|
"logps/rejected": -631.9270629882812, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.32805830240249634, |
|
"rewards/margins": 10.0196533203125, |
|
"rewards/rejected": -9.691594123840332, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00022948717948717944, |
|
"logits/chosen": 1.0395015478134155, |
|
"logits/rejected": 1.004233479499817, |
|
"logps/chosen": -519.82568359375, |
|
"logps/rejected": -645.8541870117188, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23589622974395752, |
|
"rewards/margins": 10.204262733459473, |
|
"rewards/rejected": -10.440156936645508, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00022905982905982905, |
|
"logits/chosen": 1.030265212059021, |
|
"logits/rejected": 1.0151996612548828, |
|
"logps/chosen": -490.25640869140625, |
|
"logps/rejected": -603.8272705078125, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07852241396903992, |
|
"rewards/margins": 10.176379203796387, |
|
"rewards/rejected": -10.254899978637695, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.0002286324786324786, |
|
"logits/chosen": 1.2312498092651367, |
|
"logits/rejected": 0.9529620409011841, |
|
"logps/chosen": -602.3244018554688, |
|
"logps/rejected": -622.9336547851562, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13221769034862518, |
|
"rewards/margins": 10.062947273254395, |
|
"rewards/rejected": -9.930729866027832, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00022820512820512817, |
|
"logits/chosen": 1.0476980209350586, |
|
"logits/rejected": 0.9954835176467896, |
|
"logps/chosen": -542.1054077148438, |
|
"logps/rejected": -681.212646484375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.028733327984809875, |
|
"rewards/margins": 10.83343505859375, |
|
"rewards/rejected": -10.86216926574707, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00022777777777777778, |
|
"logits/chosen": 0.9804601669311523, |
|
"logits/rejected": 0.8998504281044006, |
|
"logps/chosen": -525.8041381835938, |
|
"logps/rejected": -585.2196655273438, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6352589130401611, |
|
"rewards/margins": 9.895197868347168, |
|
"rewards/rejected": -9.25993824005127, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.00022735042735042734, |
|
"logits/chosen": 0.9808767437934875, |
|
"logits/rejected": 1.03694486618042, |
|
"logps/chosen": -460.6586608886719, |
|
"logps/rejected": -696.9314575195312, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.008252725005149841, |
|
"rewards/margins": 10.02670955657959, |
|
"rewards/rejected": -10.03496265411377, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 0.0002269230769230769, |
|
"logits/chosen": 1.0145666599273682, |
|
"logits/rejected": 1.0171821117401123, |
|
"logps/chosen": -550.2952880859375, |
|
"logps/rejected": -609.4617919921875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.587495744228363, |
|
"rewards/margins": 9.451133728027344, |
|
"rewards/rejected": -10.038629531860352, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00022649572649572646, |
|
"logits/chosen": 1.1384074687957764, |
|
"logits/rejected": 0.994137167930603, |
|
"logps/chosen": -529.2269287109375, |
|
"logps/rejected": -592.1962890625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2150293588638306, |
|
"rewards/margins": 9.835915565490723, |
|
"rewards/rejected": -8.620885848999023, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.00022606837606837604, |
|
"logits/chosen": 1.0672990083694458, |
|
"logits/rejected": 1.043774127960205, |
|
"logps/chosen": -530.2998046875, |
|
"logps/rejected": -619.9190673828125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.16798219084739685, |
|
"rewards/margins": 9.340568542480469, |
|
"rewards/rejected": -9.508550643920898, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00022564102564102563, |
|
"logits/chosen": 1.0548663139343262, |
|
"logits/rejected": 0.9898471832275391, |
|
"logps/chosen": -515.4979858398438, |
|
"logps/rejected": -591.4309692382812, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1059775352478027, |
|
"rewards/margins": 9.55521297454834, |
|
"rewards/rejected": -8.449234962463379, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.0002252136752136752, |
|
"logits/chosen": 1.0579899549484253, |
|
"logits/rejected": 1.0557491779327393, |
|
"logps/chosen": -532.400634765625, |
|
"logps/rejected": -688.6305541992188, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3727375268936157, |
|
"rewards/margins": 10.276800155639648, |
|
"rewards/rejected": -10.649538040161133, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.00022478632478632477, |
|
"logits/chosen": 1.1566115617752075, |
|
"logits/rejected": 1.0760446786880493, |
|
"logps/chosen": -590.3530883789062, |
|
"logps/rejected": -659.9254150390625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8200367093086243, |
|
"rewards/margins": 10.486039161682129, |
|
"rewards/rejected": -9.666001319885254, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.00022435897435897433, |
|
"logits/chosen": 1.169042944908142, |
|
"logits/rejected": 1.092968225479126, |
|
"logps/chosen": -562.0431518554688, |
|
"logps/rejected": -657.502197265625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6965909004211426, |
|
"rewards/margins": 11.691349029541016, |
|
"rewards/rejected": -9.994759559631348, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00022393162393162392, |
|
"logits/chosen": 1.0384266376495361, |
|
"logits/rejected": 1.021849513053894, |
|
"logps/chosen": -551.0145263671875, |
|
"logps/rejected": -694.0051879882812, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5090039968490601, |
|
"rewards/margins": 10.475794792175293, |
|
"rewards/rejected": -9.966791152954102, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 0.0002235042735042735, |
|
"logits/chosen": 1.1020841598510742, |
|
"logits/rejected": 0.9978400468826294, |
|
"logps/chosen": -552.3654174804688, |
|
"logps/rejected": -584.1214599609375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.505805492401123, |
|
"rewards/margins": 11.413491249084473, |
|
"rewards/rejected": -8.907686233520508, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.00022307692307692306, |
|
"logits/chosen": 1.1605815887451172, |
|
"logits/rejected": 1.0998469591140747, |
|
"logps/chosen": -532.1463623046875, |
|
"logps/rejected": -632.1581420898438, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.184004306793213, |
|
"rewards/margins": 9.712126731872559, |
|
"rewards/rejected": -8.528121948242188, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00022264957264957262, |
|
"logits/chosen": 1.0158207416534424, |
|
"logits/rejected": 1.0649572610855103, |
|
"logps/chosen": -537.4420166015625, |
|
"logps/rejected": -689.2913818359375, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0612900257110596, |
|
"rewards/margins": 11.472357749938965, |
|
"rewards/rejected": -10.411066055297852, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.00022222222222222218, |
|
"logits/chosen": 1.23757803440094, |
|
"logits/rejected": 1.0114773511886597, |
|
"logps/chosen": -557.8294677734375, |
|
"logps/rejected": -592.405517578125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.740067183971405, |
|
"rewards/margins": 10.078370094299316, |
|
"rewards/rejected": -9.338302612304688, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.0002217948717948718, |
|
"logits/chosen": 1.2603142261505127, |
|
"logits/rejected": 1.001814603805542, |
|
"logps/chosen": -581.58935546875, |
|
"logps/rejected": -544.1881103515625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5611451864242554, |
|
"rewards/margins": 9.480655670166016, |
|
"rewards/rejected": -7.9195098876953125, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.00022136752136752135, |
|
"logits/chosen": 1.1516404151916504, |
|
"logits/rejected": 1.1282165050506592, |
|
"logps/chosen": -560.3634033203125, |
|
"logps/rejected": -674.9133911132812, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5357306003570557, |
|
"rewards/margins": 9.987811088562012, |
|
"rewards/rejected": -9.452080726623535, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.0002209401709401709, |
|
"logits/chosen": 1.1687240600585938, |
|
"logits/rejected": 1.0638331174850464, |
|
"logps/chosen": -589.182373046875, |
|
"logps/rejected": -682.9474487304688, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.275535225868225, |
|
"rewards/margins": 11.063849449157715, |
|
"rewards/rejected": -9.788314819335938, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.00022051282051282052, |
|
"logits/chosen": 1.1243481636047363, |
|
"logits/rejected": 0.967666745185852, |
|
"logps/chosen": -575.9657592773438, |
|
"logps/rejected": -614.3820190429688, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.424743115901947, |
|
"rewards/margins": 9.780957221984863, |
|
"rewards/rejected": -9.35621452331543, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00022008547008547008, |
|
"logits/chosen": 1.0041706562042236, |
|
"logits/rejected": 0.996163547039032, |
|
"logps/chosen": -587.704345703125, |
|
"logps/rejected": -628.5338745117188, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7361750602722168, |
|
"rewards/margins": 9.253312110900879, |
|
"rewards/rejected": -8.51713752746582, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.00021965811965811964, |
|
"logits/chosen": 1.1979098320007324, |
|
"logits/rejected": 1.126028299331665, |
|
"logps/chosen": -524.0247802734375, |
|
"logps/rejected": -609.7775268554688, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6900510787963867, |
|
"rewards/margins": 9.077376365661621, |
|
"rewards/rejected": -7.387324333190918, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.0002192307692307692, |
|
"logits/chosen": 1.1033226251602173, |
|
"logits/rejected": 1.0556286573410034, |
|
"logps/chosen": -534.7022094726562, |
|
"logps/rejected": -597.9768676757812, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6321287155151367, |
|
"rewards/margins": 10.527310371398926, |
|
"rewards/rejected": -8.895181655883789, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.00021880341880341878, |
|
"logits/chosen": 1.0708644390106201, |
|
"logits/rejected": 1.0677733421325684, |
|
"logps/chosen": -561.38525390625, |
|
"logps/rejected": -664.557373046875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4007779359817505, |
|
"rewards/margins": 10.572273254394531, |
|
"rewards/rejected": -10.17149543762207, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.00021837606837606837, |
|
"logits/chosen": 1.0858148336410522, |
|
"logits/rejected": 1.0668940544128418, |
|
"logps/chosen": -580.924072265625, |
|
"logps/rejected": -651.995849609375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7338685989379883, |
|
"rewards/margins": 10.77505874633789, |
|
"rewards/rejected": -9.041191101074219, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 0.00021794871794871793, |
|
"logits/chosen": 1.081437110900879, |
|
"logits/rejected": 0.9860243797302246, |
|
"logps/chosen": -484.07366943359375, |
|
"logps/rejected": -648.0784912109375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5497647523880005, |
|
"rewards/margins": 11.513895034790039, |
|
"rewards/rejected": -9.964130401611328, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.0002175213675213675, |
|
"logits/chosen": 1.1952917575836182, |
|
"logits/rejected": 1.1564627885818481, |
|
"logps/chosen": -548.2731323242188, |
|
"logps/rejected": -742.41845703125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5800870656967163, |
|
"rewards/margins": 10.232941627502441, |
|
"rewards/rejected": -9.652854919433594, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00021709401709401707, |
|
"logits/chosen": 1.2948367595672607, |
|
"logits/rejected": 1.1679692268371582, |
|
"logps/chosen": -573.0979614257812, |
|
"logps/rejected": -682.3110961914062, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2798572778701782, |
|
"rewards/margins": 9.664986610412598, |
|
"rewards/rejected": -8.385129928588867, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.00021666666666666666, |
|
"logits/chosen": 1.1744760274887085, |
|
"logits/rejected": 0.9845774173736572, |
|
"logps/chosen": -558.6195068359375, |
|
"logps/rejected": -652.9862060546875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.773690104484558, |
|
"rewards/margins": 10.953255653381348, |
|
"rewards/rejected": -9.1795654296875, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 0.00021623931623931622, |
|
"logits/chosen": 1.1232681274414062, |
|
"logits/rejected": 1.0257112979888916, |
|
"logps/chosen": -510.100341796875, |
|
"logps/rejected": -643.0399169921875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.46491277217865, |
|
"rewards/margins": 9.883655548095703, |
|
"rewards/rejected": -8.418743133544922, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 0.0002158119658119658, |
|
"logits/chosen": 1.0357047319412231, |
|
"logits/rejected": 1.001062035560608, |
|
"logps/chosen": -501.3252868652344, |
|
"logps/rejected": -553.6700439453125, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0863351821899414, |
|
"rewards/margins": 9.711782455444336, |
|
"rewards/rejected": -8.625446319580078, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.00021538461538461536, |
|
"logits/chosen": 1.1731458902359009, |
|
"logits/rejected": 1.11858069896698, |
|
"logps/chosen": -577.2032470703125, |
|
"logps/rejected": -713.2083740234375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0856976509094238, |
|
"rewards/margins": 10.696051597595215, |
|
"rewards/rejected": -9.610353469848633, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 0.00021495726495726492, |
|
"logits/chosen": 1.0282161235809326, |
|
"logits/rejected": 0.9538753032684326, |
|
"logps/chosen": -494.20562744140625, |
|
"logps/rejected": -624.65087890625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3367016315460205, |
|
"rewards/margins": 11.350975036621094, |
|
"rewards/rejected": -9.014272689819336, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.00021452991452991453, |
|
"logits/chosen": 1.0994839668273926, |
|
"logits/rejected": 1.1064229011535645, |
|
"logps/chosen": -498.6627197265625, |
|
"logps/rejected": -695.014404296875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8084684014320374, |
|
"rewards/margins": 10.440178871154785, |
|
"rewards/rejected": -9.631710052490234, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.0002141025641025641, |
|
"logits/chosen": 0.9965860843658447, |
|
"logits/rejected": 0.9728628396987915, |
|
"logps/chosen": -478.1365966796875, |
|
"logps/rejected": -635.9570922851562, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1333738565444946, |
|
"rewards/margins": 9.761213302612305, |
|
"rewards/rejected": -8.627839088439941, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.00021367521367521365, |
|
"logits/chosen": 1.2235289812088013, |
|
"logits/rejected": 1.040520191192627, |
|
"logps/chosen": -577.0011596679688, |
|
"logps/rejected": -598.2988891601562, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.098937749862671, |
|
"rewards/margins": 10.220855712890625, |
|
"rewards/rejected": -9.121917724609375, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.0002132478632478632, |
|
"logits/chosen": 1.1766057014465332, |
|
"logits/rejected": 1.001685380935669, |
|
"logps/chosen": -511.30352783203125, |
|
"logps/rejected": -541.9244384765625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6884262561798096, |
|
"rewards/margins": 9.974132537841797, |
|
"rewards/rejected": -8.28570556640625, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 0.00021282051282051282, |
|
"logits/chosen": 1.193005084991455, |
|
"logits/rejected": 1.118786096572876, |
|
"logps/chosen": -552.6077270507812, |
|
"logps/rejected": -715.137451171875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0981065034866333, |
|
"rewards/margins": 10.102052688598633, |
|
"rewards/rejected": -9.003947257995605, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 0.00021239316239316238, |
|
"logits/chosen": 1.1393274068832397, |
|
"logits/rejected": 1.102120041847229, |
|
"logps/chosen": -511.25286865234375, |
|
"logps/rejected": -617.3232421875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.180321216583252, |
|
"rewards/margins": 11.865279197692871, |
|
"rewards/rejected": -9.684957504272461, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.00021196581196581194, |
|
"logits/chosen": 1.0302733182907104, |
|
"logits/rejected": 1.0308837890625, |
|
"logps/chosen": -504.82989501953125, |
|
"logps/rejected": -603.8284301757812, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.966596782207489, |
|
"rewards/margins": 9.79338264465332, |
|
"rewards/rejected": -8.826786041259766, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.00021153846153846152, |
|
"logits/chosen": 0.9913230538368225, |
|
"logits/rejected": 0.9480158090591431, |
|
"logps/chosen": -546.1568603515625, |
|
"logps/rejected": -645.8260498046875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7541160583496094, |
|
"rewards/margins": 10.948417663574219, |
|
"rewards/rejected": -10.19430160522461, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 0.0002111111111111111, |
|
"logits/chosen": 1.1722790002822876, |
|
"logits/rejected": 1.0763994455337524, |
|
"logps/chosen": -604.4279174804688, |
|
"logps/rejected": -637.1195068359375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.36835515499115, |
|
"rewards/margins": 10.110456466674805, |
|
"rewards/rejected": -8.742100715637207, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00021068376068376067, |
|
"logits/chosen": 1.1397333145141602, |
|
"logits/rejected": 1.1724354028701782, |
|
"logps/chosen": -497.7437744140625, |
|
"logps/rejected": -731.747314453125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8257311582565308, |
|
"rewards/margins": 10.88111686706543, |
|
"rewards/rejected": -10.05538558959961, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.00021025641025641022, |
|
"logits/chosen": 1.0152881145477295, |
|
"logits/rejected": 0.9720747470855713, |
|
"logps/chosen": -467.1788330078125, |
|
"logps/rejected": -618.5647583007812, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0214035511016846, |
|
"rewards/margins": 9.899874687194824, |
|
"rewards/rejected": -8.878470420837402, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.0002098290598290598, |
|
"logits/chosen": 1.2052510976791382, |
|
"logits/rejected": 1.0738441944122314, |
|
"logps/chosen": -559.755859375, |
|
"logps/rejected": -672.936767578125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2137142419815063, |
|
"rewards/margins": 11.24556827545166, |
|
"rewards/rejected": -10.031854629516602, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.0002094017094017094, |
|
"logits/chosen": 1.2158238887786865, |
|
"logits/rejected": 1.0811805725097656, |
|
"logps/chosen": -546.0452270507812, |
|
"logps/rejected": -623.5526733398438, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6926029920578003, |
|
"rewards/margins": 10.92724895477295, |
|
"rewards/rejected": -9.23464584350586, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.00020897435897435895, |
|
"logits/chosen": 1.2231104373931885, |
|
"logits/rejected": 1.1560981273651123, |
|
"logps/chosen": -578.806640625, |
|
"logps/rejected": -620.2879028320312, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.173689603805542, |
|
"rewards/margins": 9.140408515930176, |
|
"rewards/rejected": -7.966719150543213, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.00020854700854700854, |
|
"logits/chosen": 1.1962541341781616, |
|
"logits/rejected": 1.0524215698242188, |
|
"logps/chosen": -575.8316650390625, |
|
"logps/rejected": -602.4752807617188, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0710935592651367, |
|
"rewards/margins": 9.931817054748535, |
|
"rewards/rejected": -7.860722541809082, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.0002081196581196581, |
|
"logits/chosen": 1.2810760736465454, |
|
"logits/rejected": 1.1952964067459106, |
|
"logps/chosen": -611.9567260742188, |
|
"logps/rejected": -695.635986328125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4828972816467285, |
|
"rewards/margins": 10.279756546020508, |
|
"rewards/rejected": -8.796858787536621, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.00020769230769230766, |
|
"logits/chosen": 1.1195869445800781, |
|
"logits/rejected": 1.032854437828064, |
|
"logps/chosen": -496.8470153808594, |
|
"logps/rejected": -573.8423461914062, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7543283700942993, |
|
"rewards/margins": 10.867127418518066, |
|
"rewards/rejected": -9.112799644470215, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.00020726495726495724, |
|
"logits/chosen": 1.1649212837219238, |
|
"logits/rejected": 1.0563678741455078, |
|
"logps/chosen": -558.580322265625, |
|
"logps/rejected": -644.250732421875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.997902512550354, |
|
"rewards/margins": 10.675620079040527, |
|
"rewards/rejected": -9.677717208862305, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.00020683760683760683, |
|
"logits/chosen": 1.1838792562484741, |
|
"logits/rejected": 1.0918617248535156, |
|
"logps/chosen": -534.3650512695312, |
|
"logps/rejected": -626.9495849609375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7307627201080322, |
|
"rewards/margins": 9.962722778320312, |
|
"rewards/rejected": -8.23196029663086, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.0002064102564102564, |
|
"logits/chosen": 1.1973166465759277, |
|
"logits/rejected": 1.1105926036834717, |
|
"logps/chosen": -552.765625, |
|
"logps/rejected": -571.7509765625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9999276399612427, |
|
"rewards/margins": 9.358254432678223, |
|
"rewards/rejected": -7.358326435089111, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.00020598290598290595, |
|
"logits/chosen": 1.1777927875518799, |
|
"logits/rejected": 1.0511623620986938, |
|
"logps/chosen": -482.9162292480469, |
|
"logps/rejected": -596.0338745117188, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1938194036483765, |
|
"rewards/margins": 10.532297134399414, |
|
"rewards/rejected": -9.338478088378906, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.00020555555555555556, |
|
"logits/chosen": 1.194580078125, |
|
"logits/rejected": 1.0481842756271362, |
|
"logps/chosen": -523.016845703125, |
|
"logps/rejected": -585.9976806640625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1020272970199585, |
|
"rewards/margins": 10.315288543701172, |
|
"rewards/rejected": -9.213261604309082, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.00020512820512820512, |
|
"logits/chosen": 1.0988215208053589, |
|
"logits/rejected": 1.024403691291809, |
|
"logps/chosen": -481.1084289550781, |
|
"logps/rejected": -562.4240112304688, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.069756507873535, |
|
"rewards/margins": 10.040982246398926, |
|
"rewards/rejected": -7.971225261688232, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.00020470085470085468, |
|
"logits/chosen": 1.311755657196045, |
|
"logits/rejected": 1.0829813480377197, |
|
"logps/chosen": -606.013671875, |
|
"logps/rejected": -694.0232543945312, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6969201564788818, |
|
"rewards/margins": 9.998454093933105, |
|
"rewards/rejected": -9.301534652709961, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 0.00020427350427350423, |
|
"logits/chosen": 1.0659198760986328, |
|
"logits/rejected": 1.0787678956985474, |
|
"logps/chosen": -562.7803344726562, |
|
"logps/rejected": -598.2239990234375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5258029699325562, |
|
"rewards/margins": 9.569708824157715, |
|
"rewards/rejected": -8.043905258178711, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.00020384615384615385, |
|
"logits/chosen": 1.1065874099731445, |
|
"logits/rejected": 1.075424313545227, |
|
"logps/chosen": -503.7167053222656, |
|
"logps/rejected": -640.2783203125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0811102390289307, |
|
"rewards/margins": 9.623838424682617, |
|
"rewards/rejected": -8.542729377746582, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.0002034188034188034, |
|
"logits/chosen": 1.1548815965652466, |
|
"logits/rejected": 1.1475387811660767, |
|
"logps/chosen": -475.9542236328125, |
|
"logps/rejected": -630.96826171875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4979651868343353, |
|
"rewards/margins": 10.597925186157227, |
|
"rewards/rejected": -10.099960327148438, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.00020299145299145296, |
|
"logits/chosen": 1.1043236255645752, |
|
"logits/rejected": 1.0807740688323975, |
|
"logps/chosen": -546.5986938476562, |
|
"logps/rejected": -615.6447143554688, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0489461421966553, |
|
"rewards/margins": 10.541731834411621, |
|
"rewards/rejected": -8.49278450012207, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00020256410256410255, |
|
"logits/chosen": 1.1738014221191406, |
|
"logits/rejected": 1.0424628257751465, |
|
"logps/chosen": -579.9978637695312, |
|
"logps/rejected": -668.8394775390625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2401783466339111, |
|
"rewards/margins": 11.568252563476562, |
|
"rewards/rejected": -10.328075408935547, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 0.00020213675213675214, |
|
"logits/chosen": 1.221925973892212, |
|
"logits/rejected": 1.0827970504760742, |
|
"logps/chosen": -554.8446044921875, |
|
"logps/rejected": -623.172119140625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.212306261062622, |
|
"rewards/margins": 9.68209457397461, |
|
"rewards/rejected": -8.469788551330566, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.0002017094017094017, |
|
"logits/chosen": 1.0534234046936035, |
|
"logits/rejected": 1.1416208744049072, |
|
"logps/chosen": -498.2744140625, |
|
"logps/rejected": -689.6672973632812, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.162524938583374, |
|
"rewards/margins": 11.897795677185059, |
|
"rewards/rejected": -9.735269546508789, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.00020128205128205125, |
|
"logits/chosen": 1.0100905895233154, |
|
"logits/rejected": 1.1573420763015747, |
|
"logps/chosen": -500.80841064453125, |
|
"logps/rejected": -593.0250244140625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8166165351867676, |
|
"rewards/margins": 9.917183876037598, |
|
"rewards/rejected": -8.100566864013672, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.00020085470085470084, |
|
"logits/chosen": 1.1027199029922485, |
|
"logits/rejected": 0.9867293238639832, |
|
"logps/chosen": -524.3262329101562, |
|
"logps/rejected": -591.5633544921875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.232293963432312, |
|
"rewards/margins": 10.08406925201416, |
|
"rewards/rejected": -8.851776123046875, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.0002004273504273504, |
|
"logits/chosen": 1.2790604829788208, |
|
"logits/rejected": 1.0140596628189087, |
|
"logps/chosen": -588.2280883789062, |
|
"logps/rejected": -668.6362915039062, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8383196592330933, |
|
"rewards/margins": 10.089158058166504, |
|
"rewards/rejected": -9.250838279724121, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.00019999999999999998, |
|
"logits/chosen": 1.082058310508728, |
|
"logits/rejected": 0.9627883434295654, |
|
"logps/chosen": -609.169677734375, |
|
"logps/rejected": -605.4269409179688, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.0816116333007812, |
|
"rewards/margins": 11.790176391601562, |
|
"rewards/rejected": -9.708564758300781, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.00019957264957264957, |
|
"logits/chosen": 1.1039892435073853, |
|
"logits/rejected": 0.9567267298698425, |
|
"logps/chosen": -473.4000244140625, |
|
"logps/rejected": -618.5371704101562, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4500510692596436, |
|
"rewards/margins": 10.126335144042969, |
|
"rewards/rejected": -8.676283836364746, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 0.00019914529914529913, |
|
"logits/chosen": 1.1014786958694458, |
|
"logits/rejected": 1.0613113641738892, |
|
"logps/chosen": -511.72454833984375, |
|
"logps/rejected": -694.4070434570312, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3977946639060974, |
|
"rewards/margins": 11.06888484954834, |
|
"rewards/rejected": -10.671089172363281, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.00019871794871794869, |
|
"logits/chosen": 1.1368095874786377, |
|
"logits/rejected": 0.9869575500488281, |
|
"logps/chosen": -509.3475036621094, |
|
"logps/rejected": -625.2825927734375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8700177669525146, |
|
"rewards/margins": 10.845271110534668, |
|
"rewards/rejected": -9.97525405883789, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 0.00019829059829059824, |
|
"logits/chosen": 1.1710002422332764, |
|
"logits/rejected": 1.1424845457077026, |
|
"logps/chosen": -548.1114501953125, |
|
"logps/rejected": -658.926513671875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.743558406829834, |
|
"rewards/margins": 9.961552619934082, |
|
"rewards/rejected": -9.217994689941406, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.00019786324786324786, |
|
"logits/chosen": 1.227845311164856, |
|
"logits/rejected": 1.1172688007354736, |
|
"logps/chosen": -615.0020751953125, |
|
"logps/rejected": -655.31787109375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7781198024749756, |
|
"rewards/margins": 11.058065414428711, |
|
"rewards/rejected": -9.279945373535156, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.00019743589743589742, |
|
"logits/chosen": 1.2156200408935547, |
|
"logits/rejected": 0.9318048357963562, |
|
"logps/chosen": -561.3159790039062, |
|
"logps/rejected": -528.769287109375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.775212287902832, |
|
"rewards/margins": 9.198701858520508, |
|
"rewards/rejected": -7.423489570617676, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.00019700854700854697, |
|
"logits/chosen": 1.1288306713104248, |
|
"logits/rejected": 1.0163847208023071, |
|
"logps/chosen": -566.7800903320312, |
|
"logps/rejected": -621.5507202148438, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5739163160324097, |
|
"rewards/margins": 11.509315490722656, |
|
"rewards/rejected": -9.935400009155273, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 0.00019658119658119659, |
|
"logits/chosen": 1.1232361793518066, |
|
"logits/rejected": 1.1592121124267578, |
|
"logps/chosen": -528.2422485351562, |
|
"logps/rejected": -708.33642578125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.655145287513733, |
|
"rewards/margins": 11.109955787658691, |
|
"rewards/rejected": -9.454811096191406, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 0.00019615384615384615, |
|
"logits/chosen": 1.1231738328933716, |
|
"logits/rejected": 1.117080807685852, |
|
"logps/chosen": -498.927001953125, |
|
"logps/rejected": -631.9031982421875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47227901220321655, |
|
"rewards/margins": 10.479837417602539, |
|
"rewards/rejected": -10.007558822631836, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 0.0001957264957264957, |
|
"logits/chosen": 1.0491048097610474, |
|
"logits/rejected": 0.9988434314727783, |
|
"logps/chosen": -494.6644287109375, |
|
"logps/rejected": -610.76806640625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.1051549911499023, |
|
"rewards/margins": 11.895588874816895, |
|
"rewards/rejected": -9.790433883666992, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 0.00019529914529914526, |
|
"logits/chosen": 1.098575234413147, |
|
"logits/rejected": 1.1674755811691284, |
|
"logps/chosen": -514.0235595703125, |
|
"logps/rejected": -722.014892578125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.734897255897522, |
|
"rewards/margins": 10.937726974487305, |
|
"rewards/rejected": -10.202829360961914, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.00019487179487179487, |
|
"logits/chosen": 1.1288853883743286, |
|
"logits/rejected": 1.1453090906143188, |
|
"logps/chosen": -488.9378662109375, |
|
"logps/rejected": -641.2897338867188, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5595530271530151, |
|
"rewards/margins": 10.641010284423828, |
|
"rewards/rejected": -9.081456184387207, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 0.00019444444444444443, |
|
"logits/chosen": 1.265305519104004, |
|
"logits/rejected": 0.9294592142105103, |
|
"logps/chosen": -613.603271484375, |
|
"logps/rejected": -536.7325439453125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.3043055534362793, |
|
"rewards/margins": 10.669670104980469, |
|
"rewards/rejected": -8.365365028381348, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 0.000194017094017094, |
|
"logits/chosen": 1.1310415267944336, |
|
"logits/rejected": 1.0312024354934692, |
|
"logps/chosen": -516.1328125, |
|
"logps/rejected": -636.2833251953125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.218218207359314, |
|
"rewards/margins": 10.852701187133789, |
|
"rewards/rejected": -9.634482383728027, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.00019358974358974358, |
|
"logits/chosen": 1.1042028665542603, |
|
"logits/rejected": 1.0703749656677246, |
|
"logps/chosen": -577.5679321289062, |
|
"logps/rejected": -605.3977661132812, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8515387773513794, |
|
"rewards/margins": 9.11039924621582, |
|
"rewards/rejected": -8.25886058807373, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.00019316239316239314, |
|
"logits/chosen": 1.1480742692947388, |
|
"logits/rejected": 1.0245976448059082, |
|
"logps/chosen": -544.492919921875, |
|
"logps/rejected": -650.3825073242188, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1441433429718018, |
|
"rewards/margins": 10.524991989135742, |
|
"rewards/rejected": -9.38084888458252, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 0.00019273504273504272, |
|
"logits/chosen": 0.9963136315345764, |
|
"logits/rejected": 1.0162067413330078, |
|
"logps/chosen": -543.5288696289062, |
|
"logps/rejected": -677.895751953125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5060915946960449, |
|
"rewards/margins": 10.86108684539795, |
|
"rewards/rejected": -10.354994773864746, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 0.0001923076923076923, |
|
"logits/chosen": 1.192360758781433, |
|
"logits/rejected": 1.1079771518707275, |
|
"logps/chosen": -512.4086303710938, |
|
"logps/rejected": -630.524169921875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.9825226068496704, |
|
"rewards/margins": 11.417821884155273, |
|
"rewards/rejected": -9.435300827026367, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.00019188034188034187, |
|
"logits/chosen": 1.0749591588974, |
|
"logits/rejected": 1.0543586015701294, |
|
"logps/chosen": -502.4964599609375, |
|
"logps/rejected": -628.3140869140625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1525578498840332, |
|
"rewards/margins": 10.45914363861084, |
|
"rewards/rejected": -9.306587219238281, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 0.00019145299145299142, |
|
"logits/chosen": 1.1045258045196533, |
|
"logits/rejected": 1.094617486000061, |
|
"logps/chosen": -556.7658081054688, |
|
"logps/rejected": -661.129638671875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5904799103736877, |
|
"rewards/margins": 10.494197845458984, |
|
"rewards/rejected": -9.903717994689941, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 0.00019102564102564098, |
|
"logits/chosen": 1.0999786853790283, |
|
"logits/rejected": 1.0287926197052002, |
|
"logps/chosen": -559.324951171875, |
|
"logps/rejected": -685.59716796875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8768333196640015, |
|
"rewards/margins": 11.342018127441406, |
|
"rewards/rejected": -10.465184211730957, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 0.0001905982905982906, |
|
"logits/chosen": 1.2270386219024658, |
|
"logits/rejected": 1.0695297718048096, |
|
"logps/chosen": -555.893310546875, |
|
"logps/rejected": -616.703125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.512807846069336, |
|
"rewards/margins": 11.095309257507324, |
|
"rewards/rejected": -8.582502365112305, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 0.00019017094017094015, |
|
"logits/chosen": 1.1461352109909058, |
|
"logits/rejected": 1.0352705717086792, |
|
"logps/chosen": -500.9500427246094, |
|
"logps/rejected": -638.1965942382812, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5767711400985718, |
|
"rewards/margins": 10.275659561157227, |
|
"rewards/rejected": -9.698890686035156, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 0.0001897435897435897, |
|
"logits/chosen": 1.1884602308273315, |
|
"logits/rejected": 0.9545145630836487, |
|
"logps/chosen": -553.399658203125, |
|
"logps/rejected": -571.7633056640625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.724369764328003, |
|
"rewards/margins": 9.780606269836426, |
|
"rewards/rejected": -8.056236267089844, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.00018931623931623933, |
|
"logits/chosen": 1.1067692041397095, |
|
"logits/rejected": 1.0162923336029053, |
|
"logps/chosen": -517.6556396484375, |
|
"logps/rejected": -604.726318359375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7874058485031128, |
|
"rewards/margins": 10.558215141296387, |
|
"rewards/rejected": -8.770809173583984, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 0.00018888888888888888, |
|
"logits/chosen": 1.217395544052124, |
|
"logits/rejected": 1.084112286567688, |
|
"logps/chosen": -542.150634765625, |
|
"logps/rejected": -678.0106201171875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0376150608062744, |
|
"rewards/margins": 10.951888084411621, |
|
"rewards/rejected": -9.914274215698242, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 0.00018846153846153844, |
|
"logits/chosen": 1.1382925510406494, |
|
"logits/rejected": 1.0875835418701172, |
|
"logps/chosen": -545.2322387695312, |
|
"logps/rejected": -629.0198974609375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7386005520820618, |
|
"rewards/margins": 10.408458709716797, |
|
"rewards/rejected": -9.669858932495117, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.000188034188034188, |
|
"logits/chosen": 1.115515947341919, |
|
"logits/rejected": 1.066940426826477, |
|
"logps/chosen": -523.6985473632812, |
|
"logps/rejected": -574.7987670898438, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0293325185775757, |
|
"rewards/margins": 9.218039512634277, |
|
"rewards/rejected": -8.18870735168457, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 0.00018760683760683761, |
|
"logits/chosen": 1.0013961791992188, |
|
"logits/rejected": 1.0823533535003662, |
|
"logps/chosen": -485.6407775878906, |
|
"logps/rejected": -657.2274169921875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3279895782470703, |
|
"rewards/margins": 10.64334774017334, |
|
"rewards/rejected": -9.31535816192627, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 0.00018717948717948717, |
|
"logits/chosen": 1.0347654819488525, |
|
"logits/rejected": 1.0151424407958984, |
|
"logps/chosen": -497.17730712890625, |
|
"logps/rejected": -617.05029296875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.067275881767273, |
|
"rewards/margins": 9.780403137207031, |
|
"rewards/rejected": -8.713126182556152, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 0.00018675213675213673, |
|
"logits/chosen": 1.1175577640533447, |
|
"logits/rejected": 1.0508739948272705, |
|
"logps/chosen": -543.9364013671875, |
|
"logps/rejected": -730.9745483398438, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06183135509490967, |
|
"rewards/margins": 12.12753963470459, |
|
"rewards/rejected": -12.065709114074707, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 0.00018632478632478632, |
|
"logits/chosen": 1.07142174243927, |
|
"logits/rejected": 1.0519976615905762, |
|
"logps/chosen": -506.71435546875, |
|
"logps/rejected": -652.1842041015625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.239646315574646, |
|
"rewards/margins": 10.84978199005127, |
|
"rewards/rejected": -9.61013412475586, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 0.00018589743589743588, |
|
"logits/chosen": 1.1885634660720825, |
|
"logits/rejected": 1.0062313079833984, |
|
"logps/chosen": -569.5838623046875, |
|
"logps/rejected": -602.7799072265625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2498093843460083, |
|
"rewards/margins": 9.729214668273926, |
|
"rewards/rejected": -8.47940444946289, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.00018547008547008546, |
|
"logits/chosen": 1.2486610412597656, |
|
"logits/rejected": 0.9658511877059937, |
|
"logps/chosen": -563.5016479492188, |
|
"logps/rejected": -559.8602294921875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7746890783309937, |
|
"rewards/margins": 10.259527206420898, |
|
"rewards/rejected": -8.484838485717773, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 0.00018504273504273502, |
|
"logits/chosen": 1.0988224744796753, |
|
"logits/rejected": 1.032260775566101, |
|
"logps/chosen": -590.3836669921875, |
|
"logps/rejected": -605.6322021484375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1525167226791382, |
|
"rewards/margins": 10.179304122924805, |
|
"rewards/rejected": -9.026787757873535, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 0.0001846153846153846, |
|
"logits/chosen": 1.07295823097229, |
|
"logits/rejected": 0.9503864645957947, |
|
"logps/chosen": -567.7842407226562, |
|
"logps/rejected": -599.153564453125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1233148574829102, |
|
"rewards/margins": 10.49275016784668, |
|
"rewards/rejected": -9.369434356689453, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 0.00018418803418803416, |
|
"logits/chosen": 1.034525752067566, |
|
"logits/rejected": 1.0498316287994385, |
|
"logps/chosen": -501.63323974609375, |
|
"logps/rejected": -582.5183715820312, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7673141956329346, |
|
"rewards/margins": 10.9537353515625, |
|
"rewards/rejected": -9.186420440673828, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 0.00018376068376068372, |
|
"logits/chosen": 1.1317795515060425, |
|
"logits/rejected": 1.0368475914001465, |
|
"logps/chosen": -592.8737182617188, |
|
"logps/rejected": -618.9924926757812, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9373716115951538, |
|
"rewards/margins": 10.743789672851562, |
|
"rewards/rejected": -9.806417465209961, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.00018333333333333334, |
|
"logits/chosen": 1.236878514289856, |
|
"logits/rejected": 1.0824024677276611, |
|
"logps/chosen": -592.6204223632812, |
|
"logps/rejected": -678.1072387695312, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1566966772079468, |
|
"rewards/margins": 12.701655387878418, |
|
"rewards/rejected": -11.544958114624023, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 0.0001829059829059829, |
|
"logits/chosen": 1.0362117290496826, |
|
"logits/rejected": 1.0344483852386475, |
|
"logps/chosen": -514.698486328125, |
|
"logps/rejected": -682.2589721679688, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7533162832260132, |
|
"rewards/margins": 10.993279457092285, |
|
"rewards/rejected": -10.23996353149414, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.00018247863247863245, |
|
"logits/chosen": 1.0642633438110352, |
|
"logits/rejected": 0.9820662140846252, |
|
"logps/chosen": -531.2005004882812, |
|
"logps/rejected": -594.51171875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5738836526870728, |
|
"rewards/margins": 11.221702575683594, |
|
"rewards/rejected": -9.647819519042969, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 0.000182051282051282, |
|
"logits/chosen": 1.1875312328338623, |
|
"logits/rejected": 1.0477038621902466, |
|
"logps/chosen": -519.474609375, |
|
"logps/rejected": -641.5570678710938, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4246861934661865, |
|
"rewards/margins": 10.708345413208008, |
|
"rewards/rejected": -9.283658981323242, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.00018162393162393162, |
|
"logits/chosen": 1.300011157989502, |
|
"logits/rejected": 1.1615049839019775, |
|
"logps/chosen": -594.492919921875, |
|
"logps/rejected": -714.0015258789062, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3209400177001953, |
|
"rewards/margins": 11.944135665893555, |
|
"rewards/rejected": -10.62319564819336, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 0.00018119658119658118, |
|
"logits/chosen": 1.2041311264038086, |
|
"logits/rejected": 1.09273099899292, |
|
"logps/chosen": -517.081787109375, |
|
"logps/rejected": -623.0797729492188, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7040494680404663, |
|
"rewards/margins": 9.923104286193848, |
|
"rewards/rejected": -8.21905517578125, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.00018076923076923074, |
|
"logits/chosen": 1.1552023887634277, |
|
"logits/rejected": 1.1544487476348877, |
|
"logps/chosen": -527.7257080078125, |
|
"logps/rejected": -654.458740234375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5392783880233765, |
|
"rewards/margins": 10.782899856567383, |
|
"rewards/rejected": -9.243619918823242, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 0.00018034188034188035, |
|
"logits/chosen": 1.0705739259719849, |
|
"logits/rejected": 1.0257068872451782, |
|
"logps/chosen": -529.324951171875, |
|
"logps/rejected": -572.942138671875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1362345218658447, |
|
"rewards/margins": 10.217846870422363, |
|
"rewards/rejected": -9.081612586975098, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 0.0001799145299145299, |
|
"logits/chosen": 1.0554429292678833, |
|
"logits/rejected": 0.9397602081298828, |
|
"logps/chosen": -497.97955322265625, |
|
"logps/rejected": -575.80810546875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4867826700210571, |
|
"rewards/margins": 11.263152122497559, |
|
"rewards/rejected": -9.77637004852295, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 0.00017948717948717947, |
|
"logits/chosen": 1.028283953666687, |
|
"logits/rejected": 1.1357170343399048, |
|
"logps/chosen": -486.7881164550781, |
|
"logps/rejected": -638.11083984375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6323189735412598, |
|
"rewards/margins": 9.90245246887207, |
|
"rewards/rejected": -8.270133972167969, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 0.00017905982905982903, |
|
"logits/chosen": 1.140254259109497, |
|
"logits/rejected": 0.9276759028434753, |
|
"logps/chosen": -533.4109497070312, |
|
"logps/rejected": -542.5346069335938, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5379778146743774, |
|
"rewards/margins": 10.987845420837402, |
|
"rewards/rejected": -9.449868202209473, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 0.00017863247863247861, |
|
"logits/chosen": 1.2161970138549805, |
|
"logits/rejected": 1.1362658739089966, |
|
"logps/chosen": -508.8780212402344, |
|
"logps/rejected": -620.1040649414062, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2322115898132324, |
|
"rewards/margins": 10.1491060256958, |
|
"rewards/rejected": -8.91689395904541, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 0.0001782051282051282, |
|
"logits/chosen": 1.075786828994751, |
|
"logits/rejected": 1.056262731552124, |
|
"logps/chosen": -545.98876953125, |
|
"logps/rejected": -626.4110717773438, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6408146023750305, |
|
"rewards/margins": 11.502391815185547, |
|
"rewards/rejected": -10.861577033996582, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 0.00017777777777777776, |
|
"logits/chosen": 1.127282738685608, |
|
"logits/rejected": 1.0822765827178955, |
|
"logps/chosen": -518.8118286132812, |
|
"logps/rejected": -693.237548828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05411094054579735, |
|
"rewards/margins": 11.227949142456055, |
|
"rewards/rejected": -11.282060623168945, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 0.00017735042735042734, |
|
"logits/chosen": 1.1585055589675903, |
|
"logits/rejected": 1.031551718711853, |
|
"logps/chosen": -549.4298706054688, |
|
"logps/rejected": -594.13720703125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.8158806562423706, |
|
"rewards/margins": 10.627920150756836, |
|
"rewards/rejected": -8.81203842163086, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.0001769230769230769, |
|
"logits/chosen": 1.2148640155792236, |
|
"logits/rejected": 1.1051499843597412, |
|
"logps/chosen": -618.556884765625, |
|
"logps/rejected": -655.5272827148438, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7870438098907471, |
|
"rewards/margins": 10.324081420898438, |
|
"rewards/rejected": -9.537036895751953, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.00017649572649572646, |
|
"logits/chosen": 1.0680745840072632, |
|
"logits/rejected": 0.9647752046585083, |
|
"logps/chosen": -520.0435791015625, |
|
"logps/rejected": -650.8917236328125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3966693878173828, |
|
"rewards/margins": 11.41585922241211, |
|
"rewards/rejected": -10.019189834594727, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 0.00017606837606837605, |
|
"logits/chosen": 0.974884033203125, |
|
"logits/rejected": 0.9622063636779785, |
|
"logps/chosen": -551.198486328125, |
|
"logps/rejected": -553.2161865234375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7198470830917358, |
|
"rewards/margins": 10.737578392028809, |
|
"rewards/rejected": -9.017731666564941, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 0.00017564102564102563, |
|
"logits/chosen": 1.1338629722595215, |
|
"logits/rejected": 1.0500080585479736, |
|
"logps/chosen": -519.1705322265625, |
|
"logps/rejected": -645.0868530273438, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2686916589736938, |
|
"rewards/margins": 11.208242416381836, |
|
"rewards/rejected": -9.939552307128906, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 0.0001752136752136752, |
|
"logits/chosen": 1.1133869886398315, |
|
"logits/rejected": 1.0957475900650024, |
|
"logps/chosen": -509.5137939453125, |
|
"logps/rejected": -634.0670776367188, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1420785188674927, |
|
"rewards/margins": 10.913748741149902, |
|
"rewards/rejected": -9.771669387817383, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 0.00017478632478632475, |
|
"logits/chosen": 1.0674755573272705, |
|
"logits/rejected": 1.0430006980895996, |
|
"logps/chosen": -549.5125732421875, |
|
"logps/rejected": -615.5984497070312, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9752965569496155, |
|
"rewards/margins": 10.462637901306152, |
|
"rewards/rejected": -9.487340927124023, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 0.00017435897435897436, |
|
"logits/chosen": 1.0406773090362549, |
|
"logits/rejected": 0.9974726438522339, |
|
"logps/chosen": -554.4515380859375, |
|
"logps/rejected": -671.939697265625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2197209894657135, |
|
"rewards/margins": 10.627167701721191, |
|
"rewards/rejected": -10.40744686126709, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 0.00017393162393162392, |
|
"logits/chosen": 1.0458401441574097, |
|
"logits/rejected": 0.9767919182777405, |
|
"logps/chosen": -508.51275634765625, |
|
"logps/rejected": -617.0277099609375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.3107894659042358, |
|
"rewards/margins": 11.249536514282227, |
|
"rewards/rejected": -9.938748359680176, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.00017350427350427348, |
|
"logits/chosen": 1.1613295078277588, |
|
"logits/rejected": 1.067457914352417, |
|
"logps/chosen": -583.6506958007812, |
|
"logps/rejected": -683.9884033203125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4901715517044067, |
|
"rewards/margins": 11.270017623901367, |
|
"rewards/rejected": -9.779845237731934, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.00017307692307692304, |
|
"logits/chosen": 1.0676989555358887, |
|
"logits/rejected": 1.048395037651062, |
|
"logps/chosen": -553.3953857421875, |
|
"logps/rejected": -713.2011108398438, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.901606559753418, |
|
"rewards/margins": 11.026727676391602, |
|
"rewards/rejected": -10.125120162963867, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 0.00017264957264957265, |
|
"logits/chosen": 1.196079969406128, |
|
"logits/rejected": 1.0154253244400024, |
|
"logps/chosen": -588.3291015625, |
|
"logps/rejected": -622.0186767578125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1913900375366211, |
|
"rewards/margins": 10.078536987304688, |
|
"rewards/rejected": -9.887145042419434, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 0.0001722222222222222, |
|
"logits/chosen": 1.0178331136703491, |
|
"logits/rejected": 0.9668864011764526, |
|
"logps/chosen": -546.368408203125, |
|
"logps/rejected": -615.4050903320312, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2646788954734802, |
|
"rewards/margins": 10.304734230041504, |
|
"rewards/rejected": -10.040055274963379, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 0.00017179487179487177, |
|
"logits/chosen": 1.1032341718673706, |
|
"logits/rejected": 0.9035695791244507, |
|
"logps/chosen": -604.7924194335938, |
|
"logps/rejected": -652.8895874023438, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.5645720958709717, |
|
"rewards/margins": 11.598380088806152, |
|
"rewards/rejected": -10.033807754516602, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 0.00017136752136752135, |
|
"logits/chosen": 1.1787011623382568, |
|
"logits/rejected": 0.9833663702011108, |
|
"logps/chosen": -574.9915771484375, |
|
"logps/rejected": -650.5631713867188, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0073214769363403, |
|
"rewards/margins": 10.402047157287598, |
|
"rewards/rejected": -9.394725799560547, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 0.00017094017094017094, |
|
"logits/chosen": 1.1996289491653442, |
|
"logits/rejected": 1.039535403251648, |
|
"logps/chosen": -556.2608642578125, |
|
"logps/rejected": -642.0311279296875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.216808557510376, |
|
"rewards/margins": 11.361566543579102, |
|
"rewards/rejected": -10.144757270812988, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.0001705128205128205, |
|
"logits/chosen": 1.1434451341629028, |
|
"logits/rejected": 1.051792860031128, |
|
"logps/chosen": -493.62506103515625, |
|
"logps/rejected": -622.7883911132812, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9653478860855103, |
|
"rewards/margins": 10.048986434936523, |
|
"rewards/rejected": -9.083638191223145, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 0.00017008547008547006, |
|
"logits/chosen": 1.2439961433410645, |
|
"logits/rejected": 0.9698901772499084, |
|
"logps/chosen": -590.2651977539062, |
|
"logps/rejected": -601.9655151367188, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7650651931762695, |
|
"rewards/margins": 10.54482650756836, |
|
"rewards/rejected": -8.779762268066406, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.00016965811965811964, |
|
"logits/chosen": 1.0514246225357056, |
|
"logits/rejected": 1.0069361925125122, |
|
"logps/chosen": -528.1588745117188, |
|
"logps/rejected": -668.963623046875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9377299547195435, |
|
"rewards/margins": 11.108814239501953, |
|
"rewards/rejected": -10.171082496643066, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 0.0001692307692307692, |
|
"logits/chosen": 1.0299386978149414, |
|
"logits/rejected": 0.9614180326461792, |
|
"logps/chosen": -461.4678955078125, |
|
"logps/rejected": -622.8182373046875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3360067903995514, |
|
"rewards/margins": 11.178654670715332, |
|
"rewards/rejected": -10.842646598815918, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 0.0001688034188034188, |
|
"logits/chosen": 1.069563627243042, |
|
"logits/rejected": 0.9999057054519653, |
|
"logps/chosen": -532.8282470703125, |
|
"logps/rejected": -611.0808715820312, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.6393276453018188, |
|
"rewards/margins": 11.510586738586426, |
|
"rewards/rejected": -9.871259689331055, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 0.00016837606837606837, |
|
"logits/chosen": 1.0466080904006958, |
|
"logits/rejected": 0.9922081232070923, |
|
"logps/chosen": -516.3175048828125, |
|
"logps/rejected": -622.3922729492188, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9020825028419495, |
|
"rewards/margins": 11.032547950744629, |
|
"rewards/rejected": -10.13046646118164, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 0.00016794871794871793, |
|
"logits/chosen": 1.068638801574707, |
|
"logits/rejected": 1.0634409189224243, |
|
"logps/chosen": -518.2888793945312, |
|
"logps/rejected": -700.1104736328125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8084597587585449, |
|
"rewards/margins": 11.80289363861084, |
|
"rewards/rejected": -10.994433403015137, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 0.0001675213675213675, |
|
"logits/chosen": 1.252096176147461, |
|
"logits/rejected": 1.12416672706604, |
|
"logps/chosen": -559.9757080078125, |
|
"logps/rejected": -640.3218994140625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.250156283378601, |
|
"rewards/margins": 10.136184692382812, |
|
"rewards/rejected": -8.886027336120605, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.00016709401709401708, |
|
"logits/chosen": 1.0118858814239502, |
|
"logits/rejected": 1.0544030666351318, |
|
"logps/chosen": -550.706298828125, |
|
"logps/rejected": -651.165283203125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7763742208480835, |
|
"rewards/margins": 11.286043167114258, |
|
"rewards/rejected": -10.50966739654541, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 0.00016666666666666666, |
|
"logits/chosen": 1.2127994298934937, |
|
"logits/rejected": 0.9555975198745728, |
|
"logps/chosen": -586.635009765625, |
|
"logps/rejected": -619.5669555664062, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7170848846435547, |
|
"rewards/margins": 11.147682189941406, |
|
"rewards/rejected": -9.430597305297852, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 0.00016623931623931622, |
|
"logits/chosen": 0.9974936246871948, |
|
"logits/rejected": 0.98292076587677, |
|
"logps/chosen": -537.190185546875, |
|
"logps/rejected": -637.2457275390625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7226758599281311, |
|
"rewards/margins": 10.400409698486328, |
|
"rewards/rejected": -9.677732467651367, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 0.00016581196581196578, |
|
"logits/chosen": 0.9824466109275818, |
|
"logits/rejected": 1.0442546606063843, |
|
"logps/chosen": -524.7149047851562, |
|
"logps/rejected": -685.2274169921875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7316232919692993, |
|
"rewards/margins": 12.584775924682617, |
|
"rewards/rejected": -10.85315227508545, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.0001653846153846154, |
|
"logits/chosen": 1.0178762674331665, |
|
"logits/rejected": 1.0314674377441406, |
|
"logps/chosen": -530.5228271484375, |
|
"logps/rejected": -601.8655395507812, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4628024697303772, |
|
"rewards/margins": 9.492361068725586, |
|
"rewards/rejected": -9.029558181762695, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 0.00016495726495726495, |
|
"logits/chosen": 1.1514480113983154, |
|
"logits/rejected": 0.9793822765350342, |
|
"logps/chosen": -569.4765625, |
|
"logps/rejected": -589.162109375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.2061307430267334, |
|
"rewards/margins": 11.210150718688965, |
|
"rewards/rejected": -9.004018783569336, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 0.0001645299145299145, |
|
"logits/chosen": 1.008737564086914, |
|
"logits/rejected": 0.9937309622764587, |
|
"logps/chosen": -473.2950134277344, |
|
"logps/rejected": -667.3652954101562, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11462172865867615, |
|
"rewards/margins": 11.188936233520508, |
|
"rewards/rejected": -11.303558349609375, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 0.0001641025641025641, |
|
"logits/chosen": 1.1840193271636963, |
|
"logits/rejected": 0.9543963670730591, |
|
"logps/chosen": -582.7732543945312, |
|
"logps/rejected": -608.0531616210938, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1386045217514038, |
|
"rewards/margins": 10.291077613830566, |
|
"rewards/rejected": -9.152473449707031, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 0.00016367521367521368, |
|
"logits/chosen": 1.157091736793518, |
|
"logits/rejected": 1.1324167251586914, |
|
"logps/chosen": -568.5440063476562, |
|
"logps/rejected": -697.5151977539062, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1107374429702759, |
|
"rewards/margins": 11.52519702911377, |
|
"rewards/rejected": -10.414460182189941, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 0.00016324786324786324, |
|
"logits/chosen": 1.0599353313446045, |
|
"logits/rejected": 0.8371300101280212, |
|
"logps/chosen": -523.8178100585938, |
|
"logps/rejected": -568.3763427734375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7378228306770325, |
|
"rewards/margins": 10.31213092803955, |
|
"rewards/rejected": -9.574308395385742, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.0001628205128205128, |
|
"logits/chosen": 1.1668627262115479, |
|
"logits/rejected": 1.1840052604675293, |
|
"logps/chosen": -513.0997314453125, |
|
"logps/rejected": -679.572998046875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.532392680644989, |
|
"rewards/margins": 10.64517593383789, |
|
"rewards/rejected": -10.112783432006836, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 0.00016239316239316238, |
|
"logits/chosen": 1.058180570602417, |
|
"logits/rejected": 0.9799319505691528, |
|
"logps/chosen": -521.548095703125, |
|
"logps/rejected": -572.1917724609375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7191563844680786, |
|
"rewards/margins": 9.96951675415039, |
|
"rewards/rejected": -8.250360488891602, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 780, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|