|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0625, |
|
"eval_steps": 500, |
|
"global_step": 250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.99e-07, |
|
"logits/chosen": -9.150251388549805, |
|
"logits/rejected": -8.951294898986816, |
|
"logps/chosen": -39.82106399536133, |
|
"logps/rejected": -51.287376403808594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.979999999999999e-07, |
|
"logits/chosen": -8.701302528381348, |
|
"logits/rejected": -8.59887981414795, |
|
"logps/chosen": -38.8251953125, |
|
"logps/rejected": -50.281246185302734, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.9000000953674316, |
|
"rewards/chosen": 0.005459675099700689, |
|
"rewards/margins": 0.005827327724546194, |
|
"rewards/rejected": -0.00036765271215699613, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.97e-07, |
|
"logits/chosen": -8.877432823181152, |
|
"logits/rejected": -8.745807647705078, |
|
"logps/chosen": -39.213958740234375, |
|
"logps/rejected": -53.814178466796875, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": 0.009096940979361534, |
|
"rewards/margins": 0.012745475396513939, |
|
"rewards/rejected": -0.003648536978289485, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.96e-07, |
|
"logits/chosen": -8.850730895996094, |
|
"logits/rejected": -8.655380249023438, |
|
"logps/chosen": -41.56839370727539, |
|
"logps/rejected": -49.78559112548828, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.010362102650105953, |
|
"rewards/margins": 0.016426045447587967, |
|
"rewards/rejected": -0.006063942797482014, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.95e-07, |
|
"logits/chosen": -9.169770240783691, |
|
"logits/rejected": -9.068281173706055, |
|
"logps/chosen": -39.02599334716797, |
|
"logps/rejected": -54.13230514526367, |
|
"loss": 0.6802, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.017099833115935326, |
|
"rewards/margins": 0.026187023147940636, |
|
"rewards/rejected": -0.009087189100682735, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.94e-07, |
|
"logits/chosen": -8.943150520324707, |
|
"logits/rejected": -8.760871887207031, |
|
"logps/chosen": -40.129783630371094, |
|
"logps/rejected": -51.28352737426758, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.8799999952316284, |
|
"rewards/chosen": 0.018004529178142548, |
|
"rewards/margins": 0.03507697209715843, |
|
"rewards/rejected": -0.017072444781661034, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.93e-07, |
|
"logits/chosen": -8.973767280578613, |
|
"logits/rejected": -8.846033096313477, |
|
"logps/chosen": -38.954933166503906, |
|
"logps/rejected": -52.8114128112793, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.01600685343146324, |
|
"rewards/margins": 0.048318009823560715, |
|
"rewards/rejected": -0.032311152666807175, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.92e-07, |
|
"logits/chosen": -9.100973129272461, |
|
"logits/rejected": -8.930387496948242, |
|
"logps/chosen": -36.695037841796875, |
|
"logps/rejected": -51.42365646362305, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": 0.028073444962501526, |
|
"rewards/margins": 0.054737381637096405, |
|
"rewards/rejected": -0.026663940399885178, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.909999999999999e-07, |
|
"logits/chosen": -8.858081817626953, |
|
"logits/rejected": -8.691803932189941, |
|
"logps/chosen": -39.423805236816406, |
|
"logps/rejected": -53.124473571777344, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.006643497850745916, |
|
"rewards/margins": 0.05475940182805061, |
|
"rewards/rejected": -0.04811590164899826, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9e-07, |
|
"logits/chosen": -8.932714462280273, |
|
"logits/rejected": -8.752630233764648, |
|
"logps/chosen": -40.90498733520508, |
|
"logps/rejected": -53.95857620239258, |
|
"loss": 0.6667, |
|
"rewards/accuracies": 0.9000000953674316, |
|
"rewards/chosen": 0.022381644695997238, |
|
"rewards/margins": 0.054132066667079926, |
|
"rewards/rejected": -0.031750429421663284, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.89e-07, |
|
"logits/chosen": -8.926645278930664, |
|
"logits/rejected": -8.795331954956055, |
|
"logps/chosen": -39.50680923461914, |
|
"logps/rejected": -49.81561279296875, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.8600000143051147, |
|
"rewards/chosen": 0.02121429704129696, |
|
"rewards/margins": 0.05642315000295639, |
|
"rewards/rejected": -0.035208847373723984, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.879999999999999e-07, |
|
"logits/chosen": -9.18893814086914, |
|
"logits/rejected": -9.020618438720703, |
|
"logps/chosen": -38.57902908325195, |
|
"logps/rejected": -54.4058952331543, |
|
"loss": 0.6514, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.03454957157373428, |
|
"rewards/margins": 0.0861399695277214, |
|
"rewards/rejected": -0.05159040167927742, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.87e-07, |
|
"logits/chosen": -8.865549087524414, |
|
"logits/rejected": -8.72575569152832, |
|
"logps/chosen": -37.333641052246094, |
|
"logps/rejected": -50.77653884887695, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.045416563749313354, |
|
"rewards/margins": 0.08835401386022568, |
|
"rewards/rejected": -0.04293745011091232, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.86e-07, |
|
"logits/chosen": -9.011914253234863, |
|
"logits/rejected": -8.871015548706055, |
|
"logps/chosen": -37.863914489746094, |
|
"logps/rejected": -51.89360427856445, |
|
"loss": 0.6456, |
|
"rewards/accuracies": 0.8799999952316284, |
|
"rewards/chosen": 0.05132318660616875, |
|
"rewards/margins": 0.09992541372776031, |
|
"rewards/rejected": -0.048602234572172165, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.85e-07, |
|
"logits/chosen": -9.177026748657227, |
|
"logits/rejected": -9.040088653564453, |
|
"logps/chosen": -38.882564544677734, |
|
"logps/rejected": -49.73377227783203, |
|
"loss": 0.647, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.021302934736013412, |
|
"rewards/margins": 0.09643281996250153, |
|
"rewards/rejected": -0.07512988150119781, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.839999999999999e-07, |
|
"logits/chosen": -8.8854398727417, |
|
"logits/rejected": -8.71996784210205, |
|
"logps/chosen": -41.88127517700195, |
|
"logps/rejected": -53.972862243652344, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.8400000333786011, |
|
"rewards/chosen": 0.01929306425154209, |
|
"rewards/margins": 0.10526645183563232, |
|
"rewards/rejected": -0.08597338944673538, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.83e-07, |
|
"logits/chosen": -8.87810230255127, |
|
"logits/rejected": -8.729606628417969, |
|
"logps/chosen": -37.907958984375, |
|
"logps/rejected": -54.153953552246094, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.026595568284392357, |
|
"rewards/margins": 0.13360002636909485, |
|
"rewards/rejected": -0.10700444877147675, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.82e-07, |
|
"logits/chosen": -9.304192543029785, |
|
"logits/rejected": -9.190086364746094, |
|
"logps/chosen": -38.377891540527344, |
|
"logps/rejected": -51.592796325683594, |
|
"loss": 0.6391, |
|
"rewards/accuracies": 0.8399999737739563, |
|
"rewards/chosen": 0.06439777463674545, |
|
"rewards/margins": 0.11593715101480484, |
|
"rewards/rejected": -0.051539380103349686, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.809999999999999e-07, |
|
"logits/chosen": -8.877577781677246, |
|
"logits/rejected": -8.728302001953125, |
|
"logps/chosen": -36.82625198364258, |
|
"logps/rejected": -50.772274017333984, |
|
"loss": 0.6317, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": 0.04051102325320244, |
|
"rewards/margins": 0.13047367334365845, |
|
"rewards/rejected": -0.08996264636516571, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.8e-07, |
|
"logits/chosen": -8.93675422668457, |
|
"logits/rejected": -8.837154388427734, |
|
"logps/chosen": -38.22481918334961, |
|
"logps/rejected": -51.28339385986328, |
|
"loss": 0.6327, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.024285469204187393, |
|
"rewards/margins": 0.12898896634578705, |
|
"rewards/rejected": -0.10470350831747055, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.79e-07, |
|
"logits/chosen": -8.971104621887207, |
|
"logits/rejected": -8.809735298156738, |
|
"logps/chosen": -38.27422332763672, |
|
"logps/rejected": -51.61455154418945, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.8600000143051147, |
|
"rewards/chosen": 0.04925350099802017, |
|
"rewards/margins": 0.1548921763896942, |
|
"rewards/rejected": -0.10563866049051285, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.779999999999999e-07, |
|
"logits/chosen": -8.943137168884277, |
|
"logits/rejected": -8.724119186401367, |
|
"logps/chosen": -39.740211486816406, |
|
"logps/rejected": -53.67293167114258, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.031043654307723045, |
|
"rewards/margins": 0.15891726315021515, |
|
"rewards/rejected": -0.12787359952926636, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.769999999999999e-07, |
|
"logits/chosen": -8.783563613891602, |
|
"logits/rejected": -8.568138122558594, |
|
"logps/chosen": -40.75530242919922, |
|
"logps/rejected": -55.012939453125, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.022519828751683235, |
|
"rewards/margins": 0.1669926643371582, |
|
"rewards/rejected": -0.14447283744812012, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.76e-07, |
|
"logits/chosen": -8.948726654052734, |
|
"logits/rejected": -8.741633415222168, |
|
"logps/chosen": -38.42156982421875, |
|
"logps/rejected": -54.19482421875, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.8600000143051147, |
|
"rewards/chosen": 0.042123936116695404, |
|
"rewards/margins": 0.16957491636276245, |
|
"rewards/rejected": -0.12745098769664764, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7499999999999995e-07, |
|
"logits/chosen": -8.800437927246094, |
|
"logits/rejected": -8.649993896484375, |
|
"logps/chosen": -39.37318420410156, |
|
"logps/rejected": -50.02665710449219, |
|
"loss": 0.6253, |
|
"rewards/accuracies": 0.8400000333786011, |
|
"rewards/chosen": 0.004943800158798695, |
|
"rewards/margins": 0.14754608273506165, |
|
"rewards/rejected": -0.14260227978229523, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7399999999999993e-07, |
|
"logits/chosen": -9.165999412536621, |
|
"logits/rejected": -9.073251724243164, |
|
"logps/chosen": -37.4705696105957, |
|
"logps/rejected": -50.39186477661133, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.005681462120264769, |
|
"rewards/margins": 0.17087292671203613, |
|
"rewards/rejected": -0.16519147157669067, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7299999999999996e-07, |
|
"logits/chosen": -8.994885444641113, |
|
"logits/rejected": -8.7987642288208, |
|
"logps/chosen": -37.67160415649414, |
|
"logps/rejected": -54.15082931518555, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03374551981687546, |
|
"rewards/margins": 0.20702290534973145, |
|
"rewards/rejected": -0.1732773780822754, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7199999999999994e-07, |
|
"logits/chosen": -8.93022346496582, |
|
"logits/rejected": -8.841963768005371, |
|
"logps/chosen": -37.873985290527344, |
|
"logps/rejected": -56.12858200073242, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.8399999737739563, |
|
"rewards/chosen": 0.00800693966448307, |
|
"rewards/margins": 0.2039312869310379, |
|
"rewards/rejected": -0.19592434167861938, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7099999999999997e-07, |
|
"logits/chosen": -9.13399600982666, |
|
"logits/rejected": -8.960123062133789, |
|
"logps/chosen": -37.81399917602539, |
|
"logps/rejected": -53.7081298828125, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.03130952641367912, |
|
"rewards/margins": 0.21175985038280487, |
|
"rewards/rejected": -0.24306941032409668, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6999999999999995e-07, |
|
"logits/chosen": -8.995954513549805, |
|
"logits/rejected": -8.84068775177002, |
|
"logps/chosen": -38.8903923034668, |
|
"logps/rejected": -57.47145462036133, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.044602252542972565, |
|
"rewards/margins": 0.22832927107810974, |
|
"rewards/rejected": -0.18372702598571777, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.689999999999999e-07, |
|
"logits/chosen": -8.951233863830566, |
|
"logits/rejected": -8.74404525756836, |
|
"logps/chosen": -39.165775299072266, |
|
"logps/rejected": -53.60399627685547, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.005719953216612339, |
|
"rewards/margins": 0.2242022305727005, |
|
"rewards/rejected": -0.22992220520973206, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.68e-07, |
|
"logits/chosen": -8.605962753295898, |
|
"logits/rejected": -8.464166641235352, |
|
"logps/chosen": -39.067291259765625, |
|
"logps/rejected": -56.53247833251953, |
|
"loss": 0.565, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03435752913355827, |
|
"rewards/margins": 0.28634151816368103, |
|
"rewards/rejected": -0.2519839406013489, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.67e-07, |
|
"logits/chosen": -8.780204772949219, |
|
"logits/rejected": -8.59669017791748, |
|
"logps/chosen": -39.499412536621094, |
|
"logps/rejected": -55.1195068359375, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.012880785390734673, |
|
"rewards/margins": 0.22619274258613586, |
|
"rewards/rejected": -0.21331194043159485, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.66e-07, |
|
"logits/chosen": -9.019442558288574, |
|
"logits/rejected": -8.858607292175293, |
|
"logps/chosen": -39.38768768310547, |
|
"logps/rejected": -56.47705841064453, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": 0.029814431443810463, |
|
"rewards/margins": 0.24373717606067657, |
|
"rewards/rejected": -0.21392273902893066, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.65e-07, |
|
"logits/chosen": -8.972381591796875, |
|
"logits/rejected": -8.825788497924805, |
|
"logps/chosen": -39.450408935546875, |
|
"logps/rejected": -54.7730827331543, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": 0.01736309379339218, |
|
"rewards/margins": 0.22288131713867188, |
|
"rewards/rejected": -0.2055182158946991, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.64e-07, |
|
"logits/chosen": -8.794334411621094, |
|
"logits/rejected": -8.642388343811035, |
|
"logps/chosen": -38.96186447143555, |
|
"logps/rejected": -52.460182189941406, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.8600000143051147, |
|
"rewards/chosen": 0.03622301667928696, |
|
"rewards/margins": 0.23951515555381775, |
|
"rewards/rejected": -0.20329216122627258, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.63e-07, |
|
"logits/chosen": -8.897015571594238, |
|
"logits/rejected": -8.766530990600586, |
|
"logps/chosen": -37.4925537109375, |
|
"logps/rejected": -50.289085388183594, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": 0.008345548994839191, |
|
"rewards/margins": 0.21366062760353088, |
|
"rewards/rejected": -0.20531506836414337, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.62e-07, |
|
"logits/chosen": -9.160964965820312, |
|
"logits/rejected": -9.01344108581543, |
|
"logps/chosen": -39.84162139892578, |
|
"logps/rejected": -50.35344696044922, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.03968983516097069, |
|
"rewards/margins": 0.23294827342033386, |
|
"rewards/rejected": -0.19325841963291168, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.61e-07, |
|
"logits/chosen": -8.879249572753906, |
|
"logits/rejected": -8.736249923706055, |
|
"logps/chosen": -39.56095504760742, |
|
"logps/rejected": -55.347694396972656, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.02437666617333889, |
|
"rewards/margins": 0.28601738810539246, |
|
"rewards/rejected": -0.261640727519989, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6e-07, |
|
"logits/chosen": -9.063352584838867, |
|
"logits/rejected": -8.963701248168945, |
|
"logps/chosen": -38.563880920410156, |
|
"logps/rejected": -51.63567352294922, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.02472488023340702, |
|
"rewards/margins": 0.2595635950565338, |
|
"rewards/rejected": -0.2842884659767151, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.59e-07, |
|
"logits/chosen": -9.278203964233398, |
|
"logits/rejected": -9.08998966217041, |
|
"logps/chosen": -39.864532470703125, |
|
"logps/rejected": -54.92973709106445, |
|
"loss": 0.5737, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.020174704492092133, |
|
"rewards/margins": 0.26886457204818726, |
|
"rewards/rejected": -0.24868984520435333, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.58e-07, |
|
"logits/chosen": -8.939000129699707, |
|
"logits/rejected": -8.744054794311523, |
|
"logps/chosen": -37.10688400268555, |
|
"logps/rejected": -54.631141662597656, |
|
"loss": 0.5495, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.04707593470811844, |
|
"rewards/margins": 0.3288743495941162, |
|
"rewards/rejected": -0.28179842233657837, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.57e-07, |
|
"logits/chosen": -8.78071403503418, |
|
"logits/rejected": -8.645574569702148, |
|
"logps/chosen": -38.05216598510742, |
|
"logps/rejected": -54.2934684753418, |
|
"loss": 0.5586, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": 0.01882551982998848, |
|
"rewards/margins": 0.3048241138458252, |
|
"rewards/rejected": -0.2859985828399658, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.56e-07, |
|
"logits/chosen": -8.890421867370605, |
|
"logits/rejected": -8.79121208190918, |
|
"logps/chosen": -36.806209564208984, |
|
"logps/rejected": -54.96057891845703, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09500815719366074, |
|
"rewards/margins": 0.3511715531349182, |
|
"rewards/rejected": -0.2561633884906769, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.55e-07, |
|
"logits/chosen": -8.712895393371582, |
|
"logits/rejected": -8.633960723876953, |
|
"logps/chosen": -38.36237335205078, |
|
"logps/rejected": -53.05036163330078, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.10673508793115616, |
|
"rewards/margins": 0.28717535734176636, |
|
"rewards/rejected": -0.1804402768611908, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.54e-07, |
|
"logits/chosen": -9.085457801818848, |
|
"logits/rejected": -8.948368072509766, |
|
"logps/chosen": -40.31169128417969, |
|
"logps/rejected": -56.712364196777344, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.05474279075860977, |
|
"rewards/margins": 0.32782307267189026, |
|
"rewards/rejected": -0.2730802297592163, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.53e-07, |
|
"logits/chosen": -8.925232887268066, |
|
"logits/rejected": -8.793390274047852, |
|
"logps/chosen": -40.80510711669922, |
|
"logps/rejected": -53.9893798828125, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.004703240934759378, |
|
"rewards/margins": 0.2981411814689636, |
|
"rewards/rejected": -0.30284440517425537, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.5199999999999997e-07, |
|
"logits/chosen": -8.936556816101074, |
|
"logits/rejected": -8.795175552368164, |
|
"logps/chosen": -39.4090690612793, |
|
"logps/rejected": -56.091827392578125, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.8799999952316284, |
|
"rewards/chosen": -0.02163396216928959, |
|
"rewards/margins": 0.28110483288764954, |
|
"rewards/rejected": -0.3027387261390686, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.51e-07, |
|
"logits/chosen": -8.785165786743164, |
|
"logits/rejected": -8.663257598876953, |
|
"logps/chosen": -39.04924011230469, |
|
"logps/rejected": -55.55643844604492, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": 0.07937277853488922, |
|
"rewards/margins": 0.3831271529197693, |
|
"rewards/rejected": -0.3037543296813965, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.5e-07, |
|
"logits/chosen": -8.918477058410645, |
|
"logits/rejected": -8.69984245300293, |
|
"logps/chosen": -39.793434143066406, |
|
"logps/rejected": -51.4716911315918, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.030612414702773094, |
|
"rewards/margins": 0.3040314316749573, |
|
"rewards/rejected": -0.3346438705921173, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.49e-07, |
|
"logits/chosen": -8.830556869506836, |
|
"logits/rejected": -8.661957740783691, |
|
"logps/chosen": -39.756195068359375, |
|
"logps/rejected": -55.29694747924805, |
|
"loss": 0.5275, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.03386972099542618, |
|
"rewards/margins": 0.3903976380825043, |
|
"rewards/rejected": -0.3565279543399811, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.48e-07, |
|
"logits/chosen": -8.969281196594238, |
|
"logits/rejected": -8.82741641998291, |
|
"logps/chosen": -37.20234298706055, |
|
"logps/rejected": -59.197296142578125, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": 0.04513595253229141, |
|
"rewards/margins": 0.429348886013031, |
|
"rewards/rejected": -0.3842129111289978, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.4699999999999997e-07, |
|
"logits/chosen": -8.737951278686523, |
|
"logits/rejected": -8.606011390686035, |
|
"logps/chosen": -38.3763542175293, |
|
"logps/rejected": -58.59581756591797, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.030918415635824203, |
|
"rewards/margins": 0.41947564482688904, |
|
"rewards/rejected": -0.3885572552680969, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.46e-07, |
|
"logits/chosen": -8.750028610229492, |
|
"logits/rejected": -8.646293640136719, |
|
"logps/chosen": -38.137672424316406, |
|
"logps/rejected": -56.8802490234375, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.08223019540309906, |
|
"rewards/margins": 0.42382898926734924, |
|
"rewards/rejected": -0.3415988087654114, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.45e-07, |
|
"logits/chosen": -9.04769515991211, |
|
"logits/rejected": -8.884363174438477, |
|
"logps/chosen": -39.094200134277344, |
|
"logps/rejected": -56.6423454284668, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01863374561071396, |
|
"rewards/margins": 0.3876304030418396, |
|
"rewards/rejected": -0.36899662017822266, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.44e-07, |
|
"logits/chosen": -8.93348503112793, |
|
"logits/rejected": -8.77459716796875, |
|
"logps/chosen": -38.1727294921875, |
|
"logps/rejected": -51.39598846435547, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.08033865690231323, |
|
"rewards/margins": 0.3627088963985443, |
|
"rewards/rejected": -0.2823702096939087, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.43e-07, |
|
"logits/chosen": -8.88620376586914, |
|
"logits/rejected": -8.79748821258545, |
|
"logps/chosen": -38.02767562866211, |
|
"logps/rejected": -53.46161651611328, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.8400000333786011, |
|
"rewards/chosen": 0.07710711658000946, |
|
"rewards/margins": 0.404231458902359, |
|
"rewards/rejected": -0.32712429761886597, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.4199999999999996e-07, |
|
"logits/chosen": -9.048208236694336, |
|
"logits/rejected": -8.921117782592773, |
|
"logps/chosen": -39.349849700927734, |
|
"logps/rejected": -57.04978561401367, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.005890236236155033, |
|
"rewards/margins": 0.3883860111236572, |
|
"rewards/rejected": -0.38249582052230835, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.41e-07, |
|
"logits/chosen": -9.050989151000977, |
|
"logits/rejected": -8.864224433898926, |
|
"logps/chosen": -38.37465286254883, |
|
"logps/rejected": -54.69602584838867, |
|
"loss": 0.5161, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": -0.00012345091090537608, |
|
"rewards/margins": 0.41539669036865234, |
|
"rewards/rejected": -0.4155201017856598, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.3999999999999997e-07, |
|
"logits/chosen": -8.77641773223877, |
|
"logits/rejected": -8.65733528137207, |
|
"logps/chosen": -37.439788818359375, |
|
"logps/rejected": -56.8023681640625, |
|
"loss": 0.491, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.10770513862371445, |
|
"rewards/margins": 0.4838793873786926, |
|
"rewards/rejected": -0.37617427110671997, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.39e-07, |
|
"logits/chosen": -8.84074878692627, |
|
"logits/rejected": -8.694551467895508, |
|
"logps/chosen": -40.056434631347656, |
|
"logps/rejected": -60.27661895751953, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.010883894748985767, |
|
"rewards/margins": 0.4609171748161316, |
|
"rewards/rejected": -0.4500332772731781, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.38e-07, |
|
"logits/chosen": -8.841463088989258, |
|
"logits/rejected": -8.691165924072266, |
|
"logps/chosen": -38.9443473815918, |
|
"logps/rejected": -56.7674446105957, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.058075837790966034, |
|
"rewards/margins": 0.45668134093284607, |
|
"rewards/rejected": -0.39860549569129944, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.3699999999999996e-07, |
|
"logits/chosen": -8.556544303894043, |
|
"logits/rejected": -8.511371612548828, |
|
"logps/chosen": -37.55242919921875, |
|
"logps/rejected": -52.5374755859375, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.05054298788309097, |
|
"rewards/margins": 0.3981621265411377, |
|
"rewards/rejected": -0.3476191461086273, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.36e-07, |
|
"logits/chosen": -8.824804306030273, |
|
"logits/rejected": -8.721410751342773, |
|
"logps/chosen": -39.77140808105469, |
|
"logps/rejected": -60.0277099609375, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01679672673344612, |
|
"rewards/margins": 0.5294073820114136, |
|
"rewards/rejected": -0.5462040901184082, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.3499999999999996e-07, |
|
"logits/chosen": -8.601284980773926, |
|
"logits/rejected": -8.539445877075195, |
|
"logps/chosen": -40.41975402832031, |
|
"logps/rejected": -55.65443801879883, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.10446874797344208, |
|
"rewards/margins": 0.39701706171035767, |
|
"rewards/rejected": -0.5014857649803162, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.34e-07, |
|
"logits/chosen": -9.016075134277344, |
|
"logits/rejected": -8.911613464355469, |
|
"logps/chosen": -36.6504020690918, |
|
"logps/rejected": -52.8062629699707, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": 0.03334439545869827, |
|
"rewards/margins": 0.46417728066444397, |
|
"rewards/rejected": -0.4308328628540039, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.3299999999999997e-07, |
|
"logits/chosen": -9.052962303161621, |
|
"logits/rejected": -8.949380874633789, |
|
"logps/chosen": -37.03533172607422, |
|
"logps/rejected": -52.54149627685547, |
|
"loss": 0.5236, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.004350895527750254, |
|
"rewards/margins": 0.41073599457740784, |
|
"rewards/rejected": -0.4063850939273834, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.3199999999999995e-07, |
|
"logits/chosen": -9.015048027038574, |
|
"logits/rejected": -8.912318229675293, |
|
"logps/chosen": -38.99870300292969, |
|
"logps/rejected": -52.52238082885742, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.050432223826646805, |
|
"rewards/margins": 0.4209260046482086, |
|
"rewards/rejected": -0.3704938292503357, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.31e-07, |
|
"logits/chosen": -8.882599830627441, |
|
"logits/rejected": -8.721563339233398, |
|
"logps/chosen": -39.24352264404297, |
|
"logps/rejected": -57.102867126464844, |
|
"loss": 0.4889, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": 0.05120646953582764, |
|
"rewards/margins": 0.5033482909202576, |
|
"rewards/rejected": -0.4521418511867523, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2999999999999996e-07, |
|
"logits/chosen": -8.721124649047852, |
|
"logits/rejected": -8.638886451721191, |
|
"logps/chosen": -40.13719940185547, |
|
"logps/rejected": -56.29268264770508, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.024597348645329475, |
|
"rewards/margins": 0.4424295425415039, |
|
"rewards/rejected": -0.46702688932418823, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.29e-07, |
|
"logits/chosen": -8.832576751708984, |
|
"logits/rejected": -8.6920747756958, |
|
"logps/chosen": -39.40597152709961, |
|
"logps/rejected": -57.85718536376953, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": 0.0307625625282526, |
|
"rewards/margins": 0.5312424302101135, |
|
"rewards/rejected": -0.5004798769950867, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2799999999999997e-07, |
|
"logits/chosen": -8.825450897216797, |
|
"logits/rejected": -8.670401573181152, |
|
"logps/chosen": -39.127418518066406, |
|
"logps/rejected": -58.1724967956543, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.04789305105805397, |
|
"rewards/margins": 0.48660898208618164, |
|
"rewards/rejected": -0.43871593475341797, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2699999999999995e-07, |
|
"logits/chosen": -9.031660079956055, |
|
"logits/rejected": -8.88580322265625, |
|
"logps/chosen": -40.26874542236328, |
|
"logps/rejected": -59.174354553222656, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.056357551366090775, |
|
"rewards/margins": 0.5150011777877808, |
|
"rewards/rejected": -0.4586435854434967, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.26e-07, |
|
"logits/chosen": -9.249466896057129, |
|
"logits/rejected": -9.104633331298828, |
|
"logps/chosen": -38.35686492919922, |
|
"logps/rejected": -58.423255920410156, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.003165402915328741, |
|
"rewards/margins": 0.5658671855926514, |
|
"rewards/rejected": -0.5627016425132751, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2499999999999995e-07, |
|
"logits/chosen": -8.971678733825684, |
|
"logits/rejected": -8.806716918945312, |
|
"logps/chosen": -40.63257598876953, |
|
"logps/rejected": -58.69512939453125, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 0.8600000143051147, |
|
"rewards/chosen": -0.003097705077379942, |
|
"rewards/margins": 0.5738621950149536, |
|
"rewards/rejected": -0.5769599676132202, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.24e-07, |
|
"logits/chosen": -8.751283645629883, |
|
"logits/rejected": -8.613395690917969, |
|
"logps/chosen": -41.808475494384766, |
|
"logps/rejected": -57.4010009765625, |
|
"loss": 0.5206, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": -0.018361693248152733, |
|
"rewards/margins": 0.4181668758392334, |
|
"rewards/rejected": -0.4365285038948059, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2299999999999996e-07, |
|
"logits/chosen": -8.919511795043945, |
|
"logits/rejected": -8.767281532287598, |
|
"logps/chosen": -38.50531005859375, |
|
"logps/rejected": -56.9246826171875, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.08143829554319382, |
|
"rewards/margins": 0.46956175565719604, |
|
"rewards/rejected": -0.5510000586509705, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2199999999999994e-07, |
|
"logits/chosen": -9.05775260925293, |
|
"logits/rejected": -8.882523536682129, |
|
"logps/chosen": -37.908966064453125, |
|
"logps/rejected": -57.79386520385742, |
|
"loss": 0.4638, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": 0.06249593570828438, |
|
"rewards/margins": 0.5957901477813721, |
|
"rewards/rejected": -0.5332942008972168, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.2099999999999997e-07, |
|
"logits/chosen": -9.013116836547852, |
|
"logits/rejected": -8.926069259643555, |
|
"logps/chosen": -37.548606872558594, |
|
"logps/rejected": -54.40301513671875, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08341957628726959, |
|
"rewards/margins": 0.5038079023361206, |
|
"rewards/rejected": -0.4203883111476898, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.1999999999999995e-07, |
|
"logits/chosen": -8.759903907775879, |
|
"logits/rejected": -8.721986770629883, |
|
"logps/chosen": -38.83586120605469, |
|
"logps/rejected": -56.38386154174805, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.9000000953674316, |
|
"rewards/chosen": 0.03295533359050751, |
|
"rewards/margins": 0.5568066239356995, |
|
"rewards/rejected": -0.5238512754440308, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.19e-07, |
|
"logits/chosen": -8.818650245666504, |
|
"logits/rejected": -8.679798126220703, |
|
"logps/chosen": -40.744956970214844, |
|
"logps/rejected": -56.096473693847656, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.10699313879013062, |
|
"rewards/margins": 0.49514955282211304, |
|
"rewards/rejected": -0.6021426916122437, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.1799999999999996e-07, |
|
"logits/chosen": -8.959874153137207, |
|
"logits/rejected": -8.873147010803223, |
|
"logps/chosen": -38.775062561035156, |
|
"logps/rejected": -56.94916534423828, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.026896214112639427, |
|
"rewards/margins": 0.5791338682174683, |
|
"rewards/rejected": -0.5522376298904419, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.17e-07, |
|
"logits/chosen": -9.120010375976562, |
|
"logits/rejected": -8.946874618530273, |
|
"logps/chosen": -41.79666519165039, |
|
"logps/rejected": -59.282958984375, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.009585860185325146, |
|
"rewards/margins": 0.5570787191390991, |
|
"rewards/rejected": -0.5666645169258118, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.1599999999999997e-07, |
|
"logits/chosen": -8.929158210754395, |
|
"logits/rejected": -8.834013938903809, |
|
"logps/chosen": -40.36653137207031, |
|
"logps/rejected": -54.375205993652344, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": -0.10653682053089142, |
|
"rewards/margins": 0.4618176519870758, |
|
"rewards/rejected": -0.5683544874191284, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.1499999999999994e-07, |
|
"logits/chosen": -9.015276908874512, |
|
"logits/rejected": -8.931076049804688, |
|
"logps/chosen": -40.707420349121094, |
|
"logps/rejected": -57.53357696533203, |
|
"loss": 0.4619, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.034116435796022415, |
|
"rewards/margins": 0.5871976613998413, |
|
"rewards/rejected": -0.6213140487670898, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.14e-07, |
|
"logits/chosen": -8.926106452941895, |
|
"logits/rejected": -8.83165168762207, |
|
"logps/chosen": -37.86637878417969, |
|
"logps/rejected": -57.00706100463867, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.026764903217554092, |
|
"rewards/margins": 0.6376525163650513, |
|
"rewards/rejected": -0.6108875870704651, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.1299999999999995e-07, |
|
"logits/chosen": -8.976417541503906, |
|
"logits/rejected": -8.816550254821777, |
|
"logps/chosen": -40.38107681274414, |
|
"logps/rejected": -58.815818786621094, |
|
"loss": 0.4547, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": -0.023364685475826263, |
|
"rewards/margins": 0.6099767088890076, |
|
"rewards/rejected": -0.6333414316177368, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.12e-07, |
|
"logits/chosen": -8.921110153198242, |
|
"logits/rejected": -8.771123886108398, |
|
"logps/chosen": -36.119667053222656, |
|
"logps/rejected": -54.26972198486328, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.024796072393655777, |
|
"rewards/margins": 0.5949214100837708, |
|
"rewards/rejected": -0.6197174787521362, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.1099999999999996e-07, |
|
"logits/chosen": -8.972981452941895, |
|
"logits/rejected": -8.839695930480957, |
|
"logps/chosen": -37.952171325683594, |
|
"logps/rejected": -53.723838806152344, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.0343460738658905, |
|
"rewards/margins": 0.5734801888465881, |
|
"rewards/rejected": -0.5391340851783752, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.0999999999999994e-07, |
|
"logits/chosen": -8.822635650634766, |
|
"logits/rejected": -8.72290325164795, |
|
"logps/chosen": -40.56664276123047, |
|
"logps/rejected": -58.175697326660156, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.11275799572467804, |
|
"rewards/margins": 0.5608721971511841, |
|
"rewards/rejected": -0.6736301779747009, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.0899999999999997e-07, |
|
"logits/chosen": -8.966471672058105, |
|
"logits/rejected": -8.847415924072266, |
|
"logps/chosen": -39.2136344909668, |
|
"logps/rejected": -55.99798583984375, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.03947858139872551, |
|
"rewards/margins": 0.5527079701423645, |
|
"rewards/rejected": -0.5132293701171875, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.0799999999999995e-07, |
|
"logits/chosen": -8.708093643188477, |
|
"logits/rejected": -8.622647285461426, |
|
"logps/chosen": -40.248023986816406, |
|
"logps/rejected": -60.658241271972656, |
|
"loss": 0.4263, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": 0.06861706078052521, |
|
"rewards/margins": 0.7151986360549927, |
|
"rewards/rejected": -0.6465815305709839, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.07e-07, |
|
"logits/chosen": -8.927497863769531, |
|
"logits/rejected": -8.764131546020508, |
|
"logps/chosen": -41.07722473144531, |
|
"logps/rejected": -57.40480422973633, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.10462639480829239, |
|
"rewards/margins": 0.577288806438446, |
|
"rewards/rejected": -0.6819152235984802, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.06e-07, |
|
"logits/chosen": -8.988286018371582, |
|
"logits/rejected": -8.856369018554688, |
|
"logps/chosen": -39.973121643066406, |
|
"logps/rejected": -55.97014236450195, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.0598948672413826, |
|
"rewards/margins": 0.6045216917991638, |
|
"rewards/rejected": -0.664416491985321, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.05e-07, |
|
"logits/chosen": -8.822972297668457, |
|
"logits/rejected": -8.72160816192627, |
|
"logps/chosen": -40.39838409423828, |
|
"logps/rejected": -58.671974182128906, |
|
"loss": 0.4388, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": 0.012630686163902283, |
|
"rewards/margins": 0.6781344413757324, |
|
"rewards/rejected": -0.6655037999153137, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.04e-07, |
|
"logits/chosen": -8.826386451721191, |
|
"logits/rejected": -8.73621940612793, |
|
"logps/chosen": -38.85763168334961, |
|
"logps/rejected": -61.4788818359375, |
|
"loss": 0.4197, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.0026855766773223877, |
|
"rewards/margins": 0.7139507532119751, |
|
"rewards/rejected": -0.7112652063369751, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.03e-07, |
|
"logits/chosen": -9.003509521484375, |
|
"logits/rejected": -8.840544700622559, |
|
"logps/chosen": -39.192012786865234, |
|
"logps/rejected": -59.52681350708008, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": -0.04670552909374237, |
|
"rewards/margins": 0.62091064453125, |
|
"rewards/rejected": -0.6676161885261536, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.02e-07, |
|
"logits/chosen": -9.014259338378906, |
|
"logits/rejected": -8.890603065490723, |
|
"logps/chosen": -39.755069732666016, |
|
"logps/rejected": -62.2213134765625, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.07653121650218964, |
|
"rewards/margins": 0.6743559837341309, |
|
"rewards/rejected": -0.7508872151374817, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.01e-07, |
|
"logits/chosen": -8.92739486694336, |
|
"logits/rejected": -8.727579116821289, |
|
"logps/chosen": -38.63489532470703, |
|
"logps/rejected": -56.489234924316406, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.06340552121400833, |
|
"rewards/margins": 0.6787285804748535, |
|
"rewards/rejected": -0.7421342134475708, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4e-07, |
|
"logits/chosen": -8.823177337646484, |
|
"logits/rejected": -8.733776092529297, |
|
"logps/chosen": -39.35440444946289, |
|
"logps/rejected": -59.88597869873047, |
|
"loss": 0.4156, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": 0.06192043423652649, |
|
"rewards/margins": 0.7568650245666504, |
|
"rewards/rejected": -0.6949446201324463, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.99e-07, |
|
"logits/chosen": -8.744458198547363, |
|
"logits/rejected": -8.607200622558594, |
|
"logps/chosen": -39.50025177001953, |
|
"logps/rejected": -59.96971893310547, |
|
"loss": 0.4466, |
|
"rewards/accuracies": 0.8600000143051147, |
|
"rewards/chosen": -0.02974173054099083, |
|
"rewards/margins": 0.6540297269821167, |
|
"rewards/rejected": -0.6837714910507202, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.98e-07, |
|
"logits/chosen": -8.990362167358398, |
|
"logits/rejected": -8.814451217651367, |
|
"logps/chosen": -41.06502914428711, |
|
"logps/rejected": -59.0604133605957, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.11213630437850952, |
|
"rewards/margins": 0.681516170501709, |
|
"rewards/rejected": -0.7936524152755737, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.97e-07, |
|
"logits/chosen": -8.999361038208008, |
|
"logits/rejected": -8.890585899353027, |
|
"logps/chosen": -36.496822357177734, |
|
"logps/rejected": -61.975677490234375, |
|
"loss": 0.4139, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": 0.07788603752851486, |
|
"rewards/margins": 0.7719110250473022, |
|
"rewards/rejected": -0.6940250992774963, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.96e-07, |
|
"logits/chosen": -8.933026313781738, |
|
"logits/rejected": -8.826833724975586, |
|
"logps/chosen": -40.48444366455078, |
|
"logps/rejected": -55.9661865234375, |
|
"loss": 0.449, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10673530399799347, |
|
"rewards/margins": 0.6240180730819702, |
|
"rewards/rejected": -0.730753481388092, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.95e-07, |
|
"logits/chosen": -9.035877227783203, |
|
"logits/rejected": -8.826123237609863, |
|
"logps/chosen": -38.494232177734375, |
|
"logps/rejected": -58.50568771362305, |
|
"loss": 0.4465, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.05296442657709122, |
|
"rewards/margins": 0.677048921585083, |
|
"rewards/rejected": -0.7300133109092712, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.94e-07, |
|
"logits/chosen": -8.989307403564453, |
|
"logits/rejected": -8.91016674041748, |
|
"logps/chosen": -41.43378448486328, |
|
"logps/rejected": -57.25815963745117, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.8799999952316284, |
|
"rewards/chosen": -0.20345261693000793, |
|
"rewards/margins": 0.5115060806274414, |
|
"rewards/rejected": -0.7149587869644165, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.93e-07, |
|
"logits/chosen": -8.774269104003906, |
|
"logits/rejected": -8.619407653808594, |
|
"logps/chosen": -39.817054748535156, |
|
"logps/rejected": -54.45206832885742, |
|
"loss": 0.4652, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.1449589878320694, |
|
"rewards/margins": 0.5843663811683655, |
|
"rewards/rejected": -0.7293254137039185, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.92e-07, |
|
"logits/chosen": -8.860220909118652, |
|
"logits/rejected": -8.775630950927734, |
|
"logps/chosen": -38.79396438598633, |
|
"logps/rejected": -58.30022430419922, |
|
"loss": 0.4283, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": 0.04749782010912895, |
|
"rewards/margins": 0.7379117012023926, |
|
"rewards/rejected": -0.6904138922691345, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.91e-07, |
|
"logits/chosen": -8.848630905151367, |
|
"logits/rejected": -8.661413192749023, |
|
"logps/chosen": -40.61771774291992, |
|
"logps/rejected": -60.2691764831543, |
|
"loss": 0.4042, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.08625830709934235, |
|
"rewards/margins": 0.773119330406189, |
|
"rewards/rejected": -0.8593775629997253, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.8999999999999997e-07, |
|
"logits/chosen": -8.671426773071289, |
|
"logits/rejected": -8.512123107910156, |
|
"logps/chosen": -40.18708038330078, |
|
"logps/rejected": -61.94392776489258, |
|
"loss": 0.426, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.004405555315315723, |
|
"rewards/margins": 0.7397416234016418, |
|
"rewards/rejected": -0.7353360056877136, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.89e-07, |
|
"logits/chosen": -8.962957382202148, |
|
"logits/rejected": -8.918905258178711, |
|
"logps/chosen": -37.032073974609375, |
|
"logps/rejected": -58.812171936035156, |
|
"loss": 0.3985, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": 0.1496596783399582, |
|
"rewards/margins": 0.8128288984298706, |
|
"rewards/rejected": -0.663169264793396, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.88e-07, |
|
"logits/chosen": -8.809492111206055, |
|
"logits/rejected": -8.77208137512207, |
|
"logps/chosen": -40.58615493774414, |
|
"logps/rejected": -61.23163604736328, |
|
"loss": 0.3824, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.0648830309510231, |
|
"rewards/margins": 0.8464531898498535, |
|
"rewards/rejected": -0.9113362431526184, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.87e-07, |
|
"logits/chosen": -9.049692153930664, |
|
"logits/rejected": -8.90321159362793, |
|
"logps/chosen": -37.798988342285156, |
|
"logps/rejected": -56.5068244934082, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.009292450733482838, |
|
"rewards/margins": 0.7117542624473572, |
|
"rewards/rejected": -0.7210468053817749, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.86e-07, |
|
"logits/chosen": -9.263845443725586, |
|
"logits/rejected": -9.075590133666992, |
|
"logps/chosen": -38.95356750488281, |
|
"logps/rejected": -57.786277770996094, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.047295115888118744, |
|
"rewards/margins": 0.7150000333786011, |
|
"rewards/rejected": -0.6677049398422241, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.8499999999999997e-07, |
|
"logits/chosen": -8.880139350891113, |
|
"logits/rejected": -8.713644981384277, |
|
"logps/chosen": -37.136085510253906, |
|
"logps/rejected": -57.32819747924805, |
|
"loss": 0.4066, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.023683354258537292, |
|
"rewards/margins": 0.790765643119812, |
|
"rewards/rejected": -0.7670822143554688, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.84e-07, |
|
"logits/chosen": -9.125120162963867, |
|
"logits/rejected": -8.953531265258789, |
|
"logps/chosen": -40.43872833251953, |
|
"logps/rejected": -63.04737091064453, |
|
"loss": 0.3631, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0034645921550691128, |
|
"rewards/margins": 0.9286357164382935, |
|
"rewards/rejected": -0.9251710772514343, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.83e-07, |
|
"logits/chosen": -9.222723960876465, |
|
"logits/rejected": -9.075920104980469, |
|
"logps/chosen": -37.43045425415039, |
|
"logps/rejected": -54.45465087890625, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.9000000953674316, |
|
"rewards/chosen": 0.03504835441708565, |
|
"rewards/margins": 0.7053635120391846, |
|
"rewards/rejected": -0.670315146446228, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.82e-07, |
|
"logits/chosen": -8.868099212646484, |
|
"logits/rejected": -8.783113479614258, |
|
"logps/chosen": -38.179161071777344, |
|
"logps/rejected": -59.50090408325195, |
|
"loss": 0.4082, |
|
"rewards/accuracies": 0.8799999356269836, |
|
"rewards/chosen": -0.0526953861117363, |
|
"rewards/margins": 0.7886097431182861, |
|
"rewards/rejected": -0.8413050770759583, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.81e-07, |
|
"logits/chosen": -8.88872241973877, |
|
"logits/rejected": -8.763318061828613, |
|
"logps/chosen": -42.87889862060547, |
|
"logps/rejected": -62.076698303222656, |
|
"loss": 0.4254, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": -0.18682220578193665, |
|
"rewards/margins": 0.7382919788360596, |
|
"rewards/rejected": -0.9251142740249634, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7999999999999996e-07, |
|
"logits/chosen": -8.93816089630127, |
|
"logits/rejected": -8.88119888305664, |
|
"logps/chosen": -38.15512466430664, |
|
"logps/rejected": -58.470611572265625, |
|
"loss": 0.4107, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": 0.04111450910568237, |
|
"rewards/margins": 0.7645248770713806, |
|
"rewards/rejected": -0.7234103679656982, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.79e-07, |
|
"logits/chosen": -8.973804473876953, |
|
"logits/rejected": -8.872992515563965, |
|
"logps/chosen": -39.51805114746094, |
|
"logps/rejected": -55.87736129760742, |
|
"loss": 0.4504, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1321883648633957, |
|
"rewards/margins": 0.6513045430183411, |
|
"rewards/rejected": -0.783492922782898, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7799999999999997e-07, |
|
"logits/chosen": -8.803823471069336, |
|
"logits/rejected": -8.616742134094238, |
|
"logps/chosen": -40.673221588134766, |
|
"logps/rejected": -61.703948974609375, |
|
"loss": 0.3922, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.07538942992687225, |
|
"rewards/margins": 0.8220105171203613, |
|
"rewards/rejected": -0.89739990234375, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.77e-07, |
|
"logits/chosen": -9.03860855102539, |
|
"logits/rejected": -8.868653297424316, |
|
"logps/chosen": -37.68621063232422, |
|
"logps/rejected": -58.667076110839844, |
|
"loss": 0.3545, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.06506671011447906, |
|
"rewards/margins": 0.9463762044906616, |
|
"rewards/rejected": -0.8813096284866333, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.76e-07, |
|
"logits/chosen": -8.842554092407227, |
|
"logits/rejected": -8.731310844421387, |
|
"logps/chosen": -40.247703552246094, |
|
"logps/rejected": -59.82563018798828, |
|
"loss": 0.3838, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1266755610704422, |
|
"rewards/margins": 0.8307477831840515, |
|
"rewards/rejected": -0.9574233889579773, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -9.012685775756836, |
|
"logits/rejected": -8.93365478515625, |
|
"logps/chosen": -39.423152923583984, |
|
"logps/rejected": -66.47831726074219, |
|
"loss": 0.3284, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07251543551683426, |
|
"rewards/margins": 1.0880799293518066, |
|
"rewards/rejected": -1.0155645608901978, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.74e-07, |
|
"logits/chosen": -8.971677780151367, |
|
"logits/rejected": -8.89514446258545, |
|
"logps/chosen": -39.17797088623047, |
|
"logps/rejected": -61.886260986328125, |
|
"loss": 0.3578, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.00424486119300127, |
|
"rewards/margins": 0.9471753239631653, |
|
"rewards/rejected": -0.9514201879501343, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7299999999999997e-07, |
|
"logits/chosen": -8.827725410461426, |
|
"logits/rejected": -8.715534210205078, |
|
"logps/chosen": -42.28189468383789, |
|
"logps/rejected": -63.66273880004883, |
|
"loss": 0.3951, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.21217112243175507, |
|
"rewards/margins": 0.8158702850341797, |
|
"rewards/rejected": -1.0280416011810303, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.72e-07, |
|
"logits/chosen": -8.811687469482422, |
|
"logits/rejected": -8.690886497497559, |
|
"logps/chosen": -39.99266815185547, |
|
"logps/rejected": -60.4193229675293, |
|
"loss": 0.4187, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.16783051192760468, |
|
"rewards/margins": 0.767844557762146, |
|
"rewards/rejected": -0.9356750249862671, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.71e-07, |
|
"logits/chosen": -8.954986572265625, |
|
"logits/rejected": -8.789628982543945, |
|
"logps/chosen": -38.853965759277344, |
|
"logps/rejected": -64.25423431396484, |
|
"loss": 0.3561, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.12899711728096008, |
|
"rewards/margins": 1.0408105850219727, |
|
"rewards/rejected": -0.9118132591247559, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7e-07, |
|
"logits/chosen": -8.90104866027832, |
|
"logits/rejected": -8.75663948059082, |
|
"logps/chosen": -38.79196548461914, |
|
"logps/rejected": -59.23478317260742, |
|
"loss": 0.3979, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.045634448528289795, |
|
"rewards/margins": 0.8061555027961731, |
|
"rewards/rejected": -0.7605210542678833, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.69e-07, |
|
"logits/chosen": -9.070549011230469, |
|
"logits/rejected": -8.883820533752441, |
|
"logps/chosen": -40.20603942871094, |
|
"logps/rejected": -57.377357482910156, |
|
"loss": 0.4072, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.19748033583164215, |
|
"rewards/margins": 0.7769988775253296, |
|
"rewards/rejected": -0.9744793772697449, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.6799999999999996e-07, |
|
"logits/chosen": -8.954313278198242, |
|
"logits/rejected": -8.846368789672852, |
|
"logps/chosen": -39.513771057128906, |
|
"logps/rejected": -60.5933723449707, |
|
"loss": 0.3613, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.09566650539636612, |
|
"rewards/margins": 0.9306944012641907, |
|
"rewards/rejected": -1.0263609886169434, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.67e-07, |
|
"logits/chosen": -8.816751480102539, |
|
"logits/rejected": -8.633028030395508, |
|
"logps/chosen": -39.54310989379883, |
|
"logps/rejected": -58.71696090698242, |
|
"loss": 0.3859, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03110024333000183, |
|
"rewards/margins": 0.9095786809921265, |
|
"rewards/rejected": -0.9406788945198059, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.6599999999999997e-07, |
|
"logits/chosen": -8.940773963928223, |
|
"logits/rejected": -8.746129035949707, |
|
"logps/chosen": -40.17658233642578, |
|
"logps/rejected": -62.3738899230957, |
|
"loss": 0.3541, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.16021467745304108, |
|
"rewards/margins": 0.984930694103241, |
|
"rewards/rejected": -1.145145297050476, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.65e-07, |
|
"logits/chosen": -8.982457160949707, |
|
"logits/rejected": -8.930691719055176, |
|
"logps/chosen": -41.17652893066406, |
|
"logps/rejected": -63.67726516723633, |
|
"loss": 0.3454, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.1305641233921051, |
|
"rewards/margins": 0.996552586555481, |
|
"rewards/rejected": -1.1271167993545532, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.64e-07, |
|
"logits/chosen": -9.113368034362793, |
|
"logits/rejected": -9.021788597106934, |
|
"logps/chosen": -40.26515579223633, |
|
"logps/rejected": -61.51102828979492, |
|
"loss": 0.3919, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.07528214156627655, |
|
"rewards/margins": 0.8362929224967957, |
|
"rewards/rejected": -0.9115751385688782, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.6299999999999995e-07, |
|
"logits/chosen": -9.021696090698242, |
|
"logits/rejected": -8.881583213806152, |
|
"logps/chosen": -39.594642639160156, |
|
"logps/rejected": -62.238922119140625, |
|
"loss": 0.3868, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.08284278213977814, |
|
"rewards/margins": 0.8705474138259888, |
|
"rewards/rejected": -0.9533903002738953, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.62e-07, |
|
"logits/chosen": -9.039748191833496, |
|
"logits/rejected": -8.940320014953613, |
|
"logps/chosen": -40.2382926940918, |
|
"logps/rejected": -62.49633026123047, |
|
"loss": 0.3742, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.19811776280403137, |
|
"rewards/margins": 0.9347583651542664, |
|
"rewards/rejected": -1.1328761577606201, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.6099999999999996e-07, |
|
"logits/chosen": -8.731492042541504, |
|
"logits/rejected": -8.617894172668457, |
|
"logps/chosen": -40.858238220214844, |
|
"logps/rejected": -62.641571044921875, |
|
"loss": 0.3566, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.1721496880054474, |
|
"rewards/margins": 0.9204368591308594, |
|
"rewards/rejected": -1.0925863981246948, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.6e-07, |
|
"logits/chosen": -9.012569427490234, |
|
"logits/rejected": -8.815531730651855, |
|
"logps/chosen": -42.314231872558594, |
|
"logps/rejected": -64.81603240966797, |
|
"loss": 0.35, |
|
"rewards/accuracies": 0.8799999952316284, |
|
"rewards/chosen": -0.15320369601249695, |
|
"rewards/margins": 1.030665397644043, |
|
"rewards/rejected": -1.1838690042495728, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.5899999999999997e-07, |
|
"logits/chosen": -8.839929580688477, |
|
"logits/rejected": -8.719411849975586, |
|
"logps/chosen": -40.46647262573242, |
|
"logps/rejected": -61.498626708984375, |
|
"loss": 0.3717, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.006344547960907221, |
|
"rewards/margins": 0.8955361247062683, |
|
"rewards/rejected": -0.8891915082931519, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.5799999999999995e-07, |
|
"logits/chosen": -8.899998664855957, |
|
"logits/rejected": -8.761857986450195, |
|
"logps/chosen": -39.248046875, |
|
"logps/rejected": -64.58299255371094, |
|
"loss": 0.3387, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.021908091381192207, |
|
"rewards/margins": 1.0876431465148926, |
|
"rewards/rejected": -1.109551191329956, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.57e-07, |
|
"logits/chosen": -8.721105575561523, |
|
"logits/rejected": -8.539088249206543, |
|
"logps/chosen": -38.27935028076172, |
|
"logps/rejected": -60.71025848388672, |
|
"loss": 0.3689, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.12697605788707733, |
|
"rewards/margins": 0.9274777173995972, |
|
"rewards/rejected": -1.054453730583191, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.5599999999999996e-07, |
|
"logits/chosen": -8.694182395935059, |
|
"logits/rejected": -8.604917526245117, |
|
"logps/chosen": -41.218135833740234, |
|
"logps/rejected": -63.59687042236328, |
|
"loss": 0.3785, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21460440754890442, |
|
"rewards/margins": 0.9406296610832214, |
|
"rewards/rejected": -1.1552340984344482, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.55e-07, |
|
"logits/chosen": -9.010725021362305, |
|
"logits/rejected": -8.792009353637695, |
|
"logps/chosen": -44.04743576049805, |
|
"logps/rejected": -63.77995681762695, |
|
"loss": 0.3911, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.310243159532547, |
|
"rewards/margins": 0.9037361145019531, |
|
"rewards/rejected": -1.2139792442321777, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.5399999999999997e-07, |
|
"logits/chosen": -8.800936698913574, |
|
"logits/rejected": -8.641168594360352, |
|
"logps/chosen": -40.73430252075195, |
|
"logps/rejected": -59.47686004638672, |
|
"loss": 0.4198, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12597987055778503, |
|
"rewards/margins": 0.8163179159164429, |
|
"rewards/rejected": -0.9422977566719055, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.5299999999999994e-07, |
|
"logits/chosen": -8.81165885925293, |
|
"logits/rejected": -8.621085166931152, |
|
"logps/chosen": -41.21516418457031, |
|
"logps/rejected": -65.77284240722656, |
|
"loss": 0.3477, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": 0.02493276819586754, |
|
"rewards/margins": 1.0518163442611694, |
|
"rewards/rejected": -1.026883602142334, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.52e-07, |
|
"logits/chosen": -9.05104923248291, |
|
"logits/rejected": -8.940778732299805, |
|
"logps/chosen": -39.09772872924805, |
|
"logps/rejected": -63.58698272705078, |
|
"loss": 0.3292, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.06748518347740173, |
|
"rewards/margins": 1.0447824001312256, |
|
"rewards/rejected": -1.1122677326202393, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.5099999999999995e-07, |
|
"logits/chosen": -8.909757614135742, |
|
"logits/rejected": -8.688767433166504, |
|
"logps/chosen": -40.91362762451172, |
|
"logps/rejected": -60.848472595214844, |
|
"loss": 0.3958, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.20450308918952942, |
|
"rewards/margins": 0.8363680839538574, |
|
"rewards/rejected": -1.0408711433410645, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.5e-07, |
|
"logits/chosen": -9.037310600280762, |
|
"logits/rejected": -8.962252616882324, |
|
"logps/chosen": -40.28340530395508, |
|
"logps/rejected": -63.94322967529297, |
|
"loss": 0.3284, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.07565836608409882, |
|
"rewards/margins": 1.125253438949585, |
|
"rewards/rejected": -1.2009117603302002, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.4899999999999996e-07, |
|
"logits/chosen": -8.9218168258667, |
|
"logits/rejected": -8.84290599822998, |
|
"logps/chosen": -39.55026626586914, |
|
"logps/rejected": -61.443641662597656, |
|
"loss": 0.3476, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.02470378205180168, |
|
"rewards/margins": 0.9961774945259094, |
|
"rewards/rejected": -1.020881175994873, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.4799999999999994e-07, |
|
"logits/chosen": -8.936076164245605, |
|
"logits/rejected": -8.845673561096191, |
|
"logps/chosen": -38.409576416015625, |
|
"logps/rejected": -61.9953727722168, |
|
"loss": 0.3381, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11342154443264008, |
|
"rewards/margins": 0.9978846311569214, |
|
"rewards/rejected": -1.111306071281433, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.4699999999999997e-07, |
|
"logits/chosen": -8.993753433227539, |
|
"logits/rejected": -8.88867473602295, |
|
"logps/chosen": -41.41925811767578, |
|
"logps/rejected": -64.46873474121094, |
|
"loss": 0.3345, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.1233917623758316, |
|
"rewards/margins": 1.042170763015747, |
|
"rewards/rejected": -1.165562629699707, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.4599999999999995e-07, |
|
"logits/chosen": -8.77087688446045, |
|
"logits/rejected": -8.657720565795898, |
|
"logps/chosen": -40.0390510559082, |
|
"logps/rejected": -60.32958984375, |
|
"loss": 0.3967, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": -0.19076868891716003, |
|
"rewards/margins": 0.9012584686279297, |
|
"rewards/rejected": -1.0920270681381226, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.45e-07, |
|
"logits/chosen": -8.897802352905273, |
|
"logits/rejected": -8.744150161743164, |
|
"logps/chosen": -40.92519760131836, |
|
"logps/rejected": -65.74849700927734, |
|
"loss": 0.3411, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.15756723284721375, |
|
"rewards/margins": 1.0892850160598755, |
|
"rewards/rejected": -1.2468522787094116, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.4399999999999996e-07, |
|
"logits/chosen": -8.871163368225098, |
|
"logits/rejected": -8.717877388000488, |
|
"logps/chosen": -43.01222229003906, |
|
"logps/rejected": -70.48704528808594, |
|
"loss": 0.3188, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.3022673428058624, |
|
"rewards/margins": 1.1483441591262817, |
|
"rewards/rejected": -1.4506114721298218, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.43e-07, |
|
"logits/chosen": -8.949113845825195, |
|
"logits/rejected": -8.905904769897461, |
|
"logps/chosen": -39.99720001220703, |
|
"logps/rejected": -62.5020866394043, |
|
"loss": 0.3482, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.06127683073282242, |
|
"rewards/margins": 1.05168616771698, |
|
"rewards/rejected": -1.1129629611968994, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.42e-07, |
|
"logits/chosen": -8.867097854614258, |
|
"logits/rejected": -8.736467361450195, |
|
"logps/chosen": -41.989192962646484, |
|
"logps/rejected": -61.25897216796875, |
|
"loss": 0.3882, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.35165315866470337, |
|
"rewards/margins": 0.877507209777832, |
|
"rewards/rejected": -1.2291605472564697, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.41e-07, |
|
"logits/chosen": -8.919662475585938, |
|
"logits/rejected": -8.849380493164062, |
|
"logps/chosen": -42.1957893371582, |
|
"logps/rejected": -66.24508666992188, |
|
"loss": 0.3071, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.30500465631484985, |
|
"rewards/margins": 1.1364095211029053, |
|
"rewards/rejected": -1.4414143562316895, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.4000000000000003e-07, |
|
"logits/chosen": -8.94130802154541, |
|
"logits/rejected": -8.818609237670898, |
|
"logps/chosen": -40.54155731201172, |
|
"logps/rejected": -59.704978942871094, |
|
"loss": 0.3546, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.13368672132492065, |
|
"rewards/margins": 0.9669790267944336, |
|
"rewards/rejected": -1.100665807723999, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.39e-07, |
|
"logits/chosen": -8.80152416229248, |
|
"logits/rejected": -8.622817039489746, |
|
"logps/chosen": -41.8613166809082, |
|
"logps/rejected": -63.264976501464844, |
|
"loss": 0.3418, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.29719024896621704, |
|
"rewards/margins": 1.0231988430023193, |
|
"rewards/rejected": -1.3203891515731812, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.38e-07, |
|
"logits/chosen": -8.761874198913574, |
|
"logits/rejected": -8.586349487304688, |
|
"logps/chosen": -41.71952438354492, |
|
"logps/rejected": -68.82637023925781, |
|
"loss": 0.2946, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.21405473351478577, |
|
"rewards/margins": 1.3066158294677734, |
|
"rewards/rejected": -1.5206706523895264, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.37e-07, |
|
"logits/chosen": -8.828725814819336, |
|
"logits/rejected": -8.663387298583984, |
|
"logps/chosen": -41.91630935668945, |
|
"logps/rejected": -65.08641052246094, |
|
"loss": 0.341, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.24001702666282654, |
|
"rewards/margins": 1.04341721534729, |
|
"rewards/rejected": -1.2834341526031494, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.36e-07, |
|
"logits/chosen": -8.988265991210938, |
|
"logits/rejected": -8.813672065734863, |
|
"logps/chosen": -41.581756591796875, |
|
"logps/rejected": -64.73905944824219, |
|
"loss": 0.3243, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.14427520334720612, |
|
"rewards/margins": 1.099812388420105, |
|
"rewards/rejected": -1.2440874576568604, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.35e-07, |
|
"logits/chosen": -9.024198532104492, |
|
"logits/rejected": -8.907011985778809, |
|
"logps/chosen": -40.02942657470703, |
|
"logps/rejected": -65.40174865722656, |
|
"loss": 0.3156, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.17336931824684143, |
|
"rewards/margins": 1.1412004232406616, |
|
"rewards/rejected": -1.3145698308944702, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.34e-07, |
|
"logits/chosen": -8.859591484069824, |
|
"logits/rejected": -8.78995132446289, |
|
"logps/chosen": -37.986610412597656, |
|
"logps/rejected": -62.487632751464844, |
|
"loss": 0.3413, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.03164323419332504, |
|
"rewards/margins": 1.038362741470337, |
|
"rewards/rejected": -1.0700057744979858, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.33e-07, |
|
"logits/chosen": -8.95915412902832, |
|
"logits/rejected": -8.862491607666016, |
|
"logps/chosen": -39.90531539916992, |
|
"logps/rejected": -68.55361938476562, |
|
"loss": 0.2611, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.09153137356042862, |
|
"rewards/margins": 1.3365256786346436, |
|
"rewards/rejected": -1.4280569553375244, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.32e-07, |
|
"logits/chosen": -8.84840202331543, |
|
"logits/rejected": -8.720226287841797, |
|
"logps/chosen": -39.774906158447266, |
|
"logps/rejected": -63.923797607421875, |
|
"loss": 0.2924, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15840193629264832, |
|
"rewards/margins": 1.228715181350708, |
|
"rewards/rejected": -1.3871170282363892, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.31e-07, |
|
"logits/chosen": -8.834859848022461, |
|
"logits/rejected": -8.646490097045898, |
|
"logps/chosen": -40.75969696044922, |
|
"logps/rejected": -68.73313903808594, |
|
"loss": 0.289, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.2145496904850006, |
|
"rewards/margins": 1.282721757888794, |
|
"rewards/rejected": -1.4972714185714722, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.3e-07, |
|
"logits/chosen": -9.14974594116211, |
|
"logits/rejected": -9.094747543334961, |
|
"logps/chosen": -38.71310806274414, |
|
"logps/rejected": -64.7855453491211, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.10842056572437286, |
|
"rewards/margins": 1.1217743158340454, |
|
"rewards/rejected": -1.2301948070526123, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.29e-07, |
|
"logits/chosen": -8.805329322814941, |
|
"logits/rejected": -8.60169506072998, |
|
"logps/chosen": -44.541385650634766, |
|
"logps/rejected": -67.90604400634766, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.33684179186820984, |
|
"rewards/margins": 1.0830628871917725, |
|
"rewards/rejected": -1.4199049472808838, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.28e-07, |
|
"logits/chosen": -9.00810432434082, |
|
"logits/rejected": -8.874260902404785, |
|
"logps/chosen": -42.70119857788086, |
|
"logps/rejected": -66.67572784423828, |
|
"loss": 0.3219, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.28449511528015137, |
|
"rewards/margins": 1.1121952533721924, |
|
"rewards/rejected": -1.3966902494430542, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.27e-07, |
|
"logits/chosen": -9.011045455932617, |
|
"logits/rejected": -8.86266803741455, |
|
"logps/chosen": -40.55815887451172, |
|
"logps/rejected": -65.76628875732422, |
|
"loss": 0.3108, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.15313352644443512, |
|
"rewards/margins": 1.2283756732940674, |
|
"rewards/rejected": -1.3815090656280518, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.26e-07, |
|
"logits/chosen": -8.745307922363281, |
|
"logits/rejected": -8.638575553894043, |
|
"logps/chosen": -40.17422866821289, |
|
"logps/rejected": -61.25606155395508, |
|
"loss": 0.3363, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.26441556215286255, |
|
"rewards/margins": 1.0637372732162476, |
|
"rewards/rejected": -1.3281527757644653, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.25e-07, |
|
"logits/chosen": -8.917795181274414, |
|
"logits/rejected": -8.75191593170166, |
|
"logps/chosen": -40.356971740722656, |
|
"logps/rejected": -65.25245666503906, |
|
"loss": 0.3272, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.21726162731647491, |
|
"rewards/margins": 1.1349842548370361, |
|
"rewards/rejected": -1.3522460460662842, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.24e-07, |
|
"logits/chosen": -9.057024002075195, |
|
"logits/rejected": -9.007242202758789, |
|
"logps/chosen": -41.4436149597168, |
|
"logps/rejected": -59.33845138549805, |
|
"loss": 0.3784, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.20186138153076172, |
|
"rewards/margins": 0.8808485865592957, |
|
"rewards/rejected": -1.082709789276123, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.23e-07, |
|
"logits/chosen": -9.150751113891602, |
|
"logits/rejected": -8.990005493164062, |
|
"logps/chosen": -41.529197692871094, |
|
"logps/rejected": -64.6326904296875, |
|
"loss": 0.3474, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.1397334188222885, |
|
"rewards/margins": 1.0998492240905762, |
|
"rewards/rejected": -1.2395826578140259, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.22e-07, |
|
"logits/chosen": -8.985628128051758, |
|
"logits/rejected": -8.924480438232422, |
|
"logps/chosen": -39.99455261230469, |
|
"logps/rejected": -62.32940673828125, |
|
"loss": 0.3442, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.21177975833415985, |
|
"rewards/margins": 1.0999975204467773, |
|
"rewards/rejected": -1.311777114868164, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.21e-07, |
|
"logits/chosen": -9.033208847045898, |
|
"logits/rejected": -8.933832168579102, |
|
"logps/chosen": -42.78688049316406, |
|
"logps/rejected": -65.41902160644531, |
|
"loss": 0.3323, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.42959874868392944, |
|
"rewards/margins": 1.0489178895950317, |
|
"rewards/rejected": -1.478516697883606, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.2e-07, |
|
"logits/chosen": -8.921372413635254, |
|
"logits/rejected": -8.761728286743164, |
|
"logps/chosen": -41.17156982421875, |
|
"logps/rejected": -68.08998107910156, |
|
"loss": 0.2719, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20495982468128204, |
|
"rewards/margins": 1.340659737586975, |
|
"rewards/rejected": -1.5456194877624512, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.19e-07, |
|
"logits/chosen": -8.87382698059082, |
|
"logits/rejected": -8.771355628967285, |
|
"logps/chosen": -41.562965393066406, |
|
"logps/rejected": -64.83119201660156, |
|
"loss": 0.2963, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.17605173587799072, |
|
"rewards/margins": 1.220849633216858, |
|
"rewards/rejected": -1.3969013690948486, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.18e-07, |
|
"logits/chosen": -8.82695198059082, |
|
"logits/rejected": -8.737783432006836, |
|
"logps/chosen": -39.56995391845703, |
|
"logps/rejected": -62.3057746887207, |
|
"loss": 0.3402, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.20497837662696838, |
|
"rewards/margins": 1.1743295192718506, |
|
"rewards/rejected": -1.3793079853057861, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.17e-07, |
|
"logits/chosen": -9.018879890441895, |
|
"logits/rejected": -8.862287521362305, |
|
"logps/chosen": -42.85093688964844, |
|
"logps/rejected": -72.0836181640625, |
|
"loss": 0.3212, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.2954815924167633, |
|
"rewards/margins": 1.268796682357788, |
|
"rewards/rejected": -1.5642781257629395, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.1599999999999997e-07, |
|
"logits/chosen": -8.963022232055664, |
|
"logits/rejected": -8.82475471496582, |
|
"logps/chosen": -38.94888687133789, |
|
"logps/rejected": -68.8856201171875, |
|
"loss": 0.2643, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0330241322517395, |
|
"rewards/margins": 1.4156899452209473, |
|
"rewards/rejected": -1.4487141370773315, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.15e-07, |
|
"logits/chosen": -9.073816299438477, |
|
"logits/rejected": -8.94584846496582, |
|
"logps/chosen": -44.071678161621094, |
|
"logps/rejected": -70.720947265625, |
|
"loss": 0.2948, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.4320756494998932, |
|
"rewards/margins": 1.216040849685669, |
|
"rewards/rejected": -1.6481164693832397, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.14e-07, |
|
"logits/chosen": -9.04081916809082, |
|
"logits/rejected": -8.882933616638184, |
|
"logps/chosen": -39.92203140258789, |
|
"logps/rejected": -66.04690551757812, |
|
"loss": 0.3199, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.12839896976947784, |
|
"rewards/margins": 1.1672937870025635, |
|
"rewards/rejected": -1.2956926822662354, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.13e-07, |
|
"logits/chosen": -8.965912818908691, |
|
"logits/rejected": -8.788244247436523, |
|
"logps/chosen": -40.17821502685547, |
|
"logps/rejected": -65.7179946899414, |
|
"loss": 0.2863, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.1306789070367813, |
|
"rewards/margins": 1.2615444660186768, |
|
"rewards/rejected": -1.3922232389450073, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.12e-07, |
|
"logits/chosen": -8.968905448913574, |
|
"logits/rejected": -8.836429595947266, |
|
"logps/chosen": -40.19866180419922, |
|
"logps/rejected": -65.63804626464844, |
|
"loss": 0.2997, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.29229894280433655, |
|
"rewards/margins": 1.2268226146697998, |
|
"rewards/rejected": -1.5191216468811035, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.1099999999999997e-07, |
|
"logits/chosen": -8.949649810791016, |
|
"logits/rejected": -8.85619068145752, |
|
"logps/chosen": -41.16625213623047, |
|
"logps/rejected": -67.67311096191406, |
|
"loss": 0.3123, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.18751493096351624, |
|
"rewards/margins": 1.2116743326187134, |
|
"rewards/rejected": -1.3991893529891968, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.1e-07, |
|
"logits/chosen": -9.069915771484375, |
|
"logits/rejected": -8.9552640914917, |
|
"logps/chosen": -44.67123031616211, |
|
"logps/rejected": -69.6094970703125, |
|
"loss": 0.3179, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.36990422010421753, |
|
"rewards/margins": 1.1963659524917603, |
|
"rewards/rejected": -1.566270112991333, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.09e-07, |
|
"logits/chosen": -8.953503608703613, |
|
"logits/rejected": -8.81824779510498, |
|
"logps/chosen": -43.4686279296875, |
|
"logps/rejected": -65.56145477294922, |
|
"loss": 0.2937, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.3817153573036194, |
|
"rewards/margins": 1.2739273309707642, |
|
"rewards/rejected": -1.6556425094604492, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.08e-07, |
|
"logits/chosen": -8.856042861938477, |
|
"logits/rejected": -8.653620719909668, |
|
"logps/chosen": -40.48377990722656, |
|
"logps/rejected": -65.58942413330078, |
|
"loss": 0.3144, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.21695208549499512, |
|
"rewards/margins": 1.223385214805603, |
|
"rewards/rejected": -1.4403371810913086, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.07e-07, |
|
"logits/chosen": -8.951238632202148, |
|
"logits/rejected": -8.893243789672852, |
|
"logps/chosen": -38.89543914794922, |
|
"logps/rejected": -65.236572265625, |
|
"loss": 0.2907, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.14968658983707428, |
|
"rewards/margins": 1.2427290678024292, |
|
"rewards/rejected": -1.3924156427383423, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.0599999999999996e-07, |
|
"logits/chosen": -9.175559043884277, |
|
"logits/rejected": -9.138572692871094, |
|
"logps/chosen": -41.56648635864258, |
|
"logps/rejected": -62.5894889831543, |
|
"loss": 0.3235, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.3439091444015503, |
|
"rewards/margins": 1.1237269639968872, |
|
"rewards/rejected": -1.467635989189148, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.05e-07, |
|
"logits/chosen": -9.081843376159668, |
|
"logits/rejected": -8.967177391052246, |
|
"logps/chosen": -48.0760383605957, |
|
"logps/rejected": -72.55915069580078, |
|
"loss": 0.3347, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.599833607673645, |
|
"rewards/margins": 1.1599971055984497, |
|
"rewards/rejected": -1.7598304748535156, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.0399999999999997e-07, |
|
"logits/chosen": -8.88502025604248, |
|
"logits/rejected": -8.776427268981934, |
|
"logps/chosen": -38.89107894897461, |
|
"logps/rejected": -62.98077392578125, |
|
"loss": 0.3103, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.08193059265613556, |
|
"rewards/margins": 1.204545021057129, |
|
"rewards/rejected": -1.2864755392074585, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.03e-07, |
|
"logits/chosen": -8.958200454711914, |
|
"logits/rejected": -8.758167266845703, |
|
"logps/chosen": -42.52730941772461, |
|
"logps/rejected": -68.24816131591797, |
|
"loss": 0.2818, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.2780629098415375, |
|
"rewards/margins": 1.3868484497070312, |
|
"rewards/rejected": -1.6649112701416016, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.02e-07, |
|
"logits/chosen": -9.064251899719238, |
|
"logits/rejected": -9.055684089660645, |
|
"logps/chosen": -40.26599884033203, |
|
"logps/rejected": -68.03285217285156, |
|
"loss": 0.2815, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.21159246563911438, |
|
"rewards/margins": 1.3151299953460693, |
|
"rewards/rejected": -1.5267223119735718, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.0099999999999996e-07, |
|
"logits/chosen": -8.8607177734375, |
|
"logits/rejected": -8.776520729064941, |
|
"logps/chosen": -44.09022521972656, |
|
"logps/rejected": -67.35237884521484, |
|
"loss": 0.2963, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.3733174204826355, |
|
"rewards/margins": 1.302872896194458, |
|
"rewards/rejected": -1.6761903762817383, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3e-07, |
|
"logits/chosen": -9.326008796691895, |
|
"logits/rejected": -9.195959091186523, |
|
"logps/chosen": -42.66015625, |
|
"logps/rejected": -69.38017272949219, |
|
"loss": 0.29, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.3082619607448578, |
|
"rewards/margins": 1.301923394203186, |
|
"rewards/rejected": -1.6101853847503662, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9899999999999996e-07, |
|
"logits/chosen": -8.926416397094727, |
|
"logits/rejected": -8.793083190917969, |
|
"logps/chosen": -42.86346435546875, |
|
"logps/rejected": -69.28981018066406, |
|
"loss": 0.2735, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.30893146991729736, |
|
"rewards/margins": 1.3181147575378418, |
|
"rewards/rejected": -1.6270462274551392, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.98e-07, |
|
"logits/chosen": -9.045461654663086, |
|
"logits/rejected": -8.975128173828125, |
|
"logps/chosen": -41.371551513671875, |
|
"logps/rejected": -68.18250274658203, |
|
"loss": 0.3091, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.3913578391075134, |
|
"rewards/margins": 1.3062489032745361, |
|
"rewards/rejected": -1.6976070404052734, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9699999999999997e-07, |
|
"logits/chosen": -8.808774948120117, |
|
"logits/rejected": -8.637701034545898, |
|
"logps/chosen": -41.52623748779297, |
|
"logps/rejected": -65.2273941040039, |
|
"loss": 0.3081, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.23487603664398193, |
|
"rewards/margins": 1.2760727405548096, |
|
"rewards/rejected": -1.510948896408081, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9599999999999995e-07, |
|
"logits/chosen": -8.662897109985352, |
|
"logits/rejected": -8.514671325683594, |
|
"logps/chosen": -40.43410873413086, |
|
"logps/rejected": -63.4935188293457, |
|
"loss": 0.3236, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": -0.180062934756279, |
|
"rewards/margins": 1.2099792957305908, |
|
"rewards/rejected": -1.3900420665740967, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.95e-07, |
|
"logits/chosen": -9.048240661621094, |
|
"logits/rejected": -8.950238227844238, |
|
"logps/chosen": -38.42674255371094, |
|
"logps/rejected": -70.53987121582031, |
|
"loss": 0.2272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2043372094631195, |
|
"rewards/margins": 1.5515215396881104, |
|
"rewards/rejected": -1.7558587789535522, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9399999999999996e-07, |
|
"logits/chosen": -8.96391487121582, |
|
"logits/rejected": -8.855020523071289, |
|
"logps/chosen": -41.72402572631836, |
|
"logps/rejected": -68.75076293945312, |
|
"loss": 0.2608, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2568601965904236, |
|
"rewards/margins": 1.3788115978240967, |
|
"rewards/rejected": -1.6356719732284546, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.93e-07, |
|
"logits/chosen": -8.95437240600586, |
|
"logits/rejected": -8.87646484375, |
|
"logps/chosen": -43.06665802001953, |
|
"logps/rejected": -68.59609985351562, |
|
"loss": 0.2886, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.3358538746833801, |
|
"rewards/margins": 1.31618332862854, |
|
"rewards/rejected": -1.652037262916565, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9199999999999997e-07, |
|
"logits/chosen": -8.841405868530273, |
|
"logits/rejected": -8.739511489868164, |
|
"logps/chosen": -42.112754821777344, |
|
"logps/rejected": -64.5459976196289, |
|
"loss": 0.322, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.3095633387565613, |
|
"rewards/margins": 1.1418513059616089, |
|
"rewards/rejected": -1.4514145851135254, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9099999999999995e-07, |
|
"logits/chosen": -9.11125373840332, |
|
"logits/rejected": -9.027490615844727, |
|
"logps/chosen": -41.227901458740234, |
|
"logps/rejected": -76.45286560058594, |
|
"loss": 0.236, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.18211723864078522, |
|
"rewards/margins": 1.6697490215301514, |
|
"rewards/rejected": -1.8518664836883545, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9e-07, |
|
"logits/chosen": -8.93098258972168, |
|
"logits/rejected": -8.775957107543945, |
|
"logps/chosen": -42.168487548828125, |
|
"logps/rejected": -69.60896301269531, |
|
"loss": 0.2531, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.1441860944032669, |
|
"rewards/margins": 1.5427669286727905, |
|
"rewards/rejected": -1.6869529485702515, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8899999999999995e-07, |
|
"logits/chosen": -9.149178504943848, |
|
"logits/rejected": -8.981005668640137, |
|
"logps/chosen": -40.37749481201172, |
|
"logps/rejected": -66.62581634521484, |
|
"loss": 0.276, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.16469226777553558, |
|
"rewards/margins": 1.4120306968688965, |
|
"rewards/rejected": -1.5767228603363037, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.88e-07, |
|
"logits/chosen": -8.969169616699219, |
|
"logits/rejected": -8.803339004516602, |
|
"logps/chosen": -42.27989959716797, |
|
"logps/rejected": -64.9478759765625, |
|
"loss": 0.2654, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.25626304745674133, |
|
"rewards/margins": 1.4114668369293213, |
|
"rewards/rejected": -1.6677299737930298, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"logits/chosen": -8.94532585144043, |
|
"logits/rejected": -8.8807373046875, |
|
"logps/chosen": -44.27782440185547, |
|
"logps/rejected": -70.2257308959961, |
|
"loss": 0.2728, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.2951999306678772, |
|
"rewards/margins": 1.3818528652191162, |
|
"rewards/rejected": -1.6770527362823486, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8599999999999994e-07, |
|
"logits/chosen": -9.086102485656738, |
|
"logits/rejected": -9.030981063842773, |
|
"logps/chosen": -41.30907440185547, |
|
"logps/rejected": -68.33830261230469, |
|
"loss": 0.2695, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.1512337625026703, |
|
"rewards/margins": 1.4189387559890747, |
|
"rewards/rejected": -1.570172667503357, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8499999999999997e-07, |
|
"logits/chosen": -8.850401878356934, |
|
"logits/rejected": -8.755792617797852, |
|
"logps/chosen": -43.39167022705078, |
|
"logps/rejected": -70.92607879638672, |
|
"loss": 0.2675, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.1908183991909027, |
|
"rewards/margins": 1.4917595386505127, |
|
"rewards/rejected": -1.6825778484344482, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8399999999999995e-07, |
|
"logits/chosen": -9.259855270385742, |
|
"logits/rejected": -9.203694343566895, |
|
"logps/chosen": -41.756351470947266, |
|
"logps/rejected": -65.8165512084961, |
|
"loss": 0.2729, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.2136426419019699, |
|
"rewards/margins": 1.3828479051589966, |
|
"rewards/rejected": -1.5964906215667725, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.83e-07, |
|
"logits/chosen": -8.922069549560547, |
|
"logits/rejected": -8.816206932067871, |
|
"logps/chosen": -43.09095001220703, |
|
"logps/rejected": -70.11869049072266, |
|
"loss": 0.2481, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.40523457527160645, |
|
"rewards/margins": 1.536644697189331, |
|
"rewards/rejected": -1.9418792724609375, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8199999999999996e-07, |
|
"logits/chosen": -9.18004322052002, |
|
"logits/rejected": -9.122257232666016, |
|
"logps/chosen": -40.0262451171875, |
|
"logps/rejected": -73.48004150390625, |
|
"loss": 0.2586, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.18847300112247467, |
|
"rewards/margins": 1.6103322505950928, |
|
"rewards/rejected": -1.7988052368164062, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.8100000000000004e-07, |
|
"logits/chosen": -8.809640884399414, |
|
"logits/rejected": -8.642433166503906, |
|
"logps/chosen": -46.24394989013672, |
|
"logps/rejected": -66.08168029785156, |
|
"loss": 0.3238, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.5799109935760498, |
|
"rewards/margins": 1.1493396759033203, |
|
"rewards/rejected": -1.7292506694793701, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.8e-07, |
|
"logits/chosen": -9.087625503540039, |
|
"logits/rejected": -8.999017715454102, |
|
"logps/chosen": -44.835086822509766, |
|
"logps/rejected": -69.23603057861328, |
|
"loss": 0.3145, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.4856896996498108, |
|
"rewards/margins": 1.271304726600647, |
|
"rewards/rejected": -1.7569944858551025, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.79e-07, |
|
"logits/chosen": -8.82785415649414, |
|
"logits/rejected": -8.75294017791748, |
|
"logps/chosen": -44.45280838012695, |
|
"logps/rejected": -73.3116226196289, |
|
"loss": 0.237, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.446541965007782, |
|
"rewards/margins": 1.5875802040100098, |
|
"rewards/rejected": -2.0341219902038574, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.7800000000000003e-07, |
|
"logits/chosen": -8.659734725952148, |
|
"logits/rejected": -8.634805679321289, |
|
"logps/chosen": -42.231929779052734, |
|
"logps/rejected": -70.79386901855469, |
|
"loss": 0.2399, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.25388625264167786, |
|
"rewards/margins": 1.5821778774261475, |
|
"rewards/rejected": -1.836064100265503, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.77e-07, |
|
"logits/chosen": -9.120695114135742, |
|
"logits/rejected": -9.018156051635742, |
|
"logps/chosen": -41.204193115234375, |
|
"logps/rejected": -70.23939514160156, |
|
"loss": 0.2549, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.20924563705921173, |
|
"rewards/margins": 1.5008659362792969, |
|
"rewards/rejected": -1.7101116180419922, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.7600000000000004e-07, |
|
"logits/chosen": -8.943544387817383, |
|
"logits/rejected": -8.886691093444824, |
|
"logps/chosen": -44.56146240234375, |
|
"logps/rejected": -69.162841796875, |
|
"loss": 0.2689, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.40339189767837524, |
|
"rewards/margins": 1.4689480066299438, |
|
"rewards/rejected": -1.8723399639129639, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.75e-07, |
|
"logits/chosen": -8.663228988647461, |
|
"logits/rejected": -8.65444278717041, |
|
"logps/chosen": -38.287235260009766, |
|
"logps/rejected": -70.95658874511719, |
|
"loss": 0.2141, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": 0.003157383296638727, |
|
"rewards/margins": 1.7416331768035889, |
|
"rewards/rejected": -1.7384757995605469, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.74e-07, |
|
"logits/chosen": -9.011740684509277, |
|
"logits/rejected": -8.81816577911377, |
|
"logps/chosen": -41.13055419921875, |
|
"logps/rejected": -70.431884765625, |
|
"loss": 0.253, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.17377158999443054, |
|
"rewards/margins": 1.6204302310943604, |
|
"rewards/rejected": -1.7942014932632446, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.73e-07, |
|
"logits/chosen": -8.906305313110352, |
|
"logits/rejected": -8.782539367675781, |
|
"logps/chosen": -41.5731086730957, |
|
"logps/rejected": -68.40547180175781, |
|
"loss": 0.3045, |
|
"rewards/accuracies": 0.9199999570846558, |
|
"rewards/chosen": -0.27805295586586, |
|
"rewards/margins": 1.4335918426513672, |
|
"rewards/rejected": -1.7116448879241943, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.72e-07, |
|
"logits/chosen": -8.857791900634766, |
|
"logits/rejected": -8.7904052734375, |
|
"logps/chosen": -44.0427131652832, |
|
"logps/rejected": -67.93760681152344, |
|
"loss": 0.3059, |
|
"rewards/accuracies": 0.9399999380111694, |
|
"rewards/chosen": -0.4859923720359802, |
|
"rewards/margins": 1.3006880283355713, |
|
"rewards/rejected": -1.7866804599761963, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.7100000000000003e-07, |
|
"logits/chosen": -8.68824577331543, |
|
"logits/rejected": -8.578463554382324, |
|
"logps/chosen": -46.805423736572266, |
|
"logps/rejected": -71.96556854248047, |
|
"loss": 0.2496, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6673830151557922, |
|
"rewards/margins": 1.489000678062439, |
|
"rewards/rejected": -2.156383752822876, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.7e-07, |
|
"logits/chosen": -8.906216621398926, |
|
"logits/rejected": -8.847137451171875, |
|
"logps/chosen": -45.05828857421875, |
|
"logps/rejected": -71.52420043945312, |
|
"loss": 0.2385, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.435865581035614, |
|
"rewards/margins": 1.5502560138702393, |
|
"rewards/rejected": -1.9861215353012085, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.69e-07, |
|
"logits/chosen": -8.947155952453613, |
|
"logits/rejected": -8.829044342041016, |
|
"logps/chosen": -41.90214920043945, |
|
"logps/rejected": -73.6261978149414, |
|
"loss": 0.2404, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.3273487687110901, |
|
"rewards/margins": 1.621421456336975, |
|
"rewards/rejected": -1.9487701654434204, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.68e-07, |
|
"logits/chosen": -8.829843521118164, |
|
"logits/rejected": -8.652304649353027, |
|
"logps/chosen": -41.917579650878906, |
|
"logps/rejected": -71.42601776123047, |
|
"loss": 0.2304, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.40560126304626465, |
|
"rewards/margins": 1.6656455993652344, |
|
"rewards/rejected": -2.07124662399292, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.67e-07, |
|
"logits/chosen": -9.078448295593262, |
|
"logits/rejected": -9.015963554382324, |
|
"logps/chosen": -48.53789138793945, |
|
"logps/rejected": -72.73210144042969, |
|
"loss": 0.2697, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.6256522536277771, |
|
"rewards/margins": 1.4912573099136353, |
|
"rewards/rejected": -2.1169095039367676, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.66e-07, |
|
"logits/chosen": -9.04401683807373, |
|
"logits/rejected": -8.977219581604004, |
|
"logps/chosen": -44.62416458129883, |
|
"logps/rejected": -71.57743835449219, |
|
"loss": 0.2474, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5261238813400269, |
|
"rewards/margins": 1.5465754270553589, |
|
"rewards/rejected": -2.072699546813965, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.65e-07, |
|
"logits/chosen": -8.7246732711792, |
|
"logits/rejected": -8.663068771362305, |
|
"logps/chosen": -42.98565673828125, |
|
"logps/rejected": -69.6664047241211, |
|
"loss": 0.2359, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.28909361362457275, |
|
"rewards/margins": 1.6054264307022095, |
|
"rewards/rejected": -1.8945200443267822, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.64e-07, |
|
"logits/chosen": -9.035811424255371, |
|
"logits/rejected": -8.968504905700684, |
|
"logps/chosen": -42.082191467285156, |
|
"logps/rejected": -69.3706283569336, |
|
"loss": 0.2806, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.3632660508155823, |
|
"rewards/margins": 1.3896889686584473, |
|
"rewards/rejected": -1.7529550790786743, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.63e-07, |
|
"logits/chosen": -8.899124145507812, |
|
"logits/rejected": -8.845288276672363, |
|
"logps/chosen": -42.73535919189453, |
|
"logps/rejected": -73.52890014648438, |
|
"loss": 0.2707, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.3918378949165344, |
|
"rewards/margins": 1.5124340057373047, |
|
"rewards/rejected": -1.9042720794677734, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.62e-07, |
|
"logits/chosen": -9.026718139648438, |
|
"logits/rejected": -8.953622817993164, |
|
"logps/chosen": -40.321632385253906, |
|
"logps/rejected": -68.15009307861328, |
|
"loss": 0.2917, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.21047565340995789, |
|
"rewards/margins": 1.3489338159561157, |
|
"rewards/rejected": -1.5594093799591064, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.61e-07, |
|
"logits/chosen": -9.139519691467285, |
|
"logits/rejected": -9.046548843383789, |
|
"logps/chosen": -39.14946365356445, |
|
"logps/rejected": -66.32569122314453, |
|
"loss": 0.2374, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.24171173572540283, |
|
"rewards/margins": 1.5724042654037476, |
|
"rewards/rejected": -1.8141158819198608, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.6e-07, |
|
"logits/chosen": -8.669023513793945, |
|
"logits/rejected": -8.563835144042969, |
|
"logps/chosen": -40.61223602294922, |
|
"logps/rejected": -69.82514190673828, |
|
"loss": 0.2413, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.3103567063808441, |
|
"rewards/margins": 1.6561682224273682, |
|
"rewards/rejected": -1.9665250778198242, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.59e-07, |
|
"logits/chosen": -8.794702529907227, |
|
"logits/rejected": -8.615911483764648, |
|
"logps/chosen": -43.99329376220703, |
|
"logps/rejected": -66.2555923461914, |
|
"loss": 0.2858, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.3557160496711731, |
|
"rewards/margins": 1.3395440578460693, |
|
"rewards/rejected": -1.6952602863311768, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.58e-07, |
|
"logits/chosen": -8.988102912902832, |
|
"logits/rejected": -8.940208435058594, |
|
"logps/chosen": -42.496700286865234, |
|
"logps/rejected": -69.17347717285156, |
|
"loss": 0.272, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.460458904504776, |
|
"rewards/margins": 1.4812017679214478, |
|
"rewards/rejected": -1.9416606426239014, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.57e-07, |
|
"logits/chosen": -9.062856674194336, |
|
"logits/rejected": -8.94471263885498, |
|
"logps/chosen": -42.636497497558594, |
|
"logps/rejected": -71.41357421875, |
|
"loss": 0.221, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.4040011465549469, |
|
"rewards/margins": 1.7163587808609009, |
|
"rewards/rejected": -2.1203601360321045, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.56e-07, |
|
"logits/chosen": -9.013895034790039, |
|
"logits/rejected": -9.061233520507812, |
|
"logps/chosen": -46.01321792602539, |
|
"logps/rejected": -71.0923843383789, |
|
"loss": 0.3042, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.4178338646888733, |
|
"rewards/margins": 1.3820786476135254, |
|
"rewards/rejected": -1.799912452697754, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.55e-07, |
|
"logits/chosen": -9.007227897644043, |
|
"logits/rejected": -8.854522705078125, |
|
"logps/chosen": -42.58584213256836, |
|
"logps/rejected": -68.90918731689453, |
|
"loss": 0.2832, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.3119141161441803, |
|
"rewards/margins": 1.5015567541122437, |
|
"rewards/rejected": -1.8134710788726807, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.5399999999999997e-07, |
|
"logits/chosen": -9.030736923217773, |
|
"logits/rejected": -8.89038372039795, |
|
"logps/chosen": -37.5427131652832, |
|
"logps/rejected": -67.01028442382812, |
|
"loss": 0.2396, |
|
"rewards/accuracies": 0.940000057220459, |
|
"rewards/chosen": -0.014831873588263988, |
|
"rewards/margins": 1.6802629232406616, |
|
"rewards/rejected": -1.6950948238372803, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.53e-07, |
|
"logits/chosen": -8.911919593811035, |
|
"logits/rejected": -8.769577026367188, |
|
"logps/chosen": -43.36934280395508, |
|
"logps/rejected": -71.1383056640625, |
|
"loss": 0.2516, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.3899439573287964, |
|
"rewards/margins": 1.618249535560608, |
|
"rewards/rejected": -2.0081934928894043, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.52e-07, |
|
"logits/chosen": -8.795068740844727, |
|
"logits/rejected": -8.727156639099121, |
|
"logps/chosen": -43.60725402832031, |
|
"logps/rejected": -73.10511016845703, |
|
"loss": 0.2449, |
|
"rewards/accuracies": 0.9800000190734863, |
|
"rewards/chosen": -0.5213699340820312, |
|
"rewards/margins": 1.5963902473449707, |
|
"rewards/rejected": -2.117760181427002, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.51e-07, |
|
"logits/chosen": -8.8865385055542, |
|
"logits/rejected": -8.710765838623047, |
|
"logps/chosen": -44.19552230834961, |
|
"logps/rejected": -74.49358367919922, |
|
"loss": 0.224, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.5294827222824097, |
|
"rewards/margins": 1.6507676839828491, |
|
"rewards/rejected": -2.180250406265259, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -8.938467979431152, |
|
"logits/rejected": -8.918638229370117, |
|
"logps/chosen": -42.69424819946289, |
|
"logps/rejected": -77.01347351074219, |
|
"loss": 0.1974, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.47195687890052795, |
|
"rewards/margins": 1.8267240524291992, |
|
"rewards/rejected": -2.2986807823181152, |
|
"step": 250 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"total_flos": 0.0, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|