|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 73, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 29.326078015981345, |
|
"learning_rate": 6.25e-08, |
|
"logps/chosen": -47.87165832519531, |
|
"logps/rejected": -35.03704071044922, |
|
"loss": 0.6939, |
|
"losses/dpo": 0.7437427639961243, |
|
"losses/sft": 0.2519839406013489, |
|
"losses/total": 0.7437427639961243, |
|
"ref_logps/chosen": -47.90069580078125, |
|
"ref_logps/rejected": -35.07575225830078, |
|
"rewards/accuracies": 0.4609375, |
|
"rewards/chosen": 0.0029037208296358585, |
|
"rewards/margins": -0.0009674869943410158, |
|
"rewards/rejected": 0.0038712075911462307, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 25.98987817588094, |
|
"learning_rate": 1.25e-07, |
|
"logps/chosen": -46.03837966918945, |
|
"logps/rejected": -34.79166030883789, |
|
"loss": 0.6937, |
|
"losses/dpo": 0.711306095123291, |
|
"losses/sft": 0.21511156857013702, |
|
"losses/total": 0.711306095123291, |
|
"ref_logps/chosen": -46.05853271484375, |
|
"ref_logps/rejected": -34.81706237792969, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0020157406106591225, |
|
"rewards/margins": -0.000524366507306695, |
|
"rewards/rejected": 0.002540107350796461, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 43.145173675858224, |
|
"learning_rate": 1.875e-07, |
|
"logps/chosen": -41.797569274902344, |
|
"logps/rejected": -31.708539962768555, |
|
"loss": 0.693, |
|
"losses/dpo": 0.7042351365089417, |
|
"losses/sft": 0.18763618171215057, |
|
"losses/total": 0.7042351365089417, |
|
"ref_logps/chosen": -41.833030700683594, |
|
"ref_logps/rejected": -31.735107421875, |
|
"rewards/accuracies": 0.5234375, |
|
"rewards/chosen": 0.003545756684616208, |
|
"rewards/margins": 0.0008889732416719198, |
|
"rewards/rejected": 0.0026567834429442883, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 31.32790996670384, |
|
"learning_rate": 2.5e-07, |
|
"logps/chosen": -42.71172332763672, |
|
"logps/rejected": -32.757808685302734, |
|
"loss": 0.6927, |
|
"losses/dpo": 0.6976655125617981, |
|
"losses/sft": 0.17784112691879272, |
|
"losses/total": 0.6976655125617981, |
|
"ref_logps/chosen": -42.72623062133789, |
|
"ref_logps/rejected": -32.75667190551758, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0014508566819131374, |
|
"rewards/margins": 0.0015643269289284945, |
|
"rewards/rejected": -0.00011346983956173062, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 35.10577986645193, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logps/chosen": -45.85194396972656, |
|
"logps/rejected": -34.628639221191406, |
|
"loss": 0.689, |
|
"losses/dpo": 0.7395577430725098, |
|
"losses/sft": 0.17383158206939697, |
|
"losses/total": 0.7395577430725098, |
|
"ref_logps/chosen": -45.91680145263672, |
|
"ref_logps/rejected": -34.60468673706055, |
|
"rewards/accuracies": 0.5703125, |
|
"rewards/chosen": 0.006485694088041782, |
|
"rewards/margins": 0.008881103247404099, |
|
"rewards/rejected": -0.002395408693701029, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 28.260278751569523, |
|
"learning_rate": 3.75e-07, |
|
"logps/chosen": -42.09749221801758, |
|
"logps/rejected": -32.70561599731445, |
|
"loss": 0.6932, |
|
"losses/dpo": 0.6590798497200012, |
|
"losses/sft": 0.18368251621723175, |
|
"losses/total": 0.6590798497200012, |
|
"ref_logps/chosen": -42.06741714477539, |
|
"ref_logps/rejected": -32.67097473144531, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.0030076471157372, |
|
"rewards/margins": 0.0004564363043755293, |
|
"rewards/rejected": -0.0034640836529433727, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 45.257780534421805, |
|
"learning_rate": 4.375e-07, |
|
"logps/chosen": -48.16801834106445, |
|
"logps/rejected": -35.98320770263672, |
|
"loss": 0.6931, |
|
"losses/dpo": 0.674820065498352, |
|
"losses/sft": 0.17130310833454132, |
|
"losses/total": 0.674820065498352, |
|
"ref_logps/chosen": -48.16166687011719, |
|
"ref_logps/rejected": -35.96845245361328, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.0006352070486173034, |
|
"rewards/margins": 0.0008399828802794218, |
|
"rewards/rejected": -0.0014751903945580125, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 37.963707614132204, |
|
"learning_rate": 5e-07, |
|
"logps/chosen": -46.631561279296875, |
|
"logps/rejected": -34.54258728027344, |
|
"loss": 0.6911, |
|
"losses/dpo": 0.6616916060447693, |
|
"losses/sft": 0.15279927849769592, |
|
"losses/total": 0.6616916060447693, |
|
"ref_logps/chosen": -46.690643310546875, |
|
"ref_logps/rejected": -34.551368713378906, |
|
"rewards/accuracies": 0.5546875, |
|
"rewards/chosen": 0.005908225197345018, |
|
"rewards/margins": 0.005030112341046333, |
|
"rewards/rejected": 0.0008781132637523115, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 23.24345634411509, |
|
"learning_rate": 4.997080567080816e-07, |
|
"logps/chosen": -45.053184509277344, |
|
"logps/rejected": -35.14673614501953, |
|
"loss": 0.6888, |
|
"losses/dpo": 0.645126461982727, |
|
"losses/sft": 0.1863231658935547, |
|
"losses/total": 0.645126461982727, |
|
"ref_logps/chosen": -45.13517379760742, |
|
"ref_logps/rejected": -35.132957458496094, |
|
"rewards/accuracies": 0.5390625, |
|
"rewards/chosen": 0.008199075236916542, |
|
"rewards/margins": 0.009576688520610332, |
|
"rewards/rejected": -0.0013776118867099285, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 27.949597341892236, |
|
"learning_rate": 4.988329086794122e-07, |
|
"logps/chosen": -46.718475341796875, |
|
"logps/rejected": -36.01044464111328, |
|
"loss": 0.6845, |
|
"losses/dpo": 0.6536989212036133, |
|
"losses/sft": 0.16235677897930145, |
|
"losses/total": 0.6536989212036133, |
|
"ref_logps/chosen": -46.86553192138672, |
|
"ref_logps/rejected": -35.97478103637695, |
|
"rewards/accuracies": 0.6171875, |
|
"rewards/chosen": 0.0147053562104702, |
|
"rewards/margins": 0.018271632492542267, |
|
"rewards/rejected": -0.0035662769805639982, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 40.316536183472955, |
|
"learning_rate": 4.973765998627628e-07, |
|
"logps/chosen": -45.7076416015625, |
|
"logps/rejected": -32.744361877441406, |
|
"loss": 0.6758, |
|
"losses/dpo": 0.639275848865509, |
|
"losses/sft": 0.19072100520133972, |
|
"losses/total": 0.639275848865509, |
|
"ref_logps/chosen": -45.953941345214844, |
|
"ref_logps/rejected": -32.63063430786133, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.024630192667245865, |
|
"rewards/margins": 0.036002762615680695, |
|
"rewards/rejected": -0.01137256994843483, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 31.231333750699285, |
|
"learning_rate": 4.953425315348533e-07, |
|
"logps/chosen": -48.346229553222656, |
|
"logps/rejected": -35.44029235839844, |
|
"loss": 0.6735, |
|
"losses/dpo": 0.7411879301071167, |
|
"losses/sft": 0.30462783575057983, |
|
"losses/total": 0.7411879301071167, |
|
"ref_logps/chosen": -48.579471588134766, |
|
"ref_logps/rejected": -35.26258087158203, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.023324450477957726, |
|
"rewards/margins": 0.04109576344490051, |
|
"rewards/rejected": -0.017771316692233086, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 24.02378939332813, |
|
"learning_rate": 4.92735454356513e-07, |
|
"logps/chosen": -43.760799407958984, |
|
"logps/rejected": -32.20792007446289, |
|
"loss": 0.6771, |
|
"losses/dpo": 0.7643380761146545, |
|
"losses/sft": 0.15294401347637177, |
|
"losses/total": 0.7643380761146545, |
|
"ref_logps/chosen": -43.909759521484375, |
|
"ref_logps/rejected": -32.016273498535156, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.01489595789462328, |
|
"rewards/margins": 0.034060731530189514, |
|
"rewards/rejected": -0.01916477642953396, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 33.47814491109199, |
|
"learning_rate": 4.895614572772916e-07, |
|
"logps/chosen": -45.79880905151367, |
|
"logps/rejected": -34.85653305053711, |
|
"loss": 0.6669, |
|
"losses/dpo": 0.7224411368370056, |
|
"losses/sft": 0.2095840573310852, |
|
"losses/total": 0.7224411368370056, |
|
"ref_logps/chosen": -46.07813262939453, |
|
"ref_logps/rejected": -34.58377456665039, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": 0.02793230675160885, |
|
"rewards/margins": 0.055208105593919754, |
|
"rewards/rejected": -0.027275800704956055, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 47.78782257013143, |
|
"learning_rate": 4.858279533144357e-07, |
|
"logps/chosen": -47.91066360473633, |
|
"logps/rejected": -36.8038330078125, |
|
"loss": 0.6545, |
|
"losses/dpo": 0.5712046027183533, |
|
"losses/sft": 0.20200778543949127, |
|
"losses/total": 0.5712046027183533, |
|
"ref_logps/chosen": -48.32217788696289, |
|
"ref_logps/rejected": -36.395023345947266, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": 0.04115153104066849, |
|
"rewards/margins": 0.08203274011611938, |
|
"rewards/rejected": -0.040881212800741196, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 246.97737804069968, |
|
"learning_rate": 4.815436622394441e-07, |
|
"logps/chosen": -46.90559387207031, |
|
"logps/rejected": -36.626888275146484, |
|
"loss": 0.6465, |
|
"losses/dpo": 0.7274478077888489, |
|
"losses/sft": 0.26765260100364685, |
|
"losses/total": 0.7274478077888489, |
|
"ref_logps/chosen": -47.21229934692383, |
|
"ref_logps/rejected": -35.93655776977539, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.03067046031355858, |
|
"rewards/margins": 0.09970355033874512, |
|
"rewards/rejected": -0.06903309375047684, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 23.079239827774252, |
|
"learning_rate": 4.767185902126363e-07, |
|
"logps/chosen": -48.87858200073242, |
|
"logps/rejected": -36.90644073486328, |
|
"loss": 0.633, |
|
"losses/dpo": 0.6357161998748779, |
|
"losses/sft": 0.1839471459388733, |
|
"losses/total": 0.6357161998748779, |
|
"ref_logps/chosen": -49.40204620361328, |
|
"ref_logps/rejected": -36.11450958251953, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": 0.05234625190496445, |
|
"rewards/margins": 0.13153919577598572, |
|
"rewards/rejected": -0.07919295132160187, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 25.63300252359878, |
|
"learning_rate": 4.7136400641330245e-07, |
|
"logps/chosen": -46.71650695800781, |
|
"logps/rejected": -37.09510040283203, |
|
"loss": 0.6297, |
|
"losses/dpo": 0.6393631100654602, |
|
"losses/sft": 0.21227942407131195, |
|
"losses/total": 0.6393631100654602, |
|
"ref_logps/chosen": -46.991477966308594, |
|
"ref_logps/rejected": -35.969173431396484, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": 0.02749716117978096, |
|
"rewards/margins": 0.14008952677249908, |
|
"rewards/rejected": -0.11259236931800842, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 26.311859157755837, |
|
"learning_rate": 4.6549241672001225e-07, |
|
"logps/chosen": -43.63357162475586, |
|
"logps/rejected": -34.979026794433594, |
|
"loss": 0.6077, |
|
"losses/dpo": 0.5548383593559265, |
|
"losses/sft": 0.19493867456912994, |
|
"losses/total": 0.5548383593559265, |
|
"ref_logps/chosen": -44.03193664550781, |
|
"ref_logps/rejected": -33.485252380371094, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": 0.03983645513653755, |
|
"rewards/margins": 0.18921390175819397, |
|
"rewards/rejected": -0.14937745034694672, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 28.714173620781665, |
|
"learning_rate": 4.591175345025566e-07, |
|
"logps/chosen": -46.371559143066406, |
|
"logps/rejected": -35.243812561035156, |
|
"loss": 0.609, |
|
"losses/dpo": 0.6410955190658569, |
|
"losses/sft": 0.16183941066265106, |
|
"losses/total": 0.6410955190658569, |
|
"ref_logps/chosen": -46.70909881591797, |
|
"ref_logps/rejected": -33.71453857421875, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": 0.03375420719385147, |
|
"rewards/margins": 0.18668171763420105, |
|
"rewards/rejected": -0.15292751789093018, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 26.549036618365495, |
|
"learning_rate": 4.5225424859373684e-07, |
|
"logps/chosen": -41.521549224853516, |
|
"logps/rejected": -34.770103454589844, |
|
"loss": 0.5963, |
|
"losses/dpo": 0.7364767789840698, |
|
"losses/sft": 0.17622552812099457, |
|
"losses/total": 0.7364767789840698, |
|
"ref_logps/chosen": -41.7501106262207, |
|
"ref_logps/rejected": -32.80527114868164, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": 0.02285606414079666, |
|
"rewards/margins": 0.21933907270431519, |
|
"rewards/rejected": -0.19648301601409912, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 33.26960463303905, |
|
"learning_rate": 4.4491858851580553e-07, |
|
"logps/chosen": -45.94141387939453, |
|
"logps/rejected": -36.16654968261719, |
|
"loss": 0.5887, |
|
"losses/dpo": 0.495862752199173, |
|
"losses/sft": 0.17526012659072876, |
|
"losses/total": 0.495862752199173, |
|
"ref_logps/chosen": -46.16797637939453, |
|
"ref_logps/rejected": -33.92024612426758, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.02265631966292858, |
|
"rewards/margins": 0.2472866028547287, |
|
"rewards/rejected": -0.22463028132915497, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 38.94504011639214, |
|
"learning_rate": 4.3712768704277524e-07, |
|
"logps/chosen": -43.17596435546875, |
|
"logps/rejected": -35.83791732788086, |
|
"loss": 0.5549, |
|
"losses/dpo": 0.6368575692176819, |
|
"losses/sft": 0.20419813692569733, |
|
"losses/total": 0.6368575692176819, |
|
"ref_logps/chosen": -43.439910888671875, |
|
"ref_logps/rejected": -32.738441467285156, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": 0.026394736021757126, |
|
"rewards/margins": 0.3363422751426697, |
|
"rewards/rejected": -0.30994755029678345, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 28.33928817647071, |
|
"learning_rate": 4.2889974018603024e-07, |
|
"logps/chosen": -48.73534393310547, |
|
"logps/rejected": -40.98769760131836, |
|
"loss": 0.5358, |
|
"losses/dpo": 0.6388107538223267, |
|
"losses/sft": 0.21662825345993042, |
|
"losses/total": 0.6388107538223267, |
|
"ref_logps/chosen": -48.840187072753906, |
|
"ref_logps/rejected": -37.24340057373047, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": 0.010484418831765652, |
|
"rewards/margins": 0.38491398096084595, |
|
"rewards/rejected": -0.3744295537471771, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 31.571769897086057, |
|
"learning_rate": 4.2025396469669926e-07, |
|
"logps/chosen": -49.65196228027344, |
|
"logps/rejected": -39.15043258666992, |
|
"loss": 0.5317, |
|
"losses/dpo": 0.4821869134902954, |
|
"losses/sft": 0.2129327803850174, |
|
"losses/total": 0.4821869134902954, |
|
"ref_logps/chosen": -49.09580993652344, |
|
"ref_logps/rejected": -34.47374725341797, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -0.05561504885554314, |
|
"rewards/margins": 0.41205331683158875, |
|
"rewards/rejected": -0.467668354511261, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 20.54896163205101, |
|
"learning_rate": 4.112105531840426e-07, |
|
"logps/chosen": -50.22370529174805, |
|
"logps/rejected": -38.49211120605469, |
|
"loss": 0.5133, |
|
"losses/dpo": 0.6953214406967163, |
|
"losses/sft": 0.1770307421684265, |
|
"losses/total": 0.6953214406967163, |
|
"ref_logps/chosen": -49.23892593383789, |
|
"ref_logps/rejected": -32.732269287109375, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -0.09847792983055115, |
|
"rewards/margins": 0.4775061011314392, |
|
"rewards/rejected": -0.575984001159668, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 24.210290197713302, |
|
"learning_rate": 4.017906269546778e-07, |
|
"logps/chosen": -48.78424072265625, |
|
"logps/rejected": -39.4119758605957, |
|
"loss": 0.5025, |
|
"losses/dpo": 0.2536649703979492, |
|
"losses/sft": 0.17507979273796082, |
|
"losses/total": 0.2536649703979492, |
|
"ref_logps/chosen": -47.147621154785156, |
|
"ref_logps/rejected": -32.35851287841797, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.16366226971149445, |
|
"rewards/margins": 0.5416839718818665, |
|
"rewards/rejected": -0.7053462266921997, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 25.054325101536794, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logps/chosen": -46.48284912109375, |
|
"logps/rejected": -40.55732727050781, |
|
"loss": 0.5225, |
|
"losses/dpo": 0.6159500479698181, |
|
"losses/sft": 0.18471354246139526, |
|
"losses/total": 0.6159500479698181, |
|
"ref_logps/chosen": -44.64717102050781, |
|
"ref_logps/rejected": -34.08299255371094, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.18356791138648987, |
|
"rewards/margins": 0.46386560797691345, |
|
"rewards/rejected": -0.6474335193634033, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 25.059885652690767, |
|
"learning_rate": 3.8191006102653317e-07, |
|
"logps/chosen": -50.65240478515625, |
|
"logps/rejected": -44.85976028442383, |
|
"loss": 0.4509, |
|
"losses/dpo": 0.5429763793945312, |
|
"losses/sft": 0.19810011982917786, |
|
"losses/total": 0.5429763793945312, |
|
"ref_logps/chosen": -47.85638427734375, |
|
"ref_logps/rejected": -35.169281005859375, |
|
"rewards/accuracies": 0.9140625, |
|
"rewards/chosen": -0.27960240840911865, |
|
"rewards/margins": 0.6894451975822449, |
|
"rewards/rejected": -0.9690475463867188, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 19.99856582783424, |
|
"learning_rate": 3.7149585331065145e-07, |
|
"logps/chosen": -49.85383605957031, |
|
"logps/rejected": -45.81809997558594, |
|
"loss": 0.4332, |
|
"losses/dpo": 0.29431843757629395, |
|
"losses/sft": 0.18581561744213104, |
|
"losses/total": 0.29431843757629395, |
|
"ref_logps/chosen": -46.770938873291016, |
|
"ref_logps/rejected": -34.5809326171875, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -0.3082895576953888, |
|
"rewards/margins": 0.8154268264770508, |
|
"rewards/rejected": -1.1237163543701172, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 34.79633257386577, |
|
"learning_rate": 3.6079788639981036e-07, |
|
"logps/chosen": -52.836326599121094, |
|
"logps/rejected": -46.93244934082031, |
|
"loss": 0.4604, |
|
"losses/dpo": 0.8810983300209045, |
|
"losses/sft": 0.23828193545341492, |
|
"losses/total": 0.8810983300209045, |
|
"ref_logps/chosen": -49.11648178100586, |
|
"ref_logps/rejected": -36.381752014160156, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -0.3719848394393921, |
|
"rewards/margins": 0.6830847263336182, |
|
"rewards/rejected": -1.0550695657730103, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 23.026509844905394, |
|
"learning_rate": 3.498411458914238e-07, |
|
"logps/chosen": -50.38003921508789, |
|
"logps/rejected": -45.10429763793945, |
|
"loss": 0.4393, |
|
"losses/dpo": 0.15313033759593964, |
|
"losses/sft": 0.19763650000095367, |
|
"losses/total": 0.15313033759593964, |
|
"ref_logps/chosen": -46.028076171875, |
|
"ref_logps/rejected": -33.00657272338867, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4351964592933655, |
|
"rewards/margins": 0.7745760679244995, |
|
"rewards/rejected": -1.2097725868225098, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 18.317574609447647, |
|
"learning_rate": 3.3865122176063385e-07, |
|
"logps/chosen": -51.4942512512207, |
|
"logps/rejected": -49.96583557128906, |
|
"loss": 0.4075, |
|
"losses/dpo": 0.1953999102115631, |
|
"losses/sft": 0.29790106415748596, |
|
"losses/total": 0.1953999102115631, |
|
"ref_logps/chosen": -45.6589469909668, |
|
"ref_logps/rejected": -34.858577728271484, |
|
"rewards/accuracies": 0.8515625, |
|
"rewards/chosen": -0.5835303068161011, |
|
"rewards/margins": 0.9271953105926514, |
|
"rewards/rejected": -1.510725736618042, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 19.255871137244554, |
|
"learning_rate": 3.272542485937368e-07, |
|
"logps/chosen": -50.351234436035156, |
|
"logps/rejected": -48.89935302734375, |
|
"loss": 0.3959, |
|
"losses/dpo": 0.4281933605670929, |
|
"losses/sft": 0.19774244725704193, |
|
"losses/total": 0.4281933605670929, |
|
"ref_logps/chosen": -43.48761749267578, |
|
"ref_logps/rejected": -32.255577087402344, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.68636155128479, |
|
"rewards/margins": 0.9780160188674927, |
|
"rewards/rejected": -1.6643775701522827, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 17.53385145494046, |
|
"learning_rate": 3.1567684454964674e-07, |
|
"logps/chosen": -49.46981430053711, |
|
"logps/rejected": -49.80710220336914, |
|
"loss": 0.4011, |
|
"losses/dpo": 0.5663512945175171, |
|
"losses/sft": 0.24904295802116394, |
|
"losses/total": 0.5663512945175171, |
|
"ref_logps/chosen": -42.88325500488281, |
|
"ref_logps/rejected": -33.13590621948242, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.6586559414863586, |
|
"rewards/margins": 1.0084636211395264, |
|
"rewards/rejected": -1.6671196222305298, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 157.5390863725062, |
|
"learning_rate": 3.0394604919195157e-07, |
|
"logps/chosen": -50.14772415161133, |
|
"logps/rejected": -49.97753143310547, |
|
"loss": 0.4132, |
|
"losses/dpo": 0.6134005784988403, |
|
"losses/sft": 0.1941785216331482, |
|
"losses/total": 0.6134005784988403, |
|
"ref_logps/chosen": -42.886375427246094, |
|
"ref_logps/rejected": -32.889442443847656, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.7261347770690918, |
|
"rewards/margins": 0.9826743006706238, |
|
"rewards/rejected": -1.7088091373443604, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 30.744138000924785, |
|
"learning_rate": 2.920892603367596e-07, |
|
"logps/chosen": -52.53690719604492, |
|
"logps/rejected": -51.7293701171875, |
|
"loss": 0.4345, |
|
"losses/dpo": 0.39982184767723083, |
|
"losses/sft": 0.16318069398403168, |
|
"losses/total": 0.39982184767723083, |
|
"ref_logps/chosen": -44.043270111083984, |
|
"ref_logps/rejected": -33.67184066772461, |
|
"rewards/accuracies": 0.8203125, |
|
"rewards/chosen": -0.8493636250495911, |
|
"rewards/margins": 0.956389307975769, |
|
"rewards/rejected": -1.8057528734207153, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 18.608606064784283, |
|
"learning_rate": 2.801341700638307e-07, |
|
"logps/chosen": -54.247406005859375, |
|
"logps/rejected": -51.46720886230469, |
|
"loss": 0.4308, |
|
"losses/dpo": 0.7559365630149841, |
|
"losses/sft": 0.20898960530757904, |
|
"losses/total": 0.7559365630149841, |
|
"ref_logps/chosen": -47.05962371826172, |
|
"ref_logps/rejected": -34.95857238769531, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.7187784910202026, |
|
"rewards/margins": 0.9320851564407349, |
|
"rewards/rejected": -1.6508636474609375, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 47.98397942977545, |
|
"learning_rate": 2.681087000404406e-07, |
|
"logps/chosen": -53.239768981933594, |
|
"logps/rejected": -52.34550476074219, |
|
"loss": 0.3907, |
|
"losses/dpo": 0.31572413444519043, |
|
"losses/sft": 0.18499067425727844, |
|
"losses/total": 0.31572413444519043, |
|
"ref_logps/chosen": -45.19135284423828, |
|
"ref_logps/rejected": -33.13307189941406, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.8048416972160339, |
|
"rewards/margins": 1.1164013147354126, |
|
"rewards/rejected": -1.9212429523468018, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 21.523748609052035, |
|
"learning_rate": 2.5604093630903305e-07, |
|
"logps/chosen": -53.806236267089844, |
|
"logps/rejected": -54.13373565673828, |
|
"loss": 0.3678, |
|
"losses/dpo": 0.6854045391082764, |
|
"losses/sft": 0.21097487211227417, |
|
"losses/total": 0.6854045391082764, |
|
"ref_logps/chosen": -44.96014404296875, |
|
"ref_logps/rejected": -34.04387664794922, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.8846092224121094, |
|
"rewards/margins": 1.1243770122528076, |
|
"rewards/rejected": -2.008985996246338, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 20.63046978113073, |
|
"learning_rate": 2.43959063690967e-07, |
|
"logps/chosen": -56.91130065917969, |
|
"logps/rejected": -54.714378356933594, |
|
"loss": 0.3872, |
|
"losses/dpo": 0.1204671785235405, |
|
"losses/sft": 0.17937365174293518, |
|
"losses/total": 0.1204671785235405, |
|
"ref_logps/chosen": -47.74310302734375, |
|
"ref_logps/rejected": -34.866615295410156, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.9168204069137573, |
|
"rewards/margins": 1.0679559707641602, |
|
"rewards/rejected": -1.984776258468628, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 27.841791874606287, |
|
"learning_rate": 2.3189129995955942e-07, |
|
"logps/chosen": -56.37548065185547, |
|
"logps/rejected": -55.140594482421875, |
|
"loss": 0.3703, |
|
"losses/dpo": 0.6694349646568298, |
|
"losses/sft": 0.15415219962596893, |
|
"losses/total": 0.6694349646568298, |
|
"ref_logps/chosen": -46.114707946777344, |
|
"ref_logps/rejected": -33.19464111328125, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -1.0260775089263916, |
|
"rewards/margins": 1.16851806640625, |
|
"rewards/rejected": -2.1945955753326416, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 20.157417684445996, |
|
"learning_rate": 2.1986582993616925e-07, |
|
"logps/chosen": -55.861724853515625, |
|
"logps/rejected": -55.27591323852539, |
|
"loss": 0.4096, |
|
"losses/dpo": 0.253600537776947, |
|
"losses/sft": 0.25442296266555786, |
|
"losses/total": 0.253600537776947, |
|
"ref_logps/chosen": -46.024993896484375, |
|
"ref_logps/rejected": -34.88616180419922, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -0.9836731553077698, |
|
"rewards/margins": 1.0553019046783447, |
|
"rewards/rejected": -2.038975238800049, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 22.91868411351925, |
|
"learning_rate": 2.0791073966324034e-07, |
|
"logps/chosen": -56.3699836730957, |
|
"logps/rejected": -58.20032501220703, |
|
"loss": 0.3645, |
|
"losses/dpo": 0.05803808197379112, |
|
"losses/sft": 0.16261443495750427, |
|
"losses/total": 0.05803808197379112, |
|
"ref_logps/chosen": -46.18814468383789, |
|
"ref_logps/rejected": -35.7181396484375, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -1.018183708190918, |
|
"rewards/margins": 1.230034351348877, |
|
"rewards/rejected": -2.248218059539795, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 24.665726952614282, |
|
"learning_rate": 1.960539508080485e-07, |
|
"logps/chosen": -55.33811569213867, |
|
"logps/rejected": -56.2475700378418, |
|
"loss": 0.4363, |
|
"losses/dpo": 0.6756047606468201, |
|
"losses/sft": 0.1989610195159912, |
|
"losses/total": 0.6756047606468201, |
|
"ref_logps/chosen": -42.876373291015625, |
|
"ref_logps/rejected": -33.306602478027344, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2461739778518677, |
|
"rewards/margins": 1.0479230880737305, |
|
"rewards/rejected": -2.2940969467163086, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 35.04495782063734, |
|
"learning_rate": 1.8432315545035327e-07, |
|
"logps/chosen": -59.337791442871094, |
|
"logps/rejected": -60.82359313964844, |
|
"loss": 0.3701, |
|
"losses/dpo": 0.24237556755542755, |
|
"losses/sft": 0.14872561395168304, |
|
"losses/total": 0.24237556755542755, |
|
"ref_logps/chosen": -46.916419982910156, |
|
"ref_logps/rejected": -36.144935607910156, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -1.2421373128890991, |
|
"rewards/margins": 1.2257287502288818, |
|
"rewards/rejected": -2.4678661823272705, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 18.874251761700755, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logps/chosen": -60.359886169433594, |
|
"logps/rejected": -56.043479919433594, |
|
"loss": 0.3903, |
|
"losses/dpo": 0.6876823902130127, |
|
"losses/sft": 0.163571298122406, |
|
"losses/total": 0.6876823902130127, |
|
"ref_logps/chosen": -49.23930358886719, |
|
"ref_logps/rejected": -34.02153778076172, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -1.1120576858520508, |
|
"rewards/margins": 1.0901365280151367, |
|
"rewards/rejected": -2.2021942138671875, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 29.114539057876968, |
|
"learning_rate": 1.6134877823936607e-07, |
|
"logps/chosen": -60.98393249511719, |
|
"logps/rejected": -58.489444732666016, |
|
"loss": 0.4011, |
|
"losses/dpo": 0.03265048563480377, |
|
"losses/sft": 0.14689283072948456, |
|
"losses/total": 0.03265048563480377, |
|
"ref_logps/chosen": -49.34606170654297, |
|
"ref_logps/rejected": -36.67803955078125, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -1.1637871265411377, |
|
"rewards/margins": 1.0173530578613281, |
|
"rewards/rejected": -2.181140184402466, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 21.107662898541907, |
|
"learning_rate": 1.5015885410857614e-07, |
|
"logps/chosen": -60.81307601928711, |
|
"logps/rejected": -59.90397262573242, |
|
"loss": 0.3897, |
|
"losses/dpo": 0.33075177669525146, |
|
"losses/sft": 0.214824840426445, |
|
"losses/total": 0.33075177669525146, |
|
"ref_logps/chosen": -46.25496292114258, |
|
"ref_logps/rejected": -33.91436004638672, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -1.4558112621307373, |
|
"rewards/margins": 1.143149971961975, |
|
"rewards/rejected": -2.598961114883423, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 26.95108201172052, |
|
"learning_rate": 1.392021136001897e-07, |
|
"logps/chosen": -56.23418426513672, |
|
"logps/rejected": -56.328125, |
|
"loss": 0.3964, |
|
"losses/dpo": 0.03794693946838379, |
|
"losses/sft": 0.19881302118301392, |
|
"losses/total": 0.03794693946838379, |
|
"ref_logps/chosen": -42.96794891357422, |
|
"ref_logps/rejected": -32.164451599121094, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3266233205795288, |
|
"rewards/margins": 1.089743971824646, |
|
"rewards/rejected": -2.416367530822754, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 33.76828619344551, |
|
"learning_rate": 1.2850414668934847e-07, |
|
"logps/chosen": -61.50416946411133, |
|
"logps/rejected": -59.79325485229492, |
|
"loss": 0.3827, |
|
"losses/dpo": 0.5413109660148621, |
|
"losses/sft": 0.30467280745506287, |
|
"losses/total": 0.5413109660148621, |
|
"ref_logps/chosen": -48.96829605102539, |
|
"ref_logps/rejected": -35.99717330932617, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.2535876035690308, |
|
"rewards/margins": 1.1260210275650024, |
|
"rewards/rejected": -2.379608631134033, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 16.559964106722745, |
|
"learning_rate": 1.1808993897346678e-07, |
|
"logps/chosen": -58.611270904541016, |
|
"logps/rejected": -58.919395446777344, |
|
"loss": 0.3796, |
|
"losses/dpo": 0.3290981352329254, |
|
"losses/sft": 0.19547075033187866, |
|
"losses/total": 0.3290981352329254, |
|
"ref_logps/chosen": -46.96087646484375, |
|
"ref_logps/rejected": -36.086090087890625, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -1.1650400161743164, |
|
"rewards/margins": 1.1182900667190552, |
|
"rewards/rejected": -2.283329963684082, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 25.26391431571928, |
|
"learning_rate": 1.0798381331721107e-07, |
|
"logps/chosen": -58.2769775390625, |
|
"logps/rejected": -57.12656021118164, |
|
"loss": 0.3707, |
|
"losses/dpo": 0.3912191092967987, |
|
"losses/sft": 0.20826196670532227, |
|
"losses/total": 0.3912191092967987, |
|
"ref_logps/chosen": -46.01140213012695, |
|
"ref_logps/rejected": -32.54326629638672, |
|
"rewards/accuracies": 0.859375, |
|
"rewards/chosen": -1.226557731628418, |
|
"rewards/margins": 1.2317723035812378, |
|
"rewards/rejected": -2.4583301544189453, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 18.669814077600197, |
|
"learning_rate": 9.82093730453222e-08, |
|
"logps/chosen": -57.36506271362305, |
|
"logps/rejected": -57.83528137207031, |
|
"loss": 0.4249, |
|
"losses/dpo": 0.28024712204933167, |
|
"losses/sft": 0.21661897003650665, |
|
"losses/total": 0.28024712204933167, |
|
"ref_logps/chosen": -44.405941009521484, |
|
"ref_logps/rejected": -34.53661346435547, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -1.295912265777588, |
|
"rewards/margins": 1.0339548587799072, |
|
"rewards/rejected": -2.329867124557495, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 17.65819121351904, |
|
"learning_rate": 8.87894468159574e-08, |
|
"logps/chosen": -60.354469299316406, |
|
"logps/rejected": -60.50645065307617, |
|
"loss": 0.3985, |
|
"losses/dpo": 0.9817911386489868, |
|
"losses/sft": 0.1904633343219757, |
|
"losses/total": 0.9817911386489868, |
|
"ref_logps/chosen": -46.499290466308594, |
|
"ref_logps/rejected": -34.763404846191406, |
|
"rewards/accuracies": 0.8359375, |
|
"rewards/chosen": -1.3855178356170654, |
|
"rewards/margins": 1.1887872219085693, |
|
"rewards/rejected": -2.5743050575256348, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 23.90292670438398, |
|
"learning_rate": 7.974603530330067e-08, |
|
"logps/chosen": -55.58333206176758, |
|
"logps/rejected": -55.52084732055664, |
|
"loss": 0.3777, |
|
"losses/dpo": 0.04075286537408829, |
|
"losses/sft": 0.22049269080162048, |
|
"losses/total": 0.04075286537408829, |
|
"ref_logps/chosen": -43.25560760498047, |
|
"ref_logps/rejected": -31.006759643554688, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -1.2327725887298584, |
|
"rewards/margins": 1.2186365127563477, |
|
"rewards/rejected": -2.451408863067627, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 28.08593658686289, |
|
"learning_rate": 7.110025981396975e-08, |
|
"logps/chosen": -58.75514221191406, |
|
"logps/rejected": -58.784584045410156, |
|
"loss": 0.4449, |
|
"losses/dpo": 0.4793856143951416, |
|
"losses/sft": 0.20940393209457397, |
|
"losses/total": 0.4793856143951416, |
|
"ref_logps/chosen": -45.29600524902344, |
|
"ref_logps/rejected": -34.97162628173828, |
|
"rewards/accuracies": 0.8046875, |
|
"rewards/chosen": -1.3459134101867676, |
|
"rewards/margins": 1.0353822708129883, |
|
"rewards/rejected": -2.381295680999756, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 24.077339089176505, |
|
"learning_rate": 6.28723129572247e-08, |
|
"logps/chosen": -55.75697326660156, |
|
"logps/rejected": -56.72669219970703, |
|
"loss": 0.3567, |
|
"losses/dpo": 0.21238191425800323, |
|
"losses/sft": 0.1661817878484726, |
|
"losses/total": 0.21238191425800323, |
|
"ref_logps/chosen": -44.3855094909668, |
|
"ref_logps/rejected": -32.21479797363281, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.137147068977356, |
|
"rewards/margins": 1.314042568206787, |
|
"rewards/rejected": -2.4511895179748535, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 43.46612828134844, |
|
"learning_rate": 5.508141148419443e-08, |
|
"logps/chosen": -61.76049041748047, |
|
"logps/rejected": -62.041648864746094, |
|
"loss": 0.3688, |
|
"losses/dpo": 0.27996987104415894, |
|
"losses/sft": 0.1737639456987381, |
|
"losses/total": 0.27996987104415894, |
|
"ref_logps/chosen": -49.25553894042969, |
|
"ref_logps/rejected": -36.210182189941406, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.250495195388794, |
|
"rewards/margins": 1.3326513767242432, |
|
"rewards/rejected": -2.583146572113037, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 22.779198271573037, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logps/chosen": -55.46681594848633, |
|
"logps/rejected": -57.17453384399414, |
|
"loss": 0.3531, |
|
"losses/dpo": 0.046613942831754684, |
|
"losses/sft": 0.20427729189395905, |
|
"losses/total": 0.046613942831754684, |
|
"ref_logps/chosen": -42.29081726074219, |
|
"ref_logps/rejected": -30.75497817993164, |
|
"rewards/accuracies": 0.8984375, |
|
"rewards/chosen": -1.3175995349884033, |
|
"rewards/margins": 1.3243558406829834, |
|
"rewards/rejected": -2.6419553756713867, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 20.59368424342303, |
|
"learning_rate": 4.0882465497443313e-08, |
|
"logps/chosen": -58.52223587036133, |
|
"logps/rejected": -56.04042053222656, |
|
"loss": 0.3923, |
|
"losses/dpo": 0.26003214716911316, |
|
"losses/sft": 0.17392012476921082, |
|
"losses/total": 0.26003214716911316, |
|
"ref_logps/chosen": -48.404632568359375, |
|
"ref_logps/rejected": -34.86602783203125, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.0117601156234741, |
|
"rewards/margins": 1.1056792736053467, |
|
"rewards/rejected": -2.1174392700195312, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 23.660376428219948, |
|
"learning_rate": 3.450758327998768e-08, |
|
"logps/chosen": -60.401039123535156, |
|
"logps/rejected": -60.10982131958008, |
|
"loss": 0.3902, |
|
"losses/dpo": 0.01773645170032978, |
|
"losses/sft": 0.17717282474040985, |
|
"losses/total": 0.01773645170032978, |
|
"ref_logps/chosen": -48.241943359375, |
|
"ref_logps/rejected": -34.582366943359375, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.215909719467163, |
|
"rewards/margins": 1.3368357419967651, |
|
"rewards/rejected": -2.5527453422546387, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 86.96881294099092, |
|
"learning_rate": 2.863599358669755e-08, |
|
"logps/chosen": -56.905418395996094, |
|
"logps/rejected": -56.808746337890625, |
|
"loss": 0.3944, |
|
"losses/dpo": 0.15065120160579681, |
|
"losses/sft": 0.22477349638938904, |
|
"losses/total": 0.15065120160579681, |
|
"ref_logps/chosen": -44.15583038330078, |
|
"ref_logps/rejected": -33.21840286254883, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.2749593257904053, |
|
"rewards/margins": 1.0840749740600586, |
|
"rewards/rejected": -2.359034299850464, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 18.8337077576639, |
|
"learning_rate": 2.3281409787363648e-08, |
|
"logps/chosen": -57.604774475097656, |
|
"logps/rejected": -57.78453063964844, |
|
"loss": 0.3863, |
|
"losses/dpo": 0.41682732105255127, |
|
"losses/sft": 0.16616390645503998, |
|
"losses/total": 0.41682732105255127, |
|
"ref_logps/chosen": -43.315818786621094, |
|
"ref_logps/rejected": -31.524248123168945, |
|
"rewards/accuracies": 0.8671875, |
|
"rewards/chosen": -1.4288955926895142, |
|
"rewards/margins": 1.1971325874328613, |
|
"rewards/rejected": -2.626028537750244, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 374.1054719017444, |
|
"learning_rate": 1.845633776055591e-08, |
|
"logps/chosen": -57.63691711425781, |
|
"logps/rejected": -58.3455810546875, |
|
"loss": 0.3882, |
|
"losses/dpo": 0.26508828997612, |
|
"losses/sft": 0.2718198001384735, |
|
"losses/total": 0.26508828997612, |
|
"ref_logps/chosen": -44.429481506347656, |
|
"ref_logps/rejected": -33.13744354248047, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3207435607910156, |
|
"rewards/margins": 1.2000699043273926, |
|
"rewards/rejected": -2.520813465118408, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 26.70970124014032, |
|
"learning_rate": 1.4172046685564209e-08, |
|
"logps/chosen": -58.663551330566406, |
|
"logps/rejected": -58.07282257080078, |
|
"loss": 0.3962, |
|
"losses/dpo": 0.08177483081817627, |
|
"losses/sft": 0.18531636893749237, |
|
"losses/total": 0.08177483081817627, |
|
"ref_logps/chosen": -45.821983337402344, |
|
"ref_logps/rejected": -33.62261199951172, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2841567993164062, |
|
"rewards/margins": 1.1608643531799316, |
|
"rewards/rejected": -2.445021390914917, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 25.593261462625442, |
|
"learning_rate": 1.0438542722708444e-08, |
|
"logps/chosen": -59.08097839355469, |
|
"logps/rejected": -59.16502380371094, |
|
"loss": 0.3836, |
|
"losses/dpo": 0.02788337506353855, |
|
"losses/sft": 0.19819076359272003, |
|
"losses/total": 0.02788337506353855, |
|
"ref_logps/chosen": -45.94892883300781, |
|
"ref_logps/rejected": -33.597511291503906, |
|
"rewards/accuracies": 0.8828125, |
|
"rewards/chosen": -1.3132052421569824, |
|
"rewards/margins": 1.2435462474822998, |
|
"rewards/rejected": -2.556751251220703, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 25.28796063034412, |
|
"learning_rate": 7.2645456434869965e-09, |
|
"logps/chosen": -57.95222473144531, |
|
"logps/rejected": -58.91720199584961, |
|
"loss": 0.3915, |
|
"losses/dpo": 1.2907841205596924, |
|
"losses/sft": 0.20458956062793732, |
|
"losses/total": 1.2907841205596924, |
|
"ref_logps/chosen": -45.50114440917969, |
|
"ref_logps/rejected": -35.063446044921875, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.2451080083847046, |
|
"rewards/margins": 1.140267252922058, |
|
"rewards/rejected": -2.385375499725342, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 30.554099185463503, |
|
"learning_rate": 4.657468465146641e-09, |
|
"logps/chosen": -57.99516296386719, |
|
"logps/rejected": -55.496768951416016, |
|
"loss": 0.3752, |
|
"losses/dpo": 0.20264464616775513, |
|
"losses/sft": 0.17493540048599243, |
|
"losses/total": 0.20264464616775513, |
|
"ref_logps/chosen": -47.58026123046875, |
|
"ref_logps/rejected": -33.345062255859375, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.041489839553833, |
|
"rewards/margins": 1.1736811399459839, |
|
"rewards/rejected": -2.2151710987091064, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 21.555895701368716, |
|
"learning_rate": 2.6234001372372193e-09, |
|
"logps/chosen": -55.79784393310547, |
|
"logps/rejected": -54.85697555541992, |
|
"loss": 0.4513, |
|
"losses/dpo": 0.6288288235664368, |
|
"losses/sft": 0.25858786702156067, |
|
"losses/total": 0.6288288235664368, |
|
"ref_logps/chosen": -42.008121490478516, |
|
"ref_logps/rejected": -31.47281265258789, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -1.3789721727371216, |
|
"rewards/margins": 0.9594441056251526, |
|
"rewards/rejected": -2.338416337966919, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 21.73384383499147, |
|
"learning_rate": 1.167091320587843e-09, |
|
"logps/chosen": -56.99696350097656, |
|
"logps/rejected": -59.2013053894043, |
|
"loss": 0.3554, |
|
"losses/dpo": 0.09169570356607437, |
|
"losses/sft": 0.20991858839988708, |
|
"losses/total": 0.09169570356607437, |
|
"ref_logps/chosen": -42.36278533935547, |
|
"ref_logps/rejected": -31.79424476623535, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.463417887687683, |
|
"rewards/margins": 1.2772881984710693, |
|
"rewards/rejected": -2.740705966949463, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 30.958564799186906, |
|
"learning_rate": 2.9194329191833953e-10, |
|
"logps/chosen": -58.35291290283203, |
|
"logps/rejected": -56.74859619140625, |
|
"loss": 0.3706, |
|
"losses/dpo": 0.3077165484428406, |
|
"losses/sft": 0.17356029152870178, |
|
"losses/total": 0.3077165484428406, |
|
"ref_logps/chosen": -44.90869903564453, |
|
"ref_logps/rejected": -31.324697494506836, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -1.34442138671875, |
|
"rewards/margins": 1.197968602180481, |
|
"rewards/rejected": -2.5423898696899414, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 20.514487251091158, |
|
"learning_rate": 0.0, |
|
"logps/chosen": -55.3281135559082, |
|
"logps/rejected": -54.42873764038086, |
|
"loss": 0.4185, |
|
"losses/dpo": 0.45331382751464844, |
|
"losses/sft": 0.16170088946819305, |
|
"losses/total": 0.45331382751464844, |
|
"ref_logps/chosen": -42.832916259765625, |
|
"ref_logps/rejected": -31.545093536376953, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2495195865631104, |
|
"rewards/margins": 1.0388449430465698, |
|
"rewards/rejected": -2.2883644104003906, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 73, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4880054197082781, |
|
"train_runtime": 1195.1879, |
|
"train_samples_per_second": 7.883, |
|
"train_steps_per_second": 0.061 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 73, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|