best_dpo_model / trainer_state.json
AntoineSchutz's picture
Upload folder using huggingface_hub
dbc2afb verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9980364656381484,
"eval_steps": 100,
"global_step": 2004,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04488078541374474,
"grad_norm": 4.790558815002441,
"learning_rate": 9.850299401197606e-05,
"logits/chosen": -3.3742988109588623,
"logits/rejected": -3.0817112922668457,
"logps/chosen": -273.48614501953125,
"logps/rejected": -234.3329315185547,
"loss": 0.6831,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": 0.0712718814611435,
"rewards/margins": 0.024287192150950432,
"rewards/rejected": 0.04698468744754791,
"step": 30
},
{
"epoch": 0.08976157082748948,
"grad_norm": 5.551278591156006,
"learning_rate": 9.700598802395209e-05,
"logits/chosen": -3.378220558166504,
"logits/rejected": -3.129826307296753,
"logps/chosen": -267.0759582519531,
"logps/rejected": -238.60873413085938,
"loss": 0.6691,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": 0.21243497729301453,
"rewards/margins": 0.07040555775165558,
"rewards/rejected": 0.14202943444252014,
"step": 60
},
{
"epoch": 0.13464235624123422,
"grad_norm": 7.342463493347168,
"learning_rate": 9.550898203592816e-05,
"logits/chosen": -3.3940162658691406,
"logits/rejected": -3.142778158187866,
"logps/chosen": -267.77581787109375,
"logps/rejected": -233.1001434326172,
"loss": 0.6586,
"rewards/accuracies": 0.6145833134651184,
"rewards/chosen": 0.2707298994064331,
"rewards/margins": 0.12286876887083054,
"rewards/rejected": 0.14786113798618317,
"step": 90
},
{
"epoch": 0.1496026180458158,
"eval_logits/chosen": -3.403446674346924,
"eval_logits/rejected": -3.1215860843658447,
"eval_logps/chosen": -268.9533386230469,
"eval_logps/rejected": -229.84756469726562,
"eval_loss": 0.6490052342414856,
"eval_rewards/accuracies": 0.6090127229690552,
"eval_rewards/chosen": 0.2928723990917206,
"eval_rewards/margins": 0.17060735821723938,
"eval_rewards/rejected": 0.12226507067680359,
"eval_runtime": 1689.4226,
"eval_samples_per_second": 3.166,
"eval_steps_per_second": 3.166,
"step": 100
},
{
"epoch": 0.17952314165497896,
"grad_norm": 7.352132797241211,
"learning_rate": 9.40119760479042e-05,
"logits/chosen": -3.416881561279297,
"logits/rejected": -3.130246877670288,
"logps/chosen": -271.2243347167969,
"logps/rejected": -233.06614685058594,
"loss": 0.6385,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": 0.295549601316452,
"rewards/margins": 0.20651350915431976,
"rewards/rejected": 0.08903612196445465,
"step": 120
},
{
"epoch": 0.2244039270687237,
"grad_norm": 7.124303340911865,
"learning_rate": 9.251497005988024e-05,
"logits/chosen": -3.3911712169647217,
"logits/rejected": -3.1303460597991943,
"logps/chosen": -265.11651611328125,
"logps/rejected": -234.63478088378906,
"loss": 0.6436,
"rewards/accuracies": 0.6322916746139526,
"rewards/chosen": 0.23116879165172577,
"rewards/margins": 0.23007448017597198,
"rewards/rejected": 0.0010943154338747263,
"step": 150
},
{
"epoch": 0.26928471248246844,
"grad_norm": 6.228757381439209,
"learning_rate": 9.101796407185628e-05,
"logits/chosen": -3.4022183418273926,
"logits/rejected": -3.151683807373047,
"logps/chosen": -267.1882019042969,
"logps/rejected": -232.48471069335938,
"loss": 0.6485,
"rewards/accuracies": 0.6322916746139526,
"rewards/chosen": 0.3013507127761841,
"rewards/margins": 0.20289267599582672,
"rewards/rejected": 0.09845803678035736,
"step": 180
},
{
"epoch": 0.2992052360916316,
"eval_logits/chosen": -3.4198923110961914,
"eval_logits/rejected": -3.143324613571167,
"eval_logps/chosen": -268.99163818359375,
"eval_logps/rejected": -230.40538024902344,
"eval_loss": 0.6396481394767761,
"eval_rewards/accuracies": 0.6142483353614807,
"eval_rewards/chosen": 0.2890413999557495,
"eval_rewards/margins": 0.22255805134773254,
"eval_rewards/rejected": 0.06648338586091995,
"eval_runtime": 1688.7699,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 3.167,
"step": 200
},
{
"epoch": 0.3141654978962132,
"grad_norm": 6.4461493492126465,
"learning_rate": 8.952095808383235e-05,
"logits/chosen": -3.434596300125122,
"logits/rejected": -3.132395029067993,
"logps/chosen": -267.54437255859375,
"logps/rejected": -224.43540954589844,
"loss": 0.638,
"rewards/accuracies": 0.6197916865348816,
"rewards/chosen": 0.2697771489620209,
"rewards/margins": 0.2309388816356659,
"rewards/rejected": 0.03883826732635498,
"step": 210
},
{
"epoch": 0.3590462833099579,
"grad_norm": 4.610179424285889,
"learning_rate": 8.80239520958084e-05,
"logits/chosen": -3.428438901901245,
"logits/rejected": -3.169167995452881,
"logps/chosen": -265.898193359375,
"logps/rejected": -230.3724822998047,
"loss": 0.6406,
"rewards/accuracies": 0.6166666746139526,
"rewards/chosen": 0.28266531229019165,
"rewards/margins": 0.23549547791481018,
"rewards/rejected": 0.04716984182596207,
"step": 240
},
{
"epoch": 0.40392706872370265,
"grad_norm": 5.838982582092285,
"learning_rate": 8.652694610778443e-05,
"logits/chosen": -3.4263041019439697,
"logits/rejected": -3.1684255599975586,
"logps/chosen": -267.129150390625,
"logps/rejected": -233.6484832763672,
"loss": 0.6251,
"rewards/accuracies": 0.6302083134651184,
"rewards/chosen": 0.1708066761493683,
"rewards/margins": 0.28462281823158264,
"rewards/rejected": -0.11381613463163376,
"step": 270
},
{
"epoch": 0.4488078541374474,
"grad_norm": 5.045168876647949,
"learning_rate": 8.502994011976048e-05,
"logits/chosen": -3.439959764480591,
"logits/rejected": -3.173079490661621,
"logps/chosen": -273.4431457519531,
"logps/rejected": -236.24371337890625,
"loss": 0.6327,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": 0.17748431861400604,
"rewards/margins": 0.269964337348938,
"rewards/rejected": -0.09247999638319016,
"step": 300
},
{
"epoch": 0.4488078541374474,
"eval_logits/chosen": -3.424139976501465,
"eval_logits/rejected": -3.150641918182373,
"eval_logps/chosen": -269.5113525390625,
"eval_logps/rejected": -231.47146606445312,
"eval_loss": 0.6353974938392639,
"eval_rewards/accuracies": 0.6181750297546387,
"eval_rewards/chosen": 0.23706810176372528,
"eval_rewards/margins": 0.2771916091442108,
"eval_rewards/rejected": -0.04012349247932434,
"eval_runtime": 1688.647,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 3.167,
"step": 300
},
{
"epoch": 0.49368863955119213,
"grad_norm": 5.068808555603027,
"learning_rate": 8.353293413173653e-05,
"logits/chosen": -3.4144017696380615,
"logits/rejected": -3.1683106422424316,
"logps/chosen": -272.5228271484375,
"logps/rejected": -239.20681762695312,
"loss": 0.646,
"rewards/accuracies": 0.6208333373069763,
"rewards/chosen": 0.22192205488681793,
"rewards/margins": 0.24012483656406403,
"rewards/rejected": -0.018202781677246094,
"step": 330
},
{
"epoch": 0.5385694249649369,
"grad_norm": 6.0258941650390625,
"learning_rate": 8.203592814371259e-05,
"logits/chosen": -3.4079012870788574,
"logits/rejected": -3.1440396308898926,
"logps/chosen": -276.3011474609375,
"logps/rejected": -235.62054443359375,
"loss": 0.6228,
"rewards/accuracies": 0.6270833611488342,
"rewards/chosen": 0.09719991683959961,
"rewards/margins": 0.2948659658432007,
"rewards/rejected": -0.19766603410243988,
"step": 360
},
{
"epoch": 0.5834502103786816,
"grad_norm": 5.713747024536133,
"learning_rate": 8.053892215568862e-05,
"logits/chosen": -3.3723533153533936,
"logits/rejected": -3.1148476600646973,
"logps/chosen": -274.2776794433594,
"logps/rejected": -234.34136962890625,
"loss": 0.6342,
"rewards/accuracies": 0.6270833611488342,
"rewards/chosen": 0.17114956676959991,
"rewards/margins": 0.29201894998550415,
"rewards/rejected": -0.12086938321590424,
"step": 390
},
{
"epoch": 0.5984104721832632,
"eval_logits/chosen": -3.390080451965332,
"eval_logits/rejected": -3.111392021179199,
"eval_logps/chosen": -268.8110656738281,
"eval_logps/rejected": -230.63925170898438,
"eval_loss": 0.6309967041015625,
"eval_rewards/accuracies": 0.630142092704773,
"eval_rewards/chosen": 0.30709749460220337,
"eval_rewards/margins": 0.2640005946159363,
"eval_rewards/rejected": 0.04309689626097679,
"eval_runtime": 1688.4508,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 3.167,
"step": 400
},
{
"epoch": 0.6283309957924264,
"grad_norm": 5.401741027832031,
"learning_rate": 7.904191616766467e-05,
"logits/chosen": -3.389784574508667,
"logits/rejected": -3.113330602645874,
"logps/chosen": -270.7179260253906,
"logps/rejected": -234.7329864501953,
"loss": 0.6352,
"rewards/accuracies": 0.6260416507720947,
"rewards/chosen": 0.2668881416320801,
"rewards/margins": 0.25750893354415894,
"rewards/rejected": 0.009379198774695396,
"step": 420
},
{
"epoch": 0.6732117812061711,
"grad_norm": 6.569407939910889,
"learning_rate": 7.754491017964072e-05,
"logits/chosen": -3.420933246612549,
"logits/rejected": -3.107722520828247,
"logps/chosen": -279.6606750488281,
"logps/rejected": -232.16326904296875,
"loss": 0.6152,
"rewards/accuracies": 0.6395833492279053,
"rewards/chosen": 0.23483921587467194,
"rewards/margins": 0.30834510922431946,
"rewards/rejected": -0.07350588589906693,
"step": 450
},
{
"epoch": 0.7180925666199158,
"grad_norm": 4.889843463897705,
"learning_rate": 7.604790419161677e-05,
"logits/chosen": -3.4380805492401123,
"logits/rejected": -3.1253116130828857,
"logps/chosen": -279.8207092285156,
"logps/rejected": -233.9474639892578,
"loss": 0.612,
"rewards/accuracies": 0.6697916388511658,
"rewards/chosen": 0.06458248198032379,
"rewards/margins": 0.378538578748703,
"rewards/rejected": -0.3139561414718628,
"step": 480
},
{
"epoch": 0.748013090229079,
"eval_logits/chosen": -3.419874429702759,
"eval_logits/rejected": -3.1424779891967773,
"eval_logps/chosen": -270.2199401855469,
"eval_logps/rejected": -232.4933319091797,
"eval_loss": 0.6269693374633789,
"eval_rewards/accuracies": 0.627711296081543,
"eval_rewards/chosen": 0.16621026396751404,
"eval_rewards/margins": 0.3085208237171173,
"eval_rewards/rejected": -0.14231054484844208,
"eval_runtime": 1688.3228,
"eval_samples_per_second": 3.168,
"eval_steps_per_second": 3.168,
"step": 500
},
{
"epoch": 0.7629733520336606,
"grad_norm": 4.36655330657959,
"learning_rate": 7.455089820359282e-05,
"logits/chosen": -3.4343178272247314,
"logits/rejected": -3.1612308025360107,
"logps/chosen": -272.99578857421875,
"logps/rejected": -234.4145965576172,
"loss": 0.629,
"rewards/accuracies": 0.6427083611488342,
"rewards/chosen": 0.13686993718147278,
"rewards/margins": 0.3045249283313751,
"rewards/rejected": -0.16765499114990234,
"step": 510
},
{
"epoch": 0.8078541374474053,
"grad_norm": 4.971264839172363,
"learning_rate": 7.305389221556886e-05,
"logits/chosen": -3.4246087074279785,
"logits/rejected": -3.172884464263916,
"logps/chosen": -267.14556884765625,
"logps/rejected": -233.85691833496094,
"loss": 0.6269,
"rewards/accuracies": 0.6333333253860474,
"rewards/chosen": 0.22364649176597595,
"rewards/margins": 0.28497254848480225,
"rewards/rejected": -0.06132606416940689,
"step": 540
},
{
"epoch": 0.85273492286115,
"grad_norm": 5.077197074890137,
"learning_rate": 7.155688622754491e-05,
"logits/chosen": -3.4349772930145264,
"logits/rejected": -3.1722497940063477,
"logps/chosen": -268.02630615234375,
"logps/rejected": -231.99020385742188,
"loss": 0.63,
"rewards/accuracies": 0.621874988079071,
"rewards/chosen": 0.2208840399980545,
"rewards/margins": 0.2860158383846283,
"rewards/rejected": -0.06513180583715439,
"step": 570
},
{
"epoch": 0.8976157082748948,
"grad_norm": 4.760651111602783,
"learning_rate": 7.005988023952096e-05,
"logits/chosen": -3.4018094539642334,
"logits/rejected": -3.1606853008270264,
"logps/chosen": -268.86090087890625,
"logps/rejected": -233.84007263183594,
"loss": 0.6432,
"rewards/accuracies": 0.6208333373069763,
"rewards/chosen": 0.25363439321517944,
"rewards/margins": 0.2553554177284241,
"rewards/rejected": -0.0017210314981639385,
"step": 600
},
{
"epoch": 0.8976157082748948,
"eval_logits/chosen": -3.4228434562683105,
"eval_logits/rejected": -3.145069122314453,
"eval_logps/chosen": -269.40740966796875,
"eval_logps/rejected": -231.58685302734375,
"eval_loss": 0.6246524453163147,
"eval_rewards/accuracies": 0.6312640309333801,
"eval_rewards/chosen": 0.24746553599834442,
"eval_rewards/margins": 0.2991257905960083,
"eval_rewards/rejected": -0.051660239696502686,
"eval_runtime": 1688.5162,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 3.167,
"step": 600
},
{
"epoch": 0.9424964936886395,
"grad_norm": 5.144285678863525,
"learning_rate": 6.856287425149701e-05,
"logits/chosen": -3.4329488277435303,
"logits/rejected": -3.1452276706695557,
"logps/chosen": -269.5411376953125,
"logps/rejected": -228.07046508789062,
"loss": 0.6185,
"rewards/accuracies": 0.6260416507720947,
"rewards/chosen": 0.26634886860847473,
"rewards/margins": 0.3162167966365814,
"rewards/rejected": -0.049867913126945496,
"step": 630
},
{
"epoch": 0.9873772791023843,
"grad_norm": 4.551113128662109,
"learning_rate": 6.706586826347305e-05,
"logits/chosen": -3.435673713684082,
"logits/rejected": -3.1743414402008057,
"logps/chosen": -273.6510314941406,
"logps/rejected": -241.5527801513672,
"loss": 0.6236,
"rewards/accuracies": 0.6364583373069763,
"rewards/chosen": 0.11331641674041748,
"rewards/margins": 0.32483571767807007,
"rewards/rejected": -0.2115192860364914,
"step": 660
},
{
"epoch": 1.032258064516129,
"grad_norm": 4.672567844390869,
"learning_rate": 6.55688622754491e-05,
"logits/chosen": -3.4276251792907715,
"logits/rejected": -3.1490509510040283,
"logps/chosen": -269.5851135253906,
"logps/rejected": -237.02989196777344,
"loss": 0.5554,
"rewards/accuracies": 0.7302083373069763,
"rewards/chosen": 0.08210794627666473,
"rewards/margins": 0.49889788031578064,
"rewards/rejected": -0.4167899191379547,
"step": 690
},
{
"epoch": 1.0472183263207107,
"eval_logits/chosen": -3.4185428619384766,
"eval_logits/rejected": -3.1414475440979004,
"eval_logps/chosen": -270.3139343261719,
"eval_logps/rejected": -232.80120849609375,
"eval_loss": 0.6221644282341003,
"eval_rewards/accuracies": 0.6338818073272705,
"eval_rewards/chosen": 0.15681201219558716,
"eval_rewards/margins": 0.3299100995063782,
"eval_rewards/rejected": -0.17309808731079102,
"eval_runtime": 1688.7954,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 3.167,
"step": 700
},
{
"epoch": 1.0771388499298737,
"grad_norm": 4.2797369956970215,
"learning_rate": 6.407185628742515e-05,
"logits/chosen": -3.445012092590332,
"logits/rejected": -3.1330997943878174,
"logps/chosen": -266.95782470703125,
"logps/rejected": -227.27879333496094,
"loss": 0.5249,
"rewards/accuracies": 0.7635416388511658,
"rewards/chosen": 0.25232160091400146,
"rewards/margins": 0.5462218523025513,
"rewards/rejected": -0.2939002215862274,
"step": 720
},
{
"epoch": 1.1220196353436185,
"grad_norm": 5.101881980895996,
"learning_rate": 6.25748502994012e-05,
"logits/chosen": -3.425431728363037,
"logits/rejected": -3.1551194190979004,
"logps/chosen": -271.9197082519531,
"logps/rejected": -233.2086639404297,
"loss": 0.5308,
"rewards/accuracies": 0.7385416626930237,
"rewards/chosen": 0.2957630157470703,
"rewards/margins": 0.5949270129203796,
"rewards/rejected": -0.29916396737098694,
"step": 750
},
{
"epoch": 1.1669004207573632,
"grad_norm": 5.360141754150391,
"learning_rate": 6.107784431137725e-05,
"logits/chosen": -3.4009079933166504,
"logits/rejected": -3.1200320720672607,
"logps/chosen": -272.1022644042969,
"logps/rejected": -236.18499755859375,
"loss": 0.5226,
"rewards/accuracies": 0.7520833611488342,
"rewards/chosen": 0.30986490845680237,
"rewards/margins": 0.59710294008255,
"rewards/rejected": -0.2872380018234253,
"step": 780
},
{
"epoch": 1.1968209443665265,
"eval_logits/chosen": -3.405834674835205,
"eval_logits/rejected": -3.1333200931549072,
"eval_logps/chosen": -272.1116638183594,
"eval_logps/rejected": -235.3762664794922,
"eval_loss": 0.6281805038452148,
"eval_rewards/accuracies": 0.6335078477859497,
"eval_rewards/chosen": -0.022958112880587578,
"eval_rewards/margins": 0.40764307975769043,
"eval_rewards/rejected": -0.43060120940208435,
"eval_runtime": 1688.3487,
"eval_samples_per_second": 3.168,
"eval_steps_per_second": 3.168,
"step": 800
},
{
"epoch": 1.211781206171108,
"grad_norm": 5.8285088539123535,
"learning_rate": 5.95808383233533e-05,
"logits/chosen": -3.3979651927948,
"logits/rejected": -3.1520321369171143,
"logps/chosen": -274.0641174316406,
"logps/rejected": -240.42205810546875,
"loss": 0.5402,
"rewards/accuracies": 0.7354166507720947,
"rewards/chosen": 0.1310122311115265,
"rewards/margins": 0.5752567052841187,
"rewards/rejected": -0.44424447417259216,
"step": 810
},
{
"epoch": 1.2566619915848527,
"grad_norm": 5.5216851234436035,
"learning_rate": 5.808383233532935e-05,
"logits/chosen": -3.4025676250457764,
"logits/rejected": -3.1448330879211426,
"logps/chosen": -274.1934509277344,
"logps/rejected": -243.01490783691406,
"loss": 0.5201,
"rewards/accuracies": 0.7552083134651184,
"rewards/chosen": 0.14752289652824402,
"rewards/margins": 0.626766562461853,
"rewards/rejected": -0.479243665933609,
"step": 840
},
{
"epoch": 1.3015427769985974,
"grad_norm": 5.673742294311523,
"learning_rate": 5.6586826347305385e-05,
"logits/chosen": -3.3895277976989746,
"logits/rejected": -3.1401288509368896,
"logps/chosen": -273.1130676269531,
"logps/rejected": -241.987060546875,
"loss": 0.5497,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.02567141316831112,
"rewards/margins": 0.5613437294960022,
"rewards/rejected": -0.5356722474098206,
"step": 870
},
{
"epoch": 1.3464235624123422,
"grad_norm": 6.6557440757751465,
"learning_rate": 5.508982035928144e-05,
"logits/chosen": -3.3836898803710938,
"logits/rejected": -3.1433603763580322,
"logps/chosen": -266.1312561035156,
"logps/rejected": -238.70309448242188,
"loss": 0.5474,
"rewards/accuracies": 0.7322916388511658,
"rewards/chosen": 0.016369260847568512,
"rewards/margins": 0.563011109828949,
"rewards/rejected": -0.5466418862342834,
"step": 900
},
{
"epoch": 1.3464235624123422,
"eval_logits/chosen": -3.3754773139953613,
"eval_logits/rejected": -3.106959819793701,
"eval_logps/chosen": -271.97869873046875,
"eval_logps/rejected": -234.78985595703125,
"eval_loss": 0.629611074924469,
"eval_rewards/accuracies": 0.6299551129341125,
"eval_rewards/chosen": -0.009662697091698647,
"eval_rewards/margins": 0.3622985780239105,
"eval_rewards/rejected": -0.3719612658023834,
"eval_runtime": 1688.3293,
"eval_samples_per_second": 3.168,
"eval_steps_per_second": 3.168,
"step": 900
},
{
"epoch": 1.391304347826087,
"grad_norm": 5.2359724044799805,
"learning_rate": 5.359281437125748e-05,
"logits/chosen": -3.3651976585388184,
"logits/rejected": -3.123444080352783,
"logps/chosen": -271.6989440917969,
"logps/rejected": -236.84417724609375,
"loss": 0.54,
"rewards/accuracies": 0.7416666746139526,
"rewards/chosen": 0.1108192428946495,
"rewards/margins": 0.5575817823410034,
"rewards/rejected": -0.4467625319957733,
"step": 930
},
{
"epoch": 1.4361851332398317,
"grad_norm": 5.669713497161865,
"learning_rate": 5.209580838323354e-05,
"logits/chosen": -3.3611793518066406,
"logits/rejected": -3.099807024002075,
"logps/chosen": -274.7862243652344,
"logps/rejected": -237.52139282226562,
"loss": 0.5405,
"rewards/accuracies": 0.7260416746139526,
"rewards/chosen": 0.027847904711961746,
"rewards/margins": 0.5692722797393799,
"rewards/rejected": -0.5414243936538696,
"step": 960
},
{
"epoch": 1.4810659186535764,
"grad_norm": 6.500946044921875,
"learning_rate": 5.059880239520959e-05,
"logits/chosen": -3.3827614784240723,
"logits/rejected": -3.09436297416687,
"logps/chosen": -276.1948547363281,
"logps/rejected": -238.23243713378906,
"loss": 0.5235,
"rewards/accuracies": 0.7593749761581421,
"rewards/chosen": -0.051430441439151764,
"rewards/margins": 0.6151652932167053,
"rewards/rejected": -0.6665957570075989,
"step": 990
},
{
"epoch": 1.496026180458158,
"eval_logits/chosen": -3.366751194000244,
"eval_logits/rejected": -3.1013710498809814,
"eval_logps/chosen": -272.0386047363281,
"eval_logps/rejected": -234.93768310546875,
"eval_loss": 0.628265380859375,
"eval_rewards/accuracies": 0.6325729489326477,
"eval_rewards/chosen": -0.01565566658973694,
"eval_rewards/margins": 0.37109050154685974,
"eval_rewards/rejected": -0.38674619793891907,
"eval_runtime": 1688.5815,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 3.167,
"step": 1000
},
{
"epoch": 1.5259467040673211,
"grad_norm": 5.458423614501953,
"learning_rate": 4.910179640718563e-05,
"logits/chosen": -3.3471567630767822,
"logits/rejected": -3.1277146339416504,
"logps/chosen": -269.085205078125,
"logps/rejected": -243.0161895751953,
"loss": 0.5338,
"rewards/accuracies": 0.7489583492279053,
"rewards/chosen": 0.08274559676647186,
"rewards/margins": 0.5906849503517151,
"rewards/rejected": -0.5079393982887268,
"step": 1020
},
{
"epoch": 1.5708274894810659,
"grad_norm": 7.57076358795166,
"learning_rate": 4.7604790419161675e-05,
"logits/chosen": -3.3720383644104004,
"logits/rejected": -3.0789096355438232,
"logps/chosen": -266.96270751953125,
"logps/rejected": -228.08006286621094,
"loss": 0.5331,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": 0.014148912392556667,
"rewards/margins": 0.5797646045684814,
"rewards/rejected": -0.5656156539916992,
"step": 1050
},
{
"epoch": 1.6157082748948106,
"grad_norm": 5.330569744110107,
"learning_rate": 4.610778443113773e-05,
"logits/chosen": -3.359792947769165,
"logits/rejected": -3.09041166305542,
"logps/chosen": -276.38226318359375,
"logps/rejected": -243.1844024658203,
"loss": 0.5232,
"rewards/accuracies": 0.7395833134651184,
"rewards/chosen": 0.06125294789671898,
"rewards/margins": 0.6237131953239441,
"rewards/rejected": -0.56246018409729,
"step": 1080
},
{
"epoch": 1.645628798503974,
"eval_logits/chosen": -3.3514223098754883,
"eval_logits/rejected": -3.0870354175567627,
"eval_logps/chosen": -271.9643859863281,
"eval_logps/rejected": -234.57383728027344,
"eval_loss": 0.6333222389221191,
"eval_rewards/accuracies": 0.620792806148529,
"eval_rewards/chosen": -0.008234047330915928,
"eval_rewards/margins": 0.34212782979011536,
"eval_rewards/rejected": -0.3503618538379669,
"eval_runtime": 1688.4687,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 3.167,
"step": 1100
},
{
"epoch": 1.6605890603085554,
"grad_norm": 6.506576061248779,
"learning_rate": 4.4610778443113777e-05,
"logits/chosen": -3.3484690189361572,
"logits/rejected": -3.130286455154419,
"logps/chosen": -273.8847961425781,
"logps/rejected": -247.9475860595703,
"loss": 0.5384,
"rewards/accuracies": 0.7322916388511658,
"rewards/chosen": 0.10436714440584183,
"rewards/margins": 0.5445392727851868,
"rewards/rejected": -0.44017213582992554,
"step": 1110
},
{
"epoch": 1.7054698457223,
"grad_norm": 6.028050422668457,
"learning_rate": 4.311377245508982e-05,
"logits/chosen": -3.3555943965911865,
"logits/rejected": -3.125157356262207,
"logps/chosen": -267.73516845703125,
"logps/rejected": -236.43356323242188,
"loss": 0.5549,
"rewards/accuracies": 0.7354166507720947,
"rewards/chosen": -0.052730146795511246,
"rewards/margins": 0.5250240564346313,
"rewards/rejected": -0.5777541995048523,
"step": 1140
},
{
"epoch": 1.7503506311360448,
"grad_norm": 6.8205647468566895,
"learning_rate": 4.161676646706587e-05,
"logits/chosen": -3.383364677429199,
"logits/rejected": -3.1156816482543945,
"logps/chosen": -273.9105529785156,
"logps/rejected": -237.84432983398438,
"loss": 0.523,
"rewards/accuracies": 0.746874988079071,
"rewards/chosen": -0.06969426572322845,
"rewards/margins": 0.6354466676712036,
"rewards/rejected": -0.705141007900238,
"step": 1170
},
{
"epoch": 1.7952314165497896,
"grad_norm": 6.074138641357422,
"learning_rate": 4.0119760479041915e-05,
"logits/chosen": -3.391815185546875,
"logits/rejected": -3.1276473999023438,
"logps/chosen": -279.9575500488281,
"logps/rejected": -244.89010620117188,
"loss": 0.5156,
"rewards/accuracies": 0.7479166388511658,
"rewards/chosen": -0.07707042992115021,
"rewards/margins": 0.6421669125556946,
"rewards/rejected": -0.7192373871803284,
"step": 1200
},
{
"epoch": 1.7952314165497896,
"eval_logits/chosen": -3.379970073699951,
"eval_logits/rejected": -3.1176722049713135,
"eval_logps/chosen": -274.0703430175781,
"eval_logps/rejected": -237.28604125976562,
"eval_loss": 0.6306100487709045,
"eval_rewards/accuracies": 0.6350037455558777,
"eval_rewards/chosen": -0.21883098781108856,
"eval_rewards/margins": 0.4027484953403473,
"eval_rewards/rejected": -0.6215794086456299,
"eval_runtime": 1688.6398,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 3.167,
"step": 1200
},
{
"epoch": 1.8401122019635343,
"grad_norm": 5.888485431671143,
"learning_rate": 3.8622754491017966e-05,
"logits/chosen": -3.3851656913757324,
"logits/rejected": -3.1222336292266846,
"logps/chosen": -272.52117919921875,
"logps/rejected": -237.61158752441406,
"loss": 0.5372,
"rewards/accuracies": 0.7416666746139526,
"rewards/chosen": -0.14584079384803772,
"rewards/margins": 0.6132307052612305,
"rewards/rejected": -0.7590714693069458,
"step": 1230
},
{
"epoch": 1.884992987377279,
"grad_norm": 6.371288299560547,
"learning_rate": 3.712574850299401e-05,
"logits/chosen": -3.379087209701538,
"logits/rejected": -3.1119582653045654,
"logps/chosen": -273.9693603515625,
"logps/rejected": -238.9442901611328,
"loss": 0.5142,
"rewards/accuracies": 0.7614583373069763,
"rewards/chosen": -0.13823945820331573,
"rewards/margins": 0.6301066279411316,
"rewards/rejected": -0.7683460116386414,
"step": 1260
},
{
"epoch": 1.9298737727910238,
"grad_norm": 6.35048246383667,
"learning_rate": 3.562874251497006e-05,
"logits/chosen": -3.3981616497039795,
"logits/rejected": -3.162132740020752,
"logps/chosen": -268.2223205566406,
"logps/rejected": -237.77635192871094,
"loss": 0.5352,
"rewards/accuracies": 0.7364583611488342,
"rewards/chosen": -0.19809262454509735,
"rewards/margins": 0.6094833016395569,
"rewards/rejected": -0.8075758814811707,
"step": 1290
},
{
"epoch": 1.9448340345956054,
"eval_logits/chosen": -3.3784019947052,
"eval_logits/rejected": -3.116711378097534,
"eval_logps/chosen": -274.5598449707031,
"eval_logps/rejected": -237.6013946533203,
"eval_loss": 0.6299869418144226,
"eval_rewards/accuracies": 0.6327599287033081,
"eval_rewards/chosen": -0.2677817940711975,
"eval_rewards/margins": 0.38533419370651245,
"eval_rewards/rejected": -0.6531160473823547,
"eval_runtime": 1688.4156,
"eval_samples_per_second": 3.167,
"eval_steps_per_second": 3.167,
"step": 1300
},
{
"epoch": 1.9747545582047685,
"grad_norm": 6.9590959548950195,
"learning_rate": 3.413173652694611e-05,
"logits/chosen": -3.376148223876953,
"logits/rejected": -3.1122324466705322,
"logps/chosen": -282.5127258300781,
"logps/rejected": -247.69212341308594,
"loss": 0.5232,
"rewards/accuracies": 0.7333333492279053,
"rewards/chosen": -0.10552702099084854,
"rewards/margins": 0.6293079257011414,
"rewards/rejected": -0.7348350286483765,
"step": 1320
},
{
"epoch": 2.0196353436185133,
"grad_norm": 5.992002010345459,
"learning_rate": 3.263473053892216e-05,
"logits/chosen": -3.397113800048828,
"logits/rejected": -3.136545419692993,
"logps/chosen": -278.75390625,
"logps/rejected": -246.4496307373047,
"loss": 0.5015,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.11011376976966858,
"rewards/margins": 0.6908566355705261,
"rewards/rejected": -0.8009704351425171,
"step": 1350
},
{
"epoch": 2.064516129032258,
"grad_norm": 7.003468990325928,
"learning_rate": 3.1137724550898205e-05,
"logits/chosen": -3.370246410369873,
"logits/rejected": -3.093019723892212,
"logps/chosen": -279.3021240234375,
"logps/rejected": -242.58258056640625,
"loss": 0.446,
"rewards/accuracies": 0.8114583492279053,
"rewards/chosen": -0.05044478550553322,
"rewards/margins": 0.8847902417182922,
"rewards/rejected": -0.9352350234985352,
"step": 1380
},
{
"epoch": 2.0944366526414213,
"eval_logits/chosen": -3.3703560829162598,
"eval_logits/rejected": -3.111078977584839,
"eval_logps/chosen": -274.6524353027344,
"eval_logps/rejected": -237.6984405517578,
"eval_loss": 0.6312919855117798,
"eval_rewards/accuracies": 0.6325729489326477,
"eval_rewards/chosen": -0.27703869342803955,
"eval_rewards/margins": 0.38578376173973083,
"eval_rewards/rejected": -0.6628224849700928,
"eval_runtime": 1685.7321,
"eval_samples_per_second": 3.173,
"eval_steps_per_second": 3.173,
"step": 1400
},
{
"epoch": 2.1093969144460027,
"grad_norm": 5.607975959777832,
"learning_rate": 2.9640718562874252e-05,
"logits/chosen": -3.365170955657959,
"logits/rejected": -3.1272387504577637,
"logps/chosen": -271.7861022949219,
"logps/rejected": -242.59573364257812,
"loss": 0.4698,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.10354464501142502,
"rewards/margins": 0.7553961277008057,
"rewards/rejected": -0.8589407801628113,
"step": 1410
},
{
"epoch": 2.1542776998597475,
"grad_norm": 5.758065223693848,
"learning_rate": 2.81437125748503e-05,
"logits/chosen": -3.362076997756958,
"logits/rejected": -3.1057257652282715,
"logps/chosen": -267.41705322265625,
"logps/rejected": -235.0531463623047,
"loss": 0.4586,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.11131696403026581,
"rewards/margins": 0.785390317440033,
"rewards/rejected": -0.89670729637146,
"step": 1440
},
{
"epoch": 2.1991584852734922,
"grad_norm": 6.8294501304626465,
"learning_rate": 2.6646706586826347e-05,
"logits/chosen": -3.368708610534668,
"logits/rejected": -3.0964319705963135,
"logps/chosen": -269.26409912109375,
"logps/rejected": -237.1984405517578,
"loss": 0.4496,
"rewards/accuracies": 0.8145833611488342,
"rewards/chosen": -0.10699882358312607,
"rewards/margins": 0.8314520120620728,
"rewards/rejected": -0.938450813293457,
"step": 1470
},
{
"epoch": 2.244039270687237,
"grad_norm": 6.356990337371826,
"learning_rate": 2.5149700598802394e-05,
"logits/chosen": -3.374453067779541,
"logits/rejected": -3.129500389099121,
"logps/chosen": -271.7542724609375,
"logps/rejected": -241.45423889160156,
"loss": 0.4552,
"rewards/accuracies": 0.8135416507720947,
"rewards/chosen": -0.20287248492240906,
"rewards/margins": 0.8104608058929443,
"rewards/rejected": -1.0133334398269653,
"step": 1500
},
{
"epoch": 2.244039270687237,
"eval_logits/chosen": -3.360283613204956,
"eval_logits/rejected": -3.1080915927886963,
"eval_logps/chosen": -276.3040466308594,
"eval_logps/rejected": -239.7833251953125,
"eval_loss": 0.6368128657341003,
"eval_rewards/accuracies": 0.6351907253265381,
"eval_rewards/chosen": -0.44220101833343506,
"eval_rewards/margins": 0.4291093647480011,
"eval_rewards/rejected": -0.871310293674469,
"eval_runtime": 1686.3289,
"eval_samples_per_second": 3.171,
"eval_steps_per_second": 3.171,
"step": 1500
},
{
"epoch": 2.2889200561009817,
"grad_norm": 6.016663551330566,
"learning_rate": 2.3652694610778445e-05,
"logits/chosen": -3.3569111824035645,
"logits/rejected": -3.123525857925415,
"logps/chosen": -274.6582946777344,
"logps/rejected": -241.02252197265625,
"loss": 0.4577,
"rewards/accuracies": 0.7947916388511658,
"rewards/chosen": -0.20317865908145905,
"rewards/margins": 0.8162151575088501,
"rewards/rejected": -1.0193939208984375,
"step": 1530
},
{
"epoch": 2.3338008415147264,
"grad_norm": 5.684780120849609,
"learning_rate": 2.2155688622754492e-05,
"logits/chosen": -3.3533644676208496,
"logits/rejected": -3.146190881729126,
"logps/chosen": -271.4990234375,
"logps/rejected": -242.20486450195312,
"loss": 0.4674,
"rewards/accuracies": 0.7989583611488342,
"rewards/chosen": -0.12297000735998154,
"rewards/margins": 0.8095114827156067,
"rewards/rejected": -0.9324816465377808,
"step": 1560
},
{
"epoch": 2.378681626928471,
"grad_norm": 7.419367790222168,
"learning_rate": 2.065868263473054e-05,
"logits/chosen": -3.364116907119751,
"logits/rejected": -3.092254638671875,
"logps/chosen": -270.5090026855469,
"logps/rejected": -237.64964294433594,
"loss": 0.4443,
"rewards/accuracies": 0.8177083134651184,
"rewards/chosen": -0.22570447623729706,
"rewards/margins": 0.84433513879776,
"rewards/rejected": -1.0700395107269287,
"step": 1590
},
{
"epoch": 2.393641888733053,
"eval_logits/chosen": -3.354207992553711,
"eval_logits/rejected": -3.103837013244629,
"eval_logps/chosen": -276.166015625,
"eval_logps/rejected": -239.59542846679688,
"eval_loss": 0.6390828490257263,
"eval_rewards/accuracies": 0.6344428062438965,
"eval_rewards/chosen": -0.4283973276615143,
"eval_rewards/margins": 0.42412257194519043,
"eval_rewards/rejected": -0.8525198101997375,
"eval_runtime": 1685.5337,
"eval_samples_per_second": 3.173,
"eval_steps_per_second": 3.173,
"step": 1600
},
{
"epoch": 2.423562412342216,
"grad_norm": 7.774267196655273,
"learning_rate": 1.916167664670659e-05,
"logits/chosen": -3.355332851409912,
"logits/rejected": -3.1059510707855225,
"logps/chosen": -277.3658752441406,
"logps/rejected": -247.025146484375,
"loss": 0.4466,
"rewards/accuracies": 0.8052083253860474,
"rewards/chosen": -0.20841935276985168,
"rewards/margins": 0.860171377658844,
"rewards/rejected": -1.0685906410217285,
"step": 1620
},
{
"epoch": 2.4684431977559607,
"grad_norm": 7.505038738250732,
"learning_rate": 1.7664670658682637e-05,
"logits/chosen": -3.345045804977417,
"logits/rejected": -3.1149702072143555,
"logps/chosen": -278.6654968261719,
"logps/rejected": -250.63827514648438,
"loss": 0.4452,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.24601925909519196,
"rewards/margins": 0.8955973982810974,
"rewards/rejected": -1.141616702079773,
"step": 1650
},
{
"epoch": 2.5133239831697054,
"grad_norm": 7.055671215057373,
"learning_rate": 1.6167664670658684e-05,
"logits/chosen": -3.3556442260742188,
"logits/rejected": -3.086568593978882,
"logps/chosen": -275.1831359863281,
"logps/rejected": -240.82598876953125,
"loss": 0.4564,
"rewards/accuracies": 0.8052083253860474,
"rewards/chosen": -0.29922303557395935,
"rewards/margins": 0.8765833377838135,
"rewards/rejected": -1.1758064031600952,
"step": 1680
},
{
"epoch": 2.5432445067788687,
"eval_logits/chosen": -3.349193811416626,
"eval_logits/rejected": -3.098674774169922,
"eval_logps/chosen": -277.01483154296875,
"eval_logps/rejected": -240.51805114746094,
"eval_loss": 0.642005980014801,
"eval_rewards/accuracies": 0.6297681331634521,
"eval_rewards/chosen": -0.5132736563682556,
"eval_rewards/margins": 0.4315095543861389,
"eval_rewards/rejected": -0.9447831511497498,
"eval_runtime": 1686.0549,
"eval_samples_per_second": 3.172,
"eval_steps_per_second": 3.172,
"step": 1700
},
{
"epoch": 2.55820476858345,
"grad_norm": 7.247310638427734,
"learning_rate": 1.467065868263473e-05,
"logits/chosen": -3.3303916454315186,
"logits/rejected": -3.118966817855835,
"logps/chosen": -276.04510498046875,
"logps/rejected": -250.57984924316406,
"loss": 0.4615,
"rewards/accuracies": 0.8072916865348816,
"rewards/chosen": -0.27220281958580017,
"rewards/margins": 0.8313066363334656,
"rewards/rejected": -1.103509545326233,
"step": 1710
},
{
"epoch": 2.603085553997195,
"grad_norm": 6.719433784484863,
"learning_rate": 1.317365269461078e-05,
"logits/chosen": -3.3551132678985596,
"logits/rejected": -3.1186678409576416,
"logps/chosen": -277.4861755371094,
"logps/rejected": -251.39437866210938,
"loss": 0.455,
"rewards/accuracies": 0.8031250238418579,
"rewards/chosen": -0.23053057491779327,
"rewards/margins": 0.8569380640983582,
"rewards/rejected": -1.0874686241149902,
"step": 1740
},
{
"epoch": 2.6479663394109396,
"grad_norm": 6.049899101257324,
"learning_rate": 1.1676646706586828e-05,
"logits/chosen": -3.3462002277374268,
"logits/rejected": -3.0950281620025635,
"logps/chosen": -279.08447265625,
"logps/rejected": -243.8069305419922,
"loss": 0.4414,
"rewards/accuracies": 0.8072916865348816,
"rewards/chosen": -0.24068358540534973,
"rewards/margins": 0.8998420238494873,
"rewards/rejected": -1.1405255794525146,
"step": 1770
},
{
"epoch": 2.6928471248246844,
"grad_norm": 7.545809268951416,
"learning_rate": 1.0179640718562875e-05,
"logits/chosen": -3.346256732940674,
"logits/rejected": -3.112372875213623,
"logps/chosen": -270.18499755859375,
"logps/rejected": -240.69723510742188,
"loss": 0.4603,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.25966814160346985,
"rewards/margins": 0.8120385408401489,
"rewards/rejected": -1.071706771850586,
"step": 1800
},
{
"epoch": 2.6928471248246844,
"eval_logits/chosen": -3.3438971042633057,
"eval_logits/rejected": -3.0931475162506104,
"eval_logps/chosen": -276.7391662597656,
"eval_logps/rejected": -240.1473388671875,
"eval_loss": 0.6427502036094666,
"eval_rewards/accuracies": 0.6297681331634521,
"eval_rewards/chosen": -0.48571139574050903,
"eval_rewards/margins": 0.4220017194747925,
"eval_rewards/rejected": -0.9077131152153015,
"eval_runtime": 1686.025,
"eval_samples_per_second": 3.172,
"eval_steps_per_second": 3.172,
"step": 1800
},
{
"epoch": 2.737727910238429,
"grad_norm": 5.611355304718018,
"learning_rate": 8.682634730538922e-06,
"logits/chosen": -3.347557306289673,
"logits/rejected": -3.109966993331909,
"logps/chosen": -275.6930236816406,
"logps/rejected": -247.47406005859375,
"loss": 0.4457,
"rewards/accuracies": 0.8291666507720947,
"rewards/chosen": -0.3010416030883789,
"rewards/margins": 0.8664290308952332,
"rewards/rejected": -1.1674706935882568,
"step": 1830
},
{
"epoch": 2.782608695652174,
"grad_norm": 8.53209114074707,
"learning_rate": 7.18562874251497e-06,
"logits/chosen": -3.3400204181671143,
"logits/rejected": -3.103865623474121,
"logps/chosen": -285.2029724121094,
"logps/rejected": -255.09078979492188,
"loss": 0.4524,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.22924675047397614,
"rewards/margins": 0.8349610567092896,
"rewards/rejected": -1.0642077922821045,
"step": 1860
},
{
"epoch": 2.8274894810659186,
"grad_norm": 7.011772155761719,
"learning_rate": 5.688622754491018e-06,
"logits/chosen": -3.3375208377838135,
"logits/rejected": -3.0882365703582764,
"logps/chosen": -269.7694091796875,
"logps/rejected": -238.83042907714844,
"loss": 0.4511,
"rewards/accuracies": 0.8031250238418579,
"rewards/chosen": -0.3288494944572449,
"rewards/margins": 0.8713601231575012,
"rewards/rejected": -1.2002094984054565,
"step": 1890
},
{
"epoch": 2.8424497428705005,
"eval_logits/chosen": -3.3421826362609863,
"eval_logits/rejected": -3.0923619270324707,
"eval_logps/chosen": -277.226806640625,
"eval_logps/rejected": -240.6798858642578,
"eval_loss": 0.6432516574859619,
"eval_rewards/accuracies": 0.6295811533927917,
"eval_rewards/chosen": -0.5344744324684143,
"eval_rewards/margins": 0.42649218440055847,
"eval_rewards/rejected": -0.9609667062759399,
"eval_runtime": 1686.3389,
"eval_samples_per_second": 3.171,
"eval_steps_per_second": 3.171,
"step": 1900
},
{
"epoch": 2.8723702664796633,
"grad_norm": 7.099593162536621,
"learning_rate": 4.191616766467066e-06,
"logits/chosen": -3.359609365463257,
"logits/rejected": -3.0945444107055664,
"logps/chosen": -280.75030517578125,
"logps/rejected": -245.13504028320312,
"loss": 0.4418,
"rewards/accuracies": 0.8197916746139526,
"rewards/chosen": -0.30151474475860596,
"rewards/margins": 0.8953721523284912,
"rewards/rejected": -1.1968867778778076,
"step": 1920
},
{
"epoch": 2.917251051893408,
"grad_norm": 7.788060188293457,
"learning_rate": 2.6946107784431138e-06,
"logits/chosen": -3.3403496742248535,
"logits/rejected": -3.091184139251709,
"logps/chosen": -280.9390869140625,
"logps/rejected": -247.351806640625,
"loss": 0.444,
"rewards/accuracies": 0.8302083611488342,
"rewards/chosen": -0.2591714859008789,
"rewards/margins": 0.885311484336853,
"rewards/rejected": -1.1444830894470215,
"step": 1950
},
{
"epoch": 2.962131837307153,
"grad_norm": 7.973437786102295,
"learning_rate": 1.1976047904191619e-06,
"logits/chosen": -3.328507900238037,
"logits/rejected": -3.088214635848999,
"logps/chosen": -271.0534362792969,
"logps/rejected": -242.70079040527344,
"loss": 0.4531,
"rewards/accuracies": 0.815625011920929,
"rewards/chosen": -0.357342392206192,
"rewards/margins": 0.865265429019928,
"rewards/rejected": -1.2226077318191528,
"step": 1980
},
{
"epoch": 2.992052360916316,
"eval_logits/chosen": -3.342743158340454,
"eval_logits/rejected": -3.0929155349731445,
"eval_logps/chosen": -277.3058166503906,
"eval_logps/rejected": -240.79270935058594,
"eval_loss": 0.6429719924926758,
"eval_rewards/accuracies": 0.6299551129341125,
"eval_rewards/chosen": -0.5423800349235535,
"eval_rewards/margins": 0.42986956238746643,
"eval_rewards/rejected": -0.9722495079040527,
"eval_runtime": 1684.4785,
"eval_samples_per_second": 3.175,
"eval_steps_per_second": 3.175,
"step": 2000
}
],
"logging_steps": 30,
"max_steps": 2004,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}