|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 1065, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.3852074765983984, |
|
"learning_rate": 4.672897196261682e-08, |
|
"logits/chosen": -2.4213736057281494, |
|
"logits/rejected": -2.1724228858947754, |
|
"logps/chosen": -311.7572021484375, |
|
"logps/rejected": -242.86618041992188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.3445286792409266, |
|
"learning_rate": 4.6728971962616824e-07, |
|
"logits/chosen": -2.4368515014648438, |
|
"logits/rejected": -2.2642922401428223, |
|
"logps/chosen": -307.2416076660156, |
|
"logps/rejected": -312.71978759765625, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": 0.00042119898716919124, |
|
"rewards/margins": 0.0008334853337146342, |
|
"rewards/margins_max": 0.00408088369295001, |
|
"rewards/margins_min": -0.001809651032090187, |
|
"rewards/margins_std": 0.0026596880052238703, |
|
"rewards/rejected": -0.0004122863756492734, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.711998547730859, |
|
"learning_rate": 9.345794392523365e-07, |
|
"logits/chosen": -2.3758203983306885, |
|
"logits/rejected": -2.2394745349884033, |
|
"logps/chosen": -266.3940734863281, |
|
"logps/rejected": -250.22067260742188, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.00042475899681448936, |
|
"rewards/margins": 0.004263690672814846, |
|
"rewards/margins_max": 0.010423297993838787, |
|
"rewards/margins_min": -0.000582061184104532, |
|
"rewards/margins_std": 0.004974424839019775, |
|
"rewards/rejected": -0.003838931443169713, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.331840625958437, |
|
"learning_rate": 1.4018691588785047e-06, |
|
"logits/chosen": -2.4345130920410156, |
|
"logits/rejected": -2.2301573753356934, |
|
"logps/chosen": -277.0610046386719, |
|
"logps/rejected": -270.2976989746094, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0029614921659231186, |
|
"rewards/margins": 0.016091803088784218, |
|
"rewards/margins_max": 0.030258700251579285, |
|
"rewards/margins_min": 0.004859076347202063, |
|
"rewards/margins_std": 0.011583611369132996, |
|
"rewards/rejected": -0.01313030906021595, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.5069076474331458, |
|
"learning_rate": 1.869158878504673e-06, |
|
"logits/chosen": -2.5057766437530518, |
|
"logits/rejected": -2.2820587158203125, |
|
"logps/chosen": -263.85906982421875, |
|
"logps/rejected": -258.41522216796875, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0037085427902638912, |
|
"rewards/margins": 0.030518993735313416, |
|
"rewards/margins_max": 0.0725535899400711, |
|
"rewards/margins_min": 0.0008986728498712182, |
|
"rewards/margins_std": 0.03197382763028145, |
|
"rewards/rejected": -0.026810448616743088, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.2621789299254584, |
|
"learning_rate": 2.3364485981308413e-06, |
|
"logits/chosen": -2.432992935180664, |
|
"logits/rejected": -2.222877025604248, |
|
"logps/chosen": -256.1089782714844, |
|
"logps/rejected": -280.87469482421875, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0002926398010458797, |
|
"rewards/margins": 0.09236637502908707, |
|
"rewards/margins_max": 0.2024260014295578, |
|
"rewards/margins_min": 0.017648588865995407, |
|
"rewards/margins_std": 0.08838540315628052, |
|
"rewards/rejected": -0.09207373112440109, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 3.2693868625712255, |
|
"learning_rate": 2.8037383177570094e-06, |
|
"logits/chosen": -2.3796916007995605, |
|
"logits/rejected": -2.216627836227417, |
|
"logps/chosen": -292.62139892578125, |
|
"logps/rejected": -324.89324951171875, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0356663353741169, |
|
"rewards/margins": 0.15710246562957764, |
|
"rewards/margins_max": 0.3139253556728363, |
|
"rewards/margins_min": 0.024519650265574455, |
|
"rewards/margins_std": 0.131367027759552, |
|
"rewards/rejected": -0.19276879727840424, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.595351862049113, |
|
"learning_rate": 3.2710280373831774e-06, |
|
"logits/chosen": -2.4088263511657715, |
|
"logits/rejected": -2.2108654975891113, |
|
"logps/chosen": -296.26385498046875, |
|
"logps/rejected": -306.9451599121094, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.10243145376443863, |
|
"rewards/margins": 0.22524826228618622, |
|
"rewards/margins_max": 0.517895519733429, |
|
"rewards/margins_min": 0.032067470252513885, |
|
"rewards/margins_std": 0.21916556358337402, |
|
"rewards/rejected": -0.32767972350120544, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.072039945720203, |
|
"learning_rate": 3.738317757009346e-06, |
|
"logits/chosen": -2.4255895614624023, |
|
"logits/rejected": -2.304298162460327, |
|
"logps/chosen": -329.9145202636719, |
|
"logps/rejected": -401.10516357421875, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.2832830548286438, |
|
"rewards/margins": 0.4214704632759094, |
|
"rewards/margins_max": 0.8972963094711304, |
|
"rewards/margins_min": 0.05010233446955681, |
|
"rewards/margins_std": 0.3841872811317444, |
|
"rewards/rejected": -0.7047535181045532, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.642567259744184, |
|
"learning_rate": 4.205607476635514e-06, |
|
"logits/chosen": -2.224909782409668, |
|
"logits/rejected": -2.080477476119995, |
|
"logps/chosen": -290.53643798828125, |
|
"logps/rejected": -332.0419616699219, |
|
"loss": 0.4756, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.42501896619796753, |
|
"rewards/margins": 0.5945996046066284, |
|
"rewards/margins_max": 1.4964139461517334, |
|
"rewards/margins_min": 0.0675990879535675, |
|
"rewards/margins_std": 0.6498464941978455, |
|
"rewards/rejected": -1.0196186304092407, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 3.345534089333945, |
|
"learning_rate": 4.6728971962616825e-06, |
|
"logits/chosen": -2.17991042137146, |
|
"logits/rejected": -2.0627541542053223, |
|
"logps/chosen": -351.88580322265625, |
|
"logps/rejected": -475.9768981933594, |
|
"loss": 0.4256, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.875613808631897, |
|
"rewards/margins": 0.8630256652832031, |
|
"rewards/margins_max": 2.0646145343780518, |
|
"rewards/margins_min": 0.07761440426111221, |
|
"rewards/margins_std": 0.921543300151825, |
|
"rewards/rejected": -1.7386394739151, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_logits/chosen": -2.0750787258148193, |
|
"eval_logits/rejected": -1.9770790338516235, |
|
"eval_logps/chosen": -455.095947265625, |
|
"eval_logps/rejected": -453.3454895019531, |
|
"eval_loss": 0.8162721991539001, |
|
"eval_rewards/accuracies": 0.5609999895095825, |
|
"eval_rewards/chosen": -1.802152395248413, |
|
"eval_rewards/margins": 0.15609782934188843, |
|
"eval_rewards/margins_max": 2.204866886138916, |
|
"eval_rewards/margins_min": -1.819094181060791, |
|
"eval_rewards/margins_std": 1.3259263038635254, |
|
"eval_rewards/rejected": -1.9582501649856567, |
|
"eval_runtime": 738.2434, |
|
"eval_samples_per_second": 2.709, |
|
"eval_steps_per_second": 0.169, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 3.790495737109387, |
|
"learning_rate": 4.999879018839288e-06, |
|
"logits/chosen": -2.319441318511963, |
|
"logits/rejected": -2.0694375038146973, |
|
"logps/chosen": -491.3214416503906, |
|
"logps/rejected": -595.2050170898438, |
|
"loss": 0.3726, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.4289052486419678, |
|
"rewards/margins": 1.2257483005523682, |
|
"rewards/margins_max": 2.6363110542297363, |
|
"rewards/margins_min": -0.0019395649433135986, |
|
"rewards/margins_std": 1.1995513439178467, |
|
"rewards/rejected": -2.654653787612915, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 6.4022099851607335, |
|
"learning_rate": 4.99772856836941e-06, |
|
"logits/chosen": -2.0878264904022217, |
|
"logits/rejected": -1.9656604528427124, |
|
"logps/chosen": -540.1173095703125, |
|
"logps/rejected": -716.8549194335938, |
|
"loss": 0.3017, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.161752462387085, |
|
"rewards/margins": 1.9279537200927734, |
|
"rewards/margins_max": 3.7465851306915283, |
|
"rewards/margins_min": 0.2842390835285187, |
|
"rewards/margins_std": 1.603005051612854, |
|
"rewards/rejected": -4.089705944061279, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.8141114628331887, |
|
"learning_rate": 4.992892309373227e-06, |
|
"logits/chosen": -1.9616358280181885, |
|
"logits/rejected": -1.7907485961914062, |
|
"logps/chosen": -607.4080810546875, |
|
"logps/rejected": -861.5751953125, |
|
"loss": 0.2862, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.5392794609069824, |
|
"rewards/margins": 2.649423122406006, |
|
"rewards/margins_max": 5.628636360168457, |
|
"rewards/margins_min": 0.41630443930625916, |
|
"rewards/margins_std": 2.4051852226257324, |
|
"rewards/rejected": -5.188702583312988, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 18.15292585345447, |
|
"learning_rate": 4.985375442281969e-06, |
|
"logits/chosen": -1.8700624704360962, |
|
"logits/rejected": -1.7391315698623657, |
|
"logps/chosen": -530.605224609375, |
|
"logps/rejected": -826.2467651367188, |
|
"loss": 0.261, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -2.36662220954895, |
|
"rewards/margins": 2.824148654937744, |
|
"rewards/margins_max": 6.096582889556885, |
|
"rewards/margins_min": 0.574691653251648, |
|
"rewards/margins_std": 2.5261497497558594, |
|
"rewards/rejected": -5.190770626068115, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 4.549040288633133, |
|
"learning_rate": 4.9751860499858175e-06, |
|
"logits/chosen": -1.6621425151824951, |
|
"logits/rejected": -1.5595060586929321, |
|
"logps/chosen": -543.5266723632812, |
|
"logps/rejected": -745.6972045898438, |
|
"loss": 0.2821, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.752904176712036, |
|
"rewards/margins": 2.2729761600494385, |
|
"rewards/margins_max": 4.967066764831543, |
|
"rewards/margins_min": 0.14136430621147156, |
|
"rewards/margins_std": 2.177089214324951, |
|
"rewards/rejected": -5.025879859924316, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 5.567321059696331, |
|
"learning_rate": 4.962335089142376e-06, |
|
"logits/chosen": -1.684842824935913, |
|
"logits/rejected": -1.5501009225845337, |
|
"logps/chosen": -474.55108642578125, |
|
"logps/rejected": -770.0437622070312, |
|
"loss": 0.2122, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.0399696826934814, |
|
"rewards/margins": 2.976529359817505, |
|
"rewards/margins_max": 6.136769771575928, |
|
"rewards/margins_min": 0.4981708526611328, |
|
"rewards/margins_std": 2.5306599140167236, |
|
"rewards/rejected": -5.016499042510986, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 10.10370874579834, |
|
"learning_rate": 4.946836378394967e-06, |
|
"logits/chosen": -1.719364881515503, |
|
"logits/rejected": -1.5284518003463745, |
|
"logps/chosen": -491.3282775878906, |
|
"logps/rejected": -959.0675659179688, |
|
"loss": 0.2013, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.9167028665542603, |
|
"rewards/margins": 4.33759069442749, |
|
"rewards/margins_max": 8.84797191619873, |
|
"rewards/margins_min": 1.2430771589279175, |
|
"rewards/margins_std": 3.5163204669952393, |
|
"rewards/rejected": -6.254293918609619, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 5.681771212296982, |
|
"learning_rate": 4.928706583513441e-06, |
|
"logits/chosen": -1.5217533111572266, |
|
"logits/rejected": -1.2059122323989868, |
|
"logps/chosen": -738.7340698242188, |
|
"logps/rejected": -1276.347900390625, |
|
"loss": 0.2247, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.31705904006958, |
|
"rewards/margins": 5.126463890075684, |
|
"rewards/margins_max": 10.595057487487793, |
|
"rewards/margins_min": 0.7047984004020691, |
|
"rewards/margins_std": 4.3886613845825195, |
|
"rewards/rejected": -9.443523406982422, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 11.72688194414167, |
|
"learning_rate": 4.907965199473471e-06, |
|
"logits/chosen": -1.5132646560668945, |
|
"logits/rejected": -1.1687037944793701, |
|
"logps/chosen": -706.4207153320312, |
|
"logps/rejected": -1510.3458251953125, |
|
"loss": 0.1628, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.151159286499023, |
|
"rewards/margins": 8.210009574890137, |
|
"rewards/margins_max": 16.38364028930664, |
|
"rewards/margins_min": 2.5097246170043945, |
|
"rewards/margins_std": 6.4036359786987305, |
|
"rewards/rejected": -12.361169815063477, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 27.957964269833404, |
|
"learning_rate": 4.884634529493591e-06, |
|
"logits/chosen": -1.494742751121521, |
|
"logits/rejected": -1.0926433801651, |
|
"logps/chosen": -993.9827270507812, |
|
"logps/rejected": -2114.66748046875, |
|
"loss": 0.1591, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -6.776390075683594, |
|
"rewards/margins": 11.342796325683594, |
|
"rewards/margins_max": 26.83356285095215, |
|
"rewards/margins_min": 1.5193722248077393, |
|
"rewards/margins_std": 11.783136367797852, |
|
"rewards/rejected": -18.11918830871582, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_logits/chosen": -1.446925401687622, |
|
"eval_logits/rejected": -1.3460043668746948, |
|
"eval_logps/chosen": -784.6454467773438, |
|
"eval_logps/rejected": -919.6762084960938, |
|
"eval_loss": 1.212246060371399, |
|
"eval_rewards/accuracies": 0.6050000190734863, |
|
"eval_rewards/chosen": -5.097646713256836, |
|
"eval_rewards/margins": 1.5239101648330688, |
|
"eval_rewards/margins_max": 9.997103691101074, |
|
"eval_rewards/margins_min": -4.875259876251221, |
|
"eval_rewards/margins_std": 4.826797008514404, |
|
"eval_rewards/rejected": -6.621557235717773, |
|
"eval_runtime": 739.1046, |
|
"eval_samples_per_second": 2.706, |
|
"eval_steps_per_second": 0.169, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 4.4515073816349, |
|
"learning_rate": 4.858739661052539e-06, |
|
"logits/chosen": -1.375280499458313, |
|
"logits/rejected": -0.9940829277038574, |
|
"logps/chosen": -801.5595703125, |
|
"logps/rejected": -1996.09375, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.175868034362793, |
|
"rewards/margins": 11.802214622497559, |
|
"rewards/margins_max": 24.37476921081543, |
|
"rewards/margins_min": 3.159264087677002, |
|
"rewards/margins_std": 9.701603889465332, |
|
"rewards/rejected": -16.97808074951172, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 5.583399739036604, |
|
"learning_rate": 4.830308438912687e-06, |
|
"logits/chosen": -1.3399736881256104, |
|
"logits/rejected": -0.8271608352661133, |
|
"logps/chosen": -1234.1002197265625, |
|
"logps/rejected": -2999.682861328125, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -9.106898307800293, |
|
"rewards/margins": 17.739770889282227, |
|
"rewards/margins_max": 37.61690139770508, |
|
"rewards/margins_min": 2.8995840549468994, |
|
"rewards/margins_std": 16.034954071044922, |
|
"rewards/rejected": -26.846668243408203, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 5.236021050652021, |
|
"learning_rate": 4.799371435178544e-06, |
|
"logits/chosen": -1.6026887893676758, |
|
"logits/rejected": -1.2132550477981567, |
|
"logps/chosen": -873.904296875, |
|
"logps/rejected": -1824.0445556640625, |
|
"loss": 0.1169, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.2920122146606445, |
|
"rewards/margins": 9.946649551391602, |
|
"rewards/margins_max": 21.537805557250977, |
|
"rewards/margins_min": 1.2446839809417725, |
|
"rewards/margins_std": 9.166532516479492, |
|
"rewards/rejected": -15.238659858703613, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 6.9079353036860525, |
|
"learning_rate": 4.765961916422575e-06, |
|
"logits/chosen": -1.378777027130127, |
|
"logits/rejected": -0.9224980473518372, |
|
"logps/chosen": -1299.450927734375, |
|
"logps/rejected": -2845.23095703125, |
|
"loss": 0.1933, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -9.927515983581543, |
|
"rewards/margins": 15.248102188110352, |
|
"rewards/margins_max": 35.37622833251953, |
|
"rewards/margins_min": 2.7304091453552246, |
|
"rewards/margins_std": 14.676897048950195, |
|
"rewards/rejected": -25.175617218017578, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 3.479040193067339, |
|
"learning_rate": 4.730115807913627e-06, |
|
"logits/chosen": -1.3043248653411865, |
|
"logits/rejected": -0.9514943957328796, |
|
"logps/chosen": -1031.6715087890625, |
|
"logps/rejected": -2111.33447265625, |
|
"loss": 0.1542, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.449245452880859, |
|
"rewards/margins": 10.80849552154541, |
|
"rewards/margins_max": 24.898605346679688, |
|
"rewards/margins_min": 2.4293816089630127, |
|
"rewards/margins_std": 10.350214958190918, |
|
"rewards/rejected": -18.257740020751953, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.6636821106137679, |
|
"learning_rate": 4.691871654986485e-06, |
|
"logits/chosen": -1.3028302192687988, |
|
"logits/rejected": -0.9591180086135864, |
|
"logps/chosen": -1015.4215087890625, |
|
"logps/rejected": -2004.902587890625, |
|
"loss": 0.1454, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -7.468132019042969, |
|
"rewards/margins": 9.605308532714844, |
|
"rewards/margins_max": 19.181324005126953, |
|
"rewards/margins_min": 1.7056152820587158, |
|
"rewards/margins_std": 7.867035865783691, |
|
"rewards/rejected": -17.073440551757812, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 16.019493625748307, |
|
"learning_rate": 4.651270581594054e-06, |
|
"logits/chosen": -1.0485166311264038, |
|
"logits/rejected": -0.6178771257400513, |
|
"logps/chosen": -1578.3748779296875, |
|
"logps/rejected": -3275.0478515625, |
|
"loss": 0.2713, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -12.956689834594727, |
|
"rewards/margins": 16.732524871826172, |
|
"rewards/margins_max": 34.99681854248047, |
|
"rewards/margins_min": 4.456276893615723, |
|
"rewards/margins_std": 13.886381149291992, |
|
"rewards/rejected": -29.689218521118164, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 17.992430896909905, |
|
"learning_rate": 4.6083562460867545e-06, |
|
"logits/chosen": -1.2608745098114014, |
|
"logits/rejected": -0.790873646736145, |
|
"logps/chosen": -1115.44775390625, |
|
"logps/rejected": -2799.135986328125, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.298356056213379, |
|
"rewards/margins": 16.56003189086914, |
|
"rewards/margins_max": 32.726654052734375, |
|
"rewards/margins_min": 4.10027551651001, |
|
"rewards/margins_std": 13.312190055847168, |
|
"rewards/rejected": -24.858386993408203, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 20.054208913991488, |
|
"learning_rate": 4.563174794266684e-06, |
|
"logits/chosen": -1.0551059246063232, |
|
"logits/rejected": -0.4062129557132721, |
|
"logps/chosen": -1836.141845703125, |
|
"logps/rejected": -4272.1552734375, |
|
"loss": 0.3014, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -15.701252937316895, |
|
"rewards/margins": 24.028852462768555, |
|
"rewards/margins_max": 54.14502716064453, |
|
"rewards/margins_min": 5.250021934509277, |
|
"rewards/margins_std": 22.093597412109375, |
|
"rewards/rejected": -39.73011016845703, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.439806702571055, |
|
"learning_rate": 4.5157748097670125e-06, |
|
"logits/chosen": -1.5465186834335327, |
|
"logits/rejected": -1.0866758823394775, |
|
"logps/chosen": -846.8386840820312, |
|
"logps/rejected": -2151.494873046875, |
|
"loss": 0.1126, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.326035499572754, |
|
"rewards/margins": 12.93223762512207, |
|
"rewards/margins_max": 28.242706298828125, |
|
"rewards/margins_min": 3.0544486045837402, |
|
"rewards/margins_std": 11.579388618469238, |
|
"rewards/rejected": -18.25827407836914, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_logits/chosen": -1.3141957521438599, |
|
"eval_logits/rejected": -1.213302731513977, |
|
"eval_logps/chosen": -891.159912109375, |
|
"eval_logps/rejected": -1116.3018798828125, |
|
"eval_loss": 1.723042607307434, |
|
"eval_rewards/accuracies": 0.609000027179718, |
|
"eval_rewards/chosen": -6.162792205810547, |
|
"eval_rewards/margins": 2.4250221252441406, |
|
"eval_rewards/margins_max": 18.91021156311035, |
|
"eval_rewards/margins_min": -8.22018814086914, |
|
"eval_rewards/margins_std": 8.723590850830078, |
|
"eval_rewards/rejected": -8.587814331054688, |
|
"eval_runtime": 739.1854, |
|
"eval_samples_per_second": 2.706, |
|
"eval_steps_per_second": 0.169, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.8120942447056025, |
|
"learning_rate": 4.466207261809989e-06, |
|
"logits/chosen": -1.319267749786377, |
|
"logits/rejected": -0.8222479820251465, |
|
"logps/chosen": -1014.4478759765625, |
|
"logps/rejected": -2260.364501953125, |
|
"loss": 0.2071, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.412560939788818, |
|
"rewards/margins": 12.69267749786377, |
|
"rewards/margins_max": 31.255285263061523, |
|
"rewards/margins_min": 1.447127103805542, |
|
"rewards/margins_std": 13.759561538696289, |
|
"rewards/rejected": -20.105239868164062, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 47.6233390458397, |
|
"learning_rate": 4.414525450399713e-06, |
|
"logits/chosen": -1.2297580242156982, |
|
"logits/rejected": -0.662652850151062, |
|
"logps/chosen": -1187.2958984375, |
|
"logps/rejected": -3130.074951171875, |
|
"loss": 0.1808, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -8.963447570800781, |
|
"rewards/margins": 19.364063262939453, |
|
"rewards/margins_max": 49.05697250366211, |
|
"rewards/margins_min": 0.8736963272094727, |
|
"rewards/margins_std": 21.952783584594727, |
|
"rewards/rejected": -28.327510833740234, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 8.112447579189505, |
|
"learning_rate": 4.360784949008615e-06, |
|
"logits/chosen": -1.1926701068878174, |
|
"logits/rejected": -0.5003105401992798, |
|
"logps/chosen": -1699.5732421875, |
|
"logps/rejected": -4102.2255859375, |
|
"loss": 0.1992, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -13.697128295898438, |
|
"rewards/margins": 24.271343231201172, |
|
"rewards/margins_max": 48.1143798828125, |
|
"rewards/margins_min": 6.217949390411377, |
|
"rewards/margins_std": 18.776830673217773, |
|
"rewards/rejected": -37.96847152709961, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.6961010650119905, |
|
"learning_rate": 4.30504354481929e-06, |
|
"logits/chosen": -1.4151222705841064, |
|
"logits/rejected": -0.9203524589538574, |
|
"logps/chosen": -864.4846801757812, |
|
"logps/rejected": -2527.864013671875, |
|
"loss": 0.1551, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.877382755279541, |
|
"rewards/margins": 16.530858993530273, |
|
"rewards/margins_max": 33.9772834777832, |
|
"rewards/margins_min": 3.8386902809143066, |
|
"rewards/margins_std": 14.032522201538086, |
|
"rewards/rejected": -22.408239364624023, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 6.911698207606949, |
|
"learning_rate": 4.247361176585904e-06, |
|
"logits/chosen": -1.2823737859725952, |
|
"logits/rejected": -0.8435856103897095, |
|
"logps/chosen": -1273.652587890625, |
|
"logps/rejected": -3208.216064453125, |
|
"loss": 0.2927, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -9.91613483428955, |
|
"rewards/margins": 18.9328670501709, |
|
"rewards/margins_max": 38.30847930908203, |
|
"rewards/margins_min": 6.44967794418335, |
|
"rewards/margins_std": 14.676486015319824, |
|
"rewards/rejected": -28.849002838134766, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.20907754692817052, |
|
"learning_rate": 4.187799870182038e-06, |
|
"logits/chosen": -1.4417529106140137, |
|
"logits/rejected": -0.9101096391677856, |
|
"logps/chosen": -1253.6064453125, |
|
"logps/rejected": -3580.76416015625, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -9.736591339111328, |
|
"rewards/margins": 23.14864730834961, |
|
"rewards/margins_max": 51.2208251953125, |
|
"rewards/margins_min": 5.839755058288574, |
|
"rewards/margins_std": 21.0888671875, |
|
"rewards/rejected": -32.88523483276367, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 38.435126006464614, |
|
"learning_rate": 4.1264236719042365e-06, |
|
"logits/chosen": -1.4028445482254028, |
|
"logits/rejected": -0.8279297947883606, |
|
"logps/chosen": -1447.45263671875, |
|
"logps/rejected": -3875.93994140625, |
|
"loss": 0.1371, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -11.246275901794434, |
|
"rewards/margins": 24.722837448120117, |
|
"rewards/margins_max": 49.426544189453125, |
|
"rewards/margins_min": 3.9899165630340576, |
|
"rewards/margins_std": 21.102214813232422, |
|
"rewards/rejected": -35.9691162109375, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 5.26142385176066, |
|
"learning_rate": 4.063298579603001e-06, |
|
"logits/chosen": -1.4487351179122925, |
|
"logits/rejected": -0.8309275507926941, |
|
"logps/chosen": -997.6652221679688, |
|
"logps/rejected": -3492.02880859375, |
|
"loss": 0.029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.381462097167969, |
|
"rewards/margins": 24.83234977722168, |
|
"rewards/margins_max": 55.610748291015625, |
|
"rewards/margins_min": 7.033749580383301, |
|
"rewards/margins_std": 22.912935256958008, |
|
"rewards/rejected": -32.21381378173828, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.2541383197386446, |
|
"learning_rate": 3.998492471715272e-06, |
|
"logits/chosen": -1.2715747356414795, |
|
"logits/rejected": -0.8856738805770874, |
|
"logps/chosen": -1461.220703125, |
|
"logps/rejected": -3670.775390625, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.813475608825684, |
|
"rewards/margins": 21.913679122924805, |
|
"rewards/margins_max": 46.8076057434082, |
|
"rewards/margins_min": 5.384150505065918, |
|
"rewards/margins_std": 18.93104362487793, |
|
"rewards/rejected": -33.72715759277344, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 1.9849281441200701, |
|
"learning_rate": 3.932075034274723e-06, |
|
"logits/chosen": -1.3581750392913818, |
|
"logits/rejected": -0.8970950245857239, |
|
"logps/chosen": -1317.798095703125, |
|
"logps/rejected": -3307.11083984375, |
|
"loss": 0.074, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -10.455536842346191, |
|
"rewards/margins": 19.807851791381836, |
|
"rewards/margins_max": 41.626365661621094, |
|
"rewards/margins_min": 5.861770153045654, |
|
"rewards/margins_std": 16.298381805419922, |
|
"rewards/rejected": -30.263391494750977, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_logits/chosen": -1.4369679689407349, |
|
"eval_logits/rejected": -1.3244106769561768, |
|
"eval_logps/chosen": -1146.1495361328125, |
|
"eval_logps/rejected": -1451.477783203125, |
|
"eval_loss": 2.0005135536193848, |
|
"eval_rewards/accuracies": 0.621999979019165, |
|
"eval_rewards/chosen": -8.712687492370605, |
|
"eval_rewards/margins": 3.22688364982605, |
|
"eval_rewards/margins_max": 20.153732299804688, |
|
"eval_rewards/margins_min": -9.986676216125488, |
|
"eval_rewards/margins_std": 9.687753677368164, |
|
"eval_rewards/rejected": -11.93957233428955, |
|
"eval_runtime": 739.9676, |
|
"eval_samples_per_second": 2.703, |
|
"eval_steps_per_second": 0.169, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.538517914103785, |
|
"learning_rate": 3.864117685978339e-06, |
|
"logits/chosen": -1.3837534189224243, |
|
"logits/rejected": -0.8808004260063171, |
|
"logps/chosen": -1536.3201904296875, |
|
"logps/rejected": -3880.190185546875, |
|
"loss": 0.041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.580427169799805, |
|
"rewards/margins": 23.37277603149414, |
|
"rewards/margins_max": 50.48382568359375, |
|
"rewards/margins_min": 4.341352939605713, |
|
"rewards/margins_std": 21.75898551940918, |
|
"rewards/rejected": -35.95320129394531, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 5.339854465744551, |
|
"learning_rate": 3.794693501389861e-06, |
|
"logits/chosen": -1.0723979473114014, |
|
"logits/rejected": -0.3059498071670532, |
|
"logps/chosen": -2542.86083984375, |
|
"logps/rejected": -6684.94140625, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -22.507823944091797, |
|
"rewards/margins": 41.29346466064453, |
|
"rewards/margins_max": 83.78596496582031, |
|
"rewards/margins_min": 9.751798629760742, |
|
"rewards/margins_std": 34.60697937011719, |
|
"rewards/rejected": -63.80128860473633, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 4.806290621145595, |
|
"learning_rate": 3.7238771323626822e-06, |
|
"logits/chosen": -1.1698893308639526, |
|
"logits/rejected": -0.48589786887168884, |
|
"logps/chosen": -2241.239013671875, |
|
"logps/rejected": -5464.1240234375, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -19.22347068786621, |
|
"rewards/margins": 32.403419494628906, |
|
"rewards/margins_max": 70.16567993164062, |
|
"rewards/margins_min": 5.948617935180664, |
|
"rewards/margins_std": 29.28656578063965, |
|
"rewards/rejected": -51.62689208984375, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 16.40920174339595, |
|
"learning_rate": 3.651744727766676e-06, |
|
"logits/chosen": -1.252957820892334, |
|
"logits/rejected": -0.6666532158851624, |
|
"logps/chosen": -1763.9290771484375, |
|
"logps/rejected": -4610.29296875, |
|
"loss": 0.1017, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -14.94475269317627, |
|
"rewards/margins": 28.329538345336914, |
|
"rewards/margins_max": 58.97295379638672, |
|
"rewards/margins_min": 7.963846683502197, |
|
"rewards/margins_std": 23.118122100830078, |
|
"rewards/rejected": -43.274295806884766, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 8.291804889185014, |
|
"learning_rate": 3.57837385160529e-06, |
|
"logits/chosen": -1.4508641958236694, |
|
"logits/rejected": -0.9649600982666016, |
|
"logps/chosen": -1113.8978271484375, |
|
"logps/rejected": -2990.294921875, |
|
"loss": 0.0341, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.290533065795898, |
|
"rewards/margins": 18.671770095825195, |
|
"rewards/margins_max": 35.884986877441406, |
|
"rewards/margins_min": 5.625349998474121, |
|
"rewards/margins_std": 13.8485689163208, |
|
"rewards/rejected": -26.96230125427246, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 5.207329228568341, |
|
"learning_rate": 3.503843399610941e-06, |
|
"logits/chosen": -1.5748833417892456, |
|
"logits/rejected": -1.0820248126983643, |
|
"logps/chosen": -1375.6676025390625, |
|
"logps/rejected": -3950.961669921875, |
|
"loss": 0.1747, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -10.502653121948242, |
|
"rewards/margins": 25.67014503479004, |
|
"rewards/margins_max": 56.58990478515625, |
|
"rewards/margins_min": 4.733575344085693, |
|
"rewards/margins_std": 23.528743743896484, |
|
"rewards/rejected": -36.17279052734375, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.3112038161123905, |
|
"learning_rate": 3.4282335144083985e-06, |
|
"logits/chosen": -1.4795830249786377, |
|
"logits/rejected": -0.9518693089485168, |
|
"logps/chosen": -1163.8775634765625, |
|
"logps/rejected": -3689.42919921875, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.739542007446289, |
|
"rewards/margins": 25.273595809936523, |
|
"rewards/margins_max": 51.39923858642578, |
|
"rewards/margins_min": 7.340193271636963, |
|
"rewards/margins_std": 19.96113395690918, |
|
"rewards/rejected": -34.01313400268555, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.5548232896248495, |
|
"learning_rate": 3.351625499337395e-06, |
|
"logits/chosen": -1.408975601196289, |
|
"logits/rejected": -0.8387428522109985, |
|
"logps/chosen": -1590.7392578125, |
|
"logps/rejected": -4186.083984375, |
|
"loss": 0.1211, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -12.902124404907227, |
|
"rewards/margins": 26.1358699798584, |
|
"rewards/margins_max": 52.00028610229492, |
|
"rewards/margins_min": 7.108595848083496, |
|
"rewards/margins_std": 20.017724990844727, |
|
"rewards/rejected": -39.037994384765625, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 11.945131109643063, |
|
"learning_rate": 3.2741017310271056e-06, |
|
"logits/chosen": -1.4146662950515747, |
|
"logits/rejected": -0.797744631767273, |
|
"logps/chosen": -1408.4781494140625, |
|
"logps/rejected": -4716.1142578125, |
|
"loss": 0.1193, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -11.056276321411133, |
|
"rewards/margins": 32.81745147705078, |
|
"rewards/margins_max": 74.43333435058594, |
|
"rewards/margins_min": 6.130241394042969, |
|
"rewards/margins_std": 30.70904541015625, |
|
"rewards/rejected": -43.87372589111328, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 8.866730611852605, |
|
"learning_rate": 3.195745570816532e-06, |
|
"logits/chosen": -1.5317000150680542, |
|
"logits/rejected": -1.1303898096084595, |
|
"logps/chosen": -1008.9549560546875, |
|
"logps/rejected": -2925.121337890625, |
|
"loss": 0.0551, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.364511013031006, |
|
"rewards/margins": 19.03972625732422, |
|
"rewards/margins_max": 39.17045211791992, |
|
"rewards/margins_min": 5.060760974884033, |
|
"rewards/margins_std": 15.347724914550781, |
|
"rewards/rejected": -26.40423583984375, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_logits/chosen": -1.4257258176803589, |
|
"eval_logits/rejected": -1.2957897186279297, |
|
"eval_logps/chosen": -1318.13232421875, |
|
"eval_logps/rejected": -1773.228271484375, |
|
"eval_loss": 2.656811475753784, |
|
"eval_rewards/accuracies": 0.6259999871253967, |
|
"eval_rewards/chosen": -10.432517051696777, |
|
"eval_rewards/margins": 4.7245612144470215, |
|
"eval_rewards/margins_max": 28.604450225830078, |
|
"eval_rewards/margins_min": -13.697463989257812, |
|
"eval_rewards/margins_std": 13.803962707519531, |
|
"eval_rewards/rejected": -15.15707778930664, |
|
"eval_runtime": 740.0178, |
|
"eval_samples_per_second": 2.703, |
|
"eval_steps_per_second": 0.169, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 4.416696704306515, |
|
"learning_rate": 3.116641275116018e-06, |
|
"logits/chosen": -1.3493144512176514, |
|
"logits/rejected": -0.7629820704460144, |
|
"logps/chosen": -1329.723388671875, |
|
"logps/rejected": -4176.525390625, |
|
"loss": 0.079, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.125879287719727, |
|
"rewards/margins": 28.669696807861328, |
|
"rewards/margins_max": 62.80031204223633, |
|
"rewards/margins_min": 7.63208532333374, |
|
"rewards/margins_std": 25.432636260986328, |
|
"rewards/rejected": -38.79557418823242, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 16.45913444533014, |
|
"learning_rate": 3.0368739048062956e-06, |
|
"logits/chosen": -1.283140778541565, |
|
"logits/rejected": -0.6157187819480896, |
|
"logps/chosen": -1691.1859130859375, |
|
"logps/rejected": -5241.02587890625, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -14.164215087890625, |
|
"rewards/margins": 35.53845977783203, |
|
"rewards/margins_max": 76.7809066772461, |
|
"rewards/margins_min": 7.163271903991699, |
|
"rewards/margins_std": 31.75979995727539, |
|
"rewards/rejected": -49.702667236328125, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 0.20699893951201953, |
|
"learning_rate": 2.956529233772492e-06, |
|
"logits/chosen": -1.337871789932251, |
|
"logits/rejected": -0.6666404008865356, |
|
"logps/chosen": -1663.0726318359375, |
|
"logps/rejected": -5488.6357421875, |
|
"loss": 0.0491, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -13.510289192199707, |
|
"rewards/margins": 38.012184143066406, |
|
"rewards/margins_max": 82.3269271850586, |
|
"rewards/margins_min": 9.285993576049805, |
|
"rewards/margins_std": 33.284690856933594, |
|
"rewards/rejected": -51.52248001098633, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 5.250766698166954, |
|
"learning_rate": 2.8756936566714317e-06, |
|
"logits/chosen": -1.2444673776626587, |
|
"logits/rejected": -0.6022458672523499, |
|
"logps/chosen": -2035.296875, |
|
"logps/rejected": -5392.39453125, |
|
"loss": 0.1281, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -17.186960220336914, |
|
"rewards/margins": 33.95701217651367, |
|
"rewards/margins_max": 71.49807739257812, |
|
"rewards/margins_min": 9.499161720275879, |
|
"rewards/margins_std": 27.914098739624023, |
|
"rewards/rejected": -51.14397430419922, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 2.140750073865154, |
|
"learning_rate": 2.794454096031429e-06, |
|
"logits/chosen": -1.3312510251998901, |
|
"logits/rejected": -0.5831511616706848, |
|
"logps/chosen": -1793.1119384765625, |
|
"logps/rejected": -6092.6904296875, |
|
"loss": 0.0615, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -14.43737506866455, |
|
"rewards/margins": 42.91600036621094, |
|
"rewards/margins_max": 97.83891296386719, |
|
"rewards/margins_min": 9.349691390991211, |
|
"rewards/margins_std": 40.1955451965332, |
|
"rewards/rejected": -57.35337448120117, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 2.5243522946384265, |
|
"learning_rate": 2.71289790878446e-06, |
|
"logits/chosen": -1.369600534439087, |
|
"logits/rejected": -0.7785685658454895, |
|
"logps/chosen": -1457.351318359375, |
|
"logps/rejected": -4937.0205078125, |
|
"loss": 0.061, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -11.771065711975098, |
|
"rewards/margins": 34.56814956665039, |
|
"rewards/margins_max": 73.30443572998047, |
|
"rewards/margins_min": 9.829178810119629, |
|
"rewards/margins_std": 29.120563507080078, |
|
"rewards/rejected": -46.339210510253906, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 21.43574111885225, |
|
"learning_rate": 2.6311127923312156e-06, |
|
"logits/chosen": -1.2619574069976807, |
|
"logits/rejected": -0.5961970090866089, |
|
"logps/chosen": -2181.08935546875, |
|
"logps/rejected": -6657.7587890625, |
|
"loss": 0.1197, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -18.670825958251953, |
|
"rewards/margins": 44.561912536621094, |
|
"rewards/margins_max": 88.48506164550781, |
|
"rewards/margins_min": 7.3762311935424805, |
|
"rewards/margins_std": 36.8846435546875, |
|
"rewards/rejected": -63.23274612426758, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 7.871582743030215, |
|
"learning_rate": 2.549186690240057e-06, |
|
"logits/chosen": -1.2399415969848633, |
|
"logits/rejected": -0.647193431854248, |
|
"logps/chosen": -1575.2166748046875, |
|
"logps/rejected": -5238.3876953125, |
|
"loss": 0.0476, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -13.128217697143555, |
|
"rewards/margins": 36.19137191772461, |
|
"rewards/margins_max": 77.69859313964844, |
|
"rewards/margins_min": 8.904541015625, |
|
"rewards/margins_std": 31.661495208740234, |
|
"rewards/rejected": -49.3195915222168, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 3.101951593932612, |
|
"learning_rate": 2.4672076976812548e-06, |
|
"logits/chosen": -1.310465693473816, |
|
"logits/rejected": -0.7587507963180542, |
|
"logps/chosen": -1462.403564453125, |
|
"logps/rejected": -4328.84130859375, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -11.766267776489258, |
|
"rewards/margins": 28.488006591796875, |
|
"rewards/margins_max": 57.8286247253418, |
|
"rewards/margins_min": 7.110069274902344, |
|
"rewards/margins_std": 22.86618423461914, |
|
"rewards/rejected": -40.254276275634766, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3852639666982218e-06, |
|
"logits/chosen": -1.1802486181259155, |
|
"logits/rejected": -0.5148967504501343, |
|
"logps/chosen": -1528.84228515625, |
|
"logps/rejected": -5615.95703125, |
|
"loss": 0.169, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -12.585975646972656, |
|
"rewards/margins": 40.26185989379883, |
|
"rewards/margins_max": 87.59795379638672, |
|
"rewards/margins_min": 7.060022830963135, |
|
"rewards/margins_std": 37.70138931274414, |
|
"rewards/rejected": -52.84783935546875, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_logits/chosen": -1.1529114246368408, |
|
"eval_logits/rejected": -1.0369744300842285, |
|
"eval_logps/chosen": -1772.8465576171875, |
|
"eval_logps/rejected": -2317.167724609375, |
|
"eval_loss": 3.7088778018951416, |
|
"eval_rewards/accuracies": 0.6159999966621399, |
|
"eval_rewards/chosen": -14.979656219482422, |
|
"eval_rewards/margins": 5.616815567016602, |
|
"eval_rewards/margins_max": 36.04051208496094, |
|
"eval_rewards/margins_min": -19.89307975769043, |
|
"eval_rewards/margins_std": 18.072784423828125, |
|
"eval_rewards/rejected": -20.596471786499023, |
|
"eval_runtime": 739.6295, |
|
"eval_samples_per_second": 2.704, |
|
"eval_steps_per_second": 0.169, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 0.41834345626108355, |
|
"learning_rate": 2.303443611417584e-06, |
|
"logits/chosen": -1.282365083694458, |
|
"logits/rejected": -0.7108389139175415, |
|
"logps/chosen": -1314.89794921875, |
|
"logps/rejected": -4228.2021484375, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -10.093293190002441, |
|
"rewards/margins": 29.427413940429688, |
|
"rewards/margins_max": 62.4293327331543, |
|
"rewards/margins_min": 9.642549514770508, |
|
"rewards/margins_std": 24.211936950683594, |
|
"rewards/rejected": -39.52070617675781, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 6.90289983608622e-05, |
|
"learning_rate": 2.2218346133000264e-06, |
|
"logits/chosen": -1.1936254501342773, |
|
"logits/rejected": -0.497224897146225, |
|
"logps/chosen": -1679.509033203125, |
|
"logps/rejected": -5510.2685546875, |
|
"loss": 0.0599, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.5853853225708, |
|
"rewards/margins": 38.249794006347656, |
|
"rewards/margins_max": 87.04119873046875, |
|
"rewards/margins_min": 6.039667129516602, |
|
"rewards/margins_std": 37.55732727050781, |
|
"rewards/rejected": -51.835174560546875, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 6.551057267217207, |
|
"learning_rate": 2.140524726533792e-06, |
|
"logits/chosen": -1.1961278915405273, |
|
"logits/rejected": -0.6267169117927551, |
|
"logps/chosen": -1415.692626953125, |
|
"logps/rejected": -5236.87548828125, |
|
"loss": 0.0962, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -10.874711036682129, |
|
"rewards/margins": 38.274715423583984, |
|
"rewards/margins_max": 77.94049072265625, |
|
"rewards/margins_min": 6.714731693267822, |
|
"rewards/margins_std": 32.879066467285156, |
|
"rewards/rejected": -49.14943313598633, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 6.856262195892754, |
|
"learning_rate": 2.059601383672566e-06, |
|
"logits/chosen": -1.358189344406128, |
|
"logits/rejected": -0.7102182507514954, |
|
"logps/chosen": -1562.2977294921875, |
|
"logps/rejected": -5266.2451171875, |
|
"loss": 0.0188, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.751333236694336, |
|
"rewards/margins": 37.24244689941406, |
|
"rewards/margins_max": 71.85942077636719, |
|
"rewards/margins_min": 10.842636108398438, |
|
"rewards/margins_std": 27.61344337463379, |
|
"rewards/rejected": -49.99378204345703, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 21.756292024970186, |
|
"learning_rate": 1.9791516016192214e-06, |
|
"logits/chosen": -1.2505046129226685, |
|
"logits/rejected": -0.5122033357620239, |
|
"logps/chosen": -2004.923095703125, |
|
"logps/rejected": -6630.80859375, |
|
"loss": 0.0833, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -16.955032348632812, |
|
"rewards/margins": 46.02431869506836, |
|
"rewards/margins_max": 99.85273742675781, |
|
"rewards/margins_min": 7.133955478668213, |
|
"rewards/margins_std": 42.62360382080078, |
|
"rewards/rejected": -62.9793586730957, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 4.783156669269607, |
|
"learning_rate": 1.8992618880565039e-06, |
|
"logits/chosen": -1.26731538772583, |
|
"logits/rejected": -0.6431769132614136, |
|
"logps/chosen": -1309.8038330078125, |
|
"logps/rejected": -4725.5908203125, |
|
"loss": 0.0275, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -10.83329963684082, |
|
"rewards/margins": 33.95838165283203, |
|
"rewards/margins_max": 75.65066528320312, |
|
"rewards/margins_min": 5.828721046447754, |
|
"rewards/margins_std": 32.41167449951172, |
|
"rewards/rejected": -44.79167938232422, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 5.459662336311476, |
|
"learning_rate": 1.8200181484252888e-06, |
|
"logits/chosen": -1.3983234167099, |
|
"logits/rejected": -0.8143168687820435, |
|
"logps/chosen": -1471.9613037109375, |
|
"logps/rejected": -4750.08203125, |
|
"loss": 0.0401, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -11.914285659790039, |
|
"rewards/margins": 32.79840850830078, |
|
"rewards/margins_max": 64.46766662597656, |
|
"rewards/margins_min": 8.825395584106445, |
|
"rewards/margins_std": 24.9855899810791, |
|
"rewards/rejected": -44.71269226074219, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 65.44842315709371, |
|
"learning_rate": 1.7415055935504234e-06, |
|
"logits/chosen": -1.2928615808486938, |
|
"logits/rejected": -0.5238109230995178, |
|
"logps/chosen": -2183.151123046875, |
|
"logps/rejected": -6693.0283203125, |
|
"loss": 0.0503, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -18.94681167602539, |
|
"rewards/margins": 45.092262268066406, |
|
"rewards/margins_max": 93.51702117919922, |
|
"rewards/margins_min": 9.774969100952148, |
|
"rewards/margins_std": 37.305381774902344, |
|
"rewards/rejected": -64.03907775878906, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 1.5650985472245555, |
|
"learning_rate": 1.6638086480134954e-06, |
|
"logits/chosen": -1.207524061203003, |
|
"logits/rejected": -0.47601214051246643, |
|
"logps/chosen": -1792.2386474609375, |
|
"logps/rejected": -6021.9873046875, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -15.667875289916992, |
|
"rewards/margins": 42.214996337890625, |
|
"rewards/margins_max": 87.77252197265625, |
|
"rewards/margins_min": 9.487606048583984, |
|
"rewards/margins_std": 37.55704879760742, |
|
"rewards/rejected": -57.88287353515625, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 19.45271789860168, |
|
"learning_rate": 1.5870108593710473e-06, |
|
"logits/chosen": -1.3592132329940796, |
|
"logits/rejected": -0.6936360001564026, |
|
"logps/chosen": -1344.2899169921875, |
|
"logps/rejected": -4588.08251953125, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -10.740570068359375, |
|
"rewards/margins": 32.726192474365234, |
|
"rewards/margins_max": 63.50341033935547, |
|
"rewards/margins_min": 8.110437393188477, |
|
"rewards/margins_std": 25.54091453552246, |
|
"rewards/rejected": -43.466758728027344, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_logits/chosen": -1.2633495330810547, |
|
"eval_logits/rejected": -1.1163122653961182, |
|
"eval_logps/chosen": -1868.0721435546875, |
|
"eval_logps/rejected": -2522.087890625, |
|
"eval_loss": 4.195674896240234, |
|
"eval_rewards/accuracies": 0.621999979019165, |
|
"eval_rewards/chosen": -15.931914329528809, |
|
"eval_rewards/margins": 6.713759899139404, |
|
"eval_rewards/margins_max": 41.90719223022461, |
|
"eval_rewards/margins_min": -22.690624237060547, |
|
"eval_rewards/margins_std": 20.96088218688965, |
|
"eval_rewards/rejected": -22.645673751831055, |
|
"eval_runtime": 738.3261, |
|
"eval_samples_per_second": 2.709, |
|
"eval_steps_per_second": 0.169, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 57.39478740350686, |
|
"learning_rate": 1.511194808315853e-06, |
|
"logits/chosen": -1.3517937660217285, |
|
"logits/rejected": -0.7227485775947571, |
|
"logps/chosen": -1058.769775390625, |
|
"logps/rejected": -4626.90625, |
|
"loss": 0.0338, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.303699493408203, |
|
"rewards/margins": 35.37030792236328, |
|
"rewards/margins_max": 70.94367218017578, |
|
"rewards/margins_min": 11.402629852294922, |
|
"rewards/margins_std": 27.145639419555664, |
|
"rewards/rejected": -43.67401123046875, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 0.30572900295523964, |
|
"learning_rate": 1.4364420198778662e-06, |
|
"logits/chosen": -1.4744371175765991, |
|
"logits/rejected": -0.6496419906616211, |
|
"logps/chosen": -1649.629638671875, |
|
"logps/rejected": -6396.2255859375, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.1373929977417, |
|
"rewards/margins": 47.363670349121094, |
|
"rewards/margins_max": 104.21504211425781, |
|
"rewards/margins_min": 10.33763313293457, |
|
"rewards/margins_std": 43.073524475097656, |
|
"rewards/rejected": -60.501068115234375, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 4.84455677819935, |
|
"learning_rate": 1.3628328757603243e-06, |
|
"logits/chosen": -1.1539907455444336, |
|
"logits/rejected": -0.274676114320755, |
|
"logps/chosen": -2860.142578125, |
|
"logps/rejected": -8397.5234375, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -25.513622283935547, |
|
"rewards/margins": 55.286590576171875, |
|
"rewards/margins_max": 105.07356262207031, |
|
"rewards/margins_min": 14.340426445007324, |
|
"rewards/margins_std": 41.029579162597656, |
|
"rewards/rejected": -80.80020904541016, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.0021241478585518002, |
|
"learning_rate": 1.2904465279052725e-06, |
|
"logits/chosen": -1.1582523584365845, |
|
"logits/rejected": -0.3782978653907776, |
|
"logps/chosen": -2420.54833984375, |
|
"logps/rejected": -7015.25, |
|
"loss": 0.0249, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -21.189661026000977, |
|
"rewards/margins": 45.897850036621094, |
|
"rewards/margins_max": 94.14543914794922, |
|
"rewards/margins_min": 7.491143703460693, |
|
"rewards/margins_std": 39.1116828918457, |
|
"rewards/rejected": -67.08751678466797, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.031617935973131554, |
|
"learning_rate": 1.219360813381446e-06, |
|
"logits/chosen": -1.1664245128631592, |
|
"logits/rejected": -0.344910204410553, |
|
"logps/chosen": -1666.4990234375, |
|
"logps/rejected": -6462.9248046875, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.041155815124512, |
|
"rewards/margins": 47.90277862548828, |
|
"rewards/margins_max": 93.52235412597656, |
|
"rewards/margins_min": 12.527769088745117, |
|
"rewards/margins_std": 36.832435607910156, |
|
"rewards/rejected": -61.943939208984375, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 0.00013460482103239127, |
|
"learning_rate": 1.1496521706860392e-06, |
|
"logits/chosen": -1.1053615808486938, |
|
"logits/rejected": -0.3047180771827698, |
|
"logps/chosen": -1850.0924072265625, |
|
"logps/rejected": -6907.0830078125, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.86078929901123, |
|
"rewards/margins": 50.24542999267578, |
|
"rewards/margins_max": 114.74400329589844, |
|
"rewards/margins_min": 12.046091079711914, |
|
"rewards/margins_std": 46.68362808227539, |
|
"rewards/rejected": -66.1062240600586, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0813955575503588e-06, |
|
"logits/chosen": -1.2353287935256958, |
|
"logits/rejected": -0.49261850118637085, |
|
"logps/chosen": -1610.0069580078125, |
|
"logps/rejected": -6046.82470703125, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -13.354846000671387, |
|
"rewards/margins": 44.665653228759766, |
|
"rewards/margins_max": 94.2379379272461, |
|
"rewards/margins_min": 14.147542953491211, |
|
"rewards/margins_std": 37.06581115722656, |
|
"rewards/rejected": -58.02050018310547, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.22671226780074974, |
|
"learning_rate": 1.0146643703377488e-06, |
|
"logits/chosen": -1.314743161201477, |
|
"logits/rejected": -0.6268264055252075, |
|
"logps/chosen": -1836.8834228515625, |
|
"logps/rejected": -5721.7490234375, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.416688919067383, |
|
"rewards/margins": 39.22182083129883, |
|
"rewards/margins_max": 87.73088073730469, |
|
"rewards/margins_min": 7.594358921051025, |
|
"rewards/margins_std": 37.2376708984375, |
|
"rewards/rejected": -54.63850784301758, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.14863704144029602, |
|
"learning_rate": 9.495303651204496e-07, |
|
"logits/chosen": -1.2842717170715332, |
|
"logits/rejected": -0.5629429817199707, |
|
"logps/chosen": -1538.7266845703125, |
|
"logps/rejected": -5633.85009765625, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -12.397846221923828, |
|
"rewards/margins": 41.08926010131836, |
|
"rewards/margins_max": 78.1208724975586, |
|
"rewards/margins_min": 11.591024398803711, |
|
"rewards/margins_std": 29.9827880859375, |
|
"rewards/rejected": -53.48711013793945, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 0.030764744246518656, |
|
"learning_rate": 8.860635805202616e-07, |
|
"logits/chosen": -1.3080496788024902, |
|
"logits/rejected": -0.35764509439468384, |
|
"logps/chosen": -1737.4599609375, |
|
"logps/rejected": -7442.0615234375, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.408218383789062, |
|
"rewards/margins": 56.461143493652344, |
|
"rewards/margins_max": 120.32511901855469, |
|
"rewards/margins_min": 16.574840545654297, |
|
"rewards/margins_std": 48.085899353027344, |
|
"rewards/rejected": -70.8693618774414, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_logits/chosen": -1.093600869178772, |
|
"eval_logits/rejected": -0.9370656609535217, |
|
"eval_logps/chosen": -2551.046142578125, |
|
"eval_logps/rejected": -3403.35693359375, |
|
"eval_loss": 5.91084098815918, |
|
"eval_rewards/accuracies": 0.6230000257492065, |
|
"eval_rewards/chosen": -22.761653900146484, |
|
"eval_rewards/margins": 8.696711540222168, |
|
"eval_rewards/margins_max": 56.638038635253906, |
|
"eval_rewards/margins_min": -31.93355941772461, |
|
"eval_rewards/margins_std": 28.60364532470703, |
|
"eval_rewards/rejected": -31.45836639404297, |
|
"eval_runtime": 738.0909, |
|
"eval_samples_per_second": 2.71, |
|
"eval_steps_per_second": 0.169, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 2.1784962232415586, |
|
"learning_rate": 8.24332262395994e-07, |
|
"logits/chosen": -1.2908947467803955, |
|
"logits/rejected": -0.5336360931396484, |
|
"logps/chosen": -1974.3099365234375, |
|
"logps/rejected": -6812.33740234375, |
|
"loss": 0.014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.995155334472656, |
|
"rewards/margins": 48.02867889404297, |
|
"rewards/margins_max": 103.1453628540039, |
|
"rewards/margins_min": 12.88886833190918, |
|
"rewards/margins_std": 41.732765197753906, |
|
"rewards/rejected": -65.02383422851562, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 0.06615521863125323, |
|
"learning_rate": 7.644027904586587e-07, |
|
"logits/chosen": -1.2683175802230835, |
|
"logits/rejected": -0.41825681924819946, |
|
"logps/chosen": -1580.210693359375, |
|
"logps/rejected": -7023.9873046875, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.912153244018555, |
|
"rewards/margins": 54.1514778137207, |
|
"rewards/margins_max": 105.11991882324219, |
|
"rewards/margins_min": 14.542474746704102, |
|
"rewards/margins_std": 42.142704010009766, |
|
"rewards/rejected": -67.06363677978516, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 5.130329059658452, |
|
"learning_rate": 7.06339606893347e-07, |
|
"logits/chosen": -1.0899183750152588, |
|
"logits/rejected": -0.23333916068077087, |
|
"logps/chosen": -2606.93798828125, |
|
"logps/rejected": -8327.251953125, |
|
"loss": 0.0214, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -22.539045333862305, |
|
"rewards/margins": 57.628456115722656, |
|
"rewards/margins_max": 118.33135986328125, |
|
"rewards/margins_min": 17.841205596923828, |
|
"rewards/margins_std": 45.653324127197266, |
|
"rewards/rejected": -80.16749572753906, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.9977784813046777, |
|
"learning_rate": 6.502051470645149e-07, |
|
"logits/chosen": -1.2162657976150513, |
|
"logits/rejected": -0.460705429315567, |
|
"logps/chosen": -2124.04736328125, |
|
"logps/rejected": -6999.44921875, |
|
"loss": 0.0297, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -18.209606170654297, |
|
"rewards/margins": 48.93000793457031, |
|
"rewards/margins_max": 95.8235855102539, |
|
"rewards/margins_min": 14.549077987670898, |
|
"rewards/margins_std": 37.2866096496582, |
|
"rewards/rejected": -67.13961029052734, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.6712946936221619, |
|
"learning_rate": 5.960597723792194e-07, |
|
"logits/chosen": -1.2636550664901733, |
|
"logits/rejected": -0.2811248004436493, |
|
"logps/chosen": -1696.5123291015625, |
|
"logps/rejected": -7235.36328125, |
|
"loss": 0.0267, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -14.254020690917969, |
|
"rewards/margins": 55.350074768066406, |
|
"rewards/margins_max": 116.00260162353516, |
|
"rewards/margins_min": 13.50024700164795, |
|
"rewards/margins_std": 47.108055114746094, |
|
"rewards/rejected": -69.60409545898438, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 2.1640840935576553, |
|
"learning_rate": 5.43961705380465e-07, |
|
"logits/chosen": -1.2832921743392944, |
|
"logits/rejected": -0.3425557315349579, |
|
"logps/chosen": -2057.831787109375, |
|
"logps/rejected": -7537.8369140625, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.41219139099121, |
|
"rewards/margins": 55.01224899291992, |
|
"rewards/margins_max": 117.5909194946289, |
|
"rewards/margins_min": 12.145246505737305, |
|
"rewards/margins_std": 48.41987228393555, |
|
"rewards/rejected": -72.42444610595703, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 85.12949407648767, |
|
"learning_rate": 4.939669671404871e-07, |
|
"logits/chosen": -1.2509949207305908, |
|
"logits/rejected": -0.4620714783668518, |
|
"logps/chosen": -1721.5452880859375, |
|
"logps/rejected": -6441.6572265625, |
|
"loss": 0.0141, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -14.360589981079102, |
|
"rewards/margins": 46.867820739746094, |
|
"rewards/margins_max": 93.87699890136719, |
|
"rewards/margins_min": 13.444913864135742, |
|
"rewards/margins_std": 36.849510192871094, |
|
"rewards/rejected": -61.228416442871094, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.461293170212644e-07, |
|
"logits/chosen": -1.229883074760437, |
|
"logits/rejected": -0.33345091342926025, |
|
"logps/chosen": -2521.58740234375, |
|
"logps/rejected": -7449.078125, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.13032341003418, |
|
"rewards/margins": 49.36927032470703, |
|
"rewards/margins_max": 106.86543273925781, |
|
"rewards/margins_min": 12.414986610412598, |
|
"rewards/margins_std": 43.098392486572266, |
|
"rewards/rejected": -71.49959564208984, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 1.1810182556014033, |
|
"learning_rate": 4.005001948670606e-07, |
|
"logits/chosen": -1.272280216217041, |
|
"logits/rejected": -0.45221084356307983, |
|
"logps/chosen": -2259.484130859375, |
|
"logps/rejected": -7555.74365234375, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -19.396902084350586, |
|
"rewards/margins": 53.13331985473633, |
|
"rewards/margins_max": 103.8553237915039, |
|
"rewards/margins_min": 17.713207244873047, |
|
"rewards/margins_std": 38.8509407043457, |
|
"rewards/rejected": -72.53022003173828, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.35529843117163307, |
|
"learning_rate": 3.571286656911377e-07, |
|
"logits/chosen": -1.2354185581207275, |
|
"logits/rejected": -0.25159841775894165, |
|
"logps/chosen": -2037.9954833984375, |
|
"logps/rejected": -7491.6142578125, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.313739776611328, |
|
"rewards/margins": 54.32684326171875, |
|
"rewards/margins_max": 107.57340240478516, |
|
"rewards/margins_min": 10.841619491577148, |
|
"rewards/margins_std": 45.87627029418945, |
|
"rewards/rejected": -71.64057922363281, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_logits/chosen": -1.0846086740493774, |
|
"eval_logits/rejected": -0.9208425283432007, |
|
"eval_logps/chosen": -2583.26708984375, |
|
"eval_logps/rejected": -3463.187255859375, |
|
"eval_loss": 5.921300888061523, |
|
"eval_rewards/accuracies": 0.6230000257492065, |
|
"eval_rewards/chosen": -23.083864212036133, |
|
"eval_rewards/margins": 8.97280502319336, |
|
"eval_rewards/margins_max": 56.95476531982422, |
|
"eval_rewards/margins_min": -32.0980110168457, |
|
"eval_rewards/margins_std": 28.85976219177246, |
|
"eval_rewards/rejected": -32.05666732788086, |
|
"eval_runtime": 738.68, |
|
"eval_samples_per_second": 2.708, |
|
"eval_steps_per_second": 0.169, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.10081394980985045, |
|
"learning_rate": 3.1606136691612555e-07, |
|
"logits/chosen": -1.226280689239502, |
|
"logits/rejected": -0.4225079119205475, |
|
"logps/chosen": -1866.7056884765625, |
|
"logps/rejected": -6545.6845703125, |
|
"loss": 0.0179, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.806465148925781, |
|
"rewards/margins": 47.003273010253906, |
|
"rewards/margins_max": 105.39058685302734, |
|
"rewards/margins_min": 13.083778381347656, |
|
"rewards/margins_std": 42.84505081176758, |
|
"rewards/rejected": -62.80973434448242, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 1.3123811940509262, |
|
"learning_rate": 2.773424582247844e-07, |
|
"logits/chosen": -1.2208049297332764, |
|
"logits/rejected": -0.3649698495864868, |
|
"logps/chosen": -1686.8743896484375, |
|
"logps/rejected": -6142.4833984375, |
|
"loss": 0.008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.011746406555176, |
|
"rewards/margins": 44.98779296875, |
|
"rewards/margins_max": 102.88818359375, |
|
"rewards/margins_min": 11.078018188476562, |
|
"rewards/margins_std": 43.19032669067383, |
|
"rewards/rejected": -58.999542236328125, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 0.21161148338166058, |
|
"learning_rate": 2.410135740750821e-07, |
|
"logits/chosen": -1.2054741382598877, |
|
"logits/rejected": -0.38507014513015747, |
|
"logps/chosen": -2171.072021484375, |
|
"logps/rejected": -6896.95458984375, |
|
"loss": 0.0157, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -19.08205795288086, |
|
"rewards/margins": 47.25795364379883, |
|
"rewards/margins_max": 104.60585021972656, |
|
"rewards/margins_min": 10.657424926757812, |
|
"rewards/margins_std": 42.776161193847656, |
|
"rewards/rejected": -66.34001922607422, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.3466449666433785, |
|
"learning_rate": 2.0711377893064182e-07, |
|
"logits/chosen": -1.23805832862854, |
|
"logits/rejected": -0.43131136894226074, |
|
"logps/chosen": -2135.45068359375, |
|
"logps/rejected": -7015.8134765625, |
|
"loss": 0.014, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -18.11324691772461, |
|
"rewards/margins": 48.709083557128906, |
|
"rewards/margins_max": 110.52925872802734, |
|
"rewards/margins_min": 12.27102279663086, |
|
"rewards/margins_std": 45.295265197753906, |
|
"rewards/rejected": -66.82232666015625, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 2.4378946425006456, |
|
"learning_rate": 1.756795252547111e-07, |
|
"logits/chosen": -1.2234621047973633, |
|
"logits/rejected": -0.623192310333252, |
|
"logps/chosen": -1561.3631591796875, |
|
"logps/rejected": -4726.6142578125, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -13.122468948364258, |
|
"rewards/margins": 31.732192993164062, |
|
"rewards/margins_max": 68.03765869140625, |
|
"rewards/margins_min": 5.950772285461426, |
|
"rewards/margins_std": 28.550399780273438, |
|
"rewards/rejected": -44.85466384887695, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4674461431281013e-07, |
|
"logits/chosen": -1.3227882385253906, |
|
"logits/rejected": -0.6799469590187073, |
|
"logps/chosen": -1683.427490234375, |
|
"logps/rejected": -5307.63525390625, |
|
"loss": 0.0084, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.210386276245117, |
|
"rewards/margins": 36.34208679199219, |
|
"rewards/margins_max": 72.8411636352539, |
|
"rewards/margins_min": 9.163644790649414, |
|
"rewards/margins_std": 29.5941104888916, |
|
"rewards/rejected": -50.5524787902832, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 0.0577551737134431, |
|
"learning_rate": 1.2034015982622243e-07, |
|
"logits/chosen": -1.357311487197876, |
|
"logits/rejected": -0.47240549325942993, |
|
"logps/chosen": -1767.5843505859375, |
|
"logps/rejected": -6352.15283203125, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.427714347839355, |
|
"rewards/margins": 45.86684799194336, |
|
"rewards/margins_max": 90.58439636230469, |
|
"rewards/margins_min": 9.973871231079102, |
|
"rewards/margins_std": 37.57789611816406, |
|
"rewards/rejected": -60.29457473754883, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.22029503083466712, |
|
"learning_rate": 9.649455451539419e-08, |
|
"logits/chosen": -1.2634292840957642, |
|
"logits/rejected": -0.45305362343788147, |
|
"logps/chosen": -1745.5113525390625, |
|
"logps/rejected": -5825.91943359375, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.848994255065918, |
|
"rewards/margins": 40.82499694824219, |
|
"rewards/margins_max": 80.49317932128906, |
|
"rewards/margins_min": 10.800613403320312, |
|
"rewards/margins_std": 31.47903060913086, |
|
"rewards/rejected": -55.67399215698242, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.09652421472407109, |
|
"learning_rate": 7.523343956923196e-08, |
|
"logits/chosen": -1.3216396570205688, |
|
"logits/rejected": -0.4804636836051941, |
|
"logps/chosen": -1506.0504150390625, |
|
"logps/rejected": -6089.37158203125, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.122316360473633, |
|
"rewards/margins": 45.81190490722656, |
|
"rewards/margins_max": 100.2818603515625, |
|
"rewards/margins_min": 9.738183975219727, |
|
"rewards/margins_std": 41.03260040283203, |
|
"rewards/rejected": -57.93422317504883, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.37399026907919497, |
|
"learning_rate": 5.657967707312195e-08, |
|
"logits/chosen": -1.2263704538345337, |
|
"logits/rejected": -0.4276729226112366, |
|
"logps/chosen": -1708.02734375, |
|
"logps/rejected": -6663.94140625, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.4015474319458, |
|
"rewards/margins": 49.11872482299805, |
|
"rewards/margins_max": 102.17176818847656, |
|
"rewards/margins_min": 15.611490249633789, |
|
"rewards/margins_std": 40.248619079589844, |
|
"rewards/rejected": -63.52027130126953, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_logits/chosen": -1.0810388326644897, |
|
"eval_logits/rejected": -0.9159793257713318, |
|
"eval_logps/chosen": -2609.25732421875, |
|
"eval_logps/rejected": -3499.874267578125, |
|
"eval_loss": 6.058404922485352, |
|
"eval_rewards/accuracies": 0.628000020980835, |
|
"eval_rewards/chosen": -23.343765258789062, |
|
"eval_rewards/margins": 9.0797700881958, |
|
"eval_rewards/margins_max": 58.32236862182617, |
|
"eval_rewards/margins_min": -32.86642837524414, |
|
"eval_rewards/margins_std": 29.538137435913086, |
|
"eval_rewards/rejected": -32.42353439331055, |
|
"eval_runtime": 739.0567, |
|
"eval_samples_per_second": 2.706, |
|
"eval_steps_per_second": 0.169, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 2.8531847376173625, |
|
"learning_rate": 4.055332542531959e-08, |
|
"logits/chosen": -1.26244056224823, |
|
"logits/rejected": -0.42530936002731323, |
|
"logps/chosen": -2178.673828125, |
|
"logps/rejected": -7108.33203125, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -18.750629425048828, |
|
"rewards/margins": 48.864402770996094, |
|
"rewards/margins_max": 102.9088363647461, |
|
"rewards/margins_min": 12.413164138793945, |
|
"rewards/margins_std": 41.09243392944336, |
|
"rewards/rejected": -67.61502838134766, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.02965677453231411, |
|
"learning_rate": 2.7171617768147472e-08, |
|
"logits/chosen": -1.3160995244979858, |
|
"logits/rejected": -0.41138404607772827, |
|
"logps/chosen": -1946.414794921875, |
|
"logps/rejected": -6739.81494140625, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -16.537607192993164, |
|
"rewards/margins": 47.81861114501953, |
|
"rewards/margins_max": 101.61311340332031, |
|
"rewards/margins_min": 11.498300552368164, |
|
"rewards/margins_std": 40.96593475341797, |
|
"rewards/rejected": -64.35621643066406, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.22573854808180674, |
|
"learning_rate": 1.6448943457189616e-08, |
|
"logits/chosen": -1.2531920671463013, |
|
"logits/rejected": -0.3577966094017029, |
|
"logps/chosen": -1684.554443359375, |
|
"logps/rejected": -7091.48828125, |
|
"loss": 0.0258, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -13.979682922363281, |
|
"rewards/margins": 53.9659538269043, |
|
"rewards/margins_max": 115.4454345703125, |
|
"rewards/margins_min": 13.953027725219727, |
|
"rewards/margins_std": 46.97876739501953, |
|
"rewards/rejected": -67.94563293457031, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 5.432210871045947, |
|
"learning_rate": 8.39683258841123e-09, |
|
"logits/chosen": -1.1665958166122437, |
|
"logits/rejected": -0.5134680867195129, |
|
"logps/chosen": -1703.557373046875, |
|
"logps/rejected": -5138.3486328125, |
|
"loss": 0.0249, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -14.348538398742676, |
|
"rewards/margins": 34.34075164794922, |
|
"rewards/margins_max": 77.13099670410156, |
|
"rewards/margins_min": 9.077662467956543, |
|
"rewards/margins_std": 31.803686141967773, |
|
"rewards/rejected": -48.68928909301758, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 0.11937178792801269, |
|
"learning_rate": 3.0239435998430376e-09, |
|
"logits/chosen": -1.2731530666351318, |
|
"logits/rejected": -0.38636043667793274, |
|
"logps/chosen": -1771.852783203125, |
|
"logps/rejected": -6674.75390625, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.935644149780273, |
|
"rewards/margins": 49.16513442993164, |
|
"rewards/margins_max": 97.74002838134766, |
|
"rewards/margins_min": 9.202229499816895, |
|
"rewards/margins_std": 40.83153533935547, |
|
"rewards/rejected": -64.10078430175781, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 0.044483769560748045, |
|
"learning_rate": 3.3605396115826695e-10, |
|
"logits/chosen": -1.2338615655899048, |
|
"logits/rejected": -0.40085142850875854, |
|
"logps/chosen": -1394.041748046875, |
|
"logps/rejected": -6284.47021484375, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.409103393554688, |
|
"rewards/margins": 48.55421447753906, |
|
"rewards/margins_max": 100.22550964355469, |
|
"rewards/margins_min": 13.580920219421387, |
|
"rewards/margins_std": 40.853858947753906, |
|
"rewards/rejected": -59.96331787109375, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1065, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1413031851636692, |
|
"train_runtime": 20921.2576, |
|
"train_samples_per_second": 0.814, |
|
"train_steps_per_second": 0.051 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1065, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|