|
{ |
|
"best_metric": 1.265723466873169, |
|
"best_model_checkpoint": "saves/Gemma-7B-It/lora/orpo-salt/checkpoint-1500", |
|
"epoch": 2.9969690846635686, |
|
"eval_steps": 500, |
|
"global_step": 1854, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01616488179430188, |
|
"grad_norm": 4.377878189086914, |
|
"learning_rate": 4.999648198770648e-06, |
|
"logits/chosen": 209.9345245361328, |
|
"logits/rejected": 210.6967315673828, |
|
"logps/chosen": -2.4765946865081787, |
|
"logps/rejected": -2.9186055660247803, |
|
"loss": 2.5449, |
|
"odds_ratio_loss": 0.6828715205192566, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.24765947461128235, |
|
"rewards/margins": 0.04420109838247299, |
|
"rewards/rejected": -0.29186058044433594, |
|
"sft_loss": 2.4765946865081787, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03232976358860376, |
|
"grad_norm": 2.781564950942993, |
|
"learning_rate": 4.998578646361359e-06, |
|
"logits/chosen": 210.4038543701172, |
|
"logits/rejected": 212.20718383789062, |
|
"logps/chosen": -2.4702863693237305, |
|
"logps/rejected": -2.504176616668701, |
|
"loss": 2.564, |
|
"odds_ratio_loss": 0.9375804662704468, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.2470286339521408, |
|
"rewards/margins": 0.0033890369813889265, |
|
"rewards/rejected": -0.25041764974594116, |
|
"sft_loss": 2.4702863693237305, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04849464538290564, |
|
"grad_norm": 5.785957336425781, |
|
"learning_rate": 4.996791614004449e-06, |
|
"logits/chosen": 209.83865356445312, |
|
"logits/rejected": 212.08535766601562, |
|
"logps/chosen": -2.6004161834716797, |
|
"logps/rejected": -2.695502758026123, |
|
"loss": 2.6963, |
|
"odds_ratio_loss": 0.9585107564926147, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.26004162430763245, |
|
"rewards/margins": 0.009508667513728142, |
|
"rewards/rejected": -0.26955026388168335, |
|
"sft_loss": 2.6004161834716797, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06465952717720752, |
|
"grad_norm": 7.009506702423096, |
|
"learning_rate": 4.994287614855618e-06, |
|
"logits/chosen": 210.0410614013672, |
|
"logits/rejected": 211.40286254882812, |
|
"logps/chosen": -2.6340386867523193, |
|
"logps/rejected": -2.6249070167541504, |
|
"loss": 2.7374, |
|
"odds_ratio_loss": 1.0338027477264404, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.26340389251708984, |
|
"rewards/margins": -0.0009131729602813721, |
|
"rewards/rejected": -0.2624906897544861, |
|
"sft_loss": 2.6340386867523193, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0808244089715094, |
|
"grad_norm": 4.594735145568848, |
|
"learning_rate": 4.991067367951343e-06, |
|
"logits/chosen": 219.71932983398438, |
|
"logits/rejected": 219.6745147705078, |
|
"logps/chosen": -2.3416378498077393, |
|
"logps/rejected": -2.4940619468688965, |
|
"loss": 2.4215, |
|
"odds_ratio_loss": 0.7983426451683044, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.2341637909412384, |
|
"rewards/margins": 0.015242427587509155, |
|
"rewards/rejected": -0.24940618872642517, |
|
"sft_loss": 2.3416378498077393, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09698929076581128, |
|
"grad_norm": 2.953855276107788, |
|
"learning_rate": 4.987131798002389e-06, |
|
"logits/chosen": 217.3623504638672, |
|
"logits/rejected": 218.24862670898438, |
|
"logps/chosen": -2.2888264656066895, |
|
"logps/rejected": -2.6409952640533447, |
|
"loss": 2.3829, |
|
"odds_ratio_loss": 0.940882682800293, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.22888264060020447, |
|
"rewards/margins": 0.035216934978961945, |
|
"rewards/rejected": -0.2640995383262634, |
|
"sft_loss": 2.2888264656066895, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11315417256011315, |
|
"grad_norm": 4.224141597747803, |
|
"learning_rate": 4.982482035128285e-06, |
|
"logits/chosen": 217.9263458251953, |
|
"logits/rejected": 218.54122924804688, |
|
"logps/chosen": -2.326590061187744, |
|
"logps/rejected": -2.605003833770752, |
|
"loss": 2.4191, |
|
"odds_ratio_loss": 0.9254364967346191, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.2326590120792389, |
|
"rewards/margins": 0.027841363102197647, |
|
"rewards/rejected": -0.26050037145614624, |
|
"sft_loss": 2.326590061187744, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12931905435441504, |
|
"grad_norm": 7.0222883224487305, |
|
"learning_rate": 4.9771194145328e-06, |
|
"logits/chosen": 224.885986328125, |
|
"logits/rejected": 225.7215576171875, |
|
"logps/chosen": -1.8678385019302368, |
|
"logps/rejected": -2.1334400177001953, |
|
"loss": 1.9429, |
|
"odds_ratio_loss": 0.7510749697685242, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1867838352918625, |
|
"rewards/margins": 0.026560146361589432, |
|
"rewards/rejected": -0.213344007730484, |
|
"sft_loss": 1.8678385019302368, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1454839361487169, |
|
"grad_norm": 9.777688026428223, |
|
"learning_rate": 4.971045476120532e-06, |
|
"logits/chosen": 226.64450073242188, |
|
"logits/rejected": 227.11874389648438, |
|
"logps/chosen": -1.9129263162612915, |
|
"logps/rejected": -2.10162091255188, |
|
"loss": 1.9975, |
|
"odds_ratio_loss": 0.8461491465568542, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1912926286458969, |
|
"rewards/margins": 0.01886945590376854, |
|
"rewards/rejected": -0.21016211807727814, |
|
"sft_loss": 1.9129263162612915, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1616488179430188, |
|
"grad_norm": 3.441721200942993, |
|
"learning_rate": 4.964261964054713e-06, |
|
"logits/chosen": 230.32669067382812, |
|
"logits/rejected": 231.39498901367188, |
|
"logps/chosen": -1.8438594341278076, |
|
"logps/rejected": -2.1114680767059326, |
|
"loss": 1.923, |
|
"odds_ratio_loss": 0.7917153239250183, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.18438595533370972, |
|
"rewards/margins": 0.026760881766676903, |
|
"rewards/rejected": -0.21114683151245117, |
|
"sft_loss": 1.8438594341278076, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17781369973732067, |
|
"grad_norm": 3.7387969493865967, |
|
"learning_rate": 4.956770826256372e-06, |
|
"logits/chosen": 233.9343719482422, |
|
"logits/rejected": 234.51516723632812, |
|
"logps/chosen": -1.6228179931640625, |
|
"logps/rejected": -1.8143441677093506, |
|
"loss": 1.6988, |
|
"odds_ratio_loss": 0.7598803043365479, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1622818112373352, |
|
"rewards/margins": 0.01915261521935463, |
|
"rewards/rejected": -0.18143442273139954, |
|
"sft_loss": 1.6228179931640625, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.19397858153162256, |
|
"grad_norm": 2.157771110534668, |
|
"learning_rate": 4.94857421384497e-06, |
|
"logits/chosen": 235.01248168945312, |
|
"logits/rejected": 235.390869140625, |
|
"logps/chosen": -1.6021674871444702, |
|
"logps/rejected": -1.885866403579712, |
|
"loss": 1.6762, |
|
"odds_ratio_loss": 0.7404050230979919, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.1602167785167694, |
|
"rewards/margins": 0.02836987003684044, |
|
"rewards/rejected": -0.18858662247657776, |
|
"sft_loss": 1.6021674871444702, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.21014346332592443, |
|
"grad_norm": 2.794867515563965, |
|
"learning_rate": 4.939674480520701e-06, |
|
"logits/chosen": 236.7910614013672, |
|
"logits/rejected": 237.41806030273438, |
|
"logps/chosen": -1.5707839727401733, |
|
"logps/rejected": -1.6964565515518188, |
|
"loss": 1.6508, |
|
"odds_ratio_loss": 0.8003607988357544, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.15707840025424957, |
|
"rewards/margins": 0.01256726123392582, |
|
"rewards/rejected": -0.16964565217494965, |
|
"sft_loss": 1.5707839727401733, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2263083451202263, |
|
"grad_norm": 1.2237716913223267, |
|
"learning_rate": 4.930074181888613e-06, |
|
"logits/chosen": 240.5333251953125, |
|
"logits/rejected": 241.0712432861328, |
|
"logps/chosen": -1.6245830059051514, |
|
"logps/rejected": -1.83035409450531, |
|
"loss": 1.6912, |
|
"odds_ratio_loss": 0.6659940481185913, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.16245830059051514, |
|
"rewards/margins": 0.02057710848748684, |
|
"rewards/rejected": -0.18303541839122772, |
|
"sft_loss": 1.6245830059051514, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2424732269145282, |
|
"grad_norm": 3.366241693496704, |
|
"learning_rate": 4.91977607472475e-06, |
|
"logits/chosen": 240.38449096679688, |
|
"logits/rejected": 241.23794555664062, |
|
"logps/chosen": -1.5312227010726929, |
|
"logps/rejected": -1.6705970764160156, |
|
"loss": 1.6035, |
|
"odds_ratio_loss": 0.7224593162536621, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.15312227606773376, |
|
"rewards/margins": 0.013937436044216156, |
|
"rewards/rejected": -0.16705971956253052, |
|
"sft_loss": 1.5312227010726929, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2586381087088301, |
|
"grad_norm": 2.1750807762145996, |
|
"learning_rate": 4.908783116184534e-06, |
|
"logits/chosen": 240.67446899414062, |
|
"logits/rejected": 241.75424194335938, |
|
"logps/chosen": -1.4731028079986572, |
|
"logps/rejected": -1.7678340673446655, |
|
"loss": 1.5362, |
|
"odds_ratio_loss": 0.6307698488235474, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.1473102867603302, |
|
"rewards/margins": 0.029473140835762024, |
|
"rewards/rejected": -0.17678341269493103, |
|
"sft_loss": 1.4731028079986572, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.27480299050313195, |
|
"grad_norm": 2.9354147911071777, |
|
"learning_rate": 4.897098462953598e-06, |
|
"logits/chosen": 243.85806274414062, |
|
"logits/rejected": 244.68405151367188, |
|
"logps/chosen": -1.3806183338165283, |
|
"logps/rejected": -1.7258154153823853, |
|
"loss": 1.4423, |
|
"odds_ratio_loss": 0.6170833706855774, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.13806185126304626, |
|
"rewards/margins": 0.03451969474554062, |
|
"rewards/rejected": -0.1725815385580063, |
|
"sft_loss": 1.3806183338165283, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2909678722974338, |
|
"grad_norm": 1.4452638626098633, |
|
"learning_rate": 4.884725470341331e-06, |
|
"logits/chosen": 242.984619140625, |
|
"logits/rejected": 243.6776580810547, |
|
"logps/chosen": -1.2990996837615967, |
|
"logps/rejected": -1.616987943649292, |
|
"loss": 1.3597, |
|
"odds_ratio_loss": 0.6057690382003784, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12990999221801758, |
|
"rewards/margins": 0.031788814812898636, |
|
"rewards/rejected": -0.16169880330562592, |
|
"sft_loss": 1.2990996837615967, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3071327540917357, |
|
"grad_norm": 4.690347194671631, |
|
"learning_rate": 4.871667691317377e-06, |
|
"logits/chosen": 244.59634399414062, |
|
"logits/rejected": 244.5352325439453, |
|
"logps/chosen": -1.4848819971084595, |
|
"logps/rejected": -1.573250412940979, |
|
"loss": 1.5639, |
|
"odds_ratio_loss": 0.7902374267578125, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.14848819375038147, |
|
"rewards/margins": 0.008836844936013222, |
|
"rewards/rejected": -0.15732502937316895, |
|
"sft_loss": 1.4848819971084595, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3232976358860376, |
|
"grad_norm": 7.527270317077637, |
|
"learning_rate": 4.857928875491392e-06, |
|
"logits/chosen": 243.60494995117188, |
|
"logits/rejected": 244.3643035888672, |
|
"logps/chosen": -1.3324997425079346, |
|
"logps/rejected": -1.5205867290496826, |
|
"loss": 1.402, |
|
"odds_ratio_loss": 0.6946145296096802, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.13324996829032898, |
|
"rewards/margins": 0.018808716908097267, |
|
"rewards/rejected": -0.1520586758852005, |
|
"sft_loss": 1.3324997425079346, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33946251768033947, |
|
"grad_norm": 2.1978328227996826, |
|
"learning_rate": 4.843512968036314e-06, |
|
"logits/chosen": 244.3915557861328, |
|
"logits/rejected": 244.58700561523438, |
|
"logps/chosen": -1.3562281131744385, |
|
"logps/rejected": -1.4892576932907104, |
|
"loss": 1.4274, |
|
"odds_ratio_loss": 0.7121940851211548, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1356228142976761, |
|
"rewards/margins": 0.01330297440290451, |
|
"rewards/rejected": -0.1489257663488388, |
|
"sft_loss": 1.3562281131744385, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35562739947464134, |
|
"grad_norm": 6.31206750869751, |
|
"learning_rate": 4.828424108555486e-06, |
|
"logits/chosen": 246.1901092529297, |
|
"logits/rejected": 246.36703491210938, |
|
"logps/chosen": -1.5392124652862549, |
|
"logps/rejected": -1.7705978155136108, |
|
"loss": 1.6086, |
|
"odds_ratio_loss": 0.6943382024765015, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1539212465286255, |
|
"rewards/margins": 0.023138541728258133, |
|
"rewards/rejected": -0.17705979943275452, |
|
"sft_loss": 1.5392124652862549, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3717922812689432, |
|
"grad_norm": 1.1257890462875366, |
|
"learning_rate": 4.812666629893957e-06, |
|
"logits/chosen": 246.37399291992188, |
|
"logits/rejected": 246.72891235351562, |
|
"logps/chosen": -1.3704453706741333, |
|
"logps/rejected": -1.4485595226287842, |
|
"loss": 1.4433, |
|
"odds_ratio_loss": 0.7287623882293701, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1370445340871811, |
|
"rewards/margins": 0.007811415940523148, |
|
"rewards/rejected": -0.14485594630241394, |
|
"sft_loss": 1.3704453706741333, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3879571630632451, |
|
"grad_norm": 1.9700157642364502, |
|
"learning_rate": 4.796245056894273e-06, |
|
"logits/chosen": 244.54165649414062, |
|
"logits/rejected": 244.89407348632812, |
|
"logps/chosen": -1.4429550170898438, |
|
"logps/rejected": -1.5743396282196045, |
|
"loss": 1.5184, |
|
"odds_ratio_loss": 0.7547486424446106, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.14429552853107452, |
|
"rewards/margins": 0.013138455338776112, |
|
"rewards/rejected": -0.15743397176265717, |
|
"sft_loss": 1.4429550170898438, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.404122044857547, |
|
"grad_norm": 1.5832947492599487, |
|
"learning_rate": 4.779164105097148e-06, |
|
"logits/chosen": 246.41659545898438, |
|
"logits/rejected": 246.4707489013672, |
|
"logps/chosen": -1.3124094009399414, |
|
"logps/rejected": -1.5739551782608032, |
|
"loss": 1.3768, |
|
"odds_ratio_loss": 0.6443756818771362, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13124093413352966, |
|
"rewards/margins": 0.026154566556215286, |
|
"rewards/rejected": -0.15739551186561584, |
|
"sft_loss": 1.3124094009399414, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.42028692665184886, |
|
"grad_norm": 2.2224152088165283, |
|
"learning_rate": 4.761428679387373e-06, |
|
"logits/chosen": 247.0335235595703, |
|
"logits/rejected": 247.7626953125, |
|
"logps/chosen": -1.2735482454299927, |
|
"logps/rejected": -1.5084031820297241, |
|
"loss": 1.3358, |
|
"odds_ratio_loss": 0.6226388216018677, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.12735481560230255, |
|
"rewards/margins": 0.02348550595343113, |
|
"rewards/rejected": -0.15084032714366913, |
|
"sft_loss": 1.2735482454299927, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4364518084461507, |
|
"grad_norm": 2.0271799564361572, |
|
"learning_rate": 4.7430438725853515e-06, |
|
"logits/chosen": 247.60205078125, |
|
"logits/rejected": 247.61654663085938, |
|
"logps/chosen": -1.3570277690887451, |
|
"logps/rejected": -1.714133858680725, |
|
"loss": 1.4226, |
|
"odds_ratio_loss": 0.6552777290344238, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.13570277392864227, |
|
"rewards/margins": 0.035710614174604416, |
|
"rewards/rejected": -0.171413391828537, |
|
"sft_loss": 1.3570277690887451, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4526166902404526, |
|
"grad_norm": 2.142329216003418, |
|
"learning_rate": 4.724014963984669e-06, |
|
"logits/chosen": 248.28439331054688, |
|
"logits/rejected": 249.0177459716797, |
|
"logps/chosen": -1.3674625158309937, |
|
"logps/rejected": -1.6127933263778687, |
|
"loss": 1.435, |
|
"odds_ratio_loss": 0.6751004457473755, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.13674625754356384, |
|
"rewards/margins": 0.02453308179974556, |
|
"rewards/rejected": -0.1612793505191803, |
|
"sft_loss": 1.3674625158309937, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4687815720347545, |
|
"grad_norm": 2.8357582092285156, |
|
"learning_rate": 4.704347417836116e-06, |
|
"logits/chosen": 247.2007598876953, |
|
"logits/rejected": 247.60107421875, |
|
"logps/chosen": -1.2728191614151, |
|
"logps/rejected": -1.5069888830184937, |
|
"loss": 1.3382, |
|
"odds_ratio_loss": 0.6542028784751892, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12728191912174225, |
|
"rewards/margins": 0.023416969925165176, |
|
"rewards/rejected": -0.15069888532161713, |
|
"sft_loss": 1.2728191614151, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4849464538290564, |
|
"grad_norm": 3.075584888458252, |
|
"learning_rate": 4.684046881778603e-06, |
|
"logits/chosen": 247.69580078125, |
|
"logits/rejected": 247.7963409423828, |
|
"logps/chosen": -1.3267529010772705, |
|
"logps/rejected": -1.46425461769104, |
|
"loss": 1.3929, |
|
"odds_ratio_loss": 0.6614553332328796, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.13267529010772705, |
|
"rewards/margins": 0.013750175014138222, |
|
"rewards/rejected": -0.14642547070980072, |
|
"sft_loss": 1.3267529010772705, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5011113356233583, |
|
"grad_norm": 1.1745957136154175, |
|
"learning_rate": 4.663119185217409e-06, |
|
"logits/chosen": 247.5077667236328, |
|
"logits/rejected": 247.80752563476562, |
|
"logps/chosen": -1.2750051021575928, |
|
"logps/rejected": -1.5364891290664673, |
|
"loss": 1.3385, |
|
"odds_ratio_loss": 0.6352204084396362, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1275005042552948, |
|
"rewards/margins": 0.026148397475481033, |
|
"rewards/rejected": -0.15364892780780792, |
|
"sft_loss": 1.2750051021575928, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5172762174176602, |
|
"grad_norm": 1.1816167831420898, |
|
"learning_rate": 4.641570337650232e-06, |
|
"logits/chosen": 248.5536651611328, |
|
"logits/rejected": 248.5113067626953, |
|
"logps/chosen": -1.1914936304092407, |
|
"logps/rejected": -1.4479808807373047, |
|
"loss": 1.2531, |
|
"odds_ratio_loss": 0.615585446357727, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11914938688278198, |
|
"rewards/margins": 0.025648722425103188, |
|
"rewards/rejected": -0.14479808509349823, |
|
"sft_loss": 1.1914936304092407, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.533441099211962, |
|
"grad_norm": 6.805661678314209, |
|
"learning_rate": 4.61940652694154e-06, |
|
"logits/chosen": 246.8784637451172, |
|
"logits/rejected": 247.60842895507812, |
|
"logps/chosen": -1.371927261352539, |
|
"logps/rejected": -1.4951013326644897, |
|
"loss": 1.444, |
|
"odds_ratio_loss": 0.7208858728408813, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1371927261352539, |
|
"rewards/margins": 0.012317392975091934, |
|
"rewards/rejected": -0.14951011538505554, |
|
"sft_loss": 1.371927261352539, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5496059810062639, |
|
"grad_norm": 2.8288872241973877, |
|
"learning_rate": 4.596634117545689e-06, |
|
"logits/chosen": 248.96542358398438, |
|
"logits/rejected": 249.38369750976562, |
|
"logps/chosen": -1.3861172199249268, |
|
"logps/rejected": -1.6291033029556274, |
|
"loss": 1.4514, |
|
"odds_ratio_loss": 0.6529659032821655, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13861171901226044, |
|
"rewards/margins": 0.02429860271513462, |
|
"rewards/rejected": -0.1629103422164917, |
|
"sft_loss": 1.3861172199249268, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"grad_norm": 2.343557834625244, |
|
"learning_rate": 4.573259648679335e-06, |
|
"logits/chosen": 247.5604248046875, |
|
"logits/rejected": 247.8214111328125, |
|
"logps/chosen": -1.3334286212921143, |
|
"logps/rejected": -1.642163634300232, |
|
"loss": 1.3937, |
|
"odds_ratio_loss": 0.603044331073761, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.13334286212921143, |
|
"rewards/margins": 0.030873507261276245, |
|
"rewards/rejected": -0.16421635448932648, |
|
"sft_loss": 1.3334286212921143, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5819357445948676, |
|
"grad_norm": 6.341250896453857, |
|
"learning_rate": 4.549289832443663e-06, |
|
"logits/chosen": 249.6760711669922, |
|
"logits/rejected": 249.2826385498047, |
|
"logps/chosen": -1.2829958200454712, |
|
"logps/rejected": -1.5420135259628296, |
|
"loss": 1.351, |
|
"odds_ratio_loss": 0.6805331110954285, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.12829959392547607, |
|
"rewards/margins": 0.025901764631271362, |
|
"rewards/rejected": -0.15420134365558624, |
|
"sft_loss": 1.2829958200454712, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5981006263891695, |
|
"grad_norm": 1.415165901184082, |
|
"learning_rate": 4.524731551896978e-06, |
|
"logits/chosen": 247.46142578125, |
|
"logits/rejected": 247.42459106445312, |
|
"logps/chosen": -1.2169711589813232, |
|
"logps/rejected": -1.3963136672973633, |
|
"loss": 1.2853, |
|
"odds_ratio_loss": 0.6832239031791687, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12169712781906128, |
|
"rewards/margins": 0.017934244126081467, |
|
"rewards/rejected": -0.13963137567043304, |
|
"sft_loss": 1.2169711589813232, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6142655081834714, |
|
"grad_norm": 2.7373573780059814, |
|
"learning_rate": 4.4995918590781925e-06, |
|
"logits/chosen": 250.4862518310547, |
|
"logits/rejected": 250.1310272216797, |
|
"logps/chosen": -1.2185784578323364, |
|
"logps/rejected": -1.435258150100708, |
|
"loss": 1.2834, |
|
"odds_ratio_loss": 0.6484395265579224, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12185785919427872, |
|
"rewards/margins": 0.021667957305908203, |
|
"rewards/rejected": -0.14352580904960632, |
|
"sft_loss": 1.2185784578323364, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6304303899777733, |
|
"grad_norm": 1.0431718826293945, |
|
"learning_rate": 4.473877972981797e-06, |
|
"logits/chosen": 247.82730102539062, |
|
"logits/rejected": 248.197998046875, |
|
"logps/chosen": -1.3133275508880615, |
|
"logps/rejected": -1.566138505935669, |
|
"loss": 1.378, |
|
"odds_ratio_loss": 0.6465703248977661, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.13133276998996735, |
|
"rewards/margins": 0.02528109773993492, |
|
"rewards/rejected": -0.15661385655403137, |
|
"sft_loss": 1.3133275508880615, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6465952717720752, |
|
"grad_norm": 2.605905771255493, |
|
"learning_rate": 4.447597277484894e-06, |
|
"logits/chosen": 248.4889678955078, |
|
"logits/rejected": 248.0493927001953, |
|
"logps/chosen": -1.1982879638671875, |
|
"logps/rejected": -1.3909344673156738, |
|
"loss": 1.2662, |
|
"odds_ratio_loss": 0.6788827180862427, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.11982879787683487, |
|
"rewards/margins": 0.01926465705037117, |
|
"rewards/rejected": -0.13909344375133514, |
|
"sft_loss": 1.1982879638671875, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6627601535663771, |
|
"grad_norm": 2.7441976070404053, |
|
"learning_rate": 4.42075731922687e-06, |
|
"logits/chosen": 250.9984893798828, |
|
"logits/rejected": 250.879150390625, |
|
"logps/chosen": -1.3381072282791138, |
|
"logps/rejected": -1.476546049118042, |
|
"loss": 1.4062, |
|
"odds_ratio_loss": 0.6813501119613647, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13381072878837585, |
|
"rewards/margins": 0.013843873515725136, |
|
"rewards/rejected": -0.14765460789203644, |
|
"sft_loss": 1.3381072282791138, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6789250353606789, |
|
"grad_norm": 3.2034897804260254, |
|
"learning_rate": 4.3933658054423465e-06, |
|
"logits/chosen": 249.34951782226562, |
|
"logits/rejected": 249.37582397460938, |
|
"logps/chosen": -1.2343724966049194, |
|
"logps/rejected": -1.4455711841583252, |
|
"loss": 1.2964, |
|
"odds_ratio_loss": 0.6205655932426453, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.12343724817037582, |
|
"rewards/margins": 0.021119873970746994, |
|
"rewards/rejected": -0.1445571333169937, |
|
"sft_loss": 1.2343724966049194, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6950899171549808, |
|
"grad_norm": 2.552898645401001, |
|
"learning_rate": 4.365430601748003e-06, |
|
"logits/chosen": 247.7689208984375, |
|
"logits/rejected": 247.95205688476562, |
|
"logps/chosen": -1.3558423519134521, |
|
"logps/rejected": -1.4942983388900757, |
|
"loss": 1.4265, |
|
"odds_ratio_loss": 0.7065616250038147, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.13558423519134521, |
|
"rewards/margins": 0.013845594599843025, |
|
"rewards/rejected": -0.1494298279285431, |
|
"sft_loss": 1.3558423519134521, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7112547989492827, |
|
"grad_norm": 7.701834201812744, |
|
"learning_rate": 4.336959729883925e-06, |
|
"logits/chosen": 248.16812133789062, |
|
"logits/rejected": 248.47384643554688, |
|
"logps/chosen": -1.2508445978164673, |
|
"logps/rejected": -1.3401494026184082, |
|
"loss": 1.3242, |
|
"odds_ratio_loss": 0.7333300113677979, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12508445978164673, |
|
"rewards/margins": 0.008930487558245659, |
|
"rewards/rejected": -0.13401496410369873, |
|
"sft_loss": 1.2508445978164673, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7274196807435845, |
|
"grad_norm": 1.3677743673324585, |
|
"learning_rate": 4.307961365410118e-06, |
|
"logits/chosen": 249.19546508789062, |
|
"logits/rejected": 249.5364990234375, |
|
"logps/chosen": -1.2851893901824951, |
|
"logps/rejected": -1.4277610778808594, |
|
"loss": 1.3525, |
|
"odds_ratio_loss": 0.6732175946235657, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12851892411708832, |
|
"rewards/margins": 0.014257180504500866, |
|
"rewards/rejected": -0.14277611672878265, |
|
"sft_loss": 1.2851893901824951, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7435845625378864, |
|
"grad_norm": 3.3310444355010986, |
|
"learning_rate": 4.278443835358854e-06, |
|
"logits/chosen": 249.6570281982422, |
|
"logits/rejected": 249.6079864501953, |
|
"logps/chosen": -1.1893627643585205, |
|
"logps/rejected": -1.4945032596588135, |
|
"loss": 1.2482, |
|
"odds_ratio_loss": 0.5885173082351685, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11893627792596817, |
|
"rewards/margins": 0.030514035373926163, |
|
"rewards/rejected": -0.14945031702518463, |
|
"sft_loss": 1.1893627643585205, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7597494443321883, |
|
"grad_norm": 2.5770180225372314, |
|
"learning_rate": 4.248415615843523e-06, |
|
"logits/chosen": 249.5537567138672, |
|
"logits/rejected": 249.5972442626953, |
|
"logps/chosen": -1.2710212469100952, |
|
"logps/rejected": -1.4037456512451172, |
|
"loss": 1.3415, |
|
"odds_ratio_loss": 0.7046067714691162, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12710212171077728, |
|
"rewards/margins": 0.01327243447303772, |
|
"rewards/rejected": -0.140374556183815, |
|
"sft_loss": 1.2710212469100952, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7759143261264903, |
|
"grad_norm": 9.182385444641113, |
|
"learning_rate": 4.217885329624666e-06, |
|
"logits/chosen": 249.1255645751953, |
|
"logits/rejected": 249.16854858398438, |
|
"logps/chosen": -1.1571811437606812, |
|
"logps/rejected": -1.4825657606124878, |
|
"loss": 1.2175, |
|
"odds_ratio_loss": 0.6027355194091797, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.11571812629699707, |
|
"rewards/margins": 0.032538462430238724, |
|
"rewards/rejected": -0.1482565701007843, |
|
"sft_loss": 1.1571811437606812, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7920792079207921, |
|
"grad_norm": 2.0430970191955566, |
|
"learning_rate": 4.186861743633911e-06, |
|
"logits/chosen": 248.51168823242188, |
|
"logits/rejected": 248.83370971679688, |
|
"logps/chosen": -1.216133713722229, |
|
"logps/rejected": -1.4709254503250122, |
|
"loss": 1.2856, |
|
"odds_ratio_loss": 0.694364070892334, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12161336094141006, |
|
"rewards/margins": 0.025479182600975037, |
|
"rewards/rejected": -0.1470925360918045, |
|
"sft_loss": 1.216133713722229, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.808244089715094, |
|
"grad_norm": 2.13413143157959, |
|
"learning_rate": 4.155353766456497e-06, |
|
"logits/chosen": 252.05142211914062, |
|
"logits/rejected": 251.9636993408203, |
|
"logps/chosen": -1.3067327737808228, |
|
"logps/rejected": -1.4753313064575195, |
|
"loss": 1.374, |
|
"odds_ratio_loss": 0.6729229688644409, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.13067328929901123, |
|
"rewards/margins": 0.01685984991490841, |
|
"rewards/rejected": -0.147533118724823, |
|
"sft_loss": 1.3067327737808228, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.808244089715094, |
|
"eval_logits/chosen": 249.61227416992188, |
|
"eval_logits/rejected": 249.90635681152344, |
|
"eval_logps/chosen": -1.2762008905410767, |
|
"eval_logps/rejected": -1.5033098459243774, |
|
"eval_loss": 1.3435848951339722, |
|
"eval_odds_ratio_loss": 0.6738389730453491, |
|
"eval_rewards/accuracies": 0.5672727227210999, |
|
"eval_rewards/chosen": -0.12762011587619781, |
|
"eval_rewards/margins": 0.0227108895778656, |
|
"eval_rewards/rejected": -0.15033100545406342, |
|
"eval_runtime": 221.4313, |
|
"eval_samples_per_second": 4.968, |
|
"eval_sft_loss": 1.2762008905410767, |
|
"eval_steps_per_second": 2.484, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8244089715093958, |
|
"grad_norm": 2.4113426208496094, |
|
"learning_rate": 4.123370445773134e-06, |
|
"logits/chosen": 250.3010711669922, |
|
"logits/rejected": 250.5003662109375, |
|
"logps/chosen": -1.2399041652679443, |
|
"logps/rejected": -1.3490018844604492, |
|
"loss": 1.3103, |
|
"odds_ratio_loss": 0.7042885422706604, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12399041652679443, |
|
"rewards/margins": 0.010909780859947205, |
|
"rewards/rejected": -0.13490018248558044, |
|
"sft_loss": 1.2399041652679443, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8405738533036977, |
|
"grad_norm": 4.632988452911377, |
|
"learning_rate": 4.090920965761906e-06, |
|
"logits/chosen": 249.44210815429688, |
|
"logits/rejected": 249.96994018554688, |
|
"logps/chosen": -1.2807283401489258, |
|
"logps/rejected": -1.4942976236343384, |
|
"loss": 1.3484, |
|
"odds_ratio_loss": 0.6771414875984192, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1280728280544281, |
|
"rewards/margins": 0.02135692723095417, |
|
"rewards/rejected": -0.14942976832389832, |
|
"sft_loss": 1.2807283401489258, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8567387350979996, |
|
"grad_norm": 9.196592330932617, |
|
"learning_rate": 4.058014644460991e-06, |
|
"logits/chosen": 250.1853790283203, |
|
"logits/rejected": 250.6529083251953, |
|
"logps/chosen": -1.2633569240570068, |
|
"logps/rejected": -1.4294393062591553, |
|
"loss": 1.329, |
|
"odds_ratio_loss": 0.6560603976249695, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.12633569538593292, |
|
"rewards/margins": 0.01660825125873089, |
|
"rewards/rejected": -0.14294394850730896, |
|
"sft_loss": 1.2633569240570068, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8729036168923014, |
|
"grad_norm": 1.8403383493423462, |
|
"learning_rate": 4.024660931092939e-06, |
|
"logits/chosen": 250.708251953125, |
|
"logits/rejected": 251.0161895751953, |
|
"logps/chosen": -1.287913203239441, |
|
"logps/rejected": -1.553476095199585, |
|
"loss": 1.3531, |
|
"odds_ratio_loss": 0.6521779298782349, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.12879131734371185, |
|
"rewards/margins": 0.026556288823485374, |
|
"rewards/rejected": -0.15534761548042297, |
|
"sft_loss": 1.287913203239441, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8890684986866033, |
|
"grad_norm": 7.186382293701172, |
|
"learning_rate": 3.990869403351272e-06, |
|
"logits/chosen": 251.8153839111328, |
|
"logits/rejected": 251.8900604248047, |
|
"logps/chosen": -1.268169641494751, |
|
"logps/rejected": -1.511528491973877, |
|
"loss": 1.3283, |
|
"odds_ratio_loss": 0.6014903783798218, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.12681695818901062, |
|
"rewards/margins": 0.024335889145731926, |
|
"rewards/rejected": -0.1511528491973877, |
|
"sft_loss": 1.268169641494751, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9052333804809052, |
|
"grad_norm": 2.923800230026245, |
|
"learning_rate": 3.956649764650206e-06, |
|
"logits/chosen": 250.7381591796875, |
|
"logits/rejected": 250.7707061767578, |
|
"logps/chosen": -1.2698795795440674, |
|
"logps/rejected": -1.4995825290679932, |
|
"loss": 1.3379, |
|
"odds_ratio_loss": 0.6804186105728149, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.12698796391487122, |
|
"rewards/margins": 0.0229702927172184, |
|
"rewards/rejected": -0.14995825290679932, |
|
"sft_loss": 1.2698795795440674, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9213982622752072, |
|
"grad_norm": 6.1557416915893555, |
|
"learning_rate": 3.92201184133826e-06, |
|
"logits/chosen": 250.94808959960938, |
|
"logits/rejected": 251.642822265625, |
|
"logps/chosen": -1.2907052040100098, |
|
"logps/rejected": -1.54143226146698, |
|
"loss": 1.3538, |
|
"odds_ratio_loss": 0.6311336755752563, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.12907053530216217, |
|
"rewards/margins": 0.025072699412703514, |
|
"rewards/rejected": -0.15414324402809143, |
|
"sft_loss": 1.2907052040100098, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.937563144069509, |
|
"grad_norm": 2.1665000915527344, |
|
"learning_rate": 3.886965579876572e-06, |
|
"logits/chosen": 252.3577423095703, |
|
"logits/rejected": 252.0865478515625, |
|
"logps/chosen": -1.2575817108154297, |
|
"logps/rejected": -1.3686320781707764, |
|
"loss": 1.3293, |
|
"odds_ratio_loss": 0.7170482873916626, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.12575815618038177, |
|
"rewards/margins": 0.011105048470199108, |
|
"rewards/rejected": -0.13686320185661316, |
|
"sft_loss": 1.2575817108154297, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9537280258638109, |
|
"grad_norm": 2.289733648300171, |
|
"learning_rate": 3.851521043982716e-06, |
|
"logits/chosen": 251.7819061279297, |
|
"logits/rejected": 251.52749633789062, |
|
"logps/chosen": -1.2542387247085571, |
|
"logps/rejected": -1.3954790830612183, |
|
"loss": 1.3206, |
|
"odds_ratio_loss": 0.6639243960380554, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1254238784313202, |
|
"rewards/margins": 0.014124047942459583, |
|
"rewards/rejected": -0.1395479142665863, |
|
"sft_loss": 1.2542387247085571, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9698929076581128, |
|
"grad_norm": 2.7564313411712646, |
|
"learning_rate": 3.81568841174086e-06, |
|
"logits/chosen": 251.03280639648438, |
|
"logits/rejected": 251.2174835205078, |
|
"logps/chosen": -1.2807530164718628, |
|
"logps/rejected": -1.5129293203353882, |
|
"loss": 1.3482, |
|
"odds_ratio_loss": 0.674277663230896, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12807528674602509, |
|
"rewards/margins": 0.023217635229229927, |
|
"rewards/rejected": -0.15129292011260986, |
|
"sft_loss": 1.2807530164718628, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9860577894524146, |
|
"grad_norm": 2.1846888065338135, |
|
"learning_rate": 3.7794779726790664e-06, |
|
"logits/chosen": 249.8391571044922, |
|
"logits/rejected": 250.3789520263672, |
|
"logps/chosen": -1.1555012464523315, |
|
"logps/rejected": -1.3768011331558228, |
|
"loss": 1.2212, |
|
"odds_ratio_loss": 0.6573610305786133, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11555011570453644, |
|
"rewards/margins": 0.022129978984594345, |
|
"rewards/rejected": -0.13768009841442108, |
|
"sft_loss": 1.1555012464523315, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0022226712467166, |
|
"grad_norm": 2.2191011905670166, |
|
"learning_rate": 3.7429001248146096e-06, |
|
"logits/chosen": 250.8198699951172, |
|
"logits/rejected": 251.24819946289062, |
|
"logps/chosen": -1.272541880607605, |
|
"logps/rejected": -1.5292177200317383, |
|
"loss": 1.3338, |
|
"odds_ratio_loss": 0.6125348806381226, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1272541880607605, |
|
"rewards/margins": 0.0256675872951746, |
|
"rewards/rejected": -0.15292176604270935, |
|
"sft_loss": 1.272541880607605, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0183875530410185, |
|
"grad_norm": 1.6834843158721924, |
|
"learning_rate": 3.7059653716681227e-06, |
|
"logits/chosen": 250.3338623046875, |
|
"logits/rejected": 250.6593780517578, |
|
"logps/chosen": -1.2664134502410889, |
|
"logps/rejected": -1.469812035560608, |
|
"loss": 1.3343, |
|
"odds_ratio_loss": 0.6792756915092468, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.12664134800434113, |
|
"rewards/margins": 0.020339861512184143, |
|
"rewards/rejected": -0.14698120951652527, |
|
"sft_loss": 1.2664134502410889, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0345524348353203, |
|
"grad_norm": 5.188844203948975, |
|
"learning_rate": 3.668684319247463e-06, |
|
"logits/chosen": 249.46969604492188, |
|
"logits/rejected": 250.1366729736328, |
|
"logps/chosen": -1.1969501972198486, |
|
"logps/rejected": -1.5598738193511963, |
|
"loss": 1.2558, |
|
"odds_ratio_loss": 0.588589072227478, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11969500780105591, |
|
"rewards/margins": 0.03629238158464432, |
|
"rewards/rejected": -0.15598741173744202, |
|
"sft_loss": 1.1969501972198486, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.0507173166296222, |
|
"grad_norm": 1.8501890897750854, |
|
"learning_rate": 3.6310676730021373e-06, |
|
"logits/chosen": 250.78857421875, |
|
"logits/rejected": 250.8007354736328, |
|
"logps/chosen": -1.2203996181488037, |
|
"logps/rejected": -1.3524749279022217, |
|
"loss": 1.2867, |
|
"odds_ratio_loss": 0.662962794303894, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12203995883464813, |
|
"rewards/margins": 0.013207539916038513, |
|
"rewards/rejected": -0.13524749875068665, |
|
"sft_loss": 1.2203996181488037, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.066882198423924, |
|
"grad_norm": 3.5492091178894043, |
|
"learning_rate": 3.593126234749178e-06, |
|
"logits/chosen": 250.8761749267578, |
|
"logits/rejected": 251.28622436523438, |
|
"logps/chosen": -1.2661250829696655, |
|
"logps/rejected": -1.455129861831665, |
|
"loss": 1.3334, |
|
"odds_ratio_loss": 0.6727336645126343, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12661252915859222, |
|
"rewards/margins": 0.01890045776963234, |
|
"rewards/rejected": -0.14551296830177307, |
|
"sft_loss": 1.2661250829696655, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.083047080218226, |
|
"grad_norm": 3.5715062618255615, |
|
"learning_rate": 3.554870899571343e-06, |
|
"logits/chosen": 252.4844512939453, |
|
"logits/rejected": 252.82699584960938, |
|
"logps/chosen": -1.2469182014465332, |
|
"logps/rejected": -1.4401594400405884, |
|
"loss": 1.3131, |
|
"odds_ratio_loss": 0.6617658734321594, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12469182908535004, |
|
"rewards/margins": 0.019324112683534622, |
|
"rewards/rejected": -0.14401593804359436, |
|
"sft_loss": 1.2469182014465332, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.0992119620125278, |
|
"grad_norm": 4.318095684051514, |
|
"learning_rate": 3.5163126526885373e-06, |
|
"logits/chosen": 252.0143585205078, |
|
"logits/rejected": 251.80081176757812, |
|
"logps/chosen": -1.1914775371551514, |
|
"logps/rejected": -1.4009137153625488, |
|
"loss": 1.2573, |
|
"odds_ratio_loss": 0.6579803824424744, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11914775520563126, |
|
"rewards/margins": 0.02094360813498497, |
|
"rewards/rejected": -0.14009135961532593, |
|
"sft_loss": 1.1914775371551514, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1153768438068297, |
|
"grad_norm": 2.403775930404663, |
|
"learning_rate": 3.4774625663033484e-06, |
|
"logits/chosen": 251.2095184326172, |
|
"logits/rejected": 251.48507690429688, |
|
"logps/chosen": -1.2048381567001343, |
|
"logps/rejected": -1.3877532482147217, |
|
"loss": 1.27, |
|
"odds_ratio_loss": 0.6517833471298218, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.12048381567001343, |
|
"rewards/margins": 0.018291514366865158, |
|
"rewards/rejected": -0.13877533376216888, |
|
"sft_loss": 1.2048381567001343, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"grad_norm": 1.7898093461990356, |
|
"learning_rate": 3.4383317964216067e-06, |
|
"logits/chosen": 252.33316040039062, |
|
"logits/rejected": 252.1842498779297, |
|
"logps/chosen": -1.1471569538116455, |
|
"logps/rejected": -1.306755781173706, |
|
"loss": 1.2157, |
|
"odds_ratio_loss": 0.6855098009109497, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.11471569538116455, |
|
"rewards/margins": 0.01595989242196083, |
|
"rewards/rejected": -0.13067558407783508, |
|
"sft_loss": 1.1471569538116455, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1477066073954334, |
|
"grad_norm": 3.209373712539673, |
|
"learning_rate": 3.398931579648877e-06, |
|
"logits/chosen": 251.15170288085938, |
|
"logits/rejected": 251.59976196289062, |
|
"logps/chosen": -1.239712119102478, |
|
"logps/rejected": -1.5323327779769897, |
|
"loss": 1.3045, |
|
"odds_ratio_loss": 0.6475890874862671, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12397120893001556, |
|
"rewards/margins": 0.029262065887451172, |
|
"rewards/rejected": -0.15323328971862793, |
|
"sft_loss": 1.239712119102478, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1638714891897353, |
|
"grad_norm": 2.6601579189300537, |
|
"learning_rate": 3.359273229963813e-06, |
|
"logits/chosen": 250.285400390625, |
|
"logits/rejected": 250.47323608398438, |
|
"logps/chosen": -1.2064179182052612, |
|
"logps/rejected": -1.3739216327667236, |
|
"loss": 1.2742, |
|
"odds_ratio_loss": 0.6774007081985474, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.12064179033041, |
|
"rewards/margins": 0.01675037480890751, |
|
"rewards/rejected": -0.13739216327667236, |
|
"sft_loss": 1.2064179182052612, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1800363709840371, |
|
"grad_norm": 1.836297631263733, |
|
"learning_rate": 3.319368135469285e-06, |
|
"logits/chosen": 251.77334594726562, |
|
"logits/rejected": 252.28701782226562, |
|
"logps/chosen": -1.2479230165481567, |
|
"logps/rejected": -1.4433178901672363, |
|
"loss": 1.3175, |
|
"odds_ratio_loss": 0.6954701542854309, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12479230016469955, |
|
"rewards/margins": 0.019539497792720795, |
|
"rewards/rejected": -0.14433178305625916, |
|
"sft_loss": 1.2479230165481567, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.196201252778339, |
|
"grad_norm": 3.1846110820770264, |
|
"learning_rate": 3.279227755122228e-06, |
|
"logits/chosen": 252.08438110351562, |
|
"logits/rejected": 252.65792846679688, |
|
"logps/chosen": -1.196380376815796, |
|
"logps/rejected": -1.5170973539352417, |
|
"loss": 1.2585, |
|
"odds_ratio_loss": 0.6215213537216187, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.11963804066181183, |
|
"rewards/margins": 0.032071683555841446, |
|
"rewards/rejected": -0.15170973539352417, |
|
"sft_loss": 1.196380376815796, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.2123661345726409, |
|
"grad_norm": 3.024951934814453, |
|
"learning_rate": 3.2388636154431417e-06, |
|
"logits/chosen": 253.1087646484375, |
|
"logits/rejected": 253.23635864257812, |
|
"logps/chosen": -1.3020392656326294, |
|
"logps/rejected": -1.5343925952911377, |
|
"loss": 1.3675, |
|
"odds_ratio_loss": 0.654754638671875, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13020391762256622, |
|
"rewards/margins": 0.02323536016047001, |
|
"rewards/rejected": -0.1534392535686493, |
|
"sft_loss": 1.3020392656326294, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2285310163669427, |
|
"grad_norm": 2.166121482849121, |
|
"learning_rate": 3.198287307206192e-06, |
|
"logits/chosen": 251.711669921875, |
|
"logits/rejected": 251.5684356689453, |
|
"logps/chosen": -1.1889938116073608, |
|
"logps/rejected": -1.4522913694381714, |
|
"loss": 1.2499, |
|
"odds_ratio_loss": 0.60938560962677, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1188993826508522, |
|
"rewards/margins": 0.02632974646985531, |
|
"rewards/rejected": -0.14522913098335266, |
|
"sft_loss": 1.1889938116073608, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2446958981612446, |
|
"grad_norm": 1.8584887981414795, |
|
"learning_rate": 3.157510482110856e-06, |
|
"logits/chosen": 252.8727569580078, |
|
"logits/rejected": 253.4295654296875, |
|
"logps/chosen": -1.2046940326690674, |
|
"logps/rejected": -1.360910177230835, |
|
"loss": 1.2735, |
|
"odds_ratio_loss": 0.6879505515098572, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12046940624713898, |
|
"rewards/margins": 0.01562163233757019, |
|
"rewards/rejected": -0.13609102368354797, |
|
"sft_loss": 1.2046940326690674, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.2608607799555465, |
|
"grad_norm": 1.6219208240509033, |
|
"learning_rate": 3.116544849436077e-06, |
|
"logits/chosen": 251.80764770507812, |
|
"logits/rejected": 251.75509643554688, |
|
"logps/chosen": -1.3175479173660278, |
|
"logps/rejected": -1.6150630712509155, |
|
"loss": 1.3813, |
|
"odds_ratio_loss": 0.6378855109214783, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1317548006772995, |
|
"rewards/margins": 0.029751509428024292, |
|
"rewards/rejected": -0.1615062952041626, |
|
"sft_loss": 1.3175479173660278, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2770256617498483, |
|
"grad_norm": 2.1420071125030518, |
|
"learning_rate": 3.0754021726778848e-06, |
|
"logits/chosen": 252.167724609375, |
|
"logits/rejected": 251.9316864013672, |
|
"logps/chosen": -1.1495087146759033, |
|
"logps/rejected": -1.426129937171936, |
|
"loss": 1.2132, |
|
"odds_ratio_loss": 0.6372426748275757, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.11495087295770645, |
|
"rewards/margins": 0.02766209840774536, |
|
"rewards/rejected": -0.1426129937171936, |
|
"sft_loss": 1.1495087146759033, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.2931905435441502, |
|
"grad_norm": 1.3823323249816895, |
|
"learning_rate": 3.0340942661714463e-06, |
|
"logits/chosen": 252.6959686279297, |
|
"logits/rejected": 252.73464965820312, |
|
"logps/chosen": -1.2912076711654663, |
|
"logps/rejected": -1.4657213687896729, |
|
"loss": 1.3573, |
|
"odds_ratio_loss": 0.6610310673713684, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12912078201770782, |
|
"rewards/margins": 0.017451368272304535, |
|
"rewards/rejected": -0.14657214283943176, |
|
"sft_loss": 1.2912076711654663, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3093554253384523, |
|
"grad_norm": 3.4516756534576416, |
|
"learning_rate": 2.992632991698512e-06, |
|
"logits/chosen": 250.41928100585938, |
|
"logits/rejected": 250.66513061523438, |
|
"logps/chosen": -1.219699501991272, |
|
"logps/rejected": -1.483235239982605, |
|
"loss": 1.2828, |
|
"odds_ratio_loss": 0.6311507821083069, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12196997553110123, |
|
"rewards/margins": 0.02635357342660427, |
|
"rewards/rejected": -0.14832353591918945, |
|
"sft_loss": 1.219699501991272, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3255203071327541, |
|
"grad_norm": 2.465632677078247, |
|
"learning_rate": 2.9510302550812537e-06, |
|
"logits/chosen": 251.94296264648438, |
|
"logits/rejected": 252.61587524414062, |
|
"logps/chosen": -1.144325852394104, |
|
"logps/rejected": -1.4354488849639893, |
|
"loss": 1.2042, |
|
"odds_ratio_loss": 0.5983381271362305, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.11443258821964264, |
|
"rewards/margins": 0.02911229059100151, |
|
"rewards/rejected": -0.14354488253593445, |
|
"sft_loss": 1.144325852394104, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.341685188927056, |
|
"grad_norm": 3.969513416290283, |
|
"learning_rate": 2.9092980027634325e-06, |
|
"logits/chosen": 251.37832641601562, |
|
"logits/rejected": 251.625244140625, |
|
"logps/chosen": -1.1136391162872314, |
|
"logps/rejected": -1.3801125288009644, |
|
"loss": 1.1766, |
|
"odds_ratio_loss": 0.6293498277664185, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11136390268802643, |
|
"rewards/margins": 0.026647353544831276, |
|
"rewards/rejected": -0.13801124691963196, |
|
"sft_loss": 1.1136391162872314, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3578500707213579, |
|
"grad_norm": 1.7552839517593384, |
|
"learning_rate": 2.867448218379927e-06, |
|
"logits/chosen": 252.9868621826172, |
|
"logits/rejected": 253.2499542236328, |
|
"logps/chosen": -1.249079704284668, |
|
"logps/rejected": -1.4685295820236206, |
|
"loss": 1.3139, |
|
"odds_ratio_loss": 0.6482545733451843, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1249079704284668, |
|
"rewards/margins": 0.02194499969482422, |
|
"rewards/rejected": -0.14685297012329102, |
|
"sft_loss": 1.249079704284668, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3740149525156597, |
|
"grad_norm": 5.6061906814575195, |
|
"learning_rate": 2.825492919315559e-06, |
|
"logits/chosen": 252.72372436523438, |
|
"logits/rejected": 252.32168579101562, |
|
"logps/chosen": -1.2922828197479248, |
|
"logps/rejected": -1.4327681064605713, |
|
"loss": 1.3613, |
|
"odds_ratio_loss": 0.6897528767585754, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12922829389572144, |
|
"rewards/margins": 0.01404851209372282, |
|
"rewards/rejected": -0.14327679574489594, |
|
"sft_loss": 1.2922828197479248, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3901798343099616, |
|
"grad_norm": 2.2057290077209473, |
|
"learning_rate": 2.7834441532542482e-06, |
|
"logits/chosen": 251.51272583007812, |
|
"logits/rejected": 251.97573852539062, |
|
"logps/chosen": -1.1630654335021973, |
|
"logps/rejected": -1.4224598407745361, |
|
"loss": 1.2262, |
|
"odds_ratio_loss": 0.6317997574806213, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11630652844905853, |
|
"rewards/margins": 0.025939440354704857, |
|
"rewards/rejected": -0.14224597811698914, |
|
"sft_loss": 1.1630654335021973, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4063447161042635, |
|
"grad_norm": 2.0599286556243896, |
|
"learning_rate": 2.74131399471945e-06, |
|
"logits/chosen": 252.7571258544922, |
|
"logits/rejected": 253.06008911132812, |
|
"logps/chosen": -1.2314178943634033, |
|
"logps/rejected": -1.404909372329712, |
|
"loss": 1.297, |
|
"odds_ratio_loss": 0.6555390357971191, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12314176559448242, |
|
"rewards/margins": 0.01734915003180504, |
|
"rewards/rejected": -0.14049093425273895, |
|
"sft_loss": 1.2314178943634033, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4225095978985653, |
|
"grad_norm": 3.7026567459106445, |
|
"learning_rate": 2.6991145416068947e-06, |
|
"logits/chosen": 252.689697265625, |
|
"logits/rejected": 252.87332153320312, |
|
"logps/chosen": -1.2634754180908203, |
|
"logps/rejected": -1.376312255859375, |
|
"loss": 1.3339, |
|
"odds_ratio_loss": 0.7037913799285889, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.12634754180908203, |
|
"rewards/margins": 0.011283671483397484, |
|
"rewards/rejected": -0.13763120770454407, |
|
"sft_loss": 1.2634754180908203, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4386744796928672, |
|
"grad_norm": 2.7741122245788574, |
|
"learning_rate": 2.6568579117106143e-06, |
|
"logits/chosen": 251.893310546875, |
|
"logits/rejected": 251.9792022705078, |
|
"logps/chosen": -1.1909462213516235, |
|
"logps/rejected": -1.444592833518982, |
|
"loss": 1.257, |
|
"odds_ratio_loss": 0.6606670618057251, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11909462511539459, |
|
"rewards/margins": 0.025364672765135765, |
|
"rewards/rejected": -0.1444592922925949, |
|
"sft_loss": 1.1909462213516235, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.454839361487169, |
|
"grad_norm": 1.2793887853622437, |
|
"learning_rate": 2.6145562392432544e-06, |
|
"logits/chosen": 253.50723266601562, |
|
"logits/rejected": 253.42764282226562, |
|
"logps/chosen": -1.2168656587600708, |
|
"logps/rejected": -1.336360216140747, |
|
"loss": 1.2887, |
|
"odds_ratio_loss": 0.7181415557861328, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12168655544519424, |
|
"rewards/margins": 0.01194946188479662, |
|
"rewards/rejected": -0.13363602757453918, |
|
"sft_loss": 1.2168656587600708, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.471004243281471, |
|
"grad_norm": 2.857558012008667, |
|
"learning_rate": 2.5722216713516682e-06, |
|
"logits/chosen": 252.78237915039062, |
|
"logits/rejected": 253.788330078125, |
|
"logps/chosen": -1.1416139602661133, |
|
"logps/rejected": -1.3757555484771729, |
|
"loss": 1.2043, |
|
"odds_ratio_loss": 0.6271349787712097, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.11416139453649521, |
|
"rewards/margins": 0.023414146155118942, |
|
"rewards/rejected": -0.13757555186748505, |
|
"sft_loss": 1.1416139602661133, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.4871691250757728, |
|
"grad_norm": 2.625776529312134, |
|
"learning_rate": 2.5298663646288064e-06, |
|
"logits/chosen": 253.61221313476562, |
|
"logits/rejected": 254.0120391845703, |
|
"logps/chosen": -1.1546480655670166, |
|
"logps/rejected": -1.4036109447479248, |
|
"loss": 1.2201, |
|
"odds_ratio_loss": 0.6542297601699829, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11546480655670166, |
|
"rewards/margins": 0.024896297603845596, |
|
"rewards/rejected": -0.14036110043525696, |
|
"sft_loss": 1.1546480655670166, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.503334006870075, |
|
"grad_norm": 3.928030014038086, |
|
"learning_rate": 2.487502481622879e-06, |
|
"logits/chosen": 252.84619140625, |
|
"logits/rejected": 253.71127319335938, |
|
"logps/chosen": -1.2712576389312744, |
|
"logps/rejected": -1.42746901512146, |
|
"loss": 1.3413, |
|
"odds_ratio_loss": 0.7003083229064941, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12712574005126953, |
|
"rewards/margins": 0.01562117226421833, |
|
"rewards/rejected": -0.1427469402551651, |
|
"sft_loss": 1.2712576389312744, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.5194988886643768, |
|
"grad_norm": 2.4900426864624023, |
|
"learning_rate": 2.4451421873448253e-06, |
|
"logits/chosen": 252.51846313476562, |
|
"logits/rejected": 253.07400512695312, |
|
"logps/chosen": -1.193199634552002, |
|
"logps/rejected": -1.3677222728729248, |
|
"loss": 1.2601, |
|
"odds_ratio_loss": 0.6688076257705688, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11931997537612915, |
|
"rewards/margins": 0.01745227724313736, |
|
"rewards/rejected": -0.1367722451686859, |
|
"sft_loss": 1.193199634552002, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5356637704586786, |
|
"grad_norm": 6.85699987411499, |
|
"learning_rate": 2.40279764577506e-06, |
|
"logits/chosen": 253.85693359375, |
|
"logits/rejected": 253.9010467529297, |
|
"logps/chosen": -1.304840087890625, |
|
"logps/rejected": -1.417873501777649, |
|
"loss": 1.3741, |
|
"odds_ratio_loss": 0.6923686861991882, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13048401474952698, |
|
"rewards/margins": 0.011303339153528214, |
|
"rewards/rejected": -0.1417873501777649, |
|
"sft_loss": 1.304840087890625, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5518286522529805, |
|
"grad_norm": 2.3570547103881836, |
|
"learning_rate": 2.3604810163705242e-06, |
|
"logits/chosen": 253.90060424804688, |
|
"logits/rejected": 254.25430297851562, |
|
"logps/chosen": -1.1358963251113892, |
|
"logps/rejected": -1.3512394428253174, |
|
"loss": 1.1966, |
|
"odds_ratio_loss": 0.6068128943443298, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11358962953090668, |
|
"rewards/margins": 0.021534323692321777, |
|
"rewards/rejected": -0.13512396812438965, |
|
"sft_loss": 1.1358963251113892, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.5679935340472824, |
|
"grad_norm": 1.6715513467788696, |
|
"learning_rate": 2.3182044505730364e-06, |
|
"logits/chosen": 252.765380859375, |
|
"logits/rejected": 252.7443389892578, |
|
"logps/chosen": -1.0937732458114624, |
|
"logps/rejected": -1.302191972732544, |
|
"loss": 1.1567, |
|
"odds_ratio_loss": 0.6288636922836304, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.10937733948230743, |
|
"rewards/margins": 0.020841870456933975, |
|
"rewards/rejected": -0.13021919131278992, |
|
"sft_loss": 1.0937732458114624, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.5841584158415842, |
|
"grad_norm": 1.8489584922790527, |
|
"learning_rate": 2.275980088319941e-06, |
|
"logits/chosen": 253.30712890625, |
|
"logits/rejected": 253.5155487060547, |
|
"logps/chosen": -1.149460792541504, |
|
"logps/rejected": -1.2745110988616943, |
|
"loss": 1.2198, |
|
"odds_ratio_loss": 0.7036079168319702, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.11494608223438263, |
|
"rewards/margins": 0.012505029328167439, |
|
"rewards/rejected": -0.1274511069059372, |
|
"sft_loss": 1.149460792541504, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.600323297635886, |
|
"grad_norm": 2.3143341541290283, |
|
"learning_rate": 2.2338200545580577e-06, |
|
"logits/chosen": 253.9146728515625, |
|
"logits/rejected": 254.3609619140625, |
|
"logps/chosen": -1.1358720064163208, |
|
"logps/rejected": -1.409860372543335, |
|
"loss": 1.203, |
|
"odds_ratio_loss": 0.6715231537818909, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.11358718574047089, |
|
"rewards/margins": 0.02739885076880455, |
|
"rewards/rejected": -0.14098605513572693, |
|
"sft_loss": 1.1358720064163208, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.616488179430188, |
|
"grad_norm": 2.5078933238983154, |
|
"learning_rate": 2.191736455761947e-06, |
|
"logits/chosen": 252.4419708251953, |
|
"logits/rejected": 252.6824493408203, |
|
"logps/chosen": -1.102782964706421, |
|
"logps/rejected": -1.295693039894104, |
|
"loss": 1.1628, |
|
"odds_ratio_loss": 0.5999386310577393, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11027830839157104, |
|
"rewards/margins": 0.019290992990136147, |
|
"rewards/rejected": -0.12956929206848145, |
|
"sft_loss": 1.102782964706421, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.616488179430188, |
|
"eval_logits/chosen": 252.82716369628906, |
|
"eval_logits/rejected": 253.18104553222656, |
|
"eval_logps/chosen": -1.2153432369232178, |
|
"eval_logps/rejected": -1.446128010749817, |
|
"eval_loss": 1.2833058834075928, |
|
"eval_odds_ratio_loss": 0.6796271204948425, |
|
"eval_rewards/accuracies": 0.5618181824684143, |
|
"eval_rewards/chosen": -0.1215343102812767, |
|
"eval_rewards/margins": 0.023078490048646927, |
|
"eval_rewards/rejected": -0.14461281895637512, |
|
"eval_runtime": 221.4361, |
|
"eval_samples_per_second": 4.968, |
|
"eval_sft_loss": 1.2153432369232178, |
|
"eval_steps_per_second": 2.484, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 1.7511672973632812, |
|
"learning_rate": 2.1497413764574673e-06, |
|
"logits/chosen": 253.8401336669922, |
|
"logits/rejected": 253.7457733154297, |
|
"logps/chosen": -1.2121939659118652, |
|
"logps/rejected": -1.4931201934814453, |
|
"loss": 1.2703, |
|
"odds_ratio_loss": 0.5808267593383789, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.12121939659118652, |
|
"rewards/margins": 0.028092628344893456, |
|
"rewards/rejected": -0.14931201934814453, |
|
"sft_loss": 1.2121939659118652, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.6488179430187917, |
|
"grad_norm": 2.1624321937561035, |
|
"learning_rate": 2.1078468757516395e-06, |
|
"logits/chosen": 252.7372589111328, |
|
"logits/rejected": 253.10342407226562, |
|
"logps/chosen": -1.1226885318756104, |
|
"logps/rejected": -1.302170991897583, |
|
"loss": 1.1845, |
|
"odds_ratio_loss": 0.6178861856460571, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.11226886510848999, |
|
"rewards/margins": 0.017948249354958534, |
|
"rewards/rejected": -0.13021712005138397, |
|
"sft_loss": 1.1226885318756104, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6649828248130936, |
|
"grad_norm": 2.5826563835144043, |
|
"learning_rate": 2.0660649838698145e-06, |
|
"logits/chosen": 255.34326171875, |
|
"logits/rejected": 255.65859985351562, |
|
"logps/chosen": -1.1558864116668701, |
|
"logps/rejected": -1.3295384645462036, |
|
"loss": 1.2211, |
|
"odds_ratio_loss": 0.6525439023971558, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11558864265680313, |
|
"rewards/margins": 0.01736520044505596, |
|
"rewards/rejected": -0.13295385241508484, |
|
"sft_loss": 1.1558864116668701, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.6811477066073954, |
|
"grad_norm": 1.975549340248108, |
|
"learning_rate": 2.0244076987011284e-06, |
|
"logits/chosen": 255.1981964111328, |
|
"logits/rejected": 255.7158966064453, |
|
"logps/chosen": -1.2127221822738647, |
|
"logps/rejected": -1.4685566425323486, |
|
"loss": 1.2727, |
|
"odds_ratio_loss": 0.6000550389289856, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1212722510099411, |
|
"rewards/margins": 0.025583425536751747, |
|
"rewards/rejected": -0.1468556672334671, |
|
"sft_loss": 1.2127221822738647, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"grad_norm": 2.224191904067993, |
|
"learning_rate": 1.982886982353251e-06, |
|
"logits/chosen": 252.6818389892578, |
|
"logits/rejected": 252.80859375, |
|
"logps/chosen": -1.193681240081787, |
|
"logps/rejected": -1.44672691822052, |
|
"loss": 1.2608, |
|
"odds_ratio_loss": 0.6715336441993713, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.11936812102794647, |
|
"rewards/margins": 0.025304565206170082, |
|
"rewards/rejected": -0.144672691822052, |
|
"sft_loss": 1.193681240081787, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7134774701959992, |
|
"grad_norm": 2.571403980255127, |
|
"learning_rate": 1.941514757717392e-06, |
|
"logits/chosen": 253.2911376953125, |
|
"logits/rejected": 254.0371551513672, |
|
"logps/chosen": -1.2021260261535645, |
|
"logps/rejected": -1.443331003189087, |
|
"loss": 1.2653, |
|
"odds_ratio_loss": 0.6321113705635071, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1202125996351242, |
|
"rewards/margins": 0.02412049099802971, |
|
"rewards/rejected": -0.1443330943584442, |
|
"sft_loss": 1.2021260261535645, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.729642351990301, |
|
"grad_norm": 4.061903476715088, |
|
"learning_rate": 1.9003029050445953e-06, |
|
"logits/chosen": 254.00650024414062, |
|
"logits/rejected": 254.38876342773438, |
|
"logps/chosen": -1.2242114543914795, |
|
"logps/rejected": -1.4163745641708374, |
|
"loss": 1.2891, |
|
"odds_ratio_loss": 0.6486276984214783, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12242114543914795, |
|
"rewards/margins": 0.019216306507587433, |
|
"rewards/rejected": -0.14163745939731598, |
|
"sft_loss": 1.2242114543914795, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.745807233784603, |
|
"grad_norm": 2.371570110321045, |
|
"learning_rate": 1.8592632585342523e-06, |
|
"logits/chosen": 254.29745483398438, |
|
"logits/rejected": 254.67745971679688, |
|
"logps/chosen": -1.1612073183059692, |
|
"logps/rejected": -1.4247183799743652, |
|
"loss": 1.2246, |
|
"odds_ratio_loss": 0.6341406106948853, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11612071841955185, |
|
"rewards/margins": 0.026351114735007286, |
|
"rewards/rejected": -0.1424718201160431, |
|
"sft_loss": 1.1612073183059692, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7619721155789048, |
|
"grad_norm": 8.819137573242188, |
|
"learning_rate": 1.8184076029358527e-06, |
|
"logits/chosen": 253.06661987304688, |
|
"logits/rejected": 252.38119506835938, |
|
"logps/chosen": -1.161278486251831, |
|
"logps/rejected": -1.2557886838912964, |
|
"loss": 1.2286, |
|
"odds_ratio_loss": 0.6734786033630371, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1161278635263443, |
|
"rewards/margins": 0.009451002813875675, |
|
"rewards/rejected": -0.1255788505077362, |
|
"sft_loss": 1.161278486251831, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.7781369973732066, |
|
"grad_norm": 1.7281618118286133, |
|
"learning_rate": 1.7777476701649318e-06, |
|
"logits/chosen": 251.3661651611328, |
|
"logits/rejected": 252.11477661132812, |
|
"logps/chosen": -1.1861474514007568, |
|
"logps/rejected": -1.3936630487442017, |
|
"loss": 1.2518, |
|
"odds_ratio_loss": 0.6561599373817444, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11861474812030792, |
|
"rewards/margins": 0.020751552656292915, |
|
"rewards/rejected": -0.1393662989139557, |
|
"sft_loss": 1.1861474514007568, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7943018791675085, |
|
"grad_norm": 3.3538103103637695, |
|
"learning_rate": 1.7372951359341925e-06, |
|
"logits/chosen": 253.0167236328125, |
|
"logits/rejected": 253.53396606445312, |
|
"logps/chosen": -1.137481451034546, |
|
"logps/rejected": -1.2894176244735718, |
|
"loss": 1.2055, |
|
"odds_ratio_loss": 0.6805364489555359, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.11374815553426743, |
|
"rewards/margins": 0.01519359927624464, |
|
"rewards/rejected": -0.12894175946712494, |
|
"sft_loss": 1.137481451034546, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8104667609618104, |
|
"grad_norm": 3.6225833892822266, |
|
"learning_rate": 1.6970616164007547e-06, |
|
"logits/chosen": 252.6470489501953, |
|
"logits/rejected": 252.9353485107422, |
|
"logps/chosen": -1.1084340810775757, |
|
"logps/rejected": -1.3413022756576538, |
|
"loss": 1.1728, |
|
"odds_ratio_loss": 0.6432042717933655, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11084340512752533, |
|
"rewards/margins": 0.02328682318329811, |
|
"rewards/rejected": -0.13413023948669434, |
|
"sft_loss": 1.1084340810775757, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.8266316427561122, |
|
"grad_norm": 4.332692623138428, |
|
"learning_rate": 1.6570586648305276e-06, |
|
"logits/chosen": 253.6255645751953, |
|
"logits/rejected": 253.76217651367188, |
|
"logps/chosen": -1.1925103664398193, |
|
"logps/rejected": -1.4342319965362549, |
|
"loss": 1.2579, |
|
"odds_ratio_loss": 0.6541949510574341, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.11925105005502701, |
|
"rewards/margins": 0.024172160774469376, |
|
"rewards/rejected": -0.14342321455478668, |
|
"sft_loss": 1.1925103664398193, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.842796524550414, |
|
"grad_norm": 3.238105535507202, |
|
"learning_rate": 1.6172977682806151e-06, |
|
"logits/chosen": 253.7568817138672, |
|
"logits/rejected": 254.8863525390625, |
|
"logps/chosen": -1.2200841903686523, |
|
"logps/rejected": -1.4592787027359009, |
|
"loss": 1.2837, |
|
"odds_ratio_loss": 0.6365170478820801, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.12200842052698135, |
|
"rewards/margins": 0.023919429630041122, |
|
"rewards/rejected": -0.14592786133289337, |
|
"sft_loss": 1.2200841903686523, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.858961406344716, |
|
"grad_norm": 2.5219290256500244, |
|
"learning_rate": 1.5777903443007586e-06, |
|
"logits/chosen": 253.631103515625, |
|
"logits/rejected": 253.82504272460938, |
|
"logps/chosen": -1.235215425491333, |
|
"logps/rejected": -1.4535129070281982, |
|
"loss": 1.3023, |
|
"odds_ratio_loss": 0.6708552241325378, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12352155148983002, |
|
"rewards/margins": 0.021829739212989807, |
|
"rewards/rejected": -0.14535130560398102, |
|
"sft_loss": 1.235215425491333, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8751262881390178, |
|
"grad_norm": 3.190958261489868, |
|
"learning_rate": 1.5385477376547226e-06, |
|
"logits/chosen": 255.1521759033203, |
|
"logits/rejected": 255.2525634765625, |
|
"logps/chosen": -1.229001760482788, |
|
"logps/rejected": -1.4793845415115356, |
|
"loss": 1.2891, |
|
"odds_ratio_loss": 0.601111888885498, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.12290020287036896, |
|
"rewards/margins": 0.02503824792802334, |
|
"rewards/rejected": -0.14793843030929565, |
|
"sft_loss": 1.229001760482788, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.89129116993332, |
|
"grad_norm": 2.217510461807251, |
|
"learning_rate": 1.4995812170625845e-06, |
|
"logits/chosen": 253.1751251220703, |
|
"logits/rejected": 253.72238159179688, |
|
"logps/chosen": -1.2252581119537354, |
|
"logps/rejected": -1.5921032428741455, |
|
"loss": 1.2851, |
|
"odds_ratio_loss": 0.598137617111206, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12252581119537354, |
|
"rewards/margins": 0.036684513092041016, |
|
"rewards/rejected": -0.15921030938625336, |
|
"sft_loss": 1.2252581119537354, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9074560517276218, |
|
"grad_norm": 3.0452287197113037, |
|
"learning_rate": 1.4609019719648666e-06, |
|
"logits/chosen": 254.07901000976562, |
|
"logits/rejected": 254.59957885742188, |
|
"logps/chosen": -1.2207356691360474, |
|
"logps/rejected": -1.4706141948699951, |
|
"loss": 1.2826, |
|
"odds_ratio_loss": 0.6183902025222778, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.12207356840372086, |
|
"rewards/margins": 0.024987850338220596, |
|
"rewards/rejected": -0.14706142246723175, |
|
"sft_loss": 1.2207356691360474, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.9236209335219236, |
|
"grad_norm": 4.679479122161865, |
|
"learning_rate": 1.42252110930943e-06, |
|
"logits/chosen": 252.7305450439453, |
|
"logits/rejected": 252.6374969482422, |
|
"logps/chosen": -1.064835786819458, |
|
"logps/rejected": -1.2910759449005127, |
|
"loss": 1.1283, |
|
"odds_ratio_loss": 0.6346200704574585, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1064835786819458, |
|
"rewards/margins": 0.02262401580810547, |
|
"rewards/rejected": -0.12910759449005127, |
|
"sft_loss": 1.064835786819458, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9397858153162255, |
|
"grad_norm": 3.286461353302002, |
|
"learning_rate": 1.3844496503620493e-06, |
|
"logits/chosen": 253.310302734375, |
|
"logits/rejected": 253.27023315429688, |
|
"logps/chosen": -1.2112998962402344, |
|
"logps/rejected": -1.3967139720916748, |
|
"loss": 1.2737, |
|
"odds_ratio_loss": 0.6242542862892151, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12112998962402344, |
|
"rewards/margins": 0.01854141615331173, |
|
"rewards/rejected": -0.1396714150905609, |
|
"sft_loss": 1.2112998962402344, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9559506971105274, |
|
"grad_norm": 2.8077545166015625, |
|
"learning_rate": 1.3466985275416081e-06, |
|
"logits/chosen": 254.2769775390625, |
|
"logits/rejected": 254.47360229492188, |
|
"logps/chosen": -1.2563018798828125, |
|
"logps/rejected": -1.4514508247375488, |
|
"loss": 1.3239, |
|
"odds_ratio_loss": 0.6757391691207886, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1256301999092102, |
|
"rewards/margins": 0.019514882937073708, |
|
"rewards/rejected": -0.14514507353305817, |
|
"sft_loss": 1.2563018798828125, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.9721155789048292, |
|
"grad_norm": 2.275397777557373, |
|
"learning_rate": 1.309278581280791e-06, |
|
"logits/chosen": 253.5750274658203, |
|
"logits/rejected": 253.99935913085938, |
|
"logps/chosen": -1.1356334686279297, |
|
"logps/rejected": -1.4314597845077515, |
|
"loss": 1.1934, |
|
"odds_ratio_loss": 0.5772610902786255, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11356334388256073, |
|
"rewards/margins": 0.029582645744085312, |
|
"rewards/rejected": -0.14314597845077515, |
|
"sft_loss": 1.1356334686279297, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.9882804606991311, |
|
"grad_norm": 1.454276204109192, |
|
"learning_rate": 1.272200556913199e-06, |
|
"logits/chosen": 254.544677734375, |
|
"logits/rejected": 254.67251586914062, |
|
"logps/chosen": -1.1884077787399292, |
|
"logps/rejected": -1.395115613937378, |
|
"loss": 1.2599, |
|
"odds_ratio_loss": 0.7147720456123352, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1188407689332962, |
|
"rewards/margins": 0.020670795813202858, |
|
"rewards/rejected": -0.1395115852355957, |
|
"sft_loss": 1.1884077787399292, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.004445342493433, |
|
"grad_norm": 3.6475422382354736, |
|
"learning_rate": 1.2354751015877698e-06, |
|
"logits/chosen": 252.74777221679688, |
|
"logits/rejected": 253.66641235351562, |
|
"logps/chosen": -1.1167339086532593, |
|
"logps/rejected": -1.4450454711914062, |
|
"loss": 1.1791, |
|
"odds_ratio_loss": 0.6237870454788208, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11167339980602264, |
|
"rewards/margins": 0.03283114731311798, |
|
"rewards/rejected": -0.14450454711914062, |
|
"sft_loss": 1.1167339086532593, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.020610224287735, |
|
"grad_norm": 3.52698016166687, |
|
"learning_rate": 1.1991127612113945e-06, |
|
"logits/chosen": 254.6741943359375, |
|
"logits/rejected": 254.9825897216797, |
|
"logps/chosen": -1.1792643070220947, |
|
"logps/rejected": -1.4326034784317017, |
|
"loss": 1.2387, |
|
"odds_ratio_loss": 0.5942111611366272, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.11792641878128052, |
|
"rewards/margins": 0.02533392235636711, |
|
"rewards/rejected": -0.14326035976409912, |
|
"sft_loss": 1.1792643070220947, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.036775106082037, |
|
"grad_norm": 3.579160690307617, |
|
"learning_rate": 1.1631239774206035e-06, |
|
"logits/chosen": 253.5153350830078, |
|
"logits/rejected": 253.659912109375, |
|
"logps/chosen": -1.1673438549041748, |
|
"logps/rejected": -1.4458467960357666, |
|
"loss": 1.2314, |
|
"odds_ratio_loss": 0.6410170793533325, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11673440039157867, |
|
"rewards/margins": 0.027850273996591568, |
|
"rewards/rejected": -0.14458468556404114, |
|
"sft_loss": 1.1673438549041748, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.052939987876339, |
|
"grad_norm": 3.0812463760375977, |
|
"learning_rate": 1.1275190845831978e-06, |
|
"logits/chosen": 254.5985870361328, |
|
"logits/rejected": 254.2525177001953, |
|
"logps/chosen": -1.1342524290084839, |
|
"logps/rejected": -1.3948237895965576, |
|
"loss": 1.1925, |
|
"odds_ratio_loss": 0.5824798345565796, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11342523247003555, |
|
"rewards/margins": 0.026057133451104164, |
|
"rewards/rejected": -0.13948237895965576, |
|
"sft_loss": 1.1342524290084839, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.0691048696706407, |
|
"grad_norm": 2.4549710750579834, |
|
"learning_rate": 1.0923083068306778e-06, |
|
"logits/chosen": 254.7982635498047, |
|
"logits/rejected": 255.1488494873047, |
|
"logps/chosen": -1.1482160091400146, |
|
"logps/rejected": -1.4850049018859863, |
|
"loss": 1.2055, |
|
"odds_ratio_loss": 0.5724589824676514, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11482159048318863, |
|
"rewards/margins": 0.033678896725177765, |
|
"rewards/rejected": -0.1485004872083664, |
|
"sft_loss": 1.1482160091400146, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.0852697514649425, |
|
"grad_norm": 1.778605580329895, |
|
"learning_rate": 1.0575017551223348e-06, |
|
"logits/chosen": 253.03524780273438, |
|
"logits/rejected": 253.53366088867188, |
|
"logps/chosen": -1.087461233139038, |
|
"logps/rejected": -1.321656584739685, |
|
"loss": 1.1524, |
|
"odds_ratio_loss": 0.6496065855026245, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.10874611139297485, |
|
"rewards/margins": 0.023419544100761414, |
|
"rewards/rejected": -0.13216565549373627, |
|
"sft_loss": 1.087461233139038, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.1014346332592444, |
|
"grad_norm": 1.522445797920227, |
|
"learning_rate": 1.023109424341833e-06, |
|
"logits/chosen": 254.5054168701172, |
|
"logits/rejected": 255.06100463867188, |
|
"logps/chosen": -1.2142359018325806, |
|
"logps/rejected": -1.448194146156311, |
|
"loss": 1.2781, |
|
"odds_ratio_loss": 0.6386287808418274, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.12142357975244522, |
|
"rewards/margins": 0.023395827040076256, |
|
"rewards/rejected": -0.14481940865516663, |
|
"sft_loss": 1.2142359018325806, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.1175995150535463, |
|
"grad_norm": 2.577580690383911, |
|
"learning_rate": 9.891411904271273e-07, |
|
"logits/chosen": 254.14779663085938, |
|
"logits/rejected": 254.121826171875, |
|
"logps/chosen": -1.100303292274475, |
|
"logps/rejected": -1.3276548385620117, |
|
"loss": 1.1632, |
|
"odds_ratio_loss": 0.628852128982544, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11003033071756363, |
|
"rewards/margins": 0.02273516170680523, |
|
"rewards/rejected": -0.1327655017375946, |
|
"sft_loss": 1.100303292274475, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.133764396847848, |
|
"grad_norm": 1.5676363706588745, |
|
"learning_rate": 9.556068075345363e-07, |
|
"logits/chosen": 255.0603485107422, |
|
"logits/rejected": 255.1541748046875, |
|
"logps/chosen": -1.1494947671890259, |
|
"logps/rejected": -1.3283547163009644, |
|
"loss": 1.2112, |
|
"odds_ratio_loss": 0.6172733306884766, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.11494947969913483, |
|
"rewards/margins": 0.017885997891426086, |
|
"rewards/rejected": -0.1328354775905609, |
|
"sft_loss": 1.1494947671890259, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.14992927864215, |
|
"grad_norm": 1.964956521987915, |
|
"learning_rate": 9.225159052377838e-07, |
|
"logits/chosen": 254.16183471679688, |
|
"logits/rejected": 254.3532257080078, |
|
"logps/chosen": -1.1823852062225342, |
|
"logps/rejected": -1.4285701513290405, |
|
"loss": 1.2468, |
|
"odds_ratio_loss": 0.6443654894828796, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11823852360248566, |
|
"rewards/margins": 0.024618491530418396, |
|
"rewards/rejected": -0.14285701513290405, |
|
"sft_loss": 1.1823852062225342, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.166094160436452, |
|
"grad_norm": 4.320827484130859, |
|
"learning_rate": 8.898779857628184e-07, |
|
"logits/chosen": 253.94775390625, |
|
"logits/rejected": 253.83328247070312, |
|
"logps/chosen": -1.0813744068145752, |
|
"logps/rejected": -1.289052963256836, |
|
"loss": 1.1442, |
|
"odds_ratio_loss": 0.6285432577133179, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.10813745111227036, |
|
"rewards/margins": 0.020767847076058388, |
|
"rewards/rejected": -0.1289052963256836, |
|
"sft_loss": 1.0813744068145752, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.1822590422307537, |
|
"grad_norm": 1.9166721105575562, |
|
"learning_rate": 8.577024212591975e-07, |
|
"logits/chosen": 255.42715454101562, |
|
"logits/rejected": 255.6570281982422, |
|
"logps/chosen": -1.2112232446670532, |
|
"logps/rejected": -1.4022705554962158, |
|
"loss": 1.2754, |
|
"odds_ratio_loss": 0.6421025991439819, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1211223155260086, |
|
"rewards/margins": 0.019104719161987305, |
|
"rewards/rejected": -0.1402270495891571, |
|
"sft_loss": 1.2112232446670532, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.1984239240250556, |
|
"grad_norm": 2.231593370437622, |
|
"learning_rate": 8.259984511088276e-07, |
|
"logits/chosen": 252.95217895507812, |
|
"logits/rejected": 253.24972534179688, |
|
"logps/chosen": -1.1978521347045898, |
|
"logps/rejected": -1.4085915088653564, |
|
"loss": 1.2643, |
|
"odds_ratio_loss": 0.6643570065498352, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.11978521198034286, |
|
"rewards/margins": 0.021073944866657257, |
|
"rewards/rejected": -0.14085917174816132, |
|
"sft_loss": 1.1978521347045898, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.2145888058193575, |
|
"grad_norm": 1.891438364982605, |
|
"learning_rate": 7.947751792728237e-07, |
|
"logits/chosen": 252.89163208007812, |
|
"logits/rejected": 252.9368133544922, |
|
"logps/chosen": -1.1386100053787231, |
|
"logps/rejected": -1.3931357860565186, |
|
"loss": 1.2001, |
|
"odds_ratio_loss": 0.6149393320083618, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11386100947856903, |
|
"rewards/margins": 0.02545258030295372, |
|
"rewards/rejected": -0.13931360840797424, |
|
"sft_loss": 1.1386100053787231, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.2307536876136593, |
|
"grad_norm": 11.893668174743652, |
|
"learning_rate": 7.640415716772626e-07, |
|
"logits/chosen": 254.87893676757812, |
|
"logits/rejected": 254.9901123046875, |
|
"logps/chosen": -1.2301312685012817, |
|
"logps/rejected": -1.4715359210968018, |
|
"loss": 1.2969, |
|
"odds_ratio_loss": 0.6676316857337952, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12301311641931534, |
|
"rewards/margins": 0.02414046786725521, |
|
"rewards/rejected": -0.1471536010503769, |
|
"sft_loss": 1.2301312685012817, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.246918569407961, |
|
"grad_norm": 1.648759365081787, |
|
"learning_rate": 7.338064536385722e-07, |
|
"logits/chosen": 253.27536010742188, |
|
"logits/rejected": 253.39450073242188, |
|
"logps/chosen": -1.172890543937683, |
|
"logps/rejected": -1.4573774337768555, |
|
"loss": 1.2306, |
|
"odds_ratio_loss": 0.5775946974754333, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11728904396295547, |
|
"rewards/margins": 0.02844870649278164, |
|
"rewards/rejected": -0.14573773741722107, |
|
"sft_loss": 1.172890543937683, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"grad_norm": 2.644590377807617, |
|
"learning_rate": 7.040785073292883e-07, |
|
"logits/chosen": 254.41006469726562, |
|
"logits/rejected": 254.5663299560547, |
|
"logps/chosen": -1.243436336517334, |
|
"logps/rejected": -1.455594778060913, |
|
"loss": 1.3115, |
|
"odds_ratio_loss": 0.6802859902381897, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.12434364855289459, |
|
"rewards/margins": 0.021215861663222313, |
|
"rewards/rejected": -0.14555948972702026, |
|
"sft_loss": 1.243436336517334, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.279248332996565, |
|
"grad_norm": 3.2420878410339355, |
|
"learning_rate": 6.748662692849297e-07, |
|
"logits/chosen": 253.18417358398438, |
|
"logits/rejected": 254.11279296875, |
|
"logps/chosen": -1.1471028327941895, |
|
"logps/rejected": -1.5119264125823975, |
|
"loss": 1.2055, |
|
"odds_ratio_loss": 0.5840214490890503, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11471028625965118, |
|
"rewards/margins": 0.03648235648870468, |
|
"rewards/rejected": -0.15119265019893646, |
|
"sft_loss": 1.1471028327941895, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.295413214790867, |
|
"grad_norm": 4.394900798797607, |
|
"learning_rate": 6.46178127952686e-07, |
|
"logits/chosen": 254.48062133789062, |
|
"logits/rejected": 254.9115753173828, |
|
"logps/chosen": -1.1684550046920776, |
|
"logps/rejected": -1.38850998878479, |
|
"loss": 1.2286, |
|
"odds_ratio_loss": 0.601581871509552, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.1168455109000206, |
|
"rewards/margins": 0.022005509585142136, |
|
"rewards/rejected": -0.13885101675987244, |
|
"sft_loss": 1.1684550046920776, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.3115780965851687, |
|
"grad_norm": 2.162309169769287, |
|
"learning_rate": 6.180223212826289e-07, |
|
"logits/chosen": 253.58633422851562, |
|
"logits/rejected": 253.8734588623047, |
|
"logps/chosen": -1.1496318578720093, |
|
"logps/rejected": -1.364654779434204, |
|
"loss": 1.213, |
|
"odds_ratio_loss": 0.633824348449707, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11496319621801376, |
|
"rewards/margins": 0.021502288058400154, |
|
"rewards/rejected": -0.13646547496318817, |
|
"sft_loss": 1.1496318578720093, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.3277429783794705, |
|
"grad_norm": 1.522935152053833, |
|
"learning_rate": 5.904069343621443e-07, |
|
"logits/chosen": 255.19082641601562, |
|
"logits/rejected": 255.11474609375, |
|
"logps/chosen": -1.1330249309539795, |
|
"logps/rejected": -1.386264443397522, |
|
"loss": 1.195, |
|
"odds_ratio_loss": 0.6193984746932983, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.11330248415470123, |
|
"rewards/margins": 0.025323981419205666, |
|
"rewards/rejected": -0.13862647116184235, |
|
"sft_loss": 1.1330249309539795, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.3439078601737724, |
|
"grad_norm": 2.982042074203491, |
|
"learning_rate": 5.633398970942544e-07, |
|
"logits/chosen": 254.9903564453125, |
|
"logits/rejected": 255.1248321533203, |
|
"logps/chosen": -1.1471365690231323, |
|
"logps/rejected": -1.3323371410369873, |
|
"loss": 1.2137, |
|
"odds_ratio_loss": 0.6657688617706299, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.114713653922081, |
|
"rewards/margins": 0.01852005161345005, |
|
"rewards/rejected": -0.1332337111234665, |
|
"sft_loss": 1.1471365690231323, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.3600727419680743, |
|
"grad_norm": 3.2461607456207275, |
|
"learning_rate": 5.368289819205069e-07, |
|
"logits/chosen": 254.27847290039062, |
|
"logits/rejected": 255.0779571533203, |
|
"logps/chosen": -1.11297607421875, |
|
"logps/rejected": -1.3122992515563965, |
|
"loss": 1.1805, |
|
"odds_ratio_loss": 0.6752298474311829, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1112975925207138, |
|
"rewards/margins": 0.019932324066758156, |
|
"rewards/rejected": -0.1312299221754074, |
|
"sft_loss": 1.11297607421875, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.376237623762376, |
|
"grad_norm": 2.7223591804504395, |
|
"learning_rate": 5.108818015890785e-07, |
|
"logits/chosen": 255.47216796875, |
|
"logits/rejected": 255.6534423828125, |
|
"logps/chosen": -1.2367959022521973, |
|
"logps/rejected": -1.4369171857833862, |
|
"loss": 1.3029, |
|
"odds_ratio_loss": 0.6609222888946533, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.12367959320545197, |
|
"rewards/margins": 0.020012129098176956, |
|
"rewards/rejected": -0.14369171857833862, |
|
"sft_loss": 1.2367959022521973, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.392402505556678, |
|
"grad_norm": 2.912327527999878, |
|
"learning_rate": 4.855058069687291e-07, |
|
"logits/chosen": 253.0759735107422, |
|
"logits/rejected": 253.6752471923828, |
|
"logps/chosen": -1.111169695854187, |
|
"logps/rejected": -1.4328919649124146, |
|
"loss": 1.1697, |
|
"odds_ratio_loss": 0.5851289629936218, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.11111698299646378, |
|
"rewards/margins": 0.03217221051454544, |
|
"rewards/rejected": -0.1432892084121704, |
|
"sft_loss": 1.111169695854187, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.40856738735098, |
|
"grad_norm": 2.995020627975464, |
|
"learning_rate": 4.607082849092523e-07, |
|
"logits/chosen": 253.9425811767578, |
|
"logits/rejected": 254.0526580810547, |
|
"logps/chosen": -1.2607060670852661, |
|
"logps/rejected": -1.4026780128479004, |
|
"loss": 1.3291, |
|
"odds_ratio_loss": 0.6835006475448608, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12607058882713318, |
|
"rewards/margins": 0.014197212643921375, |
|
"rewards/rejected": -0.14026781916618347, |
|
"sft_loss": 1.2607060670852661, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.4247322691452817, |
|
"grad_norm": 3.760835886001587, |
|
"learning_rate": 4.3649635614901405e-07, |
|
"logits/chosen": 254.07601928710938, |
|
"logits/rejected": 254.50985717773438, |
|
"logps/chosen": -1.1233417987823486, |
|
"logps/rejected": -1.2859314680099487, |
|
"loss": 1.1874, |
|
"odds_ratio_loss": 0.6403074860572815, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.11233416944742203, |
|
"rewards/margins": 0.01625899039208889, |
|
"rewards/rejected": -0.12859316170215607, |
|
"sft_loss": 1.1233417987823486, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4247322691452817, |
|
"eval_logits/chosen": 253.6037139892578, |
|
"eval_logits/rejected": 253.95994567871094, |
|
"eval_logps/chosen": -1.1982638835906982, |
|
"eval_logps/rejected": -1.4377323389053345, |
|
"eval_loss": 1.265723466873169, |
|
"eval_odds_ratio_loss": 0.6745957732200623, |
|
"eval_rewards/accuracies": 0.5699999928474426, |
|
"eval_rewards/chosen": -0.11982638388872147, |
|
"eval_rewards/margins": 0.023946860805153847, |
|
"eval_rewards/rejected": -0.14377322793006897, |
|
"eval_runtime": 221.0804, |
|
"eval_samples_per_second": 4.976, |
|
"eval_sft_loss": 1.1982638835906982, |
|
"eval_steps_per_second": 2.488, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4408971509395836, |
|
"grad_norm": 2.074381113052368, |
|
"learning_rate": 4.128769732701973e-07, |
|
"logits/chosen": 254.7397918701172, |
|
"logits/rejected": 254.7943572998047, |
|
"logps/chosen": -1.1791332960128784, |
|
"logps/rejected": -1.4214551448822021, |
|
"loss": 1.2445, |
|
"odds_ratio_loss": 0.6540807485580444, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11791334301233292, |
|
"rewards/margins": 0.02423218823969364, |
|
"rewards/rejected": -0.14214551448822021, |
|
"sft_loss": 1.1791332960128784, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.4570620327338855, |
|
"grad_norm": 3.3741965293884277, |
|
"learning_rate": 3.8985691870233046e-07, |
|
"logits/chosen": 254.44534301757812, |
|
"logits/rejected": 254.93307495117188, |
|
"logps/chosen": -1.209789514541626, |
|
"logps/rejected": -1.4953523874282837, |
|
"loss": 1.2744, |
|
"odds_ratio_loss": 0.645904004573822, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1209789514541626, |
|
"rewards/margins": 0.02855629101395607, |
|
"rewards/rejected": -0.14953525364398956, |
|
"sft_loss": 1.209789514541626, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.4732269145281873, |
|
"grad_norm": 4.19984245300293, |
|
"learning_rate": 3.6744280277467904e-07, |
|
"logits/chosen": 253.19186401367188, |
|
"logits/rejected": 253.959228515625, |
|
"logps/chosen": -1.1898527145385742, |
|
"logps/rejected": -1.418869972229004, |
|
"loss": 1.2551, |
|
"odds_ratio_loss": 0.6521813273429871, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.11898528039455414, |
|
"rewards/margins": 0.022901728749275208, |
|
"rewards/rejected": -0.14188699424266815, |
|
"sft_loss": 1.1898527145385742, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.489391796322489, |
|
"grad_norm": 3.145732879638672, |
|
"learning_rate": 3.456410618180503e-07, |
|
"logits/chosen": 252.92282104492188, |
|
"logits/rejected": 253.6022491455078, |
|
"logps/chosen": -1.0681793689727783, |
|
"logps/rejected": -1.446299433708191, |
|
"loss": 1.1295, |
|
"odds_ratio_loss": 0.6130428910255432, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.10681793838739395, |
|
"rewards/margins": 0.0378120057284832, |
|
"rewards/rejected": -0.14462995529174805, |
|
"sft_loss": 1.0681793689727783, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.5055566781167915, |
|
"grad_norm": 2.4530389308929443, |
|
"learning_rate": 3.244579563165753e-07, |
|
"logits/chosen": 252.8817138671875, |
|
"logits/rejected": 253.0416259765625, |
|
"logps/chosen": -1.1208980083465576, |
|
"logps/rejected": -1.4661897420883179, |
|
"loss": 1.1805, |
|
"odds_ratio_loss": 0.5958081483840942, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.11208979785442352, |
|
"rewards/margins": 0.034529171884059906, |
|
"rewards/rejected": -0.14661899209022522, |
|
"sft_loss": 1.1208980083465576, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.521721559911093, |
|
"grad_norm": 1.9151691198349, |
|
"learning_rate": 3.038995691099697e-07, |
|
"logits/chosen": 252.8979949951172, |
|
"logits/rejected": 253.34262084960938, |
|
"logps/chosen": -1.2310686111450195, |
|
"logps/rejected": -1.493896484375, |
|
"loss": 1.2954, |
|
"odds_ratio_loss": 0.6432778239250183, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.12310687452554703, |
|
"rewards/margins": 0.02628278359770775, |
|
"rewards/rejected": -0.14938965439796448, |
|
"sft_loss": 1.2310686111450195, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.5378864417053952, |
|
"grad_norm": 4.0270304679870605, |
|
"learning_rate": 2.839718036468192e-07, |
|
"logits/chosen": 255.1189422607422, |
|
"logits/rejected": 255.6085662841797, |
|
"logps/chosen": -1.2376972436904907, |
|
"logps/rejected": -1.4336402416229248, |
|
"loss": 1.306, |
|
"odds_ratio_loss": 0.6828280091285706, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12376973778009415, |
|
"rewards/margins": 0.019594285637140274, |
|
"rewards/rejected": -0.14336401224136353, |
|
"sft_loss": 1.2376972436904907, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.5540513234996967, |
|
"grad_norm": 2.8464980125427246, |
|
"learning_rate": 2.646803822893723e-07, |
|
"logits/chosen": 254.5944366455078, |
|
"logits/rejected": 254.6045379638672, |
|
"logps/chosen": -1.1911519765853882, |
|
"logps/rejected": -1.4335906505584717, |
|
"loss": 1.255, |
|
"odds_ratio_loss": 0.6387141346931458, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.1191151887178421, |
|
"rewards/margins": 0.024243878200650215, |
|
"rewards/rejected": -0.14335909485816956, |
|
"sft_loss": 1.1911519765853882, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.570216205293999, |
|
"grad_norm": 2.3468587398529053, |
|
"learning_rate": 2.460308446703341e-07, |
|
"logits/chosen": 255.0273895263672, |
|
"logits/rejected": 255.33963012695312, |
|
"logps/chosen": -1.1678217649459839, |
|
"logps/rejected": -1.3280802965164185, |
|
"loss": 1.2319, |
|
"odds_ratio_loss": 0.6405949592590332, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11678217351436615, |
|
"rewards/margins": 0.016025854274630547, |
|
"rewards/rejected": -0.13280804455280304, |
|
"sft_loss": 1.1678217649459839, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.5863810870883004, |
|
"grad_norm": 2.52286434173584, |
|
"learning_rate": 2.2802854610213143e-07, |
|
"logits/chosen": 253.905517578125, |
|
"logits/rejected": 254.2148895263672, |
|
"logps/chosen": -1.0991406440734863, |
|
"logps/rejected": -1.5185635089874268, |
|
"loss": 1.1551, |
|
"odds_ratio_loss": 0.5599113702774048, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.10991404950618744, |
|
"rewards/margins": 0.041942298412323, |
|
"rewards/rejected": -0.15185634791851044, |
|
"sft_loss": 1.0991406440734863, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.6025459688826027, |
|
"grad_norm": 6.791391849517822, |
|
"learning_rate": 2.106786560391072e-07, |
|
"logits/chosen": 253.763671875, |
|
"logits/rejected": 253.8026580810547, |
|
"logps/chosen": -1.2003083229064941, |
|
"logps/rejected": -1.4066941738128662, |
|
"loss": 1.2644, |
|
"odds_ratio_loss": 0.6404808163642883, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.12003083527088165, |
|
"rewards/margins": 0.020638594403862953, |
|
"rewards/rejected": -0.14066943526268005, |
|
"sft_loss": 1.2003083229064941, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.6187108506769046, |
|
"grad_norm": 2.421247720718384, |
|
"learning_rate": 1.9398615659308255e-07, |
|
"logits/chosen": 254.5044403076172, |
|
"logits/rejected": 255.14804077148438, |
|
"logps/chosen": -1.145989179611206, |
|
"logps/rejected": -1.2900917530059814, |
|
"loss": 1.2124, |
|
"odds_ratio_loss": 0.663682222366333, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.11459891498088837, |
|
"rewards/margins": 0.014410244300961494, |
|
"rewards/rejected": -0.1290091574192047, |
|
"sft_loss": 1.145989179611206, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.6348757324712064, |
|
"grad_norm": 2.5792062282562256, |
|
"learning_rate": 1.7795584110272184e-07, |
|
"logits/chosen": 254.8577117919922, |
|
"logits/rejected": 254.72903442382812, |
|
"logps/chosen": -1.1733181476593018, |
|
"logps/rejected": -1.3872268199920654, |
|
"loss": 1.2372, |
|
"odds_ratio_loss": 0.6392764449119568, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.11733181774616241, |
|
"rewards/margins": 0.021390849724411964, |
|
"rewards/rejected": -0.13872265815734863, |
|
"sft_loss": 1.1733181476593018, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.6510406142655083, |
|
"grad_norm": 4.67177152633667, |
|
"learning_rate": 1.6259231275709636e-07, |
|
"logits/chosen": 254.6771697998047, |
|
"logits/rejected": 254.81906127929688, |
|
"logps/chosen": -1.1654198169708252, |
|
"logps/rejected": -1.331291913986206, |
|
"loss": 1.2329, |
|
"odds_ratio_loss": 0.6746650338172913, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.11654196679592133, |
|
"rewards/margins": 0.016587218269705772, |
|
"rewards/rejected": -0.13312919437885284, |
|
"sft_loss": 1.1654198169708252, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.66720549605981, |
|
"grad_norm": 2.415234327316284, |
|
"learning_rate": 1.478999832738548e-07, |
|
"logits/chosen": 253.84658813476562, |
|
"logits/rejected": 254.2846221923828, |
|
"logps/chosen": -1.1557317972183228, |
|
"logps/rejected": -1.4208415746688843, |
|
"loss": 1.2199, |
|
"odds_ratio_loss": 0.6416669487953186, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11557319015264511, |
|
"rewards/margins": 0.026510965079069138, |
|
"rewards/rejected": -0.14208415150642395, |
|
"sft_loss": 1.1557317972183228, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.683370377854112, |
|
"grad_norm": 2.4805846214294434, |
|
"learning_rate": 1.338830716323769e-07, |
|
"logits/chosen": 253.04605102539062, |
|
"logits/rejected": 253.3199005126953, |
|
"logps/chosen": -1.1211105585098267, |
|
"logps/rejected": -1.3149446249008179, |
|
"loss": 1.1846, |
|
"odds_ratio_loss": 0.6351101994514465, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.11211104691028595, |
|
"rewards/margins": 0.019383419305086136, |
|
"rewards/rejected": -0.13149447739124298, |
|
"sft_loss": 1.1211105585098267, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.699535259648414, |
|
"grad_norm": 4.832085609436035, |
|
"learning_rate": 1.205456028622723e-07, |
|
"logits/chosen": 254.3978729248047, |
|
"logits/rejected": 254.5135498046875, |
|
"logps/chosen": -1.0987465381622314, |
|
"logps/rejected": -1.4824903011322021, |
|
"loss": 1.157, |
|
"odds_ratio_loss": 0.582126259803772, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1098746508359909, |
|
"rewards/margins": 0.038374386727809906, |
|
"rewards/rejected": -0.1482490599155426, |
|
"sft_loss": 1.0987465381622314, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.7157001414427158, |
|
"grad_norm": 1.8406291007995605, |
|
"learning_rate": 1.0789140688756805e-07, |
|
"logits/chosen": 254.9768524169922, |
|
"logits/rejected": 255.4145965576172, |
|
"logps/chosen": -1.1487500667572021, |
|
"logps/rejected": -1.416771650314331, |
|
"loss": 1.2068, |
|
"odds_ratio_loss": 0.5807241201400757, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.11487500369548798, |
|
"rewards/margins": 0.026802152395248413, |
|
"rewards/rejected": -0.14167717099189758, |
|
"sft_loss": 1.1487500667572021, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.7318650232370176, |
|
"grad_norm": 9.125133514404297, |
|
"learning_rate": 9.592411742693098e-08, |
|
"logits/chosen": 253.77490234375, |
|
"logits/rejected": 253.99368286132812, |
|
"logps/chosen": -1.2172834873199463, |
|
"logps/rejected": -1.371734857559204, |
|
"loss": 1.2881, |
|
"odds_ratio_loss": 0.7082632780075073, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12172834575176239, |
|
"rewards/margins": 0.015445133671164513, |
|
"rewards/rejected": -0.13717348873615265, |
|
"sft_loss": 1.2172834873199463, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.7480299050313195, |
|
"grad_norm": 1.7780921459197998, |
|
"learning_rate": 8.464717095022168e-08, |
|
"logits/chosen": 251.9024658203125, |
|
"logits/rejected": 253.18222045898438, |
|
"logps/chosen": -1.1575608253479004, |
|
"logps/rejected": -1.4255679845809937, |
|
"loss": 1.2219, |
|
"odds_ratio_loss": 0.6432427167892456, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.115756094455719, |
|
"rewards/margins": 0.026800716295838356, |
|
"rewards/rejected": -0.1425568014383316, |
|
"sft_loss": 1.1575608253479004, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.7641947868256214, |
|
"grad_norm": 2.707679271697998, |
|
"learning_rate": 7.406380569169841e-08, |
|
"logits/chosen": 254.63046264648438, |
|
"logits/rejected": 255.2324676513672, |
|
"logps/chosen": -1.21084725856781, |
|
"logps/rejected": -1.339179277420044, |
|
"loss": 1.278, |
|
"odds_ratio_loss": 0.6712638139724731, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.12108473479747772, |
|
"rewards/margins": 0.012833192944526672, |
|
"rewards/rejected": -0.1339179426431656, |
|
"sft_loss": 1.21084725856781, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.7803596686199232, |
|
"grad_norm": 9.321467399597168, |
|
"learning_rate": 6.417706072013808e-08, |
|
"logits/chosen": 255.14419555664062, |
|
"logits/rejected": 255.5975341796875, |
|
"logps/chosen": -1.1439273357391357, |
|
"logps/rejected": -1.3444888591766357, |
|
"loss": 1.2084, |
|
"odds_ratio_loss": 0.644874095916748, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.1143927350640297, |
|
"rewards/margins": 0.020056165754795074, |
|
"rewards/rejected": -0.13444891571998596, |
|
"sft_loss": 1.1439273357391357, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.796524550414225, |
|
"grad_norm": 4.651902675628662, |
|
"learning_rate": 5.498977506615294e-08, |
|
"logits/chosen": 254.2846221923828, |
|
"logits/rejected": 255.0900421142578, |
|
"logps/chosen": -1.2078436613082886, |
|
"logps/rejected": -1.3565863370895386, |
|
"loss": 1.2795, |
|
"odds_ratio_loss": 0.7164067029953003, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.12078437954187393, |
|
"rewards/margins": 0.014874264597892761, |
|
"rewards/rejected": -0.1356586515903473, |
|
"sft_loss": 1.2078436613082886, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.812689432208527, |
|
"grad_norm": 2.109281301498413, |
|
"learning_rate": 4.6504586906947756e-08, |
|
"logits/chosen": 255.70327758789062, |
|
"logits/rejected": 255.75326538085938, |
|
"logps/chosen": -1.204192876815796, |
|
"logps/rejected": -1.3763076066970825, |
|
"loss": 1.2661, |
|
"odds_ratio_loss": 0.6194810271263123, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.12041930109262466, |
|
"rewards/margins": 0.01721145212650299, |
|
"rewards/rejected": -0.13763076066970825, |
|
"sft_loss": 1.204192876815796, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 11.28996467590332, |
|
"learning_rate": 3.8723932808754914e-08, |
|
"logits/chosen": 254.12826538085938, |
|
"logits/rejected": 254.1260528564453, |
|
"logps/chosen": -1.2869278192520142, |
|
"logps/rejected": -1.4129821062088013, |
|
"loss": 1.3563, |
|
"odds_ratio_loss": 0.6939128637313843, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.12869277596473694, |
|
"rewards/margins": 0.012605440802872181, |
|
"rewards/rejected": -0.14129820466041565, |
|
"sft_loss": 1.2869278192520142, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.8450191957971307, |
|
"grad_norm": 4.297213077545166, |
|
"learning_rate": 3.1650047027158014e-08, |
|
"logits/chosen": 254.026123046875, |
|
"logits/rejected": 254.0368194580078, |
|
"logps/chosen": -1.1498368978500366, |
|
"logps/rejected": -1.3505172729492188, |
|
"loss": 1.212, |
|
"odds_ratio_loss": 0.6218072175979614, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11498367786407471, |
|
"rewards/margins": 0.020068055018782616, |
|
"rewards/rejected": -0.13505175709724426, |
|
"sft_loss": 1.1498368978500366, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.8611840775914326, |
|
"grad_norm": 3.5106639862060547, |
|
"learning_rate": 2.5284960865517848e-08, |
|
"logits/chosen": 253.19677734375, |
|
"logits/rejected": 253.49380493164062, |
|
"logps/chosen": -1.0654290914535522, |
|
"logps/rejected": -1.4208238124847412, |
|
"loss": 1.1222, |
|
"odds_ratio_loss": 0.5680567026138306, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1065429076552391, |
|
"rewards/margins": 0.035539474338293076, |
|
"rewards/rejected": -0.14208237826824188, |
|
"sft_loss": 1.0654290914535522, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.8773489593857344, |
|
"grad_norm": 1.9635688066482544, |
|
"learning_rate": 1.9630502091670388e-08, |
|
"logits/chosen": 254.31576538085938, |
|
"logits/rejected": 254.73184204101562, |
|
"logps/chosen": -1.16495680809021, |
|
"logps/rejected": -1.438716173171997, |
|
"loss": 1.2222, |
|
"odds_ratio_loss": 0.5726233720779419, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.11649568378925323, |
|
"rewards/margins": 0.027375921607017517, |
|
"rewards/rejected": -0.14387162029743195, |
|
"sft_loss": 1.16495680809021, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.8935138411800363, |
|
"grad_norm": 7.4126458168029785, |
|
"learning_rate": 1.4688294413074677e-08, |
|
"logits/chosen": 253.7817840576172, |
|
"logits/rejected": 254.4253692626953, |
|
"logps/chosen": -1.081469178199768, |
|
"logps/rejected": -1.3936357498168945, |
|
"loss": 1.1414, |
|
"odds_ratio_loss": 0.5996376872062683, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.10814690589904785, |
|
"rewards/margins": 0.03121664747595787, |
|
"rewards/rejected": -0.13936355710029602, |
|
"sft_loss": 1.081469178199768, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.909678722974338, |
|
"grad_norm": 1.8516889810562134, |
|
"learning_rate": 1.0459757010556626e-08, |
|
"logits/chosen": 252.57345581054688, |
|
"logits/rejected": 252.3190155029297, |
|
"logps/chosen": -1.173514723777771, |
|
"logps/rejected": -1.303662657737732, |
|
"loss": 1.2401, |
|
"odds_ratio_loss": 0.6658231019973755, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1173514723777771, |
|
"rewards/margins": 0.013014810159802437, |
|
"rewards/rejected": -0.13036629557609558, |
|
"sft_loss": 1.173514723777771, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.92584360476864, |
|
"grad_norm": 7.74896764755249, |
|
"learning_rate": 6.94610413078306e-09, |
|
"logits/chosen": 253.01602172851562, |
|
"logits/rejected": 253.71841430664062, |
|
"logps/chosen": -1.2020865678787231, |
|
"logps/rejected": -1.5033478736877441, |
|
"loss": 1.2671, |
|
"odds_ratio_loss": 0.6497219204902649, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12020864337682724, |
|
"rewards/margins": 0.030126124620437622, |
|
"rewards/rejected": -0.15033479034900665, |
|
"sft_loss": 1.2020865678787231, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.942008486562942, |
|
"grad_norm": 2.0265376567840576, |
|
"learning_rate": 4.14834473758563e-09, |
|
"logits/chosen": 252.50656127929688, |
|
"logits/rejected": 252.5145721435547, |
|
"logps/chosen": -1.0974245071411133, |
|
"logps/rejected": -1.4048916101455688, |
|
"loss": 1.1558, |
|
"odds_ratio_loss": 0.5838974714279175, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.10974244773387909, |
|
"rewards/margins": 0.030746713280677795, |
|
"rewards/rejected": -0.14048916101455688, |
|
"sft_loss": 1.0974245071411133, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.9581733683572438, |
|
"grad_norm": 2.291332721710205, |
|
"learning_rate": 2.067282222230349e-09, |
|
"logits/chosen": 254.31192016601562, |
|
"logits/rejected": 254.5823974609375, |
|
"logps/chosen": -1.1228351593017578, |
|
"logps/rejected": -1.4580986499786377, |
|
"loss": 1.1841, |
|
"odds_ratio_loss": 0.6124657392501831, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11228351294994354, |
|
"rewards/margins": 0.03352636098861694, |
|
"rewards/rejected": -0.1458098590373993, |
|
"sft_loss": 1.1228351593017578, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.9743382501515456, |
|
"grad_norm": 8.377201080322266, |
|
"learning_rate": 7.035141727212979e-10, |
|
"logits/chosen": 252.5790252685547, |
|
"logits/rejected": 253.33993530273438, |
|
"logps/chosen": -1.062239646911621, |
|
"logps/rejected": -1.3180148601531982, |
|
"loss": 1.1223, |
|
"odds_ratio_loss": 0.6001896858215332, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10622396320104599, |
|
"rewards/margins": 0.025577524676918983, |
|
"rewards/rejected": -0.13180148601531982, |
|
"sft_loss": 1.062239646911621, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.9905031319458475, |
|
"grad_norm": 5.447793006896973, |
|
"learning_rate": 5.743220219761592e-11, |
|
"logits/chosen": 254.24691772460938, |
|
"logits/rejected": 254.78369140625, |
|
"logps/chosen": -1.195462942123413, |
|
"logps/rejected": -1.4066945314407349, |
|
"loss": 1.2627, |
|
"odds_ratio_loss": 0.6723325252532959, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1195463091135025, |
|
"rewards/margins": 0.021123168990015984, |
|
"rewards/rejected": -0.14066946506500244, |
|
"sft_loss": 1.195462942123413, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.9969690846635686, |
|
"step": 1854, |
|
"total_flos": 2.1935611788745114e+18, |
|
"train_loss": 1.3469306265266197, |
|
"train_runtime": 24131.5713, |
|
"train_samples_per_second": 1.231, |
|
"train_steps_per_second": 0.077 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1854, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 2.1935611788745114e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|