{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.995276334435522, "eval_steps": 50, "global_step": 352, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.005668398677373642, "grad_norm": 13.413641782833313, "learning_rate": 1.3888888888888888e-07, "logits": -1.3147305250167847, "logps": -88.0877456665039, "loss": 0.6931, "objective": 0.6931471824645996, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5208333134651184, "regularize": 0.6931471824645996, "step": 1 }, { "dpo_loss": 0.6931948065757751, "epoch": 0.02834199338686821, "grad_norm": 13.69587680989851, "learning_rate": 6.944444444444446e-07, "logits": -1.3683627843856812, "logps": -83.96967315673828, "loss": 0.693, "objective": 0.6931948065757751, "ranking_idealized": 0.6145833134651184, "ranking_idealized_expo": 0.546875, "ranking_simple": 0.546875, "regularize": 0.6931948065757751, "step": 5 }, { "dpo_loss": 0.6823691129684448, "epoch": 0.05668398677373642, "grad_norm": 12.80657019274692, "learning_rate": 1.3888888888888892e-06, "logits": -1.4495999813079834, "logps": -81.29175567626953, "loss": 0.688, "objective": 0.6823691129684448, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5208333134651184, "regularize": 0.6823691129684448, "step": 10 }, { "dpo_loss": 0.6785086989402771, "epoch": 0.08502598016060463, "grad_norm": 12.407233192434104, "learning_rate": 2.0833333333333334e-06, "logits": -1.4651646614074707, "logps": -80.61344909667969, "loss": 0.6772, "objective": 0.6785086989402771, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5166666507720947, "regularize": 0.6785086989402771, "step": 15 }, { "dpo_loss": 0.6960199475288391, "epoch": 0.11336797354747284, "grad_norm": 14.590968333806934, "learning_rate": 2.7777777777777783e-06, "logits": -1.5296542644500732, "logps": -82.23778533935547, "loss": 0.6632, "objective": 0.6960199475288391, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5416666865348816, "regularize": 0.6960199475288391, "step": 20 }, { "dpo_loss": 0.6467238068580627, "epoch": 0.14170996693434104, "grad_norm": 12.356578847477945, "learning_rate": 3.4722222222222224e-06, "logits": -1.6228190660476685, "logps": -81.19103240966797, "loss": 0.651, "objective": 0.6467238068580627, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5375000238418579, "regularize": 0.6467238068580627, "step": 25 }, { "dpo_loss": 0.6353517174720764, "epoch": 0.17005196032120926, "grad_norm": 11.773480191521392, "learning_rate": 4.166666666666667e-06, "logits": -1.6215683221817017, "logps": -84.20466613769531, "loss": 0.6315, "objective": 0.6353517174720764, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5625, "regularize": 0.6353517174720764, "step": 30 }, { "dpo_loss": 0.6039083003997803, "epoch": 0.19839395370807747, "grad_norm": 13.700920213341456, "learning_rate": 4.861111111111111e-06, "logits": -1.6290026903152466, "logps": -88.07846069335938, "loss": 0.5989, "objective": 0.6039083003997803, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.574999988079071, "regularize": 0.6039083003997803, "step": 35 }, { "dpo_loss": 0.5691289901733398, "epoch": 0.22673594709494568, "grad_norm": 15.376644512330481, "learning_rate": 4.998023493068255e-06, "logits": -1.660142183303833, "logps": -92.43869018554688, "loss": 0.5901, "objective": 0.5691289901733398, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5916666388511658, "regularize": 0.5691289901733398, "step": 40 }, { "dpo_loss": 0.54946368932724, "epoch": 0.25507794048181387, "grad_norm": 13.285916184549091, "learning_rate": 4.989999289644993e-06, "logits": -1.8279616832733154, "logps": -92.55197143554688, "loss": 0.5808, "objective": 0.54946368932724, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6000000238418579, "regularize": 0.54946368932724, "step": 45 }, { "dpo_loss": 0.5661661028862, "epoch": 0.2834199338686821, "grad_norm": 12.64511853627035, "learning_rate": 4.975823666181256e-06, "logits": -1.8111599683761597, "logps": -91.70741271972656, "loss": 0.5807, "objective": 0.5661661028862, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.42500001192092896, "ranking_simple": 0.5833333134651184, "regularize": 0.5661661028862, "step": 50 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.7087637782096863, "eval_logits": -1.8708840608596802, "eval_logps": -101.86113739013672, "eval_loss": 0.6822364926338196, "eval_objective": 0.7087637782096863, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.51962810754776, "eval_regularize": 0.7087637782096863, "eval_runtime": 260.7452, "eval_samples_per_second": 22.206, "eval_steps_per_second": 0.928, "step": 50 }, { "dpo_loss": 0.5751214623451233, "epoch": 0.3117619272555503, "grad_norm": 13.121793817706566, "learning_rate": 4.955531642853404e-06, "logits": -1.8980281352996826, "logps": -95.49773406982422, "loss": 0.5587, "objective": 0.5751214623451233, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6000000238418579, "regularize": 0.5751214623451233, "step": 55 }, { "dpo_loss": 0.5370620489120483, "epoch": 0.3401039206424185, "grad_norm": 14.066207842989309, "learning_rate": 4.929173350101025e-06, "logits": -1.8723183870315552, "logps": -95.54426574707031, "loss": 0.5636, "objective": 0.5370620489120483, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.612500011920929, "regularize": 0.5370620489120483, "step": 60 }, { "dpo_loss": 0.518057644367218, "epoch": 0.3684459140292867, "grad_norm": 11.034128756662287, "learning_rate": 4.896813904782162e-06, "logits": -1.8221579790115356, "logps": -94.84842681884766, "loss": 0.5446, "objective": 0.518057644367218, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.518057644367218, "step": 65 }, { "dpo_loss": 0.5486761331558228, "epoch": 0.39678790741615494, "grad_norm": 13.022002107607332, "learning_rate": 4.858533249305337e-06, "logits": -1.8116071224212646, "logps": -95.58805847167969, "loss": 0.5527, "objective": 0.5486761331558228, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.6041666865348816, "regularize": 0.5486761331558228, "step": 70 }, { "dpo_loss": 0.5020822286605835, "epoch": 0.42512990080302315, "grad_norm": 13.443795403497152, "learning_rate": 4.814425954135786e-06, "logits": -1.8384398221969604, "logps": -95.35403442382812, "loss": 0.506, "objective": 0.5020822286605835, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6583333611488342, "regularize": 0.5020822286605835, "step": 75 }, { "dpo_loss": 0.5126288533210754, "epoch": 0.45347189418989137, "grad_norm": 14.542906941369981, "learning_rate": 4.764600984163809e-06, "logits": -1.8958051204681396, "logps": -94.8360824584961, "loss": 0.5222, "objective": 0.5126288533210754, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.675000011920929, "regularize": 0.5126288533210754, "step": 80 }, { "dpo_loss": 0.4901658594608307, "epoch": 0.4818138875767596, "grad_norm": 12.218149677924327, "learning_rate": 4.709181429512391e-06, "logits": -1.8605338335037231, "logps": -91.9226303100586, "loss": 0.5101, "objective": 0.4901658594608307, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.6333333253860474, "regularize": 0.4901658594608307, "step": 85 }, { "dpo_loss": 0.5431678891181946, "epoch": 0.5101558809636277, "grad_norm": 12.479759342264707, "learning_rate": 4.648304201449153e-06, "logits": -1.6652709245681763, "logps": -94.42205047607422, "loss": 0.4784, "objective": 0.5431678891181946, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5833333134651184, "regularize": 0.5431678891181946, "step": 90 }, { "dpo_loss": 0.45254868268966675, "epoch": 0.538497874350496, "grad_norm": 13.513215812074137, "learning_rate": 4.582119694153834e-06, "logits": -1.7359992265701294, "logps": -94.93058013916016, "loss": 0.4763, "objective": 0.45254868268966675, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6833333373069763, "regularize": 0.45254868268966675, "step": 95 }, { "dpo_loss": 0.4402621388435364, "epoch": 0.5668398677373642, "grad_norm": 13.595765492893772, "learning_rate": 4.510791413176912e-06, "logits": -1.849007248878479, "logps": -101.81654357910156, "loss": 0.4908, "objective": 0.4402621388435364, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.7041666507720947, "regularize": 0.4402621388435364, "step": 100 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6854463219642639, "eval_logits": -1.8507442474365234, "eval_logps": -105.0768051147461, "eval_loss": 0.6801603436470032, "eval_objective": 0.6854463219642639, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5299586653709412, "eval_regularize": 0.6854463219642639, "eval_runtime": 259.5573, "eval_samples_per_second": 22.307, "eval_steps_per_second": 0.932, "step": 100 }, { "dpo_loss": 0.43196719884872437, "epoch": 0.5951818611242324, "grad_norm": 11.229649770242489, "learning_rate": 4.4344955715072344e-06, "logits": -1.6958547830581665, "logps": -102.08232879638672, "loss": 0.4538, "objective": 0.43196719884872437, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.675000011920929, "regularize": 0.43196719884872437, "step": 105 }, { "dpo_loss": 0.4712914526462555, "epoch": 0.6235238545111006, "grad_norm": 12.961407259435642, "learning_rate": 4.353420654246546e-06, "logits": -1.8904917240142822, "logps": -100.97926330566406, "loss": 0.4539, "objective": 0.4712914526462555, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4541666805744171, "ranking_simple": 0.6666666865348816, "regularize": 0.4712914526462555, "step": 110 }, { "dpo_loss": 0.45986562967300415, "epoch": 0.6518658478979689, "grad_norm": 12.814230479704358, "learning_rate": 4.267766952966369e-06, "logits": -1.8556350469589233, "logps": -102.53894805908203, "loss": 0.4603, "objective": 0.45986562967300415, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.6208333373069763, "regularize": 0.45986562967300415, "step": 115 }, { "dpo_loss": 0.4761102795600891, "epoch": 0.680207841284837, "grad_norm": 12.618691610478947, "learning_rate": 4.177746070897593e-06, "logits": -1.7863221168518066, "logps": -104.2935791015625, "loss": 0.4497, "objective": 0.4761102795600891, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6583333611488342, "regularize": 0.4761102795600891, "step": 120 }, { "dpo_loss": 0.4552411735057831, "epoch": 0.7085498346717053, "grad_norm": 11.61517994185661, "learning_rate": 4.083580400175153e-06, "logits": -1.7287298440933228, "logps": -104.63223266601562, "loss": 0.4684, "objective": 0.4552411735057831, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6958333253860474, "regularize": 0.4552411735057831, "step": 125 }, { "dpo_loss": 0.4498961567878723, "epoch": 0.7368918280585735, "grad_norm": 12.014560354642498, "learning_rate": 3.985502572429277e-06, "logits": -1.8877291679382324, "logps": -98.7514877319336, "loss": 0.4427, "objective": 0.4498961567878723, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.6499999761581421, "regularize": 0.4498961567878723, "step": 130 }, { "dpo_loss": 0.5134549140930176, "epoch": 0.7652338214454416, "grad_norm": 13.034967407252013, "learning_rate": 3.8837548840805395e-06, "logits": -1.9060827493667603, "logps": -102.064697265625, "loss": 0.4396, "objective": 0.5134549140930176, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.6666666865348816, "regularize": 0.5134549140930176, "step": 135 }, { "dpo_loss": 0.4010888636112213, "epoch": 0.7935758148323099, "grad_norm": 11.920508064913742, "learning_rate": 3.7785886977585562e-06, "logits": -1.866942048072815, "logps": -102.13489532470703, "loss": 0.4133, "objective": 0.4010888636112213, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.6875, "regularize": 0.4010888636112213, "step": 140 }, { "dpo_loss": 0.3797028958797455, "epoch": 0.821917808219178, "grad_norm": 13.093355264813445, "learning_rate": 3.6702638213230344e-06, "logits": -1.9820648431777954, "logps": -102.35152435302734, "loss": 0.4038, "objective": 0.3797028958797455, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.7250000238418579, "regularize": 0.3797028958797455, "step": 145 }, { "dpo_loss": 0.4191714823246002, "epoch": 0.8502598016060463, "grad_norm": 11.55897727937339, "learning_rate": 3.5590478660213214e-06, "logits": -2.00236439704895, "logps": -100.3611831665039, "loss": 0.4191, "objective": 0.4191714823246002, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.7208333611488342, "regularize": 0.4191714823246002, "step": 150 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.7127470374107361, "eval_logits": -2.1205222606658936, "eval_logps": -108.57035827636719, "eval_loss": 0.695988655090332, "eval_objective": 0.7127470374107361, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5402892827987671, "eval_regularize": 0.7127470374107361, "eval_runtime": 259.6667, "eval_samples_per_second": 22.298, "eval_steps_per_second": 0.932, "step": 150 }, { "dpo_loss": 0.394189715385437, "epoch": 0.8786017949929145, "grad_norm": 13.2018803660899, "learning_rate": 3.4452155853680457e-06, "logits": -2.0352015495300293, "logps": -101.7432632446289, "loss": 0.387, "objective": 0.394189715385437, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.7166666388511658, "regularize": 0.394189715385437, "step": 155 }, { "dpo_loss": 0.41441217064857483, "epoch": 0.9069437883797827, "grad_norm": 13.589086232542883, "learning_rate": 3.32904819638017e-06, "logits": -1.929869532585144, "logps": -104.0088119506836, "loss": 0.4188, "objective": 0.41441217064857483, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.699999988079071, "regularize": 0.41441217064857483, "step": 160 }, { "dpo_loss": 0.4174909293651581, "epoch": 0.9352857817666509, "grad_norm": 12.769842629759419, "learning_rate": 3.2108326848442507e-06, "logits": -1.9259008169174194, "logps": -102.89508819580078, "loss": 0.3934, "objective": 0.4174909293651581, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.7083333134651184, "regularize": 0.4174909293651581, "step": 165 }, { "dpo_loss": 0.3482740819454193, "epoch": 0.9636277751535192, "grad_norm": 12.654714223723412, "learning_rate": 3.090861096332263e-06, "logits": -1.8867944478988647, "logps": -103.65254974365234, "loss": 0.385, "objective": 0.3482740819454193, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.7333333492279053, "regularize": 0.3482740819454193, "step": 170 }, { "dpo_loss": 0.3600638806819916, "epoch": 0.9919697685403873, "grad_norm": 12.227920456755363, "learning_rate": 2.9694298147174566e-06, "logits": -1.8309239149093628, "logps": -104.46839141845703, "loss": 0.3687, "objective": 0.3600638806819916, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.7833333611488342, "regularize": 0.3600638806819916, "step": 175 }, { "dpo_loss": 0.2651214897632599, "epoch": 1.0203117619272555, "grad_norm": 9.967134017735377, "learning_rate": 2.8468388299726714e-06, "logits": -2.0111324787139893, "logps": -106.2859115600586, "loss": 0.2698, "objective": 0.2651214897632599, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.8083333373069763, "regularize": 0.2651214897632599, "step": 180 }, { "dpo_loss": 0.2080942541360855, "epoch": 1.0486537553141237, "grad_norm": 13.966930247511787, "learning_rate": 2.723390997059943e-06, "logits": -1.873337984085083, "logps": -111.36536407470703, "loss": 0.2277, "objective": 0.2080942541360855, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.8083333373069763, "regularize": 0.2080942541360855, "step": 185 }, { "dpo_loss": 0.23763832449913025, "epoch": 1.076995748700992, "grad_norm": 14.992011192414951, "learning_rate": 2.599391287742315e-06, "logits": -1.8715498447418213, "logps": -118.10340118408203, "loss": 0.2298, "objective": 0.23763832449913025, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.8500000238418579, "regularize": 0.23763832449913025, "step": 190 }, { "dpo_loss": 0.21301580965518951, "epoch": 1.10533774208786, "grad_norm": 10.744062848508776, "learning_rate": 2.4751460371661763e-06, "logits": -1.8871595859527588, "logps": -113.93363952636719, "loss": 0.2267, "objective": 0.21301580965518951, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.8333333134651184, "regularize": 0.21301580965518951, "step": 195 }, { "dpo_loss": 0.21621347963809967, "epoch": 1.1336797354747283, "grad_norm": 9.639805136359016, "learning_rate": 2.3509621870754505e-06, "logits": -2.02681303024292, "logps": -110.0398941040039, "loss": 0.2287, "objective": 0.21621347963809967, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.8583333492279053, "regularize": 0.21621347963809967, "step": 200 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.7402901649475098, "eval_logits": -2.076362133026123, "eval_logps": -115.44319915771484, "eval_loss": 0.7275605797767639, "eval_objective": 0.7402901649475098, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5361570119857788, "eval_regularize": 0.7402901649475098, "eval_runtime": 259.9944, "eval_samples_per_second": 22.27, "eval_steps_per_second": 0.931, "step": 200 }, { "dpo_loss": 0.23257608711719513, "epoch": 1.1620217288615966, "grad_norm": 10.185272365862717, "learning_rate": 2.2271465275271985e-06, "logits": -2.0131571292877197, "logps": -111.7054214477539, "loss": 0.2278, "objective": 0.23257608711719513, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.8208333253860474, "regularize": 0.23257608711719513, "step": 205 }, { "dpo_loss": 0.2272442728281021, "epoch": 1.1903637222484649, "grad_norm": 9.529035858141413, "learning_rate": 2.1040049389819628e-06, "logits": -1.9997531175613403, "logps": -110.997314453125, "loss": 0.2366, "objective": 0.2272442728281021, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.7958333492279053, "regularize": 0.2272442728281021, "step": 210 }, { "dpo_loss": 0.2654048204421997, "epoch": 1.2187057156353331, "grad_norm": 11.812205063296917, "learning_rate": 1.9818416366412277e-06, "logits": -1.978461742401123, "logps": -113.06928253173828, "loss": 0.2372, "objective": 0.2654048204421997, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.800000011920929, "regularize": 0.2654048204421997, "step": 215 }, { "dpo_loss": 0.2245999276638031, "epoch": 1.2470477090222012, "grad_norm": 12.627574377824557, "learning_rate": 1.8609584188988135e-06, "logits": -1.9322872161865234, "logps": -112.9950942993164, "loss": 0.2418, "objective": 0.2245999276638031, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.6041666865348816, "ranking_simple": 0.8583333492279053, "regularize": 0.2245999276638031, "step": 220 }, { "dpo_loss": 0.21560445427894592, "epoch": 1.2753897024090695, "grad_norm": 10.156285420255232, "learning_rate": 1.7416539217628792e-06, "logits": -1.947004795074463, "logps": -113.21937561035156, "loss": 0.2349, "objective": 0.21560445427894592, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.8416666388511658, "regularize": 0.21560445427894592, "step": 225 }, { "dpo_loss": 0.25168582797050476, "epoch": 1.3037316957959377, "grad_norm": 9.19384725201928, "learning_rate": 1.6242228810904393e-06, "logits": -1.8978828191757202, "logps": -114.50511932373047, "loss": 0.2522, "objective": 0.25168582797050476, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.7958333492279053, "regularize": 0.25168582797050476, "step": 230 }, { "dpo_loss": 0.20120379328727722, "epoch": 1.3320736891828058, "grad_norm": 9.731362112902348, "learning_rate": 1.508955404457015e-06, "logits": -2.018658399581909, "logps": -108.0621109008789, "loss": 0.2386, "objective": 0.20120379328727722, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.8583333492279053, "regularize": 0.20120379328727722, "step": 235 }, { "dpo_loss": 0.2090713083744049, "epoch": 1.360415682569674, "grad_norm": 11.056486327313111, "learning_rate": 1.3961362544602215e-06, "logits": -1.977256417274475, "logps": -111.13236236572266, "loss": 0.2297, "objective": 0.2090713083744049, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.8291666507720947, "regularize": 0.2090713083744049, "step": 240 }, { "dpo_loss": 0.2641766667366028, "epoch": 1.3887576759565423, "grad_norm": 10.388090633212887, "learning_rate": 1.2860441452278578e-06, "logits": -2.022833824157715, "logps": -111.30134582519531, "loss": 0.2375, "objective": 0.2641766667366028, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.824999988079071, "regularize": 0.2641766667366028, "step": 245 }, { "dpo_loss": 0.21442964673042297, "epoch": 1.4170996693434104, "grad_norm": 12.097077417741462, "learning_rate": 1.1789510538684524e-06, "logits": -1.969295620918274, "logps": -112.7668228149414, "loss": 0.2329, "objective": 0.21442964673042297, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.8374999761581421, "regularize": 0.21442964673042297, "step": 250 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.7705549597740173, "eval_logits": -2.064002513885498, "eval_logps": -118.24050903320312, "eval_loss": 0.7454098463058472, "eval_objective": 0.7705549597740173, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5351239442825317, "eval_regularize": 0.7705549597740173, "eval_runtime": 259.7824, "eval_samples_per_second": 22.288, "eval_steps_per_second": 0.932, "step": 250 }, { "dpo_loss": 0.21537743508815765, "epoch": 1.4454416627302786, "grad_norm": 11.517471828255623, "learning_rate": 1.0751215485652644e-06, "logits": -1.9144450426101685, "logps": -114.43687438964844, "loss": 0.2261, "objective": 0.21537743508815765, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.8458333611488342, "regularize": 0.21537743508815765, "step": 255 }, { "dpo_loss": 0.21364690363407135, "epoch": 1.473783656117147, "grad_norm": 10.723091996163696, "learning_rate": 9.74812134973689e-07, "logits": -1.9727046489715576, "logps": -113.64453887939453, "loss": 0.2051, "objective": 0.21364690363407135, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.8125, "regularize": 0.21364690363407135, "step": 260 }, { "dpo_loss": 0.2255079597234726, "epoch": 1.5021256495040152, "grad_norm": 11.69881544405136, "learning_rate": 8.78270622536716e-07, "logits": -1.9215840101242065, "logps": -114.519287109375, "loss": 0.2204, "objective": 0.2255079597234726, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.8208333253860474, "regularize": 0.2255079597234726, "step": 265 }, { "dpo_loss": 0.25144073367118835, "epoch": 1.5304676428908834, "grad_norm": 12.543731702485788, "learning_rate": 7.857355122839674e-07, "logits": -1.909374713897705, "logps": -116.20896911621094, "loss": 0.2129, "objective": 0.25144073367118835, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.8083333373069763, "regularize": 0.25144073367118835, "step": 270 }, { "dpo_loss": 0.20962868630886078, "epoch": 1.5588096362777515, "grad_norm": 9.738954761513112, "learning_rate": 6.974354076267081e-07, "logits": -1.9043574333190918, "logps": -116.87390899658203, "loss": 0.2209, "objective": 0.20962868630886078, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.8500000238418579, "regularize": 0.20962868630886078, "step": 275 }, { "dpo_loss": 0.21624349057674408, "epoch": 1.5871516296646198, "grad_norm": 11.162245217664504, "learning_rate": 6.135884496044245e-07, "logits": -1.8271541595458984, "logps": -115.82463073730469, "loss": 0.2254, "objective": 0.21624349057674408, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.8291666507720947, "regularize": 0.21624349057674408, "step": 280 }, { "dpo_loss": 0.1944807916879654, "epoch": 1.615493623051488, "grad_norm": 10.89680213411274, "learning_rate": 5.344017779781835e-07, "logits": -1.915182113647461, "logps": -117.32617950439453, "loss": 0.2038, "objective": 0.1944807916879654, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.8416666388511658, "regularize": 0.1944807916879654, "step": 285 }, { "dpo_loss": 0.2371363341808319, "epoch": 1.643835616438356, "grad_norm": 12.787089862396805, "learning_rate": 4.6007101950209827e-07, "logits": -1.949597716331482, "logps": -114.35741424560547, "loss": 0.2281, "objective": 0.2371363341808319, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.8291666507720947, "regularize": 0.2371363341808319, "step": 290 }, { "dpo_loss": 0.1856808066368103, "epoch": 1.6721776098252243, "grad_norm": 12.381384766536955, "learning_rate": 3.907798046371139e-07, "logits": -1.8460139036178589, "logps": -113.36736297607422, "loss": 0.2197, "objective": 0.1856808066368103, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.862500011920929, "regularize": 0.1856808066368103, "step": 295 }, { "dpo_loss": 0.18572762608528137, "epoch": 1.7005196032120926, "grad_norm": 11.1669814272154, "learning_rate": 3.266993139010438e-07, "logits": -1.8981417417526245, "logps": -116.58564758300781, "loss": 0.2036, "objective": 0.18572762608528137, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.8374999761581421, "regularize": 0.18572762608528137, "step": 300 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.7850732803344727, "eval_logits": -1.9745572805404663, "eval_logps": -120.76824188232422, "eval_loss": 0.757438063621521, "eval_objective": 0.7850732803344727, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5433884263038635, "eval_regularize": 0.7850732803344727, "eval_runtime": 259.9996, "eval_samples_per_second": 22.269, "eval_steps_per_second": 0.931, "step": 300 }, { "dpo_loss": 0.2228098213672638, "epoch": 1.7288615965989607, "grad_norm": 12.534520545773153, "learning_rate": 2.6798785497556184e-07, "logits": -1.8615758419036865, "logps": -115.37549591064453, "loss": 0.2165, "objective": 0.2228098213672638, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.8083333373069763, "regularize": 0.2228098213672638, "step": 305 }, { "dpo_loss": 0.20340654253959656, "epoch": 1.7572035899858292, "grad_norm": 11.747155200770623, "learning_rate": 2.1479047161491351e-07, "logits": -1.9263988733291626, "logps": -112.0289535522461, "loss": 0.2213, "objective": 0.20340654253959656, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.8500000238418579, "regularize": 0.20340654253959656, "step": 310 }, { "dpo_loss": 0.2501530349254608, "epoch": 1.7855455833726972, "grad_norm": 11.283704550164217, "learning_rate": 1.6723858532249783e-07, "logits": -1.8143895864486694, "logps": -112.80689239501953, "loss": 0.2236, "objective": 0.2501530349254608, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.8125, "regularize": 0.2501530349254608, "step": 315 }, { "dpo_loss": 0.21945881843566895, "epoch": 1.8138875767595655, "grad_norm": 11.71850155399097, "learning_rate": 1.2544967068054332e-07, "logits": -1.8763319253921509, "logps": -117.6617202758789, "loss": 0.217, "objective": 0.21945881843566895, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.8208333253860474, "regularize": 0.21945881843566895, "step": 320 }, { "dpo_loss": 0.22155949473381042, "epoch": 1.8422295701464337, "grad_norm": 14.77256678289896, "learning_rate": 8.952696513496756e-08, "logits": -1.9358726739883423, "logps": -119.23826599121094, "loss": 0.2222, "objective": 0.22155949473381042, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.8666666746139526, "regularize": 0.22155949473381042, "step": 325 }, { "dpo_loss": 0.18552295863628387, "epoch": 1.8705715635333018, "grad_norm": 10.882571762322252, "learning_rate": 5.955921395237318e-08, "logits": -1.6932176351547241, "logps": -112.55815124511719, "loss": 0.2239, "objective": 0.18552295863628387, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.8333333134651184, "regularize": 0.18552295863628387, "step": 330 }, { "dpo_loss": 0.23661907017230988, "epoch": 1.89891355692017, "grad_norm": 10.436309183414208, "learning_rate": 3.5620450979259034e-08, "logits": -1.8299282789230347, "logps": -112.96961212158203, "loss": 0.2253, "objective": 0.23661907017230988, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.8208333253860474, "regularize": 0.23661907017230988, "step": 335 }, { "dpo_loss": 0.20749402046203613, "epoch": 1.9272555503070383, "grad_norm": 10.928827594051384, "learning_rate": 1.7769815745066476e-08, "logits": -1.8688191175460815, "logps": -116.0504379272461, "loss": 0.2215, "objective": 0.20749402046203613, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.862500011920929, "regularize": 0.20749402046203613, "step": 340 }, { "dpo_loss": 0.1883564591407776, "epoch": 1.9555975436939064, "grad_norm": 11.169724195510748, "learning_rate": 6.051407360895822e-09, "logits": -1.8896888494491577, "logps": -116.99043273925781, "loss": 0.2046, "objective": 0.1883564591407776, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.8666666746139526, "regularize": 0.1883564591407776, "step": 345 }, { "dpo_loss": 0.22660525143146515, "epoch": 1.9839395370807746, "grad_norm": 11.129465757937865, "learning_rate": 4.941755748361088e-10, "logits": -1.7810313701629639, "logps": -115.8853530883789, "loss": 0.2102, "objective": 0.22660525143146515, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.8291666507720947, "regularize": 0.22660525143146515, "step": 350 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.7840016484260559, "eval_logits": -1.973698377609253, "eval_logps": -120.04285430908203, "eval_loss": 0.7555912137031555, "eval_objective": 0.7840016484260559, "eval_ranking_idealized": 0.5888429880142212, "eval_ranking_idealized_expo": 0.5103305578231812, "eval_ranking_simple": 0.5402892827987671, "eval_regularize": 0.7840016484260559, "eval_runtime": 261.7375, "eval_samples_per_second": 22.121, "eval_steps_per_second": 0.925, "step": 350 }, { "epoch": 1.995276334435522, "step": 352, "total_flos": 0.0, "train_loss": 0.36798647613349283, "train_runtime": 14198.7063, "train_samples_per_second": 7.156, "train_steps_per_second": 0.025 } ], "logging_steps": 5, "max_steps": 352, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }