diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,8281 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 10, + "global_step": 2237, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.004470272686633884, + "grad_norm": 200.87217281502842, + "learning_rate": 5.000000000000001e-07, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -2.589505195617676, + "logits/rejected": -2.589505195617676, + "logps/chosen": -2.1613333225250244, + "logps/rejected": -2.1613333225250244, + "loss": 3.4851, + "nll_loss": 3.450399875640869, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.10806665569543839, + "rewards/margins": 0.0, + "rewards/rejected": -0.10806665569543839, + "step": 10 + }, + { + "epoch": 0.004470272686633884, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -2.5523717403411865, + "eval_logits/rejected": -2.5523717403411865, + "eval_logps/chosen": -0.27682167291641235, + "eval_logps/rejected": -0.27682167291641235, + "eval_loss": 1.5240834951400757, + "eval_nll_loss": 1.4894258975982666, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -0.01384108979254961, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -0.01384108979254961, + "eval_runtime": 307.2493, + "eval_samples_per_second": 58.236, + "eval_steps_per_second": 1.823, + "step": 10 + }, + { + "epoch": 0.008940545373267769, + "grad_norm": 35.74287350401742, + "learning_rate": 1.0000000000000002e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -2.1916913986206055, + "logits/rejected": -2.1916913986206055, + "logps/chosen": -0.04086422920227051, + "logps/rejected": -0.04086422920227051, + "loss": 1.086, + "nll_loss": 1.0513627529144287, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.002043211366981268, + "rewards/margins": 0.0, + "rewards/rejected": -0.002043211366981268, + "step": 20 + }, + { + "epoch": 0.008940545373267769, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.7726595401763916, + "eval_logits/rejected": -1.7726595401763916, + "eval_logps/chosen": -6.101293365645688e-06, + "eval_logps/rejected": -6.101293365645688e-06, + "eval_loss": 0.946562647819519, + "eval_nll_loss": 0.9119052886962891, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -3.050646739666263e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -3.050646739666263e-07, + "eval_runtime": 307.2712, + "eval_samples_per_second": 58.232, + "eval_steps_per_second": 1.822, + "step": 20 + }, + { + "epoch": 0.013410818059901655, + "grad_norm": 35.233323872959666, + "learning_rate": 1.5e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.6510288715362549, + "logits/rejected": -1.6510288715362549, + "logps/chosen": -8.857093234837521e-06, + "logps/rejected": -8.857093234837521e-06, + "loss": 0.9087, + "nll_loss": 0.8740367889404297, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.4285465605753416e-07, + "rewards/margins": 0.0, + "rewards/rejected": -4.4285465605753416e-07, + "step": 30 + }, + { + "epoch": 0.013410818059901655, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.5711654424667358, + "eval_logits/rejected": -1.5711654424667358, + "eval_logps/chosen": -1.556159622850828e-05, + "eval_logps/rejected": -1.556159622850828e-05, + "eval_loss": 0.8375607132911682, + "eval_nll_loss": 0.8029031753540039, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -7.780799933243543e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -7.780799933243543e-07, + "eval_runtime": 307.3414, + "eval_samples_per_second": 58.219, + "eval_steps_per_second": 1.822, + "step": 30 + }, + { + "epoch": 0.017881090746535537, + "grad_norm": 38.04594247051011, + "learning_rate": 2.0000000000000003e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.567310094833374, + "logits/rejected": -1.567310094833374, + "logps/chosen": -5.675311058439547e-06, + "logps/rejected": -5.675311058439547e-06, + "loss": 0.7774, + "nll_loss": 0.742705225944519, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.837655870280287e-07, + "rewards/margins": 0.0, + "rewards/rejected": -2.837655870280287e-07, + "step": 40 + }, + { + "epoch": 0.017881090746535537, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.5837194919586182, + "eval_logits/rejected": -1.5837194919586182, + "eval_logps/chosen": -4.9416635192756075e-06, + "eval_logps/rejected": -4.9416635192756075e-06, + "eval_loss": 0.691165030002594, + "eval_nll_loss": 0.6565076112747192, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -2.4708319301680604e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -2.4708319301680604e-07, + "eval_runtime": 307.3287, + "eval_samples_per_second": 58.221, + "eval_steps_per_second": 1.822, + "step": 40 + }, + { + "epoch": 0.022351363433169423, + "grad_norm": 244.0980901004834, + "learning_rate": 2.5e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.5995845794677734, + "logits/rejected": -1.5995845794677734, + "logps/chosen": -7.533667030656943e-06, + "logps/rejected": -7.533667030656943e-06, + "loss": 0.5426, + "nll_loss": 0.507915198802948, + "rewards/accuracies": 0.0, + "rewards/chosen": -3.7668331742679584e-07, + "rewards/margins": 0.0, + "rewards/rejected": -3.7668331742679584e-07, + "step": 50 + }, + { + "epoch": 0.022351363433169423, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.5342867374420166, + "eval_logits/rejected": -1.5342867374420166, + "eval_logps/chosen": -4.649061338568572e-06, + "eval_logps/rejected": -4.649061338568572e-06, + "eval_loss": 0.1590723991394043, + "eval_nll_loss": 0.12441505491733551, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -2.32453146509215e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -2.32453146509215e-07, + "eval_runtime": 307.2894, + "eval_samples_per_second": 58.229, + "eval_steps_per_second": 1.822, + "step": 50 + }, + { + "epoch": 0.02682163611980331, + "grad_norm": 742.9836173119904, + "learning_rate": 3e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.0592808723449707, + "logits/rejected": -1.0592808723449707, + "logps/chosen": -0.646319568157196, + "logps/rejected": -0.646319568157196, + "loss": 0.4926, + "nll_loss": 0.4579242765903473, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.03231597691774368, + "rewards/margins": 0.0, + "rewards/rejected": -0.03231597691774368, + "step": 60 + }, + { + "epoch": 0.02682163611980331, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -2.1701815128326416, + "eval_logits/rejected": -2.1701815128326416, + "eval_logps/chosen": -0.049711961299180984, + "eval_logps/rejected": -0.049711961299180984, + "eval_loss": 0.07283048331737518, + "eval_nll_loss": 0.0381731316447258, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -0.0024855986703187227, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -0.0024855986703187227, + "eval_runtime": 307.3188, + "eval_samples_per_second": 58.223, + "eval_steps_per_second": 1.822, + "step": 60 + }, + { + "epoch": 0.031291908806437195, + "grad_norm": 36.939547099581965, + "learning_rate": 3.5e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -2.109687328338623, + "logits/rejected": -2.109687328338623, + "logps/chosen": -0.4633702337741852, + "logps/rejected": -0.4633702337741852, + "loss": 0.3784, + "nll_loss": 0.34376633167266846, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.02316850982606411, + "rewards/margins": 0.0, + "rewards/rejected": -0.02316850982606411, + "step": 70 + }, + { + "epoch": 0.031291908806437195, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.8683053255081177, + "eval_logits/rejected": -1.8683053255081177, + "eval_logps/chosen": -0.0014687292277812958, + "eval_logps/rejected": -0.0014687292277812958, + "eval_loss": 0.037432197481393814, + "eval_nll_loss": 0.0027748411521315575, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -7.343645120272413e-05, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -7.343645120272413e-05, + "eval_runtime": 307.334, + "eval_samples_per_second": 58.22, + "eval_steps_per_second": 1.822, + "step": 70 + }, + { + "epoch": 0.035762181493071074, + "grad_norm": 8.17885090561578, + "learning_rate": 4.000000000000001e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.9282808303833008, + "logits/rejected": -1.9282808303833008, + "logps/chosen": -0.1062564104795456, + "logps/rejected": -0.1062564104795456, + "loss": 0.1081, + "nll_loss": 0.0734890028834343, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.005312820430845022, + "rewards/margins": 0.0, + "rewards/rejected": -0.005312820430845022, + "step": 80 + }, + { + "epoch": 0.035762181493071074, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.9697402715682983, + "eval_logits/rejected": -1.9697402715682983, + "eval_logps/chosen": -1.1368107152520679e-05, + "eval_logps/rejected": -1.1368107152520679e-05, + "eval_loss": 0.03469717875123024, + "eval_nll_loss": 3.982333146268502e-05, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.684053689947177e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.684053689947177e-07, + "eval_runtime": 307.2922, + "eval_samples_per_second": 58.228, + "eval_steps_per_second": 1.822, + "step": 80 + }, + { + "epoch": 0.04023245417970496, + "grad_norm": 3.100931395880866, + "learning_rate": 4.5e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -2.24125075340271, + "logits/rejected": -2.24125075340271, + "logps/chosen": -0.265648752450943, + "logps/rejected": -0.265648752450943, + "loss": 0.2173, + "nll_loss": 0.18264153599739075, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.013282437808811665, + "rewards/margins": 0.0, + "rewards/rejected": -0.013282437808811665, + "step": 90 + }, + { + "epoch": 0.04023245417970496, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -2.7805023193359375, + "eval_logits/rejected": -2.7805023193359375, + "eval_logps/chosen": -0.6984005570411682, + "eval_logps/rejected": -0.6984005570411682, + "eval_loss": 0.5148123502731323, + "eval_nll_loss": 0.4801549017429352, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -0.03492003679275513, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -0.03492003679275513, + "eval_runtime": 307.3525, + "eval_samples_per_second": 58.217, + "eval_steps_per_second": 1.822, + "step": 90 + }, + { + "epoch": 0.044702726866338846, + "grad_norm": 0.708612815644015, + "learning_rate": 5e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -2.5113797187805176, + "logits/rejected": -2.5113797187805176, + "logps/chosen": -0.07246340066194534, + "logps/rejected": -0.07246340066194534, + "loss": 0.0845, + "nll_loss": 0.04982428997755051, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.003623170079663396, + "rewards/margins": 0.0, + "rewards/rejected": -0.003623170079663396, + "step": 100 + }, + { + "epoch": 0.044702726866338846, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -2.2378087043762207, + "eval_logits/rejected": -2.2378087043762207, + "eval_logps/chosen": -0.0317058339715004, + "eval_logps/rejected": -0.0317058339715004, + "eval_loss": 0.05645650252699852, + "eval_nll_loss": 0.021799137815833092, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -0.0015852916985750198, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -0.0015852916985750198, + "eval_runtime": 307.3355, + "eval_samples_per_second": 58.22, + "eval_steps_per_second": 1.822, + "step": 100 + }, + { + "epoch": 0.04917299955297273, + "grad_norm": 327.5861555766306, + "learning_rate": 4.767312946227961e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -2.459609031677246, + "logits/rejected": -2.459609031677246, + "logps/chosen": -0.1411500871181488, + "logps/rejected": -0.1411500871181488, + "loss": 0.1317, + "nll_loss": 0.09704854339361191, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.007057504262775183, + "rewards/margins": 0.0, + "rewards/rejected": -0.007057504262775183, + "step": 110 + }, + { + "epoch": 0.04917299955297273, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -2.4884233474731445, + "eval_logits/rejected": -2.4884233474731445, + "eval_logps/chosen": -1.179061109723989e-05, + "eval_logps/rejected": -1.179061109723989e-05, + "eval_loss": 0.03466625139117241, + "eval_nll_loss": 8.895804057829082e-06, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.895305434933107e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.895305434933107e-07, + "eval_runtime": 308.2975, + "eval_samples_per_second": 58.038, + "eval_steps_per_second": 1.816, + "step": 110 + }, + { + "epoch": 0.05364327223960662, + "grad_norm": 855.5753312971198, + "learning_rate": 4.564354645876385e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -2.3581442832946777, + "logits/rejected": -2.3581442832946777, + "logps/chosen": -0.2550828456878662, + "logps/rejected": -0.2550828456878662, + "loss": 0.21, + "nll_loss": 0.1753716617822647, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.01275414414703846, + "rewards/margins": 0.0, + "rewards/rejected": -0.01275414414703846, + "step": 120 + }, + { + "epoch": 0.05364327223960662, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -2.1373255252838135, + "eval_logits/rejected": -2.1373255252838135, + "eval_logps/chosen": -0.00023682457685936242, + "eval_logps/rejected": -0.00023682457685936242, + "eval_loss": 0.034821655601263046, + "eval_nll_loss": 0.00016429205425083637, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.184122811537236e-05, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.184122811537236e-05, + "eval_runtime": 307.3399, + "eval_samples_per_second": 58.219, + "eval_steps_per_second": 1.822, + "step": 120 + }, + { + "epoch": 0.058113544926240504, + "grad_norm": 0.1670814802188449, + "learning_rate": 4.385290096535147e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.8926069736480713, + "logits/rejected": -1.8926069736480713, + "logps/chosen": -0.07454721629619598, + "logps/rejected": -0.07454721629619598, + "loss": 0.0859, + "nll_loss": 0.05125713348388672, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0037273610942065716, + "rewards/margins": 0.0, + "rewards/rejected": -0.0037273610942065716, + "step": 130 + }, + { + "epoch": 0.058113544926240504, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.857692837715149, + "eval_logits/rejected": -1.857692837715149, + "eval_logps/chosen": -2.4058474537014263e-06, + "eval_logps/rejected": -2.4058474537014263e-06, + "eval_loss": 0.03466090187430382, + "eval_nll_loss": 3.539007138897432e-06, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.2029238405375509e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.2029238405375509e-07, + "eval_runtime": 307.2975, + "eval_samples_per_second": 58.227, + "eval_steps_per_second": 1.822, + "step": 130 + }, + { + "epoch": 0.06258381761287439, + "grad_norm": 0.06453300264388455, + "learning_rate": 4.2257712736425835e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.854994773864746, + "logits/rejected": -1.854994773864746, + "logps/chosen": -1.4180723155732267e-05, + "logps/rejected": -1.4180723155732267e-05, + "loss": 0.0347, + "nll_loss": 1.0921966349997092e-05, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.090361577866133e-07, + "rewards/margins": 0.0, + "rewards/rejected": -7.090361577866133e-07, + "step": 140 + }, + { + "epoch": 0.06258381761287439, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.8251653909683228, + "eval_logits/rejected": -1.8251653909683228, + "eval_logps/chosen": -0.00015634606825187802, + "eval_logps/rejected": -0.00015634606825187802, + "eval_loss": 0.03476560115814209, + "eval_nll_loss": 0.00010824044147739187, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -7.817303412593901e-06, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -7.817303412593901e-06, + "eval_runtime": 307.2194, + "eval_samples_per_second": 58.242, + "eval_steps_per_second": 1.823, + "step": 140 + }, + { + "epoch": 0.06705409029950828, + "grad_norm": 0.20774379926153874, + "learning_rate": 4.082482904638631e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.847884178161621, + "logits/rejected": -1.847884178161621, + "logps/chosen": -0.021540921181440353, + "logps/rejected": -0.021540921181440353, + "loss": 0.0495, + "nll_loss": 0.014810365624725819, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0010770460357889533, + "rewards/margins": 0.0, + "rewards/rejected": -0.0010770460357889533, + "step": 150 + }, + { + "epoch": 0.06705409029950828, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.850500464439392, + "eval_logits/rejected": -1.850500464439392, + "eval_logps/chosen": -2.1336307327146642e-05, + "eval_logps/rejected": -2.1336307327146642e-05, + "eval_loss": 0.034673456102609634, + "eval_nll_loss": 1.6099216736620292e-05, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.0668153436199646e-06, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.0668153436199646e-06, + "eval_runtime": 307.2824, + "eval_samples_per_second": 58.23, + "eval_steps_per_second": 1.822, + "step": 150 + }, + { + "epoch": 0.07152436298614215, + "grad_norm": 0.4385089216707075, + "learning_rate": 3.952847075210474e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.9034173488616943, + "logits/rejected": -1.9034173488616943, + "logps/chosen": -2.675616997294128e-05, + "logps/rejected": -2.675616997294128e-05, + "loss": 0.0347, + "nll_loss": 1.960856025107205e-05, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.337808498647064e-06, + "rewards/margins": 0.0, + "rewards/rejected": -1.337808498647064e-06, + "step": 160 + }, + { + "epoch": 0.07152436298614215, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.9784146547317505, + "eval_logits/rejected": -1.9784146547317505, + "eval_logps/chosen": -1.380585763399722e-05, + "eval_logps/rejected": -1.380585763399722e-05, + "eval_loss": 0.03466769680380821, + "eval_nll_loss": 1.0333444151910953e-05, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -6.902928930685448e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -6.902928930685448e-07, + "eval_runtime": 307.2592, + "eval_samples_per_second": 58.234, + "eval_steps_per_second": 1.823, + "step": 160 + }, + { + "epoch": 0.07599463567277603, + "grad_norm": 0.4184321814624391, + "learning_rate": 3.834824944236852e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.8877710103988647, + "logits/rejected": -1.8877710103988647, + "logps/chosen": -1.3126833437127061e-05, + "logps/rejected": -1.3126833437127061e-05, + "loss": 0.0347, + "nll_loss": 9.645330464991275e-06, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.563416832250368e-07, + "rewards/margins": 0.0, + "rewards/rejected": -6.563416832250368e-07, + "step": 170 + }, + { + "epoch": 0.07599463567277603, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.7608203887939453, + "eval_logits/rejected": -1.7608203887939453, + "eval_logps/chosen": -2.7526366466190666e-06, + "eval_logps/rejected": -2.7526366466190666e-06, + "eval_loss": 0.03465956076979637, + "eval_nll_loss": 2.1979110442771344e-06, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.3763180106707296e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.3763180106707296e-07, + "eval_runtime": 307.2297, + "eval_samples_per_second": 58.24, + "eval_steps_per_second": 1.823, + "step": 170 + }, + { + "epoch": 0.08046490835940992, + "grad_norm": 0.038632251509525706, + "learning_rate": 3.72677996249965e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.6208105087280273, + "logits/rejected": -1.6208105087280273, + "logps/chosen": -3.7441983295138925e-05, + "logps/rejected": -3.7441983295138925e-05, + "loss": 0.0347, + "nll_loss": 2.5923154680640437e-05, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.8720990055953735e-06, + "rewards/margins": 0.0, + "rewards/rejected": -1.8720990055953735e-06, + "step": 180 + }, + { + "epoch": 0.08046490835940992, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.5160672664642334, + "eval_logits/rejected": -1.5160672664642334, + "eval_logps/chosen": -5.201853809921886e-07, + "eval_logps/rejected": -5.201853809921886e-07, + "eval_loss": 0.03465788811445236, + "eval_nll_loss": 5.215401870373171e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -2.6009265852167118e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -2.6009265852167118e-08, + "eval_runtime": 307.3109, + "eval_samples_per_second": 58.224, + "eval_steps_per_second": 1.822, + "step": 180 + }, + { + "epoch": 0.0849351810460438, + "grad_norm": 0.03669320934920467, + "learning_rate": 3.6273812505500587e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.4880715608596802, + "logits/rejected": -1.4880715608596802, + "logps/chosen": -4.2265060073987115e-07, + "logps/rejected": -4.2265060073987115e-07, + "loss": 0.0347, + "nll_loss": 5.044038289270247e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.1132532168621765e-08, + "rewards/margins": 0.0, + "rewards/rejected": -2.1132532168621765e-08, + "step": 190 + }, + { + "epoch": 0.0849351810460438, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.4645824432373047, + "eval_logits/rejected": -1.4645824432373047, + "eval_logps/chosen": -2.9260445444379e-07, + "eval_logps/rejected": -2.9260445444379e-07, + "eval_loss": 0.03465784341096878, + "eval_nll_loss": 4.917378646496218e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.4630221478739713e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.4630221478739713e-08, + "eval_runtime": 307.3254, + "eval_samples_per_second": 58.222, + "eval_steps_per_second": 1.822, + "step": 190 + }, + { + "epoch": 0.08940545373267769, + "grad_norm": 0.012980292617960867, + "learning_rate": 3.5355339059327378e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.460682988166809, + "logits/rejected": -1.460682988166809, + "logps/chosen": -2.5575798190402566e-07, + "logps/rejected": -2.5575798190402566e-07, + "loss": 0.0347, + "nll_loss": 5.394212507781049e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.2787898739929915e-08, + "rewards/margins": 0.0, + "rewards/rejected": -1.2787898739929915e-08, + "step": 200 + }, + { + "epoch": 0.08940545373267769, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.4564566612243652, + "eval_logits/rejected": -1.4564566612243652, + "eval_logps/chosen": -2.0590688620814035e-07, + "eval_logps/rejected": -2.0590688620814035e-07, + "eval_loss": 0.034657903015613556, + "eval_nll_loss": 5.513420546776615e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.0295342356414494e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.0295342356414494e-08, + "eval_runtime": 307.2465, + "eval_samples_per_second": 58.237, + "eval_steps_per_second": 1.823, + "step": 200 + }, + { + "epoch": 0.09387572641931158, + "grad_norm": 0.02641498837884943, + "learning_rate": 3.450327796711771e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.4444046020507812, + "logits/rejected": -1.4444046020507812, + "logps/chosen": -2.2758126760891173e-07, + "logps/rejected": -2.2758126760891173e-07, + "loss": 0.0347, + "nll_loss": 4.954629844178271e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.1379063913352638e-08, + "rewards/margins": 0.0, + "rewards/rejected": -1.1379063913352638e-08, + "step": 210 + }, + { + "epoch": 0.09387572641931158, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.4376832246780396, + "eval_logits/rejected": -1.4376832246780396, + "eval_logps/chosen": -2.2758125339805702e-07, + "eval_logps/rejected": -2.2758125339805702e-07, + "eval_loss": 0.034657806158065796, + "eval_nll_loss": 4.4703440948978823e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.1379063025174219e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.1379063025174219e-08, + "eval_runtime": 307.3275, + "eval_samples_per_second": 58.221, + "eval_steps_per_second": 1.822, + "step": 210 + }, + { + "epoch": 0.09834599910594546, + "grad_norm": 0.03304538141719999, + "learning_rate": 3.3709993123162106e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.4320012331008911, + "logits/rejected": -1.4320012331008911, + "logps/chosen": -2.449207840982126e-07, + "logps/rejected": -2.449207840982126e-07, + "loss": 0.0347, + "nll_loss": 4.1574213582862285e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.2246038849639262e-08, + "rewards/margins": 0.0, + "rewards/rejected": -1.2246038849639262e-08, + "step": 220 + }, + { + "epoch": 0.09834599910594546, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.4223650693893433, + "eval_logits/rejected": -1.4223650693893433, + "eval_logps/chosen": -2.492556632205378e-07, + "eval_logps/rejected": -2.492556632205378e-07, + "eval_loss": 0.03465771675109863, + "eval_nll_loss": 3.5017714594687277e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.2462780141220264e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.2462780141220264e-08, + "eval_runtime": 307.3295, + "eval_samples_per_second": 58.221, + "eval_steps_per_second": 1.822, + "step": 220 + }, + { + "epoch": 0.10281627179257935, + "grad_norm": 0.044286350524316676, + "learning_rate": 3.296902366978936e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.4200690984725952, + "logits/rejected": -1.4200690984725952, + "logps/chosen": -2.698463390515826e-07, + "logps/rejected": -2.698463390515826e-07, + "loss": 0.0347, + "nll_loss": 3.3378583452758903e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.3492316597307763e-08, + "rewards/margins": 0.0, + "rewards/rejected": -1.3492316597307763e-08, + "step": 230 + }, + { + "epoch": 0.10281627179257935, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.413095474243164, + "eval_logits/rejected": -1.413095474243164, + "eval_logps/chosen": -2.7093008725387335e-07, + "eval_logps/rejected": -2.7093008725387335e-07, + "eval_loss": 0.034657664597034454, + "eval_nll_loss": 3.1292435664909135e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.3546501698158409e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.3546501698158409e-08, + "eval_runtime": 307.2385, + "eval_samples_per_second": 58.238, + "eval_steps_per_second": 1.823, + "step": 230 + }, + { + "epoch": 0.10728654447921324, + "grad_norm": 0.04353666797671452, + "learning_rate": 3.2274861218395142e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.4168875217437744, + "logits/rejected": -1.4168875217437744, + "logps/chosen": -2.6984636747329205e-07, + "logps/rejected": -2.6984636747329205e-07, + "loss": 0.0347, + "nll_loss": 3.1888473017716024e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.3492316597307763e-08, + "rewards/margins": 0.0, + "rewards/rejected": -1.3492316597307763e-08, + "step": 240 + }, + { + "epoch": 0.10728654447921324, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.415872573852539, + "eval_logits/rejected": -1.415872573852539, + "eval_logps/chosen": -2.8176731348139583e-07, + "eval_logps/rejected": -2.8176731348139583e-07, + "eval_loss": 0.034657686948776245, + "eval_nll_loss": 3.278254041561013e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.4088360700270641e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.4088360700270641e-08, + "eval_runtime": 307.249, + "eval_samples_per_second": 58.236, + "eval_steps_per_second": 1.823, + "step": 240 + }, + { + "epoch": 0.11175681716584712, + "grad_norm": 0.03917480482423638, + "learning_rate": 3.1622776601683796e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.4259008169174194, + "logits/rejected": -1.4259008169174194, + "logps/chosen": -2.861021641820116e-07, + "logps/rejected": -2.861021641820116e-07, + "loss": 0.0347, + "nll_loss": 3.2782546099952015e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.4305106432743742e-08, + "rewards/margins": 0.0, + "rewards/rejected": -1.4305106432743742e-08, + "step": 250 + }, + { + "epoch": 0.11175681716584712, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.4383741617202759, + "eval_logits/rejected": -1.4383741617202759, + "eval_logps/chosen": -2.8176731348139583e-07, + "eval_logps/rejected": -2.8176731348139583e-07, + "eval_loss": 0.034657690674066544, + "eval_nll_loss": 3.3527609843986284e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.4088360700270641e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.4088360700270641e-08, + "eval_runtime": 307.2546, + "eval_samples_per_second": 58.235, + "eval_steps_per_second": 1.823, + "step": 250 + }, + { + "epoch": 0.11622708985248101, + "grad_norm": 0.05198634530505512, + "learning_rate": 3.1008683647302113e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.4493887424468994, + "logits/rejected": -1.4493887424468994, + "logps/chosen": -2.633440487898042e-07, + "logps/rejected": -2.633440487898042e-07, + "loss": 0.0347, + "nll_loss": 3.524122860198986e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.316720421584705e-08, + "rewards/margins": 0.0, + "rewards/rejected": -1.316720421584705e-08, + "step": 260 + }, + { + "epoch": 0.11622708985248101, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.4592266082763672, + "eval_logits/rejected": -1.4592266082763672, + "eval_logps/chosen": -2.7093008725387335e-07, + "eval_logps/rejected": -2.7093008725387335e-07, + "eval_loss": 0.03465774282813072, + "eval_nll_loss": 3.799794683345681e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.3546508803585766e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.3546508803585766e-08, + "eval_runtime": 307.2459, + "eval_samples_per_second": 58.237, + "eval_steps_per_second": 1.823, + "step": 260 + }, + { + "epoch": 0.12069736253911488, + "grad_norm": 0.11820570469696125, + "learning_rate": 3.0429030972509227e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.46332585811615, + "logits/rejected": -1.46332585811615, + "logps/chosen": -3.9230658899214177e-07, + "logps/rejected": -3.9230658899214177e-07, + "loss": 0.0347, + "nll_loss": 4.783269105246291e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.9615329804878456e-08, + "rewards/margins": 0.0, + "rewards/rejected": -1.9615329804878456e-08, + "step": 270 + }, + { + "epoch": 0.12069736253911488, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.468156337738037, + "eval_logits/rejected": -1.468156337738037, + "eval_logps/chosen": -9.536715310787258e-07, + "eval_logps/rejected": -9.536715310787258e-07, + "eval_loss": 0.03465830534696579, + "eval_nll_loss": 9.387706541019725e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -4.7683567316880726e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -4.7683567316880726e-08, + "eval_runtime": 307.3223, + "eval_samples_per_second": 58.222, + "eval_steps_per_second": 1.822, + "step": 270 + }, + { + "epoch": 0.12516763522574878, + "grad_norm": 0.09515560200533464, + "learning_rate": 2.988071523335984e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.4118568897247314, + "logits/rejected": -1.4118568897247314, + "logps/chosen": -0.00016646471340209246, + "logps/rejected": -0.00016646471340209246, + "loss": 0.0348, + "nll_loss": 0.0001147976508946158, + "rewards/accuracies": 0.0, + "rewards/chosen": -8.323235306306742e-06, + "rewards/margins": 0.0, + "rewards/rejected": -8.323235306306742e-06, + "step": 280 + }, + { + "epoch": 0.12516763522574878, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.126119613647461, + "eval_logits/rejected": -1.126119613647461, + "eval_logps/chosen": -2.7093008725387335e-07, + "eval_logps/rejected": -2.7093008725387335e-07, + "eval_loss": 0.034658852964639664, + "eval_nll_loss": 1.4975536259953515e-06, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.3546501698158409e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.3546501698158409e-08, + "eval_runtime": 307.2374, + "eval_samples_per_second": 58.238, + "eval_steps_per_second": 1.823, + "step": 280 + }, + { + "epoch": 0.12963790791238267, + "grad_norm": 0.24950192922112446, + "learning_rate": 2.9361010975735177e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.9670178294181824, + "logits/rejected": -0.9670178294181824, + "logps/chosen": -0.04551885277032852, + "logps/rejected": -0.04551885277032852, + "loss": 0.066, + "nll_loss": 0.031298212707042694, + "rewards/accuracies": 0.0, + "rewards/chosen": -0.0022759425919502974, + "rewards/margins": 0.0, + "rewards/rejected": -0.0022759425919502974, + "step": 290 + }, + { + "epoch": 0.12963790791238267, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.908425509929657, + "eval_logits/rejected": -0.908425509929657, + "eval_logps/chosen": -6.848872544651385e-06, + "eval_logps/rejected": -6.848872544651385e-06, + "eval_loss": 0.034663498401641846, + "eval_nll_loss": 6.1390978771669324e-06, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -3.42443740919407e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -3.42443740919407e-07, + "eval_runtime": 307.2693, + "eval_samples_per_second": 58.232, + "eval_steps_per_second": 1.823, + "step": 290 + }, + { + "epoch": 0.13410818059901655, + "grad_norm": 0.03431844824411396, + "learning_rate": 2.8867513459481293e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.8719849586486816, + "logits/rejected": -0.8719849586486816, + "logps/chosen": -1.4694925312142004e-06, + "logps/rejected": -1.4694925312142004e-06, + "loss": 0.0347, + "nll_loss": 1.3790776165478746e-06, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.347462371853908e-08, + "rewards/margins": 0.0, + "rewards/rejected": -7.347462371853908e-08, + "step": 300 + }, + { + "epoch": 0.13410818059901655, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.8419067859649658, + "eval_logits/rejected": -0.8419067859649658, + "eval_logps/chosen": -1.0837207042868613e-07, + "eval_logps/rejected": -1.0837207042868613e-07, + "eval_loss": 0.03465750813484192, + "eval_nll_loss": 1.4901159772762185e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.418604231977042e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.418604231977042e-09, + "eval_runtime": 307.3079, + "eval_samples_per_second": 58.225, + "eval_steps_per_second": 1.822, + "step": 300 + }, + { + "epoch": 0.13857845328565044, + "grad_norm": 0.046084457187151866, + "learning_rate": 2.839809171235324e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.8182867765426636, + "logits/rejected": -0.8182867765426636, + "logps/chosen": -1.1812553424306316e-07, + "logps/rejected": -1.1812553424306316e-07, + "loss": 0.0347, + "nll_loss": 1.356005299157914e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.906276800971e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.906276800971e-09, + "step": 310 + }, + { + "epoch": 0.13857845328565044, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7935153245925903, + "eval_logits/rejected": -0.7935153245925903, + "eval_logps/chosen": -1.3004645893488487e-07, + "eval_logps/rejected": -1.3004645893488487e-07, + "eval_loss": 0.03465748578310013, + "eval_nll_loss": 1.341104365337742e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -6.502324456647557e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -6.502324456647557e-09, + "eval_runtime": 307.2897, + "eval_samples_per_second": 58.228, + "eval_steps_per_second": 1.822, + "step": 310 + }, + { + "epoch": 0.1430487259722843, + "grad_norm": 0.026447763498169533, + "learning_rate": 2.7950849718747376e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7777472734451294, + "logits/rejected": -0.7777472734451294, + "logps/chosen": -1.1704181446248185e-07, + "logps/rejected": -1.1704181446248185e-07, + "loss": 0.0347, + "nll_loss": 1.25169719922269e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.852090811941935e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.852090811941935e-09, + "step": 320 + }, + { + "epoch": 0.1430487259722843, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7703178524971008, + "eval_logits/rejected": -0.7703178524971008, + "eval_logps/chosen": -9.753485130659101e-08, + "eval_logps/rejected": -9.753485130659101e-08, + "eval_loss": 0.03465747460722923, + "eval_nll_loss": 1.1175869474300271e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -4.876741233061921e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -4.876741233061921e-09, + "eval_runtime": 307.3055, + "eval_samples_per_second": 58.225, + "eval_steps_per_second": 1.822, + "step": 320 + }, + { + "epoch": 0.14751899865891818, + "grad_norm": 0.024063594687914625, + "learning_rate": 2.752409412815902e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7712096571922302, + "logits/rejected": -0.7712096571922302, + "logps/chosen": -1.1053950998984874e-07, + "logps/rejected": -1.1053950998984874e-07, + "loss": 0.0347, + "nll_loss": 1.244246874421151e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.526974433678333e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.526974433678333e-09, + "step": 330 + }, + { + "epoch": 0.14751899865891818, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7746484875679016, + "eval_logits/rejected": -0.7746484875679016, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748578310013, + "eval_nll_loss": 1.341104365337742e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960464122267695e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960464122267695e-09, + "eval_runtime": 307.2334, + "eval_samples_per_second": 58.239, + "eval_steps_per_second": 1.823, + "step": 330 + }, + { + "epoch": 0.15198927134555207, + "grad_norm": 0.020132595346862448, + "learning_rate": 2.711630722733202e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7744746804237366, + "logits/rejected": -0.7744746804237366, + "logps/chosen": -1.1379066933159265e-07, + "logps/rejected": -1.1379066933159265e-07, + "loss": 0.0347, + "nll_loss": 1.2591478082413232e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.689532400765529e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.689532400765529e-09, + "step": 340 + }, + { + "epoch": 0.15198927134555207, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7745358347892761, + "eval_logits/rejected": -0.7745358347892761, + "eval_logps/chosen": -1.3004645893488487e-07, + "eval_logps/rejected": -1.3004645893488487e-07, + "eval_loss": 0.03465748578310013, + "eval_nll_loss": 1.341104365337742e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -6.502324456647557e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -6.502324456647557e-09, + "eval_runtime": 307.34, + "eval_samples_per_second": 58.219, + "eval_steps_per_second": 1.822, + "step": 340 + }, + { + "epoch": 0.15645954403218595, + "grad_norm": 0.023269788619903833, + "learning_rate": 2.6726124191242444e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.777168869972229, + "logits/rejected": -0.777168869972229, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.2665984172599565e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 350 + }, + { + "epoch": 0.15645954403218595, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7768326997756958, + "eval_logits/rejected": -0.7768326997756958, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748205780983, + "eval_nll_loss": 1.2665985593685036e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.3208, + "eval_samples_per_second": 58.223, + "eval_steps_per_second": 1.822, + "step": 350 + }, + { + "epoch": 0.16092981671881984, + "grad_norm": 0.0223925116848789, + "learning_rate": 2.6352313834736496e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7769736647605896, + "logits/rejected": -0.7769736647605896, + "logps/chosen": -1.1487436779589189e-07, + "logps/rejected": -1.1487436779589189e-07, + "loss": 0.0347, + "nll_loss": 1.1697407842348184e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.743718389794594e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.743718389794594e-09, + "step": 360 + }, + { + "epoch": 0.16092981671881984, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7752794027328491, + "eval_logits/rejected": -0.7752794027328491, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465747833251953, + "eval_nll_loss": 1.1920927533992653e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.3143, + "eval_samples_per_second": 58.224, + "eval_steps_per_second": 1.822, + "step": 360 + }, + { + "epoch": 0.16540008940545373, + "grad_norm": 0.023199535123816478, + "learning_rate": 2.599376224550182e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7756190299987793, + "logits/rejected": -0.7756190299987793, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.192092469182171e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 370 + }, + { + "epoch": 0.16540008940545373, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7760257720947266, + "eval_logits/rejected": -0.7760257720947266, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465747460722923, + "eval_nll_loss": 1.1175869474300271e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2373, + "eval_samples_per_second": 58.238, + "eval_steps_per_second": 1.823, + "step": 370 + }, + { + "epoch": 0.1698703620920876, + "grad_norm": 0.022797710054038613, + "learning_rate": 2.564945880212886e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7724426984786987, + "logits/rejected": -0.7724426984786987, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.1771913932534517e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 380 + }, + { + "epoch": 0.1698703620920876, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7687971591949463, + "eval_logits/rejected": -0.7687971591949463, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465747833251953, + "eval_nll_loss": 1.1920927533992653e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2189, + "eval_samples_per_second": 58.242, + "eval_steps_per_second": 1.823, + "step": 380 + }, + { + "epoch": 0.1743406347787215, + "grad_norm": 0.02330401700584419, + "learning_rate": 2.5318484177091667e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7696870565414429, + "logits/rejected": -0.7696870565414429, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.2293455142753373e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 390 + }, + { + "epoch": 0.1743406347787215, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7687665224075317, + "eval_logits/rejected": -0.7687665224075317, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748205780983, + "eval_nll_loss": 1.2665985593685036e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.3556, + "eval_samples_per_second": 58.216, + "eval_steps_per_second": 1.822, + "step": 390 + }, + { + "epoch": 0.17881090746535538, + "grad_norm": 0.023191506116135168, + "learning_rate": 2.5e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7678502798080444, + "logits/rejected": -0.7678502798080444, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.25169719922269e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 400 + }, + { + "epoch": 0.17881090746535538, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7668212056159973, + "eval_logits/rejected": -0.7668212056159973, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748205780983, + "eval_nll_loss": 1.2665985593685036e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2727, + "eval_samples_per_second": 58.232, + "eval_steps_per_second": 1.822, + "step": 400 + }, + { + "epoch": 0.18328118015198927, + "grad_norm": 0.023363022377614208, + "learning_rate": 2.4693239916239746e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7651191353797913, + "logits/rejected": -0.7651191353797913, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.25169719922269e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 410 + }, + { + "epoch": 0.18328118015198927, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7659606337547302, + "eval_logits/rejected": -0.7659606337547302, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465747460722923, + "eval_nll_loss": 1.1175869474300271e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.3455, + "eval_samples_per_second": 58.218, + "eval_steps_per_second": 1.822, + "step": 410 + }, + { + "epoch": 0.18775145283862316, + "grad_norm": 0.023511625619021405, + "learning_rate": 2.4397501823713327e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7650267481803894, + "logits/rejected": -0.7650267481803894, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.2367959811854234e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 420 + }, + { + "epoch": 0.18775145283862316, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7655761241912842, + "eval_logits/rejected": -0.7655761241912842, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465747833251953, + "eval_nll_loss": 1.1920927533992653e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2676, + "eval_samples_per_second": 58.233, + "eval_steps_per_second": 1.823, + "step": 420 + }, + { + "epoch": 0.19222172552525704, + "grad_norm": 0.02330154160943623, + "learning_rate": 2.411214110852061e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7662619352340698, + "logits/rejected": -0.7662619352340698, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.1995430782008043e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 430 + }, + { + "epoch": 0.19222172552525704, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7679765820503235, + "eval_logits/rejected": -0.7679765820503235, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465747460722923, + "eval_nll_loss": 1.1175869474300271e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2354, + "eval_samples_per_second": 58.239, + "eval_steps_per_second": 1.823, + "step": 430 + }, + { + "epoch": 0.19669199821189093, + "grad_norm": 0.022797027310381853, + "learning_rate": 2.3836564731139807e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7675051689147949, + "logits/rejected": -0.7675051689147949, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.2293455142753373e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 440 + }, + { + "epoch": 0.19669199821189093, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7653347849845886, + "eval_logits/rejected": -0.7653347849845886, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748205780983, + "eval_nll_loss": 1.2665985593685036e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2951, + "eval_samples_per_second": 58.227, + "eval_steps_per_second": 1.822, + "step": 440 + }, + { + "epoch": 0.20116227089852481, + "grad_norm": 0.023019572901179387, + "learning_rate": 2.357022603955159e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7651574015617371, + "logits/rejected": -0.7651574015617371, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.2665984172599565e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 450 + }, + { + "epoch": 0.20116227089852481, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7623587250709534, + "eval_logits/rejected": -0.7623587250709534, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748205780983, + "eval_nll_loss": 1.2665985593685036e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2612, + "eval_samples_per_second": 58.234, + "eval_steps_per_second": 1.823, + "step": 450 + }, + { + "epoch": 0.2056325435851587, + "grad_norm": 0.0240935794512142, + "learning_rate": 2.3312620206007847e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7617487907409668, + "logits/rejected": -0.7617487907409668, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.3336534721020143e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 460 + }, + { + "epoch": 0.2056325435851587, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7611863613128662, + "eval_logits/rejected": -0.7611863613128662, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465750440955162, + "eval_nll_loss": 1.4156101713069802e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.3105, + "eval_samples_per_second": 58.224, + "eval_steps_per_second": 1.822, + "step": 460 + }, + { + "epoch": 0.2101028162717926, + "grad_norm": 0.02327309664522326, + "learning_rate": 2.3063280200722128e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7611461877822876, + "logits/rejected": -0.7611461877822876, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.51991798702511e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 470 + }, + { + "epoch": 0.2101028162717926, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7622258067131042, + "eval_logits/rejected": -0.7622258067131042, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465753048658371, + "eval_nll_loss": 1.6391270207805064e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2119, + "eval_samples_per_second": 58.243, + "eval_steps_per_second": 1.823, + "step": 470 + }, + { + "epoch": 0.21457308895842647, + "grad_norm": 0.023299532688741614, + "learning_rate": 2.2821773229381924e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7616379261016846, + "logits/rejected": -0.7616379261016846, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.6763797816565784e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 480 + }, + { + "epoch": 0.21457308895842647, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7636578679084778, + "eval_logits/rejected": -0.7636578679084778, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465753048658371, + "eval_nll_loss": 1.6391270207805064e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2264, + "eval_samples_per_second": 58.24, + "eval_steps_per_second": 1.823, + "step": 480 + }, + { + "epoch": 0.21904336164506036, + "grad_norm": 0.023667881715919378, + "learning_rate": 2.2587697572631284e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7620495557785034, + "logits/rejected": -0.7620495557785034, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.6391268786719593e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 490 + }, + { + "epoch": 0.21904336164506036, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7611829042434692, + "eval_logits/rejected": -0.7611829042434692, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465751186013222, + "eval_nll_loss": 1.5646217832454568e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2239, + "eval_samples_per_second": 58.241, + "eval_steps_per_second": 1.823, + "step": 490 + }, + { + "epoch": 0.22351363433169424, + "grad_norm": 0.023427690242502415, + "learning_rate": 2.23606797749979e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7625541687011719, + "logits/rejected": -0.7625541687011719, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.5646214990283625e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 500 + }, + { + "epoch": 0.22351363433169424, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7622444033622742, + "eval_logits/rejected": -0.7622444033622742, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748578310013, + "eval_nll_loss": 1.341104365337742e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2023, + "eval_samples_per_second": 58.245, + "eval_steps_per_second": 1.823, + "step": 500 + }, + { + "epoch": 0.22798390701832813, + "grad_norm": 0.023813059050390148, + "learning_rate": 2.2140372138502386e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7629829049110413, + "logits/rejected": -0.7629829049110413, + "logps/chosen": -1.181255271376358e-07, + "logps/rejected": -1.181255271376358e-07, + "loss": 0.0347, + "nll_loss": 1.3336536142105615e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.90627635688179e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.90627635688179e-09, + "step": 510 + }, + { + "epoch": 0.22798390701832813, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7672262787818909, + "eval_logits/rejected": -0.7672262787818909, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748578310013, + "eval_nll_loss": 1.341104365337742e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.354, + "eval_samples_per_second": 58.216, + "eval_steps_per_second": 1.822, + "step": 510 + }, + { + "epoch": 0.23245417970496202, + "grad_norm": 0.023778754619915025, + "learning_rate": 2.1926450482675734e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7636716365814209, + "logits/rejected": -0.7636716365814209, + "logps/chosen": -1.192092469182171e-07, + "logps/rejected": -1.192092469182171e-07, + "loss": 0.0347, + "nll_loss": 1.3411042232291948e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -5.960462345910855e-09, + "rewards/margins": 0.0, + "rewards/rejected": -5.960462345910855e-09, + "step": 520 + }, + { + "epoch": 0.23245417970496202, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7621346116065979, + "eval_logits/rejected": -0.7621346116065979, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748578310013, + "eval_nll_loss": 1.341104365337742e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.3259, + "eval_samples_per_second": 58.222, + "eval_steps_per_second": 1.822, + "step": 520 + }, + { + "epoch": 0.23692445239159587, + "grad_norm": 0.024300673901908782, + "learning_rate": 2.1718612138153473e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7622011303901672, + "logits/rejected": -0.7622011303901672, + "logps/chosen": -1.2029298090965312e-07, + "logps/rejected": -1.2029298090965312e-07, + "loss": 0.0347, + "nll_loss": 1.3485546901392809e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.0146483349399205e-09, + "rewards/margins": 0.0, + "rewards/rejected": -6.0146483349399205e-09, + "step": 530 + }, + { + "epoch": 0.23692445239159587, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7621825337409973, + "eval_logits/rejected": -0.7621825337409973, + "eval_logps/chosen": -1.1920927533992653e-07, + "eval_logps/rejected": -1.1920927533992653e-07, + "eval_loss": 0.03465748578310013, + "eval_nll_loss": 1.341104365337742e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.960461013643226e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.960461013643226e-09, + "eval_runtime": 307.2644, + "eval_samples_per_second": 58.233, + "eval_steps_per_second": 1.823, + "step": 530 + }, + { + "epoch": 0.24139472507822976, + "grad_norm": 0.024939117222400192, + "learning_rate": 2.151657414559676e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7631164789199829, + "logits/rejected": -0.7631164789199829, + "logps/chosen": -1.2787901937372226e-07, + "logps/rejected": -1.2787901937372226e-07, + "loss": 0.0347, + "nll_loss": 1.4007086690526194e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.393950258143377e-09, + "rewards/margins": 0.0, + "rewards/rejected": -6.393950258143377e-09, + "step": 540 + }, + { + "epoch": 0.24139472507822976, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7637412548065186, + "eval_logits/rejected": -0.7637412548065186, + "eval_logps/chosen": -1.3004644472403015e-07, + "eval_logps/rejected": -1.3004644472403015e-07, + "eval_loss": 0.03465750440955162, + "eval_nll_loss": 1.4156101713069802e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -6.502321348023088e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -6.502321348023088e-09, + "eval_runtime": 307.2322, + "eval_samples_per_second": 58.239, + "eval_steps_per_second": 1.823, + "step": 540 + }, + { + "epoch": 0.24586499776486365, + "grad_norm": 0.04096011470812148, + "learning_rate": 2.132007163556104e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7638927698135376, + "logits/rejected": -0.7638927698135376, + "logps/chosen": -1.2787901937372226e-07, + "logps/rejected": -1.2787901937372226e-07, + "loss": 0.0347, + "nll_loss": 1.4007086690526194e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -6.393951146321797e-09, + "rewards/margins": 0.0, + "rewards/rejected": -6.393951146321797e-09, + "step": 550 + }, + { + "epoch": 0.24586499776486365, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7629874348640442, + "eval_logits/rejected": -0.7629874348640442, + "eval_logps/chosen": -1.3004645893488487e-07, + "eval_logps/rejected": -1.3004645893488487e-07, + "eval_loss": 0.03465750440955162, + "eval_nll_loss": 1.4156101713069802e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -6.502324456647557e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -6.502324456647557e-09, + "eval_runtime": 307.2276, + "eval_samples_per_second": 58.24, + "eval_steps_per_second": 1.823, + "step": 550 + }, + { + "epoch": 0.25033527045149756, + "grad_norm": 0.06516073872439763, + "learning_rate": 2.1128856368212917e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7624660134315491, + "logits/rejected": -0.7624660134315491, + "logps/chosen": -1.4088367095155263e-07, + "logps/rejected": -1.4088367095155263e-07, + "loss": 0.0347, + "nll_loss": 1.4901156930591242e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.04418301467058e-09, + "rewards/margins": 0.0, + "rewards/rejected": -7.04418301467058e-09, + "step": 560 + }, + { + "epoch": 0.25033527045149756, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7590819001197815, + "eval_logits/rejected": -0.7590819001197815, + "eval_logps/chosen": -1.5172085454651096e-07, + "eval_logps/rejected": -1.5172085454651096e-07, + "eval_loss": 0.03465751186013222, + "eval_nll_loss": 1.5646217832454568e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -7.586042904961232e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -7.586042904961232e-09, + "eval_runtime": 307.2208, + "eval_samples_per_second": 58.242, + "eval_steps_per_second": 1.823, + "step": 560 + }, + { + "epoch": 0.2548055431381314, + "grad_norm": 0.07782558260433992, + "learning_rate": 2.0942695414584777e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.758915901184082, + "logits/rejected": -0.758915901184082, + "logps/chosen": -1.5280458853794698e-07, + "logps/rejected": -1.5280458853794698e-07, + "loss": 0.0347, + "nll_loss": 1.661478705727859e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.640229782168717e-09, + "rewards/margins": 0.0, + "rewards/rejected": -7.640229782168717e-09, + "step": 570 + }, + { + "epoch": 0.2548055431381314, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7589533925056458, + "eval_logits/rejected": -0.7589533925056458, + "eval_logps/chosen": -1.5172085454651096e-07, + "eval_logps/rejected": -1.5172085454651096e-07, + "eval_loss": 0.03465753421187401, + "eval_nll_loss": 1.7136328267497447e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -7.586042904961232e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -7.586042904961232e-09, + "eval_runtime": 307.2908, + "eval_samples_per_second": 58.228, + "eval_steps_per_second": 1.822, + "step": 570 + }, + { + "epoch": 0.25927581582476533, + "grad_norm": 0.0704148190270702, + "learning_rate": 2.0761369963434992e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7552851438522339, + "logits/rejected": -0.7552851438522339, + "logps/chosen": -1.538883083185283e-07, + "logps/rejected": -1.538883083185283e-07, + "loss": 0.0347, + "nll_loss": 1.7210831515512837e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.694415771197782e-09, + "rewards/margins": 0.0, + "rewards/rejected": -7.694415771197782e-09, + "step": 580 + }, + { + "epoch": 0.25927581582476533, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.754145622253418, + "eval_logits/rejected": -0.754145622253418, + "eval_logps/chosen": -1.5172085454651096e-07, + "eval_logps/rejected": -1.5172085454651096e-07, + "eval_loss": 0.03465753421187401, + "eval_nll_loss": 1.7136328267497447e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -7.586042904961232e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -7.586042904961232e-09, + "eval_runtime": 307.2521, + "eval_samples_per_second": 58.236, + "eval_steps_per_second": 1.823, + "step": 580 + }, + { + "epoch": 0.2637460885113992, + "grad_norm": 0.11568307292638198, + "learning_rate": 2.058467423981546e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7538480162620544, + "logits/rejected": -0.7538480162620544, + "logps/chosen": -1.549720280991096e-07, + "logps/rejected": -1.549720280991096e-07, + "loss": 0.0347, + "nll_loss": 1.765786663554536e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.748601760226848e-09, + "rewards/margins": 0.0, + "rewards/rejected": -7.748601760226848e-09, + "step": 590 + }, + { + "epoch": 0.2637460885113992, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.755375325679779, + "eval_logits/rejected": -0.755375325679779, + "eval_logps/chosen": -1.6255808077403344e-07, + "eval_logps/rejected": -1.6255808077403344e-07, + "eval_loss": 0.034657545387744904, + "eval_nll_loss": 1.937150670983101e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -8.127903683430304e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -8.127903683430304e-09, + "eval_runtime": 307.2733, + "eval_samples_per_second": 58.232, + "eval_steps_per_second": 1.822, + "step": 590 + }, + { + "epoch": 0.2682163611980331, + "grad_norm": 0.19470687368738301, + "learning_rate": 2.0412414523193154e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7575832605361938, + "logits/rejected": -0.7575832605361938, + "logps/chosen": -2.1999520072313317e-07, + "logps/rejected": -2.1999520072313317e-07, + "loss": 0.0347, + "nll_loss": 2.332030391016815e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.0999761990149182e-08, + "rewards/margins": 0.0, + "rewards/rejected": -1.0999761990149182e-08, + "step": 600 + }, + { + "epoch": 0.2682163611980331, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7635509967803955, + "eval_logits/rejected": -0.7635509967803955, + "eval_logps/chosen": -3.359532172453328e-07, + "eval_logps/rejected": -3.359532172453328e-07, + "eval_loss": 0.034657664597034454, + "eval_nll_loss": 3.1292407243199705e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -1.6797658375367064e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -1.6797658375367064e-08, + "eval_runtime": 307.2526, + "eval_samples_per_second": 58.235, + "eval_steps_per_second": 1.823, + "step": 600 + }, + { + "epoch": 0.27268663388466696, + "grad_norm": 0.42044147270647025, + "learning_rate": 2.0244408254472904e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7522677183151245, + "logits/rejected": -0.7522677183151245, + "logps/chosen": -5.35355354713829e-07, + "logps/rejected": -5.35355354713829e-07, + "loss": 0.0347, + "nll_loss": 4.31386752097751e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.6767764893520507e-08, + "rewards/margins": 0.0, + "rewards/rejected": -2.6767764893520507e-08, + "step": 610 + }, + { + "epoch": 0.27268663388466696, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.7175083756446838, + "eval_logits/rejected": -0.7175083756446838, + "eval_logps/chosen": -4.291438472137088e-06, + "eval_logps/rejected": -4.291438472137088e-06, + "eval_loss": 0.03466034680604935, + "eval_nll_loss": 2.995067234223825e-06, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -2.1457185539475176e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -2.1457185539475176e-07, + "eval_runtime": 307.3107, + "eval_samples_per_second": 58.224, + "eval_steps_per_second": 1.822, + "step": 610 + }, + { + "epoch": 0.2771569065713009, + "grad_norm": 0.01605006750727601, + "learning_rate": 2.0080483222562476e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.7701171040534973, + "logits/rejected": -0.7701171040534973, + "logps/chosen": -3.6403660487849265e-05, + "logps/rejected": -3.6403660487849265e-05, + "loss": 0.0347, + "nll_loss": 2.5153431124635972e-05, + "rewards/accuracies": 0.0, + "rewards/chosen": -1.820182887968258e-06, + "rewards/margins": 0.0, + "rewards/rejected": -1.820182887968258e-06, + "step": 620 + }, + { + "epoch": 0.2771569065713009, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.9072837233543396, + "eval_logits/rejected": -0.9072837233543396, + "eval_logps/chosen": -9.753479446317215e-08, + "eval_logps/rejected": -9.753479446317215e-08, + "eval_loss": 0.03465813025832176, + "eval_nll_loss": 7.674061066609283e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -4.876741233061921e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -4.876741233061921e-09, + "eval_runtime": 307.2783, + "eval_samples_per_second": 58.231, + "eval_steps_per_second": 1.822, + "step": 620 + }, + { + "epoch": 0.28162717925793473, + "grad_norm": 0.017203363283006045, + "learning_rate": 1.9920476822239895e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -0.9159248471260071, + "logits/rejected": -0.9159248471260071, + "logps/chosen": -5.274546765576815e-06, + "logps/rejected": -5.274546765576815e-06, + "loss": 0.0347, + "nll_loss": 4.1455546124780085e-06, + "rewards/accuracies": 0.0, + "rewards/chosen": -2.637273723848921e-07, + "rewards/margins": 0.0, + "rewards/rejected": -2.637273723848921e-07, + "step": 630 + }, + { + "epoch": 0.28162717925793473, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.001300573348999, + "eval_logits/rejected": -1.001300573348999, + "eval_logps/chosen": -1.0837207042868613e-07, + "eval_logps/rejected": -1.0837207042868613e-07, + "eval_loss": 0.03465818613767624, + "eval_nll_loss": 8.195610803340969e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -5.418601123352573e-09, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -5.418601123352573e-09, + "eval_runtime": 307.2434, + "eval_samples_per_second": 58.237, + "eval_steps_per_second": 1.823, + "step": 630 + }, + { + "epoch": 0.2860974519445686, + "grad_norm": 0.30297850608494836, + "learning_rate": 1.976423537605237e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.05489182472229, + "logits/rejected": -1.05489182472229, + "logps/chosen": -1.5822315901914408e-07, + "logps/rejected": -1.5822315901914408e-07, + "loss": 0.0347, + "nll_loss": 1.037116476254596e-06, + "rewards/accuracies": 0.0, + "rewards/chosen": -7.911157062778784e-09, + "rewards/margins": 0.0, + "rewards/rejected": -7.911157062778784e-09, + "step": 640 + }, + { + "epoch": 0.2860974519445686, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -1.097064733505249, + "eval_logits/rejected": -1.097064733505249, + "eval_logps/chosen": -8.127877890728996e-07, + "eval_logps/rejected": -8.127877890728996e-07, + "eval_loss": 0.034658223390579224, + "eval_nll_loss": 8.642647912893153e-07, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -4.0639388743102245e-08, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -4.0639388743102245e-08, + "eval_runtime": 307.3284, + "eval_samples_per_second": 58.221, + "eval_steps_per_second": 1.822, + "step": 640 + }, + { + "epoch": 0.2905677246312025, + "grad_norm": 0.2522133573777816, + "learning_rate": 1.961161351381841e-06, + "log_odds_chosen": 0.0, + "log_odds_ratio": -0.6931471824645996, + "logits/chosen": -1.0934903621673584, + "logits/rejected": -1.0934903621673584, + "logps/chosen": -8.60470663610613e-07, + "logps/rejected": -8.60470663610613e-07, + "loss": 0.0347, + "nll_loss": 6.951365207896743e-07, + "rewards/accuracies": 0.0, + "rewards/chosen": -4.302352962781697e-08, + "rewards/margins": 0.0, + "rewards/rejected": -4.302352962781697e-08, + "step": 650 + }, + { + "epoch": 0.2905677246312025, + "eval_log_odds_chosen": 0.0, + "eval_log_odds_ratio": -0.6931473612785339, + "eval_logits/chosen": -0.8988085985183716, + "eval_logits/rejected": -0.8988085985183716, + "eval_logps/chosen": -1.3534682693716604e-05, + "eval_logps/rejected": -1.3534682693716604e-05, + "eval_loss": 0.03466695547103882, + "eval_nll_loss": 9.595665687811561e-06, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": -6.767340892110951e-07, + "eval_rewards/margins": 0.0, + "eval_rewards/rejected": -6.767340892110951e-07, + "eval_runtime": 307.2566, + "eval_samples_per_second": 58.235, + "eval_steps_per_second": 1.823, + "step": 650 + }, + { + "epoch": 0.29503799731783636, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.9462473604038077e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.1054, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 660 + }, + { + "epoch": 0.29503799731783636, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9632, + "eval_samples_per_second": 58.29, + "eval_steps_per_second": 1.824, + "step": 660 + }, + { + "epoch": 0.2995082700044703, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.9316685232156397e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 670 + }, + { + "epoch": 0.2995082700044703, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.937, + "eval_samples_per_second": 58.295, + "eval_steps_per_second": 1.824, + "step": 670 + }, + { + "epoch": 0.30397854269110414, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.917412472118426e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 680 + }, + { + "epoch": 0.30397854269110414, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9677, + "eval_samples_per_second": 58.29, + "eval_steps_per_second": 1.824, + "step": 680 + }, + { + "epoch": 0.30844881537773805, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.9034674690672024e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 690 + }, + { + "epoch": 0.30844881537773805, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9917, + "eval_samples_per_second": 58.285, + "eval_steps_per_second": 1.824, + "step": 690 + }, + { + "epoch": 0.3129190880643719, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.8898223650461362e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 700 + }, + { + "epoch": 0.3129190880643719, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0225, + "eval_samples_per_second": 58.279, + "eval_steps_per_second": 1.824, + "step": 700 + }, + { + "epoch": 0.3173893607510058, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.876466562602004e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 710 + }, + { + "epoch": 0.3173893607510058, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0188, + "eval_samples_per_second": 58.28, + "eval_steps_per_second": 1.824, + "step": 710 + }, + { + "epoch": 0.3218596334376397, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.863389981249825e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 720 + }, + { + "epoch": 0.3218596334376397, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9809, + "eval_samples_per_second": 58.287, + "eval_steps_per_second": 1.824, + "step": 720 + }, + { + "epoch": 0.3263299061242736, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.8505830254940132e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 730 + }, + { + "epoch": 0.3263299061242736, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.999, + "eval_samples_per_second": 58.284, + "eval_steps_per_second": 1.824, + "step": 730 + }, + { + "epoch": 0.33080017881090745, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.8380365552345197e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 740 + }, + { + "epoch": 0.33080017881090745, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9879, + "eval_samples_per_second": 58.286, + "eval_steps_per_second": 1.824, + "step": 740 + }, + { + "epoch": 0.33527045149754137, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.8257418583505536e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 750 + }, + { + "epoch": 0.33527045149754137, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9649, + "eval_samples_per_second": 58.29, + "eval_steps_per_second": 1.824, + "step": 750 + }, + { + "epoch": 0.3397407241841752, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.8136906252750293e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 760 + }, + { + "epoch": 0.3397407241841752, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9772, + "eval_samples_per_second": 58.288, + "eval_steps_per_second": 1.824, + "step": 760 + }, + { + "epoch": 0.34421099687080914, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.801874925391118e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 770 + }, + { + "epoch": 0.34421099687080914, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9557, + "eval_samples_per_second": 58.292, + "eval_steps_per_second": 1.824, + "step": 770 + }, + { + "epoch": 0.348681269557443, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.7902871850985824e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 780 + }, + { + "epoch": 0.348681269557443, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9969, + "eval_samples_per_second": 58.284, + "eval_steps_per_second": 1.824, + "step": 780 + }, + { + "epoch": 0.3531515422440769, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.7789201674120502e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 790 + }, + { + "epoch": 0.3531515422440769, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.985, + "eval_samples_per_second": 58.286, + "eval_steps_per_second": 1.824, + "step": 790 + }, + { + "epoch": 0.35762181493071077, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.7677669529663689e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 800 + }, + { + "epoch": 0.35762181493071077, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0459, + "eval_samples_per_second": 58.275, + "eval_steps_per_second": 1.824, + "step": 800 + }, + { + "epoch": 0.3620920876173447, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.7568209223157664e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 810 + }, + { + "epoch": 0.3620920876173447, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.487, + "eval_samples_per_second": 58.191, + "eval_steps_per_second": 1.821, + "step": 810 + }, + { + "epoch": 0.36656236030397854, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.7460757394239458e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 820 + }, + { + "epoch": 0.36656236030397854, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9448, + "eval_samples_per_second": 58.294, + "eval_steps_per_second": 1.824, + "step": 820 + }, + { + "epoch": 0.37103263299061245, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.7355253362515584e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 830 + }, + { + "epoch": 0.37103263299061245, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9523, + "eval_samples_per_second": 58.292, + "eval_steps_per_second": 1.824, + "step": 830 + }, + { + "epoch": 0.3755029056772463, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.7251638983558855e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 840 + }, + { + "epoch": 0.3755029056772463, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9448, + "eval_samples_per_second": 58.294, + "eval_steps_per_second": 1.824, + "step": 840 + }, + { + "epoch": 0.37997317836388017, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.7149858514250883e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 850 + }, + { + "epoch": 0.37997317836388017, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9333, + "eval_samples_per_second": 58.296, + "eval_steps_per_second": 1.825, + "step": 850 + }, + { + "epoch": 0.3844434510505141, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.704985848676184e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 860 + }, + { + "epoch": 0.3844434510505141, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8979, + "eval_samples_per_second": 58.303, + "eval_steps_per_second": 1.825, + "step": 860 + }, + { + "epoch": 0.38891372373714794, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.6951587590520263e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 870 + }, + { + "epoch": 0.38891372373714794, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9523, + "eval_samples_per_second": 58.292, + "eval_steps_per_second": 1.824, + "step": 870 + }, + { + "epoch": 0.39338399642378186, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.6854996561581053e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 880 + }, + { + "epoch": 0.39338399642378186, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9703, + "eval_samples_per_second": 58.289, + "eval_steps_per_second": 1.824, + "step": 880 + }, + { + "epoch": 0.3978542691104157, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.6760038078849776e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 890 + }, + { + "epoch": 0.3978542691104157, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9927, + "eval_samples_per_second": 58.285, + "eval_steps_per_second": 1.824, + "step": 890 + }, + { + "epoch": 0.40232454179704963, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.6666666666666667e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 900 + }, + { + "epoch": 0.40232454179704963, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9478, + "eval_samples_per_second": 58.293, + "eval_steps_per_second": 1.824, + "step": 900 + }, + { + "epoch": 0.4067948144836835, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.6574838603294898e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 910 + }, + { + "epoch": 0.4067948144836835, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0325, + "eval_samples_per_second": 58.277, + "eval_steps_per_second": 1.824, + "step": 910 + }, + { + "epoch": 0.4112650871703174, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.648451183489468e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 920 + }, + { + "epoch": 0.4112650871703174, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0105, + "eval_samples_per_second": 58.281, + "eval_steps_per_second": 1.824, + "step": 920 + }, + { + "epoch": 0.41573535985695126, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.6395645894598825e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 930 + }, + { + "epoch": 0.41573535985695126, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9663, + "eval_samples_per_second": 58.29, + "eval_steps_per_second": 1.824, + "step": 930 + }, + { + "epoch": 0.4202056325435852, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.6308201826336057e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 940 + }, + { + "epoch": 0.4202056325435852, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9346, + "eval_samples_per_second": 58.296, + "eval_steps_per_second": 1.824, + "step": 940 + }, + { + "epoch": 0.42467590523021903, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.6222142113076255e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 950 + }, + { + "epoch": 0.42467590523021903, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9147, + "eval_samples_per_second": 58.3, + "eval_steps_per_second": 1.825, + "step": 950 + }, + { + "epoch": 0.42914617791685294, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.6137430609197571e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 960 + }, + { + "epoch": 0.42914617791685294, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9235, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 960 + }, + { + "epoch": 0.4336164506034868, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.605403247669839e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 970 + }, + { + "epoch": 0.4336164506034868, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9407, + "eval_samples_per_second": 58.295, + "eval_steps_per_second": 1.824, + "step": 970 + }, + { + "epoch": 0.4380867232901207, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.59719141249985e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 980 + }, + { + "epoch": 0.4380867232901207, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8804, + "eval_samples_per_second": 58.306, + "eval_steps_per_second": 1.825, + "step": 980 + }, + { + "epoch": 0.4425569959767546, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5891043154093205e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 990 + }, + { + "epoch": 0.4425569959767546, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9332, + "eval_samples_per_second": 58.296, + "eval_steps_per_second": 1.825, + "step": 990 + }, + { + "epoch": 0.4470272686633885, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5811388300841898e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1000 + }, + { + "epoch": 0.4470272686633885, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9856, + "eval_samples_per_second": 58.286, + "eval_steps_per_second": 1.824, + "step": 1000 + }, + { + "epoch": 0.45149754135002235, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5732919388188816e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1010 + }, + { + "epoch": 0.45149754135002235, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.1337, + "eval_samples_per_second": 58.258, + "eval_steps_per_second": 1.823, + "step": 1010 + }, + { + "epoch": 0.45596781403665626, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.565560727712874e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1020 + }, + { + "epoch": 0.45596781403665626, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.942, + "eval_samples_per_second": 58.294, + "eval_steps_per_second": 1.824, + "step": 1020 + }, + { + "epoch": 0.4604380867232901, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5579423821243897e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1030 + }, + { + "epoch": 0.4604380867232901, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9725, + "eval_samples_per_second": 58.289, + "eval_steps_per_second": 1.824, + "step": 1030 + }, + { + "epoch": 0.46490835940992403, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5504341823651056e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1040 + }, + { + "epoch": 0.46490835940992403, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9371, + "eval_samples_per_second": 58.295, + "eval_steps_per_second": 1.824, + "step": 1040 + }, + { + "epoch": 0.4693786320965579, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5430334996209192e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1050 + }, + { + "epoch": 0.4693786320965579, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9659, + "eval_samples_per_second": 58.29, + "eval_steps_per_second": 1.824, + "step": 1050 + }, + { + "epoch": 0.47384890478319175, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5357377920848783e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1060 + }, + { + "epoch": 0.47384890478319175, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8938, + "eval_samples_per_second": 58.304, + "eval_steps_per_second": 1.825, + "step": 1060 + }, + { + "epoch": 0.47831917746982566, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5285446012893579e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1070 + }, + { + "epoch": 0.47831917746982566, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8839, + "eval_samples_per_second": 58.305, + "eval_steps_per_second": 1.825, + "step": 1070 + }, + { + "epoch": 0.4827894501564595, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5214515486254614e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1080 + }, + { + "epoch": 0.4827894501564595, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9231, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 1080 + }, + { + "epoch": 0.48725972284309343, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5144563320384566e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1090 + }, + { + "epoch": 0.48725972284309343, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9186, + "eval_samples_per_second": 58.299, + "eval_steps_per_second": 1.825, + "step": 1090 + }, + { + "epoch": 0.4917299955297273, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5075567228888182e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1100 + }, + { + "epoch": 0.4917299955297273, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9155, + "eval_samples_per_second": 58.299, + "eval_steps_per_second": 1.825, + "step": 1100 + }, + { + "epoch": 0.4962002682163612, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.5007505629691608e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1110 + }, + { + "epoch": 0.4962002682163612, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0125, + "eval_samples_per_second": 58.281, + "eval_steps_per_second": 1.824, + "step": 1110 + }, + { + "epoch": 0.5006705409029951, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.494035761667992e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1120 + }, + { + "epoch": 0.5006705409029951, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9188, + "eval_samples_per_second": 58.299, + "eval_steps_per_second": 1.825, + "step": 1120 + }, + { + "epoch": 0.5051408135896289, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.487410293271824e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1130 + }, + { + "epoch": 0.5051408135896289, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9331, + "eval_samples_per_second": 58.296, + "eval_steps_per_second": 1.825, + "step": 1130 + }, + { + "epoch": 0.5096110862762628, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.480872194397731e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1140 + }, + { + "epoch": 0.5096110862762628, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9523, + "eval_samples_per_second": 58.292, + "eval_steps_per_second": 1.824, + "step": 1140 + }, + { + "epoch": 0.5140813589628968, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4744195615489715e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1150 + }, + { + "epoch": 0.5140813589628968, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9243, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 1150 + }, + { + "epoch": 0.5185516316495307, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4680505487867589e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1160 + }, + { + "epoch": 0.5185516316495307, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9342, + "eval_samples_per_second": 58.296, + "eval_steps_per_second": 1.824, + "step": 1160 + }, + { + "epoch": 0.5230219043361645, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4617633655117156e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1170 + }, + { + "epoch": 0.5230219043361645, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9471, + "eval_samples_per_second": 58.293, + "eval_steps_per_second": 1.824, + "step": 1170 + }, + { + "epoch": 0.5274921770227984, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4555562743489552e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1180 + }, + { + "epoch": 0.5274921770227984, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9553, + "eval_samples_per_second": 58.292, + "eval_steps_per_second": 1.824, + "step": 1180 + }, + { + "epoch": 0.5319624497094323, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4494275891311214e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1190 + }, + { + "epoch": 0.5319624497094323, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9175, + "eval_samples_per_second": 58.299, + "eval_steps_per_second": 1.825, + "step": 1190 + }, + { + "epoch": 0.5364327223960662, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4433756729740647e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1200 + }, + { + "epoch": 0.5364327223960662, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9299, + "eval_samples_per_second": 58.297, + "eval_steps_per_second": 1.825, + "step": 1200 + }, + { + "epoch": 0.5409029950827, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4373989364401727e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1210 + }, + { + "epoch": 0.5409029950827, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.4452, + "eval_samples_per_second": 58.199, + "eval_steps_per_second": 1.821, + "step": 1210 + }, + { + "epoch": 0.5453732677693339, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4314958357846706e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1220 + }, + { + "epoch": 0.5453732677693339, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9633, + "eval_samples_per_second": 58.29, + "eval_steps_per_second": 1.824, + "step": 1220 + }, + { + "epoch": 0.5498435404559678, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4256648712805027e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1230 + }, + { + "epoch": 0.5498435404559678, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0093, + "eval_samples_per_second": 58.282, + "eval_steps_per_second": 1.824, + "step": 1230 + }, + { + "epoch": 0.5543138131426018, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.419904585617662e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1240 + }, + { + "epoch": 0.5543138131426018, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9636, + "eval_samples_per_second": 58.29, + "eval_steps_per_second": 1.824, + "step": 1240 + }, + { + "epoch": 0.5587840858292356, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4142135623730952e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1250 + }, + { + "epoch": 0.5587840858292356, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9663, + "eval_samples_per_second": 58.29, + "eval_steps_per_second": 1.824, + "step": 1250 + }, + { + "epoch": 0.5632543585158695, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4085904245475275e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1260 + }, + { + "epoch": 0.5632543585158695, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9137, + "eval_samples_per_second": 58.3, + "eval_steps_per_second": 1.825, + "step": 1260 + }, + { + "epoch": 0.5677246312025034, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.4030338331657844e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1270 + }, + { + "epoch": 0.5677246312025034, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9528, + "eval_samples_per_second": 58.292, + "eval_steps_per_second": 1.824, + "step": 1270 + }, + { + "epoch": 0.5721949038891372, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3975424859373688e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1280 + }, + { + "epoch": 0.5721949038891372, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9438, + "eval_samples_per_second": 58.294, + "eval_steps_per_second": 1.824, + "step": 1280 + }, + { + "epoch": 0.5766651765757711, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3921151159742616e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1290 + }, + { + "epoch": 0.5766651765757711, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9341, + "eval_samples_per_second": 58.296, + "eval_steps_per_second": 1.824, + "step": 1290 + }, + { + "epoch": 0.581135449262405, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.386750490563073e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1300 + }, + { + "epoch": 0.581135449262405, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9212, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 1300 + }, + { + "epoch": 0.5856057219490389, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3814474099888442e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1310 + }, + { + "epoch": 0.5856057219490389, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9709, + "eval_samples_per_second": 58.289, + "eval_steps_per_second": 1.824, + "step": 1310 + }, + { + "epoch": 0.5900759946356727, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.376204706407951e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1320 + }, + { + "epoch": 0.5900759946356727, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8949, + "eval_samples_per_second": 58.303, + "eval_steps_per_second": 1.825, + "step": 1320 + }, + { + "epoch": 0.5945462673223066, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3710212427677044e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1330 + }, + { + "epoch": 0.5945462673223066, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9456, + "eval_samples_per_second": 58.294, + "eval_steps_per_second": 1.824, + "step": 1330 + }, + { + "epoch": 0.5990165400089406, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3658959117703826e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1340 + }, + { + "epoch": 0.5990165400089406, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.923, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 1340 + }, + { + "epoch": 0.6034868126955745, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3608276348795436e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1350 + }, + { + "epoch": 0.6034868126955745, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9347, + "eval_samples_per_second": 58.296, + "eval_steps_per_second": 1.824, + "step": 1350 + }, + { + "epoch": 0.6079570853822083, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.355815361366601e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1360 + }, + { + "epoch": 0.6079570853822083, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9472, + "eval_samples_per_second": 58.293, + "eval_steps_per_second": 1.824, + "step": 1360 + }, + { + "epoch": 0.6124273580688422, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.350858067395748e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1370 + }, + { + "epoch": 0.6124273580688422, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9086, + "eval_samples_per_second": 58.301, + "eval_steps_per_second": 1.825, + "step": 1370 + }, + { + "epoch": 0.6168976307554761, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.345954755145414e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1380 + }, + { + "epoch": 0.6168976307554761, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9757, + "eval_samples_per_second": 58.288, + "eval_steps_per_second": 1.824, + "step": 1380 + }, + { + "epoch": 0.62136790344211, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3411044519645502e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1390 + }, + { + "epoch": 0.62136790344211, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9413, + "eval_samples_per_second": 58.295, + "eval_steps_per_second": 1.824, + "step": 1390 + }, + { + "epoch": 0.6258381761287438, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3363062095621222e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1400 + }, + { + "epoch": 0.6258381761287438, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8654, + "eval_samples_per_second": 58.309, + "eval_steps_per_second": 1.825, + "step": 1400 + }, + { + "epoch": 0.6303084488153777, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3315591032282687e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1410 + }, + { + "epoch": 0.6303084488153777, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.3373, + "eval_samples_per_second": 58.219, + "eval_steps_per_second": 1.822, + "step": 1410 + }, + { + "epoch": 0.6347787215020116, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3268622310856882e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1420 + }, + { + "epoch": 0.6347787215020116, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9821, + "eval_samples_per_second": 58.287, + "eval_steps_per_second": 1.824, + "step": 1420 + }, + { + "epoch": 0.6392489941886456, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3222147133698626e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1430 + }, + { + "epoch": 0.6392489941886456, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9112, + "eval_samples_per_second": 58.3, + "eval_steps_per_second": 1.825, + "step": 1430 + }, + { + "epoch": 0.6437192668752794, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3176156917368248e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1440 + }, + { + "epoch": 0.6437192668752794, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9398, + "eval_samples_per_second": 58.295, + "eval_steps_per_second": 1.824, + "step": 1440 + }, + { + "epoch": 0.6481895395619133, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3130643285972255e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1450 + }, + { + "epoch": 0.6481895395619133, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9452, + "eval_samples_per_second": 58.294, + "eval_steps_per_second": 1.824, + "step": 1450 + }, + { + "epoch": 0.6526598122485472, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3085598064755342e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1460 + }, + { + "epoch": 0.6526598122485472, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9408, + "eval_samples_per_second": 58.295, + "eval_steps_per_second": 1.824, + "step": 1460 + }, + { + "epoch": 0.657130084935181, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.3041013273932528e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1470 + }, + { + "epoch": 0.657130084935181, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9377, + "eval_samples_per_second": 58.295, + "eval_steps_per_second": 1.824, + "step": 1470 + }, + { + "epoch": 0.6616003576218149, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.299688112275091e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1480 + }, + { + "epoch": 0.6616003576218149, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9478, + "eval_samples_per_second": 58.293, + "eval_steps_per_second": 1.824, + "step": 1480 + }, + { + "epoch": 0.6660706303084488, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2953194003770995e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1490 + }, + { + "epoch": 0.6660706303084488, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9455, + "eval_samples_per_second": 58.294, + "eval_steps_per_second": 1.824, + "step": 1490 + }, + { + "epoch": 0.6705409029950827, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2909944487358056e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1500 + }, + { + "epoch": 0.6705409029950827, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9717, + "eval_samples_per_second": 58.289, + "eval_steps_per_second": 1.824, + "step": 1500 + }, + { + "epoch": 0.6750111756817165, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.286712531637447e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1510 + }, + { + "epoch": 0.6750111756817165, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.4527, + "eval_samples_per_second": 58.198, + "eval_steps_per_second": 1.821, + "step": 1510 + }, + { + "epoch": 0.6794814483683504, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.282472940106443e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1520 + }, + { + "epoch": 0.6794814483683504, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8943, + "eval_samples_per_second": 58.303, + "eval_steps_per_second": 1.825, + "step": 1520 + }, + { + "epoch": 0.6839517210549844, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.278274981412284e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1530 + }, + { + "epoch": 0.6839517210549844, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8936, + "eval_samples_per_second": 58.304, + "eval_steps_per_second": 1.825, + "step": 1530 + }, + { + "epoch": 0.6884219937416183, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2741179785940638e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1540 + }, + { + "epoch": 0.6884219937416183, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9217, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 1540 + }, + { + "epoch": 0.6928922664282521, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.270001270001905e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1550 + }, + { + "epoch": 0.6928922664282521, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9543, + "eval_samples_per_second": 58.292, + "eval_steps_per_second": 1.824, + "step": 1550 + }, + { + "epoch": 0.697362539114886, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2659242088545834e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1560 + }, + { + "epoch": 0.697362539114886, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9759, + "eval_samples_per_second": 58.288, + "eval_steps_per_second": 1.824, + "step": 1560 + }, + { + "epoch": 0.7018328118015199, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.261886162812672e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1570 + }, + { + "epoch": 0.7018328118015199, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9444, + "eval_samples_per_second": 58.294, + "eval_steps_per_second": 1.824, + "step": 1570 + }, + { + "epoch": 0.7063030844881538, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.257886513566569e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1580 + }, + { + "epoch": 0.7063030844881538, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9556, + "eval_samples_per_second": 58.292, + "eval_steps_per_second": 1.824, + "step": 1580 + }, + { + "epoch": 0.7107733571747876, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.253924656438798e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1590 + }, + { + "epoch": 0.7107733571747876, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9498, + "eval_samples_per_second": 58.293, + "eval_steps_per_second": 1.824, + "step": 1590 + }, + { + "epoch": 0.7152436298614215, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.25e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1600 + }, + { + "epoch": 0.7152436298614215, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9189, + "eval_samples_per_second": 58.299, + "eval_steps_per_second": 1.825, + "step": 1600 + }, + { + "epoch": 0.7197139025480555, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.246111965698067e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1610 + }, + { + "epoch": 0.7197139025480555, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0602, + "eval_samples_per_second": 58.272, + "eval_steps_per_second": 1.824, + "step": 1610 + }, + { + "epoch": 0.7241841752346894, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2422599874998834e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1620 + }, + { + "epoch": 0.7241841752346894, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9262, + "eval_samples_per_second": 58.297, + "eval_steps_per_second": 1.825, + "step": 1620 + }, + { + "epoch": 0.7286544479213232, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.238443511545175e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1630 + }, + { + "epoch": 0.7286544479213232, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9104, + "eval_samples_per_second": 58.3, + "eval_steps_per_second": 1.825, + "step": 1630 + }, + { + "epoch": 0.7331247206079571, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2346619958119873e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1640 + }, + { + "epoch": 0.7331247206079571, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9095, + "eval_samples_per_second": 58.301, + "eval_steps_per_second": 1.825, + "step": 1640 + }, + { + "epoch": 0.737594993294591, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2309149097933274e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1650 + }, + { + "epoch": 0.737594993294591, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9941, + "eval_samples_per_second": 58.285, + "eval_steps_per_second": 1.824, + "step": 1650 + }, + { + "epoch": 0.7420652659812249, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2272017341845401e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1660 + }, + { + "epoch": 0.7420652659812249, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9483, + "eval_samples_per_second": 58.293, + "eval_steps_per_second": 1.824, + "step": 1660 + }, + { + "epoch": 0.7465355386678587, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.223521960580991e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1670 + }, + { + "epoch": 0.7465355386678587, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9179, + "eval_samples_per_second": 58.299, + "eval_steps_per_second": 1.825, + "step": 1670 + }, + { + "epoch": 0.7510058113544926, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2198750911856664e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1680 + }, + { + "epoch": 0.7510058113544926, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9109, + "eval_samples_per_second": 58.3, + "eval_steps_per_second": 1.825, + "step": 1680 + }, + { + "epoch": 0.7554760840411265, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2162606385262997e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1690 + }, + { + "epoch": 0.7554760840411265, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8631, + "eval_samples_per_second": 58.309, + "eval_steps_per_second": 1.825, + "step": 1690 + }, + { + "epoch": 0.7599463567277603, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2126781251816649e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1700 + }, + { + "epoch": 0.7599463567277603, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9256, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 1700 + }, + { + "epoch": 0.7644166294143943, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2091270835166862e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1710 + }, + { + "epoch": 0.7644166294143943, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0642, + "eval_samples_per_second": 58.271, + "eval_steps_per_second": 1.824, + "step": 1710 + }, + { + "epoch": 0.7688869021010282, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2056070554260305e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1720 + }, + { + "epoch": 0.7688869021010282, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9109, + "eval_samples_per_second": 58.3, + "eval_steps_per_second": 1.825, + "step": 1720 + }, + { + "epoch": 0.7733571747876621, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.2021175920858626e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1730 + }, + { + "epoch": 0.7733571747876621, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8822, + "eval_samples_per_second": 58.306, + "eval_steps_per_second": 1.825, + "step": 1730 + }, + { + "epoch": 0.7778274474742959, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1986582537134606e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1740 + }, + { + "epoch": 0.7778274474742959, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8705, + "eval_samples_per_second": 58.308, + "eval_steps_per_second": 1.825, + "step": 1740 + }, + { + "epoch": 0.7822977201609298, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1952286093343937e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1750 + }, + { + "epoch": 0.7822977201609298, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9026, + "eval_samples_per_second": 58.302, + "eval_steps_per_second": 1.825, + "step": 1750 + }, + { + "epoch": 0.7867679928475637, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1918282365569903e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1760 + }, + { + "epoch": 0.7867679928475637, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.928, + "eval_samples_per_second": 58.297, + "eval_steps_per_second": 1.825, + "step": 1760 + }, + { + "epoch": 0.7912382655341976, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1884567213538209e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1770 + }, + { + "epoch": 0.7912382655341976, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9265, + "eval_samples_per_second": 58.297, + "eval_steps_per_second": 1.825, + "step": 1770 + }, + { + "epoch": 0.7957085382208314, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1851136578499433e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1780 + }, + { + "epoch": 0.7957085382208314, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9251, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 1780 + }, + { + "epoch": 0.8001788109074653, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.181798648117664e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1790 + }, + { + "epoch": 0.8001788109074653, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8954, + "eval_samples_per_second": 58.303, + "eval_steps_per_second": 1.825, + "step": 1790 + }, + { + "epoch": 0.8046490835940993, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1785113019775794e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1800 + }, + { + "epoch": 0.8046490835940993, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8855, + "eval_samples_per_second": 58.305, + "eval_steps_per_second": 1.825, + "step": 1800 + }, + { + "epoch": 0.8091193562807332, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1752512368056712e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1810 + }, + { + "epoch": 0.8091193562807332, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.0492, + "eval_samples_per_second": 58.274, + "eval_steps_per_second": 1.824, + "step": 1810 + }, + { + "epoch": 0.813589628967367, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1720180773462387e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1820 + }, + { + "epoch": 0.813589628967367, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8809, + "eval_samples_per_second": 58.306, + "eval_steps_per_second": 1.825, + "step": 1820 + }, + { + "epoch": 0.8180599016540009, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.168811455530461e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1830 + }, + { + "epoch": 0.8180599016540009, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8895, + "eval_samples_per_second": 58.304, + "eval_steps_per_second": 1.825, + "step": 1830 + }, + { + "epoch": 0.8225301743406348, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1656310103003923e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1840 + }, + { + "epoch": 0.8225301743406348, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8857, + "eval_samples_per_second": 58.305, + "eval_steps_per_second": 1.825, + "step": 1840 + }, + { + "epoch": 0.8270004470272687, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.162476387438193e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1850 + }, + { + "epoch": 0.8270004470272687, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9216, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 1850 + }, + { + "epoch": 0.8314707197139025, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1593472394004206e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1860 + }, + { + "epoch": 0.8314707197139025, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8721, + "eval_samples_per_second": 58.308, + "eval_steps_per_second": 1.825, + "step": 1860 + }, + { + "epoch": 0.8359409924005364, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1562432251572007e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1870 + }, + { + "epoch": 0.8359409924005364, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8854, + "eval_samples_per_second": 58.305, + "eval_steps_per_second": 1.825, + "step": 1870 + }, + { + "epoch": 0.8404112650871703, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1531640100361064e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1880 + }, + { + "epoch": 0.8404112650871703, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8876, + "eval_samples_per_second": 58.305, + "eval_steps_per_second": 1.825, + "step": 1880 + }, + { + "epoch": 0.8448815377738041, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1501092655705905e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1890 + }, + { + "epoch": 0.8448815377738041, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8984, + "eval_samples_per_second": 58.303, + "eval_steps_per_second": 1.825, + "step": 1890 + }, + { + "epoch": 0.8493518104604381, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1470786693528087e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1900 + }, + { + "epoch": 0.8493518104604381, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8788, + "eval_samples_per_second": 58.306, + "eval_steps_per_second": 1.825, + "step": 1900 + }, + { + "epoch": 0.853822083147072, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.144071904890689e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1910 + }, + { + "epoch": 0.853822083147072, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9922, + "eval_samples_per_second": 58.285, + "eval_steps_per_second": 1.824, + "step": 1910 + }, + { + "epoch": 0.8582923558337059, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1410886614690962e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1920 + }, + { + "epoch": 0.8582923558337059, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8985, + "eval_samples_per_second": 58.303, + "eval_steps_per_second": 1.825, + "step": 1920 + }, + { + "epoch": 0.8627626285203397, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1381286340149635e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1930 + }, + { + "epoch": 0.8627626285203397, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.948, + "eval_samples_per_second": 58.293, + "eval_steps_per_second": 1.824, + "step": 1930 + }, + { + "epoch": 0.8672329012069736, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1351915229662496e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1940 + }, + { + "epoch": 0.8672329012069736, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9062, + "eval_samples_per_second": 58.301, + "eval_steps_per_second": 1.825, + "step": 1940 + }, + { + "epoch": 0.8717031738936075, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1322770341445958e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1950 + }, + { + "epoch": 0.8717031738936075, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9101, + "eval_samples_per_second": 58.3, + "eval_steps_per_second": 1.825, + "step": 1950 + }, + { + "epoch": 0.8761734465802414, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1293848786315642e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1960 + }, + { + "epoch": 0.8761734465802414, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9046, + "eval_samples_per_second": 58.302, + "eval_steps_per_second": 1.825, + "step": 1960 + }, + { + "epoch": 0.8806437192668752, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1265147726483323e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1970 + }, + { + "epoch": 0.8806437192668752, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8737, + "eval_samples_per_second": 58.307, + "eval_steps_per_second": 1.825, + "step": 1970 + }, + { + "epoch": 0.8851139919535091, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1236664374387369e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1980 + }, + { + "epoch": 0.8851139919535091, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8878, + "eval_samples_per_second": 58.305, + "eval_steps_per_second": 1.825, + "step": 1980 + }, + { + "epoch": 0.8895842646401431, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.120839599155551e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 1990 + }, + { + "epoch": 0.8895842646401431, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8703, + "eval_samples_per_second": 58.308, + "eval_steps_per_second": 1.825, + "step": 1990 + }, + { + "epoch": 0.894054537326777, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.118033988749895e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2000 + }, + { + "epoch": 0.894054537326777, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8991, + "eval_samples_per_second": 58.303, + "eval_steps_per_second": 1.825, + "step": 2000 + }, + { + "epoch": 0.8985248100134108, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1152493418636764e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2010 + }, + { + "epoch": 0.8985248100134108, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.3093, + "eval_samples_per_second": 58.225, + "eval_steps_per_second": 1.822, + "step": 2010 + }, + { + "epoch": 0.9029950827000447, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.112485398724962e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2020 + }, + { + "epoch": 0.9029950827000447, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9509, + "eval_samples_per_second": 58.293, + "eval_steps_per_second": 1.824, + "step": 2020 + }, + { + "epoch": 0.9074653553866786, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1097419040461884e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2030 + }, + { + "epoch": 0.9074653553866786, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8663, + "eval_samples_per_second": 58.309, + "eval_steps_per_second": 1.825, + "step": 2030 + }, + { + "epoch": 0.9119356280733125, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1070186069251193e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2040 + }, + { + "epoch": 0.9119356280733125, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9069, + "eval_samples_per_second": 58.301, + "eval_steps_per_second": 1.825, + "step": 2040 + }, + { + "epoch": 0.9164059007599463, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1043152607484655e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2050 + }, + { + "epoch": 0.9164059007599463, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8916, + "eval_samples_per_second": 58.304, + "eval_steps_per_second": 1.825, + "step": 2050 + }, + { + "epoch": 0.9208761734465802, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.1016316230980794e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2060 + }, + { + "epoch": 0.9208761734465802, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.933, + "eval_samples_per_second": 58.296, + "eval_steps_per_second": 1.825, + "step": 2060 + }, + { + "epoch": 0.9253464461332142, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.098967455659645e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2070 + }, + { + "epoch": 0.9253464461332142, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9244, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 2070 + }, + { + "epoch": 0.9298167188198481, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0963225241337867e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2080 + }, + { + "epoch": 0.9298167188198481, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9128, + "eval_samples_per_second": 58.3, + "eval_steps_per_second": 1.825, + "step": 2080 + }, + { + "epoch": 0.9342869915064819, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.093696598149518e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2090 + }, + { + "epoch": 0.9342869915064819, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8665, + "eval_samples_per_second": 58.309, + "eval_steps_per_second": 1.825, + "step": 2090 + }, + { + "epoch": 0.9387572641931158, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.091089451179962e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2100 + }, + { + "epoch": 0.9387572641931158, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9558, + "eval_samples_per_second": 58.292, + "eval_steps_per_second": 1.824, + "step": 2100 + }, + { + "epoch": 0.9432275368797497, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0885008604602703e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2110 + }, + { + "epoch": 0.9432275368797497, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9696, + "eval_samples_per_second": 58.289, + "eval_steps_per_second": 1.824, + "step": 2110 + }, + { + "epoch": 0.9476978095663835, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0859306069076736e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2120 + }, + { + "epoch": 0.9476978095663835, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8734, + "eval_samples_per_second": 58.307, + "eval_steps_per_second": 1.825, + "step": 2120 + }, + { + "epoch": 0.9521680822530174, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.083378475043599e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2130 + }, + { + "epoch": 0.9521680822530174, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8916, + "eval_samples_per_second": 58.304, + "eval_steps_per_second": 1.825, + "step": 2130 + }, + { + "epoch": 0.9566383549396513, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0808442529177925e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2140 + }, + { + "epoch": 0.9566383549396513, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9179, + "eval_samples_per_second": 58.299, + "eval_steps_per_second": 1.825, + "step": 2140 + }, + { + "epoch": 0.9611086276262852, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0783277320343842e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2150 + }, + { + "epoch": 0.9611086276262852, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8541, + "eval_samples_per_second": 58.311, + "eval_steps_per_second": 1.825, + "step": 2150 + }, + { + "epoch": 0.965578900312919, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.075828707279838e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2160 + }, + { + "epoch": 0.965578900312919, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9213, + "eval_samples_per_second": 58.298, + "eval_steps_per_second": 1.825, + "step": 2160 + }, + { + "epoch": 0.970049172999553, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0733469768527298e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2170 + }, + { + "epoch": 0.970049172999553, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.9379, + "eval_samples_per_second": 58.295, + "eval_steps_per_second": 1.824, + "step": 2170 + }, + { + "epoch": 0.9745194456861869, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0708823421952984e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2180 + }, + { + "epoch": 0.9745194456861869, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8894, + "eval_samples_per_second": 58.304, + "eval_steps_per_second": 1.825, + "step": 2180 + }, + { + "epoch": 0.9789897183728208, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0684346079267208e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2190 + }, + { + "epoch": 0.9789897183728208, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.89, + "eval_samples_per_second": 58.304, + "eval_steps_per_second": 1.825, + "step": 2190 + }, + { + "epoch": 0.9834599910594546, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.066003581778052e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2200 + }, + { + "epoch": 0.9834599910594546, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8657, + "eval_samples_per_second": 58.309, + "eval_steps_per_second": 1.825, + "step": 2200 + }, + { + "epoch": 0.9879302637460885, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0635890745287928e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2210 + }, + { + "epoch": 0.9879302637460885, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 307.025, + "eval_samples_per_second": 58.279, + "eval_steps_per_second": 1.824, + "step": 2210 + }, + { + "epoch": 0.9924005364327224, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.0611908999450224e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2220 + }, + { + "epoch": 0.9924005364327224, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8682, + "eval_samples_per_second": 58.308, + "eval_steps_per_second": 1.825, + "step": 2220 + }, + { + "epoch": 0.9968708091193563, + "grad_norm": 1.4142135623730951, + "learning_rate": 1.058808874719067e-06, + "log_odds_chosen": NaN, + "log_odds_ratio": NaN, + "logits/chosen": NaN, + "logits/rejected": NaN, + "logps/chosen": NaN, + "logps/rejected": NaN, + "loss": 0.0, + "nll_loss": NaN, + "rewards/accuracies": 0.0, + "rewards/chosen": NaN, + "rewards/margins": NaN, + "rewards/rejected": NaN, + "step": 2230 + }, + { + "epoch": 0.9968708091193563, + "eval_log_odds_chosen": NaN, + "eval_log_odds_ratio": NaN, + "eval_logits/chosen": NaN, + "eval_logits/rejected": NaN, + "eval_logps/chosen": NaN, + "eval_logps/rejected": NaN, + "eval_loss": NaN, + "eval_nll_loss": NaN, + "eval_rewards/accuracies": 0.0, + "eval_rewards/chosen": NaN, + "eval_rewards/margins": NaN, + "eval_rewards/rejected": NaN, + "eval_runtime": 306.8635, + "eval_samples_per_second": 58.309, + "eval_steps_per_second": 1.825, + "step": 2230 + }, + { + "epoch": 1.0, + "step": 2237, + "total_flos": 0.0, + "train_loss": 0.04676872755621073, + "train_runtime": 74758.4678, + "train_samples_per_second": 0.957, + "train_steps_per_second": 0.03 + } + ], + "logging_steps": 10, + "max_steps": 2237, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}