diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" new file mode 100644--- /dev/null +++ "b/last-checkpoint/trainer_state.json" @@ -0,0 +1,44450 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 4039, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "chosen_geometric_mean": -1.7763246297836304, + "epoch": 0.0, + "grad_norm": 1048.0, + "learning_rate": 5.000000000000001e-07, + "log_odds": -0.3995080590248108, + "log_odds_ratio": -0.9159038066864014, + "loss": 6.582, + "rejected_geometric_mean": -1.4595143795013428, + "step": 1 + }, + { + "chosen_geometric_mean": -1.2260702848434448, + "epoch": 0.0, + "grad_norm": 1032.0, + "learning_rate": 1.0000000000000002e-06, + "log_odds": -0.016261622309684753, + "log_odds_ratio": -0.7163001894950867, + "loss": 6.457, + "rejected_geometric_mean": -1.1924188137054443, + "step": 2 + }, + { + "chosen_geometric_mean": -1.1969268321990967, + "epoch": 0.0, + "grad_norm": 1016.0, + "learning_rate": 1.5e-06, + "log_odds": -0.1923251897096634, + "log_odds_ratio": -0.794471025466919, + "loss": 5.2891, + "rejected_geometric_mean": -1.0677305459976196, + "step": 3 + }, + { + "chosen_geometric_mean": -1.6256661415100098, + "epoch": 0.0, + "grad_norm": 716.0, + "learning_rate": 2.0000000000000003e-06, + "log_odds": -0.592944860458374, + "log_odds_ratio": -1.0399197340011597, + "loss": 2.3232, + "rejected_geometric_mean": -1.18324875831604, + "step": 4 + }, + { + "chosen_geometric_mean": -1.4044809341430664, + "epoch": 0.0, + "grad_norm": 484.0, + "learning_rate": 2.5e-06, + "log_odds": -0.6297827959060669, + "log_odds_ratio": -1.0689723491668701, + "loss": 1.0903, + "rejected_geometric_mean": -0.987423300743103, + "step": 5 + }, + { + "chosen_geometric_mean": -1.3123711347579956, + "epoch": 0.0, + "grad_norm": 36.5, + "learning_rate": 3e-06, + "log_odds": -0.3031950891017914, + "log_odds_ratio": -0.8605786561965942, + "loss": 0.4816, + "rejected_geometric_mean": -1.100913166999817, + "step": 6 + }, + { + "chosen_geometric_mean": -1.7310140132904053, + "epoch": 0.0, + "grad_norm": 5.96875, + "learning_rate": 3.5e-06, + "log_odds": -0.48653241991996765, + "log_odds_ratio": -0.9764511585235596, + "loss": 0.4288, + "rejected_geometric_mean": -1.3676079511642456, + "step": 7 + }, + { + "chosen_geometric_mean": -1.673514485359192, + "epoch": 0.0, + "grad_norm": 51.25, + "learning_rate": 4.000000000000001e-06, + "log_odds": -0.3446342945098877, + "log_odds_ratio": -0.8921982049942017, + "loss": 0.3905, + "rejected_geometric_mean": -1.4051668643951416, + "step": 8 + }, + { + "chosen_geometric_mean": -1.4519433975219727, + "epoch": 0.0, + "grad_norm": 4.78125, + "learning_rate": 4.5e-06, + "log_odds": 0.014258764684200287, + "log_odds_ratio": -0.6903261542320251, + "loss": 0.4373, + "rejected_geometric_mean": -1.444451928138733, + "step": 9 + }, + { + "chosen_geometric_mean": -1.253537893295288, + "epoch": 0.0, + "grad_norm": 135.0, + "learning_rate": 5e-06, + "log_odds": 0.08755617588758469, + "log_odds_ratio": -0.6710808873176575, + "loss": 0.4661, + "rejected_geometric_mean": -1.3183112144470215, + "step": 10 + }, + { + "chosen_geometric_mean": -1.0212026834487915, + "epoch": 0.0, + "grad_norm": 4.875, + "learning_rate": 4.999999810469995e-06, + "log_odds": -0.13884621858596802, + "log_odds_ratio": -0.8003092408180237, + "loss": 0.4237, + "rejected_geometric_mean": -0.9496305584907532, + "step": 11 + }, + { + "chosen_geometric_mean": -1.4823530912399292, + "epoch": 0.0, + "grad_norm": 6.84375, + "learning_rate": 4.999999241880009e-06, + "log_odds": -0.2986679673194885, + "log_odds_ratio": -0.8621305823326111, + "loss": 0.3925, + "rejected_geometric_mean": -1.2728855609893799, + "step": 12 + }, + { + "chosen_geometric_mean": -1.2460086345672607, + "epoch": 0.0, + "grad_norm": 4.375, + "learning_rate": 4.999998294230128e-06, + "log_odds": -0.3997105658054352, + "log_odds_ratio": -0.9138590097427368, + "loss": 0.3954, + "rejected_geometric_mean": -0.9794821739196777, + "step": 13 + }, + { + "chosen_geometric_mean": -1.3409969806671143, + "epoch": 0.0, + "grad_norm": 2.828125, + "learning_rate": 4.999996967520497e-06, + "log_odds": 0.28860461711883545, + "log_odds_ratio": -0.5816660523414612, + "loss": 0.3796, + "rejected_geometric_mean": -1.5650138854980469, + "step": 14 + }, + { + "chosen_geometric_mean": -1.2136807441711426, + "epoch": 0.0, + "grad_norm": 5.40625, + "learning_rate": 4.999995261751315e-06, + "log_odds": -0.10004450380802155, + "log_odds_ratio": -0.7453711628913879, + "loss": 0.4167, + "rejected_geometric_mean": -1.1440973281860352, + "step": 15 + }, + { + "chosen_geometric_mean": -1.1477863788604736, + "epoch": 0.0, + "grad_norm": 2.71875, + "learning_rate": 4.999993176922842e-06, + "log_odds": -0.10113367438316345, + "log_odds_ratio": -0.7458310127258301, + "loss": 0.386, + "rejected_geometric_mean": -1.0824298858642578, + "step": 16 + }, + { + "chosen_geometric_mean": -1.3768670558929443, + "epoch": 0.0, + "grad_norm": 2.625, + "learning_rate": 4.999990713035393e-06, + "log_odds": 0.6183157563209534, + "log_odds_ratio": -0.5590088963508606, + "loss": 0.4065, + "rejected_geometric_mean": -1.9721308946609497, + "step": 17 + }, + { + "chosen_geometric_mean": -1.030426263809204, + "epoch": 0.0, + "grad_norm": 2.578125, + "learning_rate": 4.999987870089343e-06, + "log_odds": -0.17298468947410583, + "log_odds_ratio": -0.7850807309150696, + "loss": 0.354, + "rejected_geometric_mean": -0.9294872283935547, + "step": 18 + }, + { + "chosen_geometric_mean": -1.4962022304534912, + "epoch": 0.0, + "grad_norm": 2.40625, + "learning_rate": 4.999984648085122e-06, + "log_odds": 0.1230340376496315, + "log_odds_ratio": -0.65843665599823, + "loss": 0.317, + "rejected_geometric_mean": -1.6147785186767578, + "step": 19 + }, + { + "chosen_geometric_mean": -1.1209038496017456, + "epoch": 0.0, + "grad_norm": 2.546875, + "learning_rate": 4.999981047023219e-06, + "log_odds": -0.03024435043334961, + "log_odds_ratio": -0.7093698978424072, + "loss": 0.366, + "rejected_geometric_mean": -1.0954521894454956, + "step": 20 + }, + { + "chosen_geometric_mean": -1.2548167705535889, + "epoch": 0.01, + "grad_norm": 2.84375, + "learning_rate": 4.999977066904181e-06, + "log_odds": -0.004851222038269043, + "log_odds_ratio": -0.7223141193389893, + "loss": 0.4169, + "rejected_geometric_mean": -1.2470471858978271, + "step": 21 + }, + { + "chosen_geometric_mean": -1.0482044219970703, + "epoch": 0.01, + "grad_norm": 2.421875, + "learning_rate": 4.999972707728609e-06, + "log_odds": -0.11008964478969574, + "log_odds_ratio": -0.7535948157310486, + "loss": 0.3303, + "rejected_geometric_mean": -0.9904245734214783, + "step": 22 + }, + { + "chosen_geometric_mean": -1.1560916900634766, + "epoch": 0.01, + "grad_norm": 2.609375, + "learning_rate": 4.999967969497166e-06, + "log_odds": 0.30913013219833374, + "log_odds_ratio": -0.5553233027458191, + "loss": 0.3601, + "rejected_geometric_mean": -1.38321053981781, + "step": 23 + }, + { + "chosen_geometric_mean": -1.427901268005371, + "epoch": 0.01, + "grad_norm": 2.703125, + "learning_rate": 4.999962852210569e-06, + "log_odds": -0.07320241630077362, + "log_odds_ratio": -0.7343562841415405, + "loss": 0.38, + "rejected_geometric_mean": -1.3748245239257812, + "step": 24 + }, + { + "chosen_geometric_mean": -1.1885144710540771, + "epoch": 0.01, + "grad_norm": 2.53125, + "learning_rate": 4.999957355869595e-06, + "log_odds": -0.22386834025382996, + "log_odds_ratio": -0.8154250383377075, + "loss": 0.3647, + "rejected_geometric_mean": -1.0472617149353027, + "step": 25 + }, + { + "chosen_geometric_mean": -1.2543447017669678, + "epoch": 0.01, + "grad_norm": 2.734375, + "learning_rate": 4.999951480475078e-06, + "log_odds": 0.13933901488780975, + "log_odds_ratio": -0.6413201689720154, + "loss": 0.3817, + "rejected_geometric_mean": -1.3736152648925781, + "step": 26 + }, + { + "chosen_geometric_mean": -1.3358536958694458, + "epoch": 0.01, + "grad_norm": 2.53125, + "learning_rate": 4.999945226027907e-06, + "log_odds": -0.03861994296312332, + "log_odds_ratio": -0.725858211517334, + "loss": 0.3723, + "rejected_geometric_mean": -1.3067013025283813, + "step": 27 + }, + { + "chosen_geometric_mean": -1.0710030794143677, + "epoch": 0.01, + "grad_norm": 2.4375, + "learning_rate": 4.999938592529031e-06, + "log_odds": 0.002353765070438385, + "log_odds_ratio": -0.6999320983886719, + "loss": 0.332, + "rejected_geometric_mean": -1.0667288303375244, + "step": 28 + }, + { + "chosen_geometric_mean": -1.2700555324554443, + "epoch": 0.01, + "grad_norm": 2.6875, + "learning_rate": 4.999931579979457e-06, + "log_odds": 0.03248198702931404, + "log_odds_ratio": -0.6814274787902832, + "loss": 0.4067, + "rejected_geometric_mean": -1.3002400398254395, + "step": 29 + }, + { + "chosen_geometric_mean": -1.248997688293457, + "epoch": 0.01, + "grad_norm": 2.328125, + "learning_rate": 4.999924188380247e-06, + "log_odds": 1.9682700634002686, + "log_odds_ratio": -0.5259683728218079, + "loss": 0.317, + "rejected_geometric_mean": -3.16559100151062, + "step": 30 + }, + { + "chosen_geometric_mean": -1.1064237356185913, + "epoch": 0.01, + "grad_norm": 2.375, + "learning_rate": 4.9999164177325225e-06, + "log_odds": -0.02866411954164505, + "log_odds_ratio": -0.7154954075813293, + "loss": 0.363, + "rejected_geometric_mean": -1.0866203308105469, + "step": 31 + }, + { + "chosen_geometric_mean": -1.2025521993637085, + "epoch": 0.01, + "grad_norm": 2.359375, + "learning_rate": 4.99990826803746e-06, + "log_odds": 0.07881972193717957, + "log_odds_ratio": -0.6589128971099854, + "loss": 0.3212, + "rejected_geometric_mean": -1.2760748863220215, + "step": 32 + }, + { + "chosen_geometric_mean": -1.2427606582641602, + "epoch": 0.01, + "grad_norm": 2.59375, + "learning_rate": 4.999899739296297e-06, + "log_odds": 0.10904442518949509, + "log_odds_ratio": -0.6504892110824585, + "loss": 0.3879, + "rejected_geometric_mean": -1.3489984273910522, + "step": 33 + }, + { + "chosen_geometric_mean": -1.1363821029663086, + "epoch": 0.01, + "grad_norm": 2.375, + "learning_rate": 4.999890831510327e-06, + "log_odds": 0.42242366075515747, + "log_odds_ratio": -0.5334783792495728, + "loss": 0.3378, + "rejected_geometric_mean": -1.4582370519638062, + "step": 34 + }, + { + "chosen_geometric_mean": -1.1195378303527832, + "epoch": 0.01, + "grad_norm": 2.765625, + "learning_rate": 4.9998815446808994e-06, + "log_odds": 0.09950637072324753, + "log_odds_ratio": -0.6470373868942261, + "loss": 0.3905, + "rejected_geometric_mean": -1.1927167177200317, + "step": 35 + }, + { + "chosen_geometric_mean": -1.1885805130004883, + "epoch": 0.01, + "grad_norm": 2.40625, + "learning_rate": 4.999871878809422e-06, + "log_odds": 1.2125178575515747, + "log_odds_ratio": -0.44181978702545166, + "loss": 0.3237, + "rejected_geometric_mean": -2.289789915084839, + "step": 36 + }, + { + "chosen_geometric_mean": -1.2838592529296875, + "epoch": 0.01, + "grad_norm": 2.390625, + "learning_rate": 4.999861833897361e-06, + "log_odds": 0.19519180059432983, + "log_odds_ratio": -0.6106514930725098, + "loss": 0.3646, + "rejected_geometric_mean": -1.4303289651870728, + "step": 37 + }, + { + "chosen_geometric_mean": -1.0425281524658203, + "epoch": 0.01, + "grad_norm": 2.703125, + "learning_rate": 4.99985140994624e-06, + "log_odds": 0.08107808232307434, + "log_odds_ratio": -0.6970080137252808, + "loss": 0.4159, + "rejected_geometric_mean": -1.1298975944519043, + "step": 38 + }, + { + "chosen_geometric_mean": -1.2609992027282715, + "epoch": 0.01, + "grad_norm": 2.421875, + "learning_rate": 4.99984060695764e-06, + "log_odds": 1.0186322927474976, + "log_odds_ratio": -0.5209671258926392, + "loss": 0.3488, + "rejected_geometric_mean": -2.215270519256592, + "step": 39 + }, + { + "chosen_geometric_mean": -1.1431430578231812, + "epoch": 0.01, + "grad_norm": 2.5, + "learning_rate": 4.9998294249331965e-06, + "log_odds": 0.1358340084552765, + "log_odds_ratio": -0.6278203725814819, + "loss": 0.347, + "rejected_geometric_mean": -1.239497184753418, + "step": 40 + }, + { + "chosen_geometric_mean": -1.2451767921447754, + "epoch": 0.01, + "grad_norm": 2.453125, + "learning_rate": 4.999817863874607e-06, + "log_odds": 0.09865818917751312, + "log_odds_ratio": -0.6530817151069641, + "loss": 0.3845, + "rejected_geometric_mean": -1.3181231021881104, + "step": 41 + }, + { + "chosen_geometric_mean": -1.0691394805908203, + "epoch": 0.01, + "grad_norm": 2.34375, + "learning_rate": 4.999805923783624e-06, + "log_odds": -0.1203748807311058, + "log_odds_ratio": -0.7645888924598694, + "loss": 0.3232, + "rejected_geometric_mean": -1.0025304555892944, + "step": 42 + }, + { + "chosen_geometric_mean": -1.0725288391113281, + "epoch": 0.01, + "grad_norm": 2.578125, + "learning_rate": 4.999793604662058e-06, + "log_odds": 0.11620479077100754, + "log_odds_ratio": -0.6444766521453857, + "loss": 0.3766, + "rejected_geometric_mean": -1.1629694700241089, + "step": 43 + }, + { + "chosen_geometric_mean": -1.4244825839996338, + "epoch": 0.01, + "grad_norm": 2.421875, + "learning_rate": 4.999780906511776e-06, + "log_odds": 1.2075780630111694, + "log_odds_ratio": -0.4204549491405487, + "loss": 0.3318, + "rejected_geometric_mean": -2.5303311347961426, + "step": 44 + }, + { + "chosen_geometric_mean": -1.0775643587112427, + "epoch": 0.01, + "grad_norm": 2.453125, + "learning_rate": 4.999767829334705e-06, + "log_odds": 0.17135301232337952, + "log_odds_ratio": -0.6235606670379639, + "loss": 0.3276, + "rejected_geometric_mean": -1.200806736946106, + "step": 45 + }, + { + "chosen_geometric_mean": -1.4023363590240479, + "epoch": 0.01, + "grad_norm": 2.46875, + "learning_rate": 4.999754373132826e-06, + "log_odds": -0.13536055386066437, + "log_odds_ratio": -0.7789304256439209, + "loss": 0.3624, + "rejected_geometric_mean": -1.3101060390472412, + "step": 46 + }, + { + "chosen_geometric_mean": -1.105752944946289, + "epoch": 0.01, + "grad_norm": 2.296875, + "learning_rate": 4.999740537908181e-06, + "log_odds": 0.22343003749847412, + "log_odds_ratio": -0.611822247505188, + "loss": 0.3289, + "rejected_geometric_mean": -1.2714792490005493, + "step": 47 + }, + { + "chosen_geometric_mean": -1.0772072076797485, + "epoch": 0.01, + "grad_norm": 2.484375, + "learning_rate": 4.999726323662866e-06, + "log_odds": 0.18215292692184448, + "log_odds_ratio": -0.6224769949913025, + "loss": 0.3268, + "rejected_geometric_mean": -1.2087275981903076, + "step": 48 + }, + { + "chosen_geometric_mean": -1.3014483451843262, + "epoch": 0.01, + "grad_norm": 2.328125, + "learning_rate": 4.999711730399037e-06, + "log_odds": 1.6367573738098145, + "log_odds_ratio": -0.4044879674911499, + "loss": 0.3087, + "rejected_geometric_mean": -2.8093948364257812, + "step": 49 + }, + { + "chosen_geometric_mean": -1.13691246509552, + "epoch": 0.01, + "grad_norm": 2.5, + "learning_rate": 4.999696758118907e-06, + "log_odds": 0.02122858166694641, + "log_odds_ratio": -0.6857303380966187, + "loss": 0.3832, + "rejected_geometric_mean": -1.1569197177886963, + "step": 50 + }, + { + "chosen_geometric_mean": -1.0585601329803467, + "epoch": 0.01, + "grad_norm": 2.359375, + "learning_rate": 4.999681406824747e-06, + "log_odds": -0.07150229811668396, + "log_odds_ratio": -0.7402312755584717, + "loss": 0.3377, + "rejected_geometric_mean": -1.028062343597412, + "step": 51 + }, + { + "chosen_geometric_mean": -1.138702154159546, + "epoch": 0.01, + "grad_norm": 2.5, + "learning_rate": 4.999665676518882e-06, + "log_odds": -0.5007675290107727, + "log_odds_ratio": -1.0252081155776978, + "loss": 0.374, + "rejected_geometric_mean": -0.9505171775817871, + "step": 52 + }, + { + "chosen_geometric_mean": -1.2356818914413452, + "epoch": 0.01, + "grad_norm": 3.171875, + "learning_rate": 4.999649567203701e-06, + "log_odds": 1.0442497730255127, + "log_odds_ratio": -0.438517689704895, + "loss": 0.3407, + "rejected_geometric_mean": -2.1619956493377686, + "step": 53 + }, + { + "chosen_geometric_mean": -1.0453599691390991, + "epoch": 0.01, + "grad_norm": 2.671875, + "learning_rate": 4.999633078881641e-06, + "log_odds": 0.19154803454875946, + "log_odds_ratio": -0.6057674884796143, + "loss": 0.399, + "rejected_geometric_mean": -1.1819593906402588, + "step": 54 + }, + { + "chosen_geometric_mean": -1.173365592956543, + "epoch": 0.01, + "grad_norm": 2.46875, + "learning_rate": 4.999616211555208e-06, + "log_odds": 0.07438477128744125, + "log_odds_ratio": -0.659665584564209, + "loss": 0.374, + "rejected_geometric_mean": -1.2241953611373901, + "step": 55 + }, + { + "chosen_geometric_mean": -1.0724709033966064, + "epoch": 0.01, + "grad_norm": 2.53125, + "learning_rate": 4.999598965226956e-06, + "log_odds": -0.15090113878250122, + "log_odds_ratio": -0.7769546508789062, + "loss": 0.3524, + "rejected_geometric_mean": -0.985970139503479, + "step": 56 + }, + { + "chosen_geometric_mean": -1.4759962558746338, + "epoch": 0.01, + "grad_norm": 2.453125, + "learning_rate": 4.9995813398994996e-06, + "log_odds": 0.2469651699066162, + "log_odds_ratio": -0.5924747586250305, + "loss": 0.3523, + "rejected_geometric_mean": -1.6693758964538574, + "step": 57 + }, + { + "chosen_geometric_mean": -1.2783797979354858, + "epoch": 0.01, + "grad_norm": 2.40625, + "learning_rate": 4.999563335575513e-06, + "log_odds": 0.18407398462295532, + "log_odds_ratio": -0.6055968999862671, + "loss": 0.3589, + "rejected_geometric_mean": -1.412815809249878, + "step": 58 + }, + { + "chosen_geometric_mean": -1.1203184127807617, + "epoch": 0.01, + "grad_norm": 2.484375, + "learning_rate": 4.9995449522577265e-06, + "log_odds": 0.19627097249031067, + "log_odds_ratio": -0.6005597710609436, + "loss": 0.3445, + "rejected_geometric_mean": -1.2538151741027832, + "step": 59 + }, + { + "chosen_geometric_mean": -1.1528476476669312, + "epoch": 0.01, + "grad_norm": 2.546875, + "learning_rate": 4.999526189948925e-06, + "log_odds": 0.23978325724601746, + "log_odds_ratio": -0.5820591449737549, + "loss": 0.3629, + "rejected_geometric_mean": -1.325774073600769, + "step": 60 + }, + { + "chosen_geometric_mean": -1.4935963153839111, + "epoch": 0.02, + "grad_norm": 2.46875, + "learning_rate": 4.999507048651955e-06, + "log_odds": 1.4660899639129639, + "log_odds_ratio": -0.4683069586753845, + "loss": 0.325, + "rejected_geometric_mean": -2.8678810596466064, + "step": 61 + }, + { + "chosen_geometric_mean": -1.4079766273498535, + "epoch": 0.02, + "grad_norm": 2.359375, + "learning_rate": 4.999487528369719e-06, + "log_odds": 0.24609659612178802, + "log_odds_ratio": -0.5873382091522217, + "loss": 0.3649, + "rejected_geometric_mean": -1.6022205352783203, + "step": 62 + }, + { + "chosen_geometric_mean": -1.306457757949829, + "epoch": 0.02, + "grad_norm": 2.625, + "learning_rate": 4.999467629105176e-06, + "log_odds": 0.060147859156131744, + "log_odds_ratio": -0.7073543667793274, + "loss": 0.3961, + "rejected_geometric_mean": -1.370673418045044, + "step": 63 + }, + { + "chosen_geometric_mean": -0.9439505338668823, + "epoch": 0.02, + "grad_norm": 2.359375, + "learning_rate": 4.999447350861343e-06, + "log_odds": 0.050578005611896515, + "log_odds_ratio": -0.6695896983146667, + "loss": 0.3467, + "rejected_geometric_mean": -0.9780815839767456, + "step": 64 + }, + { + "chosen_geometric_mean": -1.162469744682312, + "epoch": 0.02, + "grad_norm": 2.703125, + "learning_rate": 4.999426693641295e-06, + "log_odds": 0.5330992937088013, + "log_odds_ratio": -0.4989972710609436, + "loss": 0.3688, + "rejected_geometric_mean": -1.577435851097107, + "step": 65 + }, + { + "chosen_geometric_mean": -1.0727607011795044, + "epoch": 0.02, + "grad_norm": 2.4375, + "learning_rate": 4.999405657448164e-06, + "log_odds": 0.04517597705125809, + "log_odds_ratio": -0.6734403371810913, + "loss": 0.3766, + "rejected_geometric_mean": -1.0998398065567017, + "step": 66 + }, + { + "chosen_geometric_mean": -1.0298705101013184, + "epoch": 0.02, + "grad_norm": 2.265625, + "learning_rate": 4.999384242285141e-06, + "log_odds": 0.6879474520683289, + "log_odds_ratio": -0.47784918546676636, + "loss": 0.3169, + "rejected_geometric_mean": -1.5928906202316284, + "step": 67 + }, + { + "chosen_geometric_mean": -1.356080412864685, + "epoch": 0.02, + "grad_norm": 2.40625, + "learning_rate": 4.999362448155471e-06, + "log_odds": 1.0906238555908203, + "log_odds_ratio": -0.5167521238327026, + "loss": 0.368, + "rejected_geometric_mean": -2.397657632827759, + "step": 68 + }, + { + "chosen_geometric_mean": -1.1961517333984375, + "epoch": 0.02, + "grad_norm": 2.53125, + "learning_rate": 4.99934027506246e-06, + "log_odds": 0.1525188535451889, + "log_odds_ratio": -0.6325230598449707, + "loss": 0.3446, + "rejected_geometric_mean": -1.3144973516464233, + "step": 69 + }, + { + "chosen_geometric_mean": -1.1613612174987793, + "epoch": 0.02, + "grad_norm": 2.296875, + "learning_rate": 4.999317723009468e-06, + "log_odds": 0.28490734100341797, + "log_odds_ratio": -0.5613175630569458, + "loss": 0.3431, + "rejected_geometric_mean": -1.3663723468780518, + "step": 70 + }, + { + "chosen_geometric_mean": -1.2979636192321777, + "epoch": 0.02, + "grad_norm": 2.296875, + "learning_rate": 4.9992947919999165e-06, + "log_odds": 0.2851577699184418, + "log_odds_ratio": -0.6094247698783875, + "loss": 0.3267, + "rejected_geometric_mean": -1.5466022491455078, + "step": 71 + }, + { + "chosen_geometric_mean": -0.9666152000427246, + "epoch": 0.02, + "grad_norm": 2.484375, + "learning_rate": 4.999271482037282e-06, + "log_odds": 0.1049174964427948, + "log_odds_ratio": -0.6610835194587708, + "loss": 0.3447, + "rejected_geometric_mean": -1.0466631650924683, + "step": 72 + }, + { + "chosen_geometric_mean": -1.0055763721466064, + "epoch": 0.02, + "grad_norm": 2.390625, + "learning_rate": 4.999247793125098e-06, + "log_odds": -0.12198376655578613, + "log_odds_ratio": -0.7963801026344299, + "loss": 0.3322, + "rejected_geometric_mean": -0.9484683275222778, + "step": 73 + }, + { + "chosen_geometric_mean": -1.1248955726623535, + "epoch": 0.02, + "grad_norm": 2.65625, + "learning_rate": 4.999223725266958e-06, + "log_odds": 0.019854523241519928, + "log_odds_ratio": -0.6871377825737, + "loss": 0.3499, + "rejected_geometric_mean": -1.1443893909454346, + "step": 74 + }, + { + "chosen_geometric_mean": -1.0511571168899536, + "epoch": 0.02, + "grad_norm": 2.59375, + "learning_rate": 4.999199278466509e-06, + "log_odds": 0.04581737518310547, + "log_odds_ratio": -0.6727886199951172, + "loss": 0.3757, + "rejected_geometric_mean": -1.080148696899414, + "step": 75 + }, + { + "chosen_geometric_mean": -1.265215277671814, + "epoch": 0.02, + "grad_norm": 2.515625, + "learning_rate": 4.999174452727459e-06, + "log_odds": 0.24702569842338562, + "log_odds_ratio": -0.5979483127593994, + "loss": 0.3398, + "rejected_geometric_mean": -1.4548060894012451, + "step": 76 + }, + { + "chosen_geometric_mean": -1.1635420322418213, + "epoch": 0.02, + "grad_norm": 2.453125, + "learning_rate": 4.999149248053572e-06, + "log_odds": 0.24852436780929565, + "log_odds_ratio": -0.5846266746520996, + "loss": 0.3507, + "rejected_geometric_mean": -1.342697262763977, + "step": 77 + }, + { + "chosen_geometric_mean": -1.0811238288879395, + "epoch": 0.02, + "grad_norm": 2.5, + "learning_rate": 4.9991236644486695e-06, + "log_odds": 0.04711653292179108, + "log_odds_ratio": -0.6750699281692505, + "loss": 0.353, + "rejected_geometric_mean": -1.1209741830825806, + "step": 78 + }, + { + "chosen_geometric_mean": -0.9329956769943237, + "epoch": 0.02, + "grad_norm": 2.40625, + "learning_rate": 4.999097701916631e-06, + "log_odds": 0.16495701670646667, + "log_odds_ratio": -0.6274144053459167, + "loss": 0.3218, + "rejected_geometric_mean": -1.0677403211593628, + "step": 79 + }, + { + "chosen_geometric_mean": -1.250285267829895, + "epoch": 0.02, + "grad_norm": 2.59375, + "learning_rate": 4.999071360461392e-06, + "log_odds": -0.10046304762363434, + "log_odds_ratio": -0.7477772235870361, + "loss": 0.3607, + "rejected_geometric_mean": -1.186889886856079, + "step": 80 + }, + { + "chosen_geometric_mean": -1.1218376159667969, + "epoch": 0.02, + "grad_norm": 2.484375, + "learning_rate": 4.999044640086949e-06, + "log_odds": 0.08803524821996689, + "log_odds_ratio": -0.6560962200164795, + "loss": 0.3404, + "rejected_geometric_mean": -1.1845550537109375, + "step": 81 + }, + { + "chosen_geometric_mean": -1.3111903667449951, + "epoch": 0.02, + "grad_norm": 2.4375, + "learning_rate": 4.99901754079735e-06, + "log_odds": 0.1323229968547821, + "log_odds_ratio": -0.63811856508255, + "loss": 0.3248, + "rejected_geometric_mean": -1.4195917844772339, + "step": 82 + }, + { + "chosen_geometric_mean": -1.040503978729248, + "epoch": 0.02, + "grad_norm": 2.5625, + "learning_rate": 4.9989900625967056e-06, + "log_odds": 0.2664858102798462, + "log_odds_ratio": -0.5692791938781738, + "loss": 0.3499, + "rejected_geometric_mean": -1.2223141193389893, + "step": 83 + }, + { + "chosen_geometric_mean": -1.083552598953247, + "epoch": 0.02, + "grad_norm": 2.953125, + "learning_rate": 4.998962205489183e-06, + "log_odds": 0.3900076150894165, + "log_odds_ratio": -0.5325907468795776, + "loss": 0.3372, + "rejected_geometric_mean": -1.380457878112793, + "step": 84 + }, + { + "chosen_geometric_mean": -1.2544456720352173, + "epoch": 0.02, + "grad_norm": 2.828125, + "learning_rate": 4.998933969479005e-06, + "log_odds": 0.1481724977493286, + "log_odds_ratio": -0.6273564696311951, + "loss": 0.4033, + "rejected_geometric_mean": -1.353423833847046, + "step": 85 + }, + { + "chosen_geometric_mean": -1.488006830215454, + "epoch": 0.02, + "grad_norm": 2.34375, + "learning_rate": 4.998905354570452e-06, + "log_odds": 0.06597848236560822, + "log_odds_ratio": -0.6932690143585205, + "loss": 0.3471, + "rejected_geometric_mean": -1.5568816661834717, + "step": 86 + }, + { + "chosen_geometric_mean": -1.369747519493103, + "epoch": 0.02, + "grad_norm": 2.359375, + "learning_rate": 4.998876360767865e-06, + "log_odds": -0.18257099390029907, + "log_odds_ratio": -0.7988353371620178, + "loss": 0.3488, + "rejected_geometric_mean": -1.242637038230896, + "step": 87 + }, + { + "chosen_geometric_mean": -1.100630521774292, + "epoch": 0.02, + "grad_norm": 2.25, + "learning_rate": 4.998846988075639e-06, + "log_odds": 0.5277435183525085, + "log_odds_ratio": -0.47101277112960815, + "loss": 0.3219, + "rejected_geometric_mean": -1.4835577011108398, + "step": 88 + }, + { + "chosen_geometric_mean": -1.2316269874572754, + "epoch": 0.02, + "grad_norm": 2.4375, + "learning_rate": 4.998817236498227e-06, + "log_odds": 0.20489799976348877, + "log_odds_ratio": -0.6032311916351318, + "loss": 0.3153, + "rejected_geometric_mean": -1.3815571069717407, + "step": 89 + }, + { + "chosen_geometric_mean": -1.1825851202011108, + "epoch": 0.02, + "grad_norm": 2.71875, + "learning_rate": 4.998787106040141e-06, + "log_odds": 0.007881812751293182, + "log_odds_ratio": -0.6982184648513794, + "loss": 0.3705, + "rejected_geometric_mean": -1.1982663869857788, + "step": 90 + }, + { + "chosen_geometric_mean": -1.3881449699401855, + "epoch": 0.02, + "grad_norm": 2.390625, + "learning_rate": 4.998756596705948e-06, + "log_odds": 0.899531900882721, + "log_odds_ratio": -0.43411529064178467, + "loss": 0.3329, + "rejected_geometric_mean": -2.184495449066162, + "step": 91 + }, + { + "chosen_geometric_mean": -1.2089850902557373, + "epoch": 0.02, + "grad_norm": 2.3125, + "learning_rate": 4.998725708500276e-06, + "log_odds": 0.32623523473739624, + "log_odds_ratio": -0.5623162388801575, + "loss": 0.2948, + "rejected_geometric_mean": -1.4796215295791626, + "step": 92 + }, + { + "chosen_geometric_mean": -1.1848278045654297, + "epoch": 0.02, + "grad_norm": 2.46875, + "learning_rate": 4.9986944414278064e-06, + "log_odds": -0.014299850910902023, + "log_odds_ratio": -0.7056666016578674, + "loss": 0.3124, + "rejected_geometric_mean": -1.186496376991272, + "step": 93 + }, + { + "chosen_geometric_mean": -1.2588026523590088, + "epoch": 0.02, + "grad_norm": 2.515625, + "learning_rate": 4.998662795493282e-06, + "log_odds": -0.10654105246067047, + "log_odds_ratio": -0.7524768114089966, + "loss": 0.3492, + "rejected_geometric_mean": -1.1889009475708008, + "step": 94 + }, + { + "chosen_geometric_mean": -1.0080780982971191, + "epoch": 0.02, + "grad_norm": 2.546875, + "learning_rate": 4.9986307707014995e-06, + "log_odds": 0.33107927441596985, + "log_odds_ratio": -0.564409077167511, + "loss": 0.3727, + "rejected_geometric_mean": -1.25315260887146, + "step": 95 + }, + { + "chosen_geometric_mean": -1.3194961547851562, + "epoch": 0.02, + "grad_norm": 2.359375, + "learning_rate": 4.998598367057316e-06, + "log_odds": 0.5191240906715393, + "log_odds_ratio": -0.49535322189331055, + "loss": 0.3193, + "rejected_geometric_mean": -1.7625101804733276, + "step": 96 + }, + { + "chosen_geometric_mean": -0.9283365607261658, + "epoch": 0.02, + "grad_norm": 2.53125, + "learning_rate": 4.998565584565643e-06, + "log_odds": 0.12137050181627274, + "log_odds_ratio": -0.6356824636459351, + "loss": 0.3811, + "rejected_geometric_mean": -1.0081812143325806, + "step": 97 + }, + { + "chosen_geometric_mean": -1.2514843940734863, + "epoch": 0.02, + "grad_norm": 2.59375, + "learning_rate": 4.998532423231453e-06, + "log_odds": 0.2844225764274597, + "log_odds_ratio": -0.5661210417747498, + "loss": 0.3214, + "rejected_geometric_mean": -1.4633910655975342, + "step": 98 + }, + { + "chosen_geometric_mean": -1.0220712423324585, + "epoch": 0.02, + "grad_norm": 2.4375, + "learning_rate": 4.998498883059772e-06, + "log_odds": -0.032445892691612244, + "log_odds_ratio": -0.7196729183197021, + "loss": 0.3183, + "rejected_geometric_mean": -0.9974101781845093, + "step": 99 + }, + { + "chosen_geometric_mean": -1.1132649183273315, + "epoch": 0.02, + "grad_norm": 2.4375, + "learning_rate": 4.998464964055687e-06, + "log_odds": 1.8943923711776733, + "log_odds_ratio": -0.40577858686447144, + "loss": 0.3743, + "rejected_geometric_mean": -2.8362114429473877, + "step": 100 + }, + { + "chosen_geometric_mean": -1.1876647472381592, + "epoch": 0.03, + "grad_norm": 2.78125, + "learning_rate": 4.9984306662243396e-06, + "log_odds": 0.1808111071586609, + "log_odds_ratio": -0.6329507231712341, + "loss": 0.3494, + "rejected_geometric_mean": -1.307098627090454, + "step": 101 + }, + { + "chosen_geometric_mean": -1.099252700805664, + "epoch": 0.03, + "grad_norm": 2.390625, + "learning_rate": 4.9983959895709324e-06, + "log_odds": 0.41511622071266174, + "log_odds_ratio": -0.5322706699371338, + "loss": 0.2899, + "rejected_geometric_mean": -1.4025615453720093, + "step": 102 + }, + { + "chosen_geometric_mean": -1.1701596975326538, + "epoch": 0.03, + "grad_norm": 2.796875, + "learning_rate": 4.998360934100722e-06, + "log_odds": 0.09034258127212524, + "log_odds_ratio": -0.6623478531837463, + "loss": 0.4178, + "rejected_geometric_mean": -1.252082109451294, + "step": 103 + }, + { + "chosen_geometric_mean": -1.1611348390579224, + "epoch": 0.03, + "grad_norm": 2.625, + "learning_rate": 4.998325499819022e-06, + "log_odds": 0.10241558402776718, + "log_odds_ratio": -0.6775040626525879, + "loss": 0.3631, + "rejected_geometric_mean": -1.256849765777588, + "step": 104 + }, + { + "chosen_geometric_mean": -1.2878015041351318, + "epoch": 0.03, + "grad_norm": 2.671875, + "learning_rate": 4.998289686731208e-06, + "log_odds": 0.06977014988660812, + "log_odds_ratio": -0.6621940732002258, + "loss": 0.3929, + "rejected_geometric_mean": -1.337518572807312, + "step": 105 + }, + { + "chosen_geometric_mean": -1.123100757598877, + "epoch": 0.03, + "grad_norm": 2.625, + "learning_rate": 4.998253494842709e-06, + "log_odds": 0.7753735184669495, + "log_odds_ratio": -0.3925502896308899, + "loss": 0.3212, + "rejected_geometric_mean": -1.7072702646255493, + "step": 106 + }, + { + "chosen_geometric_mean": -1.0053303241729736, + "epoch": 0.03, + "grad_norm": 2.640625, + "learning_rate": 4.998216924159012e-06, + "log_odds": 0.09253115206956863, + "log_odds_ratio": -0.6509010791778564, + "loss": 0.3636, + "rejected_geometric_mean": -1.0687940120697021, + "step": 107 + }, + { + "chosen_geometric_mean": -1.2381662130355835, + "epoch": 0.03, + "grad_norm": 2.5, + "learning_rate": 4.9981799746856616e-06, + "log_odds": 0.3407735824584961, + "log_odds_ratio": -0.5542266368865967, + "loss": 0.3325, + "rejected_geometric_mean": -1.4945223331451416, + "step": 108 + }, + { + "chosen_geometric_mean": -1.0785863399505615, + "epoch": 0.03, + "grad_norm": 2.484375, + "learning_rate": 4.998142646428261e-06, + "log_odds": 0.5050921440124512, + "log_odds_ratio": -0.48343926668167114, + "loss": 0.3289, + "rejected_geometric_mean": -1.4578911066055298, + "step": 109 + }, + { + "chosen_geometric_mean": -1.2188835144042969, + "epoch": 0.03, + "grad_norm": 2.484375, + "learning_rate": 4.998104939392471e-06, + "log_odds": 0.09656386822462082, + "log_odds_ratio": -0.6659069061279297, + "loss": 0.3263, + "rejected_geometric_mean": -1.3109078407287598, + "step": 110 + }, + { + "chosen_geometric_mean": -1.1314852237701416, + "epoch": 0.03, + "grad_norm": 2.515625, + "learning_rate": 4.998066853584007e-06, + "log_odds": -0.0379234179854393, + "log_odds_ratio": -0.7154731750488281, + "loss": 0.3718, + "rejected_geometric_mean": -1.1153227090835571, + "step": 111 + }, + { + "chosen_geometric_mean": -0.952487587928772, + "epoch": 0.03, + "grad_norm": 2.34375, + "learning_rate": 4.998028389008645e-06, + "log_odds": 0.4397592842578888, + "log_odds_ratio": -0.50258469581604, + "loss": 0.3193, + "rejected_geometric_mean": -1.245116949081421, + "step": 112 + }, + { + "chosen_geometric_mean": -1.2827636003494263, + "epoch": 0.03, + "grad_norm": 2.421875, + "learning_rate": 4.997989545672216e-06, + "log_odds": 0.3939821422100067, + "log_odds_ratio": -0.5229422450065613, + "loss": 0.3405, + "rejected_geometric_mean": -1.5757942199707031, + "step": 113 + }, + { + "chosen_geometric_mean": -1.0019140243530273, + "epoch": 0.03, + "grad_norm": 2.609375, + "learning_rate": 4.997950323580611e-06, + "log_odds": 0.4332154393196106, + "log_odds_ratio": -0.5150892734527588, + "loss": 0.3369, + "rejected_geometric_mean": -1.3265539407730103, + "step": 114 + }, + { + "chosen_geometric_mean": -1.0922328233718872, + "epoch": 0.03, + "grad_norm": 2.6875, + "learning_rate": 4.997910722739776e-06, + "log_odds": 0.35882851481437683, + "log_odds_ratio": -0.5442389249801636, + "loss": 0.3362, + "rejected_geometric_mean": -1.368677020072937, + "step": 115 + }, + { + "chosen_geometric_mean": -0.984994113445282, + "epoch": 0.03, + "grad_norm": 2.65625, + "learning_rate": 4.997870743155716e-06, + "log_odds": 0.00858684629201889, + "log_odds_ratio": -0.7030340433120728, + "loss": 0.3526, + "rejected_geometric_mean": -1.0184221267700195, + "step": 116 + }, + { + "chosen_geometric_mean": -1.2175488471984863, + "epoch": 0.03, + "grad_norm": 2.4375, + "learning_rate": 4.997830384834493e-06, + "log_odds": 0.26201820373535156, + "log_odds_ratio": -0.5728236436843872, + "loss": 0.3287, + "rejected_geometric_mean": -1.4128402471542358, + "step": 117 + }, + { + "chosen_geometric_mean": -1.120079517364502, + "epoch": 0.03, + "grad_norm": 2.484375, + "learning_rate": 4.997789647782226e-06, + "log_odds": 0.3507665991783142, + "log_odds_ratio": -0.5374487638473511, + "loss": 0.3575, + "rejected_geometric_mean": -1.3683598041534424, + "step": 118 + }, + { + "chosen_geometric_mean": -1.2820974588394165, + "epoch": 0.03, + "grad_norm": 3.359375, + "learning_rate": 4.997748532005091e-06, + "log_odds": 2.5514683723449707, + "log_odds_ratio": -0.4351623058319092, + "loss": 0.3935, + "rejected_geometric_mean": -3.73297381401062, + "step": 119 + }, + { + "chosen_geometric_mean": -1.1139044761657715, + "epoch": 0.03, + "grad_norm": 3.1875, + "learning_rate": 4.997707037509323e-06, + "log_odds": 0.002960219979286194, + "log_odds_ratio": -0.7002681493759155, + "loss": 0.3431, + "rejected_geometric_mean": -1.1238152980804443, + "step": 120 + }, + { + "chosen_geometric_mean": -1.087324857711792, + "epoch": 0.03, + "grad_norm": 2.59375, + "learning_rate": 4.997665164301213e-06, + "log_odds": 0.056684426963329315, + "log_odds_ratio": -0.6672213077545166, + "loss": 0.3466, + "rejected_geometric_mean": -1.1244608163833618, + "step": 121 + }, + { + "chosen_geometric_mean": -1.2184876203536987, + "epoch": 0.03, + "grad_norm": 2.6875, + "learning_rate": 4.997622912387111e-06, + "log_odds": 0.19738423824310303, + "log_odds_ratio": -0.6100046038627625, + "loss": 0.3738, + "rejected_geometric_mean": -1.3678041696548462, + "step": 122 + }, + { + "chosen_geometric_mean": -1.0791157484054565, + "epoch": 0.03, + "grad_norm": 2.15625, + "learning_rate": 4.997580281773423e-06, + "log_odds": 0.46820518374443054, + "log_odds_ratio": -0.5255691409111023, + "loss": 0.305, + "rejected_geometric_mean": -1.465670108795166, + "step": 123 + }, + { + "chosen_geometric_mean": -1.1692612171173096, + "epoch": 0.03, + "grad_norm": 2.5625, + "learning_rate": 4.997537272466612e-06, + "log_odds": 0.38126152753829956, + "log_odds_ratio": -0.5289596319198608, + "loss": 0.4141, + "rejected_geometric_mean": -1.4480040073394775, + "step": 124 + }, + { + "chosen_geometric_mean": -1.3066282272338867, + "epoch": 0.03, + "grad_norm": 2.578125, + "learning_rate": 4.9974938844732e-06, + "log_odds": 1.215251088142395, + "log_odds_ratio": -0.47381845116615295, + "loss": 0.343, + "rejected_geometric_mean": -2.4385600090026855, + "step": 125 + }, + { + "chosen_geometric_mean": -1.1875823736190796, + "epoch": 0.03, + "grad_norm": 2.734375, + "learning_rate": 4.997450117799765e-06, + "log_odds": 0.14262953400611877, + "log_odds_ratio": -0.6271205544471741, + "loss": 0.4043, + "rejected_geometric_mean": -1.292866587638855, + "step": 126 + }, + { + "chosen_geometric_mean": -1.1813535690307617, + "epoch": 0.03, + "grad_norm": 2.578125, + "learning_rate": 4.997405972452945e-06, + "log_odds": 2.6456172466278076, + "log_odds_ratio": -0.42481106519699097, + "loss": 0.3546, + "rejected_geometric_mean": -3.693533182144165, + "step": 127 + }, + { + "chosen_geometric_mean": -1.0428944826126099, + "epoch": 0.03, + "grad_norm": 2.65625, + "learning_rate": 4.99736144843943e-06, + "log_odds": -0.08061148226261139, + "log_odds_ratio": -0.7384124994277954, + "loss": 0.3631, + "rejected_geometric_mean": -1.0099475383758545, + "step": 128 + }, + { + "chosen_geometric_mean": -0.9795304536819458, + "epoch": 0.03, + "grad_norm": 3.90625, + "learning_rate": 4.997316545765974e-06, + "log_odds": 0.2773083746433258, + "log_odds_ratio": -0.5793435573577881, + "loss": 0.3025, + "rejected_geometric_mean": -1.160588264465332, + "step": 129 + }, + { + "chosen_geometric_mean": -1.0968636274337769, + "epoch": 0.03, + "grad_norm": 2.734375, + "learning_rate": 4.997271264439385e-06, + "log_odds": 0.44012755155563354, + "log_odds_ratio": -0.5000685453414917, + "loss": 0.3794, + "rejected_geometric_mean": -1.39738929271698, + "step": 130 + }, + { + "chosen_geometric_mean": -1.0599063634872437, + "epoch": 0.03, + "grad_norm": 2.828125, + "learning_rate": 4.997225604466527e-06, + "log_odds": 2.5178141593933105, + "log_odds_ratio": -0.4689072370529175, + "loss": 0.3563, + "rejected_geometric_mean": -3.472783327102661, + "step": 131 + }, + { + "chosen_geometric_mean": -1.1772544384002686, + "epoch": 0.03, + "grad_norm": 2.578125, + "learning_rate": 4.997179565854325e-06, + "log_odds": 0.30745258927345276, + "log_odds_ratio": -0.572236955165863, + "loss": 0.379, + "rejected_geometric_mean": -1.413469672203064, + "step": 132 + }, + { + "chosen_geometric_mean": -0.9665405750274658, + "epoch": 0.03, + "grad_norm": 2.546875, + "learning_rate": 4.997133148609757e-06, + "log_odds": 0.10358354449272156, + "log_odds_ratio": -0.6516766548156738, + "loss": 0.3958, + "rejected_geometric_mean": -1.0265101194381714, + "step": 133 + }, + { + "chosen_geometric_mean": -1.062159776687622, + "epoch": 0.03, + "grad_norm": 2.75, + "learning_rate": 4.997086352739864e-06, + "log_odds": 0.2421381026506424, + "log_odds_ratio": -0.5858981013298035, + "loss": 0.4005, + "rejected_geometric_mean": -1.2153143882751465, + "step": 134 + }, + { + "chosen_geometric_mean": -1.2649998664855957, + "epoch": 0.03, + "grad_norm": 2.5625, + "learning_rate": 4.99703917825174e-06, + "log_odds": 0.10295596718788147, + "log_odds_ratio": -0.6441360712051392, + "loss": 0.3542, + "rejected_geometric_mean": -1.3403438329696655, + "step": 135 + }, + { + "chosen_geometric_mean": -1.0300090312957764, + "epoch": 0.03, + "grad_norm": 2.40625, + "learning_rate": 4.996991625152539e-06, + "log_odds": 0.18236654996871948, + "log_odds_ratio": -0.643123984336853, + "loss": 0.3283, + "rejected_geometric_mean": -1.2203242778778076, + "step": 136 + }, + { + "chosen_geometric_mean": -1.2349507808685303, + "epoch": 0.03, + "grad_norm": 2.3125, + "learning_rate": 4.996943693449468e-06, + "log_odds": 0.30191439390182495, + "log_odds_ratio": -0.5845870971679688, + "loss": 0.3597, + "rejected_geometric_mean": -1.4879150390625, + "step": 137 + }, + { + "chosen_geometric_mean": -1.335270881652832, + "epoch": 0.03, + "grad_norm": 2.78125, + "learning_rate": 4.996895383149797e-06, + "log_odds": 0.04280693829059601, + "log_odds_ratio": -0.6741738319396973, + "loss": 0.427, + "rejected_geometric_mean": -1.3651858568191528, + "step": 138 + }, + { + "chosen_geometric_mean": -1.2816179990768433, + "epoch": 0.03, + "grad_norm": 2.265625, + "learning_rate": 4.996846694260852e-06, + "log_odds": 0.9478357434272766, + "log_odds_ratio": -0.39783069491386414, + "loss": 0.3231, + "rejected_geometric_mean": -2.108445644378662, + "step": 139 + }, + { + "chosen_geometric_mean": -1.0484120845794678, + "epoch": 0.03, + "grad_norm": 2.359375, + "learning_rate": 4.996797626790013e-06, + "log_odds": 0.2499707192182541, + "log_odds_ratio": -0.59568852186203, + "loss": 0.2985, + "rejected_geometric_mean": -1.243373155593872, + "step": 140 + }, + { + "chosen_geometric_mean": -0.9313812851905823, + "epoch": 0.03, + "grad_norm": 2.5, + "learning_rate": 4.9967481807447205e-06, + "log_odds": 0.09856288135051727, + "log_odds_ratio": -0.6488194465637207, + "loss": 0.3233, + "rejected_geometric_mean": -1.0070717334747314, + "step": 141 + }, + { + "chosen_geometric_mean": -1.105117917060852, + "epoch": 0.04, + "grad_norm": 2.859375, + "learning_rate": 4.996698356132474e-06, + "log_odds": 0.3097195327281952, + "log_odds_ratio": -0.5542622804641724, + "loss": 0.3793, + "rejected_geometric_mean": -1.3064972162246704, + "step": 142 + }, + { + "chosen_geometric_mean": -1.1447381973266602, + "epoch": 0.04, + "grad_norm": 2.34375, + "learning_rate": 4.996648152960823e-06, + "log_odds": 0.1526922583580017, + "log_odds_ratio": -0.6264346241950989, + "loss": 0.3462, + "rejected_geometric_mean": -1.265076994895935, + "step": 143 + }, + { + "chosen_geometric_mean": -1.2861472368240356, + "epoch": 0.04, + "grad_norm": 2.328125, + "learning_rate": 4.996597571237385e-06, + "log_odds": 0.3220303952693939, + "log_odds_ratio": -0.54880291223526, + "loss": 0.3401, + "rejected_geometric_mean": -1.5264127254486084, + "step": 144 + }, + { + "chosen_geometric_mean": -1.600177526473999, + "epoch": 0.04, + "grad_norm": 2.671875, + "learning_rate": 4.996546610969827e-06, + "log_odds": 0.2848108112812042, + "log_odds_ratio": -0.6511991024017334, + "loss": 0.395, + "rejected_geometric_mean": -1.8199361562728882, + "step": 145 + }, + { + "chosen_geometric_mean": -1.4912594556808472, + "epoch": 0.04, + "grad_norm": 2.140625, + "learning_rate": 4.996495272165875e-06, + "log_odds": 2.6351170539855957, + "log_odds_ratio": -0.17766232788562775, + "loss": 0.2695, + "rejected_geometric_mean": -3.9436936378479004, + "step": 146 + }, + { + "chosen_geometric_mean": -1.0321037769317627, + "epoch": 0.04, + "grad_norm": 2.84375, + "learning_rate": 4.996443554833315e-06, + "log_odds": 3.5920281410217285, + "log_odds_ratio": -0.3628746271133423, + "loss": 0.3536, + "rejected_geometric_mean": -4.371378421783447, + "step": 147 + }, + { + "chosen_geometric_mean": -0.9973492622375488, + "epoch": 0.04, + "grad_norm": 2.34375, + "learning_rate": 4.996391458979987e-06, + "log_odds": 2.5703744888305664, + "log_odds_ratio": -0.4937501549720764, + "loss": 0.3371, + "rejected_geometric_mean": -3.4351999759674072, + "step": 148 + }, + { + "chosen_geometric_mean": -1.2406787872314453, + "epoch": 0.04, + "grad_norm": 2.875, + "learning_rate": 4.99633898461379e-06, + "log_odds": 0.9291876554489136, + "log_odds_ratio": -0.49488022923469543, + "loss": 0.354, + "rejected_geometric_mean": -2.1147007942199707, + "step": 149 + }, + { + "chosen_geometric_mean": -1.1664021015167236, + "epoch": 0.04, + "grad_norm": 2.390625, + "learning_rate": 4.996286131742682e-06, + "log_odds": 0.04588336497545242, + "log_odds_ratio": -0.6761234998703003, + "loss": 0.2876, + "rejected_geometric_mean": -1.2075483798980713, + "step": 150 + }, + { + "chosen_geometric_mean": -0.8715320825576782, + "epoch": 0.04, + "grad_norm": 2.28125, + "learning_rate": 4.996232900374676e-06, + "log_odds": 0.14858676493167877, + "log_odds_ratio": -0.6309493780136108, + "loss": 0.2917, + "rejected_geometric_mean": -0.9705654978752136, + "step": 151 + }, + { + "chosen_geometric_mean": -0.9607850909233093, + "epoch": 0.04, + "grad_norm": 2.53125, + "learning_rate": 4.996179290517842e-06, + "log_odds": 0.5234165191650391, + "log_odds_ratio": -0.4968299865722656, + "loss": 0.361, + "rejected_geometric_mean": -1.342861533164978, + "step": 152 + }, + { + "chosen_geometric_mean": -1.1373703479766846, + "epoch": 0.04, + "grad_norm": 2.609375, + "learning_rate": 4.9961253021803104e-06, + "log_odds": 0.4886890649795532, + "log_odds_ratio": -0.4867987036705017, + "loss": 0.3473, + "rejected_geometric_mean": -1.4687169790267944, + "step": 153 + }, + { + "chosen_geometric_mean": -1.098047137260437, + "epoch": 0.04, + "grad_norm": 2.625, + "learning_rate": 4.996070935370265e-06, + "log_odds": 0.2889940142631531, + "log_odds_ratio": -0.5716444253921509, + "loss": 0.3943, + "rejected_geometric_mean": -1.2872709035873413, + "step": 154 + }, + { + "chosen_geometric_mean": -1.4126012325286865, + "epoch": 0.04, + "grad_norm": 2.484375, + "learning_rate": 4.996016190095952e-06, + "log_odds": 0.939311683177948, + "log_odds_ratio": -0.4436955749988556, + "loss": 0.3764, + "rejected_geometric_mean": -2.2350378036499023, + "step": 155 + }, + { + "chosen_geometric_mean": -1.0543169975280762, + "epoch": 0.04, + "grad_norm": 2.59375, + "learning_rate": 4.9959610663656685e-06, + "log_odds": 2.564847946166992, + "log_odds_ratio": -0.4661436975002289, + "loss": 0.3305, + "rejected_geometric_mean": -3.516183376312256, + "step": 156 + }, + { + "chosen_geometric_mean": -1.1547141075134277, + "epoch": 0.04, + "grad_norm": 2.765625, + "learning_rate": 4.995905564187776e-06, + "log_odds": 0.3538270592689514, + "log_odds_ratio": -0.5442517399787903, + "loss": 0.386, + "rejected_geometric_mean": -1.4184889793395996, + "step": 157 + }, + { + "chosen_geometric_mean": -0.9918975234031677, + "epoch": 0.04, + "grad_norm": 5.5, + "learning_rate": 4.995849683570688e-06, + "log_odds": 0.053335100412368774, + "log_odds_ratio": -0.6718221306800842, + "loss": 0.356, + "rejected_geometric_mean": -1.0391762256622314, + "step": 158 + }, + { + "chosen_geometric_mean": -1.1547085046768188, + "epoch": 0.04, + "grad_norm": 3.53125, + "learning_rate": 4.995793424522878e-06, + "log_odds": 0.6489954590797424, + "log_odds_ratio": -0.45570993423461914, + "loss": 0.3722, + "rejected_geometric_mean": -1.6564676761627197, + "step": 159 + }, + { + "chosen_geometric_mean": -1.2402082681655884, + "epoch": 0.04, + "grad_norm": 2.4375, + "learning_rate": 4.995736787052875e-06, + "log_odds": 0.12062809616327286, + "log_odds_ratio": -0.6379752159118652, + "loss": 0.3514, + "rejected_geometric_mean": -1.3286927938461304, + "step": 160 + }, + { + "chosen_geometric_mean": -1.1870145797729492, + "epoch": 0.04, + "grad_norm": 2.875, + "learning_rate": 4.995679771169269e-06, + "log_odds": 0.28006619215011597, + "log_odds_ratio": -0.5871683955192566, + "loss": 0.3523, + "rejected_geometric_mean": -1.3789925575256348, + "step": 161 + }, + { + "chosen_geometric_mean": -1.2330716848373413, + "epoch": 0.04, + "grad_norm": 2.8125, + "learning_rate": 4.995622376880703e-06, + "log_odds": 0.7883145809173584, + "log_odds_ratio": -0.4210037291049957, + "loss": 0.3121, + "rejected_geometric_mean": -1.8960603475570679, + "step": 162 + }, + { + "chosen_geometric_mean": -1.1193456649780273, + "epoch": 0.04, + "grad_norm": 3.875, + "learning_rate": 4.99556460419588e-06, + "log_odds": -0.04870523512363434, + "log_odds_ratio": -0.7205252647399902, + "loss": 0.3189, + "rejected_geometric_mean": -1.0901964902877808, + "step": 163 + }, + { + "chosen_geometric_mean": -1.2377948760986328, + "epoch": 0.04, + "grad_norm": 2.5625, + "learning_rate": 4.9955064531235595e-06, + "log_odds": 1.543456792831421, + "log_odds_ratio": -0.4395768642425537, + "loss": 0.3286, + "rejected_geometric_mean": -2.672427177429199, + "step": 164 + }, + { + "chosen_geometric_mean": -1.0758707523345947, + "epoch": 0.04, + "grad_norm": 2.609375, + "learning_rate": 4.995447923672559e-06, + "log_odds": 0.29609009623527527, + "log_odds_ratio": -0.5675739645957947, + "loss": 0.3421, + "rejected_geometric_mean": -1.2702500820159912, + "step": 165 + }, + { + "chosen_geometric_mean": -1.5121196508407593, + "epoch": 0.04, + "grad_norm": 14.125, + "learning_rate": 4.995389015851753e-06, + "log_odds": 2.020040988922119, + "log_odds_ratio": -0.2583630084991455, + "loss": 0.4207, + "rejected_geometric_mean": -3.38100528717041, + "step": 166 + }, + { + "chosen_geometric_mean": -1.4801299571990967, + "epoch": 0.04, + "grad_norm": 25.75, + "learning_rate": 4.9953297296700725e-06, + "log_odds": 0.608532190322876, + "log_odds_ratio": -0.48226994276046753, + "loss": 0.3586, + "rejected_geometric_mean": -2.015515089035034, + "step": 167 + }, + { + "chosen_geometric_mean": -0.9465616941452026, + "epoch": 0.04, + "grad_norm": 7.21875, + "learning_rate": 4.995270065136507e-06, + "log_odds": 0.15919971466064453, + "log_odds_ratio": -0.6260923147201538, + "loss": 0.3239, + "rejected_geometric_mean": -1.0729788541793823, + "step": 168 + }, + { + "chosen_geometric_mean": -1.1337922811508179, + "epoch": 0.04, + "grad_norm": 2.546875, + "learning_rate": 4.995210022260104e-06, + "log_odds": 0.35227739810943604, + "log_odds_ratio": -0.5357905626296997, + "loss": 0.3672, + "rejected_geometric_mean": -1.3811085224151611, + "step": 169 + }, + { + "chosen_geometric_mean": -1.2445317506790161, + "epoch": 0.04, + "grad_norm": 2.265625, + "learning_rate": 4.995149601049967e-06, + "log_odds": 0.7448248267173767, + "log_odds_ratio": -0.48832881450653076, + "loss": 0.322, + "rejected_geometric_mean": -1.8950934410095215, + "step": 170 + }, + { + "chosen_geometric_mean": -1.24470853805542, + "epoch": 0.04, + "grad_norm": 3.765625, + "learning_rate": 4.995088801515256e-06, + "log_odds": 1.151114821434021, + "log_odds_ratio": -0.40622836351394653, + "loss": 0.3111, + "rejected_geometric_mean": -2.260953426361084, + "step": 171 + }, + { + "chosen_geometric_mean": -1.068113923072815, + "epoch": 0.04, + "grad_norm": 5.0625, + "learning_rate": 4.995027623665193e-06, + "log_odds": 0.07278943061828613, + "log_odds_ratio": -0.6642503142356873, + "loss": 0.3464, + "rejected_geometric_mean": -1.1230744123458862, + "step": 172 + }, + { + "chosen_geometric_mean": -1.1847872734069824, + "epoch": 0.04, + "grad_norm": 19.125, + "learning_rate": 4.99496606750905e-06, + "log_odds": 0.10911382734775543, + "log_odds_ratio": -0.6470275521278381, + "loss": 0.374, + "rejected_geometric_mean": -1.2718958854675293, + "step": 173 + }, + { + "chosen_geometric_mean": -1.251149296760559, + "epoch": 0.04, + "grad_norm": 2.859375, + "learning_rate": 4.994904133056162e-06, + "log_odds": 0.3383331000804901, + "log_odds_ratio": -0.5415170192718506, + "loss": 0.3564, + "rejected_geometric_mean": -1.5105925798416138, + "step": 174 + }, + { + "chosen_geometric_mean": -1.2820147275924683, + "epoch": 0.04, + "grad_norm": 2.328125, + "learning_rate": 4.994841820315921e-06, + "log_odds": 1.8761407136917114, + "log_odds_ratio": -0.33590734004974365, + "loss": 0.3155, + "rejected_geometric_mean": -2.9879398345947266, + "step": 175 + }, + { + "chosen_geometric_mean": -1.0843278169631958, + "epoch": 0.04, + "grad_norm": 3.0, + "learning_rate": 4.994779129297774e-06, + "log_odds": 0.17733852565288544, + "log_odds_ratio": -0.6132782697677612, + "loss": 0.2826, + "rejected_geometric_mean": -1.2013810873031616, + "step": 176 + }, + { + "chosen_geometric_mean": -1.1368019580841064, + "epoch": 0.04, + "grad_norm": 2.21875, + "learning_rate": 4.994716060011226e-06, + "log_odds": 1.1700387001037598, + "log_odds_ratio": -0.5443904399871826, + "loss": 0.3195, + "rejected_geometric_mean": -2.260974407196045, + "step": 177 + }, + { + "chosen_geometric_mean": -1.3125300407409668, + "epoch": 0.04, + "grad_norm": 7.21875, + "learning_rate": 4.9946526124658404e-06, + "log_odds": 0.09583202004432678, + "log_odds_ratio": -0.7165672779083252, + "loss": 0.3723, + "rejected_geometric_mean": -1.3845932483673096, + "step": 178 + }, + { + "chosen_geometric_mean": -1.2787712812423706, + "epoch": 0.04, + "grad_norm": 2.515625, + "learning_rate": 4.994588786671237e-06, + "log_odds": 2.5522496700286865, + "log_odds_ratio": -0.3735196590423584, + "loss": 0.2957, + "rejected_geometric_mean": -3.709646224975586, + "step": 179 + }, + { + "chosen_geometric_mean": -1.1803220510482788, + "epoch": 0.04, + "grad_norm": 2.8125, + "learning_rate": 4.994524582637095e-06, + "log_odds": 0.1404455304145813, + "log_odds_ratio": -0.6356272101402283, + "loss": 0.2956, + "rejected_geometric_mean": -1.2947113513946533, + "step": 180 + }, + { + "chosen_geometric_mean": -1.0462549924850464, + "epoch": 0.04, + "grad_norm": 2.78125, + "learning_rate": 4.994460000373147e-06, + "log_odds": 0.22150357067584991, + "log_odds_ratio": -0.6008758544921875, + "loss": 0.2903, + "rejected_geometric_mean": -1.2116795778274536, + "step": 181 + }, + { + "chosen_geometric_mean": -0.949434757232666, + "epoch": 0.05, + "grad_norm": 2.453125, + "learning_rate": 4.994395039889186e-06, + "log_odds": 0.26562684774398804, + "log_odds_ratio": -0.5738218426704407, + "loss": 0.3221, + "rejected_geometric_mean": -1.1093271970748901, + "step": 182 + }, + { + "chosen_geometric_mean": -1.1670013666152954, + "epoch": 0.05, + "grad_norm": 3.140625, + "learning_rate": 4.994329701195061e-06, + "log_odds": 0.17098590731620789, + "log_odds_ratio": -0.6228401064872742, + "loss": 0.3215, + "rejected_geometric_mean": -1.3021498918533325, + "step": 183 + }, + { + "chosen_geometric_mean": -1.30832839012146, + "epoch": 0.05, + "grad_norm": 2.828125, + "learning_rate": 4.994263984300681e-06, + "log_odds": 0.3533199429512024, + "log_odds_ratio": -0.541040301322937, + "loss": 0.3329, + "rejected_geometric_mean": -1.5871491432189941, + "step": 184 + }, + { + "chosen_geometric_mean": -1.420723795890808, + "epoch": 0.05, + "grad_norm": 12.75, + "learning_rate": 4.9941978892160085e-06, + "log_odds": 0.7359671592712402, + "log_odds_ratio": -0.43375951051712036, + "loss": 0.3894, + "rejected_geometric_mean": -2.056565284729004, + "step": 185 + }, + { + "chosen_geometric_mean": -1.0405681133270264, + "epoch": 0.05, + "grad_norm": 2.4375, + "learning_rate": 4.994131415951064e-06, + "log_odds": 0.4246598780155182, + "log_odds_ratio": -0.5151104927062988, + "loss": 0.2887, + "rejected_geometric_mean": -1.34456467628479, + "step": 186 + }, + { + "chosen_geometric_mean": -0.9440240859985352, + "epoch": 0.05, + "grad_norm": 16.5, + "learning_rate": 4.99406456451593e-06, + "log_odds": 0.25249171257019043, + "log_odds_ratio": -0.5787007808685303, + "loss": 0.3662, + "rejected_geometric_mean": -1.1131114959716797, + "step": 187 + }, + { + "chosen_geometric_mean": -1.3440897464752197, + "epoch": 0.05, + "grad_norm": 2.90625, + "learning_rate": 4.99399733492074e-06, + "log_odds": 0.22392886877059937, + "log_odds_ratio": -0.5888032913208008, + "loss": 0.3781, + "rejected_geometric_mean": -1.5112444162368774, + "step": 188 + }, + { + "chosen_geometric_mean": -1.1114418506622314, + "epoch": 0.05, + "grad_norm": 2.34375, + "learning_rate": 4.993929727175688e-06, + "log_odds": 0.19131547212600708, + "log_odds_ratio": -0.6099583506584167, + "loss": 0.2957, + "rejected_geometric_mean": -1.2450218200683594, + "step": 189 + }, + { + "chosen_geometric_mean": -1.2516824007034302, + "epoch": 0.05, + "grad_norm": 2.421875, + "learning_rate": 4.993861741291026e-06, + "log_odds": 2.5662503242492676, + "log_odds_ratio": -0.4507599174976349, + "loss": 0.314, + "rejected_geometric_mean": -3.7108476161956787, + "step": 190 + }, + { + "chosen_geometric_mean": -1.243382215499878, + "epoch": 0.05, + "grad_norm": 2.828125, + "learning_rate": 4.993793377277061e-06, + "log_odds": 0.42425036430358887, + "log_odds_ratio": -0.5669263601303101, + "loss": 0.3682, + "rejected_geometric_mean": -1.6403274536132812, + "step": 191 + }, + { + "chosen_geometric_mean": -1.1061291694641113, + "epoch": 0.05, + "grad_norm": 2.765625, + "learning_rate": 4.993724635144159e-06, + "log_odds": 0.7827656865119934, + "log_odds_ratio": -0.41841208934783936, + "loss": 0.3478, + "rejected_geometric_mean": -1.7164642810821533, + "step": 192 + }, + { + "chosen_geometric_mean": -1.2342718839645386, + "epoch": 0.05, + "grad_norm": 6.125, + "learning_rate": 4.9936555149027435e-06, + "log_odds": 0.6677346229553223, + "log_odds_ratio": -0.45125773549079895, + "loss": 0.3303, + "rejected_geometric_mean": -1.7709944248199463, + "step": 193 + }, + { + "chosen_geometric_mean": -1.4072070121765137, + "epoch": 0.05, + "grad_norm": 2.421875, + "learning_rate": 4.993586016563295e-06, + "log_odds": 0.3636115491390228, + "log_odds_ratio": -0.6098060011863708, + "loss": 0.3384, + "rejected_geometric_mean": -1.7190804481506348, + "step": 194 + }, + { + "chosen_geometric_mean": -1.2069000005722046, + "epoch": 0.05, + "grad_norm": 2.5, + "learning_rate": 4.993516140136349e-06, + "log_odds": 0.3373100161552429, + "log_odds_ratio": -0.5418646335601807, + "loss": 0.3057, + "rejected_geometric_mean": -1.4613299369812012, + "step": 195 + }, + { + "chosen_geometric_mean": -1.2533704042434692, + "epoch": 0.05, + "grad_norm": 3.203125, + "learning_rate": 4.993445885632503e-06, + "log_odds": 0.05886344984173775, + "log_odds_ratio": -0.6680792570114136, + "loss": 0.3282, + "rejected_geometric_mean": -1.2942458391189575, + "step": 196 + }, + { + "chosen_geometric_mean": -1.0917757749557495, + "epoch": 0.05, + "grad_norm": 2.765625, + "learning_rate": 4.993375253062408e-06, + "log_odds": 0.39789098501205444, + "log_odds_ratio": -0.518082857131958, + "loss": 0.297, + "rejected_geometric_mean": -1.3790712356567383, + "step": 197 + }, + { + "chosen_geometric_mean": -1.1393541097640991, + "epoch": 0.05, + "grad_norm": 2.4375, + "learning_rate": 4.993304242436774e-06, + "log_odds": 0.46808651089668274, + "log_odds_ratio": -0.4923524260520935, + "loss": 0.3618, + "rejected_geometric_mean": -1.4861805438995361, + "step": 198 + }, + { + "chosen_geometric_mean": -1.0176441669464111, + "epoch": 0.05, + "grad_norm": 2.625, + "learning_rate": 4.993232853766368e-06, + "log_odds": 0.1857811063528061, + "log_odds_ratio": -0.6404739022254944, + "loss": 0.3159, + "rejected_geometric_mean": -1.1719011068344116, + "step": 199 + }, + { + "chosen_geometric_mean": -1.179661512374878, + "epoch": 0.05, + "grad_norm": 2.4375, + "learning_rate": 4.993161087062014e-06, + "log_odds": 0.4763856828212738, + "log_odds_ratio": -0.48501989245414734, + "loss": 0.2829, + "rejected_geometric_mean": -1.5287586450576782, + "step": 200 + }, + { + "chosen_geometric_mean": -1.1630135774612427, + "epoch": 0.05, + "grad_norm": 7.0, + "learning_rate": 4.993088942334593e-06, + "log_odds": 0.4679403007030487, + "log_odds_ratio": -0.5252052545547485, + "loss": 0.3312, + "rejected_geometric_mean": -1.4982750415802002, + "step": 201 + }, + { + "chosen_geometric_mean": -1.1338615417480469, + "epoch": 0.05, + "grad_norm": 3.015625, + "learning_rate": 4.993016419595045e-06, + "log_odds": 0.5754556655883789, + "log_odds_ratio": -0.47525453567504883, + "loss": 0.3766, + "rejected_geometric_mean": -1.5156314373016357, + "step": 202 + }, + { + "chosen_geometric_mean": -1.1084113121032715, + "epoch": 0.05, + "grad_norm": 2.71875, + "learning_rate": 4.9929435188543655e-06, + "log_odds": 1.8422839641571045, + "log_odds_ratio": -0.41076982021331787, + "loss": 0.3417, + "rejected_geometric_mean": -2.806966543197632, + "step": 203 + }, + { + "chosen_geometric_mean": -1.2415083646774292, + "epoch": 0.05, + "grad_norm": 2.984375, + "learning_rate": 4.992870240123608e-06, + "log_odds": 0.807998776435852, + "log_odds_ratio": -0.38356366753578186, + "loss": 0.3465, + "rejected_geometric_mean": -1.890868902206421, + "step": 204 + }, + { + "chosen_geometric_mean": -1.2308968305587769, + "epoch": 0.05, + "grad_norm": 2.515625, + "learning_rate": 4.992796583413884e-06, + "log_odds": 0.6722248196601868, + "log_odds_ratio": -0.4552055299282074, + "loss": 0.3256, + "rejected_geometric_mean": -1.7895218133926392, + "step": 205 + }, + { + "chosen_geometric_mean": -1.0976344347000122, + "epoch": 0.05, + "grad_norm": 4.9375, + "learning_rate": 4.99272254873636e-06, + "log_odds": 0.15903320908546448, + "log_odds_ratio": -0.6321718096733093, + "loss": 0.3272, + "rejected_geometric_mean": -1.2077491283416748, + "step": 206 + }, + { + "chosen_geometric_mean": -1.1455225944519043, + "epoch": 0.05, + "grad_norm": 7.59375, + "learning_rate": 4.9926481361022635e-06, + "log_odds": 0.05639234930276871, + "log_odds_ratio": -0.6686859130859375, + "loss": 0.3857, + "rejected_geometric_mean": -1.1862040758132935, + "step": 207 + }, + { + "chosen_geometric_mean": -0.9895858764648438, + "epoch": 0.05, + "grad_norm": 3.390625, + "learning_rate": 4.992573345522876e-06, + "log_odds": 0.07619926333427429, + "log_odds_ratio": -0.6856175065040588, + "loss": 0.3615, + "rejected_geometric_mean": -1.0302120447158813, + "step": 208 + }, + { + "chosen_geometric_mean": -1.0609006881713867, + "epoch": 0.05, + "grad_norm": 3.109375, + "learning_rate": 4.992498177009537e-06, + "log_odds": 0.26885876059532166, + "log_odds_ratio": -0.5752758383750916, + "loss": 0.3458, + "rejected_geometric_mean": -1.2241950035095215, + "step": 209 + }, + { + "chosen_geometric_mean": -1.0708904266357422, + "epoch": 0.05, + "grad_norm": 2.671875, + "learning_rate": 4.992422630573645e-06, + "log_odds": 0.6266077160835266, + "log_odds_ratio": -0.45527225732803345, + "loss": 0.2905, + "rejected_geometric_mean": -1.5458706617355347, + "step": 210 + }, + { + "chosen_geometric_mean": -1.2587456703186035, + "epoch": 0.05, + "grad_norm": 2.609375, + "learning_rate": 4.992346706226655e-06, + "log_odds": 0.24806839227676392, + "log_odds_ratio": -0.5835447311401367, + "loss": 0.3446, + "rejected_geometric_mean": -1.4477550983428955, + "step": 211 + }, + { + "chosen_geometric_mean": -0.8619174957275391, + "epoch": 0.05, + "grad_norm": 2.421875, + "learning_rate": 4.992270403980078e-06, + "log_odds": 0.36135661602020264, + "log_odds_ratio": -0.5476996302604675, + "loss": 0.3527, + "rejected_geometric_mean": -1.0905230045318604, + "step": 212 + }, + { + "chosen_geometric_mean": -1.047891616821289, + "epoch": 0.05, + "grad_norm": 2.84375, + "learning_rate": 4.992193723845483e-06, + "log_odds": -0.09682638198137283, + "log_odds_ratio": -0.7466752529144287, + "loss": 0.3404, + "rejected_geometric_mean": -0.9899511337280273, + "step": 213 + }, + { + "chosen_geometric_mean": -1.0916969776153564, + "epoch": 0.05, + "grad_norm": 5.875, + "learning_rate": 4.9921166658344975e-06, + "log_odds": 1.5378636121749878, + "log_odds_ratio": -0.2781875729560852, + "loss": 0.3076, + "rejected_geometric_mean": -2.3474907875061035, + "step": 214 + }, + { + "chosen_geometric_mean": -1.0893874168395996, + "epoch": 0.05, + "grad_norm": 34.0, + "learning_rate": 4.992039229958805e-06, + "log_odds": 0.3418283462524414, + "log_odds_ratio": -0.5437930822372437, + "loss": 0.3633, + "rejected_geometric_mean": -1.3313102722167969, + "step": 215 + }, + { + "chosen_geometric_mean": -1.3395975828170776, + "epoch": 0.05, + "grad_norm": 16.375, + "learning_rate": 4.991961416230146e-06, + "log_odds": 0.30887895822525024, + "log_odds_ratio": -0.5788172483444214, + "loss": 0.3636, + "rejected_geometric_mean": -1.5436984300613403, + "step": 216 + }, + { + "chosen_geometric_mean": -0.8353362083435059, + "epoch": 0.05, + "grad_norm": 15.3125, + "learning_rate": 4.99188322466032e-06, + "log_odds": 0.4805962145328522, + "log_odds_ratio": -0.4870532751083374, + "loss": 0.35, + "rejected_geometric_mean": -1.1313786506652832, + "step": 217 + }, + { + "chosen_geometric_mean": -1.5506023168563843, + "epoch": 0.05, + "grad_norm": 3.03125, + "learning_rate": 4.9918046552611825e-06, + "log_odds": 0.04899004474282265, + "log_odds_ratio": -0.6882613301277161, + "loss": 0.3796, + "rejected_geometric_mean": -1.5642707347869873, + "step": 218 + }, + { + "chosen_geometric_mean": -1.2351083755493164, + "epoch": 0.05, + "grad_norm": 2.28125, + "learning_rate": 4.991725708044645e-06, + "log_odds": 0.32596084475517273, + "log_odds_ratio": -0.5580781102180481, + "loss": 0.3511, + "rejected_geometric_mean": -1.4508053064346313, + "step": 219 + }, + { + "chosen_geometric_mean": -1.120776653289795, + "epoch": 0.05, + "grad_norm": 2.84375, + "learning_rate": 4.991646383022679e-06, + "log_odds": 0.4028118848800659, + "log_odds_ratio": -0.5207876563072205, + "loss": 0.4066, + "rejected_geometric_mean": -1.415776014328003, + "step": 220 + }, + { + "chosen_geometric_mean": -0.8958439230918884, + "epoch": 0.05, + "grad_norm": 2.53125, + "learning_rate": 4.991566680207314e-06, + "log_odds": 0.221856027841568, + "log_odds_ratio": -0.5965083837509155, + "loss": 0.3928, + "rejected_geometric_mean": -1.0282719135284424, + "step": 221 + }, + { + "chosen_geometric_mean": -1.2078721523284912, + "epoch": 0.05, + "grad_norm": 2.328125, + "learning_rate": 4.991486599610631e-06, + "log_odds": 0.3996574878692627, + "log_odds_ratio": -0.5195246934890747, + "loss": 0.3212, + "rejected_geometric_mean": -1.4984135627746582, + "step": 222 + }, + { + "chosen_geometric_mean": -1.3368995189666748, + "epoch": 0.06, + "grad_norm": 2.375, + "learning_rate": 4.991406141244774e-06, + "log_odds": 0.6114670634269714, + "log_odds_ratio": -0.4727194905281067, + "loss": 0.3425, + "rejected_geometric_mean": -1.847123146057129, + "step": 223 + }, + { + "chosen_geometric_mean": -1.1383509635925293, + "epoch": 0.06, + "grad_norm": 2.328125, + "learning_rate": 4.991325305121944e-06, + "log_odds": -0.000443875789642334, + "log_odds_ratio": -0.7045924663543701, + "loss": 0.3592, + "rejected_geometric_mean": -1.1396121978759766, + "step": 224 + }, + { + "chosen_geometric_mean": -1.0256948471069336, + "epoch": 0.06, + "grad_norm": 2.5, + "learning_rate": 4.991244091254395e-06, + "log_odds": 0.1194186806678772, + "log_odds_ratio": -0.6425703763961792, + "loss": 0.384, + "rejected_geometric_mean": -1.0976440906524658, + "step": 225 + }, + { + "chosen_geometric_mean": -1.0816514492034912, + "epoch": 0.06, + "grad_norm": 2.265625, + "learning_rate": 4.991162499654442e-06, + "log_odds": 0.17466774582862854, + "log_odds_ratio": -0.6123636960983276, + "loss": 0.3319, + "rejected_geometric_mean": -1.2032815217971802, + "step": 226 + }, + { + "chosen_geometric_mean": -1.119187355041504, + "epoch": 0.06, + "grad_norm": 2.1875, + "learning_rate": 4.9910805303344576e-06, + "log_odds": 0.27952197194099426, + "log_odds_ratio": -0.5659215450286865, + "loss": 0.2966, + "rejected_geometric_mean": -1.315107822418213, + "step": 227 + }, + { + "chosen_geometric_mean": -1.2122162580490112, + "epoch": 0.06, + "grad_norm": 2.421875, + "learning_rate": 4.990998183306868e-06, + "log_odds": 0.018853433430194855, + "log_odds_ratio": -0.687608003616333, + "loss": 0.3578, + "rejected_geometric_mean": -1.2254310846328735, + "step": 228 + }, + { + "chosen_geometric_mean": -0.9934058785438538, + "epoch": 0.06, + "grad_norm": 2.078125, + "learning_rate": 4.99091545858416e-06, + "log_odds": 0.23367857933044434, + "log_odds_ratio": -0.5955595374107361, + "loss": 0.293, + "rejected_geometric_mean": -1.1551923751831055, + "step": 229 + }, + { + "chosen_geometric_mean": -1.428916335105896, + "epoch": 0.06, + "grad_norm": 2.296875, + "learning_rate": 4.990832356178878e-06, + "log_odds": -0.179939404129982, + "log_odds_ratio": -0.7977494597434998, + "loss": 0.3529, + "rejected_geometric_mean": -1.3054091930389404, + "step": 230 + }, + { + "chosen_geometric_mean": -1.1522036790847778, + "epoch": 0.06, + "grad_norm": 2.484375, + "learning_rate": 4.99074887610362e-06, + "log_odds": 0.12334942817687988, + "log_odds_ratio": -0.648589015007019, + "loss": 0.391, + "rejected_geometric_mean": -1.2695698738098145, + "step": 231 + }, + { + "chosen_geometric_mean": -1.0325461626052856, + "epoch": 0.06, + "grad_norm": 2.390625, + "learning_rate": 4.990665018371045e-06, + "log_odds": 0.2463511973619461, + "log_odds_ratio": -0.5809915065765381, + "loss": 0.3041, + "rejected_geometric_mean": -1.1888041496276855, + "step": 232 + }, + { + "chosen_geometric_mean": -1.2802958488464355, + "epoch": 0.06, + "grad_norm": 2.46875, + "learning_rate": 4.990580782993868e-06, + "log_odds": 0.7231981754302979, + "log_odds_ratio": -0.42892441153526306, + "loss": 0.2976, + "rejected_geometric_mean": -1.8570938110351562, + "step": 233 + }, + { + "chosen_geometric_mean": -1.2884843349456787, + "epoch": 0.06, + "grad_norm": 2.421875, + "learning_rate": 4.9904961699848595e-06, + "log_odds": 0.3637769818305969, + "log_odds_ratio": -0.5293234586715698, + "loss": 0.3357, + "rejected_geometric_mean": -1.5546331405639648, + "step": 234 + }, + { + "chosen_geometric_mean": -1.3570666313171387, + "epoch": 0.06, + "grad_norm": 2.546875, + "learning_rate": 4.990411179356851e-06, + "log_odds": 1.8566433191299438, + "log_odds_ratio": -0.4151083528995514, + "loss": 0.3743, + "rejected_geometric_mean": -3.1014206409454346, + "step": 235 + }, + { + "chosen_geometric_mean": -1.2977032661437988, + "epoch": 0.06, + "grad_norm": 2.46875, + "learning_rate": 4.9903258111227275e-06, + "log_odds": 0.39650753140449524, + "log_odds_ratio": -0.5165966749191284, + "loss": 0.3184, + "rejected_geometric_mean": -1.5966401100158691, + "step": 236 + }, + { + "chosen_geometric_mean": -1.216667652130127, + "epoch": 0.06, + "grad_norm": 2.46875, + "learning_rate": 4.990240065295433e-06, + "log_odds": 0.5143022537231445, + "log_odds_ratio": -0.48017677664756775, + "loss": 0.3546, + "rejected_geometric_mean": -1.6126060485839844, + "step": 237 + }, + { + "chosen_geometric_mean": -1.1796993017196655, + "epoch": 0.06, + "grad_norm": 3.03125, + "learning_rate": 4.99015394188797e-06, + "log_odds": 0.46317702531814575, + "log_odds_ratio": -0.5534149408340454, + "loss": 0.3599, + "rejected_geometric_mean": -1.48850679397583, + "step": 238 + }, + { + "chosen_geometric_mean": -1.1987262964248657, + "epoch": 0.06, + "grad_norm": 2.34375, + "learning_rate": 4.990067440913395e-06, + "log_odds": 0.021765410900115967, + "log_odds_ratio": -0.6926246285438538, + "loss": 0.2915, + "rejected_geometric_mean": -1.2329421043395996, + "step": 239 + }, + { + "chosen_geometric_mean": -1.4610286951065063, + "epoch": 0.06, + "grad_norm": 3.453125, + "learning_rate": 4.989980562384826e-06, + "log_odds": 0.7000725269317627, + "log_odds_ratio": -0.42609792947769165, + "loss": 0.3328, + "rejected_geometric_mean": -2.04982852935791, + "step": 240 + }, + { + "chosen_geometric_mean": -1.1261155605316162, + "epoch": 0.06, + "grad_norm": 2.875, + "learning_rate": 4.989893306315433e-06, + "log_odds": 0.578449547290802, + "log_odds_ratio": -0.4512200951576233, + "loss": 0.3175, + "rejected_geometric_mean": -1.5503517389297485, + "step": 241 + }, + { + "chosen_geometric_mean": -1.0942699909210205, + "epoch": 0.06, + "grad_norm": 2.453125, + "learning_rate": 4.9898056727184486e-06, + "log_odds": 0.47187623381614685, + "log_odds_ratio": -0.4859543442726135, + "loss": 0.3412, + "rejected_geometric_mean": -1.426461100578308, + "step": 242 + }, + { + "chosen_geometric_mean": -1.2934740781784058, + "epoch": 0.06, + "grad_norm": 2.5, + "learning_rate": 4.989717661607159e-06, + "log_odds": 0.282554030418396, + "log_odds_ratio": -0.5685504674911499, + "loss": 0.3497, + "rejected_geometric_mean": -1.4936227798461914, + "step": 243 + }, + { + "chosen_geometric_mean": -1.2080974578857422, + "epoch": 0.06, + "grad_norm": 2.84375, + "learning_rate": 4.989629272994908e-06, + "log_odds": 0.2879350185394287, + "log_odds_ratio": -0.5839196443557739, + "loss": 0.3517, + "rejected_geometric_mean": -1.4423272609710693, + "step": 244 + }, + { + "chosen_geometric_mean": -1.1812032461166382, + "epoch": 0.06, + "grad_norm": 16.125, + "learning_rate": 4.989540506895099e-06, + "log_odds": 0.2726055383682251, + "log_odds_ratio": -0.6382602453231812, + "loss": 0.3878, + "rejected_geometric_mean": -1.395402193069458, + "step": 245 + }, + { + "chosen_geometric_mean": -1.1230601072311401, + "epoch": 0.06, + "grad_norm": 3.421875, + "learning_rate": 4.989451363321191e-06, + "log_odds": 0.975839376449585, + "log_odds_ratio": -0.6098222136497498, + "loss": 0.3876, + "rejected_geometric_mean": -2.1036369800567627, + "step": 246 + }, + { + "chosen_geometric_mean": -1.0634653568267822, + "epoch": 0.06, + "grad_norm": 3.0625, + "learning_rate": 4.989361842286698e-06, + "log_odds": 0.024470597505569458, + "log_odds_ratio": -0.6818052530288696, + "loss": 0.3474, + "rejected_geometric_mean": -1.078306794166565, + "step": 247 + }, + { + "chosen_geometric_mean": -0.9844958186149597, + "epoch": 0.06, + "grad_norm": 4.125, + "learning_rate": 4.989271943805197e-06, + "log_odds": 0.7260274887084961, + "log_odds_ratio": -0.4263699948787689, + "loss": 0.3654, + "rejected_geometric_mean": -1.5284024477005005, + "step": 248 + }, + { + "chosen_geometric_mean": -1.0367100238800049, + "epoch": 0.06, + "grad_norm": 4.1875, + "learning_rate": 4.989181667890317e-06, + "log_odds": 0.22546608746051788, + "log_odds_ratio": -0.5920775532722473, + "loss": 0.3367, + "rejected_geometric_mean": -1.1909607648849487, + "step": 249 + }, + { + "chosen_geometric_mean": -1.374396562576294, + "epoch": 0.06, + "grad_norm": 12.6875, + "learning_rate": 4.989091014555745e-06, + "log_odds": 1.2580581903457642, + "log_odds_ratio": -0.36882248520851135, + "loss": 0.3459, + "rejected_geometric_mean": -2.4919800758361816, + "step": 250 + }, + { + "chosen_geometric_mean": -1.240793228149414, + "epoch": 0.06, + "grad_norm": 17.25, + "learning_rate": 4.988999983815228e-06, + "log_odds": 1.5592790842056274, + "log_odds_ratio": -0.45095205307006836, + "loss": 0.3172, + "rejected_geometric_mean": -2.6955957412719727, + "step": 251 + }, + { + "chosen_geometric_mean": -1.618350625038147, + "epoch": 0.06, + "grad_norm": 17.0, + "learning_rate": 4.988908575682567e-06, + "log_odds": -0.06428265571594238, + "log_odds_ratio": -0.8237171173095703, + "loss": 0.3728, + "rejected_geometric_mean": -1.5732067823410034, + "step": 252 + }, + { + "chosen_geometric_mean": -1.212249517440796, + "epoch": 0.06, + "grad_norm": 7.46875, + "learning_rate": 4.988816790171623e-06, + "log_odds": 0.8121037483215332, + "log_odds_ratio": -0.42373567819595337, + "loss": 0.351, + "rejected_geometric_mean": -1.8493447303771973, + "step": 253 + }, + { + "chosen_geometric_mean": -1.1195601224899292, + "epoch": 0.06, + "grad_norm": 3.4375, + "learning_rate": 4.988724627296312e-06, + "log_odds": 1.1982498168945312, + "log_odds_ratio": -0.3569786846637726, + "loss": 0.3295, + "rejected_geometric_mean": -2.141957998275757, + "step": 254 + }, + { + "chosen_geometric_mean": -0.9318114519119263, + "epoch": 0.06, + "grad_norm": 2.375, + "learning_rate": 4.988632087070609e-06, + "log_odds": 0.43297895789146423, + "log_odds_ratio": -0.5107588171958923, + "loss": 0.3314, + "rejected_geometric_mean": -1.210066556930542, + "step": 255 + }, + { + "chosen_geometric_mean": -0.8403289318084717, + "epoch": 0.06, + "grad_norm": 3.3125, + "learning_rate": 4.988539169508544e-06, + "log_odds": 0.317874014377594, + "log_odds_ratio": -0.5659440755844116, + "loss": 0.2988, + "rejected_geometric_mean": -1.046566128730774, + "step": 256 + }, + { + "chosen_geometric_mean": -1.0697201490402222, + "epoch": 0.06, + "grad_norm": 2.578125, + "learning_rate": 4.988445874624206e-06, + "log_odds": 0.18152949213981628, + "log_odds_ratio": -0.6417188048362732, + "loss": 0.319, + "rejected_geometric_mean": -1.2066330909729004, + "step": 257 + }, + { + "chosen_geometric_mean": -1.1342949867248535, + "epoch": 0.06, + "grad_norm": 2.890625, + "learning_rate": 4.9883522024317424e-06, + "log_odds": 0.15606851875782013, + "log_odds_ratio": -0.6587874889373779, + "loss": 0.3701, + "rejected_geometric_mean": -1.2883384227752686, + "step": 258 + }, + { + "chosen_geometric_mean": -1.1721796989440918, + "epoch": 0.06, + "grad_norm": 3.21875, + "learning_rate": 4.988258152945353e-06, + "log_odds": 0.7642302513122559, + "log_odds_ratio": -0.4520948827266693, + "loss": 0.3318, + "rejected_geometric_mean": -1.8131358623504639, + "step": 259 + }, + { + "chosen_geometric_mean": -1.1587800979614258, + "epoch": 0.06, + "grad_norm": 5.875, + "learning_rate": 4.988163726179301e-06, + "log_odds": 0.8080539107322693, + "log_odds_ratio": -0.4552198350429535, + "loss": 0.349, + "rejected_geometric_mean": -1.8588554859161377, + "step": 260 + }, + { + "chosen_geometric_mean": -1.229234218597412, + "epoch": 0.06, + "grad_norm": 2.59375, + "learning_rate": 4.9880689221479016e-06, + "log_odds": 0.1705603003501892, + "log_odds_ratio": -0.6176745891571045, + "loss": 0.3463, + "rejected_geometric_mean": -1.3516833782196045, + "step": 261 + }, + { + "chosen_geometric_mean": -1.3961501121520996, + "epoch": 0.06, + "grad_norm": 3.3125, + "learning_rate": 4.98797374086553e-06, + "log_odds": 2.658801317214966, + "log_odds_ratio": -0.3448156416416168, + "loss": 0.3475, + "rejected_geometric_mean": -3.9357402324676514, + "step": 262 + }, + { + "chosen_geometric_mean": -0.920565128326416, + "epoch": 0.07, + "grad_norm": 2.640625, + "learning_rate": 4.9878781823466186e-06, + "log_odds": -0.0036184117197990417, + "log_odds_ratio": -0.7155005931854248, + "loss": 0.3383, + "rejected_geometric_mean": -0.9337551593780518, + "step": 263 + }, + { + "chosen_geometric_mean": -1.3253734111785889, + "epoch": 0.07, + "grad_norm": 10.8125, + "learning_rate": 4.987782246605656e-06, + "log_odds": 1.823051929473877, + "log_odds_ratio": -0.42835691571235657, + "loss": 0.3901, + "rejected_geometric_mean": -3.051286458969116, + "step": 264 + }, + { + "chosen_geometric_mean": -1.2162325382232666, + "epoch": 0.07, + "grad_norm": 2.375, + "learning_rate": 4.9876859336571885e-06, + "log_odds": 0.08623462170362473, + "log_odds_ratio": -0.6690406799316406, + "loss": 0.3458, + "rejected_geometric_mean": -1.2889924049377441, + "step": 265 + }, + { + "chosen_geometric_mean": -1.050315499305725, + "epoch": 0.07, + "grad_norm": 7.40625, + "learning_rate": 4.987589243515818e-06, + "log_odds": 0.0041388049721717834, + "log_odds_ratio": -0.6973927617073059, + "loss": 0.3836, + "rejected_geometric_mean": -1.0585206747055054, + "step": 266 + }, + { + "chosen_geometric_mean": -1.0355077981948853, + "epoch": 0.07, + "grad_norm": 2.984375, + "learning_rate": 4.987492176196207e-06, + "log_odds": 0.3189356327056885, + "log_odds_ratio": -0.551554799079895, + "loss": 0.3123, + "rejected_geometric_mean": -1.2601503133773804, + "step": 267 + }, + { + "chosen_geometric_mean": -1.3074049949645996, + "epoch": 0.07, + "grad_norm": 2.1875, + "learning_rate": 4.987394731713073e-06, + "log_odds": 0.43339672684669495, + "log_odds_ratio": -0.533097505569458, + "loss": 0.3491, + "rejected_geometric_mean": -1.7056126594543457, + "step": 268 + }, + { + "chosen_geometric_mean": -1.1887043714523315, + "epoch": 0.07, + "grad_norm": 2.859375, + "learning_rate": 4.98729691008119e-06, + "log_odds": 0.10535141825675964, + "log_odds_ratio": -0.6525646448135376, + "loss": 0.3394, + "rejected_geometric_mean": -1.2707602977752686, + "step": 269 + }, + { + "chosen_geometric_mean": -1.0465751886367798, + "epoch": 0.07, + "grad_norm": 2.46875, + "learning_rate": 4.9871987113153906e-06, + "log_odds": 1.7054622173309326, + "log_odds_ratio": -0.2884497046470642, + "loss": 0.2856, + "rejected_geometric_mean": -2.5275766849517822, + "step": 270 + }, + { + "chosen_geometric_mean": -1.243595838546753, + "epoch": 0.07, + "grad_norm": 2.5, + "learning_rate": 4.987100135430565e-06, + "log_odds": 0.9812898635864258, + "log_odds_ratio": -0.42512211203575134, + "loss": 0.3104, + "rejected_geometric_mean": -2.096797227859497, + "step": 271 + }, + { + "chosen_geometric_mean": -1.147362470626831, + "epoch": 0.07, + "grad_norm": 7.5, + "learning_rate": 4.9870011824416575e-06, + "log_odds": 0.41132691502571106, + "log_odds_ratio": -0.5253641605377197, + "loss": 0.392, + "rejected_geometric_mean": -1.4629299640655518, + "step": 272 + }, + { + "chosen_geometric_mean": -1.0923891067504883, + "epoch": 0.07, + "grad_norm": 2.65625, + "learning_rate": 4.986901852363673e-06, + "log_odds": 0.17913636565208435, + "log_odds_ratio": -0.6201163530349731, + "loss": 0.3842, + "rejected_geometric_mean": -1.2289294004440308, + "step": 273 + }, + { + "chosen_geometric_mean": -1.3925951719284058, + "epoch": 0.07, + "grad_norm": 2.890625, + "learning_rate": 4.9868021452116734e-06, + "log_odds": 1.270074486732483, + "log_odds_ratio": -0.4285975694656372, + "loss": 0.3013, + "rejected_geometric_mean": -2.570542573928833, + "step": 274 + }, + { + "chosen_geometric_mean": -1.1288838386535645, + "epoch": 0.07, + "grad_norm": 2.453125, + "learning_rate": 4.986702061000774e-06, + "log_odds": 0.3134547770023346, + "log_odds_ratio": -0.5545663833618164, + "loss": 0.3448, + "rejected_geometric_mean": -1.3216238021850586, + "step": 275 + }, + { + "chosen_geometric_mean": -1.1324944496154785, + "epoch": 0.07, + "grad_norm": 2.4375, + "learning_rate": 4.986601599746152e-06, + "log_odds": 0.3573974072933197, + "log_odds_ratio": -0.5359705686569214, + "loss": 0.3239, + "rejected_geometric_mean": -1.3930834531784058, + "step": 276 + }, + { + "chosen_geometric_mean": -1.4492032527923584, + "epoch": 0.07, + "grad_norm": 37.5, + "learning_rate": 4.98650076146304e-06, + "log_odds": 0.09639207273721695, + "log_odds_ratio": -0.7189069986343384, + "loss": 0.375, + "rejected_geometric_mean": -1.4793723821640015, + "step": 277 + }, + { + "chosen_geometric_mean": -1.044616937637329, + "epoch": 0.07, + "grad_norm": 2.375, + "learning_rate": 4.986399546166727e-06, + "log_odds": 0.3552761673927307, + "log_odds_ratio": -0.5742849707603455, + "loss": 0.3285, + "rejected_geometric_mean": -1.3153350353240967, + "step": 278 + }, + { + "chosen_geometric_mean": -1.1482868194580078, + "epoch": 0.07, + "grad_norm": 3.859375, + "learning_rate": 4.986297953872559e-06, + "log_odds": 0.43194952607154846, + "log_odds_ratio": -0.5739467144012451, + "loss": 0.3527, + "rejected_geometric_mean": -1.5185704231262207, + "step": 279 + }, + { + "chosen_geometric_mean": -1.3334325551986694, + "epoch": 0.07, + "grad_norm": 3.09375, + "learning_rate": 4.98619598459594e-06, + "log_odds": 0.364126980304718, + "log_odds_ratio": -0.564242959022522, + "loss": 0.4024, + "rejected_geometric_mean": -1.6265084743499756, + "step": 280 + }, + { + "chosen_geometric_mean": -1.3550878763198853, + "epoch": 0.07, + "grad_norm": 3.71875, + "learning_rate": 4.986093638352331e-06, + "log_odds": 0.06454429775476456, + "log_odds_ratio": -0.6616631150245667, + "loss": 0.3431, + "rejected_geometric_mean": -1.4006067514419556, + "step": 281 + }, + { + "chosen_geometric_mean": -1.0210031270980835, + "epoch": 0.07, + "grad_norm": 2.5, + "learning_rate": 4.985990915157252e-06, + "log_odds": 1.2494419813156128, + "log_odds_ratio": -0.4325599670410156, + "loss": 0.3035, + "rejected_geometric_mean": -2.1218080520629883, + "step": 282 + }, + { + "chosen_geometric_mean": -1.2289655208587646, + "epoch": 0.07, + "grad_norm": 3.859375, + "learning_rate": 4.985887815026275e-06, + "log_odds": 1.6639493703842163, + "log_odds_ratio": -0.31644207239151, + "loss": 0.2997, + "rejected_geometric_mean": -2.706446647644043, + "step": 283 + }, + { + "chosen_geometric_mean": -1.1891281604766846, + "epoch": 0.07, + "grad_norm": 6.25, + "learning_rate": 4.985784337975036e-06, + "log_odds": 0.37308645248413086, + "log_odds_ratio": -0.5416997075080872, + "loss": 0.3504, + "rejected_geometric_mean": -1.4776244163513184, + "step": 284 + }, + { + "chosen_geometric_mean": -1.01689875125885, + "epoch": 0.07, + "grad_norm": 2.515625, + "learning_rate": 4.985680484019221e-06, + "log_odds": 0.31079861521720886, + "log_odds_ratio": -0.5649324655532837, + "loss": 0.3463, + "rejected_geometric_mean": -1.2273447513580322, + "step": 285 + }, + { + "chosen_geometric_mean": -1.2856734991073608, + "epoch": 0.07, + "grad_norm": 2.390625, + "learning_rate": 4.98557625317458e-06, + "log_odds": 1.1850199699401855, + "log_odds_ratio": -0.4167174994945526, + "loss": 0.3241, + "rejected_geometric_mean": -2.34088134765625, + "step": 286 + }, + { + "chosen_geometric_mean": -1.1340723037719727, + "epoch": 0.07, + "grad_norm": 8.5625, + "learning_rate": 4.9854716454569145e-06, + "log_odds": 0.253020703792572, + "log_odds_ratio": -0.5813108086585999, + "loss": 0.3811, + "rejected_geometric_mean": -1.3141982555389404, + "step": 287 + }, + { + "chosen_geometric_mean": -1.416925072669983, + "epoch": 0.07, + "grad_norm": 2.59375, + "learning_rate": 4.985366660882087e-06, + "log_odds": 0.6837731003761292, + "log_odds_ratio": -0.4256528317928314, + "loss": 0.2721, + "rejected_geometric_mean": -1.9876888990402222, + "step": 288 + }, + { + "chosen_geometric_mean": -1.1753759384155273, + "epoch": 0.07, + "grad_norm": 2.90625, + "learning_rate": 4.9852612994660145e-06, + "log_odds": 0.09225469827651978, + "log_odds_ratio": -0.6790415048599243, + "loss": 0.3477, + "rejected_geometric_mean": -1.250462293624878, + "step": 289 + }, + { + "chosen_geometric_mean": -1.0067076683044434, + "epoch": 0.07, + "grad_norm": 2.546875, + "learning_rate": 4.985155561224674e-06, + "log_odds": 0.08405979722738266, + "log_odds_ratio": -0.6627264022827148, + "loss": 0.3336, + "rejected_geometric_mean": -1.0704329013824463, + "step": 290 + }, + { + "chosen_geometric_mean": -1.0765951871871948, + "epoch": 0.07, + "grad_norm": 3.578125, + "learning_rate": 4.985049446174096e-06, + "log_odds": 0.4418318569660187, + "log_odds_ratio": -0.509522557258606, + "loss": 0.3378, + "rejected_geometric_mean": -1.4004522562026978, + "step": 291 + }, + { + "chosen_geometric_mean": -1.02976393699646, + "epoch": 0.07, + "grad_norm": 3.53125, + "learning_rate": 4.984942954330371e-06, + "log_odds": 0.4793664216995239, + "log_odds_ratio": -0.5426982045173645, + "loss": 0.3901, + "rejected_geometric_mean": -1.4244465827941895, + "step": 292 + }, + { + "chosen_geometric_mean": -1.0730223655700684, + "epoch": 0.07, + "grad_norm": 2.640625, + "learning_rate": 4.984836085709646e-06, + "log_odds": 0.15403257310390472, + "log_odds_ratio": -0.6206762790679932, + "loss": 0.347, + "rejected_geometric_mean": -1.1781913042068481, + "step": 293 + }, + { + "chosen_geometric_mean": -1.0766851902008057, + "epoch": 0.07, + "grad_norm": 6.875, + "learning_rate": 4.984728840328125e-06, + "log_odds": 0.3470381200313568, + "log_odds_ratio": -0.5360792875289917, + "loss": 0.3817, + "rejected_geometric_mean": -1.3126189708709717, + "step": 294 + }, + { + "chosen_geometric_mean": -1.1421127319335938, + "epoch": 0.07, + "grad_norm": 2.796875, + "learning_rate": 4.984621218202068e-06, + "log_odds": 0.30733755230903625, + "log_odds_ratio": -0.5616778135299683, + "loss": 0.3412, + "rejected_geometric_mean": -1.366063117980957, + "step": 295 + }, + { + "chosen_geometric_mean": -1.111205816268921, + "epoch": 0.07, + "grad_norm": 6.84375, + "learning_rate": 4.984513219347794e-06, + "log_odds": 0.15841397643089294, + "log_odds_ratio": -0.6305115222930908, + "loss": 0.3078, + "rejected_geometric_mean": -1.2339258193969727, + "step": 296 + }, + { + "chosen_geometric_mean": -1.2005751132965088, + "epoch": 0.07, + "grad_norm": 10.0625, + "learning_rate": 4.984404843781678e-06, + "log_odds": 0.6387118697166443, + "log_odds_ratio": -0.4475688934326172, + "loss": 0.3145, + "rejected_geometric_mean": -1.6890041828155518, + "step": 297 + }, + { + "chosen_geometric_mean": -1.054663896560669, + "epoch": 0.07, + "grad_norm": 6.875, + "learning_rate": 4.984296091520153e-06, + "log_odds": 0.15198726952075958, + "log_odds_ratio": -0.6204363107681274, + "loss": 0.351, + "rejected_geometric_mean": -1.1556391716003418, + "step": 298 + }, + { + "chosen_geometric_mean": -1.2805641889572144, + "epoch": 0.07, + "grad_norm": 3.3125, + "learning_rate": 4.984186962579706e-06, + "log_odds": 0.2557763457298279, + "log_odds_ratio": -0.5968447923660278, + "loss": 0.354, + "rejected_geometric_mean": -1.495936393737793, + "step": 299 + }, + { + "chosen_geometric_mean": -1.3568599224090576, + "epoch": 0.07, + "grad_norm": 2.375, + "learning_rate": 4.984077456976887e-06, + "log_odds": 0.6243099570274353, + "log_odds_ratio": -0.4710903763771057, + "loss": 0.3218, + "rejected_geometric_mean": -1.8635828495025635, + "step": 300 + }, + { + "chosen_geometric_mean": -1.272909164428711, + "epoch": 0.07, + "grad_norm": 2.21875, + "learning_rate": 4.983967574728297e-06, + "log_odds": 0.5864981412887573, + "log_odds_ratio": -0.4619028568267822, + "loss": 0.323, + "rejected_geometric_mean": -1.7352644205093384, + "step": 301 + }, + { + "chosen_geometric_mean": -1.0985301733016968, + "epoch": 0.07, + "grad_norm": 3.15625, + "learning_rate": 4.983857315850597e-06, + "log_odds": -0.05727966129779816, + "log_odds_ratio": -0.7254320383071899, + "loss": 0.3, + "rejected_geometric_mean": -1.0623283386230469, + "step": 302 + }, + { + "chosen_geometric_mean": -1.0101709365844727, + "epoch": 0.08, + "grad_norm": 2.9375, + "learning_rate": 4.983746680360507e-06, + "log_odds": 0.4423341751098633, + "log_odds_ratio": -0.520412802696228, + "loss": 0.3328, + "rejected_geometric_mean": -1.3294036388397217, + "step": 303 + }, + { + "chosen_geometric_mean": -1.0326749086380005, + "epoch": 0.08, + "grad_norm": 2.515625, + "learning_rate": 4.9836356682748e-06, + "log_odds": 0.11993491649627686, + "log_odds_ratio": -0.649997889995575, + "loss": 0.3203, + "rejected_geometric_mean": -1.1032795906066895, + "step": 304 + }, + { + "chosen_geometric_mean": -1.2800103425979614, + "epoch": 0.08, + "grad_norm": 2.65625, + "learning_rate": 4.983524279610309e-06, + "log_odds": 0.7005776762962341, + "log_odds_ratio": -0.4288063645362854, + "loss": 0.3288, + "rejected_geometric_mean": -1.8451720476150513, + "step": 305 + }, + { + "chosen_geometric_mean": -1.3031586408615112, + "epoch": 0.08, + "grad_norm": 8.5, + "learning_rate": 4.983412514383922e-06, + "log_odds": 1.1300562620162964, + "log_odds_ratio": -0.30874016880989075, + "loss": 0.3174, + "rejected_geometric_mean": -2.2527825832366943, + "step": 306 + }, + { + "chosen_geometric_mean": -1.1579475402832031, + "epoch": 0.08, + "grad_norm": 6.59375, + "learning_rate": 4.983300372612586e-06, + "log_odds": 0.5205963850021362, + "log_odds_ratio": -0.47244930267333984, + "loss": 0.3477, + "rejected_geometric_mean": -1.5497409105300903, + "step": 307 + }, + { + "chosen_geometric_mean": -1.1562212705612183, + "epoch": 0.08, + "grad_norm": 3.078125, + "learning_rate": 4.983187854313306e-06, + "log_odds": 0.45617642998695374, + "log_odds_ratio": -0.5193001627922058, + "loss": 0.3438, + "rejected_geometric_mean": -1.4913699626922607, + "step": 308 + }, + { + "chosen_geometric_mean": -1.446051001548767, + "epoch": 0.08, + "grad_norm": 7.59375, + "learning_rate": 4.98307495950314e-06, + "log_odds": 1.5934935808181763, + "log_odds_ratio": -0.2788432836532593, + "loss": 0.3923, + "rejected_geometric_mean": -2.8589885234832764, + "step": 309 + }, + { + "chosen_geometric_mean": -1.2670044898986816, + "epoch": 0.08, + "grad_norm": 2.921875, + "learning_rate": 4.982961688199207e-06, + "log_odds": 0.43114930391311646, + "log_odds_ratio": -0.5550333857536316, + "loss": 0.3584, + "rejected_geometric_mean": -1.572402000427246, + "step": 310 + }, + { + "chosen_geometric_mean": -1.0075558423995972, + "epoch": 0.08, + "grad_norm": 3.890625, + "learning_rate": 4.982848040418682e-06, + "log_odds": 0.6052576899528503, + "log_odds_ratio": -0.47191452980041504, + "loss": 0.2975, + "rejected_geometric_mean": -1.4744722843170166, + "step": 311 + }, + { + "chosen_geometric_mean": -1.2231817245483398, + "epoch": 0.08, + "grad_norm": 4.0625, + "learning_rate": 4.982734016178795e-06, + "log_odds": 0.13560128211975098, + "log_odds_ratio": -0.6370800733566284, + "loss": 0.3424, + "rejected_geometric_mean": -1.3302181959152222, + "step": 312 + }, + { + "chosen_geometric_mean": -1.365334391593933, + "epoch": 0.08, + "grad_norm": 2.609375, + "learning_rate": 4.982619615496836e-06, + "log_odds": 1.0323312282562256, + "log_odds_ratio": -0.544610857963562, + "loss": 0.3121, + "rejected_geometric_mean": -2.3628716468811035, + "step": 313 + }, + { + "chosen_geometric_mean": -1.228336215019226, + "epoch": 0.08, + "grad_norm": 2.5, + "learning_rate": 4.982504838390151e-06, + "log_odds": 0.037478938698768616, + "log_odds_ratio": -0.6778494119644165, + "loss": 0.4559, + "rejected_geometric_mean": -1.2463644742965698, + "step": 314 + }, + { + "chosen_geometric_mean": -1.262304663658142, + "epoch": 0.08, + "grad_norm": 2.46875, + "learning_rate": 4.982389684876142e-06, + "log_odds": -0.05873722955584526, + "log_odds_ratio": -0.7374820709228516, + "loss": 0.362, + "rejected_geometric_mean": -1.243721604347229, + "step": 315 + }, + { + "chosen_geometric_mean": -1.2252260446548462, + "epoch": 0.08, + "grad_norm": 2.734375, + "learning_rate": 4.98227415497227e-06, + "log_odds": -0.031957536935806274, + "log_odds_ratio": -0.7159287333488464, + "loss": 0.3545, + "rejected_geometric_mean": -1.2111403942108154, + "step": 316 + }, + { + "chosen_geometric_mean": -1.2089873552322388, + "epoch": 0.08, + "grad_norm": 2.578125, + "learning_rate": 4.9821582486960515e-06, + "log_odds": 0.22428056597709656, + "log_odds_ratio": -0.6157387495040894, + "loss": 0.3174, + "rejected_geometric_mean": -1.3893113136291504, + "step": 317 + }, + { + "chosen_geometric_mean": -1.1503149271011353, + "epoch": 0.08, + "grad_norm": 2.515625, + "learning_rate": 4.982041966065062e-06, + "log_odds": 0.3147876262664795, + "log_odds_ratio": -0.565610945224762, + "loss": 0.3315, + "rejected_geometric_mean": -1.403635859489441, + "step": 318 + }, + { + "chosen_geometric_mean": -1.1912806034088135, + "epoch": 0.08, + "grad_norm": 2.421875, + "learning_rate": 4.981925307096931e-06, + "log_odds": 0.1897951364517212, + "log_odds_ratio": -0.6056790351867676, + "loss": 0.3251, + "rejected_geometric_mean": -1.317586064338684, + "step": 319 + }, + { + "chosen_geometric_mean": -1.0845218896865845, + "epoch": 0.08, + "grad_norm": 2.171875, + "learning_rate": 4.981808271809347e-06, + "log_odds": 0.559588611125946, + "log_odds_ratio": -0.49189281463623047, + "loss": 0.2839, + "rejected_geometric_mean": -1.5067427158355713, + "step": 320 + }, + { + "chosen_geometric_mean": -1.1343715190887451, + "epoch": 0.08, + "grad_norm": 2.859375, + "learning_rate": 4.981690860220057e-06, + "log_odds": 0.38345545530319214, + "log_odds_ratio": -0.5383836030960083, + "loss": 0.3504, + "rejected_geometric_mean": -1.410220742225647, + "step": 321 + }, + { + "chosen_geometric_mean": -1.1126294136047363, + "epoch": 0.08, + "grad_norm": 2.828125, + "learning_rate": 4.981573072346862e-06, + "log_odds": 0.5540388822555542, + "log_odds_ratio": -0.4873189628124237, + "loss": 0.3763, + "rejected_geometric_mean": -1.562835693359375, + "step": 322 + }, + { + "chosen_geometric_mean": -1.0461148023605347, + "epoch": 0.08, + "grad_norm": 6.5, + "learning_rate": 4.981454908207621e-06, + "log_odds": 0.30320465564727783, + "log_odds_ratio": -0.5689256191253662, + "loss": 0.3708, + "rejected_geometric_mean": -1.2624812126159668, + "step": 323 + }, + { + "chosen_geometric_mean": -1.4005014896392822, + "epoch": 0.08, + "grad_norm": 3.984375, + "learning_rate": 4.981336367820252e-06, + "log_odds": -0.005990803241729736, + "log_odds_ratio": -0.7796093225479126, + "loss": 0.4064, + "rejected_geometric_mean": -1.379280924797058, + "step": 324 + }, + { + "chosen_geometric_mean": -1.346123218536377, + "epoch": 0.08, + "grad_norm": 2.59375, + "learning_rate": 4.981217451202728e-06, + "log_odds": 0.32583653926849365, + "log_odds_ratio": -0.5563771724700928, + "loss": 0.3297, + "rejected_geometric_mean": -1.6066749095916748, + "step": 325 + }, + { + "chosen_geometric_mean": -1.063804268836975, + "epoch": 0.08, + "grad_norm": 2.921875, + "learning_rate": 4.981098158373079e-06, + "log_odds": 0.10494797676801682, + "log_odds_ratio": -0.6573840379714966, + "loss": 0.3701, + "rejected_geometric_mean": -1.1453157663345337, + "step": 326 + }, + { + "chosen_geometric_mean": -1.1746143102645874, + "epoch": 0.08, + "grad_norm": 2.90625, + "learning_rate": 4.980978489349393e-06, + "log_odds": 1.0156196355819702, + "log_odds_ratio": -0.5073622465133667, + "loss": 0.3935, + "rejected_geometric_mean": -2.0973167419433594, + "step": 327 + }, + { + "chosen_geometric_mean": -1.167644739151001, + "epoch": 0.08, + "grad_norm": 2.59375, + "learning_rate": 4.980858444149815e-06, + "log_odds": 0.054562874138355255, + "log_odds_ratio": -0.6679367423057556, + "loss": 0.3586, + "rejected_geometric_mean": -1.2139078378677368, + "step": 328 + }, + { + "chosen_geometric_mean": -1.086222767829895, + "epoch": 0.08, + "grad_norm": 2.75, + "learning_rate": 4.980738022792546e-06, + "log_odds": 1.1576682329177856, + "log_odds_ratio": -0.4569448232650757, + "loss": 0.3248, + "rejected_geometric_mean": -2.1318061351776123, + "step": 329 + }, + { + "chosen_geometric_mean": -1.0301215648651123, + "epoch": 0.08, + "grad_norm": 10.125, + "learning_rate": 4.980617225295846e-06, + "log_odds": 0.21822072565555573, + "log_odds_ratio": -0.5907126069068909, + "loss": 0.3532, + "rejected_geometric_mean": -1.1672507524490356, + "step": 330 + }, + { + "chosen_geometric_mean": -1.033495545387268, + "epoch": 0.08, + "grad_norm": 2.328125, + "learning_rate": 4.9804960516780306e-06, + "log_odds": 0.4350074529647827, + "log_odds_ratio": -0.5080119371414185, + "loss": 0.2769, + "rejected_geometric_mean": -1.3228657245635986, + "step": 331 + }, + { + "chosen_geometric_mean": -1.3364778757095337, + "epoch": 0.08, + "grad_norm": 3.015625, + "learning_rate": 4.980374501957471e-06, + "log_odds": 0.03644890338182449, + "log_odds_ratio": -0.6796573996543884, + "loss": 0.3564, + "rejected_geometric_mean": -1.3704144954681396, + "step": 332 + }, + { + "chosen_geometric_mean": -1.075545072555542, + "epoch": 0.08, + "grad_norm": 2.75, + "learning_rate": 4.980252576152599e-06, + "log_odds": 0.09739816933870316, + "log_odds_ratio": -0.6589347124099731, + "loss": 0.343, + "rejected_geometric_mean": -1.1330121755599976, + "step": 333 + }, + { + "chosen_geometric_mean": -1.0864908695220947, + "epoch": 0.08, + "grad_norm": 2.6875, + "learning_rate": 4.9801302742819005e-06, + "log_odds": 0.23467335104942322, + "log_odds_ratio": -0.5964602828025818, + "loss": 0.3769, + "rejected_geometric_mean": -1.2569162845611572, + "step": 334 + }, + { + "chosen_geometric_mean": -0.923858642578125, + "epoch": 0.08, + "grad_norm": 2.515625, + "learning_rate": 4.980007596363919e-06, + "log_odds": 0.296653687953949, + "log_odds_ratio": -0.5673141479492188, + "loss": 0.292, + "rejected_geometric_mean": -1.1155740022659302, + "step": 335 + }, + { + "chosen_geometric_mean": -1.2323416471481323, + "epoch": 0.08, + "grad_norm": 3.09375, + "learning_rate": 4.979884542417257e-06, + "log_odds": 0.3507169783115387, + "log_odds_ratio": -0.5482258200645447, + "loss": 0.3375, + "rejected_geometric_mean": -1.4961224794387817, + "step": 336 + }, + { + "chosen_geometric_mean": -1.0140942335128784, + "epoch": 0.08, + "grad_norm": 2.703125, + "learning_rate": 4.979761112460571e-06, + "log_odds": 0.7329339981079102, + "log_odds_ratio": -0.46226751804351807, + "loss": 0.3328, + "rejected_geometric_mean": -1.6124656200408936, + "step": 337 + }, + { + "chosen_geometric_mean": -1.1142454147338867, + "epoch": 0.08, + "grad_norm": 2.453125, + "learning_rate": 4.979637306512576e-06, + "log_odds": 0.3549093008041382, + "log_odds_ratio": -0.5604091882705688, + "loss": 0.2744, + "rejected_geometric_mean": -1.3832893371582031, + "step": 338 + }, + { + "chosen_geometric_mean": -0.9522650241851807, + "epoch": 0.08, + "grad_norm": 3.96875, + "learning_rate": 4.979513124592045e-06, + "log_odds": 0.7563007473945618, + "log_odds_ratio": -0.39389318227767944, + "loss": 0.2841, + "rejected_geometric_mean": -1.4932453632354736, + "step": 339 + }, + { + "chosen_geometric_mean": -1.0158003568649292, + "epoch": 0.08, + "grad_norm": 3.625, + "learning_rate": 4.979388566717806e-06, + "log_odds": 1.1818797588348389, + "log_odds_ratio": -0.39368361234664917, + "loss": 0.2995, + "rejected_geometric_mean": -2.0210483074188232, + "step": 340 + }, + { + "chosen_geometric_mean": -1.564903974533081, + "epoch": 0.08, + "grad_norm": 19.5, + "learning_rate": 4.979263632908745e-06, + "log_odds": 1.273064374923706, + "log_odds_ratio": -0.3531360626220703, + "loss": 0.3908, + "rejected_geometric_mean": -2.71903657913208, + "step": 341 + }, + { + "chosen_geometric_mean": -1.1893399953842163, + "epoch": 0.08, + "grad_norm": 4.59375, + "learning_rate": 4.979138323183805e-06, + "log_odds": 1.4057588577270508, + "log_odds_ratio": -0.4362488389015198, + "loss": 0.323, + "rejected_geometric_mean": -2.477105140686035, + "step": 342 + }, + { + "chosen_geometric_mean": -1.3309904336929321, + "epoch": 0.08, + "grad_norm": 8.875, + "learning_rate": 4.979012637561987e-06, + "log_odds": 0.6811039447784424, + "log_odds_ratio": -0.4580315351486206, + "loss": 0.3231, + "rejected_geometric_mean": -1.8997159004211426, + "step": 343 + }, + { + "chosen_geometric_mean": -1.045284628868103, + "epoch": 0.09, + "grad_norm": 3.0625, + "learning_rate": 4.978886576062346e-06, + "log_odds": 0.2202332615852356, + "log_odds_ratio": -0.5919233560562134, + "loss": 0.3419, + "rejected_geometric_mean": -1.1932792663574219, + "step": 344 + }, + { + "chosen_geometric_mean": -1.1769461631774902, + "epoch": 0.09, + "grad_norm": 2.53125, + "learning_rate": 4.9787601387039984e-06, + "log_odds": 0.1368057280778885, + "log_odds_ratio": -0.6414902806282043, + "loss": 0.326, + "rejected_geometric_mean": -1.254459261894226, + "step": 345 + }, + { + "chosen_geometric_mean": -0.8674470782279968, + "epoch": 0.09, + "grad_norm": 2.671875, + "learning_rate": 4.978633325506113e-06, + "log_odds": 0.4285489320755005, + "log_odds_ratio": -0.5135911107063293, + "loss": 0.3491, + "rejected_geometric_mean": -1.1557151079177856, + "step": 346 + }, + { + "chosen_geometric_mean": -1.0346598625183105, + "epoch": 0.09, + "grad_norm": 8.75, + "learning_rate": 4.978506136487918e-06, + "log_odds": -0.0959433764219284, + "log_odds_ratio": -0.7466540336608887, + "loss": 0.3341, + "rejected_geometric_mean": -0.9834038019180298, + "step": 347 + }, + { + "chosen_geometric_mean": -1.1268516778945923, + "epoch": 0.09, + "grad_norm": 2.640625, + "learning_rate": 4.9783785716687e-06, + "log_odds": 0.28194230794906616, + "log_odds_ratio": -0.5717902183532715, + "loss": 0.312, + "rejected_geometric_mean": -1.3190641403198242, + "step": 348 + }, + { + "chosen_geometric_mean": -0.934389591217041, + "epoch": 0.09, + "grad_norm": 2.359375, + "learning_rate": 4.9782506310677995e-06, + "log_odds": -0.13504034280776978, + "log_odds_ratio": -0.7757370471954346, + "loss": 0.3226, + "rejected_geometric_mean": -0.8748790621757507, + "step": 349 + }, + { + "chosen_geometric_mean": -1.2884700298309326, + "epoch": 0.09, + "grad_norm": 9.625, + "learning_rate": 4.9781223147046155e-06, + "log_odds": 0.07775162160396576, + "log_odds_ratio": -0.670209527015686, + "loss": 0.3573, + "rejected_geometric_mean": -1.3238654136657715, + "step": 350 + }, + { + "chosen_geometric_mean": -1.2457526922225952, + "epoch": 0.09, + "grad_norm": 2.6875, + "learning_rate": 4.977993622598604e-06, + "log_odds": 0.3307192623615265, + "log_odds_ratio": -0.5489625334739685, + "loss": 0.3629, + "rejected_geometric_mean": -1.4922362565994263, + "step": 351 + }, + { + "chosen_geometric_mean": -1.1883111000061035, + "epoch": 0.09, + "grad_norm": 2.484375, + "learning_rate": 4.977864554769277e-06, + "log_odds": 0.5252569317817688, + "log_odds_ratio": -0.4982718825340271, + "loss": 0.342, + "rejected_geometric_mean": -1.564767837524414, + "step": 352 + }, + { + "chosen_geometric_mean": -1.1323463916778564, + "epoch": 0.09, + "grad_norm": 2.609375, + "learning_rate": 4.977735111236206e-06, + "log_odds": 0.57431960105896, + "log_odds_ratio": -0.47556740045547485, + "loss": 0.3533, + "rejected_geometric_mean": -1.584932804107666, + "step": 353 + }, + { + "chosen_geometric_mean": -0.9852162003517151, + "epoch": 0.09, + "grad_norm": 2.640625, + "learning_rate": 4.9776052920190164e-06, + "log_odds": -0.055177539587020874, + "log_odds_ratio": -0.7386831045150757, + "loss": 0.3517, + "rejected_geometric_mean": -0.9600192308425903, + "step": 354 + }, + { + "chosen_geometric_mean": -1.0857300758361816, + "epoch": 0.09, + "grad_norm": 2.734375, + "learning_rate": 4.977475097137392e-06, + "log_odds": 0.09899038076400757, + "log_odds_ratio": -0.6494197845458984, + "loss": 0.3369, + "rejected_geometric_mean": -1.1316167116165161, + "step": 355 + }, + { + "chosen_geometric_mean": -1.2633479833602905, + "epoch": 0.09, + "grad_norm": 2.65625, + "learning_rate": 4.977344526611074e-06, + "log_odds": 2.302952527999878, + "log_odds_ratio": -0.29223939776420593, + "loss": 0.2842, + "rejected_geometric_mean": -3.36974835395813, + "step": 356 + }, + { + "chosen_geometric_mean": -1.142402172088623, + "epoch": 0.09, + "grad_norm": 2.796875, + "learning_rate": 4.97721358045986e-06, + "log_odds": 0.5387564897537231, + "log_odds_ratio": -0.6159143447875977, + "loss": 0.4071, + "rejected_geometric_mean": -1.6254479885101318, + "step": 357 + }, + { + "chosen_geometric_mean": -1.2547967433929443, + "epoch": 0.09, + "grad_norm": 2.875, + "learning_rate": 4.977082258703604e-06, + "log_odds": 2.521986484527588, + "log_odds_ratio": -0.4659903049468994, + "loss": 0.3297, + "rejected_geometric_mean": -3.669353723526001, + "step": 358 + }, + { + "chosen_geometric_mean": -1.1622724533081055, + "epoch": 0.09, + "grad_norm": 2.546875, + "learning_rate": 4.976950561362219e-06, + "log_odds": 0.34735241532325745, + "log_odds_ratio": -0.5647368431091309, + "loss": 0.3755, + "rejected_geometric_mean": -1.455688714981079, + "step": 359 + }, + { + "chosen_geometric_mean": -1.2196379899978638, + "epoch": 0.09, + "grad_norm": 2.34375, + "learning_rate": 4.976818488455671e-06, + "log_odds": 0.16454261541366577, + "log_odds_ratio": -0.6211406588554382, + "loss": 0.3225, + "rejected_geometric_mean": -1.3333067893981934, + "step": 360 + }, + { + "chosen_geometric_mean": -1.007572889328003, + "epoch": 0.09, + "grad_norm": 2.59375, + "learning_rate": 4.976686040003987e-06, + "log_odds": 0.546827495098114, + "log_odds_ratio": -0.480060875415802, + "loss": 0.354, + "rejected_geometric_mean": -1.4065302610397339, + "step": 361 + }, + { + "chosen_geometric_mean": -1.2387031316757202, + "epoch": 0.09, + "grad_norm": 3.40625, + "learning_rate": 4.97655321602725e-06, + "log_odds": 0.6749162673950195, + "log_odds_ratio": -0.4401581585407257, + "loss": 0.3029, + "rejected_geometric_mean": -1.75050687789917, + "step": 362 + }, + { + "chosen_geometric_mean": -0.9877021312713623, + "epoch": 0.09, + "grad_norm": 9.1875, + "learning_rate": 4.976420016545598e-06, + "log_odds": 0.22389283776283264, + "log_odds_ratio": -0.5997564792633057, + "loss": 0.3375, + "rejected_geometric_mean": -1.1496398448944092, + "step": 363 + }, + { + "chosen_geometric_mean": -1.1415016651153564, + "epoch": 0.09, + "grad_norm": 5.75, + "learning_rate": 4.976286441579228e-06, + "log_odds": 0.3363339900970459, + "log_odds_ratio": -0.5879240036010742, + "loss": 0.351, + "rejected_geometric_mean": -1.4158568382263184, + "step": 364 + }, + { + "chosen_geometric_mean": -1.2034006118774414, + "epoch": 0.09, + "grad_norm": 2.90625, + "learning_rate": 4.976152491148392e-06, + "log_odds": 1.86408531665802, + "log_odds_ratio": -0.3103852868080139, + "loss": 0.274, + "rejected_geometric_mean": -2.8665497303009033, + "step": 365 + }, + { + "chosen_geometric_mean": -1.345849871635437, + "epoch": 0.09, + "grad_norm": 3.484375, + "learning_rate": 4.976018165273402e-06, + "log_odds": 0.8013976812362671, + "log_odds_ratio": -0.38716214895248413, + "loss": 0.2852, + "rejected_geometric_mean": -1.9766868352890015, + "step": 366 + }, + { + "chosen_geometric_mean": -1.3241486549377441, + "epoch": 0.09, + "grad_norm": 11.0625, + "learning_rate": 4.975883463974623e-06, + "log_odds": -0.29497992992401123, + "log_odds_ratio": -0.8739235401153564, + "loss": 0.3195, + "rejected_geometric_mean": -1.1043678522109985, + "step": 367 + }, + { + "chosen_geometric_mean": -1.208280086517334, + "epoch": 0.09, + "grad_norm": 2.328125, + "learning_rate": 4.975748387272481e-06, + "log_odds": 0.8247249126434326, + "log_odds_ratio": -0.3669470548629761, + "loss": 0.2331, + "rejected_geometric_mean": -1.8498871326446533, + "step": 368 + }, + { + "chosen_geometric_mean": -1.110971450805664, + "epoch": 0.09, + "grad_norm": 2.703125, + "learning_rate": 4.975612935187455e-06, + "log_odds": 1.6340192556381226, + "log_odds_ratio": -0.33694687485694885, + "loss": 0.2835, + "rejected_geometric_mean": -2.5328636169433594, + "step": 369 + }, + { + "chosen_geometric_mean": -1.1677980422973633, + "epoch": 0.09, + "grad_norm": 3.171875, + "learning_rate": 4.975477107740084e-06, + "log_odds": 0.36485663056373596, + "log_odds_ratio": -0.5435121655464172, + "loss": 0.2841, + "rejected_geometric_mean": -1.4561680555343628, + "step": 370 + }, + { + "chosen_geometric_mean": -1.083272099494934, + "epoch": 0.09, + "grad_norm": 11.5625, + "learning_rate": 4.975340904950963e-06, + "log_odds": 1.1467567682266235, + "log_odds_ratio": -0.3211004436016083, + "loss": 0.3531, + "rejected_geometric_mean": -1.9737480878829956, + "step": 371 + }, + { + "chosen_geometric_mean": -1.1842823028564453, + "epoch": 0.09, + "grad_norm": 4.71875, + "learning_rate": 4.975204326840741e-06, + "log_odds": 1.1074360609054565, + "log_odds_ratio": -0.3411163091659546, + "loss": 0.344, + "rejected_geometric_mean": -2.104248523712158, + "step": 372 + }, + { + "chosen_geometric_mean": -1.0357261896133423, + "epoch": 0.09, + "grad_norm": 8.125, + "learning_rate": 4.97506737343013e-06, + "log_odds": 1.2597417831420898, + "log_odds_ratio": -0.4808803200721741, + "loss": 0.3636, + "rejected_geometric_mean": -2.147030830383301, + "step": 373 + }, + { + "chosen_geometric_mean": -1.1991517543792725, + "epoch": 0.09, + "grad_norm": 7.71875, + "learning_rate": 4.974930044739894e-06, + "log_odds": 1.0927259922027588, + "log_odds_ratio": -0.35915929079055786, + "loss": 0.3151, + "rejected_geometric_mean": -2.1007094383239746, + "step": 374 + }, + { + "chosen_geometric_mean": -1.4545882940292358, + "epoch": 0.09, + "grad_norm": 11.0625, + "learning_rate": 4.974792340790855e-06, + "log_odds": 0.36699163913726807, + "log_odds_ratio": -0.5707244873046875, + "loss": 0.3509, + "rejected_geometric_mean": -1.7045893669128418, + "step": 375 + }, + { + "chosen_geometric_mean": -1.1533410549163818, + "epoch": 0.09, + "grad_norm": 4.21875, + "learning_rate": 4.974654261603892e-06, + "log_odds": 0.4760585427284241, + "log_odds_ratio": -0.495166540145874, + "loss": 0.3569, + "rejected_geometric_mean": -1.4890871047973633, + "step": 376 + }, + { + "chosen_geometric_mean": -1.2438678741455078, + "epoch": 0.09, + "grad_norm": 9.25, + "learning_rate": 4.974515807199941e-06, + "log_odds": 0.29896774888038635, + "log_odds_ratio": -0.5560833215713501, + "loss": 0.3448, + "rejected_geometric_mean": -1.4676140546798706, + "step": 377 + }, + { + "chosen_geometric_mean": -1.179314374923706, + "epoch": 0.09, + "grad_norm": 3.75, + "learning_rate": 4.974376977599997e-06, + "log_odds": 0.7394829392433167, + "log_odds_ratio": -0.39149588346481323, + "loss": 0.3431, + "rejected_geometric_mean": -1.736992597579956, + "step": 378 + }, + { + "chosen_geometric_mean": -1.4012278318405151, + "epoch": 0.09, + "grad_norm": 4.78125, + "learning_rate": 4.9742377728251075e-06, + "log_odds": 0.14907661080360413, + "log_odds_ratio": -0.6222332715988159, + "loss": 0.3419, + "rejected_geometric_mean": -1.5159555673599243, + "step": 379 + }, + { + "chosen_geometric_mean": -1.09626305103302, + "epoch": 0.09, + "grad_norm": 3.390625, + "learning_rate": 4.974098192896381e-06, + "log_odds": 0.4180215299129486, + "log_odds_ratio": -0.5078508853912354, + "loss": 0.3046, + "rejected_geometric_mean": -1.3904986381530762, + "step": 380 + }, + { + "chosen_geometric_mean": -1.2631051540374756, + "epoch": 0.09, + "grad_norm": 2.921875, + "learning_rate": 4.973958237834979e-06, + "log_odds": 1.7336018085479736, + "log_odds_ratio": -0.40164393186569214, + "loss": 0.3919, + "rejected_geometric_mean": -2.8562498092651367, + "step": 381 + }, + { + "chosen_geometric_mean": -1.23740553855896, + "epoch": 0.09, + "grad_norm": 2.546875, + "learning_rate": 4.973817907662124e-06, + "log_odds": 0.869941234588623, + "log_odds_ratio": -0.40024933218955994, + "loss": 0.3451, + "rejected_geometric_mean": -1.9591573476791382, + "step": 382 + }, + { + "chosen_geometric_mean": -1.0213465690612793, + "epoch": 0.09, + "grad_norm": 3.578125, + "learning_rate": 4.9736772023990936e-06, + "log_odds": 0.9123114347457886, + "log_odds_ratio": -0.45121264457702637, + "loss": 0.3153, + "rejected_geometric_mean": -1.7247400283813477, + "step": 383 + }, + { + "chosen_geometric_mean": -1.0522135496139526, + "epoch": 0.1, + "grad_norm": 2.796875, + "learning_rate": 4.973536122067221e-06, + "log_odds": 0.15687739849090576, + "log_odds_ratio": -0.6480624675750732, + "loss": 0.3403, + "rejected_geometric_mean": -1.2186856269836426, + "step": 384 + }, + { + "chosen_geometric_mean": -1.231806993484497, + "epoch": 0.1, + "grad_norm": 5.5625, + "learning_rate": 4.973394666687897e-06, + "log_odds": 0.853783369064331, + "log_odds_ratio": -0.3950483798980713, + "loss": 0.2914, + "rejected_geometric_mean": -1.93462073802948, + "step": 385 + }, + { + "chosen_geometric_mean": -1.2268648147583008, + "epoch": 0.1, + "grad_norm": 17.625, + "learning_rate": 4.973252836282572e-06, + "log_odds": 2.6147711277008057, + "log_odds_ratio": -0.3719320297241211, + "loss": 0.32, + "rejected_geometric_mean": -3.670952081680298, + "step": 386 + }, + { + "chosen_geometric_mean": -1.328624963760376, + "epoch": 0.1, + "grad_norm": 25.75, + "learning_rate": 4.9731106308727485e-06, + "log_odds": 0.8905925750732422, + "log_odds_ratio": -0.4179306924343109, + "loss": 0.4283, + "rejected_geometric_mean": -2.0525331497192383, + "step": 387 + }, + { + "chosen_geometric_mean": -1.0086941719055176, + "epoch": 0.1, + "grad_norm": 3.03125, + "learning_rate": 4.972968050479989e-06, + "log_odds": 0.5038474202156067, + "log_odds_ratio": -0.4757832884788513, + "loss": 0.3345, + "rejected_geometric_mean": -1.3543696403503418, + "step": 388 + }, + { + "chosen_geometric_mean": -1.1026906967163086, + "epoch": 0.1, + "grad_norm": 2.921875, + "learning_rate": 4.972825095125912e-06, + "log_odds": 0.6350424289703369, + "log_odds_ratio": -0.4647711515426636, + "loss": 0.2587, + "rejected_geometric_mean": -1.6024608612060547, + "step": 389 + }, + { + "chosen_geometric_mean": -1.2663567066192627, + "epoch": 0.1, + "grad_norm": 3.125, + "learning_rate": 4.972681764832192e-06, + "log_odds": 0.564903974533081, + "log_odds_ratio": -0.4628449082374573, + "loss": 0.3278, + "rejected_geometric_mean": -1.712633490562439, + "step": 390 + }, + { + "chosen_geometric_mean": -1.1340337991714478, + "epoch": 0.1, + "grad_norm": 2.65625, + "learning_rate": 4.972538059620564e-06, + "log_odds": 0.4525113105773926, + "log_odds_ratio": -0.5155782103538513, + "loss": 0.3868, + "rejected_geometric_mean": -1.458471655845642, + "step": 391 + }, + { + "chosen_geometric_mean": -1.2306134700775146, + "epoch": 0.1, + "grad_norm": 3.140625, + "learning_rate": 4.972393979512815e-06, + "log_odds": 0.4719717502593994, + "log_odds_ratio": -0.5137293934822083, + "loss": 0.3938, + "rejected_geometric_mean": -1.5849393606185913, + "step": 392 + }, + { + "chosen_geometric_mean": -1.3528354167938232, + "epoch": 0.1, + "grad_norm": 2.765625, + "learning_rate": 4.972249524530792e-06, + "log_odds": 1.2694923877716064, + "log_odds_ratio": -0.2907450795173645, + "loss": 0.2954, + "rejected_geometric_mean": -2.4485390186309814, + "step": 393 + }, + { + "chosen_geometric_mean": -1.2548878192901611, + "epoch": 0.1, + "grad_norm": 3.375, + "learning_rate": 4.972104694696397e-06, + "log_odds": 0.6302085518836975, + "log_odds_ratio": -0.4503012001514435, + "loss": 0.3365, + "rejected_geometric_mean": -1.760292410850525, + "step": 394 + }, + { + "chosen_geometric_mean": -1.0666898488998413, + "epoch": 0.1, + "grad_norm": 6.21875, + "learning_rate": 4.9719594900315895e-06, + "log_odds": 2.5426154136657715, + "log_odds_ratio": -0.32342949509620667, + "loss": 0.329, + "rejected_geometric_mean": -3.393279552459717, + "step": 395 + }, + { + "chosen_geometric_mean": -1.1587163209915161, + "epoch": 0.1, + "grad_norm": 6.625, + "learning_rate": 4.971813910558387e-06, + "log_odds": 0.21176405251026154, + "log_odds_ratio": -0.6035059094429016, + "loss": 0.3551, + "rejected_geometric_mean": -1.315605878829956, + "step": 396 + }, + { + "chosen_geometric_mean": -1.3781061172485352, + "epoch": 0.1, + "grad_norm": 2.5625, + "learning_rate": 4.971667956298863e-06, + "log_odds": 0.12968821823596954, + "log_odds_ratio": -0.6334434747695923, + "loss": 0.3193, + "rejected_geometric_mean": -1.4722530841827393, + "step": 397 + }, + { + "chosen_geometric_mean": -1.1722034215927124, + "epoch": 0.1, + "grad_norm": 2.640625, + "learning_rate": 4.971521627275146e-06, + "log_odds": 2.176056385040283, + "log_odds_ratio": -0.2301880121231079, + "loss": 0.3401, + "rejected_geometric_mean": -3.1387362480163574, + "step": 398 + }, + { + "chosen_geometric_mean": -1.071449875831604, + "epoch": 0.1, + "grad_norm": 4.0, + "learning_rate": 4.971374923509425e-06, + "log_odds": 1.5900613069534302, + "log_odds_ratio": -0.49722468852996826, + "loss": 0.3202, + "rejected_geometric_mean": -2.576514959335327, + "step": 399 + }, + { + "chosen_geometric_mean": -1.1810109615325928, + "epoch": 0.1, + "grad_norm": 3.109375, + "learning_rate": 4.971227845023943e-06, + "log_odds": 0.6897750496864319, + "log_odds_ratio": -0.4269440174102783, + "loss": 0.3657, + "rejected_geometric_mean": -1.708787441253662, + "step": 400 + }, + { + "chosen_geometric_mean": -0.9286993145942688, + "epoch": 0.1, + "grad_norm": 2.515625, + "learning_rate": 4.971080391841001e-06, + "log_odds": 0.41966360807418823, + "log_odds_ratio": -0.5248618721961975, + "loss": 0.3752, + "rejected_geometric_mean": -1.2047778367996216, + "step": 401 + }, + { + "chosen_geometric_mean": -1.3560744524002075, + "epoch": 0.1, + "grad_norm": 2.578125, + "learning_rate": 4.970932563982954e-06, + "log_odds": 0.5435925722122192, + "log_odds_ratio": -0.4985630512237549, + "loss": 0.3743, + "rejected_geometric_mean": -1.8170976638793945, + "step": 402 + }, + { + "chosen_geometric_mean": -1.2866156101226807, + "epoch": 0.1, + "grad_norm": 2.53125, + "learning_rate": 4.97078436147222e-06, + "log_odds": 0.5897354483604431, + "log_odds_ratio": -0.4460267424583435, + "loss": 0.329, + "rejected_geometric_mean": -1.744659185409546, + "step": 403 + }, + { + "chosen_geometric_mean": -1.1142027378082275, + "epoch": 0.1, + "grad_norm": 2.234375, + "learning_rate": 4.970635784331268e-06, + "log_odds": 0.3437427282333374, + "log_odds_ratio": -0.5436228513717651, + "loss": 0.3619, + "rejected_geometric_mean": -1.3728878498077393, + "step": 404 + }, + { + "chosen_geometric_mean": -1.0982649326324463, + "epoch": 0.1, + "grad_norm": 2.515625, + "learning_rate": 4.970486832582626e-06, + "log_odds": -0.023696430027484894, + "log_odds_ratio": -0.7277148962020874, + "loss": 0.3641, + "rejected_geometric_mean": -1.1194019317626953, + "step": 405 + }, + { + "chosen_geometric_mean": -1.0459905862808228, + "epoch": 0.1, + "grad_norm": 3.140625, + "learning_rate": 4.970337506248879e-06, + "log_odds": 0.5184637904167175, + "log_odds_ratio": -0.4899861216545105, + "loss": 0.3599, + "rejected_geometric_mean": -1.4183602333068848, + "step": 406 + }, + { + "chosen_geometric_mean": -1.0832165479660034, + "epoch": 0.1, + "grad_norm": 2.390625, + "learning_rate": 4.970187805352668e-06, + "log_odds": 0.44457390904426575, + "log_odds_ratio": -0.5307112336158752, + "loss": 0.3362, + "rejected_geometric_mean": -1.4276210069656372, + "step": 407 + }, + { + "chosen_geometric_mean": -0.9470300674438477, + "epoch": 0.1, + "grad_norm": 2.875, + "learning_rate": 4.970037729916691e-06, + "log_odds": 0.6587172746658325, + "log_odds_ratio": -0.4225254952907562, + "loss": 0.4009, + "rejected_geometric_mean": -1.3912765979766846, + "step": 408 + }, + { + "chosen_geometric_mean": -1.136187195777893, + "epoch": 0.1, + "grad_norm": 3.03125, + "learning_rate": 4.969887279963705e-06, + "log_odds": 0.7312847971916199, + "log_odds_ratio": -0.4099994897842407, + "loss": 0.3263, + "rejected_geometric_mean": -1.6975173950195312, + "step": 409 + }, + { + "chosen_geometric_mean": -1.2319891452789307, + "epoch": 0.1, + "grad_norm": 6.09375, + "learning_rate": 4.96973645551652e-06, + "log_odds": 0.48430728912353516, + "log_odds_ratio": -0.48518455028533936, + "loss": 0.3401, + "rejected_geometric_mean": -1.6013134717941284, + "step": 410 + }, + { + "chosen_geometric_mean": -1.244951605796814, + "epoch": 0.1, + "grad_norm": 6.15625, + "learning_rate": 4.969585256598004e-06, + "log_odds": 1.2783488035202026, + "log_odds_ratio": -0.39170753955841064, + "loss": 0.3072, + "rejected_geometric_mean": -2.3963022232055664, + "step": 411 + }, + { + "chosen_geometric_mean": -1.2506130933761597, + "epoch": 0.1, + "grad_norm": 13.125, + "learning_rate": 4.969433683231084e-06, + "log_odds": 0.4342688322067261, + "log_odds_ratio": -0.5108813643455505, + "loss": 0.3326, + "rejected_geometric_mean": -1.5726466178894043, + "step": 412 + }, + { + "chosen_geometric_mean": -1.3269786834716797, + "epoch": 0.1, + "grad_norm": 6.28125, + "learning_rate": 4.969281735438741e-06, + "log_odds": 0.12043699622154236, + "log_odds_ratio": -0.6394372582435608, + "loss": 0.4464, + "rejected_geometric_mean": -1.414017677307129, + "step": 413 + }, + { + "chosen_geometric_mean": -1.0056965351104736, + "epoch": 0.1, + "grad_norm": 3.296875, + "learning_rate": 4.969129413244015e-06, + "log_odds": 0.7813076972961426, + "log_odds_ratio": -0.4849604368209839, + "loss": 0.3347, + "rejected_geometric_mean": -1.6497507095336914, + "step": 414 + }, + { + "chosen_geometric_mean": -1.1964919567108154, + "epoch": 0.1, + "grad_norm": 14.25, + "learning_rate": 4.9689767166700005e-06, + "log_odds": 2.972177028656006, + "log_odds_ratio": -0.19860829412937164, + "loss": 0.4132, + "rejected_geometric_mean": -3.920228958129883, + "step": 415 + }, + { + "chosen_geometric_mean": -1.0971301794052124, + "epoch": 0.1, + "grad_norm": 4.28125, + "learning_rate": 4.968823645739852e-06, + "log_odds": 0.8034006953239441, + "log_odds_ratio": -0.5229478478431702, + "loss": 0.3741, + "rejected_geometric_mean": -1.7927697896957397, + "step": 416 + }, + { + "chosen_geometric_mean": -0.9829413890838623, + "epoch": 0.1, + "grad_norm": 3.28125, + "learning_rate": 4.968670200476777e-06, + "log_odds": 1.8255679607391357, + "log_odds_ratio": -0.22209452092647552, + "loss": 0.3531, + "rejected_geometric_mean": -2.4710841178894043, + "step": 417 + }, + { + "chosen_geometric_mean": -1.176067590713501, + "epoch": 0.1, + "grad_norm": 3.390625, + "learning_rate": 4.968516380904041e-06, + "log_odds": 0.9955872297286987, + "log_odds_ratio": -0.4470263421535492, + "loss": 0.3466, + "rejected_geometric_mean": -2.033424139022827, + "step": 418 + }, + { + "chosen_geometric_mean": -1.0870437622070312, + "epoch": 0.1, + "grad_norm": 5.03125, + "learning_rate": 4.968362187044968e-06, + "log_odds": 0.38562849164009094, + "log_odds_ratio": -0.5288567543029785, + "loss": 0.3278, + "rejected_geometric_mean": -1.3743491172790527, + "step": 419 + }, + { + "chosen_geometric_mean": -1.0568761825561523, + "epoch": 0.1, + "grad_norm": 10.3125, + "learning_rate": 4.968207618922937e-06, + "log_odds": 0.9620361924171448, + "log_odds_ratio": -0.449487566947937, + "loss": 0.357, + "rejected_geometric_mean": -1.8494291305541992, + "step": 420 + }, + { + "chosen_geometric_mean": -1.1976715326309204, + "epoch": 0.1, + "grad_norm": 2.71875, + "learning_rate": 4.968052676561384e-06, + "log_odds": 2.4991376399993896, + "log_odds_ratio": -0.48710155487060547, + "loss": 0.3157, + "rejected_geometric_mean": -3.5876030921936035, + "step": 421 + }, + { + "chosen_geometric_mean": -1.1034668684005737, + "epoch": 0.1, + "grad_norm": 2.765625, + "learning_rate": 4.967897359983803e-06, + "log_odds": 0.7050017714500427, + "log_odds_ratio": -0.4386187195777893, + "loss": 0.3023, + "rejected_geometric_mean": -1.6477315425872803, + "step": 422 + }, + { + "chosen_geometric_mean": -1.2270792722702026, + "epoch": 0.1, + "grad_norm": 2.765625, + "learning_rate": 4.967741669213743e-06, + "log_odds": 2.3640472888946533, + "log_odds_ratio": -0.27483436465263367, + "loss": 0.2859, + "rejected_geometric_mean": -3.3522121906280518, + "step": 423 + }, + { + "chosen_geometric_mean": -1.111342191696167, + "epoch": 0.1, + "grad_norm": 4.03125, + "learning_rate": 4.967585604274811e-06, + "log_odds": 2.0447232723236084, + "log_odds_ratio": -0.36133742332458496, + "loss": 0.271, + "rejected_geometric_mean": -2.990628242492676, + "step": 424 + }, + { + "chosen_geometric_mean": -0.9520787000656128, + "epoch": 0.11, + "grad_norm": 2.84375, + "learning_rate": 4.967429165190668e-06, + "log_odds": 1.410316824913025, + "log_odds_ratio": -0.35939908027648926, + "loss": 0.3186, + "rejected_geometric_mean": -2.1275761127471924, + "step": 425 + }, + { + "chosen_geometric_mean": -1.0490070581436157, + "epoch": 0.11, + "grad_norm": 4.5, + "learning_rate": 4.967272351985037e-06, + "log_odds": 1.1459243297576904, + "log_odds_ratio": -0.38950350880622864, + "loss": 0.345, + "rejected_geometric_mean": -1.933181643486023, + "step": 426 + }, + { + "chosen_geometric_mean": -1.100046157836914, + "epoch": 0.11, + "grad_norm": 6.125, + "learning_rate": 4.967115164681693e-06, + "log_odds": 1.467482089996338, + "log_odds_ratio": -0.33886247873306274, + "loss": 0.3047, + "rejected_geometric_mean": -2.332857847213745, + "step": 427 + }, + { + "chosen_geometric_mean": -1.2083392143249512, + "epoch": 0.11, + "grad_norm": 6.125, + "learning_rate": 4.966957603304469e-06, + "log_odds": 2.622241735458374, + "log_odds_ratio": -0.15483753383159637, + "loss": 0.3006, + "rejected_geometric_mean": -3.5368804931640625, + "step": 428 + }, + { + "chosen_geometric_mean": -1.1946674585342407, + "epoch": 0.11, + "grad_norm": 4.59375, + "learning_rate": 4.966799667877255e-06, + "log_odds": 1.4920878410339355, + "log_odds_ratio": -0.32918697595596313, + "loss": 0.3295, + "rejected_geometric_mean": -2.442279815673828, + "step": 429 + }, + { + "chosen_geometric_mean": -1.181828260421753, + "epoch": 0.11, + "grad_norm": 19.5, + "learning_rate": 4.9666413584240005e-06, + "log_odds": 1.3142215013504028, + "log_odds_ratio": -0.3630255460739136, + "loss": 0.3498, + "rejected_geometric_mean": -2.317513942718506, + "step": 430 + }, + { + "chosen_geometric_mean": -1.3676056861877441, + "epoch": 0.11, + "grad_norm": 7.5625, + "learning_rate": 4.966482674968706e-06, + "log_odds": 1.7839033603668213, + "log_odds_ratio": -0.31749606132507324, + "loss": 0.3491, + "rejected_geometric_mean": -2.990821599960327, + "step": 431 + }, + { + "chosen_geometric_mean": -1.4539626836776733, + "epoch": 0.11, + "grad_norm": 40.75, + "learning_rate": 4.966323617535433e-06, + "log_odds": 1.8545329570770264, + "log_odds_ratio": -0.3480570614337921, + "loss": 0.3602, + "rejected_geometric_mean": -3.18497896194458, + "step": 432 + }, + { + "chosen_geometric_mean": -1.3661092519760132, + "epoch": 0.11, + "grad_norm": 29.0, + "learning_rate": 4.966164186148298e-06, + "log_odds": 2.065199375152588, + "log_odds_ratio": -0.2962234914302826, + "loss": 0.3576, + "rejected_geometric_mean": -3.1610093116760254, + "step": 433 + }, + { + "chosen_geometric_mean": -1.0872844457626343, + "epoch": 0.11, + "grad_norm": 5.25, + "learning_rate": 4.966004380831474e-06, + "log_odds": 1.0125632286071777, + "log_odds_ratio": -0.41014745831489563, + "loss": 0.2893, + "rejected_geometric_mean": -1.9273886680603027, + "step": 434 + }, + { + "chosen_geometric_mean": -1.2473175525665283, + "epoch": 0.11, + "grad_norm": 16.875, + "learning_rate": 4.965844201609193e-06, + "log_odds": 0.29507529735565186, + "log_odds_ratio": -0.5683002471923828, + "loss": 0.3468, + "rejected_geometric_mean": -1.4575648307800293, + "step": 435 + }, + { + "chosen_geometric_mean": -1.1278932094573975, + "epoch": 0.11, + "grad_norm": 5.5, + "learning_rate": 4.965683648505741e-06, + "log_odds": 0.3778820037841797, + "log_odds_ratio": -0.5332691669464111, + "loss": 0.3267, + "rejected_geometric_mean": -1.3945953845977783, + "step": 436 + }, + { + "chosen_geometric_mean": -1.2489854097366333, + "epoch": 0.11, + "grad_norm": 4.1875, + "learning_rate": 4.965522721545461e-06, + "log_odds": 1.07789146900177, + "log_odds_ratio": -0.4058510363101959, + "loss": 0.3466, + "rejected_geometric_mean": -2.1798195838928223, + "step": 437 + }, + { + "chosen_geometric_mean": -1.176177978515625, + "epoch": 0.11, + "grad_norm": 3.265625, + "learning_rate": 4.965361420752756e-06, + "log_odds": 0.6299899816513062, + "log_odds_ratio": -0.4352540969848633, + "loss": 0.2908, + "rejected_geometric_mean": -1.645702838897705, + "step": 438 + }, + { + "chosen_geometric_mean": -0.9729644656181335, + "epoch": 0.11, + "grad_norm": 4.90625, + "learning_rate": 4.96519974615208e-06, + "log_odds": 0.47512370347976685, + "log_odds_ratio": -0.502302348613739, + "loss": 0.3018, + "rejected_geometric_mean": -1.2887003421783447, + "step": 439 + }, + { + "chosen_geometric_mean": -1.1826688051223755, + "epoch": 0.11, + "grad_norm": 6.9375, + "learning_rate": 4.965037697767949e-06, + "log_odds": 1.0678303241729736, + "log_odds_ratio": -0.4559107720851898, + "loss": 0.3539, + "rejected_geometric_mean": -2.1347784996032715, + "step": 440 + }, + { + "chosen_geometric_mean": -1.3520146608352661, + "epoch": 0.11, + "grad_norm": 7.59375, + "learning_rate": 4.964875275624931e-06, + "log_odds": 1.5222532749176025, + "log_odds_ratio": -0.4083974361419678, + "loss": 0.3123, + "rejected_geometric_mean": -2.7442073822021484, + "step": 441 + }, + { + "chosen_geometric_mean": -1.2161865234375, + "epoch": 0.11, + "grad_norm": 25.625, + "learning_rate": 4.964712479747655e-06, + "log_odds": 0.5318041443824768, + "log_odds_ratio": -0.5193435549736023, + "loss": 0.3231, + "rejected_geometric_mean": -1.6481047868728638, + "step": 442 + }, + { + "chosen_geometric_mean": -0.9814406633377075, + "epoch": 0.11, + "grad_norm": 14.375, + "learning_rate": 4.964549310160805e-06, + "log_odds": 0.02451208233833313, + "log_odds_ratio": -0.7032529711723328, + "loss": 0.3436, + "rejected_geometric_mean": -0.9849827885627747, + "step": 443 + }, + { + "chosen_geometric_mean": -1.4274622201919556, + "epoch": 0.11, + "grad_norm": 13.625, + "learning_rate": 4.964385766889121e-06, + "log_odds": 1.3200185298919678, + "log_odds_ratio": -0.2846577763557434, + "loss": 0.3073, + "rejected_geometric_mean": -2.567185163497925, + "step": 444 + }, + { + "chosen_geometric_mean": -1.2787030935287476, + "epoch": 0.11, + "grad_norm": 4.78125, + "learning_rate": 4.964221849957399e-06, + "log_odds": 2.619938373565674, + "log_odds_ratio": -0.14515458047389984, + "loss": 0.2741, + "rejected_geometric_mean": -3.634828567504883, + "step": 445 + }, + { + "chosen_geometric_mean": -1.2337548732757568, + "epoch": 0.11, + "grad_norm": 5.59375, + "learning_rate": 4.9640575593904936e-06, + "log_odds": 2.3177947998046875, + "log_odds_ratio": -0.40964275598526, + "loss": 0.2903, + "rejected_geometric_mean": -3.409881591796875, + "step": 446 + }, + { + "chosen_geometric_mean": -1.1703290939331055, + "epoch": 0.11, + "grad_norm": 3.640625, + "learning_rate": 4.9638928952133144e-06, + "log_odds": 0.0925067737698555, + "log_odds_ratio": -0.6554445028305054, + "loss": 0.3403, + "rejected_geometric_mean": -1.2298998832702637, + "step": 447 + }, + { + "chosen_geometric_mean": -0.9989065527915955, + "epoch": 0.11, + "grad_norm": 5.65625, + "learning_rate": 4.963727857450831e-06, + "log_odds": 1.153738021850586, + "log_odds_ratio": -0.44651490449905396, + "loss": 0.2833, + "rejected_geometric_mean": -2.0225961208343506, + "step": 448 + }, + { + "chosen_geometric_mean": -1.0912617444992065, + "epoch": 0.11, + "grad_norm": 3.46875, + "learning_rate": 4.963562446128064e-06, + "log_odds": 0.3823373317718506, + "log_odds_ratio": -0.5352867841720581, + "loss": 0.3473, + "rejected_geometric_mean": -1.3600574731826782, + "step": 449 + }, + { + "chosen_geometric_mean": -1.1442763805389404, + "epoch": 0.11, + "grad_norm": 9.75, + "learning_rate": 4.963396661270095e-06, + "log_odds": 0.5972590446472168, + "log_odds_ratio": -0.5280458331108093, + "loss": 0.3533, + "rejected_geometric_mean": -1.661576509475708, + "step": 450 + }, + { + "chosen_geometric_mean": -1.0983250141143799, + "epoch": 0.11, + "grad_norm": 3.296875, + "learning_rate": 4.963230502902061e-06, + "log_odds": 1.33872389793396, + "log_odds_ratio": -0.3937245011329651, + "loss": 0.3181, + "rejected_geometric_mean": -2.2407174110412598, + "step": 451 + }, + { + "chosen_geometric_mean": -1.1620044708251953, + "epoch": 0.11, + "grad_norm": 3.0625, + "learning_rate": 4.9630639710491565e-06, + "log_odds": 0.9008673429489136, + "log_odds_ratio": -0.4115908741950989, + "loss": 0.3263, + "rejected_geometric_mean": -1.899512529373169, + "step": 452 + }, + { + "chosen_geometric_mean": -1.0147432088851929, + "epoch": 0.11, + "grad_norm": 3.234375, + "learning_rate": 4.96289706573663e-06, + "log_odds": 1.6531442403793335, + "log_odds_ratio": -0.43467122316360474, + "loss": 0.3146, + "rejected_geometric_mean": -2.505366325378418, + "step": 453 + }, + { + "chosen_geometric_mean": -1.0569819211959839, + "epoch": 0.11, + "grad_norm": 12.5, + "learning_rate": 4.9627297869897895e-06, + "log_odds": 1.145910620689392, + "log_odds_ratio": -0.4409821033477783, + "loss": 0.3477, + "rejected_geometric_mean": -2.0586812496185303, + "step": 454 + }, + { + "chosen_geometric_mean": -1.195504069328308, + "epoch": 0.11, + "grad_norm": 8.1875, + "learning_rate": 4.962562134833998e-06, + "log_odds": 2.928720712661743, + "log_odds_ratio": -0.22648121416568756, + "loss": 0.2927, + "rejected_geometric_mean": -3.882040023803711, + "step": 455 + }, + { + "chosen_geometric_mean": -1.0970526933670044, + "epoch": 0.11, + "grad_norm": 4.28125, + "learning_rate": 4.962394109294676e-06, + "log_odds": 1.1741214990615845, + "log_odds_ratio": -0.28294065594673157, + "loss": 0.3351, + "rejected_geometric_mean": -2.024284839630127, + "step": 456 + }, + { + "chosen_geometric_mean": -1.2017168998718262, + "epoch": 0.11, + "grad_norm": 8.75, + "learning_rate": 4.9622257103973e-06, + "log_odds": 0.574148416519165, + "log_odds_ratio": -0.4597485661506653, + "loss": 0.34, + "rejected_geometric_mean": -1.6439591646194458, + "step": 457 + }, + { + "chosen_geometric_mean": -1.0543317794799805, + "epoch": 0.11, + "grad_norm": 5.6875, + "learning_rate": 4.962056938167403e-06, + "log_odds": 0.631183385848999, + "log_odds_ratio": -0.462896466255188, + "loss": 0.3243, + "rejected_geometric_mean": -1.540545105934143, + "step": 458 + }, + { + "chosen_geometric_mean": -1.0911026000976562, + "epoch": 0.11, + "grad_norm": 4.28125, + "learning_rate": 4.961887792630575e-06, + "log_odds": 0.8551726341247559, + "log_odds_ratio": -0.37974730134010315, + "loss": 0.4116, + "rejected_geometric_mean": -1.7365964651107788, + "step": 459 + }, + { + "chosen_geometric_mean": -1.2604730129241943, + "epoch": 0.11, + "grad_norm": 8.5625, + "learning_rate": 4.961718273812462e-06, + "log_odds": 1.0697718858718872, + "log_odds_ratio": -0.34713971614837646, + "loss": 0.2912, + "rejected_geometric_mean": -2.1523211002349854, + "step": 460 + }, + { + "chosen_geometric_mean": -1.1860663890838623, + "epoch": 0.11, + "grad_norm": 6.0625, + "learning_rate": 4.9615483817387685e-06, + "log_odds": 0.48210275173187256, + "log_odds_ratio": -0.5005173683166504, + "loss": 0.2902, + "rejected_geometric_mean": -1.5617561340332031, + "step": 461 + }, + { + "chosen_geometric_mean": -1.138071060180664, + "epoch": 0.11, + "grad_norm": 4.125, + "learning_rate": 4.961378116435254e-06, + "log_odds": 0.8723495006561279, + "log_odds_ratio": -0.48554471135139465, + "loss": 0.3187, + "rejected_geometric_mean": -1.8796608448028564, + "step": 462 + }, + { + "chosen_geometric_mean": -1.0842421054840088, + "epoch": 0.11, + "grad_norm": 4.15625, + "learning_rate": 4.961207477927734e-06, + "log_odds": 2.2739312648773193, + "log_odds_ratio": -0.2888869643211365, + "loss": 0.3328, + "rejected_geometric_mean": -3.1516036987304688, + "step": 463 + }, + { + "chosen_geometric_mean": -1.0309950113296509, + "epoch": 0.11, + "grad_norm": 3.125, + "learning_rate": 4.9610364662420815e-06, + "log_odds": 1.8337092399597168, + "log_odds_ratio": -0.2396613359451294, + "loss": 0.2981, + "rejected_geometric_mean": -2.5528242588043213, + "step": 464 + }, + { + "chosen_geometric_mean": -0.9553298354148865, + "epoch": 0.12, + "grad_norm": 27.25, + "learning_rate": 4.960865081404227e-06, + "log_odds": 2.6388401985168457, + "log_odds_ratio": -0.2133084535598755, + "loss": 0.3364, + "rejected_geometric_mean": -3.233643054962158, + "step": 465 + }, + { + "chosen_geometric_mean": -1.0527034997940063, + "epoch": 0.12, + "grad_norm": 4.3125, + "learning_rate": 4.960693323440155e-06, + "log_odds": 1.6185386180877686, + "log_odds_ratio": -0.3407844305038452, + "loss": 0.302, + "rejected_geometric_mean": -2.41257381439209, + "step": 466 + }, + { + "chosen_geometric_mean": -1.166412115097046, + "epoch": 0.12, + "grad_norm": 27.0, + "learning_rate": 4.960521192375909e-06, + "log_odds": 4.457666873931885, + "log_odds_ratio": -0.25166505575180054, + "loss": 0.3144, + "rejected_geometric_mean": -5.401359558105469, + "step": 467 + }, + { + "chosen_geometric_mean": -1.1389591693878174, + "epoch": 0.12, + "grad_norm": 12.0625, + "learning_rate": 4.960348688237588e-06, + "log_odds": 2.153461456298828, + "log_odds_ratio": -0.22107459604740143, + "loss": 0.3687, + "rejected_geometric_mean": -3.0179357528686523, + "step": 468 + }, + { + "chosen_geometric_mean": -1.2968381643295288, + "epoch": 0.12, + "grad_norm": 3.984375, + "learning_rate": 4.960175811051348e-06, + "log_odds": 3.5543527603149414, + "log_odds_ratio": -0.17851999402046204, + "loss": 0.3489, + "rejected_geometric_mean": -4.61310338973999, + "step": 469 + }, + { + "chosen_geometric_mean": -1.0184730291366577, + "epoch": 0.12, + "grad_norm": 5.28125, + "learning_rate": 4.960002560843401e-06, + "log_odds": 0.38654765486717224, + "log_odds_ratio": -0.5256352424621582, + "loss": 0.3734, + "rejected_geometric_mean": -1.279801368713379, + "step": 470 + }, + { + "chosen_geometric_mean": -1.0569634437561035, + "epoch": 0.12, + "grad_norm": 8.875, + "learning_rate": 4.959828937640017e-06, + "log_odds": 2.6539785861968994, + "log_odds_ratio": -0.13687771558761597, + "loss": 0.3281, + "rejected_geometric_mean": -3.367488384246826, + "step": 471 + }, + { + "chosen_geometric_mean": -1.275186538696289, + "epoch": 0.12, + "grad_norm": 15.0, + "learning_rate": 4.959654941467519e-06, + "log_odds": 3.9638619422912598, + "log_odds_ratio": -0.23074717819690704, + "loss": 0.3695, + "rejected_geometric_mean": -5.0332417488098145, + "step": 472 + }, + { + "chosen_geometric_mean": -0.8870686888694763, + "epoch": 0.12, + "grad_norm": 7.0, + "learning_rate": 4.959480572352291e-06, + "log_odds": 1.9592535495758057, + "log_odds_ratio": -0.30593758821487427, + "loss": 0.3071, + "rejected_geometric_mean": -2.5345277786254883, + "step": 473 + }, + { + "chosen_geometric_mean": -1.1908384561538696, + "epoch": 0.12, + "grad_norm": 9.1875, + "learning_rate": 4.959305830320772e-06, + "log_odds": 5.593839168548584, + "log_odds_ratio": -0.008600474335253239, + "loss": 0.2753, + "rejected_geometric_mean": -6.3812642097473145, + "step": 474 + }, + { + "chosen_geometric_mean": -1.1201249361038208, + "epoch": 0.12, + "grad_norm": 3.015625, + "learning_rate": 4.959130715399455e-06, + "log_odds": 3.6007447242736816, + "log_odds_ratio": -0.1732337921857834, + "loss": 0.2935, + "rejected_geometric_mean": -4.4094438552856445, + "step": 475 + }, + { + "chosen_geometric_mean": -1.2476950883865356, + "epoch": 0.12, + "grad_norm": 3.578125, + "learning_rate": 4.958955227614893e-06, + "log_odds": 1.799126386642456, + "log_odds_ratio": -0.3123151659965515, + "loss": 0.3116, + "rejected_geometric_mean": -2.8479461669921875, + "step": 476 + }, + { + "chosen_geometric_mean": -1.1753144264221191, + "epoch": 0.12, + "grad_norm": 6.15625, + "learning_rate": 4.958779366993695e-06, + "log_odds": 1.1063306331634521, + "log_odds_ratio": -0.35988786816596985, + "loss": 0.303, + "rejected_geometric_mean": -2.1038522720336914, + "step": 477 + }, + { + "chosen_geometric_mean": -1.043491244316101, + "epoch": 0.12, + "grad_norm": 3.921875, + "learning_rate": 4.958603133562523e-06, + "log_odds": 0.23347845673561096, + "log_odds_ratio": -0.6013347506523132, + "loss": 0.3721, + "rejected_geometric_mean": -1.1824027299880981, + "step": 478 + }, + { + "chosen_geometric_mean": -1.3488932847976685, + "epoch": 0.12, + "grad_norm": 5.8125, + "learning_rate": 4.958426527348101e-06, + "log_odds": 0.26828959584236145, + "log_odds_ratio": -0.5736435651779175, + "loss": 0.3459, + "rejected_geometric_mean": -1.5548005104064941, + "step": 479 + }, + { + "chosen_geometric_mean": -1.0723705291748047, + "epoch": 0.12, + "grad_norm": 38.5, + "learning_rate": 4.958249548377205e-06, + "log_odds": 2.916334867477417, + "log_odds_ratio": -0.2820374369621277, + "loss": 0.3711, + "rejected_geometric_mean": -3.7272567749023438, + "step": 480 + }, + { + "chosen_geometric_mean": -1.2238478660583496, + "epoch": 0.12, + "grad_norm": 18.5, + "learning_rate": 4.95807219667667e-06, + "log_odds": 2.1051599979400635, + "log_odds_ratio": -0.38767191767692566, + "loss": 0.3763, + "rejected_geometric_mean": -3.1561272144317627, + "step": 481 + }, + { + "chosen_geometric_mean": -1.3290232419967651, + "epoch": 0.12, + "grad_norm": 6.40625, + "learning_rate": 4.9578944722733865e-06, + "log_odds": 5.445578098297119, + "log_odds_ratio": -0.13271267712116241, + "loss": 0.3516, + "rejected_geometric_mean": -6.489430904388428, + "step": 482 + }, + { + "chosen_geometric_mean": -1.0061854124069214, + "epoch": 0.12, + "grad_norm": 4.09375, + "learning_rate": 4.957716375194302e-06, + "log_odds": 2.2511651515960693, + "log_odds_ratio": -0.5366852879524231, + "loss": 0.3466, + "rejected_geometric_mean": -3.1467154026031494, + "step": 483 + }, + { + "chosen_geometric_mean": -1.0709335803985596, + "epoch": 0.12, + "grad_norm": 4.0625, + "learning_rate": 4.9575379054664205e-06, + "log_odds": 2.8649508953094482, + "log_odds_ratio": -0.2039608359336853, + "loss": 0.2756, + "rejected_geometric_mean": -3.6062464714050293, + "step": 484 + }, + { + "chosen_geometric_mean": -1.3687041997909546, + "epoch": 0.12, + "grad_norm": 7.96875, + "learning_rate": 4.957359063116802e-06, + "log_odds": 0.7274734377861023, + "log_odds_ratio": -0.4787672162055969, + "loss": 0.3016, + "rejected_geometric_mean": -1.9820303916931152, + "step": 485 + }, + { + "chosen_geometric_mean": -1.0720552206039429, + "epoch": 0.12, + "grad_norm": 5.90625, + "learning_rate": 4.957179848172563e-06, + "log_odds": 0.8326131105422974, + "log_odds_ratio": -0.4080589711666107, + "loss": 0.2745, + "rejected_geometric_mean": -1.7335765361785889, + "step": 486 + }, + { + "chosen_geometric_mean": -1.373079776763916, + "epoch": 0.12, + "grad_norm": 8.5625, + "learning_rate": 4.957000260660878e-06, + "log_odds": 1.3610060214996338, + "log_odds_ratio": -0.35517317056655884, + "loss": 0.3501, + "rejected_geometric_mean": -2.572274684906006, + "step": 487 + }, + { + "chosen_geometric_mean": -1.2689011096954346, + "epoch": 0.12, + "grad_norm": 6.125, + "learning_rate": 4.956820300608974e-06, + "log_odds": 0.8202266693115234, + "log_odds_ratio": -0.46781447529792786, + "loss": 0.3081, + "rejected_geometric_mean": -1.9708752632141113, + "step": 488 + }, + { + "chosen_geometric_mean": -0.9909614324569702, + "epoch": 0.12, + "grad_norm": 10.25, + "learning_rate": 4.95663996804414e-06, + "log_odds": 1.1552985906600952, + "log_odds_ratio": -0.4781886041164398, + "loss": 0.2922, + "rejected_geometric_mean": -1.9577343463897705, + "step": 489 + }, + { + "chosen_geometric_mean": -1.2427952289581299, + "epoch": 0.12, + "grad_norm": 253.0, + "learning_rate": 4.9564592629937184e-06, + "log_odds": 1.765657663345337, + "log_odds_ratio": -0.391388475894928, + "loss": 0.357, + "rejected_geometric_mean": -2.7646191120147705, + "step": 490 + }, + { + "chosen_geometric_mean": -1.3470250368118286, + "epoch": 0.12, + "grad_norm": 47.75, + "learning_rate": 4.956278185485108e-06, + "log_odds": 5.477660179138184, + "log_odds_ratio": -0.14060640335083008, + "loss": 0.3595, + "rejected_geometric_mean": -6.578428268432617, + "step": 491 + }, + { + "chosen_geometric_mean": -1.2886285781860352, + "epoch": 0.12, + "grad_norm": 5.09375, + "learning_rate": 4.956096735545763e-06, + "log_odds": 2.903818368911743, + "log_odds_ratio": -0.41123729944229126, + "loss": 0.3324, + "rejected_geometric_mean": -4.062933921813965, + "step": 492 + }, + { + "chosen_geometric_mean": -1.034691572189331, + "epoch": 0.12, + "grad_norm": 21.5, + "learning_rate": 4.955914913203198e-06, + "log_odds": 1.6903910636901855, + "log_odds_ratio": -0.43163543939590454, + "loss": 0.2776, + "rejected_geometric_mean": -2.513972282409668, + "step": 493 + }, + { + "chosen_geometric_mean": -0.8090394139289856, + "epoch": 0.12, + "grad_norm": 7.03125, + "learning_rate": 4.955732718484981e-06, + "log_odds": 1.700671911239624, + "log_odds_ratio": -0.2900487184524536, + "loss": 0.3001, + "rejected_geometric_mean": -2.1711995601654053, + "step": 494 + }, + { + "chosen_geometric_mean": -1.1508548259735107, + "epoch": 0.12, + "grad_norm": 4.96875, + "learning_rate": 4.9555501514187356e-06, + "log_odds": 1.4635674953460693, + "log_odds_ratio": -0.34965312480926514, + "loss": 0.3742, + "rejected_geometric_mean": -2.430339813232422, + "step": 495 + }, + { + "chosen_geometric_mean": -0.9898369312286377, + "epoch": 0.12, + "grad_norm": 3.671875, + "learning_rate": 4.955367212032145e-06, + "log_odds": 0.7630058526992798, + "log_odds_ratio": -0.5418511629104614, + "loss": 0.3192, + "rejected_geometric_mean": -1.685344934463501, + "step": 496 + }, + { + "chosen_geometric_mean": -1.2575969696044922, + "epoch": 0.12, + "grad_norm": 6.34375, + "learning_rate": 4.955183900352947e-06, + "log_odds": 4.544260025024414, + "log_odds_ratio": -0.27004534006118774, + "loss": 0.3247, + "rejected_geometric_mean": -5.612606048583984, + "step": 497 + }, + { + "chosen_geometric_mean": -1.0325627326965332, + "epoch": 0.12, + "grad_norm": 34.75, + "learning_rate": 4.955000216408935e-06, + "log_odds": 5.571357727050781, + "log_odds_ratio": -0.13859163224697113, + "loss": 0.378, + "rejected_geometric_mean": -6.241788864135742, + "step": 498 + }, + { + "chosen_geometric_mean": -1.497570276260376, + "epoch": 0.12, + "grad_norm": 4.3125, + "learning_rate": 4.95481616022796e-06, + "log_odds": 3.982949733734131, + "log_odds_ratio": -0.1992783397436142, + "loss": 0.3774, + "rejected_geometric_mean": -5.253515720367432, + "step": 499 + }, + { + "chosen_geometric_mean": -0.9701957106590271, + "epoch": 0.12, + "grad_norm": 10.4375, + "learning_rate": 4.954631731837931e-06, + "log_odds": 2.7072856426239014, + "log_odds_ratio": -0.12245924025774002, + "loss": 0.3387, + "rejected_geometric_mean": -3.2606236934661865, + "step": 500 + }, + { + "chosen_geometric_mean": -1.2489407062530518, + "epoch": 0.12, + "grad_norm": 3.90625, + "learning_rate": 4.95444693126681e-06, + "log_odds": 2.4946045875549316, + "log_odds_ratio": -0.297359436750412, + "loss": 0.308, + "rejected_geometric_mean": -3.5432727336883545, + "step": 501 + }, + { + "chosen_geometric_mean": -1.3150436878204346, + "epoch": 0.12, + "grad_norm": 4.3125, + "learning_rate": 4.954261758542619e-06, + "log_odds": 1.2297104597091675, + "log_odds_ratio": -0.2829849123954773, + "loss": 0.3534, + "rejected_geometric_mean": -2.305762529373169, + "step": 502 + }, + { + "chosen_geometric_mean": -1.1729484796524048, + "epoch": 0.12, + "grad_norm": 18.25, + "learning_rate": 4.9540762136934326e-06, + "log_odds": 3.4563443660736084, + "log_odds_ratio": -0.4343057870864868, + "loss": 0.3345, + "rejected_geometric_mean": -4.516203880310059, + "step": 503 + }, + { + "chosen_geometric_mean": -1.8614790439605713, + "epoch": 0.12, + "grad_norm": 26.625, + "learning_rate": 4.953890296747384e-06, + "log_odds": -0.12413370609283447, + "log_odds_ratio": -0.9972852468490601, + "loss": 0.3752, + "rejected_geometric_mean": -1.7021822929382324, + "step": 504 + }, + { + "chosen_geometric_mean": -1.1636197566986084, + "epoch": 0.13, + "grad_norm": 53.0, + "learning_rate": 4.953704007732665e-06, + "log_odds": 1.2402734756469727, + "log_odds_ratio": -0.30170220136642456, + "loss": 0.3505, + "rejected_geometric_mean": -2.156008243560791, + "step": 505 + }, + { + "chosen_geometric_mean": -1.112337350845337, + "epoch": 0.13, + "grad_norm": 5.3125, + "learning_rate": 4.953517346677519e-06, + "log_odds": 4.379216194152832, + "log_odds_ratio": -0.11418179422616959, + "loss": 0.3128, + "rejected_geometric_mean": -5.107399940490723, + "step": 506 + }, + { + "chosen_geometric_mean": -1.3181315660476685, + "epoch": 0.13, + "grad_norm": 21.5, + "learning_rate": 4.95333031361025e-06, + "log_odds": 1.3012993335723877, + "log_odds_ratio": -0.33601394295692444, + "loss": 0.3956, + "rejected_geometric_mean": -2.449592113494873, + "step": 507 + }, + { + "chosen_geometric_mean": -1.165719747543335, + "epoch": 0.13, + "grad_norm": 14.0, + "learning_rate": 4.953142908559216e-06, + "log_odds": 1.0373165607452393, + "log_odds_ratio": -0.3506828844547272, + "loss": 0.3376, + "rejected_geometric_mean": -1.9807579517364502, + "step": 508 + }, + { + "chosen_geometric_mean": -1.2068243026733398, + "epoch": 0.13, + "grad_norm": 2.796875, + "learning_rate": 4.95295513155283e-06, + "log_odds": 0.9415046572685242, + "log_odds_ratio": -0.5003578066825867, + "loss": 0.3309, + "rejected_geometric_mean": -2.0549471378326416, + "step": 509 + }, + { + "chosen_geometric_mean": -1.019629955291748, + "epoch": 0.13, + "grad_norm": 16.25, + "learning_rate": 4.952766982619567e-06, + "log_odds": -0.12060505151748657, + "log_odds_ratio": -0.7686488032341003, + "loss": 0.3323, + "rejected_geometric_mean": -0.9404113292694092, + "step": 510 + }, + { + "chosen_geometric_mean": -1.1947892904281616, + "epoch": 0.13, + "grad_norm": 3.1875, + "learning_rate": 4.952578461787954e-06, + "log_odds": 0.3177875280380249, + "log_odds_ratio": -0.5781277418136597, + "loss": 0.343, + "rejected_geometric_mean": -1.4913175106048584, + "step": 511 + }, + { + "chosen_geometric_mean": -1.2517144680023193, + "epoch": 0.13, + "grad_norm": 7.375, + "learning_rate": 4.9523895690865735e-06, + "log_odds": 0.9499645829200745, + "log_odds_ratio": -0.48033764958381653, + "loss": 0.3388, + "rejected_geometric_mean": -2.098667860031128, + "step": 512 + }, + { + "chosen_geometric_mean": -1.1467058658599854, + "epoch": 0.13, + "grad_norm": 2.515625, + "learning_rate": 4.952200304544068e-06, + "log_odds": 0.25609493255615234, + "log_odds_ratio": -0.5762277245521545, + "loss": 0.3617, + "rejected_geometric_mean": -1.3272504806518555, + "step": 513 + }, + { + "chosen_geometric_mean": -1.155045747756958, + "epoch": 0.13, + "grad_norm": 3.96875, + "learning_rate": 4.9520106681891335e-06, + "log_odds": 0.9375984072685242, + "log_odds_ratio": -0.3884119987487793, + "loss": 0.3419, + "rejected_geometric_mean": -1.8946990966796875, + "step": 514 + }, + { + "chosen_geometric_mean": -1.2969579696655273, + "epoch": 0.13, + "grad_norm": 4.25, + "learning_rate": 4.951820660050524e-06, + "log_odds": 2.648833990097046, + "log_odds_ratio": -0.3559073209762573, + "loss": 0.301, + "rejected_geometric_mean": -3.800513505935669, + "step": 515 + }, + { + "chosen_geometric_mean": -1.2103596925735474, + "epoch": 0.13, + "grad_norm": 3.046875, + "learning_rate": 4.951630280157049e-06, + "log_odds": 0.9189144968986511, + "log_odds_ratio": -0.41150587797164917, + "loss": 0.3397, + "rejected_geometric_mean": -1.9900197982788086, + "step": 516 + }, + { + "chosen_geometric_mean": -1.201521873474121, + "epoch": 0.13, + "grad_norm": 17.625, + "learning_rate": 4.951439528537576e-06, + "log_odds": 1.2142170667648315, + "log_odds_ratio": -0.48174765706062317, + "loss": 0.3762, + "rejected_geometric_mean": -2.3116812705993652, + "step": 517 + }, + { + "chosen_geometric_mean": -1.1653648614883423, + "epoch": 0.13, + "grad_norm": 8.375, + "learning_rate": 4.951248405221025e-06, + "log_odds": 1.621659517288208, + "log_odds_ratio": -0.3201407194137573, + "loss": 0.2768, + "rejected_geometric_mean": -2.582150936126709, + "step": 518 + }, + { + "chosen_geometric_mean": -1.0534493923187256, + "epoch": 0.13, + "grad_norm": 5.0, + "learning_rate": 4.951056910236377e-06, + "log_odds": 1.0535924434661865, + "log_odds_ratio": -0.34663066267967224, + "loss": 0.3656, + "rejected_geometric_mean": -1.8693022727966309, + "step": 519 + }, + { + "chosen_geometric_mean": -1.252073884010315, + "epoch": 0.13, + "grad_norm": 9.6875, + "learning_rate": 4.9508650436126655e-06, + "log_odds": 1.724299430847168, + "log_odds_ratio": -0.43496859073638916, + "loss": 0.34, + "rejected_geometric_mean": -2.815548896789551, + "step": 520 + }, + { + "chosen_geometric_mean": -1.381784200668335, + "epoch": 0.13, + "grad_norm": 6.3125, + "learning_rate": 4.950672805378984e-06, + "log_odds": 1.4490097761154175, + "log_odds_ratio": -0.31341552734375, + "loss": 0.3177, + "rejected_geometric_mean": -2.599897861480713, + "step": 521 + }, + { + "chosen_geometric_mean": -1.270157814025879, + "epoch": 0.13, + "grad_norm": 7.65625, + "learning_rate": 4.9504801955644786e-06, + "log_odds": 1.7246959209442139, + "log_odds_ratio": -0.2984834313392639, + "loss": 0.3107, + "rejected_geometric_mean": -2.7942705154418945, + "step": 522 + }, + { + "chosen_geometric_mean": -1.310754656791687, + "epoch": 0.13, + "grad_norm": 6.96875, + "learning_rate": 4.950287214198355e-06, + "log_odds": 2.196049451828003, + "log_odds_ratio": -0.2740892171859741, + "loss": 0.2947, + "rejected_geometric_mean": -3.2805376052856445, + "step": 523 + }, + { + "chosen_geometric_mean": -1.1810173988342285, + "epoch": 0.13, + "grad_norm": 26.625, + "learning_rate": 4.950093861309873e-06, + "log_odds": 2.2174830436706543, + "log_odds_ratio": -0.24358917772769928, + "loss": 0.3071, + "rejected_geometric_mean": -3.126640558242798, + "step": 524 + }, + { + "chosen_geometric_mean": -1.1073063611984253, + "epoch": 0.13, + "grad_norm": 32.5, + "learning_rate": 4.94990013692835e-06, + "log_odds": 1.081723928451538, + "log_odds_ratio": -0.3017794191837311, + "loss": 0.3331, + "rejected_geometric_mean": -1.9241373538970947, + "step": 525 + }, + { + "chosen_geometric_mean": -0.99483323097229, + "epoch": 0.13, + "grad_norm": 7.34375, + "learning_rate": 4.949706041083159e-06, + "log_odds": 1.6411144733428955, + "log_odds_ratio": -0.29427823424339294, + "loss": 0.29, + "rejected_geometric_mean": -2.3684747219085693, + "step": 526 + }, + { + "chosen_geometric_mean": -1.148667335510254, + "epoch": 0.13, + "grad_norm": 20.75, + "learning_rate": 4.949511573803729e-06, + "log_odds": 1.9749730825424194, + "log_odds_ratio": -0.22222411632537842, + "loss": 0.3093, + "rejected_geometric_mean": -2.849231243133545, + "step": 527 + }, + { + "chosen_geometric_mean": -1.2233940362930298, + "epoch": 0.13, + "grad_norm": 7.96875, + "learning_rate": 4.949316735119547e-06, + "log_odds": 1.5699130296707153, + "log_odds_ratio": -0.26434916257858276, + "loss": 0.3293, + "rejected_geometric_mean": -2.555938243865967, + "step": 528 + }, + { + "chosen_geometric_mean": -1.0195504426956177, + "epoch": 0.13, + "grad_norm": 3.4375, + "learning_rate": 4.949121525060155e-06, + "log_odds": 0.8635275363922119, + "log_odds_ratio": -0.4596068561077118, + "loss": 0.2782, + "rejected_geometric_mean": -1.7172565460205078, + "step": 529 + }, + { + "chosen_geometric_mean": -1.001889705657959, + "epoch": 0.13, + "grad_norm": 2.734375, + "learning_rate": 4.948925943655152e-06, + "log_odds": -0.05764288455247879, + "log_odds_ratio": -0.7258678674697876, + "loss": 0.3623, + "rejected_geometric_mean": -0.9582545757293701, + "step": 530 + }, + { + "chosen_geometric_mean": -1.1466649770736694, + "epoch": 0.13, + "grad_norm": 3.671875, + "learning_rate": 4.94872999093419e-06, + "log_odds": 2.544466257095337, + "log_odds_ratio": -0.3691500127315521, + "loss": 0.3403, + "rejected_geometric_mean": -3.513035535812378, + "step": 531 + }, + { + "chosen_geometric_mean": -0.916832685470581, + "epoch": 0.13, + "grad_norm": 2.859375, + "learning_rate": 4.9485336669269845e-06, + "log_odds": 3.5595905780792236, + "log_odds_ratio": -0.15010985732078552, + "loss": 0.3094, + "rejected_geometric_mean": -4.0468549728393555, + "step": 532 + }, + { + "chosen_geometric_mean": -1.0615043640136719, + "epoch": 0.13, + "grad_norm": 29.875, + "learning_rate": 4.9483369716633e-06, + "log_odds": 1.1829904317855835, + "log_odds_ratio": -0.46455904841423035, + "loss": 0.3262, + "rejected_geometric_mean": -2.14852237701416, + "step": 533 + }, + { + "chosen_geometric_mean": -1.4752404689788818, + "epoch": 0.13, + "grad_norm": 5.65625, + "learning_rate": 4.94813990517296e-06, + "log_odds": 0.9211012125015259, + "log_odds_ratio": -0.36583447456359863, + "loss": 0.3425, + "rejected_geometric_mean": -2.278273820877075, + "step": 534 + }, + { + "chosen_geometric_mean": -1.310723066329956, + "epoch": 0.13, + "grad_norm": 3.09375, + "learning_rate": 4.947942467485846e-06, + "log_odds": 0.45693787932395935, + "log_odds_ratio": -0.55035400390625, + "loss": 0.355, + "rejected_geometric_mean": -1.6633479595184326, + "step": 535 + }, + { + "chosen_geometric_mean": -1.012882947921753, + "epoch": 0.13, + "grad_norm": 4.65625, + "learning_rate": 4.947744658631894e-06, + "log_odds": 2.3130548000335693, + "log_odds_ratio": -0.3062552213668823, + "loss": 0.27, + "rejected_geometric_mean": -3.1006693840026855, + "step": 536 + }, + { + "chosen_geometric_mean": -1.0960420370101929, + "epoch": 0.13, + "grad_norm": 4.3125, + "learning_rate": 4.947546478641096e-06, + "log_odds": 1.1135135889053345, + "log_odds_ratio": -0.39304298162460327, + "loss": 0.3448, + "rejected_geometric_mean": -2.0221774578094482, + "step": 537 + }, + { + "chosen_geometric_mean": -1.1089789867401123, + "epoch": 0.13, + "grad_norm": 5.8125, + "learning_rate": 4.947347927543501e-06, + "log_odds": 1.022849202156067, + "log_odds_ratio": -0.4672106206417084, + "loss": 0.3561, + "rejected_geometric_mean": -1.9931570291519165, + "step": 538 + }, + { + "chosen_geometric_mean": -0.936589241027832, + "epoch": 0.13, + "grad_norm": 9.25, + "learning_rate": 4.947149005369215e-06, + "log_odds": 1.152172327041626, + "log_odds_ratio": -0.294159471988678, + "loss": 0.2908, + "rejected_geometric_mean": -1.770085334777832, + "step": 539 + }, + { + "chosen_geometric_mean": -1.0537922382354736, + "epoch": 0.13, + "grad_norm": 4.3125, + "learning_rate": 4.9469497121483976e-06, + "log_odds": 1.169294834136963, + "log_odds_ratio": -0.3068423271179199, + "loss": 0.288, + "rejected_geometric_mean": -1.9298324584960938, + "step": 540 + }, + { + "chosen_geometric_mean": -1.1859307289123535, + "epoch": 0.13, + "grad_norm": 5.34375, + "learning_rate": 4.946750047911268e-06, + "log_odds": 4.102444648742676, + "log_odds_ratio": -0.22046740353107452, + "loss": 0.365, + "rejected_geometric_mean": -5.01584529876709, + "step": 541 + }, + { + "chosen_geometric_mean": -1.0856598615646362, + "epoch": 0.13, + "grad_norm": 42.75, + "learning_rate": 4.9465500126881e-06, + "log_odds": 1.639519453048706, + "log_odds_ratio": -0.47531428933143616, + "loss": 0.4, + "rejected_geometric_mean": -2.5462589263916016, + "step": 542 + }, + { + "chosen_geometric_mean": -1.1547929048538208, + "epoch": 0.13, + "grad_norm": 8.25, + "learning_rate": 4.946349606509222e-06, + "log_odds": 2.81402325630188, + "log_odds_ratio": -0.2132764756679535, + "loss": 0.3106, + "rejected_geometric_mean": -3.6985082626342773, + "step": 543 + }, + { + "chosen_geometric_mean": -1.0585479736328125, + "epoch": 0.13, + "grad_norm": 16.375, + "learning_rate": 4.946148829405023e-06, + "log_odds": 1.9207202196121216, + "log_odds_ratio": -0.31044235825538635, + "loss": 0.3161, + "rejected_geometric_mean": -2.7274084091186523, + "step": 544 + }, + { + "chosen_geometric_mean": -1.255150318145752, + "epoch": 0.13, + "grad_norm": 17.125, + "learning_rate": 4.945947681405944e-06, + "log_odds": 3.2360024452209473, + "log_odds_ratio": -0.11247344315052032, + "loss": 0.3245, + "rejected_geometric_mean": -4.169225692749023, + "step": 545 + }, + { + "chosen_geometric_mean": -1.4015450477600098, + "epoch": 0.14, + "grad_norm": 12.0625, + "learning_rate": 4.945746162542483e-06, + "log_odds": 0.6469626426696777, + "log_odds_ratio": -0.4471096992492676, + "loss": 0.2859, + "rejected_geometric_mean": -1.9392156600952148, + "step": 546 + }, + { + "chosen_geometric_mean": -1.2645870447158813, + "epoch": 0.14, + "grad_norm": 5.46875, + "learning_rate": 4.9455442728451974e-06, + "log_odds": 0.7874064445495605, + "log_odds_ratio": -0.4313787519931793, + "loss": 0.3358, + "rejected_geometric_mean": -1.9201956987380981, + "step": 547 + }, + { + "chosen_geometric_mean": -1.1586618423461914, + "epoch": 0.14, + "grad_norm": 4.15625, + "learning_rate": 4.945342012344697e-06, + "log_odds": 0.11291198432445526, + "log_odds_ratio": -0.7178196907043457, + "loss": 0.3502, + "rejected_geometric_mean": -1.2965553998947144, + "step": 548 + }, + { + "chosen_geometric_mean": -1.1893110275268555, + "epoch": 0.14, + "grad_norm": 4.625, + "learning_rate": 4.9451393810716504e-06, + "log_odds": 0.569747269153595, + "log_odds_ratio": -0.4526485502719879, + "loss": 0.3132, + "rejected_geometric_mean": -1.6043375730514526, + "step": 549 + }, + { + "chosen_geometric_mean": -1.0384970903396606, + "epoch": 0.14, + "grad_norm": 5.71875, + "learning_rate": 4.94493637905678e-06, + "log_odds": 2.0260026454925537, + "log_odds_ratio": -0.2562410831451416, + "loss": 0.2632, + "rejected_geometric_mean": -2.741060733795166, + "step": 550 + }, + { + "chosen_geometric_mean": -1.0830951929092407, + "epoch": 0.14, + "grad_norm": 4.4375, + "learning_rate": 4.944733006330867e-06, + "log_odds": 2.0989365577697754, + "log_odds_ratio": -0.2719537615776062, + "loss": 0.2852, + "rejected_geometric_mean": -2.8852145671844482, + "step": 551 + }, + { + "chosen_geometric_mean": -1.060459852218628, + "epoch": 0.14, + "grad_norm": 3.78125, + "learning_rate": 4.944529262924747e-06, + "log_odds": 0.4208890199661255, + "log_odds_ratio": -0.524484395980835, + "loss": 0.3508, + "rejected_geometric_mean": -1.3917224407196045, + "step": 552 + }, + { + "chosen_geometric_mean": -1.1661341190338135, + "epoch": 0.14, + "grad_norm": 6.40625, + "learning_rate": 4.944325148869312e-06, + "log_odds": 2.1474432945251465, + "log_odds_ratio": -0.20258887112140656, + "loss": 0.3463, + "rejected_geometric_mean": -2.9764137268066406, + "step": 553 + }, + { + "chosen_geometric_mean": -1.1056315898895264, + "epoch": 0.14, + "grad_norm": 10.4375, + "learning_rate": 4.944120664195511e-06, + "log_odds": 1.3768994808197021, + "log_odds_ratio": -0.2900029122829437, + "loss": 0.3334, + "rejected_geometric_mean": -2.2585415840148926, + "step": 554 + }, + { + "chosen_geometric_mean": -1.1164820194244385, + "epoch": 0.14, + "grad_norm": 43.75, + "learning_rate": 4.943915808934349e-06, + "log_odds": 2.814371109008789, + "log_odds_ratio": -0.16763362288475037, + "loss": 0.355, + "rejected_geometric_mean": -3.606975793838501, + "step": 555 + }, + { + "chosen_geometric_mean": -1.1994050741195679, + "epoch": 0.14, + "grad_norm": 12.6875, + "learning_rate": 4.943710583116888e-06, + "log_odds": 3.7204790115356445, + "log_odds_ratio": -0.1726079285144806, + "loss": 0.3376, + "rejected_geometric_mean": -4.655621528625488, + "step": 556 + }, + { + "chosen_geometric_mean": -1.073981761932373, + "epoch": 0.14, + "grad_norm": 15.125, + "learning_rate": 4.943504986774243e-06, + "log_odds": 3.517242908477783, + "log_odds_ratio": -0.23396025598049164, + "loss": 0.3632, + "rejected_geometric_mean": -4.257510185241699, + "step": 557 + }, + { + "chosen_geometric_mean": -1.1498448848724365, + "epoch": 0.14, + "grad_norm": 14.6875, + "learning_rate": 4.943299019937587e-06, + "log_odds": 1.104701042175293, + "log_odds_ratio": -0.38833409547805786, + "loss": 0.3344, + "rejected_geometric_mean": -2.078232526779175, + "step": 558 + }, + { + "chosen_geometric_mean": -1.1574496030807495, + "epoch": 0.14, + "grad_norm": 11.375, + "learning_rate": 4.943092682638153e-06, + "log_odds": 1.0442607402801514, + "log_odds_ratio": -0.40034082531929016, + "loss": 0.3132, + "rejected_geometric_mean": -2.031172752380371, + "step": 559 + }, + { + "chosen_geometric_mean": -1.317558765411377, + "epoch": 0.14, + "grad_norm": 4.15625, + "learning_rate": 4.942885974907223e-06, + "log_odds": 0.6959587931632996, + "log_odds_ratio": -0.4283686578273773, + "loss": 0.2797, + "rejected_geometric_mean": -1.888674020767212, + "step": 560 + }, + { + "chosen_geometric_mean": -1.3179423809051514, + "epoch": 0.14, + "grad_norm": 2.640625, + "learning_rate": 4.942678896776141e-06, + "log_odds": 2.488729476928711, + "log_odds_ratio": -0.3986026644706726, + "loss": 0.3252, + "rejected_geometric_mean": -3.6748318672180176, + "step": 561 + }, + { + "chosen_geometric_mean": -1.1277954578399658, + "epoch": 0.14, + "grad_norm": 3.9375, + "learning_rate": 4.942471448276304e-06, + "log_odds": 0.5837129354476929, + "log_odds_ratio": -0.47161865234375, + "loss": 0.367, + "rejected_geometric_mean": -1.5429461002349854, + "step": 562 + }, + { + "chosen_geometric_mean": -0.9291600584983826, + "epoch": 0.14, + "grad_norm": 3.390625, + "learning_rate": 4.9422636294391664e-06, + "log_odds": 0.09746754914522171, + "log_odds_ratio": -0.6551436185836792, + "loss": 0.3841, + "rejected_geometric_mean": -0.97902911901474, + "step": 563 + }, + { + "chosen_geometric_mean": -1.2653090953826904, + "epoch": 0.14, + "grad_norm": 3.109375, + "learning_rate": 4.942055440296239e-06, + "log_odds": 0.6463981866836548, + "log_odds_ratio": -0.4949810206890106, + "loss": 0.3356, + "rejected_geometric_mean": -1.7992373704910278, + "step": 564 + }, + { + "chosen_geometric_mean": -0.9897299408912659, + "epoch": 0.14, + "grad_norm": 2.65625, + "learning_rate": 4.9418468808790886e-06, + "log_odds": 1.587507724761963, + "log_odds_ratio": -0.4242613911628723, + "loss": 0.2836, + "rejected_geometric_mean": -2.4314160346984863, + "step": 565 + }, + { + "chosen_geometric_mean": -1.3094346523284912, + "epoch": 0.14, + "grad_norm": 3.109375, + "learning_rate": 4.941637951219337e-06, + "log_odds": 0.13066279888153076, + "log_odds_ratio": -0.6380438208580017, + "loss": 0.3684, + "rejected_geometric_mean": -1.4122776985168457, + "step": 566 + }, + { + "chosen_geometric_mean": -1.223865270614624, + "epoch": 0.14, + "grad_norm": 3.34375, + "learning_rate": 4.941428651348663e-06, + "log_odds": 3.1774866580963135, + "log_odds_ratio": -0.2995169460773468, + "loss": 0.3079, + "rejected_geometric_mean": -4.180548667907715, + "step": 567 + }, + { + "chosen_geometric_mean": -1.2642812728881836, + "epoch": 0.14, + "grad_norm": 4.4375, + "learning_rate": 4.9412189812988006e-06, + "log_odds": 6.632050514221191, + "log_odds_ratio": -0.1804676353931427, + "loss": 0.3197, + "rejected_geometric_mean": -7.596570014953613, + "step": 568 + }, + { + "chosen_geometric_mean": -1.0667033195495605, + "epoch": 0.14, + "grad_norm": 6.78125, + "learning_rate": 4.941008941101543e-06, + "log_odds": 5.133482933044434, + "log_odds_ratio": -0.35958945751190186, + "loss": 0.3127, + "rejected_geometric_mean": -5.911395072937012, + "step": 569 + }, + { + "chosen_geometric_mean": -1.2312517166137695, + "epoch": 0.14, + "grad_norm": 72.5, + "learning_rate": 4.9407985307887365e-06, + "log_odds": 6.261143207550049, + "log_odds_ratio": -0.14690479636192322, + "loss": 0.5562, + "rejected_geometric_mean": -7.150456428527832, + "step": 570 + }, + { + "chosen_geometric_mean": -1.566796064376831, + "epoch": 0.14, + "grad_norm": 102.5, + "learning_rate": 4.9405877503922826e-06, + "log_odds": 4.581361770629883, + "log_odds_ratio": -0.3149906098842621, + "loss": 0.3987, + "rejected_geometric_mean": -6.036029815673828, + "step": 571 + }, + { + "chosen_geometric_mean": -3.813931941986084, + "epoch": 0.14, + "grad_norm": 87.0, + "learning_rate": 4.940376599944143e-06, + "log_odds": 2.6471328735351562, + "log_odds_ratio": -0.2750900983810425, + "loss": 0.6347, + "rejected_geometric_mean": -6.428035736083984, + "step": 572 + }, + { + "chosen_geometric_mean": -2.1590933799743652, + "epoch": 0.14, + "grad_norm": 112.5, + "learning_rate": 4.940165079476332e-06, + "log_odds": 3.6791770458221436, + "log_odds_ratio": -0.23757266998291016, + "loss": 0.4918, + "rejected_geometric_mean": -5.679369926452637, + "step": 573 + }, + { + "chosen_geometric_mean": -1.1580533981323242, + "epoch": 0.14, + "grad_norm": 47.0, + "learning_rate": 4.939953189020922e-06, + "log_odds": 2.8024516105651855, + "log_odds_ratio": -0.17274509370326996, + "loss": 0.5479, + "rejected_geometric_mean": -3.665109395980835, + "step": 574 + }, + { + "chosen_geometric_mean": -1.2593566179275513, + "epoch": 0.14, + "grad_norm": 32.25, + "learning_rate": 4.93974092861004e-06, + "log_odds": 2.408761978149414, + "log_odds_ratio": -0.15563884377479553, + "loss": 0.4109, + "rejected_geometric_mean": -3.380682945251465, + "step": 575 + }, + { + "chosen_geometric_mean": -1.1643856763839722, + "epoch": 0.14, + "grad_norm": 9.1875, + "learning_rate": 4.939528298275868e-06, + "log_odds": 0.8568911552429199, + "log_odds_ratio": -0.37809881567955017, + "loss": 0.3157, + "rejected_geometric_mean": -1.8261218070983887, + "step": 576 + }, + { + "chosen_geometric_mean": -1.0068252086639404, + "epoch": 0.14, + "grad_norm": 5.125, + "learning_rate": 4.93931529805065e-06, + "log_odds": 1.0572261810302734, + "log_odds_ratio": -0.3394325375556946, + "loss": 0.3134, + "rejected_geometric_mean": -1.7813082933425903, + "step": 577 + }, + { + "chosen_geometric_mean": -0.9744901657104492, + "epoch": 0.14, + "grad_norm": 4.75, + "learning_rate": 4.939101927966679e-06, + "log_odds": 1.294965147972107, + "log_odds_ratio": -0.43967947363853455, + "loss": 0.3674, + "rejected_geometric_mean": -2.0858917236328125, + "step": 578 + }, + { + "chosen_geometric_mean": -0.9697179198265076, + "epoch": 0.14, + "grad_norm": 3.328125, + "learning_rate": 4.9388881880563076e-06, + "log_odds": 0.746321976184845, + "log_odds_ratio": -0.4464353322982788, + "loss": 0.3391, + "rejected_geometric_mean": -1.5077898502349854, + "step": 579 + }, + { + "chosen_geometric_mean": -1.2242717742919922, + "epoch": 0.14, + "grad_norm": 2.59375, + "learning_rate": 4.938674078351944e-06, + "log_odds": 0.9001191258430481, + "log_odds_ratio": -0.38177061080932617, + "loss": 0.3124, + "rejected_geometric_mean": -1.963480830192566, + "step": 580 + }, + { + "chosen_geometric_mean": -1.2526545524597168, + "epoch": 0.14, + "grad_norm": 3.46875, + "learning_rate": 4.938459598886052e-06, + "log_odds": 0.6538909673690796, + "log_odds_ratio": -0.45202046632766724, + "loss": 0.3404, + "rejected_geometric_mean": -1.770172119140625, + "step": 581 + }, + { + "chosen_geometric_mean": -1.1583373546600342, + "epoch": 0.14, + "grad_norm": 11.25, + "learning_rate": 4.938244749691153e-06, + "log_odds": 0.20650914311408997, + "log_odds_ratio": -0.6128801107406616, + "loss": 0.3383, + "rejected_geometric_mean": -1.306961178779602, + "step": 582 + }, + { + "chosen_geometric_mean": -1.153109073638916, + "epoch": 0.14, + "grad_norm": 2.40625, + "learning_rate": 4.938029530799821e-06, + "log_odds": 0.07722117751836777, + "log_odds_ratio": -0.6567209362983704, + "loss": 0.3116, + "rejected_geometric_mean": -1.2099683284759521, + "step": 583 + }, + { + "chosen_geometric_mean": -0.945954442024231, + "epoch": 0.14, + "grad_norm": 3.8125, + "learning_rate": 4.937813942244691e-06, + "log_odds": 1.7131634950637817, + "log_odds_ratio": -0.30342966318130493, + "loss": 0.3718, + "rejected_geometric_mean": -2.3022091388702393, + "step": 584 + }, + { + "chosen_geometric_mean": -0.9816660284996033, + "epoch": 0.14, + "grad_norm": 2.65625, + "learning_rate": 4.937597984058451e-06, + "log_odds": 0.4155740737915039, + "log_odds_ratio": -0.5929270386695862, + "loss": 0.2985, + "rejected_geometric_mean": -1.296079158782959, + "step": 585 + }, + { + "chosen_geometric_mean": -1.1435520648956299, + "epoch": 0.15, + "grad_norm": 2.84375, + "learning_rate": 4.937381656273844e-06, + "log_odds": 0.46001148223876953, + "log_odds_ratio": -0.49510669708251953, + "loss": 0.3217, + "rejected_geometric_mean": -1.4713937044143677, + "step": 586 + }, + { + "chosen_geometric_mean": -1.244260311126709, + "epoch": 0.15, + "grad_norm": 4.34375, + "learning_rate": 4.937164958923672e-06, + "log_odds": 1.4755252599716187, + "log_odds_ratio": -0.33428049087524414, + "loss": 0.3166, + "rejected_geometric_mean": -2.536851406097412, + "step": 587 + }, + { + "chosen_geometric_mean": -1.2050803899765015, + "epoch": 0.15, + "grad_norm": 6.25, + "learning_rate": 4.9369478920407895e-06, + "log_odds": 1.771841049194336, + "log_odds_ratio": -0.4743456542491913, + "loss": 0.3239, + "rejected_geometric_mean": -2.852112293243408, + "step": 588 + }, + { + "chosen_geometric_mean": -1.141899585723877, + "epoch": 0.15, + "grad_norm": 69.5, + "learning_rate": 4.9367304556581104e-06, + "log_odds": 0.4203386902809143, + "log_odds_ratio": -0.5116568207740784, + "loss": 0.4401, + "rejected_geometric_mean": -1.4529502391815186, + "step": 589 + }, + { + "chosen_geometric_mean": -1.2482426166534424, + "epoch": 0.15, + "grad_norm": 4.09375, + "learning_rate": 4.936512649808604e-06, + "log_odds": 1.5512897968292236, + "log_odds_ratio": -0.4023764729499817, + "loss": 0.3357, + "rejected_geometric_mean": -2.669492244720459, + "step": 590 + }, + { + "chosen_geometric_mean": -1.3481589555740356, + "epoch": 0.15, + "grad_norm": 65.0, + "learning_rate": 4.936294474525293e-06, + "log_odds": 3.585012197494507, + "log_odds_ratio": -0.25374338030815125, + "loss": 0.4521, + "rejected_geometric_mean": -4.605930805206299, + "step": 591 + }, + { + "chosen_geometric_mean": -1.2163002490997314, + "epoch": 0.15, + "grad_norm": 3.0625, + "learning_rate": 4.93607592984126e-06, + "log_odds": 1.3932929039001465, + "log_odds_ratio": -0.35444825887680054, + "loss": 0.3448, + "rejected_geometric_mean": -2.4272947311401367, + "step": 592 + }, + { + "chosen_geometric_mean": -1.1002438068389893, + "epoch": 0.15, + "grad_norm": 6.6875, + "learning_rate": 4.93585701578964e-06, + "log_odds": 1.2772139310836792, + "log_odds_ratio": -0.4087262749671936, + "loss": 0.338, + "rejected_geometric_mean": -2.1947133541107178, + "step": 593 + }, + { + "chosen_geometric_mean": -0.9912674427032471, + "epoch": 0.15, + "grad_norm": 4.65625, + "learning_rate": 4.935637732403626e-06, + "log_odds": 2.2547476291656494, + "log_odds_ratio": -0.30830541253089905, + "loss": 0.2975, + "rejected_geometric_mean": -2.980926036834717, + "step": 594 + }, + { + "chosen_geometric_mean": -1.2481845617294312, + "epoch": 0.15, + "grad_norm": 4.5, + "learning_rate": 4.935418079716467e-06, + "log_odds": 0.32861873507499695, + "log_odds_ratio": -0.5499800443649292, + "loss": 0.3577, + "rejected_geometric_mean": -1.4922150373458862, + "step": 595 + }, + { + "chosen_geometric_mean": -1.0695286989212036, + "epoch": 0.15, + "grad_norm": 8.75, + "learning_rate": 4.935198057761468e-06, + "log_odds": 0.8328912854194641, + "log_odds_ratio": -0.5297352075576782, + "loss": 0.4084, + "rejected_geometric_mean": -1.8185776472091675, + "step": 596 + }, + { + "chosen_geometric_mean": -1.3071262836456299, + "epoch": 0.15, + "grad_norm": 17.625, + "learning_rate": 4.93497766657199e-06, + "log_odds": 4.215099334716797, + "log_odds_ratio": -0.25747150182724, + "loss": 0.3687, + "rejected_geometric_mean": -5.332474708557129, + "step": 597 + }, + { + "chosen_geometric_mean": -1.2103729248046875, + "epoch": 0.15, + "grad_norm": 2.828125, + "learning_rate": 4.934756906181447e-06, + "log_odds": 2.986933708190918, + "log_odds_ratio": -0.19699111580848694, + "loss": 0.2847, + "rejected_geometric_mean": -3.9358880519866943, + "step": 598 + }, + { + "chosen_geometric_mean": -1.2167236804962158, + "epoch": 0.15, + "grad_norm": 5.53125, + "learning_rate": 4.934535776623315e-06, + "log_odds": 0.48647379875183105, + "log_odds_ratio": -0.51619952917099, + "loss": 0.3026, + "rejected_geometric_mean": -1.6030222177505493, + "step": 599 + }, + { + "chosen_geometric_mean": -1.1313843727111816, + "epoch": 0.15, + "grad_norm": 6.1875, + "learning_rate": 4.934314277931119e-06, + "log_odds": 1.3503139019012451, + "log_odds_ratio": -0.26263824105262756, + "loss": 0.3286, + "rejected_geometric_mean": -2.2317049503326416, + "step": 600 + }, + { + "chosen_geometric_mean": -1.1924479007720947, + "epoch": 0.15, + "grad_norm": 9.125, + "learning_rate": 4.934092410138447e-06, + "log_odds": 1.509590983390808, + "log_odds_ratio": -0.3655097484588623, + "loss": 0.3035, + "rejected_geometric_mean": -2.532362699508667, + "step": 601 + }, + { + "chosen_geometric_mean": -1.385690450668335, + "epoch": 0.15, + "grad_norm": 15.6875, + "learning_rate": 4.933870173278938e-06, + "log_odds": 1.455269694328308, + "log_odds_ratio": -0.3769083023071289, + "loss": 0.3343, + "rejected_geometric_mean": -2.678271532058716, + "step": 602 + }, + { + "chosen_geometric_mean": -1.252881407737732, + "epoch": 0.15, + "grad_norm": 9.875, + "learning_rate": 4.933647567386287e-06, + "log_odds": 1.056508183479309, + "log_odds_ratio": -0.41685572266578674, + "loss": 0.3683, + "rejected_geometric_mean": -2.143765449523926, + "step": 603 + }, + { + "chosen_geometric_mean": -1.136393427848816, + "epoch": 0.15, + "grad_norm": 7.90625, + "learning_rate": 4.933424592494248e-06, + "log_odds": 1.179059386253357, + "log_odds_ratio": -0.3788832426071167, + "loss": 0.3586, + "rejected_geometric_mean": -2.1158313751220703, + "step": 604 + }, + { + "chosen_geometric_mean": -1.227293848991394, + "epoch": 0.15, + "grad_norm": 4.84375, + "learning_rate": 4.93320124863663e-06, + "log_odds": 2.066295862197876, + "log_odds_ratio": -0.3954803943634033, + "loss": 0.2795, + "rejected_geometric_mean": -3.1209371089935303, + "step": 605 + }, + { + "chosen_geometric_mean": -1.0798832178115845, + "epoch": 0.15, + "grad_norm": 3.390625, + "learning_rate": 4.932977535847295e-06, + "log_odds": 1.6635841131210327, + "log_odds_ratio": -0.35401833057403564, + "loss": 0.301, + "rejected_geometric_mean": -2.514594793319702, + "step": 606 + }, + { + "chosen_geometric_mean": -1.1426081657409668, + "epoch": 0.15, + "grad_norm": 4.65625, + "learning_rate": 4.932753454160165e-06, + "log_odds": 2.095040798187256, + "log_odds_ratio": -0.22393864393234253, + "loss": 0.3403, + "rejected_geometric_mean": -2.941660165786743, + "step": 607 + }, + { + "chosen_geometric_mean": -1.0832464694976807, + "epoch": 0.15, + "grad_norm": 2.671875, + "learning_rate": 4.932529003609217e-06, + "log_odds": 0.2812510132789612, + "log_odds_ratio": -0.5643763542175293, + "loss": 0.3309, + "rejected_geometric_mean": -1.2810194492340088, + "step": 608 + }, + { + "chosen_geometric_mean": -1.146684169769287, + "epoch": 0.15, + "grad_norm": 15.625, + "learning_rate": 4.9323041842284805e-06, + "log_odds": 2.5853271484375, + "log_odds_ratio": -0.4317861795425415, + "loss": 0.3149, + "rejected_geometric_mean": -3.6177632808685303, + "step": 609 + }, + { + "chosen_geometric_mean": -1.168164610862732, + "epoch": 0.15, + "grad_norm": 32.25, + "learning_rate": 4.932078996052045e-06, + "log_odds": 1.4522439241409302, + "log_odds_ratio": -0.3845398724079132, + "loss": 0.3061, + "rejected_geometric_mean": -2.474417209625244, + "step": 610 + }, + { + "chosen_geometric_mean": -1.3562694787979126, + "epoch": 0.15, + "grad_norm": 10.3125, + "learning_rate": 4.931853439114055e-06, + "log_odds": 1.5362473726272583, + "log_odds_ratio": -0.30154967308044434, + "loss": 0.3995, + "rejected_geometric_mean": -2.6720759868621826, + "step": 611 + }, + { + "chosen_geometric_mean": -1.2623178958892822, + "epoch": 0.15, + "grad_norm": 5.6875, + "learning_rate": 4.931627513448709e-06, + "log_odds": 0.7544103264808655, + "log_odds_ratio": -0.49274319410324097, + "loss": 0.3525, + "rejected_geometric_mean": -1.9242323637008667, + "step": 612 + }, + { + "chosen_geometric_mean": -1.1465717554092407, + "epoch": 0.15, + "grad_norm": 5.6875, + "learning_rate": 4.931401219090263e-06, + "log_odds": 0.35578593611717224, + "log_odds_ratio": -0.5901294350624084, + "loss": 0.322, + "rejected_geometric_mean": -1.4583985805511475, + "step": 613 + }, + { + "chosen_geometric_mean": -0.9541431665420532, + "epoch": 0.15, + "grad_norm": 2.53125, + "learning_rate": 4.931174556073029e-06, + "log_odds": 0.8725629448890686, + "log_odds_ratio": -0.43910184502601624, + "loss": 0.3036, + "rejected_geometric_mean": -1.6518226861953735, + "step": 614 + }, + { + "chosen_geometric_mean": -1.1431043148040771, + "epoch": 0.15, + "grad_norm": 2.421875, + "learning_rate": 4.930947524431376e-06, + "log_odds": 1.0245211124420166, + "log_odds_ratio": -0.4257236123085022, + "loss": 0.3079, + "rejected_geometric_mean": -2.010275363922119, + "step": 615 + }, + { + "chosen_geometric_mean": -1.098301649093628, + "epoch": 0.15, + "grad_norm": 2.515625, + "learning_rate": 4.930720124199725e-06, + "log_odds": 0.7103597521781921, + "log_odds_ratio": -0.41768065094947815, + "loss": 0.3147, + "rejected_geometric_mean": -1.6418672800064087, + "step": 616 + }, + { + "chosen_geometric_mean": -0.8899770975112915, + "epoch": 0.15, + "grad_norm": 2.375, + "learning_rate": 4.930492355412556e-06, + "log_odds": 0.5314224362373352, + "log_odds_ratio": -0.48054400086402893, + "loss": 0.3085, + "rejected_geometric_mean": -1.244566798210144, + "step": 617 + }, + { + "chosen_geometric_mean": -1.1467909812927246, + "epoch": 0.15, + "grad_norm": 3.4375, + "learning_rate": 4.930264218104405e-06, + "log_odds": 1.7401854991912842, + "log_odds_ratio": -0.46051326394081116, + "loss": 0.3638, + "rejected_geometric_mean": -2.7522146701812744, + "step": 618 + }, + { + "chosen_geometric_mean": -1.0179613828659058, + "epoch": 0.15, + "grad_norm": 2.625, + "learning_rate": 4.930035712309863e-06, + "log_odds": 0.38710159063339233, + "log_odds_ratio": -0.5296186208724976, + "loss": 0.3, + "rejected_geometric_mean": -1.2847200632095337, + "step": 619 + }, + { + "chosen_geometric_mean": -0.9826322197914124, + "epoch": 0.15, + "grad_norm": 2.25, + "learning_rate": 4.929806838063577e-06, + "log_odds": 0.26618242263793945, + "log_odds_ratio": -0.5711685419082642, + "loss": 0.2719, + "rejected_geometric_mean": -1.1538772583007812, + "step": 620 + }, + { + "chosen_geometric_mean": -1.186330795288086, + "epoch": 0.15, + "grad_norm": 3.28125, + "learning_rate": 4.929577595400248e-06, + "log_odds": 0.5430085062980652, + "log_odds_ratio": -0.4626278579235077, + "loss": 0.3607, + "rejected_geometric_mean": -1.5762014389038086, + "step": 621 + }, + { + "chosen_geometric_mean": -0.9633045792579651, + "epoch": 0.15, + "grad_norm": 5.40625, + "learning_rate": 4.929347984354637e-06, + "log_odds": 5.320793628692627, + "log_odds_ratio": -0.015341139398515224, + "loss": 0.3017, + "rejected_geometric_mean": -5.803522109985352, + "step": 622 + }, + { + "chosen_geometric_mean": -1.1912997961044312, + "epoch": 0.15, + "grad_norm": 5.84375, + "learning_rate": 4.929118004961558e-06, + "log_odds": 0.7163465023040771, + "log_odds_ratio": -0.4089818000793457, + "loss": 0.3295, + "rejected_geometric_mean": -1.751517653465271, + "step": 623 + }, + { + "chosen_geometric_mean": -1.2116789817810059, + "epoch": 0.15, + "grad_norm": 5.46875, + "learning_rate": 4.928887657255881e-06, + "log_odds": 3.52453875541687, + "log_odds_ratio": -0.12701964378356934, + "loss": 0.3467, + "rejected_geometric_mean": -4.424401760101318, + "step": 624 + }, + { + "chosen_geometric_mean": -1.2349759340286255, + "epoch": 0.15, + "grad_norm": 26.875, + "learning_rate": 4.9286569412725315e-06, + "log_odds": 1.2101330757141113, + "log_odds_ratio": -0.31378841400146484, + "loss": 0.4695, + "rejected_geometric_mean": -2.2288401126861572, + "step": 625 + }, + { + "chosen_geometric_mean": -1.3406751155853271, + "epoch": 0.15, + "grad_norm": 18.625, + "learning_rate": 4.928425857046493e-06, + "log_odds": 2.0882534980773926, + "log_odds_ratio": -0.16431476175785065, + "loss": 0.345, + "rejected_geometric_mean": -3.187391757965088, + "step": 626 + }, + { + "chosen_geometric_mean": -1.3731118440628052, + "epoch": 0.16, + "grad_norm": 50.75, + "learning_rate": 4.928194404612803e-06, + "log_odds": 3.381589889526367, + "log_odds_ratio": -0.37428566813468933, + "loss": 0.3954, + "rejected_geometric_mean": -4.592554092407227, + "step": 627 + }, + { + "chosen_geometric_mean": -1.2633951902389526, + "epoch": 0.16, + "grad_norm": 24.875, + "learning_rate": 4.927962584006555e-06, + "log_odds": 1.6302158832550049, + "log_odds_ratio": -0.28138405084609985, + "loss": 0.3501, + "rejected_geometric_mean": -2.6623518466949463, + "step": 628 + }, + { + "chosen_geometric_mean": -0.9644449949264526, + "epoch": 0.16, + "grad_norm": 2.640625, + "learning_rate": 4.927730395262898e-06, + "log_odds": 0.06474187970161438, + "log_odds_ratio": -0.6665656566619873, + "loss": 0.2943, + "rejected_geometric_mean": -1.0293834209442139, + "step": 629 + }, + { + "chosen_geometric_mean": -1.1283820867538452, + "epoch": 0.16, + "grad_norm": 20.375, + "learning_rate": 4.927497838417039e-06, + "log_odds": 2.493252992630005, + "log_odds_ratio": -0.15243253111839294, + "loss": 0.3804, + "rejected_geometric_mean": -3.296459436416626, + "step": 630 + }, + { + "chosen_geometric_mean": -0.9780951142311096, + "epoch": 0.16, + "grad_norm": 7.53125, + "learning_rate": 4.927264913504238e-06, + "log_odds": 0.5783870816230774, + "log_odds_ratio": -0.45545703172683716, + "loss": 0.3164, + "rejected_geometric_mean": -1.3773152828216553, + "step": 631 + }, + { + "chosen_geometric_mean": -1.2608938217163086, + "epoch": 0.16, + "grad_norm": 8.4375, + "learning_rate": 4.927031620559812e-06, + "log_odds": 0.7104082703590393, + "log_odds_ratio": -0.4837653636932373, + "loss": 0.3752, + "rejected_geometric_mean": -1.8533899784088135, + "step": 632 + }, + { + "chosen_geometric_mean": -1.2296490669250488, + "epoch": 0.16, + "grad_norm": 6.03125, + "learning_rate": 4.926797959619134e-06, + "log_odds": 2.5278780460357666, + "log_odds_ratio": -0.28318852186203003, + "loss": 0.3183, + "rejected_geometric_mean": -3.5444154739379883, + "step": 633 + }, + { + "chosen_geometric_mean": -1.2046613693237305, + "epoch": 0.16, + "grad_norm": 3.515625, + "learning_rate": 4.926563930717634e-06, + "log_odds": 2.18465518951416, + "log_odds_ratio": -0.16964848339557648, + "loss": 0.3605, + "rejected_geometric_mean": -3.088139057159424, + "step": 634 + }, + { + "chosen_geometric_mean": -1.1175580024719238, + "epoch": 0.16, + "grad_norm": 3.46875, + "learning_rate": 4.926329533890793e-06, + "log_odds": 0.6855787634849548, + "log_odds_ratio": -0.5123869180679321, + "loss": 0.3683, + "rejected_geometric_mean": -1.6564191579818726, + "step": 635 + }, + { + "chosen_geometric_mean": -1.1075859069824219, + "epoch": 0.16, + "grad_norm": 2.8125, + "learning_rate": 4.926094769174154e-06, + "log_odds": 0.19175277650356293, + "log_odds_ratio": -0.6263344883918762, + "loss": 0.339, + "rejected_geometric_mean": -1.2574831247329712, + "step": 636 + }, + { + "chosen_geometric_mean": -1.0195378065109253, + "epoch": 0.16, + "grad_norm": 3.890625, + "learning_rate": 4.925859636603313e-06, + "log_odds": 0.9724366664886475, + "log_odds_ratio": -0.4348229169845581, + "loss": 0.3279, + "rejected_geometric_mean": -1.829759955406189, + "step": 637 + }, + { + "chosen_geometric_mean": -1.290669560432434, + "epoch": 0.16, + "grad_norm": 3.921875, + "learning_rate": 4.92562413621392e-06, + "log_odds": 0.8520291447639465, + "log_odds_ratio": -0.4810843765735626, + "loss": 0.2968, + "rejected_geometric_mean": -2.0579912662506104, + "step": 638 + }, + { + "chosen_geometric_mean": -1.253618597984314, + "epoch": 0.16, + "grad_norm": 3.125, + "learning_rate": 4.925388268041684e-06, + "log_odds": 1.6989667415618896, + "log_odds_ratio": -0.32729947566986084, + "loss": 0.3279, + "rejected_geometric_mean": -2.694547176361084, + "step": 639 + }, + { + "chosen_geometric_mean": -1.1041059494018555, + "epoch": 0.16, + "grad_norm": 4.75, + "learning_rate": 4.925152032122368e-06, + "log_odds": 1.1776514053344727, + "log_odds_ratio": -0.45295166969299316, + "loss": 0.3101, + "rejected_geometric_mean": -2.136796474456787, + "step": 640 + }, + { + "chosen_geometric_mean": -1.195492148399353, + "epoch": 0.16, + "grad_norm": 3.015625, + "learning_rate": 4.9249154284917896e-06, + "log_odds": 0.3901822865009308, + "log_odds_ratio": -0.5498154163360596, + "loss": 0.35, + "rejected_geometric_mean": -1.5088378190994263, + "step": 641 + }, + { + "chosen_geometric_mean": -1.1190588474273682, + "epoch": 0.16, + "grad_norm": 3.59375, + "learning_rate": 4.924678457185826e-06, + "log_odds": 2.3918776512145996, + "log_odds_ratio": -0.20731452107429504, + "loss": 0.3216, + "rejected_geometric_mean": -3.226884365081787, + "step": 642 + }, + { + "chosen_geometric_mean": -1.1630418300628662, + "epoch": 0.16, + "grad_norm": 4.875, + "learning_rate": 4.924441118240406e-06, + "log_odds": 1.0910353660583496, + "log_odds_ratio": -0.35410258173942566, + "loss": 0.351, + "rejected_geometric_mean": -2.0646157264709473, + "step": 643 + }, + { + "chosen_geometric_mean": -1.1600146293640137, + "epoch": 0.16, + "grad_norm": 4.90625, + "learning_rate": 4.924203411691516e-06, + "log_odds": 1.5366616249084473, + "log_odds_ratio": -0.27935266494750977, + "loss": 0.3315, + "rejected_geometric_mean": -2.460803508758545, + "step": 644 + }, + { + "chosen_geometric_mean": -1.272174596786499, + "epoch": 0.16, + "grad_norm": 16.125, + "learning_rate": 4.9239653375751995e-06, + "log_odds": 0.9456494450569153, + "log_odds_ratio": -0.4951263964176178, + "loss": 0.3583, + "rejected_geometric_mean": -2.1144402027130127, + "step": 645 + }, + { + "chosen_geometric_mean": -1.1268517971038818, + "epoch": 0.16, + "grad_norm": 26.625, + "learning_rate": 4.923726895927552e-06, + "log_odds": 0.9238622188568115, + "log_odds_ratio": -0.37590545415878296, + "loss": 0.3404, + "rejected_geometric_mean": -1.851063847541809, + "step": 646 + }, + { + "chosen_geometric_mean": -0.9089518785476685, + "epoch": 0.16, + "grad_norm": 39.0, + "learning_rate": 4.923488086784729e-06, + "log_odds": 0.9852067232131958, + "log_odds_ratio": -0.36628687381744385, + "loss": 0.3225, + "rejected_geometric_mean": -1.6113741397857666, + "step": 647 + }, + { + "chosen_geometric_mean": -1.5045270919799805, + "epoch": 0.16, + "grad_norm": 31.875, + "learning_rate": 4.923248910182938e-06, + "log_odds": 3.4767253398895264, + "log_odds_ratio": -0.15256410837173462, + "loss": 0.4167, + "rejected_geometric_mean": -4.756143569946289, + "step": 648 + }, + { + "chosen_geometric_mean": -0.9553551077842712, + "epoch": 0.16, + "grad_norm": 30.0, + "learning_rate": 4.9230093661584456e-06, + "log_odds": 1.7027919292449951, + "log_odds_ratio": -0.31760382652282715, + "loss": 0.3967, + "rejected_geometric_mean": -2.40497088432312, + "step": 649 + }, + { + "chosen_geometric_mean": -1.2409169673919678, + "epoch": 0.16, + "grad_norm": 109.0, + "learning_rate": 4.922769454747571e-06, + "log_odds": 1.0964512825012207, + "log_odds_ratio": -0.41506290435791016, + "loss": 0.3465, + "rejected_geometric_mean": -2.175783157348633, + "step": 650 + }, + { + "chosen_geometric_mean": -1.2467776536941528, + "epoch": 0.16, + "grad_norm": 13.0625, + "learning_rate": 4.922529175986691e-06, + "log_odds": 1.1158849000930786, + "log_odds_ratio": -0.35789424180984497, + "loss": 0.3991, + "rejected_geometric_mean": -2.1557846069335938, + "step": 651 + }, + { + "chosen_geometric_mean": -1.1415716409683228, + "epoch": 0.16, + "grad_norm": 24.75, + "learning_rate": 4.922288529912238e-06, + "log_odds": 0.7087739706039429, + "log_odds_ratio": -0.4395962953567505, + "loss": 0.2948, + "rejected_geometric_mean": -1.6901905536651611, + "step": 652 + }, + { + "chosen_geometric_mean": -1.1607261896133423, + "epoch": 0.16, + "grad_norm": 3.0, + "learning_rate": 4.9220475165607e-06, + "log_odds": 3.8061697483062744, + "log_odds_ratio": -0.2067623734474182, + "loss": 0.3152, + "rejected_geometric_mean": -4.697953224182129, + "step": 653 + }, + { + "chosen_geometric_mean": -0.8991954922676086, + "epoch": 0.16, + "grad_norm": 3.578125, + "learning_rate": 4.9218061359686186e-06, + "log_odds": 0.7890310287475586, + "log_odds_ratio": -0.49488160014152527, + "loss": 0.2973, + "rejected_geometric_mean": -1.5389888286590576, + "step": 654 + }, + { + "chosen_geometric_mean": -0.8338973522186279, + "epoch": 0.16, + "grad_norm": 6.0625, + "learning_rate": 4.921564388172595e-06, + "log_odds": 1.5205748081207275, + "log_odds_ratio": -0.26491275429725647, + "loss": 0.3385, + "rejected_geometric_mean": -1.9489445686340332, + "step": 655 + }, + { + "chosen_geometric_mean": -1.1131904125213623, + "epoch": 0.16, + "grad_norm": 3.78125, + "learning_rate": 4.921322273209284e-06, + "log_odds": 0.041813239455223083, + "log_odds_ratio": -0.6963893175125122, + "loss": 0.3661, + "rejected_geometric_mean": -1.1489092111587524, + "step": 656 + }, + { + "chosen_geometric_mean": -1.2786040306091309, + "epoch": 0.16, + "grad_norm": 2.65625, + "learning_rate": 4.921079791115393e-06, + "log_odds": 2.3750624656677246, + "log_odds_ratio": -0.429431289434433, + "loss": 0.3072, + "rejected_geometric_mean": -3.519383192062378, + "step": 657 + }, + { + "chosen_geometric_mean": -1.555657148361206, + "epoch": 0.16, + "grad_norm": 23.0, + "learning_rate": 4.920836941927691e-06, + "log_odds": 3.180155038833618, + "log_odds_ratio": -0.2729276120662689, + "loss": 0.3535, + "rejected_geometric_mean": -4.555578231811523, + "step": 658 + }, + { + "chosen_geometric_mean": -1.0047473907470703, + "epoch": 0.16, + "grad_norm": 6.84375, + "learning_rate": 4.920593725683001e-06, + "log_odds": 0.6980210542678833, + "log_odds_ratio": -0.45937177538871765, + "loss": 0.2967, + "rejected_geometric_mean": -1.5775667428970337, + "step": 659 + }, + { + "chosen_geometric_mean": -1.2844429016113281, + "epoch": 0.16, + "grad_norm": 7.6875, + "learning_rate": 4.920350142418196e-06, + "log_odds": 2.1673030853271484, + "log_odds_ratio": -0.2971216142177582, + "loss": 0.377, + "rejected_geometric_mean": -3.275272846221924, + "step": 660 + }, + { + "chosen_geometric_mean": -1.3288991451263428, + "epoch": 0.16, + "grad_norm": 11.75, + "learning_rate": 4.920106192170212e-06, + "log_odds": 2.7654871940612793, + "log_odds_ratio": -0.2386103868484497, + "loss": 0.3181, + "rejected_geometric_mean": -3.8529422283172607, + "step": 661 + }, + { + "chosen_geometric_mean": -0.8574404120445251, + "epoch": 0.16, + "grad_norm": 6.25, + "learning_rate": 4.919861874976038e-06, + "log_odds": 1.7626991271972656, + "log_odds_ratio": -0.18652738630771637, + "loss": 0.2609, + "rejected_geometric_mean": -2.197223424911499, + "step": 662 + }, + { + "chosen_geometric_mean": -1.1517764329910278, + "epoch": 0.16, + "grad_norm": 44.75, + "learning_rate": 4.919617190872718e-06, + "log_odds": 4.9878034591674805, + "log_odds_ratio": -0.11312597990036011, + "loss": 0.3162, + "rejected_geometric_mean": -5.789288520812988, + "step": 663 + }, + { + "chosen_geometric_mean": -1.2645537853240967, + "epoch": 0.16, + "grad_norm": 27.0, + "learning_rate": 4.919372139897351e-06, + "log_odds": 1.5815448760986328, + "log_odds_ratio": -0.2614295482635498, + "loss": 0.3642, + "rejected_geometric_mean": -2.5580410957336426, + "step": 664 + }, + { + "chosen_geometric_mean": -1.1446359157562256, + "epoch": 0.16, + "grad_norm": 60.25, + "learning_rate": 4.919126722087092e-06, + "log_odds": 2.4418764114379883, + "log_odds_ratio": -0.23228314518928528, + "loss": 0.4719, + "rejected_geometric_mean": -3.2554688453674316, + "step": 665 + }, + { + "chosen_geometric_mean": -1.2891379594802856, + "epoch": 0.16, + "grad_norm": 10.9375, + "learning_rate": 4.918880937479155e-06, + "log_odds": 3.7641031742095947, + "log_odds_ratio": -0.16153879463672638, + "loss": 0.2853, + "rejected_geometric_mean": -4.797823905944824, + "step": 666 + }, + { + "chosen_geometric_mean": -1.3162386417388916, + "epoch": 0.17, + "grad_norm": 19.625, + "learning_rate": 4.918634786110804e-06, + "log_odds": 3.540355682373047, + "log_odds_ratio": -0.2264198362827301, + "loss": 0.3692, + "rejected_geometric_mean": -4.5938262939453125, + "step": 667 + }, + { + "chosen_geometric_mean": -0.9982919692993164, + "epoch": 0.17, + "grad_norm": 6.09375, + "learning_rate": 4.918388268019364e-06, + "log_odds": 3.128364086151123, + "log_odds_ratio": -0.19171741604804993, + "loss": 0.3393, + "rejected_geometric_mean": -3.7890868186950684, + "step": 668 + }, + { + "chosen_geometric_mean": -1.0380136966705322, + "epoch": 0.17, + "grad_norm": 4.40625, + "learning_rate": 4.918141383242211e-06, + "log_odds": 2.9405410289764404, + "log_odds_ratio": -0.2624157667160034, + "loss": 0.3222, + "rejected_geometric_mean": -3.723233699798584, + "step": 669 + }, + { + "chosen_geometric_mean": -1.1430305242538452, + "epoch": 0.17, + "grad_norm": 5.875, + "learning_rate": 4.917894131816779e-06, + "log_odds": 0.6736006140708923, + "log_odds_ratio": -0.4200131297111511, + "loss": 0.3309, + "rejected_geometric_mean": -1.6511356830596924, + "step": 670 + }, + { + "chosen_geometric_mean": -1.2984204292297363, + "epoch": 0.17, + "grad_norm": 29.75, + "learning_rate": 4.9176465137805584e-06, + "log_odds": 1.5036448240280151, + "log_odds_ratio": -0.4697170555591583, + "loss": 0.3593, + "rejected_geometric_mean": -2.693510055541992, + "step": 671 + }, + { + "chosen_geometric_mean": -1.0397849082946777, + "epoch": 0.17, + "grad_norm": 5.90625, + "learning_rate": 4.917398529171094e-06, + "log_odds": 0.3912128210067749, + "log_odds_ratio": -0.5597583055496216, + "loss": 0.3374, + "rejected_geometric_mean": -1.3578935861587524, + "step": 672 + }, + { + "chosen_geometric_mean": -1.2190067768096924, + "epoch": 0.17, + "grad_norm": 5.875, + "learning_rate": 4.917150178025985e-06, + "log_odds": 0.7028408050537109, + "log_odds_ratio": -0.5446604490280151, + "loss": 0.3981, + "rejected_geometric_mean": -1.8362298011779785, + "step": 673 + }, + { + "chosen_geometric_mean": -1.03200101852417, + "epoch": 0.17, + "grad_norm": 3.171875, + "learning_rate": 4.916901460382887e-06, + "log_odds": 1.63679039478302, + "log_odds_ratio": -0.37305060029029846, + "loss": 0.2758, + "rejected_geometric_mean": -2.414665937423706, + "step": 674 + }, + { + "chosen_geometric_mean": -1.2095153331756592, + "epoch": 0.17, + "grad_norm": 4.21875, + "learning_rate": 4.916652376279513e-06, + "log_odds": 1.9740068912506104, + "log_odds_ratio": -0.28492024540901184, + "loss": 0.3142, + "rejected_geometric_mean": -2.9537761211395264, + "step": 675 + }, + { + "chosen_geometric_mean": -1.07765793800354, + "epoch": 0.17, + "grad_norm": 11.125, + "learning_rate": 4.91640292575363e-06, + "log_odds": 1.1232595443725586, + "log_odds_ratio": -0.4191213548183441, + "loss": 0.3204, + "rejected_geometric_mean": -2.0322258472442627, + "step": 676 + }, + { + "chosen_geometric_mean": -1.195480227470398, + "epoch": 0.17, + "grad_norm": 4.59375, + "learning_rate": 4.9161531088430606e-06, + "log_odds": 2.1036312580108643, + "log_odds_ratio": -0.39167582988739014, + "loss": 0.3441, + "rejected_geometric_mean": -3.1885757446289062, + "step": 677 + }, + { + "chosen_geometric_mean": -1.0113449096679688, + "epoch": 0.17, + "grad_norm": 5.59375, + "learning_rate": 4.915902925585682e-06, + "log_odds": 1.1915265321731567, + "log_odds_ratio": -0.35287606716156006, + "loss": 0.3428, + "rejected_geometric_mean": -1.9737831354141235, + "step": 678 + }, + { + "chosen_geometric_mean": -1.3719465732574463, + "epoch": 0.17, + "grad_norm": 10.3125, + "learning_rate": 4.915652376019429e-06, + "log_odds": 0.18899311125278473, + "log_odds_ratio": -0.6137575507164001, + "loss": 0.3552, + "rejected_geometric_mean": -1.532928705215454, + "step": 679 + }, + { + "chosen_geometric_mean": -1.084216833114624, + "epoch": 0.17, + "grad_norm": 13.9375, + "learning_rate": 4.915401460182291e-06, + "log_odds": 0.7906134128570557, + "log_odds_ratio": -0.5124551057815552, + "loss": 0.317, + "rejected_geometric_mean": -1.7393279075622559, + "step": 680 + }, + { + "chosen_geometric_mean": -1.0407495498657227, + "epoch": 0.17, + "grad_norm": 4.34375, + "learning_rate": 4.915150178112312e-06, + "log_odds": 0.826185941696167, + "log_odds_ratio": -0.410052090883255, + "loss": 0.3117, + "rejected_geometric_mean": -1.6133849620819092, + "step": 681 + }, + { + "chosen_geometric_mean": -1.325562834739685, + "epoch": 0.17, + "grad_norm": 26.375, + "learning_rate": 4.914898529847593e-06, + "log_odds": 0.21629223227500916, + "log_odds_ratio": -0.6295257210731506, + "loss": 0.3657, + "rejected_geometric_mean": -1.4942681789398193, + "step": 682 + }, + { + "chosen_geometric_mean": -1.2533413171768188, + "epoch": 0.17, + "grad_norm": 5.9375, + "learning_rate": 4.91464651542629e-06, + "log_odds": 2.389291763305664, + "log_odds_ratio": -0.23024190962314606, + "loss": 0.3212, + "rejected_geometric_mean": -3.414527177810669, + "step": 683 + }, + { + "chosen_geometric_mean": -1.1947892904281616, + "epoch": 0.17, + "grad_norm": 7.5, + "learning_rate": 4.914394134886614e-06, + "log_odds": 0.256850004196167, + "log_odds_ratio": -0.5873991847038269, + "loss": 0.3641, + "rejected_geometric_mean": -1.3814692497253418, + "step": 684 + }, + { + "chosen_geometric_mean": -1.115666389465332, + "epoch": 0.17, + "grad_norm": 2.8125, + "learning_rate": 4.914141388266833e-06, + "log_odds": 1.3418972492218018, + "log_odds_ratio": -0.3548451066017151, + "loss": 0.2754, + "rejected_geometric_mean": -2.248448133468628, + "step": 685 + }, + { + "chosen_geometric_mean": -1.0272881984710693, + "epoch": 0.17, + "grad_norm": 2.703125, + "learning_rate": 4.913888275605269e-06, + "log_odds": 1.855837345123291, + "log_odds_ratio": -0.18173810839653015, + "loss": 0.2984, + "rejected_geometric_mean": -2.5397377014160156, + "step": 686 + }, + { + "chosen_geometric_mean": -1.0666426420211792, + "epoch": 0.17, + "grad_norm": 4.8125, + "learning_rate": 4.913634796940298e-06, + "log_odds": 1.2518746852874756, + "log_odds_ratio": -0.3067352771759033, + "loss": 0.3067, + "rejected_geometric_mean": -2.069173812866211, + "step": 687 + }, + { + "chosen_geometric_mean": -1.1257648468017578, + "epoch": 0.17, + "grad_norm": 2.890625, + "learning_rate": 4.913380952310356e-06, + "log_odds": 0.698371171951294, + "log_odds_ratio": -0.44310325384140015, + "loss": 0.2894, + "rejected_geometric_mean": -1.6685622930526733, + "step": 688 + }, + { + "chosen_geometric_mean": -1.2910921573638916, + "epoch": 0.17, + "grad_norm": 3.453125, + "learning_rate": 4.91312674175393e-06, + "log_odds": 0.2761771082878113, + "log_odds_ratio": -0.5723698735237122, + "loss": 0.3121, + "rejected_geometric_mean": -1.5041998624801636, + "step": 689 + }, + { + "chosen_geometric_mean": -1.3482801914215088, + "epoch": 0.17, + "grad_norm": 7.90625, + "learning_rate": 4.912872165309568e-06, + "log_odds": 3.7681097984313965, + "log_odds_ratio": -0.0929601788520813, + "loss": 0.2869, + "rejected_geometric_mean": -4.827731609344482, + "step": 690 + }, + { + "chosen_geometric_mean": -1.2593822479248047, + "epoch": 0.17, + "grad_norm": 8.5625, + "learning_rate": 4.9126172230158654e-06, + "log_odds": 1.737200140953064, + "log_odds_ratio": -0.31583577394485474, + "loss": 0.3084, + "rejected_geometric_mean": -2.8170061111450195, + "step": 691 + }, + { + "chosen_geometric_mean": -1.2370353937149048, + "epoch": 0.17, + "grad_norm": 5.78125, + "learning_rate": 4.91236191491148e-06, + "log_odds": 1.1639370918273926, + "log_odds_ratio": -0.39466890692710876, + "loss": 0.3358, + "rejected_geometric_mean": -2.2600367069244385, + "step": 692 + }, + { + "chosen_geometric_mean": -1.1917498111724854, + "epoch": 0.17, + "grad_norm": 9.3125, + "learning_rate": 4.912106241035121e-06, + "log_odds": 3.2563796043395996, + "log_odds_ratio": -0.1842147707939148, + "loss": 0.2899, + "rejected_geometric_mean": -4.168694496154785, + "step": 693 + }, + { + "chosen_geometric_mean": -1.0712610483169556, + "epoch": 0.17, + "grad_norm": 5.1875, + "learning_rate": 4.911850201425558e-06, + "log_odds": 0.2691078186035156, + "log_odds_ratio": -0.5938885807991028, + "loss": 0.3234, + "rejected_geometric_mean": -1.3038114309310913, + "step": 694 + }, + { + "chosen_geometric_mean": -1.2488449811935425, + "epoch": 0.17, + "grad_norm": 15.0, + "learning_rate": 4.91159379612161e-06, + "log_odds": 0.6406638622283936, + "log_odds_ratio": -0.438539981842041, + "loss": 0.3364, + "rejected_geometric_mean": -1.7445688247680664, + "step": 695 + }, + { + "chosen_geometric_mean": -1.0441536903381348, + "epoch": 0.17, + "grad_norm": 13.75, + "learning_rate": 4.911337025162154e-06, + "log_odds": 0.5323293209075928, + "log_odds_ratio": -0.49222373962402344, + "loss": 0.3054, + "rejected_geometric_mean": -1.4468833208084106, + "step": 696 + }, + { + "chosen_geometric_mean": -1.068847417831421, + "epoch": 0.17, + "grad_norm": 3.859375, + "learning_rate": 4.911079888586124e-06, + "log_odds": 2.230067491531372, + "log_odds_ratio": -0.38348132371902466, + "loss": 0.3754, + "rejected_geometric_mean": -3.1196913719177246, + "step": 697 + }, + { + "chosen_geometric_mean": -1.1977989673614502, + "epoch": 0.17, + "grad_norm": 3.953125, + "learning_rate": 4.910822386432507e-06, + "log_odds": 0.9423573017120361, + "log_odds_ratio": -0.37535491585731506, + "loss": 0.2886, + "rejected_geometric_mean": -1.9413832426071167, + "step": 698 + }, + { + "chosen_geometric_mean": -1.224046230316162, + "epoch": 0.17, + "grad_norm": 5.21875, + "learning_rate": 4.910564518740347e-06, + "log_odds": 0.2657322883605957, + "log_odds_ratio": -0.599587082862854, + "loss": 0.3499, + "rejected_geometric_mean": -1.4352097511291504, + "step": 699 + }, + { + "chosen_geometric_mean": -1.1134260892868042, + "epoch": 0.17, + "grad_norm": 4.28125, + "learning_rate": 4.9103062855487436e-06, + "log_odds": 3.5840799808502197, + "log_odds_ratio": -0.2709987163543701, + "loss": 0.2884, + "rejected_geometric_mean": -4.445669174194336, + "step": 700 + }, + { + "chosen_geometric_mean": -0.9830744862556458, + "epoch": 0.17, + "grad_norm": 5.78125, + "learning_rate": 4.91004768689685e-06, + "log_odds": 3.6912410259246826, + "log_odds_ratio": -0.29347676038742065, + "loss": 0.2898, + "rejected_geometric_mean": -4.404994487762451, + "step": 701 + }, + { + "chosen_geometric_mean": -0.8686716556549072, + "epoch": 0.17, + "grad_norm": 9.0, + "learning_rate": 4.909788722823877e-06, + "log_odds": 5.857670783996582, + "log_odds_ratio": -0.04051865264773369, + "loss": 0.3064, + "rejected_geometric_mean": -6.201098442077637, + "step": 702 + }, + { + "chosen_geometric_mean": -1.3188680410385132, + "epoch": 0.17, + "grad_norm": 4.71875, + "learning_rate": 4.909529393369089e-06, + "log_odds": 3.706620454788208, + "log_odds_ratio": -0.1789647340774536, + "loss": 0.277, + "rejected_geometric_mean": -4.786334991455078, + "step": 703 + }, + { + "chosen_geometric_mean": -1.3654662370681763, + "epoch": 0.17, + "grad_norm": 8.875, + "learning_rate": 4.909269698571807e-06, + "log_odds": 2.396480083465576, + "log_odds_ratio": -0.2724609375, + "loss": 0.2992, + "rejected_geometric_mean": -3.5275745391845703, + "step": 704 + }, + { + "chosen_geometric_mean": -1.057477355003357, + "epoch": 0.17, + "grad_norm": 5.5, + "learning_rate": 4.909009638471407e-06, + "log_odds": 4.526589393615723, + "log_odds_ratio": -0.3628372550010681, + "loss": 0.3015, + "rejected_geometric_mean": -5.330488204956055, + "step": 705 + }, + { + "chosen_geometric_mean": -1.3667116165161133, + "epoch": 0.17, + "grad_norm": 2.9375, + "learning_rate": 4.90874921310732e-06, + "log_odds": 4.678783416748047, + "log_odds_ratio": -0.15831847488880157, + "loss": 0.2949, + "rejected_geometric_mean": -5.817950248718262, + "step": 706 + }, + { + "chosen_geometric_mean": -0.8230222463607788, + "epoch": 0.18, + "grad_norm": 34.0, + "learning_rate": 4.908488422519032e-06, + "log_odds": 2.7491261959075928, + "log_odds_ratio": -0.31264638900756836, + "loss": 0.3121, + "rejected_geometric_mean": -3.273193836212158, + "step": 707 + }, + { + "chosen_geometric_mean": -2.032649040222168, + "epoch": 0.18, + "grad_norm": 36.5, + "learning_rate": 4.908227266746087e-06, + "log_odds": 2.087428092956543, + "log_odds_ratio": -0.4490914046764374, + "loss": 0.3524, + "rejected_geometric_mean": -4.094099998474121, + "step": 708 + }, + { + "chosen_geometric_mean": -1.1104536056518555, + "epoch": 0.18, + "grad_norm": 68.0, + "learning_rate": 4.9079657458280814e-06, + "log_odds": 3.435213088989258, + "log_odds_ratio": -0.21499964594841003, + "loss": 0.366, + "rejected_geometric_mean": -4.219351291656494, + "step": 709 + }, + { + "chosen_geometric_mean": -1.3146142959594727, + "epoch": 0.18, + "grad_norm": 135.0, + "learning_rate": 4.907703859804668e-06, + "log_odds": 4.670612812042236, + "log_odds_ratio": -0.23614799976348877, + "loss": 0.4867, + "rejected_geometric_mean": -5.772465705871582, + "step": 710 + }, + { + "chosen_geometric_mean": -1.1394778490066528, + "epoch": 0.18, + "grad_norm": 11.75, + "learning_rate": 4.907441608715554e-06, + "log_odds": 4.31093168258667, + "log_odds_ratio": -0.309349000453949, + "loss": 0.3211, + "rejected_geometric_mean": -5.154521942138672, + "step": 711 + }, + { + "chosen_geometric_mean": -0.9943021535873413, + "epoch": 0.18, + "grad_norm": 13.25, + "learning_rate": 4.907178992600505e-06, + "log_odds": 2.1779897212982178, + "log_odds_ratio": -0.42724940180778503, + "loss": 0.3516, + "rejected_geometric_mean": -2.9416120052337646, + "step": 712 + }, + { + "chosen_geometric_mean": -0.9235613942146301, + "epoch": 0.18, + "grad_norm": 3.28125, + "learning_rate": 4.90691601149934e-06, + "log_odds": 0.1875842660665512, + "log_odds_ratio": -0.6080539226531982, + "loss": 0.3049, + "rejected_geometric_mean": -1.0573921203613281, + "step": 713 + }, + { + "chosen_geometric_mean": -1.2873865365982056, + "epoch": 0.18, + "grad_norm": 5.5, + "learning_rate": 4.906652665451931e-06, + "log_odds": 1.9475531578063965, + "log_odds_ratio": -0.19268737733364105, + "loss": 0.3037, + "rejected_geometric_mean": -2.991642475128174, + "step": 714 + }, + { + "chosen_geometric_mean": -1.0176602602005005, + "epoch": 0.18, + "grad_norm": 22.375, + "learning_rate": 4.906388954498209e-06, + "log_odds": 0.8036289215087891, + "log_odds_ratio": -0.4040794372558594, + "loss": 0.3138, + "rejected_geometric_mean": -1.573843240737915, + "step": 715 + }, + { + "chosen_geometric_mean": -1.2372090816497803, + "epoch": 0.18, + "grad_norm": 3.75, + "learning_rate": 4.906124878678159e-06, + "log_odds": 2.344686985015869, + "log_odds_ratio": -0.3739830255508423, + "loss": 0.2984, + "rejected_geometric_mean": -3.4363105297088623, + "step": 716 + }, + { + "chosen_geometric_mean": -1.1081126928329468, + "epoch": 0.18, + "grad_norm": 5.03125, + "learning_rate": 4.905860438031821e-06, + "log_odds": 1.8026487827301025, + "log_odds_ratio": -0.16867749392986298, + "loss": 0.3439, + "rejected_geometric_mean": -2.564863681793213, + "step": 717 + }, + { + "chosen_geometric_mean": -1.1679426431655884, + "epoch": 0.18, + "grad_norm": 3.046875, + "learning_rate": 4.90559563259929e-06, + "log_odds": 3.507965564727783, + "log_odds_ratio": -0.18614497780799866, + "loss": 0.2926, + "rejected_geometric_mean": -4.39508056640625, + "step": 718 + }, + { + "chosen_geometric_mean": -1.0994250774383545, + "epoch": 0.18, + "grad_norm": 16.25, + "learning_rate": 4.905330462420718e-06, + "log_odds": 2.0558605194091797, + "log_odds_ratio": -0.47551900148391724, + "loss": 0.3425, + "rejected_geometric_mean": -3.014915704727173, + "step": 719 + }, + { + "chosen_geometric_mean": -1.054782509803772, + "epoch": 0.18, + "grad_norm": 13.0625, + "learning_rate": 4.90506492753631e-06, + "log_odds": 3.442978858947754, + "log_odds_ratio": -0.06052258610725403, + "loss": 0.2915, + "rejected_geometric_mean": -4.0636396408081055, + "step": 720 + }, + { + "chosen_geometric_mean": -1.3681297302246094, + "epoch": 0.18, + "grad_norm": 5.28125, + "learning_rate": 4.9047990279863275e-06, + "log_odds": 1.6282963752746582, + "log_odds_ratio": -0.30565783381462097, + "loss": 0.3134, + "rejected_geometric_mean": -2.817876100540161, + "step": 721 + }, + { + "chosen_geometric_mean": -1.624801754951477, + "epoch": 0.18, + "grad_norm": 62.25, + "learning_rate": 4.904532763811089e-06, + "log_odds": 1.115924596786499, + "log_odds_ratio": -0.522761344909668, + "loss": 0.3652, + "rejected_geometric_mean": -2.6837732791900635, + "step": 722 + }, + { + "chosen_geometric_mean": -1.3165191411972046, + "epoch": 0.18, + "grad_norm": 24.875, + "learning_rate": 4.9042661350509646e-06, + "log_odds": 1.1274821758270264, + "log_odds_ratio": -0.6355529427528381, + "loss": 0.3834, + "rejected_geometric_mean": -2.2848899364471436, + "step": 723 + }, + { + "chosen_geometric_mean": -1.7774879932403564, + "epoch": 0.18, + "grad_norm": 4.125, + "learning_rate": 4.903999141746383e-06, + "log_odds": 2.194894552230835, + "log_odds_ratio": -0.3128625154495239, + "loss": 0.2646, + "rejected_geometric_mean": -3.809011697769165, + "step": 724 + }, + { + "chosen_geometric_mean": -1.0605895519256592, + "epoch": 0.18, + "grad_norm": 10.25, + "learning_rate": 4.9037317839378255e-06, + "log_odds": 2.3274590969085693, + "log_odds_ratio": -0.3161850571632385, + "loss": 0.2704, + "rejected_geometric_mean": -3.15761661529541, + "step": 725 + }, + { + "chosen_geometric_mean": -1.124338984489441, + "epoch": 0.18, + "grad_norm": 20.125, + "learning_rate": 4.903464061665831e-06, + "log_odds": 0.5463271737098694, + "log_odds_ratio": -0.48683732748031616, + "loss": 0.3738, + "rejected_geometric_mean": -1.5291192531585693, + "step": 726 + }, + { + "chosen_geometric_mean": -1.0188267230987549, + "epoch": 0.18, + "grad_norm": 6.59375, + "learning_rate": 4.903195974970991e-06, + "log_odds": 2.80588436126709, + "log_odds_ratio": -0.3567107319831848, + "loss": 0.3335, + "rejected_geometric_mean": -3.568838357925415, + "step": 727 + }, + { + "chosen_geometric_mean": -1.10396409034729, + "epoch": 0.18, + "grad_norm": 18.75, + "learning_rate": 4.902927523893957e-06, + "log_odds": 3.5557475090026855, + "log_odds_ratio": -0.2595711052417755, + "loss": 0.3051, + "rejected_geometric_mean": -4.377292156219482, + "step": 728 + }, + { + "chosen_geometric_mean": -1.3028802871704102, + "epoch": 0.18, + "grad_norm": 20.0, + "learning_rate": 4.90265870847543e-06, + "log_odds": 2.755624771118164, + "log_odds_ratio": -0.16595245897769928, + "loss": 0.3456, + "rejected_geometric_mean": -3.8100717067718506, + "step": 729 + }, + { + "chosen_geometric_mean": -1.1342999935150146, + "epoch": 0.18, + "grad_norm": 6.21875, + "learning_rate": 4.902389528756168e-06, + "log_odds": 3.63830828666687, + "log_odds_ratio": -0.13102038204669952, + "loss": 0.3568, + "rejected_geometric_mean": -4.424839019775391, + "step": 730 + }, + { + "chosen_geometric_mean": -1.090015172958374, + "epoch": 0.18, + "grad_norm": 4.71875, + "learning_rate": 4.902119984776988e-06, + "log_odds": 1.7364393472671509, + "log_odds_ratio": -0.421972393989563, + "loss": 0.2826, + "rejected_geometric_mean": -2.6227786540985107, + "step": 731 + }, + { + "chosen_geometric_mean": -1.0887510776519775, + "epoch": 0.18, + "grad_norm": 4.25, + "learning_rate": 4.901850076578758e-06, + "log_odds": 0.749172568321228, + "log_odds_ratio": -0.41215264797210693, + "loss": 0.3451, + "rejected_geometric_mean": -1.663530945777893, + "step": 732 + }, + { + "chosen_geometric_mean": -1.0335816144943237, + "epoch": 0.18, + "grad_norm": 8.125, + "learning_rate": 4.901579804202403e-06, + "log_odds": 1.640447735786438, + "log_odds_ratio": -0.416591614484787, + "loss": 0.3484, + "rejected_geometric_mean": -2.4634008407592773, + "step": 733 + }, + { + "chosen_geometric_mean": -1.178180456161499, + "epoch": 0.18, + "grad_norm": 5.75, + "learning_rate": 4.901309167688902e-06, + "log_odds": 2.429280996322632, + "log_odds_ratio": -0.20638230443000793, + "loss": 0.308, + "rejected_geometric_mean": -3.2812414169311523, + "step": 734 + }, + { + "chosen_geometric_mean": -1.3419843912124634, + "epoch": 0.18, + "grad_norm": 10.9375, + "learning_rate": 4.901038167079291e-06, + "log_odds": -0.002389274537563324, + "log_odds_ratio": -0.6944823265075684, + "loss": 0.3445, + "rejected_geometric_mean": -1.341101884841919, + "step": 735 + }, + { + "chosen_geometric_mean": -1.0778032541275024, + "epoch": 0.18, + "grad_norm": 3.734375, + "learning_rate": 4.9007668024146584e-06, + "log_odds": 1.2232744693756104, + "log_odds_ratio": -0.4605935513973236, + "loss": 0.2912, + "rejected_geometric_mean": -2.164146900177002, + "step": 736 + }, + { + "chosen_geometric_mean": -1.1769362688064575, + "epoch": 0.18, + "grad_norm": 4.0625, + "learning_rate": 4.9004950737361506e-06, + "log_odds": 2.2704577445983887, + "log_odds_ratio": -0.1967618465423584, + "loss": 0.3129, + "rejected_geometric_mean": -3.1511917114257812, + "step": 737 + }, + { + "chosen_geometric_mean": -1.1119110584259033, + "epoch": 0.18, + "grad_norm": 9.0, + "learning_rate": 4.900222981084969e-06, + "log_odds": 0.1890096813440323, + "log_odds_ratio": -0.6080745458602905, + "loss": 0.354, + "rejected_geometric_mean": -1.2531365156173706, + "step": 738 + }, + { + "chosen_geometric_mean": -1.1439318656921387, + "epoch": 0.18, + "grad_norm": 30.0, + "learning_rate": 4.899950524502369e-06, + "log_odds": 2.8232474327087402, + "log_odds_ratio": -0.19950829446315765, + "loss": 0.3243, + "rejected_geometric_mean": -3.686147689819336, + "step": 739 + }, + { + "chosen_geometric_mean": -1.4049047231674194, + "epoch": 0.18, + "grad_norm": 23.5, + "learning_rate": 4.899677704029661e-06, + "log_odds": 2.0634312629699707, + "log_odds_ratio": -0.27790629863739014, + "loss": 0.3361, + "rejected_geometric_mean": -3.287778854370117, + "step": 740 + }, + { + "chosen_geometric_mean": -1.384354591369629, + "epoch": 0.18, + "grad_norm": 12.0625, + "learning_rate": 4.899404519708211e-06, + "log_odds": 2.8179049491882324, + "log_odds_ratio": -0.2096070796251297, + "loss": 0.2984, + "rejected_geometric_mean": -3.9837470054626465, + "step": 741 + }, + { + "chosen_geometric_mean": -1.0828596353530884, + "epoch": 0.18, + "grad_norm": 5.15625, + "learning_rate": 4.899130971579441e-06, + "log_odds": 1.5403376817703247, + "log_odds_ratio": -0.320785254240036, + "loss": 0.3022, + "rejected_geometric_mean": -2.353520631790161, + "step": 742 + }, + { + "chosen_geometric_mean": -1.2970985174179077, + "epoch": 0.18, + "grad_norm": 3.03125, + "learning_rate": 4.898857059684827e-06, + "log_odds": 2.415149450302124, + "log_odds_ratio": -0.4998568892478943, + "loss": 0.3238, + "rejected_geometric_mean": -3.599698066711426, + "step": 743 + }, + { + "chosen_geometric_mean": -1.106622338294983, + "epoch": 0.18, + "grad_norm": 3.46875, + "learning_rate": 4.8985827840659e-06, + "log_odds": 0.2528882622718811, + "log_odds_ratio": -0.5783423185348511, + "loss": 0.292, + "rejected_geometric_mean": -1.2913365364074707, + "step": 744 + }, + { + "chosen_geometric_mean": -1.1927587985992432, + "epoch": 0.18, + "grad_norm": 2.53125, + "learning_rate": 4.898308144764249e-06, + "log_odds": 2.3939125537872314, + "log_odds_ratio": -0.15625253319740295, + "loss": 0.2871, + "rejected_geometric_mean": -3.2608981132507324, + "step": 745 + }, + { + "chosen_geometric_mean": -1.2406563758850098, + "epoch": 0.18, + "grad_norm": 34.0, + "learning_rate": 4.898033141821515e-06, + "log_odds": 3.032972574234009, + "log_odds_ratio": -0.06729497760534286, + "loss": 0.3763, + "rejected_geometric_mean": -3.9176907539367676, + "step": 746 + }, + { + "chosen_geometric_mean": -1.4623132944107056, + "epoch": 0.18, + "grad_norm": 5.25, + "learning_rate": 4.897757775279393e-06, + "log_odds": 0.4702058434486389, + "log_odds_ratio": -0.4922525882720947, + "loss": 0.2563, + "rejected_geometric_mean": -1.8404771089553833, + "step": 747 + }, + { + "chosen_geometric_mean": -1.1021913290023804, + "epoch": 0.19, + "grad_norm": 7.03125, + "learning_rate": 4.897482045179637e-06, + "log_odds": 1.5227930545806885, + "log_odds_ratio": -0.2542310655117035, + "loss": 0.3102, + "rejected_geometric_mean": -2.34721040725708, + "step": 748 + }, + { + "chosen_geometric_mean": -1.3191838264465332, + "epoch": 0.19, + "grad_norm": 3.203125, + "learning_rate": 4.897205951564053e-06, + "log_odds": 1.1028231382369995, + "log_odds_ratio": -0.47655418515205383, + "loss": 0.3497, + "rejected_geometric_mean": -2.345162868499756, + "step": 749 + }, + { + "chosen_geometric_mean": -1.1785235404968262, + "epoch": 0.19, + "grad_norm": 3.4375, + "learning_rate": 4.896929494474506e-06, + "log_odds": 0.5917258262634277, + "log_odds_ratio": -0.4701879024505615, + "loss": 0.3165, + "rejected_geometric_mean": -1.6472336053848267, + "step": 750 + }, + { + "chosen_geometric_mean": -1.678901195526123, + "epoch": 0.19, + "grad_norm": 10.375, + "learning_rate": 4.896652673952912e-06, + "log_odds": 3.4526515007019043, + "log_odds_ratio": -0.2641853988170624, + "loss": 0.3254, + "rejected_geometric_mean": -4.935418605804443, + "step": 751 + }, + { + "chosen_geometric_mean": -1.2144200801849365, + "epoch": 0.19, + "grad_norm": 5.4375, + "learning_rate": 4.896375490041243e-06, + "log_odds": 2.9917261600494385, + "log_odds_ratio": -0.23477569222450256, + "loss": 0.2948, + "rejected_geometric_mean": -3.9589951038360596, + "step": 752 + }, + { + "chosen_geometric_mean": -1.0492069721221924, + "epoch": 0.19, + "grad_norm": 2.984375, + "learning_rate": 4.896097942781528e-06, + "log_odds": 2.3062803745269775, + "log_odds_ratio": -0.3634171485900879, + "loss": 0.3288, + "rejected_geometric_mean": -3.156672239303589, + "step": 753 + }, + { + "chosen_geometric_mean": -1.0313928127288818, + "epoch": 0.19, + "grad_norm": 10.25, + "learning_rate": 4.895820032215848e-06, + "log_odds": 0.6431698203086853, + "log_odds_ratio": -0.45577752590179443, + "loss": 0.3174, + "rejected_geometric_mean": -1.5189766883850098, + "step": 754 + }, + { + "chosen_geometric_mean": -0.8766739368438721, + "epoch": 0.19, + "grad_norm": 3.34375, + "learning_rate": 4.895541758386343e-06, + "log_odds": 0.2758033871650696, + "log_odds_ratio": -0.566300094127655, + "loss": 0.3134, + "rejected_geometric_mean": -1.0400644540786743, + "step": 755 + }, + { + "chosen_geometric_mean": -1.2936145067214966, + "epoch": 0.19, + "grad_norm": 12.9375, + "learning_rate": 4.895263121335204e-06, + "log_odds": 2.2914681434631348, + "log_odds_ratio": -0.30328112840652466, + "loss": 0.364, + "rejected_geometric_mean": -3.3900957107543945, + "step": 756 + }, + { + "chosen_geometric_mean": -1.1386635303497314, + "epoch": 0.19, + "grad_norm": 47.25, + "learning_rate": 4.8949841211046814e-06, + "log_odds": 0.26103049516677856, + "log_odds_ratio": -0.6065797805786133, + "loss": 0.3383, + "rejected_geometric_mean": -1.3254410028457642, + "step": 757 + }, + { + "chosen_geometric_mean": -1.0273010730743408, + "epoch": 0.19, + "grad_norm": 27.0, + "learning_rate": 4.8947047577370755e-06, + "log_odds": 3.58142352104187, + "log_odds_ratio": -0.1934301108121872, + "loss": 0.3745, + "rejected_geometric_mean": -4.28499698638916, + "step": 758 + }, + { + "chosen_geometric_mean": -1.1288509368896484, + "epoch": 0.19, + "grad_norm": 16.25, + "learning_rate": 4.894425031274747e-06, + "log_odds": 2.1324472427368164, + "log_odds_ratio": -0.19739507138729095, + "loss": 0.427, + "rejected_geometric_mean": -2.956575870513916, + "step": 759 + }, + { + "chosen_geometric_mean": -0.9666345119476318, + "epoch": 0.19, + "grad_norm": 10.375, + "learning_rate": 4.8941449417601085e-06, + "log_odds": 1.5406639575958252, + "log_odds_ratio": -0.2556183636188507, + "loss": 0.3063, + "rejected_geometric_mean": -2.1480674743652344, + "step": 760 + }, + { + "chosen_geometric_mean": -1.1994460821151733, + "epoch": 0.19, + "grad_norm": 13.5, + "learning_rate": 4.893864489235627e-06, + "log_odds": 3.01114559173584, + "log_odds_ratio": -0.1663239300251007, + "loss": 0.3689, + "rejected_geometric_mean": -3.9293575286865234, + "step": 761 + }, + { + "chosen_geometric_mean": -1.238433599472046, + "epoch": 0.19, + "grad_norm": 2.921875, + "learning_rate": 4.8935836737438275e-06, + "log_odds": 2.8604576587677, + "log_odds_ratio": -0.23753994703292847, + "loss": 0.3414, + "rejected_geometric_mean": -3.86812424659729, + "step": 762 + }, + { + "chosen_geometric_mean": -1.0445832014083862, + "epoch": 0.19, + "grad_norm": 15.4375, + "learning_rate": 4.893302495327288e-06, + "log_odds": 2.931340217590332, + "log_odds_ratio": -0.40062230825424194, + "loss": 0.348, + "rejected_geometric_mean": -3.7951152324676514, + "step": 763 + }, + { + "chosen_geometric_mean": -0.9299150705337524, + "epoch": 0.19, + "grad_norm": 4.28125, + "learning_rate": 4.893020954028641e-06, + "log_odds": 0.792526125907898, + "log_odds_ratio": -0.44054728746414185, + "loss": 0.2886, + "rejected_geometric_mean": -1.4744164943695068, + "step": 764 + }, + { + "chosen_geometric_mean": -1.120256781578064, + "epoch": 0.19, + "grad_norm": 2.46875, + "learning_rate": 4.892739049890575e-06, + "log_odds": 0.6519784927368164, + "log_odds_ratio": -0.6180499792098999, + "loss": 0.2978, + "rejected_geometric_mean": -1.726375699043274, + "step": 765 + }, + { + "chosen_geometric_mean": -1.2807008028030396, + "epoch": 0.19, + "grad_norm": 6.375, + "learning_rate": 4.892456782955835e-06, + "log_odds": 1.2378146648406982, + "log_odds_ratio": -0.3452436923980713, + "loss": 0.3427, + "rejected_geometric_mean": -2.3190670013427734, + "step": 766 + }, + { + "chosen_geometric_mean": -1.1793162822723389, + "epoch": 0.19, + "grad_norm": 2.875, + "learning_rate": 4.892174153267217e-06, + "log_odds": 2.2819371223449707, + "log_odds_ratio": -0.3577579855918884, + "loss": 0.2667, + "rejected_geometric_mean": -3.1964669227600098, + "step": 767 + }, + { + "chosen_geometric_mean": -1.2811894416809082, + "epoch": 0.19, + "grad_norm": 21.75, + "learning_rate": 4.891891160867577e-06, + "log_odds": 0.5554751753807068, + "log_odds_ratio": -0.5428399443626404, + "loss": 0.284, + "rejected_geometric_mean": -1.7559417486190796, + "step": 768 + }, + { + "chosen_geometric_mean": -1.2414460182189941, + "epoch": 0.19, + "grad_norm": 6.09375, + "learning_rate": 4.891607805799822e-06, + "log_odds": 2.494884967803955, + "log_odds_ratio": -0.3950499892234802, + "loss": 0.3345, + "rejected_geometric_mean": -3.5525591373443604, + "step": 769 + }, + { + "chosen_geometric_mean": -1.3909822702407837, + "epoch": 0.19, + "grad_norm": 33.25, + "learning_rate": 4.891324088106916e-06, + "log_odds": 5.052587032318115, + "log_odds_ratio": -0.09818508476018906, + "loss": 0.3917, + "rejected_geometric_mean": -6.141176700592041, + "step": 770 + }, + { + "chosen_geometric_mean": -1.1721676588058472, + "epoch": 0.19, + "grad_norm": 9.3125, + "learning_rate": 4.891040007831876e-06, + "log_odds": 0.3298260569572449, + "log_odds_ratio": -0.5903780460357666, + "loss": 0.3201, + "rejected_geometric_mean": -1.449493408203125, + "step": 771 + }, + { + "chosen_geometric_mean": -0.9733375906944275, + "epoch": 0.19, + "grad_norm": 3.71875, + "learning_rate": 4.890755565017777e-06, + "log_odds": 0.22626054286956787, + "log_odds_ratio": -0.5926490426063538, + "loss": 0.3273, + "rejected_geometric_mean": -1.1182838678359985, + "step": 772 + }, + { + "chosen_geometric_mean": -1.0124431848526, + "epoch": 0.19, + "grad_norm": 2.9375, + "learning_rate": 4.890470759707747e-06, + "log_odds": 1.4170113801956177, + "log_odds_ratio": -0.23159638047218323, + "loss": 0.2811, + "rejected_geometric_mean": -2.110255479812622, + "step": 773 + }, + { + "chosen_geometric_mean": -0.9398514032363892, + "epoch": 0.19, + "grad_norm": 10.4375, + "learning_rate": 4.8901855919449696e-06, + "log_odds": 1.8301149606704712, + "log_odds_ratio": -0.4367145001888275, + "loss": 0.2792, + "rejected_geometric_mean": -2.589182138442993, + "step": 774 + }, + { + "chosen_geometric_mean": -1.2742764949798584, + "epoch": 0.19, + "grad_norm": 6.96875, + "learning_rate": 4.889900061772682e-06, + "log_odds": 1.2610570192337036, + "log_odds_ratio": -0.46875321865081787, + "loss": 0.3506, + "rejected_geometric_mean": -2.406498908996582, + "step": 775 + }, + { + "chosen_geometric_mean": -1.1249594688415527, + "epoch": 0.19, + "grad_norm": 3.328125, + "learning_rate": 4.889614169234179e-06, + "log_odds": 0.07692839205265045, + "log_odds_ratio": -0.6659159660339355, + "loss": 0.3238, + "rejected_geometric_mean": -1.199682593345642, + "step": 776 + }, + { + "chosen_geometric_mean": -0.9453404545783997, + "epoch": 0.19, + "grad_norm": 6.25, + "learning_rate": 4.8893279143728065e-06, + "log_odds": 4.396649360656738, + "log_odds_ratio": -0.0955611914396286, + "loss": 0.2894, + "rejected_geometric_mean": -4.91825008392334, + "step": 777 + }, + { + "chosen_geometric_mean": -1.1267592906951904, + "epoch": 0.19, + "grad_norm": 3.9375, + "learning_rate": 4.8890412972319705e-06, + "log_odds": 0.8795090913772583, + "log_odds_ratio": -0.4545797109603882, + "loss": 0.3069, + "rejected_geometric_mean": -1.8721468448638916, + "step": 778 + }, + { + "chosen_geometric_mean": -1.0685899257659912, + "epoch": 0.19, + "grad_norm": 5.21875, + "learning_rate": 4.888754317855126e-06, + "log_odds": 0.7282095551490784, + "log_odds_ratio": -0.46557945013046265, + "loss": 0.3187, + "rejected_geometric_mean": -1.6118667125701904, + "step": 779 + }, + { + "chosen_geometric_mean": -1.3601397275924683, + "epoch": 0.19, + "grad_norm": 27.625, + "learning_rate": 4.888466976285788e-06, + "log_odds": 2.455002546310425, + "log_odds_ratio": -0.2544807195663452, + "loss": 0.3582, + "rejected_geometric_mean": -3.6094436645507812, + "step": 780 + }, + { + "chosen_geometric_mean": -1.061476469039917, + "epoch": 0.19, + "grad_norm": 19.375, + "learning_rate": 4.888179272567523e-06, + "log_odds": 0.7116526961326599, + "log_odds_ratio": -0.4587893486022949, + "loss": 0.3394, + "rejected_geometric_mean": -1.6223814487457275, + "step": 781 + }, + { + "chosen_geometric_mean": -1.4238057136535645, + "epoch": 0.19, + "grad_norm": 26.375, + "learning_rate": 4.8878912067439566e-06, + "log_odds": 1.155382752418518, + "log_odds_ratio": -0.3270575702190399, + "loss": 0.3572, + "rejected_geometric_mean": -2.4346206188201904, + "step": 782 + }, + { + "chosen_geometric_mean": -1.1351333856582642, + "epoch": 0.19, + "grad_norm": 3.578125, + "learning_rate": 4.887602778858764e-06, + "log_odds": 4.537087917327881, + "log_odds_ratio": -0.1573318988084793, + "loss": 0.3234, + "rejected_geometric_mean": -5.350329399108887, + "step": 783 + }, + { + "chosen_geometric_mean": -1.2246875762939453, + "epoch": 0.19, + "grad_norm": 6.6875, + "learning_rate": 4.887313988955677e-06, + "log_odds": 2.4450936317443848, + "log_odds_ratio": -0.31526321172714233, + "loss": 0.3487, + "rejected_geometric_mean": -3.507688045501709, + "step": 784 + }, + { + "chosen_geometric_mean": -1.3414394855499268, + "epoch": 0.19, + "grad_norm": 4.875, + "learning_rate": 4.887024837078485e-06, + "log_odds": 0.26945745944976807, + "log_odds_ratio": -0.5829352140426636, + "loss": 0.3251, + "rejected_geometric_mean": -1.5524810552597046, + "step": 785 + }, + { + "chosen_geometric_mean": -1.0473355054855347, + "epoch": 0.19, + "grad_norm": 3.640625, + "learning_rate": 4.88673532327103e-06, + "log_odds": 1.4290227890014648, + "log_odds_ratio": -0.5679783225059509, + "loss": 0.3005, + "rejected_geometric_mean": -2.418595314025879, + "step": 786 + }, + { + "chosen_geometric_mean": -1.0318275690078735, + "epoch": 0.19, + "grad_norm": 3.875, + "learning_rate": 4.886445447577209e-06, + "log_odds": 1.334669828414917, + "log_odds_ratio": -0.29189857840538025, + "loss": 0.3541, + "rejected_geometric_mean": -2.0958752632141113, + "step": 787 + }, + { + "chosen_geometric_mean": -1.0695767402648926, + "epoch": 0.2, + "grad_norm": 4.0625, + "learning_rate": 4.886155210040973e-06, + "log_odds": 1.802022099494934, + "log_odds_ratio": -0.19314342737197876, + "loss": 0.2797, + "rejected_geometric_mean": -2.5550050735473633, + "step": 788 + }, + { + "chosen_geometric_mean": -1.2165120840072632, + "epoch": 0.2, + "grad_norm": 3.5625, + "learning_rate": 4.885864610706332e-06, + "log_odds": 2.1797919273376465, + "log_odds_ratio": -0.24512505531311035, + "loss": 0.2935, + "rejected_geometric_mean": -3.1126949787139893, + "step": 789 + }, + { + "chosen_geometric_mean": -1.4887452125549316, + "epoch": 0.2, + "grad_norm": 59.75, + "learning_rate": 4.885573649617345e-06, + "log_odds": 3.2494688034057617, + "log_odds_ratio": -0.27116671204566956, + "loss": 0.4338, + "rejected_geometric_mean": -4.497758865356445, + "step": 790 + }, + { + "chosen_geometric_mean": -1.1279151439666748, + "epoch": 0.2, + "grad_norm": 17.75, + "learning_rate": 4.885282326818129e-06, + "log_odds": 4.611000061035156, + "log_odds_ratio": -0.21061410009860992, + "loss": 0.3817, + "rejected_geometric_mean": -5.446310997009277, + "step": 791 + }, + { + "chosen_geometric_mean": -1.2799036502838135, + "epoch": 0.2, + "grad_norm": 76.0, + "learning_rate": 4.884990642352857e-06, + "log_odds": 3.442927360534668, + "log_odds_ratio": -0.11453399062156677, + "loss": 0.3781, + "rejected_geometric_mean": -4.370213508605957, + "step": 792 + }, + { + "chosen_geometric_mean": -1.276361346244812, + "epoch": 0.2, + "grad_norm": 19.625, + "learning_rate": 4.8846985962657545e-06, + "log_odds": 1.5296785831451416, + "log_odds_ratio": -0.28144749999046326, + "loss": 0.3726, + "rejected_geometric_mean": -2.6075363159179688, + "step": 793 + }, + { + "chosen_geometric_mean": -1.0905144214630127, + "epoch": 0.2, + "grad_norm": 28.5, + "learning_rate": 4.884406188601102e-06, + "log_odds": 1.6707059144973755, + "log_odds_ratio": -0.335357666015625, + "loss": 0.335, + "rejected_geometric_mean": -2.5689332485198975, + "step": 794 + }, + { + "chosen_geometric_mean": -1.280463457107544, + "epoch": 0.2, + "grad_norm": 9.9375, + "learning_rate": 4.884113419403237e-06, + "log_odds": 2.179516553878784, + "log_odds_ratio": -0.19744716584682465, + "loss": 0.3092, + "rejected_geometric_mean": -3.214179039001465, + "step": 795 + }, + { + "chosen_geometric_mean": -1.3062670230865479, + "epoch": 0.2, + "grad_norm": 5.25, + "learning_rate": 4.883820288716549e-06, + "log_odds": 1.0576355457305908, + "log_odds_ratio": -0.3756954073905945, + "loss": 0.3092, + "rejected_geometric_mean": -2.2041003704071045, + "step": 796 + }, + { + "chosen_geometric_mean": -1.0463660955429077, + "epoch": 0.2, + "grad_norm": 5.96875, + "learning_rate": 4.8835267965854846e-06, + "log_odds": 1.5493849515914917, + "log_odds_ratio": -0.38430675864219666, + "loss": 0.3302, + "rejected_geometric_mean": -2.3924059867858887, + "step": 797 + }, + { + "chosen_geometric_mean": -0.9207516312599182, + "epoch": 0.2, + "grad_norm": 4.1875, + "learning_rate": 4.883232943054544e-06, + "log_odds": 1.2775276899337769, + "log_odds_ratio": -0.35482436418533325, + "loss": 0.3323, + "rejected_geometric_mean": -1.9334287643432617, + "step": 798 + }, + { + "chosen_geometric_mean": -1.188710331916809, + "epoch": 0.2, + "grad_norm": 3.015625, + "learning_rate": 4.882938728168282e-06, + "log_odds": 0.7188770174980164, + "log_odds_ratio": -0.4049757122993469, + "loss": 0.3074, + "rejected_geometric_mean": -1.737180471420288, + "step": 799 + }, + { + "chosen_geometric_mean": -1.47170090675354, + "epoch": 0.2, + "grad_norm": 2.546875, + "learning_rate": 4.882644151971308e-06, + "log_odds": 0.8319138884544373, + "log_odds_ratio": -0.5670340061187744, + "loss": 0.3347, + "rejected_geometric_mean": -2.254936933517456, + "step": 800 + }, + { + "chosen_geometric_mean": -1.3893531560897827, + "epoch": 0.2, + "grad_norm": 3.390625, + "learning_rate": 4.882349214508288e-06, + "log_odds": 1.519091248512268, + "log_odds_ratio": -0.5971696376800537, + "loss": 0.3573, + "rejected_geometric_mean": -2.8874759674072266, + "step": 801 + }, + { + "chosen_geometric_mean": -0.9672195911407471, + "epoch": 0.2, + "grad_norm": 4.5, + "learning_rate": 4.882053915823943e-06, + "log_odds": 0.3304665982723236, + "log_odds_ratio": -0.5986183881759644, + "loss": 0.3232, + "rejected_geometric_mean": -1.2050367593765259, + "step": 802 + }, + { + "chosen_geometric_mean": -1.0198873281478882, + "epoch": 0.2, + "grad_norm": 2.1875, + "learning_rate": 4.8817582559630446e-06, + "log_odds": 3.480433225631714, + "log_odds_ratio": -0.3107180893421173, + "loss": 0.2892, + "rejected_geometric_mean": -4.226463317871094, + "step": 803 + }, + { + "chosen_geometric_mean": -1.081971526145935, + "epoch": 0.2, + "grad_norm": 2.5625, + "learning_rate": 4.881462234970423e-06, + "log_odds": 0.5747750401496887, + "log_odds_ratio": -0.546176016330719, + "loss": 0.2924, + "rejected_geometric_mean": -1.579345703125, + "step": 804 + }, + { + "chosen_geometric_mean": -1.1453367471694946, + "epoch": 0.2, + "grad_norm": 14.6875, + "learning_rate": 4.881165852890962e-06, + "log_odds": 1.6805061101913452, + "log_odds_ratio": -0.5005480647087097, + "loss": 0.3691, + "rejected_geometric_mean": -2.6926803588867188, + "step": 805 + }, + { + "chosen_geometric_mean": -1.1359858512878418, + "epoch": 0.2, + "grad_norm": 27.0, + "learning_rate": 4.880869109769601e-06, + "log_odds": 6.878969192504883, + "log_odds_ratio": -0.006004112772643566, + "loss": 0.3093, + "rejected_geometric_mean": -7.624094486236572, + "step": 806 + }, + { + "chosen_geometric_mean": -1.1811559200286865, + "epoch": 0.2, + "grad_norm": 40.75, + "learning_rate": 4.880572005651333e-06, + "log_odds": 9.604914665222168, + "log_odds_ratio": -0.11726005375385284, + "loss": 0.3126, + "rejected_geometric_mean": -10.467974662780762, + "step": 807 + }, + { + "chosen_geometric_mean": -1.0230276584625244, + "epoch": 0.2, + "grad_norm": 36.75, + "learning_rate": 4.880274540581205e-06, + "log_odds": 7.4056267738342285, + "log_odds_ratio": -0.304290771484375, + "loss": 0.3499, + "rejected_geometric_mean": -8.149813652038574, + "step": 808 + }, + { + "chosen_geometric_mean": -4.208622932434082, + "epoch": 0.2, + "grad_norm": 95.0, + "learning_rate": 4.8799767146043215e-06, + "log_odds": 2.071181297302246, + "log_odds_ratio": -1.1971276998519897, + "loss": 0.6364, + "rejected_geometric_mean": -6.114220142364502, + "step": 809 + }, + { + "chosen_geometric_mean": -1.0350228548049927, + "epoch": 0.2, + "grad_norm": 13.8125, + "learning_rate": 4.879678527765839e-06, + "log_odds": 6.645658493041992, + "log_odds_ratio": -0.12235777825117111, + "loss": 0.3005, + "rejected_geometric_mean": -7.277911186218262, + "step": 810 + }, + { + "chosen_geometric_mean": -1.18113112449646, + "epoch": 0.2, + "grad_norm": 30.875, + "learning_rate": 4.87937998011097e-06, + "log_odds": 0.5898548364639282, + "log_odds_ratio": -0.45745599269866943, + "loss": 0.4503, + "rejected_geometric_mean": -1.6286044120788574, + "step": 811 + }, + { + "chosen_geometric_mean": -1.058413028717041, + "epoch": 0.2, + "grad_norm": 2.546875, + "learning_rate": 4.879081071684981e-06, + "log_odds": 1.6348835229873657, + "log_odds_ratio": -0.2515327036380768, + "loss": 0.3552, + "rejected_geometric_mean": -2.3939621448516846, + "step": 812 + }, + { + "chosen_geometric_mean": -1.143977403640747, + "epoch": 0.2, + "grad_norm": 3.78125, + "learning_rate": 4.878781802533196e-06, + "log_odds": 1.0416350364685059, + "log_odds_ratio": -0.327552855014801, + "loss": 0.3599, + "rejected_geometric_mean": -1.955923080444336, + "step": 813 + }, + { + "chosen_geometric_mean": -1.106191635131836, + "epoch": 0.2, + "grad_norm": 4.375, + "learning_rate": 4.878482172700988e-06, + "log_odds": 1.9557572603225708, + "log_odds_ratio": -0.3202569782733917, + "loss": 0.3116, + "rejected_geometric_mean": -2.8552746772766113, + "step": 814 + }, + { + "chosen_geometric_mean": -0.9392951130867004, + "epoch": 0.2, + "grad_norm": 6.03125, + "learning_rate": 4.8781821822337905e-06, + "log_odds": 0.6034797430038452, + "log_odds_ratio": -0.47287121415138245, + "loss": 0.3489, + "rejected_geometric_mean": -1.3575057983398438, + "step": 815 + }, + { + "chosen_geometric_mean": -1.3721848726272583, + "epoch": 0.2, + "grad_norm": 4.59375, + "learning_rate": 4.8778818311770885e-06, + "log_odds": 3.7998571395874023, + "log_odds_ratio": -0.22268858551979065, + "loss": 0.3033, + "rejected_geometric_mean": -4.952844619750977, + "step": 816 + }, + { + "chosen_geometric_mean": -1.2530746459960938, + "epoch": 0.2, + "grad_norm": 5.375, + "learning_rate": 4.8775811195764215e-06, + "log_odds": 2.072025775909424, + "log_odds_ratio": -0.23963357508182526, + "loss": 0.3098, + "rejected_geometric_mean": -3.080132007598877, + "step": 817 + }, + { + "chosen_geometric_mean": -1.1458847522735596, + "epoch": 0.2, + "grad_norm": 3.6875, + "learning_rate": 4.877280047477386e-06, + "log_odds": 1.3892320394515991, + "log_odds_ratio": -0.2592180371284485, + "loss": 0.2903, + "rejected_geometric_mean": -2.260188579559326, + "step": 818 + }, + { + "chosen_geometric_mean": -1.0548806190490723, + "epoch": 0.2, + "grad_norm": 3.15625, + "learning_rate": 4.8769786149256315e-06, + "log_odds": 0.5276453495025635, + "log_odds_ratio": -0.5119853615760803, + "loss": 0.3281, + "rejected_geometric_mean": -1.470825433731079, + "step": 819 + }, + { + "chosen_geometric_mean": -0.9722810387611389, + "epoch": 0.2, + "grad_norm": 4.0, + "learning_rate": 4.876676821966863e-06, + "log_odds": 1.139652967453003, + "log_odds_ratio": -0.46450796723365784, + "loss": 0.305, + "rejected_geometric_mean": -1.8824976682662964, + "step": 820 + }, + { + "chosen_geometric_mean": -1.0892119407653809, + "epoch": 0.2, + "grad_norm": 3.296875, + "learning_rate": 4.876374668646837e-06, + "log_odds": 0.9164718985557556, + "log_odds_ratio": -0.390339732170105, + "loss": 0.3206, + "rejected_geometric_mean": -1.8016362190246582, + "step": 821 + }, + { + "chosen_geometric_mean": -0.9769967794418335, + "epoch": 0.2, + "grad_norm": 4.28125, + "learning_rate": 4.87607215501137e-06, + "log_odds": 1.1381155252456665, + "log_odds_ratio": -0.45737361907958984, + "loss": 0.3134, + "rejected_geometric_mean": -1.9773290157318115, + "step": 822 + }, + { + "chosen_geometric_mean": -1.3621351718902588, + "epoch": 0.2, + "grad_norm": 15.625, + "learning_rate": 4.8757692811063296e-06, + "log_odds": 2.162351369857788, + "log_odds_ratio": -0.2161659598350525, + "loss": 0.3551, + "rejected_geometric_mean": -3.269731044769287, + "step": 823 + }, + { + "chosen_geometric_mean": -1.205798864364624, + "epoch": 0.2, + "grad_norm": 23.75, + "learning_rate": 4.875466046977637e-06, + "log_odds": 1.7982006072998047, + "log_odds_ratio": -0.32871922850608826, + "loss": 0.3474, + "rejected_geometric_mean": -2.805990695953369, + "step": 824 + }, + { + "chosen_geometric_mean": -1.0093281269073486, + "epoch": 0.2, + "grad_norm": 24.875, + "learning_rate": 4.875162452671273e-06, + "log_odds": 2.205981731414795, + "log_odds_ratio": -0.4529017508029938, + "loss": 0.314, + "rejected_geometric_mean": -3.043013572692871, + "step": 825 + }, + { + "chosen_geometric_mean": -1.125693678855896, + "epoch": 0.2, + "grad_norm": 18.75, + "learning_rate": 4.874858498233267e-06, + "log_odds": 3.316218137741089, + "log_odds_ratio": -0.34330350160598755, + "loss": 0.347, + "rejected_geometric_mean": -4.237433433532715, + "step": 826 + }, + { + "chosen_geometric_mean": -1.2353627681732178, + "epoch": 0.2, + "grad_norm": 5.59375, + "learning_rate": 4.874554183709706e-06, + "log_odds": 2.007798194885254, + "log_odds_ratio": -0.1665271371603012, + "loss": 0.2881, + "rejected_geometric_mean": -2.968501329421997, + "step": 827 + }, + { + "chosen_geometric_mean": -1.057629108428955, + "epoch": 0.21, + "grad_norm": 8.9375, + "learning_rate": 4.874249509146734e-06, + "log_odds": 1.691965937614441, + "log_odds_ratio": -0.479308545589447, + "loss": 0.3148, + "rejected_geometric_mean": -2.5939176082611084, + "step": 828 + }, + { + "chosen_geometric_mean": -1.075743556022644, + "epoch": 0.21, + "grad_norm": 6.84375, + "learning_rate": 4.873944474590543e-06, + "log_odds": 1.8587689399719238, + "log_odds_ratio": -0.27479100227355957, + "loss": 0.3384, + "rejected_geometric_mean": -2.7144200801849365, + "step": 829 + }, + { + "chosen_geometric_mean": -1.2268210649490356, + "epoch": 0.21, + "grad_norm": 2.59375, + "learning_rate": 4.873639080087386e-06, + "log_odds": 0.10652953386306763, + "log_odds_ratio": -0.677491307258606, + "loss": 0.3757, + "rejected_geometric_mean": -1.3370202779769897, + "step": 830 + }, + { + "chosen_geometric_mean": -1.177470326423645, + "epoch": 0.21, + "grad_norm": 29.875, + "learning_rate": 4.87333332568357e-06, + "log_odds": 0.5864822268486023, + "log_odds_ratio": -0.5044782161712646, + "loss": 0.3113, + "rejected_geometric_mean": -1.6551425457000732, + "step": 831 + }, + { + "chosen_geometric_mean": -1.043386459350586, + "epoch": 0.21, + "grad_norm": 2.328125, + "learning_rate": 4.8730272114254506e-06, + "log_odds": 0.2334015816450119, + "log_odds_ratio": -0.6187934875488281, + "loss": 0.3122, + "rejected_geometric_mean": -1.2746360301971436, + "step": 832 + }, + { + "chosen_geometric_mean": -1.1063309907913208, + "epoch": 0.21, + "grad_norm": 8.0625, + "learning_rate": 4.8727207373594445e-06, + "log_odds": 1.037078857421875, + "log_odds_ratio": -0.41411781311035156, + "loss": 0.308, + "rejected_geometric_mean": -1.9827193021774292, + "step": 833 + }, + { + "chosen_geometric_mean": -0.8199345469474792, + "epoch": 0.21, + "grad_norm": 2.90625, + "learning_rate": 4.87241390353202e-06, + "log_odds": 0.3449960947036743, + "log_odds_ratio": -0.5436315536499023, + "loss": 0.2974, + "rejected_geometric_mean": -1.0140154361724854, + "step": 834 + }, + { + "chosen_geometric_mean": -1.10308837890625, + "epoch": 0.21, + "grad_norm": 2.296875, + "learning_rate": 4.8721067099897e-06, + "log_odds": -0.0037429630756378174, + "log_odds_ratio": -0.6966684460639954, + "loss": 0.345, + "rejected_geometric_mean": -1.1006304025650024, + "step": 835 + }, + { + "chosen_geometric_mean": -0.9675674438476562, + "epoch": 0.21, + "grad_norm": 2.390625, + "learning_rate": 4.871799156779064e-06, + "log_odds": 0.6736066341400146, + "log_odds_ratio": -0.5098447203636169, + "loss": 0.3367, + "rejected_geometric_mean": -1.4925447702407837, + "step": 836 + }, + { + "chosen_geometric_mean": -0.9137908220291138, + "epoch": 0.21, + "grad_norm": 5.15625, + "learning_rate": 4.8714912439467435e-06, + "log_odds": 1.333925724029541, + "log_odds_ratio": -0.2739831805229187, + "loss": 0.2929, + "rejected_geometric_mean": -1.915780782699585, + "step": 837 + }, + { + "chosen_geometric_mean": -1.2797863483428955, + "epoch": 0.21, + "grad_norm": 4.65625, + "learning_rate": 4.871182971539424e-06, + "log_odds": 0.09813141822814941, + "log_odds_ratio": -0.6464918851852417, + "loss": 0.3369, + "rejected_geometric_mean": -1.3507554531097412, + "step": 838 + }, + { + "chosen_geometric_mean": -0.9614389538764954, + "epoch": 0.21, + "grad_norm": 5.375, + "learning_rate": 4.8708743396038485e-06, + "log_odds": 1.3737694025039673, + "log_odds_ratio": -0.47738200426101685, + "loss": 0.2776, + "rejected_geometric_mean": -2.250349998474121, + "step": 839 + }, + { + "chosen_geometric_mean": -1.073499083518982, + "epoch": 0.21, + "grad_norm": 3.921875, + "learning_rate": 4.870565348186814e-06, + "log_odds": 5.160057544708252, + "log_odds_ratio": -0.3701232671737671, + "loss": 0.29, + "rejected_geometric_mean": -6.03000545501709, + "step": 840 + }, + { + "chosen_geometric_mean": -0.9974759817123413, + "epoch": 0.21, + "grad_norm": 4.0, + "learning_rate": 4.8702559973351685e-06, + "log_odds": 4.521410942077637, + "log_odds_ratio": -0.1376037299633026, + "loss": 0.3073, + "rejected_geometric_mean": -5.100500583648682, + "step": 841 + }, + { + "chosen_geometric_mean": -1.2216942310333252, + "epoch": 0.21, + "grad_norm": 22.75, + "learning_rate": 4.869946287095819e-06, + "log_odds": 4.636595249176025, + "log_odds_ratio": -0.1574288010597229, + "loss": 0.3557, + "rejected_geometric_mean": -5.591344833374023, + "step": 842 + }, + { + "chosen_geometric_mean": -1.3172448873519897, + "epoch": 0.21, + "grad_norm": 26.25, + "learning_rate": 4.869636217515725e-06, + "log_odds": 1.7221518754959106, + "log_odds_ratio": -0.2669374942779541, + "loss": 0.3516, + "rejected_geometric_mean": -2.8027913570404053, + "step": 843 + }, + { + "chosen_geometric_mean": -1.0694983005523682, + "epoch": 0.21, + "grad_norm": 28.0, + "learning_rate": 4.8693257886418986e-06, + "log_odds": 2.4261770248413086, + "log_odds_ratio": -0.3066287636756897, + "loss": 0.2714, + "rejected_geometric_mean": -3.2131590843200684, + "step": 844 + }, + { + "chosen_geometric_mean": -1.0549914836883545, + "epoch": 0.21, + "grad_norm": 6.46875, + "learning_rate": 4.869015000521411e-06, + "log_odds": 4.2509846687316895, + "log_odds_ratio": -0.2539803087711334, + "loss": 0.4083, + "rejected_geometric_mean": -4.97807502746582, + "step": 845 + }, + { + "chosen_geometric_mean": -1.1853870153427124, + "epoch": 0.21, + "grad_norm": 3.5, + "learning_rate": 4.868703853201383e-06, + "log_odds": 2.6745169162750244, + "log_odds_ratio": -0.24022504687309265, + "loss": 0.281, + "rejected_geometric_mean": -3.5979666709899902, + "step": 846 + }, + { + "chosen_geometric_mean": -1.2520842552185059, + "epoch": 0.21, + "grad_norm": 22.625, + "learning_rate": 4.868392346728992e-06, + "log_odds": 2.6709017753601074, + "log_odds_ratio": -0.4173887372016907, + "loss": 0.3383, + "rejected_geometric_mean": -3.7106919288635254, + "step": 847 + }, + { + "chosen_geometric_mean": -0.9498488903045654, + "epoch": 0.21, + "grad_norm": 36.0, + "learning_rate": 4.868080481151471e-06, + "log_odds": 2.402388334274292, + "log_odds_ratio": -0.6258426308631897, + "loss": 0.2926, + "rejected_geometric_mean": -3.276283025741577, + "step": 848 + }, + { + "chosen_geometric_mean": -1.1750376224517822, + "epoch": 0.21, + "grad_norm": 10.4375, + "learning_rate": 4.867768256516107e-06, + "log_odds": 1.043379545211792, + "log_odds_ratio": -0.357561320066452, + "loss": 0.3023, + "rejected_geometric_mean": -2.0015037059783936, + "step": 849 + }, + { + "chosen_geometric_mean": -1.0802278518676758, + "epoch": 0.21, + "grad_norm": 4.34375, + "learning_rate": 4.867455672870238e-06, + "log_odds": 0.9083252549171448, + "log_odds_ratio": -0.41351717710494995, + "loss": 0.2732, + "rejected_geometric_mean": -1.8174748420715332, + "step": 850 + }, + { + "chosen_geometric_mean": -0.9906502962112427, + "epoch": 0.21, + "grad_norm": 8.25, + "learning_rate": 4.8671427302612615e-06, + "log_odds": 2.676995038986206, + "log_odds_ratio": -0.4012325406074524, + "loss": 0.3244, + "rejected_geometric_mean": -3.4935097694396973, + "step": 851 + }, + { + "chosen_geometric_mean": -0.9666157960891724, + "epoch": 0.21, + "grad_norm": 39.25, + "learning_rate": 4.866829428736626e-06, + "log_odds": 0.5085737705230713, + "log_odds_ratio": -0.5710868239402771, + "loss": 0.3636, + "rejected_geometric_mean": -1.3932483196258545, + "step": 852 + }, + { + "chosen_geometric_mean": -1.3599853515625, + "epoch": 0.21, + "grad_norm": 11.0, + "learning_rate": 4.866515768343836e-06, + "log_odds": 2.8544976711273193, + "log_odds_ratio": -0.31483596563339233, + "loss": 0.3856, + "rejected_geometric_mean": -3.995793342590332, + "step": 853 + }, + { + "chosen_geometric_mean": -1.04324209690094, + "epoch": 0.21, + "grad_norm": 5.9375, + "learning_rate": 4.86620174913045e-06, + "log_odds": 2.7851433753967285, + "log_odds_ratio": -0.3193470537662506, + "loss": 0.2959, + "rejected_geometric_mean": -3.597984552383423, + "step": 854 + }, + { + "chosen_geometric_mean": -1.2678320407867432, + "epoch": 0.21, + "grad_norm": 67.5, + "learning_rate": 4.8658873711440815e-06, + "log_odds": 2.452056646347046, + "log_odds_ratio": -0.4533049762248993, + "loss": 0.479, + "rejected_geometric_mean": -3.6118621826171875, + "step": 855 + }, + { + "chosen_geometric_mean": -1.1614623069763184, + "epoch": 0.21, + "grad_norm": 9.25, + "learning_rate": 4.865572634432395e-06, + "log_odds": 0.8223272562026978, + "log_odds_ratio": -0.407044917345047, + "loss": 0.3189, + "rejected_geometric_mean": -1.826027512550354, + "step": 856 + }, + { + "chosen_geometric_mean": -1.181102991104126, + "epoch": 0.21, + "grad_norm": 4.78125, + "learning_rate": 4.865257539043116e-06, + "log_odds": 2.1255884170532227, + "log_odds_ratio": -0.33747196197509766, + "loss": 0.3015, + "rejected_geometric_mean": -3.105483055114746, + "step": 857 + }, + { + "chosen_geometric_mean": -1.439640998840332, + "epoch": 0.21, + "grad_norm": 22.5, + "learning_rate": 4.864942085024018e-06, + "log_odds": 2.3478891849517822, + "log_odds_ratio": -0.2367425411939621, + "loss": 0.3257, + "rejected_geometric_mean": -3.636248826980591, + "step": 858 + }, + { + "chosen_geometric_mean": -1.064340591430664, + "epoch": 0.21, + "grad_norm": 5.34375, + "learning_rate": 4.8646262724229324e-06, + "log_odds": 2.882741689682007, + "log_odds_ratio": -0.1380225270986557, + "loss": 0.2275, + "rejected_geometric_mean": -3.535351276397705, + "step": 859 + }, + { + "chosen_geometric_mean": -1.757073163986206, + "epoch": 0.21, + "grad_norm": 37.5, + "learning_rate": 4.8643101012877425e-06, + "log_odds": 1.7421929836273193, + "log_odds_ratio": -0.4655974209308624, + "loss": 0.3638, + "rejected_geometric_mean": -3.454089641571045, + "step": 860 + }, + { + "chosen_geometric_mean": -1.162645697593689, + "epoch": 0.21, + "grad_norm": 11.0625, + "learning_rate": 4.86399357166639e-06, + "log_odds": 1.817366361618042, + "log_odds_ratio": -0.2736918330192566, + "loss": 0.3057, + "rejected_geometric_mean": -2.7245984077453613, + "step": 861 + }, + { + "chosen_geometric_mean": -1.872421383857727, + "epoch": 0.21, + "grad_norm": 104.5, + "learning_rate": 4.863676683606866e-06, + "log_odds": 2.859394073486328, + "log_odds_ratio": -0.1975751370191574, + "loss": 0.5311, + "rejected_geometric_mean": -4.525472164154053, + "step": 862 + }, + { + "chosen_geometric_mean": -1.140525460243225, + "epoch": 0.21, + "grad_norm": 9.4375, + "learning_rate": 4.863359437157219e-06, + "log_odds": 2.9530153274536133, + "log_odds_ratio": -0.3331834077835083, + "loss": 0.3127, + "rejected_geometric_mean": -3.891662120819092, + "step": 863 + }, + { + "chosen_geometric_mean": -1.4088928699493408, + "epoch": 0.21, + "grad_norm": 3.5625, + "learning_rate": 4.863041832365552e-06, + "log_odds": 2.4520421028137207, + "log_odds_ratio": -0.2409733235836029, + "loss": 0.2933, + "rejected_geometric_mean": -3.681734561920166, + "step": 864 + }, + { + "chosen_geometric_mean": -1.2721396684646606, + "epoch": 0.21, + "grad_norm": 24.375, + "learning_rate": 4.862723869280021e-06, + "log_odds": 0.2943055033683777, + "log_odds_ratio": -0.6228097081184387, + "loss": 0.3755, + "rejected_geometric_mean": -1.5590097904205322, + "step": 865 + }, + { + "chosen_geometric_mean": -1.3546634912490845, + "epoch": 0.21, + "grad_norm": 21.5, + "learning_rate": 4.862405547948836e-06, + "log_odds": 4.382254600524902, + "log_odds_ratio": -0.2542288303375244, + "loss": 0.3531, + "rejected_geometric_mean": -5.531083583831787, + "step": 866 + }, + { + "chosen_geometric_mean": -1.433501124382019, + "epoch": 0.21, + "grad_norm": 21.625, + "learning_rate": 4.862086868420264e-06, + "log_odds": 3.501521587371826, + "log_odds_ratio": -0.19882339239120483, + "loss": 0.3512, + "rejected_geometric_mean": -4.705908298492432, + "step": 867 + }, + { + "chosen_geometric_mean": -1.0359569787979126, + "epoch": 0.21, + "grad_norm": 6.25, + "learning_rate": 4.861767830742623e-06, + "log_odds": 2.0924954414367676, + "log_odds_ratio": -0.22635670006275177, + "loss": 0.2831, + "rejected_geometric_mean": -2.8472959995269775, + "step": 868 + }, + { + "chosen_geometric_mean": -0.9221137762069702, + "epoch": 0.22, + "grad_norm": 4.6875, + "learning_rate": 4.861448434964288e-06, + "log_odds": 2.577157497406006, + "log_odds_ratio": -0.16279302537441254, + "loss": 0.303, + "rejected_geometric_mean": -3.037156581878662, + "step": 869 + }, + { + "chosen_geometric_mean": -1.1197741031646729, + "epoch": 0.22, + "grad_norm": 3.453125, + "learning_rate": 4.861128681133685e-06, + "log_odds": 3.3504176139831543, + "log_odds_ratio": -0.288482129573822, + "loss": 0.3157, + "rejected_geometric_mean": -4.173748970031738, + "step": 870 + }, + { + "chosen_geometric_mean": -1.3352229595184326, + "epoch": 0.22, + "grad_norm": 2.796875, + "learning_rate": 4.8608085692992976e-06, + "log_odds": 1.447047233581543, + "log_odds_ratio": -0.30861347913742065, + "loss": 0.3204, + "rejected_geometric_mean": -2.5767621994018555, + "step": 871 + }, + { + "chosen_geometric_mean": -1.3871090412139893, + "epoch": 0.22, + "grad_norm": 2.046875, + "learning_rate": 4.860488099509663e-06, + "log_odds": 1.4304043054580688, + "log_odds_ratio": -0.3881967067718506, + "loss": 0.2825, + "rejected_geometric_mean": -2.6769580841064453, + "step": 872 + }, + { + "chosen_geometric_mean": -0.9252368211746216, + "epoch": 0.22, + "grad_norm": 3.078125, + "learning_rate": 4.860167271813371e-06, + "log_odds": 4.3246378898620605, + "log_odds_ratio": -0.2387019544839859, + "loss": 0.2999, + "rejected_geometric_mean": -4.893364429473877, + "step": 873 + }, + { + "chosen_geometric_mean": -0.983137845993042, + "epoch": 0.22, + "grad_norm": 3.875, + "learning_rate": 4.859846086259068e-06, + "log_odds": 2.8709795475006104, + "log_odds_ratio": -0.22507993876934052, + "loss": 0.324, + "rejected_geometric_mean": -3.48934006690979, + "step": 874 + }, + { + "chosen_geometric_mean": -1.071897029876709, + "epoch": 0.22, + "grad_norm": 4.375, + "learning_rate": 4.859524542895453e-06, + "log_odds": 2.510582208633423, + "log_odds_ratio": -0.23285159468650818, + "loss": 0.3173, + "rejected_geometric_mean": -3.276479721069336, + "step": 875 + }, + { + "chosen_geometric_mean": -1.177738904953003, + "epoch": 0.22, + "grad_norm": 4.15625, + "learning_rate": 4.859202641771278e-06, + "log_odds": 1.1106061935424805, + "log_odds_ratio": -0.3669736087322235, + "loss": 0.3149, + "rejected_geometric_mean": -2.0760064125061035, + "step": 876 + }, + { + "chosen_geometric_mean": -1.289778232574463, + "epoch": 0.22, + "grad_norm": 22.875, + "learning_rate": 4.8588803829353535e-06, + "log_odds": 2.6485486030578613, + "log_odds_ratio": -0.1476883441209793, + "loss": 0.2611, + "rejected_geometric_mean": -3.6648435592651367, + "step": 877 + }, + { + "chosen_geometric_mean": -1.242565393447876, + "epoch": 0.22, + "grad_norm": 5.0625, + "learning_rate": 4.858557766436541e-06, + "log_odds": 2.0014030933380127, + "log_odds_ratio": -0.28677505254745483, + "loss": 0.3373, + "rejected_geometric_mean": -3.039320230484009, + "step": 878 + }, + { + "chosen_geometric_mean": -1.110677719116211, + "epoch": 0.22, + "grad_norm": 10.0625, + "learning_rate": 4.858234792323756e-06, + "log_odds": 2.482701301574707, + "log_odds_ratio": -0.3314967155456543, + "loss": 0.2531, + "rejected_geometric_mean": -3.3636770248413086, + "step": 879 + }, + { + "chosen_geometric_mean": -1.1919859647750854, + "epoch": 0.22, + "grad_norm": 6.6875, + "learning_rate": 4.8579114606459695e-06, + "log_odds": 0.831703782081604, + "log_odds_ratio": -0.43170857429504395, + "loss": 0.3317, + "rejected_geometric_mean": -1.92323637008667, + "step": 880 + }, + { + "chosen_geometric_mean": -1.346179723739624, + "epoch": 0.22, + "grad_norm": 38.75, + "learning_rate": 4.857587771452206e-06, + "log_odds": 0.994870662689209, + "log_odds_ratio": -0.6024503111839294, + "loss": 0.366, + "rejected_geometric_mean": -2.219846725463867, + "step": 881 + }, + { + "chosen_geometric_mean": -2.0747692584991455, + "epoch": 0.22, + "grad_norm": 42.0, + "learning_rate": 4.857263724791546e-06, + "log_odds": 1.406293511390686, + "log_odds_ratio": -1.0388991832733154, + "loss": 0.4276, + "rejected_geometric_mean": -3.3620493412017822, + "step": 882 + }, + { + "chosen_geometric_mean": -1.2052726745605469, + "epoch": 0.22, + "grad_norm": 7.78125, + "learning_rate": 4.856939320713121e-06, + "log_odds": 0.7790898680686951, + "log_odds_ratio": -0.4585851728916168, + "loss": 0.315, + "rejected_geometric_mean": -1.8300224542617798, + "step": 883 + }, + { + "chosen_geometric_mean": -1.2148399353027344, + "epoch": 0.22, + "grad_norm": 4.375, + "learning_rate": 4.856614559266119e-06, + "log_odds": 2.24674654006958, + "log_odds_ratio": -0.290203332901001, + "loss": 0.3251, + "rejected_geometric_mean": -3.27530837059021, + "step": 884 + }, + { + "chosen_geometric_mean": -1.2424557209014893, + "epoch": 0.22, + "grad_norm": 2.625, + "learning_rate": 4.856289440499783e-06, + "log_odds": 0.6883969902992249, + "log_odds_ratio": -0.41940709948539734, + "loss": 0.3698, + "rejected_geometric_mean": -1.7811107635498047, + "step": 885 + }, + { + "chosen_geometric_mean": -0.9937007427215576, + "epoch": 0.22, + "grad_norm": 11.4375, + "learning_rate": 4.855963964463407e-06, + "log_odds": 2.2107911109924316, + "log_odds_ratio": -0.2583654522895813, + "loss": 0.293, + "rejected_geometric_mean": -2.9326446056365967, + "step": 886 + }, + { + "chosen_geometric_mean": -1.0504577159881592, + "epoch": 0.22, + "grad_norm": 5.96875, + "learning_rate": 4.8556381312063415e-06, + "log_odds": 0.6396840810775757, + "log_odds_ratio": -0.47389209270477295, + "loss": 0.3356, + "rejected_geometric_mean": -1.5119467973709106, + "step": 887 + }, + { + "chosen_geometric_mean": -1.1194391250610352, + "epoch": 0.22, + "grad_norm": 3.25, + "learning_rate": 4.85531194077799e-06, + "log_odds": 0.2188122570514679, + "log_odds_ratio": -0.6277362704277039, + "loss": 0.3069, + "rejected_geometric_mean": -1.3061378002166748, + "step": 888 + }, + { + "chosen_geometric_mean": -1.5663213729858398, + "epoch": 0.22, + "grad_norm": 5.6875, + "learning_rate": 4.854985393227813e-06, + "log_odds": 1.4986672401428223, + "log_odds_ratio": -0.45905444025993347, + "loss": 0.3033, + "rejected_geometric_mean": -2.97804594039917, + "step": 889 + }, + { + "chosen_geometric_mean": -1.14750337600708, + "epoch": 0.22, + "grad_norm": 4.3125, + "learning_rate": 4.854658488605321e-06, + "log_odds": 1.5137406587600708, + "log_odds_ratio": -0.32857587933540344, + "loss": 0.3364, + "rejected_geometric_mean": -2.427158832550049, + "step": 890 + }, + { + "chosen_geometric_mean": -1.2419930696487427, + "epoch": 0.22, + "grad_norm": 2.90625, + "learning_rate": 4.85433122696008e-06, + "log_odds": 0.6839073300361633, + "log_odds_ratio": -0.4273797273635864, + "loss": 0.311, + "rejected_geometric_mean": -1.7913475036621094, + "step": 891 + }, + { + "chosen_geometric_mean": -1.034178614616394, + "epoch": 0.22, + "grad_norm": 2.8125, + "learning_rate": 4.854003608341714e-06, + "log_odds": 3.3232760429382324, + "log_odds_ratio": -0.17662250995635986, + "loss": 0.3125, + "rejected_geometric_mean": -4.0414581298828125, + "step": 892 + }, + { + "chosen_geometric_mean": -1.3131427764892578, + "epoch": 0.22, + "grad_norm": 2.46875, + "learning_rate": 4.853675632799894e-06, + "log_odds": 2.44724702835083, + "log_odds_ratio": -0.36768096685409546, + "loss": 0.3174, + "rejected_geometric_mean": -3.6032705307006836, + "step": 893 + }, + { + "chosen_geometric_mean": -1.3577550649642944, + "epoch": 0.22, + "grad_norm": 11.8125, + "learning_rate": 4.853347300384351e-06, + "log_odds": 2.12506103515625, + "log_odds_ratio": -0.2875249981880188, + "loss": 0.2912, + "rejected_geometric_mean": -3.303246021270752, + "step": 894 + }, + { + "chosen_geometric_mean": -1.3466525077819824, + "epoch": 0.22, + "grad_norm": 5.9375, + "learning_rate": 4.853018611144869e-06, + "log_odds": 1.1975921392440796, + "log_odds_ratio": -0.39618778228759766, + "loss": 0.3517, + "rejected_geometric_mean": -2.3945505619049072, + "step": 895 + }, + { + "chosen_geometric_mean": -1.0021424293518066, + "epoch": 0.22, + "grad_norm": 26.75, + "learning_rate": 4.852689565131282e-06, + "log_odds": 7.005582332611084, + "log_odds_ratio": -0.005237803794443607, + "loss": 0.3688, + "rejected_geometric_mean": -7.541569709777832, + "step": 896 + }, + { + "chosen_geometric_mean": -0.8598216772079468, + "epoch": 0.22, + "grad_norm": 20.125, + "learning_rate": 4.852360162393484e-06, + "log_odds": 2.794147253036499, + "log_odds_ratio": -0.22110289335250854, + "loss": 0.2632, + "rejected_geometric_mean": -3.2739720344543457, + "step": 897 + }, + { + "chosen_geometric_mean": -1.1771478652954102, + "epoch": 0.22, + "grad_norm": 14.0625, + "learning_rate": 4.852030402981419e-06, + "log_odds": 2.0786728858947754, + "log_odds_ratio": -0.43627557158470154, + "loss": 0.3541, + "rejected_geometric_mean": -3.094512462615967, + "step": 898 + }, + { + "chosen_geometric_mean": -1.0593430995941162, + "epoch": 0.22, + "grad_norm": 3.25, + "learning_rate": 4.851700286945087e-06, + "log_odds": 1.9431945085525513, + "log_odds_ratio": -0.2998851537704468, + "loss": 0.2934, + "rejected_geometric_mean": -2.720900774002075, + "step": 899 + }, + { + "chosen_geometric_mean": -1.738872766494751, + "epoch": 0.22, + "grad_norm": 23.625, + "learning_rate": 4.851369814334542e-06, + "log_odds": 1.6579116582870483, + "log_odds_ratio": -0.7311854958534241, + "loss": 0.3415, + "rejected_geometric_mean": -3.259331226348877, + "step": 900 + }, + { + "chosen_geometric_mean": -1.1344045400619507, + "epoch": 0.22, + "grad_norm": 2.625, + "learning_rate": 4.851038985199891e-06, + "log_odds": 0.2763371765613556, + "log_odds_ratio": -0.574765682220459, + "loss": 0.3519, + "rejected_geometric_mean": -1.3233853578567505, + "step": 901 + }, + { + "chosen_geometric_mean": -1.1251689195632935, + "epoch": 0.22, + "grad_norm": 3.046875, + "learning_rate": 4.850707799591295e-06, + "log_odds": 0.9402135014533997, + "log_odds_ratio": -0.3737034797668457, + "loss": 0.3221, + "rejected_geometric_mean": -1.8687843084335327, + "step": 902 + }, + { + "chosen_geometric_mean": -1.3358523845672607, + "epoch": 0.22, + "grad_norm": 5.03125, + "learning_rate": 4.850376257558972e-06, + "log_odds": 0.24266746640205383, + "log_odds_ratio": -0.6014509797096252, + "loss": 0.3627, + "rejected_geometric_mean": -1.5337833166122437, + "step": 903 + }, + { + "chosen_geometric_mean": -1.0600485801696777, + "epoch": 0.22, + "grad_norm": 3.09375, + "learning_rate": 4.850044359153189e-06, + "log_odds": 2.324765920639038, + "log_odds_ratio": -0.2062206119298935, + "loss": 0.3359, + "rejected_geometric_mean": -3.0775198936462402, + "step": 904 + }, + { + "chosen_geometric_mean": -1.079714298248291, + "epoch": 0.22, + "grad_norm": 3.78125, + "learning_rate": 4.849712104424271e-06, + "log_odds": 3.6621127128601074, + "log_odds_ratio": -0.2975698411464691, + "loss": 0.2702, + "rejected_geometric_mean": -4.4889960289001465, + "step": 905 + }, + { + "chosen_geometric_mean": -1.2728391885757446, + "epoch": 0.22, + "grad_norm": 6.25, + "learning_rate": 4.8493794934225955e-06, + "log_odds": 1.7751942873001099, + "log_odds_ratio": -0.2767201364040375, + "loss": 0.3266, + "rejected_geometric_mean": -2.8295786380767822, + "step": 906 + }, + { + "chosen_geometric_mean": -1.080270528793335, + "epoch": 0.22, + "grad_norm": 6.65625, + "learning_rate": 4.8490465261985946e-06, + "log_odds": 1.9024848937988281, + "log_odds_ratio": -0.23660393059253693, + "loss": 0.2697, + "rejected_geometric_mean": -2.7183494567871094, + "step": 907 + }, + { + "chosen_geometric_mean": -1.381215214729309, + "epoch": 0.22, + "grad_norm": 40.25, + "learning_rate": 4.8487132028027535e-06, + "log_odds": 0.50141841173172, + "log_odds_ratio": -0.48018917441368103, + "loss": 0.3431, + "rejected_geometric_mean": -1.775238037109375, + "step": 908 + }, + { + "chosen_geometric_mean": -1.0493484735488892, + "epoch": 0.23, + "grad_norm": 7.3125, + "learning_rate": 4.848379523285613e-06, + "log_odds": 2.3963840007781982, + "log_odds_ratio": -0.36745551228523254, + "loss": 0.3417, + "rejected_geometric_mean": -3.2319459915161133, + "step": 909 + }, + { + "chosen_geometric_mean": -1.2633317708969116, + "epoch": 0.23, + "grad_norm": 29.375, + "learning_rate": 4.8480454876977675e-06, + "log_odds": 1.8258891105651855, + "log_odds_ratio": -0.3131214380264282, + "loss": 0.3892, + "rejected_geometric_mean": -2.8903276920318604, + "step": 910 + }, + { + "chosen_geometric_mean": -1.1580548286437988, + "epoch": 0.23, + "grad_norm": 12.5625, + "learning_rate": 4.847711096089862e-06, + "log_odds": 2.6035966873168945, + "log_odds_ratio": -0.19874517619609833, + "loss": 0.4542, + "rejected_geometric_mean": -3.485537052154541, + "step": 911 + }, + { + "chosen_geometric_mean": -1.1943411827087402, + "epoch": 0.23, + "grad_norm": 9.25, + "learning_rate": 4.847376348512601e-06, + "log_odds": 2.938279867172241, + "log_odds_ratio": -0.32813477516174316, + "loss": 0.3168, + "rejected_geometric_mean": -3.915489912033081, + "step": 912 + }, + { + "chosen_geometric_mean": -1.2993252277374268, + "epoch": 0.23, + "grad_norm": 3.3125, + "learning_rate": 4.847041245016739e-06, + "log_odds": 5.060973167419434, + "log_odds_ratio": -0.06905681639909744, + "loss": 0.3224, + "rejected_geometric_mean": -6.068423271179199, + "step": 913 + }, + { + "chosen_geometric_mean": -1.116031289100647, + "epoch": 0.23, + "grad_norm": 55.5, + "learning_rate": 4.846705785653087e-06, + "log_odds": 6.39138126373291, + "log_odds_ratio": -0.002160581760108471, + "loss": 0.3372, + "rejected_geometric_mean": -7.108582496643066, + "step": 914 + }, + { + "chosen_geometric_mean": -1.198366403579712, + "epoch": 0.23, + "grad_norm": 7.53125, + "learning_rate": 4.846369970472507e-06, + "log_odds": 3.3713772296905518, + "log_odds_ratio": -0.34935373067855835, + "loss": 0.3254, + "rejected_geometric_mean": -4.331434726715088, + "step": 915 + }, + { + "chosen_geometric_mean": -1.1336079835891724, + "epoch": 0.23, + "grad_norm": 4.4375, + "learning_rate": 4.846033799525918e-06, + "log_odds": 3.2223961353302, + "log_odds_ratio": -0.29038286209106445, + "loss": 0.3335, + "rejected_geometric_mean": -4.072814464569092, + "step": 916 + }, + { + "chosen_geometric_mean": -0.9682372808456421, + "epoch": 0.23, + "grad_norm": 10.5, + "learning_rate": 4.84569727286429e-06, + "log_odds": 0.31322869658470154, + "log_odds_ratio": -0.5508437156677246, + "loss": 0.3651, + "rejected_geometric_mean": -1.1691808700561523, + "step": 917 + }, + { + "chosen_geometric_mean": -1.2113503217697144, + "epoch": 0.23, + "grad_norm": 3.4375, + "learning_rate": 4.84536039053865e-06, + "log_odds": 3.728398561477661, + "log_odds_ratio": -0.1404486447572708, + "loss": 0.3369, + "rejected_geometric_mean": -4.642525672912598, + "step": 918 + }, + { + "chosen_geometric_mean": -1.066394567489624, + "epoch": 0.23, + "grad_norm": 3.8125, + "learning_rate": 4.845023152600077e-06, + "log_odds": 0.6629558801651001, + "log_odds_ratio": -0.47777098417282104, + "loss": 0.3262, + "rejected_geometric_mean": -1.619860053062439, + "step": 919 + }, + { + "chosen_geometric_mean": -1.1155426502227783, + "epoch": 0.23, + "grad_norm": 2.390625, + "learning_rate": 4.8446855590997045e-06, + "log_odds": 2.5888993740081787, + "log_odds_ratio": -0.12451554834842682, + "loss": 0.2761, + "rejected_geometric_mean": -3.3788061141967773, + "step": 920 + }, + { + "chosen_geometric_mean": -1.017586350440979, + "epoch": 0.23, + "grad_norm": 4.875, + "learning_rate": 4.844347610088719e-06, + "log_odds": 0.6943036913871765, + "log_odds_ratio": -0.5618640184402466, + "loss": 0.3675, + "rejected_geometric_mean": -1.6156587600708008, + "step": 921 + }, + { + "chosen_geometric_mean": -1.0340436697006226, + "epoch": 0.23, + "grad_norm": 9.5625, + "learning_rate": 4.844009305618363e-06, + "log_odds": 0.5094701647758484, + "log_odds_ratio": -0.5650402307510376, + "loss": 0.3463, + "rejected_geometric_mean": -1.4096770286560059, + "step": 922 + }, + { + "chosen_geometric_mean": -1.556753396987915, + "epoch": 0.23, + "grad_norm": 3.28125, + "learning_rate": 4.84367064573993e-06, + "log_odds": 3.9199767112731934, + "log_odds_ratio": -0.1752561330795288, + "loss": 0.2555, + "rejected_geometric_mean": -5.268148422241211, + "step": 923 + }, + { + "chosen_geometric_mean": -1.2243061065673828, + "epoch": 0.23, + "grad_norm": 38.75, + "learning_rate": 4.84333163050477e-06, + "log_odds": 3.1008520126342773, + "log_odds_ratio": -0.054934702813625336, + "loss": 0.3268, + "rejected_geometric_mean": -3.9800503253936768, + "step": 924 + }, + { + "chosen_geometric_mean": -1.4069833755493164, + "epoch": 0.23, + "grad_norm": 31.375, + "learning_rate": 4.842992259964286e-06, + "log_odds": 1.4709872007369995, + "log_odds_ratio": -0.7025166749954224, + "loss": 0.3526, + "rejected_geometric_mean": -2.7653017044067383, + "step": 925 + }, + { + "chosen_geometric_mean": -1.0298486948013306, + "epoch": 0.23, + "grad_norm": 2.984375, + "learning_rate": 4.8426525341699345e-06, + "log_odds": 2.4138681888580322, + "log_odds_ratio": -0.3170270323753357, + "loss": 0.3297, + "rejected_geometric_mean": -3.208343505859375, + "step": 926 + }, + { + "chosen_geometric_mean": -1.037609577178955, + "epoch": 0.23, + "grad_norm": 7.15625, + "learning_rate": 4.842312453173225e-06, + "log_odds": 2.519946813583374, + "log_odds_ratio": -0.1161791980266571, + "loss": 0.287, + "rejected_geometric_mean": -3.1818888187408447, + "step": 927 + }, + { + "chosen_geometric_mean": -1.0820891857147217, + "epoch": 0.23, + "grad_norm": 3.546875, + "learning_rate": 4.841972017025724e-06, + "log_odds": 4.333585262298584, + "log_odds_ratio": -0.051517337560653687, + "loss": 0.2859, + "rejected_geometric_mean": -5.0068511962890625, + "step": 928 + }, + { + "chosen_geometric_mean": -1.234864592552185, + "epoch": 0.23, + "grad_norm": 4.8125, + "learning_rate": 4.84163122577905e-06, + "log_odds": 1.491013765335083, + "log_odds_ratio": -0.27927953004837036, + "loss": 0.3104, + "rejected_geometric_mean": -2.5136663913726807, + "step": 929 + }, + { + "chosen_geometric_mean": -1.1124687194824219, + "epoch": 0.23, + "grad_norm": 5.0625, + "learning_rate": 4.841290079484872e-06, + "log_odds": 0.25240352749824524, + "log_odds_ratio": -0.5768262147903442, + "loss": 0.3001, + "rejected_geometric_mean": -1.2917836904525757, + "step": 930 + }, + { + "chosen_geometric_mean": -1.17979896068573, + "epoch": 0.23, + "grad_norm": 2.921875, + "learning_rate": 4.840948578194918e-06, + "log_odds": 3.0861878395080566, + "log_odds_ratio": -0.09219098091125488, + "loss": 0.2976, + "rejected_geometric_mean": -3.9371161460876465, + "step": 931 + }, + { + "chosen_geometric_mean": -1.3412995338439941, + "epoch": 0.23, + "grad_norm": 2.296875, + "learning_rate": 4.840606721960967e-06, + "log_odds": 2.9443421363830566, + "log_odds_ratio": -0.13619190454483032, + "loss": 0.2942, + "rejected_geometric_mean": -4.00725793838501, + "step": 932 + }, + { + "chosen_geometric_mean": -1.8332605361938477, + "epoch": 0.23, + "grad_norm": 7.21875, + "learning_rate": 4.840264510834855e-06, + "log_odds": 1.4977190494537354, + "log_odds_ratio": -0.3009324073791504, + "loss": 0.318, + "rejected_geometric_mean": -3.167064666748047, + "step": 933 + }, + { + "chosen_geometric_mean": -1.023484468460083, + "epoch": 0.23, + "grad_norm": 3.78125, + "learning_rate": 4.839921944868466e-06, + "log_odds": 2.688810348510742, + "log_odds_ratio": -0.0952506735920906, + "loss": 0.2828, + "rejected_geometric_mean": -3.301563262939453, + "step": 934 + }, + { + "chosen_geometric_mean": -1.5597282648086548, + "epoch": 0.23, + "grad_norm": 27.875, + "learning_rate": 4.839579024113744e-06, + "log_odds": 1.945586919784546, + "log_odds_ratio": -0.3651283383369446, + "loss": 0.37, + "rejected_geometric_mean": -3.37656831741333, + "step": 935 + }, + { + "chosen_geometric_mean": -1.0842301845550537, + "epoch": 0.23, + "grad_norm": 7.59375, + "learning_rate": 4.839235748622682e-06, + "log_odds": 2.8774538040161133, + "log_odds_ratio": -0.2405390441417694, + "loss": 0.3386, + "rejected_geometric_mean": -3.707784652709961, + "step": 936 + }, + { + "chosen_geometric_mean": -1.6350791454315186, + "epoch": 0.23, + "grad_norm": 37.5, + "learning_rate": 4.83889211844733e-06, + "log_odds": -0.12261977046728134, + "log_odds_ratio": -0.7987842559814453, + "loss": 0.3659, + "rejected_geometric_mean": -1.564340591430664, + "step": 937 + }, + { + "chosen_geometric_mean": -1.1050444841384888, + "epoch": 0.23, + "grad_norm": 24.875, + "learning_rate": 4.838548133639791e-06, + "log_odds": 1.3401423692703247, + "log_odds_ratio": -0.5136739611625671, + "loss": 0.3329, + "rejected_geometric_mean": -2.3535408973693848, + "step": 938 + }, + { + "chosen_geometric_mean": -1.3445578813552856, + "epoch": 0.23, + "grad_norm": 5.8125, + "learning_rate": 4.83820379425222e-06, + "log_odds": 0.855749249458313, + "log_odds_ratio": -0.4250240623950958, + "loss": 0.3394, + "rejected_geometric_mean": -2.0815279483795166, + "step": 939 + }, + { + "chosen_geometric_mean": -1.0907803773880005, + "epoch": 0.23, + "grad_norm": 13.5, + "learning_rate": 4.837859100336827e-06, + "log_odds": 2.5337562561035156, + "log_odds_ratio": -0.3007212281227112, + "loss": 0.3444, + "rejected_geometric_mean": -3.384768486022949, + "step": 940 + }, + { + "chosen_geometric_mean": -1.1740412712097168, + "epoch": 0.23, + "grad_norm": 7.125, + "learning_rate": 4.8375140519458776e-06, + "log_odds": 0.10267293453216553, + "log_odds_ratio": -0.6687473058700562, + "loss": 0.3251, + "rejected_geometric_mean": -1.2702744007110596, + "step": 941 + }, + { + "chosen_geometric_mean": -1.1856188774108887, + "epoch": 0.23, + "grad_norm": 21.125, + "learning_rate": 4.837168649131688e-06, + "log_odds": 1.8478600978851318, + "log_odds_ratio": -0.3986678719520569, + "loss": 0.3248, + "rejected_geometric_mean": -2.8511505126953125, + "step": 942 + }, + { + "chosen_geometric_mean": -1.104393482208252, + "epoch": 0.23, + "grad_norm": 4.65625, + "learning_rate": 4.836822891946631e-06, + "log_odds": 3.0513856410980225, + "log_odds_ratio": -0.1822534203529358, + "loss": 0.3416, + "rejected_geometric_mean": -3.8530941009521484, + "step": 943 + }, + { + "chosen_geometric_mean": -1.1313214302062988, + "epoch": 0.23, + "grad_norm": 3.625, + "learning_rate": 4.836476780443129e-06, + "log_odds": 0.7675334215164185, + "log_odds_ratio": -0.43363699316978455, + "loss": 0.2886, + "rejected_geometric_mean": -1.7311458587646484, + "step": 944 + }, + { + "chosen_geometric_mean": -0.8031457662582397, + "epoch": 0.23, + "grad_norm": 3.859375, + "learning_rate": 4.836130314673664e-06, + "log_odds": 0.9466537833213806, + "log_odds_ratio": -0.5086901783943176, + "loss": 0.3423, + "rejected_geometric_mean": -1.5977503061294556, + "step": 945 + }, + { + "chosen_geometric_mean": -1.0672882795333862, + "epoch": 0.23, + "grad_norm": 7.375, + "learning_rate": 4.8357834946907655e-06, + "log_odds": 3.642645835876465, + "log_odds_ratio": -0.24813632667064667, + "loss": 0.3475, + "rejected_geometric_mean": -4.436470985412598, + "step": 946 + }, + { + "chosen_geometric_mean": -1.0779796838760376, + "epoch": 0.23, + "grad_norm": 9.9375, + "learning_rate": 4.835436320547022e-06, + "log_odds": 1.7469961643218994, + "log_odds_ratio": -0.3038789629936218, + "loss": 0.3636, + "rejected_geometric_mean": -2.5862178802490234, + "step": 947 + }, + { + "chosen_geometric_mean": -0.9201076030731201, + "epoch": 0.23, + "grad_norm": 16.0, + "learning_rate": 4.835088792295073e-06, + "log_odds": 0.8062009811401367, + "log_odds_ratio": -0.4191747009754181, + "loss": 0.2953, + "rejected_geometric_mean": -1.514838695526123, + "step": 948 + }, + { + "chosen_geometric_mean": -1.2670984268188477, + "epoch": 0.23, + "grad_norm": 5.5, + "learning_rate": 4.834740909987612e-06, + "log_odds": 2.0438921451568604, + "log_odds_ratio": -0.3314111828804016, + "loss": 0.2772, + "rejected_geometric_mean": -3.157761335372925, + "step": 949 + }, + { + "chosen_geometric_mean": -1.2840721607208252, + "epoch": 0.24, + "grad_norm": 10.75, + "learning_rate": 4.834392673677385e-06, + "log_odds": 1.8543306589126587, + "log_odds_ratio": -0.3320719301700592, + "loss": 0.3126, + "rejected_geometric_mean": -2.914937973022461, + "step": 950 + }, + { + "chosen_geometric_mean": -1.0140830278396606, + "epoch": 0.24, + "grad_norm": 3.546875, + "learning_rate": 4.834044083417196e-06, + "log_odds": 2.0870797634124756, + "log_odds_ratio": -0.2853657007217407, + "loss": 0.3055, + "rejected_geometric_mean": -2.8022780418395996, + "step": 951 + }, + { + "chosen_geometric_mean": -1.078272819519043, + "epoch": 0.24, + "grad_norm": 9.0, + "learning_rate": 4.833695139259895e-06, + "log_odds": 0.6545569896697998, + "log_odds_ratio": -0.45989152789115906, + "loss": 0.3691, + "rejected_geometric_mean": -1.5566658973693848, + "step": 952 + }, + { + "chosen_geometric_mean": -1.0207575559616089, + "epoch": 0.24, + "grad_norm": 6.71875, + "learning_rate": 4.8333458412583956e-06, + "log_odds": 1.195088505744934, + "log_odds_ratio": -0.38783007860183716, + "loss": 0.3348, + "rejected_geometric_mean": -2.016885757446289, + "step": 953 + }, + { + "chosen_geometric_mean": -1.1529593467712402, + "epoch": 0.24, + "grad_norm": 5.8125, + "learning_rate": 4.832996189465656e-06, + "log_odds": 1.867342233657837, + "log_odds_ratio": -0.264734148979187, + "loss": 0.274, + "rejected_geometric_mean": -2.7768654823303223, + "step": 954 + }, + { + "chosen_geometric_mean": -1.061488389968872, + "epoch": 0.24, + "grad_norm": 3.0, + "learning_rate": 4.832646183934693e-06, + "log_odds": 2.67812180519104, + "log_odds_ratio": -0.08998244255781174, + "loss": 0.3178, + "rejected_geometric_mean": -3.338751792907715, + "step": 955 + }, + { + "chosen_geometric_mean": -1.0603703260421753, + "epoch": 0.24, + "grad_norm": 8.125, + "learning_rate": 4.832295824718576e-06, + "log_odds": 1.4281457662582397, + "log_odds_ratio": -0.27671539783477783, + "loss": 0.3515, + "rejected_geometric_mean": -2.2428879737854004, + "step": 956 + }, + { + "chosen_geometric_mean": -0.9546753764152527, + "epoch": 0.24, + "grad_norm": 5.53125, + "learning_rate": 4.831945111870428e-06, + "log_odds": 1.664874792098999, + "log_odds_ratio": -0.30760693550109863, + "loss": 0.2767, + "rejected_geometric_mean": -2.305555582046509, + "step": 957 + }, + { + "chosen_geometric_mean": -1.6050795316696167, + "epoch": 0.24, + "grad_norm": 19.25, + "learning_rate": 4.831594045443425e-06, + "log_odds": 2.2851386070251465, + "log_odds_ratio": -0.31340423226356506, + "loss": 0.336, + "rejected_geometric_mean": -3.7624754905700684, + "step": 958 + }, + { + "chosen_geometric_mean": -0.9623544216156006, + "epoch": 0.24, + "grad_norm": 2.875, + "learning_rate": 4.8312426254907975e-06, + "log_odds": 2.1680824756622314, + "log_odds_ratio": -0.3104209899902344, + "loss": 0.2785, + "rejected_geometric_mean": -2.7804007530212402, + "step": 959 + }, + { + "chosen_geometric_mean": -1.1269285678863525, + "epoch": 0.24, + "grad_norm": 14.375, + "learning_rate": 4.830890852065829e-06, + "log_odds": 2.539768695831299, + "log_odds_ratio": -0.18422165513038635, + "loss": 0.371, + "rejected_geometric_mean": -3.3617970943450928, + "step": 960 + }, + { + "chosen_geometric_mean": -1.191391944885254, + "epoch": 0.24, + "grad_norm": 6.9375, + "learning_rate": 4.830538725221857e-06, + "log_odds": 0.9411325454711914, + "log_odds_ratio": -0.45208635926246643, + "loss": 0.3265, + "rejected_geometric_mean": -1.9945392608642578, + "step": 961 + }, + { + "chosen_geometric_mean": -1.4582918882369995, + "epoch": 0.24, + "grad_norm": 6.875, + "learning_rate": 4.830186245012272e-06, + "log_odds": 2.983207941055298, + "log_odds_ratio": -0.34742921590805054, + "loss": 0.4054, + "rejected_geometric_mean": -4.255593776702881, + "step": 962 + }, + { + "chosen_geometric_mean": -1.0432817935943604, + "epoch": 0.24, + "grad_norm": 30.625, + "learning_rate": 4.8298334114905186e-06, + "log_odds": 0.5503261089324951, + "log_odds_ratio": -0.4920879304409027, + "loss": 0.2936, + "rejected_geometric_mean": -1.449838638305664, + "step": 963 + }, + { + "chosen_geometric_mean": -1.1140313148498535, + "epoch": 0.24, + "grad_norm": 3.5625, + "learning_rate": 4.829480224710094e-06, + "log_odds": 0.9036850333213806, + "log_odds_ratio": -0.4942012131214142, + "loss": 0.3263, + "rejected_geometric_mean": -1.8851045370101929, + "step": 964 + }, + { + "chosen_geometric_mean": -1.0959222316741943, + "epoch": 0.24, + "grad_norm": 15.1875, + "learning_rate": 4.829126684724552e-06, + "log_odds": 0.9536967277526855, + "log_odds_ratio": -0.5473127365112305, + "loss": 0.3454, + "rejected_geometric_mean": -1.940177083015442, + "step": 965 + }, + { + "chosen_geometric_mean": -1.124467372894287, + "epoch": 0.24, + "grad_norm": 2.6875, + "learning_rate": 4.828772791587496e-06, + "log_odds": 3.3927977085113525, + "log_odds_ratio": -0.23082999885082245, + "loss": 0.2836, + "rejected_geometric_mean": -4.195369720458984, + "step": 966 + }, + { + "chosen_geometric_mean": -1.2696490287780762, + "epoch": 0.24, + "grad_norm": 17.5, + "learning_rate": 4.828418545352585e-06, + "log_odds": 1.5461348295211792, + "log_odds_ratio": -0.3714827597141266, + "loss": 0.3278, + "rejected_geometric_mean": -2.6180505752563477, + "step": 967 + }, + { + "chosen_geometric_mean": -1.4484964609146118, + "epoch": 0.24, + "grad_norm": 20.0, + "learning_rate": 4.828063946073532e-06, + "log_odds": 0.2715992331504822, + "log_odds_ratio": -0.5783618092536926, + "loss": 0.3214, + "rejected_geometric_mean": -1.683394193649292, + "step": 968 + }, + { + "chosen_geometric_mean": -1.297599196434021, + "epoch": 0.24, + "grad_norm": 5.875, + "learning_rate": 4.827708993804101e-06, + "log_odds": 0.5906326770782471, + "log_odds_ratio": -0.4648492634296417, + "loss": 0.3194, + "rejected_geometric_mean": -1.7613314390182495, + "step": 969 + }, + { + "chosen_geometric_mean": -1.814007043838501, + "epoch": 0.24, + "grad_norm": 33.25, + "learning_rate": 4.827353688598113e-06, + "log_odds": 1.8886897563934326, + "log_odds_ratio": -0.37768813967704773, + "loss": 0.2813, + "rejected_geometric_mean": -3.44612979888916, + "step": 970 + }, + { + "chosen_geometric_mean": -1.2037513256072998, + "epoch": 0.24, + "grad_norm": 20.375, + "learning_rate": 4.82699803050944e-06, + "log_odds": 1.5388116836547852, + "log_odds_ratio": -0.3109394609928131, + "loss": 0.261, + "rejected_geometric_mean": -2.516371965408325, + "step": 971 + }, + { + "chosen_geometric_mean": -1.0759838819503784, + "epoch": 0.24, + "grad_norm": 5.125, + "learning_rate": 4.82664201959201e-06, + "log_odds": 3.620959997177124, + "log_odds_ratio": -0.2811501920223236, + "loss": 0.3123, + "rejected_geometric_mean": -4.425314903259277, + "step": 972 + }, + { + "chosen_geometric_mean": -1.454331874847412, + "epoch": 0.24, + "grad_norm": 15.9375, + "learning_rate": 4.826285655899799e-06, + "log_odds": 1.111992597579956, + "log_odds_ratio": -0.4372331500053406, + "loss": 0.3496, + "rejected_geometric_mean": -2.456932306289673, + "step": 973 + }, + { + "chosen_geometric_mean": -1.1090089082717896, + "epoch": 0.24, + "grad_norm": 5.375, + "learning_rate": 4.825928939486844e-06, + "log_odds": 0.6031625270843506, + "log_odds_ratio": -0.48117485642433167, + "loss": 0.3699, + "rejected_geometric_mean": -1.5422675609588623, + "step": 974 + }, + { + "chosen_geometric_mean": -1.251862645149231, + "epoch": 0.24, + "grad_norm": 8.5, + "learning_rate": 4.8255718704072296e-06, + "log_odds": 0.5513910055160522, + "log_odds_ratio": -0.5586150884628296, + "loss": 0.3441, + "rejected_geometric_mean": -1.7284660339355469, + "step": 975 + }, + { + "chosen_geometric_mean": -1.1354734897613525, + "epoch": 0.24, + "grad_norm": 34.0, + "learning_rate": 4.825214448715097e-06, + "log_odds": 0.8616930842399597, + "log_odds_ratio": -0.45719051361083984, + "loss": 0.3773, + "rejected_geometric_mean": -1.8849341869354248, + "step": 976 + }, + { + "chosen_geometric_mean": -1.151244044303894, + "epoch": 0.24, + "grad_norm": 7.65625, + "learning_rate": 4.824856674464639e-06, + "log_odds": 2.5173239707946777, + "log_odds_ratio": -0.20437610149383545, + "loss": 0.3109, + "rejected_geometric_mean": -3.4127633571624756, + "step": 977 + }, + { + "chosen_geometric_mean": -1.1444554328918457, + "epoch": 0.24, + "grad_norm": 7.03125, + "learning_rate": 4.824498547710106e-06, + "log_odds": 0.358733594417572, + "log_odds_ratio": -0.5501757860183716, + "loss": 0.3316, + "rejected_geometric_mean": -1.4107944965362549, + "step": 978 + }, + { + "chosen_geometric_mean": -1.7203583717346191, + "epoch": 0.24, + "grad_norm": 14.3125, + "learning_rate": 4.824140068505794e-06, + "log_odds": 1.750291109085083, + "log_odds_ratio": -0.24586474895477295, + "loss": 0.3023, + "rejected_geometric_mean": -3.317742347717285, + "step": 979 + }, + { + "chosen_geometric_mean": -1.0490063428878784, + "epoch": 0.24, + "grad_norm": 2.484375, + "learning_rate": 4.82378123690606e-06, + "log_odds": 2.9898288249969482, + "log_odds_ratio": -0.36320847272872925, + "loss": 0.3216, + "rejected_geometric_mean": -3.8563530445098877, + "step": 980 + }, + { + "chosen_geometric_mean": -1.3257579803466797, + "epoch": 0.24, + "grad_norm": 2.09375, + "learning_rate": 4.8234220529653095e-06, + "log_odds": 2.108583450317383, + "log_odds_ratio": -0.330108106136322, + "loss": 0.2728, + "rejected_geometric_mean": -3.237243413925171, + "step": 981 + }, + { + "chosen_geometric_mean": -0.9235042929649353, + "epoch": 0.24, + "grad_norm": 3.125, + "learning_rate": 4.823062516738006e-06, + "log_odds": 0.24275854229927063, + "log_odds_ratio": -0.627009391784668, + "loss": 0.3584, + "rejected_geometric_mean": -1.1338154077529907, + "step": 982 + }, + { + "chosen_geometric_mean": -1.1315230131149292, + "epoch": 0.24, + "grad_norm": 3.0, + "learning_rate": 4.822702628278661e-06, + "log_odds": 1.737363338470459, + "log_odds_ratio": -0.3361295163631439, + "loss": 0.3447, + "rejected_geometric_mean": -2.638134002685547, + "step": 983 + }, + { + "chosen_geometric_mean": -1.0565552711486816, + "epoch": 0.24, + "grad_norm": 2.890625, + "learning_rate": 4.822342387641844e-06, + "log_odds": 0.43422845005989075, + "log_odds_ratio": -0.5209131836891174, + "loss": 0.2826, + "rejected_geometric_mean": -1.393399715423584, + "step": 984 + }, + { + "chosen_geometric_mean": -1.0194519758224487, + "epoch": 0.24, + "grad_norm": 4.09375, + "learning_rate": 4.821981794882177e-06, + "log_odds": 2.157592296600342, + "log_odds_ratio": -0.15280435979366302, + "loss": 0.257, + "rejected_geometric_mean": -2.809162139892578, + "step": 985 + }, + { + "chosen_geometric_mean": -0.9013987183570862, + "epoch": 0.24, + "grad_norm": 3.84375, + "learning_rate": 4.821620850054332e-06, + "log_odds": 0.4550275206565857, + "log_odds_ratio": -0.5024555325508118, + "loss": 0.2909, + "rejected_geometric_mean": -1.1802870035171509, + "step": 986 + }, + { + "chosen_geometric_mean": -1.0376646518707275, + "epoch": 0.24, + "grad_norm": 6.90625, + "learning_rate": 4.821259553213038e-06, + "log_odds": 3.807908535003662, + "log_odds_ratio": -0.2684950828552246, + "loss": 0.3336, + "rejected_geometric_mean": -4.5526299476623535, + "step": 987 + }, + { + "chosen_geometric_mean": -0.8711281418800354, + "epoch": 0.24, + "grad_norm": 2.703125, + "learning_rate": 4.820897904413077e-06, + "log_odds": 5.57951021194458, + "log_odds_ratio": -0.011303205043077469, + "loss": 0.2831, + "rejected_geometric_mean": -5.908957004547119, + "step": 988 + }, + { + "chosen_geometric_mean": -1.4990589618682861, + "epoch": 0.24, + "grad_norm": 14.0625, + "learning_rate": 4.820535903709283e-06, + "log_odds": 4.857852458953857, + "log_odds_ratio": -0.15884163975715637, + "loss": 0.3158, + "rejected_geometric_mean": -6.150486946105957, + "step": 989 + }, + { + "chosen_geometric_mean": -0.9906178116798401, + "epoch": 0.25, + "grad_norm": 2.453125, + "learning_rate": 4.820173551156543e-06, + "log_odds": 0.7608760595321655, + "log_odds_ratio": -0.39413708448410034, + "loss": 0.2818, + "rejected_geometric_mean": -1.5371737480163574, + "step": 990 + }, + { + "chosen_geometric_mean": -0.9775682091712952, + "epoch": 0.25, + "grad_norm": 5.5625, + "learning_rate": 4.8198108468098005e-06, + "log_odds": 5.048196792602539, + "log_odds_ratio": -0.04435425624251366, + "loss": 0.2437, + "rejected_geometric_mean": -5.555904865264893, + "step": 991 + }, + { + "chosen_geometric_mean": -1.1648316383361816, + "epoch": 0.25, + "grad_norm": 7.46875, + "learning_rate": 4.819447790724049e-06, + "log_odds": 4.4137349128723145, + "log_odds_ratio": -0.09109801799058914, + "loss": 0.2828, + "rejected_geometric_mean": -5.2570977210998535, + "step": 992 + }, + { + "chosen_geometric_mean": -1.2735365629196167, + "epoch": 0.25, + "grad_norm": 35.25, + "learning_rate": 4.819084382954336e-06, + "log_odds": 0.7939440011978149, + "log_odds_ratio": -0.42965686321258545, + "loss": 0.3156, + "rejected_geometric_mean": -1.9422903060913086, + "step": 993 + }, + { + "chosen_geometric_mean": -1.3334505558013916, + "epoch": 0.25, + "grad_norm": 2.96875, + "learning_rate": 4.818720623555765e-06, + "log_odds": 0.2151167243719101, + "log_odds_ratio": -0.6099213361740112, + "loss": 0.386, + "rejected_geometric_mean": -1.4957212209701538, + "step": 994 + }, + { + "chosen_geometric_mean": -1.0413933992385864, + "epoch": 0.25, + "grad_norm": 4.78125, + "learning_rate": 4.818356512583487e-06, + "log_odds": 0.4121949076652527, + "log_odds_ratio": -0.5203163623809814, + "loss": 0.3148, + "rejected_geometric_mean": -1.316549301147461, + "step": 995 + }, + { + "chosen_geometric_mean": -1.0325566530227661, + "epoch": 0.25, + "grad_norm": 7.28125, + "learning_rate": 4.817992050092714e-06, + "log_odds": 0.9340776205062866, + "log_odds_ratio": -0.391661137342453, + "loss": 0.3173, + "rejected_geometric_mean": -1.738355278968811, + "step": 996 + }, + { + "chosen_geometric_mean": -1.275100827217102, + "epoch": 0.25, + "grad_norm": 7.71875, + "learning_rate": 4.817627236138704e-06, + "log_odds": 0.24837161600589752, + "log_odds_ratio": -0.6022191047668457, + "loss": 0.3722, + "rejected_geometric_mean": -1.444992184638977, + "step": 997 + }, + { + "chosen_geometric_mean": -1.0658161640167236, + "epoch": 0.25, + "grad_norm": 4.3125, + "learning_rate": 4.817262070776773e-06, + "log_odds": 2.5034589767456055, + "log_odds_ratio": -0.2209053337574005, + "loss": 0.2401, + "rejected_geometric_mean": -3.314067840576172, + "step": 998 + }, + { + "chosen_geometric_mean": -1.105780839920044, + "epoch": 0.25, + "grad_norm": 14.875, + "learning_rate": 4.816896554062288e-06, + "log_odds": 2.7927098274230957, + "log_odds_ratio": -0.4699341952800751, + "loss": 0.3507, + "rejected_geometric_mean": -3.7597129344940186, + "step": 999 + }, + { + "chosen_geometric_mean": -0.9673168063163757, + "epoch": 0.25, + "grad_norm": 4.84375, + "learning_rate": 4.816530686050672e-06, + "log_odds": 1.1657483577728271, + "log_odds_ratio": -0.39752668142318726, + "loss": 0.2899, + "rejected_geometric_mean": -1.905017375946045, + "step": 1000 + }, + { + "chosen_geometric_mean": -1.2315114736557007, + "epoch": 0.25, + "grad_norm": 5.03125, + "learning_rate": 4.816164466797399e-06, + "log_odds": 0.7392870187759399, + "log_odds_ratio": -0.42181676626205444, + "loss": 0.3354, + "rejected_geometric_mean": -1.8213399648666382, + "step": 1001 + }, + { + "chosen_geometric_mean": -1.0887585878372192, + "epoch": 0.25, + "grad_norm": 6.84375, + "learning_rate": 4.815797896357993e-06, + "log_odds": 0.8869326710700989, + "log_odds_ratio": -0.5006448030471802, + "loss": 0.2612, + "rejected_geometric_mean": -1.8890901803970337, + "step": 1002 + }, + { + "chosen_geometric_mean": -1.1364929676055908, + "epoch": 0.25, + "grad_norm": 10.125, + "learning_rate": 4.81543097478804e-06, + "log_odds": 3.666076898574829, + "log_odds_ratio": -0.21804390847682953, + "loss": 0.2638, + "rejected_geometric_mean": -4.507699012756348, + "step": 1003 + }, + { + "chosen_geometric_mean": -1.3437278270721436, + "epoch": 0.25, + "grad_norm": 31.5, + "learning_rate": 4.815063702143171e-06, + "log_odds": 2.2625720500946045, + "log_odds_ratio": -0.18473537266254425, + "loss": 0.357, + "rejected_geometric_mean": -3.376073122024536, + "step": 1004 + }, + { + "chosen_geometric_mean": -1.257191777229309, + "epoch": 0.25, + "grad_norm": 6.28125, + "learning_rate": 4.8146960784790744e-06, + "log_odds": 3.1716198921203613, + "log_odds_ratio": -0.3054070770740509, + "loss": 0.3384, + "rejected_geometric_mean": -4.2147536277771, + "step": 1005 + }, + { + "chosen_geometric_mean": -1.1244518756866455, + "epoch": 0.25, + "grad_norm": 10.5625, + "learning_rate": 4.814328103851491e-06, + "log_odds": 2.6775193214416504, + "log_odds_ratio": -0.2752275764942169, + "loss": 0.3355, + "rejected_geometric_mean": -3.5790419578552246, + "step": 1006 + }, + { + "chosen_geometric_mean": -1.180476188659668, + "epoch": 0.25, + "grad_norm": 3.296875, + "learning_rate": 4.813959778316213e-06, + "log_odds": 2.4499926567077637, + "log_odds_ratio": -0.31203770637512207, + "loss": 0.2891, + "rejected_geometric_mean": -3.4349474906921387, + "step": 1007 + }, + { + "chosen_geometric_mean": -1.0850218534469604, + "epoch": 0.25, + "grad_norm": 3.625, + "learning_rate": 4.813591101929089e-06, + "log_odds": 3.5021772384643555, + "log_odds_ratio": -0.24693894386291504, + "loss": 0.3077, + "rejected_geometric_mean": -4.257981300354004, + "step": 1008 + }, + { + "chosen_geometric_mean": -1.1442548036575317, + "epoch": 0.25, + "grad_norm": 4.03125, + "learning_rate": 4.813222074746019e-06, + "log_odds": 1.7417110204696655, + "log_odds_ratio": -0.4205954968929291, + "loss": 0.2874, + "rejected_geometric_mean": -2.6716935634613037, + "step": 1009 + }, + { + "chosen_geometric_mean": -1.3685648441314697, + "epoch": 0.25, + "grad_norm": 24.375, + "learning_rate": 4.8128526968229565e-06, + "log_odds": 3.303112268447876, + "log_odds_ratio": -0.1714688241481781, + "loss": 0.3563, + "rejected_geometric_mean": -4.4371232986450195, + "step": 1010 + }, + { + "chosen_geometric_mean": -1.2784366607666016, + "epoch": 0.25, + "grad_norm": 4.78125, + "learning_rate": 4.812482968215906e-06, + "log_odds": 0.3448689877986908, + "log_odds_ratio": -0.5412402749061584, + "loss": 0.334, + "rejected_geometric_mean": -1.5425945520401, + "step": 1011 + }, + { + "chosen_geometric_mean": -1.3501505851745605, + "epoch": 0.25, + "grad_norm": 3.9375, + "learning_rate": 4.81211288898093e-06, + "log_odds": 1.6294779777526855, + "log_odds_ratio": -0.28184300661087036, + "loss": 0.2808, + "rejected_geometric_mean": -2.7852697372436523, + "step": 1012 + }, + { + "chosen_geometric_mean": -0.9374516010284424, + "epoch": 0.25, + "grad_norm": 2.46875, + "learning_rate": 4.81174245917414e-06, + "log_odds": 3.5065646171569824, + "log_odds_ratio": -0.33291253447532654, + "loss": 0.3311, + "rejected_geometric_mean": -4.132026672363281, + "step": 1013 + }, + { + "chosen_geometric_mean": -0.908944308757782, + "epoch": 0.25, + "grad_norm": 6.03125, + "learning_rate": 4.811371678851703e-06, + "log_odds": 2.1635773181915283, + "log_odds_ratio": -0.33017781376838684, + "loss": 0.3147, + "rejected_geometric_mean": -2.6896235942840576, + "step": 1014 + }, + { + "chosen_geometric_mean": -1.2757794857025146, + "epoch": 0.25, + "grad_norm": 15.75, + "learning_rate": 4.811000548069836e-06, + "log_odds": 4.245070457458496, + "log_odds_ratio": -0.374697208404541, + "loss": 0.3727, + "rejected_geometric_mean": -5.363122463226318, + "step": 1015 + }, + { + "chosen_geometric_mean": -1.1371760368347168, + "epoch": 0.25, + "grad_norm": 7.5, + "learning_rate": 4.810629066884814e-06, + "log_odds": 3.85347056388855, + "log_odds_ratio": -0.3240523636341095, + "loss": 0.3163, + "rejected_geometric_mean": -4.900417804718018, + "step": 1016 + }, + { + "chosen_geometric_mean": -1.000716209411621, + "epoch": 0.25, + "grad_norm": 22.5, + "learning_rate": 4.8102572353529615e-06, + "log_odds": 0.2436596006155014, + "log_odds_ratio": -0.6262915134429932, + "loss": 0.3029, + "rejected_geometric_mean": -1.1665598154067993, + "step": 1017 + }, + { + "chosen_geometric_mean": -0.9064507484436035, + "epoch": 0.25, + "grad_norm": 2.34375, + "learning_rate": 4.809885053530656e-06, + "log_odds": 4.3332109451293945, + "log_odds_ratio": -0.23185738921165466, + "loss": 0.2948, + "rejected_geometric_mean": -4.878248691558838, + "step": 1018 + }, + { + "chosen_geometric_mean": -1.1910151243209839, + "epoch": 0.25, + "grad_norm": 10.0625, + "learning_rate": 4.809512521474331e-06, + "log_odds": 5.543900966644287, + "log_odds_ratio": -0.3681783080101013, + "loss": 0.3175, + "rejected_geometric_mean": -6.523789405822754, + "step": 1019 + }, + { + "chosen_geometric_mean": -1.1193746328353882, + "epoch": 0.25, + "grad_norm": 3.515625, + "learning_rate": 4.80913963924047e-06, + "log_odds": 4.369364261627197, + "log_odds_ratio": -0.03870498389005661, + "loss": 0.3245, + "rejected_geometric_mean": -5.108509063720703, + "step": 1020 + }, + { + "chosen_geometric_mean": -1.0174126625061035, + "epoch": 0.25, + "grad_norm": 4.9375, + "learning_rate": 4.808766406885611e-06, + "log_odds": 5.857166290283203, + "log_odds_ratio": -0.23381322622299194, + "loss": 0.2974, + "rejected_geometric_mean": -6.524782180786133, + "step": 1021 + }, + { + "chosen_geometric_mean": -1.3539422750473022, + "epoch": 0.25, + "grad_norm": 8.3125, + "learning_rate": 4.808392824466346e-06, + "log_odds": 1.15217924118042, + "log_odds_ratio": -0.5139679908752441, + "loss": 0.2828, + "rejected_geometric_mean": -2.4215316772460938, + "step": 1022 + }, + { + "chosen_geometric_mean": -1.3073910474777222, + "epoch": 0.25, + "grad_norm": 31.875, + "learning_rate": 4.8080188920393175e-06, + "log_odds": 2.0599265098571777, + "log_odds_ratio": -0.28688329458236694, + "loss": 0.3641, + "rejected_geometric_mean": -3.13810396194458, + "step": 1023 + }, + { + "chosen_geometric_mean": -1.0560673475265503, + "epoch": 0.25, + "grad_norm": 57.75, + "learning_rate": 4.807644609661224e-06, + "log_odds": 2.153667449951172, + "log_odds_ratio": -0.5387263298034668, + "loss": 0.438, + "rejected_geometric_mean": -3.040769577026367, + "step": 1024 + }, + { + "chosen_geometric_mean": -3.5257551670074463, + "epoch": 0.25, + "grad_norm": 39.5, + "learning_rate": 4.807269977388816e-06, + "log_odds": 3.3127388954162598, + "log_odds_ratio": -1.7506425380706787, + "loss": 0.448, + "rejected_geometric_mean": -6.697192192077637, + "step": 1025 + }, + { + "chosen_geometric_mean": -1.0646607875823975, + "epoch": 0.25, + "grad_norm": 3.140625, + "learning_rate": 4.806894995278894e-06, + "log_odds": 1.746376633644104, + "log_odds_ratio": -0.42465221881866455, + "loss": 0.2813, + "rejected_geometric_mean": -2.6199235916137695, + "step": 1026 + }, + { + "chosen_geometric_mean": -1.2435663938522339, + "epoch": 0.25, + "grad_norm": 5.8125, + "learning_rate": 4.8065196633883175e-06, + "log_odds": 1.7332313060760498, + "log_odds_ratio": -0.27949151396751404, + "loss": 0.3117, + "rejected_geometric_mean": -2.7851593494415283, + "step": 1027 + }, + { + "chosen_geometric_mean": -1.0462943315505981, + "epoch": 0.25, + "grad_norm": 2.953125, + "learning_rate": 4.806143981773994e-06, + "log_odds": 2.604187250137329, + "log_odds_ratio": -0.3152523636817932, + "loss": 0.3017, + "rejected_geometric_mean": -3.403425693511963, + "step": 1028 + }, + { + "chosen_geometric_mean": -1.2554150819778442, + "epoch": 0.25, + "grad_norm": 3.21875, + "learning_rate": 4.8057679504928866e-06, + "log_odds": 3.253618001937866, + "log_odds_ratio": -0.437688946723938, + "loss": 0.2775, + "rejected_geometric_mean": -4.387164115905762, + "step": 1029 + }, + { + "chosen_geometric_mean": -0.9876554012298584, + "epoch": 0.26, + "grad_norm": 11.125, + "learning_rate": 4.80539156960201e-06, + "log_odds": 3.424504280090332, + "log_odds_ratio": -0.24541917443275452, + "loss": 0.3168, + "rejected_geometric_mean": -4.066751003265381, + "step": 1030 + }, + { + "chosen_geometric_mean": -0.8375313878059387, + "epoch": 0.26, + "grad_norm": 5.03125, + "learning_rate": 4.805014839158434e-06, + "log_odds": 6.1370439529418945, + "log_odds_ratio": -0.014222441241145134, + "loss": 0.3313, + "rejected_geometric_mean": -6.402694225311279, + "step": 1031 + }, + { + "chosen_geometric_mean": -1.1197234392166138, + "epoch": 0.26, + "grad_norm": 11.5625, + "learning_rate": 4.804637759219278e-06, + "log_odds": 4.9866228103637695, + "log_odds_ratio": -0.02690793201327324, + "loss": 0.3005, + "rejected_geometric_mean": -5.7122602462768555, + "step": 1032 + }, + { + "chosen_geometric_mean": -1.3724735975265503, + "epoch": 0.26, + "grad_norm": 2.921875, + "learning_rate": 4.804260329841718e-06, + "log_odds": 2.3313169479370117, + "log_odds_ratio": -0.2449643909931183, + "loss": 0.3329, + "rejected_geometric_mean": -3.4846079349517822, + "step": 1033 + }, + { + "chosen_geometric_mean": -1.8077812194824219, + "epoch": 0.26, + "grad_norm": 30.625, + "learning_rate": 4.80388255108298e-06, + "log_odds": 3.265981912612915, + "log_odds_ratio": -0.08981017768383026, + "loss": 0.416, + "rejected_geometric_mean": -4.841080188751221, + "step": 1034 + }, + { + "chosen_geometric_mean": -1.297255039215088, + "epoch": 0.26, + "grad_norm": 3.65625, + "learning_rate": 4.803504423000346e-06, + "log_odds": 3.6411147117614746, + "log_odds_ratio": -0.16806252300739288, + "loss": 0.3044, + "rejected_geometric_mean": -4.675996780395508, + "step": 1035 + }, + { + "chosen_geometric_mean": -1.109208583831787, + "epoch": 0.26, + "grad_norm": 29.0, + "learning_rate": 4.8031259456511475e-06, + "log_odds": 2.5925300121307373, + "log_odds_ratio": -0.15526118874549866, + "loss": 0.3217, + "rejected_geometric_mean": -3.391341209411621, + "step": 1036 + }, + { + "chosen_geometric_mean": -1.138648509979248, + "epoch": 0.26, + "grad_norm": 6.34375, + "learning_rate": 4.802747119092772e-06, + "log_odds": 0.9574225544929504, + "log_odds_ratio": -0.39903002977371216, + "loss": 0.2953, + "rejected_geometric_mean": -1.9372575283050537, + "step": 1037 + }, + { + "chosen_geometric_mean": -1.155627727508545, + "epoch": 0.26, + "grad_norm": 5.3125, + "learning_rate": 4.802367943382658e-06, + "log_odds": 0.4399852752685547, + "log_odds_ratio": -0.5003744959831238, + "loss": 0.3249, + "rejected_geometric_mean": -1.4839396476745605, + "step": 1038 + }, + { + "chosen_geometric_mean": -0.9225017428398132, + "epoch": 0.26, + "grad_norm": 6.46875, + "learning_rate": 4.801988418578298e-06, + "log_odds": 1.859790563583374, + "log_odds_ratio": -0.2363831102848053, + "loss": 0.2901, + "rejected_geometric_mean": -2.4439964294433594, + "step": 1039 + }, + { + "chosen_geometric_mean": -1.0545843839645386, + "epoch": 0.26, + "grad_norm": 2.140625, + "learning_rate": 4.8016085447372376e-06, + "log_odds": 0.8545517921447754, + "log_odds_ratio": -0.5329055190086365, + "loss": 0.3288, + "rejected_geometric_mean": -1.8089044094085693, + "step": 1040 + }, + { + "chosen_geometric_mean": -1.043123722076416, + "epoch": 0.26, + "grad_norm": 10.3125, + "learning_rate": 4.801228321917074e-06, + "log_odds": 0.4343413710594177, + "log_odds_ratio": -0.50316321849823, + "loss": 0.2853, + "rejected_geometric_mean": -1.3418760299682617, + "step": 1041 + }, + { + "chosen_geometric_mean": -1.1671112775802612, + "epoch": 0.26, + "grad_norm": 3.15625, + "learning_rate": 4.800847750175458e-06, + "log_odds": 0.8467669486999512, + "log_odds_ratio": -0.43454015254974365, + "loss": 0.3039, + "rejected_geometric_mean": -1.8647470474243164, + "step": 1042 + }, + { + "chosen_geometric_mean": -1.267272710800171, + "epoch": 0.26, + "grad_norm": 2.53125, + "learning_rate": 4.800466829570094e-06, + "log_odds": 2.923772096633911, + "log_odds_ratio": -0.5580295324325562, + "loss": 0.3431, + "rejected_geometric_mean": -4.098170757293701, + "step": 1043 + }, + { + "chosen_geometric_mean": -1.204809546470642, + "epoch": 0.26, + "grad_norm": 2.359375, + "learning_rate": 4.800085560158738e-06, + "log_odds": 2.864642858505249, + "log_odds_ratio": -0.14927519857883453, + "loss": 0.3511, + "rejected_geometric_mean": -3.7832303047180176, + "step": 1044 + }, + { + "chosen_geometric_mean": -1.3568964004516602, + "epoch": 0.26, + "grad_norm": 2.328125, + "learning_rate": 4.7997039419992005e-06, + "log_odds": 2.192084789276123, + "log_odds_ratio": -0.2958572208881378, + "loss": 0.3123, + "rejected_geometric_mean": -3.357814311981201, + "step": 1045 + }, + { + "chosen_geometric_mean": -1.1259279251098633, + "epoch": 0.26, + "grad_norm": 5.125, + "learning_rate": 4.799321975149343e-06, + "log_odds": 0.6131182312965393, + "log_odds_ratio": -0.46558678150177, + "loss": 0.2908, + "rejected_geometric_mean": -1.592429280281067, + "step": 1046 + }, + { + "chosen_geometric_mean": -1.1234493255615234, + "epoch": 0.26, + "grad_norm": 4.5, + "learning_rate": 4.7989396596670815e-06, + "log_odds": 2.768632173538208, + "log_odds_ratio": -0.5221789479255676, + "loss": 0.319, + "rejected_geometric_mean": -3.8040237426757812, + "step": 1047 + }, + { + "chosen_geometric_mean": -1.0733718872070312, + "epoch": 0.26, + "grad_norm": 7.625, + "learning_rate": 4.798556995610384e-06, + "log_odds": 3.113527774810791, + "log_odds_ratio": -0.4074907600879669, + "loss": 0.3034, + "rejected_geometric_mean": -4.006837844848633, + "step": 1048 + }, + { + "chosen_geometric_mean": -1.2619335651397705, + "epoch": 0.26, + "grad_norm": 39.5, + "learning_rate": 4.798173983037272e-06, + "log_odds": 3.9169743061065674, + "log_odds_ratio": -0.22962252795696259, + "loss": 0.3495, + "rejected_geometric_mean": -4.986333847045898, + "step": 1049 + }, + { + "chosen_geometric_mean": -1.201573371887207, + "epoch": 0.26, + "grad_norm": 35.5, + "learning_rate": 4.797790622005819e-06, + "log_odds": 4.099194049835205, + "log_odds_ratio": -0.3035103380680084, + "loss": 0.3218, + "rejected_geometric_mean": -5.115314960479736, + "step": 1050 + }, + { + "chosen_geometric_mean": -2.16994571685791, + "epoch": 0.26, + "grad_norm": 15.75, + "learning_rate": 4.797406912574151e-06, + "log_odds": 1.943562626838684, + "log_odds_ratio": -0.3865809142589569, + "loss": 0.3252, + "rejected_geometric_mean": -4.0249528884887695, + "step": 1051 + }, + { + "chosen_geometric_mean": -1.1417994499206543, + "epoch": 0.26, + "grad_norm": 19.375, + "learning_rate": 4.797022854800449e-06, + "log_odds": 3.200472354888916, + "log_odds_ratio": -0.2661692798137665, + "loss": 0.3489, + "rejected_geometric_mean": -4.050419330596924, + "step": 1052 + }, + { + "chosen_geometric_mean": -1.3144768476486206, + "epoch": 0.26, + "grad_norm": 25.0, + "learning_rate": 4.796638448742944e-06, + "log_odds": 3.40514874458313, + "log_odds_ratio": -0.2977111339569092, + "loss": 0.2737, + "rejected_geometric_mean": -4.538198471069336, + "step": 1053 + }, + { + "chosen_geometric_mean": -1.232452392578125, + "epoch": 0.26, + "grad_norm": 17.5, + "learning_rate": 4.796253694459923e-06, + "log_odds": 2.7565884590148926, + "log_odds_ratio": -0.2511245012283325, + "loss": 0.286, + "rejected_geometric_mean": -3.742687225341797, + "step": 1054 + }, + { + "chosen_geometric_mean": -0.9095355868339539, + "epoch": 0.26, + "grad_norm": 14.125, + "learning_rate": 4.795868592009721e-06, + "log_odds": 5.697417259216309, + "log_odds_ratio": -0.06545817852020264, + "loss": 0.3001, + "rejected_geometric_mean": -6.136788845062256, + "step": 1055 + }, + { + "chosen_geometric_mean": -0.9790799617767334, + "epoch": 0.26, + "grad_norm": 6.15625, + "learning_rate": 4.795483141450732e-06, + "log_odds": 3.4094724655151367, + "log_odds_ratio": -0.2051403671503067, + "loss": 0.3157, + "rejected_geometric_mean": -3.8856396675109863, + "step": 1056 + }, + { + "chosen_geometric_mean": -0.9648580551147461, + "epoch": 0.26, + "grad_norm": 13.625, + "learning_rate": 4.795097342841398e-06, + "log_odds": 4.9041595458984375, + "log_odds_ratio": -0.060128919780254364, + "loss": 0.3142, + "rejected_geometric_mean": -5.430051803588867, + "step": 1057 + }, + { + "chosen_geometric_mean": -0.891865074634552, + "epoch": 0.26, + "grad_norm": 3.828125, + "learning_rate": 4.794711196240215e-06, + "log_odds": 2.062467098236084, + "log_odds_ratio": -0.33654677867889404, + "loss": 0.3043, + "rejected_geometric_mean": -2.6358070373535156, + "step": 1058 + }, + { + "chosen_geometric_mean": -1.3503707647323608, + "epoch": 0.26, + "grad_norm": 5.0625, + "learning_rate": 4.794324701705733e-06, + "log_odds": 1.559795618057251, + "log_odds_ratio": -0.3436124920845032, + "loss": 0.3043, + "rejected_geometric_mean": -2.7007596492767334, + "step": 1059 + }, + { + "chosen_geometric_mean": -1.1056042909622192, + "epoch": 0.26, + "grad_norm": 2.359375, + "learning_rate": 4.793937859296553e-06, + "log_odds": 3.00067138671875, + "log_odds_ratio": -0.15221711993217468, + "loss": 0.3073, + "rejected_geometric_mean": -3.803748846054077, + "step": 1060 + }, + { + "chosen_geometric_mean": -1.0838725566864014, + "epoch": 0.26, + "grad_norm": 23.625, + "learning_rate": 4.793550669071331e-06, + "log_odds": 1.611894965171814, + "log_odds_ratio": -0.401789128780365, + "loss": 0.3685, + "rejected_geometric_mean": -2.486917495727539, + "step": 1061 + }, + { + "chosen_geometric_mean": -0.9009881615638733, + "epoch": 0.26, + "grad_norm": 2.28125, + "learning_rate": 4.793163131088773e-06, + "log_odds": 2.3603057861328125, + "log_odds_ratio": -0.24603085219860077, + "loss": 0.2744, + "rejected_geometric_mean": -2.9301342964172363, + "step": 1062 + }, + { + "chosen_geometric_mean": -0.9429289102554321, + "epoch": 0.26, + "grad_norm": 6.5, + "learning_rate": 4.792775245407639e-06, + "log_odds": 3.7382850646972656, + "log_odds_ratio": -0.24955828487873077, + "loss": 0.3619, + "rejected_geometric_mean": -4.3584442138671875, + "step": 1063 + }, + { + "chosen_geometric_mean": -1.1948137283325195, + "epoch": 0.26, + "grad_norm": 3.78125, + "learning_rate": 4.792387012086742e-06, + "log_odds": 0.10726660490036011, + "log_odds_ratio": -0.6578881740570068, + "loss": 0.327, + "rejected_geometric_mean": -1.2997627258300781, + "step": 1064 + }, + { + "chosen_geometric_mean": -0.9366722702980042, + "epoch": 0.26, + "grad_norm": 4.28125, + "learning_rate": 4.791998431184948e-06, + "log_odds": 1.4436123371124268, + "log_odds_ratio": -0.23721843957901, + "loss": 0.3138, + "rejected_geometric_mean": -2.015545129776001, + "step": 1065 + }, + { + "chosen_geometric_mean": -1.0113903284072876, + "epoch": 0.26, + "grad_norm": 2.28125, + "learning_rate": 4.791609502761175e-06, + "log_odds": 0.7862951755523682, + "log_odds_ratio": -0.5519981384277344, + "loss": 0.356, + "rejected_geometric_mean": -1.6337082386016846, + "step": 1066 + }, + { + "chosen_geometric_mean": -1.2580877542495728, + "epoch": 0.26, + "grad_norm": 2.0625, + "learning_rate": 4.791220226874394e-06, + "log_odds": 1.9383937120437622, + "log_odds_ratio": -0.2757892906665802, + "loss": 0.2969, + "rejected_geometric_mean": -2.979031801223755, + "step": 1067 + }, + { + "chosen_geometric_mean": -1.0084000825881958, + "epoch": 0.26, + "grad_norm": 3.140625, + "learning_rate": 4.790830603583629e-06, + "log_odds": 1.2044438123703003, + "log_odds_ratio": -0.4075292944908142, + "loss": 0.3499, + "rejected_geometric_mean": -2.0444889068603516, + "step": 1068 + }, + { + "chosen_geometric_mean": -1.0059173107147217, + "epoch": 0.26, + "grad_norm": 6.40625, + "learning_rate": 4.7904406329479535e-06, + "log_odds": 2.4829607009887695, + "log_odds_ratio": -0.16200301051139832, + "loss": 0.2741, + "rejected_geometric_mean": -3.1176373958587646, + "step": 1069 + }, + { + "chosen_geometric_mean": -1.246567964553833, + "epoch": 0.26, + "grad_norm": 6.78125, + "learning_rate": 4.7900503150265e-06, + "log_odds": 0.7714155912399292, + "log_odds_ratio": -0.47225040197372437, + "loss": 0.2994, + "rejected_geometric_mean": -1.9127141237258911, + "step": 1070 + }, + { + "chosen_geometric_mean": -0.6956329345703125, + "epoch": 0.27, + "grad_norm": 3.265625, + "learning_rate": 4.789659649878448e-06, + "log_odds": 1.7572318315505981, + "log_odds_ratio": -0.3006286323070526, + "loss": 0.3054, + "rejected_geometric_mean": -2.081327199935913, + "step": 1071 + }, + { + "chosen_geometric_mean": -1.029189109802246, + "epoch": 0.27, + "grad_norm": 49.0, + "learning_rate": 4.789268637563033e-06, + "log_odds": 2.0967984199523926, + "log_odds_ratio": -0.45270541310310364, + "loss": 0.3661, + "rejected_geometric_mean": -2.974627733230591, + "step": 1072 + }, + { + "chosen_geometric_mean": -1.1311569213867188, + "epoch": 0.27, + "grad_norm": 5.78125, + "learning_rate": 4.78887727813954e-06, + "log_odds": 3.0761542320251465, + "log_odds_ratio": -0.13258345425128937, + "loss": 0.3443, + "rejected_geometric_mean": -3.889549970626831, + "step": 1073 + }, + { + "chosen_geometric_mean": -0.9397861957550049, + "epoch": 0.27, + "grad_norm": 17.625, + "learning_rate": 4.788485571667311e-06, + "log_odds": 2.490370750427246, + "log_odds_ratio": -0.25304052233695984, + "loss": 0.3179, + "rejected_geometric_mean": -3.0716500282287598, + "step": 1074 + }, + { + "chosen_geometric_mean": -1.2711431980133057, + "epoch": 0.27, + "grad_norm": 12.3125, + "learning_rate": 4.788093518205736e-06, + "log_odds": 4.421093463897705, + "log_odds_ratio": -0.2941473424434662, + "loss": 0.3317, + "rejected_geometric_mean": -5.49753475189209, + "step": 1075 + }, + { + "chosen_geometric_mean": -1.2427961826324463, + "epoch": 0.27, + "grad_norm": 44.25, + "learning_rate": 4.78770111781426e-06, + "log_odds": 1.8813508749008179, + "log_odds_ratio": -0.42821723222732544, + "loss": 0.4071, + "rejected_geometric_mean": -2.961324691772461, + "step": 1076 + }, + { + "chosen_geometric_mean": -1.1004225015640259, + "epoch": 0.27, + "grad_norm": 6.6875, + "learning_rate": 4.787308370552382e-06, + "log_odds": 5.711406230926514, + "log_odds_ratio": -0.048916250467300415, + "loss": 0.2897, + "rejected_geometric_mean": -6.410928726196289, + "step": 1077 + }, + { + "chosen_geometric_mean": -1.2307502031326294, + "epoch": 0.27, + "grad_norm": 10.3125, + "learning_rate": 4.786915276479649e-06, + "log_odds": 1.2044886350631714, + "log_odds_ratio": -0.3421025276184082, + "loss": 0.3073, + "rejected_geometric_mean": -2.232262134552002, + "step": 1078 + }, + { + "chosen_geometric_mean": -1.4036738872528076, + "epoch": 0.27, + "grad_norm": 4.71875, + "learning_rate": 4.7865218356556665e-06, + "log_odds": 3.017031192779541, + "log_odds_ratio": -0.4046740233898163, + "loss": 0.3414, + "rejected_geometric_mean": -4.293764591217041, + "step": 1079 + }, + { + "chosen_geometric_mean": -1.078851342201233, + "epoch": 0.27, + "grad_norm": 16.0, + "learning_rate": 4.786128048140087e-06, + "log_odds": 1.5959402322769165, + "log_odds_ratio": -0.2914453446865082, + "loss": 0.3294, + "rejected_geometric_mean": -2.429560661315918, + "step": 1080 + }, + { + "chosen_geometric_mean": -1.2377690076828003, + "epoch": 0.27, + "grad_norm": 4.28125, + "learning_rate": 4.78573391399262e-06, + "log_odds": 0.8422820568084717, + "log_odds_ratio": -0.4634755253791809, + "loss": 0.317, + "rejected_geometric_mean": -1.9474542140960693, + "step": 1081 + }, + { + "chosen_geometric_mean": -1.0871891975402832, + "epoch": 0.27, + "grad_norm": 5.5625, + "learning_rate": 4.785339433273026e-06, + "log_odds": 4.01533842086792, + "log_odds_ratio": -0.1445770263671875, + "loss": 0.3136, + "rejected_geometric_mean": -4.724130153656006, + "step": 1082 + }, + { + "chosen_geometric_mean": -1.3098942041397095, + "epoch": 0.27, + "grad_norm": 4.0625, + "learning_rate": 4.784944606041115e-06, + "log_odds": 2.8325610160827637, + "log_odds_ratio": -0.46883583068847656, + "loss": 0.3678, + "rejected_geometric_mean": -4.065999507904053, + "step": 1083 + }, + { + "chosen_geometric_mean": -1.2207294702529907, + "epoch": 0.27, + "grad_norm": 4.25, + "learning_rate": 4.784549432356755e-06, + "log_odds": 0.13327434659004211, + "log_odds_ratio": -0.6432350873947144, + "loss": 0.3313, + "rejected_geometric_mean": -1.2906571626663208, + "step": 1084 + }, + { + "chosen_geometric_mean": -1.2108428478240967, + "epoch": 0.27, + "grad_norm": 2.65625, + "learning_rate": 4.784153912279864e-06, + "log_odds": 0.22137700021266937, + "log_odds_ratio": -0.5988343954086304, + "loss": 0.276, + "rejected_geometric_mean": -1.3868207931518555, + "step": 1085 + }, + { + "chosen_geometric_mean": -1.0195060968399048, + "epoch": 0.27, + "grad_norm": 4.5, + "learning_rate": 4.78375804587041e-06, + "log_odds": 2.7649002075195312, + "log_odds_ratio": -0.32073378562927246, + "loss": 0.3364, + "rejected_geometric_mean": -3.4754812717437744, + "step": 1086 + }, + { + "chosen_geometric_mean": -0.9879128336906433, + "epoch": 0.27, + "grad_norm": 3.390625, + "learning_rate": 4.783361833188417e-06, + "log_odds": 1.1145288944244385, + "log_odds_ratio": -0.3660142123699188, + "loss": 0.3096, + "rejected_geometric_mean": -1.8573641777038574, + "step": 1087 + }, + { + "chosen_geometric_mean": -1.0590946674346924, + "epoch": 0.27, + "grad_norm": 3.140625, + "learning_rate": 4.782965274293962e-06, + "log_odds": 0.15996597707271576, + "log_odds_ratio": -0.6495181918144226, + "loss": 0.3162, + "rejected_geometric_mean": -1.228430986404419, + "step": 1088 + }, + { + "chosen_geometric_mean": -0.9862625598907471, + "epoch": 0.27, + "grad_norm": 21.5, + "learning_rate": 4.782568369247169e-06, + "log_odds": 2.9713220596313477, + "log_odds_ratio": -0.24806880950927734, + "loss": 0.3607, + "rejected_geometric_mean": -3.6184592247009277, + "step": 1089 + }, + { + "chosen_geometric_mean": -0.9151864051818848, + "epoch": 0.27, + "grad_norm": 4.1875, + "learning_rate": 4.782171118108222e-06, + "log_odds": 1.664252519607544, + "log_odds_ratio": -0.25574633479118347, + "loss": 0.3181, + "rejected_geometric_mean": -2.2536566257476807, + "step": 1090 + }, + { + "chosen_geometric_mean": -1.3536324501037598, + "epoch": 0.27, + "grad_norm": 9.375, + "learning_rate": 4.781773520937352e-06, + "log_odds": 1.661636233329773, + "log_odds_ratio": -0.3532717525959015, + "loss": 0.3331, + "rejected_geometric_mean": -2.8008596897125244, + "step": 1091 + }, + { + "chosen_geometric_mean": -1.0191799402236938, + "epoch": 0.27, + "grad_norm": 9.6875, + "learning_rate": 4.781375577794846e-06, + "log_odds": 3.7740015983581543, + "log_odds_ratio": -0.18238618969917297, + "loss": 0.3686, + "rejected_geometric_mean": -4.451169013977051, + "step": 1092 + }, + { + "chosen_geometric_mean": -0.903404712677002, + "epoch": 0.27, + "grad_norm": 31.25, + "learning_rate": 4.780977288741039e-06, + "log_odds": 1.8524909019470215, + "log_odds_ratio": -0.3298444151878357, + "loss": 0.333, + "rejected_geometric_mean": -2.4166059494018555, + "step": 1093 + }, + { + "chosen_geometric_mean": -1.127882480621338, + "epoch": 0.27, + "grad_norm": 20.0, + "learning_rate": 4.780578653836323e-06, + "log_odds": 2.176511287689209, + "log_odds_ratio": -0.2698853313922882, + "loss": 0.2886, + "rejected_geometric_mean": -3.044414758682251, + "step": 1094 + }, + { + "chosen_geometric_mean": -1.2302958965301514, + "epoch": 0.27, + "grad_norm": 5.5, + "learning_rate": 4.78017967314114e-06, + "log_odds": 1.062711238861084, + "log_odds_ratio": -0.3673860430717468, + "loss": 0.3476, + "rejected_geometric_mean": -2.110429286956787, + "step": 1095 + }, + { + "chosen_geometric_mean": -0.9327062368392944, + "epoch": 0.27, + "grad_norm": 94.5, + "learning_rate": 4.779780346715985e-06, + "log_odds": 6.674318313598633, + "log_odds_ratio": -0.2570089101791382, + "loss": 0.3413, + "rejected_geometric_mean": -7.343272686004639, + "step": 1096 + }, + { + "chosen_geometric_mean": -1.3848308324813843, + "epoch": 0.27, + "grad_norm": 18.625, + "learning_rate": 4.7793806746214074e-06, + "log_odds": 0.4105468988418579, + "log_odds_ratio": -0.5154603123664856, + "loss": 0.3195, + "rejected_geometric_mean": -1.7063994407653809, + "step": 1097 + }, + { + "chosen_geometric_mean": -1.0655298233032227, + "epoch": 0.27, + "grad_norm": 30.625, + "learning_rate": 4.778980656918004e-06, + "log_odds": 4.454585552215576, + "log_odds_ratio": -0.4697182774543762, + "loss": 0.3225, + "rejected_geometric_mean": -5.374671936035156, + "step": 1098 + }, + { + "chosen_geometric_mean": -0.8408294320106506, + "epoch": 0.27, + "grad_norm": 3.140625, + "learning_rate": 4.778580293666429e-06, + "log_odds": 2.632445812225342, + "log_odds_ratio": -0.4698677361011505, + "loss": 0.3011, + "rejected_geometric_mean": -3.266495943069458, + "step": 1099 + }, + { + "chosen_geometric_mean": -1.1785407066345215, + "epoch": 0.27, + "grad_norm": 4.5625, + "learning_rate": 4.778179584927387e-06, + "log_odds": 3.432499647140503, + "log_odds_ratio": -0.2702576220035553, + "loss": 0.3123, + "rejected_geometric_mean": -4.338688373565674, + "step": 1100 + }, + { + "chosen_geometric_mean": -1.1428052186965942, + "epoch": 0.27, + "grad_norm": 42.25, + "learning_rate": 4.777778530761634e-06, + "log_odds": 0.7296289205551147, + "log_odds_ratio": -0.42297878861427307, + "loss": 0.3026, + "rejected_geometric_mean": -1.6947581768035889, + "step": 1101 + }, + { + "chosen_geometric_mean": -0.9444661140441895, + "epoch": 0.27, + "grad_norm": 7.4375, + "learning_rate": 4.777377131229981e-06, + "log_odds": 5.69691276550293, + "log_odds_ratio": -0.22933726012706757, + "loss": 0.302, + "rejected_geometric_mean": -6.318724155426025, + "step": 1102 + }, + { + "chosen_geometric_mean": -0.9704506397247314, + "epoch": 0.27, + "grad_norm": 2.328125, + "learning_rate": 4.776975386393288e-06, + "log_odds": 1.1777604818344116, + "log_odds_ratio": -0.4860045611858368, + "loss": 0.3287, + "rejected_geometric_mean": -1.9765177965164185, + "step": 1103 + }, + { + "chosen_geometric_mean": -1.2391283512115479, + "epoch": 0.27, + "grad_norm": 2.28125, + "learning_rate": 4.77657329631247e-06, + "log_odds": 5.3985443115234375, + "log_odds_ratio": -0.15113621950149536, + "loss": 0.309, + "rejected_geometric_mean": -6.374627590179443, + "step": 1104 + }, + { + "chosen_geometric_mean": -1.1943475008010864, + "epoch": 0.27, + "grad_norm": 6.9375, + "learning_rate": 4.7761708610484945e-06, + "log_odds": 1.5267478227615356, + "log_odds_ratio": -0.43032193183898926, + "loss": 0.3201, + "rejected_geometric_mean": -2.6232736110687256, + "step": 1105 + }, + { + "chosen_geometric_mean": -1.081477403640747, + "epoch": 0.27, + "grad_norm": 4.15625, + "learning_rate": 4.775768080662378e-06, + "log_odds": 0.4706086814403534, + "log_odds_ratio": -0.6595932841300964, + "loss": 0.3456, + "rejected_geometric_mean": -1.552882432937622, + "step": 1106 + }, + { + "chosen_geometric_mean": -1.0972275733947754, + "epoch": 0.27, + "grad_norm": 2.84375, + "learning_rate": 4.775364955215195e-06, + "log_odds": 1.2313947677612305, + "log_odds_ratio": -0.4013711214065552, + "loss": 0.2566, + "rejected_geometric_mean": -2.154878616333008, + "step": 1107 + }, + { + "chosen_geometric_mean": -1.2013201713562012, + "epoch": 0.27, + "grad_norm": 5.6875, + "learning_rate": 4.774961484768066e-06, + "log_odds": 2.9417343139648438, + "log_odds_ratio": -0.2624565660953522, + "loss": 0.2941, + "rejected_geometric_mean": -3.8919517993927, + "step": 1108 + }, + { + "chosen_geometric_mean": -0.867701530456543, + "epoch": 0.27, + "grad_norm": 15.8125, + "learning_rate": 4.774557669382168e-06, + "log_odds": 0.8873229026794434, + "log_odds_ratio": -0.4216555655002594, + "loss": 0.2779, + "rejected_geometric_mean": -1.5392074584960938, + "step": 1109 + }, + { + "chosen_geometric_mean": -1.1196134090423584, + "epoch": 0.27, + "grad_norm": 3.734375, + "learning_rate": 4.77415350911873e-06, + "log_odds": 6.335427284240723, + "log_odds_ratio": -0.0789833813905716, + "loss": 0.3331, + "rejected_geometric_mean": -7.087868690490723, + "step": 1110 + }, + { + "chosen_geometric_mean": -2.0357017517089844, + "epoch": 0.28, + "grad_norm": 81.0, + "learning_rate": 4.77374900403903e-06, + "log_odds": 2.4607114791870117, + "log_odds_ratio": -1.2411555051803589, + "loss": 0.4417, + "rejected_geometric_mean": -4.318750858306885, + "step": 1111 + }, + { + "chosen_geometric_mean": -1.1708259582519531, + "epoch": 0.28, + "grad_norm": 23.125, + "learning_rate": 4.773344154204403e-06, + "log_odds": 6.144050598144531, + "log_odds_ratio": -0.1554565131664276, + "loss": 0.3567, + "rejected_geometric_mean": -6.976982116699219, + "step": 1112 + }, + { + "chosen_geometric_mean": -1.2020081281661987, + "epoch": 0.28, + "grad_norm": 8.0625, + "learning_rate": 4.772938959676233e-06, + "log_odds": 0.7972379326820374, + "log_odds_ratio": -0.44430145621299744, + "loss": 0.3447, + "rejected_geometric_mean": -1.8500441312789917, + "step": 1113 + }, + { + "chosen_geometric_mean": -1.608235239982605, + "epoch": 0.28, + "grad_norm": 108.0, + "learning_rate": 4.772533420515957e-06, + "log_odds": 2.2035317420959473, + "log_odds_ratio": -0.30490922927856445, + "loss": 0.4208, + "rejected_geometric_mean": -3.6949901580810547, + "step": 1114 + }, + { + "chosen_geometric_mean": -1.1060551404953003, + "epoch": 0.28, + "grad_norm": 17.375, + "learning_rate": 4.772127536785064e-06, + "log_odds": 4.338674545288086, + "log_odds_ratio": -0.19848035275936127, + "loss": 0.3865, + "rejected_geometric_mean": -5.107486248016357, + "step": 1115 + }, + { + "chosen_geometric_mean": -1.2020334005355835, + "epoch": 0.28, + "grad_norm": 35.25, + "learning_rate": 4.771721308545097e-06, + "log_odds": 1.4452463388442993, + "log_odds_ratio": -0.32094064354896545, + "loss": 0.3343, + "rejected_geometric_mean": -2.414379119873047, + "step": 1116 + }, + { + "chosen_geometric_mean": -1.1195052862167358, + "epoch": 0.28, + "grad_norm": 2.5625, + "learning_rate": 4.771314735857651e-06, + "log_odds": -0.13135209679603577, + "log_odds_ratio": -0.7665253281593323, + "loss": 0.3116, + "rejected_geometric_mean": -1.0377976894378662, + "step": 1117 + }, + { + "chosen_geometric_mean": -1.195796251296997, + "epoch": 0.28, + "grad_norm": 2.078125, + "learning_rate": 4.770907818784368e-06, + "log_odds": 0.024331875145435333, + "log_odds_ratio": -0.6941342949867249, + "loss": 0.2678, + "rejected_geometric_mean": -1.2269022464752197, + "step": 1118 + }, + { + "chosen_geometric_mean": -1.0899994373321533, + "epoch": 0.28, + "grad_norm": 4.53125, + "learning_rate": 4.7705005573869514e-06, + "log_odds": 2.8835253715515137, + "log_odds_ratio": -0.28158870339393616, + "loss": 0.3279, + "rejected_geometric_mean": -3.702446937561035, + "step": 1119 + }, + { + "chosen_geometric_mean": -1.1724694967269897, + "epoch": 0.28, + "grad_norm": 2.21875, + "learning_rate": 4.770092951727148e-06, + "log_odds": 2.2800099849700928, + "log_odds_ratio": -0.2643692195415497, + "loss": 0.2693, + "rejected_geometric_mean": -3.2026212215423584, + "step": 1120 + }, + { + "chosen_geometric_mean": -0.9107321500778198, + "epoch": 0.28, + "grad_norm": 2.984375, + "learning_rate": 4.7696850018667616e-06, + "log_odds": 5.391205787658691, + "log_odds_ratio": -0.3300858736038208, + "loss": 0.2816, + "rejected_geometric_mean": -6.007321357727051, + "step": 1121 + }, + { + "chosen_geometric_mean": -1.13423752784729, + "epoch": 0.28, + "grad_norm": 4.15625, + "learning_rate": 4.769276707867649e-06, + "log_odds": 3.148859977722168, + "log_odds_ratio": -0.1512216329574585, + "loss": 0.2688, + "rejected_geometric_mean": -3.9391722679138184, + "step": 1122 + }, + { + "chosen_geometric_mean": -1.1227550506591797, + "epoch": 0.28, + "grad_norm": 12.3125, + "learning_rate": 4.768868069791714e-06, + "log_odds": 2.1517465114593506, + "log_odds_ratio": -0.3389771580696106, + "loss": 0.362, + "rejected_geometric_mean": -3.071171760559082, + "step": 1123 + }, + { + "chosen_geometric_mean": -0.9560496807098389, + "epoch": 0.28, + "grad_norm": 2.453125, + "learning_rate": 4.7684590877009194e-06, + "log_odds": 3.4150795936584473, + "log_odds_ratio": -0.16149282455444336, + "loss": 0.2888, + "rejected_geometric_mean": -3.9647324085235596, + "step": 1124 + }, + { + "chosen_geometric_mean": -0.983507513999939, + "epoch": 0.28, + "grad_norm": 2.453125, + "learning_rate": 4.768049761657275e-06, + "log_odds": 2.2326912879943848, + "log_odds_ratio": -0.4781126081943512, + "loss": 0.3522, + "rejected_geometric_mean": -3.1031746864318848, + "step": 1125 + }, + { + "chosen_geometric_mean": -0.9116372466087341, + "epoch": 0.28, + "grad_norm": 3.625, + "learning_rate": 4.767640091722845e-06, + "log_odds": 2.6627161502838135, + "log_odds_ratio": -0.2935413718223572, + "loss": 0.2486, + "rejected_geometric_mean": -3.307023763656616, + "step": 1126 + }, + { + "chosen_geometric_mean": -1.5521687269210815, + "epoch": 0.28, + "grad_norm": 34.5, + "learning_rate": 4.767230077959744e-06, + "log_odds": 1.0318628549575806, + "log_odds_ratio": -0.35759782791137695, + "loss": 0.3639, + "rejected_geometric_mean": -2.467972755432129, + "step": 1127 + }, + { + "chosen_geometric_mean": -1.331205129623413, + "epoch": 0.28, + "grad_norm": 5.5, + "learning_rate": 4.766819720430141e-06, + "log_odds": 2.7628355026245117, + "log_odds_ratio": -0.29352304339408875, + "loss": 0.3828, + "rejected_geometric_mean": -3.883594512939453, + "step": 1128 + }, + { + "chosen_geometric_mean": -1.221732497215271, + "epoch": 0.28, + "grad_norm": 45.5, + "learning_rate": 4.766409019196256e-06, + "log_odds": 0.2669821083545685, + "log_odds_ratio": -0.6208819150924683, + "loss": 0.3922, + "rejected_geometric_mean": -1.368013858795166, + "step": 1129 + }, + { + "chosen_geometric_mean": -0.9330434799194336, + "epoch": 0.28, + "grad_norm": 45.25, + "learning_rate": 4.765997974320361e-06, + "log_odds": 3.8683953285217285, + "log_odds_ratio": -0.26701804995536804, + "loss": 0.4059, + "rejected_geometric_mean": -4.492033004760742, + "step": 1130 + }, + { + "chosen_geometric_mean": -1.2080146074295044, + "epoch": 0.28, + "grad_norm": 3.484375, + "learning_rate": 4.7655865858647805e-06, + "log_odds": 5.208322525024414, + "log_odds_ratio": -0.13633830845355988, + "loss": 0.3311, + "rejected_geometric_mean": -6.130749702453613, + "step": 1131 + }, + { + "chosen_geometric_mean": -1.0699430704116821, + "epoch": 0.28, + "grad_norm": 8.75, + "learning_rate": 4.765174853891891e-06, + "log_odds": 2.789442777633667, + "log_odds_ratio": -0.23872873187065125, + "loss": 0.3201, + "rejected_geometric_mean": -3.5698702335357666, + "step": 1132 + }, + { + "chosen_geometric_mean": -1.0858083963394165, + "epoch": 0.28, + "grad_norm": 14.4375, + "learning_rate": 4.764762778464119e-06, + "log_odds": 2.51073956489563, + "log_odds_ratio": -0.29050615429878235, + "loss": 0.3373, + "rejected_geometric_mean": -3.301896810531616, + "step": 1133 + }, + { + "chosen_geometric_mean": -1.4136672019958496, + "epoch": 0.28, + "grad_norm": 70.5, + "learning_rate": 4.764350359643948e-06, + "log_odds": 1.6379989385604858, + "log_odds_ratio": -0.5573425889015198, + "loss": 0.3279, + "rejected_geometric_mean": -2.8398797512054443, + "step": 1134 + }, + { + "chosen_geometric_mean": -1.0050729513168335, + "epoch": 0.28, + "grad_norm": 3.296875, + "learning_rate": 4.7639375974939085e-06, + "log_odds": 0.5534557700157166, + "log_odds_ratio": -0.49156978726387024, + "loss": 0.2413, + "rejected_geometric_mean": -1.4343684911727905, + "step": 1135 + }, + { + "chosen_geometric_mean": -1.1667823791503906, + "epoch": 0.28, + "grad_norm": 3.078125, + "learning_rate": 4.763524492076587e-06, + "log_odds": 3.891338348388672, + "log_odds_ratio": -0.1460878551006317, + "loss": 0.3253, + "rejected_geometric_mean": -4.762848854064941, + "step": 1136 + }, + { + "chosen_geometric_mean": -1.0094976425170898, + "epoch": 0.28, + "grad_norm": 10.625, + "learning_rate": 4.7631110434546175e-06, + "log_odds": 1.9482167959213257, + "log_odds_ratio": -0.3378644287586212, + "loss": 0.2546, + "rejected_geometric_mean": -2.736001491546631, + "step": 1137 + }, + { + "chosen_geometric_mean": -0.9254677295684814, + "epoch": 0.28, + "grad_norm": 6.34375, + "learning_rate": 4.7626972516906914e-06, + "log_odds": 2.5043039321899414, + "log_odds_ratio": -0.20369666814804077, + "loss": 0.3253, + "rejected_geometric_mean": -3.021580219268799, + "step": 1138 + }, + { + "chosen_geometric_mean": -1.1805894374847412, + "epoch": 0.28, + "grad_norm": 4.21875, + "learning_rate": 4.7622831168475465e-06, + "log_odds": 1.6788413524627686, + "log_odds_ratio": -0.4333016872406006, + "loss": 0.3049, + "rejected_geometric_mean": -2.7248475551605225, + "step": 1139 + }, + { + "chosen_geometric_mean": -1.099723219871521, + "epoch": 0.28, + "grad_norm": 3.53125, + "learning_rate": 4.76186863898798e-06, + "log_odds": 1.903681755065918, + "log_odds_ratio": -0.2818901538848877, + "loss": 0.2656, + "rejected_geometric_mean": -2.7392473220825195, + "step": 1140 + }, + { + "chosen_geometric_mean": -1.0014700889587402, + "epoch": 0.28, + "grad_norm": 5.0, + "learning_rate": 4.761453818174833e-06, + "log_odds": 1.7183148860931396, + "log_odds_ratio": -0.3080488443374634, + "loss": 0.2958, + "rejected_geometric_mean": -2.4512808322906494, + "step": 1141 + }, + { + "chosen_geometric_mean": -0.9738159775733948, + "epoch": 0.28, + "grad_norm": 5.8125, + "learning_rate": 4.761038654471003e-06, + "log_odds": 1.758391261100769, + "log_odds_ratio": -0.27748921513557434, + "loss": 0.3152, + "rejected_geometric_mean": -2.416222095489502, + "step": 1142 + }, + { + "chosen_geometric_mean": -1.3011043071746826, + "epoch": 0.28, + "grad_norm": 5.84375, + "learning_rate": 4.760623147939439e-06, + "log_odds": 1.6034955978393555, + "log_odds_ratio": -0.34499549865722656, + "loss": 0.3537, + "rejected_geometric_mean": -2.678190231323242, + "step": 1143 + }, + { + "chosen_geometric_mean": -1.0552239418029785, + "epoch": 0.28, + "grad_norm": 15.5625, + "learning_rate": 4.7602072986431426e-06, + "log_odds": 2.6052515506744385, + "log_odds_ratio": -0.31270405650138855, + "loss": 0.3159, + "rejected_geometric_mean": -3.3654050827026367, + "step": 1144 + }, + { + "chosen_geometric_mean": -0.9013341069221497, + "epoch": 0.28, + "grad_norm": 17.875, + "learning_rate": 4.759791106645167e-06, + "log_odds": 4.244614601135254, + "log_odds_ratio": -0.07396034896373749, + "loss": 0.3215, + "rejected_geometric_mean": -4.66275691986084, + "step": 1145 + }, + { + "chosen_geometric_mean": -1.4460728168487549, + "epoch": 0.28, + "grad_norm": 21.625, + "learning_rate": 4.759374572008614e-06, + "log_odds": 1.6260502338409424, + "log_odds_ratio": -0.3231305778026581, + "loss": 0.3302, + "rejected_geometric_mean": -2.934267044067383, + "step": 1146 + }, + { + "chosen_geometric_mean": -1.1753438711166382, + "epoch": 0.28, + "grad_norm": 7.78125, + "learning_rate": 4.758957694796642e-06, + "log_odds": 1.7800908088684082, + "log_odds_ratio": -0.27233338356018066, + "loss": 0.3689, + "rejected_geometric_mean": -2.7318038940429688, + "step": 1147 + }, + { + "chosen_geometric_mean": -1.1443495750427246, + "epoch": 0.28, + "grad_norm": 12.1875, + "learning_rate": 4.758540475072461e-06, + "log_odds": 3.4057164192199707, + "log_odds_ratio": -0.30708611011505127, + "loss": 0.3082, + "rejected_geometric_mean": -4.3231353759765625, + "step": 1148 + }, + { + "chosen_geometric_mean": -1.087997317314148, + "epoch": 0.28, + "grad_norm": 12.4375, + "learning_rate": 4.758122912899329e-06, + "log_odds": 0.8589944243431091, + "log_odds_ratio": -0.37683549523353577, + "loss": 0.3077, + "rejected_geometric_mean": -1.714207410812378, + "step": 1149 + }, + { + "chosen_geometric_mean": -1.3552260398864746, + "epoch": 0.28, + "grad_norm": 6.5, + "learning_rate": 4.7577050083405595e-06, + "log_odds": 1.0223469734191895, + "log_odds_ratio": -0.4235718250274658, + "loss": 0.3505, + "rejected_geometric_mean": -2.2532296180725098, + "step": 1150 + }, + { + "chosen_geometric_mean": -1.4886908531188965, + "epoch": 0.28, + "grad_norm": 5.21875, + "learning_rate": 4.757286761459518e-06, + "log_odds": 2.6578867435455322, + "log_odds_ratio": -0.135132297873497, + "loss": 0.2649, + "rejected_geometric_mean": -3.8857264518737793, + "step": 1151 + }, + { + "chosen_geometric_mean": -1.04649817943573, + "epoch": 0.29, + "grad_norm": 3.8125, + "learning_rate": 4.756868172319619e-06, + "log_odds": 2.485355854034424, + "log_odds_ratio": -0.3168845772743225, + "loss": 0.3288, + "rejected_geometric_mean": -3.3045895099639893, + "step": 1152 + }, + { + "chosen_geometric_mean": -1.05525541305542, + "epoch": 0.29, + "grad_norm": 5.5, + "learning_rate": 4.7564492409843325e-06, + "log_odds": 1.4268277883529663, + "log_odds_ratio": -0.3555312752723694, + "loss": 0.3581, + "rejected_geometric_mean": -2.244316577911377, + "step": 1153 + }, + { + "chosen_geometric_mean": -1.4232165813446045, + "epoch": 0.29, + "grad_norm": 20.625, + "learning_rate": 4.756029967517176e-06, + "log_odds": 2.2717907428741455, + "log_odds_ratio": -0.3682131767272949, + "loss": 0.401, + "rejected_geometric_mean": -3.582326889038086, + "step": 1154 + }, + { + "chosen_geometric_mean": -0.9003429412841797, + "epoch": 0.29, + "grad_norm": 5.71875, + "learning_rate": 4.755610351981723e-06, + "log_odds": 1.4322853088378906, + "log_odds_ratio": -0.33078712224960327, + "loss": 0.2919, + "rejected_geometric_mean": -2.0629684925079346, + "step": 1155 + }, + { + "chosen_geometric_mean": -0.9695592522621155, + "epoch": 0.29, + "grad_norm": 15.4375, + "learning_rate": 4.7551903944415976e-06, + "log_odds": 1.289100170135498, + "log_odds_ratio": -0.32691287994384766, + "loss": 0.2786, + "rejected_geometric_mean": -1.9924688339233398, + "step": 1156 + }, + { + "chosen_geometric_mean": -1.1117441654205322, + "epoch": 0.29, + "grad_norm": 6.1875, + "learning_rate": 4.754770094960475e-06, + "log_odds": 1.4643676280975342, + "log_odds_ratio": -0.4853992164134979, + "loss": 0.3217, + "rejected_geometric_mean": -2.4719879627227783, + "step": 1157 + }, + { + "chosen_geometric_mean": -1.3454557657241821, + "epoch": 0.29, + "grad_norm": 21.5, + "learning_rate": 4.754349453602083e-06, + "log_odds": 1.0961456298828125, + "log_odds_ratio": -0.3507709503173828, + "loss": 0.3471, + "rejected_geometric_mean": -2.2414069175720215, + "step": 1158 + }, + { + "chosen_geometric_mean": -1.1175868511199951, + "epoch": 0.29, + "grad_norm": 19.625, + "learning_rate": 4.7539284704302e-06, + "log_odds": 1.5751665830612183, + "log_odds_ratio": -0.2795928716659546, + "loss": 0.4307, + "rejected_geometric_mean": -2.4516968727111816, + "step": 1159 + }, + { + "chosen_geometric_mean": -1.2482812404632568, + "epoch": 0.29, + "grad_norm": 7.6875, + "learning_rate": 4.753507145508659e-06, + "log_odds": 1.7675981521606445, + "log_odds_ratio": -0.2555953860282898, + "loss": 0.2984, + "rejected_geometric_mean": -2.7389283180236816, + "step": 1160 + }, + { + "chosen_geometric_mean": -1.2112659215927124, + "epoch": 0.29, + "grad_norm": 9.0, + "learning_rate": 4.75308547890134e-06, + "log_odds": 1.8896890878677368, + "log_odds_ratio": -0.5255677103996277, + "loss": 0.3044, + "rejected_geometric_mean": -2.9448509216308594, + "step": 1161 + }, + { + "chosen_geometric_mean": -0.946186900138855, + "epoch": 0.29, + "grad_norm": 5.71875, + "learning_rate": 4.752663470672181e-06, + "log_odds": 0.9230955243110657, + "log_odds_ratio": -0.4312092065811157, + "loss": 0.3261, + "rejected_geometric_mean": -1.6058560609817505, + "step": 1162 + }, + { + "chosen_geometric_mean": -1.2497775554656982, + "epoch": 0.29, + "grad_norm": 3.296875, + "learning_rate": 4.752241120885166e-06, + "log_odds": 1.5151512622833252, + "log_odds_ratio": -0.2577987611293793, + "loss": 0.3384, + "rejected_geometric_mean": -2.5085036754608154, + "step": 1163 + }, + { + "chosen_geometric_mean": -1.0076100826263428, + "epoch": 0.29, + "grad_norm": 5.125, + "learning_rate": 4.751818429604335e-06, + "log_odds": 3.407501220703125, + "log_odds_ratio": -0.06986964493989944, + "loss": 0.2593, + "rejected_geometric_mean": -3.9773004055023193, + "step": 1164 + }, + { + "chosen_geometric_mean": -1.3298709392547607, + "epoch": 0.29, + "grad_norm": 6.53125, + "learning_rate": 4.751395396893778e-06, + "log_odds": 0.4172133505344391, + "log_odds_ratio": -0.5406699180603027, + "loss": 0.3549, + "rejected_geometric_mean": -1.6720620393753052, + "step": 1165 + }, + { + "chosen_geometric_mean": -1.2310426235198975, + "epoch": 0.29, + "grad_norm": 4.0625, + "learning_rate": 4.750972022817635e-06, + "log_odds": 4.643771171569824, + "log_odds_ratio": -0.14481228590011597, + "loss": 0.3262, + "rejected_geometric_mean": -5.60509729385376, + "step": 1166 + }, + { + "chosen_geometric_mean": -1.1480427980422974, + "epoch": 0.29, + "grad_norm": 2.90625, + "learning_rate": 4.750548307440103e-06, + "log_odds": 1.1632599830627441, + "log_odds_ratio": -0.3020778000354767, + "loss": 0.2994, + "rejected_geometric_mean": -2.0864264965057373, + "step": 1167 + }, + { + "chosen_geometric_mean": -0.9689590930938721, + "epoch": 0.29, + "grad_norm": 3.34375, + "learning_rate": 4.7501242508254244e-06, + "log_odds": 1.6181519031524658, + "log_odds_ratio": -0.27831628918647766, + "loss": 0.335, + "rejected_geometric_mean": -2.314274311065674, + "step": 1168 + }, + { + "chosen_geometric_mean": -1.0504521131515503, + "epoch": 0.29, + "grad_norm": 15.0625, + "learning_rate": 4.749699853037898e-06, + "log_odds": 1.4353127479553223, + "log_odds_ratio": -0.4082659184932709, + "loss": 0.2872, + "rejected_geometric_mean": -2.3297853469848633, + "step": 1169 + }, + { + "chosen_geometric_mean": -0.8860317468643188, + "epoch": 0.29, + "grad_norm": 15.0, + "learning_rate": 4.749275114141873e-06, + "log_odds": 0.5801122188568115, + "log_odds_ratio": -0.47049376368522644, + "loss": 0.3162, + "rejected_geometric_mean": -1.256864070892334, + "step": 1170 + }, + { + "chosen_geometric_mean": -1.0384056568145752, + "epoch": 0.29, + "grad_norm": 17.375, + "learning_rate": 4.748850034201748e-06, + "log_odds": 3.3581056594848633, + "log_odds_ratio": -0.25128889083862305, + "loss": 0.3247, + "rejected_geometric_mean": -4.05424165725708, + "step": 1171 + }, + { + "chosen_geometric_mean": -1.3206318616867065, + "epoch": 0.29, + "grad_norm": 8.5, + "learning_rate": 4.748424613281977e-06, + "log_odds": 1.3443814516067505, + "log_odds_ratio": -0.3407056927680969, + "loss": 0.3201, + "rejected_geometric_mean": -2.478437900543213, + "step": 1172 + }, + { + "chosen_geometric_mean": -1.0358712673187256, + "epoch": 0.29, + "grad_norm": 11.8125, + "learning_rate": 4.7479988514470634e-06, + "log_odds": 0.8636820316314697, + "log_odds_ratio": -0.5385569930076599, + "loss": 0.3273, + "rejected_geometric_mean": -1.780251383781433, + "step": 1173 + }, + { + "chosen_geometric_mean": -1.222996473312378, + "epoch": 0.29, + "grad_norm": 3.921875, + "learning_rate": 4.747572748761564e-06, + "log_odds": 1.9382127523422241, + "log_odds_ratio": -0.3253224492073059, + "loss": 0.3513, + "rejected_geometric_mean": -2.9684596061706543, + "step": 1174 + }, + { + "chosen_geometric_mean": -1.160275936126709, + "epoch": 0.29, + "grad_norm": 6.90625, + "learning_rate": 4.747146305290085e-06, + "log_odds": 2.261787176132202, + "log_odds_ratio": -0.22019079327583313, + "loss": 0.3378, + "rejected_geometric_mean": -3.165523052215576, + "step": 1175 + }, + { + "chosen_geometric_mean": -1.2506868839263916, + "epoch": 0.29, + "grad_norm": 19.375, + "learning_rate": 4.746719521097286e-06, + "log_odds": 0.9868375062942505, + "log_odds_ratio": -0.5171828866004944, + "loss": 0.368, + "rejected_geometric_mean": -2.1502366065979004, + "step": 1176 + }, + { + "chosen_geometric_mean": -1.164671540260315, + "epoch": 0.29, + "grad_norm": 2.296875, + "learning_rate": 4.746292396247877e-06, + "log_odds": 0.9230546355247498, + "log_odds_ratio": -0.4677029252052307, + "loss": 0.3348, + "rejected_geometric_mean": -1.984647274017334, + "step": 1177 + }, + { + "chosen_geometric_mean": -1.0747181177139282, + "epoch": 0.29, + "grad_norm": 19.875, + "learning_rate": 4.74586493080662e-06, + "log_odds": 2.1262171268463135, + "log_odds_ratio": -0.322970986366272, + "loss": 0.3057, + "rejected_geometric_mean": -2.9630937576293945, + "step": 1178 + }, + { + "chosen_geometric_mean": -1.1648322343826294, + "epoch": 0.29, + "grad_norm": 14.5625, + "learning_rate": 4.745437124838332e-06, + "log_odds": 3.3130135536193848, + "log_odds_ratio": -0.2884318232536316, + "loss": 0.3162, + "rejected_geometric_mean": -4.261077880859375, + "step": 1179 + }, + { + "chosen_geometric_mean": -1.2737188339233398, + "epoch": 0.29, + "grad_norm": 15.5625, + "learning_rate": 4.745008978407876e-06, + "log_odds": 1.9903068542480469, + "log_odds_ratio": -0.26566851139068604, + "loss": 0.3264, + "rejected_geometric_mean": -3.0469613075256348, + "step": 1180 + }, + { + "chosen_geometric_mean": -0.8020158410072327, + "epoch": 0.29, + "grad_norm": 41.25, + "learning_rate": 4.744580491580169e-06, + "log_odds": 2.0562968254089355, + "log_odds_ratio": -0.19151149690151215, + "loss": 0.3277, + "rejected_geometric_mean": -2.351088285446167, + "step": 1181 + }, + { + "chosen_geometric_mean": -1.1808030605316162, + "epoch": 0.29, + "grad_norm": 12.875, + "learning_rate": 4.744151664420181e-06, + "log_odds": 0.9080991744995117, + "log_odds_ratio": -0.3752993047237396, + "loss": 0.3055, + "rejected_geometric_mean": -1.9060790538787842, + "step": 1182 + }, + { + "chosen_geometric_mean": -1.1849346160888672, + "epoch": 0.29, + "grad_norm": 5.0, + "learning_rate": 4.743722496992933e-06, + "log_odds": 1.7697772979736328, + "log_odds_ratio": -0.36769920587539673, + "loss": 0.3082, + "rejected_geometric_mean": -2.7783617973327637, + "step": 1183 + }, + { + "chosen_geometric_mean": -0.9912102222442627, + "epoch": 0.29, + "grad_norm": 2.4375, + "learning_rate": 4.743292989363496e-06, + "log_odds": 2.962372303009033, + "log_odds_ratio": -0.2002180814743042, + "loss": 0.2716, + "rejected_geometric_mean": -3.6095786094665527, + "step": 1184 + }, + { + "chosen_geometric_mean": -1.039893388748169, + "epoch": 0.29, + "grad_norm": 4.21875, + "learning_rate": 4.742863141596993e-06, + "log_odds": 4.140162944793701, + "log_odds_ratio": -0.14667746424674988, + "loss": 0.2675, + "rejected_geometric_mean": -4.789547443389893, + "step": 1185 + }, + { + "chosen_geometric_mean": -1.0400079488754272, + "epoch": 0.29, + "grad_norm": 16.125, + "learning_rate": 4.742432953758601e-06, + "log_odds": 7.583197593688965, + "log_odds_ratio": -0.018467124551534653, + "loss": 0.2763, + "rejected_geometric_mean": -8.148725509643555, + "step": 1186 + }, + { + "chosen_geometric_mean": -1.2303993701934814, + "epoch": 0.29, + "grad_norm": 2.421875, + "learning_rate": 4.7420024259135465e-06, + "log_odds": 2.424870252609253, + "log_odds_ratio": -0.2303314208984375, + "loss": 0.32, + "rejected_geometric_mean": -3.4055111408233643, + "step": 1187 + }, + { + "chosen_geometric_mean": -1.241769552230835, + "epoch": 0.29, + "grad_norm": 6.59375, + "learning_rate": 4.7415715581271075e-06, + "log_odds": 0.34577682614326477, + "log_odds_ratio": -0.545263409614563, + "loss": 0.2945, + "rejected_geometric_mean": -1.5089521408081055, + "step": 1188 + }, + { + "chosen_geometric_mean": -1.0477852821350098, + "epoch": 0.29, + "grad_norm": 26.0, + "learning_rate": 4.741140350464612e-06, + "log_odds": 4.657544136047363, + "log_odds_ratio": -0.16215583682060242, + "loss": 0.4493, + "rejected_geometric_mean": -5.38417911529541, + "step": 1189 + }, + { + "chosen_geometric_mean": -0.891537606716156, + "epoch": 0.29, + "grad_norm": 4.125, + "learning_rate": 4.740708802991445e-06, + "log_odds": 2.494680166244507, + "log_odds_ratio": -0.5623871684074402, + "loss": 0.3119, + "rejected_geometric_mean": -3.2785565853118896, + "step": 1190 + }, + { + "chosen_geometric_mean": -1.0997873544692993, + "epoch": 0.29, + "grad_norm": 20.25, + "learning_rate": 4.740276915773037e-06, + "log_odds": 2.112894296646118, + "log_odds_ratio": -0.4445880949497223, + "loss": 0.3285, + "rejected_geometric_mean": -3.067995548248291, + "step": 1191 + }, + { + "chosen_geometric_mean": -1.1323384046554565, + "epoch": 0.3, + "grad_norm": 15.6875, + "learning_rate": 4.739844688874874e-06, + "log_odds": 1.4299522638320923, + "log_odds_ratio": -0.3408048450946808, + "loss": 0.3183, + "rejected_geometric_mean": -2.3477892875671387, + "step": 1192 + }, + { + "chosen_geometric_mean": -1.1540745496749878, + "epoch": 0.3, + "grad_norm": 5.15625, + "learning_rate": 4.73941212236249e-06, + "log_odds": 3.446173906326294, + "log_odds_ratio": -0.16235962510108948, + "loss": 0.3195, + "rejected_geometric_mean": -4.2927045822143555, + "step": 1193 + }, + { + "chosen_geometric_mean": -1.2818762063980103, + "epoch": 0.3, + "grad_norm": 6.40625, + "learning_rate": 4.738979216301474e-06, + "log_odds": 0.1992451250553131, + "log_odds_ratio": -0.6141101717948914, + "loss": 0.3465, + "rejected_geometric_mean": -1.4367530345916748, + "step": 1194 + }, + { + "chosen_geometric_mean": -1.100434422492981, + "epoch": 0.3, + "grad_norm": 7.125, + "learning_rate": 4.7385459707574644e-06, + "log_odds": 2.4415061473846436, + "log_odds_ratio": -0.20391030609607697, + "loss": 0.3191, + "rejected_geometric_mean": -3.235320806503296, + "step": 1195 + }, + { + "chosen_geometric_mean": -1.0541452169418335, + "epoch": 0.3, + "grad_norm": 3.75, + "learning_rate": 4.738112385796152e-06, + "log_odds": 2.7149999141693115, + "log_odds_ratio": -0.3174899220466614, + "loss": 0.2938, + "rejected_geometric_mean": -3.5526602268218994, + "step": 1196 + }, + { + "chosen_geometric_mean": -1.038728952407837, + "epoch": 0.3, + "grad_norm": 5.9375, + "learning_rate": 4.737678461483278e-06, + "log_odds": 2.033031940460205, + "log_odds_ratio": -0.37835046648979187, + "loss": 0.2964, + "rejected_geometric_mean": -2.845116376876831, + "step": 1197 + }, + { + "chosen_geometric_mean": -1.1267032623291016, + "epoch": 0.3, + "grad_norm": 2.703125, + "learning_rate": 4.737244197884637e-06, + "log_odds": 1.0469856262207031, + "log_odds_ratio": -0.4318682551383972, + "loss": 0.2654, + "rejected_geometric_mean": -2.02148175239563, + "step": 1198 + }, + { + "chosen_geometric_mean": -1.1515734195709229, + "epoch": 0.3, + "grad_norm": 9.0625, + "learning_rate": 4.736809595066072e-06, + "log_odds": 1.0785512924194336, + "log_odds_ratio": -0.3304828703403473, + "loss": 0.3214, + "rejected_geometric_mean": -2.0074257850646973, + "step": 1199 + }, + { + "chosen_geometric_mean": -1.2839769124984741, + "epoch": 0.3, + "grad_norm": 3.1875, + "learning_rate": 4.7363746530934815e-06, + "log_odds": 2.541262626647949, + "log_odds_ratio": -0.26155173778533936, + "loss": 0.3395, + "rejected_geometric_mean": -3.6034326553344727, + "step": 1200 + }, + { + "chosen_geometric_mean": -1.154129981994629, + "epoch": 0.3, + "grad_norm": 3.65625, + "learning_rate": 4.735939372032811e-06, + "log_odds": 1.3828788995742798, + "log_odds_ratio": -0.4136867821216583, + "loss": 0.2752, + "rejected_geometric_mean": -2.3962862491607666, + "step": 1201 + }, + { + "chosen_geometric_mean": -1.0019487142562866, + "epoch": 0.3, + "grad_norm": 5.375, + "learning_rate": 4.7355037519500605e-06, + "log_odds": 2.476163387298584, + "log_odds_ratio": -0.30538904666900635, + "loss": 0.3082, + "rejected_geometric_mean": -3.193206548690796, + "step": 1202 + }, + { + "chosen_geometric_mean": -1.1144887208938599, + "epoch": 0.3, + "grad_norm": 3.375, + "learning_rate": 4.73506779291128e-06, + "log_odds": 1.66619873046875, + "log_odds_ratio": -0.33025017380714417, + "loss": 0.292, + "rejected_geometric_mean": -2.581083059310913, + "step": 1203 + }, + { + "chosen_geometric_mean": -1.571491003036499, + "epoch": 0.3, + "grad_norm": 24.125, + "learning_rate": 4.734631494982572e-06, + "log_odds": 4.422560691833496, + "log_odds_ratio": -0.39280158281326294, + "loss": 0.3813, + "rejected_geometric_mean": -5.816839218139648, + "step": 1204 + }, + { + "chosen_geometric_mean": -0.9582076072692871, + "epoch": 0.3, + "grad_norm": 24.25, + "learning_rate": 4.73419485823009e-06, + "log_odds": 1.1457959413528442, + "log_odds_ratio": -0.3272794187068939, + "loss": 0.2895, + "rejected_geometric_mean": -1.821116328239441, + "step": 1205 + }, + { + "chosen_geometric_mean": -1.0912514925003052, + "epoch": 0.3, + "grad_norm": 14.0625, + "learning_rate": 4.733757882720037e-06, + "log_odds": 2.9868416786193848, + "log_odds_ratio": -0.439483106136322, + "loss": 0.324, + "rejected_geometric_mean": -3.9559741020202637, + "step": 1206 + }, + { + "chosen_geometric_mean": -0.9253547191619873, + "epoch": 0.3, + "grad_norm": 27.625, + "learning_rate": 4.733320568518671e-06, + "log_odds": 3.7967772483825684, + "log_odds_ratio": -0.031868111342191696, + "loss": 0.2915, + "rejected_geometric_mean": -4.215329170227051, + "step": 1207 + }, + { + "chosen_geometric_mean": -1.0043712854385376, + "epoch": 0.3, + "grad_norm": 21.75, + "learning_rate": 4.732882915692297e-06, + "log_odds": 1.1798368692398071, + "log_odds_ratio": -0.3526177704334259, + "loss": 0.3452, + "rejected_geometric_mean": -1.9718313217163086, + "step": 1208 + }, + { + "chosen_geometric_mean": -1.2570059299468994, + "epoch": 0.3, + "grad_norm": 5.34375, + "learning_rate": 4.732444924307277e-06, + "log_odds": 2.7179157733917236, + "log_odds_ratio": -0.1811910718679428, + "loss": 0.3371, + "rejected_geometric_mean": -3.720461845397949, + "step": 1209 + }, + { + "chosen_geometric_mean": -1.048344373703003, + "epoch": 0.3, + "grad_norm": 4.9375, + "learning_rate": 4.732006594430017e-06, + "log_odds": 0.17265506088733673, + "log_odds_ratio": -0.6166179776191711, + "loss": 0.3505, + "rejected_geometric_mean": -1.1555851697921753, + "step": 1210 + }, + { + "chosen_geometric_mean": -1.032361626625061, + "epoch": 0.3, + "grad_norm": 5.15625, + "learning_rate": 4.731567926126982e-06, + "log_odds": 3.524091958999634, + "log_odds_ratio": -0.15634703636169434, + "loss": 0.304, + "rejected_geometric_mean": -4.185843467712402, + "step": 1211 + }, + { + "chosen_geometric_mean": -1.2347261905670166, + "epoch": 0.3, + "grad_norm": 4.15625, + "learning_rate": 4.731128919464682e-06, + "log_odds": 2.1683242321014404, + "log_odds_ratio": -0.2653271555900574, + "loss": 0.298, + "rejected_geometric_mean": -3.1738147735595703, + "step": 1212 + }, + { + "chosen_geometric_mean": -1.049296259880066, + "epoch": 0.3, + "grad_norm": 7.3125, + "learning_rate": 4.7306895745096825e-06, + "log_odds": 1.608936071395874, + "log_odds_ratio": -0.35574519634246826, + "loss": 0.322, + "rejected_geometric_mean": -2.450507402420044, + "step": 1213 + }, + { + "chosen_geometric_mean": -1.0208745002746582, + "epoch": 0.3, + "grad_norm": 4.34375, + "learning_rate": 4.730249891328599e-06, + "log_odds": 1.7642651796340942, + "log_odds_ratio": -0.27816158533096313, + "loss": 0.326, + "rejected_geometric_mean": -2.52081036567688, + "step": 1214 + }, + { + "chosen_geometric_mean": -1.0296547412872314, + "epoch": 0.3, + "grad_norm": 4.3125, + "learning_rate": 4.729809869988095e-06, + "log_odds": 1.2097840309143066, + "log_odds_ratio": -0.3303201496601105, + "loss": 0.3099, + "rejected_geometric_mean": -1.9979585409164429, + "step": 1215 + }, + { + "chosen_geometric_mean": -1.4856841564178467, + "epoch": 0.3, + "grad_norm": 4.28125, + "learning_rate": 4.729369510554893e-06, + "log_odds": 0.7005731463432312, + "log_odds_ratio": -0.41975337266921997, + "loss": 0.3138, + "rejected_geometric_mean": -2.090817928314209, + "step": 1216 + }, + { + "chosen_geometric_mean": -1.3735249042510986, + "epoch": 0.3, + "grad_norm": 7.75, + "learning_rate": 4.728928813095759e-06, + "log_odds": 3.97959566116333, + "log_odds_ratio": -0.21844345331192017, + "loss": 0.3286, + "rejected_geometric_mean": -5.085599899291992, + "step": 1217 + }, + { + "chosen_geometric_mean": -1.0247759819030762, + "epoch": 0.3, + "grad_norm": 2.765625, + "learning_rate": 4.728487777677513e-06, + "log_odds": 2.700770854949951, + "log_odds_ratio": -0.24369880557060242, + "loss": 0.2996, + "rejected_geometric_mean": -3.4201831817626953, + "step": 1218 + }, + { + "chosen_geometric_mean": -1.1277389526367188, + "epoch": 0.3, + "grad_norm": 3.359375, + "learning_rate": 4.7280464043670275e-06, + "log_odds": 3.0815911293029785, + "log_odds_ratio": -0.28242990374565125, + "loss": 0.3254, + "rejected_geometric_mean": -4.000299453735352, + "step": 1219 + }, + { + "chosen_geometric_mean": -1.415297031402588, + "epoch": 0.3, + "grad_norm": 22.75, + "learning_rate": 4.727604693231226e-06, + "log_odds": 2.2010762691497803, + "log_odds_ratio": -0.40533217787742615, + "loss": 0.389, + "rejected_geometric_mean": -3.468320369720459, + "step": 1220 + }, + { + "chosen_geometric_mean": -0.9564027190208435, + "epoch": 0.3, + "grad_norm": 8.9375, + "learning_rate": 4.727162644337081e-06, + "log_odds": 2.400712728500366, + "log_odds_ratio": -0.38183772563934326, + "loss": 0.3075, + "rejected_geometric_mean": -3.143397569656372, + "step": 1221 + }, + { + "chosen_geometric_mean": -1.02935791015625, + "epoch": 0.3, + "grad_norm": 20.125, + "learning_rate": 4.726720257751619e-06, + "log_odds": 2.0954904556274414, + "log_odds_ratio": -0.30813977122306824, + "loss": 0.3674, + "rejected_geometric_mean": -2.8441243171691895, + "step": 1222 + }, + { + "chosen_geometric_mean": -0.9487906694412231, + "epoch": 0.3, + "grad_norm": 4.46875, + "learning_rate": 4.726277533541915e-06, + "log_odds": 4.297144412994385, + "log_odds_ratio": -0.09743195027112961, + "loss": 0.3182, + "rejected_geometric_mean": -4.797399520874023, + "step": 1223 + }, + { + "chosen_geometric_mean": -1.0031318664550781, + "epoch": 0.3, + "grad_norm": 2.890625, + "learning_rate": 4.7258344717750994e-06, + "log_odds": 3.885807752609253, + "log_odds_ratio": -0.17947591841220856, + "loss": 0.3349, + "rejected_geometric_mean": -4.523686408996582, + "step": 1224 + }, + { + "chosen_geometric_mean": -1.038597583770752, + "epoch": 0.3, + "grad_norm": 4.6875, + "learning_rate": 4.725391072518348e-06, + "log_odds": 4.272707462310791, + "log_odds_ratio": -0.18795517086982727, + "loss": 0.3257, + "rejected_geometric_mean": -4.998659610748291, + "step": 1225 + }, + { + "chosen_geometric_mean": -1.0866156816482544, + "epoch": 0.3, + "grad_norm": 31.625, + "learning_rate": 4.724947335838892e-06, + "log_odds": 1.514294147491455, + "log_odds_ratio": -0.3846755921840668, + "loss": 0.3893, + "rejected_geometric_mean": -2.403167724609375, + "step": 1226 + }, + { + "chosen_geometric_mean": -1.6216325759887695, + "epoch": 0.3, + "grad_norm": 13.25, + "learning_rate": 4.724503261804012e-06, + "log_odds": 3.8641817569732666, + "log_odds_ratio": -0.13415972888469696, + "loss": 0.3188, + "rejected_geometric_mean": -5.211447715759277, + "step": 1227 + }, + { + "chosen_geometric_mean": -1.510323405265808, + "epoch": 0.3, + "grad_norm": 3.0, + "learning_rate": 4.724058850481042e-06, + "log_odds": 3.6299631595611572, + "log_odds_ratio": -0.261874794960022, + "loss": 0.3715, + "rejected_geometric_mean": -4.966976642608643, + "step": 1228 + }, + { + "chosen_geometric_mean": -1.234331488609314, + "epoch": 0.3, + "grad_norm": 2.890625, + "learning_rate": 4.7236141019373624e-06, + "log_odds": 2.3456034660339355, + "log_odds_ratio": -0.4822996258735657, + "loss": 0.4066, + "rejected_geometric_mean": -3.4811184406280518, + "step": 1229 + }, + { + "chosen_geometric_mean": -1.3655189275741577, + "epoch": 0.3, + "grad_norm": 19.75, + "learning_rate": 4.72316901624041e-06, + "log_odds": 4.3812689781188965, + "log_odds_ratio": -0.19852742552757263, + "loss": 0.3317, + "rejected_geometric_mean": -5.526092529296875, + "step": 1230 + }, + { + "chosen_geometric_mean": -1.159306526184082, + "epoch": 0.3, + "grad_norm": 10.8125, + "learning_rate": 4.722723593457671e-06, + "log_odds": 4.979822635650635, + "log_odds_ratio": -0.23345442116260529, + "loss": 0.2974, + "rejected_geometric_mean": -5.8653340339660645, + "step": 1231 + }, + { + "chosen_geometric_mean": -1.1846716403961182, + "epoch": 0.31, + "grad_norm": 20.875, + "learning_rate": 4.72227783365668e-06, + "log_odds": 1.7645238637924194, + "log_odds_ratio": -0.3642890453338623, + "loss": 0.3282, + "rejected_geometric_mean": -2.789891242980957, + "step": 1232 + }, + { + "chosen_geometric_mean": -1.7683871984481812, + "epoch": 0.31, + "grad_norm": 27.25, + "learning_rate": 4.721831736905027e-06, + "log_odds": 5.950438022613525, + "log_odds_ratio": -0.12029803544282913, + "loss": 0.3143, + "rejected_geometric_mean": -7.432938575744629, + "step": 1233 + }, + { + "chosen_geometric_mean": -1.1294320821762085, + "epoch": 0.31, + "grad_norm": 5.75, + "learning_rate": 4.721385303270349e-06, + "log_odds": 0.5593326687812805, + "log_odds_ratio": -0.4757819175720215, + "loss": 0.3052, + "rejected_geometric_mean": -1.5678256750106812, + "step": 1234 + }, + { + "chosen_geometric_mean": -1.1685526371002197, + "epoch": 0.31, + "grad_norm": 2.203125, + "learning_rate": 4.720938532820338e-06, + "log_odds": 1.0176591873168945, + "log_odds_ratio": -0.4510643482208252, + "loss": 0.3169, + "rejected_geometric_mean": -2.0687732696533203, + "step": 1235 + }, + { + "chosen_geometric_mean": -0.8752250671386719, + "epoch": 0.31, + "grad_norm": 3.6875, + "learning_rate": 4.720491425622734e-06, + "log_odds": 3.6729884147644043, + "log_odds_ratio": -0.3065349757671356, + "loss": 0.2927, + "rejected_geometric_mean": -4.266124725341797, + "step": 1236 + }, + { + "chosen_geometric_mean": -1.117460012435913, + "epoch": 0.31, + "grad_norm": 2.796875, + "learning_rate": 4.72004398174533e-06, + "log_odds": 2.6098544597625732, + "log_odds_ratio": -0.21091677248477936, + "loss": 0.3082, + "rejected_geometric_mean": -3.4014108180999756, + "step": 1237 + }, + { + "chosen_geometric_mean": -0.8457859754562378, + "epoch": 0.31, + "grad_norm": 4.59375, + "learning_rate": 4.719596201255967e-06, + "log_odds": 3.1940009593963623, + "log_odds_ratio": -0.09106761962175369, + "loss": 0.2775, + "rejected_geometric_mean": -3.510683059692383, + "step": 1238 + }, + { + "chosen_geometric_mean": -0.9730502963066101, + "epoch": 0.31, + "grad_norm": 4.78125, + "learning_rate": 4.719148084222542e-06, + "log_odds": 1.963191270828247, + "log_odds_ratio": -0.34268832206726074, + "loss": 0.2864, + "rejected_geometric_mean": -2.6876437664031982, + "step": 1239 + }, + { + "chosen_geometric_mean": -0.9892361760139465, + "epoch": 0.31, + "grad_norm": 19.375, + "learning_rate": 4.718699630712998e-06, + "log_odds": 2.499114990234375, + "log_odds_ratio": -0.23587164282798767, + "loss": 0.2878, + "rejected_geometric_mean": -3.147753953933716, + "step": 1240 + }, + { + "chosen_geometric_mean": -1.1998615264892578, + "epoch": 0.31, + "grad_norm": 3.078125, + "learning_rate": 4.718250840795334e-06, + "log_odds": 4.976986885070801, + "log_odds_ratio": -0.1855105757713318, + "loss": 0.3062, + "rejected_geometric_mean": -5.874039649963379, + "step": 1241 + }, + { + "chosen_geometric_mean": -1.0771976709365845, + "epoch": 0.31, + "grad_norm": 14.625, + "learning_rate": 4.717801714537596e-06, + "log_odds": 1.5924839973449707, + "log_odds_ratio": -0.38894709944725037, + "loss": 0.3356, + "rejected_geometric_mean": -2.446237802505493, + "step": 1242 + }, + { + "chosen_geometric_mean": -1.7190120220184326, + "epoch": 0.31, + "grad_norm": 6.4375, + "learning_rate": 4.717352252007881e-06, + "log_odds": 4.043746471405029, + "log_odds_ratio": -0.34487730264663696, + "loss": 0.3292, + "rejected_geometric_mean": -5.6668267250061035, + "step": 1243 + }, + { + "chosen_geometric_mean": -1.0247875452041626, + "epoch": 0.31, + "grad_norm": 50.5, + "learning_rate": 4.716902453274339e-06, + "log_odds": 2.774712085723877, + "log_odds_ratio": -0.2684614062309265, + "loss": 0.2656, + "rejected_geometric_mean": -3.521146774291992, + "step": 1244 + }, + { + "chosen_geometric_mean": -1.2854684591293335, + "epoch": 0.31, + "grad_norm": 2.359375, + "learning_rate": 4.716452318405172e-06, + "log_odds": 3.3626599311828613, + "log_odds_ratio": -0.18158116936683655, + "loss": 0.2911, + "rejected_geometric_mean": -4.414170265197754, + "step": 1245 + }, + { + "chosen_geometric_mean": -1.152339220046997, + "epoch": 0.31, + "grad_norm": 5.03125, + "learning_rate": 4.71600184746863e-06, + "log_odds": 0.11172415316104889, + "log_odds_ratio": -0.6435278654098511, + "loss": 0.3531, + "rejected_geometric_mean": -1.2315804958343506, + "step": 1246 + }, + { + "chosen_geometric_mean": -1.3362317085266113, + "epoch": 0.31, + "grad_norm": 3.53125, + "learning_rate": 4.715551040533014e-06, + "log_odds": 2.7150797843933105, + "log_odds_ratio": -0.2433442324399948, + "loss": 0.3143, + "rejected_geometric_mean": -3.837697744369507, + "step": 1247 + }, + { + "chosen_geometric_mean": -1.2354493141174316, + "epoch": 0.31, + "grad_norm": 6.25, + "learning_rate": 4.715099897666679e-06, + "log_odds": 0.8837780952453613, + "log_odds_ratio": -0.48569685220718384, + "loss": 0.3096, + "rejected_geometric_mean": -2.0541951656341553, + "step": 1248 + }, + { + "chosen_geometric_mean": -1.1986370086669922, + "epoch": 0.31, + "grad_norm": 42.25, + "learning_rate": 4.714648418938029e-06, + "log_odds": 1.7165746688842773, + "log_odds_ratio": -0.27488550543785095, + "loss": 0.3755, + "rejected_geometric_mean": -2.690864086151123, + "step": 1249 + }, + { + "chosen_geometric_mean": -1.1005538702011108, + "epoch": 0.31, + "grad_norm": 5.1875, + "learning_rate": 4.714196604415518e-06, + "log_odds": 5.690670013427734, + "log_odds_ratio": -0.004140795208513737, + "loss": 0.2847, + "rejected_geometric_mean": -6.3819966316223145, + "step": 1250 + }, + { + "chosen_geometric_mean": -1.1765544414520264, + "epoch": 0.31, + "grad_norm": 25.75, + "learning_rate": 4.713744454167652e-06, + "log_odds": 2.057255268096924, + "log_odds_ratio": -0.31252166628837585, + "loss": 0.3431, + "rejected_geometric_mean": -3.0548269748687744, + "step": 1251 + }, + { + "chosen_geometric_mean": -1.1343672275543213, + "epoch": 0.31, + "grad_norm": 18.75, + "learning_rate": 4.713291968262988e-06, + "log_odds": 0.8781342506408691, + "log_odds_ratio": -0.4283333718776703, + "loss": 0.3754, + "rejected_geometric_mean": -1.8314974308013916, + "step": 1252 + }, + { + "chosen_geometric_mean": -1.2497673034667969, + "epoch": 0.31, + "grad_norm": 12.75, + "learning_rate": 4.712839146770135e-06, + "log_odds": 3.2366364002227783, + "log_odds_ratio": -0.2870302200317383, + "loss": 0.2646, + "rejected_geometric_mean": -4.263981342315674, + "step": 1253 + }, + { + "chosen_geometric_mean": -1.0826934576034546, + "epoch": 0.31, + "grad_norm": 4.375, + "learning_rate": 4.7123859897577505e-06, + "log_odds": 4.319065570831299, + "log_odds_ratio": -0.09156733751296997, + "loss": 0.3499, + "rejected_geometric_mean": -4.9761881828308105, + "step": 1254 + }, + { + "chosen_geometric_mean": -1.3767876625061035, + "epoch": 0.31, + "grad_norm": 14.75, + "learning_rate": 4.711932497294543e-06, + "log_odds": 4.2973833084106445, + "log_odds_ratio": -0.167774960398674, + "loss": 0.3361, + "rejected_geometric_mean": -5.403656005859375, + "step": 1255 + }, + { + "chosen_geometric_mean": -0.8464776277542114, + "epoch": 0.31, + "grad_norm": 20.0, + "learning_rate": 4.711478669449275e-06, + "log_odds": 4.586361885070801, + "log_odds_ratio": -0.37553825974464417, + "loss": 0.3134, + "rejected_geometric_mean": -5.155323028564453, + "step": 1256 + }, + { + "chosen_geometric_mean": -1.1252455711364746, + "epoch": 0.31, + "grad_norm": 12.25, + "learning_rate": 4.711024506290756e-06, + "log_odds": 2.025076150894165, + "log_odds_ratio": -0.30779242515563965, + "loss": 0.3849, + "rejected_geometric_mean": -2.936918258666992, + "step": 1257 + }, + { + "chosen_geometric_mean": -1.4058809280395508, + "epoch": 0.31, + "grad_norm": 14.8125, + "learning_rate": 4.7105700078878485e-06, + "log_odds": 4.530988693237305, + "log_odds_ratio": -0.0693483054637909, + "loss": 0.3326, + "rejected_geometric_mean": -5.588649749755859, + "step": 1258 + }, + { + "chosen_geometric_mean": -1.3719351291656494, + "epoch": 0.31, + "grad_norm": 7.28125, + "learning_rate": 4.710115174309465e-06, + "log_odds": 0.5264118909835815, + "log_odds_ratio": -0.4723144769668579, + "loss": 0.3078, + "rejected_geometric_mean": -1.7820439338684082, + "step": 1259 + }, + { + "chosen_geometric_mean": -1.0460220575332642, + "epoch": 0.31, + "grad_norm": 2.453125, + "learning_rate": 4.70966000562457e-06, + "log_odds": 2.9728095531463623, + "log_odds_ratio": -0.2364136278629303, + "loss": 0.2361, + "rejected_geometric_mean": -3.717712163925171, + "step": 1260 + }, + { + "chosen_geometric_mean": -1.0683934688568115, + "epoch": 0.31, + "grad_norm": 11.0, + "learning_rate": 4.709204501902179e-06, + "log_odds": 2.034187078475952, + "log_odds_ratio": -0.2770317792892456, + "loss": 0.3556, + "rejected_geometric_mean": -2.8230555057525635, + "step": 1261 + }, + { + "chosen_geometric_mean": -1.1788592338562012, + "epoch": 0.31, + "grad_norm": 4.0625, + "learning_rate": 4.7087486632113546e-06, + "log_odds": 0.18631741404533386, + "log_odds_ratio": -0.6142085790634155, + "loss": 0.3387, + "rejected_geometric_mean": -1.3283175230026245, + "step": 1262 + }, + { + "chosen_geometric_mean": -1.1267427206039429, + "epoch": 0.31, + "grad_norm": 3.796875, + "learning_rate": 4.7082924896212144e-06, + "log_odds": 1.915442705154419, + "log_odds_ratio": -0.35569047927856445, + "loss": 0.3305, + "rejected_geometric_mean": -2.855600118637085, + "step": 1263 + }, + { + "chosen_geometric_mean": -1.1750340461730957, + "epoch": 0.31, + "grad_norm": 3.34375, + "learning_rate": 4.707835981200925e-06, + "log_odds": 4.231747627258301, + "log_odds_ratio": -0.046277351677417755, + "loss": 0.283, + "rejected_geometric_mean": -5.049305438995361, + "step": 1264 + }, + { + "chosen_geometric_mean": -1.3310707807540894, + "epoch": 0.31, + "grad_norm": 15.9375, + "learning_rate": 4.707379138019704e-06, + "log_odds": 3.26737117767334, + "log_odds_ratio": -0.25342294573783875, + "loss": 0.3601, + "rejected_geometric_mean": -4.439229965209961, + "step": 1265 + }, + { + "chosen_geometric_mean": -0.9756525754928589, + "epoch": 0.31, + "grad_norm": 11.25, + "learning_rate": 4.706921960146821e-06, + "log_odds": 2.8346433639526367, + "log_odds_ratio": -0.19870169460773468, + "loss": 0.3259, + "rejected_geometric_mean": -3.4110095500946045, + "step": 1266 + }, + { + "chosen_geometric_mean": -1.2704896926879883, + "epoch": 0.31, + "grad_norm": 12.75, + "learning_rate": 4.706464447651593e-06, + "log_odds": 1.9114305973052979, + "log_odds_ratio": -0.245963454246521, + "loss": 0.2997, + "rejected_geometric_mean": -2.9022464752197266, + "step": 1267 + }, + { + "chosen_geometric_mean": -1.0928759574890137, + "epoch": 0.31, + "grad_norm": 33.0, + "learning_rate": 4.706006600603391e-06, + "log_odds": 3.776726245880127, + "log_odds_ratio": -0.1649758517742157, + "loss": 0.342, + "rejected_geometric_mean": -4.565269470214844, + "step": 1268 + }, + { + "chosen_geometric_mean": -1.4107184410095215, + "epoch": 0.31, + "grad_norm": 27.375, + "learning_rate": 4.705548419071636e-06, + "log_odds": 2.72792387008667, + "log_odds_ratio": -0.2637838125228882, + "loss": 0.3194, + "rejected_geometric_mean": -3.974738359451294, + "step": 1269 + }, + { + "chosen_geometric_mean": -1.0497084856033325, + "epoch": 0.31, + "grad_norm": 17.5, + "learning_rate": 4.705089903125798e-06, + "log_odds": 4.002599239349365, + "log_odds_ratio": -0.12131539732217789, + "loss": 0.3257, + "rejected_geometric_mean": -4.641271591186523, + "step": 1270 + }, + { + "chosen_geometric_mean": -1.0272518396377563, + "epoch": 0.31, + "grad_norm": 43.75, + "learning_rate": 4.704631052835401e-06, + "log_odds": 4.511002540588379, + "log_odds_ratio": -0.031371746212244034, + "loss": 0.2979, + "rejected_geometric_mean": -5.0687336921691895, + "step": 1271 + }, + { + "chosen_geometric_mean": -1.1721129417419434, + "epoch": 0.31, + "grad_norm": 10.6875, + "learning_rate": 4.704171868270016e-06, + "log_odds": 2.3669941425323486, + "log_odds_ratio": -0.3750726580619812, + "loss": 0.3568, + "rejected_geometric_mean": -3.3139448165893555, + "step": 1272 + }, + { + "chosen_geometric_mean": -1.171305775642395, + "epoch": 0.32, + "grad_norm": 24.75, + "learning_rate": 4.703712349499268e-06, + "log_odds": 3.1185989379882812, + "log_odds_ratio": -0.1361808478832245, + "loss": 0.265, + "rejected_geometric_mean": -3.996372699737549, + "step": 1273 + }, + { + "chosen_geometric_mean": -1.1377475261688232, + "epoch": 0.32, + "grad_norm": 3.53125, + "learning_rate": 4.703252496592828e-06, + "log_odds": 3.5403053760528564, + "log_odds_ratio": -0.40804457664489746, + "loss": 0.3794, + "rejected_geometric_mean": -4.515976428985596, + "step": 1274 + }, + { + "chosen_geometric_mean": -1.1849459409713745, + "epoch": 0.32, + "grad_norm": 7.0, + "learning_rate": 4.702792309620425e-06, + "log_odds": 1.9327020645141602, + "log_odds_ratio": -0.26010727882385254, + "loss": 0.2712, + "rejected_geometric_mean": -2.8352906703948975, + "step": 1275 + }, + { + "chosen_geometric_mean": -1.2389421463012695, + "epoch": 0.32, + "grad_norm": 3.25, + "learning_rate": 4.70233178865183e-06, + "log_odds": 2.2043213844299316, + "log_odds_ratio": -0.2737710773944855, + "loss": 0.2939, + "rejected_geometric_mean": -3.220377206802368, + "step": 1276 + }, + { + "chosen_geometric_mean": -1.2080674171447754, + "epoch": 0.32, + "grad_norm": 4.78125, + "learning_rate": 4.701870933756873e-06, + "log_odds": 0.2237234264612198, + "log_odds_ratio": -0.5922243595123291, + "loss": 0.3376, + "rejected_geometric_mean": -1.369500756263733, + "step": 1277 + }, + { + "chosen_geometric_mean": -1.1346995830535889, + "epoch": 0.32, + "grad_norm": 4.8125, + "learning_rate": 4.701409745005428e-06, + "log_odds": 0.36680328845977783, + "log_odds_ratio": -0.5792292952537537, + "loss": 0.3089, + "rejected_geometric_mean": -1.4501782655715942, + "step": 1278 + }, + { + "chosen_geometric_mean": -1.2719027996063232, + "epoch": 0.32, + "grad_norm": 5.8125, + "learning_rate": 4.700948222467424e-06, + "log_odds": 1.1215142011642456, + "log_odds_ratio": -0.34499406814575195, + "loss": 0.3002, + "rejected_geometric_mean": -2.208070755004883, + "step": 1279 + }, + { + "chosen_geometric_mean": -1.062976360321045, + "epoch": 0.32, + "grad_norm": 6.6875, + "learning_rate": 4.700486366212838e-06, + "log_odds": 1.457611083984375, + "log_odds_ratio": -0.2898084223270416, + "loss": 0.3159, + "rejected_geometric_mean": -2.250237464904785, + "step": 1280 + }, + { + "chosen_geometric_mean": -1.2129242420196533, + "epoch": 0.32, + "grad_norm": 20.75, + "learning_rate": 4.700024176311698e-06, + "log_odds": 0.44516220688819885, + "log_odds_ratio": -0.5002706050872803, + "loss": 0.3294, + "rejected_geometric_mean": -1.5422048568725586, + "step": 1281 + }, + { + "chosen_geometric_mean": -1.1099086999893188, + "epoch": 0.32, + "grad_norm": 5.5, + "learning_rate": 4.6995616528340844e-06, + "log_odds": 3.9488396644592285, + "log_odds_ratio": -0.3532145321369171, + "loss": 0.3437, + "rejected_geometric_mean": -4.8386406898498535, + "step": 1282 + }, + { + "chosen_geometric_mean": -1.4860570430755615, + "epoch": 0.32, + "grad_norm": 13.75, + "learning_rate": 4.699098795850126e-06, + "log_odds": 1.9644036293029785, + "log_odds_ratio": -0.42000141739845276, + "loss": 0.3251, + "rejected_geometric_mean": -3.2485506534576416, + "step": 1283 + }, + { + "chosen_geometric_mean": -0.8366389274597168, + "epoch": 0.32, + "grad_norm": 21.625, + "learning_rate": 4.698635605430003e-06, + "log_odds": 0.9427911043167114, + "log_odds_ratio": -0.4967067837715149, + "loss": 0.3444, + "rejected_geometric_mean": -1.6312460899353027, + "step": 1284 + }, + { + "chosen_geometric_mean": -1.092137098312378, + "epoch": 0.32, + "grad_norm": 5.78125, + "learning_rate": 4.698172081643946e-06, + "log_odds": 0.8493940830230713, + "log_odds_ratio": -0.41676127910614014, + "loss": 0.3462, + "rejected_geometric_mean": -1.7558876276016235, + "step": 1285 + }, + { + "chosen_geometric_mean": -1.002164602279663, + "epoch": 0.32, + "grad_norm": 14.5625, + "learning_rate": 4.697708224562237e-06, + "log_odds": 2.180851936340332, + "log_odds_ratio": -0.21343977749347687, + "loss": 0.3436, + "rejected_geometric_mean": -2.8300271034240723, + "step": 1286 + }, + { + "chosen_geometric_mean": -1.1587334871292114, + "epoch": 0.32, + "grad_norm": 12.625, + "learning_rate": 4.697244034255208e-06, + "log_odds": 3.0263848304748535, + "log_odds_ratio": -0.19276314973831177, + "loss": 0.2719, + "rejected_geometric_mean": -3.87141752243042, + "step": 1287 + }, + { + "chosen_geometric_mean": -1.1080291271209717, + "epoch": 0.32, + "grad_norm": 3.28125, + "learning_rate": 4.6967795107932405e-06, + "log_odds": 0.31426507234573364, + "log_odds_ratio": -0.5500245094299316, + "loss": 0.3361, + "rejected_geometric_mean": -1.3266104459762573, + "step": 1288 + }, + { + "chosen_geometric_mean": -1.0587414503097534, + "epoch": 0.32, + "grad_norm": 2.703125, + "learning_rate": 4.696314654246767e-06, + "log_odds": 3.2054901123046875, + "log_odds_ratio": -0.2214668244123459, + "loss": 0.2806, + "rejected_geometric_mean": -3.9715986251831055, + "step": 1289 + }, + { + "chosen_geometric_mean": -1.0930734872817993, + "epoch": 0.32, + "grad_norm": 11.9375, + "learning_rate": 4.695849464686273e-06, + "log_odds": 3.106079339981079, + "log_odds_ratio": -0.1986120045185089, + "loss": 0.3936, + "rejected_geometric_mean": -3.870302200317383, + "step": 1290 + }, + { + "chosen_geometric_mean": -1.1165032386779785, + "epoch": 0.32, + "grad_norm": 9.875, + "learning_rate": 4.69538394218229e-06, + "log_odds": 0.7004064917564392, + "log_odds_ratio": -0.46215543150901794, + "loss": 0.322, + "rejected_geometric_mean": -1.669655680656433, + "step": 1291 + }, + { + "chosen_geometric_mean": -1.0043318271636963, + "epoch": 0.32, + "grad_norm": 5.6875, + "learning_rate": 4.694918086805404e-06, + "log_odds": 1.483759880065918, + "log_odds_ratio": -0.2982497811317444, + "loss": 0.3061, + "rejected_geometric_mean": -2.164055347442627, + "step": 1292 + }, + { + "chosen_geometric_mean": -0.942093014717102, + "epoch": 0.32, + "grad_norm": 3.390625, + "learning_rate": 4.69445189862625e-06, + "log_odds": 2.08111834526062, + "log_odds_ratio": -0.3033233880996704, + "loss": 0.3266, + "rejected_geometric_mean": -2.70157527923584, + "step": 1293 + }, + { + "chosen_geometric_mean": -1.277070164680481, + "epoch": 0.32, + "grad_norm": 37.75, + "learning_rate": 4.693985377715512e-06, + "log_odds": 0.33674323558807373, + "log_odds_ratio": -0.563288152217865, + "loss": 0.3801, + "rejected_geometric_mean": -1.5604002475738525, + "step": 1294 + }, + { + "chosen_geometric_mean": -1.16243314743042, + "epoch": 0.32, + "grad_norm": 4.09375, + "learning_rate": 4.693518524143928e-06, + "log_odds": 0.9488053917884827, + "log_odds_ratio": -0.522936224937439, + "loss": 0.3228, + "rejected_geometric_mean": -2.0451438426971436, + "step": 1295 + }, + { + "chosen_geometric_mean": -1.1847541332244873, + "epoch": 0.32, + "grad_norm": 3.125, + "learning_rate": 4.69305133798228e-06, + "log_odds": 0.4623114764690399, + "log_odds_ratio": -0.4965663552284241, + "loss": 0.2951, + "rejected_geometric_mean": -1.5351784229278564, + "step": 1296 + }, + { + "chosen_geometric_mean": -1.1741300821304321, + "epoch": 0.32, + "grad_norm": 14.0625, + "learning_rate": 4.6925838193014095e-06, + "log_odds": 1.8675446510314941, + "log_odds_ratio": -0.47273319959640503, + "loss": 0.308, + "rejected_geometric_mean": -2.8844053745269775, + "step": 1297 + }, + { + "chosen_geometric_mean": -1.033128023147583, + "epoch": 0.32, + "grad_norm": 6.65625, + "learning_rate": 4.692115968172201e-06, + "log_odds": 0.3753684461116791, + "log_odds_ratio": -0.5363674163818359, + "loss": 0.3026, + "rejected_geometric_mean": -1.3100897073745728, + "step": 1298 + }, + { + "chosen_geometric_mean": -1.2025549411773682, + "epoch": 0.32, + "grad_norm": 12.125, + "learning_rate": 4.691647784665592e-06, + "log_odds": 1.1225566864013672, + "log_odds_ratio": -0.4822681248188019, + "loss": 0.3197, + "rejected_geometric_mean": -2.173618793487549, + "step": 1299 + }, + { + "chosen_geometric_mean": -1.0713510513305664, + "epoch": 0.32, + "grad_norm": 41.25, + "learning_rate": 4.6911792688525715e-06, + "log_odds": 0.8372280597686768, + "log_odds_ratio": -0.5380593538284302, + "loss": 0.331, + "rejected_geometric_mean": -1.8096051216125488, + "step": 1300 + }, + { + "chosen_geometric_mean": -0.9016138315200806, + "epoch": 0.32, + "grad_norm": 5.84375, + "learning_rate": 4.6907104208041756e-06, + "log_odds": 1.9281686544418335, + "log_odds_ratio": -0.4023728668689728, + "loss": 0.2888, + "rejected_geometric_mean": -2.63493013381958, + "step": 1301 + }, + { + "chosen_geometric_mean": -1.1216378211975098, + "epoch": 0.32, + "grad_norm": 34.75, + "learning_rate": 4.690241240591495e-06, + "log_odds": 0.3466126024723053, + "log_odds_ratio": -0.5858777165412903, + "loss": 0.3267, + "rejected_geometric_mean": -1.3715541362762451, + "step": 1302 + }, + { + "chosen_geometric_mean": -1.1738675832748413, + "epoch": 0.32, + "grad_norm": 15.75, + "learning_rate": 4.689771728285668e-06, + "log_odds": 0.162115216255188, + "log_odds_ratio": -0.635015606880188, + "loss": 0.3436, + "rejected_geometric_mean": -1.284010410308838, + "step": 1303 + }, + { + "chosen_geometric_mean": -0.9903149604797363, + "epoch": 0.32, + "grad_norm": 5.59375, + "learning_rate": 4.6893018839578845e-06, + "log_odds": 2.1049153804779053, + "log_odds_ratio": -0.4419752359390259, + "loss": 0.3196, + "rejected_geometric_mean": -2.7952589988708496, + "step": 1304 + }, + { + "chosen_geometric_mean": -1.09657883644104, + "epoch": 0.32, + "grad_norm": 9.4375, + "learning_rate": 4.688831707679382e-06, + "log_odds": 2.95928955078125, + "log_odds_ratio": -0.2910267412662506, + "loss": 0.296, + "rejected_geometric_mean": -3.7428040504455566, + "step": 1305 + }, + { + "chosen_geometric_mean": -0.970647931098938, + "epoch": 0.32, + "grad_norm": 3.984375, + "learning_rate": 4.688361199521453e-06, + "log_odds": 0.37597450613975525, + "log_odds_ratio": -0.5567976832389832, + "loss": 0.2777, + "rejected_geometric_mean": -1.2673914432525635, + "step": 1306 + }, + { + "chosen_geometric_mean": -0.9042072296142578, + "epoch": 0.32, + "grad_norm": 11.1875, + "learning_rate": 4.687890359555437e-06, + "log_odds": 2.4050116539001465, + "log_odds_ratio": -0.19987738132476807, + "loss": 0.3996, + "rejected_geometric_mean": -2.9548792839050293, + "step": 1307 + }, + { + "chosen_geometric_mean": -1.4595530033111572, + "epoch": 0.32, + "grad_norm": 2.578125, + "learning_rate": 4.687419187852725e-06, + "log_odds": 2.115111827850342, + "log_odds_ratio": -0.3006412386894226, + "loss": 0.3252, + "rejected_geometric_mean": -3.3789191246032715, + "step": 1308 + }, + { + "chosen_geometric_mean": -1.1305595636367798, + "epoch": 0.32, + "grad_norm": 43.0, + "learning_rate": 4.686947684484757e-06, + "log_odds": 2.7034711837768555, + "log_odds_ratio": -0.2802969515323639, + "loss": 0.3281, + "rejected_geometric_mean": -3.562178373336792, + "step": 1309 + }, + { + "chosen_geometric_mean": -1.2423458099365234, + "epoch": 0.32, + "grad_norm": 22.0, + "learning_rate": 4.686475849523024e-06, + "log_odds": 2.9084343910217285, + "log_odds_ratio": -0.13295215368270874, + "loss": 0.3411, + "rejected_geometric_mean": -3.8325655460357666, + "step": 1310 + }, + { + "chosen_geometric_mean": -1.5210254192352295, + "epoch": 0.32, + "grad_norm": 43.0, + "learning_rate": 4.686003683039069e-06, + "log_odds": 0.2243671417236328, + "log_odds_ratio": -0.6442238092422485, + "loss": 0.3419, + "rejected_geometric_mean": -1.72629714012146, + "step": 1311 + }, + { + "chosen_geometric_mean": -0.9514464139938354, + "epoch": 0.32, + "grad_norm": 3.75, + "learning_rate": 4.6855311851044834e-06, + "log_odds": 2.5684428215026855, + "log_odds_ratio": -0.2241767793893814, + "loss": 0.2563, + "rejected_geometric_mean": -3.1345467567443848, + "step": 1312 + }, + { + "chosen_geometric_mean": -0.947719931602478, + "epoch": 0.33, + "grad_norm": 7.0, + "learning_rate": 4.685058355790908e-06, + "log_odds": 3.6592655181884766, + "log_odds_ratio": -0.2115652710199356, + "loss": 0.3376, + "rejected_geometric_mean": -4.1833930015563965, + "step": 1313 + }, + { + "chosen_geometric_mean": -0.9056069254875183, + "epoch": 0.33, + "grad_norm": 5.3125, + "learning_rate": 4.684585195170036e-06, + "log_odds": 2.487577438354492, + "log_odds_ratio": -0.20854270458221436, + "loss": 0.293, + "rejected_geometric_mean": -3.0237011909484863, + "step": 1314 + }, + { + "chosen_geometric_mean": -1.0949026346206665, + "epoch": 0.33, + "grad_norm": 32.25, + "learning_rate": 4.684111703313611e-06, + "log_odds": 1.7326706647872925, + "log_odds_ratio": -0.3909994959831238, + "loss": 0.382, + "rejected_geometric_mean": -2.599757671356201, + "step": 1315 + }, + { + "chosen_geometric_mean": -1.2946138381958008, + "epoch": 0.33, + "grad_norm": 3.15625, + "learning_rate": 4.683637880293423e-06, + "log_odds": -0.31788885593414307, + "log_odds_ratio": -0.8694355487823486, + "loss": 0.3568, + "rejected_geometric_mean": -1.0833585262298584, + "step": 1316 + }, + { + "chosen_geometric_mean": -1.1814824342727661, + "epoch": 0.33, + "grad_norm": 5.09375, + "learning_rate": 4.683163726181317e-06, + "log_odds": 2.1527822017669678, + "log_odds_ratio": -0.24463684856891632, + "loss": 0.3282, + "rejected_geometric_mean": -3.084357261657715, + "step": 1317 + }, + { + "chosen_geometric_mean": -1.3102271556854248, + "epoch": 0.33, + "grad_norm": 37.75, + "learning_rate": 4.682689241049186e-06, + "log_odds": 3.011693239212036, + "log_odds_ratio": -0.1975027620792389, + "loss": 0.3134, + "rejected_geometric_mean": -4.1004743576049805, + "step": 1318 + }, + { + "chosen_geometric_mean": -0.8843953013420105, + "epoch": 0.33, + "grad_norm": 17.125, + "learning_rate": 4.682214424968972e-06, + "log_odds": 2.4857778549194336, + "log_odds_ratio": -0.21699975430965424, + "loss": 0.3136, + "rejected_geometric_mean": -2.964456081390381, + "step": 1319 + }, + { + "chosen_geometric_mean": -1.0861645936965942, + "epoch": 0.33, + "grad_norm": 3.625, + "learning_rate": 4.68173927801267e-06, + "log_odds": 0.4718666970729828, + "log_odds_ratio": -0.5169600248336792, + "loss": 0.2969, + "rejected_geometric_mean": -1.4509611129760742, + "step": 1320 + }, + { + "chosen_geometric_mean": -1.2029398679733276, + "epoch": 0.33, + "grad_norm": 9.375, + "learning_rate": 4.681263800252324e-06, + "log_odds": 0.26218897104263306, + "log_odds_ratio": -0.5747706294059753, + "loss": 0.3315, + "rejected_geometric_mean": -1.399227499961853, + "step": 1321 + }, + { + "chosen_geometric_mean": -1.1181089878082275, + "epoch": 0.33, + "grad_norm": 10.3125, + "learning_rate": 4.680787991760026e-06, + "log_odds": 0.6884464621543884, + "log_odds_ratio": -0.4323141574859619, + "loss": 0.3488, + "rejected_geometric_mean": -1.590965747833252, + "step": 1322 + }, + { + "chosen_geometric_mean": -1.4003727436065674, + "epoch": 0.33, + "grad_norm": 38.0, + "learning_rate": 4.680311852607922e-06, + "log_odds": 0.304507315158844, + "log_odds_ratio": -0.6249561309814453, + "loss": 0.406, + "rejected_geometric_mean": -1.7083678245544434, + "step": 1323 + }, + { + "chosen_geometric_mean": -1.0287517309188843, + "epoch": 0.33, + "grad_norm": 7.40625, + "learning_rate": 4.679835382868204e-06, + "log_odds": 0.47689351439476013, + "log_odds_ratio": -0.4996940493583679, + "loss": 0.3427, + "rejected_geometric_mean": -1.3727586269378662, + "step": 1324 + }, + { + "chosen_geometric_mean": -0.9568521976470947, + "epoch": 0.33, + "grad_norm": 13.6875, + "learning_rate": 4.6793585826131175e-06, + "log_odds": 0.4159271717071533, + "log_odds_ratio": -0.5133500099182129, + "loss": 0.3198, + "rejected_geometric_mean": -1.2432488203048706, + "step": 1325 + }, + { + "chosen_geometric_mean": -1.0278973579406738, + "epoch": 0.33, + "grad_norm": 13.1875, + "learning_rate": 4.678881451914957e-06, + "log_odds": 0.2783964276313782, + "log_odds_ratio": -0.5878166556358337, + "loss": 0.3311, + "rejected_geometric_mean": -1.1970698833465576, + "step": 1326 + }, + { + "chosen_geometric_mean": -1.2273253202438354, + "epoch": 0.33, + "grad_norm": 3.359375, + "learning_rate": 4.678403990846067e-06, + "log_odds": 1.4684767723083496, + "log_odds_ratio": -0.27758342027664185, + "loss": 0.2982, + "rejected_geometric_mean": -2.4591054916381836, + "step": 1327 + }, + { + "chosen_geometric_mean": -1.0892655849456787, + "epoch": 0.33, + "grad_norm": 6.0625, + "learning_rate": 4.677926199478842e-06, + "log_odds": 1.154260516166687, + "log_odds_ratio": -0.4050588309764862, + "loss": 0.3017, + "rejected_geometric_mean": -2.064295768737793, + "step": 1328 + }, + { + "chosen_geometric_mean": -1.2532896995544434, + "epoch": 0.33, + "grad_norm": 7.28125, + "learning_rate": 4.677448077885725e-06, + "log_odds": 1.5939587354660034, + "log_odds_ratio": -0.336442232131958, + "loss": 0.2941, + "rejected_geometric_mean": -2.6438169479370117, + "step": 1329 + }, + { + "chosen_geometric_mean": -1.2757785320281982, + "epoch": 0.33, + "grad_norm": 11.5, + "learning_rate": 4.676969626139212e-06, + "log_odds": 0.8392319083213806, + "log_odds_ratio": -0.387815922498703, + "loss": 0.3318, + "rejected_geometric_mean": -1.9612258672714233, + "step": 1330 + }, + { + "chosen_geometric_mean": -1.1147100925445557, + "epoch": 0.33, + "grad_norm": 3.328125, + "learning_rate": 4.67649084431185e-06, + "log_odds": 2.3927857875823975, + "log_odds_ratio": -0.35525640845298767, + "loss": 0.3563, + "rejected_geometric_mean": -3.3022983074188232, + "step": 1331 + }, + { + "chosen_geometric_mean": -1.3304178714752197, + "epoch": 0.33, + "grad_norm": 5.375, + "learning_rate": 4.67601173247623e-06, + "log_odds": 4.490025520324707, + "log_odds_ratio": -0.36823728680610657, + "loss": 0.3598, + "rejected_geometric_mean": -5.635961532592773, + "step": 1332 + }, + { + "chosen_geometric_mean": -1.1549811363220215, + "epoch": 0.33, + "grad_norm": 7.9375, + "learning_rate": 4.6755322907049995e-06, + "log_odds": 2.2143235206604004, + "log_odds_ratio": -0.371388703584671, + "loss": 0.3032, + "rejected_geometric_mean": -3.129371166229248, + "step": 1333 + }, + { + "chosen_geometric_mean": -1.2791553735733032, + "epoch": 0.33, + "grad_norm": 4.4375, + "learning_rate": 4.6750525190708515e-06, + "log_odds": 1.1889437437057495, + "log_odds_ratio": -0.4104268550872803, + "loss": 0.3666, + "rejected_geometric_mean": -2.311793804168701, + "step": 1334 + }, + { + "chosen_geometric_mean": -1.0878690481185913, + "epoch": 0.33, + "grad_norm": 5.46875, + "learning_rate": 4.674572417646532e-06, + "log_odds": 2.615248203277588, + "log_odds_ratio": -0.29851001501083374, + "loss": 0.2972, + "rejected_geometric_mean": -3.4736685752868652, + "step": 1335 + }, + { + "chosen_geometric_mean": -1.0173083543777466, + "epoch": 0.33, + "grad_norm": 2.625, + "learning_rate": 4.674091986504837e-06, + "log_odds": 3.382174491882324, + "log_odds_ratio": -0.3557184040546417, + "loss": 0.2685, + "rejected_geometric_mean": -4.203064441680908, + "step": 1336 + }, + { + "chosen_geometric_mean": -1.4319666624069214, + "epoch": 0.33, + "grad_norm": 6.5625, + "learning_rate": 4.673611225718609e-06, + "log_odds": 5.003036975860596, + "log_odds_ratio": -0.14884895086288452, + "loss": 0.3277, + "rejected_geometric_mean": -6.216357231140137, + "step": 1337 + }, + { + "chosen_geometric_mean": -0.9810203313827515, + "epoch": 0.33, + "grad_norm": 5.28125, + "learning_rate": 4.673130135360744e-06, + "log_odds": 0.9550068378448486, + "log_odds_ratio": -0.45313510298728943, + "loss": 0.3357, + "rejected_geometric_mean": -1.7502468824386597, + "step": 1338 + }, + { + "chosen_geometric_mean": -1.2818704843521118, + "epoch": 0.33, + "grad_norm": 10.875, + "learning_rate": 4.672648715504188e-06, + "log_odds": 1.6837608814239502, + "log_odds_ratio": -0.454608678817749, + "loss": 0.3566, + "rejected_geometric_mean": -2.850862503051758, + "step": 1339 + }, + { + "chosen_geometric_mean": -1.3126764297485352, + "epoch": 0.33, + "grad_norm": 5.625, + "learning_rate": 4.6721669662219325e-06, + "log_odds": 4.994429588317871, + "log_odds_ratio": -0.08483031392097473, + "loss": 0.3076, + "rejected_geometric_mean": -6.007511138916016, + "step": 1340 + }, + { + "chosen_geometric_mean": -1.1820882558822632, + "epoch": 0.33, + "grad_norm": 8.3125, + "learning_rate": 4.671684887587026e-06, + "log_odds": 3.8063535690307617, + "log_odds_ratio": -0.20040611922740936, + "loss": 0.3165, + "rejected_geometric_mean": -4.724165916442871, + "step": 1341 + }, + { + "chosen_geometric_mean": -1.067107915878296, + "epoch": 0.33, + "grad_norm": 7.84375, + "learning_rate": 4.6712024796725616e-06, + "log_odds": 6.2690582275390625, + "log_odds_ratio": -0.02782074734568596, + "loss": 0.2823, + "rejected_geometric_mean": -6.855967044830322, + "step": 1342 + }, + { + "chosen_geometric_mean": -1.105157732963562, + "epoch": 0.33, + "grad_norm": 7.84375, + "learning_rate": 4.670719742551683e-06, + "log_odds": 0.49844610691070557, + "log_odds_ratio": -0.5122907161712646, + "loss": 0.3049, + "rejected_geometric_mean": -1.5207781791687012, + "step": 1343 + }, + { + "chosen_geometric_mean": -2.153339385986328, + "epoch": 0.33, + "grad_norm": 92.0, + "learning_rate": 4.670236676297586e-06, + "log_odds": 2.0295417308807373, + "log_odds_ratio": -0.1282731294631958, + "loss": 0.4237, + "rejected_geometric_mean": -4.023952960968018, + "step": 1344 + }, + { + "chosen_geometric_mean": -1.432891845703125, + "epoch": 0.33, + "grad_norm": 11.1875, + "learning_rate": 4.669753280983516e-06, + "log_odds": 5.3379621505737305, + "log_odds_ratio": -0.04211759567260742, + "loss": 0.31, + "rejected_geometric_mean": -6.439060211181641, + "step": 1345 + }, + { + "chosen_geometric_mean": -1.851661205291748, + "epoch": 0.33, + "grad_norm": 23.625, + "learning_rate": 4.669269556682764e-06, + "log_odds": 2.7157864570617676, + "log_odds_ratio": -0.27047064900398254, + "loss": 0.3319, + "rejected_geometric_mean": -4.415240287780762, + "step": 1346 + }, + { + "chosen_geometric_mean": -1.3081542253494263, + "epoch": 0.33, + "grad_norm": 7.71875, + "learning_rate": 4.668785503468677e-06, + "log_odds": 4.638895511627197, + "log_odds_ratio": -0.19085639715194702, + "loss": 0.3261, + "rejected_geometric_mean": -5.654041290283203, + "step": 1347 + }, + { + "chosen_geometric_mean": -1.2190837860107422, + "epoch": 0.33, + "grad_norm": 12.8125, + "learning_rate": 4.668301121414649e-06, + "log_odds": 3.534703016281128, + "log_odds_ratio": -0.07603667676448822, + "loss": 0.2947, + "rejected_geometric_mean": -4.316196441650391, + "step": 1348 + }, + { + "chosen_geometric_mean": -1.3528800010681152, + "epoch": 0.33, + "grad_norm": 37.75, + "learning_rate": 4.667816410594123e-06, + "log_odds": 3.107593059539795, + "log_odds_ratio": -0.24389180541038513, + "loss": 0.3368, + "rejected_geometric_mean": -4.2374043464660645, + "step": 1349 + }, + { + "chosen_geometric_mean": -1.2796342372894287, + "epoch": 0.33, + "grad_norm": 7.28125, + "learning_rate": 4.667331371080592e-06, + "log_odds": 2.1018564701080322, + "log_odds_ratio": -0.14721375703811646, + "loss": 0.2952, + "rejected_geometric_mean": -3.118384838104248, + "step": 1350 + }, + { + "chosen_geometric_mean": -1.0204472541809082, + "epoch": 0.33, + "grad_norm": 10.75, + "learning_rate": 4.666846002947602e-06, + "log_odds": 1.341217041015625, + "log_odds_ratio": -0.4283515214920044, + "loss": 0.3488, + "rejected_geometric_mean": -2.1624162197113037, + "step": 1351 + }, + { + "chosen_geometric_mean": -1.178596019744873, + "epoch": 0.33, + "grad_norm": 2.546875, + "learning_rate": 4.666360306268744e-06, + "log_odds": 0.7467103004455566, + "log_odds_ratio": -0.4860570430755615, + "loss": 0.3171, + "rejected_geometric_mean": -1.817428708076477, + "step": 1352 + }, + { + "chosen_geometric_mean": -1.0987743139266968, + "epoch": 0.33, + "grad_norm": 3.78125, + "learning_rate": 4.665874281117663e-06, + "log_odds": 2.078295946121216, + "log_odds_ratio": -0.31326451897621155, + "loss": 0.2934, + "rejected_geometric_mean": -2.9483892917633057, + "step": 1353 + }, + { + "chosen_geometric_mean": -1.2380656003952026, + "epoch": 0.34, + "grad_norm": 2.609375, + "learning_rate": 4.665387927568052e-06, + "log_odds": 2.1822898387908936, + "log_odds_ratio": -0.388729453086853, + "loss": 0.2903, + "rejected_geometric_mean": -3.279372215270996, + "step": 1354 + }, + { + "chosen_geometric_mean": -1.1394507884979248, + "epoch": 0.34, + "grad_norm": 6.5625, + "learning_rate": 4.664901245693652e-06, + "log_odds": 0.4334219992160797, + "log_odds_ratio": -0.5360475778579712, + "loss": 0.3613, + "rejected_geometric_mean": -1.4588820934295654, + "step": 1355 + }, + { + "chosen_geometric_mean": -1.1256663799285889, + "epoch": 0.34, + "grad_norm": 4.21875, + "learning_rate": 4.664414235568259e-06, + "log_odds": 1.9073162078857422, + "log_odds_ratio": -0.21919898688793182, + "loss": 0.3024, + "rejected_geometric_mean": -2.722496509552002, + "step": 1356 + }, + { + "chosen_geometric_mean": -0.9903801083564758, + "epoch": 0.34, + "grad_norm": 3.796875, + "learning_rate": 4.6639268972657125e-06, + "log_odds": 0.3484267294406891, + "log_odds_ratio": -0.5687004327774048, + "loss": 0.3475, + "rejected_geometric_mean": -1.2341793775558472, + "step": 1357 + }, + { + "chosen_geometric_mean": -1.121822714805603, + "epoch": 0.34, + "grad_norm": 2.75, + "learning_rate": 4.663439230859906e-06, + "log_odds": 0.9452775716781616, + "log_odds_ratio": -0.4030371904373169, + "loss": 0.3004, + "rejected_geometric_mean": -1.909753441810608, + "step": 1358 + }, + { + "chosen_geometric_mean": -1.0207232236862183, + "epoch": 0.34, + "grad_norm": 3.09375, + "learning_rate": 4.66295123642478e-06, + "log_odds": 0.21085891127586365, + "log_odds_ratio": -0.6020824313163757, + "loss": 0.3083, + "rejected_geometric_mean": -1.1561062335968018, + "step": 1359 + }, + { + "chosen_geometric_mean": -1.1253688335418701, + "epoch": 0.34, + "grad_norm": 5.96875, + "learning_rate": 4.662462914034328e-06, + "log_odds": 1.7840619087219238, + "log_odds_ratio": -0.2536427974700928, + "loss": 0.2806, + "rejected_geometric_mean": -2.6552531719207764, + "step": 1360 + }, + { + "chosen_geometric_mean": -1.1400388479232788, + "epoch": 0.34, + "grad_norm": 2.90625, + "learning_rate": 4.6619742637625905e-06, + "log_odds": 3.9647233486175537, + "log_odds_ratio": -0.3424846827983856, + "loss": 0.3432, + "rejected_geometric_mean": -4.926758289337158, + "step": 1361 + }, + { + "chosen_geometric_mean": -1.2102718353271484, + "epoch": 0.34, + "grad_norm": 5.625, + "learning_rate": 4.661485285683659e-06, + "log_odds": 3.2788095474243164, + "log_odds_ratio": -0.36632823944091797, + "loss": 0.3234, + "rejected_geometric_mean": -4.269107818603516, + "step": 1362 + }, + { + "chosen_geometric_mean": -0.9278558492660522, + "epoch": 0.34, + "grad_norm": 10.5, + "learning_rate": 4.660995979871674e-06, + "log_odds": 3.8504996299743652, + "log_odds_ratio": -0.26240983605384827, + "loss": 0.2855, + "rejected_geometric_mean": -4.452406406402588, + "step": 1363 + }, + { + "chosen_geometric_mean": -0.9979575872421265, + "epoch": 0.34, + "grad_norm": 3.875, + "learning_rate": 4.660506346400826e-06, + "log_odds": 3.4495630264282227, + "log_odds_ratio": -0.266088604927063, + "loss": 0.3709, + "rejected_geometric_mean": -4.136246681213379, + "step": 1364 + }, + { + "chosen_geometric_mean": -1.283326268196106, + "epoch": 0.34, + "grad_norm": 35.25, + "learning_rate": 4.660016385345355e-06, + "log_odds": 0.8148782253265381, + "log_odds_ratio": -0.5194228291511536, + "loss": 0.3749, + "rejected_geometric_mean": -1.9796714782714844, + "step": 1365 + }, + { + "chosen_geometric_mean": -1.2530351877212524, + "epoch": 0.34, + "grad_norm": 7.9375, + "learning_rate": 4.659526096779552e-06, + "log_odds": 0.7728311419487, + "log_odds_ratio": -0.4059904217720032, + "loss": 0.3051, + "rejected_geometric_mean": -1.8841667175292969, + "step": 1366 + }, + { + "chosen_geometric_mean": -1.008587121963501, + "epoch": 0.34, + "grad_norm": 3.6875, + "learning_rate": 4.659035480777755e-06, + "log_odds": 4.937076091766357, + "log_odds_ratio": -0.26750749349594116, + "loss": 0.2886, + "rejected_geometric_mean": -5.612231731414795, + "step": 1367 + }, + { + "chosen_geometric_mean": -1.1194803714752197, + "epoch": 0.34, + "grad_norm": 8.5, + "learning_rate": 4.658544537414354e-06, + "log_odds": 3.4163765907287598, + "log_odds_ratio": -0.17783179879188538, + "loss": 0.3487, + "rejected_geometric_mean": -4.225832939147949, + "step": 1368 + }, + { + "chosen_geometric_mean": -1.1676514148712158, + "epoch": 0.34, + "grad_norm": 7.34375, + "learning_rate": 4.658053266763787e-06, + "log_odds": 2.894334316253662, + "log_odds_ratio": -0.44081738591194153, + "loss": 0.3228, + "rejected_geometric_mean": -3.87410306930542, + "step": 1369 + }, + { + "chosen_geometric_mean": -1.170428991317749, + "epoch": 0.34, + "grad_norm": 20.375, + "learning_rate": 4.657561668900544e-06, + "log_odds": 6.710470199584961, + "log_odds_ratio": -0.09293850511312485, + "loss": 0.4302, + "rejected_geometric_mean": -7.524961471557617, + "step": 1370 + }, + { + "chosen_geometric_mean": -1.141181468963623, + "epoch": 0.34, + "grad_norm": 11.1875, + "learning_rate": 4.657069743899161e-06, + "log_odds": 3.1290175914764404, + "log_odds_ratio": -0.21817448735237122, + "loss": 0.3442, + "rejected_geometric_mean": -4.029120922088623, + "step": 1371 + }, + { + "chosen_geometric_mean": -1.084596872329712, + "epoch": 0.34, + "grad_norm": 16.625, + "learning_rate": 4.656577491834228e-06, + "log_odds": 2.9772188663482666, + "log_odds_ratio": -0.30430716276168823, + "loss": 0.3163, + "rejected_geometric_mean": -3.85274076461792, + "step": 1372 + }, + { + "chosen_geometric_mean": -1.2475230693817139, + "epoch": 0.34, + "grad_norm": 3.40625, + "learning_rate": 4.65608491278038e-06, + "log_odds": 0.858757495880127, + "log_odds_ratio": -0.4247305989265442, + "loss": 0.3104, + "rejected_geometric_mean": -1.9712618589401245, + "step": 1373 + }, + { + "chosen_geometric_mean": -0.9708861112594604, + "epoch": 0.34, + "grad_norm": 2.984375, + "learning_rate": 4.655592006812305e-06, + "log_odds": 2.860058546066284, + "log_odds_ratio": -0.22921805083751678, + "loss": 0.3035, + "rejected_geometric_mean": -3.4844393730163574, + "step": 1374 + }, + { + "chosen_geometric_mean": -1.359515905380249, + "epoch": 0.34, + "grad_norm": 3.78125, + "learning_rate": 4.655098774004739e-06, + "log_odds": 2.1777234077453613, + "log_odds_ratio": -0.23992221057415009, + "loss": 0.3107, + "rejected_geometric_mean": -3.3390374183654785, + "step": 1375 + }, + { + "chosen_geometric_mean": -1.3200854063034058, + "epoch": 0.34, + "grad_norm": 6.84375, + "learning_rate": 4.654605214432468e-06, + "log_odds": 0.44334954023361206, + "log_odds_ratio": -0.5215333700180054, + "loss": 0.2807, + "rejected_geometric_mean": -1.6540716886520386, + "step": 1376 + }, + { + "chosen_geometric_mean": -1.2345709800720215, + "epoch": 0.34, + "grad_norm": 3.40625, + "learning_rate": 4.654111328170328e-06, + "log_odds": 2.701050043106079, + "log_odds_ratio": -0.14270047843456268, + "loss": 0.33, + "rejected_geometric_mean": -3.6052474975585938, + "step": 1377 + }, + { + "chosen_geometric_mean": -1.1656931638717651, + "epoch": 0.34, + "grad_norm": 10.0625, + "learning_rate": 4.653617115293203e-06, + "log_odds": 2.4991912841796875, + "log_odds_ratio": -0.3996685743331909, + "loss": 0.3261, + "rejected_geometric_mean": -3.5123162269592285, + "step": 1378 + }, + { + "chosen_geometric_mean": -1.5697221755981445, + "epoch": 0.34, + "grad_norm": 17.5, + "learning_rate": 4.6531225758760275e-06, + "log_odds": 0.7638301849365234, + "log_odds_ratio": -0.4391316771507263, + "loss": 0.3505, + "rejected_geometric_mean": -2.2170963287353516, + "step": 1379 + }, + { + "chosen_geometric_mean": -1.0228530168533325, + "epoch": 0.34, + "grad_norm": 11.375, + "learning_rate": 4.652627709993788e-06, + "log_odds": 0.6608610153198242, + "log_odds_ratio": -0.5754537582397461, + "loss": 0.3653, + "rejected_geometric_mean": -1.575190544128418, + "step": 1380 + }, + { + "chosen_geometric_mean": -1.1645536422729492, + "epoch": 0.34, + "grad_norm": 2.9375, + "learning_rate": 4.6521325177215145e-06, + "log_odds": 2.3574256896972656, + "log_odds_ratio": -0.28768491744995117, + "loss": 0.3232, + "rejected_geometric_mean": -3.293553352355957, + "step": 1381 + }, + { + "chosen_geometric_mean": -1.0634561777114868, + "epoch": 0.34, + "grad_norm": 3.109375, + "learning_rate": 4.6516369991342925e-06, + "log_odds": 3.096222400665283, + "log_odds_ratio": -0.07048723101615906, + "loss": 0.3247, + "rejected_geometric_mean": -3.7573323249816895, + "step": 1382 + }, + { + "chosen_geometric_mean": -0.9964427351951599, + "epoch": 0.34, + "grad_norm": 4.90625, + "learning_rate": 4.651141154307253e-06, + "log_odds": 4.216732501983643, + "log_odds_ratio": -0.07266024500131607, + "loss": 0.3085, + "rejected_geometric_mean": -4.769020080566406, + "step": 1383 + }, + { + "chosen_geometric_mean": -1.2941715717315674, + "epoch": 0.34, + "grad_norm": 14.375, + "learning_rate": 4.650644983315579e-06, + "log_odds": 2.310441732406616, + "log_odds_ratio": -0.12238451838493347, + "loss": 0.3448, + "rejected_geometric_mean": -3.2580771446228027, + "step": 1384 + }, + { + "chosen_geometric_mean": -1.0818617343902588, + "epoch": 0.34, + "grad_norm": 10.4375, + "learning_rate": 4.6501484862345016e-06, + "log_odds": 1.9094288349151611, + "log_odds_ratio": -0.542000412940979, + "loss": 0.3492, + "rejected_geometric_mean": -2.8443667888641357, + "step": 1385 + }, + { + "chosen_geometric_mean": -1.3975961208343506, + "epoch": 0.34, + "grad_norm": 3.1875, + "learning_rate": 4.649651663139302e-06, + "log_odds": 1.9172031879425049, + "log_odds_ratio": -0.20932447910308838, + "loss": 0.3441, + "rejected_geometric_mean": -3.100170135498047, + "step": 1386 + }, + { + "chosen_geometric_mean": -1.5774049758911133, + "epoch": 0.34, + "grad_norm": 13.5, + "learning_rate": 4.64915451410531e-06, + "log_odds": 0.8072076439857483, + "log_odds_ratio": -0.4702173173427582, + "loss": 0.3662, + "rejected_geometric_mean": -2.33115553855896, + "step": 1387 + }, + { + "chosen_geometric_mean": -1.2273861169815063, + "epoch": 0.34, + "grad_norm": 7.28125, + "learning_rate": 4.648657039207906e-06, + "log_odds": 3.9665744304656982, + "log_odds_ratio": -0.09558653831481934, + "loss": 0.3041, + "rejected_geometric_mean": -4.862249851226807, + "step": 1388 + }, + { + "chosen_geometric_mean": -1.2304484844207764, + "epoch": 0.34, + "grad_norm": 4.28125, + "learning_rate": 4.648159238522518e-06, + "log_odds": 1.9398292303085327, + "log_odds_ratio": -0.2417762279510498, + "loss": 0.3204, + "rejected_geometric_mean": -2.941735029220581, + "step": 1389 + }, + { + "chosen_geometric_mean": -1.116571307182312, + "epoch": 0.34, + "grad_norm": 22.5, + "learning_rate": 4.647661112124626e-06, + "log_odds": 2.5515456199645996, + "log_odds_ratio": -0.27980726957321167, + "loss": 0.285, + "rejected_geometric_mean": -3.4678795337677, + "step": 1390 + }, + { + "chosen_geometric_mean": -0.9958896636962891, + "epoch": 0.34, + "grad_norm": 5.5, + "learning_rate": 4.647162660089756e-06, + "log_odds": 2.78863263130188, + "log_odds_ratio": -0.21827712655067444, + "loss": 0.3129, + "rejected_geometric_mean": -3.442837953567505, + "step": 1391 + }, + { + "chosen_geometric_mean": -1.174465537071228, + "epoch": 0.34, + "grad_norm": 3.40625, + "learning_rate": 4.646663882493487e-06, + "log_odds": 0.7981317639350891, + "log_odds_ratio": -0.3821180760860443, + "loss": 0.3004, + "rejected_geometric_mean": -1.788975477218628, + "step": 1392 + }, + { + "chosen_geometric_mean": -1.0851709842681885, + "epoch": 0.34, + "grad_norm": 3.390625, + "learning_rate": 4.646164779411445e-06, + "log_odds": 2.3818271160125732, + "log_odds_ratio": -0.18930166959762573, + "loss": 0.3191, + "rejected_geometric_mean": -3.1350934505462646, + "step": 1393 + }, + { + "chosen_geometric_mean": -1.0029277801513672, + "epoch": 0.35, + "grad_norm": 3.109375, + "learning_rate": 4.645665350919306e-06, + "log_odds": 2.458662986755371, + "log_odds_ratio": -0.27122563123703003, + "loss": 0.2952, + "rejected_geometric_mean": -3.1845297813415527, + "step": 1394 + }, + { + "chosen_geometric_mean": -1.1530815362930298, + "epoch": 0.35, + "grad_norm": 11.25, + "learning_rate": 4.645165597092796e-06, + "log_odds": 2.299771785736084, + "log_odds_ratio": -0.2504086196422577, + "loss": 0.295, + "rejected_geometric_mean": -3.13810396194458, + "step": 1395 + }, + { + "chosen_geometric_mean": -0.9974702596664429, + "epoch": 0.35, + "grad_norm": 13.0625, + "learning_rate": 4.644665518007689e-06, + "log_odds": 0.5909821391105652, + "log_odds_ratio": -0.8111361861228943, + "loss": 0.3372, + "rejected_geometric_mean": -1.7275331020355225, + "step": 1396 + }, + { + "chosen_geometric_mean": -1.198164701461792, + "epoch": 0.35, + "grad_norm": 3.515625, + "learning_rate": 4.644165113739808e-06, + "log_odds": 1.1287524700164795, + "log_odds_ratio": -0.3495214581489563, + "loss": 0.308, + "rejected_geometric_mean": -2.1360368728637695, + "step": 1397 + }, + { + "chosen_geometric_mean": -1.0435376167297363, + "epoch": 0.35, + "grad_norm": 23.375, + "learning_rate": 4.6436643843650285e-06, + "log_odds": 4.257663726806641, + "log_odds_ratio": -0.15085014700889587, + "loss": 0.3274, + "rejected_geometric_mean": -4.956542015075684, + "step": 1398 + }, + { + "chosen_geometric_mean": -0.9077863693237305, + "epoch": 0.35, + "grad_norm": 6.46875, + "learning_rate": 4.643163329959272e-06, + "log_odds": 2.033134937286377, + "log_odds_ratio": -0.3652347922325134, + "loss": 0.2849, + "rejected_geometric_mean": -2.670438766479492, + "step": 1399 + }, + { + "chosen_geometric_mean": -1.381584644317627, + "epoch": 0.35, + "grad_norm": 7.3125, + "learning_rate": 4.64266195059851e-06, + "log_odds": 3.924734592437744, + "log_odds_ratio": -0.2656053304672241, + "loss": 0.3224, + "rejected_geometric_mean": -5.102260589599609, + "step": 1400 + }, + { + "chosen_geometric_mean": -0.9938032031059265, + "epoch": 0.35, + "grad_norm": 4.5, + "learning_rate": 4.6421602463587636e-06, + "log_odds": 1.6271088123321533, + "log_odds_ratio": -0.4649182856082916, + "loss": 0.271, + "rejected_geometric_mean": -2.419029951095581, + "step": 1401 + }, + { + "chosen_geometric_mean": -1.0613276958465576, + "epoch": 0.35, + "grad_norm": 2.90625, + "learning_rate": 4.641658217316104e-06, + "log_odds": 0.38103175163269043, + "log_odds_ratio": -0.5247403383255005, + "loss": 0.2904, + "rejected_geometric_mean": -1.327103614807129, + "step": 1402 + }, + { + "chosen_geometric_mean": -1.182185411453247, + "epoch": 0.35, + "grad_norm": 3.21875, + "learning_rate": 4.641155863546651e-06, + "log_odds": 2.6246535778045654, + "log_odds_ratio": -0.31800907850265503, + "loss": 0.3554, + "rejected_geometric_mean": -3.5751700401306152, + "step": 1403 + }, + { + "chosen_geometric_mean": -1.2688297033309937, + "epoch": 0.35, + "grad_norm": 4.53125, + "learning_rate": 4.640653185126572e-06, + "log_odds": 0.8586835265159607, + "log_odds_ratio": -0.3689676821231842, + "loss": 0.382, + "rejected_geometric_mean": -1.9421255588531494, + "step": 1404 + }, + { + "chosen_geometric_mean": -1.0040907859802246, + "epoch": 0.35, + "grad_norm": 5.40625, + "learning_rate": 4.640150182132087e-06, + "log_odds": 3.7175440788269043, + "log_odds_ratio": -0.26250138878822327, + "loss": 0.2629, + "rejected_geometric_mean": -4.442347049713135, + "step": 1405 + }, + { + "chosen_geometric_mean": -1.0362199544906616, + "epoch": 0.35, + "grad_norm": 5.125, + "learning_rate": 4.639646854639461e-06, + "log_odds": 2.883476972579956, + "log_odds_ratio": -0.3265366554260254, + "loss": 0.3191, + "rejected_geometric_mean": -3.675930976867676, + "step": 1406 + }, + { + "chosen_geometric_mean": -1.3915680646896362, + "epoch": 0.35, + "grad_norm": 5.59375, + "learning_rate": 4.639143202725014e-06, + "log_odds": 3.117438554763794, + "log_odds_ratio": -0.2869589626789093, + "loss": 0.3443, + "rejected_geometric_mean": -4.321604251861572, + "step": 1407 + }, + { + "chosen_geometric_mean": -1.495029091835022, + "epoch": 0.35, + "grad_norm": 55.75, + "learning_rate": 4.638639226465108e-06, + "log_odds": 4.329499244689941, + "log_odds_ratio": -0.016043435782194138, + "loss": 0.4851, + "rejected_geometric_mean": -5.54591703414917, + "step": 1408 + }, + { + "chosen_geometric_mean": -1.0756447315216064, + "epoch": 0.35, + "grad_norm": 36.25, + "learning_rate": 4.63813492593616e-06, + "log_odds": 4.6965179443359375, + "log_odds_ratio": -0.14744915068149567, + "loss": 0.4113, + "rejected_geometric_mean": -5.465517997741699, + "step": 1409 + }, + { + "chosen_geometric_mean": -1.046170711517334, + "epoch": 0.35, + "grad_norm": 14.0625, + "learning_rate": 4.637630301214634e-06, + "log_odds": 3.2457644939422607, + "log_odds_ratio": -0.2743242681026459, + "loss": 0.323, + "rejected_geometric_mean": -4.013260841369629, + "step": 1410 + }, + { + "chosen_geometric_mean": -1.41164231300354, + "epoch": 0.35, + "grad_norm": 36.5, + "learning_rate": 4.637125352377042e-06, + "log_odds": 1.5072482824325562, + "log_odds_ratio": -0.34036821126937866, + "loss": 0.3703, + "rejected_geometric_mean": -2.763683795928955, + "step": 1411 + }, + { + "chosen_geometric_mean": -1.7748005390167236, + "epoch": 0.35, + "grad_norm": 66.0, + "learning_rate": 4.636620079499948e-06, + "log_odds": 3.47392201423645, + "log_odds_ratio": -0.20309889316558838, + "loss": 0.397, + "rejected_geometric_mean": -5.034399509429932, + "step": 1412 + }, + { + "chosen_geometric_mean": -0.8975827693939209, + "epoch": 0.35, + "grad_norm": 16.75, + "learning_rate": 4.6361144826599625e-06, + "log_odds": 1.8899779319763184, + "log_odds_ratio": -0.19095346331596375, + "loss": 0.3242, + "rejected_geometric_mean": -2.386756181716919, + "step": 1413 + }, + { + "chosen_geometric_mean": -0.9039343595504761, + "epoch": 0.35, + "grad_norm": 3.4375, + "learning_rate": 4.635608561933746e-06, + "log_odds": 5.526113510131836, + "log_odds_ratio": -0.15806010365486145, + "loss": 0.3088, + "rejected_geometric_mean": -6.006158828735352, + "step": 1414 + }, + { + "chosen_geometric_mean": -0.7819664478302002, + "epoch": 0.35, + "grad_norm": 3.3125, + "learning_rate": 4.6351023173980095e-06, + "log_odds": 2.54500412940979, + "log_odds_ratio": -0.20438306033611298, + "loss": 0.2703, + "rejected_geometric_mean": -2.9069557189941406, + "step": 1415 + }, + { + "chosen_geometric_mean": -1.1574442386627197, + "epoch": 0.35, + "grad_norm": 2.875, + "learning_rate": 4.63459574912951e-06, + "log_odds": 1.7917380332946777, + "log_odds_ratio": -0.320252001285553, + "loss": 0.2679, + "rejected_geometric_mean": -2.700127124786377, + "step": 1416 + }, + { + "chosen_geometric_mean": -0.9833818674087524, + "epoch": 0.35, + "grad_norm": 6.21875, + "learning_rate": 4.634088857205056e-06, + "log_odds": 3.08270001411438, + "log_odds_ratio": -0.14454978704452515, + "loss": 0.3072, + "rejected_geometric_mean": -3.6169707775115967, + "step": 1417 + }, + { + "chosen_geometric_mean": -1.1047847270965576, + "epoch": 0.35, + "grad_norm": 3.953125, + "learning_rate": 4.633581641701506e-06, + "log_odds": 0.052247554063797, + "log_odds_ratio": -0.7097718715667725, + "loss": 0.3539, + "rejected_geometric_mean": -1.1690092086791992, + "step": 1418 + }, + { + "chosen_geometric_mean": -1.1773433685302734, + "epoch": 0.35, + "grad_norm": 24.5, + "learning_rate": 4.633074102695763e-06, + "log_odds": 3.49696683883667, + "log_odds_ratio": -0.1837363988161087, + "loss": 0.2725, + "rejected_geometric_mean": -4.428374290466309, + "step": 1419 + }, + { + "chosen_geometric_mean": -1.0552899837493896, + "epoch": 0.35, + "grad_norm": 3.375, + "learning_rate": 4.632566240264785e-06, + "log_odds": 2.4273080825805664, + "log_odds_ratio": -0.3152357041835785, + "loss": 0.3368, + "rejected_geometric_mean": -3.2236790657043457, + "step": 1420 + }, + { + "chosen_geometric_mean": -1.1943330764770508, + "epoch": 0.35, + "grad_norm": 17.75, + "learning_rate": 4.632058054485575e-06, + "log_odds": 0.7403641939163208, + "log_odds_ratio": -0.49466678500175476, + "loss": 0.3108, + "rejected_geometric_mean": -1.7920172214508057, + "step": 1421 + }, + { + "chosen_geometric_mean": -1.031801462173462, + "epoch": 0.35, + "grad_norm": 7.65625, + "learning_rate": 4.631549545435186e-06, + "log_odds": 3.85274338722229, + "log_odds_ratio": -0.35596218705177307, + "loss": 0.3113, + "rejected_geometric_mean": -4.6604743003845215, + "step": 1422 + }, + { + "chosen_geometric_mean": -0.9857476949691772, + "epoch": 0.35, + "grad_norm": 3.0, + "learning_rate": 4.631040713190721e-06, + "log_odds": 1.3332380056381226, + "log_odds_ratio": -0.37013983726501465, + "loss": 0.284, + "rejected_geometric_mean": -2.0707693099975586, + "step": 1423 + }, + { + "chosen_geometric_mean": -1.3871026039123535, + "epoch": 0.35, + "grad_norm": 3.15625, + "learning_rate": 4.63053155782933e-06, + "log_odds": 3.8827855587005615, + "log_odds_ratio": -0.20070263743400574, + "loss": 0.2989, + "rejected_geometric_mean": -5.048107624053955, + "step": 1424 + }, + { + "chosen_geometric_mean": -1.1386209726333618, + "epoch": 0.35, + "grad_norm": 3.0, + "learning_rate": 4.630022079428214e-06, + "log_odds": 1.3342574834823608, + "log_odds_ratio": -0.4210759103298187, + "loss": 0.3133, + "rejected_geometric_mean": -2.298657178878784, + "step": 1425 + }, + { + "chosen_geometric_mean": -1.2888606786727905, + "epoch": 0.35, + "grad_norm": 5.1875, + "learning_rate": 4.62951227806462e-06, + "log_odds": 1.9046822786331177, + "log_odds_ratio": -0.49956023693084717, + "loss": 0.3319, + "rejected_geometric_mean": -3.1175785064697266, + "step": 1426 + }, + { + "chosen_geometric_mean": -1.094012975692749, + "epoch": 0.35, + "grad_norm": 15.5, + "learning_rate": 4.62900215381585e-06, + "log_odds": 0.4657946228981018, + "log_odds_ratio": -0.5337790250778198, + "loss": 0.3084, + "rejected_geometric_mean": -1.4535021781921387, + "step": 1427 + }, + { + "chosen_geometric_mean": -1.2220916748046875, + "epoch": 0.35, + "grad_norm": 17.125, + "learning_rate": 4.628491706759249e-06, + "log_odds": 2.433363437652588, + "log_odds_ratio": -0.45914146304130554, + "loss": 0.3177, + "rejected_geometric_mean": -3.4945926666259766, + "step": 1428 + }, + { + "chosen_geometric_mean": -1.1638048887252808, + "epoch": 0.35, + "grad_norm": 2.625, + "learning_rate": 4.6279809369722124e-06, + "log_odds": 4.135611534118652, + "log_odds_ratio": -0.33707496523857117, + "loss": 0.3277, + "rejected_geometric_mean": -5.090388298034668, + "step": 1429 + }, + { + "chosen_geometric_mean": -1.105266809463501, + "epoch": 0.35, + "grad_norm": 2.84375, + "learning_rate": 4.627469844532187e-06, + "log_odds": 2.5965001583099365, + "log_odds_ratio": -0.15202096104621887, + "loss": 0.3079, + "rejected_geometric_mean": -3.369807243347168, + "step": 1430 + }, + { + "chosen_geometric_mean": -1.1641799211502075, + "epoch": 0.35, + "grad_norm": 5.625, + "learning_rate": 4.6269584295166645e-06, + "log_odds": 3.0478312969207764, + "log_odds_ratio": -0.16499732434749603, + "loss": 0.2992, + "rejected_geometric_mean": -3.930093288421631, + "step": 1431 + }, + { + "chosen_geometric_mean": -1.4218454360961914, + "epoch": 0.35, + "grad_norm": 15.9375, + "learning_rate": 4.626446692003189e-06, + "log_odds": 1.139992356300354, + "log_odds_ratio": -0.33182960748672485, + "loss": 0.2965, + "rejected_geometric_mean": -2.394585371017456, + "step": 1432 + }, + { + "chosen_geometric_mean": -1.2636520862579346, + "epoch": 0.35, + "grad_norm": 3.84375, + "learning_rate": 4.625934632069353e-06, + "log_odds": 1.0442200899124146, + "log_odds_ratio": -0.4475743770599365, + "loss": 0.3165, + "rejected_geometric_mean": -2.2144970893859863, + "step": 1433 + }, + { + "chosen_geometric_mean": -1.027129888534546, + "epoch": 0.36, + "grad_norm": 4.9375, + "learning_rate": 4.625422249792794e-06, + "log_odds": 0.9415104389190674, + "log_odds_ratio": -0.38312429189682007, + "loss": 0.3231, + "rejected_geometric_mean": -1.761306881904602, + "step": 1434 + }, + { + "chosen_geometric_mean": -1.1129279136657715, + "epoch": 0.36, + "grad_norm": 3.21875, + "learning_rate": 4.624909545251205e-06, + "log_odds": 3.5926120281219482, + "log_odds_ratio": -0.37863898277282715, + "loss": 0.3176, + "rejected_geometric_mean": -4.521854877471924, + "step": 1435 + }, + { + "chosen_geometric_mean": -1.5635499954223633, + "epoch": 0.36, + "grad_norm": 12.25, + "learning_rate": 4.6243965185223224e-06, + "log_odds": 0.19608530402183533, + "log_odds_ratio": -0.6913924813270569, + "loss": 0.4004, + "rejected_geometric_mean": -1.7092669010162354, + "step": 1436 + }, + { + "chosen_geometric_mean": -1.1456718444824219, + "epoch": 0.36, + "grad_norm": 2.5, + "learning_rate": 4.6238831696839335e-06, + "log_odds": 4.006965637207031, + "log_odds_ratio": -0.23309479653835297, + "loss": 0.3452, + "rejected_geometric_mean": -4.884618759155273, + "step": 1437 + }, + { + "chosen_geometric_mean": -0.9399720430374146, + "epoch": 0.36, + "grad_norm": 4.03125, + "learning_rate": 4.623369498813875e-06, + "log_odds": 0.6279305219650269, + "log_odds_ratio": -0.4652565121650696, + "loss": 0.2971, + "rejected_geometric_mean": -1.3699995279312134, + "step": 1438 + }, + { + "chosen_geometric_mean": -1.0537779331207275, + "epoch": 0.36, + "grad_norm": 3.140625, + "learning_rate": 4.622855505990031e-06, + "log_odds": 5.717514514923096, + "log_odds_ratio": -0.009222380816936493, + "loss": 0.2983, + "rejected_geometric_mean": -6.308180332183838, + "step": 1439 + }, + { + "chosen_geometric_mean": -0.9459221363067627, + "epoch": 0.36, + "grad_norm": 5.6875, + "learning_rate": 4.622341191290336e-06, + "log_odds": 4.242349147796631, + "log_odds_ratio": -0.22228822112083435, + "loss": 0.3132, + "rejected_geometric_mean": -4.841825008392334, + "step": 1440 + }, + { + "chosen_geometric_mean": -1.3839486837387085, + "epoch": 0.36, + "grad_norm": 26.875, + "learning_rate": 4.621826554792771e-06, + "log_odds": 3.31015682220459, + "log_odds_ratio": -0.17601816356182098, + "loss": 0.3126, + "rejected_geometric_mean": -4.461277008056641, + "step": 1441 + }, + { + "chosen_geometric_mean": -1.100510835647583, + "epoch": 0.36, + "grad_norm": 30.75, + "learning_rate": 4.6213115965753686e-06, + "log_odds": 4.469616889953613, + "log_odds_ratio": -0.29321804642677307, + "loss": 0.345, + "rejected_geometric_mean": -5.296463489532471, + "step": 1442 + }, + { + "chosen_geometric_mean": -1.6027668714523315, + "epoch": 0.36, + "grad_norm": 24.0, + "learning_rate": 4.6207963167162075e-06, + "log_odds": 2.878032684326172, + "log_odds_ratio": -0.16917391121387482, + "loss": 0.3193, + "rejected_geometric_mean": -4.293848991394043, + "step": 1443 + }, + { + "chosen_geometric_mean": -1.7154066562652588, + "epoch": 0.36, + "grad_norm": 23.125, + "learning_rate": 4.620280715293419e-06, + "log_odds": 3.802940607070923, + "log_odds_ratio": -0.17046189308166504, + "loss": 0.3307, + "rejected_geometric_mean": -5.322514057159424, + "step": 1444 + }, + { + "chosen_geometric_mean": -1.0671981573104858, + "epoch": 0.36, + "grad_norm": 6.125, + "learning_rate": 4.619764792385178e-06, + "log_odds": 6.121645450592041, + "log_odds_ratio": -0.09835954010486603, + "loss": 0.3637, + "rejected_geometric_mean": -6.825639247894287, + "step": 1445 + }, + { + "chosen_geometric_mean": -1.3397395610809326, + "epoch": 0.36, + "grad_norm": 38.75, + "learning_rate": 4.619248548069712e-06, + "log_odds": 1.9097261428833008, + "log_odds_ratio": -0.393159955739975, + "loss": 0.4116, + "rejected_geometric_mean": -3.134749174118042, + "step": 1446 + }, + { + "chosen_geometric_mean": -1.1295322179794312, + "epoch": 0.36, + "grad_norm": 23.5, + "learning_rate": 4.618731982425295e-06, + "log_odds": 2.6939611434936523, + "log_odds_ratio": -0.35778331756591797, + "loss": 0.3507, + "rejected_geometric_mean": -3.63497257232666, + "step": 1447 + }, + { + "chosen_geometric_mean": -1.2990261316299438, + "epoch": 0.36, + "grad_norm": 11.9375, + "learning_rate": 4.618215095530253e-06, + "log_odds": 0.7307778596878052, + "log_odds_ratio": -0.7262084484100342, + "loss": 0.3381, + "rejected_geometric_mean": -1.8637580871582031, + "step": 1448 + }, + { + "chosen_geometric_mean": -1.4253287315368652, + "epoch": 0.36, + "grad_norm": 9.0, + "learning_rate": 4.617697887462956e-06, + "log_odds": 2.806750774383545, + "log_odds_ratio": -0.3127865493297577, + "loss": 0.3448, + "rejected_geometric_mean": -4.094038009643555, + "step": 1449 + }, + { + "chosen_geometric_mean": -1.2358477115631104, + "epoch": 0.36, + "grad_norm": 5.09375, + "learning_rate": 4.617180358301827e-06, + "log_odds": 1.6574749946594238, + "log_odds_ratio": -0.22708699107170105, + "loss": 0.3065, + "rejected_geometric_mean": -2.64894437789917, + "step": 1450 + }, + { + "chosen_geometric_mean": -1.0619628429412842, + "epoch": 0.36, + "grad_norm": 24.125, + "learning_rate": 4.616662508125336e-06, + "log_odds": 3.3232882022857666, + "log_odds_ratio": -0.32551389932632446, + "loss": 0.3453, + "rejected_geometric_mean": -4.135101795196533, + "step": 1451 + }, + { + "chosen_geometric_mean": -1.3702996969223022, + "epoch": 0.36, + "grad_norm": 2.9375, + "learning_rate": 4.616144337011998e-06, + "log_odds": 2.4995875358581543, + "log_odds_ratio": -0.27456021308898926, + "loss": 0.3734, + "rejected_geometric_mean": -3.7129390239715576, + "step": 1452 + }, + { + "chosen_geometric_mean": -1.1256141662597656, + "epoch": 0.36, + "grad_norm": 7.84375, + "learning_rate": 4.615625845040385e-06, + "log_odds": 1.252434492111206, + "log_odds_ratio": -0.2924342155456543, + "loss": 0.2962, + "rejected_geometric_mean": -2.1472020149230957, + "step": 1453 + }, + { + "chosen_geometric_mean": -0.9861263036727905, + "epoch": 0.36, + "grad_norm": 3.375, + "learning_rate": 4.615107032289109e-06, + "log_odds": 1.0156729221343994, + "log_odds_ratio": -0.37802398204803467, + "loss": 0.2792, + "rejected_geometric_mean": -1.7890900373458862, + "step": 1454 + }, + { + "chosen_geometric_mean": -1.1922435760498047, + "epoch": 0.36, + "grad_norm": 4.0625, + "learning_rate": 4.614587898836837e-06, + "log_odds": 2.876333475112915, + "log_odds_ratio": -0.20629800856113434, + "loss": 0.2995, + "rejected_geometric_mean": -3.8080663681030273, + "step": 1455 + }, + { + "chosen_geometric_mean": -1.1310808658599854, + "epoch": 0.36, + "grad_norm": 36.75, + "learning_rate": 4.61406844476228e-06, + "log_odds": 2.202515125274658, + "log_odds_ratio": -0.3248080015182495, + "loss": 0.2756, + "rejected_geometric_mean": -3.0951805114746094, + "step": 1456 + }, + { + "chosen_geometric_mean": -1.2797902822494507, + "epoch": 0.36, + "grad_norm": 7.09375, + "learning_rate": 4.613548670144202e-06, + "log_odds": 2.3374364376068115, + "log_odds_ratio": -0.22611485421657562, + "loss": 0.2842, + "rejected_geometric_mean": -3.3760793209075928, + "step": 1457 + }, + { + "chosen_geometric_mean": -0.9585232734680176, + "epoch": 0.36, + "grad_norm": 10.4375, + "learning_rate": 4.613028575061412e-06, + "log_odds": 3.607254981994629, + "log_odds_ratio": -0.5496553182601929, + "loss": 0.3244, + "rejected_geometric_mean": -4.526525974273682, + "step": 1458 + }, + { + "chosen_geometric_mean": -1.1159604787826538, + "epoch": 0.36, + "grad_norm": 11.625, + "learning_rate": 4.612508159592769e-06, + "log_odds": 1.0080755949020386, + "log_odds_ratio": -0.49602246284484863, + "loss": 0.3312, + "rejected_geometric_mean": -2.016479015350342, + "step": 1459 + }, + { + "chosen_geometric_mean": -1.1674964427947998, + "epoch": 0.36, + "grad_norm": 3.890625, + "learning_rate": 4.611987423817181e-06, + "log_odds": 0.787864625453949, + "log_odds_ratio": -0.4963518977165222, + "loss": 0.2953, + "rejected_geometric_mean": -1.8403836488723755, + "step": 1460 + }, + { + "chosen_geometric_mean": -1.7332350015640259, + "epoch": 0.36, + "grad_norm": 23.5, + "learning_rate": 4.6114663678136026e-06, + "log_odds": 3.918994903564453, + "log_odds_ratio": -0.44864025712013245, + "loss": 0.3443, + "rejected_geometric_mean": -5.513606071472168, + "step": 1461 + }, + { + "chosen_geometric_mean": -1.1060283184051514, + "epoch": 0.36, + "grad_norm": 10.75, + "learning_rate": 4.61094499166104e-06, + "log_odds": 1.4255290031433105, + "log_odds_ratio": -0.4309540390968323, + "loss": 0.3091, + "rejected_geometric_mean": -2.3950181007385254, + "step": 1462 + }, + { + "chosen_geometric_mean": -1.136723279953003, + "epoch": 0.36, + "grad_norm": 6.0625, + "learning_rate": 4.610423295438546e-06, + "log_odds": 4.113787651062012, + "log_odds_ratio": -0.23342810571193695, + "loss": 0.2658, + "rejected_geometric_mean": -4.9850382804870605, + "step": 1463 + }, + { + "chosen_geometric_mean": -1.1546839475631714, + "epoch": 0.36, + "grad_norm": 7.75, + "learning_rate": 4.609901279225222e-06, + "log_odds": 2.553309202194214, + "log_odds_ratio": -0.3624350428581238, + "loss": 0.3384, + "rejected_geometric_mean": -3.5244736671447754, + "step": 1464 + }, + { + "chosen_geometric_mean": -1.0319623947143555, + "epoch": 0.36, + "grad_norm": 3.046875, + "learning_rate": 4.6093789431002165e-06, + "log_odds": 3.0047240257263184, + "log_odds_ratio": -0.25803929567337036, + "loss": 0.2838, + "rejected_geometric_mean": -3.7828261852264404, + "step": 1465 + }, + { + "chosen_geometric_mean": -0.935274064540863, + "epoch": 0.36, + "grad_norm": 26.5, + "learning_rate": 4.608856287142731e-06, + "log_odds": 5.703431129455566, + "log_odds_ratio": -0.01330349501222372, + "loss": 0.3373, + "rejected_geometric_mean": -6.123660087585449, + "step": 1466 + }, + { + "chosen_geometric_mean": -1.1420867443084717, + "epoch": 0.36, + "grad_norm": 2.71875, + "learning_rate": 4.608333311432012e-06, + "log_odds": 1.9704787731170654, + "log_odds_ratio": -0.25532400608062744, + "loss": 0.3246, + "rejected_geometric_mean": -2.8587045669555664, + "step": 1467 + }, + { + "chosen_geometric_mean": -1.019932746887207, + "epoch": 0.36, + "grad_norm": 9.4375, + "learning_rate": 4.607810016047354e-06, + "log_odds": 0.8577715754508972, + "log_odds_ratio": -0.4314608573913574, + "loss": 0.3262, + "rejected_geometric_mean": -1.7290486097335815, + "step": 1468 + }, + { + "chosen_geometric_mean": -1.0278801918029785, + "epoch": 0.36, + "grad_norm": 4.21875, + "learning_rate": 4.607286401068103e-06, + "log_odds": 1.1187553405761719, + "log_odds_ratio": -0.3619157373905182, + "loss": 0.3055, + "rejected_geometric_mean": -1.9682438373565674, + "step": 1469 + }, + { + "chosen_geometric_mean": -1.1859067678451538, + "epoch": 0.36, + "grad_norm": 2.84375, + "learning_rate": 4.6067624665736485e-06, + "log_odds": 1.697790265083313, + "log_odds_ratio": -0.4359515309333801, + "loss": 0.4106, + "rejected_geometric_mean": -2.735109806060791, + "step": 1470 + }, + { + "chosen_geometric_mean": -1.1879603862762451, + "epoch": 0.36, + "grad_norm": 14.625, + "learning_rate": 4.606238212643436e-06, + "log_odds": 4.162668704986572, + "log_odds_ratio": -0.11595934629440308, + "loss": 0.3099, + "rejected_geometric_mean": -4.999417781829834, + "step": 1471 + }, + { + "chosen_geometric_mean": -1.1314611434936523, + "epoch": 0.36, + "grad_norm": 8.9375, + "learning_rate": 4.6057136393569505e-06, + "log_odds": 3.541705369949341, + "log_odds_ratio": -0.16821202635765076, + "loss": 0.3389, + "rejected_geometric_mean": -4.337607383728027, + "step": 1472 + }, + { + "chosen_geometric_mean": -1.188661813735962, + "epoch": 0.36, + "grad_norm": 4.03125, + "learning_rate": 4.6051887467937326e-06, + "log_odds": 3.295590400695801, + "log_odds_ratio": -0.31608808040618896, + "loss": 0.295, + "rejected_geometric_mean": -4.28494930267334, + "step": 1473 + }, + { + "chosen_geometric_mean": -1.1117725372314453, + "epoch": 0.36, + "grad_norm": 23.0, + "learning_rate": 4.604663535033368e-06, + "log_odds": 2.1041958332061768, + "log_odds_ratio": -0.34342262148857117, + "loss": 0.2819, + "rejected_geometric_mean": -2.972607374191284, + "step": 1474 + }, + { + "chosen_geometric_mean": -1.9557560682296753, + "epoch": 0.37, + "grad_norm": 9.4375, + "learning_rate": 4.6041380041554914e-06, + "log_odds": 3.7835183143615723, + "log_odds_ratio": -0.19642165303230286, + "loss": 0.3502, + "rejected_geometric_mean": -5.588187217712402, + "step": 1475 + }, + { + "chosen_geometric_mean": -1.5307607650756836, + "epoch": 0.37, + "grad_norm": 28.625, + "learning_rate": 4.603612154239786e-06, + "log_odds": 2.185267448425293, + "log_odds_ratio": -0.1817542314529419, + "loss": 0.3805, + "rejected_geometric_mean": -3.4406018257141113, + "step": 1476 + }, + { + "chosen_geometric_mean": -0.9587987661361694, + "epoch": 0.37, + "grad_norm": 2.9375, + "learning_rate": 4.603085985365982e-06, + "log_odds": 2.8006479740142822, + "log_odds_ratio": -0.34645307064056396, + "loss": 0.314, + "rejected_geometric_mean": -3.4775452613830566, + "step": 1477 + }, + { + "chosen_geometric_mean": -1.0415661334991455, + "epoch": 0.37, + "grad_norm": 2.5625, + "learning_rate": 4.602559497613861e-06, + "log_odds": 0.712054967880249, + "log_odds_ratio": -0.547055721282959, + "loss": 0.3075, + "rejected_geometric_mean": -1.6543352603912354, + "step": 1478 + }, + { + "chosen_geometric_mean": -1.2506461143493652, + "epoch": 0.37, + "grad_norm": 6.15625, + "learning_rate": 4.602032691063251e-06, + "log_odds": 2.6069178581237793, + "log_odds_ratio": -0.1988842934370041, + "loss": 0.2889, + "rejected_geometric_mean": -3.542881488800049, + "step": 1479 + }, + { + "chosen_geometric_mean": -0.9812649488449097, + "epoch": 0.37, + "grad_norm": 5.53125, + "learning_rate": 4.601505565794027e-06, + "log_odds": 1.4725306034088135, + "log_odds_ratio": -0.4179133474826813, + "loss": 0.2983, + "rejected_geometric_mean": -2.210866689682007, + "step": 1480 + }, + { + "chosen_geometric_mean": -1.283879041671753, + "epoch": 0.37, + "grad_norm": 4.28125, + "learning_rate": 4.600978121886116e-06, + "log_odds": 0.32305145263671875, + "log_odds_ratio": -0.5503706336021423, + "loss": 0.3066, + "rejected_geometric_mean": -1.520157814025879, + "step": 1481 + }, + { + "chosen_geometric_mean": -1.0795104503631592, + "epoch": 0.37, + "grad_norm": 3.5, + "learning_rate": 4.600450359419489e-06, + "log_odds": 0.9268131256103516, + "log_odds_ratio": -0.3824394941329956, + "loss": 0.3083, + "rejected_geometric_mean": -1.827045202255249, + "step": 1482 + }, + { + "chosen_geometric_mean": -1.349550485610962, + "epoch": 0.37, + "grad_norm": 5.8125, + "learning_rate": 4.5999222784741685e-06, + "log_odds": 0.5176019668579102, + "log_odds_ratio": -0.48425978422164917, + "loss": 0.3121, + "rejected_geometric_mean": -1.7578649520874023, + "step": 1483 + }, + { + "chosen_geometric_mean": -0.9885033965110779, + "epoch": 0.37, + "grad_norm": 3.40625, + "learning_rate": 4.599393879130225e-06, + "log_odds": 1.7656935453414917, + "log_odds_ratio": -0.46784961223602295, + "loss": 0.3091, + "rejected_geometric_mean": -2.579251766204834, + "step": 1484 + }, + { + "chosen_geometric_mean": -1.0489494800567627, + "epoch": 0.37, + "grad_norm": 2.515625, + "learning_rate": 4.598865161467775e-06, + "log_odds": 0.5381725430488586, + "log_odds_ratio": -0.5325372815132141, + "loss": 0.3044, + "rejected_geometric_mean": -1.4968748092651367, + "step": 1485 + }, + { + "chosen_geometric_mean": -1.068974494934082, + "epoch": 0.37, + "grad_norm": 2.671875, + "learning_rate": 4.598336125566986e-06, + "log_odds": 1.0520650148391724, + "log_odds_ratio": -0.35686081647872925, + "loss": 0.2676, + "rejected_geometric_mean": -1.9418957233428955, + "step": 1486 + }, + { + "chosen_geometric_mean": -1.2313854694366455, + "epoch": 0.37, + "grad_norm": 4.5625, + "learning_rate": 4.597806771508072e-06, + "log_odds": 1.9064995050430298, + "log_odds_ratio": -0.3696022629737854, + "loss": 0.3275, + "rejected_geometric_mean": -2.962824583053589, + "step": 1487 + }, + { + "chosen_geometric_mean": -1.1653473377227783, + "epoch": 0.37, + "grad_norm": 3.203125, + "learning_rate": 4.597277099371295e-06, + "log_odds": 4.794358730316162, + "log_odds_ratio": -0.02606082335114479, + "loss": 0.2368, + "rejected_geometric_mean": -5.578413963317871, + "step": 1488 + }, + { + "chosen_geometric_mean": -1.0450091361999512, + "epoch": 0.37, + "grad_norm": 5.125, + "learning_rate": 4.5967471092369686e-06, + "log_odds": 6.771695137023926, + "log_odds_ratio": -0.11655769497156143, + "loss": 0.2859, + "rejected_geometric_mean": -7.388764381408691, + "step": 1489 + }, + { + "chosen_geometric_mean": -1.2132624387741089, + "epoch": 0.37, + "grad_norm": 9.875, + "learning_rate": 4.59621680118545e-06, + "log_odds": 2.852810859680176, + "log_odds_ratio": -0.2926008701324463, + "loss": 0.3039, + "rejected_geometric_mean": -3.842711925506592, + "step": 1490 + }, + { + "chosen_geometric_mean": -1.1355338096618652, + "epoch": 0.37, + "grad_norm": 6.53125, + "learning_rate": 4.595686175297146e-06, + "log_odds": 4.988010883331299, + "log_odds_ratio": -0.28386053442955017, + "loss": 0.345, + "rejected_geometric_mean": -5.85849142074585, + "step": 1491 + }, + { + "chosen_geometric_mean": -2.5530548095703125, + "epoch": 0.37, + "grad_norm": 63.75, + "learning_rate": 4.595155231652513e-06, + "log_odds": -0.701549768447876, + "log_odds_ratio": -1.2648760080337524, + "loss": 0.4571, + "rejected_geometric_mean": -1.8374996185302734, + "step": 1492 + }, + { + "chosen_geometric_mean": -1.0329662561416626, + "epoch": 0.37, + "grad_norm": 37.75, + "learning_rate": 4.594623970332056e-06, + "log_odds": 4.996891975402832, + "log_odds_ratio": -0.13703703880310059, + "loss": 0.2983, + "rejected_geometric_mean": -5.6382927894592285, + "step": 1493 + }, + { + "chosen_geometric_mean": -1.3422861099243164, + "epoch": 0.37, + "grad_norm": 21.5, + "learning_rate": 4.594092391416325e-06, + "log_odds": 5.146800518035889, + "log_odds_ratio": -0.045473746955394745, + "loss": 0.3002, + "rejected_geometric_mean": -6.198325157165527, + "step": 1494 + }, + { + "chosen_geometric_mean": -1.014627456665039, + "epoch": 0.37, + "grad_norm": 53.25, + "learning_rate": 4.593560494985921e-06, + "log_odds": 5.600971221923828, + "log_odds_ratio": -0.055386822670698166, + "loss": 0.3466, + "rejected_geometric_mean": -6.198751449584961, + "step": 1495 + }, + { + "chosen_geometric_mean": -1.130090594291687, + "epoch": 0.37, + "grad_norm": 14.375, + "learning_rate": 4.593028281121494e-06, + "log_odds": 7.852907657623291, + "log_odds_ratio": -0.008345144800841808, + "loss": 0.3542, + "rejected_geometric_mean": -8.594594955444336, + "step": 1496 + }, + { + "chosen_geometric_mean": -1.386603832244873, + "epoch": 0.37, + "grad_norm": 57.75, + "learning_rate": 4.592495749903736e-06, + "log_odds": 9.189559936523438, + "log_odds_ratio": -0.13961055874824524, + "loss": 0.423, + "rejected_geometric_mean": -10.30351448059082, + "step": 1497 + }, + { + "chosen_geometric_mean": -1.013298511505127, + "epoch": 0.37, + "grad_norm": 6.5625, + "learning_rate": 4.591962901413396e-06, + "log_odds": 0.5508179664611816, + "log_odds_ratio": -0.5111557841300964, + "loss": 0.331, + "rejected_geometric_mean": -1.4367547035217285, + "step": 1498 + }, + { + "chosen_geometric_mean": -1.0986441373825073, + "epoch": 0.37, + "grad_norm": 5.6875, + "learning_rate": 4.5914297357312645e-06, + "log_odds": 5.809285640716553, + "log_odds_ratio": -0.14158910512924194, + "loss": 0.3027, + "rejected_geometric_mean": -6.577139854431152, + "step": 1499 + }, + { + "chosen_geometric_mean": -1.1697603464126587, + "epoch": 0.37, + "grad_norm": 3.6875, + "learning_rate": 4.590896252938183e-06, + "log_odds": 2.4760711193084717, + "log_odds_ratio": -0.2700382471084595, + "loss": 0.2812, + "rejected_geometric_mean": -3.385720729827881, + "step": 1500 + }, + { + "chosen_geometric_mean": -1.0857220888137817, + "epoch": 0.37, + "grad_norm": 4.40625, + "learning_rate": 4.590362453115039e-06, + "log_odds": 4.670676231384277, + "log_odds_ratio": -0.0814281776547432, + "loss": 0.2223, + "rejected_geometric_mean": -5.334166049957275, + "step": 1501 + }, + { + "chosen_geometric_mean": -1.337758183479309, + "epoch": 0.37, + "grad_norm": 6.34375, + "learning_rate": 4.5898283363427705e-06, + "log_odds": 6.806110382080078, + "log_odds_ratio": -0.01562603749334812, + "loss": 0.2783, + "rejected_geometric_mean": -7.787198066711426, + "step": 1502 + }, + { + "chosen_geometric_mean": -1.411303997039795, + "epoch": 0.37, + "grad_norm": 32.75, + "learning_rate": 4.589293902702362e-06, + "log_odds": 5.4840192794799805, + "log_odds_ratio": -0.18884864449501038, + "loss": 0.3133, + "rejected_geometric_mean": -6.713677883148193, + "step": 1503 + }, + { + "chosen_geometric_mean": -1.1368635892868042, + "epoch": 0.37, + "grad_norm": 11.75, + "learning_rate": 4.588759152274847e-06, + "log_odds": 2.4991791248321533, + "log_odds_ratio": -0.28075623512268066, + "loss": 0.2548, + "rejected_geometric_mean": -3.4169657230377197, + "step": 1504 + }, + { + "chosen_geometric_mean": -0.9936081171035767, + "epoch": 0.37, + "grad_norm": 2.6875, + "learning_rate": 4.588224085141306e-06, + "log_odds": 3.630675792694092, + "log_odds_ratio": -0.23261132836341858, + "loss": 0.2922, + "rejected_geometric_mean": -4.318724632263184, + "step": 1505 + }, + { + "chosen_geometric_mean": -1.135224461555481, + "epoch": 0.37, + "grad_norm": 16.25, + "learning_rate": 4.587688701382867e-06, + "log_odds": 1.480989694595337, + "log_odds_ratio": -0.2991344928741455, + "loss": 0.3612, + "rejected_geometric_mean": -2.391955852508545, + "step": 1506 + }, + { + "chosen_geometric_mean": -0.801735520362854, + "epoch": 0.37, + "grad_norm": 14.125, + "learning_rate": 4.58715300108071e-06, + "log_odds": 3.7916746139526367, + "log_odds_ratio": -0.24529139697551727, + "loss": 0.3008, + "rejected_geometric_mean": -4.173245429992676, + "step": 1507 + }, + { + "chosen_geometric_mean": -1.2961153984069824, + "epoch": 0.37, + "grad_norm": 10.8125, + "learning_rate": 4.586616984316058e-06, + "log_odds": 2.571043014526367, + "log_odds_ratio": -0.29467737674713135, + "loss": 0.3408, + "rejected_geometric_mean": -3.697512149810791, + "step": 1508 + }, + { + "chosen_geometric_mean": -1.0472952127456665, + "epoch": 0.37, + "grad_norm": 13.4375, + "learning_rate": 4.5860806511701836e-06, + "log_odds": 4.956719398498535, + "log_odds_ratio": -0.12308313697576523, + "loss": 0.2745, + "rejected_geometric_mean": -5.657515525817871, + "step": 1509 + }, + { + "chosen_geometric_mean": -1.3814448118209839, + "epoch": 0.37, + "grad_norm": 16.875, + "learning_rate": 4.585544001724408e-06, + "log_odds": 3.544369697570801, + "log_odds_ratio": -0.43743202090263367, + "loss": 0.2903, + "rejected_geometric_mean": -4.807173728942871, + "step": 1510 + }, + { + "chosen_geometric_mean": -1.1106600761413574, + "epoch": 0.37, + "grad_norm": 6.96875, + "learning_rate": 4.585007036060101e-06, + "log_odds": 1.830551266670227, + "log_odds_ratio": -0.18412479758262634, + "loss": 0.345, + "rejected_geometric_mean": -2.636711597442627, + "step": 1511 + }, + { + "chosen_geometric_mean": -1.0283012390136719, + "epoch": 0.37, + "grad_norm": 10.3125, + "learning_rate": 4.584469754258679e-06, + "log_odds": 0.9720558524131775, + "log_odds_ratio": -0.4565253257751465, + "loss": 0.3552, + "rejected_geometric_mean": -1.8227078914642334, + "step": 1512 + }, + { + "chosen_geometric_mean": -1.1418813467025757, + "epoch": 0.37, + "grad_norm": 2.4375, + "learning_rate": 4.583932156401607e-06, + "log_odds": 2.466088056564331, + "log_odds_ratio": -0.43479761481285095, + "loss": 0.3193, + "rejected_geometric_mean": -3.4080917835235596, + "step": 1513 + }, + { + "chosen_geometric_mean": -1.079543113708496, + "epoch": 0.37, + "grad_norm": 3.0625, + "learning_rate": 4.583394242570397e-06, + "log_odds": 1.5509356260299683, + "log_odds_ratio": -0.490561306476593, + "loss": 0.2632, + "rejected_geometric_mean": -2.555537223815918, + "step": 1514 + }, + { + "chosen_geometric_mean": -0.9421805143356323, + "epoch": 0.38, + "grad_norm": 5.4375, + "learning_rate": 4.58285601284661e-06, + "log_odds": 2.030519723892212, + "log_odds_ratio": -0.3229184150695801, + "loss": 0.3033, + "rejected_geometric_mean": -2.666264295578003, + "step": 1515 + }, + { + "chosen_geometric_mean": -1.2878531217575073, + "epoch": 0.38, + "grad_norm": 2.421875, + "learning_rate": 4.582317467311856e-06, + "log_odds": 2.201770305633545, + "log_odds_ratio": -0.4610859155654907, + "loss": 0.2922, + "rejected_geometric_mean": -3.3756468296051025, + "step": 1516 + }, + { + "chosen_geometric_mean": -1.144963264465332, + "epoch": 0.38, + "grad_norm": 4.90625, + "learning_rate": 4.581778606047788e-06, + "log_odds": 1.4234492778778076, + "log_odds_ratio": -0.33903467655181885, + "loss": 0.3495, + "rejected_geometric_mean": -2.3363089561462402, + "step": 1517 + }, + { + "chosen_geometric_mean": -1.2591757774353027, + "epoch": 0.38, + "grad_norm": 2.796875, + "learning_rate": 4.581239429136115e-06, + "log_odds": 0.7884895205497742, + "log_odds_ratio": -0.5768468379974365, + "loss": 0.3581, + "rejected_geometric_mean": -1.9941680431365967, + "step": 1518 + }, + { + "chosen_geometric_mean": -0.9718834161758423, + "epoch": 0.38, + "grad_norm": 5.125, + "learning_rate": 4.580699936658585e-06, + "log_odds": 0.5650339722633362, + "log_odds_ratio": -0.5051402449607849, + "loss": 0.2974, + "rejected_geometric_mean": -1.4042255878448486, + "step": 1519 + }, + { + "chosen_geometric_mean": -1.1108258962631226, + "epoch": 0.38, + "grad_norm": 2.734375, + "learning_rate": 4.5801601286970005e-06, + "log_odds": 0.7454726696014404, + "log_odds_ratio": -0.453815758228302, + "loss": 0.3008, + "rejected_geometric_mean": -1.6829414367675781, + "step": 1520 + }, + { + "chosen_geometric_mean": -1.0216071605682373, + "epoch": 0.38, + "grad_norm": 5.78125, + "learning_rate": 4.579620005333208e-06, + "log_odds": 1.7779121398925781, + "log_odds_ratio": -0.3164367079734802, + "loss": 0.3297, + "rejected_geometric_mean": -2.5372066497802734, + "step": 1521 + }, + { + "chosen_geometric_mean": -1.1452276706695557, + "epoch": 0.38, + "grad_norm": 30.125, + "learning_rate": 4.579079566649104e-06, + "log_odds": 2.679072141647339, + "log_odds_ratio": -0.2493586540222168, + "loss": 0.3655, + "rejected_geometric_mean": -3.5811336040496826, + "step": 1522 + }, + { + "chosen_geometric_mean": -1.295328140258789, + "epoch": 0.38, + "grad_norm": 4.03125, + "learning_rate": 4.5785388127266315e-06, + "log_odds": 3.0072214603424072, + "log_odds_ratio": -0.21633434295654297, + "loss": 0.2897, + "rejected_geometric_mean": -4.094902992248535, + "step": 1523 + }, + { + "chosen_geometric_mean": -1.2976014614105225, + "epoch": 0.38, + "grad_norm": 17.875, + "learning_rate": 4.577997743647782e-06, + "log_odds": 2.8655004501342773, + "log_odds_ratio": -0.2760055661201477, + "loss": 0.3718, + "rejected_geometric_mean": -3.9594650268554688, + "step": 1524 + }, + { + "chosen_geometric_mean": -1.0393024682998657, + "epoch": 0.38, + "grad_norm": 64.5, + "learning_rate": 4.577456359494594e-06, + "log_odds": 3.1819849014282227, + "log_odds_ratio": -0.1424512416124344, + "loss": 0.4688, + "rejected_geometric_mean": -3.875108242034912, + "step": 1525 + }, + { + "chosen_geometric_mean": -1.3473494052886963, + "epoch": 0.38, + "grad_norm": 14.9375, + "learning_rate": 4.576914660349155e-06, + "log_odds": 3.5282063484191895, + "log_odds_ratio": -0.42833709716796875, + "loss": 0.3227, + "rejected_geometric_mean": -4.751281261444092, + "step": 1526 + }, + { + "chosen_geometric_mean": -1.3653998374938965, + "epoch": 0.38, + "grad_norm": 5.125, + "learning_rate": 4.576372646293599e-06, + "log_odds": 4.662622451782227, + "log_odds_ratio": -0.11266900599002838, + "loss": 0.3107, + "rejected_geometric_mean": -5.696878433227539, + "step": 1527 + }, + { + "chosen_geometric_mean": -1.2658088207244873, + "epoch": 0.38, + "grad_norm": 14.0, + "learning_rate": 4.5758303174101105e-06, + "log_odds": 3.4661288261413574, + "log_odds_ratio": -0.32298409938812256, + "loss": 0.3326, + "rejected_geometric_mean": -4.558785438537598, + "step": 1528 + }, + { + "chosen_geometric_mean": -1.0363390445709229, + "epoch": 0.38, + "grad_norm": 6.34375, + "learning_rate": 4.575287673780916e-06, + "log_odds": 1.088029384613037, + "log_odds_ratio": -0.42722946405410767, + "loss": 0.3162, + "rejected_geometric_mean": -1.9293181896209717, + "step": 1529 + }, + { + "chosen_geometric_mean": -1.1497454643249512, + "epoch": 0.38, + "grad_norm": 7.6875, + "learning_rate": 4.574744715488296e-06, + "log_odds": 2.030602216720581, + "log_odds_ratio": -0.2869182527065277, + "loss": 0.356, + "rejected_geometric_mean": -2.9444034099578857, + "step": 1530 + }, + { + "chosen_geometric_mean": -1.2965244054794312, + "epoch": 0.38, + "grad_norm": 14.75, + "learning_rate": 4.574201442614575e-06, + "log_odds": 1.6234962940216064, + "log_odds_ratio": -0.3276890516281128, + "loss": 0.3337, + "rejected_geometric_mean": -2.705976963043213, + "step": 1531 + }, + { + "chosen_geometric_mean": -1.1458344459533691, + "epoch": 0.38, + "grad_norm": 4.84375, + "learning_rate": 4.573657855242126e-06, + "log_odds": 1.9650754928588867, + "log_odds_ratio": -0.25983136892318726, + "loss": 0.3172, + "rejected_geometric_mean": -2.875300884246826, + "step": 1532 + }, + { + "chosen_geometric_mean": -1.0662879943847656, + "epoch": 0.38, + "grad_norm": 3.4375, + "learning_rate": 4.5731139534533706e-06, + "log_odds": 0.8296006917953491, + "log_odds_ratio": -0.3926377296447754, + "loss": 0.3065, + "rejected_geometric_mean": -1.7195444107055664, + "step": 1533 + }, + { + "chosen_geometric_mean": -1.1922028064727783, + "epoch": 0.38, + "grad_norm": 3.296875, + "learning_rate": 4.572569737330777e-06, + "log_odds": 1.7482635974884033, + "log_odds_ratio": -0.29434096813201904, + "loss": 0.287, + "rejected_geometric_mean": -2.7357780933380127, + "step": 1534 + }, + { + "chosen_geometric_mean": -1.0809564590454102, + "epoch": 0.38, + "grad_norm": 6.0625, + "learning_rate": 4.5720252069568615e-06, + "log_odds": 1.741896152496338, + "log_odds_ratio": -0.30712977051734924, + "loss": 0.321, + "rejected_geometric_mean": -2.5224990844726562, + "step": 1535 + }, + { + "chosen_geometric_mean": -1.1907209157943726, + "epoch": 0.38, + "grad_norm": 7.09375, + "learning_rate": 4.571480362414188e-06, + "log_odds": 4.499973297119141, + "log_odds_ratio": -0.12353229522705078, + "loss": 0.3246, + "rejected_geometric_mean": -5.396533966064453, + "step": 1536 + }, + { + "chosen_geometric_mean": -1.2855072021484375, + "epoch": 0.38, + "grad_norm": 9.6875, + "learning_rate": 4.570935203785367e-06, + "log_odds": 5.257225036621094, + "log_odds_ratio": -0.24733728170394897, + "loss": 0.2998, + "rejected_geometric_mean": -6.330707550048828, + "step": 1537 + }, + { + "chosen_geometric_mean": -1.0709385871887207, + "epoch": 0.38, + "grad_norm": 2.953125, + "learning_rate": 4.570389731153059e-06, + "log_odds": 2.600937604904175, + "log_odds_ratio": -0.15200453996658325, + "loss": 0.307, + "rejected_geometric_mean": -3.342292308807373, + "step": 1538 + }, + { + "chosen_geometric_mean": -1.191806435585022, + "epoch": 0.38, + "grad_norm": 7.25, + "learning_rate": 4.569843944599971e-06, + "log_odds": 6.272963047027588, + "log_odds_ratio": -0.013439484871923923, + "loss": 0.3608, + "rejected_geometric_mean": -7.095616340637207, + "step": 1539 + }, + { + "chosen_geometric_mean": -1.1158156394958496, + "epoch": 0.38, + "grad_norm": 9.125, + "learning_rate": 4.569297844208855e-06, + "log_odds": 0.7887609601020813, + "log_odds_ratio": -0.40146657824516296, + "loss": 0.2867, + "rejected_geometric_mean": -1.7306841611862183, + "step": 1540 + }, + { + "chosen_geometric_mean": -1.2142404317855835, + "epoch": 0.38, + "grad_norm": 6.59375, + "learning_rate": 4.5687514300625165e-06, + "log_odds": 2.922107219696045, + "log_odds_ratio": -0.31376004219055176, + "loss": 0.3554, + "rejected_geometric_mean": -3.9395017623901367, + "step": 1541 + }, + { + "chosen_geometric_mean": -1.0879632234573364, + "epoch": 0.38, + "grad_norm": 7.59375, + "learning_rate": 4.568204702243802e-06, + "log_odds": 1.914637804031372, + "log_odds_ratio": -0.21888695657253265, + "loss": 0.3055, + "rejected_geometric_mean": -2.72727632522583, + "step": 1542 + }, + { + "chosen_geometric_mean": -1.1494979858398438, + "epoch": 0.38, + "grad_norm": 2.9375, + "learning_rate": 4.5676576608356095e-06, + "log_odds": 2.71207594871521, + "log_odds_ratio": -0.2403598129749298, + "loss": 0.3228, + "rejected_geometric_mean": -3.6091861724853516, + "step": 1543 + }, + { + "chosen_geometric_mean": -1.0337333679199219, + "epoch": 0.38, + "grad_norm": 3.28125, + "learning_rate": 4.567110305920884e-06, + "log_odds": 0.40623506903648376, + "log_odds_ratio": -0.5357698202133179, + "loss": 0.3562, + "rejected_geometric_mean": -1.3478971719741821, + "step": 1544 + }, + { + "chosen_geometric_mean": -1.1416703462600708, + "epoch": 0.38, + "grad_norm": 2.8125, + "learning_rate": 4.5665626375826176e-06, + "log_odds": 2.2318265438079834, + "log_odds_ratio": -0.24179182946681976, + "loss": 0.3038, + "rejected_geometric_mean": -3.1004700660705566, + "step": 1545 + }, + { + "chosen_geometric_mean": -1.3040474653244019, + "epoch": 0.38, + "grad_norm": 6.28125, + "learning_rate": 4.56601465590385e-06, + "log_odds": 1.5319147109985352, + "log_odds_ratio": -0.2800804078578949, + "loss": 0.3178, + "rejected_geometric_mean": -2.640072822570801, + "step": 1546 + }, + { + "chosen_geometric_mean": -1.280731439590454, + "epoch": 0.38, + "grad_norm": 3.109375, + "learning_rate": 4.565466360967667e-06, + "log_odds": 0.5636779069900513, + "log_odds_ratio": -0.5227541923522949, + "loss": 0.335, + "rejected_geometric_mean": -1.7553855180740356, + "step": 1547 + }, + { + "chosen_geometric_mean": -1.1197882890701294, + "epoch": 0.38, + "grad_norm": 17.75, + "learning_rate": 4.564917752857206e-06, + "log_odds": 1.3427653312683105, + "log_odds_ratio": -0.32155752182006836, + "loss": 0.2894, + "rejected_geometric_mean": -2.217867612838745, + "step": 1548 + }, + { + "chosen_geometric_mean": -0.9851348996162415, + "epoch": 0.38, + "grad_norm": 3.921875, + "learning_rate": 4.564368831655647e-06, + "log_odds": 2.3970446586608887, + "log_odds_ratio": -0.41293931007385254, + "loss": 0.3316, + "rejected_geometric_mean": -3.196033477783203, + "step": 1549 + }, + { + "chosen_geometric_mean": -1.1176536083221436, + "epoch": 0.38, + "grad_norm": 3.015625, + "learning_rate": 4.56381959744622e-06, + "log_odds": 0.9416757225990295, + "log_odds_ratio": -0.45704132318496704, + "loss": 0.346, + "rejected_geometric_mean": -1.9118309020996094, + "step": 1550 + }, + { + "chosen_geometric_mean": -1.1826283931732178, + "epoch": 0.38, + "grad_norm": 14.0625, + "learning_rate": 4.5632700503122015e-06, + "log_odds": 3.813235282897949, + "log_odds_ratio": -0.11832689493894577, + "loss": 0.4411, + "rejected_geometric_mean": -4.689940929412842, + "step": 1551 + }, + { + "chosen_geometric_mean": -1.3130062818527222, + "epoch": 0.38, + "grad_norm": 7.6875, + "learning_rate": 4.562720190336918e-06, + "log_odds": 2.909276008605957, + "log_odds_ratio": -0.2918027937412262, + "loss": 0.3174, + "rejected_geometric_mean": -4.023200511932373, + "step": 1552 + }, + { + "chosen_geometric_mean": -0.9970000982284546, + "epoch": 0.38, + "grad_norm": 13.25, + "learning_rate": 4.562170017603739e-06, + "log_odds": 4.314116954803467, + "log_odds_ratio": -0.03302929550409317, + "loss": 0.2906, + "rejected_geometric_mean": -4.836977481842041, + "step": 1553 + }, + { + "chosen_geometric_mean": -1.0698457956314087, + "epoch": 0.38, + "grad_norm": 4.71875, + "learning_rate": 4.561619532196086e-06, + "log_odds": 3.140101909637451, + "log_odds_ratio": -0.1928396075963974, + "loss": 0.2916, + "rejected_geometric_mean": -3.895656108856201, + "step": 1554 + }, + { + "chosen_geometric_mean": -0.9614567756652832, + "epoch": 0.38, + "grad_norm": 22.625, + "learning_rate": 4.5610687341974245e-06, + "log_odds": 2.581184148788452, + "log_odds_ratio": -0.36035919189453125, + "loss": 0.4274, + "rejected_geometric_mean": -3.24422550201416, + "step": 1555 + }, + { + "chosen_geometric_mean": -1.0598586797714233, + "epoch": 0.39, + "grad_norm": 30.125, + "learning_rate": 4.560517623691269e-06, + "log_odds": 2.148226022720337, + "log_odds_ratio": -0.19874358177185059, + "loss": 0.351, + "rejected_geometric_mean": -2.9034411907196045, + "step": 1556 + }, + { + "chosen_geometric_mean": -1.2568409442901611, + "epoch": 0.39, + "grad_norm": 5.875, + "learning_rate": 4.5599662007611825e-06, + "log_odds": 2.2178094387054443, + "log_odds_ratio": -0.28886502981185913, + "loss": 0.2841, + "rejected_geometric_mean": -3.237257719039917, + "step": 1557 + }, + { + "chosen_geometric_mean": -1.1520096063613892, + "epoch": 0.39, + "grad_norm": 17.375, + "learning_rate": 4.559414465490773e-06, + "log_odds": 2.2231829166412354, + "log_odds_ratio": -0.13670018315315247, + "loss": 0.3498, + "rejected_geometric_mean": -3.0635836124420166, + "step": 1558 + }, + { + "chosen_geometric_mean": -1.0041892528533936, + "epoch": 0.39, + "grad_norm": 9.1875, + "learning_rate": 4.558862417963694e-06, + "log_odds": 1.835819959640503, + "log_odds_ratio": -0.2818332016468048, + "loss": 0.3012, + "rejected_geometric_mean": -2.5373895168304443, + "step": 1559 + }, + { + "chosen_geometric_mean": -1.3418747186660767, + "epoch": 0.39, + "grad_norm": 6.15625, + "learning_rate": 4.558310058263653e-06, + "log_odds": 0.26433801651000977, + "log_odds_ratio": -0.6441822648048401, + "loss": 0.3229, + "rejected_geometric_mean": -1.5673679113388062, + "step": 1560 + }, + { + "chosen_geometric_mean": -1.209672212600708, + "epoch": 0.39, + "grad_norm": 3.09375, + "learning_rate": 4.5577573864744e-06, + "log_odds": 1.1203296184539795, + "log_odds_ratio": -0.3880811333656311, + "loss": 0.3267, + "rejected_geometric_mean": -2.1075637340545654, + "step": 1561 + }, + { + "chosen_geometric_mean": -1.040501594543457, + "epoch": 0.39, + "grad_norm": 9.3125, + "learning_rate": 4.557204402679734e-06, + "log_odds": 3.1971583366394043, + "log_odds_ratio": -0.17864511907100677, + "loss": 0.3168, + "rejected_geometric_mean": -3.8750741481781006, + "step": 1562 + }, + { + "chosen_geometric_mean": -1.1009488105773926, + "epoch": 0.39, + "grad_norm": 2.953125, + "learning_rate": 4.556651106963498e-06, + "log_odds": 0.7254916429519653, + "log_odds_ratio": -0.4541694223880768, + "loss": 0.3171, + "rejected_geometric_mean": -1.6661428213119507, + "step": 1563 + }, + { + "chosen_geometric_mean": -1.259527325630188, + "epoch": 0.39, + "grad_norm": 8.5625, + "learning_rate": 4.556097499409586e-06, + "log_odds": 1.919930100440979, + "log_odds_ratio": -0.3761522173881531, + "loss": 0.3564, + "rejected_geometric_mean": -3.051727056503296, + "step": 1564 + }, + { + "chosen_geometric_mean": -1.1347070932388306, + "epoch": 0.39, + "grad_norm": 16.0, + "learning_rate": 4.5555435801019395e-06, + "log_odds": 2.7208914756774902, + "log_odds_ratio": -0.28850239515304565, + "loss": 0.3755, + "rejected_geometric_mean": -3.62947416305542, + "step": 1565 + }, + { + "chosen_geometric_mean": -1.340543508529663, + "epoch": 0.39, + "grad_norm": 3.359375, + "learning_rate": 4.554989349124546e-06, + "log_odds": 1.442563772201538, + "log_odds_ratio": -0.2950704097747803, + "loss": 0.2928, + "rejected_geometric_mean": -2.602442502975464, + "step": 1566 + }, + { + "chosen_geometric_mean": -0.9795596599578857, + "epoch": 0.39, + "grad_norm": 4.59375, + "learning_rate": 4.554434806561438e-06, + "log_odds": 3.7271015644073486, + "log_odds_ratio": -0.21316614747047424, + "loss": 0.2993, + "rejected_geometric_mean": -4.323164463043213, + "step": 1567 + }, + { + "chosen_geometric_mean": -1.0078593492507935, + "epoch": 0.39, + "grad_norm": 20.875, + "learning_rate": 4.5538799524967e-06, + "log_odds": 2.06544828414917, + "log_odds_ratio": -0.2711518406867981, + "loss": 0.3094, + "rejected_geometric_mean": -2.7842233180999756, + "step": 1568 + }, + { + "chosen_geometric_mean": -1.111436367034912, + "epoch": 0.39, + "grad_norm": 4.15625, + "learning_rate": 4.553324787014459e-06, + "log_odds": 1.0624653100967407, + "log_odds_ratio": -0.4171214699745178, + "loss": 0.3178, + "rejected_geometric_mean": -1.9984266757965088, + "step": 1569 + }, + { + "chosen_geometric_mean": -1.164101243019104, + "epoch": 0.39, + "grad_norm": 19.125, + "learning_rate": 4.552769310198893e-06, + "log_odds": 2.6679437160491943, + "log_odds_ratio": -0.20425072312355042, + "loss": 0.3516, + "rejected_geometric_mean": -3.5353739261627197, + "step": 1570 + }, + { + "chosen_geometric_mean": -1.0830662250518799, + "epoch": 0.39, + "grad_norm": 3.40625, + "learning_rate": 4.552213522134226e-06, + "log_odds": 3.8239381313323975, + "log_odds_ratio": -0.1606370061635971, + "loss": 0.2659, + "rejected_geometric_mean": -4.577947616577148, + "step": 1571 + }, + { + "chosen_geometric_mean": -0.989113986492157, + "epoch": 0.39, + "grad_norm": 20.5, + "learning_rate": 4.551657422904726e-06, + "log_odds": 2.709540367126465, + "log_odds_ratio": -0.2667732238769531, + "loss": 0.2891, + "rejected_geometric_mean": -3.4124836921691895, + "step": 1572 + }, + { + "chosen_geometric_mean": -0.9549914002418518, + "epoch": 0.39, + "grad_norm": 3.890625, + "learning_rate": 4.551101012594714e-06, + "log_odds": 0.6234775185585022, + "log_odds_ratio": -0.467641681432724, + "loss": 0.3043, + "rejected_geometric_mean": -1.4214273691177368, + "step": 1573 + }, + { + "chosen_geometric_mean": -1.1770328283309937, + "epoch": 0.39, + "grad_norm": 4.9375, + "learning_rate": 4.550544291288554e-06, + "log_odds": 0.8260797262191772, + "log_odds_ratio": -0.40040573477745056, + "loss": 0.2768, + "rejected_geometric_mean": -1.8199878931045532, + "step": 1574 + }, + { + "chosen_geometric_mean": -1.2100826501846313, + "epoch": 0.39, + "grad_norm": 9.5, + "learning_rate": 4.549987259070658e-06, + "log_odds": 1.1596086025238037, + "log_odds_ratio": -0.3452179431915283, + "loss": 0.3235, + "rejected_geometric_mean": -2.2187509536743164, + "step": 1575 + }, + { + "chosen_geometric_mean": -1.1425902843475342, + "epoch": 0.39, + "grad_norm": 5.90625, + "learning_rate": 4.549429916025486e-06, + "log_odds": 2.4083986282348633, + "log_odds_ratio": -0.37456753849983215, + "loss": 0.2956, + "rejected_geometric_mean": -3.3800933361053467, + "step": 1576 + }, + { + "chosen_geometric_mean": -1.1869416236877441, + "epoch": 0.39, + "grad_norm": 11.3125, + "learning_rate": 4.548872262237544e-06, + "log_odds": 5.219871997833252, + "log_odds_ratio": -0.141618013381958, + "loss": 0.2879, + "rejected_geometric_mean": -6.102996826171875, + "step": 1577 + }, + { + "chosen_geometric_mean": -1.05623197555542, + "epoch": 0.39, + "grad_norm": 6.90625, + "learning_rate": 4.548314297791386e-06, + "log_odds": 6.912762641906738, + "log_odds_ratio": -0.16283392906188965, + "loss": 0.2914, + "rejected_geometric_mean": -7.659709930419922, + "step": 1578 + }, + { + "chosen_geometric_mean": -1.1517820358276367, + "epoch": 0.39, + "grad_norm": 18.5, + "learning_rate": 4.547756022771614e-06, + "log_odds": 1.5239503383636475, + "log_odds_ratio": -0.4503345489501953, + "loss": 0.3033, + "rejected_geometric_mean": -2.5086824893951416, + "step": 1579 + }, + { + "chosen_geometric_mean": -1.0251622200012207, + "epoch": 0.39, + "grad_norm": 7.03125, + "learning_rate": 4.5471974372628735e-06, + "log_odds": 3.1011834144592285, + "log_odds_ratio": -0.15925510227680206, + "loss": 0.2902, + "rejected_geometric_mean": -3.7664413452148438, + "step": 1580 + }, + { + "chosen_geometric_mean": -1.0791809558868408, + "epoch": 0.39, + "grad_norm": 11.6875, + "learning_rate": 4.546638541349862e-06, + "log_odds": 4.4813079833984375, + "log_odds_ratio": -0.21768711507320404, + "loss": 0.3029, + "rejected_geometric_mean": -5.265204429626465, + "step": 1581 + }, + { + "chosen_geometric_mean": -1.2165220975875854, + "epoch": 0.39, + "grad_norm": 4.09375, + "learning_rate": 4.54607933511732e-06, + "log_odds": 3.1393580436706543, + "log_odds_ratio": -0.1946781873703003, + "loss": 0.2851, + "rejected_geometric_mean": -4.050002574920654, + "step": 1582 + }, + { + "chosen_geometric_mean": -1.1594572067260742, + "epoch": 0.39, + "grad_norm": 7.65625, + "learning_rate": 4.545519818650036e-06, + "log_odds": 1.9680676460266113, + "log_odds_ratio": -0.28787314891815186, + "loss": 0.2542, + "rejected_geometric_mean": -2.890821933746338, + "step": 1583 + }, + { + "chosen_geometric_mean": -1.244327425956726, + "epoch": 0.39, + "grad_norm": 2.609375, + "learning_rate": 4.544959992032847e-06, + "log_odds": 2.498636245727539, + "log_odds_ratio": -0.2302892804145813, + "loss": 0.3002, + "rejected_geometric_mean": -3.485732316970825, + "step": 1584 + }, + { + "chosen_geometric_mean": -1.1468784809112549, + "epoch": 0.39, + "grad_norm": 7.0625, + "learning_rate": 4.544399855350638e-06, + "log_odds": 1.5533626079559326, + "log_odds_ratio": -0.3873319625854492, + "loss": 0.3085, + "rejected_geometric_mean": -2.507148265838623, + "step": 1585 + }, + { + "chosen_geometric_mean": -1.1202106475830078, + "epoch": 0.39, + "grad_norm": 6.46875, + "learning_rate": 4.543839408688336e-06, + "log_odds": 3.1549971103668213, + "log_odds_ratio": -0.18934977054595947, + "loss": 0.3005, + "rejected_geometric_mean": -3.9610977172851562, + "step": 1586 + }, + { + "chosen_geometric_mean": -1.1084942817687988, + "epoch": 0.39, + "grad_norm": 12.9375, + "learning_rate": 4.5432786521309185e-06, + "log_odds": 1.0789408683776855, + "log_odds_ratio": -0.46613800525665283, + "loss": 0.3084, + "rejected_geometric_mean": -2.0772109031677246, + "step": 1587 + }, + { + "chosen_geometric_mean": -1.2302104234695435, + "epoch": 0.39, + "grad_norm": 4.84375, + "learning_rate": 4.542717585763412e-06, + "log_odds": 1.8747529983520508, + "log_odds_ratio": -0.30344825983047485, + "loss": 0.3147, + "rejected_geometric_mean": -2.873659372329712, + "step": 1588 + }, + { + "chosen_geometric_mean": -1.2252057790756226, + "epoch": 0.39, + "grad_norm": 13.4375, + "learning_rate": 4.542156209670886e-06, + "log_odds": 2.1063358783721924, + "log_odds_ratio": -0.35483652353286743, + "loss": 0.3187, + "rejected_geometric_mean": -3.1142830848693848, + "step": 1589 + }, + { + "chosen_geometric_mean": -1.1500424146652222, + "epoch": 0.39, + "grad_norm": 16.0, + "learning_rate": 4.541594523938459e-06, + "log_odds": 3.305656909942627, + "log_odds_ratio": -0.2714472711086273, + "loss": 0.3146, + "rejected_geometric_mean": -4.205766201019287, + "step": 1590 + }, + { + "chosen_geometric_mean": -1.0664074420928955, + "epoch": 0.39, + "grad_norm": 3.953125, + "learning_rate": 4.541032528651294e-06, + "log_odds": 0.281460702419281, + "log_odds_ratio": -0.5725318193435669, + "loss": 0.3078, + "rejected_geometric_mean": -1.2770766019821167, + "step": 1591 + }, + { + "chosen_geometric_mean": -1.2310372591018677, + "epoch": 0.39, + "grad_norm": 7.84375, + "learning_rate": 4.5404702238946075e-06, + "log_odds": 4.06941032409668, + "log_odds_ratio": -0.09084419906139374, + "loss": 0.3205, + "rejected_geometric_mean": -4.992154598236084, + "step": 1592 + }, + { + "chosen_geometric_mean": -1.180891752243042, + "epoch": 0.39, + "grad_norm": 6.125, + "learning_rate": 4.539907609753655e-06, + "log_odds": 3.1745617389678955, + "log_odds_ratio": -0.23557928204536438, + "loss": 0.2759, + "rejected_geometric_mean": -4.1076884269714355, + "step": 1593 + }, + { + "chosen_geometric_mean": -1.0005462169647217, + "epoch": 0.39, + "grad_norm": 19.0, + "learning_rate": 4.539344686313742e-06, + "log_odds": 2.525090456008911, + "log_odds_ratio": -0.27553942799568176, + "loss": 0.2961, + "rejected_geometric_mean": -3.252697706222534, + "step": 1594 + }, + { + "chosen_geometric_mean": -1.0765159130096436, + "epoch": 0.39, + "grad_norm": 3.640625, + "learning_rate": 4.538781453660224e-06, + "log_odds": 0.3350633680820465, + "log_odds_ratio": -0.543206512928009, + "loss": 0.3351, + "rejected_geometric_mean": -1.3073703050613403, + "step": 1595 + }, + { + "chosen_geometric_mean": -1.3813865184783936, + "epoch": 0.4, + "grad_norm": 8.4375, + "learning_rate": 4.538217911878497e-06, + "log_odds": 3.4033329486846924, + "log_odds_ratio": -0.3074553608894348, + "loss": 0.2724, + "rejected_geometric_mean": -4.627069473266602, + "step": 1596 + }, + { + "chosen_geometric_mean": -1.0923089981079102, + "epoch": 0.4, + "grad_norm": 7.125, + "learning_rate": 4.537654061054012e-06, + "log_odds": 2.2934391498565674, + "log_odds_ratio": -0.40880608558654785, + "loss": 0.3136, + "rejected_geometric_mean": -3.2120070457458496, + "step": 1597 + }, + { + "chosen_geometric_mean": -1.3170713186264038, + "epoch": 0.4, + "grad_norm": 3.875, + "learning_rate": 4.537089901272257e-06, + "log_odds": 3.412313938140869, + "log_odds_ratio": -0.2195303589105606, + "loss": 0.3849, + "rejected_geometric_mean": -4.498272895812988, + "step": 1598 + }, + { + "chosen_geometric_mean": -1.1717300415039062, + "epoch": 0.4, + "grad_norm": 2.96875, + "learning_rate": 4.536525432618777e-06, + "log_odds": 3.3939425945281982, + "log_odds_ratio": -0.2511332035064697, + "loss": 0.3054, + "rejected_geometric_mean": -4.298965930938721, + "step": 1599 + }, + { + "chosen_geometric_mean": -1.0541757345199585, + "epoch": 0.4, + "grad_norm": 2.03125, + "learning_rate": 4.535960655179155e-06, + "log_odds": 3.374568462371826, + "log_odds_ratio": -0.3726736307144165, + "loss": 0.2816, + "rejected_geometric_mean": -4.262835502624512, + "step": 1600 + }, + { + "chosen_geometric_mean": -1.1742384433746338, + "epoch": 0.4, + "grad_norm": 5.96875, + "learning_rate": 4.535395569039027e-06, + "log_odds": 2.977888584136963, + "log_odds_ratio": -0.3105067312717438, + "loss": 0.3151, + "rejected_geometric_mean": -3.9302287101745605, + "step": 1601 + }, + { + "chosen_geometric_mean": -1.1307015419006348, + "epoch": 0.4, + "grad_norm": 6.5, + "learning_rate": 4.534830174284074e-06, + "log_odds": 4.958407402038574, + "log_odds_ratio": -0.33173668384552, + "loss": 0.3006, + "rejected_geometric_mean": -5.807947158813477, + "step": 1602 + }, + { + "chosen_geometric_mean": -1.0819908380508423, + "epoch": 0.4, + "grad_norm": 7.1875, + "learning_rate": 4.534264471000022e-06, + "log_odds": 5.269145965576172, + "log_odds_ratio": -0.02555025741457939, + "loss": 0.2672, + "rejected_geometric_mean": -5.930891990661621, + "step": 1603 + }, + { + "chosen_geometric_mean": -1.4248219728469849, + "epoch": 0.4, + "grad_norm": 23.375, + "learning_rate": 4.533698459272646e-06, + "log_odds": 0.6821508407592773, + "log_odds_ratio": -0.5141878128051758, + "loss": 0.4144, + "rejected_geometric_mean": -2.072136163711548, + "step": 1604 + }, + { + "chosen_geometric_mean": -1.3341219425201416, + "epoch": 0.4, + "grad_norm": 4.53125, + "learning_rate": 4.533132139187768e-06, + "log_odds": 5.326561450958252, + "log_odds_ratio": -0.18953032791614532, + "loss": 0.2913, + "rejected_geometric_mean": -6.414937973022461, + "step": 1605 + }, + { + "chosen_geometric_mean": -1.219595193862915, + "epoch": 0.4, + "grad_norm": 14.5, + "learning_rate": 4.532565510831254e-06, + "log_odds": 1.5006752014160156, + "log_odds_ratio": -0.3345961272716522, + "loss": 0.2895, + "rejected_geometric_mean": -2.518216609954834, + "step": 1606 + }, + { + "chosen_geometric_mean": -1.0030299425125122, + "epoch": 0.4, + "grad_norm": 6.1875, + "learning_rate": 4.531998574289018e-06, + "log_odds": 2.850450277328491, + "log_odds_ratio": -0.16290618479251862, + "loss": 0.3242, + "rejected_geometric_mean": -3.4465200901031494, + "step": 1607 + }, + { + "chosen_geometric_mean": -1.5218381881713867, + "epoch": 0.4, + "grad_norm": 54.25, + "learning_rate": 4.531431329647022e-06, + "log_odds": 5.642401695251465, + "log_odds_ratio": -0.13747797906398773, + "loss": 0.361, + "rejected_geometric_mean": -6.873940944671631, + "step": 1608 + }, + { + "chosen_geometric_mean": -1.4125702381134033, + "epoch": 0.4, + "grad_norm": 11.4375, + "learning_rate": 4.530863776991276e-06, + "log_odds": 2.210129737854004, + "log_odds_ratio": -0.38838887214660645, + "loss": 0.3332, + "rejected_geometric_mean": -3.480222225189209, + "step": 1609 + }, + { + "chosen_geometric_mean": -1.116542100906372, + "epoch": 0.4, + "grad_norm": 2.890625, + "learning_rate": 4.530295916407832e-06, + "log_odds": 4.830536842346191, + "log_odds_ratio": -0.14457423985004425, + "loss": 0.299, + "rejected_geometric_mean": -5.650726795196533, + "step": 1610 + }, + { + "chosen_geometric_mean": -1.2220888137817383, + "epoch": 0.4, + "grad_norm": 17.375, + "learning_rate": 4.52972774798279e-06, + "log_odds": 1.1639198064804077, + "log_odds_ratio": -0.3004240095615387, + "loss": 0.3347, + "rejected_geometric_mean": -2.186486005783081, + "step": 1611 + }, + { + "chosen_geometric_mean": -1.2982920408248901, + "epoch": 0.4, + "grad_norm": 3.0625, + "learning_rate": 4.529159271802303e-06, + "log_odds": 1.8622606992721558, + "log_odds_ratio": -0.4948020875453949, + "loss": 0.3033, + "rejected_geometric_mean": -3.0886714458465576, + "step": 1612 + }, + { + "chosen_geometric_mean": -1.0654144287109375, + "epoch": 0.4, + "grad_norm": 2.734375, + "learning_rate": 4.5285904879525614e-06, + "log_odds": 1.3069164752960205, + "log_odds_ratio": -0.46914443373680115, + "loss": 0.3122, + "rejected_geometric_mean": -2.230520486831665, + "step": 1613 + }, + { + "chosen_geometric_mean": -1.032609462738037, + "epoch": 0.4, + "grad_norm": 2.90625, + "learning_rate": 4.528021396519807e-06, + "log_odds": 3.9233429431915283, + "log_odds_ratio": -0.20566613972187042, + "loss": 0.3086, + "rejected_geometric_mean": -4.641011714935303, + "step": 1614 + }, + { + "chosen_geometric_mean": -0.9350543022155762, + "epoch": 0.4, + "grad_norm": 28.125, + "learning_rate": 4.52745199759033e-06, + "log_odds": 4.6666765213012695, + "log_odds_ratio": -0.15569648146629333, + "loss": 0.3812, + "rejected_geometric_mean": -5.191585540771484, + "step": 1615 + }, + { + "chosen_geometric_mean": -1.2751867771148682, + "epoch": 0.4, + "grad_norm": 33.5, + "learning_rate": 4.526882291250462e-06, + "log_odds": 2.4076125621795654, + "log_odds_ratio": -0.22982439398765564, + "loss": 0.3332, + "rejected_geometric_mean": -3.4606504440307617, + "step": 1616 + }, + { + "chosen_geometric_mean": -1.40121328830719, + "epoch": 0.4, + "grad_norm": 9.1875, + "learning_rate": 4.526312277586586e-06, + "log_odds": 6.324715614318848, + "log_odds_ratio": -0.08998869359493256, + "loss": 0.3126, + "rejected_geometric_mean": -7.465877056121826, + "step": 1617 + }, + { + "chosen_geometric_mean": -1.1045500040054321, + "epoch": 0.4, + "grad_norm": 3.21875, + "learning_rate": 4.525741956685129e-06, + "log_odds": 5.52852725982666, + "log_odds_ratio": -0.3532373905181885, + "loss": 0.323, + "rejected_geometric_mean": -6.418085098266602, + "step": 1618 + }, + { + "chosen_geometric_mean": -1.2685050964355469, + "epoch": 0.4, + "grad_norm": 28.375, + "learning_rate": 4.525171328632566e-06, + "log_odds": 3.814138889312744, + "log_odds_ratio": -0.2208782583475113, + "loss": 0.328, + "rejected_geometric_mean": -4.840380668640137, + "step": 1619 + }, + { + "chosen_geometric_mean": -0.863332986831665, + "epoch": 0.4, + "grad_norm": 25.375, + "learning_rate": 4.524600393515417e-06, + "log_odds": 3.308924674987793, + "log_odds_ratio": -0.370944082736969, + "loss": 0.3034, + "rejected_geometric_mean": -3.8801207542419434, + "step": 1620 + }, + { + "chosen_geometric_mean": -1.0670017004013062, + "epoch": 0.4, + "grad_norm": 4.5, + "learning_rate": 4.524029151420251e-06, + "log_odds": 2.1941111087799072, + "log_odds_ratio": -0.2771848142147064, + "loss": 0.3043, + "rejected_geometric_mean": -2.9562416076660156, + "step": 1621 + }, + { + "chosen_geometric_mean": -1.1600620746612549, + "epoch": 0.4, + "grad_norm": 4.71875, + "learning_rate": 4.523457602433679e-06, + "log_odds": 2.60280704498291, + "log_odds_ratio": -0.25363609194755554, + "loss": 0.2967, + "rejected_geometric_mean": -3.5022637844085693, + "step": 1622 + }, + { + "chosen_geometric_mean": -1.1840254068374634, + "epoch": 0.4, + "grad_norm": 10.4375, + "learning_rate": 4.522885746642366e-06, + "log_odds": 0.5739185214042664, + "log_odds_ratio": -0.46068522334098816, + "loss": 0.3016, + "rejected_geometric_mean": -1.6245124340057373, + "step": 1623 + }, + { + "chosen_geometric_mean": -1.1333253383636475, + "epoch": 0.4, + "grad_norm": 6.8125, + "learning_rate": 4.5223135841330155e-06, + "log_odds": 2.761371374130249, + "log_odds_ratio": -0.2655472159385681, + "loss": 0.2837, + "rejected_geometric_mean": -3.6324002742767334, + "step": 1624 + }, + { + "chosen_geometric_mean": -1.004289984703064, + "epoch": 0.4, + "grad_norm": 10.4375, + "learning_rate": 4.521741114992382e-06, + "log_odds": 3.583911657333374, + "log_odds_ratio": -0.17430540919303894, + "loss": 0.2954, + "rejected_geometric_mean": -4.222709655761719, + "step": 1625 + }, + { + "chosen_geometric_mean": -1.1861257553100586, + "epoch": 0.4, + "grad_norm": 3.453125, + "learning_rate": 4.521168339307267e-06, + "log_odds": 7.012939929962158, + "log_odds_ratio": -0.18128105998039246, + "loss": 0.3022, + "rejected_geometric_mean": -7.955594062805176, + "step": 1626 + }, + { + "chosen_geometric_mean": -1.2608462572097778, + "epoch": 0.4, + "grad_norm": 3.421875, + "learning_rate": 4.520595257164514e-06, + "log_odds": 3.115882158279419, + "log_odds_ratio": -0.34255364537239075, + "loss": 0.3423, + "rejected_geometric_mean": -4.198894023895264, + "step": 1627 + }, + { + "chosen_geometric_mean": -1.0932246446609497, + "epoch": 0.4, + "grad_norm": 3.34375, + "learning_rate": 4.5200218686510186e-06, + "log_odds": 1.969091773033142, + "log_odds_ratio": -0.30903932452201843, + "loss": 0.2594, + "rejected_geometric_mean": -2.756934642791748, + "step": 1628 + }, + { + "chosen_geometric_mean": -1.117788553237915, + "epoch": 0.4, + "grad_norm": 10.375, + "learning_rate": 4.51944817385372e-06, + "log_odds": 3.269693374633789, + "log_odds_ratio": -0.2623392641544342, + "loss": 0.34, + "rejected_geometric_mean": -4.116168975830078, + "step": 1629 + }, + { + "chosen_geometric_mean": -1.0059845447540283, + "epoch": 0.4, + "grad_norm": 4.15625, + "learning_rate": 4.518874172859603e-06, + "log_odds": 1.997402310371399, + "log_odds_ratio": -0.3319195508956909, + "loss": 0.2648, + "rejected_geometric_mean": -2.77522349357605, + "step": 1630 + }, + { + "chosen_geometric_mean": -1.1971590518951416, + "epoch": 0.4, + "grad_norm": 2.421875, + "learning_rate": 4.518299865755701e-06, + "log_odds": 7.254344940185547, + "log_odds_ratio": -0.16930609941482544, + "loss": 0.282, + "rejected_geometric_mean": -8.137669563293457, + "step": 1631 + }, + { + "chosen_geometric_mean": -1.2037749290466309, + "epoch": 0.4, + "grad_norm": 13.125, + "learning_rate": 4.517725252629092e-06, + "log_odds": 1.5139639377593994, + "log_odds_ratio": -0.44849973917007446, + "loss": 0.3279, + "rejected_geometric_mean": -2.603100538253784, + "step": 1632 + }, + { + "chosen_geometric_mean": -1.0177446603775024, + "epoch": 0.4, + "grad_norm": 3.78125, + "learning_rate": 4.517150333566901e-06, + "log_odds": 0.7130782008171082, + "log_odds_ratio": -0.41765135526657104, + "loss": 0.3236, + "rejected_geometric_mean": -1.5239132642745972, + "step": 1633 + }, + { + "chosen_geometric_mean": -1.1933534145355225, + "epoch": 0.4, + "grad_norm": 12.0, + "learning_rate": 4.516575108656301e-06, + "log_odds": 2.203516721725464, + "log_odds_ratio": -0.2693008482456207, + "loss": 0.2933, + "rejected_geometric_mean": -3.1561052799224854, + "step": 1634 + }, + { + "chosen_geometric_mean": -1.1742022037506104, + "epoch": 0.4, + "grad_norm": 32.5, + "learning_rate": 4.5159995779845065e-06, + "log_odds": 7.750026702880859, + "log_odds_ratio": -0.006753567606210709, + "loss": 0.4021, + "rejected_geometric_mean": -8.55556869506836, + "step": 1635 + }, + { + "chosen_geometric_mean": -1.0847870111465454, + "epoch": 0.41, + "grad_norm": 49.5, + "learning_rate": 4.515423741638786e-06, + "log_odds": 6.985817909240723, + "log_odds_ratio": -0.014434278011322021, + "loss": 0.367, + "rejected_geometric_mean": -7.6434526443481445, + "step": 1636 + }, + { + "chosen_geometric_mean": -1.2283155918121338, + "epoch": 0.41, + "grad_norm": 22.75, + "learning_rate": 4.5148475997064474e-06, + "log_odds": 3.867633581161499, + "log_odds_ratio": -0.28319400548934937, + "loss": 0.3727, + "rejected_geometric_mean": -4.876476287841797, + "step": 1637 + }, + { + "chosen_geometric_mean": -1.1850030422210693, + "epoch": 0.41, + "grad_norm": 4.8125, + "learning_rate": 4.514271152274849e-06, + "log_odds": 1.1250158548355103, + "log_odds_ratio": -0.32507187128067017, + "loss": 0.2798, + "rejected_geometric_mean": -2.1100611686706543, + "step": 1638 + }, + { + "chosen_geometric_mean": -1.1928389072418213, + "epoch": 0.41, + "grad_norm": 26.375, + "learning_rate": 4.513694399431393e-06, + "log_odds": 5.985788822174072, + "log_odds_ratio": -0.06412456929683685, + "loss": 0.287, + "rejected_geometric_mean": -6.849775791168213, + "step": 1639 + }, + { + "chosen_geometric_mean": -1.004366159439087, + "epoch": 0.41, + "grad_norm": 18.5, + "learning_rate": 4.513117341263531e-06, + "log_odds": 5.267237663269043, + "log_odds_ratio": -0.011965411715209484, + "loss": 0.3096, + "rejected_geometric_mean": -5.812328815460205, + "step": 1640 + }, + { + "chosen_geometric_mean": -1.224157452583313, + "epoch": 0.41, + "grad_norm": 7.28125, + "learning_rate": 4.512539977858756e-06, + "log_odds": 4.793059825897217, + "log_odds_ratio": -0.09232079982757568, + "loss": 0.2996, + "rejected_geometric_mean": -5.702874660491943, + "step": 1641 + }, + { + "chosen_geometric_mean": -0.9006788730621338, + "epoch": 0.41, + "grad_norm": 7.3125, + "learning_rate": 4.511962309304611e-06, + "log_odds": 2.7791953086853027, + "log_odds_ratio": -0.3912509083747864, + "loss": 0.3065, + "rejected_geometric_mean": -3.431804895401001, + "step": 1642 + }, + { + "chosen_geometric_mean": -1.0194299221038818, + "epoch": 0.41, + "grad_norm": 5.8125, + "learning_rate": 4.511384335688686e-06, + "log_odds": 1.3880951404571533, + "log_odds_ratio": -0.2753058969974518, + "loss": 0.2722, + "rejected_geometric_mean": -2.1257872581481934, + "step": 1643 + }, + { + "chosen_geometric_mean": -1.169524908065796, + "epoch": 0.41, + "grad_norm": 3.453125, + "learning_rate": 4.5108060570986145e-06, + "log_odds": 7.672309875488281, + "log_odds_ratio": -0.14029882848262787, + "loss": 0.2769, + "rejected_geometric_mean": -8.53757381439209, + "step": 1644 + }, + { + "chosen_geometric_mean": -1.1275886297225952, + "epoch": 0.41, + "grad_norm": 26.75, + "learning_rate": 4.5102274736220776e-06, + "log_odds": 3.853555917739868, + "log_odds_ratio": -0.3263646960258484, + "loss": 0.3493, + "rejected_geometric_mean": -4.796231269836426, + "step": 1645 + }, + { + "chosen_geometric_mean": -2.411397695541382, + "epoch": 0.41, + "grad_norm": 43.5, + "learning_rate": 4.509648585346802e-06, + "log_odds": 4.547672271728516, + "log_odds_ratio": -0.2426597625017166, + "loss": 0.3678, + "rejected_geometric_mean": -6.816060543060303, + "step": 1646 + }, + { + "chosen_geometric_mean": -1.0068113803863525, + "epoch": 0.41, + "grad_norm": 7.625, + "learning_rate": 4.509069392360561e-06, + "log_odds": 2.1998183727264404, + "log_odds_ratio": -0.36266371607780457, + "loss": 0.3328, + "rejected_geometric_mean": -3.0315401554107666, + "step": 1647 + }, + { + "chosen_geometric_mean": -1.2168446779251099, + "epoch": 0.41, + "grad_norm": 15.25, + "learning_rate": 4.508489894751176e-06, + "log_odds": 1.1977919340133667, + "log_odds_ratio": -0.2919588088989258, + "loss": 0.2432, + "rejected_geometric_mean": -2.2051148414611816, + "step": 1648 + }, + { + "chosen_geometric_mean": -1.0320367813110352, + "epoch": 0.41, + "grad_norm": 19.375, + "learning_rate": 4.50791009260651e-06, + "log_odds": 8.67586898803711, + "log_odds_ratio": -0.04311022534966469, + "loss": 0.3282, + "rejected_geometric_mean": -9.277700424194336, + "step": 1649 + }, + { + "chosen_geometric_mean": -1.1712040901184082, + "epoch": 0.41, + "grad_norm": 12.875, + "learning_rate": 4.507329986014477e-06, + "log_odds": 1.1947253942489624, + "log_odds_ratio": -0.452790230512619, + "loss": 0.3229, + "rejected_geometric_mean": -2.2007181644439697, + "step": 1650 + }, + { + "chosen_geometric_mean": -1.091226577758789, + "epoch": 0.41, + "grad_norm": 4.78125, + "learning_rate": 4.506749575063033e-06, + "log_odds": 1.5357229709625244, + "log_odds_ratio": -0.31847673654556274, + "loss": 0.2882, + "rejected_geometric_mean": -2.385561227798462, + "step": 1651 + }, + { + "chosen_geometric_mean": -0.9074312448501587, + "epoch": 0.41, + "grad_norm": 2.90625, + "learning_rate": 4.506168859840185e-06, + "log_odds": 1.375478744506836, + "log_odds_ratio": -0.3854416310787201, + "loss": 0.2862, + "rejected_geometric_mean": -2.044491767883301, + "step": 1652 + }, + { + "chosen_geometric_mean": -1.2170590162277222, + "epoch": 0.41, + "grad_norm": 6.09375, + "learning_rate": 4.505587840433981e-06, + "log_odds": 1.0524159669876099, + "log_odds_ratio": -0.5431352853775024, + "loss": 0.2668, + "rejected_geometric_mean": -2.233792781829834, + "step": 1653 + }, + { + "chosen_geometric_mean": -1.4749287366867065, + "epoch": 0.41, + "grad_norm": 15.0, + "learning_rate": 4.505006516932519e-06, + "log_odds": 1.03016996383667, + "log_odds_ratio": -0.32739076018333435, + "loss": 0.29, + "rejected_geometric_mean": -2.352915048599243, + "step": 1654 + }, + { + "chosen_geometric_mean": -1.2253797054290771, + "epoch": 0.41, + "grad_norm": 16.0, + "learning_rate": 4.504424889423941e-06, + "log_odds": 1.2679625749588013, + "log_odds_ratio": -0.35316002368927, + "loss": 0.3692, + "rejected_geometric_mean": -2.3020083904266357, + "step": 1655 + }, + { + "chosen_geometric_mean": -1.2314448356628418, + "epoch": 0.41, + "grad_norm": 3.765625, + "learning_rate": 4.503842957996436e-06, + "log_odds": 4.182364463806152, + "log_odds_ratio": -0.1590726524591446, + "loss": 0.3419, + "rejected_geometric_mean": -5.169227123260498, + "step": 1656 + }, + { + "chosen_geometric_mean": -1.2106177806854248, + "epoch": 0.41, + "grad_norm": 13.25, + "learning_rate": 4.503260722738238e-06, + "log_odds": 0.1581992208957672, + "log_odds_ratio": -0.6236296892166138, + "loss": 0.3228, + "rejected_geometric_mean": -1.3177757263183594, + "step": 1657 + }, + { + "chosen_geometric_mean": -1.1715977191925049, + "epoch": 0.41, + "grad_norm": 4.90625, + "learning_rate": 4.502678183737629e-06, + "log_odds": 1.6028093099594116, + "log_odds_ratio": -0.37998515367507935, + "loss": 0.2901, + "rejected_geometric_mean": -2.592684268951416, + "step": 1658 + }, + { + "chosen_geometric_mean": -1.115199089050293, + "epoch": 0.41, + "grad_norm": 12.25, + "learning_rate": 4.502095341082934e-06, + "log_odds": 1.5652155876159668, + "log_odds_ratio": -0.26948004961013794, + "loss": 0.298, + "rejected_geometric_mean": -2.4187281131744385, + "step": 1659 + }, + { + "chosen_geometric_mean": -1.1455886363983154, + "epoch": 0.41, + "grad_norm": 14.0, + "learning_rate": 4.5015121948625286e-06, + "log_odds": 3.2879114151000977, + "log_odds_ratio": -0.2167787104845047, + "loss": 0.3188, + "rejected_geometric_mean": -4.173910140991211, + "step": 1660 + }, + { + "chosen_geometric_mean": -1.3614048957824707, + "epoch": 0.41, + "grad_norm": 7.46875, + "learning_rate": 4.500928745164828e-06, + "log_odds": 0.3017843961715698, + "log_odds_ratio": -0.5612783432006836, + "loss": 0.4061, + "rejected_geometric_mean": -1.6001826524734497, + "step": 1661 + }, + { + "chosen_geometric_mean": -1.2133007049560547, + "epoch": 0.41, + "grad_norm": 4.1875, + "learning_rate": 4.500344992078302e-06, + "log_odds": 3.2838456630706787, + "log_odds_ratio": -0.31248676776885986, + "loss": 0.2454, + "rejected_geometric_mean": -4.320638656616211, + "step": 1662 + }, + { + "chosen_geometric_mean": -0.8183406591415405, + "epoch": 0.41, + "grad_norm": 4.6875, + "learning_rate": 4.499760935691458e-06, + "log_odds": 2.207101821899414, + "log_odds_ratio": -0.27805620431900024, + "loss": 0.2914, + "rejected_geometric_mean": -2.6950459480285645, + "step": 1663 + }, + { + "chosen_geometric_mean": -1.0067418813705444, + "epoch": 0.41, + "grad_norm": 18.125, + "learning_rate": 4.499176576092855e-06, + "log_odds": 5.115880489349365, + "log_odds_ratio": -0.04301251098513603, + "loss": 0.2652, + "rejected_geometric_mean": -5.6495866775512695, + "step": 1664 + }, + { + "chosen_geometric_mean": -1.272212028503418, + "epoch": 0.41, + "grad_norm": 16.5, + "learning_rate": 4.498591913371094e-06, + "log_odds": 4.063713073730469, + "log_odds_ratio": -0.1706899106502533, + "loss": 0.3176, + "rejected_geometric_mean": -5.0870513916015625, + "step": 1665 + }, + { + "chosen_geometric_mean": -1.0915759801864624, + "epoch": 0.41, + "grad_norm": 2.125, + "learning_rate": 4.498006947614825e-06, + "log_odds": 2.3636670112609863, + "log_odds_ratio": -0.25658899545669556, + "loss": 0.3346, + "rejected_geometric_mean": -3.2071096897125244, + "step": 1666 + }, + { + "chosen_geometric_mean": -1.203467845916748, + "epoch": 0.41, + "grad_norm": 3.6875, + "learning_rate": 4.497421678912742e-06, + "log_odds": 1.3089503049850464, + "log_odds_ratio": -0.36446413397789, + "loss": 0.2979, + "rejected_geometric_mean": -2.35046648979187, + "step": 1667 + }, + { + "chosen_geometric_mean": -1.0052852630615234, + "epoch": 0.41, + "grad_norm": 18.125, + "learning_rate": 4.4968361073535885e-06, + "log_odds": 0.3242972791194916, + "log_odds_ratio": -0.5678144693374634, + "loss": 0.3386, + "rejected_geometric_mean": -1.2299678325653076, + "step": 1668 + }, + { + "chosen_geometric_mean": -0.9772685766220093, + "epoch": 0.41, + "grad_norm": 2.328125, + "learning_rate": 4.496250233026148e-06, + "log_odds": 3.498095989227295, + "log_odds_ratio": -0.2594791650772095, + "loss": 0.3499, + "rejected_geometric_mean": -4.152459144592285, + "step": 1669 + }, + { + "chosen_geometric_mean": -1.0148102045059204, + "epoch": 0.41, + "grad_norm": 13.125, + "learning_rate": 4.495664056019254e-06, + "log_odds": 2.974233865737915, + "log_odds_ratio": -0.28398606181144714, + "loss": 0.276, + "rejected_geometric_mean": -3.6499433517456055, + "step": 1670 + }, + { + "chosen_geometric_mean": -1.0571496486663818, + "epoch": 0.41, + "grad_norm": 2.265625, + "learning_rate": 4.4950775764217855e-06, + "log_odds": 2.731513738632202, + "log_odds_ratio": -0.4611281156539917, + "loss": 0.3437, + "rejected_geometric_mean": -3.632183313369751, + "step": 1671 + }, + { + "chosen_geometric_mean": -0.9455985426902771, + "epoch": 0.41, + "grad_norm": 3.75, + "learning_rate": 4.494490794322667e-06, + "log_odds": 2.399959087371826, + "log_odds_ratio": -0.17377495765686035, + "loss": 0.2969, + "rejected_geometric_mean": -2.940415859222412, + "step": 1672 + }, + { + "chosen_geometric_mean": -1.1795002222061157, + "epoch": 0.41, + "grad_norm": 3.5625, + "learning_rate": 4.493903709810869e-06, + "log_odds": 3.745828628540039, + "log_odds_ratio": -0.3840811252593994, + "loss": 0.3661, + "rejected_geometric_mean": -4.730765342712402, + "step": 1673 + }, + { + "chosen_geometric_mean": -1.19760262966156, + "epoch": 0.41, + "grad_norm": 2.296875, + "learning_rate": 4.493316322975406e-06, + "log_odds": 2.449734926223755, + "log_odds_ratio": -0.33356615900993347, + "loss": 0.3675, + "rejected_geometric_mean": -3.4397661685943604, + "step": 1674 + }, + { + "chosen_geometric_mean": -1.3990161418914795, + "epoch": 0.41, + "grad_norm": 5.4375, + "learning_rate": 4.492728633905342e-06, + "log_odds": 1.9114878177642822, + "log_odds_ratio": -0.22613121569156647, + "loss": 0.2913, + "rejected_geometric_mean": -3.090989351272583, + "step": 1675 + }, + { + "chosen_geometric_mean": -1.132455587387085, + "epoch": 0.41, + "grad_norm": 10.1875, + "learning_rate": 4.492140642689784e-06, + "log_odds": 2.544268846511841, + "log_odds_ratio": -0.3841296434402466, + "loss": 0.2929, + "rejected_geometric_mean": -3.4673383235931396, + "step": 1676 + }, + { + "chosen_geometric_mean": -1.030240774154663, + "epoch": 0.42, + "grad_norm": 5.0, + "learning_rate": 4.491552349417885e-06, + "log_odds": 3.8783798217773438, + "log_odds_ratio": -0.3071936368942261, + "loss": 0.4047, + "rejected_geometric_mean": -4.605748176574707, + "step": 1677 + }, + { + "chosen_geometric_mean": -1.425516128540039, + "epoch": 0.42, + "grad_norm": 33.5, + "learning_rate": 4.4909637541788445e-06, + "log_odds": 1.9329509735107422, + "log_odds_ratio": -0.38468295335769653, + "loss": 0.3486, + "rejected_geometric_mean": -3.27642560005188, + "step": 1678 + }, + { + "chosen_geometric_mean": -1.1769747734069824, + "epoch": 0.42, + "grad_norm": 46.25, + "learning_rate": 4.490374857061908e-06, + "log_odds": 3.9465250968933105, + "log_odds_ratio": -0.25081443786621094, + "loss": 0.3699, + "rejected_geometric_mean": -4.852395534515381, + "step": 1679 + }, + { + "chosen_geometric_mean": -1.0271446704864502, + "epoch": 0.42, + "grad_norm": 22.875, + "learning_rate": 4.489785658156366e-06, + "log_odds": 6.447461128234863, + "log_odds_ratio": -0.16596725583076477, + "loss": 0.3262, + "rejected_geometric_mean": -7.121992588043213, + "step": 1680 + }, + { + "chosen_geometric_mean": -1.3144495487213135, + "epoch": 0.42, + "grad_norm": 176.0, + "learning_rate": 4.489196157551556e-06, + "log_odds": 13.252542495727539, + "log_odds_ratio": -0.017093846574425697, + "loss": 0.3323, + "rejected_geometric_mean": -14.248795509338379, + "step": 1681 + }, + { + "chosen_geometric_mean": -1.2697116136550903, + "epoch": 0.42, + "grad_norm": 37.0, + "learning_rate": 4.48860635533686e-06, + "log_odds": 8.844267845153809, + "log_odds_ratio": -0.3254148066043854, + "loss": 0.2836, + "rejected_geometric_mean": -9.9234619140625, + "step": 1682 + }, + { + "chosen_geometric_mean": -1.2864327430725098, + "epoch": 0.42, + "grad_norm": 8.3125, + "learning_rate": 4.488016251601707e-06, + "log_odds": 0.36969929933547974, + "log_odds_ratio": -0.5368117690086365, + "loss": 0.2795, + "rejected_geometric_mean": -1.560673713684082, + "step": 1683 + }, + { + "chosen_geometric_mean": -1.1820378303527832, + "epoch": 0.42, + "grad_norm": 5.3125, + "learning_rate": 4.487425846435569e-06, + "log_odds": 5.286905765533447, + "log_odds_ratio": -0.132748544216156, + "loss": 0.2544, + "rejected_geometric_mean": -6.125870704650879, + "step": 1684 + }, + { + "chosen_geometric_mean": -0.992956817150116, + "epoch": 0.42, + "grad_norm": 7.53125, + "learning_rate": 4.486835139927967e-06, + "log_odds": 7.844699859619141, + "log_odds_ratio": -0.17351648211479187, + "loss": 0.2926, + "rejected_geometric_mean": -8.502686500549316, + "step": 1685 + }, + { + "chosen_geometric_mean": -0.9973886609077454, + "epoch": 0.42, + "grad_norm": 62.25, + "learning_rate": 4.4862441321684655e-06, + "log_odds": 0.8150268197059631, + "log_odds_ratio": -0.46987056732177734, + "loss": 0.4022, + "rejected_geometric_mean": -1.650924563407898, + "step": 1686 + }, + { + "chosen_geometric_mean": -1.0202966928482056, + "epoch": 0.42, + "grad_norm": 4.0625, + "learning_rate": 4.485652823246677e-06, + "log_odds": 2.098623752593994, + "log_odds_ratio": -0.4568670690059662, + "loss": 0.3056, + "rejected_geometric_mean": -2.9421238899230957, + "step": 1687 + }, + { + "chosen_geometric_mean": -1.2265095710754395, + "epoch": 0.42, + "grad_norm": 3.640625, + "learning_rate": 4.485061213252256e-06, + "log_odds": 2.4724550247192383, + "log_odds_ratio": -0.20419961214065552, + "loss": 0.3314, + "rejected_geometric_mean": -3.4296467304229736, + "step": 1688 + }, + { + "chosen_geometric_mean": -1.1314237117767334, + "epoch": 0.42, + "grad_norm": 4.6875, + "learning_rate": 4.484469302274906e-06, + "log_odds": 5.744166374206543, + "log_odds_ratio": -0.26544588804244995, + "loss": 0.2582, + "rejected_geometric_mean": -6.657821178436279, + "step": 1689 + }, + { + "chosen_geometric_mean": -0.9981706738471985, + "epoch": 0.42, + "grad_norm": 7.46875, + "learning_rate": 4.483877090404375e-06, + "log_odds": 3.914879322052002, + "log_odds_ratio": -0.3036194443702698, + "loss": 0.3055, + "rejected_geometric_mean": -4.6280598640441895, + "step": 1690 + }, + { + "chosen_geometric_mean": -1.0182392597198486, + "epoch": 0.42, + "grad_norm": 3.6875, + "learning_rate": 4.483284577730457e-06, + "log_odds": 6.1497321128845215, + "log_odds_ratio": -0.43530911207199097, + "loss": 0.273, + "rejected_geometric_mean": -7.060674667358398, + "step": 1691 + }, + { + "chosen_geometric_mean": -1.3674488067626953, + "epoch": 0.42, + "grad_norm": 11.875, + "learning_rate": 4.482691764342989e-06, + "log_odds": 0.27903759479522705, + "log_odds_ratio": -0.5872141122817993, + "loss": 0.4023, + "rejected_geometric_mean": -1.6124038696289062, + "step": 1692 + }, + { + "chosen_geometric_mean": -1.0316849946975708, + "epoch": 0.42, + "grad_norm": 9.0625, + "learning_rate": 4.482098650331857e-06, + "log_odds": 3.6284098625183105, + "log_odds_ratio": -0.10913404077291489, + "loss": 0.3021, + "rejected_geometric_mean": -4.290831089019775, + "step": 1693 + }, + { + "chosen_geometric_mean": -1.2930389642715454, + "epoch": 0.42, + "grad_norm": 8.0625, + "learning_rate": 4.481505235786993e-06, + "log_odds": 2.9777252674102783, + "log_odds_ratio": -0.20800158381462097, + "loss": 0.3183, + "rejected_geometric_mean": -4.012582302093506, + "step": 1694 + }, + { + "chosen_geometric_mean": -1.0265824794769287, + "epoch": 0.42, + "grad_norm": 9.5, + "learning_rate": 4.480911520798371e-06, + "log_odds": 6.784676551818848, + "log_odds_ratio": -0.05942055583000183, + "loss": 0.3098, + "rejected_geometric_mean": -7.387750148773193, + "step": 1695 + }, + { + "chosen_geometric_mean": -1.6074573993682861, + "epoch": 0.42, + "grad_norm": 22.75, + "learning_rate": 4.4803175054560125e-06, + "log_odds": 4.178095817565918, + "log_odds_ratio": -0.1538720279932022, + "loss": 0.2953, + "rejected_geometric_mean": -5.5959343910217285, + "step": 1696 + }, + { + "chosen_geometric_mean": -1.4221638441085815, + "epoch": 0.42, + "grad_norm": 48.5, + "learning_rate": 4.479723189849985e-06, + "log_odds": 8.523834228515625, + "log_odds_ratio": -0.02814912050962448, + "loss": 0.3814, + "rejected_geometric_mean": -9.621992111206055, + "step": 1697 + }, + { + "chosen_geometric_mean": -1.160250186920166, + "epoch": 0.42, + "grad_norm": 16.75, + "learning_rate": 4.479128574070401e-06, + "log_odds": 5.0708909034729, + "log_odds_ratio": -0.23230311274528503, + "loss": 0.3174, + "rejected_geometric_mean": -5.955091953277588, + "step": 1698 + }, + { + "chosen_geometric_mean": -1.1658719778060913, + "epoch": 0.42, + "grad_norm": 10.375, + "learning_rate": 4.478533658207417e-06, + "log_odds": 6.492591381072998, + "log_odds_ratio": -0.05362777039408684, + "loss": 0.2975, + "rejected_geometric_mean": -7.276548862457275, + "step": 1699 + }, + { + "chosen_geometric_mean": -1.3881264925003052, + "epoch": 0.42, + "grad_norm": 4.4375, + "learning_rate": 4.4779384423512386e-06, + "log_odds": 0.6386615037918091, + "log_odds_ratio": -0.5354256629943848, + "loss": 0.374, + "rejected_geometric_mean": -1.9887018203735352, + "step": 1700 + }, + { + "chosen_geometric_mean": -0.965758740901947, + "epoch": 0.42, + "grad_norm": 9.4375, + "learning_rate": 4.4773429265921145e-06, + "log_odds": 5.074653625488281, + "log_odds_ratio": -0.047143980860710144, + "loss": 0.2925, + "rejected_geometric_mean": -5.543351650238037, + "step": 1701 + }, + { + "chosen_geometric_mean": -1.1381464004516602, + "epoch": 0.42, + "grad_norm": 7.90625, + "learning_rate": 4.476747111020338e-06, + "log_odds": 3.631361961364746, + "log_odds_ratio": -0.3429141044616699, + "loss": 0.3153, + "rejected_geometric_mean": -4.532959461212158, + "step": 1702 + }, + { + "chosen_geometric_mean": -1.0212671756744385, + "epoch": 0.42, + "grad_norm": 2.625, + "learning_rate": 4.476150995726251e-06, + "log_odds": 0.34629613161087036, + "log_odds_ratio": -0.5369685888290405, + "loss": 0.2951, + "rejected_geometric_mean": -1.254062294960022, + "step": 1703 + }, + { + "chosen_geometric_mean": -1.036257028579712, + "epoch": 0.42, + "grad_norm": 8.6875, + "learning_rate": 4.475554580800237e-06, + "log_odds": 2.89467453956604, + "log_odds_ratio": -0.372158020734787, + "loss": 0.33, + "rejected_geometric_mean": -3.712972640991211, + "step": 1704 + }, + { + "chosen_geometric_mean": -1.1516742706298828, + "epoch": 0.42, + "grad_norm": 3.53125, + "learning_rate": 4.474957866332728e-06, + "log_odds": 1.562072992324829, + "log_odds_ratio": -0.5692834258079529, + "loss": 0.3119, + "rejected_geometric_mean": -2.6468348503112793, + "step": 1705 + }, + { + "chosen_geometric_mean": -1.3139824867248535, + "epoch": 0.42, + "grad_norm": 2.140625, + "learning_rate": 4.474360852414198e-06, + "log_odds": 2.6921422481536865, + "log_odds_ratio": -0.3313302993774414, + "loss": 0.2818, + "rejected_geometric_mean": -3.8232264518737793, + "step": 1706 + }, + { + "chosen_geometric_mean": -1.2470418214797974, + "epoch": 0.42, + "grad_norm": 2.890625, + "learning_rate": 4.473763539135172e-06, + "log_odds": 4.378767967224121, + "log_odds_ratio": -0.17180167138576508, + "loss": 0.3359, + "rejected_geometric_mean": -5.363852500915527, + "step": 1707 + }, + { + "chosen_geometric_mean": -1.2935371398925781, + "epoch": 0.42, + "grad_norm": 3.03125, + "learning_rate": 4.473165926586215e-06, + "log_odds": 2.7644765377044678, + "log_odds_ratio": -0.35845044255256653, + "loss": 0.3196, + "rejected_geometric_mean": -3.9227349758148193, + "step": 1708 + }, + { + "chosen_geometric_mean": -1.3507990837097168, + "epoch": 0.42, + "grad_norm": 4.6875, + "learning_rate": 4.47256801485794e-06, + "log_odds": 4.0310444831848145, + "log_odds_ratio": -0.36351293325424194, + "loss": 0.3602, + "rejected_geometric_mean": -5.237612724304199, + "step": 1709 + }, + { + "chosen_geometric_mean": -1.0609489679336548, + "epoch": 0.42, + "grad_norm": 19.75, + "learning_rate": 4.471969804041004e-06, + "log_odds": 1.2843114137649536, + "log_odds_ratio": -0.26125115156173706, + "loss": 0.3138, + "rejected_geometric_mean": -2.0697832107543945, + "step": 1710 + }, + { + "chosen_geometric_mean": -1.1747770309448242, + "epoch": 0.42, + "grad_norm": 4.03125, + "learning_rate": 4.4713712942261116e-06, + "log_odds": 1.0887866020202637, + "log_odds_ratio": -0.35276222229003906, + "loss": 0.2993, + "rejected_geometric_mean": -2.0655834674835205, + "step": 1711 + }, + { + "chosen_geometric_mean": -1.219056487083435, + "epoch": 0.42, + "grad_norm": 18.25, + "learning_rate": 4.470772485504009e-06, + "log_odds": 11.266207695007324, + "log_odds_ratio": -0.07306304574012756, + "loss": 0.2621, + "rejected_geometric_mean": -12.16739273071289, + "step": 1712 + }, + { + "chosen_geometric_mean": -1.1125916242599487, + "epoch": 0.42, + "grad_norm": 14.5, + "learning_rate": 4.4701733779654925e-06, + "log_odds": 1.5257010459899902, + "log_odds_ratio": -0.3713216483592987, + "loss": 0.3129, + "rejected_geometric_mean": -2.4309985637664795, + "step": 1713 + }, + { + "chosen_geometric_mean": -1.652889370918274, + "epoch": 0.42, + "grad_norm": 9.8125, + "learning_rate": 4.4695739717014e-06, + "log_odds": 2.7010927200317383, + "log_odds_ratio": -0.2446155548095703, + "loss": 0.3273, + "rejected_geometric_mean": -4.198429584503174, + "step": 1714 + }, + { + "chosen_geometric_mean": -1.182380199432373, + "epoch": 0.42, + "grad_norm": 15.25, + "learning_rate": 4.468974266802616e-06, + "log_odds": 7.860272407531738, + "log_odds_ratio": -0.09340207278728485, + "loss": 0.2541, + "rejected_geometric_mean": -8.727434158325195, + "step": 1715 + }, + { + "chosen_geometric_mean": -1.0962181091308594, + "epoch": 0.42, + "grad_norm": 7.0625, + "learning_rate": 4.468374263360069e-06, + "log_odds": 3.163560390472412, + "log_odds_ratio": -0.22261080145835876, + "loss": 0.2933, + "rejected_geometric_mean": -3.9314138889312744, + "step": 1716 + }, + { + "chosen_geometric_mean": -0.8602861166000366, + "epoch": 0.43, + "grad_norm": 8.375, + "learning_rate": 4.467773961464735e-06, + "log_odds": 2.702608108520508, + "log_odds_ratio": -0.2683190703392029, + "loss": 0.2632, + "rejected_geometric_mean": -3.2204928398132324, + "step": 1717 + }, + { + "chosen_geometric_mean": -0.8830393552780151, + "epoch": 0.43, + "grad_norm": 5.84375, + "learning_rate": 4.467173361207635e-06, + "log_odds": 5.2174906730651855, + "log_odds_ratio": -0.3199913501739502, + "loss": 0.325, + "rejected_geometric_mean": -5.810677528381348, + "step": 1718 + }, + { + "chosen_geometric_mean": -1.098467469215393, + "epoch": 0.43, + "grad_norm": 20.5, + "learning_rate": 4.466572462679834e-06, + "log_odds": 2.3003525733947754, + "log_odds_ratio": -0.32257285714149475, + "loss": 0.3546, + "rejected_geometric_mean": -3.150150775909424, + "step": 1719 + }, + { + "chosen_geometric_mean": -1.0247961282730103, + "epoch": 0.43, + "grad_norm": 7.5, + "learning_rate": 4.465971265972442e-06, + "log_odds": 2.3783695697784424, + "log_odds_ratio": -0.22815018892288208, + "loss": 0.3086, + "rejected_geometric_mean": -3.071897506713867, + "step": 1720 + }, + { + "chosen_geometric_mean": -1.1808490753173828, + "epoch": 0.43, + "grad_norm": 9.375, + "learning_rate": 4.465369771176614e-06, + "log_odds": 1.8581727743148804, + "log_odds_ratio": -0.2620849013328552, + "loss": 0.3659, + "rejected_geometric_mean": -2.8063619136810303, + "step": 1721 + }, + { + "chosen_geometric_mean": -1.0587539672851562, + "epoch": 0.43, + "grad_norm": 5.4375, + "learning_rate": 4.464767978383553e-06, + "log_odds": 2.7416532039642334, + "log_odds_ratio": -0.2659454345703125, + "loss": 0.3193, + "rejected_geometric_mean": -3.486396312713623, + "step": 1722 + }, + { + "chosen_geometric_mean": -1.1785658597946167, + "epoch": 0.43, + "grad_norm": 2.65625, + "learning_rate": 4.464165887684504e-06, + "log_odds": 3.9498214721679688, + "log_odds_ratio": -0.42074188590049744, + "loss": 0.3283, + "rejected_geometric_mean": -4.995942115783691, + "step": 1723 + }, + { + "chosen_geometric_mean": -1.2353434562683105, + "epoch": 0.43, + "grad_norm": 2.734375, + "learning_rate": 4.463563499170759e-06, + "log_odds": 3.595909833908081, + "log_odds_ratio": -0.26204901933670044, + "loss": 0.3105, + "rejected_geometric_mean": -4.607703685760498, + "step": 1724 + }, + { + "chosen_geometric_mean": -1.1669189929962158, + "epoch": 0.43, + "grad_norm": 48.75, + "learning_rate": 4.462960812933654e-06, + "log_odds": 2.855742931365967, + "log_odds_ratio": -0.31771382689476013, + "loss": 0.4078, + "rejected_geometric_mean": -3.827603816986084, + "step": 1725 + }, + { + "chosen_geometric_mean": -1.0881567001342773, + "epoch": 0.43, + "grad_norm": 5.0, + "learning_rate": 4.462357829064571e-06, + "log_odds": 2.9997243881225586, + "log_odds_ratio": -0.1627756953239441, + "loss": 0.3027, + "rejected_geometric_mean": -3.7610273361206055, + "step": 1726 + }, + { + "chosen_geometric_mean": -1.1306133270263672, + "epoch": 0.43, + "grad_norm": 2.390625, + "learning_rate": 4.4617545476549375e-06, + "log_odds": 0.5505412817001343, + "log_odds_ratio": -0.4846646785736084, + "loss": 0.3226, + "rejected_geometric_mean": -1.5304057598114014, + "step": 1727 + }, + { + "chosen_geometric_mean": -0.9175443649291992, + "epoch": 0.43, + "grad_norm": 12.0625, + "learning_rate": 4.461150968796224e-06, + "log_odds": 2.1917152404785156, + "log_odds_ratio": -0.4306556284427643, + "loss": 0.313, + "rejected_geometric_mean": -2.8179547786712646, + "step": 1728 + }, + { + "chosen_geometric_mean": -1.3542354106903076, + "epoch": 0.43, + "grad_norm": 11.875, + "learning_rate": 4.460547092579949e-06, + "log_odds": 3.5941720008850098, + "log_odds_ratio": -0.2912854254245758, + "loss": 0.4048, + "rejected_geometric_mean": -4.779589653015137, + "step": 1729 + }, + { + "chosen_geometric_mean": -0.9577183723449707, + "epoch": 0.43, + "grad_norm": 4.53125, + "learning_rate": 4.459942919097673e-06, + "log_odds": 9.546819686889648, + "log_odds_ratio": -0.25769510865211487, + "loss": 0.2805, + "rejected_geometric_mean": -10.21430778503418, + "step": 1730 + }, + { + "chosen_geometric_mean": -1.2614314556121826, + "epoch": 0.43, + "grad_norm": 3.796875, + "learning_rate": 4.4593384484410055e-06, + "log_odds": 0.8108349442481995, + "log_odds_ratio": -0.48460495471954346, + "loss": 0.2943, + "rejected_geometric_mean": -1.9701321125030518, + "step": 1731 + }, + { + "chosen_geometric_mean": -0.8767555952072144, + "epoch": 0.43, + "grad_norm": 2.578125, + "learning_rate": 4.458733680701596e-06, + "log_odds": 5.175412654876709, + "log_odds_ratio": -0.15945133566856384, + "loss": 0.2991, + "rejected_geometric_mean": -5.627267837524414, + "step": 1732 + }, + { + "chosen_geometric_mean": -1.00462806224823, + "epoch": 0.43, + "grad_norm": 18.0, + "learning_rate": 4.458128615971144e-06, + "log_odds": 0.3329022526741028, + "log_odds_ratio": -0.5937553644180298, + "loss": 0.3542, + "rejected_geometric_mean": -1.2881460189819336, + "step": 1733 + }, + { + "chosen_geometric_mean": -0.9510040283203125, + "epoch": 0.43, + "grad_norm": 42.5, + "learning_rate": 4.457523254341391e-06, + "log_odds": 5.743760108947754, + "log_odds_ratio": -0.17431454360485077, + "loss": 0.4384, + "rejected_geometric_mean": -6.304104804992676, + "step": 1734 + }, + { + "chosen_geometric_mean": -1.5093517303466797, + "epoch": 0.43, + "grad_norm": 5.90625, + "learning_rate": 4.456917595904125e-06, + "log_odds": 0.7076570987701416, + "log_odds_ratio": -0.45117729902267456, + "loss": 0.2794, + "rejected_geometric_mean": -2.1320345401763916, + "step": 1735 + }, + { + "chosen_geometric_mean": -1.2839957475662231, + "epoch": 0.43, + "grad_norm": 21.375, + "learning_rate": 4.456311640751177e-06, + "log_odds": 3.4819746017456055, + "log_odds_ratio": -0.15391945838928223, + "loss": 0.3571, + "rejected_geometric_mean": -4.5089240074157715, + "step": 1736 + }, + { + "chosen_geometric_mean": -0.9667969346046448, + "epoch": 0.43, + "grad_norm": 25.875, + "learning_rate": 4.455705388974425e-06, + "log_odds": 5.931650161743164, + "log_odds_ratio": -0.0490480475127697, + "loss": 0.3237, + "rejected_geometric_mean": -6.448597431182861, + "step": 1737 + }, + { + "chosen_geometric_mean": -1.1576240062713623, + "epoch": 0.43, + "grad_norm": 57.0, + "learning_rate": 4.4550988406657916e-06, + "log_odds": 3.3285257816314697, + "log_odds_ratio": -0.10888881236314774, + "loss": 0.3343, + "rejected_geometric_mean": -4.107925891876221, + "step": 1738 + }, + { + "chosen_geometric_mean": -1.240220308303833, + "epoch": 0.43, + "grad_norm": 3.21875, + "learning_rate": 4.454491995917244e-06, + "log_odds": 2.5856986045837402, + "log_odds_ratio": -0.46738871932029724, + "loss": 0.3246, + "rejected_geometric_mean": -3.676677703857422, + "step": 1739 + }, + { + "chosen_geometric_mean": -1.0423099994659424, + "epoch": 0.43, + "grad_norm": 27.25, + "learning_rate": 4.4538848548207946e-06, + "log_odds": 4.484564781188965, + "log_odds_ratio": -0.2260856032371521, + "loss": 0.3644, + "rejected_geometric_mean": -5.212161064147949, + "step": 1740 + }, + { + "chosen_geometric_mean": -1.0291565656661987, + "epoch": 0.43, + "grad_norm": 8.0625, + "learning_rate": 4.4532774174685e-06, + "log_odds": 1.490378737449646, + "log_odds_ratio": -0.3553997278213501, + "loss": 0.2568, + "rejected_geometric_mean": -2.2095589637756348, + "step": 1741 + }, + { + "chosen_geometric_mean": -1.3697069883346558, + "epoch": 0.43, + "grad_norm": 10.75, + "learning_rate": 4.452669683952462e-06, + "log_odds": 1.2950778007507324, + "log_odds_ratio": -0.3506712317466736, + "loss": 0.3358, + "rejected_geometric_mean": -2.531355142593384, + "step": 1742 + }, + { + "chosen_geometric_mean": -1.0047390460968018, + "epoch": 0.43, + "grad_norm": 2.953125, + "learning_rate": 4.452061654364829e-06, + "log_odds": 1.5027607679367065, + "log_odds_ratio": -0.4173772931098938, + "loss": 0.3848, + "rejected_geometric_mean": -2.3016762733459473, + "step": 1743 + }, + { + "chosen_geometric_mean": -1.246733546257019, + "epoch": 0.43, + "grad_norm": 11.4375, + "learning_rate": 4.451453328797792e-06, + "log_odds": 6.979783535003662, + "log_odds_ratio": -0.44326263666152954, + "loss": 0.3284, + "rejected_geometric_mean": -8.116333961486816, + "step": 1744 + }, + { + "chosen_geometric_mean": -0.9366618990898132, + "epoch": 0.43, + "grad_norm": 3.1875, + "learning_rate": 4.450844707343588e-06, + "log_odds": 2.4201619625091553, + "log_odds_ratio": -0.39206433296203613, + "loss": 0.2877, + "rejected_geometric_mean": -3.13236927986145, + "step": 1745 + }, + { + "chosen_geometric_mean": -1.2008206844329834, + "epoch": 0.43, + "grad_norm": 6.65625, + "learning_rate": 4.450235790094498e-06, + "log_odds": 2.6657204627990723, + "log_odds_ratio": -0.1874188780784607, + "loss": 0.2911, + "rejected_geometric_mean": -3.600532293319702, + "step": 1746 + }, + { + "chosen_geometric_mean": -1.2105214595794678, + "epoch": 0.43, + "grad_norm": 20.875, + "learning_rate": 4.4496265771428494e-06, + "log_odds": 0.7738102078437805, + "log_odds_ratio": -0.49414676427841187, + "loss": 0.3267, + "rejected_geometric_mean": -1.8942160606384277, + "step": 1747 + }, + { + "chosen_geometric_mean": -1.1721147298812866, + "epoch": 0.43, + "grad_norm": 8.6875, + "learning_rate": 4.449017068581013e-06, + "log_odds": 4.050106048583984, + "log_odds_ratio": -0.29170477390289307, + "loss": 0.3307, + "rejected_geometric_mean": -5.015154838562012, + "step": 1748 + }, + { + "chosen_geometric_mean": -0.8148590326309204, + "epoch": 0.43, + "grad_norm": 3.484375, + "learning_rate": 4.448407264501404e-06, + "log_odds": 0.6159048080444336, + "log_odds_ratio": -0.4418785274028778, + "loss": 0.2721, + "rejected_geometric_mean": -1.2136521339416504, + "step": 1749 + }, + { + "chosen_geometric_mean": -1.1368904113769531, + "epoch": 0.43, + "grad_norm": 7.34375, + "learning_rate": 4.4477971649964854e-06, + "log_odds": 1.8881969451904297, + "log_odds_ratio": -0.22136527299880981, + "loss": 0.2605, + "rejected_geometric_mean": -2.718026638031006, + "step": 1750 + }, + { + "chosen_geometric_mean": -1.0999290943145752, + "epoch": 0.43, + "grad_norm": 2.453125, + "learning_rate": 4.4471867701587615e-06, + "log_odds": 3.1070003509521484, + "log_odds_ratio": -0.3680693805217743, + "loss": 0.2928, + "rejected_geometric_mean": -4.0128936767578125, + "step": 1751 + }, + { + "chosen_geometric_mean": -1.1099852323532104, + "epoch": 0.43, + "grad_norm": 6.3125, + "learning_rate": 4.446576080080784e-06, + "log_odds": 7.671482086181641, + "log_odds_ratio": -0.17001861333847046, + "loss": 0.3204, + "rejected_geometric_mean": -8.443986892700195, + "step": 1752 + }, + { + "chosen_geometric_mean": -1.1493602991104126, + "epoch": 0.43, + "grad_norm": 7.6875, + "learning_rate": 4.445965094855146e-06, + "log_odds": 0.7723655104637146, + "log_odds_ratio": -0.42672213912010193, + "loss": 0.3342, + "rejected_geometric_mean": -1.7504565715789795, + "step": 1753 + }, + { + "chosen_geometric_mean": -0.9681916832923889, + "epoch": 0.43, + "grad_norm": 19.0, + "learning_rate": 4.445353814574489e-06, + "log_odds": 6.796178340911865, + "log_odds_ratio": -0.037810057401657104, + "loss": 0.317, + "rejected_geometric_mean": -7.305753231048584, + "step": 1754 + }, + { + "chosen_geometric_mean": -1.1220719814300537, + "epoch": 0.43, + "grad_norm": 8.6875, + "learning_rate": 4.444742239331498e-06, + "log_odds": 3.389387369155884, + "log_odds_ratio": -0.22985613346099854, + "loss": 0.2945, + "rejected_geometric_mean": -4.233686447143555, + "step": 1755 + }, + { + "chosen_geometric_mean": -0.9833804368972778, + "epoch": 0.43, + "grad_norm": 5.4375, + "learning_rate": 4.444130369218902e-06, + "log_odds": 6.8976054191589355, + "log_odds_ratio": -0.11955764889717102, + "loss": 0.2966, + "rejected_geometric_mean": -7.488661289215088, + "step": 1756 + }, + { + "chosen_geometric_mean": -1.4674556255340576, + "epoch": 0.44, + "grad_norm": 18.875, + "learning_rate": 4.443518204329475e-06, + "log_odds": 3.259511709213257, + "log_odds_ratio": -0.13609983026981354, + "loss": 0.3517, + "rejected_geometric_mean": -4.478999137878418, + "step": 1757 + }, + { + "chosen_geometric_mean": -0.9844093322753906, + "epoch": 0.44, + "grad_norm": 8.125, + "learning_rate": 4.442905744756036e-06, + "log_odds": 1.278831124305725, + "log_odds_ratio": -0.29393836855888367, + "loss": 0.2694, + "rejected_geometric_mean": -2.004948854446411, + "step": 1758 + }, + { + "chosen_geometric_mean": -1.1220135688781738, + "epoch": 0.44, + "grad_norm": 9.8125, + "learning_rate": 4.442292990591449e-06, + "log_odds": 0.5452309846878052, + "log_odds_ratio": -0.5270274877548218, + "loss": 0.2964, + "rejected_geometric_mean": -1.5396636724472046, + "step": 1759 + }, + { + "chosen_geometric_mean": -1.2866699695587158, + "epoch": 0.44, + "grad_norm": 6.1875, + "learning_rate": 4.441679941928623e-06, + "log_odds": 5.581765174865723, + "log_odds_ratio": -0.2151518613100052, + "loss": 0.3301, + "rejected_geometric_mean": -6.638415336608887, + "step": 1760 + }, + { + "chosen_geometric_mean": -1.166471004486084, + "epoch": 0.44, + "grad_norm": 5.28125, + "learning_rate": 4.441066598860508e-06, + "log_odds": 2.252816915512085, + "log_odds_ratio": -0.22001619637012482, + "loss": 0.2483, + "rejected_geometric_mean": -3.12988543510437, + "step": 1761 + }, + { + "chosen_geometric_mean": -0.9271672964096069, + "epoch": 0.44, + "grad_norm": 2.734375, + "learning_rate": 4.440452961480105e-06, + "log_odds": 7.514883518218994, + "log_odds_ratio": -0.1434362530708313, + "loss": 0.3496, + "rejected_geometric_mean": -8.021146774291992, + "step": 1762 + }, + { + "chosen_geometric_mean": -1.1460574865341187, + "epoch": 0.44, + "grad_norm": 10.25, + "learning_rate": 4.439839029880454e-06, + "log_odds": 2.2072396278381348, + "log_odds_ratio": -0.2986280620098114, + "loss": 0.3038, + "rejected_geometric_mean": -3.165529727935791, + "step": 1763 + }, + { + "chosen_geometric_mean": -0.9049961566925049, + "epoch": 0.44, + "grad_norm": 15.0, + "learning_rate": 4.439224804154642e-06, + "log_odds": 1.6242263317108154, + "log_odds_ratio": -0.36921218037605286, + "loss": 0.2845, + "rejected_geometric_mean": -2.256193161010742, + "step": 1764 + }, + { + "chosen_geometric_mean": -0.9296051859855652, + "epoch": 0.44, + "grad_norm": 4.09375, + "learning_rate": 4.438610284395801e-06, + "log_odds": 1.9773144721984863, + "log_odds_ratio": -0.26115262508392334, + "loss": 0.2848, + "rejected_geometric_mean": -2.5794785022735596, + "step": 1765 + }, + { + "chosen_geometric_mean": -1.0423940420150757, + "epoch": 0.44, + "grad_norm": 3.0, + "learning_rate": 4.437995470697107e-06, + "log_odds": 4.998732566833496, + "log_odds_ratio": -0.06722893565893173, + "loss": 0.327, + "rejected_geometric_mean": -5.6272993087768555, + "step": 1766 + }, + { + "chosen_geometric_mean": -0.9640398621559143, + "epoch": 0.44, + "grad_norm": 4.375, + "learning_rate": 4.43738036315178e-06, + "log_odds": 2.3290975093841553, + "log_odds_ratio": -0.33323487639427185, + "loss": 0.272, + "rejected_geometric_mean": -3.009286880493164, + "step": 1767 + }, + { + "chosen_geometric_mean": -1.1867023706436157, + "epoch": 0.44, + "grad_norm": 8.375, + "learning_rate": 4.436764961853085e-06, + "log_odds": 3.862685441970825, + "log_odds_ratio": -0.20138174295425415, + "loss": 0.4529, + "rejected_geometric_mean": -4.785499095916748, + "step": 1768 + }, + { + "chosen_geometric_mean": -1.2487409114837646, + "epoch": 0.44, + "grad_norm": 3.046875, + "learning_rate": 4.436149266894332e-06, + "log_odds": 1.8716354370117188, + "log_odds_ratio": -0.4340004324913025, + "loss": 0.2944, + "rejected_geometric_mean": -2.9627418518066406, + "step": 1769 + }, + { + "chosen_geometric_mean": -1.0382736921310425, + "epoch": 0.44, + "grad_norm": 10.0, + "learning_rate": 4.435533278368874e-06, + "log_odds": 0.6222615242004395, + "log_odds_ratio": -0.46296873688697815, + "loss": 0.2946, + "rejected_geometric_mean": -1.514965295791626, + "step": 1770 + }, + { + "chosen_geometric_mean": -1.0294674634933472, + "epoch": 0.44, + "grad_norm": 4.0625, + "learning_rate": 4.434916996370112e-06, + "log_odds": 2.662165641784668, + "log_odds_ratio": -0.14138145744800568, + "loss": 0.2798, + "rejected_geometric_mean": -3.339444875717163, + "step": 1771 + }, + { + "chosen_geometric_mean": -0.9988399147987366, + "epoch": 0.44, + "grad_norm": 16.125, + "learning_rate": 4.434300420991487e-06, + "log_odds": 0.4969213306903839, + "log_odds_ratio": -0.5078740119934082, + "loss": 0.315, + "rejected_geometric_mean": -1.3888201713562012, + "step": 1772 + }, + { + "chosen_geometric_mean": -1.0976604223251343, + "epoch": 0.44, + "grad_norm": 20.25, + "learning_rate": 4.433683552326487e-06, + "log_odds": 4.241073131561279, + "log_odds_ratio": -0.30607378482818604, + "loss": 0.3601, + "rejected_geometric_mean": -5.161480903625488, + "step": 1773 + }, + { + "chosen_geometric_mean": -1.1104776859283447, + "epoch": 0.44, + "grad_norm": 5.46875, + "learning_rate": 4.433066390468646e-06, + "log_odds": 8.141698837280273, + "log_odds_ratio": -0.09050478786230087, + "loss": 0.2537, + "rejected_geometric_mean": -8.871403694152832, + "step": 1774 + }, + { + "chosen_geometric_mean": -1.141558051109314, + "epoch": 0.44, + "grad_norm": 5.96875, + "learning_rate": 4.432448935511537e-06, + "log_odds": 5.835055351257324, + "log_odds_ratio": -0.29396381974220276, + "loss": 0.3002, + "rejected_geometric_mean": -6.711798191070557, + "step": 1775 + }, + { + "chosen_geometric_mean": -1.0381150245666504, + "epoch": 0.44, + "grad_norm": 8.875, + "learning_rate": 4.431831187548785e-06, + "log_odds": 1.4420137405395508, + "log_odds_ratio": -0.36517876386642456, + "loss": 0.2903, + "rejected_geometric_mean": -2.2699878215789795, + "step": 1776 + }, + { + "chosen_geometric_mean": -1.4657204151153564, + "epoch": 0.44, + "grad_norm": 23.875, + "learning_rate": 4.431213146674053e-06, + "log_odds": 4.285562992095947, + "log_odds_ratio": -0.4003090560436249, + "loss": 0.3427, + "rejected_geometric_mean": -5.601140022277832, + "step": 1777 + }, + { + "chosen_geometric_mean": -1.304053544998169, + "epoch": 0.44, + "grad_norm": 42.0, + "learning_rate": 4.43059481298105e-06, + "log_odds": 0.9476728439331055, + "log_odds_ratio": -0.4433993697166443, + "loss": 0.3348, + "rejected_geometric_mean": -2.1192760467529297, + "step": 1778 + }, + { + "chosen_geometric_mean": -1.8316357135772705, + "epoch": 0.44, + "grad_norm": 28.5, + "learning_rate": 4.4299761865635325e-06, + "log_odds": 0.9317483901977539, + "log_odds_ratio": -0.589479923248291, + "loss": 0.4054, + "rejected_geometric_mean": -2.6165263652801514, + "step": 1779 + }, + { + "chosen_geometric_mean": -1.1273659467697144, + "epoch": 0.44, + "grad_norm": 20.5, + "learning_rate": 4.429357267515299e-06, + "log_odds": 0.38301077485084534, + "log_odds_ratio": -0.5243513584136963, + "loss": 0.2868, + "rejected_geometric_mean": -1.4068822860717773, + "step": 1780 + }, + { + "chosen_geometric_mean": -0.8767703771591187, + "epoch": 0.44, + "grad_norm": 3.875, + "learning_rate": 4.42873805593019e-06, + "log_odds": 0.30504170060157776, + "log_odds_ratio": -0.5610967874526978, + "loss": 0.3105, + "rejected_geometric_mean": -1.062912940979004, + "step": 1781 + }, + { + "chosen_geometric_mean": -1.2055145502090454, + "epoch": 0.44, + "grad_norm": 24.625, + "learning_rate": 4.428118551902095e-06, + "log_odds": 0.2992868423461914, + "log_odds_ratio": -0.5850517749786377, + "loss": 0.3403, + "rejected_geometric_mean": -1.436133861541748, + "step": 1782 + }, + { + "chosen_geometric_mean": -1.1021696329116821, + "epoch": 0.44, + "grad_norm": 3.21875, + "learning_rate": 4.427498755524945e-06, + "log_odds": 0.6520952582359314, + "log_odds_ratio": -0.4566084146499634, + "loss": 0.3414, + "rejected_geometric_mean": -1.6000618934631348, + "step": 1783 + }, + { + "chosen_geometric_mean": -0.9512509107589722, + "epoch": 0.44, + "grad_norm": 3.359375, + "learning_rate": 4.426878666892716e-06, + "log_odds": 4.4132843017578125, + "log_odds_ratio": -0.06328742206096649, + "loss": 0.2549, + "rejected_geometric_mean": -4.883922576904297, + "step": 1784 + }, + { + "chosen_geometric_mean": -0.9824396371841431, + "epoch": 0.44, + "grad_norm": 2.3125, + "learning_rate": 4.42625828609943e-06, + "log_odds": 4.8886308670043945, + "log_odds_ratio": -0.14330905675888062, + "loss": 0.2872, + "rejected_geometric_mean": -5.463611125946045, + "step": 1785 + }, + { + "chosen_geometric_mean": -0.9816051721572876, + "epoch": 0.44, + "grad_norm": 4.03125, + "learning_rate": 4.425637613239148e-06, + "log_odds": 0.6994794607162476, + "log_odds_ratio": -0.4709039032459259, + "loss": 0.2909, + "rejected_geometric_mean": -1.5498977899551392, + "step": 1786 + }, + { + "chosen_geometric_mean": -0.9435622692108154, + "epoch": 0.44, + "grad_norm": 4.21875, + "learning_rate": 4.425016648405982e-06, + "log_odds": 0.5520557761192322, + "log_odds_ratio": -0.514534056186676, + "loss": 0.2811, + "rejected_geometric_mean": -1.3412113189697266, + "step": 1787 + }, + { + "chosen_geometric_mean": -1.0569602251052856, + "epoch": 0.44, + "grad_norm": 3.4375, + "learning_rate": 4.4243953916940845e-06, + "log_odds": 5.841507434844971, + "log_odds_ratio": -0.40377193689346313, + "loss": 0.3553, + "rejected_geometric_mean": -6.720778942108154, + "step": 1788 + }, + { + "chosen_geometric_mean": -1.305704951286316, + "epoch": 0.44, + "grad_norm": 4.4375, + "learning_rate": 4.423773843197652e-06, + "log_odds": 8.528519630432129, + "log_odds_ratio": -0.20538704097270966, + "loss": 0.2886, + "rejected_geometric_mean": -9.584900856018066, + "step": 1789 + }, + { + "chosen_geometric_mean": -1.2510063648223877, + "epoch": 0.44, + "grad_norm": 7.40625, + "learning_rate": 4.423152003010927e-06, + "log_odds": 9.341708183288574, + "log_odds_ratio": -0.04560524970293045, + "loss": 0.3333, + "rejected_geometric_mean": -10.225927352905273, + "step": 1790 + }, + { + "chosen_geometric_mean": -0.9732087850570679, + "epoch": 0.44, + "grad_norm": 3.03125, + "learning_rate": 4.422529871228195e-06, + "log_odds": 7.003571033477783, + "log_odds_ratio": -0.2103976160287857, + "loss": 0.2935, + "rejected_geometric_mean": -7.56102991104126, + "step": 1791 + }, + { + "chosen_geometric_mean": -1.216546893119812, + "epoch": 0.44, + "grad_norm": 7.46875, + "learning_rate": 4.4219074479437865e-06, + "log_odds": 7.804410457611084, + "log_odds_ratio": -0.16893064975738525, + "loss": 0.3096, + "rejected_geometric_mean": -8.773444175720215, + "step": 1792 + }, + { + "chosen_geometric_mean": -1.0120632648468018, + "epoch": 0.44, + "grad_norm": 5.71875, + "learning_rate": 4.421284733252075e-06, + "log_odds": 3.8438313007354736, + "log_odds_ratio": -0.2204931229352951, + "loss": 0.3069, + "rejected_geometric_mean": -4.461218357086182, + "step": 1793 + }, + { + "chosen_geometric_mean": -1.0446516275405884, + "epoch": 0.44, + "grad_norm": 27.625, + "learning_rate": 4.420661727247481e-06, + "log_odds": 5.943826675415039, + "log_odds_ratio": -0.40914759039878845, + "loss": 0.3156, + "rejected_geometric_mean": -6.773975372314453, + "step": 1794 + }, + { + "chosen_geometric_mean": -1.2510873079299927, + "epoch": 0.44, + "grad_norm": 42.5, + "learning_rate": 4.420038430024464e-06, + "log_odds": 5.741130352020264, + "log_odds_ratio": -0.19262520968914032, + "loss": 0.4293, + "rejected_geometric_mean": -6.733246803283691, + "step": 1795 + }, + { + "chosen_geometric_mean": -1.761096715927124, + "epoch": 0.44, + "grad_norm": 44.75, + "learning_rate": 4.4194148416775335e-06, + "log_odds": 5.5825605392456055, + "log_odds_ratio": -0.23653194308280945, + "loss": 0.3803, + "rejected_geometric_mean": -7.205415725708008, + "step": 1796 + }, + { + "chosen_geometric_mean": -1.0226352214813232, + "epoch": 0.44, + "grad_norm": 14.9375, + "learning_rate": 4.41879096230124e-06, + "log_odds": 4.205869197845459, + "log_odds_ratio": -0.20558536052703857, + "loss": 0.2998, + "rejected_geometric_mean": -4.870631217956543, + "step": 1797 + }, + { + "chosen_geometric_mean": -1.1098668575286865, + "epoch": 0.45, + "grad_norm": 2.875, + "learning_rate": 4.418166791990176e-06, + "log_odds": 4.173837184906006, + "log_odds_ratio": -0.14407600462436676, + "loss": 0.3002, + "rejected_geometric_mean": -4.965449333190918, + "step": 1798 + }, + { + "chosen_geometric_mean": -1.1826691627502441, + "epoch": 0.45, + "grad_norm": 29.125, + "learning_rate": 4.417542330838985e-06, + "log_odds": 1.4844465255737305, + "log_odds_ratio": -0.39896413683891296, + "loss": 0.3389, + "rejected_geometric_mean": -2.5352604389190674, + "step": 1799 + }, + { + "chosen_geometric_mean": -1.2973120212554932, + "epoch": 0.45, + "grad_norm": 2.5625, + "learning_rate": 4.416917578942347e-06, + "log_odds": 6.436850070953369, + "log_odds_ratio": -0.4789558947086334, + "loss": 0.3133, + "rejected_geometric_mean": -7.579648017883301, + "step": 1800 + }, + { + "chosen_geometric_mean": -1.12079918384552, + "epoch": 0.45, + "grad_norm": 16.75, + "learning_rate": 4.416292536394991e-06, + "log_odds": 5.224698066711426, + "log_odds_ratio": -0.2556925415992737, + "loss": 0.2904, + "rejected_geometric_mean": -6.084751129150391, + "step": 1801 + }, + { + "chosen_geometric_mean": -1.1079972982406616, + "epoch": 0.45, + "grad_norm": 12.75, + "learning_rate": 4.415667203291688e-06, + "log_odds": 1.2318445444107056, + "log_odds_ratio": -0.35620763897895813, + "loss": 0.3617, + "rejected_geometric_mean": -2.1160457134246826, + "step": 1802 + }, + { + "chosen_geometric_mean": -1.0826759338378906, + "epoch": 0.45, + "grad_norm": 2.0625, + "learning_rate": 4.415041579727253e-06, + "log_odds": 4.118045806884766, + "log_odds_ratio": -0.15990497171878815, + "loss": 0.3295, + "rejected_geometric_mean": -4.817312240600586, + "step": 1803 + }, + { + "chosen_geometric_mean": -0.8290700316429138, + "epoch": 0.45, + "grad_norm": 9.8125, + "learning_rate": 4.414415665796546e-06, + "log_odds": 2.901505470275879, + "log_odds_ratio": -0.132732555270195, + "loss": 0.2936, + "rejected_geometric_mean": -3.2044100761413574, + "step": 1804 + }, + { + "chosen_geometric_mean": -0.977151095867157, + "epoch": 0.45, + "grad_norm": 2.703125, + "learning_rate": 4.413789461594471e-06, + "log_odds": 0.2714274823665619, + "log_odds_ratio": -0.5697789788246155, + "loss": 0.3588, + "rejected_geometric_mean": -1.1387890577316284, + "step": 1805 + }, + { + "chosen_geometric_mean": -1.1092783212661743, + "epoch": 0.45, + "grad_norm": 2.46875, + "learning_rate": 4.413162967215976e-06, + "log_odds": 2.3228163719177246, + "log_odds_ratio": -0.20898307859897614, + "loss": 0.335, + "rejected_geometric_mean": -3.1204066276550293, + "step": 1806 + }, + { + "chosen_geometric_mean": -1.044616460800171, + "epoch": 0.45, + "grad_norm": 6.21875, + "learning_rate": 4.412536182756051e-06, + "log_odds": 4.339573860168457, + "log_odds_ratio": -0.41317155957221985, + "loss": 0.298, + "rejected_geometric_mean": -5.197179794311523, + "step": 1807 + }, + { + "chosen_geometric_mean": -0.9572176933288574, + "epoch": 0.45, + "grad_norm": 3.0625, + "learning_rate": 4.4119091083097325e-06, + "log_odds": 7.665177822113037, + "log_odds_ratio": -0.19271162152290344, + "loss": 0.3123, + "rejected_geometric_mean": -8.26356315612793, + "step": 1808 + }, + { + "chosen_geometric_mean": -0.9884729385375977, + "epoch": 0.45, + "grad_norm": 2.078125, + "learning_rate": 4.411281743972099e-06, + "log_odds": 5.73775053024292, + "log_odds_ratio": -0.2649417519569397, + "loss": 0.3022, + "rejected_geometric_mean": -6.465243339538574, + "step": 1809 + }, + { + "chosen_geometric_mean": -1.0691251754760742, + "epoch": 0.45, + "grad_norm": 3.671875, + "learning_rate": 4.410654089838275e-06, + "log_odds": 3.784459352493286, + "log_odds_ratio": -0.2987317442893982, + "loss": 0.3151, + "rejected_geometric_mean": -4.6181464195251465, + "step": 1810 + }, + { + "chosen_geometric_mean": -1.4279839992523193, + "epoch": 0.45, + "grad_norm": 23.125, + "learning_rate": 4.410026146003429e-06, + "log_odds": 1.6329238414764404, + "log_odds_ratio": -0.5328207612037659, + "loss": 0.3226, + "rejected_geometric_mean": -2.955355405807495, + "step": 1811 + }, + { + "chosen_geometric_mean": -1.1516449451446533, + "epoch": 0.45, + "grad_norm": 2.578125, + "learning_rate": 4.409397912562769e-06, + "log_odds": 3.8344430923461914, + "log_odds_ratio": -0.3773231506347656, + "loss": 0.3619, + "rejected_geometric_mean": -4.797245502471924, + "step": 1812 + }, + { + "chosen_geometric_mean": -1.3086076974868774, + "epoch": 0.45, + "grad_norm": 15.5, + "learning_rate": 4.408769389611554e-06, + "log_odds": 1.8323489427566528, + "log_odds_ratio": -0.42397886514663696, + "loss": 0.3502, + "rejected_geometric_mean": -3.0308709144592285, + "step": 1813 + }, + { + "chosen_geometric_mean": -1.317373275756836, + "epoch": 0.45, + "grad_norm": 4.4375, + "learning_rate": 4.408140577245082e-06, + "log_odds": 4.575351238250732, + "log_odds_ratio": -0.19581817090511322, + "loss": 0.268, + "rejected_geometric_mean": -5.641661643981934, + "step": 1814 + }, + { + "chosen_geometric_mean": -1.1073377132415771, + "epoch": 0.45, + "grad_norm": 7.375, + "learning_rate": 4.407511475558695e-06, + "log_odds": 1.957397222518921, + "log_odds_ratio": -0.40138477087020874, + "loss": 0.3434, + "rejected_geometric_mean": -2.9101476669311523, + "step": 1815 + }, + { + "chosen_geometric_mean": -1.1658320426940918, + "epoch": 0.45, + "grad_norm": 1.9296875, + "learning_rate": 4.40688208464778e-06, + "log_odds": 4.473222255706787, + "log_odds_ratio": -0.3602411448955536, + "loss": 0.3001, + "rejected_geometric_mean": -5.4881134033203125, + "step": 1816 + }, + { + "chosen_geometric_mean": -1.0893828868865967, + "epoch": 0.45, + "grad_norm": 2.71875, + "learning_rate": 4.406252404607769e-06, + "log_odds": 1.3441048860549927, + "log_odds_ratio": -0.33546996116638184, + "loss": 0.2961, + "rejected_geometric_mean": -2.2374863624572754, + "step": 1817 + }, + { + "chosen_geometric_mean": -1.3628835678100586, + "epoch": 0.45, + "grad_norm": 10.125, + "learning_rate": 4.405622435534136e-06, + "log_odds": 0.8750506639480591, + "log_odds_ratio": -0.4173659682273865, + "loss": 0.4295, + "rejected_geometric_mean": -2.088853359222412, + "step": 1818 + }, + { + "chosen_geometric_mean": -1.085851788520813, + "epoch": 0.45, + "grad_norm": 27.875, + "learning_rate": 4.404992177522399e-06, + "log_odds": 8.3126802444458, + "log_odds_ratio": -0.23964329063892365, + "loss": 0.2925, + "rejected_geometric_mean": -9.128230094909668, + "step": 1819 + }, + { + "chosen_geometric_mean": -0.9804251194000244, + "epoch": 0.45, + "grad_norm": 11.0625, + "learning_rate": 4.4043616306681214e-06, + "log_odds": 2.933140277862549, + "log_odds_ratio": -0.428788423538208, + "loss": 0.2731, + "rejected_geometric_mean": -3.6595778465270996, + "step": 1820 + }, + { + "chosen_geometric_mean": -1.1658501625061035, + "epoch": 0.45, + "grad_norm": 14.0625, + "learning_rate": 4.403730795066908e-06, + "log_odds": 0.16799360513687134, + "log_odds_ratio": -0.6146544814109802, + "loss": 0.3251, + "rejected_geometric_mean": -1.2822134494781494, + "step": 1821 + }, + { + "chosen_geometric_mean": -0.9807788133621216, + "epoch": 0.45, + "grad_norm": 8.4375, + "learning_rate": 4.403099670814409e-06, + "log_odds": 1.6425962448120117, + "log_odds_ratio": -0.3680110275745392, + "loss": 0.3484, + "rejected_geometric_mean": -2.3473191261291504, + "step": 1822 + }, + { + "chosen_geometric_mean": -1.112969994544983, + "epoch": 0.45, + "grad_norm": 6.875, + "learning_rate": 4.4024682580063185e-06, + "log_odds": 0.6762142777442932, + "log_odds_ratio": -0.5709751844406128, + "loss": 0.3337, + "rejected_geometric_mean": -1.656299352645874, + "step": 1823 + }, + { + "chosen_geometric_mean": -1.108139991760254, + "epoch": 0.45, + "grad_norm": 3.625, + "learning_rate": 4.401836556738372e-06, + "log_odds": 2.08138108253479, + "log_odds_ratio": -0.29395538568496704, + "loss": 0.2888, + "rejected_geometric_mean": -2.9196083545684814, + "step": 1824 + }, + { + "chosen_geometric_mean": -1.2518279552459717, + "epoch": 0.45, + "grad_norm": 3.78125, + "learning_rate": 4.401204567106353e-06, + "log_odds": 7.2708539962768555, + "log_odds_ratio": -0.14466503262519836, + "loss": 0.3209, + "rejected_geometric_mean": -8.256114959716797, + "step": 1825 + }, + { + "chosen_geometric_mean": -0.9545212984085083, + "epoch": 0.45, + "grad_norm": 8.1875, + "learning_rate": 4.400572289206085e-06, + "log_odds": 8.845989227294922, + "log_odds_ratio": -0.15806464850902557, + "loss": 0.2758, + "rejected_geometric_mean": -9.43829345703125, + "step": 1826 + }, + { + "chosen_geometric_mean": -1.4506022930145264, + "epoch": 0.45, + "grad_norm": 6.1875, + "learning_rate": 4.3999397231334375e-06, + "log_odds": 1.4788968563079834, + "log_odds_ratio": -0.4380556344985962, + "loss": 0.3347, + "rejected_geometric_mean": -2.8213932514190674, + "step": 1827 + }, + { + "chosen_geometric_mean": -0.8594657182693481, + "epoch": 0.45, + "grad_norm": 8.9375, + "learning_rate": 4.399306868984321e-06, + "log_odds": 10.7453031539917, + "log_odds_ratio": -0.017180562019348145, + "loss": 0.288, + "rejected_geometric_mean": -11.01136589050293, + "step": 1828 + }, + { + "chosen_geometric_mean": -1.3448327779769897, + "epoch": 0.45, + "grad_norm": 2.890625, + "learning_rate": 4.398673726854693e-06, + "log_odds": 3.4346423149108887, + "log_odds_ratio": -0.17260169982910156, + "loss": 0.3469, + "rejected_geometric_mean": -4.540515899658203, + "step": 1829 + }, + { + "chosen_geometric_mean": -1.1227076053619385, + "epoch": 0.45, + "grad_norm": 3.828125, + "learning_rate": 4.398040296840552e-06, + "log_odds": 9.43625545501709, + "log_odds_ratio": -0.17138853669166565, + "loss": 0.2896, + "rejected_geometric_mean": -10.220420837402344, + "step": 1830 + }, + { + "chosen_geometric_mean": -1.4039666652679443, + "epoch": 0.45, + "grad_norm": 4.28125, + "learning_rate": 4.397406579037942e-06, + "log_odds": 7.236285209655762, + "log_odds_ratio": -0.009813414886593819, + "loss": 0.2742, + "rejected_geometric_mean": -8.340566635131836, + "step": 1831 + }, + { + "chosen_geometric_mean": -2.8875114917755127, + "epoch": 0.45, + "grad_norm": 51.25, + "learning_rate": 4.396772573542949e-06, + "log_odds": 0.5632511377334595, + "log_odds_ratio": -1.193876028060913, + "loss": 0.4818, + "rejected_geometric_mean": -3.3372349739074707, + "step": 1832 + }, + { + "chosen_geometric_mean": -1.1336066722869873, + "epoch": 0.45, + "grad_norm": 6.40625, + "learning_rate": 4.396138280451704e-06, + "log_odds": 4.025381088256836, + "log_odds_ratio": -0.10586834698915482, + "loss": 0.3534, + "rejected_geometric_mean": -4.7880988121032715, + "step": 1833 + }, + { + "chosen_geometric_mean": -1.0874757766723633, + "epoch": 0.45, + "grad_norm": 7.125, + "learning_rate": 4.395503699860381e-06, + "log_odds": 3.651545763015747, + "log_odds_ratio": -0.28029224276542664, + "loss": 0.3102, + "rejected_geometric_mean": -4.456183433532715, + "step": 1834 + }, + { + "chosen_geometric_mean": -1.0995028018951416, + "epoch": 0.45, + "grad_norm": 3.09375, + "learning_rate": 4.394868831865198e-06, + "log_odds": 5.247446060180664, + "log_odds_ratio": -0.2627985179424286, + "loss": 0.2852, + "rejected_geometric_mean": -6.0754570960998535, + "step": 1835 + }, + { + "chosen_geometric_mean": -1.4032458066940308, + "epoch": 0.45, + "grad_norm": 18.5, + "learning_rate": 4.394233676562415e-06, + "log_odds": 2.6431050300598145, + "log_odds_ratio": -0.5463618040084839, + "loss": 0.3238, + "rejected_geometric_mean": -3.8131465911865234, + "step": 1836 + }, + { + "chosen_geometric_mean": -1.0474680662155151, + "epoch": 0.45, + "grad_norm": 9.0, + "learning_rate": 4.393598234048338e-06, + "log_odds": 3.1958556175231934, + "log_odds_ratio": -0.14623528718948364, + "loss": 0.299, + "rejected_geometric_mean": -3.8924527168273926, + "step": 1837 + }, + { + "chosen_geometric_mean": -1.1602842807769775, + "epoch": 0.46, + "grad_norm": 2.640625, + "learning_rate": 4.392962504419315e-06, + "log_odds": 1.811379075050354, + "log_odds_ratio": -0.4392426311969757, + "loss": 0.3231, + "rejected_geometric_mean": -2.8077380657196045, + "step": 1838 + }, + { + "chosen_geometric_mean": -0.8915351629257202, + "epoch": 0.46, + "grad_norm": 2.859375, + "learning_rate": 4.392326487771737e-06, + "log_odds": 1.5911924839019775, + "log_odds_ratio": -0.5278423428535461, + "loss": 0.3099, + "rejected_geometric_mean": -2.3401331901550293, + "step": 1839 + }, + { + "chosen_geometric_mean": -1.089472770690918, + "epoch": 0.46, + "grad_norm": 2.875, + "learning_rate": 4.391690184202042e-06, + "log_odds": 0.3333020806312561, + "log_odds_ratio": -0.5480641722679138, + "loss": 0.32, + "rejected_geometric_mean": -1.3262838125228882, + "step": 1840 + }, + { + "chosen_geometric_mean": -1.1123985052108765, + "epoch": 0.46, + "grad_norm": 4.125, + "learning_rate": 4.391053593806706e-06, + "log_odds": 5.576429843902588, + "log_odds_ratio": -0.2170334905385971, + "loss": 0.2863, + "rejected_geometric_mean": -6.436389923095703, + "step": 1841 + }, + { + "chosen_geometric_mean": -0.9081377387046814, + "epoch": 0.46, + "grad_norm": 21.625, + "learning_rate": 4.390416716682252e-06, + "log_odds": 2.043006181716919, + "log_odds_ratio": -0.31109437346458435, + "loss": 0.3717, + "rejected_geometric_mean": -2.6740541458129883, + "step": 1842 + }, + { + "chosen_geometric_mean": -1.2714766263961792, + "epoch": 0.46, + "grad_norm": 17.25, + "learning_rate": 4.389779552925247e-06, + "log_odds": 4.034053802490234, + "log_odds_ratio": -0.28696414828300476, + "loss": 0.3044, + "rejected_geometric_mean": -5.125010967254639, + "step": 1843 + }, + { + "chosen_geometric_mean": -1.1306853294372559, + "epoch": 0.46, + "grad_norm": 6.40625, + "learning_rate": 4.3891421026323e-06, + "log_odds": 4.223883628845215, + "log_odds_ratio": -0.021988332271575928, + "loss": 0.3196, + "rejected_geometric_mean": -4.970595359802246, + "step": 1844 + }, + { + "chosen_geometric_mean": -1.4673129320144653, + "epoch": 0.46, + "grad_norm": 9.3125, + "learning_rate": 4.388504365900062e-06, + "log_odds": 0.9830901622772217, + "log_odds_ratio": -0.40660667419433594, + "loss": 0.3012, + "rejected_geometric_mean": -2.342430591583252, + "step": 1845 + }, + { + "chosen_geometric_mean": -1.7360165119171143, + "epoch": 0.46, + "grad_norm": 12.4375, + "learning_rate": 4.387866342825232e-06, + "log_odds": 1.529340147972107, + "log_odds_ratio": -0.5917270183563232, + "loss": 0.3774, + "rejected_geometric_mean": -3.1170480251312256, + "step": 1846 + }, + { + "chosen_geometric_mean": -1.451049566268921, + "epoch": 0.46, + "grad_norm": 11.3125, + "learning_rate": 4.387228033504548e-06, + "log_odds": 2.7776451110839844, + "log_odds_ratio": -0.1406996101140976, + "loss": 0.3638, + "rejected_geometric_mean": -4.004114151000977, + "step": 1847 + }, + { + "chosen_geometric_mean": -1.0203840732574463, + "epoch": 0.46, + "grad_norm": 8.5, + "learning_rate": 4.386589438034793e-06, + "log_odds": 1.812760829925537, + "log_odds_ratio": -0.29576635360717773, + "loss": 0.2675, + "rejected_geometric_mean": -2.5340352058410645, + "step": 1848 + }, + { + "chosen_geometric_mean": -1.0228427648544312, + "epoch": 0.46, + "grad_norm": 3.15625, + "learning_rate": 4.385950556512794e-06, + "log_odds": 1.4668583869934082, + "log_odds_ratio": -0.37480485439300537, + "loss": 0.2775, + "rejected_geometric_mean": -2.238309383392334, + "step": 1849 + }, + { + "chosen_geometric_mean": -1.4406001567840576, + "epoch": 0.46, + "grad_norm": 8.3125, + "learning_rate": 4.38531138903542e-06, + "log_odds": 2.0471367835998535, + "log_odds_ratio": -0.2522790729999542, + "loss": 0.3493, + "rejected_geometric_mean": -3.2522358894348145, + "step": 1850 + }, + { + "chosen_geometric_mean": -1.163550615310669, + "epoch": 0.46, + "grad_norm": 3.421875, + "learning_rate": 4.384671935699585e-06, + "log_odds": 3.816131591796875, + "log_odds_ratio": -0.1768779456615448, + "loss": 0.3207, + "rejected_geometric_mean": -4.700021743774414, + "step": 1851 + }, + { + "chosen_geometric_mean": -1.0070319175720215, + "epoch": 0.46, + "grad_norm": 3.484375, + "learning_rate": 4.384032196602245e-06, + "log_odds": 2.102400302886963, + "log_odds_ratio": -0.3351084291934967, + "loss": 0.3086, + "rejected_geometric_mean": -2.880689859390259, + "step": 1852 + }, + { + "chosen_geometric_mean": -1.1166126728057861, + "epoch": 0.46, + "grad_norm": 2.546875, + "learning_rate": 4.3833921718404e-06, + "log_odds": 3.1274375915527344, + "log_odds_ratio": -0.0670049786567688, + "loss": 0.2886, + "rejected_geometric_mean": -3.8538949489593506, + "step": 1853 + }, + { + "chosen_geometric_mean": -0.9852174520492554, + "epoch": 0.46, + "grad_norm": 3.75, + "learning_rate": 4.382751861511092e-06, + "log_odds": 0.7020577192306519, + "log_odds_ratio": -0.44405296444892883, + "loss": 0.3024, + "rejected_geometric_mean": -1.484931230545044, + "step": 1854 + }, + { + "chosen_geometric_mean": -1.008513331413269, + "epoch": 0.46, + "grad_norm": 6.78125, + "learning_rate": 4.382111265711409e-06, + "log_odds": 3.9549007415771484, + "log_odds_ratio": -0.23279313743114471, + "loss": 0.3452, + "rejected_geometric_mean": -4.619722843170166, + "step": 1855 + }, + { + "chosen_geometric_mean": -1.1579848527908325, + "epoch": 0.46, + "grad_norm": 9.6875, + "learning_rate": 4.38147038453848e-06, + "log_odds": 3.256964683532715, + "log_odds_ratio": -0.16583742201328278, + "loss": 0.2995, + "rejected_geometric_mean": -4.152653217315674, + "step": 1856 + }, + { + "chosen_geometric_mean": -1.3305296897888184, + "epoch": 0.46, + "grad_norm": 9.375, + "learning_rate": 4.3808292180894775e-06, + "log_odds": 2.739777088165283, + "log_odds_ratio": -0.1875443458557129, + "loss": 0.3149, + "rejected_geometric_mean": -3.8414244651794434, + "step": 1857 + }, + { + "chosen_geometric_mean": -1.1554603576660156, + "epoch": 0.46, + "grad_norm": 36.25, + "learning_rate": 4.380187766461619e-06, + "log_odds": 5.2166361808776855, + "log_odds_ratio": -0.06839220970869064, + "loss": 0.347, + "rejected_geometric_mean": -5.991111755371094, + "step": 1858 + }, + { + "chosen_geometric_mean": -1.0318235158920288, + "epoch": 0.46, + "grad_norm": 7.09375, + "learning_rate": 4.379546029752164e-06, + "log_odds": 4.398106575012207, + "log_odds_ratio": -0.11889880150556564, + "loss": 0.2862, + "rejected_geometric_mean": -5.0779194831848145, + "step": 1859 + }, + { + "chosen_geometric_mean": -1.12590491771698, + "epoch": 0.46, + "grad_norm": 19.375, + "learning_rate": 4.378904008058413e-06, + "log_odds": 4.221503257751465, + "log_odds_ratio": -0.037198469042778015, + "loss": 0.2856, + "rejected_geometric_mean": -4.933136940002441, + "step": 1860 + }, + { + "chosen_geometric_mean": -1.1965646743774414, + "epoch": 0.46, + "grad_norm": 32.0, + "learning_rate": 4.378261701477714e-06, + "log_odds": 2.7768239974975586, + "log_odds_ratio": -0.1914384961128235, + "loss": 0.3692, + "rejected_geometric_mean": -3.648989677429199, + "step": 1861 + }, + { + "chosen_geometric_mean": -0.9828108549118042, + "epoch": 0.46, + "grad_norm": 7.75, + "learning_rate": 4.377619110107455e-06, + "log_odds": 0.6136043667793274, + "log_odds_ratio": -0.4770044684410095, + "loss": 0.3099, + "rejected_geometric_mean": -1.4443763494491577, + "step": 1862 + }, + { + "chosen_geometric_mean": -1.212286114692688, + "epoch": 0.46, + "grad_norm": 13.5, + "learning_rate": 4.376976234045069e-06, + "log_odds": 2.7985525131225586, + "log_odds_ratio": -0.15627378225326538, + "loss": 0.3139, + "rejected_geometric_mean": -3.7113900184631348, + "step": 1863 + }, + { + "chosen_geometric_mean": -1.118955373764038, + "epoch": 0.46, + "grad_norm": 2.828125, + "learning_rate": 4.376333073388031e-06, + "log_odds": 3.0711491107940674, + "log_odds_ratio": -0.07190665602684021, + "loss": 0.3096, + "rejected_geometric_mean": -3.8128910064697266, + "step": 1864 + }, + { + "chosen_geometric_mean": -1.0375622510910034, + "epoch": 0.46, + "grad_norm": 5.46875, + "learning_rate": 4.37568962823386e-06, + "log_odds": 3.5967776775360107, + "log_odds_ratio": -0.13990022242069244, + "loss": 0.3171, + "rejected_geometric_mean": -4.252926826477051, + "step": 1865 + }, + { + "chosen_geometric_mean": -1.2165807485580444, + "epoch": 0.46, + "grad_norm": 13.5625, + "learning_rate": 4.375045898680117e-06, + "log_odds": 1.0769764184951782, + "log_odds_ratio": -0.3140985071659088, + "loss": 0.3391, + "rejected_geometric_mean": -2.090562343597412, + "step": 1866 + }, + { + "chosen_geometric_mean": -1.4680743217468262, + "epoch": 0.46, + "grad_norm": 16.125, + "learning_rate": 4.3744018848244075e-06, + "log_odds": 1.6812796592712402, + "log_odds_ratio": -0.3978869915008545, + "loss": 0.4097, + "rejected_geometric_mean": -3.011503219604492, + "step": 1867 + }, + { + "chosen_geometric_mean": -1.2699203491210938, + "epoch": 0.46, + "grad_norm": 5.59375, + "learning_rate": 4.373757586764379e-06, + "log_odds": 2.475843667984009, + "log_odds_ratio": -0.21370497345924377, + "loss": 0.3149, + "rejected_geometric_mean": -3.493579387664795, + "step": 1868 + }, + { + "chosen_geometric_mean": -1.1202341318130493, + "epoch": 0.46, + "grad_norm": 6.03125, + "learning_rate": 4.373113004597724e-06, + "log_odds": 1.2351852655410767, + "log_odds_ratio": -0.3290978670120239, + "loss": 0.3382, + "rejected_geometric_mean": -2.1416118144989014, + "step": 1869 + }, + { + "chosen_geometric_mean": -1.175622582435608, + "epoch": 0.46, + "grad_norm": 11.125, + "learning_rate": 4.372468138422174e-06, + "log_odds": 0.5949130058288574, + "log_odds_ratio": -0.4580589234828949, + "loss": 0.3497, + "rejected_geometric_mean": -1.6323333978652954, + "step": 1870 + }, + { + "chosen_geometric_mean": -1.1085127592086792, + "epoch": 0.46, + "grad_norm": 11.0625, + "learning_rate": 4.371822988335508e-06, + "log_odds": 3.6615586280822754, + "log_odds_ratio": -0.15580695867538452, + "loss": 0.293, + "rejected_geometric_mean": -4.424258708953857, + "step": 1871 + }, + { + "chosen_geometric_mean": -0.9617820978164673, + "epoch": 0.46, + "grad_norm": 2.484375, + "learning_rate": 4.371177554435546e-06, + "log_odds": 3.416428565979004, + "log_odds_ratio": -0.27639612555503845, + "loss": 0.316, + "rejected_geometric_mean": -4.10797643661499, + "step": 1872 + }, + { + "chosen_geometric_mean": -0.7634018063545227, + "epoch": 0.46, + "grad_norm": 2.390625, + "learning_rate": 4.370531836820152e-06, + "log_odds": 2.3702878952026367, + "log_odds_ratio": -0.2821887731552124, + "loss": 0.294, + "rejected_geometric_mean": -2.7254858016967773, + "step": 1873 + }, + { + "chosen_geometric_mean": -1.0417946577072144, + "epoch": 0.46, + "grad_norm": 5.84375, + "learning_rate": 4.369885835587231e-06, + "log_odds": 3.8398518562316895, + "log_odds_ratio": -0.29070234298706055, + "loss": 0.3065, + "rejected_geometric_mean": -4.615297794342041, + "step": 1874 + }, + { + "chosen_geometric_mean": -1.004245400428772, + "epoch": 0.46, + "grad_norm": 7.09375, + "learning_rate": 4.369239550834732e-06, + "log_odds": 3.921332359313965, + "log_odds_ratio": -0.2369844615459442, + "loss": 0.3663, + "rejected_geometric_mean": -4.649115085601807, + "step": 1875 + }, + { + "chosen_geometric_mean": -1.1105934381484985, + "epoch": 0.46, + "grad_norm": 14.3125, + "learning_rate": 4.368592982660649e-06, + "log_odds": 3.3649091720581055, + "log_odds_ratio": -0.31868046522140503, + "loss": 0.3524, + "rejected_geometric_mean": -4.282039642333984, + "step": 1876 + }, + { + "chosen_geometric_mean": -1.1945650577545166, + "epoch": 0.46, + "grad_norm": 8.625, + "learning_rate": 4.367946131163016e-06, + "log_odds": 3.6585330963134766, + "log_odds_ratio": -0.10767550766468048, + "loss": 0.3503, + "rejected_geometric_mean": -4.47582483291626, + "step": 1877 + }, + { + "chosen_geometric_mean": -1.208833932876587, + "epoch": 0.46, + "grad_norm": 8.125, + "learning_rate": 4.367298996439911e-06, + "log_odds": 4.947737216949463, + "log_odds_ratio": -0.06622253358364105, + "loss": 0.3164, + "rejected_geometric_mean": -5.834689140319824, + "step": 1878 + }, + { + "chosen_geometric_mean": -1.148996353149414, + "epoch": 0.47, + "grad_norm": 3.125, + "learning_rate": 4.366651578589456e-06, + "log_odds": 1.5875117778778076, + "log_odds_ratio": -0.34956151247024536, + "loss": 0.3145, + "rejected_geometric_mean": -2.5241005420684814, + "step": 1879 + }, + { + "chosen_geometric_mean": -1.163192629814148, + "epoch": 0.47, + "grad_norm": 13.125, + "learning_rate": 4.366003877709814e-06, + "log_odds": 3.92779803276062, + "log_odds_ratio": -0.0998193770647049, + "loss": 0.299, + "rejected_geometric_mean": -4.735935211181641, + "step": 1880 + }, + { + "chosen_geometric_mean": -1.0503370761871338, + "epoch": 0.47, + "grad_norm": 14.875, + "learning_rate": 4.365355893899194e-06, + "log_odds": 6.188724517822266, + "log_odds_ratio": -0.2521236538887024, + "loss": 0.37, + "rejected_geometric_mean": -6.915406703948975, + "step": 1881 + }, + { + "chosen_geometric_mean": -1.189758539199829, + "epoch": 0.47, + "grad_norm": 10.1875, + "learning_rate": 4.364707627255844e-06, + "log_odds": 5.53317403793335, + "log_odds_ratio": -0.03655970096588135, + "loss": 0.3166, + "rejected_geometric_mean": -6.353583812713623, + "step": 1882 + }, + { + "chosen_geometric_mean": -1.2764031887054443, + "epoch": 0.47, + "grad_norm": 10.25, + "learning_rate": 4.364059077878057e-06, + "log_odds": 2.9177393913269043, + "log_odds_ratio": -0.14402234554290771, + "loss": 0.3617, + "rejected_geometric_mean": -3.8895556926727295, + "step": 1883 + }, + { + "chosen_geometric_mean": -1.0627708435058594, + "epoch": 0.47, + "grad_norm": 14.5625, + "learning_rate": 4.36341024586417e-06, + "log_odds": 2.393531084060669, + "log_odds_ratio": -0.24021980166435242, + "loss": 0.3401, + "rejected_geometric_mean": -3.1826000213623047, + "step": 1884 + }, + { + "chosen_geometric_mean": -1.0526670217514038, + "epoch": 0.47, + "grad_norm": 2.5625, + "learning_rate": 4.36276113131256e-06, + "log_odds": 1.8314762115478516, + "log_odds_ratio": -0.3639862537384033, + "loss": 0.3039, + "rejected_geometric_mean": -2.673185110092163, + "step": 1885 + }, + { + "chosen_geometric_mean": -1.078261375427246, + "epoch": 0.47, + "grad_norm": 2.703125, + "learning_rate": 4.362111734321649e-06, + "log_odds": 6.29730224609375, + "log_odds_ratio": -0.05045456439256668, + "loss": 0.2843, + "rejected_geometric_mean": -6.981632232666016, + "step": 1886 + }, + { + "chosen_geometric_mean": -0.9729216694831848, + "epoch": 0.47, + "grad_norm": 2.5, + "learning_rate": 4.3614620549899014e-06, + "log_odds": 1.2547895908355713, + "log_odds_ratio": -0.28996044397354126, + "loss": 0.2541, + "rejected_geometric_mean": -1.9303791522979736, + "step": 1887 + }, + { + "chosen_geometric_mean": -1.1493569612503052, + "epoch": 0.47, + "grad_norm": 16.375, + "learning_rate": 4.360812093415823e-06, + "log_odds": 1.2940893173217773, + "log_odds_ratio": -0.4075603485107422, + "loss": 0.357, + "rejected_geometric_mean": -2.273552656173706, + "step": 1888 + }, + { + "chosen_geometric_mean": -1.2535253763198853, + "epoch": 0.47, + "grad_norm": 5.0, + "learning_rate": 4.3601618496979655e-06, + "log_odds": 3.7314341068267822, + "log_odds_ratio": -0.17250779271125793, + "loss": 0.2884, + "rejected_geometric_mean": -4.692933559417725, + "step": 1889 + }, + { + "chosen_geometric_mean": -1.0939366817474365, + "epoch": 0.47, + "grad_norm": 3.8125, + "learning_rate": 4.3595113239349194e-06, + "log_odds": 2.042210102081299, + "log_odds_ratio": -0.4803561270236969, + "loss": 0.3127, + "rejected_geometric_mean": -3.0397019386291504, + "step": 1890 + }, + { + "chosen_geometric_mean": -0.997256875038147, + "epoch": 0.47, + "grad_norm": 11.375, + "learning_rate": 4.358860516225322e-06, + "log_odds": 3.060180425643921, + "log_odds_ratio": -0.15994705259799957, + "loss": 0.2928, + "rejected_geometric_mean": -3.6746435165405273, + "step": 1891 + }, + { + "chosen_geometric_mean": -0.9794834852218628, + "epoch": 0.47, + "grad_norm": 11.875, + "learning_rate": 4.35820942666785e-06, + "log_odds": 4.246831893920898, + "log_odds_ratio": -0.31209778785705566, + "loss": 0.3204, + "rejected_geometric_mean": -4.897074222564697, + "step": 1892 + }, + { + "chosen_geometric_mean": -1.0880703926086426, + "epoch": 0.47, + "grad_norm": 6.34375, + "learning_rate": 4.357558055361225e-06, + "log_odds": 3.9795703887939453, + "log_odds_ratio": -0.24486999213695526, + "loss": 0.3069, + "rejected_geometric_mean": -4.8061089515686035, + "step": 1893 + }, + { + "chosen_geometric_mean": -1.0584325790405273, + "epoch": 0.47, + "grad_norm": 2.65625, + "learning_rate": 4.35690640240421e-06, + "log_odds": 4.489630222320557, + "log_odds_ratio": -0.16998696327209473, + "loss": 0.3081, + "rejected_geometric_mean": -5.201876640319824, + "step": 1894 + }, + { + "chosen_geometric_mean": -1.5063927173614502, + "epoch": 0.47, + "grad_norm": 25.125, + "learning_rate": 4.3562544678956105e-06, + "log_odds": 0.7990249395370483, + "log_odds_ratio": -0.497707724571228, + "loss": 0.3564, + "rejected_geometric_mean": -2.253329038619995, + "step": 1895 + }, + { + "chosen_geometric_mean": -1.1414930820465088, + "epoch": 0.47, + "grad_norm": 3.25, + "learning_rate": 4.355602251934277e-06, + "log_odds": 7.2489728927612305, + "log_odds_ratio": -0.16483165323734283, + "loss": 0.3225, + "rejected_geometric_mean": -8.07541561126709, + "step": 1896 + }, + { + "chosen_geometric_mean": -1.0554094314575195, + "epoch": 0.47, + "grad_norm": 15.625, + "learning_rate": 4.354949754619101e-06, + "log_odds": 2.4673476219177246, + "log_odds_ratio": -0.37175068259239197, + "loss": 0.3806, + "rejected_geometric_mean": -3.3104984760284424, + "step": 1897 + }, + { + "chosen_geometric_mean": -1.1346845626831055, + "epoch": 0.47, + "grad_norm": 4.40625, + "learning_rate": 4.354296976049015e-06, + "log_odds": 1.646906852722168, + "log_odds_ratio": -0.32685723900794983, + "loss": 0.2859, + "rejected_geometric_mean": -2.557199001312256, + "step": 1898 + }, + { + "chosen_geometric_mean": -1.123451590538025, + "epoch": 0.47, + "grad_norm": 6.9375, + "learning_rate": 4.353643916322997e-06, + "log_odds": 5.091062545776367, + "log_odds_ratio": -0.14049959182739258, + "loss": 0.2693, + "rejected_geometric_mean": -5.8668622970581055, + "step": 1899 + }, + { + "chosen_geometric_mean": -1.173458218574524, + "epoch": 0.47, + "grad_norm": 3.234375, + "learning_rate": 4.352990575540067e-06, + "log_odds": 4.393706321716309, + "log_odds_ratio": -0.24824142456054688, + "loss": 0.3024, + "rejected_geometric_mean": -5.321377754211426, + "step": 1900 + }, + { + "chosen_geometric_mean": -1.1226309537887573, + "epoch": 0.47, + "grad_norm": 5.5625, + "learning_rate": 4.352336953799287e-06, + "log_odds": 5.129857540130615, + "log_odds_ratio": -0.1507422775030136, + "loss": 0.3149, + "rejected_geometric_mean": -5.944531440734863, + "step": 1901 + }, + { + "chosen_geometric_mean": -1.1350970268249512, + "epoch": 0.47, + "grad_norm": 5.625, + "learning_rate": 4.3516830511997615e-06, + "log_odds": 3.2242236137390137, + "log_odds_ratio": -0.3273918628692627, + "loss": 0.3033, + "rejected_geometric_mean": -4.154085159301758, + "step": 1902 + }, + { + "chosen_geometric_mean": -1.0676445960998535, + "epoch": 0.47, + "grad_norm": 14.4375, + "learning_rate": 4.351028867840638e-06, + "log_odds": 1.4923975467681885, + "log_odds_ratio": -0.3621395528316498, + "loss": 0.2921, + "rejected_geometric_mean": -2.3150031566619873, + "step": 1903 + }, + { + "chosen_geometric_mean": -1.1177406311035156, + "epoch": 0.47, + "grad_norm": 6.40625, + "learning_rate": 4.350374403821105e-06, + "log_odds": 2.432931423187256, + "log_odds_ratio": -0.39395251870155334, + "loss": 0.3498, + "rejected_geometric_mean": -3.3410680294036865, + "step": 1904 + }, + { + "chosen_geometric_mean": -1.0625629425048828, + "epoch": 0.47, + "grad_norm": 19.625, + "learning_rate": 4.349719659240398e-06, + "log_odds": 3.751565933227539, + "log_odds_ratio": -0.22669798135757446, + "loss": 0.3166, + "rejected_geometric_mean": -4.550964832305908, + "step": 1905 + }, + { + "chosen_geometric_mean": -1.307382583618164, + "epoch": 0.47, + "grad_norm": 28.375, + "learning_rate": 4.349064634197788e-06, + "log_odds": 1.8115952014923096, + "log_odds_ratio": -0.3049500286579132, + "loss": 0.3398, + "rejected_geometric_mean": -2.9346649646759033, + "step": 1906 + }, + { + "chosen_geometric_mean": -0.9705257415771484, + "epoch": 0.47, + "grad_norm": 25.0, + "learning_rate": 4.348409328792596e-06, + "log_odds": 10.892723083496094, + "log_odds_ratio": -0.1648028939962387, + "loss": 0.315, + "rejected_geometric_mean": -11.452061653137207, + "step": 1907 + }, + { + "chosen_geometric_mean": -1.4805831909179688, + "epoch": 0.47, + "grad_norm": 16.375, + "learning_rate": 4.34775374312418e-06, + "log_odds": 2.6246159076690674, + "log_odds_ratio": -0.28452634811401367, + "loss": 0.3205, + "rejected_geometric_mean": -3.8967902660369873, + "step": 1908 + }, + { + "chosen_geometric_mean": -1.2486720085144043, + "epoch": 0.47, + "grad_norm": 3.0625, + "learning_rate": 4.3470978772919435e-06, + "log_odds": 2.2279105186462402, + "log_odds_ratio": -0.26311516761779785, + "loss": 0.2589, + "rejected_geometric_mean": -3.2618143558502197, + "step": 1909 + }, + { + "chosen_geometric_mean": -1.2104320526123047, + "epoch": 0.47, + "grad_norm": 9.125, + "learning_rate": 4.346441731395332e-06, + "log_odds": 3.5803542137145996, + "log_odds_ratio": -0.18266476690769196, + "loss": 0.2797, + "rejected_geometric_mean": -4.542218208312988, + "step": 1910 + }, + { + "chosen_geometric_mean": -1.050624132156372, + "epoch": 0.47, + "grad_norm": 2.984375, + "learning_rate": 4.345785305533831e-06, + "log_odds": 3.1620383262634277, + "log_odds_ratio": -0.09647135436534882, + "loss": 0.2926, + "rejected_geometric_mean": -3.824786901473999, + "step": 1911 + }, + { + "chosen_geometric_mean": -1.0758687257766724, + "epoch": 0.47, + "grad_norm": 3.59375, + "learning_rate": 4.345128599806972e-06, + "log_odds": 2.9252779483795166, + "log_odds_ratio": -0.2717047929763794, + "loss": 0.3354, + "rejected_geometric_mean": -3.6769330501556396, + "step": 1912 + }, + { + "chosen_geometric_mean": -1.2338182926177979, + "epoch": 0.47, + "grad_norm": 3.484375, + "learning_rate": 4.344471614314327e-06, + "log_odds": 2.900118112564087, + "log_odds_ratio": -0.42074528336524963, + "loss": 0.332, + "rejected_geometric_mean": -3.920743703842163, + "step": 1913 + }, + { + "chosen_geometric_mean": -0.9489054083824158, + "epoch": 0.47, + "grad_norm": 4.15625, + "learning_rate": 4.343814349155511e-06, + "log_odds": 1.9317302703857422, + "log_odds_ratio": -0.33190733194351196, + "loss": 0.3199, + "rejected_geometric_mean": -2.579881429672241, + "step": 1914 + }, + { + "chosen_geometric_mean": -1.1010959148406982, + "epoch": 0.47, + "grad_norm": 8.875, + "learning_rate": 4.343156804430181e-06, + "log_odds": 3.5426111221313477, + "log_odds_ratio": -0.23075781762599945, + "loss": 0.2864, + "rejected_geometric_mean": -4.383349895477295, + "step": 1915 + }, + { + "chosen_geometric_mean": -0.895933210849762, + "epoch": 0.47, + "grad_norm": 2.671875, + "learning_rate": 4.342498980238035e-06, + "log_odds": 1.8048954010009766, + "log_odds_ratio": -0.2479819655418396, + "loss": 0.3154, + "rejected_geometric_mean": -2.383577823638916, + "step": 1916 + }, + { + "chosen_geometric_mean": -1.10924232006073, + "epoch": 0.47, + "grad_norm": 4.03125, + "learning_rate": 4.341840876678817e-06, + "log_odds": 2.1544902324676514, + "log_odds_ratio": -0.20233109593391418, + "loss": 0.3303, + "rejected_geometric_mean": -2.9901680946350098, + "step": 1917 + }, + { + "chosen_geometric_mean": -1.094407558441162, + "epoch": 0.47, + "grad_norm": 4.65625, + "learning_rate": 4.3411824938523125e-06, + "log_odds": 5.711684703826904, + "log_odds_ratio": -0.23301240801811218, + "loss": 0.2955, + "rejected_geometric_mean": -6.539724826812744, + "step": 1918 + }, + { + "chosen_geometric_mean": -1.1957788467407227, + "epoch": 0.48, + "grad_norm": 3.9375, + "learning_rate": 4.340523831858345e-06, + "log_odds": 0.8106328248977661, + "log_odds_ratio": -0.4318689703941345, + "loss": 0.2763, + "rejected_geometric_mean": -1.8767921924591064, + "step": 1919 + }, + { + "chosen_geometric_mean": -1.0044761896133423, + "epoch": 0.48, + "grad_norm": 5.40625, + "learning_rate": 4.339864890796785e-06, + "log_odds": 8.122251510620117, + "log_odds_ratio": -0.3588823080062866, + "loss": 0.3691, + "rejected_geometric_mean": -8.883976936340332, + "step": 1920 + }, + { + "chosen_geometric_mean": -0.9969667792320251, + "epoch": 0.48, + "grad_norm": 7.65625, + "learning_rate": 4.3392056707675435e-06, + "log_odds": 1.4523934125900269, + "log_odds_ratio": -0.28623589873313904, + "loss": 0.369, + "rejected_geometric_mean": -2.133057117462158, + "step": 1921 + }, + { + "chosen_geometric_mean": -1.3741228580474854, + "epoch": 0.48, + "grad_norm": 4.53125, + "learning_rate": 4.338546171870574e-06, + "log_odds": 1.2719197273254395, + "log_odds_ratio": -0.4137195646762848, + "loss": 0.3027, + "rejected_geometric_mean": -2.4963314533233643, + "step": 1922 + }, + { + "chosen_geometric_mean": -0.9205586314201355, + "epoch": 0.48, + "grad_norm": 6.6875, + "learning_rate": 4.337886394205872e-06, + "log_odds": 6.633530616760254, + "log_odds_ratio": -0.1251620054244995, + "loss": 0.3203, + "rejected_geometric_mean": -7.136865615844727, + "step": 1923 + }, + { + "chosen_geometric_mean": -1.265774130821228, + "epoch": 0.48, + "grad_norm": 31.125, + "learning_rate": 4.337226337873477e-06, + "log_odds": 5.743187427520752, + "log_odds_ratio": -0.43628138303756714, + "loss": 0.3891, + "rejected_geometric_mean": -6.857210159301758, + "step": 1924 + }, + { + "chosen_geometric_mean": -0.9305793642997742, + "epoch": 0.48, + "grad_norm": 4.78125, + "learning_rate": 4.336566002973469e-06, + "log_odds": 2.5313212871551514, + "log_odds_ratio": -0.3564247488975525, + "loss": 0.3234, + "rejected_geometric_mean": -3.2565293312072754, + "step": 1925 + }, + { + "chosen_geometric_mean": -0.9726340174674988, + "epoch": 0.48, + "grad_norm": 3.828125, + "learning_rate": 4.335905389605969e-06, + "log_odds": 1.4064388275146484, + "log_odds_ratio": -0.2779981195926666, + "loss": 0.3392, + "rejected_geometric_mean": -2.0553972721099854, + "step": 1926 + }, + { + "chosen_geometric_mean": -0.9459094405174255, + "epoch": 0.48, + "grad_norm": 88.0, + "learning_rate": 4.335244497871144e-06, + "log_odds": 1.1336324214935303, + "log_odds_ratio": -0.3528977334499359, + "loss": 0.4857, + "rejected_geometric_mean": -1.7895855903625488, + "step": 1927 + }, + { + "chosen_geometric_mean": -2.565507173538208, + "epoch": 0.48, + "grad_norm": 69.0, + "learning_rate": 4.3345833278692e-06, + "log_odds": 12.047801971435547, + "log_odds_ratio": -0.08450356125831604, + "loss": 0.4407, + "rejected_geometric_mean": -14.174860954284668, + "step": 1928 + }, + { + "chosen_geometric_mean": -1.129230260848999, + "epoch": 0.48, + "grad_norm": 9.625, + "learning_rate": 4.333921879700386e-06, + "log_odds": 13.651174545288086, + "log_odds_ratio": -0.1877540796995163, + "loss": 0.2746, + "rejected_geometric_mean": -14.45516586303711, + "step": 1929 + }, + { + "chosen_geometric_mean": -1.6760185956954956, + "epoch": 0.48, + "grad_norm": 34.25, + "learning_rate": 4.333260153464993e-06, + "log_odds": 0.35195863246917725, + "log_odds_ratio": -0.6781618595123291, + "loss": 0.3458, + "rejected_geometric_mean": -1.9965442419052124, + "step": 1930 + }, + { + "chosen_geometric_mean": -1.0160173177719116, + "epoch": 0.48, + "grad_norm": 2.75, + "learning_rate": 4.332598149263356e-06, + "log_odds": 3.187653064727783, + "log_odds_ratio": -0.24135282635688782, + "loss": 0.263, + "rejected_geometric_mean": -3.892878770828247, + "step": 1931 + }, + { + "chosen_geometric_mean": -1.1254280805587769, + "epoch": 0.48, + "grad_norm": 26.625, + "learning_rate": 4.33193586719585e-06, + "log_odds": 0.6395018100738525, + "log_odds_ratio": -0.4303337633609772, + "loss": 0.3098, + "rejected_geometric_mean": -1.5935441255569458, + "step": 1932 + }, + { + "chosen_geometric_mean": -1.2499593496322632, + "epoch": 0.48, + "grad_norm": 26.375, + "learning_rate": 4.331273307362893e-06, + "log_odds": 5.504624843597412, + "log_odds_ratio": -0.22294816374778748, + "loss": 0.3506, + "rejected_geometric_mean": -6.541005611419678, + "step": 1933 + }, + { + "chosen_geometric_mean": -0.9135271310806274, + "epoch": 0.48, + "grad_norm": 3.765625, + "learning_rate": 4.3306104698649446e-06, + "log_odds": 3.7493977546691895, + "log_odds_ratio": -0.12318557500839233, + "loss": 0.3058, + "rejected_geometric_mean": -4.239701271057129, + "step": 1934 + }, + { + "chosen_geometric_mean": -1.006820559501648, + "epoch": 0.48, + "grad_norm": 16.75, + "learning_rate": 4.329947354802507e-06, + "log_odds": 2.9267494678497314, + "log_odds_ratio": -0.41735976934432983, + "loss": 0.3058, + "rejected_geometric_mean": -3.738030433654785, + "step": 1935 + }, + { + "chosen_geometric_mean": -1.2345266342163086, + "epoch": 0.48, + "grad_norm": 5.5, + "learning_rate": 4.329283962276125e-06, + "log_odds": 4.691279411315918, + "log_odds_ratio": -0.276014506816864, + "loss": 0.2802, + "rejected_geometric_mean": -5.70097541809082, + "step": 1936 + }, + { + "chosen_geometric_mean": -1.111280083656311, + "epoch": 0.48, + "grad_norm": 3.609375, + "learning_rate": 4.328620292386385e-06, + "log_odds": 3.5999317169189453, + "log_odds_ratio": -0.3987988829612732, + "loss": 0.3328, + "rejected_geometric_mean": -4.504088401794434, + "step": 1937 + }, + { + "chosen_geometric_mean": -0.8405176401138306, + "epoch": 0.48, + "grad_norm": 3.890625, + "learning_rate": 4.3279563452339135e-06, + "log_odds": 1.957524299621582, + "log_odds_ratio": -0.3788875341415405, + "loss": 0.2371, + "rejected_geometric_mean": -2.5002996921539307, + "step": 1938 + }, + { + "chosen_geometric_mean": -0.8063274621963501, + "epoch": 0.48, + "grad_norm": 4.75, + "learning_rate": 4.327292120919382e-06, + "log_odds": 3.2329134941101074, + "log_odds_ratio": -0.3720303773880005, + "loss": 0.2825, + "rejected_geometric_mean": -3.7839248180389404, + "step": 1939 + }, + { + "chosen_geometric_mean": -1.136153221130371, + "epoch": 0.48, + "grad_norm": 8.6875, + "learning_rate": 4.3266276195435035e-06, + "log_odds": 3.678231716156006, + "log_odds_ratio": -0.3068571090698242, + "loss": 0.2915, + "rejected_geometric_mean": -4.536818027496338, + "step": 1940 + }, + { + "chosen_geometric_mean": -1.380558967590332, + "epoch": 0.48, + "grad_norm": 3.296875, + "learning_rate": 4.325962841207031e-06, + "log_odds": 4.634082794189453, + "log_odds_ratio": -0.12243859469890594, + "loss": 0.2934, + "rejected_geometric_mean": -5.771267414093018, + "step": 1941 + }, + { + "chosen_geometric_mean": -1.3024559020996094, + "epoch": 0.48, + "grad_norm": 2.65625, + "learning_rate": 4.325297786010762e-06, + "log_odds": 6.968923568725586, + "log_odds_ratio": -0.11379227042198181, + "loss": 0.2777, + "rejected_geometric_mean": -8.00055980682373, + "step": 1942 + }, + { + "chosen_geometric_mean": -1.0072261095046997, + "epoch": 0.48, + "grad_norm": 7.8125, + "learning_rate": 4.3246324540555334e-06, + "log_odds": 1.5024967193603516, + "log_odds_ratio": -0.3491537570953369, + "loss": 0.3597, + "rejected_geometric_mean": -2.2242815494537354, + "step": 1943 + }, + { + "chosen_geometric_mean": -1.0763174295425415, + "epoch": 0.48, + "grad_norm": 2.65625, + "learning_rate": 4.323966845442227e-06, + "log_odds": 2.3682472705841064, + "log_odds_ratio": -0.2324390411376953, + "loss": 0.2868, + "rejected_geometric_mean": -3.148684024810791, + "step": 1944 + }, + { + "chosen_geometric_mean": -1.2601555585861206, + "epoch": 0.48, + "grad_norm": 1.828125, + "learning_rate": 4.3233009602717644e-06, + "log_odds": 1.41164231300354, + "log_odds_ratio": -0.34592151641845703, + "loss": 0.2527, + "rejected_geometric_mean": -2.497947931289673, + "step": 1945 + }, + { + "chosen_geometric_mean": -1.0417492389678955, + "epoch": 0.48, + "grad_norm": 14.25, + "learning_rate": 4.32263479864511e-06, + "log_odds": 7.0167646408081055, + "log_odds_ratio": -0.024571113288402557, + "loss": 0.2423, + "rejected_geometric_mean": -7.627270698547363, + "step": 1946 + }, + { + "chosen_geometric_mean": -1.0314656496047974, + "epoch": 0.48, + "grad_norm": 1.890625, + "learning_rate": 4.32196836066327e-06, + "log_odds": 6.663778305053711, + "log_odds_ratio": -0.10734040290117264, + "loss": 0.2761, + "rejected_geometric_mean": -7.311849594116211, + "step": 1947 + }, + { + "chosen_geometric_mean": -1.2408335208892822, + "epoch": 0.48, + "grad_norm": 4.0, + "learning_rate": 4.321301646427292e-06, + "log_odds": 5.543487548828125, + "log_odds_ratio": -0.05860184133052826, + "loss": 0.2984, + "rejected_geometric_mean": -6.465704917907715, + "step": 1948 + }, + { + "chosen_geometric_mean": -1.7357902526855469, + "epoch": 0.48, + "grad_norm": 47.0, + "learning_rate": 4.3206346560382665e-06, + "log_odds": 5.59035062789917, + "log_odds_ratio": -0.05939514562487602, + "loss": 0.3592, + "rejected_geometric_mean": -7.015669822692871, + "step": 1949 + }, + { + "chosen_geometric_mean": -1.2107782363891602, + "epoch": 0.48, + "grad_norm": 3.015625, + "learning_rate": 4.319967389597324e-06, + "log_odds": 4.270669937133789, + "log_odds_ratio": -0.26336175203323364, + "loss": 0.2656, + "rejected_geometric_mean": -5.213716983795166, + "step": 1950 + }, + { + "chosen_geometric_mean": -1.0681949853897095, + "epoch": 0.48, + "grad_norm": 6.03125, + "learning_rate": 4.319299847205639e-06, + "log_odds": 5.448994159698486, + "log_odds_ratio": -0.18990851938724518, + "loss": 0.3069, + "rejected_geometric_mean": -6.163477420806885, + "step": 1951 + }, + { + "chosen_geometric_mean": -1.157964825630188, + "epoch": 0.48, + "grad_norm": 44.5, + "learning_rate": 4.318632028964427e-06, + "log_odds": 0.6363164186477661, + "log_odds_ratio": -0.5168014764785767, + "loss": 0.4466, + "rejected_geometric_mean": -1.7081035375595093, + "step": 1952 + }, + { + "chosen_geometric_mean": -1.139581561088562, + "epoch": 0.48, + "grad_norm": 7.59375, + "learning_rate": 4.317963934974945e-06, + "log_odds": 7.158350467681885, + "log_odds_ratio": -0.3148055076599121, + "loss": 0.3274, + "rejected_geometric_mean": -8.071843147277832, + "step": 1953 + }, + { + "chosen_geometric_mean": -1.3810361623764038, + "epoch": 0.48, + "grad_norm": 24.875, + "learning_rate": 4.317295565338492e-06, + "log_odds": 6.6147356033325195, + "log_odds_ratio": -0.24206872284412384, + "loss": 0.336, + "rejected_geometric_mean": -7.7490034103393555, + "step": 1954 + }, + { + "chosen_geometric_mean": -1.1755403280258179, + "epoch": 0.48, + "grad_norm": 41.0, + "learning_rate": 4.3166269201564094e-06, + "log_odds": 1.1637202501296997, + "log_odds_ratio": -0.45567911863327026, + "loss": 0.4513, + "rejected_geometric_mean": -2.236215353012085, + "step": 1955 + }, + { + "chosen_geometric_mean": -1.0696109533309937, + "epoch": 0.48, + "grad_norm": 11.5625, + "learning_rate": 4.31595799953008e-06, + "log_odds": 2.396824359893799, + "log_odds_ratio": -0.2771226167678833, + "loss": 0.278, + "rejected_geometric_mean": -3.1992080211639404, + "step": 1956 + }, + { + "chosen_geometric_mean": -1.2478805780410767, + "epoch": 0.48, + "grad_norm": 4.84375, + "learning_rate": 4.3152888035609265e-06, + "log_odds": 1.2149393558502197, + "log_odds_ratio": -0.3295521140098572, + "loss": 0.2906, + "rejected_geometric_mean": -2.2880382537841797, + "step": 1957 + }, + { + "chosen_geometric_mean": -0.912968099117279, + "epoch": 0.48, + "grad_norm": 5.75, + "learning_rate": 4.314619332350417e-06, + "log_odds": 2.5188817977905273, + "log_odds_ratio": -0.2745165228843689, + "loss": 0.3037, + "rejected_geometric_mean": -3.123922824859619, + "step": 1958 + }, + { + "chosen_geometric_mean": -1.3817476034164429, + "epoch": 0.49, + "grad_norm": 2.375, + "learning_rate": 4.313949586000059e-06, + "log_odds": 4.686833381652832, + "log_odds_ratio": -0.13803651928901672, + "loss": 0.2719, + "rejected_geometric_mean": -5.819026947021484, + "step": 1959 + }, + { + "chosen_geometric_mean": -1.2347102165222168, + "epoch": 0.49, + "grad_norm": 6.0, + "learning_rate": 4.313279564611403e-06, + "log_odds": 2.4949004650115967, + "log_odds_ratio": -0.46834784746170044, + "loss": 0.3086, + "rejected_geometric_mean": -3.6172523498535156, + "step": 1960 + }, + { + "chosen_geometric_mean": -0.9425944089889526, + "epoch": 0.49, + "grad_norm": 15.625, + "learning_rate": 4.312609268286036e-06, + "log_odds": 0.8506931066513062, + "log_odds_ratio": -0.40481919050216675, + "loss": 0.3484, + "rejected_geometric_mean": -1.5442463159561157, + "step": 1961 + }, + { + "chosen_geometric_mean": -1.069062352180481, + "epoch": 0.49, + "grad_norm": 6.78125, + "learning_rate": 4.3119386971255975e-06, + "log_odds": 5.091161251068115, + "log_odds_ratio": -0.06823226064443588, + "loss": 0.2351, + "rejected_geometric_mean": -5.725925922393799, + "step": 1962 + }, + { + "chosen_geometric_mean": -1.1417808532714844, + "epoch": 0.49, + "grad_norm": 18.5, + "learning_rate": 4.311267851231758e-06, + "log_odds": 2.797560214996338, + "log_odds_ratio": -0.34248167276382446, + "loss": 0.3467, + "rejected_geometric_mean": -3.5824520587921143, + "step": 1963 + }, + { + "chosen_geometric_mean": -1.1280076503753662, + "epoch": 0.49, + "grad_norm": 2.640625, + "learning_rate": 4.310596730706234e-06, + "log_odds": 2.453603744506836, + "log_odds_ratio": -0.29939940571784973, + "loss": 0.3287, + "rejected_geometric_mean": -3.3270821571350098, + "step": 1964 + }, + { + "chosen_geometric_mean": -1.0857746601104736, + "epoch": 0.49, + "grad_norm": 8.5625, + "learning_rate": 4.309925335650784e-06, + "log_odds": 3.744567632675171, + "log_odds_ratio": -0.24363695085048676, + "loss": 0.3322, + "rejected_geometric_mean": -4.552492141723633, + "step": 1965 + }, + { + "chosen_geometric_mean": -1.1694408655166626, + "epoch": 0.49, + "grad_norm": 2.75, + "learning_rate": 4.309253666167208e-06, + "log_odds": 1.608594298362732, + "log_odds_ratio": -0.33185499906539917, + "loss": 0.2857, + "rejected_geometric_mean": -2.573310375213623, + "step": 1966 + }, + { + "chosen_geometric_mean": -0.9468762874603271, + "epoch": 0.49, + "grad_norm": 7.21875, + "learning_rate": 4.308581722357346e-06, + "log_odds": 5.991353988647461, + "log_odds_ratio": -0.004344041459262371, + "loss": 0.2914, + "rejected_geometric_mean": -6.399165153503418, + "step": 1967 + }, + { + "chosen_geometric_mean": -1.2059118747711182, + "epoch": 0.49, + "grad_norm": 2.859375, + "learning_rate": 4.307909504323084e-06, + "log_odds": 3.469906806945801, + "log_odds_ratio": -0.17709392309188843, + "loss": 0.3098, + "rejected_geometric_mean": -4.369644641876221, + "step": 1968 + }, + { + "chosen_geometric_mean": -1.1164650917053223, + "epoch": 0.49, + "grad_norm": 2.5, + "learning_rate": 4.307237012166344e-06, + "log_odds": 1.2233023643493652, + "log_odds_ratio": -0.3416926860809326, + "loss": 0.2881, + "rejected_geometric_mean": -2.117253065109253, + "step": 1969 + }, + { + "chosen_geometric_mean": -1.1846306324005127, + "epoch": 0.49, + "grad_norm": 16.375, + "learning_rate": 4.306564245989091e-06, + "log_odds": 3.015040397644043, + "log_odds_ratio": -0.24686451256275177, + "loss": 0.3053, + "rejected_geometric_mean": -3.9196295738220215, + "step": 1970 + }, + { + "chosen_geometric_mean": -0.9710975885391235, + "epoch": 0.49, + "grad_norm": 7.8125, + "learning_rate": 4.305891205893334e-06, + "log_odds": 6.17336368560791, + "log_odds_ratio": -0.11318870633840561, + "loss": 0.3561, + "rejected_geometric_mean": -6.730748176574707, + "step": 1971 + }, + { + "chosen_geometric_mean": -1.2988446950912476, + "epoch": 0.49, + "grad_norm": 8.4375, + "learning_rate": 4.305217891981123e-06, + "log_odds": 2.064298629760742, + "log_odds_ratio": -0.18239858746528625, + "loss": 0.2857, + "rejected_geometric_mean": -3.117013454437256, + "step": 1972 + }, + { + "chosen_geometric_mean": -1.2014131546020508, + "epoch": 0.49, + "grad_norm": 3.296875, + "learning_rate": 4.304544304354546e-06, + "log_odds": 1.620223045349121, + "log_odds_ratio": -0.26197323203086853, + "loss": 0.2786, + "rejected_geometric_mean": -2.5838921070098877, + "step": 1973 + }, + { + "chosen_geometric_mean": -1.6216535568237305, + "epoch": 0.49, + "grad_norm": 42.25, + "learning_rate": 4.303870443115736e-06, + "log_odds": 1.7860374450683594, + "log_odds_ratio": -0.6540462970733643, + "loss": 0.3813, + "rejected_geometric_mean": -3.4578847885131836, + "step": 1974 + }, + { + "chosen_geometric_mean": -1.2135214805603027, + "epoch": 0.49, + "grad_norm": 27.75, + "learning_rate": 4.303196308366867e-06, + "log_odds": 4.284836769104004, + "log_odds_ratio": -0.06743679940700531, + "loss": 0.39, + "rejected_geometric_mean": -5.161427974700928, + "step": 1975 + }, + { + "chosen_geometric_mean": -1.0642389059066772, + "epoch": 0.49, + "grad_norm": 7.21875, + "learning_rate": 4.302521900210155e-06, + "log_odds": 5.302975654602051, + "log_odds_ratio": -0.2040095031261444, + "loss": 0.3983, + "rejected_geometric_mean": -6.03493070602417, + "step": 1976 + }, + { + "chosen_geometric_mean": -0.9042975902557373, + "epoch": 0.49, + "grad_norm": 25.375, + "learning_rate": 4.301847218747855e-06, + "log_odds": 1.470625638961792, + "log_odds_ratio": -0.3039708733558655, + "loss": 0.3758, + "rejected_geometric_mean": -2.0463449954986572, + "step": 1977 + }, + { + "chosen_geometric_mean": -0.9740897417068481, + "epoch": 0.49, + "grad_norm": 2.953125, + "learning_rate": 4.3011722640822644e-06, + "log_odds": 1.2216942310333252, + "log_odds_ratio": -0.3868430256843567, + "loss": 0.3031, + "rejected_geometric_mean": -1.983203411102295, + "step": 1978 + }, + { + "chosen_geometric_mean": -1.1707653999328613, + "epoch": 0.49, + "grad_norm": 11.5625, + "learning_rate": 4.300497036315724e-06, + "log_odds": 2.7796924114227295, + "log_odds_ratio": -0.18477188050746918, + "loss": 0.3064, + "rejected_geometric_mean": -3.6787800788879395, + "step": 1979 + }, + { + "chosen_geometric_mean": -1.1855998039245605, + "epoch": 0.49, + "grad_norm": 2.84375, + "learning_rate": 4.2998215355506135e-06, + "log_odds": 0.36889487504959106, + "log_odds_ratio": -0.5540237426757812, + "loss": 0.3199, + "rejected_geometric_mean": -1.4809271097183228, + "step": 1980 + }, + { + "chosen_geometric_mean": -0.9614263772964478, + "epoch": 0.49, + "grad_norm": 2.390625, + "learning_rate": 4.299145761889356e-06, + "log_odds": 1.8580589294433594, + "log_odds_ratio": -0.1713939607143402, + "loss": 0.279, + "rejected_geometric_mean": -2.4321084022521973, + "step": 1981 + }, + { + "chosen_geometric_mean": -1.2376081943511963, + "epoch": 0.49, + "grad_norm": 2.109375, + "learning_rate": 4.298469715434414e-06, + "log_odds": 2.313776969909668, + "log_odds_ratio": -0.1726880967617035, + "loss": 0.3043, + "rejected_geometric_mean": -3.252471923828125, + "step": 1982 + }, + { + "chosen_geometric_mean": -1.1325883865356445, + "epoch": 0.49, + "grad_norm": 3.65625, + "learning_rate": 4.2977933962882935e-06, + "log_odds": 4.09757661819458, + "log_odds_ratio": -0.18632620573043823, + "loss": 0.3068, + "rejected_geometric_mean": -4.884902000427246, + "step": 1983 + }, + { + "chosen_geometric_mean": -1.146950364112854, + "epoch": 0.49, + "grad_norm": 5.9375, + "learning_rate": 4.29711680455354e-06, + "log_odds": 0.5116978883743286, + "log_odds_ratio": -0.49448129534721375, + "loss": 0.293, + "rejected_geometric_mean": -1.5290930271148682, + "step": 1984 + }, + { + "chosen_geometric_mean": -1.1280512809753418, + "epoch": 0.49, + "grad_norm": 3.046875, + "learning_rate": 4.29643994033274e-06, + "log_odds": 5.537474632263184, + "log_odds_ratio": -0.26903361082077026, + "loss": 0.3298, + "rejected_geometric_mean": -6.4020094871521, + "step": 1985 + }, + { + "chosen_geometric_mean": -1.0607619285583496, + "epoch": 0.49, + "grad_norm": 17.5, + "learning_rate": 4.295762803728524e-06, + "log_odds": 2.7605679035186768, + "log_odds_ratio": -0.3253716826438904, + "loss": 0.3535, + "rejected_geometric_mean": -3.5878214836120605, + "step": 1986 + }, + { + "chosen_geometric_mean": -1.1157822608947754, + "epoch": 0.49, + "grad_norm": 3.765625, + "learning_rate": 4.295085394843561e-06, + "log_odds": 5.0751471519470215, + "log_odds_ratio": -0.446789026260376, + "loss": 0.3141, + "rejected_geometric_mean": -5.994556427001953, + "step": 1987 + }, + { + "chosen_geometric_mean": -0.905455470085144, + "epoch": 0.49, + "grad_norm": 41.0, + "learning_rate": 4.294407713780564e-06, + "log_odds": 5.623911380767822, + "log_odds_ratio": -0.2743135690689087, + "loss": 0.3046, + "rejected_geometric_mean": -6.16546106338501, + "step": 1988 + }, + { + "chosen_geometric_mean": -1.1587527990341187, + "epoch": 0.49, + "grad_norm": 7.90625, + "learning_rate": 4.293729760642284e-06, + "log_odds": 2.159202814102173, + "log_odds_ratio": -0.42342305183410645, + "loss": 0.3693, + "rejected_geometric_mean": -3.093648910522461, + "step": 1989 + }, + { + "chosen_geometric_mean": -1.1200191974639893, + "epoch": 0.49, + "grad_norm": 19.125, + "learning_rate": 4.293051535531517e-06, + "log_odds": 1.5004444122314453, + "log_odds_ratio": -0.40265876054763794, + "loss": 0.3773, + "rejected_geometric_mean": -2.459501028060913, + "step": 1990 + }, + { + "chosen_geometric_mean": -1.1240017414093018, + "epoch": 0.49, + "grad_norm": 3.84375, + "learning_rate": 4.292373038551096e-06, + "log_odds": 3.1577041149139404, + "log_odds_ratio": -0.21777638792991638, + "loss": 0.3212, + "rejected_geometric_mean": -4.0048723220825195, + "step": 1991 + }, + { + "chosen_geometric_mean": -1.266579270362854, + "epoch": 0.49, + "grad_norm": 14.375, + "learning_rate": 4.291694269803899e-06, + "log_odds": 3.9392471313476562, + "log_odds_ratio": -0.32496145367622375, + "loss": 0.338, + "rejected_geometric_mean": -4.996890068054199, + "step": 1992 + }, + { + "chosen_geometric_mean": -1.2943942546844482, + "epoch": 0.49, + "grad_norm": 29.5, + "learning_rate": 4.291015229392842e-06, + "log_odds": 4.3092145919799805, + "log_odds_ratio": -0.1812593787908554, + "loss": 0.3095, + "rejected_geometric_mean": -5.38193416595459, + "step": 1993 + }, + { + "chosen_geometric_mean": -1.3937883377075195, + "epoch": 0.49, + "grad_norm": 22.25, + "learning_rate": 4.2903359174208856e-06, + "log_odds": 7.216974258422852, + "log_odds_ratio": -0.2420506477355957, + "loss": 0.45, + "rejected_geometric_mean": -8.385390281677246, + "step": 1994 + }, + { + "chosen_geometric_mean": -1.4285898208618164, + "epoch": 0.49, + "grad_norm": 37.0, + "learning_rate": 4.289656333991029e-06, + "log_odds": 3.278989553451538, + "log_odds_ratio": -0.23215889930725098, + "loss": 0.3672, + "rejected_geometric_mean": -4.527440071105957, + "step": 1995 + }, + { + "chosen_geometric_mean": -1.0682462453842163, + "epoch": 0.49, + "grad_norm": 17.875, + "learning_rate": 4.288976479206313e-06, + "log_odds": 4.774545669555664, + "log_odds_ratio": -0.15115126967430115, + "loss": 0.2809, + "rejected_geometric_mean": -5.475611686706543, + "step": 1996 + }, + { + "chosen_geometric_mean": -1.3389126062393188, + "epoch": 0.49, + "grad_norm": 16.625, + "learning_rate": 4.288296353169821e-06, + "log_odds": 4.002577781677246, + "log_odds_ratio": -0.2793644666671753, + "loss": 0.3578, + "rejected_geometric_mean": -5.139666557312012, + "step": 1997 + }, + { + "chosen_geometric_mean": -1.1881968975067139, + "epoch": 0.49, + "grad_norm": 23.0, + "learning_rate": 4.287615955984676e-06, + "log_odds": 4.180443286895752, + "log_odds_ratio": -0.13744860887527466, + "loss": 0.3212, + "rejected_geometric_mean": -5.050036430358887, + "step": 1998 + }, + { + "chosen_geometric_mean": -1.1212413311004639, + "epoch": 0.49, + "grad_norm": 12.5625, + "learning_rate": 4.286935287754041e-06, + "log_odds": 1.732574224472046, + "log_odds_ratio": -0.29354235529899597, + "loss": 0.2632, + "rejected_geometric_mean": -2.6587133407592773, + "step": 1999 + }, + { + "chosen_geometric_mean": -0.9828506708145142, + "epoch": 0.5, + "grad_norm": 2.609375, + "learning_rate": 4.286254348581124e-06, + "log_odds": 3.030348777770996, + "log_odds_ratio": -0.35231971740722656, + "loss": 0.2876, + "rejected_geometric_mean": -3.74151611328125, + "step": 2000 + }, + { + "chosen_geometric_mean": -1.2833315134048462, + "epoch": 0.5, + "grad_norm": 7.0625, + "learning_rate": 4.2855731385691696e-06, + "log_odds": 1.9042353630065918, + "log_odds_ratio": -0.37749096751213074, + "loss": 0.3531, + "rejected_geometric_mean": -3.0404152870178223, + "step": 2001 + }, + { + "chosen_geometric_mean": -1.1673403978347778, + "epoch": 0.5, + "grad_norm": 3.390625, + "learning_rate": 4.284891657821467e-06, + "log_odds": 1.9705674648284912, + "log_odds_ratio": -0.16701483726501465, + "loss": 0.2687, + "rejected_geometric_mean": -2.8282177448272705, + "step": 2002 + }, + { + "chosen_geometric_mean": -1.0866165161132812, + "epoch": 0.5, + "grad_norm": 3.0625, + "learning_rate": 4.284209906441345e-06, + "log_odds": 0.15039251744747162, + "log_odds_ratio": -0.6389544010162354, + "loss": 0.3209, + "rejected_geometric_mean": -1.2110304832458496, + "step": 2003 + }, + { + "chosen_geometric_mean": -0.9640308618545532, + "epoch": 0.5, + "grad_norm": 2.078125, + "learning_rate": 4.2835278845321735e-06, + "log_odds": 4.11205530166626, + "log_odds_ratio": -0.11114349216222763, + "loss": 0.2935, + "rejected_geometric_mean": -4.665201187133789, + "step": 2004 + }, + { + "chosen_geometric_mean": -1.1431214809417725, + "epoch": 0.5, + "grad_norm": 4.40625, + "learning_rate": 4.282845592197362e-06, + "log_odds": 3.253934383392334, + "log_odds_ratio": -0.16900302469730377, + "loss": 0.3327, + "rejected_geometric_mean": -4.084853172302246, + "step": 2005 + }, + { + "chosen_geometric_mean": -1.2894619703292847, + "epoch": 0.5, + "grad_norm": 3.234375, + "learning_rate": 4.2821630295403646e-06, + "log_odds": 3.7350900173187256, + "log_odds_ratio": -0.3036467432975769, + "loss": 0.3069, + "rejected_geometric_mean": -4.8458051681518555, + "step": 2006 + }, + { + "chosen_geometric_mean": -0.8990203738212585, + "epoch": 0.5, + "grad_norm": 2.28125, + "learning_rate": 4.281480196664672e-06, + "log_odds": 1.159942626953125, + "log_odds_ratio": -0.3742258548736572, + "loss": 0.2854, + "rejected_geometric_mean": -1.8080003261566162, + "step": 2007 + }, + { + "chosen_geometric_mean": -1.0683395862579346, + "epoch": 0.5, + "grad_norm": 3.109375, + "learning_rate": 4.28079709367382e-06, + "log_odds": 3.985682249069214, + "log_odds_ratio": -0.2781466841697693, + "loss": 0.3133, + "rejected_geometric_mean": -4.784601211547852, + "step": 2008 + }, + { + "chosen_geometric_mean": -1.2021417617797852, + "epoch": 0.5, + "grad_norm": 17.375, + "learning_rate": 4.2801137206713826e-06, + "log_odds": 3.7030389308929443, + "log_odds_ratio": -0.20504632592201233, + "loss": 0.3169, + "rejected_geometric_mean": -4.628543376922607, + "step": 2009 + }, + { + "chosen_geometric_mean": -1.2144421339035034, + "epoch": 0.5, + "grad_norm": 5.90625, + "learning_rate": 4.279430077760974e-06, + "log_odds": 0.6025955080986023, + "log_odds_ratio": -0.48809802532196045, + "loss": 0.2504, + "rejected_geometric_mean": -1.6907649040222168, + "step": 2010 + }, + { + "chosen_geometric_mean": -0.9644778966903687, + "epoch": 0.5, + "grad_norm": 10.625, + "learning_rate": 4.278746165046254e-06, + "log_odds": 7.084109306335449, + "log_odds_ratio": -0.03658608719706535, + "loss": 0.3038, + "rejected_geometric_mean": -7.578919410705566, + "step": 2011 + }, + { + "chosen_geometric_mean": -1.3539752960205078, + "epoch": 0.5, + "grad_norm": 11.4375, + "learning_rate": 4.278061982630918e-06, + "log_odds": 1.070633888244629, + "log_odds_ratio": -0.4103385806083679, + "loss": 0.2788, + "rejected_geometric_mean": -2.318732976913452, + "step": 2012 + }, + { + "chosen_geometric_mean": -0.9976054430007935, + "epoch": 0.5, + "grad_norm": 4.21875, + "learning_rate": 4.2773775306187046e-06, + "log_odds": 5.3493499755859375, + "log_odds_ratio": -0.248824343085289, + "loss": 0.2963, + "rejected_geometric_mean": -6.027142524719238, + "step": 2013 + }, + { + "chosen_geometric_mean": -1.1196037530899048, + "epoch": 0.5, + "grad_norm": 15.0625, + "learning_rate": 4.276692809113393e-06, + "log_odds": 3.9626855850219727, + "log_odds_ratio": -0.18703174591064453, + "loss": 0.2929, + "rejected_geometric_mean": -4.769087791442871, + "step": 2014 + }, + { + "chosen_geometric_mean": -1.098982334136963, + "epoch": 0.5, + "grad_norm": 10.5, + "learning_rate": 4.276007818218805e-06, + "log_odds": 9.267210006713867, + "log_odds_ratio": -0.1485578715801239, + "loss": 0.2789, + "rejected_geometric_mean": -9.983552932739258, + "step": 2015 + }, + { + "chosen_geometric_mean": -1.6312856674194336, + "epoch": 0.5, + "grad_norm": 27.625, + "learning_rate": 4.2753225580388e-06, + "log_odds": 1.0885424613952637, + "log_odds_ratio": -0.7905142307281494, + "loss": 0.3223, + "rejected_geometric_mean": -2.5754246711730957, + "step": 2016 + }, + { + "chosen_geometric_mean": -1.344544768333435, + "epoch": 0.5, + "grad_norm": 26.875, + "learning_rate": 4.2746370286772805e-06, + "log_odds": 3.4056360721588135, + "log_odds_ratio": -0.48599791526794434, + "loss": 0.3341, + "rejected_geometric_mean": -4.620262145996094, + "step": 2017 + }, + { + "chosen_geometric_mean": -1.3756550550460815, + "epoch": 0.5, + "grad_norm": 20.75, + "learning_rate": 4.2739512302381895e-06, + "log_odds": 3.214873790740967, + "log_odds_ratio": -0.1480293571949005, + "loss": 0.3032, + "rejected_geometric_mean": -4.3485541343688965, + "step": 2018 + }, + { + "chosen_geometric_mean": -1.0676138401031494, + "epoch": 0.5, + "grad_norm": 3.03125, + "learning_rate": 4.2732651628255105e-06, + "log_odds": 2.5662059783935547, + "log_odds_ratio": -0.36499255895614624, + "loss": 0.2974, + "rejected_geometric_mean": -3.433378219604492, + "step": 2019 + }, + { + "chosen_geometric_mean": -1.3001484870910645, + "epoch": 0.5, + "grad_norm": 3.296875, + "learning_rate": 4.272578826543267e-06, + "log_odds": 1.44992995262146, + "log_odds_ratio": -0.44538310170173645, + "loss": 0.3323, + "rejected_geometric_mean": -2.618332624435425, + "step": 2020 + }, + { + "chosen_geometric_mean": -1.167516827583313, + "epoch": 0.5, + "grad_norm": 3.453125, + "learning_rate": 4.271892221495525e-06, + "log_odds": 1.6408156156539917, + "log_odds_ratio": -0.43254831433296204, + "loss": 0.3234, + "rejected_geometric_mean": -2.6750831604003906, + "step": 2021 + }, + { + "chosen_geometric_mean": -1.264995813369751, + "epoch": 0.5, + "grad_norm": 4.25, + "learning_rate": 4.2712053477863895e-06, + "log_odds": 0.1792137622833252, + "log_odds_ratio": -0.6312735676765442, + "loss": 0.3365, + "rejected_geometric_mean": -1.4060015678405762, + "step": 2022 + }, + { + "chosen_geometric_mean": -1.1031711101531982, + "epoch": 0.5, + "grad_norm": 4.4375, + "learning_rate": 4.270518205520008e-06, + "log_odds": 0.5600261688232422, + "log_odds_ratio": -0.4731821119785309, + "loss": 0.362, + "rejected_geometric_mean": -1.5295482873916626, + "step": 2023 + }, + { + "chosen_geometric_mean": -1.1447213888168335, + "epoch": 0.5, + "grad_norm": 5.46875, + "learning_rate": 4.2698307948005654e-06, + "log_odds": 2.8770229816436768, + "log_odds_ratio": -0.18822580575942993, + "loss": 0.3261, + "rejected_geometric_mean": -3.714352607727051, + "step": 2024 + }, + { + "chosen_geometric_mean": -1.3786554336547852, + "epoch": 0.5, + "grad_norm": 2.328125, + "learning_rate": 4.269143115732293e-06, + "log_odds": 1.572445273399353, + "log_odds_ratio": -0.4543371796607971, + "loss": 0.3151, + "rejected_geometric_mean": -2.798872947692871, + "step": 2025 + }, + { + "chosen_geometric_mean": -1.1167311668395996, + "epoch": 0.5, + "grad_norm": 7.03125, + "learning_rate": 4.268455168419458e-06, + "log_odds": 3.8414146900177, + "log_odds_ratio": -0.2804562449455261, + "loss": 0.2778, + "rejected_geometric_mean": -4.708870887756348, + "step": 2026 + }, + { + "chosen_geometric_mean": -1.0927608013153076, + "epoch": 0.5, + "grad_norm": 6.25, + "learning_rate": 4.267766952966369e-06, + "log_odds": 3.6427769660949707, + "log_odds_ratio": -0.13380418717861176, + "loss": 0.2888, + "rejected_geometric_mean": -4.379271507263184, + "step": 2027 + }, + { + "chosen_geometric_mean": -1.140842318534851, + "epoch": 0.5, + "grad_norm": 2.515625, + "learning_rate": 4.267078469477377e-06, + "log_odds": 7.35347843170166, + "log_odds_ratio": -0.06509886682033539, + "loss": 0.2974, + "rejected_geometric_mean": -8.137836456298828, + "step": 2028 + }, + { + "chosen_geometric_mean": -1.1867674589157104, + "epoch": 0.5, + "grad_norm": 4.59375, + "learning_rate": 4.266389718056872e-06, + "log_odds": 1.3076162338256836, + "log_odds_ratio": -0.46613264083862305, + "loss": 0.3485, + "rejected_geometric_mean": -2.3593850135803223, + "step": 2029 + }, + { + "chosen_geometric_mean": -1.1224379539489746, + "epoch": 0.5, + "grad_norm": 23.5, + "learning_rate": 4.265700698809285e-06, + "log_odds": 3.0197527408599854, + "log_odds_ratio": -0.2830209732055664, + "loss": 0.3382, + "rejected_geometric_mean": -3.933762311935425, + "step": 2030 + }, + { + "chosen_geometric_mean": -1.273395299911499, + "epoch": 0.5, + "grad_norm": 25.25, + "learning_rate": 4.265011411839089e-06, + "log_odds": 2.399149179458618, + "log_odds_ratio": -0.3613073229789734, + "loss": 0.3359, + "rejected_geometric_mean": -3.4949307441711426, + "step": 2031 + }, + { + "chosen_geometric_mean": -1.144340991973877, + "epoch": 0.5, + "grad_norm": 12.0625, + "learning_rate": 4.264321857250796e-06, + "log_odds": 3.296442747116089, + "log_odds_ratio": -0.09841437637805939, + "loss": 0.3064, + "rejected_geometric_mean": -4.0768842697143555, + "step": 2032 + }, + { + "chosen_geometric_mean": -1.2202895879745483, + "epoch": 0.5, + "grad_norm": 6.9375, + "learning_rate": 4.263632035148959e-06, + "log_odds": 1.0102499723434448, + "log_odds_ratio": -0.4206584692001343, + "loss": 0.2899, + "rejected_geometric_mean": -2.1104984283447266, + "step": 2033 + }, + { + "chosen_geometric_mean": -1.1277408599853516, + "epoch": 0.5, + "grad_norm": 6.3125, + "learning_rate": 4.26294194563817e-06, + "log_odds": 0.778621256351471, + "log_odds_ratio": -0.5029101967811584, + "loss": 0.3095, + "rejected_geometric_mean": -1.81171715259552, + "step": 2034 + }, + { + "chosen_geometric_mean": -0.8961904644966125, + "epoch": 0.5, + "grad_norm": 6.65625, + "learning_rate": 4.262251588823065e-06, + "log_odds": 2.5669682025909424, + "log_odds_ratio": -0.24424651265144348, + "loss": 0.2969, + "rejected_geometric_mean": -3.124441385269165, + "step": 2035 + }, + { + "chosen_geometric_mean": -1.1107978820800781, + "epoch": 0.5, + "grad_norm": 7.5625, + "learning_rate": 4.261560964808319e-06, + "log_odds": 4.154030799865723, + "log_odds_ratio": -0.103546142578125, + "loss": 0.2733, + "rejected_geometric_mean": -4.891605377197266, + "step": 2036 + }, + { + "chosen_geometric_mean": -1.191691279411316, + "epoch": 0.5, + "grad_norm": 13.9375, + "learning_rate": 4.260870073698646e-06, + "log_odds": 3.734450340270996, + "log_odds_ratio": -0.3042978048324585, + "loss": 0.3501, + "rejected_geometric_mean": -4.707571029663086, + "step": 2037 + }, + { + "chosen_geometric_mean": -1.0467685461044312, + "epoch": 0.5, + "grad_norm": 21.125, + "learning_rate": 4.260178915598802e-06, + "log_odds": 4.282810688018799, + "log_odds_ratio": -0.07151514291763306, + "loss": 0.3717, + "rejected_geometric_mean": -4.930585861206055, + "step": 2038 + }, + { + "chosen_geometric_mean": -1.229313611984253, + "epoch": 0.5, + "grad_norm": 23.5, + "learning_rate": 4.259487490613583e-06, + "log_odds": 1.297213077545166, + "log_odds_ratio": -0.3701450228691101, + "loss": 0.3396, + "rejected_geometric_mean": -2.350072145462036, + "step": 2039 + }, + { + "chosen_geometric_mean": -1.2697691917419434, + "epoch": 0.51, + "grad_norm": 2.296875, + "learning_rate": 4.258795798847826e-06, + "log_odds": 3.659669876098633, + "log_odds_ratio": -0.19783592224121094, + "loss": 0.3196, + "rejected_geometric_mean": -4.699151515960693, + "step": 2040 + }, + { + "chosen_geometric_mean": -1.221498966217041, + "epoch": 0.51, + "grad_norm": 15.875, + "learning_rate": 4.2581038404064076e-06, + "log_odds": 1.593230962753296, + "log_odds_ratio": -0.3867371678352356, + "loss": 0.3225, + "rejected_geometric_mean": -2.6349594593048096, + "step": 2041 + }, + { + "chosen_geometric_mean": -1.245492696762085, + "epoch": 0.51, + "grad_norm": 4.65625, + "learning_rate": 4.257411615394246e-06, + "log_odds": 2.719735622406006, + "log_odds_ratio": -0.30940717458724976, + "loss": 0.3091, + "rejected_geometric_mean": -3.7566823959350586, + "step": 2042 + }, + { + "chosen_geometric_mean": -1.3206533193588257, + "epoch": 0.51, + "grad_norm": 2.5625, + "learning_rate": 4.2567191239162985e-06, + "log_odds": 0.3613415062427521, + "log_odds_ratio": -0.5383706092834473, + "loss": 0.3324, + "rejected_geometric_mean": -1.6010059118270874, + "step": 2043 + }, + { + "chosen_geometric_mean": -0.9995614290237427, + "epoch": 0.51, + "grad_norm": 11.8125, + "learning_rate": 4.256026366077564e-06, + "log_odds": 1.6914894580841064, + "log_odds_ratio": -0.3472574055194855, + "loss": 0.3101, + "rejected_geometric_mean": -2.4434163570404053, + "step": 2044 + }, + { + "chosen_geometric_mean": -1.1461085081100464, + "epoch": 0.51, + "grad_norm": 3.140625, + "learning_rate": 4.25533334198308e-06, + "log_odds": 3.1622464656829834, + "log_odds_ratio": -0.3065532147884369, + "loss": 0.3342, + "rejected_geometric_mean": -4.102318286895752, + "step": 2045 + }, + { + "chosen_geometric_mean": -1.174501895904541, + "epoch": 0.51, + "grad_norm": 3.546875, + "learning_rate": 4.254640051737928e-06, + "log_odds": 1.725458025932312, + "log_odds_ratio": -0.36037400364875793, + "loss": 0.3486, + "rejected_geometric_mean": -2.6460347175598145, + "step": 2046 + }, + { + "chosen_geometric_mean": -0.976158857345581, + "epoch": 0.51, + "grad_norm": 2.1875, + "learning_rate": 4.253946495447226e-06, + "log_odds": 0.8945037126541138, + "log_odds_ratio": -0.5030253529548645, + "loss": 0.2729, + "rejected_geometric_mean": -1.7814586162567139, + "step": 2047 + }, + { + "chosen_geometric_mean": -1.298400640487671, + "epoch": 0.51, + "grad_norm": 5.46875, + "learning_rate": 4.253252673216132e-06, + "log_odds": 4.482857704162598, + "log_odds_ratio": -0.08165021985769272, + "loss": 0.2738, + "rejected_geometric_mean": -5.496498107910156, + "step": 2048 + }, + { + "chosen_geometric_mean": -1.061506748199463, + "epoch": 0.51, + "grad_norm": 2.9375, + "learning_rate": 4.252558585149848e-06, + "log_odds": 2.8591809272766113, + "log_odds_ratio": -0.32637059688568115, + "loss": 0.3056, + "rejected_geometric_mean": -3.673295497894287, + "step": 2049 + }, + { + "chosen_geometric_mean": -1.143930435180664, + "epoch": 0.51, + "grad_norm": 10.8125, + "learning_rate": 4.251864231353615e-06, + "log_odds": 3.4504945278167725, + "log_odds_ratio": -0.2637462913990021, + "loss": 0.2976, + "rejected_geometric_mean": -4.395703315734863, + "step": 2050 + }, + { + "chosen_geometric_mean": -1.0449415445327759, + "epoch": 0.51, + "grad_norm": 6.3125, + "learning_rate": 4.251169611932713e-06, + "log_odds": 1.7545418739318848, + "log_odds_ratio": -0.49217715859413147, + "loss": 0.3111, + "rejected_geometric_mean": -2.6946864128112793, + "step": 2051 + }, + { + "chosen_geometric_mean": -0.9759700894355774, + "epoch": 0.51, + "grad_norm": 20.875, + "learning_rate": 4.250474726992463e-06, + "log_odds": 1.2566255331039429, + "log_odds_ratio": -0.4300770163536072, + "loss": 0.331, + "rejected_geometric_mean": -2.031668186187744, + "step": 2052 + }, + { + "chosen_geometric_mean": -0.9565527439117432, + "epoch": 0.51, + "grad_norm": 17.625, + "learning_rate": 4.249779576638226e-06, + "log_odds": 4.912618637084961, + "log_odds_ratio": -0.012036577798426151, + "loss": 0.3012, + "rejected_geometric_mean": -5.369410514831543, + "step": 2053 + }, + { + "chosen_geometric_mean": -0.9756186604499817, + "epoch": 0.51, + "grad_norm": 9.0625, + "learning_rate": 4.249084160975403e-06, + "log_odds": 5.096858978271484, + "log_odds_ratio": -0.16349570453166962, + "loss": 0.3411, + "rejected_geometric_mean": -5.697827339172363, + "step": 2054 + }, + { + "chosen_geometric_mean": -0.9763151407241821, + "epoch": 0.51, + "grad_norm": 3.953125, + "learning_rate": 4.248388480109436e-06, + "log_odds": 3.039952278137207, + "log_odds_ratio": -0.20121632516384125, + "loss": 0.2675, + "rejected_geometric_mean": -3.6807188987731934, + "step": 2055 + }, + { + "chosen_geometric_mean": -1.0682616233825684, + "epoch": 0.51, + "grad_norm": 10.4375, + "learning_rate": 4.2476925341458085e-06, + "log_odds": 4.787009239196777, + "log_odds_ratio": -0.14246997237205505, + "loss": 0.3388, + "rejected_geometric_mean": -5.4905242919921875, + "step": 2056 + }, + { + "chosen_geometric_mean": -1.2237986326217651, + "epoch": 0.51, + "grad_norm": 13.875, + "learning_rate": 4.246996323190041e-06, + "log_odds": 2.3009257316589355, + "log_odds_ratio": -0.14930376410484314, + "loss": 0.3144, + "rejected_geometric_mean": -3.227862596511841, + "step": 2057 + }, + { + "chosen_geometric_mean": -1.055679440498352, + "epoch": 0.51, + "grad_norm": 18.5, + "learning_rate": 4.2462998473476955e-06, + "log_odds": 7.093246936798096, + "log_odds_ratio": -0.133330836892128, + "loss": 0.3506, + "rejected_geometric_mean": -7.80052375793457, + "step": 2058 + }, + { + "chosen_geometric_mean": -1.3612722158432007, + "epoch": 0.51, + "grad_norm": 11.9375, + "learning_rate": 4.245603106724376e-06, + "log_odds": 2.9829771518707275, + "log_odds_ratio": -0.3753107786178589, + "loss": 0.3515, + "rejected_geometric_mean": -4.209330081939697, + "step": 2059 + }, + { + "chosen_geometric_mean": -1.189884901046753, + "epoch": 0.51, + "grad_norm": 7.09375, + "learning_rate": 4.244906101425724e-06, + "log_odds": 3.233440399169922, + "log_odds_ratio": -0.20275740325450897, + "loss": 0.2944, + "rejected_geometric_mean": -4.134991645812988, + "step": 2060 + }, + { + "chosen_geometric_mean": -1.0288112163543701, + "epoch": 0.51, + "grad_norm": 4.875, + "learning_rate": 4.2442088315574225e-06, + "log_odds": 1.882025957107544, + "log_odds_ratio": -0.33498531579971313, + "loss": 0.3831, + "rejected_geometric_mean": -2.5965452194213867, + "step": 2061 + }, + { + "chosen_geometric_mean": -1.2830705642700195, + "epoch": 0.51, + "grad_norm": 4.65625, + "learning_rate": 4.243511297225194e-06, + "log_odds": 2.5918750762939453, + "log_odds_ratio": -0.4676639437675476, + "loss": 0.3384, + "rejected_geometric_mean": -3.770282030105591, + "step": 2062 + }, + { + "chosen_geometric_mean": -1.1648437976837158, + "epoch": 0.51, + "grad_norm": 6.0, + "learning_rate": 4.242813498534801e-06, + "log_odds": 1.4014241695404053, + "log_odds_ratio": -0.41291770339012146, + "loss": 0.3321, + "rejected_geometric_mean": -2.430967330932617, + "step": 2063 + }, + { + "chosen_geometric_mean": -1.4077503681182861, + "epoch": 0.51, + "grad_norm": 10.25, + "learning_rate": 4.242115435592048e-06, + "log_odds": 2.1364431381225586, + "log_odds_ratio": -0.30151599645614624, + "loss": 0.2939, + "rejected_geometric_mean": -3.3897786140441895, + "step": 2064 + }, + { + "chosen_geometric_mean": -1.1077873706817627, + "epoch": 0.51, + "grad_norm": 9.75, + "learning_rate": 4.241417108502778e-06, + "log_odds": 4.316949367523193, + "log_odds_ratio": -0.3184102773666382, + "loss": 0.3002, + "rejected_geometric_mean": -5.217288494110107, + "step": 2065 + }, + { + "chosen_geometric_mean": -1.0982705354690552, + "epoch": 0.51, + "grad_norm": 2.34375, + "learning_rate": 4.2407185173728724e-06, + "log_odds": 4.530375957489014, + "log_odds_ratio": -0.20762096345424652, + "loss": 0.2909, + "rejected_geometric_mean": -5.329505920410156, + "step": 2066 + }, + { + "chosen_geometric_mean": -0.9339339733123779, + "epoch": 0.51, + "grad_norm": 3.296875, + "learning_rate": 4.240019662308256e-06, + "log_odds": 2.141362190246582, + "log_odds_ratio": -0.19222091138362885, + "loss": 0.3045, + "rejected_geometric_mean": -2.6889920234680176, + "step": 2067 + }, + { + "chosen_geometric_mean": -1.053288459777832, + "epoch": 0.51, + "grad_norm": 3.0, + "learning_rate": 4.239320543414892e-06, + "log_odds": 1.4162721633911133, + "log_odds_ratio": -0.4197124242782593, + "loss": 0.2785, + "rejected_geometric_mean": -2.316059112548828, + "step": 2068 + }, + { + "chosen_geometric_mean": -1.225647211074829, + "epoch": 0.51, + "grad_norm": 3.640625, + "learning_rate": 4.238621160798783e-06, + "log_odds": 0.26987025141716003, + "log_odds_ratio": -0.5750732421875, + "loss": 0.2973, + "rejected_geometric_mean": -1.4334394931793213, + "step": 2069 + }, + { + "chosen_geometric_mean": -1.1152687072753906, + "epoch": 0.51, + "grad_norm": 39.75, + "learning_rate": 4.237921514565971e-06, + "log_odds": 1.8510724306106567, + "log_odds_ratio": -0.2981814444065094, + "loss": 0.2815, + "rejected_geometric_mean": -2.7329859733581543, + "step": 2070 + }, + { + "chosen_geometric_mean": -1.1330193281173706, + "epoch": 0.51, + "grad_norm": 7.75, + "learning_rate": 4.237221604822542e-06, + "log_odds": 4.091797828674316, + "log_odds_ratio": -0.12304382771253586, + "loss": 0.3383, + "rejected_geometric_mean": -4.901696681976318, + "step": 2071 + }, + { + "chosen_geometric_mean": -1.2934118509292603, + "epoch": 0.51, + "grad_norm": 37.0, + "learning_rate": 4.236521431674616e-06, + "log_odds": 2.6533310413360596, + "log_odds_ratio": -0.24442780017852783, + "loss": 0.4182, + "rejected_geometric_mean": -3.7255568504333496, + "step": 2072 + }, + { + "chosen_geometric_mean": -1.3567960262298584, + "epoch": 0.51, + "grad_norm": 14.0625, + "learning_rate": 4.2358209952283594e-06, + "log_odds": 2.1200740337371826, + "log_odds_ratio": -0.4014774262905121, + "loss": 0.26, + "rejected_geometric_mean": -3.263751268386841, + "step": 2073 + }, + { + "chosen_geometric_mean": -1.2830301523208618, + "epoch": 0.51, + "grad_norm": 7.0, + "learning_rate": 4.235120295589973e-06, + "log_odds": 3.589369297027588, + "log_odds_ratio": -0.1594112366437912, + "loss": 0.2925, + "rejected_geometric_mean": -4.59644889831543, + "step": 2074 + }, + { + "chosen_geometric_mean": -1.0945724248886108, + "epoch": 0.51, + "grad_norm": 12.3125, + "learning_rate": 4.234419332865699e-06, + "log_odds": 1.1315301656723022, + "log_odds_ratio": -0.32474201917648315, + "loss": 0.3304, + "rejected_geometric_mean": -1.9412035942077637, + "step": 2075 + }, + { + "chosen_geometric_mean": -1.033164381980896, + "epoch": 0.51, + "grad_norm": 4.25, + "learning_rate": 4.233718107161823e-06, + "log_odds": 2.849010467529297, + "log_odds_ratio": -0.3678615689277649, + "loss": 0.3163, + "rejected_geometric_mean": -3.643798828125, + "step": 2076 + }, + { + "chosen_geometric_mean": -1.1467005014419556, + "epoch": 0.51, + "grad_norm": 7.9375, + "learning_rate": 4.233016618584664e-06, + "log_odds": 0.5666083097457886, + "log_odds_ratio": -0.5075480341911316, + "loss": 0.3438, + "rejected_geometric_mean": -1.5597155094146729, + "step": 2077 + }, + { + "chosen_geometric_mean": -0.9298539161682129, + "epoch": 0.51, + "grad_norm": 41.5, + "learning_rate": 4.232314867240588e-06, + "log_odds": 3.294189691543579, + "log_odds_ratio": -0.19320893287658691, + "loss": 0.3113, + "rejected_geometric_mean": -3.864783763885498, + "step": 2078 + }, + { + "chosen_geometric_mean": -1.3841702938079834, + "epoch": 0.51, + "grad_norm": 5.15625, + "learning_rate": 4.2316128532359954e-06, + "log_odds": 2.407944679260254, + "log_odds_ratio": -0.1681540459394455, + "loss": 0.314, + "rejected_geometric_mean": -3.544330358505249, + "step": 2079 + }, + { + "chosen_geometric_mean": -1.449554443359375, + "epoch": 0.51, + "grad_norm": 12.875, + "learning_rate": 4.230910576677328e-06, + "log_odds": 0.9856749176979065, + "log_odds_ratio": -0.521400511264801, + "loss": 0.2838, + "rejected_geometric_mean": -2.3398478031158447, + "step": 2080 + }, + { + "chosen_geometric_mean": -1.2495324611663818, + "epoch": 0.52, + "grad_norm": 4.28125, + "learning_rate": 4.2302080376710695e-06, + "log_odds": 2.8336870670318604, + "log_odds_ratio": -0.29801031947135925, + "loss": 0.2792, + "rejected_geometric_mean": -3.8765053749084473, + "step": 2081 + }, + { + "chosen_geometric_mean": -1.1290011405944824, + "epoch": 0.52, + "grad_norm": 7.375, + "learning_rate": 4.22950523632374e-06, + "log_odds": 1.4831514358520508, + "log_odds_ratio": -0.36885347962379456, + "loss": 0.3027, + "rejected_geometric_mean": -2.4076082706451416, + "step": 2082 + }, + { + "chosen_geometric_mean": -1.1730215549468994, + "epoch": 0.52, + "grad_norm": 4.59375, + "learning_rate": 4.228802172741902e-06, + "log_odds": 1.3469518423080444, + "log_odds_ratio": -0.28655797243118286, + "loss": 0.2912, + "rejected_geometric_mean": -2.2983226776123047, + "step": 2083 + }, + { + "chosen_geometric_mean": -1.0223517417907715, + "epoch": 0.52, + "grad_norm": 3.921875, + "learning_rate": 4.228098847032157e-06, + "log_odds": 1.9159953594207764, + "log_odds_ratio": -0.3910009264945984, + "loss": 0.3374, + "rejected_geometric_mean": -2.750840663909912, + "step": 2084 + }, + { + "chosen_geometric_mean": -1.2471680641174316, + "epoch": 0.52, + "grad_norm": 19.375, + "learning_rate": 4.227395259301145e-06, + "log_odds": 1.7999236583709717, + "log_odds_ratio": -0.32096442580223083, + "loss": 0.3454, + "rejected_geometric_mean": -2.867197036743164, + "step": 2085 + }, + { + "chosen_geometric_mean": -1.1417491436004639, + "epoch": 0.52, + "grad_norm": 3.234375, + "learning_rate": 4.226691409655548e-06, + "log_odds": 2.312640905380249, + "log_odds_ratio": -0.23343944549560547, + "loss": 0.2632, + "rejected_geometric_mean": -3.1473846435546875, + "step": 2086 + }, + { + "chosen_geometric_mean": -0.8777218461036682, + "epoch": 0.52, + "grad_norm": 3.34375, + "learning_rate": 4.225987298202085e-06, + "log_odds": 1.6349971294403076, + "log_odds_ratio": -0.3038874864578247, + "loss": 0.3117, + "rejected_geometric_mean": -2.2232255935668945, + "step": 2087 + }, + { + "chosen_geometric_mean": -0.9835354089736938, + "epoch": 0.52, + "grad_norm": 7.5, + "learning_rate": 4.225282925047518e-06, + "log_odds": 0.7901096343994141, + "log_odds_ratio": -0.43392518162727356, + "loss": 0.3084, + "rejected_geometric_mean": -1.5514397621154785, + "step": 2088 + }, + { + "chosen_geometric_mean": -1.117722511291504, + "epoch": 0.52, + "grad_norm": 11.1875, + "learning_rate": 4.224578290298646e-06, + "log_odds": 1.2871513366699219, + "log_odds_ratio": -0.42447429895401, + "loss": 0.3365, + "rejected_geometric_mean": -2.2463295459747314, + "step": 2089 + }, + { + "chosen_geometric_mean": -1.0939428806304932, + "epoch": 0.52, + "grad_norm": 10.625, + "learning_rate": 4.223873394062309e-06, + "log_odds": 1.5573064088821411, + "log_odds_ratio": -0.34649720788002014, + "loss": 0.3556, + "rejected_geometric_mean": -2.4647445678710938, + "step": 2090 + }, + { + "chosen_geometric_mean": -1.1043328046798706, + "epoch": 0.52, + "grad_norm": 14.875, + "learning_rate": 4.223168236445384e-06, + "log_odds": 2.442397117614746, + "log_odds_ratio": -0.11499684303998947, + "loss": 0.3389, + "rejected_geometric_mean": -3.1728742122650146, + "step": 2091 + }, + { + "chosen_geometric_mean": -1.2341734170913696, + "epoch": 0.52, + "grad_norm": 6.53125, + "learning_rate": 4.222462817554793e-06, + "log_odds": 1.2134077548980713, + "log_odds_ratio": -0.34726372361183167, + "loss": 0.2719, + "rejected_geometric_mean": -2.2876522541046143, + "step": 2092 + }, + { + "chosen_geometric_mean": -1.0291045904159546, + "epoch": 0.52, + "grad_norm": 4.3125, + "learning_rate": 4.221757137497493e-06, + "log_odds": 3.8333802223205566, + "log_odds_ratio": -0.07945255190134048, + "loss": 0.256, + "rejected_geometric_mean": -4.448787689208984, + "step": 2093 + }, + { + "chosen_geometric_mean": -1.0017132759094238, + "epoch": 0.52, + "grad_norm": 6.71875, + "learning_rate": 4.221051196380481e-06, + "log_odds": 1.3953888416290283, + "log_odds_ratio": -0.39882200956344604, + "loss": 0.3067, + "rejected_geometric_mean": -2.2258074283599854, + "step": 2094 + }, + { + "chosen_geometric_mean": -1.2704097032546997, + "epoch": 0.52, + "grad_norm": 4.15625, + "learning_rate": 4.220344994310797e-06, + "log_odds": 1.0941035747528076, + "log_odds_ratio": -0.31023624539375305, + "loss": 0.3411, + "rejected_geometric_mean": -2.15907883644104, + "step": 2095 + }, + { + "chosen_geometric_mean": -1.1347546577453613, + "epoch": 0.52, + "grad_norm": 3.375, + "learning_rate": 4.2196385313955164e-06, + "log_odds": 1.9774796962738037, + "log_odds_ratio": -0.16205455362796783, + "loss": 0.3074, + "rejected_geometric_mean": -2.7957544326782227, + "step": 2096 + }, + { + "chosen_geometric_mean": -1.195863962173462, + "epoch": 0.52, + "grad_norm": 7.53125, + "learning_rate": 4.218931807741756e-06, + "log_odds": 4.563247203826904, + "log_odds_ratio": -0.12483483552932739, + "loss": 0.2681, + "rejected_geometric_mean": -5.450043678283691, + "step": 2097 + }, + { + "chosen_geometric_mean": -1.199370265007019, + "epoch": 0.52, + "grad_norm": 3.296875, + "learning_rate": 4.218224823456672e-06, + "log_odds": 0.743573009967804, + "log_odds_ratio": -0.43011972308158875, + "loss": 0.293, + "rejected_geometric_mean": -1.776227593421936, + "step": 2098 + }, + { + "chosen_geometric_mean": -0.898622989654541, + "epoch": 0.52, + "grad_norm": 2.8125, + "learning_rate": 4.217517578647462e-06, + "log_odds": 1.26350736618042, + "log_odds_ratio": -0.4278470277786255, + "loss": 0.2799, + "rejected_geometric_mean": -1.9730257987976074, + "step": 2099 + }, + { + "chosen_geometric_mean": -1.2909421920776367, + "epoch": 0.52, + "grad_norm": 9.125, + "learning_rate": 4.216810073421359e-06, + "log_odds": 0.04564943164587021, + "log_odds_ratio": -0.6725139617919922, + "loss": 0.356, + "rejected_geometric_mean": -1.3225934505462646, + "step": 2100 + }, + { + "chosen_geometric_mean": -1.098056674003601, + "epoch": 0.52, + "grad_norm": 53.5, + "learning_rate": 4.216102307885639e-06, + "log_odds": 1.3454948663711548, + "log_odds_ratio": -0.3525104522705078, + "loss": 0.373, + "rejected_geometric_mean": -2.257965564727783, + "step": 2101 + }, + { + "chosen_geometric_mean": -1.1520799398422241, + "epoch": 0.52, + "grad_norm": 43.0, + "learning_rate": 4.215394282147616e-06, + "log_odds": 6.573976993560791, + "log_odds_ratio": -0.08648354560136795, + "loss": 0.3337, + "rejected_geometric_mean": -7.339165687561035, + "step": 2102 + }, + { + "chosen_geometric_mean": -1.256343126296997, + "epoch": 0.52, + "grad_norm": 7.875, + "learning_rate": 4.214685996314643e-06, + "log_odds": 2.7926321029663086, + "log_odds_ratio": -0.08765677362680435, + "loss": 0.3158, + "rejected_geometric_mean": -3.6661477088928223, + "step": 2103 + }, + { + "chosen_geometric_mean": -1.0387321710586548, + "epoch": 0.52, + "grad_norm": 3.734375, + "learning_rate": 4.213977450494115e-06, + "log_odds": 2.492079734802246, + "log_odds_ratio": -0.14819282293319702, + "loss": 0.2744, + "rejected_geometric_mean": -3.1742258071899414, + "step": 2104 + }, + { + "chosen_geometric_mean": -1.366736888885498, + "epoch": 0.52, + "grad_norm": 14.375, + "learning_rate": 4.2132686447934625e-06, + "log_odds": 1.0413074493408203, + "log_odds_ratio": -0.5174878835678101, + "loss": 0.3601, + "rejected_geometric_mean": -2.2687511444091797, + "step": 2105 + }, + { + "chosen_geometric_mean": -1.231909990310669, + "epoch": 0.52, + "grad_norm": 26.125, + "learning_rate": 4.212559579320158e-06, + "log_odds": 1.931136131286621, + "log_odds_ratio": -0.3871460556983948, + "loss": 0.3698, + "rejected_geometric_mean": -3.033534049987793, + "step": 2106 + }, + { + "chosen_geometric_mean": -1.0731353759765625, + "epoch": 0.52, + "grad_norm": 6.375, + "learning_rate": 4.2118502541817144e-06, + "log_odds": 3.1173553466796875, + "log_odds_ratio": -0.12105703353881836, + "loss": 0.3029, + "rejected_geometric_mean": -3.830117702484131, + "step": 2107 + }, + { + "chosen_geometric_mean": -1.5811572074890137, + "epoch": 0.52, + "grad_norm": 22.5, + "learning_rate": 4.21114066948568e-06, + "log_odds": 3.661862373352051, + "log_odds_ratio": -0.2878141403198242, + "loss": 0.3605, + "rejected_geometric_mean": -5.0310235023498535, + "step": 2108 + }, + { + "chosen_geometric_mean": -1.1141111850738525, + "epoch": 0.52, + "grad_norm": 17.0, + "learning_rate": 4.2104308253396465e-06, + "log_odds": 1.328112006187439, + "log_odds_ratio": -0.31178534030914307, + "loss": 0.2527, + "rejected_geometric_mean": -2.1789138317108154, + "step": 2109 + }, + { + "chosen_geometric_mean": -1.1581507921218872, + "epoch": 0.52, + "grad_norm": 2.546875, + "learning_rate": 4.2097207218512425e-06, + "log_odds": 3.211421251296997, + "log_odds_ratio": -0.16468891501426697, + "loss": 0.3148, + "rejected_geometric_mean": -4.062557220458984, + "step": 2110 + }, + { + "chosen_geometric_mean": -1.2749707698822021, + "epoch": 0.52, + "grad_norm": 6.84375, + "learning_rate": 4.209010359128137e-06, + "log_odds": 3.8617300987243652, + "log_odds_ratio": -0.2795686423778534, + "loss": 0.3536, + "rejected_geometric_mean": -4.926259994506836, + "step": 2111 + }, + { + "chosen_geometric_mean": -1.020042061805725, + "epoch": 0.52, + "grad_norm": 2.953125, + "learning_rate": 4.208299737278039e-06, + "log_odds": 1.2047290802001953, + "log_odds_ratio": -0.38373732566833496, + "loss": 0.2862, + "rejected_geometric_mean": -2.0410690307617188, + "step": 2112 + }, + { + "chosen_geometric_mean": -1.0524876117706299, + "epoch": 0.52, + "grad_norm": 5.71875, + "learning_rate": 4.207588856408694e-06, + "log_odds": 3.0640532970428467, + "log_odds_ratio": -0.33157879114151, + "loss": 0.3352, + "rejected_geometric_mean": -3.876702070236206, + "step": 2113 + }, + { + "chosen_geometric_mean": -0.9437546133995056, + "epoch": 0.52, + "grad_norm": 2.515625, + "learning_rate": 4.20687771662789e-06, + "log_odds": 3.3249764442443848, + "log_odds_ratio": -0.15840347111225128, + "loss": 0.2933, + "rejected_geometric_mean": -3.861682415008545, + "step": 2114 + }, + { + "chosen_geometric_mean": -0.8742375373840332, + "epoch": 0.52, + "grad_norm": 11.1875, + "learning_rate": 4.206166318043452e-06, + "log_odds": 1.0858644247055054, + "log_odds_ratio": -0.4255078136920929, + "loss": 0.332, + "rejected_geometric_mean": -1.7468469142913818, + "step": 2115 + }, + { + "chosen_geometric_mean": -1.0711642503738403, + "epoch": 0.52, + "grad_norm": 2.0625, + "learning_rate": 4.205454660763245e-06, + "log_odds": 3.029766798019409, + "log_odds_ratio": -0.2839829921722412, + "loss": 0.2778, + "rejected_geometric_mean": -3.817504405975342, + "step": 2116 + }, + { + "chosen_geometric_mean": -1.120596170425415, + "epoch": 0.52, + "grad_norm": 2.203125, + "learning_rate": 4.204742744895175e-06, + "log_odds": 1.977556586265564, + "log_odds_ratio": -0.3667733669281006, + "loss": 0.3087, + "rejected_geometric_mean": -2.897911310195923, + "step": 2117 + }, + { + "chosen_geometric_mean": -0.9555112719535828, + "epoch": 0.52, + "grad_norm": 2.953125, + "learning_rate": 4.204030570547184e-06, + "log_odds": 2.0821170806884766, + "log_odds_ratio": -0.2186850905418396, + "loss": 0.3087, + "rejected_geometric_mean": -2.6567723751068115, + "step": 2118 + }, + { + "chosen_geometric_mean": -1.1688206195831299, + "epoch": 0.52, + "grad_norm": 4.53125, + "learning_rate": 4.203318137827255e-06, + "log_odds": 1.1431102752685547, + "log_odds_ratio": -0.3762422502040863, + "loss": 0.2805, + "rejected_geometric_mean": -2.092876434326172, + "step": 2119 + }, + { + "chosen_geometric_mean": -0.9565755128860474, + "epoch": 0.52, + "grad_norm": 2.21875, + "learning_rate": 4.202605446843411e-06, + "log_odds": 0.6412051916122437, + "log_odds_ratio": -0.5866883397102356, + "loss": 0.3307, + "rejected_geometric_mean": -1.5033708810806274, + "step": 2120 + }, + { + "chosen_geometric_mean": -1.118517518043518, + "epoch": 0.53, + "grad_norm": 2.78125, + "learning_rate": 4.2018924977037105e-06, + "log_odds": 3.652477741241455, + "log_odds_ratio": -0.3503192365169525, + "loss": 0.3123, + "rejected_geometric_mean": -4.5981597900390625, + "step": 2121 + }, + { + "chosen_geometric_mean": -1.160024642944336, + "epoch": 0.53, + "grad_norm": 25.25, + "learning_rate": 4.201179290516255e-06, + "log_odds": 3.0814433097839355, + "log_odds_ratio": -0.18416529893875122, + "loss": 0.3269, + "rejected_geometric_mean": -3.951218605041504, + "step": 2122 + }, + { + "chosen_geometric_mean": -1.1287442445755005, + "epoch": 0.53, + "grad_norm": 7.375, + "learning_rate": 4.200465825389185e-06, + "log_odds": 2.5496935844421387, + "log_odds_ratio": -0.2059451788663864, + "loss": 0.3163, + "rejected_geometric_mean": -3.368490695953369, + "step": 2123 + }, + { + "chosen_geometric_mean": -1.1354951858520508, + "epoch": 0.53, + "grad_norm": 25.75, + "learning_rate": 4.199752102430678e-06, + "log_odds": 2.083627223968506, + "log_odds_ratio": -0.25236976146698, + "loss": 0.4133, + "rejected_geometric_mean": -2.916046619415283, + "step": 2124 + }, + { + "chosen_geometric_mean": -2.019606828689575, + "epoch": 0.53, + "grad_norm": 22.125, + "learning_rate": 4.199038121748952e-06, + "log_odds": 2.311556100845337, + "log_odds_ratio": -0.5033897161483765, + "loss": 0.381, + "rejected_geometric_mean": -4.178552627563477, + "step": 2125 + }, + { + "chosen_geometric_mean": -1.943558692932129, + "epoch": 0.53, + "grad_norm": 39.0, + "learning_rate": 4.1983238834522635e-06, + "log_odds": 3.4005424976348877, + "log_odds_ratio": -0.27992963790893555, + "loss": 0.4056, + "rejected_geometric_mean": -5.143290996551514, + "step": 2126 + }, + { + "chosen_geometric_mean": -1.1163251399993896, + "epoch": 0.53, + "grad_norm": 16.75, + "learning_rate": 4.1976093876489066e-06, + "log_odds": 3.0521223545074463, + "log_odds_ratio": -0.20998571813106537, + "loss": 0.379, + "rejected_geometric_mean": -3.8512766361236572, + "step": 2127 + }, + { + "chosen_geometric_mean": -1.2044180631637573, + "epoch": 0.53, + "grad_norm": 10.25, + "learning_rate": 4.1968946344472175e-06, + "log_odds": 5.865571022033691, + "log_odds_ratio": -0.1620377153158188, + "loss": 0.3084, + "rejected_geometric_mean": -6.733669281005859, + "step": 2128 + }, + { + "chosen_geometric_mean": -0.8823902606964111, + "epoch": 0.53, + "grad_norm": 10.9375, + "learning_rate": 4.19617962395557e-06, + "log_odds": 0.6962004899978638, + "log_odds_ratio": -0.4381457567214966, + "loss": 0.3284, + "rejected_geometric_mean": -1.362479567527771, + "step": 2129 + }, + { + "chosen_geometric_mean": -1.323014497756958, + "epoch": 0.53, + "grad_norm": 5.65625, + "learning_rate": 4.195464356282377e-06, + "log_odds": 2.7283060550689697, + "log_odds_ratio": -0.15444651246070862, + "loss": 0.2911, + "rejected_geometric_mean": -3.767242908477783, + "step": 2130 + }, + { + "chosen_geometric_mean": -0.9895983934402466, + "epoch": 0.53, + "grad_norm": 4.28125, + "learning_rate": 4.1947488315360885e-06, + "log_odds": 1.6831719875335693, + "log_odds_ratio": -0.3113850951194763, + "loss": 0.2939, + "rejected_geometric_mean": -2.384063720703125, + "step": 2131 + }, + { + "chosen_geometric_mean": -1.025159478187561, + "epoch": 0.53, + "grad_norm": 3.703125, + "learning_rate": 4.194033049825198e-06, + "log_odds": 2.1895594596862793, + "log_odds_ratio": -0.37163278460502625, + "loss": 0.2829, + "rejected_geometric_mean": -2.993006944656372, + "step": 2132 + }, + { + "chosen_geometric_mean": -1.0082025527954102, + "epoch": 0.53, + "grad_norm": 3.25, + "learning_rate": 4.1933170112582325e-06, + "log_odds": 1.1692252159118652, + "log_odds_ratio": -0.4417255222797394, + "loss": 0.2932, + "rejected_geometric_mean": -2.0454206466674805, + "step": 2133 + }, + { + "chosen_geometric_mean": -1.209110975265503, + "epoch": 0.53, + "grad_norm": 5.625, + "learning_rate": 4.192600715943763e-06, + "log_odds": 3.0188021659851074, + "log_odds_ratio": -0.19772900640964508, + "loss": 0.2993, + "rejected_geometric_mean": -3.911656379699707, + "step": 2134 + }, + { + "chosen_geometric_mean": -1.041901707649231, + "epoch": 0.53, + "grad_norm": 2.671875, + "learning_rate": 4.191884163990395e-06, + "log_odds": 2.1322920322418213, + "log_odds_ratio": -0.42995935678482056, + "loss": 0.2826, + "rejected_geometric_mean": -3.0153250694274902, + "step": 2135 + }, + { + "chosen_geometric_mean": -1.2023143768310547, + "epoch": 0.53, + "grad_norm": 2.6875, + "learning_rate": 4.1911673555067765e-06, + "log_odds": 4.4617509841918945, + "log_odds_ratio": -0.11450964212417603, + "loss": 0.3182, + "rejected_geometric_mean": -5.352847576141357, + "step": 2136 + }, + { + "chosen_geometric_mean": -0.9840452671051025, + "epoch": 0.53, + "grad_norm": 3.859375, + "learning_rate": 4.190450290601592e-06, + "log_odds": 2.052015781402588, + "log_odds_ratio": -0.3068414330482483, + "loss": 0.3061, + "rejected_geometric_mean": -2.7621803283691406, + "step": 2137 + }, + { + "chosen_geometric_mean": -1.2748066186904907, + "epoch": 0.53, + "grad_norm": 10.9375, + "learning_rate": 4.189732969383566e-06, + "log_odds": 1.227150797843933, + "log_odds_ratio": -0.2925030589103699, + "loss": 0.3119, + "rejected_geometric_mean": -2.293909788131714, + "step": 2138 + }, + { + "chosen_geometric_mean": -0.9635728597640991, + "epoch": 0.53, + "grad_norm": 3.125, + "learning_rate": 4.189015391961461e-06, + "log_odds": 1.6941713094711304, + "log_odds_ratio": -0.26910918951034546, + "loss": 0.3106, + "rejected_geometric_mean": -2.354445695877075, + "step": 2139 + }, + { + "chosen_geometric_mean": -1.1179003715515137, + "epoch": 0.53, + "grad_norm": 5.1875, + "learning_rate": 4.188297558444081e-06, + "log_odds": 2.4835245609283447, + "log_odds_ratio": -0.16795414686203003, + "loss": 0.3369, + "rejected_geometric_mean": -3.2685039043426514, + "step": 2140 + }, + { + "chosen_geometric_mean": -1.0267168283462524, + "epoch": 0.53, + "grad_norm": 12.625, + "learning_rate": 4.187579468940264e-06, + "log_odds": 5.7042436599731445, + "log_odds_ratio": -0.07344038784503937, + "loss": 0.3034, + "rejected_geometric_mean": -6.284360885620117, + "step": 2141 + }, + { + "chosen_geometric_mean": -0.8777504563331604, + "epoch": 0.53, + "grad_norm": 4.71875, + "learning_rate": 4.186861123558891e-06, + "log_odds": 2.416053295135498, + "log_odds_ratio": -0.21324987709522247, + "loss": 0.2789, + "rejected_geometric_mean": -2.858013391494751, + "step": 2142 + }, + { + "chosen_geometric_mean": -0.9645394682884216, + "epoch": 0.53, + "grad_norm": 9.125, + "learning_rate": 4.186142522408882e-06, + "log_odds": 1.4251511096954346, + "log_odds_ratio": -0.2835286557674408, + "loss": 0.2955, + "rejected_geometric_mean": -2.050043821334839, + "step": 2143 + }, + { + "chosen_geometric_mean": -1.1764967441558838, + "epoch": 0.53, + "grad_norm": 10.25, + "learning_rate": 4.185423665599191e-06, + "log_odds": 2.0037121772766113, + "log_odds_ratio": -0.16603508591651917, + "loss": 0.3365, + "rejected_geometric_mean": -2.8523223400115967, + "step": 2144 + }, + { + "chosen_geometric_mean": -0.9258625507354736, + "epoch": 0.53, + "grad_norm": 8.3125, + "learning_rate": 4.184704553238816e-06, + "log_odds": 2.599093198776245, + "log_odds_ratio": -0.14462119340896606, + "loss": 0.3269, + "rejected_geometric_mean": -3.096562623977661, + "step": 2145 + }, + { + "chosen_geometric_mean": -0.9872543215751648, + "epoch": 0.53, + "grad_norm": 7.5, + "learning_rate": 4.18398518543679e-06, + "log_odds": 1.305109977722168, + "log_odds_ratio": -0.3092353343963623, + "loss": 0.3251, + "rejected_geometric_mean": -2.0198850631713867, + "step": 2146 + }, + { + "chosen_geometric_mean": -1.175811529159546, + "epoch": 0.53, + "grad_norm": 5.3125, + "learning_rate": 4.18326556230219e-06, + "log_odds": 0.6746028661727905, + "log_odds_ratio": -0.45390915870666504, + "loss": 0.2849, + "rejected_geometric_mean": -1.6490799188613892, + "step": 2147 + }, + { + "chosen_geometric_mean": -1.1351251602172852, + "epoch": 0.53, + "grad_norm": 8.125, + "learning_rate": 4.182545683944125e-06, + "log_odds": 0.9320884346961975, + "log_odds_ratio": -0.3593209683895111, + "loss": 0.3627, + "rejected_geometric_mean": -1.8505607843399048, + "step": 2148 + }, + { + "chosen_geometric_mean": -1.162864327430725, + "epoch": 0.53, + "grad_norm": 26.125, + "learning_rate": 4.181825550471747e-06, + "log_odds": 1.287445068359375, + "log_odds_ratio": -0.3569355309009552, + "loss": 0.3199, + "rejected_geometric_mean": -2.2400381565093994, + "step": 2149 + }, + { + "chosen_geometric_mean": -1.2688459157943726, + "epoch": 0.53, + "grad_norm": 4.34375, + "learning_rate": 4.181105161994243e-06, + "log_odds": 1.4871408939361572, + "log_odds_ratio": -0.28933730721473694, + "loss": 0.2856, + "rejected_geometric_mean": -2.5196211338043213, + "step": 2150 + }, + { + "chosen_geometric_mean": -1.1457096338272095, + "epoch": 0.53, + "grad_norm": 6.21875, + "learning_rate": 4.180384518620846e-06, + "log_odds": 2.2685818672180176, + "log_odds_ratio": -0.3291812539100647, + "loss": 0.3351, + "rejected_geometric_mean": -3.2098305225372314, + "step": 2151 + }, + { + "chosen_geometric_mean": -0.9890944957733154, + "epoch": 0.53, + "grad_norm": 11.9375, + "learning_rate": 4.179663620460818e-06, + "log_odds": 3.639906883239746, + "log_odds_ratio": -0.1148981973528862, + "loss": 0.3088, + "rejected_geometric_mean": -4.232547283172607, + "step": 2152 + }, + { + "chosen_geometric_mean": -1.0495097637176514, + "epoch": 0.53, + "grad_norm": 2.328125, + "learning_rate": 4.178942467623468e-06, + "log_odds": 2.820345401763916, + "log_odds_ratio": -0.17749658226966858, + "loss": 0.2293, + "rejected_geometric_mean": -3.539386749267578, + "step": 2153 + }, + { + "chosen_geometric_mean": -1.1181999444961548, + "epoch": 0.53, + "grad_norm": 5.5, + "learning_rate": 4.178221060218139e-06, + "log_odds": 0.8371918201446533, + "log_odds_ratio": -0.41334450244903564, + "loss": 0.3619, + "rejected_geometric_mean": -1.7437794208526611, + "step": 2154 + }, + { + "chosen_geometric_mean": -1.189647912979126, + "epoch": 0.53, + "grad_norm": 2.453125, + "learning_rate": 4.177499398354212e-06, + "log_odds": 2.042877674102783, + "log_odds_ratio": -0.22836723923683167, + "loss": 0.2833, + "rejected_geometric_mean": -2.9779560565948486, + "step": 2155 + }, + { + "chosen_geometric_mean": -1.1466012001037598, + "epoch": 0.53, + "grad_norm": 3.515625, + "learning_rate": 4.17677748214111e-06, + "log_odds": 1.3499506711959839, + "log_odds_ratio": -0.43328389525413513, + "loss": 0.4751, + "rejected_geometric_mean": -2.34183931350708, + "step": 2156 + }, + { + "chosen_geometric_mean": -0.9841944575309753, + "epoch": 0.53, + "grad_norm": 3.75, + "learning_rate": 4.176055311688293e-06, + "log_odds": 2.8288378715515137, + "log_odds_ratio": -0.3155047297477722, + "loss": 0.2847, + "rejected_geometric_mean": -3.5590314865112305, + "step": 2157 + }, + { + "chosen_geometric_mean": -1.08534836769104, + "epoch": 0.53, + "grad_norm": 3.71875, + "learning_rate": 4.175332887105259e-06, + "log_odds": 2.675234794616699, + "log_odds_ratio": -0.2741440236568451, + "loss": 0.2939, + "rejected_geometric_mean": -3.4928112030029297, + "step": 2158 + }, + { + "chosen_geometric_mean": -1.0905934572219849, + "epoch": 0.53, + "grad_norm": 3.5, + "learning_rate": 4.174610208501545e-06, + "log_odds": 1.6861900091171265, + "log_odds_ratio": -0.2954666316509247, + "loss": 0.2892, + "rejected_geometric_mean": -2.5344278812408447, + "step": 2159 + }, + { + "chosen_geometric_mean": -1.094943642616272, + "epoch": 0.53, + "grad_norm": 4.5625, + "learning_rate": 4.173887275986725e-06, + "log_odds": 1.6506073474884033, + "log_odds_ratio": -0.3838355839252472, + "loss": 0.326, + "rejected_geometric_mean": -2.5046067237854004, + "step": 2160 + }, + { + "chosen_geometric_mean": -1.3889590501785278, + "epoch": 0.54, + "grad_norm": 3.484375, + "learning_rate": 4.173164089670415e-06, + "log_odds": 3.1949944496154785, + "log_odds_ratio": -0.3031211495399475, + "loss": 0.3102, + "rejected_geometric_mean": -4.430042266845703, + "step": 2161 + }, + { + "chosen_geometric_mean": -1.216718077659607, + "epoch": 0.54, + "grad_norm": 2.875, + "learning_rate": 4.172440649662266e-06, + "log_odds": 1.8556458950042725, + "log_odds_ratio": -0.3504355549812317, + "loss": 0.2588, + "rejected_geometric_mean": -2.9092578887939453, + "step": 2162 + }, + { + "chosen_geometric_mean": -1.0910742282867432, + "epoch": 0.54, + "grad_norm": 2.203125, + "learning_rate": 4.171716956071969e-06, + "log_odds": 6.7336225509643555, + "log_odds_ratio": -0.15225081145763397, + "loss": 0.2881, + "rejected_geometric_mean": -7.45862340927124, + "step": 2163 + }, + { + "chosen_geometric_mean": -1.2408775091171265, + "epoch": 0.54, + "grad_norm": 3.828125, + "learning_rate": 4.170993009009255e-06, + "log_odds": 9.273770332336426, + "log_odds_ratio": -0.12993837893009186, + "loss": 0.3583, + "rejected_geometric_mean": -10.199642181396484, + "step": 2164 + }, + { + "chosen_geometric_mean": -0.8620501756668091, + "epoch": 0.54, + "grad_norm": 27.75, + "learning_rate": 4.17026880858389e-06, + "log_odds": 8.638137817382812, + "log_odds_ratio": -0.0029380801133811474, + "loss": 0.2745, + "rejected_geometric_mean": -8.925106048583984, + "step": 2165 + }, + { + "chosen_geometric_mean": -0.9399230480194092, + "epoch": 0.54, + "grad_norm": 5.84375, + "learning_rate": 4.16954435490568e-06, + "log_odds": 0.6876234412193298, + "log_odds_ratio": -0.49698877334594727, + "loss": 0.2991, + "rejected_geometric_mean": -1.4776146411895752, + "step": 2166 + }, + { + "chosen_geometric_mean": -1.17141592502594, + "epoch": 0.54, + "grad_norm": 21.75, + "learning_rate": 4.168819648084471e-06, + "log_odds": 6.658852577209473, + "log_odds_ratio": -0.094425268471241, + "loss": 0.3485, + "rejected_geometric_mean": -7.480804443359375, + "step": 2167 + }, + { + "chosen_geometric_mean": -1.120209813117981, + "epoch": 0.54, + "grad_norm": 15.4375, + "learning_rate": 4.168094688230144e-06, + "log_odds": 3.8313050270080566, + "log_odds_ratio": -0.08025026321411133, + "loss": 0.3086, + "rejected_geometric_mean": -4.579462051391602, + "step": 2168 + }, + { + "chosen_geometric_mean": -1.4909697771072388, + "epoch": 0.54, + "grad_norm": 17.625, + "learning_rate": 4.167369475452623e-06, + "log_odds": 2.542746067047119, + "log_odds_ratio": -0.2856212258338928, + "loss": 0.3514, + "rejected_geometric_mean": -3.8606321811676025, + "step": 2169 + }, + { + "chosen_geometric_mean": -0.8260037302970886, + "epoch": 0.54, + "grad_norm": 9.5625, + "learning_rate": 4.166644009861865e-06, + "log_odds": 1.9101024866104126, + "log_odds_ratio": -0.4615778625011444, + "loss": 0.269, + "rejected_geometric_mean": -2.5402722358703613, + "step": 2170 + }, + { + "chosen_geometric_mean": -1.0406545400619507, + "epoch": 0.54, + "grad_norm": 16.625, + "learning_rate": 4.16591829156787e-06, + "log_odds": 9.133011817932129, + "log_odds_ratio": -0.02462792582809925, + "loss": 0.3285, + "rejected_geometric_mean": -9.749027252197266, + "step": 2171 + }, + { + "chosen_geometric_mean": -1.1578730344772339, + "epoch": 0.54, + "grad_norm": 9.0, + "learning_rate": 4.165192320680672e-06, + "log_odds": 0.8175200819969177, + "log_odds_ratio": -0.40298783779144287, + "loss": 0.343, + "rejected_geometric_mean": -1.7833646535873413, + "step": 2172 + }, + { + "chosen_geometric_mean": -1.1056846380233765, + "epoch": 0.54, + "grad_norm": 4.03125, + "learning_rate": 4.164466097310349e-06, + "log_odds": 6.671261787414551, + "log_odds_ratio": -0.05807552486658096, + "loss": 0.304, + "rejected_geometric_mean": -7.355945587158203, + "step": 2173 + }, + { + "chosen_geometric_mean": -0.7845979332923889, + "epoch": 0.54, + "grad_norm": 15.0, + "learning_rate": 4.16373962156701e-06, + "log_odds": 1.19666588306427, + "log_odds_ratio": -0.38861480355262756, + "loss": 0.299, + "rejected_geometric_mean": -1.7094011306762695, + "step": 2174 + }, + { + "chosen_geometric_mean": -0.8606439828872681, + "epoch": 0.54, + "grad_norm": 3.125, + "learning_rate": 4.163012893560809e-06, + "log_odds": 7.710422992706299, + "log_odds_ratio": -0.048186007887125015, + "loss": 0.3013, + "rejected_geometric_mean": -8.047311782836914, + "step": 2175 + }, + { + "chosen_geometric_mean": -0.9120635986328125, + "epoch": 0.54, + "grad_norm": 2.484375, + "learning_rate": 4.162285913401934e-06, + "log_odds": 1.9852454662322998, + "log_odds_ratio": -0.21331670880317688, + "loss": 0.3146, + "rejected_geometric_mean": -2.550776243209839, + "step": 2176 + }, + { + "chosen_geometric_mean": -0.7438821196556091, + "epoch": 0.54, + "grad_norm": 4.40625, + "learning_rate": 4.161558681200615e-06, + "log_odds": 2.826590061187744, + "log_odds_ratio": -0.3083527684211731, + "loss": 0.2863, + "rejected_geometric_mean": -3.260760545730591, + "step": 2177 + }, + { + "chosen_geometric_mean": -1.0871638059616089, + "epoch": 0.54, + "grad_norm": 18.75, + "learning_rate": 4.160831197067114e-06, + "log_odds": 3.6164801120758057, + "log_odds_ratio": -0.27546316385269165, + "loss": 0.364, + "rejected_geometric_mean": -4.446598529815674, + "step": 2178 + }, + { + "chosen_geometric_mean": -1.2206634283065796, + "epoch": 0.54, + "grad_norm": 3.34375, + "learning_rate": 4.160103461111737e-06, + "log_odds": 2.0805485248565674, + "log_odds_ratio": -0.3178224265575409, + "loss": 0.3024, + "rejected_geometric_mean": -3.1266496181488037, + "step": 2179 + }, + { + "chosen_geometric_mean": -1.1974902153015137, + "epoch": 0.54, + "grad_norm": 7.96875, + "learning_rate": 4.159375473444828e-06, + "log_odds": 6.925518035888672, + "log_odds_ratio": -0.18937572836875916, + "loss": 0.2957, + "rejected_geometric_mean": -7.880805015563965, + "step": 2180 + }, + { + "chosen_geometric_mean": -1.2764217853546143, + "epoch": 0.54, + "grad_norm": 10.25, + "learning_rate": 4.158647234176765e-06, + "log_odds": 4.087822437286377, + "log_odds_ratio": -0.11673220992088318, + "loss": 0.3116, + "rejected_geometric_mean": -5.019139289855957, + "step": 2181 + }, + { + "chosen_geometric_mean": -1.0840826034545898, + "epoch": 0.54, + "grad_norm": 3.796875, + "learning_rate": 4.157918743417967e-06, + "log_odds": 4.4429402351379395, + "log_odds_ratio": -0.46935832500457764, + "loss": 0.3065, + "rejected_geometric_mean": -5.407428741455078, + "step": 2182 + }, + { + "chosen_geometric_mean": -1.0439696311950684, + "epoch": 0.54, + "grad_norm": 10.6875, + "learning_rate": 4.157190001278891e-06, + "log_odds": 3.669647216796875, + "log_odds_ratio": -0.28919997811317444, + "loss": 0.3412, + "rejected_geometric_mean": -4.469003200531006, + "step": 2183 + }, + { + "chosen_geometric_mean": -1.2383148670196533, + "epoch": 0.54, + "grad_norm": 6.125, + "learning_rate": 4.156461007870032e-06, + "log_odds": 3.069629430770874, + "log_odds_ratio": -0.262484073638916, + "loss": 0.3519, + "rejected_geometric_mean": -4.0633039474487305, + "step": 2184 + }, + { + "chosen_geometric_mean": -1.0077404975891113, + "epoch": 0.54, + "grad_norm": 8.1875, + "learning_rate": 4.1557317633019215e-06, + "log_odds": 5.196384906768799, + "log_odds_ratio": -0.175362229347229, + "loss": 0.3054, + "rejected_geometric_mean": -5.859481334686279, + "step": 2185 + }, + { + "chosen_geometric_mean": -1.0984439849853516, + "epoch": 0.54, + "grad_norm": 7.34375, + "learning_rate": 4.1550022676851335e-06, + "log_odds": 3.7655937671661377, + "log_odds_ratio": -0.2853902578353882, + "loss": 0.2963, + "rejected_geometric_mean": -4.617985725402832, + "step": 2186 + }, + { + "chosen_geometric_mean": -1.0822192430496216, + "epoch": 0.54, + "grad_norm": 2.53125, + "learning_rate": 4.154272521130274e-06, + "log_odds": 2.914438247680664, + "log_odds_ratio": -0.3514275848865509, + "loss": 0.3001, + "rejected_geometric_mean": -3.766596794128418, + "step": 2187 + }, + { + "chosen_geometric_mean": -1.0142852067947388, + "epoch": 0.54, + "grad_norm": 20.75, + "learning_rate": 4.1535425237479906e-06, + "log_odds": 3.24890398979187, + "log_odds_ratio": -0.1747843474149704, + "loss": 0.3391, + "rejected_geometric_mean": -3.8833017349243164, + "step": 2188 + }, + { + "chosen_geometric_mean": -1.2133731842041016, + "epoch": 0.54, + "grad_norm": 10.3125, + "learning_rate": 4.15281227564897e-06, + "log_odds": 7.6273603439331055, + "log_odds_ratio": -0.016029199585318565, + "loss": 0.3187, + "rejected_geometric_mean": -8.479307174682617, + "step": 2189 + }, + { + "chosen_geometric_mean": -0.8042112588882446, + "epoch": 0.54, + "grad_norm": 13.9375, + "learning_rate": 4.152081776943935e-06, + "log_odds": 4.430505752563477, + "log_odds_ratio": -0.3293110132217407, + "loss": 0.2642, + "rejected_geometric_mean": -4.906844615936279, + "step": 2190 + }, + { + "chosen_geometric_mean": -1.3198422193527222, + "epoch": 0.54, + "grad_norm": 5.375, + "learning_rate": 4.151351027743645e-06, + "log_odds": 1.974130392074585, + "log_odds_ratio": -0.29561787843704224, + "loss": 0.3262, + "rejected_geometric_mean": -3.0770809650421143, + "step": 2191 + }, + { + "chosen_geometric_mean": -1.0698540210723877, + "epoch": 0.54, + "grad_norm": 3.5, + "learning_rate": 4.150620028158902e-06, + "log_odds": 2.067946434020996, + "log_odds_ratio": -0.3158702850341797, + "loss": 0.2983, + "rejected_geometric_mean": -2.8227033615112305, + "step": 2192 + }, + { + "chosen_geometric_mean": -1.0999255180358887, + "epoch": 0.54, + "grad_norm": 7.3125, + "learning_rate": 4.14988877830054e-06, + "log_odds": 2.2603185176849365, + "log_odds_ratio": -0.37183600664138794, + "loss": 0.2903, + "rejected_geometric_mean": -3.1564207077026367, + "step": 2193 + }, + { + "chosen_geometric_mean": -1.161419153213501, + "epoch": 0.54, + "grad_norm": 3.0, + "learning_rate": 4.149157278279436e-06, + "log_odds": 1.8169775009155273, + "log_odds_ratio": -0.3834897577762604, + "loss": 0.3199, + "rejected_geometric_mean": -2.900763988494873, + "step": 2194 + }, + { + "chosen_geometric_mean": -1.0161136388778687, + "epoch": 0.54, + "grad_norm": 4.21875, + "learning_rate": 4.148425528206501e-06, + "log_odds": 2.75879168510437, + "log_odds_ratio": -0.23593378067016602, + "loss": 0.3804, + "rejected_geometric_mean": -3.4370741844177246, + "step": 2195 + }, + { + "chosen_geometric_mean": -1.1379265785217285, + "epoch": 0.54, + "grad_norm": 8.1875, + "learning_rate": 4.1476935281926885e-06, + "log_odds": 1.8168013095855713, + "log_odds_ratio": -0.27019551396369934, + "loss": 0.2746, + "rejected_geometric_mean": -2.712597131729126, + "step": 2196 + }, + { + "chosen_geometric_mean": -1.1570640802383423, + "epoch": 0.54, + "grad_norm": 5.34375, + "learning_rate": 4.146961278348986e-06, + "log_odds": 0.5318237543106079, + "log_odds_ratio": -0.4916951358318329, + "loss": 0.3105, + "rejected_geometric_mean": -1.5451295375823975, + "step": 2197 + }, + { + "chosen_geometric_mean": -1.251354455947876, + "epoch": 0.54, + "grad_norm": 3.59375, + "learning_rate": 4.1462287787864195e-06, + "log_odds": 0.6901496648788452, + "log_odds_ratio": -0.42969849705696106, + "loss": 0.3013, + "rejected_geometric_mean": -1.8001718521118164, + "step": 2198 + }, + { + "chosen_geometric_mean": -1.2121073007583618, + "epoch": 0.54, + "grad_norm": 7.78125, + "learning_rate": 4.145496029616055e-06, + "log_odds": 3.603564739227295, + "log_odds_ratio": -0.32512444257736206, + "loss": 0.335, + "rejected_geometric_mean": -4.593017101287842, + "step": 2199 + }, + { + "chosen_geometric_mean": -1.116483211517334, + "epoch": 0.54, + "grad_norm": 3.125, + "learning_rate": 4.144763030948994e-06, + "log_odds": 1.0964739322662354, + "log_odds_ratio": -0.49763622879981995, + "loss": 0.3131, + "rejected_geometric_mean": -2.0892555713653564, + "step": 2200 + }, + { + "chosen_geometric_mean": -1.0181704759597778, + "epoch": 0.54, + "grad_norm": 13.9375, + "learning_rate": 4.144029782896376e-06, + "log_odds": 5.193316459655762, + "log_odds_ratio": -0.14779455959796906, + "loss": 0.3814, + "rejected_geometric_mean": -5.861390113830566, + "step": 2201 + }, + { + "chosen_geometric_mean": -0.9395930171012878, + "epoch": 0.55, + "grad_norm": 5.84375, + "learning_rate": 4.143296285569381e-06, + "log_odds": 3.3725335597991943, + "log_odds_ratio": -0.15129214525222778, + "loss": 0.2918, + "rejected_geometric_mean": -3.920325994491577, + "step": 2202 + }, + { + "chosen_geometric_mean": -1.2128651142120361, + "epoch": 0.55, + "grad_norm": 48.25, + "learning_rate": 4.142562539079223e-06, + "log_odds": 2.765169620513916, + "log_odds_ratio": -0.21809856593608856, + "loss": 0.2935, + "rejected_geometric_mean": -3.7131896018981934, + "step": 2203 + }, + { + "chosen_geometric_mean": -1.153916358947754, + "epoch": 0.55, + "grad_norm": 6.0, + "learning_rate": 4.141828543537155e-06, + "log_odds": 5.063270092010498, + "log_odds_ratio": -0.15812887251377106, + "loss": 0.2533, + "rejected_geometric_mean": -5.931621074676514, + "step": 2204 + }, + { + "chosen_geometric_mean": -1.1613668203353882, + "epoch": 0.55, + "grad_norm": 13.25, + "learning_rate": 4.141094299054471e-06, + "log_odds": 4.363627910614014, + "log_odds_ratio": -0.21461831033229828, + "loss": 0.293, + "rejected_geometric_mean": -5.285451889038086, + "step": 2205 + }, + { + "chosen_geometric_mean": -1.3946056365966797, + "epoch": 0.55, + "grad_norm": 35.75, + "learning_rate": 4.140359805742498e-06, + "log_odds": 6.74021577835083, + "log_odds_ratio": -0.04730965197086334, + "loss": 0.2713, + "rejected_geometric_mean": -7.862381935119629, + "step": 2206 + }, + { + "chosen_geometric_mean": -0.8902643322944641, + "epoch": 0.55, + "grad_norm": 2.109375, + "learning_rate": 4.139625063712603e-06, + "log_odds": 4.540762424468994, + "log_odds_ratio": -0.08602674305438995, + "loss": 0.299, + "rejected_geometric_mean": -4.959079742431641, + "step": 2207 + }, + { + "chosen_geometric_mean": -1.677720308303833, + "epoch": 0.55, + "grad_norm": 65.5, + "learning_rate": 4.138890073076192e-06, + "log_odds": 0.8471511006355286, + "log_odds_ratio": -0.3795563578605652, + "loss": 0.4343, + "rejected_geometric_mean": -2.4014453887939453, + "step": 2208 + }, + { + "chosen_geometric_mean": -1.1252566576004028, + "epoch": 0.55, + "grad_norm": 2.78125, + "learning_rate": 4.1381548339447056e-06, + "log_odds": 4.82102108001709, + "log_odds_ratio": -0.33083149790763855, + "loss": 0.2925, + "rejected_geometric_mean": -5.753514289855957, + "step": 2209 + }, + { + "chosen_geometric_mean": -1.0758998394012451, + "epoch": 0.55, + "grad_norm": 20.875, + "learning_rate": 4.1374193464296245e-06, + "log_odds": 3.8589773178100586, + "log_odds_ratio": -0.2578530013561249, + "loss": 0.3161, + "rejected_geometric_mean": -4.658625602722168, + "step": 2210 + }, + { + "chosen_geometric_mean": -1.2272605895996094, + "epoch": 0.55, + "grad_norm": 15.375, + "learning_rate": 4.136683610642465e-06, + "log_odds": 4.287960529327393, + "log_odds_ratio": -0.2037859410047531, + "loss": 0.3169, + "rejected_geometric_mean": -5.251577377319336, + "step": 2211 + }, + { + "chosen_geometric_mean": -1.2161033153533936, + "epoch": 0.55, + "grad_norm": 22.5, + "learning_rate": 4.135947626694784e-06, + "log_odds": 3.479997396469116, + "log_odds_ratio": -0.2148922085762024, + "loss": 0.3206, + "rejected_geometric_mean": -4.42175817489624, + "step": 2212 + }, + { + "chosen_geometric_mean": -1.1171354055404663, + "epoch": 0.55, + "grad_norm": 21.5, + "learning_rate": 4.135211394698173e-06, + "log_odds": 1.5811413526535034, + "log_odds_ratio": -0.36118996143341064, + "loss": 0.3655, + "rejected_geometric_mean": -2.4579062461853027, + "step": 2213 + }, + { + "chosen_geometric_mean": -1.457709550857544, + "epoch": 0.55, + "grad_norm": 4.46875, + "learning_rate": 4.1344749147642645e-06, + "log_odds": 4.815142631530762, + "log_odds_ratio": -0.17259089648723602, + "loss": 0.2945, + "rejected_geometric_mean": -6.082108497619629, + "step": 2214 + }, + { + "chosen_geometric_mean": -1.0282959938049316, + "epoch": 0.55, + "grad_norm": 7.71875, + "learning_rate": 4.1337381870047235e-06, + "log_odds": 4.93895149230957, + "log_odds_ratio": -0.14679816365242004, + "loss": 0.2911, + "rejected_geometric_mean": -5.603263854980469, + "step": 2215 + }, + { + "chosen_geometric_mean": -1.0022588968276978, + "epoch": 0.55, + "grad_norm": 6.6875, + "learning_rate": 4.1330012115312586e-06, + "log_odds": 0.19637665152549744, + "log_odds_ratio": -0.6011359691619873, + "loss": 0.3358, + "rejected_geometric_mean": -1.1309592723846436, + "step": 2216 + }, + { + "chosen_geometric_mean": -1.0502333641052246, + "epoch": 0.55, + "grad_norm": 14.25, + "learning_rate": 4.13226398845561e-06, + "log_odds": 3.645690679550171, + "log_odds_ratio": -0.13259285688400269, + "loss": 0.3063, + "rejected_geometric_mean": -4.339760780334473, + "step": 2217 + }, + { + "chosen_geometric_mean": -1.0682413578033447, + "epoch": 0.55, + "grad_norm": 4.6875, + "learning_rate": 4.131526517889562e-06, + "log_odds": 4.3726115226745605, + "log_odds_ratio": -0.027902215719223022, + "loss": 0.2656, + "rejected_geometric_mean": -5.025478363037109, + "step": 2218 + }, + { + "chosen_geometric_mean": -1.0939446687698364, + "epoch": 0.55, + "grad_norm": 9.25, + "learning_rate": 4.1307887999449305e-06, + "log_odds": 2.4905307292938232, + "log_odds_ratio": -0.30129924416542053, + "loss": 0.3931, + "rejected_geometric_mean": -3.3272383213043213, + "step": 2219 + }, + { + "chosen_geometric_mean": -1.0005104541778564, + "epoch": 0.55, + "grad_norm": 3.671875, + "learning_rate": 4.13005083473357e-06, + "log_odds": 4.894160270690918, + "log_odds_ratio": -0.0729200690984726, + "loss": 0.2887, + "rejected_geometric_mean": -5.476629257202148, + "step": 2220 + }, + { + "chosen_geometric_mean": -1.0020370483398438, + "epoch": 0.55, + "grad_norm": 2.6875, + "learning_rate": 4.129312622367377e-06, + "log_odds": 0.25828802585601807, + "log_odds_ratio": -0.6302019357681274, + "loss": 0.3705, + "rejected_geometric_mean": -1.2329744100570679, + "step": 2221 + }, + { + "chosen_geometric_mean": -0.9439151287078857, + "epoch": 0.55, + "grad_norm": 2.90625, + "learning_rate": 4.1285741629582795e-06, + "log_odds": 0.6446737051010132, + "log_odds_ratio": -0.45340582728385925, + "loss": 0.2837, + "rejected_geometric_mean": -1.394873857498169, + "step": 2222 + }, + { + "chosen_geometric_mean": -0.9021420478820801, + "epoch": 0.55, + "grad_norm": 7.46875, + "learning_rate": 4.127835456618247e-06, + "log_odds": 1.5964487791061401, + "log_odds_ratio": -0.49368199706077576, + "loss": 0.3206, + "rejected_geometric_mean": -2.3393383026123047, + "step": 2223 + }, + { + "chosen_geometric_mean": -1.025508165359497, + "epoch": 0.55, + "grad_norm": 2.4375, + "learning_rate": 4.127096503459285e-06, + "log_odds": 1.091423749923706, + "log_odds_ratio": -0.3463325798511505, + "loss": 0.2985, + "rejected_geometric_mean": -1.8898595571517944, + "step": 2224 + }, + { + "chosen_geometric_mean": -1.026841163635254, + "epoch": 0.55, + "grad_norm": 6.5, + "learning_rate": 4.126357303593437e-06, + "log_odds": 2.0197901725769043, + "log_odds_ratio": -0.32536542415618896, + "loss": 0.2779, + "rejected_geometric_mean": -2.7923460006713867, + "step": 2225 + }, + { + "chosen_geometric_mean": -1.1347541809082031, + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 4.125617857132782e-06, + "log_odds": 3.395298957824707, + "log_odds_ratio": -0.36109185218811035, + "loss": 0.2961, + "rejected_geometric_mean": -4.353968620300293, + "step": 2226 + }, + { + "chosen_geometric_mean": -1.2112118005752563, + "epoch": 0.55, + "grad_norm": 3.71875, + "learning_rate": 4.124878164189439e-06, + "log_odds": 2.3979408740997314, + "log_odds_ratio": -0.17328709363937378, + "loss": 0.3316, + "rejected_geometric_mean": -3.317379951477051, + "step": 2227 + }, + { + "chosen_geometric_mean": -1.0988541841506958, + "epoch": 0.55, + "grad_norm": 4.65625, + "learning_rate": 4.1241382248755625e-06, + "log_odds": 2.812917709350586, + "log_odds_ratio": -0.2861477732658386, + "loss": 0.3243, + "rejected_geometric_mean": -3.64847469329834, + "step": 2228 + }, + { + "chosen_geometric_mean": -1.1035802364349365, + "epoch": 0.55, + "grad_norm": 8.3125, + "learning_rate": 4.123398039303346e-06, + "log_odds": 1.080812692642212, + "log_odds_ratio": -0.3407352566719055, + "loss": 0.321, + "rejected_geometric_mean": -1.96365225315094, + "step": 2229 + }, + { + "chosen_geometric_mean": -1.1636333465576172, + "epoch": 0.55, + "grad_norm": 14.125, + "learning_rate": 4.122657607585018e-06, + "log_odds": 2.712801933288574, + "log_odds_ratio": -0.13203246891498566, + "loss": 0.3119, + "rejected_geometric_mean": -3.543609380722046, + "step": 2230 + }, + { + "chosen_geometric_mean": -0.9657652378082275, + "epoch": 0.55, + "grad_norm": 22.25, + "learning_rate": 4.121916929832848e-06, + "log_odds": 2.7461133003234863, + "log_odds_ratio": -0.3490443229675293, + "loss": 0.3658, + "rejected_geometric_mean": -3.4866204261779785, + "step": 2231 + }, + { + "chosen_geometric_mean": -1.3897604942321777, + "epoch": 0.55, + "grad_norm": 35.25, + "learning_rate": 4.121176006159138e-06, + "log_odds": 5.442571640014648, + "log_odds_ratio": -0.14384789764881134, + "loss": 0.3239, + "rejected_geometric_mean": -6.605987548828125, + "step": 2232 + }, + { + "chosen_geometric_mean": -1.287964105606079, + "epoch": 0.55, + "grad_norm": 6.5, + "learning_rate": 4.120434836676231e-06, + "log_odds": 1.3571456670761108, + "log_odds_ratio": -0.4607199430465698, + "loss": 0.3587, + "rejected_geometric_mean": -2.5126566886901855, + "step": 2233 + }, + { + "chosen_geometric_mean": -1.2985039949417114, + "epoch": 0.55, + "grad_norm": 35.5, + "learning_rate": 4.119693421496506e-06, + "log_odds": 7.619449615478516, + "log_odds_ratio": -0.010134450159966946, + "loss": 0.3306, + "rejected_geometric_mean": -8.595775604248047, + "step": 2234 + }, + { + "chosen_geometric_mean": -1.0839236974716187, + "epoch": 0.55, + "grad_norm": 34.0, + "learning_rate": 4.118951760732379e-06, + "log_odds": 6.795901775360107, + "log_odds_ratio": -0.03748388960957527, + "loss": 0.353, + "rejected_geometric_mean": -7.473565101623535, + "step": 2235 + }, + { + "chosen_geometric_mean": -0.9674731492996216, + "epoch": 0.55, + "grad_norm": 8.25, + "learning_rate": 4.118209854496304e-06, + "log_odds": 4.562919616699219, + "log_odds_ratio": -0.2816189229488373, + "loss": 0.3288, + "rejected_geometric_mean": -5.232429504394531, + "step": 2236 + }, + { + "chosen_geometric_mean": -1.112054467201233, + "epoch": 0.55, + "grad_norm": 40.0, + "learning_rate": 4.117467702900771e-06, + "log_odds": 4.678858757019043, + "log_odds_ratio": -0.2692401707172394, + "loss": 0.341, + "rejected_geometric_mean": -5.475438117980957, + "step": 2237 + }, + { + "chosen_geometric_mean": -1.1522003412246704, + "epoch": 0.55, + "grad_norm": 6.125, + "learning_rate": 4.11672530605831e-06, + "log_odds": 2.8315770626068115, + "log_odds_ratio": -0.10653355717658997, + "loss": 0.3057, + "rejected_geometric_mean": -3.637082815170288, + "step": 2238 + }, + { + "chosen_geometric_mean": -0.9794828295707703, + "epoch": 0.55, + "grad_norm": 5.3125, + "learning_rate": 4.1159826640814844e-06, + "log_odds": 1.5345957279205322, + "log_odds_ratio": -0.4259185492992401, + "loss": 0.2606, + "rejected_geometric_mean": -2.3615264892578125, + "step": 2239 + }, + { + "chosen_geometric_mean": -1.3773225545883179, + "epoch": 0.55, + "grad_norm": 16.875, + "learning_rate": 4.115239777082898e-06, + "log_odds": 3.022059202194214, + "log_odds_ratio": -0.2159317135810852, + "loss": 0.3199, + "rejected_geometric_mean": -4.19277286529541, + "step": 2240 + }, + { + "chosen_geometric_mean": -1.159719705581665, + "epoch": 0.55, + "grad_norm": 2.375, + "learning_rate": 4.114496645175188e-06, + "log_odds": 2.8429174423217773, + "log_odds_ratio": -0.19888760149478912, + "loss": 0.35, + "rejected_geometric_mean": -3.7323451042175293, + "step": 2241 + }, + { + "chosen_geometric_mean": -0.9213508367538452, + "epoch": 0.56, + "grad_norm": 3.375, + "learning_rate": 4.113753268471032e-06, + "log_odds": 0.7186025977134705, + "log_odds_ratio": -0.5425236821174622, + "loss": 0.3263, + "rejected_geometric_mean": -1.5272483825683594, + "step": 2242 + }, + { + "chosen_geometric_mean": -1.0598654747009277, + "epoch": 0.56, + "grad_norm": 3.65625, + "learning_rate": 4.113009647083146e-06, + "log_odds": 0.46624982357025146, + "log_odds_ratio": -0.5306075215339661, + "loss": 0.2953, + "rejected_geometric_mean": -1.4011056423187256, + "step": 2243 + }, + { + "chosen_geometric_mean": -1.0687980651855469, + "epoch": 0.56, + "grad_norm": 3.90625, + "learning_rate": 4.112265781124277e-06, + "log_odds": 5.801011562347412, + "log_odds_ratio": -0.20975413918495178, + "loss": 0.2785, + "rejected_geometric_mean": -6.55277681350708, + "step": 2244 + }, + { + "chosen_geometric_mean": -1.0978355407714844, + "epoch": 0.56, + "grad_norm": 3.125, + "learning_rate": 4.111521670707216e-06, + "log_odds": 1.0643079280853271, + "log_odds_ratio": -0.35432490706443787, + "loss": 0.2879, + "rejected_geometric_mean": -1.9336857795715332, + "step": 2245 + }, + { + "chosen_geometric_mean": -0.9123396873474121, + "epoch": 0.56, + "grad_norm": 4.125, + "learning_rate": 4.110777315944786e-06, + "log_odds": 0.6060781478881836, + "log_odds_ratio": -0.463162899017334, + "loss": 0.2994, + "rejected_geometric_mean": -1.3222990036010742, + "step": 2246 + }, + { + "chosen_geometric_mean": -0.8908171057701111, + "epoch": 0.56, + "grad_norm": 2.75, + "learning_rate": 4.110032716949851e-06, + "log_odds": 3.022603988647461, + "log_odds_ratio": -0.12905597686767578, + "loss": 0.3509, + "rejected_geometric_mean": -3.4785571098327637, + "step": 2247 + }, + { + "chosen_geometric_mean": -0.9044507741928101, + "epoch": 0.56, + "grad_norm": 2.859375, + "learning_rate": 4.1092878738353085e-06, + "log_odds": 2.7950098514556885, + "log_odds_ratio": -0.1878504753112793, + "loss": 0.3394, + "rejected_geometric_mean": -3.2914724349975586, + "step": 2248 + }, + { + "chosen_geometric_mean": -1.156371831893921, + "epoch": 0.56, + "grad_norm": 3.015625, + "learning_rate": 4.108542786714095e-06, + "log_odds": 2.2755134105682373, + "log_odds_ratio": -0.20489665865898132, + "loss": 0.259, + "rejected_geometric_mean": -3.1506850719451904, + "step": 2249 + }, + { + "chosen_geometric_mean": -1.0884349346160889, + "epoch": 0.56, + "grad_norm": 3.3125, + "learning_rate": 4.107797455699183e-06, + "log_odds": 1.0964945554733276, + "log_odds_ratio": -0.4646531641483307, + "loss": 0.2865, + "rejected_geometric_mean": -1.9885591268539429, + "step": 2250 + }, + { + "chosen_geometric_mean": -1.0583804845809937, + "epoch": 0.56, + "grad_norm": 4.21875, + "learning_rate": 4.107051880903584e-06, + "log_odds": 1.645815134048462, + "log_odds_ratio": -0.2423662543296814, + "loss": 0.2776, + "rejected_geometric_mean": -2.3529860973358154, + "step": 2251 + }, + { + "chosen_geometric_mean": -1.2643256187438965, + "epoch": 0.56, + "grad_norm": 4.875, + "learning_rate": 4.106306062440345e-06, + "log_odds": 6.730232238769531, + "log_odds_ratio": -0.2148473858833313, + "loss": 0.2812, + "rejected_geometric_mean": -7.726282119750977, + "step": 2252 + }, + { + "chosen_geometric_mean": -0.838148832321167, + "epoch": 0.56, + "grad_norm": 98.5, + "learning_rate": 4.105560000422548e-06, + "log_odds": 15.636473655700684, + "log_odds_ratio": -6.079744252929231e-06, + "loss": 0.2611, + "rejected_geometric_mean": -15.889823913574219, + "step": 2253 + }, + { + "chosen_geometric_mean": -1.3450729846954346, + "epoch": 0.56, + "grad_norm": 38.5, + "learning_rate": 4.104813694963317e-06, + "log_odds": 6.623666763305664, + "log_odds_ratio": -0.3657289147377014, + "loss": 0.3976, + "rejected_geometric_mean": -7.738060474395752, + "step": 2254 + }, + { + "chosen_geometric_mean": -1.151625156402588, + "epoch": 0.56, + "grad_norm": 43.25, + "learning_rate": 4.104067146175806e-06, + "log_odds": 6.810631275177002, + "log_odds_ratio": -0.07360793650150299, + "loss": 0.451, + "rejected_geometric_mean": -7.598406791687012, + "step": 2255 + }, + { + "chosen_geometric_mean": -1.2761365175247192, + "epoch": 0.56, + "grad_norm": 50.0, + "learning_rate": 4.103320354173213e-06, + "log_odds": 2.537452220916748, + "log_odds_ratio": -0.3591831624507904, + "loss": 0.3654, + "rejected_geometric_mean": -3.6542985439300537, + "step": 2256 + }, + { + "chosen_geometric_mean": -1.3182975053787231, + "epoch": 0.56, + "grad_norm": 29.875, + "learning_rate": 4.102573319068768e-06, + "log_odds": 4.988722324371338, + "log_odds_ratio": -0.19614194333553314, + "loss": 0.4543, + "rejected_geometric_mean": -6.069618225097656, + "step": 2257 + }, + { + "chosen_geometric_mean": -0.9879144430160522, + "epoch": 0.56, + "grad_norm": 27.375, + "learning_rate": 4.1018260409757395e-06, + "log_odds": 8.430142402648926, + "log_odds_ratio": -0.004426185041666031, + "loss": 0.3939, + "rejected_geometric_mean": -8.936958312988281, + "step": 2258 + }, + { + "chosen_geometric_mean": -1.1258991956710815, + "epoch": 0.56, + "grad_norm": 21.125, + "learning_rate": 4.101078520007434e-06, + "log_odds": 4.932052135467529, + "log_odds_ratio": -0.15520364046096802, + "loss": 0.3328, + "rejected_geometric_mean": -5.78313684463501, + "step": 2259 + }, + { + "chosen_geometric_mean": -1.1143581867218018, + "epoch": 0.56, + "grad_norm": 8.9375, + "learning_rate": 4.100330756277192e-06, + "log_odds": 2.800879955291748, + "log_odds_ratio": -0.2192709892988205, + "loss": 0.2761, + "rejected_geometric_mean": -3.5963056087493896, + "step": 2260 + }, + { + "chosen_geometric_mean": -1.4973151683807373, + "epoch": 0.56, + "grad_norm": 10.6875, + "learning_rate": 4.099582749898393e-06, + "log_odds": 3.937251091003418, + "log_odds_ratio": -0.38529250025749207, + "loss": 0.3544, + "rejected_geometric_mean": -5.322214603424072, + "step": 2261 + }, + { + "chosen_geometric_mean": -1.0673449039459229, + "epoch": 0.56, + "grad_norm": 2.171875, + "learning_rate": 4.098834500984453e-06, + "log_odds": 3.2983310222625732, + "log_odds_ratio": -0.2564455270767212, + "loss": 0.3112, + "rejected_geometric_mean": -4.094174861907959, + "step": 2262 + }, + { + "chosen_geometric_mean": -0.8130419850349426, + "epoch": 0.56, + "grad_norm": 7.0, + "learning_rate": 4.098086009648825e-06, + "log_odds": 4.579545974731445, + "log_odds_ratio": -0.25174519419670105, + "loss": 0.3035, + "rejected_geometric_mean": -4.987931251525879, + "step": 2263 + }, + { + "chosen_geometric_mean": -1.3867175579071045, + "epoch": 0.56, + "grad_norm": 7.375, + "learning_rate": 4.0973372760049965e-06, + "log_odds": 0.671527624130249, + "log_odds_ratio": -0.6475480794906616, + "loss": 0.3462, + "rejected_geometric_mean": -2.034806966781616, + "step": 2264 + }, + { + "chosen_geometric_mean": -1.299901008605957, + "epoch": 0.56, + "grad_norm": 7.5, + "learning_rate": 4.0965883001664955e-06, + "log_odds": 1.6430072784423828, + "log_odds_ratio": -0.29993245005607605, + "loss": 0.326, + "rejected_geometric_mean": -2.768900156021118, + "step": 2265 + }, + { + "chosen_geometric_mean": -1.1825741529464722, + "epoch": 0.56, + "grad_norm": 2.1875, + "learning_rate": 4.095839082246884e-06, + "log_odds": 2.1284379959106445, + "log_odds_ratio": -0.22916531562805176, + "loss": 0.3295, + "rejected_geometric_mean": -3.08079195022583, + "step": 2266 + }, + { + "chosen_geometric_mean": -1.0878636837005615, + "epoch": 0.56, + "grad_norm": 5.1875, + "learning_rate": 4.09508962235976e-06, + "log_odds": 2.636277914047241, + "log_odds_ratio": -0.21845237910747528, + "loss": 0.3121, + "rejected_geometric_mean": -3.4385886192321777, + "step": 2267 + }, + { + "chosen_geometric_mean": -1.154194951057434, + "epoch": 0.56, + "grad_norm": 3.21875, + "learning_rate": 4.094339920618762e-06, + "log_odds": 2.382836103439331, + "log_odds_ratio": -0.3439215421676636, + "loss": 0.3311, + "rejected_geometric_mean": -3.324453830718994, + "step": 2268 + }, + { + "chosen_geometric_mean": -1.178413987159729, + "epoch": 0.56, + "grad_norm": 2.03125, + "learning_rate": 4.093589977137561e-06, + "log_odds": 0.7089172601699829, + "log_odds_ratio": -0.4681134819984436, + "loss": 0.3135, + "rejected_geometric_mean": -1.7184338569641113, + "step": 2269 + }, + { + "chosen_geometric_mean": -1.2699681520462036, + "epoch": 0.56, + "grad_norm": 18.0, + "learning_rate": 4.092839792029867e-06, + "log_odds": 1.252532720565796, + "log_odds_ratio": -0.45170801877975464, + "loss": 0.332, + "rejected_geometric_mean": -2.4124321937561035, + "step": 2270 + }, + { + "chosen_geometric_mean": -1.159145712852478, + "epoch": 0.56, + "grad_norm": 7.90625, + "learning_rate": 4.0920893654094265e-06, + "log_odds": 1.00821852684021, + "log_odds_ratio": -0.3433271646499634, + "loss": 0.327, + "rejected_geometric_mean": -1.9785228967666626, + "step": 2271 + }, + { + "chosen_geometric_mean": -0.9998931884765625, + "epoch": 0.56, + "grad_norm": 2.328125, + "learning_rate": 4.091338697390022e-06, + "log_odds": 2.173386335372925, + "log_odds_ratio": -0.3180403709411621, + "loss": 0.2617, + "rejected_geometric_mean": -2.920888900756836, + "step": 2272 + }, + { + "chosen_geometric_mean": -0.9643294811248779, + "epoch": 0.56, + "grad_norm": 2.8125, + "learning_rate": 4.090587788085472e-06, + "log_odds": 1.8283922672271729, + "log_odds_ratio": -0.3617039918899536, + "loss": 0.332, + "rejected_geometric_mean": -2.5545806884765625, + "step": 2273 + }, + { + "chosen_geometric_mean": -0.99174964427948, + "epoch": 0.56, + "grad_norm": 3.140625, + "learning_rate": 4.089836637609633e-06, + "log_odds": 1.6670726537704468, + "log_odds_ratio": -0.4249505400657654, + "loss": 0.3542, + "rejected_geometric_mean": -2.4413089752197266, + "step": 2274 + }, + { + "chosen_geometric_mean": -1.0720093250274658, + "epoch": 0.56, + "grad_norm": 2.40625, + "learning_rate": 4.089085246076398e-06, + "log_odds": 3.6650497913360596, + "log_odds_ratio": -0.09148413687944412, + "loss": 0.2421, + "rejected_geometric_mean": -4.341422080993652, + "step": 2275 + }, + { + "chosen_geometric_mean": -1.17194664478302, + "epoch": 0.56, + "grad_norm": 4.6875, + "learning_rate": 4.088333613599695e-06, + "log_odds": 4.1434221267700195, + "log_odds_ratio": -0.16250646114349365, + "loss": 0.3049, + "rejected_geometric_mean": -5.015031337738037, + "step": 2276 + }, + { + "chosen_geometric_mean": -1.1760121583938599, + "epoch": 0.56, + "grad_norm": 4.125, + "learning_rate": 4.08758174029349e-06, + "log_odds": 0.1504158079624176, + "log_odds_ratio": -0.6417942643165588, + "loss": 0.3774, + "rejected_geometric_mean": -1.2920137643814087, + "step": 2277 + }, + { + "chosen_geometric_mean": -1.1917369365692139, + "epoch": 0.56, + "grad_norm": 60.0, + "learning_rate": 4.086829626271786e-06, + "log_odds": 4.485556125640869, + "log_odds_ratio": -0.34450891613960266, + "loss": 0.322, + "rejected_geometric_mean": -5.405789852142334, + "step": 2278 + }, + { + "chosen_geometric_mean": -1.663815975189209, + "epoch": 0.56, + "grad_norm": 25.375, + "learning_rate": 4.08607727164862e-06, + "log_odds": 3.4820499420166016, + "log_odds_ratio": -0.05375199019908905, + "loss": 0.3885, + "rejected_geometric_mean": -4.7390031814575195, + "step": 2279 + }, + { + "chosen_geometric_mean": -1.0196712017059326, + "epoch": 0.56, + "grad_norm": 33.75, + "learning_rate": 4.0853246765380665e-06, + "log_odds": 7.284397602081299, + "log_odds_ratio": -0.0066784825176000595, + "loss": 0.342, + "rejected_geometric_mean": -7.837472438812256, + "step": 2280 + }, + { + "chosen_geometric_mean": -1.3029145002365112, + "epoch": 0.56, + "grad_norm": 27.875, + "learning_rate": 4.0845718410542385e-06, + "log_odds": 2.00384783744812, + "log_odds_ratio": -0.3604293167591095, + "loss": 0.3124, + "rejected_geometric_mean": -3.076655149459839, + "step": 2281 + }, + { + "chosen_geometric_mean": -1.0923506021499634, + "epoch": 0.56, + "grad_norm": 24.0, + "learning_rate": 4.083818765311284e-06, + "log_odds": 4.092888832092285, + "log_odds_ratio": -0.30980968475341797, + "loss": 0.3824, + "rejected_geometric_mean": -4.929941177368164, + "step": 2282 + }, + { + "chosen_geometric_mean": -1.139657974243164, + "epoch": 0.57, + "grad_norm": 4.28125, + "learning_rate": 4.083065449423386e-06, + "log_odds": 0.5827104449272156, + "log_odds_ratio": -0.45211485028266907, + "loss": 0.2694, + "rejected_geometric_mean": -1.5782846212387085, + "step": 2283 + }, + { + "chosen_geometric_mean": -1.1877357959747314, + "epoch": 0.57, + "grad_norm": 19.75, + "learning_rate": 4.082311893504767e-06, + "log_odds": 1.927822232246399, + "log_odds_ratio": -0.5824472308158875, + "loss": 0.3156, + "rejected_geometric_mean": -3.04469633102417, + "step": 2284 + }, + { + "chosen_geometric_mean": -1.0435006618499756, + "epoch": 0.57, + "grad_norm": 15.5625, + "learning_rate": 4.081558097669683e-06, + "log_odds": 6.381081581115723, + "log_odds_ratio": -0.00736200762912631, + "loss": 0.3522, + "rejected_geometric_mean": -6.987320899963379, + "step": 2285 + }, + { + "chosen_geometric_mean": -1.0246245861053467, + "epoch": 0.57, + "grad_norm": 13.4375, + "learning_rate": 4.0808040620324275e-06, + "log_odds": 2.10898494720459, + "log_odds_ratio": -0.29721423983573914, + "loss": 0.3002, + "rejected_geometric_mean": -2.8489866256713867, + "step": 2286 + }, + { + "chosen_geometric_mean": -1.1188536882400513, + "epoch": 0.57, + "grad_norm": 4.1875, + "learning_rate": 4.080049786707331e-06, + "log_odds": 0.9013583660125732, + "log_odds_ratio": -0.41933465003967285, + "loss": 0.3038, + "rejected_geometric_mean": -1.8331917524337769, + "step": 2287 + }, + { + "chosen_geometric_mean": -1.0471084117889404, + "epoch": 0.57, + "grad_norm": 3.703125, + "learning_rate": 4.07929527180876e-06, + "log_odds": 1.3772180080413818, + "log_odds_ratio": -0.31326907873153687, + "loss": 0.2813, + "rejected_geometric_mean": -2.1593973636627197, + "step": 2288 + }, + { + "chosen_geometric_mean": -1.1112240552902222, + "epoch": 0.57, + "grad_norm": 4.4375, + "learning_rate": 4.078540517451115e-06, + "log_odds": 1.9950592517852783, + "log_odds_ratio": -0.23039580881595612, + "loss": 0.2101, + "rejected_geometric_mean": -2.7764394283294678, + "step": 2289 + }, + { + "chosen_geometric_mean": -1.121850609779358, + "epoch": 0.57, + "grad_norm": 2.578125, + "learning_rate": 4.0777855237488375e-06, + "log_odds": 4.385551929473877, + "log_odds_ratio": -0.1838785856962204, + "loss": 0.3589, + "rejected_geometric_mean": -5.215239524841309, + "step": 2290 + }, + { + "chosen_geometric_mean": -1.6018692255020142, + "epoch": 0.57, + "grad_norm": 40.25, + "learning_rate": 4.0770302908164005e-06, + "log_odds": 5.682231903076172, + "log_odds_ratio": -0.031669266521930695, + "loss": 0.3814, + "rejected_geometric_mean": -6.996096611022949, + "step": 2291 + }, + { + "chosen_geometric_mean": -1.0521636009216309, + "epoch": 0.57, + "grad_norm": 13.125, + "learning_rate": 4.0762748187683175e-06, + "log_odds": 0.23828202486038208, + "log_odds_ratio": -0.5881738662719727, + "loss": 0.3517, + "rejected_geometric_mean": -1.2027251720428467, + "step": 2292 + }, + { + "chosen_geometric_mean": -1.1785658597946167, + "epoch": 0.57, + "grad_norm": 47.0, + "learning_rate": 4.075519107719136e-06, + "log_odds": 2.282534599304199, + "log_odds_ratio": -0.28614431619644165, + "loss": 0.3594, + "rejected_geometric_mean": -3.189065933227539, + "step": 2293 + }, + { + "chosen_geometric_mean": -1.0246891975402832, + "epoch": 0.57, + "grad_norm": 2.65625, + "learning_rate": 4.074763157783438e-06, + "log_odds": 1.189239740371704, + "log_odds_ratio": -0.4146016538143158, + "loss": 0.288, + "rejected_geometric_mean": -2.08785343170166, + "step": 2294 + }, + { + "chosen_geometric_mean": -1.0545175075531006, + "epoch": 0.57, + "grad_norm": 8.0, + "learning_rate": 4.074006969075845e-06, + "log_odds": 0.5160993933677673, + "log_odds_ratio": -0.4887700080871582, + "loss": 0.3435, + "rejected_geometric_mean": -1.4411667585372925, + "step": 2295 + }, + { + "chosen_geometric_mean": -0.9843806624412537, + "epoch": 0.57, + "grad_norm": 7.5625, + "learning_rate": 4.073250541711013e-06, + "log_odds": 3.6260881423950195, + "log_odds_ratio": -0.387543261051178, + "loss": 0.3113, + "rejected_geometric_mean": -4.42161750793457, + "step": 2296 + }, + { + "chosen_geometric_mean": -1.1083598136901855, + "epoch": 0.57, + "grad_norm": 4.59375, + "learning_rate": 4.072493875803635e-06, + "log_odds": 1.989128828048706, + "log_odds_ratio": -0.264433890581131, + "loss": 0.3078, + "rejected_geometric_mean": -2.8094024658203125, + "step": 2297 + }, + { + "chosen_geometric_mean": -1.059485673904419, + "epoch": 0.57, + "grad_norm": 6.21875, + "learning_rate": 4.071736971468439e-06, + "log_odds": 1.7685155868530273, + "log_odds_ratio": -0.39496105909347534, + "loss": 0.2975, + "rejected_geometric_mean": -2.608859062194824, + "step": 2298 + }, + { + "chosen_geometric_mean": -1.035380482673645, + "epoch": 0.57, + "grad_norm": 17.25, + "learning_rate": 4.07097982882019e-06, + "log_odds": 3.9496254920959473, + "log_odds_ratio": -0.13173258304595947, + "loss": 0.307, + "rejected_geometric_mean": -4.607596397399902, + "step": 2299 + }, + { + "chosen_geometric_mean": -0.99300217628479, + "epoch": 0.57, + "grad_norm": 2.859375, + "learning_rate": 4.07022244797369e-06, + "log_odds": 2.4572367668151855, + "log_odds_ratio": -0.23981347680091858, + "loss": 0.2779, + "rejected_geometric_mean": -3.125636100769043, + "step": 2300 + }, + { + "chosen_geometric_mean": -1.181156039237976, + "epoch": 0.57, + "grad_norm": 13.4375, + "learning_rate": 4.0694648290437755e-06, + "log_odds": 2.5509634017944336, + "log_odds_ratio": -0.1938052773475647, + "loss": 0.3411, + "rejected_geometric_mean": -3.4640297889709473, + "step": 2301 + }, + { + "chosen_geometric_mean": -1.0528322458267212, + "epoch": 0.57, + "grad_norm": 3.015625, + "learning_rate": 4.068706972145319e-06, + "log_odds": 0.3489987254142761, + "log_odds_ratio": -0.5544213056564331, + "loss": 0.2639, + "rejected_geometric_mean": -1.308023452758789, + "step": 2302 + }, + { + "chosen_geometric_mean": -1.1190688610076904, + "epoch": 0.57, + "grad_norm": 24.625, + "learning_rate": 4.067948877393231e-06, + "log_odds": 3.207836151123047, + "log_odds_ratio": -0.18321895599365234, + "loss": 0.3295, + "rejected_geometric_mean": -4.005409240722656, + "step": 2303 + }, + { + "chosen_geometric_mean": -1.1023492813110352, + "epoch": 0.57, + "grad_norm": 3.828125, + "learning_rate": 4.067190544902456e-06, + "log_odds": 4.525722026824951, + "log_odds_ratio": -0.2852354049682617, + "loss": 0.2964, + "rejected_geometric_mean": -5.380928993225098, + "step": 2304 + }, + { + "chosen_geometric_mean": -1.140117883682251, + "epoch": 0.57, + "grad_norm": 21.75, + "learning_rate": 4.066431974787975e-06, + "log_odds": 3.3091046810150146, + "log_odds_ratio": -0.1804806888103485, + "loss": 0.3214, + "rejected_geometric_mean": -4.182188034057617, + "step": 2305 + }, + { + "chosen_geometric_mean": -1.055530309677124, + "epoch": 0.57, + "grad_norm": 4.15625, + "learning_rate": 4.065673167164808e-06, + "log_odds": 5.4672346115112305, + "log_odds_ratio": -0.18276555836200714, + "loss": 0.2531, + "rejected_geometric_mean": -6.231822967529297, + "step": 2306 + }, + { + "chosen_geometric_mean": -1.0733681917190552, + "epoch": 0.57, + "grad_norm": 7.84375, + "learning_rate": 4.064914122148005e-06, + "log_odds": 0.9543566703796387, + "log_odds_ratio": -0.45698678493499756, + "loss": 0.332, + "rejected_geometric_mean": -1.9075493812561035, + "step": 2307 + }, + { + "chosen_geometric_mean": -1.0273430347442627, + "epoch": 0.57, + "grad_norm": 2.71875, + "learning_rate": 4.0641548398526566e-06, + "log_odds": 4.232184410095215, + "log_odds_ratio": -0.11018123477697372, + "loss": 0.3068, + "rejected_geometric_mean": -4.8982648849487305, + "step": 2308 + }, + { + "chosen_geometric_mean": -1.2414391040802002, + "epoch": 0.57, + "grad_norm": 15.6875, + "learning_rate": 4.06339532039389e-06, + "log_odds": 3.392420530319214, + "log_odds_ratio": -0.25878697633743286, + "loss": 0.3182, + "rejected_geometric_mean": -4.435460567474365, + "step": 2309 + }, + { + "chosen_geometric_mean": -1.0395398139953613, + "epoch": 0.57, + "grad_norm": 32.75, + "learning_rate": 4.062635563886864e-06, + "log_odds": 2.664834976196289, + "log_odds_ratio": -0.365348756313324, + "loss": 0.3019, + "rejected_geometric_mean": -3.506500720977783, + "step": 2310 + }, + { + "chosen_geometric_mean": -1.0679582357406616, + "epoch": 0.57, + "grad_norm": 34.25, + "learning_rate": 4.061875570446779e-06, + "log_odds": 1.043847918510437, + "log_odds_ratio": -0.47573336958885193, + "loss": 0.3241, + "rejected_geometric_mean": -2.0111989974975586, + "step": 2311 + }, + { + "chosen_geometric_mean": -1.0258357524871826, + "epoch": 0.57, + "grad_norm": 25.75, + "learning_rate": 4.061115340188865e-06, + "log_odds": 4.651073455810547, + "log_odds_ratio": -0.2013646960258484, + "loss": 0.3848, + "rejected_geometric_mean": -5.308191299438477, + "step": 2312 + }, + { + "chosen_geometric_mean": -1.0315519571304321, + "epoch": 0.57, + "grad_norm": 8.125, + "learning_rate": 4.060354873228392e-06, + "log_odds": 7.505920886993408, + "log_odds_ratio": -0.038534920662641525, + "loss": 0.2906, + "rejected_geometric_mean": -8.107275009155273, + "step": 2313 + }, + { + "chosen_geometric_mean": -1.0999749898910522, + "epoch": 0.57, + "grad_norm": 7.0625, + "learning_rate": 4.059594169680666e-06, + "log_odds": 2.662461042404175, + "log_odds_ratio": -0.45172348618507385, + "loss": 0.3364, + "rejected_geometric_mean": -3.48982572555542, + "step": 2314 + }, + { + "chosen_geometric_mean": -1.0137004852294922, + "epoch": 0.57, + "grad_norm": 4.1875, + "learning_rate": 4.058833229661028e-06, + "log_odds": 4.850473403930664, + "log_odds_ratio": -0.32899683713912964, + "loss": 0.3079, + "rejected_geometric_mean": -5.611136436462402, + "step": 2315 + }, + { + "chosen_geometric_mean": -1.2110522985458374, + "epoch": 0.57, + "grad_norm": 2.609375, + "learning_rate": 4.058072053284854e-06, + "log_odds": 1.4521114826202393, + "log_odds_ratio": -0.47897496819496155, + "loss": 0.2972, + "rejected_geometric_mean": -2.498985767364502, + "step": 2316 + }, + { + "chosen_geometric_mean": -1.1775349378585815, + "epoch": 0.57, + "grad_norm": 1.9765625, + "learning_rate": 4.0573106406675566e-06, + "log_odds": 6.48151969909668, + "log_odds_ratio": -0.028414903208613396, + "loss": 0.2806, + "rejected_geometric_mean": -7.2602715492248535, + "step": 2317 + }, + { + "chosen_geometric_mean": -1.0154813528060913, + "epoch": 0.57, + "grad_norm": 7.4375, + "learning_rate": 4.056548991924585e-06, + "log_odds": 1.9363654851913452, + "log_odds_ratio": -0.3344061076641083, + "loss": 0.3406, + "rejected_geometric_mean": -2.6820099353790283, + "step": 2318 + }, + { + "chosen_geometric_mean": -1.170289158821106, + "epoch": 0.57, + "grad_norm": 2.328125, + "learning_rate": 4.055787107171423e-06, + "log_odds": 2.157341480255127, + "log_odds_ratio": -0.23392289876937866, + "loss": 0.3123, + "rejected_geometric_mean": -3.093430519104004, + "step": 2319 + }, + { + "chosen_geometric_mean": -1.1402357816696167, + "epoch": 0.57, + "grad_norm": 3.453125, + "learning_rate": 4.05502498652359e-06, + "log_odds": 1.6565613746643066, + "log_odds_ratio": -0.3645068109035492, + "loss": 0.2662, + "rejected_geometric_mean": -2.5249686241149902, + "step": 2320 + }, + { + "chosen_geometric_mean": -1.414625883102417, + "epoch": 0.57, + "grad_norm": 2.515625, + "learning_rate": 4.054262630096643e-06, + "log_odds": 1.345183253288269, + "log_odds_ratio": -0.4073762893676758, + "loss": 0.3856, + "rejected_geometric_mean": -2.63435435295105, + "step": 2321 + }, + { + "chosen_geometric_mean": -1.049152135848999, + "epoch": 0.57, + "grad_norm": 3.265625, + "learning_rate": 4.053500038006173e-06, + "log_odds": 0.9960349798202515, + "log_odds_ratio": -0.38430944085121155, + "loss": 0.3036, + "rejected_geometric_mean": -1.82867431640625, + "step": 2322 + }, + { + "chosen_geometric_mean": -1.0151019096374512, + "epoch": 0.58, + "grad_norm": 7.5625, + "learning_rate": 4.0527372103678065e-06, + "log_odds": 3.2235844135284424, + "log_odds_ratio": -0.18695098161697388, + "loss": 0.333, + "rejected_geometric_mean": -3.9155688285827637, + "step": 2323 + }, + { + "chosen_geometric_mean": -1.2047808170318604, + "epoch": 0.58, + "grad_norm": 2.5, + "learning_rate": 4.051974147297208e-06, + "log_odds": 3.953397274017334, + "log_odds_ratio": -0.11010964959859848, + "loss": 0.2812, + "rejected_geometric_mean": -4.841832637786865, + "step": 2324 + }, + { + "chosen_geometric_mean": -1.018851637840271, + "epoch": 0.58, + "grad_norm": 4.875, + "learning_rate": 4.051210848910074e-06, + "log_odds": 1.8703768253326416, + "log_odds_ratio": -0.30897387862205505, + "loss": 0.3571, + "rejected_geometric_mean": -2.656280517578125, + "step": 2325 + }, + { + "chosen_geometric_mean": -1.4875812530517578, + "epoch": 0.58, + "grad_norm": 8.5, + "learning_rate": 4.050447315322141e-06, + "log_odds": 3.8340418338775635, + "log_odds_ratio": -0.2431633025407791, + "loss": 0.3495, + "rejected_geometric_mean": -5.127621173858643, + "step": 2326 + }, + { + "chosen_geometric_mean": -1.492044448852539, + "epoch": 0.58, + "grad_norm": 23.25, + "learning_rate": 4.049683546649178e-06, + "log_odds": 1.9250534772872925, + "log_odds_ratio": -0.22587114572525024, + "loss": 0.3181, + "rejected_geometric_mean": -3.2368710041046143, + "step": 2327 + }, + { + "chosen_geometric_mean": -1.0556241273880005, + "epoch": 0.58, + "grad_norm": 6.75, + "learning_rate": 4.048919543006991e-06, + "log_odds": 3.1983306407928467, + "log_odds_ratio": -0.310885488986969, + "loss": 0.2896, + "rejected_geometric_mean": -4.019065856933594, + "step": 2328 + }, + { + "chosen_geometric_mean": -1.0033575296401978, + "epoch": 0.58, + "grad_norm": 2.234375, + "learning_rate": 4.048155304511422e-06, + "log_odds": 2.951685905456543, + "log_odds_ratio": -0.23643600940704346, + "loss": 0.3263, + "rejected_geometric_mean": -3.6530511379241943, + "step": 2329 + }, + { + "chosen_geometric_mean": -1.049412727355957, + "epoch": 0.58, + "grad_norm": 4.28125, + "learning_rate": 4.047390831278345e-06, + "log_odds": 3.165515422821045, + "log_odds_ratio": -0.10202345252037048, + "loss": 0.2584, + "rejected_geometric_mean": -3.813462257385254, + "step": 2330 + }, + { + "chosen_geometric_mean": -1.2435522079467773, + "epoch": 0.58, + "grad_norm": 29.875, + "learning_rate": 4.046626123423676e-06, + "log_odds": 4.514616012573242, + "log_odds_ratio": -0.20955201983451843, + "loss": 0.3104, + "rejected_geometric_mean": -5.5261735916137695, + "step": 2331 + }, + { + "chosen_geometric_mean": -1.2916935682296753, + "epoch": 0.58, + "grad_norm": 18.125, + "learning_rate": 4.045861181063361e-06, + "log_odds": 4.20570182800293, + "log_odds_ratio": -0.05437588691711426, + "loss": 0.3282, + "rejected_geometric_mean": -5.19028377532959, + "step": 2332 + }, + { + "chosen_geometric_mean": -1.2628573179244995, + "epoch": 0.58, + "grad_norm": 16.0, + "learning_rate": 4.045096004313384e-06, + "log_odds": 6.612514495849609, + "log_odds_ratio": -0.16321878135204315, + "loss": 0.3304, + "rejected_geometric_mean": -7.610996246337891, + "step": 2333 + }, + { + "chosen_geometric_mean": -1.012247085571289, + "epoch": 0.58, + "grad_norm": 2.53125, + "learning_rate": 4.044330593289766e-06, + "log_odds": 3.3476290702819824, + "log_odds_ratio": -0.301327645778656, + "loss": 0.2985, + "rejected_geometric_mean": -4.092570781707764, + "step": 2334 + }, + { + "chosen_geometric_mean": -1.3115358352661133, + "epoch": 0.58, + "grad_norm": 7.25, + "learning_rate": 4.043564948108558e-06, + "log_odds": 0.6766799688339233, + "log_odds_ratio": -0.5593099594116211, + "loss": 0.3589, + "rejected_geometric_mean": -1.8890151977539062, + "step": 2335 + }, + { + "chosen_geometric_mean": -1.167368769645691, + "epoch": 0.58, + "grad_norm": 2.890625, + "learning_rate": 4.042799068885853e-06, + "log_odds": 3.573364019393921, + "log_odds_ratio": -0.1370130032300949, + "loss": 0.3095, + "rejected_geometric_mean": -4.440464019775391, + "step": 2336 + }, + { + "chosen_geometric_mean": -1.218446969985962, + "epoch": 0.58, + "grad_norm": 6.25, + "learning_rate": 4.0420329557377765e-06, + "log_odds": 4.594830513000488, + "log_odds_ratio": -0.20516616106033325, + "loss": 0.3, + "rejected_geometric_mean": -5.559938430786133, + "step": 2337 + }, + { + "chosen_geometric_mean": -0.9599627256393433, + "epoch": 0.58, + "grad_norm": 11.4375, + "learning_rate": 4.041266608780489e-06, + "log_odds": 7.415806293487549, + "log_odds_ratio": -0.14249542355537415, + "loss": 0.3239, + "rejected_geometric_mean": -7.93914794921875, + "step": 2338 + }, + { + "chosen_geometric_mean": -1.396143913269043, + "epoch": 0.58, + "grad_norm": 29.25, + "learning_rate": 4.040500028130187e-06, + "log_odds": 1.1067581176757812, + "log_odds_ratio": -0.6067229509353638, + "loss": 0.3147, + "rejected_geometric_mean": -2.3872971534729004, + "step": 2339 + }, + { + "chosen_geometric_mean": -1.1954782009124756, + "epoch": 0.58, + "grad_norm": 15.875, + "learning_rate": 4.039733213903102e-06, + "log_odds": 7.179007053375244, + "log_odds_ratio": -0.19267000257968903, + "loss": 0.3327, + "rejected_geometric_mean": -8.077913284301758, + "step": 2340 + }, + { + "chosen_geometric_mean": -1.079294204711914, + "epoch": 0.58, + "grad_norm": 5.6875, + "learning_rate": 4.038966166215503e-06, + "log_odds": 3.5738487243652344, + "log_odds_ratio": -0.2343105971813202, + "loss": 0.2763, + "rejected_geometric_mean": -4.382807731628418, + "step": 2341 + }, + { + "chosen_geometric_mean": -1.0105583667755127, + "epoch": 0.58, + "grad_norm": 2.765625, + "learning_rate": 4.038198885183692e-06, + "log_odds": 1.759068250656128, + "log_odds_ratio": -0.27158644795417786, + "loss": 0.2755, + "rejected_geometric_mean": -2.446986436843872, + "step": 2342 + }, + { + "chosen_geometric_mean": -1.1090764999389648, + "epoch": 0.58, + "grad_norm": 9.875, + "learning_rate": 4.037431370924007e-06, + "log_odds": 2.4868884086608887, + "log_odds_ratio": -0.3278055787086487, + "loss": 0.3373, + "rejected_geometric_mean": -3.41408109664917, + "step": 2343 + }, + { + "chosen_geometric_mean": -0.9331492781639099, + "epoch": 0.58, + "grad_norm": 23.25, + "learning_rate": 4.036663623552821e-06, + "log_odds": 3.0286977291107178, + "log_odds_ratio": -0.10830497741699219, + "loss": 0.2875, + "rejected_geometric_mean": -3.515929698944092, + "step": 2344 + }, + { + "chosen_geometric_mean": -1.2621625661849976, + "epoch": 0.58, + "grad_norm": 8.0, + "learning_rate": 4.035895643186545e-06, + "log_odds": 3.0066962242126465, + "log_odds_ratio": -0.3915952146053314, + "loss": 0.3105, + "rejected_geometric_mean": -4.045346736907959, + "step": 2345 + }, + { + "chosen_geometric_mean": -1.3612158298492432, + "epoch": 0.58, + "grad_norm": 3.578125, + "learning_rate": 4.035127429941621e-06, + "log_odds": 2.070173740386963, + "log_odds_ratio": -0.14889445900917053, + "loss": 0.3375, + "rejected_geometric_mean": -3.164806604385376, + "step": 2346 + }, + { + "chosen_geometric_mean": -1.4287073612213135, + "epoch": 0.58, + "grad_norm": 21.25, + "learning_rate": 4.03435898393453e-06, + "log_odds": 2.3060946464538574, + "log_odds_ratio": -0.27557575702667236, + "loss": 0.3076, + "rejected_geometric_mean": -3.549975633621216, + "step": 2347 + }, + { + "chosen_geometric_mean": -1.3849945068359375, + "epoch": 0.58, + "grad_norm": 9.625, + "learning_rate": 4.033590305281787e-06, + "log_odds": 5.686511516571045, + "log_odds_ratio": -0.12655198574066162, + "loss": 0.2679, + "rejected_geometric_mean": -6.816463947296143, + "step": 2348 + }, + { + "chosen_geometric_mean": -1.0251189470291138, + "epoch": 0.58, + "grad_norm": 5.125, + "learning_rate": 4.032821394099941e-06, + "log_odds": 0.8521767258644104, + "log_odds_ratio": -0.4002460241317749, + "loss": 0.2656, + "rejected_geometric_mean": -1.6976158618927002, + "step": 2349 + }, + { + "chosen_geometric_mean": -0.9821091890335083, + "epoch": 0.58, + "grad_norm": 5.625, + "learning_rate": 4.032052250505577e-06, + "log_odds": 0.6254491806030273, + "log_odds_ratio": -0.49074310064315796, + "loss": 0.2804, + "rejected_geometric_mean": -1.4935717582702637, + "step": 2350 + }, + { + "chosen_geometric_mean": -1.3603456020355225, + "epoch": 0.58, + "grad_norm": 9.375, + "learning_rate": 4.031282874615318e-06, + "log_odds": 0.32028478384017944, + "log_odds_ratio": -0.6267164349555969, + "loss": 0.3384, + "rejected_geometric_mean": -1.6294357776641846, + "step": 2351 + }, + { + "chosen_geometric_mean": -1.0128438472747803, + "epoch": 0.58, + "grad_norm": 2.328125, + "learning_rate": 4.030513266545817e-06, + "log_odds": 3.7729907035827637, + "log_odds_ratio": -0.3447975516319275, + "loss": 0.2882, + "rejected_geometric_mean": -4.5381178855896, + "step": 2352 + }, + { + "chosen_geometric_mean": -1.0667660236358643, + "epoch": 0.58, + "grad_norm": 2.65625, + "learning_rate": 4.029743426413768e-06, + "log_odds": 3.2476420402526855, + "log_odds_ratio": -0.30367493629455566, + "loss": 0.3165, + "rejected_geometric_mean": -4.094823360443115, + "step": 2353 + }, + { + "chosen_geometric_mean": -1.1299453973770142, + "epoch": 0.58, + "grad_norm": 2.25, + "learning_rate": 4.028973354335895e-06, + "log_odds": 4.584434986114502, + "log_odds_ratio": -0.06082345172762871, + "loss": 0.2859, + "rejected_geometric_mean": -5.3541669845581055, + "step": 2354 + }, + { + "chosen_geometric_mean": -0.813315749168396, + "epoch": 0.58, + "grad_norm": 7.28125, + "learning_rate": 4.02820305042896e-06, + "log_odds": 3.26796293258667, + "log_odds_ratio": -0.36722859740257263, + "loss": 0.28, + "rejected_geometric_mean": -3.8013601303100586, + "step": 2355 + }, + { + "chosen_geometric_mean": -1.166595220565796, + "epoch": 0.58, + "grad_norm": 2.34375, + "learning_rate": 4.02743251480976e-06, + "log_odds": 2.446249008178711, + "log_odds_ratio": -0.22378209233283997, + "loss": 0.2764, + "rejected_geometric_mean": -3.3046669960021973, + "step": 2356 + }, + { + "chosen_geometric_mean": -1.1718697547912598, + "epoch": 0.58, + "grad_norm": 20.875, + "learning_rate": 4.026661747595126e-06, + "log_odds": -0.2321399450302124, + "log_odds_ratio": -0.8781611323356628, + "loss": 0.3241, + "rejected_geometric_mean": -0.9744800329208374, + "step": 2357 + }, + { + "chosen_geometric_mean": -1.21474289894104, + "epoch": 0.58, + "grad_norm": 2.390625, + "learning_rate": 4.025890748901926e-06, + "log_odds": 8.949475288391113, + "log_odds_ratio": -0.1367928683757782, + "loss": 0.3116, + "rejected_geometric_mean": -9.864550590515137, + "step": 2358 + }, + { + "chosen_geometric_mean": -1.2297486066818237, + "epoch": 0.58, + "grad_norm": 11.6875, + "learning_rate": 4.0251195188470606e-06, + "log_odds": 1.0286197662353516, + "log_odds_ratio": -0.3758201003074646, + "loss": 0.3071, + "rejected_geometric_mean": -2.106081485748291, + "step": 2359 + }, + { + "chosen_geometric_mean": -1.219468593597412, + "epoch": 0.58, + "grad_norm": 7.375, + "learning_rate": 4.024348057547467e-06, + "log_odds": 6.411226272583008, + "log_odds_ratio": -0.17196150124073029, + "loss": 0.3433, + "rejected_geometric_mean": -7.395411968231201, + "step": 2360 + }, + { + "chosen_geometric_mean": -1.005340814590454, + "epoch": 0.58, + "grad_norm": 4.59375, + "learning_rate": 4.023576365120117e-06, + "log_odds": 5.30761194229126, + "log_odds_ratio": -0.17301785945892334, + "loss": 0.2771, + "rejected_geometric_mean": -5.932512283325195, + "step": 2361 + }, + { + "chosen_geometric_mean": -0.9836242198944092, + "epoch": 0.58, + "grad_norm": 4.75, + "learning_rate": 4.022804441682019e-06, + "log_odds": 9.485834121704102, + "log_odds_ratio": -0.24442455172538757, + "loss": 0.3005, + "rejected_geometric_mean": -10.217458724975586, + "step": 2362 + }, + { + "chosen_geometric_mean": -1.3073188066482544, + "epoch": 0.59, + "grad_norm": 3.453125, + "learning_rate": 4.022032287350215e-06, + "log_odds": 4.235742092132568, + "log_odds_ratio": -0.16329701244831085, + "loss": 0.3044, + "rejected_geometric_mean": -5.216635704040527, + "step": 2363 + }, + { + "chosen_geometric_mean": -0.9692583084106445, + "epoch": 0.59, + "grad_norm": 1.8671875, + "learning_rate": 4.021259902241781e-06, + "log_odds": 6.869622230529785, + "log_odds_ratio": -0.28122255206108093, + "loss": 0.2534, + "rejected_geometric_mean": -7.525913238525391, + "step": 2364 + }, + { + "chosen_geometric_mean": -1.1385252475738525, + "epoch": 0.59, + "grad_norm": 13.9375, + "learning_rate": 4.02048728647383e-06, + "log_odds": 4.1863837242126465, + "log_odds_ratio": -0.2981528639793396, + "loss": 0.3171, + "rejected_geometric_mean": -5.099203109741211, + "step": 2365 + }, + { + "chosen_geometric_mean": -0.9134097099304199, + "epoch": 0.59, + "grad_norm": 1.9609375, + "learning_rate": 4.019714440163508e-06, + "log_odds": 4.020981788635254, + "log_odds_ratio": -0.18345193564891815, + "loss": 0.2924, + "rejected_geometric_mean": -4.532710075378418, + "step": 2366 + }, + { + "chosen_geometric_mean": -1.1861594915390015, + "epoch": 0.59, + "grad_norm": 19.0, + "learning_rate": 4.018941363427998e-06, + "log_odds": 5.277970790863037, + "log_odds_ratio": -0.3031637668609619, + "loss": 0.3542, + "rejected_geometric_mean": -6.286527156829834, + "step": 2367 + }, + { + "chosen_geometric_mean": -1.1322013139724731, + "epoch": 0.59, + "grad_norm": 18.625, + "learning_rate": 4.018168056384517e-06, + "log_odds": 4.845619201660156, + "log_odds_ratio": -0.33254802227020264, + "loss": 0.3657, + "rejected_geometric_mean": -5.751009464263916, + "step": 2368 + }, + { + "chosen_geometric_mean": -1.6565375328063965, + "epoch": 0.59, + "grad_norm": 25.875, + "learning_rate": 4.017394519150316e-06, + "log_odds": 4.4006123542785645, + "log_odds_ratio": -0.16083097457885742, + "loss": 0.2952, + "rejected_geometric_mean": -5.8921051025390625, + "step": 2369 + }, + { + "chosen_geometric_mean": -0.9319326877593994, + "epoch": 0.59, + "grad_norm": 5.96875, + "learning_rate": 4.016620751842683e-06, + "log_odds": 1.8995646238327026, + "log_odds_ratio": -0.2289772629737854, + "loss": 0.3269, + "rejected_geometric_mean": -2.490640878677368, + "step": 2370 + }, + { + "chosen_geometric_mean": -1.2114416360855103, + "epoch": 0.59, + "grad_norm": 25.25, + "learning_rate": 4.015846754578939e-06, + "log_odds": 1.8686785697937012, + "log_odds_ratio": -0.2234826683998108, + "loss": 0.2807, + "rejected_geometric_mean": -2.810857057571411, + "step": 2371 + }, + { + "chosen_geometric_mean": -1.2645423412322998, + "epoch": 0.59, + "grad_norm": 5.6875, + "learning_rate": 4.0150725274764415e-06, + "log_odds": 3.4528322219848633, + "log_odds_ratio": -0.20139428973197937, + "loss": 0.2977, + "rejected_geometric_mean": -4.475095748901367, + "step": 2372 + }, + { + "chosen_geometric_mean": -0.9255852103233337, + "epoch": 0.59, + "grad_norm": 2.421875, + "learning_rate": 4.01429807065258e-06, + "log_odds": 3.325697898864746, + "log_odds_ratio": -0.2988618016242981, + "loss": 0.3099, + "rejected_geometric_mean": -3.9399337768554688, + "step": 2373 + }, + { + "chosen_geometric_mean": -1.2592381238937378, + "epoch": 0.59, + "grad_norm": 2.9375, + "learning_rate": 4.013523384224783e-06, + "log_odds": 0.6254234910011292, + "log_odds_ratio": -0.5194570422172546, + "loss": 0.3206, + "rejected_geometric_mean": -1.8166706562042236, + "step": 2374 + }, + { + "chosen_geometric_mean": -1.2378907203674316, + "epoch": 0.59, + "grad_norm": 2.40625, + "learning_rate": 4.01274846831051e-06, + "log_odds": 3.830043315887451, + "log_odds_ratio": -0.18123222887516022, + "loss": 0.3, + "rejected_geometric_mean": -4.805148124694824, + "step": 2375 + }, + { + "chosen_geometric_mean": -0.8794372081756592, + "epoch": 0.59, + "grad_norm": 13.6875, + "learning_rate": 4.0119733230272564e-06, + "log_odds": 3.4190306663513184, + "log_odds_ratio": -0.16677673161029816, + "loss": 0.3547, + "rejected_geometric_mean": -3.8908886909484863, + "step": 2376 + }, + { + "chosen_geometric_mean": -1.0763838291168213, + "epoch": 0.59, + "grad_norm": 3.421875, + "learning_rate": 4.011197948492554e-06, + "log_odds": 1.6085491180419922, + "log_odds_ratio": -0.36311396956443787, + "loss": 0.3079, + "rejected_geometric_mean": -2.4421439170837402, + "step": 2377 + }, + { + "chosen_geometric_mean": -1.1253554821014404, + "epoch": 0.59, + "grad_norm": 2.515625, + "learning_rate": 4.01042234482397e-06, + "log_odds": 5.0321807861328125, + "log_odds_ratio": -0.24752086400985718, + "loss": 0.2697, + "rejected_geometric_mean": -5.885149002075195, + "step": 2378 + }, + { + "chosen_geometric_mean": -1.5208501815795898, + "epoch": 0.59, + "grad_norm": 3.171875, + "learning_rate": 4.0096465121391005e-06, + "log_odds": 3.349768877029419, + "log_odds_ratio": -0.16813777387142181, + "loss": 0.2542, + "rejected_geometric_mean": -4.663491249084473, + "step": 2379 + }, + { + "chosen_geometric_mean": -1.1045958995819092, + "epoch": 0.59, + "grad_norm": 28.625, + "learning_rate": 4.008870450555583e-06, + "log_odds": 3.5136196613311768, + "log_odds_ratio": -0.16244058310985565, + "loss": 0.3089, + "rejected_geometric_mean": -4.301338195800781, + "step": 2380 + }, + { + "chosen_geometric_mean": -0.8890515565872192, + "epoch": 0.59, + "grad_norm": 6.875, + "learning_rate": 4.008094160191086e-06, + "log_odds": 2.17415452003479, + "log_odds_ratio": -0.23780220746994019, + "loss": 0.2606, + "rejected_geometric_mean": -2.700761318206787, + "step": 2381 + }, + { + "chosen_geometric_mean": -1.8763989210128784, + "epoch": 0.59, + "grad_norm": 21.125, + "learning_rate": 4.007317641163314e-06, + "log_odds": 0.6329329013824463, + "log_odds_ratio": -0.43781155347824097, + "loss": 0.3747, + "rejected_geometric_mean": -2.3871116638183594, + "step": 2382 + }, + { + "chosen_geometric_mean": -1.3666033744812012, + "epoch": 0.59, + "grad_norm": 22.5, + "learning_rate": 4.006540893590007e-06, + "log_odds": 3.16241455078125, + "log_odds_ratio": -0.29879724979400635, + "loss": 0.3011, + "rejected_geometric_mean": -4.306887626647949, + "step": 2383 + }, + { + "chosen_geometric_mean": -1.1834954023361206, + "epoch": 0.59, + "grad_norm": 2.484375, + "learning_rate": 4.005763917588937e-06, + "log_odds": 2.472242832183838, + "log_odds_ratio": -0.3045019507408142, + "loss": 0.2632, + "rejected_geometric_mean": -3.4244191646575928, + "step": 2384 + }, + { + "chosen_geometric_mean": -1.0783807039260864, + "epoch": 0.59, + "grad_norm": 22.625, + "learning_rate": 4.004986713277913e-06, + "log_odds": 1.368049144744873, + "log_odds_ratio": -0.5519953370094299, + "loss": 0.3889, + "rejected_geometric_mean": -2.21421480178833, + "step": 2385 + }, + { + "chosen_geometric_mean": -1.1725192070007324, + "epoch": 0.59, + "grad_norm": 2.890625, + "learning_rate": 4.004209280774778e-06, + "log_odds": 4.727906703948975, + "log_odds_ratio": -0.17377018928527832, + "loss": 0.3172, + "rejected_geometric_mean": -5.605430603027344, + "step": 2386 + }, + { + "chosen_geometric_mean": -1.2475001811981201, + "epoch": 0.59, + "grad_norm": 31.5, + "learning_rate": 4.003431620197407e-06, + "log_odds": 0.1520487368106842, + "log_odds_ratio": -0.6212949156761169, + "loss": 0.3274, + "rejected_geometric_mean": -1.357513666152954, + "step": 2387 + }, + { + "chosen_geometric_mean": -1.2247157096862793, + "epoch": 0.59, + "grad_norm": 15.8125, + "learning_rate": 4.002653731663717e-06, + "log_odds": 4.5055108070373535, + "log_odds_ratio": -0.11676014959812164, + "loss": 0.3002, + "rejected_geometric_mean": -5.42353630065918, + "step": 2388 + }, + { + "chosen_geometric_mean": -1.242110013961792, + "epoch": 0.59, + "grad_norm": 3.375, + "learning_rate": 4.00187561529165e-06, + "log_odds": 5.243770122528076, + "log_odds_ratio": -0.22811180353164673, + "loss": 0.3153, + "rejected_geometric_mean": -6.25007438659668, + "step": 2389 + }, + { + "chosen_geometric_mean": -1.1072479486465454, + "epoch": 0.59, + "grad_norm": 13.25, + "learning_rate": 4.00109727119919e-06, + "log_odds": 7.445801734924316, + "log_odds_ratio": -0.1101832240819931, + "loss": 0.3337, + "rejected_geometric_mean": -8.19987678527832, + "step": 2390 + }, + { + "chosen_geometric_mean": -1.058351993560791, + "epoch": 0.59, + "grad_norm": 15.75, + "learning_rate": 4.000318699504351e-06, + "log_odds": 6.640785217285156, + "log_odds_ratio": -0.06048063933849335, + "loss": 0.3444, + "rejected_geometric_mean": -7.293018817901611, + "step": 2391 + }, + { + "chosen_geometric_mean": -0.988052248954773, + "epoch": 0.59, + "grad_norm": 4.21875, + "learning_rate": 3.9995399003251835e-06, + "log_odds": 4.963597774505615, + "log_odds_ratio": -0.2954249978065491, + "loss": 0.3425, + "rejected_geometric_mean": -5.662798881530762, + "step": 2392 + }, + { + "chosen_geometric_mean": -1.1562023162841797, + "epoch": 0.59, + "grad_norm": 2.5625, + "learning_rate": 3.998760873779772e-06, + "log_odds": 5.050435543060303, + "log_odds_ratio": -0.1539466679096222, + "loss": 0.2985, + "rejected_geometric_mean": -5.883086681365967, + "step": 2393 + }, + { + "chosen_geometric_mean": -0.9368551969528198, + "epoch": 0.59, + "grad_norm": 4.96875, + "learning_rate": 3.997981619986236e-06, + "log_odds": 0.29966965317726135, + "log_odds_ratio": -0.5571991205215454, + "loss": 0.2814, + "rejected_geometric_mean": -1.1326367855072021, + "step": 2394 + }, + { + "chosen_geometric_mean": -1.2301788330078125, + "epoch": 0.59, + "grad_norm": 3.296875, + "learning_rate": 3.997202139062729e-06, + "log_odds": 11.181032180786133, + "log_odds_ratio": -0.20850487053394318, + "loss": 0.2749, + "rejected_geometric_mean": -12.074291229248047, + "step": 2395 + }, + { + "chosen_geometric_mean": -1.2410162687301636, + "epoch": 0.59, + "grad_norm": 7.125, + "learning_rate": 3.996422431127439e-06, + "log_odds": 5.941020488739014, + "log_odds_ratio": -0.3009990155696869, + "loss": 0.3402, + "rejected_geometric_mean": -6.96359920501709, + "step": 2396 + }, + { + "chosen_geometric_mean": -1.1812633275985718, + "epoch": 0.59, + "grad_norm": 2.75, + "learning_rate": 3.995642496298589e-06, + "log_odds": 9.439745903015137, + "log_odds_ratio": -0.01152031309902668, + "loss": 0.2582, + "rejected_geometric_mean": -10.258115768432617, + "step": 2397 + }, + { + "chosen_geometric_mean": -0.9897315502166748, + "epoch": 0.59, + "grad_norm": 20.75, + "learning_rate": 3.9948623346944345e-06, + "log_odds": 4.57755184173584, + "log_odds_ratio": -0.30608952045440674, + "loss": 0.2836, + "rejected_geometric_mean": -5.320374011993408, + "step": 2398 + }, + { + "chosen_geometric_mean": -1.1666203737258911, + "epoch": 0.59, + "grad_norm": 16.125, + "learning_rate": 3.994081946433268e-06, + "log_odds": 3.5678153038024902, + "log_odds_ratio": -0.2172565460205078, + "loss": 0.2995, + "rejected_geometric_mean": -4.466898441314697, + "step": 2399 + }, + { + "chosen_geometric_mean": -1.3055973052978516, + "epoch": 0.59, + "grad_norm": 24.5, + "learning_rate": 3.993301331633415e-06, + "log_odds": 7.005211353302002, + "log_odds_ratio": -0.02942529320716858, + "loss": 0.3478, + "rejected_geometric_mean": -7.987934112548828, + "step": 2400 + }, + { + "chosen_geometric_mean": -1.0273268222808838, + "epoch": 0.59, + "grad_norm": 10.3125, + "learning_rate": 3.992520490413234e-06, + "log_odds": 2.110255241394043, + "log_odds_ratio": -0.29880625009536743, + "loss": 0.3588, + "rejected_geometric_mean": -2.8361611366271973, + "step": 2401 + }, + { + "chosen_geometric_mean": -1.2071950435638428, + "epoch": 0.59, + "grad_norm": 10.5625, + "learning_rate": 3.991739422891121e-06, + "log_odds": 1.840402364730835, + "log_odds_ratio": -0.3675418794155121, + "loss": 0.2696, + "rejected_geometric_mean": -2.8678064346313477, + "step": 2402 + }, + { + "chosen_geometric_mean": -1.111463189125061, + "epoch": 0.59, + "grad_norm": 19.75, + "learning_rate": 3.990958129185504e-06, + "log_odds": 3.133880615234375, + "log_odds_ratio": -0.2470589131116867, + "loss": 0.3538, + "rejected_geometric_mean": -3.9960033893585205, + "step": 2403 + }, + { + "chosen_geometric_mean": -1.352477788925171, + "epoch": 0.6, + "grad_norm": 24.25, + "learning_rate": 3.990176609414845e-06, + "log_odds": 4.767161846160889, + "log_odds_ratio": -0.22062957286834717, + "loss": 0.3767, + "rejected_geometric_mean": -5.907270908355713, + "step": 2404 + }, + { + "chosen_geometric_mean": -0.9604445695877075, + "epoch": 0.6, + "grad_norm": 11.8125, + "learning_rate": 3.989394863697643e-06, + "log_odds": 1.6018283367156982, + "log_odds_ratio": -0.2224186509847641, + "loss": 0.3654, + "rejected_geometric_mean": -2.221170663833618, + "step": 2405 + }, + { + "chosen_geometric_mean": -0.9022319316864014, + "epoch": 0.6, + "grad_norm": 13.625, + "learning_rate": 3.988612892152428e-06, + "log_odds": 1.955568790435791, + "log_odds_ratio": -0.33260512351989746, + "loss": 0.3169, + "rejected_geometric_mean": -2.6210720539093018, + "step": 2406 + }, + { + "chosen_geometric_mean": -1.0832884311676025, + "epoch": 0.6, + "grad_norm": 5.96875, + "learning_rate": 3.987830694897765e-06, + "log_odds": 3.895179271697998, + "log_odds_ratio": -0.1185469850897789, + "loss": 0.2896, + "rejected_geometric_mean": -4.640659332275391, + "step": 2407 + }, + { + "chosen_geometric_mean": -0.9613906145095825, + "epoch": 0.6, + "grad_norm": 5.59375, + "learning_rate": 3.987048272052256e-06, + "log_odds": 2.499629497528076, + "log_odds_ratio": -0.1400061696767807, + "loss": 0.2635, + "rejected_geometric_mean": -3.042334794998169, + "step": 2408 + }, + { + "chosen_geometric_mean": -0.9707144498825073, + "epoch": 0.6, + "grad_norm": 2.578125, + "learning_rate": 3.986265623734534e-06, + "log_odds": 4.90421199798584, + "log_odds_ratio": -0.14519529044628143, + "loss": 0.2892, + "rejected_geometric_mean": -5.467063903808594, + "step": 2409 + }, + { + "chosen_geometric_mean": -1.2177666425704956, + "epoch": 0.6, + "grad_norm": 4.4375, + "learning_rate": 3.985482750063267e-06, + "log_odds": 4.260279655456543, + "log_odds_ratio": -0.25191840529441833, + "loss": 0.3036, + "rejected_geometric_mean": -5.214590072631836, + "step": 2410 + }, + { + "chosen_geometric_mean": -1.2400609254837036, + "epoch": 0.6, + "grad_norm": 8.25, + "learning_rate": 3.9846996511571575e-06, + "log_odds": 4.0036540031433105, + "log_odds_ratio": -0.1415421962738037, + "loss": 0.2793, + "rejected_geometric_mean": -4.965338230133057, + "step": 2411 + }, + { + "chosen_geometric_mean": -1.2970408201217651, + "epoch": 0.6, + "grad_norm": 9.3125, + "learning_rate": 3.983916327134942e-06, + "log_odds": 0.6285759806632996, + "log_odds_ratio": -0.5144049525260925, + "loss": 0.4068, + "rejected_geometric_mean": -1.787993311882019, + "step": 2412 + }, + { + "chosen_geometric_mean": -0.9357343316078186, + "epoch": 0.6, + "grad_norm": 3.84375, + "learning_rate": 3.983132778115392e-06, + "log_odds": 5.630460262298584, + "log_odds_ratio": -0.03574468195438385, + "loss": 0.282, + "rejected_geometric_mean": -6.082698345184326, + "step": 2413 + }, + { + "chosen_geometric_mean": -1.116011142730713, + "epoch": 0.6, + "grad_norm": 5.5625, + "learning_rate": 3.982349004217312e-06, + "log_odds": 3.4929397106170654, + "log_odds_ratio": -0.26884788274765015, + "loss": 0.2783, + "rejected_geometric_mean": -4.340724945068359, + "step": 2414 + }, + { + "chosen_geometric_mean": -1.171235203742981, + "epoch": 0.6, + "grad_norm": 12.6875, + "learning_rate": 3.981565005559541e-06, + "log_odds": 0.4309003949165344, + "log_odds_ratio": -0.5287801623344421, + "loss": 0.3359, + "rejected_geometric_mean": -1.5190846920013428, + "step": 2415 + }, + { + "chosen_geometric_mean": -1.2111823558807373, + "epoch": 0.6, + "grad_norm": 3.46875, + "learning_rate": 3.980780782260951e-06, + "log_odds": 3.2163243293762207, + "log_odds_ratio": -0.15830528736114502, + "loss": 0.3221, + "rejected_geometric_mean": -4.136085510253906, + "step": 2416 + }, + { + "chosen_geometric_mean": -1.0067975521087646, + "epoch": 0.6, + "grad_norm": 12.8125, + "learning_rate": 3.97999633444045e-06, + "log_odds": 2.4992637634277344, + "log_odds_ratio": -0.18386460840702057, + "loss": 0.2977, + "rejected_geometric_mean": -3.1059298515319824, + "step": 2417 + }, + { + "chosen_geometric_mean": -1.0579357147216797, + "epoch": 0.6, + "grad_norm": 4.4375, + "learning_rate": 3.9792116622169795e-06, + "log_odds": 1.5096526145935059, + "log_odds_ratio": -0.23910491168498993, + "loss": 0.2281, + "rejected_geometric_mean": -2.2653889656066895, + "step": 2418 + }, + { + "chosen_geometric_mean": -1.0435785055160522, + "epoch": 0.6, + "grad_norm": 10.0, + "learning_rate": 3.978426765709513e-06, + "log_odds": 3.630936861038208, + "log_odds_ratio": -0.20019008219242096, + "loss": 0.3295, + "rejected_geometric_mean": -4.3436503410339355, + "step": 2419 + }, + { + "chosen_geometric_mean": -1.2402353286743164, + "epoch": 0.6, + "grad_norm": 16.25, + "learning_rate": 3.977641645037062e-06, + "log_odds": 1.6077332496643066, + "log_odds_ratio": -0.3431967794895172, + "loss": 0.3165, + "rejected_geometric_mean": -2.6703763008117676, + "step": 2420 + }, + { + "chosen_geometric_mean": -1.2346898317337036, + "epoch": 0.6, + "grad_norm": 16.375, + "learning_rate": 3.976856300318668e-06, + "log_odds": 3.3598382472991943, + "log_odds_ratio": -0.2012556940317154, + "loss": 0.2544, + "rejected_geometric_mean": -4.316314697265625, + "step": 2421 + }, + { + "chosen_geometric_mean": -1.131919503211975, + "epoch": 0.6, + "grad_norm": 3.8125, + "learning_rate": 3.976070731673408e-06, + "log_odds": 1.567522644996643, + "log_odds_ratio": -0.30019187927246094, + "loss": 0.3015, + "rejected_geometric_mean": -2.4659862518310547, + "step": 2422 + }, + { + "chosen_geometric_mean": -0.9489273428916931, + "epoch": 0.6, + "grad_norm": 6.03125, + "learning_rate": 3.975284939220395e-06, + "log_odds": 4.9924116134643555, + "log_odds_ratio": -0.02276482805609703, + "loss": 0.2878, + "rejected_geometric_mean": -5.442797660827637, + "step": 2423 + }, + { + "chosen_geometric_mean": -0.9717787504196167, + "epoch": 0.6, + "grad_norm": 2.578125, + "learning_rate": 3.974498923078771e-06, + "log_odds": 3.876629114151001, + "log_odds_ratio": -0.1052708625793457, + "loss": 0.2968, + "rejected_geometric_mean": -4.433994293212891, + "step": 2424 + }, + { + "chosen_geometric_mean": -1.1490797996520996, + "epoch": 0.6, + "grad_norm": 9.5, + "learning_rate": 3.9737126833677175e-06, + "log_odds": 4.205800533294678, + "log_odds_ratio": -0.05977368354797363, + "loss": 0.3198, + "rejected_geometric_mean": -4.9893293380737305, + "step": 2425 + }, + { + "chosen_geometric_mean": -0.874262809753418, + "epoch": 0.6, + "grad_norm": 2.53125, + "learning_rate": 3.972926220206446e-06, + "log_odds": 4.124474048614502, + "log_odds_ratio": -0.050112806260585785, + "loss": 0.269, + "rejected_geometric_mean": -4.483080863952637, + "step": 2426 + }, + { + "chosen_geometric_mean": -1.0827293395996094, + "epoch": 0.6, + "grad_norm": 25.125, + "learning_rate": 3.9721395337142036e-06, + "log_odds": 2.570945978164673, + "log_odds_ratio": -0.2801100015640259, + "loss": 0.3351, + "rejected_geometric_mean": -3.3988733291625977, + "step": 2427 + }, + { + "chosen_geometric_mean": -1.1628713607788086, + "epoch": 0.6, + "grad_norm": 39.75, + "learning_rate": 3.971352624010271e-06, + "log_odds": 0.29751870036125183, + "log_odds_ratio": -0.621772825717926, + "loss": 0.3917, + "rejected_geometric_mean": -1.439710259437561, + "step": 2428 + }, + { + "chosen_geometric_mean": -1.0988562107086182, + "epoch": 0.6, + "grad_norm": 22.0, + "learning_rate": 3.970565491213962e-06, + "log_odds": 2.649501085281372, + "log_odds_ratio": -0.3342650532722473, + "loss": 0.3387, + "rejected_geometric_mean": -3.5411031246185303, + "step": 2429 + }, + { + "chosen_geometric_mean": -1.1342439651489258, + "epoch": 0.6, + "grad_norm": 10.6875, + "learning_rate": 3.969778135444625e-06, + "log_odds": 4.015587329864502, + "log_odds_ratio": -0.23181158304214478, + "loss": 0.2945, + "rejected_geometric_mean": -4.889650344848633, + "step": 2430 + }, + { + "chosen_geometric_mean": -1.288103699684143, + "epoch": 0.6, + "grad_norm": 5.3125, + "learning_rate": 3.968990556821643e-06, + "log_odds": 4.358917713165283, + "log_odds_ratio": -0.1495257318019867, + "loss": 0.3469, + "rejected_geometric_mean": -5.357729911804199, + "step": 2431 + }, + { + "chosen_geometric_mean": -1.0835994482040405, + "epoch": 0.6, + "grad_norm": 23.375, + "learning_rate": 3.968202755464431e-06, + "log_odds": 1.7298884391784668, + "log_odds_ratio": -0.2822223901748657, + "loss": 0.3474, + "rejected_geometric_mean": -2.578680992126465, + "step": 2432 + }, + { + "chosen_geometric_mean": -1.0907386541366577, + "epoch": 0.6, + "grad_norm": 13.25, + "learning_rate": 3.9674147314924375e-06, + "log_odds": 1.155051827430725, + "log_odds_ratio": -0.39633023738861084, + "loss": 0.3749, + "rejected_geometric_mean": -2.029917001724243, + "step": 2433 + }, + { + "chosen_geometric_mean": -1.1636748313903809, + "epoch": 0.6, + "grad_norm": 2.90625, + "learning_rate": 3.966626485025148e-06, + "log_odds": 1.9979325532913208, + "log_odds_ratio": -0.29839158058166504, + "loss": 0.3314, + "rejected_geometric_mean": -2.929960012435913, + "step": 2434 + }, + { + "chosen_geometric_mean": -1.3231227397918701, + "epoch": 0.6, + "grad_norm": 6.125, + "learning_rate": 3.965838016182078e-06, + "log_odds": 1.9189094305038452, + "log_odds_ratio": -0.37580421566963196, + "loss": 0.2682, + "rejected_geometric_mean": -3.071960210800171, + "step": 2435 + }, + { + "chosen_geometric_mean": -1.2354001998901367, + "epoch": 0.6, + "grad_norm": 3.9375, + "learning_rate": 3.965049325082779e-06, + "log_odds": -0.06708183884620667, + "log_odds_ratio": -0.733739972114563, + "loss": 0.4041, + "rejected_geometric_mean": -1.202317476272583, + "step": 2436 + }, + { + "chosen_geometric_mean": -1.1300958395004272, + "epoch": 0.6, + "grad_norm": 98.5, + "learning_rate": 3.964260411846836e-06, + "log_odds": 5.216360092163086, + "log_odds_ratio": -0.20510168373584747, + "loss": 0.3383, + "rejected_geometric_mean": -6.037405014038086, + "step": 2437 + }, + { + "chosen_geometric_mean": -1.2171411514282227, + "epoch": 0.6, + "grad_norm": 9.5625, + "learning_rate": 3.9634712765938665e-06, + "log_odds": 3.08909010887146, + "log_odds_ratio": -0.24541053175926208, + "loss": 0.2585, + "rejected_geometric_mean": -4.034933090209961, + "step": 2438 + }, + { + "chosen_geometric_mean": -1.1366183757781982, + "epoch": 0.6, + "grad_norm": 2.3125, + "learning_rate": 3.962681919443522e-06, + "log_odds": 1.6986525058746338, + "log_odds_ratio": -0.3483197093009949, + "loss": 0.2803, + "rejected_geometric_mean": -2.5715901851654053, + "step": 2439 + }, + { + "chosen_geometric_mean": -1.0622811317443848, + "epoch": 0.6, + "grad_norm": 4.125, + "learning_rate": 3.961892340515489e-06, + "log_odds": 2.352888584136963, + "log_odds_ratio": -0.2143048346042633, + "loss": 0.3155, + "rejected_geometric_mean": -3.131312847137451, + "step": 2440 + }, + { + "chosen_geometric_mean": -1.1557971239089966, + "epoch": 0.6, + "grad_norm": 2.703125, + "learning_rate": 3.961102539929486e-06, + "log_odds": 2.1401586532592773, + "log_odds_ratio": -0.36987927556037903, + "loss": 0.3192, + "rejected_geometric_mean": -3.089290142059326, + "step": 2441 + }, + { + "chosen_geometric_mean": -1.2734862565994263, + "epoch": 0.6, + "grad_norm": 30.625, + "learning_rate": 3.9603125178052645e-06, + "log_odds": 0.910315752029419, + "log_odds_ratio": -0.5445996522903442, + "loss": 0.3695, + "rejected_geometric_mean": -2.047933578491211, + "step": 2442 + }, + { + "chosen_geometric_mean": -0.944745659828186, + "epoch": 0.6, + "grad_norm": 34.75, + "learning_rate": 3.959522274262613e-06, + "log_odds": 7.296947479248047, + "log_odds_ratio": -0.004209568258374929, + "loss": 0.2628, + "rejected_geometric_mean": -7.7465128898620605, + "step": 2443 + }, + { + "chosen_geometric_mean": -1.1202750205993652, + "epoch": 0.61, + "grad_norm": 9.4375, + "learning_rate": 3.95873180942135e-06, + "log_odds": 3.196021556854248, + "log_odds_ratio": -0.15567319095134735, + "loss": 0.3624, + "rejected_geometric_mean": -3.988018035888672, + "step": 2444 + }, + { + "chosen_geometric_mean": -1.521026372909546, + "epoch": 0.61, + "grad_norm": 15.5, + "learning_rate": 3.95794112340133e-06, + "log_odds": 1.7357165813446045, + "log_odds_ratio": -0.37709474563598633, + "loss": 0.3141, + "rejected_geometric_mean": -3.076946258544922, + "step": 2445 + }, + { + "chosen_geometric_mean": -1.3410569429397583, + "epoch": 0.61, + "grad_norm": 8.8125, + "learning_rate": 3.95715021632244e-06, + "log_odds": 1.2516361474990845, + "log_odds_ratio": -0.47843921184539795, + "loss": 0.2805, + "rejected_geometric_mean": -2.4923460483551025, + "step": 2446 + }, + { + "chosen_geometric_mean": -1.1483736038208008, + "epoch": 0.61, + "grad_norm": 2.984375, + "learning_rate": 3.956359088304598e-06, + "log_odds": 2.6666176319122314, + "log_odds_ratio": -0.3686847984790802, + "loss": 0.2888, + "rejected_geometric_mean": -3.6135709285736084, + "step": 2447 + }, + { + "chosen_geometric_mean": -1.3004480600357056, + "epoch": 0.61, + "grad_norm": 13.375, + "learning_rate": 3.955567739467762e-06, + "log_odds": 2.554609537124634, + "log_odds_ratio": -0.17384427785873413, + "loss": 0.2984, + "rejected_geometric_mean": -3.606248617172241, + "step": 2448 + }, + { + "chosen_geometric_mean": -1.2859580516815186, + "epoch": 0.61, + "grad_norm": 8.0, + "learning_rate": 3.954776169931916e-06, + "log_odds": 2.399975538253784, + "log_odds_ratio": -0.2696548104286194, + "loss": 0.3226, + "rejected_geometric_mean": -3.4692721366882324, + "step": 2449 + }, + { + "chosen_geometric_mean": -1.2011364698410034, + "epoch": 0.61, + "grad_norm": 3.515625, + "learning_rate": 3.953984379817083e-06, + "log_odds": 1.1398202180862427, + "log_odds_ratio": -0.40755677223205566, + "loss": 0.2482, + "rejected_geometric_mean": -2.229112148284912, + "step": 2450 + }, + { + "chosen_geometric_mean": -0.9727072715759277, + "epoch": 0.61, + "grad_norm": 28.5, + "learning_rate": 3.953192369243316e-06, + "log_odds": 2.9170446395874023, + "log_odds_ratio": -0.3245786726474762, + "loss": 0.2956, + "rejected_geometric_mean": -3.641998529434204, + "step": 2451 + }, + { + "chosen_geometric_mean": -1.0596519708633423, + "epoch": 0.61, + "grad_norm": 3.765625, + "learning_rate": 3.952400138330704e-06, + "log_odds": 1.6357080936431885, + "log_odds_ratio": -0.45269501209259033, + "loss": 0.3045, + "rejected_geometric_mean": -2.548161268234253, + "step": 2452 + }, + { + "chosen_geometric_mean": -1.2226895093917847, + "epoch": 0.61, + "grad_norm": 2.40625, + "learning_rate": 3.951607687199368e-06, + "log_odds": 0.737044095993042, + "log_odds_ratio": -0.5084657669067383, + "loss": 0.2993, + "rejected_geometric_mean": -1.9170498847961426, + "step": 2453 + }, + { + "chosen_geometric_mean": -1.1203714609146118, + "epoch": 0.61, + "grad_norm": 10.3125, + "learning_rate": 3.950815015969462e-06, + "log_odds": 1.7042533159255981, + "log_odds_ratio": -0.2817152738571167, + "loss": 0.3315, + "rejected_geometric_mean": -2.6354799270629883, + "step": 2454 + }, + { + "chosen_geometric_mean": -1.3370764255523682, + "epoch": 0.61, + "grad_norm": 9.5625, + "learning_rate": 3.950022124761174e-06, + "log_odds": 1.942885398864746, + "log_odds_ratio": -0.3452085852622986, + "loss": 0.3248, + "rejected_geometric_mean": -3.0485262870788574, + "step": 2455 + }, + { + "chosen_geometric_mean": -1.2337108850479126, + "epoch": 0.61, + "grad_norm": 7.0625, + "learning_rate": 3.949229013694726e-06, + "log_odds": 2.1536707878112793, + "log_odds_ratio": -0.36820971965789795, + "loss": 0.3549, + "rejected_geometric_mean": -3.2323007583618164, + "step": 2456 + }, + { + "chosen_geometric_mean": -1.294355869293213, + "epoch": 0.61, + "grad_norm": 3.4375, + "learning_rate": 3.948435682890373e-06, + "log_odds": 1.9195339679718018, + "log_odds_ratio": -0.40400049090385437, + "loss": 0.3219, + "rejected_geometric_mean": -3.030644416809082, + "step": 2457 + }, + { + "chosen_geometric_mean": -1.0788791179656982, + "epoch": 0.61, + "grad_norm": 5.15625, + "learning_rate": 3.947642132468401e-06, + "log_odds": 2.447423219680786, + "log_odds_ratio": -0.28493645787239075, + "loss": 0.301, + "rejected_geometric_mean": -3.259719133377075, + "step": 2458 + }, + { + "chosen_geometric_mean": -1.1133666038513184, + "epoch": 0.61, + "grad_norm": 5.5625, + "learning_rate": 3.946848362549133e-06, + "log_odds": 2.511334180831909, + "log_odds_ratio": -0.24004679918289185, + "loss": 0.3256, + "rejected_geometric_mean": -3.318981647491455, + "step": 2459 + }, + { + "chosen_geometric_mean": -0.995538592338562, + "epoch": 0.61, + "grad_norm": 4.4375, + "learning_rate": 3.946054373252924e-06, + "log_odds": 0.3944264054298401, + "log_odds_ratio": -0.523666262626648, + "loss": 0.3651, + "rejected_geometric_mean": -1.2702995538711548, + "step": 2460 + }, + { + "chosen_geometric_mean": -1.202271819114685, + "epoch": 0.61, + "grad_norm": 33.0, + "learning_rate": 3.94526016470016e-06, + "log_odds": 2.0609869956970215, + "log_odds_ratio": -0.3246912658214569, + "loss": 0.2958, + "rejected_geometric_mean": -3.038773536682129, + "step": 2461 + }, + { + "chosen_geometric_mean": -1.3894944190979004, + "epoch": 0.61, + "grad_norm": 10.4375, + "learning_rate": 3.944465737011264e-06, + "log_odds": 2.4762449264526367, + "log_odds_ratio": -0.2920641601085663, + "loss": 0.3562, + "rejected_geometric_mean": -3.692525863647461, + "step": 2462 + }, + { + "chosen_geometric_mean": -1.0956867933273315, + "epoch": 0.61, + "grad_norm": 15.25, + "learning_rate": 3.943671090306689e-06, + "log_odds": 3.7726423740386963, + "log_odds_ratio": -0.46257299184799194, + "loss": 0.2976, + "rejected_geometric_mean": -4.640456676483154, + "step": 2463 + }, + { + "chosen_geometric_mean": -1.1845502853393555, + "epoch": 0.61, + "grad_norm": 4.625, + "learning_rate": 3.942876224706923e-06, + "log_odds": 1.9277127981185913, + "log_odds_ratio": -0.24140863120555878, + "loss": 0.2553, + "rejected_geometric_mean": -2.8670849800109863, + "step": 2464 + }, + { + "chosen_geometric_mean": -0.9508744478225708, + "epoch": 0.61, + "grad_norm": 2.59375, + "learning_rate": 3.942081140332486e-06, + "log_odds": 3.406062602996826, + "log_odds_ratio": -0.15513946115970612, + "loss": 0.2613, + "rejected_geometric_mean": -3.947542667388916, + "step": 2465 + }, + { + "chosen_geometric_mean": -0.9667690992355347, + "epoch": 0.61, + "grad_norm": 6.6875, + "learning_rate": 3.941285837303934e-06, + "log_odds": 1.0370091199874878, + "log_odds_ratio": -0.3726140856742859, + "loss": 0.3855, + "rejected_geometric_mean": -1.7420883178710938, + "step": 2466 + }, + { + "chosen_geometric_mean": -1.0867217779159546, + "epoch": 0.61, + "grad_norm": 6.125, + "learning_rate": 3.940490315741851e-06, + "log_odds": 2.511690616607666, + "log_odds_ratio": -0.1965385377407074, + "loss": 0.3221, + "rejected_geometric_mean": -3.276155471801758, + "step": 2467 + }, + { + "chosen_geometric_mean": -1.0317952632904053, + "epoch": 0.61, + "grad_norm": 6.1875, + "learning_rate": 3.9396945757668605e-06, + "log_odds": 2.242682456970215, + "log_odds_ratio": -0.2224368155002594, + "loss": 0.2997, + "rejected_geometric_mean": -2.9491143226623535, + "step": 2468 + }, + { + "chosen_geometric_mean": -1.1685757637023926, + "epoch": 0.61, + "grad_norm": 10.5, + "learning_rate": 3.938898617499612e-06, + "log_odds": 1.3942644596099854, + "log_odds_ratio": -0.2878436744213104, + "loss": 0.3327, + "rejected_geometric_mean": -2.3268580436706543, + "step": 2469 + }, + { + "chosen_geometric_mean": -1.240765929222107, + "epoch": 0.61, + "grad_norm": 5.4375, + "learning_rate": 3.938102441060796e-06, + "log_odds": 1.9290944337844849, + "log_odds_ratio": -0.3164646029472351, + "loss": 0.3091, + "rejected_geometric_mean": -2.9138855934143066, + "step": 2470 + }, + { + "chosen_geometric_mean": -1.1368707418441772, + "epoch": 0.61, + "grad_norm": 3.828125, + "learning_rate": 3.937306046571129e-06, + "log_odds": 0.9595365524291992, + "log_odds_ratio": -0.4162098467350006, + "loss": 0.3111, + "rejected_geometric_mean": -1.869189977645874, + "step": 2471 + }, + { + "chosen_geometric_mean": -1.4740729331970215, + "epoch": 0.61, + "grad_norm": 7.09375, + "learning_rate": 3.9365094341513636e-06, + "log_odds": 2.0206332206726074, + "log_odds_ratio": -0.24782197177410126, + "loss": 0.3166, + "rejected_geometric_mean": -3.3265788555145264, + "step": 2472 + }, + { + "chosen_geometric_mean": -1.1676701307296753, + "epoch": 0.61, + "grad_norm": 10.5, + "learning_rate": 3.935712603922287e-06, + "log_odds": 1.725171685218811, + "log_odds_ratio": -0.44720742106437683, + "loss": 0.2664, + "rejected_geometric_mean": -2.7367608547210693, + "step": 2473 + }, + { + "chosen_geometric_mean": -1.0185413360595703, + "epoch": 0.61, + "grad_norm": 5.0625, + "learning_rate": 3.934915556004716e-06, + "log_odds": 1.4659302234649658, + "log_odds_ratio": -0.31580638885498047, + "loss": 0.3076, + "rejected_geometric_mean": -2.1931443214416504, + "step": 2474 + }, + { + "chosen_geometric_mean": -1.1828333139419556, + "epoch": 0.61, + "grad_norm": 14.75, + "learning_rate": 3.934118290519504e-06, + "log_odds": 3.195004940032959, + "log_odds_ratio": -0.24219658970832825, + "loss": 0.2822, + "rejected_geometric_mean": -4.098650932312012, + "step": 2475 + }, + { + "chosen_geometric_mean": -1.0957852602005005, + "epoch": 0.61, + "grad_norm": 5.875, + "learning_rate": 3.933320807587535e-06, + "log_odds": 2.1119658946990967, + "log_odds_ratio": -0.31722989678382874, + "loss": 0.3201, + "rejected_geometric_mean": -2.9934635162353516, + "step": 2476 + }, + { + "chosen_geometric_mean": -0.9816124439239502, + "epoch": 0.61, + "grad_norm": 14.25, + "learning_rate": 3.932523107329725e-06, + "log_odds": 1.7104108333587646, + "log_odds_ratio": -0.3981392979621887, + "loss": 0.3449, + "rejected_geometric_mean": -2.49534010887146, + "step": 2477 + }, + { + "chosen_geometric_mean": -1.0780080556869507, + "epoch": 0.61, + "grad_norm": 4.375, + "learning_rate": 3.931725189867026e-06, + "log_odds": 0.7267104387283325, + "log_odds_ratio": -0.44721007347106934, + "loss": 0.2893, + "rejected_geometric_mean": -1.6237908601760864, + "step": 2478 + }, + { + "chosen_geometric_mean": -1.0619516372680664, + "epoch": 0.61, + "grad_norm": 15.3125, + "learning_rate": 3.930927055320422e-06, + "log_odds": 1.0743454694747925, + "log_odds_ratio": -0.3536168336868286, + "loss": 0.3881, + "rejected_geometric_mean": -1.9222368001937866, + "step": 2479 + }, + { + "chosen_geometric_mean": -1.022769570350647, + "epoch": 0.61, + "grad_norm": 14.9375, + "learning_rate": 3.9301287038109285e-06, + "log_odds": 0.3329786956310272, + "log_odds_ratio": -0.5475665926933289, + "loss": 0.2755, + "rejected_geometric_mean": -1.2622052431106567, + "step": 2480 + }, + { + "chosen_geometric_mean": -1.1368827819824219, + "epoch": 0.61, + "grad_norm": 5.34375, + "learning_rate": 3.9293301354595945e-06, + "log_odds": 4.32112979888916, + "log_odds_ratio": -0.2954457700252533, + "loss": 0.2732, + "rejected_geometric_mean": -5.218775272369385, + "step": 2481 + }, + { + "chosen_geometric_mean": -1.259876012802124, + "epoch": 0.61, + "grad_norm": 9.4375, + "learning_rate": 3.9285313503875025e-06, + "log_odds": 4.885335445404053, + "log_odds_ratio": -0.1627987176179886, + "loss": 0.3447, + "rejected_geometric_mean": -5.897919654846191, + "step": 2482 + }, + { + "chosen_geometric_mean": -1.0037600994110107, + "epoch": 0.61, + "grad_norm": 19.625, + "learning_rate": 3.927732348715767e-06, + "log_odds": 0.1031855121254921, + "log_odds_ratio": -0.6608941555023193, + "loss": 0.3642, + "rejected_geometric_mean": -1.094529151916504, + "step": 2483 + }, + { + "chosen_geometric_mean": -1.4413409233093262, + "epoch": 0.62, + "grad_norm": 8.8125, + "learning_rate": 3.926933130565537e-06, + "log_odds": 2.416564702987671, + "log_odds_ratio": -0.2550012767314911, + "loss": 0.3849, + "rejected_geometric_mean": -3.683461904525757, + "step": 2484 + }, + { + "chosen_geometric_mean": -1.0147705078125, + "epoch": 0.62, + "grad_norm": 3.265625, + "learning_rate": 3.926133696057991e-06, + "log_odds": 2.730222702026367, + "log_odds_ratio": -0.17505383491516113, + "loss": 0.3026, + "rejected_geometric_mean": -3.3410656452178955, + "step": 2485 + }, + { + "chosen_geometric_mean": -1.019970417022705, + "epoch": 0.62, + "grad_norm": 3.25, + "learning_rate": 3.925334045314345e-06, + "log_odds": 2.5073728561401367, + "log_odds_ratio": -0.19685831665992737, + "loss": 0.2451, + "rejected_geometric_mean": -3.1423380374908447, + "step": 2486 + }, + { + "chosen_geometric_mean": -1.2660127878189087, + "epoch": 0.62, + "grad_norm": 6.84375, + "learning_rate": 3.924534178455845e-06, + "log_odds": 2.1908459663391113, + "log_odds_ratio": -0.25467532873153687, + "loss": 0.2795, + "rejected_geometric_mean": -3.2289938926696777, + "step": 2487 + }, + { + "chosen_geometric_mean": -1.046213984489441, + "epoch": 0.62, + "grad_norm": 7.0, + "learning_rate": 3.9237340956037675e-06, + "log_odds": 2.009148120880127, + "log_odds_ratio": -0.30018821358680725, + "loss": 0.2988, + "rejected_geometric_mean": -2.7632718086242676, + "step": 2488 + }, + { + "chosen_geometric_mean": -1.1777468919754028, + "epoch": 0.62, + "grad_norm": 5.84375, + "learning_rate": 3.922933796879426e-06, + "log_odds": 3.356929302215576, + "log_odds_ratio": -0.2938067317008972, + "loss": 0.3301, + "rejected_geometric_mean": -4.320181369781494, + "step": 2489 + }, + { + "chosen_geometric_mean": -1.2424927949905396, + "epoch": 0.62, + "grad_norm": 3.625, + "learning_rate": 3.922133282404166e-06, + "log_odds": 0.6428491473197937, + "log_odds_ratio": -0.5103597640991211, + "loss": 0.2818, + "rejected_geometric_mean": -1.8015743494033813, + "step": 2490 + }, + { + "chosen_geometric_mean": -1.2858808040618896, + "epoch": 0.62, + "grad_norm": 2.78125, + "learning_rate": 3.921332552299363e-06, + "log_odds": 0.9519981741905212, + "log_odds_ratio": -0.3801994323730469, + "loss": 0.3322, + "rejected_geometric_mean": -2.089858055114746, + "step": 2491 + }, + { + "chosen_geometric_mean": -0.922544002532959, + "epoch": 0.62, + "grad_norm": 2.484375, + "learning_rate": 3.920531606686428e-06, + "log_odds": 3.2922234535217285, + "log_odds_ratio": -0.1893511414527893, + "loss": 0.242, + "rejected_geometric_mean": -3.813279628753662, + "step": 2492 + }, + { + "chosen_geometric_mean": -1.0496269464492798, + "epoch": 0.62, + "grad_norm": 6.625, + "learning_rate": 3.919730445686802e-06, + "log_odds": 0.9149993658065796, + "log_odds_ratio": -0.36188942193984985, + "loss": 0.3058, + "rejected_geometric_mean": -1.7554740905761719, + "step": 2493 + }, + { + "chosen_geometric_mean": -0.9752038717269897, + "epoch": 0.62, + "grad_norm": 15.375, + "learning_rate": 3.918929069421963e-06, + "log_odds": 6.574007034301758, + "log_odds_ratio": -0.005328839644789696, + "loss": 0.3257, + "rejected_geometric_mean": -7.068141937255859, + "step": 2494 + }, + { + "chosen_geometric_mean": -1.1435736417770386, + "epoch": 0.62, + "grad_norm": 10.5, + "learning_rate": 3.918127478013416e-06, + "log_odds": 3.6326937675476074, + "log_odds_ratio": -0.2531125843524933, + "loss": 0.3237, + "rejected_geometric_mean": -4.527522087097168, + "step": 2495 + }, + { + "chosen_geometric_mean": -1.2328643798828125, + "epoch": 0.62, + "grad_norm": 12.0625, + "learning_rate": 3.917325671582703e-06, + "log_odds": 2.9398624897003174, + "log_odds_ratio": -0.0520809181034565, + "loss": 0.2852, + "rejected_geometric_mean": -3.79532527923584, + "step": 2496 + }, + { + "chosen_geometric_mean": -0.8390305042266846, + "epoch": 0.62, + "grad_norm": 8.8125, + "learning_rate": 3.916523650251397e-06, + "log_odds": 2.5603532791137695, + "log_odds_ratio": -0.14988672733306885, + "loss": 0.2612, + "rejected_geometric_mean": -2.931875467300415, + "step": 2497 + }, + { + "chosen_geometric_mean": -1.036651372909546, + "epoch": 0.62, + "grad_norm": 2.546875, + "learning_rate": 3.915721414141103e-06, + "log_odds": 1.98322331905365, + "log_odds_ratio": -0.27414000034332275, + "loss": 0.2787, + "rejected_geometric_mean": -2.7816052436828613, + "step": 2498 + }, + { + "chosen_geometric_mean": -0.8778311014175415, + "epoch": 0.62, + "grad_norm": 2.625, + "learning_rate": 3.914918963373461e-06, + "log_odds": 5.089408874511719, + "log_odds_ratio": -0.1343366503715515, + "loss": 0.3179, + "rejected_geometric_mean": -5.524194717407227, + "step": 2499 + }, + { + "chosen_geometric_mean": -1.2040082216262817, + "epoch": 0.62, + "grad_norm": 16.875, + "learning_rate": 3.914116298070139e-06, + "log_odds": 5.282711982727051, + "log_odds_ratio": -0.27958911657333374, + "loss": 0.319, + "rejected_geometric_mean": -6.265974521636963, + "step": 2500 + }, + { + "chosen_geometric_mean": -1.0068278312683105, + "epoch": 0.62, + "grad_norm": 6.71875, + "learning_rate": 3.913313418352843e-06, + "log_odds": 1.8556782007217407, + "log_odds_ratio": -0.2567354440689087, + "loss": 0.2741, + "rejected_geometric_mean": -2.5587222576141357, + "step": 2501 + }, + { + "chosen_geometric_mean": -1.1535518169403076, + "epoch": 0.62, + "grad_norm": 2.140625, + "learning_rate": 3.9125103243433075e-06, + "log_odds": 5.583212852478027, + "log_odds_ratio": -0.11121822893619537, + "loss": 0.2561, + "rejected_geometric_mean": -6.428123950958252, + "step": 2502 + }, + { + "chosen_geometric_mean": -1.1933059692382812, + "epoch": 0.62, + "grad_norm": 3.53125, + "learning_rate": 3.9117070161633005e-06, + "log_odds": 3.2407329082489014, + "log_odds_ratio": -0.45215344429016113, + "loss": 0.3381, + "rejected_geometric_mean": -4.310011386871338, + "step": 2503 + }, + { + "chosen_geometric_mean": -1.197760820388794, + "epoch": 0.62, + "grad_norm": 19.625, + "learning_rate": 3.910903493934624e-06, + "log_odds": 3.3660390377044678, + "log_odds_ratio": -0.5235796570777893, + "loss": 0.3783, + "rejected_geometric_mean": -4.4903106689453125, + "step": 2504 + }, + { + "chosen_geometric_mean": -1.168345332145691, + "epoch": 0.62, + "grad_norm": 13.3125, + "learning_rate": 3.91009975777911e-06, + "log_odds": 2.62361216545105, + "log_odds_ratio": -0.19144386053085327, + "loss": 0.3187, + "rejected_geometric_mean": -3.513105869293213, + "step": 2505 + }, + { + "chosen_geometric_mean": -1.1407040357589722, + "epoch": 0.62, + "grad_norm": 6.71875, + "learning_rate": 3.909295807818625e-06, + "log_odds": 7.36064338684082, + "log_odds_ratio": -0.15820185840129852, + "loss": 0.3288, + "rejected_geometric_mean": -8.175421714782715, + "step": 2506 + }, + { + "chosen_geometric_mean": -1.0945310592651367, + "epoch": 0.62, + "grad_norm": 8.5, + "learning_rate": 3.908491644175066e-06, + "log_odds": 8.13852310180664, + "log_odds_ratio": -0.022643033415079117, + "loss": 0.2886, + "rejected_geometric_mean": -8.83144760131836, + "step": 2507 + }, + { + "chosen_geometric_mean": -1.0509111881256104, + "epoch": 0.62, + "grad_norm": 5.0, + "learning_rate": 3.907687266970366e-06, + "log_odds": 2.1212785243988037, + "log_odds_ratio": -0.2659689486026764, + "loss": 0.285, + "rejected_geometric_mean": -2.8881618976593018, + "step": 2508 + }, + { + "chosen_geometric_mean": -1.1014509201049805, + "epoch": 0.62, + "grad_norm": 7.25, + "learning_rate": 3.906882676326486e-06, + "log_odds": 0.8806178569793701, + "log_odds_ratio": -0.4556027352809906, + "loss": 0.3066, + "rejected_geometric_mean": -1.7992181777954102, + "step": 2509 + }, + { + "chosen_geometric_mean": -1.141381859779358, + "epoch": 0.62, + "grad_norm": 26.625, + "learning_rate": 3.906077872365421e-06, + "log_odds": 1.3387696743011475, + "log_odds_ratio": -0.48766884207725525, + "loss": 0.3606, + "rejected_geometric_mean": -2.3595714569091797, + "step": 2510 + }, + { + "chosen_geometric_mean": -0.9246271848678589, + "epoch": 0.62, + "grad_norm": 5.3125, + "learning_rate": 3.9052728552092e-06, + "log_odds": 3.9073681831359863, + "log_odds_ratio": -0.10017676651477814, + "loss": 0.3128, + "rejected_geometric_mean": -4.392409324645996, + "step": 2511 + }, + { + "chosen_geometric_mean": -1.197098970413208, + "epoch": 0.62, + "grad_norm": 3.578125, + "learning_rate": 3.9044676249798815e-06, + "log_odds": 2.223011016845703, + "log_odds_ratio": -0.24031983315944672, + "loss": 0.3661, + "rejected_geometric_mean": -3.149731159210205, + "step": 2512 + }, + { + "chosen_geometric_mean": -1.2133654356002808, + "epoch": 0.62, + "grad_norm": 5.3125, + "learning_rate": 3.903662181799558e-06, + "log_odds": 5.040244102478027, + "log_odds_ratio": -0.06286086142063141, + "loss": 0.2998, + "rejected_geometric_mean": -5.9282379150390625, + "step": 2513 + }, + { + "chosen_geometric_mean": -1.1130225658416748, + "epoch": 0.62, + "grad_norm": 4.71875, + "learning_rate": 3.902856525790355e-06, + "log_odds": 6.469027996063232, + "log_odds_ratio": -0.17244946956634521, + "loss": 0.2793, + "rejected_geometric_mean": -7.272150039672852, + "step": 2514 + }, + { + "chosen_geometric_mean": -0.9211657047271729, + "epoch": 0.62, + "grad_norm": 4.53125, + "learning_rate": 3.902050657074428e-06, + "log_odds": 3.313086748123169, + "log_odds_ratio": -0.3441976308822632, + "loss": 0.3611, + "rejected_geometric_mean": -3.983290433883667, + "step": 2515 + }, + { + "chosen_geometric_mean": -1.1302545070648193, + "epoch": 0.62, + "grad_norm": 8.9375, + "learning_rate": 3.901244575773967e-06, + "log_odds": 2.00435471534729, + "log_odds_ratio": -0.2712538242340088, + "loss": 0.3478, + "rejected_geometric_mean": -2.852248191833496, + "step": 2516 + }, + { + "chosen_geometric_mean": -1.6395761966705322, + "epoch": 0.62, + "grad_norm": 27.625, + "learning_rate": 3.900438282011193e-06, + "log_odds": 3.3057451248168945, + "log_odds_ratio": -0.0739111602306366, + "loss": 0.3681, + "rejected_geometric_mean": -4.6806559562683105, + "step": 2517 + }, + { + "chosen_geometric_mean": -1.741694688796997, + "epoch": 0.62, + "grad_norm": 21.125, + "learning_rate": 3.89963177590836e-06, + "log_odds": 1.6560686826705933, + "log_odds_ratio": -0.23626743257045746, + "loss": 0.3057, + "rejected_geometric_mean": -3.2040610313415527, + "step": 2518 + }, + { + "chosen_geometric_mean": -1.1348538398742676, + "epoch": 0.62, + "grad_norm": 10.625, + "learning_rate": 3.898825057587752e-06, + "log_odds": 4.1278462409973145, + "log_odds_ratio": -0.3125090003013611, + "loss": 0.3273, + "rejected_geometric_mean": -5.059034824371338, + "step": 2519 + }, + { + "chosen_geometric_mean": -1.0491746664047241, + "epoch": 0.62, + "grad_norm": 19.5, + "learning_rate": 3.8980181271716885e-06, + "log_odds": 0.7738774418830872, + "log_odds_ratio": -0.4211951494216919, + "loss": 0.3097, + "rejected_geometric_mean": -1.6356028318405151, + "step": 2520 + }, + { + "chosen_geometric_mean": -1.2236112356185913, + "epoch": 0.62, + "grad_norm": 16.75, + "learning_rate": 3.897210984782519e-06, + "log_odds": 1.1270183324813843, + "log_odds_ratio": -0.43126052618026733, + "loss": 0.32, + "rejected_geometric_mean": -2.1936309337615967, + "step": 2521 + }, + { + "chosen_geometric_mean": -1.0892517566680908, + "epoch": 0.62, + "grad_norm": 14.8125, + "learning_rate": 3.896403630542627e-06, + "log_odds": 4.096281051635742, + "log_odds_ratio": -0.19155752658843994, + "loss": 0.2409, + "rejected_geometric_mean": -4.871438980102539, + "step": 2522 + }, + { + "chosen_geometric_mean": -1.0071814060211182, + "epoch": 0.62, + "grad_norm": 4.0625, + "learning_rate": 3.895596064574424e-06, + "log_odds": 3.7937402725219727, + "log_odds_ratio": -0.151100754737854, + "loss": 0.33, + "rejected_geometric_mean": -4.411472797393799, + "step": 2523 + }, + { + "chosen_geometric_mean": -1.0548384189605713, + "epoch": 0.62, + "grad_norm": 17.875, + "learning_rate": 3.894788287000357e-06, + "log_odds": 2.601318597793579, + "log_odds_ratio": -0.15880197286605835, + "loss": 0.2914, + "rejected_geometric_mean": -3.3010783195495605, + "step": 2524 + }, + { + "chosen_geometric_mean": -0.9861387014389038, + "epoch": 0.63, + "grad_norm": 16.125, + "learning_rate": 3.893980297942907e-06, + "log_odds": 3.9204442501068115, + "log_odds_ratio": -0.20896416902542114, + "loss": 0.2524, + "rejected_geometric_mean": -4.555182456970215, + "step": 2525 + }, + { + "chosen_geometric_mean": -0.9204127788543701, + "epoch": 0.63, + "grad_norm": 39.5, + "learning_rate": 3.893172097524582e-06, + "log_odds": 2.9743313789367676, + "log_odds_ratio": -0.2595927119255066, + "loss": 0.3406, + "rejected_geometric_mean": -3.545602798461914, + "step": 2526 + }, + { + "chosen_geometric_mean": -0.9722816348075867, + "epoch": 0.63, + "grad_norm": 2.78125, + "learning_rate": 3.8923636858679255e-06, + "log_odds": 4.215060710906982, + "log_odds_ratio": -0.183705136179924, + "loss": 0.28, + "rejected_geometric_mean": -4.785810947418213, + "step": 2527 + }, + { + "chosen_geometric_mean": -0.8911623358726501, + "epoch": 0.63, + "grad_norm": 2.34375, + "learning_rate": 3.891555063095513e-06, + "log_odds": 4.383275508880615, + "log_odds_ratio": -0.11670145392417908, + "loss": 0.3201, + "rejected_geometric_mean": -4.807540416717529, + "step": 2528 + }, + { + "chosen_geometric_mean": -1.1010913848876953, + "epoch": 0.63, + "grad_norm": 3.328125, + "learning_rate": 3.890746229329949e-06, + "log_odds": 1.7812665700912476, + "log_odds_ratio": -0.2992621958255768, + "loss": 0.3118, + "rejected_geometric_mean": -2.6407902240753174, + "step": 2529 + }, + { + "chosen_geometric_mean": -1.0173828601837158, + "epoch": 0.63, + "grad_norm": 3.109375, + "learning_rate": 3.889937184693873e-06, + "log_odds": 1.775618553161621, + "log_odds_ratio": -0.1980283111333847, + "loss": 0.2847, + "rejected_geometric_mean": -2.417419910430908, + "step": 2530 + }, + { + "chosen_geometric_mean": -1.240249514579773, + "epoch": 0.63, + "grad_norm": 12.25, + "learning_rate": 3.889127929309957e-06, + "log_odds": 0.928151547908783, + "log_odds_ratio": -0.3885290026664734, + "loss": 0.3086, + "rejected_geometric_mean": -1.9746781587600708, + "step": 2531 + }, + { + "chosen_geometric_mean": -1.0056226253509521, + "epoch": 0.63, + "grad_norm": 2.875, + "learning_rate": 3.888318463300902e-06, + "log_odds": 3.881371259689331, + "log_odds_ratio": -0.23343825340270996, + "loss": 0.2808, + "rejected_geometric_mean": -4.522387981414795, + "step": 2532 + }, + { + "chosen_geometric_mean": -1.1046621799468994, + "epoch": 0.63, + "grad_norm": 2.71875, + "learning_rate": 3.887508786789442e-06, + "log_odds": 1.4809811115264893, + "log_odds_ratio": -0.22003468871116638, + "loss": 0.2951, + "rejected_geometric_mean": -2.2874181270599365, + "step": 2533 + }, + { + "chosen_geometric_mean": -1.1321251392364502, + "epoch": 0.63, + "grad_norm": 2.234375, + "learning_rate": 3.886698899898345e-06, + "log_odds": 0.6826953887939453, + "log_odds_ratio": -0.5745435953140259, + "loss": 0.3885, + "rejected_geometric_mean": -1.7469613552093506, + "step": 2534 + }, + { + "chosen_geometric_mean": -1.054441213607788, + "epoch": 0.63, + "grad_norm": 11.75, + "learning_rate": 3.8858888027504095e-06, + "log_odds": 0.821379542350769, + "log_odds_ratio": -0.40444499254226685, + "loss": 0.3123, + "rejected_geometric_mean": -1.6944185495376587, + "step": 2535 + }, + { + "chosen_geometric_mean": -1.1085551977157593, + "epoch": 0.63, + "grad_norm": 13.1875, + "learning_rate": 3.885078495468463e-06, + "log_odds": 0.4565632939338684, + "log_odds_ratio": -0.4915453791618347, + "loss": 0.3584, + "rejected_geometric_mean": -1.43505859375, + "step": 2536 + }, + { + "chosen_geometric_mean": -1.2009204626083374, + "epoch": 0.63, + "grad_norm": 2.484375, + "learning_rate": 3.8842679781753705e-06, + "log_odds": 4.6568522453308105, + "log_odds_ratio": -0.12096015363931656, + "loss": 0.3216, + "rejected_geometric_mean": -5.531399726867676, + "step": 2537 + }, + { + "chosen_geometric_mean": -1.214493989944458, + "epoch": 0.63, + "grad_norm": 76.5, + "learning_rate": 3.883457250994024e-06, + "log_odds": 2.5422191619873047, + "log_odds_ratio": -0.4215032458305359, + "loss": 0.3122, + "rejected_geometric_mean": -3.6357343196868896, + "step": 2538 + }, + { + "chosen_geometric_mean": -0.9596608877182007, + "epoch": 0.63, + "grad_norm": 1.8828125, + "learning_rate": 3.88264631404735e-06, + "log_odds": 4.287538528442383, + "log_odds_ratio": -0.022482365369796753, + "loss": 0.2553, + "rejected_geometric_mean": -4.767940998077393, + "step": 2539 + }, + { + "chosen_geometric_mean": -1.693320631980896, + "epoch": 0.63, + "grad_norm": 39.25, + "learning_rate": 3.881835167458307e-06, + "log_odds": -0.31017836928367615, + "log_odds_ratio": -0.9647769331932068, + "loss": 0.4019, + "rejected_geometric_mean": -1.3993301391601562, + "step": 2540 + }, + { + "chosen_geometric_mean": -1.0125150680541992, + "epoch": 0.63, + "grad_norm": 2.203125, + "learning_rate": 3.881023811349881e-06, + "log_odds": 1.6913907527923584, + "log_odds_ratio": -0.28731846809387207, + "loss": 0.3304, + "rejected_geometric_mean": -2.439350128173828, + "step": 2541 + }, + { + "chosen_geometric_mean": -1.127409815788269, + "epoch": 0.63, + "grad_norm": 28.5, + "learning_rate": 3.880212245845097e-06, + "log_odds": 3.824700355529785, + "log_odds_ratio": -0.03655219450592995, + "loss": 0.3051, + "rejected_geometric_mean": -4.573334693908691, + "step": 2542 + }, + { + "chosen_geometric_mean": -1.22310209274292, + "epoch": 0.63, + "grad_norm": 5.75, + "learning_rate": 3.879400471067006e-06, + "log_odds": 2.014432668685913, + "log_odds_ratio": -0.31672602891921997, + "loss": 0.3611, + "rejected_geometric_mean": -3.035827159881592, + "step": 2543 + }, + { + "chosen_geometric_mean": -1.4155579805374146, + "epoch": 0.63, + "grad_norm": 36.5, + "learning_rate": 3.878588487138691e-06, + "log_odds": 1.1383388042449951, + "log_odds_ratio": -0.3587987422943115, + "loss": 0.3377, + "rejected_geometric_mean": -2.4398136138916016, + "step": 2544 + }, + { + "chosen_geometric_mean": -1.1986706256866455, + "epoch": 0.63, + "grad_norm": 2.734375, + "learning_rate": 3.877776294183271e-06, + "log_odds": 0.5288548469543457, + "log_odds_ratio": -0.4741157591342926, + "loss": 0.3359, + "rejected_geometric_mean": -1.5935940742492676, + "step": 2545 + }, + { + "chosen_geometric_mean": -1.1873695850372314, + "epoch": 0.63, + "grad_norm": 26.625, + "learning_rate": 3.876963892323893e-06, + "log_odds": 1.624666452407837, + "log_odds_ratio": -0.2807137370109558, + "loss": 0.3332, + "rejected_geometric_mean": -2.591766357421875, + "step": 2546 + }, + { + "chosen_geometric_mean": -1.0146952867507935, + "epoch": 0.63, + "grad_norm": 2.125, + "learning_rate": 3.876151281683735e-06, + "log_odds": 5.211935997009277, + "log_odds_ratio": -0.05015856400132179, + "loss": 0.2828, + "rejected_geometric_mean": -5.778871536254883, + "step": 2547 + }, + { + "chosen_geometric_mean": -1.0903565883636475, + "epoch": 0.63, + "grad_norm": 2.390625, + "learning_rate": 3.8753384623860115e-06, + "log_odds": 0.2376316487789154, + "log_odds_ratio": -0.6209080219268799, + "loss": 0.3036, + "rejected_geometric_mean": -1.2336819171905518, + "step": 2548 + }, + { + "chosen_geometric_mean": -1.150256872177124, + "epoch": 0.63, + "grad_norm": 15.875, + "learning_rate": 3.874525434553962e-06, + "log_odds": 4.179304599761963, + "log_odds_ratio": -0.11703316867351532, + "loss": 0.3634, + "rejected_geometric_mean": -4.993052959442139, + "step": 2549 + }, + { + "chosen_geometric_mean": -1.2539622783660889, + "epoch": 0.63, + "grad_norm": 8.875, + "learning_rate": 3.873712198310864e-06, + "log_odds": 5.570558547973633, + "log_odds_ratio": -0.2550351321697235, + "loss": 0.2932, + "rejected_geometric_mean": -6.596253395080566, + "step": 2550 + }, + { + "chosen_geometric_mean": -1.0413434505462646, + "epoch": 0.63, + "grad_norm": 4.34375, + "learning_rate": 3.8728987537800215e-06, + "log_odds": 6.914876461029053, + "log_odds_ratio": -0.24126672744750977, + "loss": 0.3064, + "rejected_geometric_mean": -7.694401741027832, + "step": 2551 + }, + { + "chosen_geometric_mean": -1.4880281686782837, + "epoch": 0.63, + "grad_norm": 70.0, + "learning_rate": 3.8720851010847725e-06, + "log_odds": 3.4220921993255615, + "log_odds_ratio": -0.24506181478500366, + "loss": 0.359, + "rejected_geometric_mean": -4.683887481689453, + "step": 2552 + }, + { + "chosen_geometric_mean": -1.3368265628814697, + "epoch": 0.63, + "grad_norm": 8.9375, + "learning_rate": 3.8712712403484875e-06, + "log_odds": 1.0669275522232056, + "log_odds_ratio": -0.3452739417552948, + "loss": 0.3586, + "rejected_geometric_mean": -2.2704691886901855, + "step": 2553 + }, + { + "chosen_geometric_mean": -0.9537687301635742, + "epoch": 0.63, + "grad_norm": 3.109375, + "learning_rate": 3.870457171694566e-06, + "log_odds": 3.631669044494629, + "log_odds_ratio": -0.07486313581466675, + "loss": 0.2818, + "rejected_geometric_mean": -4.124018669128418, + "step": 2554 + }, + { + "chosen_geometric_mean": -0.957595944404602, + "epoch": 0.63, + "grad_norm": 13.9375, + "learning_rate": 3.869642895246442e-06, + "log_odds": 1.8844964504241943, + "log_odds_ratio": -0.2534392178058624, + "loss": 0.3087, + "rejected_geometric_mean": -2.537801742553711, + "step": 2555 + }, + { + "chosen_geometric_mean": -1.3085145950317383, + "epoch": 0.63, + "grad_norm": 5.0, + "learning_rate": 3.8688284111275775e-06, + "log_odds": 3.3529953956604004, + "log_odds_ratio": -0.35215529799461365, + "loss": 0.314, + "rejected_geometric_mean": -4.44827938079834, + "step": 2556 + }, + { + "chosen_geometric_mean": -1.1146832704544067, + "epoch": 0.63, + "grad_norm": 3.015625, + "learning_rate": 3.868013719461469e-06, + "log_odds": 5.919373512268066, + "log_odds_ratio": -0.1712414026260376, + "loss": 0.2601, + "rejected_geometric_mean": -6.699482440948486, + "step": 2557 + }, + { + "chosen_geometric_mean": -0.9977065324783325, + "epoch": 0.63, + "grad_norm": 6.9375, + "learning_rate": 3.867198820371643e-06, + "log_odds": 1.6811048984527588, + "log_odds_ratio": -0.24414333701133728, + "loss": 0.3112, + "rejected_geometric_mean": -2.312260389328003, + "step": 2558 + }, + { + "chosen_geometric_mean": -1.3773982524871826, + "epoch": 0.63, + "grad_norm": 8.375, + "learning_rate": 3.866383713981657e-06, + "log_odds": 2.0895278453826904, + "log_odds_ratio": -0.1523257941007614, + "loss": 0.2984, + "rejected_geometric_mean": -3.2143054008483887, + "step": 2559 + }, + { + "chosen_geometric_mean": -0.9355998039245605, + "epoch": 0.63, + "grad_norm": 12.9375, + "learning_rate": 3.865568400415102e-06, + "log_odds": 5.360668182373047, + "log_odds_ratio": -0.17630968987941742, + "loss": 0.28, + "rejected_geometric_mean": -5.876862525939941, + "step": 2560 + }, + { + "chosen_geometric_mean": -1.171781063079834, + "epoch": 0.63, + "grad_norm": 3.0, + "learning_rate": 3.864752879795598e-06, + "log_odds": 0.34088489413261414, + "log_odds_ratio": -0.5610939264297485, + "loss": 0.2678, + "rejected_geometric_mean": -1.4425134658813477, + "step": 2561 + }, + { + "chosen_geometric_mean": -0.992392897605896, + "epoch": 0.63, + "grad_norm": 4.5, + "learning_rate": 3.863937152246798e-06, + "log_odds": 2.5124521255493164, + "log_odds_ratio": -0.2162715345621109, + "loss": 0.3135, + "rejected_geometric_mean": -3.1599044799804688, + "step": 2562 + }, + { + "chosen_geometric_mean": -1.3749653100967407, + "epoch": 0.63, + "grad_norm": 3.25, + "learning_rate": 3.863121217892386e-06, + "log_odds": 2.0186967849731445, + "log_odds_ratio": -0.37406814098358154, + "loss": 0.3436, + "rejected_geometric_mean": -3.1900854110717773, + "step": 2563 + }, + { + "chosen_geometric_mean": -1.0438615083694458, + "epoch": 0.63, + "grad_norm": 6.4375, + "learning_rate": 3.862305076856078e-06, + "log_odds": 3.384308338165283, + "log_odds_ratio": -0.15039321780204773, + "loss": 0.2802, + "rejected_geometric_mean": -4.096197128295898, + "step": 2564 + }, + { + "chosen_geometric_mean": -1.0870442390441895, + "epoch": 0.64, + "grad_norm": 11.4375, + "learning_rate": 3.861488729261619e-06, + "log_odds": 3.296928882598877, + "log_odds_ratio": -0.27759355306625366, + "loss": 0.3345, + "rejected_geometric_mean": -4.084593296051025, + "step": 2565 + }, + { + "chosen_geometric_mean": -0.9500424861907959, + "epoch": 0.64, + "grad_norm": 6.03125, + "learning_rate": 3.860672175232788e-06, + "log_odds": 3.3676819801330566, + "log_odds_ratio": -0.2917408347129822, + "loss": 0.3467, + "rejected_geometric_mean": -4.001568794250488, + "step": 2566 + }, + { + "chosen_geometric_mean": -1.1416820287704468, + "epoch": 0.64, + "grad_norm": 2.125, + "learning_rate": 3.859855414893392e-06, + "log_odds": 2.0030338764190674, + "log_odds_ratio": -0.3563975691795349, + "loss": 0.2584, + "rejected_geometric_mean": -2.9416635036468506, + "step": 2567 + }, + { + "chosen_geometric_mean": -1.1963199377059937, + "epoch": 0.64, + "grad_norm": 4.6875, + "learning_rate": 3.8590384483672756e-06, + "log_odds": 0.8952903747558594, + "log_odds_ratio": -0.4014899730682373, + "loss": 0.2703, + "rejected_geometric_mean": -1.9210798740386963, + "step": 2568 + }, + { + "chosen_geometric_mean": -1.0272506475448608, + "epoch": 0.64, + "grad_norm": 7.6875, + "learning_rate": 3.858221275778307e-06, + "log_odds": 6.792426109313965, + "log_odds_ratio": -0.2718857526779175, + "loss": 0.3229, + "rejected_geometric_mean": -7.546143531799316, + "step": 2569 + }, + { + "chosen_geometric_mean": -1.0813090801239014, + "epoch": 0.64, + "grad_norm": 5.75, + "learning_rate": 3.85740389725039e-06, + "log_odds": 3.226386547088623, + "log_odds_ratio": -0.30648213624954224, + "loss": 0.3076, + "rejected_geometric_mean": -4.080289840698242, + "step": 2570 + }, + { + "chosen_geometric_mean": -1.442227840423584, + "epoch": 0.64, + "grad_norm": 15.0, + "learning_rate": 3.85658631290746e-06, + "log_odds": 5.286176681518555, + "log_odds_ratio": -0.17939278483390808, + "loss": 0.3261, + "rejected_geometric_mean": -6.501667022705078, + "step": 2571 + }, + { + "chosen_geometric_mean": -1.8914437294006348, + "epoch": 0.64, + "grad_norm": 31.75, + "learning_rate": 3.855768522873482e-06, + "log_odds": 4.556204795837402, + "log_odds_ratio": -0.2316173017024994, + "loss": 0.3534, + "rejected_geometric_mean": -6.172921180725098, + "step": 2572 + }, + { + "chosen_geometric_mean": -1.2361609935760498, + "epoch": 0.64, + "grad_norm": 17.25, + "learning_rate": 3.854950527272451e-06, + "log_odds": 4.733003616333008, + "log_odds_ratio": -0.19582286477088928, + "loss": 0.3148, + "rejected_geometric_mean": -5.74422550201416, + "step": 2573 + }, + { + "chosen_geometric_mean": -0.997673749923706, + "epoch": 0.64, + "grad_norm": 3.234375, + "learning_rate": 3.854132326228397e-06, + "log_odds": 4.751339912414551, + "log_odds_ratio": -0.2853861153125763, + "loss": 0.2467, + "rejected_geometric_mean": -5.406716823577881, + "step": 2574 + }, + { + "chosen_geometric_mean": -1.0915038585662842, + "epoch": 0.64, + "grad_norm": 23.25, + "learning_rate": 3.8533139198653765e-06, + "log_odds": 2.6718263626098633, + "log_odds_ratio": -0.2612884044647217, + "loss": 0.3189, + "rejected_geometric_mean": -3.461678981781006, + "step": 2575 + }, + { + "chosen_geometric_mean": -1.0306864976882935, + "epoch": 0.64, + "grad_norm": 5.0, + "learning_rate": 3.852495308307483e-06, + "log_odds": 4.63963508605957, + "log_odds_ratio": -0.1664217859506607, + "loss": 0.333, + "rejected_geometric_mean": -5.318719387054443, + "step": 2576 + }, + { + "chosen_geometric_mean": -1.08232581615448, + "epoch": 0.64, + "grad_norm": 3.328125, + "learning_rate": 3.851676491678833e-06, + "log_odds": 5.772259712219238, + "log_odds_ratio": -0.15367548167705536, + "loss": 0.3259, + "rejected_geometric_mean": -6.457863807678223, + "step": 2577 + }, + { + "chosen_geometric_mean": -1.2642970085144043, + "epoch": 0.64, + "grad_norm": 2.140625, + "learning_rate": 3.850857470103583e-06, + "log_odds": 2.2365946769714355, + "log_odds_ratio": -0.28637412190437317, + "loss": 0.2812, + "rejected_geometric_mean": -3.2896742820739746, + "step": 2578 + }, + { + "chosen_geometric_mean": -0.9543132185935974, + "epoch": 0.64, + "grad_norm": 24.0, + "learning_rate": 3.850038243705913e-06, + "log_odds": 3.9229931831359863, + "log_odds_ratio": -0.03696685656905174, + "loss": 0.299, + "rejected_geometric_mean": -4.401655197143555, + "step": 2579 + }, + { + "chosen_geometric_mean": -1.4791924953460693, + "epoch": 0.64, + "grad_norm": 16.625, + "learning_rate": 3.849218812610042e-06, + "log_odds": 2.509678840637207, + "log_odds_ratio": -0.23009000718593597, + "loss": 0.3763, + "rejected_geometric_mean": -3.804696559906006, + "step": 2580 + }, + { + "chosen_geometric_mean": -1.0565627813339233, + "epoch": 0.64, + "grad_norm": 8.375, + "learning_rate": 3.84839917694021e-06, + "log_odds": 2.682011365890503, + "log_odds_ratio": -0.3746558129787445, + "loss": 0.2703, + "rejected_geometric_mean": -3.542694568634033, + "step": 2581 + }, + { + "chosen_geometric_mean": -1.2794945240020752, + "epoch": 0.64, + "grad_norm": 4.09375, + "learning_rate": 3.847579336820697e-06, + "log_odds": 2.6990435123443604, + "log_odds_ratio": -0.08436655253171921, + "loss": 0.3262, + "rejected_geometric_mean": -3.6850228309631348, + "step": 2582 + }, + { + "chosen_geometric_mean": -1.0293188095092773, + "epoch": 0.64, + "grad_norm": 7.40625, + "learning_rate": 3.846759292375809e-06, + "log_odds": 2.226919174194336, + "log_odds_ratio": -0.16340559720993042, + "loss": 0.3304, + "rejected_geometric_mean": -2.892838478088379, + "step": 2583 + }, + { + "chosen_geometric_mean": -1.2297627925872803, + "epoch": 0.64, + "grad_norm": 14.6875, + "learning_rate": 3.845939043729885e-06, + "log_odds": 2.309343099594116, + "log_odds_ratio": -0.1817057877779007, + "loss": 0.277, + "rejected_geometric_mean": -3.2619423866271973, + "step": 2584 + }, + { + "chosen_geometric_mean": -0.9323784708976746, + "epoch": 0.64, + "grad_norm": 10.1875, + "learning_rate": 3.845118591007294e-06, + "log_odds": 4.701723098754883, + "log_odds_ratio": -0.1635391116142273, + "loss": 0.2957, + "rejected_geometric_mean": -5.1763105392456055, + "step": 2585 + }, + { + "chosen_geometric_mean": -1.2169606685638428, + "epoch": 0.64, + "grad_norm": 4.03125, + "learning_rate": 3.844297934332436e-06, + "log_odds": 4.233643531799316, + "log_odds_ratio": -0.27834251523017883, + "loss": 0.3305, + "rejected_geometric_mean": -5.22529935836792, + "step": 2586 + }, + { + "chosen_geometric_mean": -1.3071868419647217, + "epoch": 0.64, + "grad_norm": 16.25, + "learning_rate": 3.843477073829743e-06, + "log_odds": 1.9978203773498535, + "log_odds_ratio": -0.27896296977996826, + "loss": 0.2756, + "rejected_geometric_mean": -3.1299169063568115, + "step": 2587 + }, + { + "chosen_geometric_mean": -1.328627347946167, + "epoch": 0.64, + "grad_norm": 5.59375, + "learning_rate": 3.842656009623678e-06, + "log_odds": 3.7356462478637695, + "log_odds_ratio": -0.07595189660787582, + "loss": 0.2908, + "rejected_geometric_mean": -4.778645038604736, + "step": 2588 + }, + { + "chosen_geometric_mean": -0.7835734486579895, + "epoch": 0.64, + "grad_norm": 6.0, + "learning_rate": 3.841834741838731e-06, + "log_odds": 1.7841215133666992, + "log_odds_ratio": -0.24753473699092865, + "loss": 0.3076, + "rejected_geometric_mean": -2.144108295440674, + "step": 2589 + }, + { + "chosen_geometric_mean": -0.9502925872802734, + "epoch": 0.64, + "grad_norm": 2.296875, + "learning_rate": 3.841013270599429e-06, + "log_odds": 5.34929084777832, + "log_odds_ratio": -0.037424977868795395, + "loss": 0.2823, + "rejected_geometric_mean": -5.821372032165527, + "step": 2590 + }, + { + "chosen_geometric_mean": -0.8257895708084106, + "epoch": 0.64, + "grad_norm": 18.375, + "learning_rate": 3.840191596030326e-06, + "log_odds": 3.748641014099121, + "log_odds_ratio": -0.1954038143157959, + "loss": 0.302, + "rejected_geometric_mean": -4.139512062072754, + "step": 2591 + }, + { + "chosen_geometric_mean": -0.9925756454467773, + "epoch": 0.64, + "grad_norm": 12.8125, + "learning_rate": 3.839369718256006e-06, + "log_odds": 1.6481252908706665, + "log_odds_ratio": -0.2361520230770111, + "loss": 0.3068, + "rejected_geometric_mean": -2.2892045974731445, + "step": 2592 + }, + { + "chosen_geometric_mean": -1.1165249347686768, + "epoch": 0.64, + "grad_norm": 3.859375, + "learning_rate": 3.8385476374010875e-06, + "log_odds": 6.612290382385254, + "log_odds_ratio": -0.1812041848897934, + "loss": 0.3445, + "rejected_geometric_mean": -7.405904769897461, + "step": 2593 + }, + { + "chosen_geometric_mean": -1.057173728942871, + "epoch": 0.64, + "grad_norm": 2.375, + "learning_rate": 3.837725353590217e-06, + "log_odds": 3.0330097675323486, + "log_odds_ratio": -0.4507367014884949, + "loss": 0.2246, + "rejected_geometric_mean": -3.9515724182128906, + "step": 2594 + }, + { + "chosen_geometric_mean": -1.0458242893218994, + "epoch": 0.64, + "grad_norm": 3.21875, + "learning_rate": 3.836902866948071e-06, + "log_odds": 2.2208144664764404, + "log_odds_ratio": -0.24506208300590515, + "loss": 0.2612, + "rejected_geometric_mean": -2.934135675430298, + "step": 2595 + }, + { + "chosen_geometric_mean": -1.0202785730361938, + "epoch": 0.64, + "grad_norm": 7.09375, + "learning_rate": 3.8360801775993605e-06, + "log_odds": 1.6216604709625244, + "log_odds_ratio": -0.22118714451789856, + "loss": 0.3218, + "rejected_geometric_mean": -2.3123366832733154, + "step": 2596 + }, + { + "chosen_geometric_mean": -1.035448431968689, + "epoch": 0.64, + "grad_norm": 3.953125, + "learning_rate": 3.835257285668824e-06, + "log_odds": 1.3822014331817627, + "log_odds_ratio": -0.422220379114151, + "loss": 0.3333, + "rejected_geometric_mean": -2.2457475662231445, + "step": 2597 + }, + { + "chosen_geometric_mean": -1.0274518728256226, + "epoch": 0.64, + "grad_norm": 3.546875, + "learning_rate": 3.834434191281231e-06, + "log_odds": 3.2528560161590576, + "log_odds_ratio": -0.1317538619041443, + "loss": 0.3488, + "rejected_geometric_mean": -3.8988468647003174, + "step": 2598 + }, + { + "chosen_geometric_mean": -1.0271153450012207, + "epoch": 0.64, + "grad_norm": 16.5, + "learning_rate": 3.833610894561384e-06, + "log_odds": 3.347346305847168, + "log_odds_ratio": -0.16877280175685883, + "loss": 0.3214, + "rejected_geometric_mean": -4.006842613220215, + "step": 2599 + }, + { + "chosen_geometric_mean": -1.019626259803772, + "epoch": 0.64, + "grad_norm": 27.75, + "learning_rate": 3.832787395634112e-06, + "log_odds": 4.5942158699035645, + "log_odds_ratio": -0.2497999370098114, + "loss": 0.2989, + "rejected_geometric_mean": -5.354199409484863, + "step": 2600 + }, + { + "chosen_geometric_mean": -1.039206862449646, + "epoch": 0.64, + "grad_norm": 20.25, + "learning_rate": 3.83196369462428e-06, + "log_odds": 11.052948951721191, + "log_odds_ratio": -0.050354938954114914, + "loss": 0.3171, + "rejected_geometric_mean": -11.664239883422852, + "step": 2601 + }, + { + "chosen_geometric_mean": -1.1698944568634033, + "epoch": 0.64, + "grad_norm": 11.25, + "learning_rate": 3.831139791656779e-06, + "log_odds": 1.9346644878387451, + "log_odds_ratio": -0.29427239298820496, + "loss": 0.2865, + "rejected_geometric_mean": -2.8317782878875732, + "step": 2602 + }, + { + "chosen_geometric_mean": -1.1869895458221436, + "epoch": 0.64, + "grad_norm": 23.875, + "learning_rate": 3.830315686856532e-06, + "log_odds": 3.940281391143799, + "log_odds_ratio": -0.2014559507369995, + "loss": 0.3488, + "rejected_geometric_mean": -4.870266437530518, + "step": 2603 + }, + { + "chosen_geometric_mean": -1.243798851966858, + "epoch": 0.64, + "grad_norm": 2.609375, + "learning_rate": 3.829491380348496e-06, + "log_odds": 2.8408641815185547, + "log_odds_ratio": -0.13042402267456055, + "loss": 0.3062, + "rejected_geometric_mean": -3.7726056575775146, + "step": 2604 + }, + { + "chosen_geometric_mean": -0.9850530624389648, + "epoch": 0.64, + "grad_norm": 21.0, + "learning_rate": 3.8286668722576526e-06, + "log_odds": 4.740886688232422, + "log_odds_ratio": -0.1924276500940323, + "loss": 0.3018, + "rejected_geometric_mean": -5.340813636779785, + "step": 2605 + }, + { + "chosen_geometric_mean": -0.7839841246604919, + "epoch": 0.65, + "grad_norm": 4.875, + "learning_rate": 3.827842162709019e-06, + "log_odds": 1.7049241065979004, + "log_odds_ratio": -0.5214327573776245, + "loss": 0.3009, + "rejected_geometric_mean": -2.3386569023132324, + "step": 2606 + }, + { + "chosen_geometric_mean": -0.9612317681312561, + "epoch": 0.65, + "grad_norm": 4.6875, + "learning_rate": 3.827017251827639e-06, + "log_odds": 3.764291286468506, + "log_odds_ratio": -0.20949290692806244, + "loss": 0.2814, + "rejected_geometric_mean": -4.34598970413208, + "step": 2607 + }, + { + "chosen_geometric_mean": -1.0201187133789062, + "epoch": 0.65, + "grad_norm": 5.0, + "learning_rate": 3.82619213973859e-06, + "log_odds": 1.198730707168579, + "log_odds_ratio": -0.4187030792236328, + "loss": 0.2708, + "rejected_geometric_mean": -2.045273542404175, + "step": 2608 + }, + { + "chosen_geometric_mean": -1.323763132095337, + "epoch": 0.65, + "grad_norm": 18.0, + "learning_rate": 3.82536682656698e-06, + "log_odds": 1.2284737825393677, + "log_odds_ratio": -0.4005969762802124, + "loss": 0.3194, + "rejected_geometric_mean": -2.4198312759399414, + "step": 2609 + }, + { + "chosen_geometric_mean": -1.0921666622161865, + "epoch": 0.65, + "grad_norm": 10.625, + "learning_rate": 3.824541312437944e-06, + "log_odds": 4.148761749267578, + "log_odds_ratio": -0.31197619438171387, + "loss": 0.3463, + "rejected_geometric_mean": -4.965094566345215, + "step": 2610 + }, + { + "chosen_geometric_mean": -1.0290415287017822, + "epoch": 0.65, + "grad_norm": 16.625, + "learning_rate": 3.823715597476652e-06, + "log_odds": 6.122892379760742, + "log_odds_ratio": -0.07945604622364044, + "loss": 0.3136, + "rejected_geometric_mean": -6.7186055183410645, + "step": 2611 + }, + { + "chosen_geometric_mean": -1.2126885652542114, + "epoch": 0.65, + "grad_norm": 3.1875, + "learning_rate": 3.822889681808299e-06, + "log_odds": 2.5251801013946533, + "log_odds_ratio": -0.4228968024253845, + "loss": 0.2982, + "rejected_geometric_mean": -3.56766414642334, + "step": 2612 + }, + { + "chosen_geometric_mean": -1.5119532346725464, + "epoch": 0.65, + "grad_norm": 23.625, + "learning_rate": 3.8220635655581186e-06, + "log_odds": 2.69916033744812, + "log_odds_ratio": -0.8210670948028564, + "loss": 0.3436, + "rejected_geometric_mean": -4.117437362670898, + "step": 2613 + }, + { + "chosen_geometric_mean": -1.1479835510253906, + "epoch": 0.65, + "grad_norm": 13.375, + "learning_rate": 3.821237248851364e-06, + "log_odds": 2.9342167377471924, + "log_odds_ratio": -0.1934521645307541, + "loss": 0.3403, + "rejected_geometric_mean": -3.7850871086120605, + "step": 2614 + }, + { + "chosen_geometric_mean": -1.0719807147979736, + "epoch": 0.65, + "grad_norm": 4.84375, + "learning_rate": 3.8204107318133295e-06, + "log_odds": 5.048535346984863, + "log_odds_ratio": -0.13100770115852356, + "loss": 0.2593, + "rejected_geometric_mean": -5.789877891540527, + "step": 2615 + }, + { + "chosen_geometric_mean": -1.269719123840332, + "epoch": 0.65, + "grad_norm": 3.03125, + "learning_rate": 3.819584014569333e-06, + "log_odds": 3.0434651374816895, + "log_odds_ratio": -0.14986176788806915, + "loss": 0.3052, + "rejected_geometric_mean": -4.01906156539917, + "step": 2616 + }, + { + "chosen_geometric_mean": -1.0447683334350586, + "epoch": 0.65, + "grad_norm": 2.84375, + "learning_rate": 3.818757097244724e-06, + "log_odds": 1.0781538486480713, + "log_odds_ratio": -0.37318867444992065, + "loss": 0.2988, + "rejected_geometric_mean": -1.9177113771438599, + "step": 2617 + }, + { + "chosen_geometric_mean": -0.9145344495773315, + "epoch": 0.65, + "grad_norm": 7.28125, + "learning_rate": 3.817929979964884e-06, + "log_odds": 4.058067321777344, + "log_odds_ratio": -0.3073790669441223, + "loss": 0.2934, + "rejected_geometric_mean": -4.607049942016602, + "step": 2618 + }, + { + "chosen_geometric_mean": -1.2318109273910522, + "epoch": 0.65, + "grad_norm": 6.59375, + "learning_rate": 3.817102662855223e-06, + "log_odds": 0.9261904954910278, + "log_odds_ratio": -0.40465813875198364, + "loss": 0.3234, + "rejected_geometric_mean": -1.9504003524780273, + "step": 2619 + }, + { + "chosen_geometric_mean": -1.0588762760162354, + "epoch": 0.65, + "grad_norm": 2.765625, + "learning_rate": 3.816275146041183e-06, + "log_odds": 1.9018535614013672, + "log_odds_ratio": -0.23490306735038757, + "loss": 0.3041, + "rejected_geometric_mean": -2.665832757949829, + "step": 2620 + }, + { + "chosen_geometric_mean": -1.200402855873108, + "epoch": 0.65, + "grad_norm": 5.5, + "learning_rate": 3.815447429648236e-06, + "log_odds": 1.2457019090652466, + "log_odds_ratio": -0.35501766204833984, + "loss": 0.3388, + "rejected_geometric_mean": -2.2018120288848877, + "step": 2621 + }, + { + "chosen_geometric_mean": -1.09388267993927, + "epoch": 0.65, + "grad_norm": 2.109375, + "learning_rate": 3.8146195138018817e-06, + "log_odds": 2.923619270324707, + "log_odds_ratio": -0.24072161316871643, + "loss": 0.2714, + "rejected_geometric_mean": -3.725447654724121, + "step": 2622 + }, + { + "chosen_geometric_mean": -1.040645956993103, + "epoch": 0.65, + "grad_norm": 5.125, + "learning_rate": 3.8137913986276537e-06, + "log_odds": 6.2342939376831055, + "log_odds_ratio": -0.18597625195980072, + "loss": 0.2916, + "rejected_geometric_mean": -6.960068225860596, + "step": 2623 + }, + { + "chosen_geometric_mean": -0.9957436323165894, + "epoch": 0.65, + "grad_norm": 5.84375, + "learning_rate": 3.8129630842511136e-06, + "log_odds": 3.1036674976348877, + "log_odds_ratio": -0.12063932418823242, + "loss": 0.2665, + "rejected_geometric_mean": -3.6828060150146484, + "step": 2624 + }, + { + "chosen_geometric_mean": -1.4223172664642334, + "epoch": 0.65, + "grad_norm": 8.8125, + "learning_rate": 3.8121345707978522e-06, + "log_odds": 3.4581995010375977, + "log_odds_ratio": -0.39039623737335205, + "loss": 0.3561, + "rejected_geometric_mean": -4.77419900894165, + "step": 2625 + }, + { + "chosen_geometric_mean": -0.8513588309288025, + "epoch": 0.65, + "grad_norm": 10.125, + "learning_rate": 3.811305858393495e-06, + "log_odds": 2.468433380126953, + "log_odds_ratio": -0.21877998113632202, + "loss": 0.2883, + "rejected_geometric_mean": -2.9334423542022705, + "step": 2626 + }, + { + "chosen_geometric_mean": -0.9973886013031006, + "epoch": 0.65, + "grad_norm": 18.25, + "learning_rate": 3.8104769471636926e-06, + "log_odds": -0.009316571056842804, + "log_odds_ratio": -0.7037814855575562, + "loss": 0.3262, + "rejected_geometric_mean": -0.9779956936836243, + "step": 2627 + }, + { + "chosen_geometric_mean": -1.2455852031707764, + "epoch": 0.65, + "grad_norm": 13.0625, + "learning_rate": 3.809647837234129e-06, + "log_odds": 2.0230560302734375, + "log_odds_ratio": -0.2651611566543579, + "loss": 0.2748, + "rejected_geometric_mean": -3.0528030395507812, + "step": 2628 + }, + { + "chosen_geometric_mean": -1.0929946899414062, + "epoch": 0.65, + "grad_norm": 11.5625, + "learning_rate": 3.8088185287305157e-06, + "log_odds": 4.016529083251953, + "log_odds_ratio": -0.08088642358779907, + "loss": 0.3341, + "rejected_geometric_mean": -4.732257843017578, + "step": 2629 + }, + { + "chosen_geometric_mean": -1.1288988590240479, + "epoch": 0.65, + "grad_norm": 8.625, + "learning_rate": 3.8079890217785974e-06, + "log_odds": 1.7193808555603027, + "log_odds_ratio": -0.4371904730796814, + "loss": 0.2907, + "rejected_geometric_mean": -2.731008529663086, + "step": 2630 + }, + { + "chosen_geometric_mean": -1.016314148902893, + "epoch": 0.65, + "grad_norm": 4.1875, + "learning_rate": 3.8071593165041463e-06, + "log_odds": 4.385809898376465, + "log_odds_ratio": -0.1969563364982605, + "loss": 0.2968, + "rejected_geometric_mean": -5.04103422164917, + "step": 2631 + }, + { + "chosen_geometric_mean": -1.0894695520401, + "epoch": 0.65, + "grad_norm": 7.40625, + "learning_rate": 3.8063294130329663e-06, + "log_odds": 1.9789623022079468, + "log_odds_ratio": -0.29927128553390503, + "loss": 0.3184, + "rejected_geometric_mean": -2.827195882797241, + "step": 2632 + }, + { + "chosen_geometric_mean": -1.404678225517273, + "epoch": 0.65, + "grad_norm": 14.5625, + "learning_rate": 3.80549931149089e-06, + "log_odds": 0.6495139598846436, + "log_odds_ratio": -0.6171324849128723, + "loss": 0.3193, + "rejected_geometric_mean": -2.035480260848999, + "step": 2633 + }, + { + "chosen_geometric_mean": -1.0550583600997925, + "epoch": 0.65, + "grad_norm": 20.75, + "learning_rate": 3.804669012003781e-06, + "log_odds": 6.0956926345825195, + "log_odds_ratio": -0.14982399344444275, + "loss": 0.3046, + "rejected_geometric_mean": -6.799950122833252, + "step": 2634 + }, + { + "chosen_geometric_mean": -1.083102822303772, + "epoch": 0.65, + "grad_norm": 18.875, + "learning_rate": 3.803838514697532e-06, + "log_odds": 8.662482261657715, + "log_odds_ratio": -0.11374816298484802, + "loss": 0.325, + "rejected_geometric_mean": -9.388110160827637, + "step": 2635 + }, + { + "chosen_geometric_mean": -1.045095682144165, + "epoch": 0.65, + "grad_norm": 2.28125, + "learning_rate": 3.803007819698068e-06, + "log_odds": 0.257210373878479, + "log_odds_ratio": -0.5817781090736389, + "loss": 0.3034, + "rejected_geometric_mean": -1.2222681045532227, + "step": 2636 + }, + { + "chosen_geometric_mean": -1.0831416845321655, + "epoch": 0.65, + "grad_norm": 1.7421875, + "learning_rate": 3.80217692713134e-06, + "log_odds": 7.325876712799072, + "log_odds_ratio": -0.1418386846780777, + "loss": 0.2332, + "rejected_geometric_mean": -8.069340705871582, + "step": 2637 + }, + { + "chosen_geometric_mean": -0.8905470371246338, + "epoch": 0.65, + "grad_norm": 2.09375, + "learning_rate": 3.8013458371233336e-06, + "log_odds": 5.49990701675415, + "log_odds_ratio": -0.021487122401595116, + "loss": 0.2418, + "rejected_geometric_mean": -5.856953144073486, + "step": 2638 + }, + { + "chosen_geometric_mean": -1.2267389297485352, + "epoch": 0.65, + "grad_norm": 3.109375, + "learning_rate": 3.8005145498000608e-06, + "log_odds": 1.9952929019927979, + "log_odds_ratio": -0.4685259461402893, + "loss": 0.2885, + "rejected_geometric_mean": -3.057983875274658, + "step": 2639 + }, + { + "chosen_geometric_mean": -1.0309162139892578, + "epoch": 0.65, + "grad_norm": 3.140625, + "learning_rate": 3.7996830652875644e-06, + "log_odds": 1.7005842924118042, + "log_odds_ratio": -0.37912020087242126, + "loss": 0.2701, + "rejected_geometric_mean": -2.5011157989501953, + "step": 2640 + }, + { + "chosen_geometric_mean": -1.2333064079284668, + "epoch": 0.65, + "grad_norm": 12.9375, + "learning_rate": 3.7988513837119173e-06, + "log_odds": 3.1556153297424316, + "log_odds_ratio": -0.2250385731458664, + "loss": 0.3354, + "rejected_geometric_mean": -4.123537063598633, + "step": 2641 + }, + { + "chosen_geometric_mean": -0.9063935875892639, + "epoch": 0.65, + "grad_norm": 11.5625, + "learning_rate": 3.798019505199224e-06, + "log_odds": 4.895097732543945, + "log_odds_ratio": -0.0629538744688034, + "loss": 0.2922, + "rejected_geometric_mean": -5.324473857879639, + "step": 2642 + }, + { + "chosen_geometric_mean": -1.2206209897994995, + "epoch": 0.65, + "grad_norm": 16.75, + "learning_rate": 3.7971874298756155e-06, + "log_odds": 1.6709518432617188, + "log_odds_ratio": -0.2710086703300476, + "loss": 0.2995, + "rejected_geometric_mean": -2.616710901260376, + "step": 2643 + }, + { + "chosen_geometric_mean": -1.2462830543518066, + "epoch": 0.65, + "grad_norm": 2.25, + "learning_rate": 3.7963551578672555e-06, + "log_odds": 2.438351631164551, + "log_odds_ratio": -0.18735553324222565, + "loss": 0.2832, + "rejected_geometric_mean": -3.433749198913574, + "step": 2644 + }, + { + "chosen_geometric_mean": -1.310444712638855, + "epoch": 0.65, + "grad_norm": 3.90625, + "learning_rate": 3.795522689300335e-06, + "log_odds": 1.9954801797866821, + "log_odds_ratio": -0.284218966960907, + "loss": 0.2815, + "rejected_geometric_mean": -3.1244964599609375, + "step": 2645 + }, + { + "chosen_geometric_mean": -1.1118035316467285, + "epoch": 0.66, + "grad_norm": 3.515625, + "learning_rate": 3.794690024301079e-06, + "log_odds": 3.0771608352661133, + "log_odds_ratio": -0.3849495053291321, + "loss": 0.2824, + "rejected_geometric_mean": -3.996958017349243, + "step": 2646 + }, + { + "chosen_geometric_mean": -1.6525850296020508, + "epoch": 0.66, + "grad_norm": 25.375, + "learning_rate": 3.7938571629957356e-06, + "log_odds": 2.051379680633545, + "log_odds_ratio": -0.2750946283340454, + "loss": 0.3162, + "rejected_geometric_mean": -3.5751705169677734, + "step": 2647 + }, + { + "chosen_geometric_mean": -1.0789909362792969, + "epoch": 0.66, + "grad_norm": 9.5, + "learning_rate": 3.79302410551059e-06, + "log_odds": 4.966122627258301, + "log_odds_ratio": -0.10198795050382614, + "loss": 0.3285, + "rejected_geometric_mean": -5.6694769859313965, + "step": 2648 + }, + { + "chosen_geometric_mean": -1.205091953277588, + "epoch": 0.66, + "grad_norm": 11.0625, + "learning_rate": 3.792190851971952e-06, + "log_odds": 7.087157249450684, + "log_odds_ratio": -0.1089215874671936, + "loss": 0.3174, + "rejected_geometric_mean": -7.973397254943848, + "step": 2649 + }, + { + "chosen_geometric_mean": -1.1238048076629639, + "epoch": 0.66, + "grad_norm": 33.25, + "learning_rate": 3.791357402506163e-06, + "log_odds": 4.701557636260986, + "log_odds_ratio": -0.36421340703964233, + "loss": 0.3891, + "rejected_geometric_mean": -5.670112609863281, + "step": 2650 + }, + { + "chosen_geometric_mean": -1.2469866275787354, + "epoch": 0.66, + "grad_norm": 31.75, + "learning_rate": 3.7905237572395943e-06, + "log_odds": 8.409323692321777, + "log_odds_ratio": -0.44947752356529236, + "loss": 0.3016, + "rejected_geometric_mean": -9.468398094177246, + "step": 2651 + }, + { + "chosen_geometric_mean": -1.1502692699432373, + "epoch": 0.66, + "grad_norm": 7.78125, + "learning_rate": 3.789689916298646e-06, + "log_odds": 5.689252853393555, + "log_odds_ratio": -0.028058316558599472, + "loss": 0.2629, + "rejected_geometric_mean": -6.4303364753723145, + "step": 2652 + }, + { + "chosen_geometric_mean": -1.2746509313583374, + "epoch": 0.66, + "grad_norm": 5.40625, + "learning_rate": 3.7888558798097495e-06, + "log_odds": 4.05543327331543, + "log_odds_ratio": -0.12516829371452332, + "loss": 0.2711, + "rejected_geometric_mean": -5.036910533905029, + "step": 2653 + }, + { + "chosen_geometric_mean": -1.1600042581558228, + "epoch": 0.66, + "grad_norm": 8.5625, + "learning_rate": 3.7880216478993638e-06, + "log_odds": 3.1276707649230957, + "log_odds_ratio": -0.14078189432621002, + "loss": 0.334, + "rejected_geometric_mean": -3.9735589027404785, + "step": 2654 + }, + { + "chosen_geometric_mean": -0.9745467901229858, + "epoch": 0.66, + "grad_norm": 7.28125, + "learning_rate": 3.7871872206939785e-06, + "log_odds": 2.462973117828369, + "log_odds_ratio": -0.23007862269878387, + "loss": 0.2814, + "rejected_geometric_mean": -3.1145710945129395, + "step": 2655 + }, + { + "chosen_geometric_mean": -1.2444887161254883, + "epoch": 0.66, + "grad_norm": 22.625, + "learning_rate": 3.7863525983201133e-06, + "log_odds": 2.2078857421875, + "log_odds_ratio": -0.2946384847164154, + "loss": 0.333, + "rejected_geometric_mean": -3.224109411239624, + "step": 2656 + }, + { + "chosen_geometric_mean": -1.1144405603408813, + "epoch": 0.66, + "grad_norm": 4.90625, + "learning_rate": 3.7855177809043154e-06, + "log_odds": 5.256767272949219, + "log_odds_ratio": -0.2639402449131012, + "loss": 0.2965, + "rejected_geometric_mean": -6.069121360778809, + "step": 2657 + }, + { + "chosen_geometric_mean": -1.3016952276229858, + "epoch": 0.66, + "grad_norm": 3.328125, + "learning_rate": 3.7846827685731657e-06, + "log_odds": 5.142940998077393, + "log_odds_ratio": -0.23541535437107086, + "loss": 0.298, + "rejected_geometric_mean": -6.230714321136475, + "step": 2658 + }, + { + "chosen_geometric_mean": -0.9801158308982849, + "epoch": 0.66, + "grad_norm": 4.21875, + "learning_rate": 3.783847561453271e-06, + "log_odds": 0.8323633670806885, + "log_odds_ratio": -0.45027631521224976, + "loss": 0.2948, + "rejected_geometric_mean": -1.626997470855713, + "step": 2659 + }, + { + "chosen_geometric_mean": -1.044137716293335, + "epoch": 0.66, + "grad_norm": 18.5, + "learning_rate": 3.7830121596712683e-06, + "log_odds": 3.1211459636688232, + "log_odds_ratio": -0.1483088880777359, + "loss": 0.3048, + "rejected_geometric_mean": -3.7820558547973633, + "step": 2660 + }, + { + "chosen_geometric_mean": -1.114497184753418, + "epoch": 0.66, + "grad_norm": 2.984375, + "learning_rate": 3.782176563353825e-06, + "log_odds": 0.4186105728149414, + "log_odds_ratio": -0.5067511796951294, + "loss": 0.2757, + "rejected_geometric_mean": -1.4058160781860352, + "step": 2661 + }, + { + "chosen_geometric_mean": -1.207599401473999, + "epoch": 0.66, + "grad_norm": 2.421875, + "learning_rate": 3.7813407726276372e-06, + "log_odds": 4.341401100158691, + "log_odds_ratio": -0.3379516005516052, + "loss": 0.3113, + "rejected_geometric_mean": -5.365636825561523, + "step": 2662 + }, + { + "chosen_geometric_mean": -1.0598827600479126, + "epoch": 0.66, + "grad_norm": 4.1875, + "learning_rate": 3.780504787619431e-06, + "log_odds": 5.62652063369751, + "log_odds_ratio": -0.09202774614095688, + "loss": 0.2562, + "rejected_geometric_mean": -6.31303596496582, + "step": 2663 + }, + { + "chosen_geometric_mean": -1.1130460500717163, + "epoch": 0.66, + "grad_norm": 4.84375, + "learning_rate": 3.779668608455962e-06, + "log_odds": 9.078641891479492, + "log_odds_ratio": -0.14469148218631744, + "loss": 0.3484, + "rejected_geometric_mean": -9.869985580444336, + "step": 2664 + }, + { + "chosen_geometric_mean": -1.2648625373840332, + "epoch": 0.66, + "grad_norm": 5.125, + "learning_rate": 3.778832235264015e-06, + "log_odds": 1.8069480657577515, + "log_odds_ratio": -0.550600528717041, + "loss": 0.2884, + "rejected_geometric_mean": -3.0735409259796143, + "step": 2665 + }, + { + "chosen_geometric_mean": -1.165282964706421, + "epoch": 0.66, + "grad_norm": 7.4375, + "learning_rate": 3.7779956681704044e-06, + "log_odds": 1.9786021709442139, + "log_odds_ratio": -0.2515631914138794, + "loss": 0.3251, + "rejected_geometric_mean": -2.879625082015991, + "step": 2666 + }, + { + "chosen_geometric_mean": -1.1913625001907349, + "epoch": 0.66, + "grad_norm": 7.5625, + "learning_rate": 3.7771589073019733e-06, + "log_odds": 10.49095344543457, + "log_odds_ratio": -0.22861365973949432, + "loss": 0.3063, + "rejected_geometric_mean": -11.353425025939941, + "step": 2667 + }, + { + "chosen_geometric_mean": -1.1728646755218506, + "epoch": 0.66, + "grad_norm": 15.3125, + "learning_rate": 3.7763219527855953e-06, + "log_odds": 3.7963216304779053, + "log_odds_ratio": -0.3611815869808197, + "loss": 0.2635, + "rejected_geometric_mean": -4.774584770202637, + "step": 2668 + }, + { + "chosen_geometric_mean": -1.0528210401535034, + "epoch": 0.66, + "grad_norm": 4.8125, + "learning_rate": 3.775484804748172e-06, + "log_odds": 6.635986804962158, + "log_odds_ratio": -0.20795083045959473, + "loss": 0.2629, + "rejected_geometric_mean": -7.41180419921875, + "step": 2669 + }, + { + "chosen_geometric_mean": -1.0054082870483398, + "epoch": 0.66, + "grad_norm": 2.984375, + "learning_rate": 3.7746474633166363e-06, + "log_odds": 2.5473697185516357, + "log_odds_ratio": -0.5018824338912964, + "loss": 0.3096, + "rejected_geometric_mean": -3.3920488357543945, + "step": 2670 + }, + { + "chosen_geometric_mean": -0.9623476266860962, + "epoch": 0.66, + "grad_norm": 9.375, + "learning_rate": 3.7738099286179476e-06, + "log_odds": 3.1117372512817383, + "log_odds_ratio": -0.21865186095237732, + "loss": 0.3088, + "rejected_geometric_mean": -3.7297375202178955, + "step": 2671 + }, + { + "chosen_geometric_mean": -1.1366455554962158, + "epoch": 0.66, + "grad_norm": 4.375, + "learning_rate": 3.772972200779098e-06, + "log_odds": 0.308260977268219, + "log_odds_ratio": -0.5689750909805298, + "loss": 0.3304, + "rejected_geometric_mean": -1.347390055656433, + "step": 2672 + }, + { + "chosen_geometric_mean": -1.1130125522613525, + "epoch": 0.66, + "grad_norm": 4.1875, + "learning_rate": 3.7721342799271066e-06, + "log_odds": 7.507782936096191, + "log_odds_ratio": -0.2873793840408325, + "loss": 0.3659, + "rejected_geometric_mean": -8.35838794708252, + "step": 2673 + }, + { + "chosen_geometric_mean": -1.4368435144424438, + "epoch": 0.66, + "grad_norm": 21.125, + "learning_rate": 3.771296166189022e-06, + "log_odds": 4.561524391174316, + "log_odds_ratio": -0.34778839349746704, + "loss": 0.3703, + "rejected_geometric_mean": -5.781425476074219, + "step": 2674 + }, + { + "chosen_geometric_mean": -1.2576804161071777, + "epoch": 0.66, + "grad_norm": 15.375, + "learning_rate": 3.770457859691923e-06, + "log_odds": 7.37758207321167, + "log_odds_ratio": -0.13412222266197205, + "loss": 0.2951, + "rejected_geometric_mean": -8.355745315551758, + "step": 2675 + }, + { + "chosen_geometric_mean": -1.235615849494934, + "epoch": 0.66, + "grad_norm": 8.375, + "learning_rate": 3.7696193605629163e-06, + "log_odds": 0.874864935874939, + "log_odds_ratio": -0.43667104840278625, + "loss": 0.2739, + "rejected_geometric_mean": -1.979292631149292, + "step": 2676 + }, + { + "chosen_geometric_mean": -1.009299874305725, + "epoch": 0.66, + "grad_norm": 11.0, + "learning_rate": 3.768780668929137e-06, + "log_odds": 14.078843116760254, + "log_odds_ratio": -0.09243211895227432, + "loss": 0.3274, + "rejected_geometric_mean": -14.70440673828125, + "step": 2677 + }, + { + "chosen_geometric_mean": -1.257749319076538, + "epoch": 0.66, + "grad_norm": 4.75, + "learning_rate": 3.7679417849177546e-06, + "log_odds": 1.243322730064392, + "log_odds_ratio": -0.4381979703903198, + "loss": 0.3525, + "rejected_geometric_mean": -2.350343704223633, + "step": 2678 + }, + { + "chosen_geometric_mean": -0.9179339408874512, + "epoch": 0.66, + "grad_norm": 4.71875, + "learning_rate": 3.7671027086559614e-06, + "log_odds": 9.476823806762695, + "log_odds_ratio": -0.1465834379196167, + "loss": 0.3269, + "rejected_geometric_mean": -9.920783042907715, + "step": 2679 + }, + { + "chosen_geometric_mean": -1.4288406372070312, + "epoch": 0.66, + "grad_norm": 4.84375, + "learning_rate": 3.7662634402709817e-06, + "log_odds": 1.6330982446670532, + "log_odds_ratio": -0.5158208012580872, + "loss": 0.3869, + "rejected_geometric_mean": -2.9566996097564697, + "step": 2680 + }, + { + "chosen_geometric_mean": -1.0226385593414307, + "epoch": 0.66, + "grad_norm": 4.53125, + "learning_rate": 3.7654239798900705e-06, + "log_odds": 6.754878520965576, + "log_odds_ratio": -0.22308893501758575, + "loss": 0.3398, + "rejected_geometric_mean": -7.4458513259887695, + "step": 2681 + }, + { + "chosen_geometric_mean": -1.5192553997039795, + "epoch": 0.66, + "grad_norm": 24.125, + "learning_rate": 3.7645843276405076e-06, + "log_odds": 4.539417266845703, + "log_odds_ratio": -0.20547986030578613, + "loss": 0.3422, + "rejected_geometric_mean": -5.823016166687012, + "step": 2682 + }, + { + "chosen_geometric_mean": -1.09922456741333, + "epoch": 0.66, + "grad_norm": 24.0, + "learning_rate": 3.7637444836496063e-06, + "log_odds": 3.3587608337402344, + "log_odds_ratio": -0.23793897032737732, + "loss": 0.3932, + "rejected_geometric_mean": -4.183681488037109, + "step": 2683 + }, + { + "chosen_geometric_mean": -1.1976064443588257, + "epoch": 0.66, + "grad_norm": 15.5625, + "learning_rate": 3.7629044480447056e-06, + "log_odds": 0.7537874579429626, + "log_odds_ratio": -0.4052823781967163, + "loss": 0.3691, + "rejected_geometric_mean": -1.8013861179351807, + "step": 2684 + }, + { + "chosen_geometric_mean": -1.2122011184692383, + "epoch": 0.66, + "grad_norm": 10.8125, + "learning_rate": 3.7620642209531765e-06, + "log_odds": 11.67483139038086, + "log_odds_ratio": -0.09394214302301407, + "loss": 0.3574, + "rejected_geometric_mean": -12.56550121307373, + "step": 2685 + }, + { + "chosen_geometric_mean": -1.032886028289795, + "epoch": 0.67, + "grad_norm": 10.3125, + "learning_rate": 3.7612238025024174e-06, + "log_odds": 12.2777099609375, + "log_odds_ratio": -0.06978849321603775, + "loss": 0.2741, + "rejected_geometric_mean": -12.89223861694336, + "step": 2686 + }, + { + "chosen_geometric_mean": -0.9674756526947021, + "epoch": 0.67, + "grad_norm": 38.5, + "learning_rate": 3.7603831928198555e-06, + "log_odds": 1.3240318298339844, + "log_odds_ratio": -0.3904668390750885, + "loss": 0.4416, + "rejected_geometric_mean": -1.9960248470306396, + "step": 2687 + }, + { + "chosen_geometric_mean": -1.3320989608764648, + "epoch": 0.67, + "grad_norm": 23.125, + "learning_rate": 3.7595423920329464e-06, + "log_odds": 5.727843284606934, + "log_odds_ratio": -0.25430628657341003, + "loss": 0.3342, + "rejected_geometric_mean": -6.834096908569336, + "step": 2688 + }, + { + "chosen_geometric_mean": -1.1048150062561035, + "epoch": 0.67, + "grad_norm": 9.3125, + "learning_rate": 3.7587014002691774e-06, + "log_odds": 4.991973876953125, + "log_odds_ratio": -0.03938442841172218, + "loss": 0.2858, + "rejected_geometric_mean": -5.708247661590576, + "step": 2689 + }, + { + "chosen_geometric_mean": -1.0166209936141968, + "epoch": 0.67, + "grad_norm": 22.25, + "learning_rate": 3.757860217656062e-06, + "log_odds": 10.206971168518066, + "log_odds_ratio": -0.0021793232299387455, + "loss": 0.3657, + "rejected_geometric_mean": -10.772205352783203, + "step": 2690 + }, + { + "chosen_geometric_mean": -1.0547176599502563, + "epoch": 0.67, + "grad_norm": 10.875, + "learning_rate": 3.757018844321144e-06, + "log_odds": 7.456261157989502, + "log_odds_ratio": -0.006227016914635897, + "loss": 0.2582, + "rejected_geometric_mean": -8.085004806518555, + "step": 2691 + }, + { + "chosen_geometric_mean": -1.716565728187561, + "epoch": 0.67, + "grad_norm": 12.125, + "learning_rate": 3.756177280391997e-06, + "log_odds": 2.0943655967712402, + "log_odds_ratio": -0.8925414085388184, + "loss": 0.3322, + "rejected_geometric_mean": -3.7081241607666016, + "step": 2692 + }, + { + "chosen_geometric_mean": -1.1779991388320923, + "epoch": 0.67, + "grad_norm": 2.53125, + "learning_rate": 3.75533552599622e-06, + "log_odds": 2.361445903778076, + "log_odds_ratio": -0.22792521119117737, + "loss": 0.3015, + "rejected_geometric_mean": -3.2891383171081543, + "step": 2693 + }, + { + "chosen_geometric_mean": -1.0554335117340088, + "epoch": 0.67, + "grad_norm": 3.921875, + "learning_rate": 3.754493581261445e-06, + "log_odds": 0.5729128122329712, + "log_odds_ratio": -0.477164626121521, + "loss": 0.3485, + "rejected_geometric_mean": -1.468501329421997, + "step": 2694 + }, + { + "chosen_geometric_mean": -1.0795528888702393, + "epoch": 0.67, + "grad_norm": 3.59375, + "learning_rate": 3.7536514463153296e-06, + "log_odds": 4.615596771240234, + "log_odds_ratio": -0.20921963453292847, + "loss": 0.2682, + "rejected_geometric_mean": -5.397600173950195, + "step": 2695 + }, + { + "chosen_geometric_mean": -1.2727516889572144, + "epoch": 0.67, + "grad_norm": 6.15625, + "learning_rate": 3.752809121285562e-06, + "log_odds": 2.6367006301879883, + "log_odds_ratio": -0.21960756182670593, + "loss": 0.3076, + "rejected_geometric_mean": -3.6324408054351807, + "step": 2696 + }, + { + "chosen_geometric_mean": -1.2376737594604492, + "epoch": 0.67, + "grad_norm": 4.4375, + "learning_rate": 3.7519666062998607e-06, + "log_odds": 2.933621406555176, + "log_odds_ratio": -0.3121088147163391, + "loss": 0.276, + "rejected_geometric_mean": -3.9704272747039795, + "step": 2697 + }, + { + "chosen_geometric_mean": -1.1065212488174438, + "epoch": 0.67, + "grad_norm": 5.28125, + "learning_rate": 3.7511239014859692e-06, + "log_odds": 2.8439159393310547, + "log_odds_ratio": -0.282915323972702, + "loss": 0.273, + "rejected_geometric_mean": -3.7221922874450684, + "step": 2698 + }, + { + "chosen_geometric_mean": -1.298832654953003, + "epoch": 0.67, + "grad_norm": 7.375, + "learning_rate": 3.7502810069716622e-06, + "log_odds": 2.8497519493103027, + "log_odds_ratio": -0.2607070207595825, + "loss": 0.3732, + "rejected_geometric_mean": -3.956256628036499, + "step": 2699 + }, + { + "chosen_geometric_mean": -0.9546579718589783, + "epoch": 0.67, + "grad_norm": 14.25, + "learning_rate": 3.7494379228847426e-06, + "log_odds": 5.786952972412109, + "log_odds_ratio": -0.172002375125885, + "loss": 0.2982, + "rejected_geometric_mean": -6.35004997253418, + "step": 2700 + }, + { + "chosen_geometric_mean": -1.1973353624343872, + "epoch": 0.67, + "grad_norm": 2.5, + "learning_rate": 3.748594649353043e-06, + "log_odds": 0.9365735054016113, + "log_odds_ratio": -0.4599442481994629, + "loss": 0.3319, + "rejected_geometric_mean": -1.9948488473892212, + "step": 2701 + }, + { + "chosen_geometric_mean": -1.0957565307617188, + "epoch": 0.67, + "grad_norm": 2.453125, + "learning_rate": 3.747751186504423e-06, + "log_odds": 2.0155715942382812, + "log_odds_ratio": -0.22556884586811066, + "loss": 0.3082, + "rejected_geometric_mean": -2.822813034057617, + "step": 2702 + }, + { + "chosen_geometric_mean": -1.045401692390442, + "epoch": 0.67, + "grad_norm": 7.4375, + "learning_rate": 3.746907534466773e-06, + "log_odds": 1.9480199813842773, + "log_odds_ratio": -0.19258028268814087, + "loss": 0.3557, + "rejected_geometric_mean": -2.6449289321899414, + "step": 2703 + }, + { + "chosen_geometric_mean": -0.881607711315155, + "epoch": 0.67, + "grad_norm": 3.53125, + "learning_rate": 3.746063693368008e-06, + "log_odds": 4.7614426612854, + "log_odds_ratio": -0.27420637011528015, + "loss": 0.3037, + "rejected_geometric_mean": -5.270364284515381, + "step": 2704 + }, + { + "chosen_geometric_mean": -1.2197074890136719, + "epoch": 0.67, + "grad_norm": 5.0, + "learning_rate": 3.7452196633360794e-06, + "log_odds": 1.5058835744857788, + "log_odds_ratio": -0.5696331262588501, + "loss": 0.3379, + "rejected_geometric_mean": -2.653787136077881, + "step": 2705 + }, + { + "chosen_geometric_mean": -1.1376478672027588, + "epoch": 0.67, + "grad_norm": 2.078125, + "learning_rate": 3.7443754444989587e-06, + "log_odds": 2.1839284896850586, + "log_odds_ratio": -0.16198919713497162, + "loss": 0.2975, + "rejected_geometric_mean": -3.016744613647461, + "step": 2706 + }, + { + "chosen_geometric_mean": -1.0000553131103516, + "epoch": 0.67, + "grad_norm": 6.75, + "learning_rate": 3.74353103698465e-06, + "log_odds": 3.8339147567749023, + "log_odds_ratio": -0.3108261525630951, + "loss": 0.3305, + "rejected_geometric_mean": -4.5497002601623535, + "step": 2707 + }, + { + "chosen_geometric_mean": -1.093359112739563, + "epoch": 0.67, + "grad_norm": 3.828125, + "learning_rate": 3.742686440921187e-06, + "log_odds": 2.684201240539551, + "log_odds_ratio": -0.42058101296424866, + "loss": 0.2913, + "rejected_geometric_mean": -3.6732306480407715, + "step": 2708 + }, + { + "chosen_geometric_mean": -1.1999661922454834, + "epoch": 0.67, + "grad_norm": 7.09375, + "learning_rate": 3.74184165643663e-06, + "log_odds": 1.3762038946151733, + "log_odds_ratio": -0.29579591751098633, + "loss": 0.2922, + "rejected_geometric_mean": -2.37074875831604, + "step": 2709 + }, + { + "chosen_geometric_mean": -1.337080717086792, + "epoch": 0.67, + "grad_norm": 15.9375, + "learning_rate": 3.7409966836590693e-06, + "log_odds": 1.217413067817688, + "log_odds_ratio": -0.31639865040779114, + "loss": 0.3707, + "rejected_geometric_mean": -2.380871295928955, + "step": 2710 + }, + { + "chosen_geometric_mean": -1.0666038990020752, + "epoch": 0.67, + "grad_norm": 3.421875, + "learning_rate": 3.740151522716622e-06, + "log_odds": 3.013162612915039, + "log_odds_ratio": -0.2966518998146057, + "loss": 0.2713, + "rejected_geometric_mean": -3.832097053527832, + "step": 2711 + }, + { + "chosen_geometric_mean": -1.213989019393921, + "epoch": 0.67, + "grad_norm": 6.375, + "learning_rate": 3.739306173737436e-06, + "log_odds": 4.591458797454834, + "log_odds_ratio": -0.16261796653270721, + "loss": 0.3196, + "rejected_geometric_mean": -5.490354061126709, + "step": 2712 + }, + { + "chosen_geometric_mean": -0.8317848443984985, + "epoch": 0.67, + "grad_norm": 4.0625, + "learning_rate": 3.7384606368496857e-06, + "log_odds": 6.752717971801758, + "log_odds_ratio": -0.14754138886928558, + "loss": 0.2997, + "rejected_geometric_mean": -7.142244815826416, + "step": 2713 + }, + { + "chosen_geometric_mean": -1.2591955661773682, + "epoch": 0.67, + "grad_norm": 10.1875, + "learning_rate": 3.7376149121815745e-06, + "log_odds": 3.4259865283966064, + "log_odds_ratio": -0.20745716989040375, + "loss": 0.3113, + "rejected_geometric_mean": -4.431702613830566, + "step": 2714 + }, + { + "chosen_geometric_mean": -1.0718358755111694, + "epoch": 0.67, + "grad_norm": 19.25, + "learning_rate": 3.7367689998613354e-06, + "log_odds": 2.746213912963867, + "log_odds_ratio": -0.3171928822994232, + "loss": 0.2774, + "rejected_geometric_mean": -3.592355728149414, + "step": 2715 + }, + { + "chosen_geometric_mean": -1.0812835693359375, + "epoch": 0.67, + "grad_norm": 11.6875, + "learning_rate": 3.735922900017228e-06, + "log_odds": 3.731262445449829, + "log_odds_ratio": -0.19754603505134583, + "loss": 0.3491, + "rejected_geometric_mean": -4.509757995605469, + "step": 2716 + }, + { + "chosen_geometric_mean": -1.338291049003601, + "epoch": 0.67, + "grad_norm": 9.25, + "learning_rate": 3.7350766127775427e-06, + "log_odds": 1.0560232400894165, + "log_odds_ratio": -0.3161032795906067, + "loss": 0.3329, + "rejected_geometric_mean": -2.216670036315918, + "step": 2717 + }, + { + "chosen_geometric_mean": -1.2493956089019775, + "epoch": 0.67, + "grad_norm": 14.8125, + "learning_rate": 3.7342301382705957e-06, + "log_odds": 2.985072135925293, + "log_odds_ratio": -0.2903594672679901, + "loss": 0.3273, + "rejected_geometric_mean": -4.015936374664307, + "step": 2718 + }, + { + "chosen_geometric_mean": -0.9218109250068665, + "epoch": 0.67, + "grad_norm": 8.125, + "learning_rate": 3.733383476624734e-06, + "log_odds": 4.678513050079346, + "log_odds_ratio": -0.031703297048807144, + "loss": 0.285, + "rejected_geometric_mean": -5.1030073165893555, + "step": 2719 + }, + { + "chosen_geometric_mean": -0.7433410882949829, + "epoch": 0.67, + "grad_norm": 2.765625, + "learning_rate": 3.7325366279683304e-06, + "log_odds": 8.712149620056152, + "log_odds_ratio": -0.004710518289357424, + "loss": 0.2816, + "rejected_geometric_mean": -8.80358600616455, + "step": 2720 + }, + { + "chosen_geometric_mean": -1.2285364866256714, + "epoch": 0.67, + "grad_norm": 2.921875, + "learning_rate": 3.7316895924297893e-06, + "log_odds": 6.073976516723633, + "log_odds_ratio": -0.01366979070007801, + "loss": 0.2517, + "rejected_geometric_mean": -6.947608470916748, + "step": 2721 + }, + { + "chosen_geometric_mean": -1.1940319538116455, + "epoch": 0.67, + "grad_norm": 1.9140625, + "learning_rate": 3.73084237013754e-06, + "log_odds": 5.410486698150635, + "log_odds_ratio": -0.030683312565088272, + "loss": 0.301, + "rejected_geometric_mean": -6.230391979217529, + "step": 2722 + }, + { + "chosen_geometric_mean": -1.1451096534729004, + "epoch": 0.67, + "grad_norm": 3.53125, + "learning_rate": 3.7299949612200428e-06, + "log_odds": 5.776304244995117, + "log_odds_ratio": -0.07942286878824234, + "loss": 0.2981, + "rejected_geometric_mean": -6.57856559753418, + "step": 2723 + }, + { + "chosen_geometric_mean": -1.0615313053131104, + "epoch": 0.67, + "grad_norm": 5.78125, + "learning_rate": 3.7291473658057843e-06, + "log_odds": 3.119105339050293, + "log_odds_ratio": -0.25899583101272583, + "loss": 0.3031, + "rejected_geometric_mean": -3.8368706703186035, + "step": 2724 + }, + { + "chosen_geometric_mean": -1.0448832511901855, + "epoch": 0.67, + "grad_norm": 2.40625, + "learning_rate": 3.7282995840232813e-06, + "log_odds": 0.9695296883583069, + "log_odds_ratio": -0.38961219787597656, + "loss": 0.3018, + "rejected_geometric_mean": -1.7983659505844116, + "step": 2725 + }, + { + "chosen_geometric_mean": -1.0814311504364014, + "epoch": 0.67, + "grad_norm": 4.84375, + "learning_rate": 3.727451616001076e-06, + "log_odds": 2.310439109802246, + "log_odds_ratio": -0.3466709554195404, + "loss": 0.2768, + "rejected_geometric_mean": -3.1988492012023926, + "step": 2726 + }, + { + "chosen_geometric_mean": -1.1679391860961914, + "epoch": 0.68, + "grad_norm": 5.15625, + "learning_rate": 3.726603461867743e-06, + "log_odds": 2.789642333984375, + "log_odds_ratio": -0.36103349924087524, + "loss": 0.3029, + "rejected_geometric_mean": -3.7542078495025635, + "step": 2727 + }, + { + "chosen_geometric_mean": -1.1195263862609863, + "epoch": 0.68, + "grad_norm": 3.09375, + "learning_rate": 3.725755121751883e-06, + "log_odds": 1.4965040683746338, + "log_odds_ratio": -0.4540903568267822, + "loss": 0.2983, + "rejected_geometric_mean": -2.493029832839966, + "step": 2728 + }, + { + "chosen_geometric_mean": -1.1293061971664429, + "epoch": 0.68, + "grad_norm": 3.75, + "learning_rate": 3.724906595782122e-06, + "log_odds": 0.791206955909729, + "log_odds_ratio": -0.3898247182369232, + "loss": 0.2776, + "rejected_geometric_mean": -1.7364745140075684, + "step": 2729 + }, + { + "chosen_geometric_mean": -1.066985011100769, + "epoch": 0.68, + "grad_norm": 2.96875, + "learning_rate": 3.7240578840871188e-06, + "log_odds": 10.604569435119629, + "log_odds_ratio": -0.049662429839372635, + "loss": 0.33, + "rejected_geometric_mean": -11.265771865844727, + "step": 2730 + }, + { + "chosen_geometric_mean": -1.1089011430740356, + "epoch": 0.68, + "grad_norm": 4.25, + "learning_rate": 3.7232089867955583e-06, + "log_odds": 5.827853679656982, + "log_odds_ratio": -0.46921539306640625, + "loss": 0.2862, + "rejected_geometric_mean": -6.754698276519775, + "step": 2731 + }, + { + "chosen_geometric_mean": -0.9728838205337524, + "epoch": 0.68, + "grad_norm": 2.3125, + "learning_rate": 3.7223599040361543e-06, + "log_odds": 2.79740309715271, + "log_odds_ratio": -0.3770379424095154, + "loss": 0.2736, + "rejected_geometric_mean": -3.5233983993530273, + "step": 2732 + }, + { + "chosen_geometric_mean": -1.1880743503570557, + "epoch": 0.68, + "grad_norm": 19.875, + "learning_rate": 3.7215106359376464e-06, + "log_odds": 6.788498878479004, + "log_odds_ratio": -0.3832266926765442, + "loss": 0.2919, + "rejected_geometric_mean": -7.8106689453125, + "step": 2733 + }, + { + "chosen_geometric_mean": -1.156409740447998, + "epoch": 0.68, + "grad_norm": 18.5, + "learning_rate": 3.7206611826288057e-06, + "log_odds": 2.4757630825042725, + "log_odds_ratio": -0.37522074580192566, + "loss": 0.291, + "rejected_geometric_mean": -3.4919650554656982, + "step": 2734 + }, + { + "chosen_geometric_mean": -1.1678123474121094, + "epoch": 0.68, + "grad_norm": 2.734375, + "learning_rate": 3.7198115442384287e-06, + "log_odds": 3.5525858402252197, + "log_odds_ratio": -0.17902058362960815, + "loss": 0.276, + "rejected_geometric_mean": -4.420702934265137, + "step": 2735 + }, + { + "chosen_geometric_mean": -1.0743408203125, + "epoch": 0.68, + "grad_norm": 31.375, + "learning_rate": 3.7189617208953417e-06, + "log_odds": 12.321276664733887, + "log_odds_ratio": -0.13942928612232208, + "loss": 0.3843, + "rejected_geometric_mean": -13.040946960449219, + "step": 2736 + }, + { + "chosen_geometric_mean": -3.692138671875, + "epoch": 0.68, + "grad_norm": 84.5, + "learning_rate": 3.7181117127283977e-06, + "log_odds": 6.634559631347656, + "log_odds_ratio": -0.19395755231380463, + "loss": 0.5142, + "rejected_geometric_mean": -10.10897445678711, + "step": 2737 + }, + { + "chosen_geometric_mean": -1.0846436023712158, + "epoch": 0.68, + "grad_norm": 13.25, + "learning_rate": 3.7172615198664785e-06, + "log_odds": 4.774355888366699, + "log_odds_ratio": -0.11077374219894409, + "loss": 0.3216, + "rejected_geometric_mean": -5.492242813110352, + "step": 2738 + }, + { + "chosen_geometric_mean": -1.222618579864502, + "epoch": 0.68, + "grad_norm": 4.625, + "learning_rate": 3.7164111424384942e-06, + "log_odds": 5.709948539733887, + "log_odds_ratio": -0.17671772837638855, + "loss": 0.2972, + "rejected_geometric_mean": -6.662300109863281, + "step": 2739 + }, + { + "chosen_geometric_mean": -0.9201110601425171, + "epoch": 0.68, + "grad_norm": 4.5625, + "learning_rate": 3.7155605805733814e-06, + "log_odds": 2.897817850112915, + "log_odds_ratio": -0.3012777864933014, + "loss": 0.27, + "rejected_geometric_mean": -3.4416110515594482, + "step": 2740 + }, + { + "chosen_geometric_mean": -1.2390468120574951, + "epoch": 0.68, + "grad_norm": 20.625, + "learning_rate": 3.7147098344001075e-06, + "log_odds": 3.481173515319824, + "log_odds_ratio": -0.10575605928897858, + "loss": 0.3348, + "rejected_geometric_mean": -4.415071487426758, + "step": 2741 + }, + { + "chosen_geometric_mean": -1.0708330869674683, + "epoch": 0.68, + "grad_norm": 25.375, + "learning_rate": 3.7138589040476635e-06, + "log_odds": 2.644426107406616, + "log_odds_ratio": -0.2975415587425232, + "loss": 0.2883, + "rejected_geometric_mean": -3.4937198162078857, + "step": 2742 + }, + { + "chosen_geometric_mean": -1.0633471012115479, + "epoch": 0.68, + "grad_norm": 8.875, + "learning_rate": 3.7130077896450732e-06, + "log_odds": 2.110459804534912, + "log_odds_ratio": -0.27763378620147705, + "loss": 0.3551, + "rejected_geometric_mean": -2.940960168838501, + "step": 2743 + }, + { + "chosen_geometric_mean": -1.0249080657958984, + "epoch": 0.68, + "grad_norm": 2.171875, + "learning_rate": 3.7121564913213855e-06, + "log_odds": 4.409576416015625, + "log_odds_ratio": -0.37436753511428833, + "loss": 0.3117, + "rejected_geometric_mean": -5.249242782592773, + "step": 2744 + }, + { + "chosen_geometric_mean": -1.1947104930877686, + "epoch": 0.68, + "grad_norm": 18.75, + "learning_rate": 3.7113050092056763e-06, + "log_odds": 9.024516105651855, + "log_odds_ratio": -0.12458455562591553, + "loss": 0.3647, + "rejected_geometric_mean": -9.919418334960938, + "step": 2745 + }, + { + "chosen_geometric_mean": -0.9815347790718079, + "epoch": 0.68, + "grad_norm": 2.703125, + "learning_rate": 3.7104533434270517e-06, + "log_odds": 6.294358730316162, + "log_odds_ratio": -0.13815419375896454, + "loss": 0.3131, + "rejected_geometric_mean": -6.807776927947998, + "step": 2746 + }, + { + "chosen_geometric_mean": -1.0384469032287598, + "epoch": 0.68, + "grad_norm": 2.375, + "learning_rate": 3.7096014941146452e-06, + "log_odds": 1.3940356969833374, + "log_odds_ratio": -0.4215015769004822, + "loss": 0.3123, + "rejected_geometric_mean": -2.2578482627868652, + "step": 2747 + }, + { + "chosen_geometric_mean": -0.9418867826461792, + "epoch": 0.68, + "grad_norm": 5.625, + "learning_rate": 3.7087494613976167e-06, + "log_odds": 9.071304321289062, + "log_odds_ratio": -0.08665698766708374, + "loss": 0.3525, + "rejected_geometric_mean": -9.57868766784668, + "step": 2748 + }, + { + "chosen_geometric_mean": -1.1292377710342407, + "epoch": 0.68, + "grad_norm": 4.5, + "learning_rate": 3.7078972454051547e-06, + "log_odds": 3.4176785945892334, + "log_odds_ratio": -0.03797845542430878, + "loss": 0.2932, + "rejected_geometric_mean": -4.137612819671631, + "step": 2749 + }, + { + "chosen_geometric_mean": -0.9895792007446289, + "epoch": 0.68, + "grad_norm": 4.53125, + "learning_rate": 3.7070448462664767e-06, + "log_odds": 3.408230781555176, + "log_odds_ratio": -0.2875003218650818, + "loss": 0.2726, + "rejected_geometric_mean": -4.057976722717285, + "step": 2750 + }, + { + "chosen_geometric_mean": -0.8181814551353455, + "epoch": 0.68, + "grad_norm": 4.78125, + "learning_rate": 3.7061922641108263e-06, + "log_odds": 3.9403390884399414, + "log_odds_ratio": -0.08829879015684128, + "loss": 0.2672, + "rejected_geometric_mean": -4.246560573577881, + "step": 2751 + }, + { + "chosen_geometric_mean": -0.7651472091674805, + "epoch": 0.68, + "grad_norm": 6.375, + "learning_rate": 3.705339499067475e-06, + "log_odds": 2.613532304763794, + "log_odds_ratio": -0.18349997699260712, + "loss": 0.3328, + "rejected_geometric_mean": -2.877067804336548, + "step": 2752 + }, + { + "chosen_geometric_mean": -1.0693069696426392, + "epoch": 0.68, + "grad_norm": 2.8125, + "learning_rate": 3.7044865512657217e-06, + "log_odds": 1.576512098312378, + "log_odds_ratio": -0.3056322932243347, + "loss": 0.2968, + "rejected_geometric_mean": -2.3829469680786133, + "step": 2753 + }, + { + "chosen_geometric_mean": -1.4249311685562134, + "epoch": 0.68, + "grad_norm": 26.5, + "learning_rate": 3.7036334208348968e-06, + "log_odds": 1.0326751470565796, + "log_odds_ratio": -0.4378718137741089, + "loss": 0.3498, + "rejected_geometric_mean": -2.3388094902038574, + "step": 2754 + }, + { + "chosen_geometric_mean": -1.1139708757400513, + "epoch": 0.68, + "grad_norm": 6.75, + "learning_rate": 3.7027801079043524e-06, + "log_odds": 5.6241044998168945, + "log_odds_ratio": -0.12085450440645218, + "loss": 0.3087, + "rejected_geometric_mean": -6.350707054138184, + "step": 2755 + }, + { + "chosen_geometric_mean": -1.0777974128723145, + "epoch": 0.68, + "grad_norm": 11.3125, + "learning_rate": 3.7019266126034712e-06, + "log_odds": 7.864020824432373, + "log_odds_ratio": -0.007000841666013002, + "loss": 0.3054, + "rejected_geometric_mean": -8.52138614654541, + "step": 2756 + }, + { + "chosen_geometric_mean": -1.3358718156814575, + "epoch": 0.68, + "grad_norm": 22.25, + "learning_rate": 3.7010729350616663e-06, + "log_odds": 3.697744607925415, + "log_odds_ratio": -0.24306997656822205, + "loss": 0.3913, + "rejected_geometric_mean": -4.806643486022949, + "step": 2757 + }, + { + "chosen_geometric_mean": -1.1716997623443604, + "epoch": 0.68, + "grad_norm": 22.5, + "learning_rate": 3.7002190754083732e-06, + "log_odds": 5.631929874420166, + "log_odds_ratio": -0.10421035438776016, + "loss": 0.3524, + "rejected_geometric_mean": -6.451107025146484, + "step": 2758 + }, + { + "chosen_geometric_mean": -1.2212638854980469, + "epoch": 0.68, + "grad_norm": 10.5625, + "learning_rate": 3.699365033773058e-06, + "log_odds": 4.565374374389648, + "log_odds_ratio": -0.20041289925575256, + "loss": 0.2785, + "rejected_geometric_mean": -5.530827522277832, + "step": 2759 + }, + { + "chosen_geometric_mean": -1.2377063035964966, + "epoch": 0.68, + "grad_norm": 12.5, + "learning_rate": 3.6985108102852153e-06, + "log_odds": 0.76019686460495, + "log_odds_ratio": -0.47290635108947754, + "loss": 0.3723, + "rejected_geometric_mean": -1.8656330108642578, + "step": 2760 + }, + { + "chosen_geometric_mean": -1.2421451807022095, + "epoch": 0.68, + "grad_norm": 2.015625, + "learning_rate": 3.6976564050743635e-06, + "log_odds": 2.838867425918579, + "log_odds_ratio": -0.2355792224407196, + "loss": 0.281, + "rejected_geometric_mean": -3.8353004455566406, + "step": 2761 + }, + { + "chosen_geometric_mean": -0.9550341963768005, + "epoch": 0.68, + "grad_norm": 4.0625, + "learning_rate": 3.696801818270053e-06, + "log_odds": 2.608713388442993, + "log_odds_ratio": -0.43647634983062744, + "loss": 0.2913, + "rejected_geometric_mean": -3.4045157432556152, + "step": 2762 + }, + { + "chosen_geometric_mean": -1.0338749885559082, + "epoch": 0.68, + "grad_norm": 2.328125, + "learning_rate": 3.695947050001859e-06, + "log_odds": 5.308618068695068, + "log_odds_ratio": -0.18173089623451233, + "loss": 0.3085, + "rejected_geometric_mean": -6.009143352508545, + "step": 2763 + }, + { + "chosen_geometric_mean": -1.1722737550735474, + "epoch": 0.68, + "grad_norm": 3.921875, + "learning_rate": 3.6950921003993846e-06, + "log_odds": 1.4439947605133057, + "log_odds_ratio": -0.34572434425354004, + "loss": 0.2834, + "rejected_geometric_mean": -2.3944973945617676, + "step": 2764 + }, + { + "chosen_geometric_mean": -1.003769040107727, + "epoch": 0.68, + "grad_norm": 2.15625, + "learning_rate": 3.694236969592261e-06, + "log_odds": 1.9445399045944214, + "log_odds_ratio": -0.4696720838546753, + "loss": 0.3124, + "rejected_geometric_mean": -2.8035120964050293, + "step": 2765 + }, + { + "chosen_geometric_mean": -1.0293731689453125, + "epoch": 0.68, + "grad_norm": 27.25, + "learning_rate": 3.693381657710147e-06, + "log_odds": 5.150898456573486, + "log_odds_ratio": -0.17899207770824432, + "loss": 0.3061, + "rejected_geometric_mean": -5.872223854064941, + "step": 2766 + }, + { + "chosen_geometric_mean": -1.1496376991271973, + "epoch": 0.69, + "grad_norm": 3.75, + "learning_rate": 3.6925261648827263e-06, + "log_odds": 1.5000561475753784, + "log_odds_ratio": -0.40040749311447144, + "loss": 0.2895, + "rejected_geometric_mean": -2.4542038440704346, + "step": 2767 + }, + { + "chosen_geometric_mean": -1.350210189819336, + "epoch": 0.69, + "grad_norm": 26.75, + "learning_rate": 3.691670491239715e-06, + "log_odds": 1.9656436443328857, + "log_odds_ratio": -0.22975894808769226, + "loss": 0.3272, + "rejected_geometric_mean": -3.121953010559082, + "step": 2768 + }, + { + "chosen_geometric_mean": -1.2501425743103027, + "epoch": 0.69, + "grad_norm": 1.9453125, + "learning_rate": 3.6908146369108523e-06, + "log_odds": 4.089244365692139, + "log_odds_ratio": -0.09633873403072357, + "loss": 0.2691, + "rejected_geometric_mean": -5.022124290466309, + "step": 2769 + }, + { + "chosen_geometric_mean": -1.3712852001190186, + "epoch": 0.69, + "grad_norm": 20.875, + "learning_rate": 3.689958602025906e-06, + "log_odds": 2.5152268409729004, + "log_odds_ratio": -0.5438624620437622, + "loss": 0.3719, + "rejected_geometric_mean": -3.733198642730713, + "step": 2770 + }, + { + "chosen_geometric_mean": -1.3912514448165894, + "epoch": 0.69, + "grad_norm": 38.5, + "learning_rate": 3.6891023867146726e-06, + "log_odds": 8.52684211730957, + "log_odds_ratio": -0.0004743394674733281, + "loss": 0.3677, + "rejected_geometric_mean": -9.547520637512207, + "step": 2771 + }, + { + "chosen_geometric_mean": -1.414525032043457, + "epoch": 0.69, + "grad_norm": 6.25, + "learning_rate": 3.688245991106973e-06, + "log_odds": 6.2824177742004395, + "log_odds_ratio": -0.056343547999858856, + "loss": 0.2774, + "rejected_geometric_mean": -7.3589653968811035, + "step": 2772 + }, + { + "chosen_geometric_mean": -1.271561622619629, + "epoch": 0.69, + "grad_norm": 14.5, + "learning_rate": 3.6873894153326594e-06, + "log_odds": 1.2518833875656128, + "log_odds_ratio": -0.3790895342826843, + "loss": 0.2602, + "rejected_geometric_mean": -2.3658180236816406, + "step": 2773 + }, + { + "chosen_geometric_mean": -0.97416752576828, + "epoch": 0.69, + "grad_norm": 6.90625, + "learning_rate": 3.686532659521609e-06, + "log_odds": 1.297448992729187, + "log_odds_ratio": -0.3424496650695801, + "loss": 0.2846, + "rejected_geometric_mean": -1.9919607639312744, + "step": 2774 + }, + { + "chosen_geometric_mean": -1.1171698570251465, + "epoch": 0.69, + "grad_norm": 27.25, + "learning_rate": 3.685675723803725e-06, + "log_odds": 1.4699451923370361, + "log_odds_ratio": -0.3780606985092163, + "loss": 0.2874, + "rejected_geometric_mean": -2.3656325340270996, + "step": 2775 + }, + { + "chosen_geometric_mean": -0.9495939612388611, + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 3.684818608308941e-06, + "log_odds": 6.3501739501953125, + "log_odds_ratio": -0.2700042426586151, + "loss": 0.2731, + "rejected_geometric_mean": -6.9886064529418945, + "step": 2776 + }, + { + "chosen_geometric_mean": -1.1104732751846313, + "epoch": 0.69, + "grad_norm": 10.5, + "learning_rate": 3.6839613131672157e-06, + "log_odds": 0.7420892119407654, + "log_odds_ratio": -0.40621697902679443, + "loss": 0.3089, + "rejected_geometric_mean": -1.6330190896987915, + "step": 2777 + }, + { + "chosen_geometric_mean": -0.9958258271217346, + "epoch": 0.69, + "grad_norm": 4.15625, + "learning_rate": 3.683103838508535e-06, + "log_odds": 3.611088991165161, + "log_odds_ratio": -0.17753398418426514, + "loss": 0.302, + "rejected_geometric_mean": -4.2125773429870605, + "step": 2778 + }, + { + "chosen_geometric_mean": -0.9111175537109375, + "epoch": 0.69, + "grad_norm": 10.5625, + "learning_rate": 3.6822461844629144e-06, + "log_odds": 4.589810371398926, + "log_odds_ratio": -0.11186318844556808, + "loss": 0.2827, + "rejected_geometric_mean": -5.028744697570801, + "step": 2779 + }, + { + "chosen_geometric_mean": -1.0650924444198608, + "epoch": 0.69, + "grad_norm": 5.59375, + "learning_rate": 3.6813883511603927e-06, + "log_odds": 1.8661668300628662, + "log_odds_ratio": -0.2890201807022095, + "loss": 0.2944, + "rejected_geometric_mean": -2.7243247032165527, + "step": 2780 + }, + { + "chosen_geometric_mean": -1.0290603637695312, + "epoch": 0.69, + "grad_norm": 2.578125, + "learning_rate": 3.6805303387310393e-06, + "log_odds": 3.0474400520324707, + "log_odds_ratio": -0.20913144946098328, + "loss": 0.2701, + "rejected_geometric_mean": -3.774219512939453, + "step": 2781 + }, + { + "chosen_geometric_mean": -1.061230182647705, + "epoch": 0.69, + "grad_norm": 2.3125, + "learning_rate": 3.679672147304949e-06, + "log_odds": 1.5680263042449951, + "log_odds_ratio": -0.3675687611103058, + "loss": 0.3269, + "rejected_geometric_mean": -2.398139238357544, + "step": 2782 + }, + { + "chosen_geometric_mean": -1.0085657835006714, + "epoch": 0.69, + "grad_norm": 5.625, + "learning_rate": 3.6788137770122444e-06, + "log_odds": 1.2986475229263306, + "log_odds_ratio": -0.3445039391517639, + "loss": 0.3181, + "rejected_geometric_mean": -2.026322364807129, + "step": 2783 + }, + { + "chosen_geometric_mean": -1.110658049583435, + "epoch": 0.69, + "grad_norm": 3.640625, + "learning_rate": 3.6779552279830754e-06, + "log_odds": 1.8710168600082397, + "log_odds_ratio": -0.3185140788555145, + "loss": 0.3278, + "rejected_geometric_mean": -2.7617287635803223, + "step": 2784 + }, + { + "chosen_geometric_mean": -1.0828170776367188, + "epoch": 0.69, + "grad_norm": 10.1875, + "learning_rate": 3.677096500347619e-06, + "log_odds": 1.0300698280334473, + "log_odds_ratio": -0.37445268034935, + "loss": 0.3169, + "rejected_geometric_mean": -1.9162036180496216, + "step": 2785 + }, + { + "chosen_geometric_mean": -1.1357691287994385, + "epoch": 0.69, + "grad_norm": 4.09375, + "learning_rate": 3.676237594236076e-06, + "log_odds": 0.7359804511070251, + "log_odds_ratio": -0.5020890831947327, + "loss": 0.3161, + "rejected_geometric_mean": -1.7489874362945557, + "step": 2786 + }, + { + "chosen_geometric_mean": -0.9765836000442505, + "epoch": 0.69, + "grad_norm": 3.625, + "learning_rate": 3.675378509778682e-06, + "log_odds": 1.063992977142334, + "log_odds_ratio": -0.4318860173225403, + "loss": 0.3206, + "rejected_geometric_mean": -1.8344895839691162, + "step": 2787 + }, + { + "chosen_geometric_mean": -0.8823739290237427, + "epoch": 0.69, + "grad_norm": 23.375, + "learning_rate": 3.6745192471056913e-06, + "log_odds": 6.107271194458008, + "log_odds_ratio": -0.22736027836799622, + "loss": 0.2782, + "rejected_geometric_mean": -6.645395755767822, + "step": 2788 + }, + { + "chosen_geometric_mean": -0.877379834651947, + "epoch": 0.69, + "grad_norm": 2.40625, + "learning_rate": 3.6736598063473883e-06, + "log_odds": 2.8001391887664795, + "log_odds_ratio": -0.35922500491142273, + "loss": 0.2779, + "rejected_geometric_mean": -3.4001805782318115, + "step": 2789 + }, + { + "chosen_geometric_mean": -1.178097128868103, + "epoch": 0.69, + "grad_norm": 4.75, + "learning_rate": 3.6728001876340878e-06, + "log_odds": 3.6536641120910645, + "log_odds_ratio": -0.11584903299808502, + "loss": 0.3331, + "rejected_geometric_mean": -4.52455997467041, + "step": 2790 + }, + { + "chosen_geometric_mean": -1.1522077322006226, + "epoch": 0.69, + "grad_norm": 8.875, + "learning_rate": 3.6719403910961264e-06, + "log_odds": 0.54595547914505, + "log_odds_ratio": -0.5079348683357239, + "loss": 0.3069, + "rejected_geometric_mean": -1.6007425785064697, + "step": 2791 + }, + { + "chosen_geometric_mean": -1.0798096656799316, + "epoch": 0.69, + "grad_norm": 4.78125, + "learning_rate": 3.6710804168638707e-06, + "log_odds": 6.190179347991943, + "log_odds_ratio": -0.29224368929862976, + "loss": 0.2658, + "rejected_geometric_mean": -6.968833923339844, + "step": 2792 + }, + { + "chosen_geometric_mean": -1.1070811748504639, + "epoch": 0.69, + "grad_norm": 14.375, + "learning_rate": 3.6702202650677137e-06, + "log_odds": 4.184746742248535, + "log_odds_ratio": -0.09223850071430206, + "loss": 0.347, + "rejected_geometric_mean": -4.930652618408203, + "step": 2793 + }, + { + "chosen_geometric_mean": -1.0630279779434204, + "epoch": 0.69, + "grad_norm": 14.3125, + "learning_rate": 3.669359935838074e-06, + "log_odds": 3.326756715774536, + "log_odds_ratio": -0.26116716861724854, + "loss": 0.2932, + "rejected_geometric_mean": -4.129861354827881, + "step": 2794 + }, + { + "chosen_geometric_mean": -1.0637977123260498, + "epoch": 0.69, + "grad_norm": 2.359375, + "learning_rate": 3.6684994293053986e-06, + "log_odds": 11.662076950073242, + "log_odds_ratio": -0.0072985803708434105, + "loss": 0.3045, + "rejected_geometric_mean": -12.305055618286133, + "step": 2795 + }, + { + "chosen_geometric_mean": -1.1818214654922485, + "epoch": 0.69, + "grad_norm": 13.875, + "learning_rate": 3.6676387456001623e-06, + "log_odds": 4.317602157592773, + "log_odds_ratio": -0.14034947752952576, + "loss": 0.296, + "rejected_geometric_mean": -5.1590256690979, + "step": 2796 + }, + { + "chosen_geometric_mean": -1.0795493125915527, + "epoch": 0.69, + "grad_norm": 9.6875, + "learning_rate": 3.6667778848528635e-06, + "log_odds": 9.939373016357422, + "log_odds_ratio": -0.18009887635707855, + "loss": 0.2979, + "rejected_geometric_mean": -10.689737319946289, + "step": 2797 + }, + { + "chosen_geometric_mean": -1.0835217237472534, + "epoch": 0.69, + "grad_norm": 4.21875, + "learning_rate": 3.6659168471940304e-06, + "log_odds": 1.1434886455535889, + "log_odds_ratio": -0.38583263754844666, + "loss": 0.2987, + "rejected_geometric_mean": -2.035043478012085, + "step": 2798 + }, + { + "chosen_geometric_mean": -1.1325777769088745, + "epoch": 0.69, + "grad_norm": 4.6875, + "learning_rate": 3.665055632754216e-06, + "log_odds": 0.8635158538818359, + "log_odds_ratio": -0.42051422595977783, + "loss": 0.3109, + "rejected_geometric_mean": -1.820164680480957, + "step": 2799 + }, + { + "chosen_geometric_mean": -1.2467355728149414, + "epoch": 0.69, + "grad_norm": 3.921875, + "learning_rate": 3.6641942416640024e-06, + "log_odds": 6.245663166046143, + "log_odds_ratio": -0.07565920054912567, + "loss": 0.3792, + "rejected_geometric_mean": -7.1636528968811035, + "step": 2800 + }, + { + "chosen_geometric_mean": -1.1913788318634033, + "epoch": 0.69, + "grad_norm": 7.90625, + "learning_rate": 3.6633326740539965e-06, + "log_odds": 2.442727565765381, + "log_odds_ratio": -0.39491528272628784, + "loss": 0.2719, + "rejected_geometric_mean": -3.482245922088623, + "step": 2801 + }, + { + "chosen_geometric_mean": -1.0933761596679688, + "epoch": 0.69, + "grad_norm": 3.4375, + "learning_rate": 3.662470930054832e-06, + "log_odds": 3.9592220783233643, + "log_odds_ratio": -0.38762760162353516, + "loss": 0.3141, + "rejected_geometric_mean": -4.840481281280518, + "step": 2802 + }, + { + "chosen_geometric_mean": -1.7950890064239502, + "epoch": 0.69, + "grad_norm": 29.375, + "learning_rate": 3.6616090097971725e-06, + "log_odds": 5.292041778564453, + "log_odds_ratio": -0.019698528572916985, + "loss": 0.3847, + "rejected_geometric_mean": -6.6783037185668945, + "step": 2803 + }, + { + "chosen_geometric_mean": -1.0368576049804688, + "epoch": 0.69, + "grad_norm": 7.8125, + "learning_rate": 3.6607469134117022e-06, + "log_odds": 7.932605743408203, + "log_odds_ratio": -0.24531537294387817, + "loss": 0.3374, + "rejected_geometric_mean": -8.696898460388184, + "step": 2804 + }, + { + "chosen_geometric_mean": -1.3966765403747559, + "epoch": 0.69, + "grad_norm": 14.25, + "learning_rate": 3.659884641029138e-06, + "log_odds": 0.4162207841873169, + "log_odds_ratio": -0.5224296450614929, + "loss": 0.319, + "rejected_geometric_mean": -1.7366600036621094, + "step": 2805 + }, + { + "chosen_geometric_mean": -1.6384756565093994, + "epoch": 0.69, + "grad_norm": 13.125, + "learning_rate": 3.6590221927802203e-06, + "log_odds": 7.074734210968018, + "log_odds_ratio": -0.11869632452726364, + "loss": 0.3076, + "rejected_geometric_mean": -8.508644104003906, + "step": 2806 + }, + { + "chosen_geometric_mean": -1.018982172012329, + "epoch": 0.69, + "grad_norm": 11.5, + "learning_rate": 3.6581595687957174e-06, + "log_odds": 0.3331022560596466, + "log_odds_ratio": -0.542526364326477, + "loss": 0.3137, + "rejected_geometric_mean": -1.2433936595916748, + "step": 2807 + }, + { + "chosen_geometric_mean": -1.1738337278366089, + "epoch": 0.7, + "grad_norm": 13.5625, + "learning_rate": 3.6572967692064225e-06, + "log_odds": 4.443137168884277, + "log_odds_ratio": -0.17964062094688416, + "loss": 0.271, + "rejected_geometric_mean": -5.340447902679443, + "step": 2808 + }, + { + "chosen_geometric_mean": -1.0480562448501587, + "epoch": 0.7, + "grad_norm": 18.125, + "learning_rate": 3.656433794143158e-06, + "log_odds": 3.667503595352173, + "log_odds_ratio": -0.03087477758526802, + "loss": 0.2848, + "rejected_geometric_mean": -4.281957149505615, + "step": 2809 + }, + { + "chosen_geometric_mean": -0.8091942071914673, + "epoch": 0.7, + "grad_norm": 9.625, + "learning_rate": 3.6555706437367727e-06, + "log_odds": 1.8331607580184937, + "log_odds_ratio": -0.2681785225868225, + "loss": 0.3876, + "rejected_geometric_mean": -2.1852574348449707, + "step": 2810 + }, + { + "chosen_geometric_mean": -1.2046339511871338, + "epoch": 0.7, + "grad_norm": 1.953125, + "learning_rate": 3.6547073181181382e-06, + "log_odds": 2.5633416175842285, + "log_odds_ratio": -0.2536408305168152, + "loss": 0.258, + "rejected_geometric_mean": -3.523157835006714, + "step": 2811 + }, + { + "chosen_geometric_mean": -1.3238377571105957, + "epoch": 0.7, + "grad_norm": 27.625, + "learning_rate": 3.6538438174181574e-06, + "log_odds": 1.8178709745407104, + "log_odds_ratio": -0.4904484748840332, + "loss": 0.3151, + "rejected_geometric_mean": -3.007904052734375, + "step": 2812 + }, + { + "chosen_geometric_mean": -0.9175015091896057, + "epoch": 0.7, + "grad_norm": 31.875, + "learning_rate": 3.652980141767757e-06, + "log_odds": 0.6733536124229431, + "log_odds_ratio": -0.47144603729248047, + "loss": 0.3629, + "rejected_geometric_mean": -1.394018530845642, + "step": 2813 + }, + { + "chosen_geometric_mean": -1.2253035306930542, + "epoch": 0.7, + "grad_norm": 3.921875, + "learning_rate": 3.652116291297891e-06, + "log_odds": 4.17160701751709, + "log_odds_ratio": -0.3392072021961212, + "loss": 0.3047, + "rejected_geometric_mean": -5.172677993774414, + "step": 2814 + }, + { + "chosen_geometric_mean": -1.3304624557495117, + "epoch": 0.7, + "grad_norm": 3.15625, + "learning_rate": 3.65125226613954e-06, + "log_odds": 1.0050976276397705, + "log_odds_ratio": -0.46832185983657837, + "loss": 0.3569, + "rejected_geometric_mean": -2.1779673099517822, + "step": 2815 + }, + { + "chosen_geometric_mean": -1.0426019430160522, + "epoch": 0.7, + "grad_norm": 9.625, + "learning_rate": 3.650388066423711e-06, + "log_odds": 1.5484000444412231, + "log_odds_ratio": -0.35711902379989624, + "loss": 0.336, + "rejected_geometric_mean": -2.3293800354003906, + "step": 2816 + }, + { + "chosen_geometric_mean": -1.2241847515106201, + "epoch": 0.7, + "grad_norm": 2.390625, + "learning_rate": 3.6495236922814368e-06, + "log_odds": 1.184332251548767, + "log_odds_ratio": -0.3408709466457367, + "loss": 0.2917, + "rejected_geometric_mean": -2.2136967182159424, + "step": 2817 + }, + { + "chosen_geometric_mean": -1.072042465209961, + "epoch": 0.7, + "grad_norm": 7.59375, + "learning_rate": 3.6486591438437787e-06, + "log_odds": 1.9802992343902588, + "log_odds_ratio": -0.2708457112312317, + "loss": 0.3281, + "rejected_geometric_mean": -2.771652936935425, + "step": 2818 + }, + { + "chosen_geometric_mean": -0.9629544615745544, + "epoch": 0.7, + "grad_norm": 2.140625, + "learning_rate": 3.6477944212418216e-06, + "log_odds": 2.793285846710205, + "log_odds_ratio": -0.13058684766292572, + "loss": 0.2456, + "rejected_geometric_mean": -3.376739740371704, + "step": 2819 + }, + { + "chosen_geometric_mean": -0.9636276960372925, + "epoch": 0.7, + "grad_norm": 4.75, + "learning_rate": 3.646929524606678e-06, + "log_odds": 4.886661052703857, + "log_odds_ratio": -0.28138864040374756, + "loss": 0.2889, + "rejected_geometric_mean": -5.557032585144043, + "step": 2820 + }, + { + "chosen_geometric_mean": -0.9948627948760986, + "epoch": 0.7, + "grad_norm": 2.921875, + "learning_rate": 3.6460644540694878e-06, + "log_odds": 4.984259605407715, + "log_odds_ratio": -0.06001940742135048, + "loss": 0.2662, + "rejected_geometric_mean": -5.544945240020752, + "step": 2821 + }, + { + "chosen_geometric_mean": -1.1033872365951538, + "epoch": 0.7, + "grad_norm": 4.3125, + "learning_rate": 3.645199209761417e-06, + "log_odds": 3.1143417358398438, + "log_odds_ratio": -0.33411094546318054, + "loss": 0.2887, + "rejected_geometric_mean": -4.0048508644104, + "step": 2822 + }, + { + "chosen_geometric_mean": -1.1113648414611816, + "epoch": 0.7, + "grad_norm": 7.28125, + "learning_rate": 3.6443337918136567e-06, + "log_odds": 3.2622792720794678, + "log_odds_ratio": -0.4071517884731293, + "loss": 0.2934, + "rejected_geometric_mean": -4.210181713104248, + "step": 2823 + }, + { + "chosen_geometric_mean": -0.9555479288101196, + "epoch": 0.7, + "grad_norm": 5.8125, + "learning_rate": 3.6434682003574252e-06, + "log_odds": 4.160223007202148, + "log_odds_ratio": -0.4531787931919098, + "loss": 0.2667, + "rejected_geometric_mean": -4.865631103515625, + "step": 2824 + }, + { + "chosen_geometric_mean": -1.2626402378082275, + "epoch": 0.7, + "grad_norm": 8.25, + "learning_rate": 3.6426024355239662e-06, + "log_odds": 8.788166999816895, + "log_odds_ratio": -0.01741533726453781, + "loss": 0.2939, + "rejected_geometric_mean": -9.7139310836792, + "step": 2825 + }, + { + "chosen_geometric_mean": -1.0154446363449097, + "epoch": 0.7, + "grad_norm": 4.65625, + "learning_rate": 3.6417364974445516e-06, + "log_odds": 2.41886568069458, + "log_odds_ratio": -0.20000973343849182, + "loss": 0.3149, + "rejected_geometric_mean": -3.091154098510742, + "step": 2826 + }, + { + "chosen_geometric_mean": -1.023493766784668, + "epoch": 0.7, + "grad_norm": 5.46875, + "learning_rate": 3.6408703862504775e-06, + "log_odds": 3.2157325744628906, + "log_odds_ratio": -0.22196510434150696, + "loss": 0.2855, + "rejected_geometric_mean": -3.932974338531494, + "step": 2827 + }, + { + "chosen_geometric_mean": -1.1866214275360107, + "epoch": 0.7, + "grad_norm": 3.3125, + "learning_rate": 3.6400041020730673e-06, + "log_odds": 4.762442111968994, + "log_odds_ratio": -0.11781342327594757, + "loss": 0.2842, + "rejected_geometric_mean": -5.6504106521606445, + "step": 2828 + }, + { + "chosen_geometric_mean": -1.1774230003356934, + "epoch": 0.7, + "grad_norm": 10.375, + "learning_rate": 3.639137645043671e-06, + "log_odds": 6.176759719848633, + "log_odds_ratio": -0.039175231009721756, + "loss": 0.3492, + "rejected_geometric_mean": -7.000636100769043, + "step": 2829 + }, + { + "chosen_geometric_mean": -1.0763508081436157, + "epoch": 0.7, + "grad_norm": 6.875, + "learning_rate": 3.638271015293664e-06, + "log_odds": 7.431227684020996, + "log_odds_ratio": -0.3142777681350708, + "loss": 0.2599, + "rejected_geometric_mean": -8.207426071166992, + "step": 2830 + }, + { + "chosen_geometric_mean": -1.0731321573257446, + "epoch": 0.7, + "grad_norm": 12.875, + "learning_rate": 3.637404212954448e-06, + "log_odds": 6.129850387573242, + "log_odds_ratio": -0.14326222240924835, + "loss": 0.3453, + "rejected_geometric_mean": -6.861411094665527, + "step": 2831 + }, + { + "chosen_geometric_mean": -1.3315844535827637, + "epoch": 0.7, + "grad_norm": 3.375, + "learning_rate": 3.636537238157451e-06, + "log_odds": 2.686081647872925, + "log_odds_ratio": -0.24063318967819214, + "loss": 0.3212, + "rejected_geometric_mean": -3.7858405113220215, + "step": 2832 + }, + { + "chosen_geometric_mean": -1.0928833484649658, + "epoch": 0.7, + "grad_norm": 4.84375, + "learning_rate": 3.6356700910341276e-06, + "log_odds": 6.244639873504639, + "log_odds_ratio": -0.2651546001434326, + "loss": 0.2673, + "rejected_geometric_mean": -7.038464546203613, + "step": 2833 + }, + { + "chosen_geometric_mean": -1.0343507528305054, + "epoch": 0.7, + "grad_norm": 10.3125, + "learning_rate": 3.634802771715958e-06, + "log_odds": 5.618892669677734, + "log_odds_ratio": -0.13159295916557312, + "loss": 0.3345, + "rejected_geometric_mean": -6.224715709686279, + "step": 2834 + }, + { + "chosen_geometric_mean": -0.9965384006500244, + "epoch": 0.7, + "grad_norm": 26.625, + "learning_rate": 3.6339352803344473e-06, + "log_odds": 5.232593536376953, + "log_odds_ratio": -0.1649114191532135, + "loss": 0.2854, + "rejected_geometric_mean": -5.8297271728515625, + "step": 2835 + }, + { + "chosen_geometric_mean": -1.0090992450714111, + "epoch": 0.7, + "grad_norm": 2.71875, + "learning_rate": 3.633067617021131e-06, + "log_odds": 9.240349769592285, + "log_odds_ratio": -0.1681160032749176, + "loss": 0.3308, + "rejected_geometric_mean": -9.876997947692871, + "step": 2836 + }, + { + "chosen_geometric_mean": -1.1598843336105347, + "epoch": 0.7, + "grad_norm": 4.9375, + "learning_rate": 3.632199781907565e-06, + "log_odds": 1.6736328601837158, + "log_odds_ratio": -0.2053617537021637, + "loss": 0.2942, + "rejected_geometric_mean": -2.549840211868286, + "step": 2837 + }, + { + "chosen_geometric_mean": -1.5050318241119385, + "epoch": 0.7, + "grad_norm": 31.25, + "learning_rate": 3.6313317751253342e-06, + "log_odds": 0.7751402258872986, + "log_odds_ratio": -0.4533786177635193, + "loss": 0.3514, + "rejected_geometric_mean": -2.1702628135681152, + "step": 2838 + }, + { + "chosen_geometric_mean": -0.8967759609222412, + "epoch": 0.7, + "grad_norm": 6.34375, + "learning_rate": 3.6304635968060513e-06, + "log_odds": 7.705559730529785, + "log_odds_ratio": -0.0032393396832048893, + "loss": 0.2865, + "rejected_geometric_mean": -8.077981948852539, + "step": 2839 + }, + { + "chosen_geometric_mean": -0.8932985067367554, + "epoch": 0.7, + "grad_norm": 8.75, + "learning_rate": 3.6295952470813513e-06, + "log_odds": 7.999050617218018, + "log_odds_ratio": -0.15875405073165894, + "loss": 0.2865, + "rejected_geometric_mean": -8.498997688293457, + "step": 2840 + }, + { + "chosen_geometric_mean": -0.983068585395813, + "epoch": 0.7, + "grad_norm": 5.6875, + "learning_rate": 3.6287267260828956e-06, + "log_odds": 4.363382339477539, + "log_odds_ratio": -0.15554770827293396, + "loss": 0.252, + "rejected_geometric_mean": -4.971439361572266, + "step": 2841 + }, + { + "chosen_geometric_mean": -1.074861764907837, + "epoch": 0.7, + "grad_norm": 3.625, + "learning_rate": 3.627858033942376e-06, + "log_odds": 4.448439121246338, + "log_odds_ratio": -0.1433352530002594, + "loss": 0.3255, + "rejected_geometric_mean": -5.172051906585693, + "step": 2842 + }, + { + "chosen_geometric_mean": -0.8451831340789795, + "epoch": 0.7, + "grad_norm": 4.78125, + "learning_rate": 3.6269891707915045e-06, + "log_odds": 7.4332475662231445, + "log_odds_ratio": -0.1932828575372696, + "loss": 0.3071, + "rejected_geometric_mean": -7.843883991241455, + "step": 2843 + }, + { + "chosen_geometric_mean": -1.3329148292541504, + "epoch": 0.7, + "grad_norm": 6.625, + "learning_rate": 3.626120136762023e-06, + "log_odds": 0.8355885744094849, + "log_odds_ratio": -0.40755724906921387, + "loss": 0.2917, + "rejected_geometric_mean": -2.033684492111206, + "step": 2844 + }, + { + "chosen_geometric_mean": -1.203716516494751, + "epoch": 0.7, + "grad_norm": 7.625, + "learning_rate": 3.6252509319856976e-06, + "log_odds": 6.021388053894043, + "log_odds_ratio": -0.25284820795059204, + "loss": 0.3504, + "rejected_geometric_mean": -6.96200704574585, + "step": 2845 + }, + { + "chosen_geometric_mean": -2.2847537994384766, + "epoch": 0.7, + "grad_norm": 61.0, + "learning_rate": 3.6243815565943202e-06, + "log_odds": -0.7167325019836426, + "log_odds_ratio": -1.9038848876953125, + "loss": 0.4479, + "rejected_geometric_mean": -1.733677625656128, + "step": 2846 + }, + { + "chosen_geometric_mean": -1.0444700717926025, + "epoch": 0.7, + "grad_norm": 14.0625, + "learning_rate": 3.6235120107197098e-06, + "log_odds": 6.482638835906982, + "log_odds_ratio": -0.07388225942850113, + "loss": 0.3317, + "rejected_geometric_mean": -7.129493713378906, + "step": 2847 + }, + { + "chosen_geometric_mean": -1.2028945684432983, + "epoch": 0.71, + "grad_norm": 21.75, + "learning_rate": 3.622642294493709e-06, + "log_odds": 10.523568153381348, + "log_odds_ratio": -0.021871428936719894, + "loss": 0.3115, + "rejected_geometric_mean": -11.376986503601074, + "step": 2848 + }, + { + "chosen_geometric_mean": -0.9383476972579956, + "epoch": 0.71, + "grad_norm": 9.3125, + "learning_rate": 3.621772408048189e-06, + "log_odds": 9.361989974975586, + "log_odds_ratio": -0.15519429743289948, + "loss": 0.2857, + "rejected_geometric_mean": -9.891166687011719, + "step": 2849 + }, + { + "chosen_geometric_mean": -1.2938652038574219, + "epoch": 0.71, + "grad_norm": 32.5, + "learning_rate": 3.6209023515150452e-06, + "log_odds": 6.742959022521973, + "log_odds_ratio": -0.003874434158205986, + "loss": 0.3132, + "rejected_geometric_mean": -7.639260768890381, + "step": 2850 + }, + { + "chosen_geometric_mean": -1.031671166419983, + "epoch": 0.71, + "grad_norm": 17.0, + "learning_rate": 3.6200321250261988e-06, + "log_odds": 2.928571939468384, + "log_odds_ratio": -0.32259541749954224, + "loss": 0.3301, + "rejected_geometric_mean": -3.703491449356079, + "step": 2851 + }, + { + "chosen_geometric_mean": -1.0917598009109497, + "epoch": 0.71, + "grad_norm": 5.3125, + "learning_rate": 3.619161728713597e-06, + "log_odds": 3.916445255279541, + "log_odds_ratio": -0.16969645023345947, + "loss": 0.3133, + "rejected_geometric_mean": -4.683265686035156, + "step": 2852 + }, + { + "chosen_geometric_mean": -1.1196911334991455, + "epoch": 0.71, + "grad_norm": 9.0625, + "learning_rate": 3.6182911627092127e-06, + "log_odds": 2.8542776107788086, + "log_odds_ratio": -0.33309027552604675, + "loss": 0.2862, + "rejected_geometric_mean": -3.720390796661377, + "step": 2853 + }, + { + "chosen_geometric_mean": -1.2806541919708252, + "epoch": 0.71, + "grad_norm": 2.3125, + "learning_rate": 3.617420427145044e-06, + "log_odds": 1.3125829696655273, + "log_odds_ratio": -0.30039530992507935, + "loss": 0.288, + "rejected_geometric_mean": -2.3679492473602295, + "step": 2854 + }, + { + "chosen_geometric_mean": -1.1189771890640259, + "epoch": 0.71, + "grad_norm": 2.390625, + "learning_rate": 3.616549522153117e-06, + "log_odds": 3.094123363494873, + "log_odds_ratio": -0.45225197076797485, + "loss": 0.2457, + "rejected_geometric_mean": -4.019622802734375, + "step": 2855 + }, + { + "chosen_geometric_mean": -1.1271673440933228, + "epoch": 0.71, + "grad_norm": 4.0, + "learning_rate": 3.6156784478654816e-06, + "log_odds": 3.9634509086608887, + "log_odds_ratio": -0.21237899363040924, + "loss": 0.2871, + "rejected_geometric_mean": -4.838814735412598, + "step": 2856 + }, + { + "chosen_geometric_mean": -0.82262122631073, + "epoch": 0.71, + "grad_norm": 3.453125, + "learning_rate": 3.614807204414212e-06, + "log_odds": 8.311541557312012, + "log_odds_ratio": -0.07192796468734741, + "loss": 0.2859, + "rejected_geometric_mean": -8.602322578430176, + "step": 2857 + }, + { + "chosen_geometric_mean": -1.2270574569702148, + "epoch": 0.71, + "grad_norm": 1.984375, + "learning_rate": 3.61393579193141e-06, + "log_odds": 3.606438398361206, + "log_odds_ratio": -0.38022294640541077, + "loss": 0.2741, + "rejected_geometric_mean": -4.6897969245910645, + "step": 2858 + }, + { + "chosen_geometric_mean": -0.9272036552429199, + "epoch": 0.71, + "grad_norm": 15.875, + "learning_rate": 3.613064210549204e-06, + "log_odds": 5.272307872772217, + "log_odds_ratio": -0.2586124539375305, + "loss": 0.4395, + "rejected_geometric_mean": -5.872880935668945, + "step": 2859 + }, + { + "chosen_geometric_mean": -1.1192623376846313, + "epoch": 0.71, + "grad_norm": 2.515625, + "learning_rate": 3.6121924603997454e-06, + "log_odds": 2.36759352684021, + "log_odds_ratio": -0.44803664088249207, + "loss": 0.3149, + "rejected_geometric_mean": -3.3428218364715576, + "step": 2860 + }, + { + "chosen_geometric_mean": -1.0160514116287231, + "epoch": 0.71, + "grad_norm": 11.0, + "learning_rate": 3.611320541615213e-06, + "log_odds": 10.391234397888184, + "log_odds_ratio": -0.0026171058416366577, + "loss": 0.2899, + "rejected_geometric_mean": -10.946667671203613, + "step": 2861 + }, + { + "chosen_geometric_mean": -1.2213196754455566, + "epoch": 0.71, + "grad_norm": 5.84375, + "learning_rate": 3.61044845432781e-06, + "log_odds": 1.7823609113693237, + "log_odds_ratio": -0.5275455713272095, + "loss": 0.3376, + "rejected_geometric_mean": -2.919694423675537, + "step": 2862 + }, + { + "chosen_geometric_mean": -1.3653643131256104, + "epoch": 0.71, + "grad_norm": 25.5, + "learning_rate": 3.609576198669767e-06, + "log_odds": 3.5923714637756348, + "log_odds_ratio": -0.14883548021316528, + "loss": 0.3129, + "rejected_geometric_mean": -4.703958034515381, + "step": 2863 + }, + { + "chosen_geometric_mean": -0.995710551738739, + "epoch": 0.71, + "grad_norm": 3.0, + "learning_rate": 3.608703774773338e-06, + "log_odds": 3.2421445846557617, + "log_odds_ratio": -0.0920185074210167, + "loss": 0.2727, + "rejected_geometric_mean": -3.816632032394409, + "step": 2864 + }, + { + "chosen_geometric_mean": -1.0559688806533813, + "epoch": 0.71, + "grad_norm": 7.46875, + "learning_rate": 3.6078311827708023e-06, + "log_odds": 5.935116291046143, + "log_odds_ratio": -0.24756161868572235, + "loss": 0.3297, + "rejected_geometric_mean": -6.614165782928467, + "step": 2865 + }, + { + "chosen_geometric_mean": -1.242896318435669, + "epoch": 0.71, + "grad_norm": 7.03125, + "learning_rate": 3.6069584227944683e-06, + "log_odds": 3.2242040634155273, + "log_odds_ratio": -0.15970982611179352, + "loss": 0.3004, + "rejected_geometric_mean": -4.181958198547363, + "step": 2866 + }, + { + "chosen_geometric_mean": -1.017927885055542, + "epoch": 0.71, + "grad_norm": 12.625, + "learning_rate": 3.6060854949766656e-06, + "log_odds": 1.523719072341919, + "log_odds_ratio": -0.3847725987434387, + "loss": 0.2926, + "rejected_geometric_mean": -2.31961727142334, + "step": 2867 + }, + { + "chosen_geometric_mean": -1.0034112930297852, + "epoch": 0.71, + "grad_norm": 11.3125, + "learning_rate": 3.605212399449751e-06, + "log_odds": 3.6720449924468994, + "log_odds_ratio": -0.2283334881067276, + "loss": 0.3088, + "rejected_geometric_mean": -4.28029203414917, + "step": 2868 + }, + { + "chosen_geometric_mean": -1.346820592880249, + "epoch": 0.71, + "grad_norm": 61.5, + "learning_rate": 3.6043391363461083e-06, + "log_odds": 8.382649421691895, + "log_odds_ratio": -0.009049078449606895, + "loss": 0.4001, + "rejected_geometric_mean": -9.412971496582031, + "step": 2869 + }, + { + "chosen_geometric_mean": -1.1298664808273315, + "epoch": 0.71, + "grad_norm": 2.453125, + "learning_rate": 3.603465705798144e-06, + "log_odds": 3.4281344413757324, + "log_odds_ratio": -0.3055550456047058, + "loss": 0.3152, + "rejected_geometric_mean": -4.362220764160156, + "step": 2870 + }, + { + "chosen_geometric_mean": -1.3707338571548462, + "epoch": 0.71, + "grad_norm": 18.625, + "learning_rate": 3.6025921079382896e-06, + "log_odds": 5.333308219909668, + "log_odds_ratio": -0.2573586106300354, + "loss": 0.3243, + "rejected_geometric_mean": -6.469573974609375, + "step": 2871 + }, + { + "chosen_geometric_mean": -1.0249720811843872, + "epoch": 0.71, + "grad_norm": 4.8125, + "learning_rate": 3.6017183428990065e-06, + "log_odds": 0.3134930431842804, + "log_odds_ratio": -0.5991817116737366, + "loss": 0.3027, + "rejected_geometric_mean": -1.233628749847412, + "step": 2872 + }, + { + "chosen_geometric_mean": -0.9738062620162964, + "epoch": 0.71, + "grad_norm": 15.9375, + "learning_rate": 3.6008444108127767e-06, + "log_odds": 4.27097749710083, + "log_odds_ratio": -0.035241879522800446, + "loss": 0.3199, + "rejected_geometric_mean": -4.743121147155762, + "step": 2873 + }, + { + "chosen_geometric_mean": -1.1483087539672852, + "epoch": 0.71, + "grad_norm": 19.125, + "learning_rate": 3.599970311812109e-06, + "log_odds": 7.76953649520874, + "log_odds_ratio": -0.005652458406984806, + "loss": 0.2435, + "rejected_geometric_mean": -8.484789848327637, + "step": 2874 + }, + { + "chosen_geometric_mean": -1.0839064121246338, + "epoch": 0.71, + "grad_norm": 3.390625, + "learning_rate": 3.5990960460295397e-06, + "log_odds": 3.6271579265594482, + "log_odds_ratio": -0.2674601972103119, + "loss": 0.266, + "rejected_geometric_mean": -4.4664154052734375, + "step": 2875 + }, + { + "chosen_geometric_mean": -1.0747381448745728, + "epoch": 0.71, + "grad_norm": 4.34375, + "learning_rate": 3.5982216135976267e-06, + "log_odds": 10.735980033874512, + "log_odds_ratio": -0.10335230082273483, + "loss": 0.3176, + "rejected_geometric_mean": -11.398032188415527, + "step": 2876 + }, + { + "chosen_geometric_mean": -1.103008508682251, + "epoch": 0.71, + "grad_norm": 4.875, + "learning_rate": 3.5973470146489555e-06, + "log_odds": 7.0893707275390625, + "log_odds_ratio": -0.13840705156326294, + "loss": 0.2758, + "rejected_geometric_mean": -7.871971130371094, + "step": 2877 + }, + { + "chosen_geometric_mean": -0.9946990013122559, + "epoch": 0.71, + "grad_norm": 22.25, + "learning_rate": 3.596472249316136e-06, + "log_odds": 6.794026851654053, + "log_odds_ratio": -0.29002225399017334, + "loss": 0.3523, + "rejected_geometric_mean": -7.463706970214844, + "step": 2878 + }, + { + "chosen_geometric_mean": -1.0458931922912598, + "epoch": 0.71, + "grad_norm": 11.5, + "learning_rate": 3.5955973177318037e-06, + "log_odds": 8.17849063873291, + "log_odds_ratio": -0.3763068914413452, + "loss": 0.3323, + "rejected_geometric_mean": -9.040751457214355, + "step": 2879 + }, + { + "chosen_geometric_mean": -1.085469365119934, + "epoch": 0.71, + "grad_norm": 4.59375, + "learning_rate": 3.59472222002862e-06, + "log_odds": 1.2381865978240967, + "log_odds_ratio": -0.32601800560951233, + "loss": 0.2972, + "rejected_geometric_mean": -2.0480709075927734, + "step": 2880 + }, + { + "chosen_geometric_mean": -1.4547300338745117, + "epoch": 0.71, + "grad_norm": 20.75, + "learning_rate": 3.5938469563392696e-06, + "log_odds": 5.7970380783081055, + "log_odds_ratio": -0.10783986747264862, + "loss": 0.3031, + "rejected_geometric_mean": -6.970967769622803, + "step": 2881 + }, + { + "chosen_geometric_mean": -1.0469673871994019, + "epoch": 0.71, + "grad_norm": 4.59375, + "learning_rate": 3.5929715267964642e-06, + "log_odds": 1.2165926694869995, + "log_odds_ratio": -0.3355403542518616, + "loss": 0.2809, + "rejected_geometric_mean": -2.0116126537323, + "step": 2882 + }, + { + "chosen_geometric_mean": -1.1540981531143188, + "epoch": 0.71, + "grad_norm": 26.125, + "learning_rate": 3.59209593153294e-06, + "log_odds": 5.720816612243652, + "log_odds_ratio": -0.020250795409083366, + "loss": 0.2716, + "rejected_geometric_mean": -6.4988908767700195, + "step": 2883 + }, + { + "chosen_geometric_mean": -1.2877602577209473, + "epoch": 0.71, + "grad_norm": 7.875, + "learning_rate": 3.5912201706814576e-06, + "log_odds": 7.052018165588379, + "log_odds_ratio": -0.3120116591453552, + "loss": 0.2821, + "rejected_geometric_mean": -8.120575904846191, + "step": 2884 + }, + { + "chosen_geometric_mean": -1.1806527376174927, + "epoch": 0.71, + "grad_norm": 2.703125, + "learning_rate": 3.590344244374804e-06, + "log_odds": 2.9352641105651855, + "log_odds_ratio": -0.3106015920639038, + "loss": 0.2934, + "rejected_geometric_mean": -3.889331340789795, + "step": 2885 + }, + { + "chosen_geometric_mean": -1.028171181678772, + "epoch": 0.71, + "grad_norm": 2.53125, + "learning_rate": 3.5894681527457907e-06, + "log_odds": 13.253522872924805, + "log_odds_ratio": -0.012386060319840908, + "loss": 0.2615, + "rejected_geometric_mean": -13.809536933898926, + "step": 2886 + }, + { + "chosen_geometric_mean": -1.0931633710861206, + "epoch": 0.71, + "grad_norm": 15.375, + "learning_rate": 3.588591895927253e-06, + "log_odds": 2.0482444763183594, + "log_odds_ratio": -0.4659730792045593, + "loss": 0.3961, + "rejected_geometric_mean": -2.9730358123779297, + "step": 2887 + }, + { + "chosen_geometric_mean": -1.1730064153671265, + "epoch": 0.72, + "grad_norm": 5.46875, + "learning_rate": 3.587715474052055e-06, + "log_odds": 2.0979630947113037, + "log_odds_ratio": -0.2534909248352051, + "loss": 0.3028, + "rejected_geometric_mean": -2.983760356903076, + "step": 2888 + }, + { + "chosen_geometric_mean": -0.9911284446716309, + "epoch": 0.72, + "grad_norm": 14.375, + "learning_rate": 3.58683888725308e-06, + "log_odds": 6.1868896484375, + "log_odds_ratio": -0.2156946063041687, + "loss": 0.2398, + "rejected_geometric_mean": -6.883720874786377, + "step": 2889 + }, + { + "chosen_geometric_mean": -1.1424111127853394, + "epoch": 0.72, + "grad_norm": 4.96875, + "learning_rate": 3.5859621356632416e-06, + "log_odds": 3.0099568367004395, + "log_odds_ratio": -0.2563014626502991, + "loss": 0.2993, + "rejected_geometric_mean": -3.938478946685791, + "step": 2890 + }, + { + "chosen_geometric_mean": -1.0422800779342651, + "epoch": 0.72, + "grad_norm": 12.125, + "learning_rate": 3.585085219415477e-06, + "log_odds": 3.5283617973327637, + "log_odds_ratio": -0.22570690512657166, + "loss": 0.2645, + "rejected_geometric_mean": -4.2934441566467285, + "step": 2891 + }, + { + "chosen_geometric_mean": -1.0383930206298828, + "epoch": 0.72, + "grad_norm": 8.1875, + "learning_rate": 3.584208138642747e-06, + "log_odds": 2.748898506164551, + "log_odds_ratio": -0.14497393369674683, + "loss": 0.2579, + "rejected_geometric_mean": -3.436101198196411, + "step": 2892 + }, + { + "chosen_geometric_mean": -1.293508529663086, + "epoch": 0.72, + "grad_norm": 28.5, + "learning_rate": 3.5833308934780364e-06, + "log_odds": 2.008462429046631, + "log_odds_ratio": -0.2822262644767761, + "loss": 0.3124, + "rejected_geometric_mean": -3.0993120670318604, + "step": 2893 + }, + { + "chosen_geometric_mean": -0.9336520433425903, + "epoch": 0.72, + "grad_norm": 13.625, + "learning_rate": 3.582453484054359e-06, + "log_odds": 1.4667558670043945, + "log_odds_ratio": -0.42197176814079285, + "loss": 0.2391, + "rejected_geometric_mean": -2.231790781021118, + "step": 2894 + }, + { + "chosen_geometric_mean": -1.115248203277588, + "epoch": 0.72, + "grad_norm": 7.125, + "learning_rate": 3.5815759105047505e-06, + "log_odds": 4.747292995452881, + "log_odds_ratio": -0.16788442432880402, + "loss": 0.2685, + "rejected_geometric_mean": -5.519431114196777, + "step": 2895 + }, + { + "chosen_geometric_mean": -1.1356122493743896, + "epoch": 0.72, + "grad_norm": 2.3125, + "learning_rate": 3.580698172962272e-06, + "log_odds": 5.623650074005127, + "log_odds_ratio": -0.05085078999400139, + "loss": 0.3047, + "rejected_geometric_mean": -6.392096996307373, + "step": 2896 + }, + { + "chosen_geometric_mean": -1.2904077768325806, + "epoch": 0.72, + "grad_norm": 11.375, + "learning_rate": 3.579820271560009e-06, + "log_odds": 6.957749843597412, + "log_odds_ratio": -0.13208073377609253, + "loss": 0.3431, + "rejected_geometric_mean": -7.991968154907227, + "step": 2897 + }, + { + "chosen_geometric_mean": -2.51975417137146, + "epoch": 0.72, + "grad_norm": 34.25, + "learning_rate": 3.578942206431073e-06, + "log_odds": 8.321081161499023, + "log_odds_ratio": -0.1168995350599289, + "loss": 0.4405, + "rejected_geometric_mean": -10.604802131652832, + "step": 2898 + }, + { + "chosen_geometric_mean": -1.1411012411117554, + "epoch": 0.72, + "grad_norm": 9.75, + "learning_rate": 3.5780639777086e-06, + "log_odds": 6.304634094238281, + "log_odds_ratio": -0.11620986461639404, + "loss": 0.2556, + "rejected_geometric_mean": -7.10800838470459, + "step": 2899 + }, + { + "chosen_geometric_mean": -0.929471492767334, + "epoch": 0.72, + "grad_norm": 2.296875, + "learning_rate": 3.5771855855257503e-06, + "log_odds": 8.582326889038086, + "log_odds_ratio": -0.014976657927036285, + "loss": 0.3364, + "rejected_geometric_mean": -8.995965957641602, + "step": 2900 + }, + { + "chosen_geometric_mean": -1.1967740058898926, + "epoch": 0.72, + "grad_norm": 3.8125, + "learning_rate": 3.576307030015709e-06, + "log_odds": 2.3094024658203125, + "log_odds_ratio": -0.3423186242580414, + "loss": 0.2515, + "rejected_geometric_mean": -3.34940242767334, + "step": 2901 + }, + { + "chosen_geometric_mean": -0.9746506810188293, + "epoch": 0.72, + "grad_norm": 10.0, + "learning_rate": 3.575428311311686e-06, + "log_odds": 1.361006498336792, + "log_odds_ratio": -0.2947666645050049, + "loss": 0.3155, + "rejected_geometric_mean": -1.998544454574585, + "step": 2902 + }, + { + "chosen_geometric_mean": -1.0569311380386353, + "epoch": 0.72, + "grad_norm": 2.71875, + "learning_rate": 3.5745494295469163e-06, + "log_odds": 2.6071934700012207, + "log_odds_ratio": -0.22880232334136963, + "loss": 0.2422, + "rejected_geometric_mean": -3.4005699157714844, + "step": 2903 + }, + { + "chosen_geometric_mean": -1.0761109590530396, + "epoch": 0.72, + "grad_norm": 4.6875, + "learning_rate": 3.5736703848546604e-06, + "log_odds": 1.0108431577682495, + "log_odds_ratio": -0.41759926080703735, + "loss": 0.3086, + "rejected_geometric_mean": -1.8995462656021118, + "step": 2904 + }, + { + "chosen_geometric_mean": -1.1059271097183228, + "epoch": 0.72, + "grad_norm": 2.453125, + "learning_rate": 3.5727911773682022e-06, + "log_odds": 7.776189804077148, + "log_odds_ratio": -0.025785472244024277, + "loss": 0.2848, + "rejected_geometric_mean": -8.466445922851562, + "step": 2905 + }, + { + "chosen_geometric_mean": -1.098748803138733, + "epoch": 0.72, + "grad_norm": 3.703125, + "learning_rate": 3.5719118072208492e-06, + "log_odds": 4.253785133361816, + "log_odds_ratio": -0.12911765277385712, + "loss": 0.2697, + "rejected_geometric_mean": -5.013556957244873, + "step": 2906 + }, + { + "chosen_geometric_mean": -1.0058656930923462, + "epoch": 0.72, + "grad_norm": 4.125, + "learning_rate": 3.571032274545936e-06, + "log_odds": 6.001767158508301, + "log_odds_ratio": -0.1500195860862732, + "loss": 0.2856, + "rejected_geometric_mean": -6.636707305908203, + "step": 2907 + }, + { + "chosen_geometric_mean": -1.110149621963501, + "epoch": 0.72, + "grad_norm": 13.1875, + "learning_rate": 3.5701525794768225e-06, + "log_odds": 2.785057306289673, + "log_odds_ratio": -0.48065364360809326, + "loss": 0.2819, + "rejected_geometric_mean": -3.7463788986206055, + "step": 2908 + }, + { + "chosen_geometric_mean": -1.2046477794647217, + "epoch": 0.72, + "grad_norm": 7.0625, + "learning_rate": 3.5692727221468893e-06, + "log_odds": 9.334554672241211, + "log_odds_ratio": -0.1136009618639946, + "loss": 0.2783, + "rejected_geometric_mean": -10.209430694580078, + "step": 2909 + }, + { + "chosen_geometric_mean": -1.073198676109314, + "epoch": 0.72, + "grad_norm": 90.5, + "learning_rate": 3.568392702689544e-06, + "log_odds": 2.616333484649658, + "log_odds_ratio": -0.18227429687976837, + "loss": 0.3815, + "rejected_geometric_mean": -3.360713005065918, + "step": 2910 + }, + { + "chosen_geometric_mean": -1.3554667234420776, + "epoch": 0.72, + "grad_norm": 28.0, + "learning_rate": 3.5675125212382205e-06, + "log_odds": 1.5145007371902466, + "log_odds_ratio": -0.26084086298942566, + "loss": 0.4265, + "rejected_geometric_mean": -2.61480450630188, + "step": 2911 + }, + { + "chosen_geometric_mean": -1.0117418766021729, + "epoch": 0.72, + "grad_norm": 2.359375, + "learning_rate": 3.566632177926374e-06, + "log_odds": 4.620294570922852, + "log_odds_ratio": -0.1832827478647232, + "loss": 0.2846, + "rejected_geometric_mean": -5.19288444519043, + "step": 2912 + }, + { + "chosen_geometric_mean": -1.079593300819397, + "epoch": 0.72, + "grad_norm": 24.625, + "learning_rate": 3.565751672887486e-06, + "log_odds": 4.41737174987793, + "log_odds_ratio": -0.20978575944900513, + "loss": 0.3175, + "rejected_geometric_mean": -5.15620231628418, + "step": 2913 + }, + { + "chosen_geometric_mean": -0.8968774080276489, + "epoch": 0.72, + "grad_norm": 22.25, + "learning_rate": 3.564871006255062e-06, + "log_odds": 8.19293212890625, + "log_odds_ratio": -0.008783292025327682, + "loss": 0.2893, + "rejected_geometric_mean": -8.54030990600586, + "step": 2914 + }, + { + "chosen_geometric_mean": -1.266666293144226, + "epoch": 0.72, + "grad_norm": 43.25, + "learning_rate": 3.563990178162632e-06, + "log_odds": 3.8667588233947754, + "log_odds_ratio": -0.27653592824935913, + "loss": 0.3217, + "rejected_geometric_mean": -4.939450263977051, + "step": 2915 + }, + { + "chosen_geometric_mean": -1.0653128623962402, + "epoch": 0.72, + "grad_norm": 2.546875, + "learning_rate": 3.5631091887437514e-06, + "log_odds": 1.3053151369094849, + "log_odds_ratio": -0.3872668743133545, + "loss": 0.3029, + "rejected_geometric_mean": -2.1894147396087646, + "step": 2916 + }, + { + "chosen_geometric_mean": -1.0736126899719238, + "epoch": 0.72, + "grad_norm": 40.75, + "learning_rate": 3.5622280381319985e-06, + "log_odds": 0.987928032875061, + "log_odds_ratio": -0.515853762626648, + "loss": 0.4267, + "rejected_geometric_mean": -1.8639154434204102, + "step": 2917 + }, + { + "chosen_geometric_mean": -1.3199834823608398, + "epoch": 0.72, + "grad_norm": 30.625, + "learning_rate": 3.5613467264609786e-06, + "log_odds": 2.3685061931610107, + "log_odds_ratio": -0.5098247528076172, + "loss": 0.3693, + "rejected_geometric_mean": -3.528831958770752, + "step": 2918 + }, + { + "chosen_geometric_mean": -1.5684144496917725, + "epoch": 0.72, + "grad_norm": 6.90625, + "learning_rate": 3.560465253864318e-06, + "log_odds": 3.793482780456543, + "log_odds_ratio": -0.0528525747358799, + "loss": 0.3045, + "rejected_geometric_mean": -4.974355220794678, + "step": 2919 + }, + { + "chosen_geometric_mean": -1.08322012424469, + "epoch": 0.72, + "grad_norm": 9.0625, + "learning_rate": 3.5595836204756684e-06, + "log_odds": 1.4644570350646973, + "log_odds_ratio": -0.2642192840576172, + "loss": 0.2781, + "rejected_geometric_mean": -2.2852516174316406, + "step": 2920 + }, + { + "chosen_geometric_mean": -1.3038617372512817, + "epoch": 0.72, + "grad_norm": 24.875, + "learning_rate": 3.55870182642871e-06, + "log_odds": 4.31134033203125, + "log_odds_ratio": -0.31641972064971924, + "loss": 0.3255, + "rejected_geometric_mean": -5.42574405670166, + "step": 2921 + }, + { + "chosen_geometric_mean": -1.0990993976593018, + "epoch": 0.72, + "grad_norm": 21.75, + "learning_rate": 3.55781987185714e-06, + "log_odds": 3.6866350173950195, + "log_odds_ratio": -0.2529342770576477, + "loss": 0.3275, + "rejected_geometric_mean": -4.554658889770508, + "step": 2922 + }, + { + "chosen_geometric_mean": -1.0416215658187866, + "epoch": 0.72, + "grad_norm": 17.375, + "learning_rate": 3.5569377568946857e-06, + "log_odds": 6.277868747711182, + "log_odds_ratio": -0.008047031238675117, + "loss": 0.2812, + "rejected_geometric_mean": -6.872173309326172, + "step": 2923 + }, + { + "chosen_geometric_mean": -0.933387279510498, + "epoch": 0.72, + "grad_norm": 3.0625, + "learning_rate": 3.556055481675097e-06, + "log_odds": 4.137229919433594, + "log_odds_ratio": -0.10545343160629272, + "loss": 0.2489, + "rejected_geometric_mean": -4.6391754150390625, + "step": 2924 + }, + { + "chosen_geometric_mean": -1.5678935050964355, + "epoch": 0.72, + "grad_norm": 12.6875, + "learning_rate": 3.555173046332148e-06, + "log_odds": 4.39027214050293, + "log_odds_ratio": -0.48068705201148987, + "loss": 0.2965, + "rejected_geometric_mean": -5.753910064697266, + "step": 2925 + }, + { + "chosen_geometric_mean": -1.012325406074524, + "epoch": 0.72, + "grad_norm": 3.546875, + "learning_rate": 3.5542904509996372e-06, + "log_odds": 1.3178801536560059, + "log_odds_ratio": -0.3051193058490753, + "loss": 0.3256, + "rejected_geometric_mean": -2.036954879760742, + "step": 2926 + }, + { + "chosen_geometric_mean": -0.9478021860122681, + "epoch": 0.72, + "grad_norm": 2.9375, + "learning_rate": 3.5534076958113866e-06, + "log_odds": 1.0114662647247314, + "log_odds_ratio": -0.4355181157588959, + "loss": 0.3101, + "rejected_geometric_mean": -1.7280759811401367, + "step": 2927 + }, + { + "chosen_geometric_mean": -1.0631073713302612, + "epoch": 0.72, + "grad_norm": 4.28125, + "learning_rate": 3.5525247809012435e-06, + "log_odds": 4.627886772155762, + "log_odds_ratio": -0.25088199973106384, + "loss": 0.277, + "rejected_geometric_mean": -5.410758972167969, + "step": 2928 + }, + { + "chosen_geometric_mean": -1.4554808139801025, + "epoch": 0.73, + "grad_norm": 12.125, + "learning_rate": 3.5516417064030787e-06, + "log_odds": 2.5052809715270996, + "log_odds_ratio": -0.4308611750602722, + "loss": 0.3185, + "rejected_geometric_mean": -3.8070249557495117, + "step": 2929 + }, + { + "chosen_geometric_mean": -1.0729941129684448, + "epoch": 0.73, + "grad_norm": 9.875, + "learning_rate": 3.550758472450788e-06, + "log_odds": 3.3737521171569824, + "log_odds_ratio": -0.09191934019327164, + "loss": 0.3359, + "rejected_geometric_mean": -4.059999465942383, + "step": 2930 + }, + { + "chosen_geometric_mean": -1.0088980197906494, + "epoch": 0.73, + "grad_norm": 4.78125, + "learning_rate": 3.5498750791782903e-06, + "log_odds": 2.221349000930786, + "log_odds_ratio": -0.3173171877861023, + "loss": 0.3056, + "rejected_geometric_mean": -3.005133628845215, + "step": 2931 + }, + { + "chosen_geometric_mean": -1.174943208694458, + "epoch": 0.73, + "grad_norm": 4.25, + "learning_rate": 3.5489915267195297e-06, + "log_odds": 3.2603976726531982, + "log_odds_ratio": -0.1898675560951233, + "loss": 0.2701, + "rejected_geometric_mean": -4.1829304695129395, + "step": 2932 + }, + { + "chosen_geometric_mean": -1.0584540367126465, + "epoch": 0.73, + "grad_norm": 3.359375, + "learning_rate": 3.5481078152084735e-06, + "log_odds": 4.458843231201172, + "log_odds_ratio": -0.1456081122159958, + "loss": 0.2636, + "rejected_geometric_mean": -5.1377153396606445, + "step": 2933 + }, + { + "chosen_geometric_mean": -1.132278323173523, + "epoch": 0.73, + "grad_norm": 2.40625, + "learning_rate": 3.547223944779114e-06, + "log_odds": 3.1392176151275635, + "log_odds_ratio": -0.33783572912216187, + "loss": 0.2778, + "rejected_geometric_mean": -4.090661525726318, + "step": 2934 + }, + { + "chosen_geometric_mean": -1.1885133981704712, + "epoch": 0.73, + "grad_norm": 2.328125, + "learning_rate": 3.5463399155654672e-06, + "log_odds": 1.3988466262817383, + "log_odds_ratio": -0.3458109498023987, + "loss": 0.2962, + "rejected_geometric_mean": -2.390531539916992, + "step": 2935 + }, + { + "chosen_geometric_mean": -1.0242910385131836, + "epoch": 0.73, + "grad_norm": 5.8125, + "learning_rate": 3.545455727701572e-06, + "log_odds": 2.1612939834594727, + "log_odds_ratio": -0.18649156391620636, + "loss": 0.3045, + "rejected_geometric_mean": -2.8490052223205566, + "step": 2936 + }, + { + "chosen_geometric_mean": -1.043178677558899, + "epoch": 0.73, + "grad_norm": 8.25, + "learning_rate": 3.5445713813214945e-06, + "log_odds": 1.8834636211395264, + "log_odds_ratio": -0.18958070874214172, + "loss": 0.3793, + "rejected_geometric_mean": -2.5446019172668457, + "step": 2937 + }, + { + "chosen_geometric_mean": -1.1052913665771484, + "epoch": 0.73, + "grad_norm": 5.15625, + "learning_rate": 3.543686876559321e-06, + "log_odds": 0.8474446535110474, + "log_odds_ratio": -0.505370020866394, + "loss": 0.2766, + "rejected_geometric_mean": -1.8336048126220703, + "step": 2938 + }, + { + "chosen_geometric_mean": -1.2548660039901733, + "epoch": 0.73, + "grad_norm": 19.875, + "learning_rate": 3.5428022135491646e-06, + "log_odds": 4.82755184173584, + "log_odds_ratio": -0.1175309419631958, + "loss": 0.3086, + "rejected_geometric_mean": -5.783126354217529, + "step": 2939 + }, + { + "chosen_geometric_mean": -1.1247864961624146, + "epoch": 0.73, + "grad_norm": 12.625, + "learning_rate": 3.5419173924251613e-06, + "log_odds": 3.531195640563965, + "log_odds_ratio": -0.10150247067213058, + "loss": 0.265, + "rejected_geometric_mean": -4.303169250488281, + "step": 2940 + }, + { + "chosen_geometric_mean": -1.3705178499221802, + "epoch": 0.73, + "grad_norm": 14.625, + "learning_rate": 3.5410324133214725e-06, + "log_odds": 5.5651984214782715, + "log_odds_ratio": -0.051398202776908875, + "loss": 0.2781, + "rejected_geometric_mean": -6.642441749572754, + "step": 2941 + }, + { + "chosen_geometric_mean": -1.2551965713500977, + "epoch": 0.73, + "grad_norm": 2.015625, + "learning_rate": 3.540147276372279e-06, + "log_odds": 1.310220718383789, + "log_odds_ratio": -0.32723236083984375, + "loss": 0.2943, + "rejected_geometric_mean": -2.3799216747283936, + "step": 2942 + }, + { + "chosen_geometric_mean": -1.287485957145691, + "epoch": 0.73, + "grad_norm": 28.5, + "learning_rate": 3.5392619817117925e-06, + "log_odds": 4.355172157287598, + "log_odds_ratio": -0.444553017616272, + "loss": 0.3732, + "rejected_geometric_mean": -5.4761223793029785, + "step": 2943 + }, + { + "chosen_geometric_mean": -1.3267686367034912, + "epoch": 0.73, + "grad_norm": 10.5625, + "learning_rate": 3.538376529474242e-06, + "log_odds": 2.316166877746582, + "log_odds_ratio": -0.36492687463760376, + "loss": 0.3067, + "rejected_geometric_mean": -3.487961769104004, + "step": 2944 + }, + { + "chosen_geometric_mean": -1.1344878673553467, + "epoch": 0.73, + "grad_norm": 3.515625, + "learning_rate": 3.5374909197938852e-06, + "log_odds": 3.432511806488037, + "log_odds_ratio": -0.12016057223081589, + "loss": 0.2823, + "rejected_geometric_mean": -4.231391906738281, + "step": 2945 + }, + { + "chosen_geometric_mean": -1.018897533416748, + "epoch": 0.73, + "grad_norm": 12.0625, + "learning_rate": 3.5366051528050004e-06, + "log_odds": 8.292606353759766, + "log_odds_ratio": -0.10837334394454956, + "loss": 0.27, + "rejected_geometric_mean": -8.917527198791504, + "step": 2946 + }, + { + "chosen_geometric_mean": -1.2852914333343506, + "epoch": 0.73, + "grad_norm": 10.0, + "learning_rate": 3.5357192286418927e-06, + "log_odds": 2.9573636054992676, + "log_odds_ratio": -0.3059716820716858, + "loss": 0.3213, + "rejected_geometric_mean": -4.0752973556518555, + "step": 2947 + }, + { + "chosen_geometric_mean": -1.0186296701431274, + "epoch": 0.73, + "grad_norm": 15.0625, + "learning_rate": 3.534833147438888e-06, + "log_odds": 3.655884265899658, + "log_odds_ratio": -0.2886107563972473, + "loss": 0.3317, + "rejected_geometric_mean": -4.376067638397217, + "step": 2948 + }, + { + "chosen_geometric_mean": -1.1506547927856445, + "epoch": 0.73, + "grad_norm": 2.171875, + "learning_rate": 3.5339469093303386e-06, + "log_odds": 5.455211639404297, + "log_odds_ratio": -0.09388451278209686, + "loss": 0.2927, + "rejected_geometric_mean": -6.248235702514648, + "step": 2949 + }, + { + "chosen_geometric_mean": -1.2515027523040771, + "epoch": 0.73, + "grad_norm": 17.625, + "learning_rate": 3.533060514450618e-06, + "log_odds": 0.8516878485679626, + "log_odds_ratio": -0.39127808809280396, + "loss": 0.2487, + "rejected_geometric_mean": -1.9507312774658203, + "step": 2950 + }, + { + "chosen_geometric_mean": -1.2419028282165527, + "epoch": 0.73, + "grad_norm": 1.9453125, + "learning_rate": 3.5321739629341274e-06, + "log_odds": 6.790731430053711, + "log_odds_ratio": -0.1061495915055275, + "loss": 0.2766, + "rejected_geometric_mean": -7.749440670013428, + "step": 2951 + }, + { + "chosen_geometric_mean": -1.0775153636932373, + "epoch": 0.73, + "grad_norm": 12.75, + "learning_rate": 3.5312872549152865e-06, + "log_odds": 5.24193811416626, + "log_odds_ratio": -0.18931403756141663, + "loss": 0.2754, + "rejected_geometric_mean": -6.005500793457031, + "step": 2952 + }, + { + "chosen_geometric_mean": -1.082163691520691, + "epoch": 0.73, + "grad_norm": 8.0625, + "learning_rate": 3.5304003905285435e-06, + "log_odds": 1.1117662191390991, + "log_odds_ratio": -0.30731111764907837, + "loss": 0.3075, + "rejected_geometric_mean": -1.9479219913482666, + "step": 2953 + }, + { + "chosen_geometric_mean": -1.0758461952209473, + "epoch": 0.73, + "grad_norm": 13.5625, + "learning_rate": 3.529513369908368e-06, + "log_odds": 1.2507013082504272, + "log_odds_ratio": -0.3546789884567261, + "loss": 0.3193, + "rejected_geometric_mean": -2.1108663082122803, + "step": 2954 + }, + { + "chosen_geometric_mean": -1.0299601554870605, + "epoch": 0.73, + "grad_norm": 5.53125, + "learning_rate": 3.5286261931892517e-06, + "log_odds": 2.343064069747925, + "log_odds_ratio": -0.1808614879846573, + "loss": 0.291, + "rejected_geometric_mean": -3.0170392990112305, + "step": 2955 + }, + { + "chosen_geometric_mean": -1.303126573562622, + "epoch": 0.73, + "grad_norm": 2.8125, + "learning_rate": 3.527738860505715e-06, + "log_odds": 3.4055275917053223, + "log_odds_ratio": -0.18917861580848694, + "loss": 0.3367, + "rejected_geometric_mean": -4.469107627868652, + "step": 2956 + }, + { + "chosen_geometric_mean": -1.0060418844223022, + "epoch": 0.73, + "grad_norm": 5.5625, + "learning_rate": 3.526851371992297e-06, + "log_odds": 1.8988606929779053, + "log_odds_ratio": -0.24515901505947113, + "loss": 0.3536, + "rejected_geometric_mean": -2.6123147010803223, + "step": 2957 + }, + { + "chosen_geometric_mean": -0.9300158619880676, + "epoch": 0.73, + "grad_norm": 3.78125, + "learning_rate": 3.525963727783562e-06, + "log_odds": 2.0578949451446533, + "log_odds_ratio": -0.2889902889728546, + "loss": 0.2487, + "rejected_geometric_mean": -2.6441211700439453, + "step": 2958 + }, + { + "chosen_geometric_mean": -1.0528204441070557, + "epoch": 0.73, + "grad_norm": 5.8125, + "learning_rate": 3.5250759280140987e-06, + "log_odds": 3.282766819000244, + "log_odds_ratio": -0.27545270323753357, + "loss": 0.2631, + "rejected_geometric_mean": -3.966888189315796, + "step": 2959 + }, + { + "chosen_geometric_mean": -0.9469739198684692, + "epoch": 0.73, + "grad_norm": 8.625, + "learning_rate": 3.5241879728185198e-06, + "log_odds": 5.230642795562744, + "log_odds_ratio": -0.23416581749916077, + "loss": 0.3035, + "rejected_geometric_mean": -5.861437797546387, + "step": 2960 + }, + { + "chosen_geometric_mean": -1.2201035022735596, + "epoch": 0.73, + "grad_norm": 16.25, + "learning_rate": 3.523299862331459e-06, + "log_odds": 4.7978739738464355, + "log_odds_ratio": -0.10702741146087646, + "loss": 0.3041, + "rejected_geometric_mean": -5.692847728729248, + "step": 2961 + }, + { + "chosen_geometric_mean": -1.0921026468276978, + "epoch": 0.73, + "grad_norm": 2.796875, + "learning_rate": 3.522411596687576e-06, + "log_odds": 1.2893092632293701, + "log_odds_ratio": -0.3354733884334564, + "loss": 0.3348, + "rejected_geometric_mean": -2.102231025695801, + "step": 2962 + }, + { + "chosen_geometric_mean": -0.8954340219497681, + "epoch": 0.73, + "grad_norm": 2.28125, + "learning_rate": 3.521523176021553e-06, + "log_odds": 5.4080681800842285, + "log_odds_ratio": -0.20615388453006744, + "loss": 0.2519, + "rejected_geometric_mean": -5.904120445251465, + "step": 2963 + }, + { + "chosen_geometric_mean": -1.0792291164398193, + "epoch": 0.73, + "grad_norm": 3.984375, + "learning_rate": 3.5206346004680958e-06, + "log_odds": 6.336475372314453, + "log_odds_ratio": -0.4682878851890564, + "loss": 0.2598, + "rejected_geometric_mean": -7.21144962310791, + "step": 2964 + }, + { + "chosen_geometric_mean": -1.2528568506240845, + "epoch": 0.73, + "grad_norm": 6.8125, + "learning_rate": 3.5197458701619346e-06, + "log_odds": 0.9109684824943542, + "log_odds_ratio": -0.38998323678970337, + "loss": 0.2972, + "rejected_geometric_mean": -2.011408805847168, + "step": 2965 + }, + { + "chosen_geometric_mean": -1.1827301979064941, + "epoch": 0.73, + "grad_norm": 18.625, + "learning_rate": 3.5188569852378206e-06, + "log_odds": 3.0565788745880127, + "log_odds_ratio": -0.22176297008991241, + "loss": 0.3065, + "rejected_geometric_mean": -4.001916408538818, + "step": 2966 + }, + { + "chosen_geometric_mean": -1.0188355445861816, + "epoch": 0.73, + "grad_norm": 4.3125, + "learning_rate": 3.517967945830532e-06, + "log_odds": 3.3987746238708496, + "log_odds_ratio": -0.17462576925754547, + "loss": 0.3086, + "rejected_geometric_mean": -4.0791425704956055, + "step": 2967 + }, + { + "chosen_geometric_mean": -0.9689881801605225, + "epoch": 0.73, + "grad_norm": 2.796875, + "learning_rate": 3.5170787520748673e-06, + "log_odds": 1.9465266466140747, + "log_odds_ratio": -0.2356966733932495, + "loss": 0.2769, + "rejected_geometric_mean": -2.5721962451934814, + "step": 2968 + }, + { + "chosen_geometric_mean": -1.3383500576019287, + "epoch": 0.74, + "grad_norm": 32.0, + "learning_rate": 3.516189404105649e-06, + "log_odds": 4.852921962738037, + "log_odds_ratio": -0.13471205532550812, + "loss": 0.3738, + "rejected_geometric_mean": -5.946349143981934, + "step": 2969 + }, + { + "chosen_geometric_mean": -1.0856778621673584, + "epoch": 0.74, + "grad_norm": 4.375, + "learning_rate": 3.515299902057726e-06, + "log_odds": 5.797197341918945, + "log_odds_ratio": -0.13861510157585144, + "loss": 0.2736, + "rejected_geometric_mean": -6.536882400512695, + "step": 2970 + }, + { + "chosen_geometric_mean": -1.4394077062606812, + "epoch": 0.74, + "grad_norm": 11.4375, + "learning_rate": 3.5144102460659664e-06, + "log_odds": 4.369953155517578, + "log_odds_ratio": -0.13443437218666077, + "loss": 0.2747, + "rejected_geometric_mean": -5.565057754516602, + "step": 2971 + }, + { + "chosen_geometric_mean": -1.3087831735610962, + "epoch": 0.74, + "grad_norm": 6.71875, + "learning_rate": 3.5135204362652627e-06, + "log_odds": 1.9775283336639404, + "log_odds_ratio": -0.40718403458595276, + "loss": 0.2967, + "rejected_geometric_mean": -3.167891025543213, + "step": 2972 + }, + { + "chosen_geometric_mean": -1.2600151300430298, + "epoch": 0.74, + "grad_norm": 4.34375, + "learning_rate": 3.5126304727905325e-06, + "log_odds": 0.17773669958114624, + "log_odds_ratio": -0.6113871335983276, + "loss": 0.3153, + "rejected_geometric_mean": -1.4019248485565186, + "step": 2973 + }, + { + "chosen_geometric_mean": -1.1113172769546509, + "epoch": 0.74, + "grad_norm": 14.1875, + "learning_rate": 3.511740355776717e-06, + "log_odds": 11.405735969543457, + "log_odds_ratio": -0.004251118749380112, + "loss": 0.2588, + "rejected_geometric_mean": -12.044605255126953, + "step": 2974 + }, + { + "chosen_geometric_mean": -1.0601589679718018, + "epoch": 0.74, + "grad_norm": 2.375, + "learning_rate": 3.5108500853587763e-06, + "log_odds": 6.592151641845703, + "log_odds_ratio": -0.015237599611282349, + "loss": 0.2529, + "rejected_geometric_mean": -7.202122211456299, + "step": 2975 + }, + { + "chosen_geometric_mean": -1.5555086135864258, + "epoch": 0.74, + "grad_norm": 19.625, + "learning_rate": 3.5099596616716984e-06, + "log_odds": 3.385039806365967, + "log_odds_ratio": -0.26584815979003906, + "loss": 0.3185, + "rejected_geometric_mean": -4.784891605377197, + "step": 2976 + }, + { + "chosen_geometric_mean": -1.1561729907989502, + "epoch": 0.74, + "grad_norm": 8.25, + "learning_rate": 3.509069084850494e-06, + "log_odds": 7.266972541809082, + "log_odds_ratio": -0.18766307830810547, + "loss": 0.2681, + "rejected_geometric_mean": -8.100159645080566, + "step": 2977 + }, + { + "chosen_geometric_mean": -1.2052128314971924, + "epoch": 0.74, + "grad_norm": 7.53125, + "learning_rate": 3.5081783550301934e-06, + "log_odds": 1.3846410512924194, + "log_odds_ratio": -0.40256765484809875, + "loss": 0.3068, + "rejected_geometric_mean": -2.424445867538452, + "step": 2978 + }, + { + "chosen_geometric_mean": -0.9770079255104065, + "epoch": 0.74, + "grad_norm": 2.515625, + "learning_rate": 3.507287472345855e-06, + "log_odds": 2.4788708686828613, + "log_odds_ratio": -0.23902465403079987, + "loss": 0.2892, + "rejected_geometric_mean": -3.0571439266204834, + "step": 2979 + }, + { + "chosen_geometric_mean": -1.1017392873764038, + "epoch": 0.74, + "grad_norm": 11.0625, + "learning_rate": 3.5063964369325564e-06, + "log_odds": 3.0727620124816895, + "log_odds_ratio": -0.2565521001815796, + "loss": 0.2847, + "rejected_geometric_mean": -3.7963521480560303, + "step": 2980 + }, + { + "chosen_geometric_mean": -1.3829593658447266, + "epoch": 0.74, + "grad_norm": 28.0, + "learning_rate": 3.5055052489254006e-06, + "log_odds": 3.1130142211914062, + "log_odds_ratio": -0.2223445177078247, + "loss": 0.3372, + "rejected_geometric_mean": -4.290729522705078, + "step": 2981 + }, + { + "chosen_geometric_mean": -0.8144222497940063, + "epoch": 0.74, + "grad_norm": 1.78125, + "learning_rate": 3.5046139084595128e-06, + "log_odds": 6.249201774597168, + "log_odds_ratio": -0.03861146420240402, + "loss": 0.2198, + "rejected_geometric_mean": -6.505147933959961, + "step": 2982 + }, + { + "chosen_geometric_mean": -1.1032383441925049, + "epoch": 0.74, + "grad_norm": 2.65625, + "learning_rate": 3.503722415670042e-06, + "log_odds": 3.777555227279663, + "log_odds_ratio": -0.14479279518127441, + "loss": 0.3589, + "rejected_geometric_mean": -4.528254508972168, + "step": 2983 + }, + { + "chosen_geometric_mean": -0.8900115489959717, + "epoch": 0.74, + "grad_norm": 2.828125, + "learning_rate": 3.5028307706921595e-06, + "log_odds": 1.1506885290145874, + "log_odds_ratio": -0.4237838685512543, + "loss": 0.2781, + "rejected_geometric_mean": -1.7877870798110962, + "step": 2984 + }, + { + "chosen_geometric_mean": -0.9238110184669495, + "epoch": 0.74, + "grad_norm": 12.0625, + "learning_rate": 3.5019389736610598e-06, + "log_odds": 3.3018455505371094, + "log_odds_ratio": -0.256759375333786, + "loss": 0.2699, + "rejected_geometric_mean": -3.8226139545440674, + "step": 2985 + }, + { + "chosen_geometric_mean": -1.217146396636963, + "epoch": 0.74, + "grad_norm": 9.3125, + "learning_rate": 3.501047024711962e-06, + "log_odds": 5.688560962677002, + "log_odds_ratio": -0.08505528420209885, + "loss": 0.3047, + "rejected_geometric_mean": -6.585765361785889, + "step": 2986 + }, + { + "chosen_geometric_mean": -1.2464542388916016, + "epoch": 0.74, + "grad_norm": 3.875, + "learning_rate": 3.500154923980107e-06, + "log_odds": 5.2846550941467285, + "log_odds_ratio": -0.15755023062229156, + "loss": 0.2895, + "rejected_geometric_mean": -6.267376899719238, + "step": 2987 + }, + { + "chosen_geometric_mean": -1.1666345596313477, + "epoch": 0.74, + "grad_norm": 5.65625, + "learning_rate": 3.4992626716007568e-06, + "log_odds": 5.024322509765625, + "log_odds_ratio": -0.11518128961324692, + "loss": 0.2522, + "rejected_geometric_mean": -5.854202747344971, + "step": 2988 + }, + { + "chosen_geometric_mean": -1.2847886085510254, + "epoch": 0.74, + "grad_norm": 28.125, + "learning_rate": 3.498370267709199e-06, + "log_odds": 1.7354636192321777, + "log_odds_ratio": -0.5883302688598633, + "loss": 0.3301, + "rejected_geometric_mean": -2.977282762527466, + "step": 2989 + }, + { + "chosen_geometric_mean": -1.066534161567688, + "epoch": 0.74, + "grad_norm": 9.1875, + "learning_rate": 3.497477712440745e-06, + "log_odds": 2.364182949066162, + "log_odds_ratio": -0.3156087398529053, + "loss": 0.273, + "rejected_geometric_mean": -3.1268601417541504, + "step": 2990 + }, + { + "chosen_geometric_mean": -1.1514642238616943, + "epoch": 0.74, + "grad_norm": 3.140625, + "learning_rate": 3.4965850059307256e-06, + "log_odds": 5.895203590393066, + "log_odds_ratio": -0.01022146176546812, + "loss": 0.2835, + "rejected_geometric_mean": -6.6258745193481445, + "step": 2991 + }, + { + "chosen_geometric_mean": -1.1239054203033447, + "epoch": 0.74, + "grad_norm": 3.28125, + "learning_rate": 3.495692148314497e-06, + "log_odds": 1.070612907409668, + "log_odds_ratio": -0.33317452669143677, + "loss": 0.311, + "rejected_geometric_mean": -1.9910035133361816, + "step": 2992 + }, + { + "chosen_geometric_mean": -0.9486904740333557, + "epoch": 0.74, + "grad_norm": 23.25, + "learning_rate": 3.4947991397274394e-06, + "log_odds": 7.278461456298828, + "log_odds_ratio": -0.04472244530916214, + "loss": 0.332, + "rejected_geometric_mean": -7.758579730987549, + "step": 2993 + }, + { + "chosen_geometric_mean": -1.096821665763855, + "epoch": 0.74, + "grad_norm": 16.875, + "learning_rate": 3.4939059803049524e-06, + "log_odds": 3.3282885551452637, + "log_odds_ratio": -0.09399200975894928, + "loss": 0.331, + "rejected_geometric_mean": -4.072092533111572, + "step": 2994 + }, + { + "chosen_geometric_mean": -1.0948153734207153, + "epoch": 0.74, + "grad_norm": 6.46875, + "learning_rate": 3.4930126701824613e-06, + "log_odds": 2.841228485107422, + "log_odds_ratio": -0.42588454484939575, + "loss": 0.2944, + "rejected_geometric_mean": -3.8074283599853516, + "step": 2995 + }, + { + "chosen_geometric_mean": -1.0288394689559937, + "epoch": 0.74, + "grad_norm": 2.984375, + "learning_rate": 3.492119209495413e-06, + "log_odds": 6.274932861328125, + "log_odds_ratio": -0.08162341266870499, + "loss": 0.2545, + "rejected_geometric_mean": -6.891721248626709, + "step": 2996 + }, + { + "chosen_geometric_mean": -1.042419195175171, + "epoch": 0.74, + "grad_norm": 9.625, + "learning_rate": 3.491225598379278e-06, + "log_odds": 1.9314993619918823, + "log_odds_ratio": -0.2808060348033905, + "loss": 0.2792, + "rejected_geometric_mean": -2.6827754974365234, + "step": 2997 + }, + { + "chosen_geometric_mean": -1.3631548881530762, + "epoch": 0.74, + "grad_norm": 18.5, + "learning_rate": 3.490331836969549e-06, + "log_odds": 4.114731788635254, + "log_odds_ratio": -0.27861690521240234, + "loss": 0.2858, + "rejected_geometric_mean": -5.281224727630615, + "step": 2998 + }, + { + "chosen_geometric_mean": -1.0632339715957642, + "epoch": 0.74, + "grad_norm": 9.875, + "learning_rate": 3.489437925401741e-06, + "log_odds": 4.758759498596191, + "log_odds_ratio": -0.05532768368721008, + "loss": 0.3072, + "rejected_geometric_mean": -5.416691303253174, + "step": 2999 + }, + { + "chosen_geometric_mean": -1.1270955801010132, + "epoch": 0.74, + "grad_norm": 5.8125, + "learning_rate": 3.4885438638113946e-06, + "log_odds": 4.638463497161865, + "log_odds_ratio": -0.28355127573013306, + "loss": 0.4229, + "rejected_geometric_mean": -5.405652046203613, + "step": 3000 + }, + { + "chosen_geometric_mean": -0.9298238158226013, + "epoch": 0.74, + "grad_norm": 2.578125, + "learning_rate": 3.4876496523340685e-06, + "log_odds": 0.49888041615486145, + "log_odds_ratio": -0.48559099435806274, + "loss": 0.2947, + "rejected_geometric_mean": -1.2428110837936401, + "step": 3001 + }, + { + "chosen_geometric_mean": -1.1483910083770752, + "epoch": 0.74, + "grad_norm": 11.25, + "learning_rate": 3.486755291105347e-06, + "log_odds": 4.950191020965576, + "log_odds_ratio": -0.01092104334384203, + "loss": 0.3418, + "rejected_geometric_mean": -5.714200019836426, + "step": 3002 + }, + { + "chosen_geometric_mean": -1.1101974248886108, + "epoch": 0.74, + "grad_norm": 24.5, + "learning_rate": 3.485860780260839e-06, + "log_odds": 3.906407117843628, + "log_odds_ratio": -0.29751378297805786, + "loss": 0.3446, + "rejected_geometric_mean": -4.745204925537109, + "step": 3003 + }, + { + "chosen_geometric_mean": -1.1079590320587158, + "epoch": 0.74, + "grad_norm": 10.5, + "learning_rate": 3.484966119936172e-06, + "log_odds": 4.071001052856445, + "log_odds_ratio": -0.14931601285934448, + "loss": 0.3165, + "rejected_geometric_mean": -4.8682026863098145, + "step": 3004 + }, + { + "chosen_geometric_mean": -1.095954179763794, + "epoch": 0.74, + "grad_norm": 11.3125, + "learning_rate": 3.4840713102669964e-06, + "log_odds": 3.0144050121307373, + "log_odds_ratio": -0.3806181252002716, + "loss": 0.2829, + "rejected_geometric_mean": -3.921692132949829, + "step": 3005 + }, + { + "chosen_geometric_mean": -1.0436183214187622, + "epoch": 0.74, + "grad_norm": 18.125, + "learning_rate": 3.48317635138899e-06, + "log_odds": 2.1993165016174316, + "log_odds_ratio": -0.24200981855392456, + "loss": 0.2771, + "rejected_geometric_mean": -2.953130006790161, + "step": 3006 + }, + { + "chosen_geometric_mean": -1.0892503261566162, + "epoch": 0.74, + "grad_norm": 17.125, + "learning_rate": 3.482281243437848e-06, + "log_odds": 8.587407112121582, + "log_odds_ratio": -0.13718527555465698, + "loss": 0.338, + "rejected_geometric_mean": -9.293792724609375, + "step": 3007 + }, + { + "chosen_geometric_mean": -1.2194626331329346, + "epoch": 0.74, + "grad_norm": 23.25, + "learning_rate": 3.4813859865492916e-06, + "log_odds": 0.7573477625846863, + "log_odds_ratio": -0.5909706950187683, + "loss": 0.3284, + "rejected_geometric_mean": -1.9170360565185547, + "step": 3008 + }, + { + "chosen_geometric_mean": -1.1888713836669922, + "epoch": 0.74, + "grad_norm": 4.96875, + "learning_rate": 3.480490580859062e-06, + "log_odds": 3.2374305725097656, + "log_odds_ratio": -0.2492159903049469, + "loss": 0.2818, + "rejected_geometric_mean": -4.195006370544434, + "step": 3009 + }, + { + "chosen_geometric_mean": -0.9347663521766663, + "epoch": 0.75, + "grad_norm": 2.609375, + "learning_rate": 3.479595026502925e-06, + "log_odds": 1.2065718173980713, + "log_odds_ratio": -0.3567616939544678, + "loss": 0.3105, + "rejected_geometric_mean": -1.8699817657470703, + "step": 3010 + }, + { + "chosen_geometric_mean": -1.2148698568344116, + "epoch": 0.75, + "grad_norm": 2.265625, + "learning_rate": 3.4786993236166675e-06, + "log_odds": 1.71061372756958, + "log_odds_ratio": -0.3568226993083954, + "loss": 0.325, + "rejected_geometric_mean": -2.74817156791687, + "step": 3011 + }, + { + "chosen_geometric_mean": -1.148704171180725, + "epoch": 0.75, + "grad_norm": 2.171875, + "learning_rate": 3.477803472336101e-06, + "log_odds": 3.9260900020599365, + "log_odds_ratio": -0.21760903298854828, + "loss": 0.2855, + "rejected_geometric_mean": -4.752037048339844, + "step": 3012 + }, + { + "chosen_geometric_mean": -1.1709413528442383, + "epoch": 0.75, + "grad_norm": 3.15625, + "learning_rate": 3.476907472797056e-06, + "log_odds": 2.76029896736145, + "log_odds_ratio": -0.20674729347229004, + "loss": 0.2935, + "rejected_geometric_mean": -3.6152515411376953, + "step": 3013 + }, + { + "chosen_geometric_mean": -1.180134892463684, + "epoch": 0.75, + "grad_norm": 2.75, + "learning_rate": 3.476011325135389e-06, + "log_odds": 3.9082164764404297, + "log_odds_ratio": -0.1540118157863617, + "loss": 0.3258, + "rejected_geometric_mean": -4.809750556945801, + "step": 3014 + }, + { + "chosen_geometric_mean": -1.1023080348968506, + "epoch": 0.75, + "grad_norm": 5.25, + "learning_rate": 3.4751150294869764e-06, + "log_odds": 3.501677989959717, + "log_odds_ratio": -0.4403359591960907, + "loss": 0.3812, + "rejected_geometric_mean": -4.400391578674316, + "step": 3015 + }, + { + "chosen_geometric_mean": -1.1700979471206665, + "epoch": 0.75, + "grad_norm": 16.5, + "learning_rate": 3.4742185859877193e-06, + "log_odds": 0.7133776545524597, + "log_odds_ratio": -0.48635435104370117, + "loss": 0.3226, + "rejected_geometric_mean": -1.7825927734375, + "step": 3016 + }, + { + "chosen_geometric_mean": -1.2278940677642822, + "epoch": 0.75, + "grad_norm": 3.9375, + "learning_rate": 3.473321994773539e-06, + "log_odds": 2.2530465126037598, + "log_odds_ratio": -0.17241233587265015, + "loss": 0.3066, + "rejected_geometric_mean": -3.2067019939422607, + "step": 3017 + }, + { + "chosen_geometric_mean": -1.0894023180007935, + "epoch": 0.75, + "grad_norm": 10.0625, + "learning_rate": 3.4724252559803807e-06, + "log_odds": 3.1567418575286865, + "log_odds_ratio": -0.26773756742477417, + "loss": 0.3439, + "rejected_geometric_mean": -3.9824585914611816, + "step": 3018 + }, + { + "chosen_geometric_mean": -1.2345033884048462, + "epoch": 0.75, + "grad_norm": 3.484375, + "learning_rate": 3.471528369744212e-06, + "log_odds": 1.7381473779678345, + "log_odds_ratio": -0.24081239104270935, + "loss": 0.2874, + "rejected_geometric_mean": -2.7453441619873047, + "step": 3019 + }, + { + "chosen_geometric_mean": -1.2230499982833862, + "epoch": 0.75, + "grad_norm": 11.5625, + "learning_rate": 3.4706313362010225e-06, + "log_odds": 3.0778212547302246, + "log_odds_ratio": -0.17211057245731354, + "loss": 0.2924, + "rejected_geometric_mean": -4.0197343826293945, + "step": 3020 + }, + { + "chosen_geometric_mean": -1.1594760417938232, + "epoch": 0.75, + "grad_norm": 2.6875, + "learning_rate": 3.4697341554868224e-06, + "log_odds": 4.443748950958252, + "log_odds_ratio": -0.19976285099983215, + "loss": 0.2892, + "rejected_geometric_mean": -5.323853492736816, + "step": 3021 + }, + { + "chosen_geometric_mean": -1.0588459968566895, + "epoch": 0.75, + "grad_norm": 17.125, + "learning_rate": 3.468836827737647e-06, + "log_odds": 6.128829002380371, + "log_odds_ratio": -0.0692865401506424, + "loss": 0.2698, + "rejected_geometric_mean": -6.783141613006592, + "step": 3022 + }, + { + "chosen_geometric_mean": -1.1963272094726562, + "epoch": 0.75, + "grad_norm": 2.578125, + "learning_rate": 3.467939353089553e-06, + "log_odds": 4.49412727355957, + "log_odds_ratio": -0.1840125173330307, + "loss": 0.3481, + "rejected_geometric_mean": -5.385962963104248, + "step": 3023 + }, + { + "chosen_geometric_mean": -1.1385583877563477, + "epoch": 0.75, + "grad_norm": 13.5, + "learning_rate": 3.4670417316786185e-06, + "log_odds": 7.24717903137207, + "log_odds_ratio": -0.16035257279872894, + "loss": 0.3292, + "rejected_geometric_mean": -8.09163761138916, + "step": 3024 + }, + { + "chosen_geometric_mean": -1.6308140754699707, + "epoch": 0.75, + "grad_norm": 42.25, + "learning_rate": 3.4661439636409457e-06, + "log_odds": 6.120797157287598, + "log_odds_ratio": -0.18913495540618896, + "loss": 0.3441, + "rejected_geometric_mean": -7.561088562011719, + "step": 3025 + }, + { + "chosen_geometric_mean": -1.250676155090332, + "epoch": 0.75, + "grad_norm": 8.75, + "learning_rate": 3.4652460491126553e-06, + "log_odds": 1.5315358638763428, + "log_odds_ratio": -0.26060837507247925, + "loss": 0.2877, + "rejected_geometric_mean": -2.552733898162842, + "step": 3026 + }, + { + "chosen_geometric_mean": -1.1788406372070312, + "epoch": 0.75, + "grad_norm": 19.125, + "learning_rate": 3.4643479882298954e-06, + "log_odds": 5.807366847991943, + "log_odds_ratio": -0.4250015616416931, + "loss": 0.3709, + "rejected_geometric_mean": -6.817967414855957, + "step": 3027 + }, + { + "chosen_geometric_mean": -1.0469648838043213, + "epoch": 0.75, + "grad_norm": 5.75, + "learning_rate": 3.4634497811288314e-06, + "log_odds": 2.15602970123291, + "log_odds_ratio": -0.2635204792022705, + "loss": 0.3201, + "rejected_geometric_mean": -2.957754135131836, + "step": 3028 + }, + { + "chosen_geometric_mean": -0.8767881393432617, + "epoch": 0.75, + "grad_norm": 5.65625, + "learning_rate": 3.4625514279456545e-06, + "log_odds": 2.3886666297912598, + "log_odds_ratio": -0.35010915994644165, + "loss": 0.2709, + "rejected_geometric_mean": -3.0335261821746826, + "step": 3029 + }, + { + "chosen_geometric_mean": -1.2900902032852173, + "epoch": 0.75, + "grad_norm": 8.625, + "learning_rate": 3.4616529288165764e-06, + "log_odds": 0.6959059238433838, + "log_odds_ratio": -0.5027163028717041, + "loss": 0.3061, + "rejected_geometric_mean": -1.8881309032440186, + "step": 3030 + }, + { + "chosen_geometric_mean": -1.2961758375167847, + "epoch": 0.75, + "grad_norm": 6.78125, + "learning_rate": 3.46075428387783e-06, + "log_odds": 4.482542991638184, + "log_odds_ratio": -0.2450414001941681, + "loss": 0.3665, + "rejected_geometric_mean": -5.541917324066162, + "step": 3031 + }, + { + "chosen_geometric_mean": -1.0345661640167236, + "epoch": 0.75, + "grad_norm": 2.078125, + "learning_rate": 3.4598554932656726e-06, + "log_odds": 3.2658543586730957, + "log_odds_ratio": -0.17740122973918915, + "loss": 0.315, + "rejected_geometric_mean": -3.9312751293182373, + "step": 3032 + }, + { + "chosen_geometric_mean": -0.9814211130142212, + "epoch": 0.75, + "grad_norm": 3.4375, + "learning_rate": 3.458956557116383e-06, + "log_odds": 3.222348928451538, + "log_odds_ratio": -0.15356561541557312, + "loss": 0.3008, + "rejected_geometric_mean": -3.8405568599700928, + "step": 3033 + }, + { + "chosen_geometric_mean": -1.0390630960464478, + "epoch": 0.75, + "grad_norm": 11.125, + "learning_rate": 3.4580574755662597e-06, + "log_odds": 5.129357814788818, + "log_odds_ratio": -0.1082792729139328, + "loss": 0.314, + "rejected_geometric_mean": -5.757754325866699, + "step": 3034 + }, + { + "chosen_geometric_mean": -0.9180078506469727, + "epoch": 0.75, + "grad_norm": 3.15625, + "learning_rate": 3.457158248751626e-06, + "log_odds": 2.788592576980591, + "log_odds_ratio": -0.2646262049674988, + "loss": 0.3192, + "rejected_geometric_mean": -3.3523032665252686, + "step": 3035 + }, + { + "chosen_geometric_mean": -0.966519296169281, + "epoch": 0.75, + "grad_norm": 4.28125, + "learning_rate": 3.456258876808827e-06, + "log_odds": 4.987400531768799, + "log_odds_ratio": -0.1309710592031479, + "loss": 0.2726, + "rejected_geometric_mean": -5.560186862945557, + "step": 3036 + }, + { + "chosen_geometric_mean": -1.4781746864318848, + "epoch": 0.75, + "grad_norm": 41.75, + "learning_rate": 3.4553593598742274e-06, + "log_odds": 4.7751970291137695, + "log_odds_ratio": -0.18849249184131622, + "loss": 0.3185, + "rejected_geometric_mean": -5.995732307434082, + "step": 3037 + }, + { + "chosen_geometric_mean": -1.0957733392715454, + "epoch": 0.75, + "grad_norm": 3.96875, + "learning_rate": 3.4544596980842165e-06, + "log_odds": 5.28618049621582, + "log_odds_ratio": -0.22076955437660217, + "loss": 0.2769, + "rejected_geometric_mean": -6.048691749572754, + "step": 3038 + }, + { + "chosen_geometric_mean": -1.719919204711914, + "epoch": 0.75, + "grad_norm": 10.125, + "learning_rate": 3.4535598915752054e-06, + "log_odds": 5.712503433227539, + "log_odds_ratio": -0.09407998621463776, + "loss": 0.2986, + "rejected_geometric_mean": -7.226475238800049, + "step": 3039 + }, + { + "chosen_geometric_mean": -1.4131561517715454, + "epoch": 0.75, + "grad_norm": 14.3125, + "learning_rate": 3.452659940483625e-06, + "log_odds": 1.2077949047088623, + "log_odds_ratio": -0.34804248809814453, + "loss": 0.3068, + "rejected_geometric_mean": -2.347654342651367, + "step": 3040 + }, + { + "chosen_geometric_mean": -2.009655475616455, + "epoch": 0.75, + "grad_norm": 24.625, + "learning_rate": 3.45175984494593e-06, + "log_odds": 5.08465576171875, + "log_odds_ratio": -0.02410421334207058, + "loss": 0.3043, + "rejected_geometric_mean": -6.805169105529785, + "step": 3041 + }, + { + "chosen_geometric_mean": -1.1442222595214844, + "epoch": 0.75, + "grad_norm": 36.75, + "learning_rate": 3.4508596050985965e-06, + "log_odds": 4.923343658447266, + "log_odds_ratio": -0.27078408002853394, + "loss": 0.3508, + "rejected_geometric_mean": -5.797661781311035, + "step": 3042 + }, + { + "chosen_geometric_mean": -1.2528873682022095, + "epoch": 0.75, + "grad_norm": 18.25, + "learning_rate": 3.449959221078123e-06, + "log_odds": 5.793767929077148, + "log_odds_ratio": -0.07540348172187805, + "loss": 0.304, + "rejected_geometric_mean": -6.723886489868164, + "step": 3043 + }, + { + "chosen_geometric_mean": -1.053268551826477, + "epoch": 0.75, + "grad_norm": 12.625, + "learning_rate": 3.4490586930210286e-06, + "log_odds": 1.3312305212020874, + "log_odds_ratio": -0.35281211137771606, + "loss": 0.3168, + "rejected_geometric_mean": -2.1302719116210938, + "step": 3044 + }, + { + "chosen_geometric_mean": -1.270140290260315, + "epoch": 0.75, + "grad_norm": 3.34375, + "learning_rate": 3.4481580210638556e-06, + "log_odds": 5.763920783996582, + "log_odds_ratio": -0.2962666451931, + "loss": 0.2814, + "rejected_geometric_mean": -6.852447509765625, + "step": 3045 + }, + { + "chosen_geometric_mean": -1.0701152086257935, + "epoch": 0.75, + "grad_norm": 2.546875, + "learning_rate": 3.447257205343166e-06, + "log_odds": 4.292619705200195, + "log_odds_ratio": -0.1468820571899414, + "loss": 0.3074, + "rejected_geometric_mean": -5.026440143585205, + "step": 3046 + }, + { + "chosen_geometric_mean": -0.9945077896118164, + "epoch": 0.75, + "grad_norm": 2.984375, + "learning_rate": 3.4463562459955476e-06, + "log_odds": 6.5475335121154785, + "log_odds_ratio": -0.09959094226360321, + "loss": 0.2944, + "rejected_geometric_mean": -7.1165337562561035, + "step": 3047 + }, + { + "chosen_geometric_mean": -1.2078057527542114, + "epoch": 0.75, + "grad_norm": 4.9375, + "learning_rate": 3.4454551431576043e-06, + "log_odds": 8.891837120056152, + "log_odds_ratio": -0.12412402778863907, + "loss": 0.2519, + "rejected_geometric_mean": -9.783329963684082, + "step": 3048 + }, + { + "chosen_geometric_mean": -1.060709834098816, + "epoch": 0.75, + "grad_norm": 27.125, + "learning_rate": 3.4445538969659687e-06, + "log_odds": 5.459536552429199, + "log_odds_ratio": -0.05748080462217331, + "loss": 0.3187, + "rejected_geometric_mean": -6.090973854064941, + "step": 3049 + }, + { + "chosen_geometric_mean": -1.1218008995056152, + "epoch": 0.76, + "grad_norm": 17.5, + "learning_rate": 3.443652507557288e-06, + "log_odds": 1.8011373281478882, + "log_odds_ratio": -0.4019402861595154, + "loss": 0.3266, + "rejected_geometric_mean": -2.704331636428833, + "step": 3050 + }, + { + "chosen_geometric_mean": -1.1346360445022583, + "epoch": 0.76, + "grad_norm": 3.578125, + "learning_rate": 3.4427509750682353e-06, + "log_odds": 5.619083881378174, + "log_odds_ratio": -0.4087064564228058, + "loss": 0.2848, + "rejected_geometric_mean": -6.61514139175415, + "step": 3051 + }, + { + "chosen_geometric_mean": -1.0189440250396729, + "epoch": 0.76, + "grad_norm": 4.59375, + "learning_rate": 3.441849299635507e-06, + "log_odds": 1.9960166215896606, + "log_odds_ratio": -0.3987409472465515, + "loss": 0.2816, + "rejected_geometric_mean": -2.775599479675293, + "step": 3052 + }, + { + "chosen_geometric_mean": -1.2838025093078613, + "epoch": 0.76, + "grad_norm": 14.625, + "learning_rate": 3.4409474813958157e-06, + "log_odds": 1.6876065731048584, + "log_odds_ratio": -0.27194318175315857, + "loss": 0.2946, + "rejected_geometric_mean": -2.76619291305542, + "step": 3053 + }, + { + "chosen_geometric_mean": -1.1992473602294922, + "epoch": 0.76, + "grad_norm": 10.6875, + "learning_rate": 3.440045520485899e-06, + "log_odds": 0.8374301195144653, + "log_odds_ratio": -0.5002138018608093, + "loss": 0.2611, + "rejected_geometric_mean": -1.9477264881134033, + "step": 3054 + }, + { + "chosen_geometric_mean": -1.0969510078430176, + "epoch": 0.76, + "grad_norm": 26.375, + "learning_rate": 3.4391434170425173e-06, + "log_odds": 2.534214973449707, + "log_odds_ratio": -0.5018497705459595, + "loss": 0.3196, + "rejected_geometric_mean": -3.5402517318725586, + "step": 3055 + }, + { + "chosen_geometric_mean": -0.8360273838043213, + "epoch": 0.76, + "grad_norm": 12.625, + "learning_rate": 3.4382411712024505e-06, + "log_odds": 8.900678634643555, + "log_odds_ratio": -0.14644619822502136, + "loss": 0.2661, + "rejected_geometric_mean": -9.274240493774414, + "step": 3056 + }, + { + "chosen_geometric_mean": -1.253710389137268, + "epoch": 0.76, + "grad_norm": 46.25, + "learning_rate": 3.4373387831025006e-06, + "log_odds": 2.998551607131958, + "log_odds_ratio": -0.3940366804599762, + "loss": 0.3394, + "rejected_geometric_mean": -4.037442207336426, + "step": 3057 + }, + { + "chosen_geometric_mean": -0.98149573802948, + "epoch": 0.76, + "grad_norm": 10.3125, + "learning_rate": 3.436436252879492e-06, + "log_odds": 3.587441921234131, + "log_odds_ratio": -0.18928982317447662, + "loss": 0.3452, + "rejected_geometric_mean": -4.202199459075928, + "step": 3058 + }, + { + "chosen_geometric_mean": -0.9672026634216309, + "epoch": 0.76, + "grad_norm": 2.40625, + "learning_rate": 3.4355335806702684e-06, + "log_odds": 3.7681033611297607, + "log_odds_ratio": -0.3107636868953705, + "loss": 0.2923, + "rejected_geometric_mean": -4.472436904907227, + "step": 3059 + }, + { + "chosen_geometric_mean": -1.1785258054733276, + "epoch": 0.76, + "grad_norm": 2.796875, + "learning_rate": 3.4346307666116985e-06, + "log_odds": 4.192496299743652, + "log_odds_ratio": -0.0833922028541565, + "loss": 0.3016, + "rejected_geometric_mean": -5.0255961418151855, + "step": 3060 + }, + { + "chosen_geometric_mean": -1.2113370895385742, + "epoch": 0.76, + "grad_norm": 10.75, + "learning_rate": 3.4337278108406685e-06, + "log_odds": 4.545407772064209, + "log_odds_ratio": -0.13763126730918884, + "loss": 0.3425, + "rejected_geometric_mean": -5.462879180908203, + "step": 3061 + }, + { + "chosen_geometric_mean": -1.2111520767211914, + "epoch": 0.76, + "grad_norm": 4.03125, + "learning_rate": 3.43282471349409e-06, + "log_odds": 3.4544711112976074, + "log_odds_ratio": -0.19227173924446106, + "loss": 0.2825, + "rejected_geometric_mean": -4.350718975067139, + "step": 3062 + }, + { + "chosen_geometric_mean": -1.119766116142273, + "epoch": 0.76, + "grad_norm": 6.03125, + "learning_rate": 3.4319214747088936e-06, + "log_odds": 5.020020484924316, + "log_odds_ratio": -0.1893673837184906, + "loss": 0.2835, + "rejected_geometric_mean": -5.855108737945557, + "step": 3063 + }, + { + "chosen_geometric_mean": -1.454809546470642, + "epoch": 0.76, + "grad_norm": 10.1875, + "learning_rate": 3.431018094622031e-06, + "log_odds": 2.715850353240967, + "log_odds_ratio": -0.31646835803985596, + "loss": 0.2932, + "rejected_geometric_mean": -4.041149616241455, + "step": 3064 + }, + { + "chosen_geometric_mean": -1.123100757598877, + "epoch": 0.76, + "grad_norm": 13.0, + "learning_rate": 3.4301145733704776e-06, + "log_odds": 0.5033761262893677, + "log_odds_ratio": -0.5005344748497009, + "loss": 0.2766, + "rejected_geometric_mean": -1.5171208381652832, + "step": 3065 + }, + { + "chosen_geometric_mean": -0.8907932043075562, + "epoch": 0.76, + "grad_norm": 5.875, + "learning_rate": 3.4292109110912284e-06, + "log_odds": 4.891890525817871, + "log_odds_ratio": -0.03556815907359123, + "loss": 0.2759, + "rejected_geometric_mean": -5.265714645385742, + "step": 3066 + }, + { + "chosen_geometric_mean": -1.1380783319473267, + "epoch": 0.76, + "grad_norm": 3.140625, + "learning_rate": 3.4283071079213003e-06, + "log_odds": 3.1880788803100586, + "log_odds_ratio": -0.3783762454986572, + "loss": 0.2689, + "rejected_geometric_mean": -4.16922664642334, + "step": 3067 + }, + { + "chosen_geometric_mean": -1.168205976486206, + "epoch": 0.76, + "grad_norm": 17.25, + "learning_rate": 3.427403163997732e-06, + "log_odds": 5.04262113571167, + "log_odds_ratio": -0.11740465462207794, + "loss": 0.3129, + "rejected_geometric_mean": -5.836111545562744, + "step": 3068 + }, + { + "chosen_geometric_mean": -1.1454211473464966, + "epoch": 0.76, + "grad_norm": 2.0625, + "learning_rate": 3.426499079457583e-06, + "log_odds": 2.49878191947937, + "log_odds_ratio": -0.29039037227630615, + "loss": 0.3092, + "rejected_geometric_mean": -3.459716558456421, + "step": 3069 + }, + { + "chosen_geometric_mean": -1.13267982006073, + "epoch": 0.76, + "grad_norm": 31.5, + "learning_rate": 3.4255948544379334e-06, + "log_odds": 2.264420747756958, + "log_odds_ratio": -0.4216311573982239, + "loss": 0.3825, + "rejected_geometric_mean": -3.2505688667297363, + "step": 3070 + }, + { + "chosen_geometric_mean": -0.9217513203620911, + "epoch": 0.76, + "grad_norm": 2.375, + "learning_rate": 3.424690489075886e-06, + "log_odds": 1.4412829875946045, + "log_odds_ratio": -0.35645002126693726, + "loss": 0.3203, + "rejected_geometric_mean": -2.091581344604492, + "step": 3071 + }, + { + "chosen_geometric_mean": -1.4168561697006226, + "epoch": 0.76, + "grad_norm": 7.03125, + "learning_rate": 3.423785983508565e-06, + "log_odds": 2.21189546585083, + "log_odds_ratio": -0.19397497177124023, + "loss": 0.3447, + "rejected_geometric_mean": -3.4264137744903564, + "step": 3072 + }, + { + "chosen_geometric_mean": -0.9395928382873535, + "epoch": 0.76, + "grad_norm": 9.75, + "learning_rate": 3.422881337873113e-06, + "log_odds": 3.52496075630188, + "log_odds_ratio": -0.2586715817451477, + "loss": 0.3014, + "rejected_geometric_mean": -4.128118991851807, + "step": 3073 + }, + { + "chosen_geometric_mean": -1.22688889503479, + "epoch": 0.76, + "grad_norm": 3.5625, + "learning_rate": 3.4219765523066984e-06, + "log_odds": 8.794963836669922, + "log_odds_ratio": -0.06247876584529877, + "loss": 0.2616, + "rejected_geometric_mean": -9.697056770324707, + "step": 3074 + }, + { + "chosen_geometric_mean": -1.0790090560913086, + "epoch": 0.76, + "grad_norm": 2.875, + "learning_rate": 3.421071626946507e-06, + "log_odds": 4.556058406829834, + "log_odds_ratio": -0.20055578649044037, + "loss": 0.257, + "rejected_geometric_mean": -5.344844818115234, + "step": 3075 + }, + { + "chosen_geometric_mean": -1.0855486392974854, + "epoch": 0.76, + "grad_norm": 3.9375, + "learning_rate": 3.420166561929748e-06, + "log_odds": 4.788731575012207, + "log_odds_ratio": -0.08411218225955963, + "loss": 0.2548, + "rejected_geometric_mean": -5.476950645446777, + "step": 3076 + }, + { + "chosen_geometric_mean": -1.00920832157135, + "epoch": 0.76, + "grad_norm": 12.4375, + "learning_rate": 3.4192613573936504e-06, + "log_odds": 4.3945183753967285, + "log_odds_ratio": -0.220866858959198, + "loss": 0.2257, + "rejected_geometric_mean": -5.148234844207764, + "step": 3077 + }, + { + "chosen_geometric_mean": -1.22142493724823, + "epoch": 0.76, + "grad_norm": 14.5, + "learning_rate": 3.4183560134754656e-06, + "log_odds": 2.3922410011291504, + "log_odds_ratio": -0.44986414909362793, + "loss": 0.3209, + "rejected_geometric_mean": -3.489622116088867, + "step": 3078 + }, + { + "chosen_geometric_mean": -1.0736581087112427, + "epoch": 0.76, + "grad_norm": 7.375, + "learning_rate": 3.4174505303124643e-06, + "log_odds": 3.294369697570801, + "log_odds_ratio": -0.2554234266281128, + "loss": 0.3314, + "rejected_geometric_mean": -4.1025590896606445, + "step": 3079 + }, + { + "chosen_geometric_mean": -1.2038064002990723, + "epoch": 0.76, + "grad_norm": 6.625, + "learning_rate": 3.416544908041941e-06, + "log_odds": 5.488786220550537, + "log_odds_ratio": -0.30185240507125854, + "loss": 0.3483, + "rejected_geometric_mean": -6.467107772827148, + "step": 3080 + }, + { + "chosen_geometric_mean": -1.2418200969696045, + "epoch": 0.76, + "grad_norm": 18.625, + "learning_rate": 3.415639146801208e-06, + "log_odds": 10.586230278015137, + "log_odds_ratio": -0.0005516019882634282, + "loss": 0.3088, + "rejected_geometric_mean": -11.461479187011719, + "step": 3081 + }, + { + "chosen_geometric_mean": -1.1431732177734375, + "epoch": 0.76, + "grad_norm": 2.453125, + "learning_rate": 3.4147332467276024e-06, + "log_odds": 1.4866961240768433, + "log_odds_ratio": -0.4281293749809265, + "loss": 0.3284, + "rejected_geometric_mean": -2.4909415245056152, + "step": 3082 + }, + { + "chosen_geometric_mean": -1.0064847469329834, + "epoch": 0.76, + "grad_norm": 8.375, + "learning_rate": 3.4138272079584787e-06, + "log_odds": 7.403651237487793, + "log_odds_ratio": -0.02925468608736992, + "loss": 0.3234, + "rejected_geometric_mean": -7.900022029876709, + "step": 3083 + }, + { + "chosen_geometric_mean": -1.1002601385116577, + "epoch": 0.76, + "grad_norm": 2.34375, + "learning_rate": 3.4129210306312143e-06, + "log_odds": 4.075357913970947, + "log_odds_ratio": -0.39417028427124023, + "loss": 0.3271, + "rejected_geometric_mean": -4.985865592956543, + "step": 3084 + }, + { + "chosen_geometric_mean": -1.3271424770355225, + "epoch": 0.76, + "grad_norm": 9.0, + "learning_rate": 3.4120147148832096e-06, + "log_odds": 4.156773567199707, + "log_odds_ratio": -0.37734556198120117, + "loss": 0.31, + "rejected_geometric_mean": -5.381025314331055, + "step": 3085 + }, + { + "chosen_geometric_mean": -0.9114444851875305, + "epoch": 0.76, + "grad_norm": 8.1875, + "learning_rate": 3.4111082608518815e-06, + "log_odds": 5.8189287185668945, + "log_odds_ratio": -0.09509751945734024, + "loss": 0.2752, + "rejected_geometric_mean": -6.266277313232422, + "step": 3086 + }, + { + "chosen_geometric_mean": -1.256420612335205, + "epoch": 0.76, + "grad_norm": 7.09375, + "learning_rate": 3.4102016686746703e-06, + "log_odds": 2.5576024055480957, + "log_odds_ratio": -0.383628785610199, + "loss": 0.3489, + "rejected_geometric_mean": -3.6615116596221924, + "step": 3087 + }, + { + "chosen_geometric_mean": -1.328986644744873, + "epoch": 0.76, + "grad_norm": 27.25, + "learning_rate": 3.4092949384890385e-06, + "log_odds": 1.0957715511322021, + "log_odds_ratio": -0.40922969579696655, + "loss": 0.3622, + "rejected_geometric_mean": -2.298236608505249, + "step": 3088 + }, + { + "chosen_geometric_mean": -1.1252975463867188, + "epoch": 0.76, + "grad_norm": 20.75, + "learning_rate": 3.4083880704324666e-06, + "log_odds": 6.022900581359863, + "log_odds_ratio": -0.0437493696808815, + "loss": 0.3181, + "rejected_geometric_mean": -6.764562606811523, + "step": 3089 + }, + { + "chosen_geometric_mean": -1.1389586925506592, + "epoch": 0.77, + "grad_norm": 18.25, + "learning_rate": 3.4074810646424587e-06, + "log_odds": 2.645874500274658, + "log_odds_ratio": -0.3000888228416443, + "loss": 0.2598, + "rejected_geometric_mean": -3.5281662940979004, + "step": 3090 + }, + { + "chosen_geometric_mean": -1.1529825925827026, + "epoch": 0.77, + "grad_norm": 8.8125, + "learning_rate": 3.4065739212565386e-06, + "log_odds": 3.165745735168457, + "log_odds_ratio": -0.1874450147151947, + "loss": 0.3112, + "rejected_geometric_mean": -4.014639377593994, + "step": 3091 + }, + { + "chosen_geometric_mean": -1.0524345636367798, + "epoch": 0.77, + "grad_norm": 2.859375, + "learning_rate": 3.405666640412251e-06, + "log_odds": 0.8309946656227112, + "log_odds_ratio": -0.49424731731414795, + "loss": 0.3273, + "rejected_geometric_mean": -1.7786040306091309, + "step": 3092 + }, + { + "chosen_geometric_mean": -1.1486204862594604, + "epoch": 0.77, + "grad_norm": 26.125, + "learning_rate": 3.4047592222471605e-06, + "log_odds": 0.45914244651794434, + "log_odds_ratio": -0.5296574831008911, + "loss": 0.3344, + "rejected_geometric_mean": -1.523679256439209, + "step": 3093 + }, + { + "chosen_geometric_mean": -1.1204228401184082, + "epoch": 0.77, + "grad_norm": 3.890625, + "learning_rate": 3.4038516668988544e-06, + "log_odds": 9.99229907989502, + "log_odds_ratio": -0.16747035086154938, + "loss": 0.3171, + "rejected_geometric_mean": -10.804342269897461, + "step": 3094 + }, + { + "chosen_geometric_mean": -1.1849777698516846, + "epoch": 0.77, + "grad_norm": 4.59375, + "learning_rate": 3.4029439745049393e-06, + "log_odds": 8.311551094055176, + "log_odds_ratio": -0.02086731418967247, + "loss": 0.2972, + "rejected_geometric_mean": -9.133960723876953, + "step": 3095 + }, + { + "chosen_geometric_mean": -1.0707974433898926, + "epoch": 0.77, + "grad_norm": 6.46875, + "learning_rate": 3.402036145203044e-06, + "log_odds": 5.928472518920898, + "log_odds_ratio": -0.0075897108763456345, + "loss": 0.2864, + "rejected_geometric_mean": -6.540200233459473, + "step": 3096 + }, + { + "chosen_geometric_mean": -1.2147212028503418, + "epoch": 0.77, + "grad_norm": 7.4375, + "learning_rate": 3.4011281791308155e-06, + "log_odds": 3.8136467933654785, + "log_odds_ratio": -0.11941657960414886, + "loss": 0.2527, + "rejected_geometric_mean": -4.733660697937012, + "step": 3097 + }, + { + "chosen_geometric_mean": -1.0387369394302368, + "epoch": 0.77, + "grad_norm": 11.75, + "learning_rate": 3.400220076425926e-06, + "log_odds": 4.988860607147217, + "log_odds_ratio": -0.13286998867988586, + "loss": 0.317, + "rejected_geometric_mean": -5.667758941650391, + "step": 3098 + }, + { + "chosen_geometric_mean": -1.12650465965271, + "epoch": 0.77, + "grad_norm": 3.09375, + "learning_rate": 3.399311837226063e-06, + "log_odds": 4.5306267738342285, + "log_odds_ratio": -0.32273346185684204, + "loss": 0.2627, + "rejected_geometric_mean": -5.37748384475708, + "step": 3099 + }, + { + "chosen_geometric_mean": -0.9676976799964905, + "epoch": 0.77, + "grad_norm": 12.375, + "learning_rate": 3.398403461668939e-06, + "log_odds": 4.88444185256958, + "log_odds_ratio": -0.17194108664989471, + "loss": 0.3004, + "rejected_geometric_mean": -5.48381233215332, + "step": 3100 + }, + { + "chosen_geometric_mean": -1.0659440755844116, + "epoch": 0.77, + "grad_norm": 2.828125, + "learning_rate": 3.3974949498922843e-06, + "log_odds": 5.634222984313965, + "log_odds_ratio": -0.20858663320541382, + "loss": 0.256, + "rejected_geometric_mean": -6.357923984527588, + "step": 3101 + }, + { + "chosen_geometric_mean": -1.0233770608901978, + "epoch": 0.77, + "grad_norm": 1.796875, + "learning_rate": 3.396586302033853e-06, + "log_odds": 3.2859549522399902, + "log_odds_ratio": -0.1777525693178177, + "loss": 0.2432, + "rejected_geometric_mean": -3.926764965057373, + "step": 3102 + }, + { + "chosen_geometric_mean": -1.0606954097747803, + "epoch": 0.77, + "grad_norm": 3.03125, + "learning_rate": 3.395677518231416e-06, + "log_odds": 3.6831583976745605, + "log_odds_ratio": -0.2648143172264099, + "loss": 0.301, + "rejected_geometric_mean": -4.458075523376465, + "step": 3103 + }, + { + "chosen_geometric_mean": -1.6008033752441406, + "epoch": 0.77, + "grad_norm": 11.0625, + "learning_rate": 3.3947685986227673e-06, + "log_odds": 2.4910976886749268, + "log_odds_ratio": -0.26595795154571533, + "loss": 0.3606, + "rejected_geometric_mean": -3.92697811126709, + "step": 3104 + }, + { + "chosen_geometric_mean": -0.8405895233154297, + "epoch": 0.77, + "grad_norm": 5.34375, + "learning_rate": 3.393859543345722e-06, + "log_odds": 2.468644618988037, + "log_odds_ratio": -0.21731114387512207, + "loss": 0.2304, + "rejected_geometric_mean": -2.8735220432281494, + "step": 3105 + }, + { + "chosen_geometric_mean": -0.8717145919799805, + "epoch": 0.77, + "grad_norm": 14.5, + "learning_rate": 3.3929503525381123e-06, + "log_odds": 8.08756160736084, + "log_odds_ratio": -0.011156175285577774, + "loss": 0.3143, + "rejected_geometric_mean": -8.417510986328125, + "step": 3106 + }, + { + "chosen_geometric_mean": -1.275551199913025, + "epoch": 0.77, + "grad_norm": 5.71875, + "learning_rate": 3.392041026337796e-06, + "log_odds": 5.761207103729248, + "log_odds_ratio": -0.1376965194940567, + "loss": 0.2693, + "rejected_geometric_mean": -6.702660083770752, + "step": 3107 + }, + { + "chosen_geometric_mean": -1.0338033437728882, + "epoch": 0.77, + "grad_norm": 11.5625, + "learning_rate": 3.3911315648826475e-06, + "log_odds": 1.867698073387146, + "log_odds_ratio": -0.39690902829170227, + "loss": 0.2846, + "rejected_geometric_mean": -2.675494432449341, + "step": 3108 + }, + { + "chosen_geometric_mean": -1.578401803970337, + "epoch": 0.77, + "grad_norm": 35.0, + "learning_rate": 3.390221968310563e-06, + "log_odds": 1.1815142631530762, + "log_odds_ratio": -0.5301495790481567, + "loss": 0.3607, + "rejected_geometric_mean": -2.654763698577881, + "step": 3109 + }, + { + "chosen_geometric_mean": -1.453919529914856, + "epoch": 0.77, + "grad_norm": 9.3125, + "learning_rate": 3.3893122367594593e-06, + "log_odds": 3.688375949859619, + "log_odds_ratio": -0.1466912180185318, + "loss": 0.2591, + "rejected_geometric_mean": -4.903480529785156, + "step": 3110 + }, + { + "chosen_geometric_mean": -1.1156187057495117, + "epoch": 0.77, + "grad_norm": 13.0625, + "learning_rate": 3.3884023703672734e-06, + "log_odds": 2.6485061645507812, + "log_odds_ratio": -0.15932697057724, + "loss": 0.3166, + "rejected_geometric_mean": -3.414633274078369, + "step": 3111 + }, + { + "chosen_geometric_mean": -0.8937137126922607, + "epoch": 0.77, + "grad_norm": 13.875, + "learning_rate": 3.387492369271963e-06, + "log_odds": 4.781551361083984, + "log_odds_ratio": -0.1888803243637085, + "loss": 0.3162, + "rejected_geometric_mean": -5.302483081817627, + "step": 3112 + }, + { + "chosen_geometric_mean": -0.9291979074478149, + "epoch": 0.77, + "grad_norm": 2.328125, + "learning_rate": 3.3865822336115067e-06, + "log_odds": 4.290476322174072, + "log_odds_ratio": -0.23832476139068604, + "loss": 0.2469, + "rejected_geometric_mean": -4.820446491241455, + "step": 3113 + }, + { + "chosen_geometric_mean": -1.0808568000793457, + "epoch": 0.77, + "grad_norm": 5.0, + "learning_rate": 3.385671963523901e-06, + "log_odds": 4.832217216491699, + "log_odds_ratio": -0.12253068387508392, + "loss": 0.3029, + "rejected_geometric_mean": -5.5810770988464355, + "step": 3114 + }, + { + "chosen_geometric_mean": -1.1128814220428467, + "epoch": 0.77, + "grad_norm": 20.125, + "learning_rate": 3.384761559147168e-06, + "log_odds": 7.273891448974609, + "log_odds_ratio": -0.028951384127140045, + "loss": 0.3439, + "rejected_geometric_mean": -7.975540637969971, + "step": 3115 + }, + { + "chosen_geometric_mean": -1.0998363494873047, + "epoch": 0.77, + "grad_norm": 7.25, + "learning_rate": 3.383851020619344e-06, + "log_odds": 3.1264188289642334, + "log_odds_ratio": -0.17287598550319672, + "loss": 0.3139, + "rejected_geometric_mean": -3.9110939502716064, + "step": 3116 + }, + { + "chosen_geometric_mean": -0.9193607568740845, + "epoch": 0.77, + "grad_norm": 7.78125, + "learning_rate": 3.3829403480784883e-06, + "log_odds": 0.1319037228822708, + "log_odds_ratio": -0.636778712272644, + "loss": 0.3467, + "rejected_geometric_mean": -1.0240026712417603, + "step": 3117 + }, + { + "chosen_geometric_mean": -1.0510393381118774, + "epoch": 0.77, + "grad_norm": 3.703125, + "learning_rate": 3.382029541662683e-06, + "log_odds": 3.8242385387420654, + "log_odds_ratio": -0.16489024460315704, + "loss": 0.3134, + "rejected_geometric_mean": -4.534049034118652, + "step": 3118 + }, + { + "chosen_geometric_mean": -1.1904394626617432, + "epoch": 0.77, + "grad_norm": 10.0, + "learning_rate": 3.3811186015100262e-06, + "log_odds": 2.9604408740997314, + "log_odds_ratio": -0.21204033493995667, + "loss": 0.2649, + "rejected_geometric_mean": -3.861253499984741, + "step": 3119 + }, + { + "chosen_geometric_mean": -1.2544764280319214, + "epoch": 0.77, + "grad_norm": 4.65625, + "learning_rate": 3.38020752775864e-06, + "log_odds": 0.6498444676399231, + "log_odds_ratio": -0.48995673656463623, + "loss": 0.3068, + "rejected_geometric_mean": -1.7590694427490234, + "step": 3120 + }, + { + "chosen_geometric_mean": -1.0840113162994385, + "epoch": 0.77, + "grad_norm": 5.96875, + "learning_rate": 3.3792963205466647e-06, + "log_odds": 5.404654026031494, + "log_odds_ratio": -0.17028875648975372, + "loss": 0.3601, + "rejected_geometric_mean": -6.117221832275391, + "step": 3121 + }, + { + "chosen_geometric_mean": -0.8651411533355713, + "epoch": 0.77, + "grad_norm": 2.390625, + "learning_rate": 3.3783849800122585e-06, + "log_odds": 6.997591018676758, + "log_odds_ratio": -0.3758592903614044, + "loss": 0.2688, + "rejected_geometric_mean": -7.608308792114258, + "step": 3122 + }, + { + "chosen_geometric_mean": -1.278383493423462, + "epoch": 0.77, + "grad_norm": 4.375, + "learning_rate": 3.377473506293606e-06, + "log_odds": 2.3173046112060547, + "log_odds_ratio": -0.2202649563550949, + "loss": 0.3148, + "rejected_geometric_mean": -3.3143863677978516, + "step": 3123 + }, + { + "chosen_geometric_mean": -1.0196913480758667, + "epoch": 0.77, + "grad_norm": 7.375, + "learning_rate": 3.3765618995289072e-06, + "log_odds": 0.018718332052230835, + "log_odds_ratio": -0.7404623627662659, + "loss": 0.2837, + "rejected_geometric_mean": -1.0952367782592773, + "step": 3124 + }, + { + "chosen_geometric_mean": -0.9893287420272827, + "epoch": 0.77, + "grad_norm": 10.375, + "learning_rate": 3.3756501598563836e-06, + "log_odds": 5.627244472503662, + "log_odds_ratio": -0.0047240653075277805, + "loss": 0.2431, + "rejected_geometric_mean": -6.12346887588501, + "step": 3125 + }, + { + "chosen_geometric_mean": -1.0798354148864746, + "epoch": 0.77, + "grad_norm": 7.0625, + "learning_rate": 3.3747382874142763e-06, + "log_odds": 3.1541874408721924, + "log_odds_ratio": -0.2861495912075043, + "loss": 0.2924, + "rejected_geometric_mean": -3.9437808990478516, + "step": 3126 + }, + { + "chosen_geometric_mean": -1.0605627298355103, + "epoch": 0.77, + "grad_norm": 7.875, + "learning_rate": 3.3738262823408474e-06, + "log_odds": 3.0917763710021973, + "log_odds_ratio": -0.23554322123527527, + "loss": 0.2977, + "rejected_geometric_mean": -3.8588106632232666, + "step": 3127 + }, + { + "chosen_geometric_mean": -1.1700005531311035, + "epoch": 0.77, + "grad_norm": 6.59375, + "learning_rate": 3.3729141447743795e-06, + "log_odds": 0.033046990633010864, + "log_odds_ratio": -0.6858739852905273, + "loss": 0.2748, + "rejected_geometric_mean": -1.1636333465576172, + "step": 3128 + }, + { + "chosen_geometric_mean": -1.1959866285324097, + "epoch": 0.77, + "grad_norm": 8.9375, + "learning_rate": 3.3720018748531734e-06, + "log_odds": 2.7728018760681152, + "log_odds_ratio": -0.32526877522468567, + "loss": 0.3078, + "rejected_geometric_mean": -3.8010363578796387, + "step": 3129 + }, + { + "chosen_geometric_mean": -1.0495100021362305, + "epoch": 0.77, + "grad_norm": 35.0, + "learning_rate": 3.3710894727155507e-06, + "log_odds": 0.03966343402862549, + "log_odds_ratio": -0.7239308953285217, + "loss": 0.3085, + "rejected_geometric_mean": -1.0773817300796509, + "step": 3130 + }, + { + "chosen_geometric_mean": -1.0311399698257446, + "epoch": 0.78, + "grad_norm": 11.6875, + "learning_rate": 3.370176938499856e-06, + "log_odds": 4.018638610839844, + "log_odds_ratio": -0.31131917238235474, + "loss": 0.3217, + "rejected_geometric_mean": -4.827091217041016, + "step": 3131 + }, + { + "chosen_geometric_mean": -1.2376155853271484, + "epoch": 0.78, + "grad_norm": 11.5625, + "learning_rate": 3.36926427234445e-06, + "log_odds": 7.063536643981934, + "log_odds_ratio": -0.11121784150600433, + "loss": 0.2392, + "rejected_geometric_mean": -7.951111316680908, + "step": 3132 + }, + { + "chosen_geometric_mean": -1.2530913352966309, + "epoch": 0.78, + "grad_norm": 10.875, + "learning_rate": 3.3683514743877126e-06, + "log_odds": 2.195878744125366, + "log_odds_ratio": -0.4172239303588867, + "loss": 0.2974, + "rejected_geometric_mean": -3.329332113265991, + "step": 3133 + }, + { + "chosen_geometric_mean": -0.898646354675293, + "epoch": 0.78, + "grad_norm": 3.515625, + "learning_rate": 3.3674385447680494e-06, + "log_odds": 2.786426544189453, + "log_odds_ratio": -0.2356644868850708, + "loss": 0.3159, + "rejected_geometric_mean": -3.2787747383117676, + "step": 3134 + }, + { + "chosen_geometric_mean": -1.1629822254180908, + "epoch": 0.78, + "grad_norm": 8.0, + "learning_rate": 3.3665254836238805e-06, + "log_odds": 4.014208793640137, + "log_odds_ratio": -0.17846249043941498, + "loss": 0.3328, + "rejected_geometric_mean": -4.872129440307617, + "step": 3135 + }, + { + "chosen_geometric_mean": -1.1588518619537354, + "epoch": 0.78, + "grad_norm": 32.5, + "learning_rate": 3.3656122910936478e-06, + "log_odds": 2.5777931213378906, + "log_odds_ratio": -0.3192041218280792, + "loss": 0.3358, + "rejected_geometric_mean": -3.456976890563965, + "step": 3136 + }, + { + "chosen_geometric_mean": -0.9629278182983398, + "epoch": 0.78, + "grad_norm": 14.875, + "learning_rate": 3.3646989673158144e-06, + "log_odds": 0.8927086591720581, + "log_odds_ratio": -0.5001484155654907, + "loss": 0.2996, + "rejected_geometric_mean": -1.7417110204696655, + "step": 3137 + }, + { + "chosen_geometric_mean": -1.0430105924606323, + "epoch": 0.78, + "grad_norm": 4.90625, + "learning_rate": 3.363785512428861e-06, + "log_odds": 2.432453155517578, + "log_odds_ratio": -0.15144824981689453, + "loss": 0.3049, + "rejected_geometric_mean": -3.1104657649993896, + "step": 3138 + }, + { + "chosen_geometric_mean": -1.0899286270141602, + "epoch": 0.78, + "grad_norm": 10.75, + "learning_rate": 3.3628719265712895e-06, + "log_odds": 3.248032569885254, + "log_odds_ratio": -0.23444166779518127, + "loss": 0.2782, + "rejected_geometric_mean": -4.078937530517578, + "step": 3139 + }, + { + "chosen_geometric_mean": -1.1718292236328125, + "epoch": 0.78, + "grad_norm": 4.40625, + "learning_rate": 3.361958209881622e-06, + "log_odds": 2.4416327476501465, + "log_odds_ratio": -0.2769981026649475, + "loss": 0.3002, + "rejected_geometric_mean": -3.3830480575561523, + "step": 3140 + }, + { + "chosen_geometric_mean": -1.2542622089385986, + "epoch": 0.78, + "grad_norm": 2.40625, + "learning_rate": 3.3610443624983996e-06, + "log_odds": 2.8987531661987305, + "log_odds_ratio": -0.24038664996623993, + "loss": 0.3144, + "rejected_geometric_mean": -3.915982246398926, + "step": 3141 + }, + { + "chosen_geometric_mean": -1.247489333152771, + "epoch": 0.78, + "grad_norm": 2.015625, + "learning_rate": 3.360130384560183e-06, + "log_odds": 3.3677964210510254, + "log_odds_ratio": -0.20695514976978302, + "loss": 0.287, + "rejected_geometric_mean": -4.350977897644043, + "step": 3142 + }, + { + "chosen_geometric_mean": -0.9709517955780029, + "epoch": 0.78, + "grad_norm": 9.0625, + "learning_rate": 3.3592162762055543e-06, + "log_odds": 3.460474729537964, + "log_odds_ratio": -0.1941639631986618, + "loss": 0.2883, + "rejected_geometric_mean": -4.046849727630615, + "step": 3143 + }, + { + "chosen_geometric_mean": -1.3870199918746948, + "epoch": 0.78, + "grad_norm": 8.875, + "learning_rate": 3.358302037573113e-06, + "log_odds": 0.959497332572937, + "log_odds_ratio": -0.3782268166542053, + "loss": 0.3293, + "rejected_geometric_mean": -2.179037094116211, + "step": 3144 + }, + { + "chosen_geometric_mean": -0.7967991828918457, + "epoch": 0.78, + "grad_norm": 14.3125, + "learning_rate": 3.35738766880148e-06, + "log_odds": 4.599620819091797, + "log_odds_ratio": -0.1963522881269455, + "loss": 0.2502, + "rejected_geometric_mean": -4.988917827606201, + "step": 3145 + }, + { + "chosen_geometric_mean": -0.9393981695175171, + "epoch": 0.78, + "grad_norm": 2.515625, + "learning_rate": 3.356473170029296e-06, + "log_odds": 1.7618695497512817, + "log_odds_ratio": -0.3500758111476898, + "loss": 0.3184, + "rejected_geometric_mean": -2.4632363319396973, + "step": 3146 + }, + { + "chosen_geometric_mean": -1.2291680574417114, + "epoch": 0.78, + "grad_norm": 3.53125, + "learning_rate": 3.355558541395221e-06, + "log_odds": 3.1672797203063965, + "log_odds_ratio": -0.17239432036876678, + "loss": 0.2704, + "rejected_geometric_mean": -4.111078262329102, + "step": 3147 + }, + { + "chosen_geometric_mean": -1.104576587677002, + "epoch": 0.78, + "grad_norm": 36.25, + "learning_rate": 3.354643783037934e-06, + "log_odds": 4.902161598205566, + "log_odds_ratio": -0.18251563608646393, + "loss": 0.2943, + "rejected_geometric_mean": -5.680246829986572, + "step": 3148 + }, + { + "chosen_geometric_mean": -1.0981693267822266, + "epoch": 0.78, + "grad_norm": 25.875, + "learning_rate": 3.3537288950961345e-06, + "log_odds": 1.6063671112060547, + "log_odds_ratio": -0.23282372951507568, + "loss": 0.3365, + "rejected_geometric_mean": -2.4056365489959717, + "step": 3149 + }, + { + "chosen_geometric_mean": -1.209768295288086, + "epoch": 0.78, + "grad_norm": 4.8125, + "learning_rate": 3.3528138777085415e-06, + "log_odds": 2.6853299140930176, + "log_odds_ratio": -0.3432258665561676, + "loss": 0.3047, + "rejected_geometric_mean": -3.722626209259033, + "step": 3150 + }, + { + "chosen_geometric_mean": -0.9567747116088867, + "epoch": 0.78, + "grad_norm": 10.0, + "learning_rate": 3.351898731013895e-06, + "log_odds": 3.197986125946045, + "log_odds_ratio": -0.3679768443107605, + "loss": 0.3031, + "rejected_geometric_mean": -3.9231104850769043, + "step": 3151 + }, + { + "chosen_geometric_mean": -1.1351804733276367, + "epoch": 0.78, + "grad_norm": 9.5, + "learning_rate": 3.3509834551509514e-06, + "log_odds": 7.107330322265625, + "log_odds_ratio": -0.07436968386173248, + "loss": 0.2789, + "rejected_geometric_mean": -7.883645057678223, + "step": 3152 + }, + { + "chosen_geometric_mean": -1.2639739513397217, + "epoch": 0.78, + "grad_norm": 11.125, + "learning_rate": 3.350068050258488e-06, + "log_odds": 4.226911544799805, + "log_odds_ratio": -0.1800316572189331, + "loss": 0.2966, + "rejected_geometric_mean": -5.217734336853027, + "step": 3153 + }, + { + "chosen_geometric_mean": -0.9535168409347534, + "epoch": 0.78, + "grad_norm": 6.15625, + "learning_rate": 3.3491525164753047e-06, + "log_odds": 3.184692859649658, + "log_odds_ratio": -0.3277972936630249, + "loss": 0.2955, + "rejected_geometric_mean": -3.8224937915802, + "step": 3154 + }, + { + "chosen_geometric_mean": -0.9160205721855164, + "epoch": 0.78, + "grad_norm": 3.828125, + "learning_rate": 3.348236853940216e-06, + "log_odds": 1.4870681762695312, + "log_odds_ratio": -0.5369104146957397, + "loss": 0.3061, + "rejected_geometric_mean": -2.290255069732666, + "step": 3155 + }, + { + "chosen_geometric_mean": -1.3397012948989868, + "epoch": 0.78, + "grad_norm": 10.25, + "learning_rate": 3.3473210627920593e-06, + "log_odds": 8.541975975036621, + "log_odds_ratio": -0.0678727924823761, + "loss": 0.3197, + "rejected_geometric_mean": -9.582748413085938, + "step": 3156 + }, + { + "chosen_geometric_mean": -1.006155014038086, + "epoch": 0.78, + "grad_norm": 4.78125, + "learning_rate": 3.3464051431696907e-06, + "log_odds": 15.144278526306152, + "log_odds_ratio": -0.008960509672760963, + "loss": 0.2833, + "rejected_geometric_mean": -15.676928520202637, + "step": 3157 + }, + { + "chosen_geometric_mean": -1.110619068145752, + "epoch": 0.78, + "grad_norm": 2.046875, + "learning_rate": 3.3454890952119856e-06, + "log_odds": 3.1885361671447754, + "log_odds_ratio": -0.2861528694629669, + "loss": 0.309, + "rejected_geometric_mean": -4.066251277923584, + "step": 3158 + }, + { + "chosen_geometric_mean": -1.207815170288086, + "epoch": 0.78, + "grad_norm": 4.84375, + "learning_rate": 3.3445729190578386e-06, + "log_odds": 2.994788646697998, + "log_odds_ratio": -0.2466551959514618, + "loss": 0.2989, + "rejected_geometric_mean": -3.951833724975586, + "step": 3159 + }, + { + "chosen_geometric_mean": -1.0328890085220337, + "epoch": 0.78, + "grad_norm": 34.75, + "learning_rate": 3.3436566148461637e-06, + "log_odds": 2.9924793243408203, + "log_odds_ratio": -0.5421791076660156, + "loss": 0.292, + "rejected_geometric_mean": -3.9308648109436035, + "step": 3160 + }, + { + "chosen_geometric_mean": -1.3353692293167114, + "epoch": 0.78, + "grad_norm": 32.25, + "learning_rate": 3.342740182715895e-06, + "log_odds": 5.417609691619873, + "log_odds_ratio": -0.12194925546646118, + "loss": 0.2806, + "rejected_geometric_mean": -6.458696365356445, + "step": 3161 + }, + { + "chosen_geometric_mean": -1.0878193378448486, + "epoch": 0.78, + "grad_norm": 4.21875, + "learning_rate": 3.341823622805986e-06, + "log_odds": 5.865682125091553, + "log_odds_ratio": -0.1203555017709732, + "loss": 0.2917, + "rejected_geometric_mean": -6.591829299926758, + "step": 3162 + }, + { + "chosen_geometric_mean": -1.0576622486114502, + "epoch": 0.78, + "grad_norm": 9.0, + "learning_rate": 3.3409069352554075e-06, + "log_odds": 2.9933066368103027, + "log_odds_ratio": -0.2572237551212311, + "loss": 0.3033, + "rejected_geometric_mean": -3.7655227184295654, + "step": 3163 + }, + { + "chosen_geometric_mean": -1.0028364658355713, + "epoch": 0.78, + "grad_norm": 3.140625, + "learning_rate": 3.3399901202031536e-06, + "log_odds": 1.7384417057037354, + "log_odds_ratio": -0.3603461980819702, + "loss": 0.2822, + "rejected_geometric_mean": -2.507723331451416, + "step": 3164 + }, + { + "chosen_geometric_mean": -0.9462422132492065, + "epoch": 0.78, + "grad_norm": 2.84375, + "learning_rate": 3.3390731777882344e-06, + "log_odds": 9.510992050170898, + "log_odds_ratio": -0.0017909931484609842, + "loss": 0.2291, + "rejected_geometric_mean": -9.936623573303223, + "step": 3165 + }, + { + "chosen_geometric_mean": -1.1949125528335571, + "epoch": 0.78, + "grad_norm": 2.953125, + "learning_rate": 3.3381561081496793e-06, + "log_odds": 10.365436553955078, + "log_odds_ratio": -0.014513839967548847, + "loss": 0.267, + "rejected_geometric_mean": -11.196639060974121, + "step": 3166 + }, + { + "chosen_geometric_mean": -1.0250271558761597, + "epoch": 0.78, + "grad_norm": 13.625, + "learning_rate": 3.3372389114265406e-06, + "log_odds": 2.129737138748169, + "log_odds_ratio": -0.31599530577659607, + "loss": 0.3566, + "rejected_geometric_mean": -2.9112935066223145, + "step": 3167 + }, + { + "chosen_geometric_mean": -0.9357194900512695, + "epoch": 0.78, + "grad_norm": 3.078125, + "learning_rate": 3.336321587757886e-06, + "log_odds": 7.609400272369385, + "log_odds_ratio": -0.10654034465551376, + "loss": 0.3045, + "rejected_geometric_mean": -8.131532669067383, + "step": 3168 + }, + { + "chosen_geometric_mean": -1.5030885934829712, + "epoch": 0.78, + "grad_norm": 9.8125, + "learning_rate": 3.3354041372828022e-06, + "log_odds": 6.136117935180664, + "log_odds_ratio": -0.008020955137908459, + "loss": 0.287, + "rejected_geometric_mean": -7.363802433013916, + "step": 3169 + }, + { + "chosen_geometric_mean": -1.3163135051727295, + "epoch": 0.78, + "grad_norm": 3.546875, + "learning_rate": 3.3344865601403986e-06, + "log_odds": 2.2298059463500977, + "log_odds_ratio": -0.17129360139369965, + "loss": 0.3159, + "rejected_geometric_mean": -3.2813100814819336, + "step": 3170 + }, + { + "chosen_geometric_mean": -1.23151695728302, + "epoch": 0.79, + "grad_norm": 3.671875, + "learning_rate": 3.333568856469802e-06, + "log_odds": 8.885815620422363, + "log_odds_ratio": -0.218023419380188, + "loss": 0.3131, + "rejected_geometric_mean": -9.809931755065918, + "step": 3171 + }, + { + "chosen_geometric_mean": -1.295812726020813, + "epoch": 0.79, + "grad_norm": 20.75, + "learning_rate": 3.3326510264101582e-06, + "log_odds": 7.01495885848999, + "log_odds_ratio": -0.1819864809513092, + "loss": 0.3146, + "rejected_geometric_mean": -8.022665023803711, + "step": 3172 + }, + { + "chosen_geometric_mean": -1.0444152355194092, + "epoch": 0.79, + "grad_norm": 19.25, + "learning_rate": 3.331733070100631e-06, + "log_odds": 5.988019943237305, + "log_odds_ratio": -0.10262253135442734, + "loss": 0.3186, + "rejected_geometric_mean": -6.647820472717285, + "step": 3173 + }, + { + "chosen_geometric_mean": -0.8503373265266418, + "epoch": 0.79, + "grad_norm": 33.5, + "learning_rate": 3.3308149876804065e-06, + "log_odds": 3.0697643756866455, + "log_odds_ratio": -0.20317788422107697, + "loss": 0.299, + "rejected_geometric_mean": -3.5593814849853516, + "step": 3174 + }, + { + "chosen_geometric_mean": -1.237658977508545, + "epoch": 0.79, + "grad_norm": 15.5625, + "learning_rate": 3.3298967792886866e-06, + "log_odds": 6.614470958709717, + "log_odds_ratio": -0.17171701788902283, + "loss": 0.3074, + "rejected_geometric_mean": -7.472964763641357, + "step": 3175 + }, + { + "chosen_geometric_mean": -0.969700276851654, + "epoch": 0.79, + "grad_norm": 12.0625, + "learning_rate": 3.3289784450646945e-06, + "log_odds": 0.4720023274421692, + "log_odds_ratio": -0.5065468549728394, + "loss": 0.2961, + "rejected_geometric_mean": -1.330073595046997, + "step": 3176 + }, + { + "chosen_geometric_mean": -0.9883423447608948, + "epoch": 0.79, + "grad_norm": 19.25, + "learning_rate": 3.328059985147671e-06, + "log_odds": 7.435415267944336, + "log_odds_ratio": -0.22881560027599335, + "loss": 0.3207, + "rejected_geometric_mean": -8.02083969116211, + "step": 3177 + }, + { + "chosen_geometric_mean": -0.9160692691802979, + "epoch": 0.79, + "grad_norm": 5.03125, + "learning_rate": 3.327141399676877e-06, + "log_odds": 0.6387345790863037, + "log_odds_ratio": -0.4323364496231079, + "loss": 0.3202, + "rejected_geometric_mean": -1.3396852016448975, + "step": 3178 + }, + { + "chosen_geometric_mean": -0.9882732629776001, + "epoch": 0.79, + "grad_norm": 2.109375, + "learning_rate": 3.3262226887915914e-06, + "log_odds": 5.762950420379639, + "log_odds_ratio": -0.23474329710006714, + "loss": 0.3121, + "rejected_geometric_mean": -6.464344024658203, + "step": 3179 + }, + { + "chosen_geometric_mean": -1.1619868278503418, + "epoch": 0.79, + "grad_norm": 5.15625, + "learning_rate": 3.3253038526311155e-06, + "log_odds": 4.223609447479248, + "log_odds_ratio": -0.26200637221336365, + "loss": 0.3089, + "rejected_geometric_mean": -5.131814002990723, + "step": 3180 + }, + { + "chosen_geometric_mean": -1.0147595405578613, + "epoch": 0.79, + "grad_norm": 2.671875, + "learning_rate": 3.324384891334764e-06, + "log_odds": 1.202354907989502, + "log_odds_ratio": -0.4252733886241913, + "loss": 0.2265, + "rejected_geometric_mean": -1.9919102191925049, + "step": 3181 + }, + { + "chosen_geometric_mean": -1.0530483722686768, + "epoch": 0.79, + "grad_norm": 2.0625, + "learning_rate": 3.3234658050418743e-06, + "log_odds": 2.048267126083374, + "log_odds_ratio": -0.40499576926231384, + "loss": 0.2622, + "rejected_geometric_mean": -2.9096717834472656, + "step": 3182 + }, + { + "chosen_geometric_mean": -1.0523388385772705, + "epoch": 0.79, + "grad_norm": 3.984375, + "learning_rate": 3.3225465938918023e-06, + "log_odds": 4.535346031188965, + "log_odds_ratio": -0.23774835467338562, + "loss": 0.3166, + "rejected_geometric_mean": -5.265396595001221, + "step": 3183 + }, + { + "chosen_geometric_mean": -1.2369569540023804, + "epoch": 0.79, + "grad_norm": 8.5, + "learning_rate": 3.321627258023923e-06, + "log_odds": 3.462486982345581, + "log_odds_ratio": -0.2553213834762573, + "loss": 0.2559, + "rejected_geometric_mean": -4.429900646209717, + "step": 3184 + }, + { + "chosen_geometric_mean": -0.9833536744117737, + "epoch": 0.79, + "grad_norm": 5.0625, + "learning_rate": 3.3207077975776282e-06, + "log_odds": 3.936025619506836, + "log_odds_ratio": -0.2652779817581177, + "loss": 0.2529, + "rejected_geometric_mean": -4.609504222869873, + "step": 3185 + }, + { + "chosen_geometric_mean": -1.1588705778121948, + "epoch": 0.79, + "grad_norm": 6.03125, + "learning_rate": 3.319788212692331e-06, + "log_odds": 0.9604421257972717, + "log_odds_ratio": -0.3431553244590759, + "loss": 0.3162, + "rejected_geometric_mean": -1.9119045734405518, + "step": 3186 + }, + { + "chosen_geometric_mean": -1.0648605823516846, + "epoch": 0.79, + "grad_norm": 2.171875, + "learning_rate": 3.3188685035074637e-06, + "log_odds": 2.0720200538635254, + "log_odds_ratio": -0.30022192001342773, + "loss": 0.3011, + "rejected_geometric_mean": -2.8886795043945312, + "step": 3187 + }, + { + "chosen_geometric_mean": -1.0859429836273193, + "epoch": 0.79, + "grad_norm": 7.40625, + "learning_rate": 3.3179486701624742e-06, + "log_odds": 7.165439605712891, + "log_odds_ratio": -0.16246770322322845, + "loss": 0.3013, + "rejected_geometric_mean": -7.9093780517578125, + "step": 3188 + }, + { + "chosen_geometric_mean": -1.3524231910705566, + "epoch": 0.79, + "grad_norm": 28.625, + "learning_rate": 3.3170287127968324e-06, + "log_odds": 3.0435142517089844, + "log_odds_ratio": -0.15318582952022552, + "loss": 0.2711, + "rejected_geometric_mean": -4.129458904266357, + "step": 3189 + }, + { + "chosen_geometric_mean": -0.9184072017669678, + "epoch": 0.79, + "grad_norm": 3.1875, + "learning_rate": 3.3161086315500263e-06, + "log_odds": 6.148293972015381, + "log_odds_ratio": -0.1255006492137909, + "loss": 0.2895, + "rejected_geometric_mean": -6.587398529052734, + "step": 3190 + }, + { + "chosen_geometric_mean": -1.0432236194610596, + "epoch": 0.79, + "grad_norm": 6.53125, + "learning_rate": 3.315188426561562e-06, + "log_odds": 1.9536974430084229, + "log_odds_ratio": -0.40515291690826416, + "loss": 0.3081, + "rejected_geometric_mean": -2.7570433616638184, + "step": 3191 + }, + { + "chosen_geometric_mean": -1.1092652082443237, + "epoch": 0.79, + "grad_norm": 48.0, + "learning_rate": 3.314268097970964e-06, + "log_odds": 9.845867156982422, + "log_odds_ratio": -0.17004933953285217, + "loss": 0.4129, + "rejected_geometric_mean": -10.645866394042969, + "step": 3192 + }, + { + "chosen_geometric_mean": -1.4257289171218872, + "epoch": 0.79, + "grad_norm": 23.625, + "learning_rate": 3.313347645917777e-06, + "log_odds": 1.052164077758789, + "log_odds_ratio": -0.47228148579597473, + "loss": 0.3651, + "rejected_geometric_mean": -2.347409725189209, + "step": 3193 + }, + { + "chosen_geometric_mean": -1.0060869455337524, + "epoch": 0.79, + "grad_norm": 15.3125, + "learning_rate": 3.3124270705415636e-06, + "log_odds": 12.199053764343262, + "log_odds_ratio": -0.018599361181259155, + "loss": 0.355, + "rejected_geometric_mean": -12.73487663269043, + "step": 3194 + }, + { + "chosen_geometric_mean": -1.2046477794647217, + "epoch": 0.79, + "grad_norm": 6.5, + "learning_rate": 3.311506371981904e-06, + "log_odds": 3.9840826988220215, + "log_odds_ratio": -0.2526230216026306, + "loss": 0.2466, + "rejected_geometric_mean": -4.942911624908447, + "step": 3195 + }, + { + "chosen_geometric_mean": -1.1369352340698242, + "epoch": 0.79, + "grad_norm": 11.1875, + "learning_rate": 3.3105855503783997e-06, + "log_odds": 8.910150527954102, + "log_odds_ratio": -0.15573716163635254, + "loss": 0.3272, + "rejected_geometric_mean": -9.714315414428711, + "step": 3196 + }, + { + "chosen_geometric_mean": -0.9146169424057007, + "epoch": 0.79, + "grad_norm": 10.25, + "learning_rate": 3.3096646058706686e-06, + "log_odds": 6.761871337890625, + "log_odds_ratio": -0.14249232411384583, + "loss": 0.3232, + "rejected_geometric_mean": -7.209983825683594, + "step": 3197 + }, + { + "chosen_geometric_mean": -0.8991613984107971, + "epoch": 0.79, + "grad_norm": 4.8125, + "learning_rate": 3.3087435385983487e-06, + "log_odds": 4.708263397216797, + "log_odds_ratio": -0.024875322356820107, + "loss": 0.2588, + "rejected_geometric_mean": -5.089476585388184, + "step": 3198 + }, + { + "chosen_geometric_mean": -2.9422004222869873, + "epoch": 0.79, + "grad_norm": 28.75, + "learning_rate": 3.307822348701094e-06, + "log_odds": 3.8672709465026855, + "log_odds_ratio": -0.1149936094880104, + "loss": 0.3601, + "rejected_geometric_mean": -6.696819305419922, + "step": 3199 + }, + { + "chosen_geometric_mean": -1.7397840023040771, + "epoch": 0.79, + "grad_norm": 9.125, + "learning_rate": 3.306901036318582e-06, + "log_odds": 6.432443141937256, + "log_odds_ratio": -0.15910209715366364, + "loss": 0.3148, + "rejected_geometric_mean": -8.007079124450684, + "step": 3200 + }, + { + "chosen_geometric_mean": -1.3090267181396484, + "epoch": 0.79, + "grad_norm": 13.5625, + "learning_rate": 3.305979601590503e-06, + "log_odds": 6.187236785888672, + "log_odds_ratio": -0.24302825331687927, + "loss": 0.3047, + "rejected_geometric_mean": -7.272127628326416, + "step": 3201 + }, + { + "chosen_geometric_mean": -1.2500241994857788, + "epoch": 0.79, + "grad_norm": 14.75, + "learning_rate": 3.3050580446565693e-06, + "log_odds": 1.7861480712890625, + "log_odds_ratio": -0.22617796063423157, + "loss": 0.2982, + "rejected_geometric_mean": -2.787666082382202, + "step": 3202 + }, + { + "chosen_geometric_mean": -1.387286901473999, + "epoch": 0.79, + "grad_norm": 12.8125, + "learning_rate": 3.3041363656565127e-06, + "log_odds": 2.549419641494751, + "log_odds_ratio": -0.18315862119197845, + "loss": 0.3066, + "rejected_geometric_mean": -3.6743171215057373, + "step": 3203 + }, + { + "chosen_geometric_mean": -1.0730918645858765, + "epoch": 0.79, + "grad_norm": 6.1875, + "learning_rate": 3.30321456473008e-06, + "log_odds": 2.766184091567993, + "log_odds_ratio": -0.5076002478599548, + "loss": 0.2801, + "rejected_geometric_mean": -3.7035303115844727, + "step": 3204 + }, + { + "chosen_geometric_mean": -1.0659291744232178, + "epoch": 0.79, + "grad_norm": 1.984375, + "learning_rate": 3.3022926420170392e-06, + "log_odds": 7.444004058837891, + "log_odds_ratio": -0.11088629812002182, + "loss": 0.2798, + "rejected_geometric_mean": -8.161821365356445, + "step": 3205 + }, + { + "chosen_geometric_mean": -0.9925714731216431, + "epoch": 0.79, + "grad_norm": 16.75, + "learning_rate": 3.301370597657176e-06, + "log_odds": 5.672045707702637, + "log_odds_ratio": -0.16288448870182037, + "loss": 0.2526, + "rejected_geometric_mean": -6.327186584472656, + "step": 3206 + }, + { + "chosen_geometric_mean": -1.0493357181549072, + "epoch": 0.79, + "grad_norm": 8.625, + "learning_rate": 3.3004484317902934e-06, + "log_odds": 1.391097068786621, + "log_odds_ratio": -0.3191770315170288, + "loss": 0.3452, + "rejected_geometric_mean": -2.2304985523223877, + "step": 3207 + }, + { + "chosen_geometric_mean": -0.9201244711875916, + "epoch": 0.79, + "grad_norm": 6.625, + "learning_rate": 3.299526144556215e-06, + "log_odds": 3.2778022289276123, + "log_odds_ratio": -0.3761429488658905, + "loss": 0.2749, + "rejected_geometric_mean": -3.9651482105255127, + "step": 3208 + }, + { + "chosen_geometric_mean": -1.1939252614974976, + "epoch": 0.79, + "grad_norm": 28.875, + "learning_rate": 3.2986037360947814e-06, + "log_odds": 6.34785795211792, + "log_odds_ratio": -0.1430961936712265, + "loss": 0.3203, + "rejected_geometric_mean": -7.252202987670898, + "step": 3209 + }, + { + "chosen_geometric_mean": -1.0746409893035889, + "epoch": 0.79, + "grad_norm": 8.625, + "learning_rate": 3.2976812065458518e-06, + "log_odds": 1.97116219997406, + "log_odds_ratio": -0.2804795205593109, + "loss": 0.2698, + "rejected_geometric_mean": -2.818324565887451, + "step": 3210 + }, + { + "chosen_geometric_mean": -0.9716664552688599, + "epoch": 0.79, + "grad_norm": 2.71875, + "learning_rate": 3.2967585560493036e-06, + "log_odds": 2.8817825317382812, + "log_odds_ratio": -0.3950773775577545, + "loss": 0.3239, + "rejected_geometric_mean": -3.640815496444702, + "step": 3211 + }, + { + "chosen_geometric_mean": -1.1199184656143188, + "epoch": 0.8, + "grad_norm": 4.1875, + "learning_rate": 3.295835784745033e-06, + "log_odds": 1.942751407623291, + "log_odds_ratio": -0.3728797435760498, + "loss": 0.2469, + "rejected_geometric_mean": -2.8748059272766113, + "step": 3212 + }, + { + "chosen_geometric_mean": -1.0696935653686523, + "epoch": 0.8, + "grad_norm": 2.859375, + "learning_rate": 3.2949128927729546e-06, + "log_odds": 1.8681763410568237, + "log_odds_ratio": -0.2810502052307129, + "loss": 0.3011, + "rejected_geometric_mean": -2.6652510166168213, + "step": 3213 + }, + { + "chosen_geometric_mean": -1.0165679454803467, + "epoch": 0.8, + "grad_norm": 4.1875, + "learning_rate": 3.2939898802730007e-06, + "log_odds": 3.083333969116211, + "log_odds_ratio": -0.2159091681241989, + "loss": 0.3037, + "rejected_geometric_mean": -3.787139654159546, + "step": 3214 + }, + { + "chosen_geometric_mean": -1.1632155179977417, + "epoch": 0.8, + "grad_norm": 7.25, + "learning_rate": 3.2930667473851214e-06, + "log_odds": 1.6222290992736816, + "log_odds_ratio": -0.20509326457977295, + "loss": 0.3274, + "rejected_geometric_mean": -2.488243818283081, + "step": 3215 + }, + { + "chosen_geometric_mean": -1.1976187229156494, + "epoch": 0.8, + "grad_norm": 5.71875, + "learning_rate": 3.292143494249287e-06, + "log_odds": 2.450962543487549, + "log_odds_ratio": -0.2700466811656952, + "loss": 0.3015, + "rejected_geometric_mean": -3.4188730716705322, + "step": 3216 + }, + { + "chosen_geometric_mean": -1.0667939186096191, + "epoch": 0.8, + "grad_norm": 3.328125, + "learning_rate": 3.2912201210054845e-06, + "log_odds": 0.6605051755905151, + "log_odds_ratio": -0.5142151713371277, + "loss": 0.2658, + "rejected_geometric_mean": -1.642219066619873, + "step": 3217 + }, + { + "chosen_geometric_mean": -1.0994974374771118, + "epoch": 0.8, + "grad_norm": 3.8125, + "learning_rate": 3.290296627793719e-06, + "log_odds": 3.4542226791381836, + "log_odds_ratio": -0.22802427411079407, + "loss": 0.2993, + "rejected_geometric_mean": -4.282337665557861, + "step": 3218 + }, + { + "chosen_geometric_mean": -1.0539191961288452, + "epoch": 0.8, + "grad_norm": 3.28125, + "learning_rate": 3.289373014754014e-06, + "log_odds": 5.318334579467773, + "log_odds_ratio": -0.28462445735931396, + "loss": 0.2889, + "rejected_geometric_mean": -6.100116729736328, + "step": 3219 + }, + { + "chosen_geometric_mean": -1.0848207473754883, + "epoch": 0.8, + "grad_norm": 8.4375, + "learning_rate": 3.2884492820264126e-06, + "log_odds": 4.035150051116943, + "log_odds_ratio": -0.17494294047355652, + "loss": 0.2881, + "rejected_geometric_mean": -4.788609027862549, + "step": 3220 + }, + { + "chosen_geometric_mean": -1.0620779991149902, + "epoch": 0.8, + "grad_norm": 5.65625, + "learning_rate": 3.287525429750974e-06, + "log_odds": 2.787292242050171, + "log_odds_ratio": -0.1746545433998108, + "loss": 0.3003, + "rejected_geometric_mean": -3.5308468341827393, + "step": 3221 + }, + { + "chosen_geometric_mean": -1.2313103675842285, + "epoch": 0.8, + "grad_norm": 4.0625, + "learning_rate": 3.286601458067776e-06, + "log_odds": 4.96812629699707, + "log_odds_ratio": -0.11303384602069855, + "loss": 0.2782, + "rejected_geometric_mean": -5.901503562927246, + "step": 3222 + }, + { + "chosen_geometric_mean": -0.8688997030258179, + "epoch": 0.8, + "grad_norm": 7.09375, + "learning_rate": 3.2856773671169163e-06, + "log_odds": 6.340275287628174, + "log_odds_ratio": -0.0041561247780919075, + "loss": 0.3021, + "rejected_geometric_mean": -6.636226654052734, + "step": 3223 + }, + { + "chosen_geometric_mean": -1.1901450157165527, + "epoch": 0.8, + "grad_norm": 7.0, + "learning_rate": 3.2847531570385083e-06, + "log_odds": 1.1642485857009888, + "log_odds_ratio": -0.34007883071899414, + "loss": 0.2604, + "rejected_geometric_mean": -2.169020414352417, + "step": 3224 + }, + { + "chosen_geometric_mean": -1.052040457725525, + "epoch": 0.8, + "grad_norm": 10.8125, + "learning_rate": 3.2838288279726842e-06, + "log_odds": 5.06500244140625, + "log_odds_ratio": -0.08964411914348602, + "loss": 0.3235, + "rejected_geometric_mean": -5.7217631340026855, + "step": 3225 + }, + { + "chosen_geometric_mean": -1.121822714805603, + "epoch": 0.8, + "grad_norm": 3.78125, + "learning_rate": 3.282904380059595e-06, + "log_odds": 2.54069185256958, + "log_odds_ratio": -0.1059267520904541, + "loss": 0.2497, + "rejected_geometric_mean": -3.317768096923828, + "step": 3226 + }, + { + "chosen_geometric_mean": -1.079291820526123, + "epoch": 0.8, + "grad_norm": 10.5625, + "learning_rate": 3.2819798134394094e-06, + "log_odds": 3.3373961448669434, + "log_odds_ratio": -0.2372746616601944, + "loss": 0.3286, + "rejected_geometric_mean": -4.089273452758789, + "step": 3227 + }, + { + "chosen_geometric_mean": -1.1777526140213013, + "epoch": 0.8, + "grad_norm": 4.34375, + "learning_rate": 3.281055128252313e-06, + "log_odds": 1.5815905332565308, + "log_odds_ratio": -0.34783244132995605, + "loss": 0.298, + "rejected_geometric_mean": -2.581850051879883, + "step": 3228 + }, + { + "chosen_geometric_mean": -1.0914709568023682, + "epoch": 0.8, + "grad_norm": 11.9375, + "learning_rate": 3.280130324638511e-06, + "log_odds": 6.41591739654541, + "log_odds_ratio": -0.047331802546978, + "loss": 0.3404, + "rejected_geometric_mean": -7.108119010925293, + "step": 3229 + }, + { + "chosen_geometric_mean": -1.1848324537277222, + "epoch": 0.8, + "grad_norm": 20.625, + "learning_rate": 3.2792054027382254e-06, + "log_odds": 4.854142665863037, + "log_odds_ratio": -0.05531712993979454, + "loss": 0.3136, + "rejected_geometric_mean": -5.6944146156311035, + "step": 3230 + }, + { + "chosen_geometric_mean": -1.1554512977600098, + "epoch": 0.8, + "grad_norm": 7.375, + "learning_rate": 3.2782803626916957e-06, + "log_odds": 8.735756874084473, + "log_odds_ratio": -0.17241056263446808, + "loss": 0.2812, + "rejected_geometric_mean": -9.571995735168457, + "step": 3231 + }, + { + "chosen_geometric_mean": -1.0897620916366577, + "epoch": 0.8, + "grad_norm": 16.125, + "learning_rate": 3.277355204639183e-06, + "log_odds": 8.381397247314453, + "log_odds_ratio": -0.008557626977562904, + "loss": 0.3062, + "rejected_geometric_mean": -9.041709899902344, + "step": 3232 + }, + { + "chosen_geometric_mean": -0.8648512959480286, + "epoch": 0.8, + "grad_norm": 14.6875, + "learning_rate": 3.2764299287209612e-06, + "log_odds": 3.6355724334716797, + "log_odds_ratio": -0.3452843725681305, + "loss": 0.3132, + "rejected_geometric_mean": -4.22022008895874, + "step": 3233 + }, + { + "chosen_geometric_mean": -1.1294398307800293, + "epoch": 0.8, + "grad_norm": 49.0, + "learning_rate": 3.2755045350773242e-06, + "log_odds": 4.070113182067871, + "log_odds_ratio": -0.05394849181175232, + "loss": 0.3327, + "rejected_geometric_mean": -4.8257670402526855, + "step": 3234 + }, + { + "chosen_geometric_mean": -1.0720775127410889, + "epoch": 0.8, + "grad_norm": 4.15625, + "learning_rate": 3.2745790238485853e-06, + "log_odds": 2.3514060974121094, + "log_odds_ratio": -0.24160347878932953, + "loss": 0.3062, + "rejected_geometric_mean": -3.1534066200256348, + "step": 3235 + }, + { + "chosen_geometric_mean": -1.1194961071014404, + "epoch": 0.8, + "grad_norm": 3.5, + "learning_rate": 3.273653395175074e-06, + "log_odds": 2.9786574840545654, + "log_odds_ratio": -0.4055594801902771, + "loss": 0.3765, + "rejected_geometric_mean": -3.90922212600708, + "step": 3236 + }, + { + "chosen_geometric_mean": -1.0119816064834595, + "epoch": 0.8, + "grad_norm": 7.15625, + "learning_rate": 3.2727276491971353e-06, + "log_odds": 8.254886627197266, + "log_odds_ratio": -0.20863042771816254, + "loss": 0.3153, + "rejected_geometric_mean": -8.95791244506836, + "step": 3237 + }, + { + "chosen_geometric_mean": -1.1090214252471924, + "epoch": 0.8, + "grad_norm": 3.875, + "learning_rate": 3.2718017860551377e-06, + "log_odds": 0.9141771793365479, + "log_odds_ratio": -0.35865318775177, + "loss": 0.2785, + "rejected_geometric_mean": -1.8370435237884521, + "step": 3238 + }, + { + "chosen_geometric_mean": -1.2143332958221436, + "epoch": 0.8, + "grad_norm": 23.5, + "learning_rate": 3.270875805889463e-06, + "log_odds": 2.481851577758789, + "log_odds_ratio": -0.22956426441669464, + "loss": 0.2624, + "rejected_geometric_mean": -3.4391558170318604, + "step": 3239 + }, + { + "chosen_geometric_mean": -1.0954970121383667, + "epoch": 0.8, + "grad_norm": 43.25, + "learning_rate": 3.2699497088405126e-06, + "log_odds": 6.219062328338623, + "log_odds_ratio": -0.21614371240139008, + "loss": 0.2682, + "rejected_geometric_mean": -7.033670425415039, + "step": 3240 + }, + { + "chosen_geometric_mean": -1.555421233177185, + "epoch": 0.8, + "grad_norm": 18.125, + "learning_rate": 3.2690234950487037e-06, + "log_odds": 6.408080101013184, + "log_odds_ratio": -0.25704923272132874, + "loss": 0.2881, + "rejected_geometric_mean": -7.833419322967529, + "step": 3241 + }, + { + "chosen_geometric_mean": -0.9520480632781982, + "epoch": 0.8, + "grad_norm": 12.8125, + "learning_rate": 3.268097164654474e-06, + "log_odds": 3.286468505859375, + "log_odds_ratio": -0.14282579720020294, + "loss": 0.2742, + "rejected_geometric_mean": -3.8488080501556396, + "step": 3242 + }, + { + "chosen_geometric_mean": -1.054977536201477, + "epoch": 0.8, + "grad_norm": 2.0, + "learning_rate": 3.2671707177982763e-06, + "log_odds": 1.2025930881500244, + "log_odds_ratio": -0.4342092275619507, + "loss": 0.2784, + "rejected_geometric_mean": -2.086177349090576, + "step": 3243 + }, + { + "chosen_geometric_mean": -1.1096258163452148, + "epoch": 0.8, + "grad_norm": 21.0, + "learning_rate": 3.266244154620583e-06, + "log_odds": 3.4193174839019775, + "log_odds_ratio": -0.2674551010131836, + "loss": 0.2993, + "rejected_geometric_mean": -4.223458766937256, + "step": 3244 + }, + { + "chosen_geometric_mean": -1.1071767807006836, + "epoch": 0.8, + "grad_norm": 2.921875, + "learning_rate": 3.265317475261882e-06, + "log_odds": 1.87803053855896, + "log_odds_ratio": -0.2329237163066864, + "loss": 0.2512, + "rejected_geometric_mean": -2.7152018547058105, + "step": 3245 + }, + { + "chosen_geometric_mean": -1.0213584899902344, + "epoch": 0.8, + "grad_norm": 18.875, + "learning_rate": 3.264390679862683e-06, + "log_odds": 3.0324525833129883, + "log_odds_ratio": -0.23376095294952393, + "loss": 0.3538, + "rejected_geometric_mean": -3.6917495727539062, + "step": 3246 + }, + { + "chosen_geometric_mean": -0.9602272510528564, + "epoch": 0.8, + "grad_norm": 12.625, + "learning_rate": 3.2634637685635073e-06, + "log_odds": 6.071300983428955, + "log_odds_ratio": -0.1669706106185913, + "loss": 0.2983, + "rejected_geometric_mean": -6.656032562255859, + "step": 3247 + }, + { + "chosen_geometric_mean": -0.9842618703842163, + "epoch": 0.8, + "grad_norm": 2.40625, + "learning_rate": 3.262536741504898e-06, + "log_odds": 4.728396892547607, + "log_odds_ratio": -0.1218382716178894, + "loss": 0.2564, + "rejected_geometric_mean": -5.301697254180908, + "step": 3248 + }, + { + "chosen_geometric_mean": -1.0488264560699463, + "epoch": 0.8, + "grad_norm": 14.3125, + "learning_rate": 3.2616095988274162e-06, + "log_odds": 3.4542465209960938, + "log_odds_ratio": -0.3034683167934418, + "loss": 0.3526, + "rejected_geometric_mean": -4.255980491638184, + "step": 3249 + }, + { + "chosen_geometric_mean": -1.1651313304901123, + "epoch": 0.8, + "grad_norm": 4.96875, + "learning_rate": 3.2606823406716364e-06, + "log_odds": 3.3357551097869873, + "log_odds_ratio": -0.24573422968387604, + "loss": 0.3122, + "rejected_geometric_mean": -4.2550811767578125, + "step": 3250 + }, + { + "chosen_geometric_mean": -1.0574302673339844, + "epoch": 0.8, + "grad_norm": 2.453125, + "learning_rate": 3.2597549671781542e-06, + "log_odds": 1.7290500402450562, + "log_odds_ratio": -0.34818899631500244, + "loss": 0.3398, + "rejected_geometric_mean": -2.5672731399536133, + "step": 3251 + }, + { + "chosen_geometric_mean": -1.1173145771026611, + "epoch": 0.81, + "grad_norm": 11.8125, + "learning_rate": 3.258827478487583e-06, + "log_odds": 6.31786584854126, + "log_odds_ratio": -0.20871949195861816, + "loss": 0.3227, + "rejected_geometric_mean": -7.140745162963867, + "step": 3252 + }, + { + "chosen_geometric_mean": -1.0815551280975342, + "epoch": 0.81, + "grad_norm": 5.03125, + "learning_rate": 3.25789987474055e-06, + "log_odds": 6.658155918121338, + "log_odds_ratio": -0.13945847749710083, + "loss": 0.2697, + "rejected_geometric_mean": -7.376492977142334, + "step": 3253 + }, + { + "chosen_geometric_mean": -1.1789060831069946, + "epoch": 0.81, + "grad_norm": 2.21875, + "learning_rate": 3.256972156077704e-06, + "log_odds": 1.9738115072250366, + "log_odds_ratio": -0.2547057271003723, + "loss": 0.2896, + "rejected_geometric_mean": -2.8970460891723633, + "step": 3254 + }, + { + "chosen_geometric_mean": -1.0069692134857178, + "epoch": 0.81, + "grad_norm": 30.125, + "learning_rate": 3.256044322639709e-06, + "log_odds": 11.833385467529297, + "log_odds_ratio": -0.025197071954607964, + "loss": 0.2944, + "rejected_geometric_mean": -12.35984992980957, + "step": 3255 + }, + { + "chosen_geometric_mean": -1.01947820186615, + "epoch": 0.81, + "grad_norm": 23.875, + "learning_rate": 3.2551163745672464e-06, + "log_odds": 2.8099124431610107, + "log_odds_ratio": -0.25042086839675903, + "loss": 0.2433, + "rejected_geometric_mean": -3.5672953128814697, + "step": 3256 + }, + { + "chosen_geometric_mean": -1.0067641735076904, + "epoch": 0.81, + "grad_norm": 6.78125, + "learning_rate": 3.254188312001016e-06, + "log_odds": 8.194343566894531, + "log_odds_ratio": -0.16194385290145874, + "loss": 0.2568, + "rejected_geometric_mean": -8.792717933654785, + "step": 3257 + }, + { + "chosen_geometric_mean": -1.498992681503296, + "epoch": 0.81, + "grad_norm": 15.5, + "learning_rate": 3.2532601350817338e-06, + "log_odds": 1.429787278175354, + "log_odds_ratio": -0.3871389925479889, + "loss": 0.2849, + "rejected_geometric_mean": -2.8314828872680664, + "step": 3258 + }, + { + "chosen_geometric_mean": -1.2287777662277222, + "epoch": 0.81, + "grad_norm": 3.25, + "learning_rate": 3.252331843950134e-06, + "log_odds": 0.9191474318504333, + "log_odds_ratio": -0.47101321816444397, + "loss": 0.3163, + "rejected_geometric_mean": -2.0563900470733643, + "step": 3259 + }, + { + "chosen_geometric_mean": -1.000573754310608, + "epoch": 0.81, + "grad_norm": 2.5, + "learning_rate": 3.2514034387469673e-06, + "log_odds": 3.522336006164551, + "log_odds_ratio": -0.18510644137859344, + "loss": 0.3004, + "rejected_geometric_mean": -4.135610580444336, + "step": 3260 + }, + { + "chosen_geometric_mean": -1.0653371810913086, + "epoch": 0.81, + "grad_norm": 1.859375, + "learning_rate": 3.2504749196130026e-06, + "log_odds": 9.606342315673828, + "log_odds_ratio": -0.011369823478162289, + "loss": 0.2926, + "rejected_geometric_mean": -10.230259895324707, + "step": 3261 + }, + { + "chosen_geometric_mean": -1.0801470279693604, + "epoch": 0.81, + "grad_norm": 4.75, + "learning_rate": 3.249546286689027e-06, + "log_odds": 2.9884281158447266, + "log_odds_ratio": -0.2374134361743927, + "loss": 0.2885, + "rejected_geometric_mean": -3.788978099822998, + "step": 3262 + }, + { + "chosen_geometric_mean": -1.1959410905838013, + "epoch": 0.81, + "grad_norm": 9.0625, + "learning_rate": 3.2486175401158417e-06, + "log_odds": 2.905287742614746, + "log_odds_ratio": -0.22865955531597137, + "loss": 0.3337, + "rejected_geometric_mean": -3.8731789588928223, + "step": 3263 + }, + { + "chosen_geometric_mean": -1.404240369796753, + "epoch": 0.81, + "grad_norm": 49.25, + "learning_rate": 3.2476886800342666e-06, + "log_odds": 3.0923774242401123, + "log_odds_ratio": -0.21931377053260803, + "loss": 0.2548, + "rejected_geometric_mean": -4.208270072937012, + "step": 3264 + }, + { + "chosen_geometric_mean": -1.0578101873397827, + "epoch": 0.81, + "grad_norm": 1.8125, + "learning_rate": 3.2467597065851407e-06, + "log_odds": 0.9556601047515869, + "log_odds_ratio": -0.34361472725868225, + "loss": 0.2107, + "rejected_geometric_mean": -1.7942323684692383, + "step": 3265 + }, + { + "chosen_geometric_mean": -0.974528431892395, + "epoch": 0.81, + "grad_norm": 22.375, + "learning_rate": 3.2458306199093192e-06, + "log_odds": 8.241606712341309, + "log_odds_ratio": -0.07792090624570847, + "loss": 0.3037, + "rejected_geometric_mean": -8.757164001464844, + "step": 3266 + }, + { + "chosen_geometric_mean": -1.0547394752502441, + "epoch": 0.81, + "grad_norm": 17.875, + "learning_rate": 3.2449014201476714e-06, + "log_odds": 10.19725227355957, + "log_odds_ratio": -0.08289693295955658, + "loss": 0.2985, + "rejected_geometric_mean": -10.862325668334961, + "step": 3267 + }, + { + "chosen_geometric_mean": -0.9363255500793457, + "epoch": 0.81, + "grad_norm": 6.75, + "learning_rate": 3.2439721074410885e-06, + "log_odds": 9.473228454589844, + "log_odds_ratio": -0.12141268700361252, + "loss": 0.2939, + "rejected_geometric_mean": -10.018847465515137, + "step": 3268 + }, + { + "chosen_geometric_mean": -1.0338242053985596, + "epoch": 0.81, + "grad_norm": 4.875, + "learning_rate": 3.243042681930476e-06, + "log_odds": 5.170896053314209, + "log_odds_ratio": -0.14821520447731018, + "loss": 0.2943, + "rejected_geometric_mean": -5.8459153175354, + "step": 3269 + }, + { + "chosen_geometric_mean": -1.1229805946350098, + "epoch": 0.81, + "grad_norm": 2.953125, + "learning_rate": 3.242113143756756e-06, + "log_odds": 8.759507179260254, + "log_odds_ratio": -0.09875006973743439, + "loss": 0.247, + "rejected_geometric_mean": -9.52894401550293, + "step": 3270 + }, + { + "chosen_geometric_mean": -1.156799554824829, + "epoch": 0.81, + "grad_norm": 2.5, + "learning_rate": 3.241183493060871e-06, + "log_odds": 4.709647178649902, + "log_odds_ratio": -0.2105909138917923, + "loss": 0.2854, + "rejected_geometric_mean": -5.586439609527588, + "step": 3271 + }, + { + "chosen_geometric_mean": -1.3485182523727417, + "epoch": 0.81, + "grad_norm": 3.296875, + "learning_rate": 3.2402537299837766e-06, + "log_odds": 5.759077548980713, + "log_odds_ratio": -0.07718409597873688, + "loss": 0.3005, + "rejected_geometric_mean": -6.819128036499023, + "step": 3272 + }, + { + "chosen_geometric_mean": -1.2217297554016113, + "epoch": 0.81, + "grad_norm": 28.5, + "learning_rate": 3.239323854666447e-06, + "log_odds": 5.472847938537598, + "log_odds_ratio": -0.08111350238323212, + "loss": 0.3357, + "rejected_geometric_mean": -6.366575241088867, + "step": 3273 + }, + { + "chosen_geometric_mean": -1.0987600088119507, + "epoch": 0.81, + "grad_norm": 5.15625, + "learning_rate": 3.2383938672498756e-06, + "log_odds": 4.734104156494141, + "log_odds_ratio": -0.4548090100288391, + "loss": 0.275, + "rejected_geometric_mean": -5.70603609085083, + "step": 3274 + }, + { + "chosen_geometric_mean": -0.9193651676177979, + "epoch": 0.81, + "grad_norm": 57.5, + "learning_rate": 3.237463767875069e-06, + "log_odds": 12.22762393951416, + "log_odds_ratio": -0.0033061266876757145, + "loss": 0.3301, + "rejected_geometric_mean": -12.639734268188477, + "step": 3275 + }, + { + "chosen_geometric_mean": -1.1912474632263184, + "epoch": 0.81, + "grad_norm": 10.0625, + "learning_rate": 3.2365335566830537e-06, + "log_odds": 1.946045160293579, + "log_odds_ratio": -0.21620029211044312, + "loss": 0.31, + "rejected_geometric_mean": -2.8702359199523926, + "step": 3276 + }, + { + "chosen_geometric_mean": -0.9828389883041382, + "epoch": 0.81, + "grad_norm": 47.0, + "learning_rate": 3.235603233814871e-06, + "log_odds": 7.334555625915527, + "log_odds_ratio": -0.16554367542266846, + "loss": 0.482, + "rejected_geometric_mean": -7.914968013763428, + "step": 3277 + }, + { + "chosen_geometric_mean": -1.3546733856201172, + "epoch": 0.81, + "grad_norm": 4.4375, + "learning_rate": 3.23467279941158e-06, + "log_odds": 1.4833439588546753, + "log_odds_ratio": -0.32573527097702026, + "loss": 0.2841, + "rejected_geometric_mean": -2.6867055892944336, + "step": 3278 + }, + { + "chosen_geometric_mean": -0.9551272392272949, + "epoch": 0.81, + "grad_norm": 6.625, + "learning_rate": 3.2337422536142593e-06, + "log_odds": 3.437893867492676, + "log_odds_ratio": -0.09957484900951385, + "loss": 0.2759, + "rejected_geometric_mean": -3.972243547439575, + "step": 3279 + }, + { + "chosen_geometric_mean": -1.131442904472351, + "epoch": 0.81, + "grad_norm": 3.953125, + "learning_rate": 3.232811596563999e-06, + "log_odds": 3.1441307067871094, + "log_odds_ratio": -0.12922590970993042, + "loss": 0.2678, + "rejected_geometric_mean": -3.8968698978424072, + "step": 3280 + }, + { + "chosen_geometric_mean": -1.7748347520828247, + "epoch": 0.81, + "grad_norm": 49.75, + "learning_rate": 3.2318808284019098e-06, + "log_odds": 13.2130126953125, + "log_odds_ratio": -0.03323586657643318, + "loss": 0.3575, + "rejected_geometric_mean": -14.710224151611328, + "step": 3281 + }, + { + "chosen_geometric_mean": -1.2528207302093506, + "epoch": 0.81, + "grad_norm": 74.5, + "learning_rate": 3.230949949269121e-06, + "log_odds": 2.6437437534332275, + "log_odds_ratio": -0.39119353890419006, + "loss": 0.3089, + "rejected_geometric_mean": -3.7260658740997314, + "step": 3282 + }, + { + "chosen_geometric_mean": -1.3859221935272217, + "epoch": 0.81, + "grad_norm": 32.75, + "learning_rate": 3.2300189593067723e-06, + "log_odds": 7.6269941329956055, + "log_odds_ratio": -0.04381038621068001, + "loss": 0.3539, + "rejected_geometric_mean": -8.736928939819336, + "step": 3283 + }, + { + "chosen_geometric_mean": -1.1411782503128052, + "epoch": 0.81, + "grad_norm": 2.171875, + "learning_rate": 3.229087858656027e-06, + "log_odds": 0.6979854106903076, + "log_odds_ratio": -0.4808301627635956, + "loss": 0.2844, + "rejected_geometric_mean": -1.7197333574295044, + "step": 3284 + }, + { + "chosen_geometric_mean": -1.0641487836837769, + "epoch": 0.81, + "grad_norm": 12.8125, + "learning_rate": 3.2281566474580617e-06, + "log_odds": 5.930809020996094, + "log_odds_ratio": -0.07719248533248901, + "loss": 0.3118, + "rejected_geometric_mean": -6.613115310668945, + "step": 3285 + }, + { + "chosen_geometric_mean": -0.9943023920059204, + "epoch": 0.81, + "grad_norm": 37.25, + "learning_rate": 3.2272253258540685e-06, + "log_odds": 7.045504570007324, + "log_odds_ratio": -0.21456722915172577, + "loss": 0.3252, + "rejected_geometric_mean": -7.718473434448242, + "step": 3286 + }, + { + "chosen_geometric_mean": -1.0947561264038086, + "epoch": 0.81, + "grad_norm": 9.8125, + "learning_rate": 3.2262938939852612e-06, + "log_odds": 1.8886187076568604, + "log_odds_ratio": -0.21163107454776764, + "loss": 0.2709, + "rejected_geometric_mean": -2.664886951446533, + "step": 3287 + }, + { + "chosen_geometric_mean": -0.9956286549568176, + "epoch": 0.81, + "grad_norm": 9.6875, + "learning_rate": 3.225362351992865e-06, + "log_odds": 11.432191848754883, + "log_odds_ratio": -0.023722857236862183, + "loss": 0.3199, + "rejected_geometric_mean": -11.973831176757812, + "step": 3288 + }, + { + "chosen_geometric_mean": -1.1783826351165771, + "epoch": 0.81, + "grad_norm": 1.9140625, + "learning_rate": 3.224430700018125e-06, + "log_odds": 3.6955184936523438, + "log_odds_ratio": -0.31791752576828003, + "loss": 0.2766, + "rejected_geometric_mean": -4.6876749992370605, + "step": 3289 + }, + { + "chosen_geometric_mean": -1.105517029762268, + "epoch": 0.81, + "grad_norm": 4.6875, + "learning_rate": 3.2234989382023014e-06, + "log_odds": 3.035922050476074, + "log_odds_ratio": -0.2324787974357605, + "loss": 0.2715, + "rejected_geometric_mean": -3.924964427947998, + "step": 3290 + }, + { + "chosen_geometric_mean": -1.1306087970733643, + "epoch": 0.81, + "grad_norm": 11.375, + "learning_rate": 3.2225670666866723e-06, + "log_odds": 9.786005020141602, + "log_odds_ratio": -0.24142378568649292, + "loss": 0.2903, + "rejected_geometric_mean": -10.617389678955078, + "step": 3291 + }, + { + "chosen_geometric_mean": -1.238168478012085, + "epoch": 0.82, + "grad_norm": 6.21875, + "learning_rate": 3.221635085612531e-06, + "log_odds": 4.444430351257324, + "log_odds_ratio": -0.4958542585372925, + "loss": 0.3143, + "rejected_geometric_mean": -5.585955619812012, + "step": 3292 + }, + { + "chosen_geometric_mean": -1.0601062774658203, + "epoch": 0.82, + "grad_norm": 2.765625, + "learning_rate": 3.2207029951211887e-06, + "log_odds": 5.401917457580566, + "log_odds_ratio": -0.2837813198566437, + "loss": 0.3154, + "rejected_geometric_mean": -6.154509544372559, + "step": 3293 + }, + { + "chosen_geometric_mean": -1.2672083377838135, + "epoch": 0.82, + "grad_norm": 2.1875, + "learning_rate": 3.2197707953539726e-06, + "log_odds": 8.5787992477417, + "log_odds_ratio": -0.16186174750328064, + "loss": 0.2993, + "rejected_geometric_mean": -9.590129852294922, + "step": 3294 + }, + { + "chosen_geometric_mean": -1.112348198890686, + "epoch": 0.82, + "grad_norm": 7.875, + "learning_rate": 3.2188384864522276e-06, + "log_odds": 11.89334487915039, + "log_odds_ratio": -0.14976726472377777, + "loss": 0.295, + "rejected_geometric_mean": -12.680484771728516, + "step": 3295 + }, + { + "chosen_geometric_mean": -0.9602386951446533, + "epoch": 0.82, + "grad_norm": 4.21875, + "learning_rate": 3.2179060685573122e-06, + "log_odds": 5.544751167297363, + "log_odds_ratio": -0.32034924626350403, + "loss": 0.2674, + "rejected_geometric_mean": -6.230692386627197, + "step": 3296 + }, + { + "chosen_geometric_mean": -1.2218568325042725, + "epoch": 0.82, + "grad_norm": 3.546875, + "learning_rate": 3.216973541810604e-06, + "log_odds": 2.1557576656341553, + "log_odds_ratio": -0.4352847933769226, + "loss": 0.3169, + "rejected_geometric_mean": -3.240128517150879, + "step": 3297 + }, + { + "chosen_geometric_mean": -1.087724208831787, + "epoch": 0.82, + "grad_norm": 34.75, + "learning_rate": 3.2160409063534975e-06, + "log_odds": 4.695690631866455, + "log_odds_ratio": -0.07280005514621735, + "loss": 0.2761, + "rejected_geometric_mean": -5.402809143066406, + "step": 3298 + }, + { + "chosen_geometric_mean": -1.1114816665649414, + "epoch": 0.82, + "grad_norm": 5.40625, + "learning_rate": 3.2151081623274016e-06, + "log_odds": 5.836021423339844, + "log_odds_ratio": -0.1306387335062027, + "loss": 0.2473, + "rejected_geometric_mean": -6.605674743652344, + "step": 3299 + }, + { + "chosen_geometric_mean": -1.1378812789916992, + "epoch": 0.82, + "grad_norm": 3.921875, + "learning_rate": 3.214175309873742e-06, + "log_odds": 2.651128053665161, + "log_odds_ratio": -0.2754073441028595, + "loss": 0.2658, + "rejected_geometric_mean": -3.517223596572876, + "step": 3300 + }, + { + "chosen_geometric_mean": -0.937688410282135, + "epoch": 0.82, + "grad_norm": 14.5625, + "learning_rate": 3.213242349133963e-06, + "log_odds": 4.037258148193359, + "log_odds_ratio": -0.11772647500038147, + "loss": 0.3037, + "rejected_geometric_mean": -4.540610313415527, + "step": 3301 + }, + { + "chosen_geometric_mean": -1.0293365716934204, + "epoch": 0.82, + "grad_norm": 8.75, + "learning_rate": 3.2123092802495236e-06, + "log_odds": 2.5881128311157227, + "log_odds_ratio": -0.3705582618713379, + "loss": 0.3089, + "rejected_geometric_mean": -3.3928897380828857, + "step": 3302 + }, + { + "chosen_geometric_mean": -1.2950023412704468, + "epoch": 0.82, + "grad_norm": 44.25, + "learning_rate": 3.211376103361899e-06, + "log_odds": 0.6251558661460876, + "log_odds_ratio": -0.5773313641548157, + "loss": 0.3531, + "rejected_geometric_mean": -1.7962968349456787, + "step": 3303 + }, + { + "chosen_geometric_mean": -1.0203365087509155, + "epoch": 0.82, + "grad_norm": 19.125, + "learning_rate": 3.2104428186125813e-06, + "log_odds": 6.116938591003418, + "log_odds_ratio": -0.01573706418275833, + "loss": 0.315, + "rejected_geometric_mean": -6.676105499267578, + "step": 3304 + }, + { + "chosen_geometric_mean": -1.1296409368515015, + "epoch": 0.82, + "grad_norm": 17.75, + "learning_rate": 3.2095094261430787e-06, + "log_odds": 5.930510997772217, + "log_odds_ratio": -0.13911478221416473, + "loss": 0.2892, + "rejected_geometric_mean": -6.709827899932861, + "step": 3305 + }, + { + "chosen_geometric_mean": -1.0851198434829712, + "epoch": 0.82, + "grad_norm": 42.75, + "learning_rate": 3.208575926094916e-06, + "log_odds": 4.053489685058594, + "log_odds_ratio": -0.10262221843004227, + "loss": 0.3017, + "rejected_geometric_mean": -4.769761085510254, + "step": 3306 + }, + { + "chosen_geometric_mean": -1.1499505043029785, + "epoch": 0.82, + "grad_norm": 23.25, + "learning_rate": 3.207642318609635e-06, + "log_odds": 5.477077007293701, + "log_odds_ratio": -0.26010119915008545, + "loss": 0.2934, + "rejected_geometric_mean": -6.341903209686279, + "step": 3307 + }, + { + "chosen_geometric_mean": -0.9877123236656189, + "epoch": 0.82, + "grad_norm": 2.125, + "learning_rate": 3.2067086038287914e-06, + "log_odds": 5.237043380737305, + "log_odds_ratio": -0.04584880918264389, + "loss": 0.2933, + "rejected_geometric_mean": -5.772102355957031, + "step": 3308 + }, + { + "chosen_geometric_mean": -1.1426191329956055, + "epoch": 0.82, + "grad_norm": 8.9375, + "learning_rate": 3.20577478189396e-06, + "log_odds": 4.35713529586792, + "log_odds_ratio": -0.47895118594169617, + "loss": 0.3362, + "rejected_geometric_mean": -5.377492427825928, + "step": 3309 + }, + { + "chosen_geometric_mean": -1.222346305847168, + "epoch": 0.82, + "grad_norm": 49.75, + "learning_rate": 3.2048408529467306e-06, + "log_odds": 0.2090820074081421, + "log_odds_ratio": -0.5998126268386841, + "loss": 0.3801, + "rejected_geometric_mean": -1.378378987312317, + "step": 3310 + }, + { + "chosen_geometric_mean": -1.457863211631775, + "epoch": 0.82, + "grad_norm": 14.6875, + "learning_rate": 3.203906817128709e-06, + "log_odds": 3.933175802230835, + "log_odds_ratio": -0.1323622465133667, + "loss": 0.269, + "rejected_geometric_mean": -5.101670265197754, + "step": 3311 + }, + { + "chosen_geometric_mean": -1.033517837524414, + "epoch": 0.82, + "grad_norm": 6.5625, + "learning_rate": 3.202972674581517e-06, + "log_odds": 8.758634567260742, + "log_odds_ratio": -0.010695839300751686, + "loss": 0.2996, + "rejected_geometric_mean": -9.329230308532715, + "step": 3312 + }, + { + "chosen_geometric_mean": -0.9999604225158691, + "epoch": 0.82, + "grad_norm": 12.875, + "learning_rate": 3.202038425446793e-06, + "log_odds": 9.665633201599121, + "log_odds_ratio": -0.0927564799785614, + "loss": 0.3038, + "rejected_geometric_mean": -10.240827560424805, + "step": 3313 + }, + { + "chosen_geometric_mean": -1.148009181022644, + "epoch": 0.82, + "grad_norm": 4.625, + "learning_rate": 3.201104069866193e-06, + "log_odds": 1.5509105920791626, + "log_odds_ratio": -0.3434978723526001, + "loss": 0.2836, + "rejected_geometric_mean": -2.4753012657165527, + "step": 3314 + }, + { + "chosen_geometric_mean": -1.625632643699646, + "epoch": 0.82, + "grad_norm": 26.0, + "learning_rate": 3.2001696079813866e-06, + "log_odds": 9.930597305297852, + "log_odds_ratio": -0.5652507543563843, + "loss": 0.3687, + "rejected_geometric_mean": -11.304170608520508, + "step": 3315 + }, + { + "chosen_geometric_mean": -1.3874670267105103, + "epoch": 0.82, + "grad_norm": 109.5, + "learning_rate": 3.19923503993406e-06, + "log_odds": 6.927187919616699, + "log_odds_ratio": -0.2063378244638443, + "loss": 0.3946, + "rejected_geometric_mean": -8.08199691772461, + "step": 3316 + }, + { + "chosen_geometric_mean": -1.2957966327667236, + "epoch": 0.82, + "grad_norm": 50.25, + "learning_rate": 3.198300365865918e-06, + "log_odds": 1.928299069404602, + "log_odds_ratio": -0.24722595512866974, + "loss": 0.3566, + "rejected_geometric_mean": -3.007415294647217, + "step": 3317 + }, + { + "chosen_geometric_mean": -0.9576549530029297, + "epoch": 0.82, + "grad_norm": 20.375, + "learning_rate": 3.197365585918678e-06, + "log_odds": 2.034512519836426, + "log_odds_ratio": -0.28801852464675903, + "loss": 0.3051, + "rejected_geometric_mean": -2.7175588607788086, + "step": 3318 + }, + { + "chosen_geometric_mean": -1.0947643518447876, + "epoch": 0.82, + "grad_norm": 10.8125, + "learning_rate": 3.1964307002340754e-06, + "log_odds": 9.339500427246094, + "log_odds_ratio": -0.026822276413440704, + "loss": 0.2968, + "rejected_geometric_mean": -10.022791862487793, + "step": 3319 + }, + { + "chosen_geometric_mean": -0.9555853605270386, + "epoch": 0.82, + "grad_norm": 2.015625, + "learning_rate": 3.1954957089538623e-06, + "log_odds": 3.6423757076263428, + "log_odds_ratio": -0.2994523346424103, + "loss": 0.2991, + "rejected_geometric_mean": -4.301240921020508, + "step": 3320 + }, + { + "chosen_geometric_mean": -0.960263192653656, + "epoch": 0.82, + "grad_norm": 1.984375, + "learning_rate": 3.1945606122198048e-06, + "log_odds": 9.08367919921875, + "log_odds_ratio": -0.10712671279907227, + "loss": 0.2617, + "rejected_geometric_mean": -9.594337463378906, + "step": 3321 + }, + { + "chosen_geometric_mean": -1.0418295860290527, + "epoch": 0.82, + "grad_norm": 30.625, + "learning_rate": 3.1936254101736865e-06, + "log_odds": 2.7447216510772705, + "log_odds_ratio": -0.36399826407432556, + "loss": 0.3095, + "rejected_geometric_mean": -3.5766520500183105, + "step": 3322 + }, + { + "chosen_geometric_mean": -0.9978762269020081, + "epoch": 0.82, + "grad_norm": 2.765625, + "learning_rate": 3.192690102957306e-06, + "log_odds": 8.377588272094727, + "log_odds_ratio": -0.05879322811961174, + "loss": 0.271, + "rejected_geometric_mean": -8.928407669067383, + "step": 3323 + }, + { + "chosen_geometric_mean": -1.258589506149292, + "epoch": 0.82, + "grad_norm": 4.8125, + "learning_rate": 3.1917546907124786e-06, + "log_odds": 4.398470878601074, + "log_odds_ratio": -0.1730349212884903, + "loss": 0.2765, + "rejected_geometric_mean": -5.422160625457764, + "step": 3324 + }, + { + "chosen_geometric_mean": -1.2612037658691406, + "epoch": 0.82, + "grad_norm": 9.375, + "learning_rate": 3.190819173581036e-06, + "log_odds": 2.62365984916687, + "log_odds_ratio": -0.324165403842926, + "loss": 0.3168, + "rejected_geometric_mean": -3.684332847595215, + "step": 3325 + }, + { + "chosen_geometric_mean": -1.260498046875, + "epoch": 0.82, + "grad_norm": 3.953125, + "learning_rate": 3.189883551704823e-06, + "log_odds": 7.6502556800842285, + "log_odds_ratio": -0.3189909756183624, + "loss": 0.3373, + "rejected_geometric_mean": -8.663904190063477, + "step": 3326 + }, + { + "chosen_geometric_mean": -1.0355979204177856, + "epoch": 0.82, + "grad_norm": 16.375, + "learning_rate": 3.188947825225704e-06, + "log_odds": 8.085704803466797, + "log_odds_ratio": -0.03412278741598129, + "loss": 0.3531, + "rejected_geometric_mean": -8.658379554748535, + "step": 3327 + }, + { + "chosen_geometric_mean": -1.1088354587554932, + "epoch": 0.82, + "grad_norm": 41.75, + "learning_rate": 3.188011994285558e-06, + "log_odds": 6.904480457305908, + "log_odds_ratio": -0.18420478701591492, + "loss": 0.3484, + "rejected_geometric_mean": -7.696474075317383, + "step": 3328 + }, + { + "chosen_geometric_mean": -1.2226030826568604, + "epoch": 0.82, + "grad_norm": 14.375, + "learning_rate": 3.1870760590262784e-06, + "log_odds": 4.955247402191162, + "log_odds_ratio": -0.15040910243988037, + "loss": 0.2964, + "rejected_geometric_mean": -5.8797383308410645, + "step": 3329 + }, + { + "chosen_geometric_mean": -0.9857997894287109, + "epoch": 0.82, + "grad_norm": 2.515625, + "learning_rate": 3.186140019589775e-06, + "log_odds": 5.324572563171387, + "log_odds_ratio": -0.22758404910564423, + "loss": 0.2885, + "rejected_geometric_mean": -6.0026726722717285, + "step": 3330 + }, + { + "chosen_geometric_mean": -1.2716267108917236, + "epoch": 0.82, + "grad_norm": 26.0, + "learning_rate": 3.185203876117976e-06, + "log_odds": 1.9108479022979736, + "log_odds_ratio": -0.3246459364891052, + "loss": 0.3484, + "rejected_geometric_mean": -3.0048859119415283, + "step": 3331 + }, + { + "chosen_geometric_mean": -0.9811771512031555, + "epoch": 0.82, + "grad_norm": 7.5625, + "learning_rate": 3.184267628752821e-06, + "log_odds": 10.086462020874023, + "log_odds_ratio": -0.1522904634475708, + "loss": 0.2728, + "rejected_geometric_mean": -10.639649391174316, + "step": 3332 + }, + { + "chosen_geometric_mean": -1.1739351749420166, + "epoch": 0.83, + "grad_norm": 23.875, + "learning_rate": 3.183331277636268e-06, + "log_odds": 3.980445384979248, + "log_odds_ratio": -0.1723991334438324, + "loss": 0.3139, + "rejected_geometric_mean": -4.864134311676025, + "step": 3333 + }, + { + "chosen_geometric_mean": -1.0952187776565552, + "epoch": 0.83, + "grad_norm": 6.3125, + "learning_rate": 3.182394822910291e-06, + "log_odds": 1.461912989616394, + "log_odds_ratio": -0.2859318256378174, + "loss": 0.3083, + "rejected_geometric_mean": -2.2868220806121826, + "step": 3334 + }, + { + "chosen_geometric_mean": -0.9981805682182312, + "epoch": 0.83, + "grad_norm": 26.375, + "learning_rate": 3.1814582647168785e-06, + "log_odds": 5.942745685577393, + "log_odds_ratio": -0.06332015246152878, + "loss": 0.3455, + "rejected_geometric_mean": -6.466283798217773, + "step": 3335 + }, + { + "chosen_geometric_mean": -1.1756591796875, + "epoch": 0.83, + "grad_norm": 18.375, + "learning_rate": 3.180521603198036e-06, + "log_odds": 5.533174991607666, + "log_odds_ratio": -0.16984304785728455, + "loss": 0.2693, + "rejected_geometric_mean": -6.455038070678711, + "step": 3336 + }, + { + "chosen_geometric_mean": -1.0755034685134888, + "epoch": 0.83, + "grad_norm": 1.9609375, + "learning_rate": 3.1795848384957826e-06, + "log_odds": 7.399868011474609, + "log_odds_ratio": -0.00552678806707263, + "loss": 0.2574, + "rejected_geometric_mean": -8.025568008422852, + "step": 3337 + }, + { + "chosen_geometric_mean": -1.2150540351867676, + "epoch": 0.83, + "grad_norm": 12.625, + "learning_rate": 3.1786479707521556e-06, + "log_odds": 3.5084116458892822, + "log_odds_ratio": -0.3752693831920624, + "loss": 0.3653, + "rejected_geometric_mean": -4.527756214141846, + "step": 3338 + }, + { + "chosen_geometric_mean": -1.1117637157440186, + "epoch": 0.83, + "grad_norm": 3.484375, + "learning_rate": 3.1777110001092056e-06, + "log_odds": 1.246878743171692, + "log_odds_ratio": -0.3837924897670746, + "loss": 0.2556, + "rejected_geometric_mean": -2.1934614181518555, + "step": 3339 + }, + { + "chosen_geometric_mean": -1.1776622533798218, + "epoch": 0.83, + "grad_norm": 2.109375, + "learning_rate": 3.1767739267090003e-06, + "log_odds": 2.6134214401245117, + "log_odds_ratio": -0.12363497912883759, + "loss": 0.2472, + "rejected_geometric_mean": -3.4752888679504395, + "step": 3340 + }, + { + "chosen_geometric_mean": -0.9513381719589233, + "epoch": 0.83, + "grad_norm": 14.75, + "learning_rate": 3.175836750693623e-06, + "log_odds": 0.6708247661590576, + "log_odds_ratio": -0.46800512075424194, + "loss": 0.3135, + "rejected_geometric_mean": -1.4425045251846313, + "step": 3341 + }, + { + "chosen_geometric_mean": -1.4770680665969849, + "epoch": 0.83, + "grad_norm": 13.125, + "learning_rate": 3.1748994722051705e-06, + "log_odds": 0.7491077780723572, + "log_odds_ratio": -0.544746994972229, + "loss": 0.3093, + "rejected_geometric_mean": -2.1213250160217285, + "step": 3342 + }, + { + "chosen_geometric_mean": -0.8178836107254028, + "epoch": 0.83, + "grad_norm": 5.65625, + "learning_rate": 3.173962091385758e-06, + "log_odds": 12.118860244750977, + "log_odds_ratio": -8.216109563363716e-05, + "loss": 0.2531, + "rejected_geometric_mean": -12.322566032409668, + "step": 3343 + }, + { + "chosen_geometric_mean": -1.1846389770507812, + "epoch": 0.83, + "grad_norm": 1.9765625, + "learning_rate": 3.173024608377516e-06, + "log_odds": 2.549302101135254, + "log_odds_ratio": -0.18237119913101196, + "loss": 0.295, + "rejected_geometric_mean": -3.4493160247802734, + "step": 3344 + }, + { + "chosen_geometric_mean": -1.1143722534179688, + "epoch": 0.83, + "grad_norm": 10.25, + "learning_rate": 3.1720870233225875e-06, + "log_odds": 1.3144515752792358, + "log_odds_ratio": -0.42028650641441345, + "loss": 0.3134, + "rejected_geometric_mean": -2.1973001956939697, + "step": 3345 + }, + { + "chosen_geometric_mean": -1.233033299446106, + "epoch": 0.83, + "grad_norm": 14.5625, + "learning_rate": 3.171149336363133e-06, + "log_odds": 4.661530494689941, + "log_odds_ratio": -0.1824571192264557, + "loss": 0.2655, + "rejected_geometric_mean": -5.637453556060791, + "step": 3346 + }, + { + "chosen_geometric_mean": -1.1778560876846313, + "epoch": 0.83, + "grad_norm": 3.046875, + "learning_rate": 3.1702115476413287e-06, + "log_odds": 8.905717849731445, + "log_odds_ratio": -0.04706919565796852, + "loss": 0.2917, + "rejected_geometric_mean": -9.692024230957031, + "step": 3347 + }, + { + "chosen_geometric_mean": -1.0237934589385986, + "epoch": 0.83, + "grad_norm": 3.3125, + "learning_rate": 3.1692736572993676e-06, + "log_odds": 6.525629043579102, + "log_odds_ratio": -0.34821224212646484, + "loss": 0.2483, + "rejected_geometric_mean": -7.280886650085449, + "step": 3348 + }, + { + "chosen_geometric_mean": -1.298477053642273, + "epoch": 0.83, + "grad_norm": 5.5, + "learning_rate": 3.1683356654794527e-06, + "log_odds": 5.2057929039001465, + "log_odds_ratio": -0.13610394299030304, + "loss": 0.3221, + "rejected_geometric_mean": -6.260673522949219, + "step": 3349 + }, + { + "chosen_geometric_mean": -1.1994675397872925, + "epoch": 0.83, + "grad_norm": 2.375, + "learning_rate": 3.1673975723238097e-06, + "log_odds": 1.6379066705703735, + "log_odds_ratio": -0.3441495895385742, + "loss": 0.2955, + "rejected_geometric_mean": -2.6538121700286865, + "step": 3350 + }, + { + "chosen_geometric_mean": -1.130407691001892, + "epoch": 0.83, + "grad_norm": 18.125, + "learning_rate": 3.166459377974675e-06, + "log_odds": 0.9411526322364807, + "log_odds_ratio": -0.48608094453811646, + "loss": 0.3906, + "rejected_geometric_mean": -1.971243143081665, + "step": 3351 + }, + { + "chosen_geometric_mean": -0.9803937673568726, + "epoch": 0.83, + "grad_norm": 5.3125, + "learning_rate": 3.1655210825743e-06, + "log_odds": 3.8315787315368652, + "log_odds_ratio": -0.1496267467737198, + "loss": 0.2869, + "rejected_geometric_mean": -4.4176106452941895, + "step": 3352 + }, + { + "chosen_geometric_mean": -1.0365965366363525, + "epoch": 0.83, + "grad_norm": 7.90625, + "learning_rate": 3.164582686264954e-06, + "log_odds": 2.6329762935638428, + "log_odds_ratio": -0.3539696931838989, + "loss": 0.2865, + "rejected_geometric_mean": -3.4677672386169434, + "step": 3353 + }, + { + "chosen_geometric_mean": -1.0643341541290283, + "epoch": 0.83, + "grad_norm": 9.9375, + "learning_rate": 3.1636441891889203e-06, + "log_odds": 1.2823472023010254, + "log_odds_ratio": -0.3777823746204376, + "loss": 0.2995, + "rejected_geometric_mean": -2.1575920581817627, + "step": 3354 + }, + { + "chosen_geometric_mean": -1.107120156288147, + "epoch": 0.83, + "grad_norm": 2.40625, + "learning_rate": 3.162705591488497e-06, + "log_odds": 5.901980876922607, + "log_odds_ratio": -0.1633235067129135, + "loss": 0.2697, + "rejected_geometric_mean": -6.669260025024414, + "step": 3355 + }, + { + "chosen_geometric_mean": -0.889803409576416, + "epoch": 0.83, + "grad_norm": 18.125, + "learning_rate": 3.1617668933059987e-06, + "log_odds": 5.063732147216797, + "log_odds_ratio": -0.16258801519870758, + "loss": 0.292, + "rejected_geometric_mean": -5.482161521911621, + "step": 3356 + }, + { + "chosen_geometric_mean": -1.1887166500091553, + "epoch": 0.83, + "grad_norm": 19.125, + "learning_rate": 3.1608280947837544e-06, + "log_odds": 3.140998125076294, + "log_odds_ratio": -0.3260555863380432, + "loss": 0.3312, + "rejected_geometric_mean": -4.103658676147461, + "step": 3357 + }, + { + "chosen_geometric_mean": -0.9781473278999329, + "epoch": 0.83, + "grad_norm": 7.5625, + "learning_rate": 3.1598891960641088e-06, + "log_odds": 5.518162250518799, + "log_odds_ratio": -0.03942367061972618, + "loss": 0.2991, + "rejected_geometric_mean": -6.034716606140137, + "step": 3358 + }, + { + "chosen_geometric_mean": -1.0581965446472168, + "epoch": 0.83, + "grad_norm": 9.6875, + "learning_rate": 3.158950197289421e-06, + "log_odds": 5.174140453338623, + "log_odds_ratio": -0.17691676318645477, + "loss": 0.315, + "rejected_geometric_mean": -5.87586784362793, + "step": 3359 + }, + { + "chosen_geometric_mean": -0.9953625202178955, + "epoch": 0.83, + "grad_norm": 5.46875, + "learning_rate": 3.1580110986020663e-06, + "log_odds": 4.224327087402344, + "log_odds_ratio": -0.10960821807384491, + "loss": 0.2872, + "rejected_geometric_mean": -4.837444305419922, + "step": 3360 + }, + { + "chosen_geometric_mean": -1.0567320585250854, + "epoch": 0.83, + "grad_norm": 3.609375, + "learning_rate": 3.1570719001444335e-06, + "log_odds": 3.965689182281494, + "log_odds_ratio": -0.26343944668769836, + "loss": 0.2841, + "rejected_geometric_mean": -4.744972229003906, + "step": 3361 + }, + { + "chosen_geometric_mean": -1.2277675867080688, + "epoch": 0.83, + "grad_norm": 12.125, + "learning_rate": 3.156132602058929e-06, + "log_odds": 1.2678229808807373, + "log_odds_ratio": -0.31763923168182373, + "loss": 0.2979, + "rejected_geometric_mean": -2.2698309421539307, + "step": 3362 + }, + { + "chosen_geometric_mean": -1.1598416566848755, + "epoch": 0.83, + "grad_norm": 19.125, + "learning_rate": 3.155193204487971e-06, + "log_odds": 2.6732821464538574, + "log_odds_ratio": -0.3199983835220337, + "loss": 0.3002, + "rejected_geometric_mean": -3.609633445739746, + "step": 3363 + }, + { + "chosen_geometric_mean": -1.1173017024993896, + "epoch": 0.83, + "grad_norm": 11.25, + "learning_rate": 3.1542537075739977e-06, + "log_odds": 1.145890712738037, + "log_odds_ratio": -0.4184126853942871, + "loss": 0.2943, + "rejected_geometric_mean": -2.085317373275757, + "step": 3364 + }, + { + "chosen_geometric_mean": -1.0068483352661133, + "epoch": 0.83, + "grad_norm": 5.4375, + "learning_rate": 3.1533141114594557e-06, + "log_odds": 0.23160560429096222, + "log_odds_ratio": -0.6145411133766174, + "loss": 0.2664, + "rejected_geometric_mean": -1.1752774715423584, + "step": 3365 + }, + { + "chosen_geometric_mean": -0.9546523094177246, + "epoch": 0.83, + "grad_norm": 3.640625, + "learning_rate": 3.1523744162868134e-06, + "log_odds": 5.405646324157715, + "log_odds_ratio": -0.27353987097740173, + "loss": 0.2847, + "rejected_geometric_mean": -6.030333042144775, + "step": 3366 + }, + { + "chosen_geometric_mean": -1.2122371196746826, + "epoch": 0.83, + "grad_norm": 6.59375, + "learning_rate": 3.151434622198551e-06, + "log_odds": 5.831347465515137, + "log_odds_ratio": -0.26944810152053833, + "loss": 0.2695, + "rejected_geometric_mean": -6.747167587280273, + "step": 3367 + }, + { + "chosen_geometric_mean": -0.8206019401550293, + "epoch": 0.83, + "grad_norm": 3.5, + "learning_rate": 3.1504947293371615e-06, + "log_odds": 1.690291404724121, + "log_odds_ratio": -0.22381824254989624, + "loss": 0.2542, + "rejected_geometric_mean": -2.123778820037842, + "step": 3368 + }, + { + "chosen_geometric_mean": -1.028874397277832, + "epoch": 0.83, + "grad_norm": 3.09375, + "learning_rate": 3.1495547378451575e-06, + "log_odds": 4.119231224060059, + "log_odds_ratio": -0.0720430463552475, + "loss": 0.3162, + "rejected_geometric_mean": -4.702601432800293, + "step": 3369 + }, + { + "chosen_geometric_mean": -0.8131958246231079, + "epoch": 0.83, + "grad_norm": 4.71875, + "learning_rate": 3.1486146478650626e-06, + "log_odds": 1.444533348083496, + "log_odds_ratio": -0.33731311559677124, + "loss": 0.2881, + "rejected_geometric_mean": -1.908800482749939, + "step": 3370 + }, + { + "chosen_geometric_mean": -0.9816666841506958, + "epoch": 0.83, + "grad_norm": 7.21875, + "learning_rate": 3.1476744595394183e-06, + "log_odds": 8.36152458190918, + "log_odds_ratio": -0.03469283878803253, + "loss": 0.2725, + "rejected_geometric_mean": -8.879603385925293, + "step": 3371 + }, + { + "chosen_geometric_mean": -1.1991362571716309, + "epoch": 0.83, + "grad_norm": 6.21875, + "learning_rate": 3.1467341730107794e-06, + "log_odds": 1.0821492671966553, + "log_odds_ratio": -0.4217786192893982, + "loss": 0.337, + "rejected_geometric_mean": -2.1259047985076904, + "step": 3372 + }, + { + "chosen_geometric_mean": -1.0898312330245972, + "epoch": 0.84, + "grad_norm": 6.1875, + "learning_rate": 3.1457937884217148e-06, + "log_odds": 1.735054612159729, + "log_odds_ratio": -0.35473841428756714, + "loss": 0.2587, + "rejected_geometric_mean": -2.5965163707733154, + "step": 3373 + }, + { + "chosen_geometric_mean": -1.237847089767456, + "epoch": 0.84, + "grad_norm": 3.375, + "learning_rate": 3.1448533059148113e-06, + "log_odds": 0.8611358404159546, + "log_odds_ratio": -0.39769357442855835, + "loss": 0.3105, + "rejected_geometric_mean": -1.9274141788482666, + "step": 3374 + }, + { + "chosen_geometric_mean": -0.9056616425514221, + "epoch": 0.84, + "grad_norm": 4.34375, + "learning_rate": 3.1439127256326675e-06, + "log_odds": 3.9513702392578125, + "log_odds_ratio": -0.0973941758275032, + "loss": 0.3099, + "rejected_geometric_mean": -4.367147445678711, + "step": 3375 + }, + { + "chosen_geometric_mean": -0.9689140319824219, + "epoch": 0.84, + "grad_norm": 2.6875, + "learning_rate": 3.142972047717897e-06, + "log_odds": 5.712099552154541, + "log_odds_ratio": -0.19294673204421997, + "loss": 0.269, + "rejected_geometric_mean": -6.306001663208008, + "step": 3376 + }, + { + "chosen_geometric_mean": -1.2148338556289673, + "epoch": 0.84, + "grad_norm": 2.140625, + "learning_rate": 3.1420312723131312e-06, + "log_odds": 9.926325798034668, + "log_odds_ratio": -0.14073865115642548, + "loss": 0.2893, + "rejected_geometric_mean": -10.852869033813477, + "step": 3377 + }, + { + "chosen_geometric_mean": -1.1603506803512573, + "epoch": 0.84, + "grad_norm": 8.3125, + "learning_rate": 3.141090399561013e-06, + "log_odds": 1.5377180576324463, + "log_odds_ratio": -0.33291810750961304, + "loss": 0.3798, + "rejected_geometric_mean": -2.4864325523376465, + "step": 3378 + }, + { + "chosen_geometric_mean": -1.14158034324646, + "epoch": 0.84, + "grad_norm": 6.03125, + "learning_rate": 3.140149429604201e-06, + "log_odds": 8.31145191192627, + "log_odds_ratio": -0.012824395671486855, + "loss": 0.2654, + "rejected_geometric_mean": -9.055778503417969, + "step": 3379 + }, + { + "chosen_geometric_mean": -1.1054999828338623, + "epoch": 0.84, + "grad_norm": 37.0, + "learning_rate": 3.139208362585371e-06, + "log_odds": 8.79243278503418, + "log_odds_ratio": -0.017105773091316223, + "loss": 0.3546, + "rejected_geometric_mean": -9.497261047363281, + "step": 3380 + }, + { + "chosen_geometric_mean": -1.0891327857971191, + "epoch": 0.84, + "grad_norm": 25.0, + "learning_rate": 3.1382671986472085e-06, + "log_odds": 3.1462485790252686, + "log_odds_ratio": -0.36219316720962524, + "loss": 0.2769, + "rejected_geometric_mean": -4.03660774230957, + "step": 3381 + }, + { + "chosen_geometric_mean": -1.1292085647583008, + "epoch": 0.84, + "grad_norm": 9.375, + "learning_rate": 3.1373259379324172e-06, + "log_odds": 7.465922832489014, + "log_odds_ratio": -0.20279039442539215, + "loss": 0.3072, + "rejected_geometric_mean": -8.28763198852539, + "step": 3382 + }, + { + "chosen_geometric_mean": -1.01045823097229, + "epoch": 0.84, + "grad_norm": 6.03125, + "learning_rate": 3.136384580583717e-06, + "log_odds": 3.695162534713745, + "log_odds_ratio": -0.05134974792599678, + "loss": 0.2311, + "rejected_geometric_mean": -4.26996374130249, + "step": 3383 + }, + { + "chosen_geometric_mean": -1.1186259984970093, + "epoch": 0.84, + "grad_norm": 30.125, + "learning_rate": 3.1354431267438373e-06, + "log_odds": 9.283488273620605, + "log_odds_ratio": -0.0007337330607697368, + "loss": 0.2857, + "rejected_geometric_mean": -9.99406623840332, + "step": 3384 + }, + { + "chosen_geometric_mean": -1.107968807220459, + "epoch": 0.84, + "grad_norm": 51.0, + "learning_rate": 3.134501576555527e-06, + "log_odds": 11.765275001525879, + "log_odds_ratio": -0.12829534709453583, + "loss": 0.3541, + "rejected_geometric_mean": -12.509346008300781, + "step": 3385 + }, + { + "chosen_geometric_mean": -1.1835335493087769, + "epoch": 0.84, + "grad_norm": 3.4375, + "learning_rate": 3.133559930161547e-06, + "log_odds": 2.411649703979492, + "log_odds_ratio": -0.14768466353416443, + "loss": 0.3301, + "rejected_geometric_mean": -3.292459011077881, + "step": 3386 + }, + { + "chosen_geometric_mean": -1.1726922988891602, + "epoch": 0.84, + "grad_norm": 6.90625, + "learning_rate": 3.132618187704674e-06, + "log_odds": 4.116987705230713, + "log_odds_ratio": -0.17062124609947205, + "loss": 0.2861, + "rejected_geometric_mean": -4.9746503829956055, + "step": 3387 + }, + { + "chosen_geometric_mean": -1.3464164733886719, + "epoch": 0.84, + "grad_norm": 11.6875, + "learning_rate": 3.1316763493276985e-06, + "log_odds": 2.7521965503692627, + "log_odds_ratio": -0.33356568217277527, + "loss": 0.3143, + "rejected_geometric_mean": -3.890516519546509, + "step": 3388 + }, + { + "chosen_geometric_mean": -1.0853095054626465, + "epoch": 0.84, + "grad_norm": 3.96875, + "learning_rate": 3.1307344151734253e-06, + "log_odds": 4.362720012664795, + "log_odds_ratio": -0.19922983646392822, + "loss": 0.3141, + "rejected_geometric_mean": -5.108766078948975, + "step": 3389 + }, + { + "chosen_geometric_mean": -1.031964659690857, + "epoch": 0.84, + "grad_norm": 14.1875, + "learning_rate": 3.1297923853846752e-06, + "log_odds": 6.548357963562012, + "log_odds_ratio": -0.008938497863709927, + "loss": 0.3013, + "rejected_geometric_mean": -7.135020732879639, + "step": 3390 + }, + { + "chosen_geometric_mean": -1.4115971326828003, + "epoch": 0.84, + "grad_norm": 21.0, + "learning_rate": 3.1288502601042814e-06, + "log_odds": 2.0031914710998535, + "log_odds_ratio": -0.250935822725296, + "loss": 0.3912, + "rejected_geometric_mean": -3.175635576248169, + "step": 3391 + }, + { + "chosen_geometric_mean": -1.0032765865325928, + "epoch": 0.84, + "grad_norm": 18.0, + "learning_rate": 3.1279080394750938e-06, + "log_odds": 3.5704586505889893, + "log_odds_ratio": -0.14352741837501526, + "loss": 0.3139, + "rejected_geometric_mean": -4.194738388061523, + "step": 3392 + }, + { + "chosen_geometric_mean": -1.2470998764038086, + "epoch": 0.84, + "grad_norm": 23.0, + "learning_rate": 3.1269657236399746e-06, + "log_odds": 3.039926528930664, + "log_odds_ratio": -0.2733278274536133, + "loss": 0.3196, + "rejected_geometric_mean": -4.078494071960449, + "step": 3393 + }, + { + "chosen_geometric_mean": -1.2939507961273193, + "epoch": 0.84, + "grad_norm": 7.5625, + "learning_rate": 3.1260233127418027e-06, + "log_odds": 2.741334915161133, + "log_odds_ratio": -0.22483330965042114, + "loss": 0.2803, + "rejected_geometric_mean": -3.8106095790863037, + "step": 3394 + }, + { + "chosen_geometric_mean": -1.0873287916183472, + "epoch": 0.84, + "grad_norm": 15.3125, + "learning_rate": 3.1250808069234683e-06, + "log_odds": 2.4958393573760986, + "log_odds_ratio": -0.31327831745147705, + "loss": 0.3062, + "rejected_geometric_mean": -3.3502120971679688, + "step": 3395 + }, + { + "chosen_geometric_mean": -1.5220731496810913, + "epoch": 0.84, + "grad_norm": 11.875, + "learning_rate": 3.1241382063278803e-06, + "log_odds": 5.243049621582031, + "log_odds_ratio": -0.3319738507270813, + "loss": 0.3001, + "rejected_geometric_mean": -6.580417633056641, + "step": 3396 + }, + { + "chosen_geometric_mean": -1.090633749961853, + "epoch": 0.84, + "grad_norm": 2.046875, + "learning_rate": 3.123195511097959e-06, + "log_odds": 1.6226024627685547, + "log_odds_ratio": -0.49171456694602966, + "loss": 0.3173, + "rejected_geometric_mean": -2.553687810897827, + "step": 3397 + }, + { + "chosen_geometric_mean": -1.2272148132324219, + "epoch": 0.84, + "grad_norm": 19.75, + "learning_rate": 3.122252721376638e-06, + "log_odds": 2.809537649154663, + "log_odds_ratio": -0.37510812282562256, + "loss": 0.3589, + "rejected_geometric_mean": -3.862902879714966, + "step": 3398 + }, + { + "chosen_geometric_mean": -1.1618974208831787, + "epoch": 0.84, + "grad_norm": 2.1875, + "learning_rate": 3.121309837306868e-06, + "log_odds": 5.1452860832214355, + "log_odds_ratio": -0.2254265546798706, + "loss": 0.2769, + "rejected_geometric_mean": -6.031089782714844, + "step": 3399 + }, + { + "chosen_geometric_mean": -1.074684739112854, + "epoch": 0.84, + "grad_norm": 2.21875, + "learning_rate": 3.1203668590316138e-06, + "log_odds": 3.705982208251953, + "log_odds_ratio": -0.32481154799461365, + "loss": 0.321, + "rejected_geometric_mean": -4.551873683929443, + "step": 3400 + }, + { + "chosen_geometric_mean": -1.1755034923553467, + "epoch": 0.84, + "grad_norm": 10.0625, + "learning_rate": 3.1194237866938515e-06, + "log_odds": 4.812806606292725, + "log_odds_ratio": -0.2608172595500946, + "loss": 0.3524, + "rejected_geometric_mean": -5.753468036651611, + "step": 3401 + }, + { + "chosen_geometric_mean": -1.2654105424880981, + "epoch": 0.84, + "grad_norm": 8.0625, + "learning_rate": 3.1184806204365746e-06, + "log_odds": 0.8360320329666138, + "log_odds_ratio": -0.3971608281135559, + "loss": 0.3362, + "rejected_geometric_mean": -1.9612787961959839, + "step": 3402 + }, + { + "chosen_geometric_mean": -1.1821208000183105, + "epoch": 0.84, + "grad_norm": 16.75, + "learning_rate": 3.1175373604027902e-06, + "log_odds": 4.0987229347229, + "log_odds_ratio": -0.07836631685495377, + "loss": 0.324, + "rejected_geometric_mean": -4.954982757568359, + "step": 3403 + }, + { + "chosen_geometric_mean": -1.2120170593261719, + "epoch": 0.84, + "grad_norm": 7.15625, + "learning_rate": 3.116594006735518e-06, + "log_odds": 3.610213279724121, + "log_odds_ratio": -0.24524307250976562, + "loss": 0.2821, + "rejected_geometric_mean": -4.5860595703125, + "step": 3404 + }, + { + "chosen_geometric_mean": -1.205657958984375, + "epoch": 0.84, + "grad_norm": 5.5625, + "learning_rate": 3.115650559577794e-06, + "log_odds": 3.308953285217285, + "log_odds_ratio": -0.1706087738275528, + "loss": 0.289, + "rejected_geometric_mean": -4.21429443359375, + "step": 3405 + }, + { + "chosen_geometric_mean": -1.0201739072799683, + "epoch": 0.84, + "grad_norm": 2.234375, + "learning_rate": 3.1147070190726675e-06, + "log_odds": 1.809885025024414, + "log_odds_ratio": -0.29066240787506104, + "loss": 0.269, + "rejected_geometric_mean": -2.543043613433838, + "step": 3406 + }, + { + "chosen_geometric_mean": -0.940329909324646, + "epoch": 0.84, + "grad_norm": 4.4375, + "learning_rate": 3.1137633853632016e-06, + "log_odds": 2.699307441711426, + "log_odds_ratio": -0.2929629683494568, + "loss": 0.2885, + "rejected_geometric_mean": -3.4343173503875732, + "step": 3407 + }, + { + "chosen_geometric_mean": -1.230736255645752, + "epoch": 0.84, + "grad_norm": 15.0, + "learning_rate": 3.112819658592473e-06, + "log_odds": 4.372838973999023, + "log_odds_ratio": -0.35983145236968994, + "loss": 0.3138, + "rejected_geometric_mean": -5.436248302459717, + "step": 3408 + }, + { + "chosen_geometric_mean": -1.3404712677001953, + "epoch": 0.84, + "grad_norm": 3.15625, + "learning_rate": 3.111875838903574e-06, + "log_odds": 1.9585133790969849, + "log_odds_ratio": -0.43489640951156616, + "loss": 0.3245, + "rejected_geometric_mean": -3.167452096939087, + "step": 3409 + }, + { + "chosen_geometric_mean": -0.950248122215271, + "epoch": 0.84, + "grad_norm": 11.3125, + "learning_rate": 3.110931926439612e-06, + "log_odds": 9.334488868713379, + "log_odds_ratio": -0.0020573940128087997, + "loss": 0.3228, + "rejected_geometric_mean": -9.777304649353027, + "step": 3410 + }, + { + "chosen_geometric_mean": -1.2119370698928833, + "epoch": 0.84, + "grad_norm": 2.78125, + "learning_rate": 3.109987921343704e-06, + "log_odds": 2.735738754272461, + "log_odds_ratio": -0.323202908039093, + "loss": 0.285, + "rejected_geometric_mean": -3.7400777339935303, + "step": 3411 + }, + { + "chosen_geometric_mean": -0.889745831489563, + "epoch": 0.84, + "grad_norm": 3.140625, + "learning_rate": 3.1090438237589848e-06, + "log_odds": 1.369103193283081, + "log_odds_ratio": -0.32976213097572327, + "loss": 0.3265, + "rejected_geometric_mean": -1.9717544317245483, + "step": 3412 + }, + { + "chosen_geometric_mean": -1.1362301111221313, + "epoch": 0.85, + "grad_norm": 4.90625, + "learning_rate": 3.108099633828603e-06, + "log_odds": 2.2888050079345703, + "log_odds_ratio": -0.334224134683609, + "loss": 0.3368, + "rejected_geometric_mean": -3.192816734313965, + "step": 3413 + }, + { + "chosen_geometric_mean": -1.166513442993164, + "epoch": 0.85, + "grad_norm": 13.375, + "learning_rate": 3.10715535169572e-06, + "log_odds": 3.1658222675323486, + "log_odds_ratio": -0.33818069100379944, + "loss": 0.3427, + "rejected_geometric_mean": -4.189188003540039, + "step": 3414 + }, + { + "chosen_geometric_mean": -0.9409346580505371, + "epoch": 0.85, + "grad_norm": 4.90625, + "learning_rate": 3.106210977503511e-06, + "log_odds": 2.313122272491455, + "log_odds_ratio": -0.2948213219642639, + "loss": 0.2896, + "rejected_geometric_mean": -2.991394281387329, + "step": 3415 + }, + { + "chosen_geometric_mean": -1.0967082977294922, + "epoch": 0.85, + "grad_norm": 6.0625, + "learning_rate": 3.105266511395166e-06, + "log_odds": 6.5245585441589355, + "log_odds_ratio": -0.18225355446338654, + "loss": 0.3246, + "rejected_geometric_mean": -7.337040901184082, + "step": 3416 + }, + { + "chosen_geometric_mean": -0.9470281600952148, + "epoch": 0.85, + "grad_norm": 10.625, + "learning_rate": 3.1043219535138902e-06, + "log_odds": 0.3034864068031311, + "log_odds_ratio": -0.5673583745956421, + "loss": 0.316, + "rejected_geometric_mean": -1.16481351852417, + "step": 3417 + }, + { + "chosen_geometric_mean": -1.0357451438903809, + "epoch": 0.85, + "grad_norm": 2.296875, + "learning_rate": 3.1033773040029e-06, + "log_odds": 4.459331512451172, + "log_odds_ratio": -0.1401856243610382, + "loss": 0.3441, + "rejected_geometric_mean": -5.064094066619873, + "step": 3418 + }, + { + "chosen_geometric_mean": -1.0713484287261963, + "epoch": 0.85, + "grad_norm": 11.5625, + "learning_rate": 3.102432563005427e-06, + "log_odds": 5.038074016571045, + "log_odds_ratio": -0.3319365382194519, + "loss": 0.3479, + "rejected_geometric_mean": -5.8548712730407715, + "step": 3419 + }, + { + "chosen_geometric_mean": -1.1868016719818115, + "epoch": 0.85, + "grad_norm": 20.875, + "learning_rate": 3.1014877306647163e-06, + "log_odds": 5.689751625061035, + "log_odds_ratio": -0.38421380519866943, + "loss": 0.3412, + "rejected_geometric_mean": -6.683297634124756, + "step": 3420 + }, + { + "chosen_geometric_mean": -1.1478512287139893, + "epoch": 0.85, + "grad_norm": 2.03125, + "learning_rate": 3.100542807124028e-06, + "log_odds": 10.022120475769043, + "log_odds_ratio": -0.15205831825733185, + "loss": 0.2904, + "rejected_geometric_mean": -10.875724792480469, + "step": 3421 + }, + { + "chosen_geometric_mean": -0.8963659405708313, + "epoch": 0.85, + "grad_norm": 5.3125, + "learning_rate": 3.0995977925266347e-06, + "log_odds": 1.0083012580871582, + "log_odds_ratio": -0.4099824130535126, + "loss": 0.3273, + "rejected_geometric_mean": -1.6491142511367798, + "step": 3422 + }, + { + "chosen_geometric_mean": -1.2049386501312256, + "epoch": 0.85, + "grad_norm": 5.53125, + "learning_rate": 3.098652687015823e-06, + "log_odds": 3.166041851043701, + "log_odds_ratio": -0.37755677103996277, + "loss": 0.3067, + "rejected_geometric_mean": -4.1523756980896, + "step": 3423 + }, + { + "chosen_geometric_mean": -1.0228444337844849, + "epoch": 0.85, + "grad_norm": 3.171875, + "learning_rate": 3.097707490734894e-06, + "log_odds": 1.0527313947677612, + "log_odds_ratio": -0.39734697341918945, + "loss": 0.2924, + "rejected_geometric_mean": -1.8163225650787354, + "step": 3424 + }, + { + "chosen_geometric_mean": -0.9187493324279785, + "epoch": 0.85, + "grad_norm": 4.96875, + "learning_rate": 3.0967622038271623e-06, + "log_odds": 0.5713032484054565, + "log_odds_ratio": -0.527847409248352, + "loss": 0.3223, + "rejected_geometric_mean": -1.3653159141540527, + "step": 3425 + }, + { + "chosen_geometric_mean": -1.262895941734314, + "epoch": 0.85, + "grad_norm": 1.8984375, + "learning_rate": 3.0958168264359563e-06, + "log_odds": 2.586486339569092, + "log_odds_ratio": -0.2050313502550125, + "loss": 0.259, + "rejected_geometric_mean": -3.6163785457611084, + "step": 3426 + }, + { + "chosen_geometric_mean": -1.0385327339172363, + "epoch": 0.85, + "grad_norm": 3.3125, + "learning_rate": 3.094871358704617e-06, + "log_odds": 4.008091449737549, + "log_odds_ratio": -0.18422465026378632, + "loss": 0.2857, + "rejected_geometric_mean": -4.681693077087402, + "step": 3427 + }, + { + "chosen_geometric_mean": -1.281017780303955, + "epoch": 0.85, + "grad_norm": 7.65625, + "learning_rate": 3.0939258007765e-06, + "log_odds": 3.4741439819335938, + "log_odds_ratio": -0.16128399968147278, + "loss": 0.3011, + "rejected_geometric_mean": -4.449276447296143, + "step": 3428 + }, + { + "chosen_geometric_mean": -1.0400241613388062, + "epoch": 0.85, + "grad_norm": 13.6875, + "learning_rate": 3.0929801527949754e-06, + "log_odds": 1.2668513059616089, + "log_odds_ratio": -0.40468645095825195, + "loss": 0.2715, + "rejected_geometric_mean": -2.1151299476623535, + "step": 3429 + }, + { + "chosen_geometric_mean": -0.9662028551101685, + "epoch": 0.85, + "grad_norm": 5.1875, + "learning_rate": 3.0920344149034266e-06, + "log_odds": 3.6253066062927246, + "log_odds_ratio": -0.061235733330249786, + "loss": 0.2749, + "rejected_geometric_mean": -4.138719081878662, + "step": 3430 + }, + { + "chosen_geometric_mean": -1.1863716840744019, + "epoch": 0.85, + "grad_norm": 9.3125, + "learning_rate": 3.0910885872452484e-06, + "log_odds": 5.6937103271484375, + "log_odds_ratio": -0.1294316202402115, + "loss": 0.2377, + "rejected_geometric_mean": -6.538857936859131, + "step": 3431 + }, + { + "chosen_geometric_mean": -1.0201398134231567, + "epoch": 0.85, + "grad_norm": 14.5625, + "learning_rate": 3.090142669963852e-06, + "log_odds": 1.9379301071166992, + "log_odds_ratio": -0.2526549994945526, + "loss": 0.394, + "rejected_geometric_mean": -2.603046178817749, + "step": 3432 + }, + { + "chosen_geometric_mean": -1.1229827404022217, + "epoch": 0.85, + "grad_norm": 26.125, + "learning_rate": 3.0891966632026622e-06, + "log_odds": 7.60226583480835, + "log_odds_ratio": -0.10168042778968811, + "loss": 0.3077, + "rejected_geometric_mean": -8.318803787231445, + "step": 3433 + }, + { + "chosen_geometric_mean": -1.1104234457015991, + "epoch": 0.85, + "grad_norm": 2.921875, + "learning_rate": 3.0882505671051135e-06, + "log_odds": 10.40278434753418, + "log_odds_ratio": -0.0006125462241470814, + "loss": 0.2564, + "rejected_geometric_mean": -11.104450225830078, + "step": 3434 + }, + { + "chosen_geometric_mean": -1.1933989524841309, + "epoch": 0.85, + "grad_norm": 18.125, + "learning_rate": 3.0873043818146595e-06, + "log_odds": 7.23703670501709, + "log_odds_ratio": -0.025309724733233452, + "loss": 0.2446, + "rejected_geometric_mean": -8.059442520141602, + "step": 3435 + }, + { + "chosen_geometric_mean": -1.1330912113189697, + "epoch": 0.85, + "grad_norm": 13.1875, + "learning_rate": 3.086358107474763e-06, + "log_odds": 4.010078430175781, + "log_odds_ratio": -0.23595252633094788, + "loss": 0.3391, + "rejected_geometric_mean": -4.880934715270996, + "step": 3436 + }, + { + "chosen_geometric_mean": -1.0904587507247925, + "epoch": 0.85, + "grad_norm": 7.15625, + "learning_rate": 3.0854117442289034e-06, + "log_odds": 4.103574752807617, + "log_odds_ratio": -0.22026704251766205, + "loss": 0.3241, + "rejected_geometric_mean": -4.900832176208496, + "step": 3437 + }, + { + "chosen_geometric_mean": -1.1656707525253296, + "epoch": 0.85, + "grad_norm": 3.515625, + "learning_rate": 3.084465292220571e-06, + "log_odds": 6.36279296875, + "log_odds_ratio": -0.3061402142047882, + "loss": 0.3159, + "rejected_geometric_mean": -7.354167938232422, + "step": 3438 + }, + { + "chosen_geometric_mean": -1.0039857625961304, + "epoch": 0.85, + "grad_norm": 7.84375, + "learning_rate": 3.083518751593271e-06, + "log_odds": 8.597314834594727, + "log_odds_ratio": -0.18005497753620148, + "loss": 0.2485, + "rejected_geometric_mean": -9.214489936828613, + "step": 3439 + }, + { + "chosen_geometric_mean": -1.3124197721481323, + "epoch": 0.85, + "grad_norm": 4.6875, + "learning_rate": 3.082572122490521e-06, + "log_odds": 3.8782052993774414, + "log_odds_ratio": -0.18606463074684143, + "loss": 0.3209, + "rejected_geometric_mean": -4.955710411071777, + "step": 3440 + }, + { + "chosen_geometric_mean": -1.1970860958099365, + "epoch": 0.85, + "grad_norm": 7.71875, + "learning_rate": 3.081625405055853e-06, + "log_odds": 3.872795820236206, + "log_odds_ratio": -0.32604458928108215, + "loss": 0.3292, + "rejected_geometric_mean": -4.829930782318115, + "step": 3441 + }, + { + "chosen_geometric_mean": -1.088076114654541, + "epoch": 0.85, + "grad_norm": 13.3125, + "learning_rate": 3.0806785994328126e-06, + "log_odds": 4.665186405181885, + "log_odds_ratio": -0.028162501752376556, + "loss": 0.3062, + "rejected_geometric_mean": -5.351830959320068, + "step": 3442 + }, + { + "chosen_geometric_mean": -1.2298188209533691, + "epoch": 0.85, + "grad_norm": 5.46875, + "learning_rate": 3.0797317057649585e-06, + "log_odds": 9.007625579833984, + "log_odds_ratio": -0.028062131255865097, + "loss": 0.2849, + "rejected_geometric_mean": -9.860698699951172, + "step": 3443 + }, + { + "chosen_geometric_mean": -1.1098017692565918, + "epoch": 0.85, + "grad_norm": 5.9375, + "learning_rate": 3.0787847241958613e-06, + "log_odds": 4.313894271850586, + "log_odds_ratio": -0.177544504404068, + "loss": 0.2998, + "rejected_geometric_mean": -5.110243320465088, + "step": 3444 + }, + { + "chosen_geometric_mean": -1.213677167892456, + "epoch": 0.85, + "grad_norm": 8.6875, + "learning_rate": 3.077837654869106e-06, + "log_odds": 2.994617223739624, + "log_odds_ratio": -0.2897616922855377, + "loss": 0.2784, + "rejected_geometric_mean": -3.9928555488586426, + "step": 3445 + }, + { + "chosen_geometric_mean": -1.077986240386963, + "epoch": 0.85, + "grad_norm": 3.015625, + "learning_rate": 3.0768904979282938e-06, + "log_odds": 7.342472076416016, + "log_odds_ratio": -0.07009945064783096, + "loss": 0.2318, + "rejected_geometric_mean": -7.975953578948975, + "step": 3446 + }, + { + "chosen_geometric_mean": -1.324617862701416, + "epoch": 0.85, + "grad_norm": 25.5, + "learning_rate": 3.075943253517033e-06, + "log_odds": 6.0933518409729, + "log_odds_ratio": -0.21961846947669983, + "loss": 0.3937, + "rejected_geometric_mean": -7.134003639221191, + "step": 3447 + }, + { + "chosen_geometric_mean": -0.9419234991073608, + "epoch": 0.85, + "grad_norm": 28.125, + "learning_rate": 3.074995921778951e-06, + "log_odds": 11.340713500976562, + "log_odds_ratio": -0.03205953910946846, + "loss": 0.3055, + "rejected_geometric_mean": -11.808362007141113, + "step": 3448 + }, + { + "chosen_geometric_mean": -1.03369140625, + "epoch": 0.85, + "grad_norm": 6.28125, + "learning_rate": 3.074048502857685e-06, + "log_odds": 9.129202842712402, + "log_odds_ratio": -0.008990427479147911, + "loss": 0.2745, + "rejected_geometric_mean": -9.708205223083496, + "step": 3449 + }, + { + "chosen_geometric_mean": -1.160010814666748, + "epoch": 0.85, + "grad_norm": 2.46875, + "learning_rate": 3.073100996896886e-06, + "log_odds": 6.698367595672607, + "log_odds_ratio": -0.2428695261478424, + "loss": 0.2728, + "rejected_geometric_mean": -7.59641170501709, + "step": 3450 + }, + { + "chosen_geometric_mean": -1.012981653213501, + "epoch": 0.85, + "grad_norm": 2.953125, + "learning_rate": 3.0721534040402196e-06, + "log_odds": 0.3353627622127533, + "log_odds_ratio": -0.6211720705032349, + "loss": 0.3235, + "rejected_geometric_mean": -1.2770711183547974, + "step": 3451 + }, + { + "chosen_geometric_mean": -0.9373171329498291, + "epoch": 0.85, + "grad_norm": 2.5625, + "learning_rate": 3.0712057244313633e-06, + "log_odds": 0.8052789568901062, + "log_odds_ratio": -0.44255807995796204, + "loss": 0.2936, + "rejected_geometric_mean": -1.5276139974594116, + "step": 3452 + }, + { + "chosen_geometric_mean": -0.9775790572166443, + "epoch": 0.85, + "grad_norm": 4.0625, + "learning_rate": 3.0702579582140073e-06, + "log_odds": 2.180415630340576, + "log_odds_ratio": -0.1781471222639084, + "loss": 0.3112, + "rejected_geometric_mean": -2.767294406890869, + "step": 3453 + }, + { + "chosen_geometric_mean": -1.1756254434585571, + "epoch": 0.86, + "grad_norm": 4.40625, + "learning_rate": 3.0693101055318576e-06, + "log_odds": 6.010204792022705, + "log_odds_ratio": -0.13479705154895782, + "loss": 0.2786, + "rejected_geometric_mean": -6.827857971191406, + "step": 3454 + }, + { + "chosen_geometric_mean": -1.1474541425704956, + "epoch": 0.86, + "grad_norm": 5.09375, + "learning_rate": 3.068362166528629e-06, + "log_odds": 1.1853440999984741, + "log_odds_ratio": -0.467290997505188, + "loss": 0.322, + "rejected_geometric_mean": -2.185739040374756, + "step": 3455 + }, + { + "chosen_geometric_mean": -0.9740517139434814, + "epoch": 0.86, + "grad_norm": 3.28125, + "learning_rate": 3.067414141348054e-06, + "log_odds": 3.0985724925994873, + "log_odds_ratio": -0.17379090189933777, + "loss": 0.3218, + "rejected_geometric_mean": -3.6972622871398926, + "step": 3456 + }, + { + "chosen_geometric_mean": -0.9440573453903198, + "epoch": 0.86, + "grad_norm": 4.09375, + "learning_rate": 3.0664660301338744e-06, + "log_odds": 6.125597953796387, + "log_odds_ratio": -0.27257290482521057, + "loss": 0.3026, + "rejected_geometric_mean": -6.740299701690674, + "step": 3457 + }, + { + "chosen_geometric_mean": -1.0311763286590576, + "epoch": 0.86, + "grad_norm": 5.96875, + "learning_rate": 3.065517833029847e-06, + "log_odds": 4.08076810836792, + "log_odds_ratio": -0.28438234329223633, + "loss": 0.301, + "rejected_geometric_mean": -4.76712703704834, + "step": 3458 + }, + { + "chosen_geometric_mean": -1.1023188829421997, + "epoch": 0.86, + "grad_norm": 18.5, + "learning_rate": 3.0645695501797422e-06, + "log_odds": 9.225431442260742, + "log_odds_ratio": -0.33587318658828735, + "loss": 0.2735, + "rejected_geometric_mean": -10.065223693847656, + "step": 3459 + }, + { + "chosen_geometric_mean": -0.8517996072769165, + "epoch": 0.86, + "grad_norm": 8.0625, + "learning_rate": 3.0636211817273415e-06, + "log_odds": 7.385054588317871, + "log_odds_ratio": -0.07050961256027222, + "loss": 0.2572, + "rejected_geometric_mean": -7.733416557312012, + "step": 3460 + }, + { + "chosen_geometric_mean": -1.2200196981430054, + "epoch": 0.86, + "grad_norm": 4.28125, + "learning_rate": 3.0626727278164404e-06, + "log_odds": 9.372790336608887, + "log_odds_ratio": -0.15183818340301514, + "loss": 0.2721, + "rejected_geometric_mean": -10.283205032348633, + "step": 3461 + }, + { + "chosen_geometric_mean": -1.154860258102417, + "epoch": 0.86, + "grad_norm": 26.375, + "learning_rate": 3.0617241885908477e-06, + "log_odds": 5.077785015106201, + "log_odds_ratio": -0.032310258597135544, + "loss": 0.347, + "rejected_geometric_mean": -5.858974933624268, + "step": 3462 + }, + { + "chosen_geometric_mean": -1.1878950595855713, + "epoch": 0.86, + "grad_norm": 26.875, + "learning_rate": 3.060775564194384e-06, + "log_odds": 7.1698222160339355, + "log_odds_ratio": -0.022879382595419884, + "loss": 0.3184, + "rejected_geometric_mean": -7.9928059577941895, + "step": 3463 + }, + { + "chosen_geometric_mean": -1.0836741924285889, + "epoch": 0.86, + "grad_norm": 9.9375, + "learning_rate": 3.0598268547708835e-06, + "log_odds": 8.751150131225586, + "log_odds_ratio": -0.0571410246193409, + "loss": 0.226, + "rejected_geometric_mean": -9.449975967407227, + "step": 3464 + }, + { + "chosen_geometric_mean": -1.333648681640625, + "epoch": 0.86, + "grad_norm": 13.125, + "learning_rate": 3.058878060464195e-06, + "log_odds": 6.822827339172363, + "log_odds_ratio": -0.2159835696220398, + "loss": 0.3136, + "rejected_geometric_mean": -7.90793514251709, + "step": 3465 + }, + { + "chosen_geometric_mean": -1.148601770401001, + "epoch": 0.86, + "grad_norm": 13.5625, + "learning_rate": 3.0579291814181766e-06, + "log_odds": 5.356937408447266, + "log_odds_ratio": -0.04185250401496887, + "loss": 0.2977, + "rejected_geometric_mean": -6.13175630569458, + "step": 3466 + }, + { + "chosen_geometric_mean": -1.2548232078552246, + "epoch": 0.86, + "grad_norm": 19.125, + "learning_rate": 3.0569802177767015e-06, + "log_odds": 5.0387139320373535, + "log_odds_ratio": -0.10515681654214859, + "loss": 0.2507, + "rejected_geometric_mean": -5.9794793128967285, + "step": 3467 + }, + { + "chosen_geometric_mean": -0.930838942527771, + "epoch": 0.86, + "grad_norm": 7.15625, + "learning_rate": 3.0560311696836563e-06, + "log_odds": 10.7877197265625, + "log_odds_ratio": -0.029749799519777298, + "loss": 0.3018, + "rejected_geometric_mean": -11.213485717773438, + "step": 3468 + }, + { + "chosen_geometric_mean": -1.6490873098373413, + "epoch": 0.86, + "grad_norm": 14.125, + "learning_rate": 3.055082037282939e-06, + "log_odds": 2.7351465225219727, + "log_odds_ratio": -0.12586060166358948, + "loss": 0.3071, + "rejected_geometric_mean": -4.164828300476074, + "step": 3469 + }, + { + "chosen_geometric_mean": -0.9977877140045166, + "epoch": 0.86, + "grad_norm": 3.53125, + "learning_rate": 3.05413282071846e-06, + "log_odds": 1.3725779056549072, + "log_odds_ratio": -0.2998605966567993, + "loss": 0.2765, + "rejected_geometric_mean": -2.1206417083740234, + "step": 3470 + }, + { + "chosen_geometric_mean": -1.0317307710647583, + "epoch": 0.86, + "grad_norm": 34.0, + "learning_rate": 3.053183520134145e-06, + "log_odds": 10.264108657836914, + "log_odds_ratio": -0.16762037575244904, + "loss": 0.2949, + "rejected_geometric_mean": -10.933932304382324, + "step": 3471 + }, + { + "chosen_geometric_mean": -1.1571316719055176, + "epoch": 0.86, + "grad_norm": 3.765625, + "learning_rate": 3.0522341356739287e-06, + "log_odds": 1.8240821361541748, + "log_odds_ratio": -0.23512962460517883, + "loss": 0.3556, + "rejected_geometric_mean": -2.7132678031921387, + "step": 3472 + }, + { + "chosen_geometric_mean": -1.280104160308838, + "epoch": 0.86, + "grad_norm": 4.09375, + "learning_rate": 3.051284667481762e-06, + "log_odds": 4.262172698974609, + "log_odds_ratio": -0.2131335735321045, + "loss": 0.2725, + "rejected_geometric_mean": -5.2976789474487305, + "step": 3473 + }, + { + "chosen_geometric_mean": -1.2762656211853027, + "epoch": 0.86, + "grad_norm": 4.40625, + "learning_rate": 3.050335115701607e-06, + "log_odds": 3.6830098628997803, + "log_odds_ratio": -0.19719873368740082, + "loss": 0.2567, + "rejected_geometric_mean": -4.715172290802002, + "step": 3474 + }, + { + "chosen_geometric_mean": -1.1910895109176636, + "epoch": 0.86, + "grad_norm": 24.375, + "learning_rate": 3.049385480477438e-06, + "log_odds": 3.5271847248077393, + "log_odds_ratio": -0.11684484034776688, + "loss": 0.3521, + "rejected_geometric_mean": -4.39866304397583, + "step": 3475 + }, + { + "chosen_geometric_mean": -1.3885178565979004, + "epoch": 0.86, + "grad_norm": 14.625, + "learning_rate": 3.0484357619532428e-06, + "log_odds": 3.3049113750457764, + "log_odds_ratio": -0.24981562793254852, + "loss": 0.3546, + "rejected_geometric_mean": -4.517099380493164, + "step": 3476 + }, + { + "chosen_geometric_mean": -1.076051950454712, + "epoch": 0.86, + "grad_norm": 6.8125, + "learning_rate": 3.047485960273021e-06, + "log_odds": 7.390353202819824, + "log_odds_ratio": -0.25503644347190857, + "loss": 0.2997, + "rejected_geometric_mean": -8.22347640991211, + "step": 3477 + }, + { + "chosen_geometric_mean": -1.085521936416626, + "epoch": 0.86, + "grad_norm": 3.28125, + "learning_rate": 3.0465360755807864e-06, + "log_odds": 3.280975818634033, + "log_odds_ratio": -0.2277362197637558, + "loss": 0.3145, + "rejected_geometric_mean": -4.070206165313721, + "step": 3478 + }, + { + "chosen_geometric_mean": -1.1063575744628906, + "epoch": 0.86, + "grad_norm": 15.5625, + "learning_rate": 3.045586108020564e-06, + "log_odds": 11.275850296020508, + "log_odds_ratio": -0.3326243758201599, + "loss": 0.327, + "rejected_geometric_mean": -12.21158504486084, + "step": 3479 + }, + { + "chosen_geometric_mean": -1.0428814888000488, + "epoch": 0.86, + "grad_norm": 3.21875, + "learning_rate": 3.044636057736391e-06, + "log_odds": 1.198506236076355, + "log_odds_ratio": -0.4628135561943054, + "loss": 0.3217, + "rejected_geometric_mean": -2.0629444122314453, + "step": 3480 + }, + { + "chosen_geometric_mean": -1.9068917036056519, + "epoch": 0.86, + "grad_norm": 38.0, + "learning_rate": 3.043685924872318e-06, + "log_odds": 3.635685682296753, + "log_odds_ratio": -0.5526293516159058, + "loss": 0.3846, + "rejected_geometric_mean": -5.400042533874512, + "step": 3481 + }, + { + "chosen_geometric_mean": -0.8840491771697998, + "epoch": 0.86, + "grad_norm": 22.0, + "learning_rate": 3.042735709572409e-06, + "log_odds": 4.411342144012451, + "log_odds_ratio": -0.13312125205993652, + "loss": 0.316, + "rejected_geometric_mean": -4.863121032714844, + "step": 3482 + }, + { + "chosen_geometric_mean": -0.8579720258712769, + "epoch": 0.86, + "grad_norm": 9.1875, + "learning_rate": 3.041785411980738e-06, + "log_odds": 6.794007301330566, + "log_odds_ratio": -0.29418709874153137, + "loss": 0.412, + "rejected_geometric_mean": -7.331093788146973, + "step": 3483 + }, + { + "chosen_geometric_mean": -1.044766902923584, + "epoch": 0.86, + "grad_norm": 10.625, + "learning_rate": 3.0408350322413936e-06, + "log_odds": 2.3956339359283447, + "log_odds_ratio": -0.2164352983236313, + "loss": 0.3134, + "rejected_geometric_mean": -3.0963354110717773, + "step": 3484 + }, + { + "chosen_geometric_mean": -1.0731995105743408, + "epoch": 0.86, + "grad_norm": 16.5, + "learning_rate": 3.0398845704984764e-06, + "log_odds": 6.73459529876709, + "log_odds_ratio": -0.3588021695613861, + "loss": 0.2314, + "rejected_geometric_mean": -7.6267781257629395, + "step": 3485 + }, + { + "chosen_geometric_mean": -0.9600512981414795, + "epoch": 0.86, + "grad_norm": 9.8125, + "learning_rate": 3.038934026896099e-06, + "log_odds": 2.3685946464538574, + "log_odds_ratio": -0.2886433005332947, + "loss": 0.293, + "rejected_geometric_mean": -2.988671064376831, + "step": 3486 + }, + { + "chosen_geometric_mean": -1.005740761756897, + "epoch": 0.86, + "grad_norm": 5.1875, + "learning_rate": 3.037983401578386e-06, + "log_odds": 7.577173709869385, + "log_odds_ratio": -0.02425806038081646, + "loss": 0.335, + "rejected_geometric_mean": -8.136177062988281, + "step": 3487 + }, + { + "chosen_geometric_mean": -1.0896477699279785, + "epoch": 0.86, + "grad_norm": 4.625, + "learning_rate": 3.0370326946894765e-06, + "log_odds": 0.7213990688323975, + "log_odds_ratio": -0.5731912851333618, + "loss": 0.2946, + "rejected_geometric_mean": -1.6862940788269043, + "step": 3488 + }, + { + "chosen_geometric_mean": -1.0936999320983887, + "epoch": 0.86, + "grad_norm": 1.875, + "learning_rate": 3.0360819063735193e-06, + "log_odds": 8.83848762512207, + "log_odds_ratio": -0.1430501937866211, + "loss": 0.2605, + "rejected_geometric_mean": -9.55429744720459, + "step": 3489 + }, + { + "chosen_geometric_mean": -1.254753828048706, + "epoch": 0.86, + "grad_norm": 3.109375, + "learning_rate": 3.0351310367746767e-06, + "log_odds": 4.366938591003418, + "log_odds_ratio": -0.3517902195453644, + "loss": 0.2715, + "rejected_geometric_mean": -5.451986312866211, + "step": 3490 + }, + { + "chosen_geometric_mean": -1.1754308938980103, + "epoch": 0.86, + "grad_norm": 5.1875, + "learning_rate": 3.0341800860371234e-06, + "log_odds": 3.3232476711273193, + "log_odds_ratio": -0.22999820113182068, + "loss": 0.2903, + "rejected_geometric_mean": -4.207596302032471, + "step": 3491 + }, + { + "chosen_geometric_mean": -1.1049439907073975, + "epoch": 0.86, + "grad_norm": 2.015625, + "learning_rate": 3.0332290543050472e-06, + "log_odds": 9.696991920471191, + "log_odds_ratio": -0.1502683311700821, + "loss": 0.2766, + "rejected_geometric_mean": -10.419093132019043, + "step": 3492 + }, + { + "chosen_geometric_mean": -1.214316964149475, + "epoch": 0.86, + "grad_norm": 4.96875, + "learning_rate": 3.032277941722647e-06, + "log_odds": 2.7933382987976074, + "log_odds_ratio": -0.19211596250534058, + "loss": 0.2919, + "rejected_geometric_mean": -3.724346399307251, + "step": 3493 + }, + { + "chosen_geometric_mean": -0.9498435854911804, + "epoch": 0.87, + "grad_norm": 3.046875, + "learning_rate": 3.0313267484341337e-06, + "log_odds": 1.840898036956787, + "log_odds_ratio": -0.3161933124065399, + "loss": 0.2809, + "rejected_geometric_mean": -2.495013475418091, + "step": 3494 + }, + { + "chosen_geometric_mean": -1.218746304512024, + "epoch": 0.87, + "grad_norm": 12.625, + "learning_rate": 3.0303754745837315e-06, + "log_odds": 11.098735809326172, + "log_odds_ratio": -0.15340983867645264, + "loss": 0.3301, + "rejected_geometric_mean": -11.99479866027832, + "step": 3495 + }, + { + "chosen_geometric_mean": -1.0665380954742432, + "epoch": 0.87, + "grad_norm": 7.96875, + "learning_rate": 3.0294241203156765e-06, + "log_odds": 8.63116455078125, + "log_odds_ratio": -0.21188196539878845, + "loss": 0.3224, + "rejected_geometric_mean": -9.392434120178223, + "step": 3496 + }, + { + "chosen_geometric_mean": -1.164462685585022, + "epoch": 0.87, + "grad_norm": 19.5, + "learning_rate": 3.028472685774216e-06, + "log_odds": 4.075579643249512, + "log_odds_ratio": -0.28794151544570923, + "loss": 0.3216, + "rejected_geometric_mean": -5.031037330627441, + "step": 3497 + }, + { + "chosen_geometric_mean": -0.8545330166816711, + "epoch": 0.87, + "grad_norm": 6.125, + "learning_rate": 3.0275211711036112e-06, + "log_odds": 3.281344413757324, + "log_odds_ratio": -0.22441594302654266, + "loss": 0.2987, + "rejected_geometric_mean": -3.7030131816864014, + "step": 3498 + }, + { + "chosen_geometric_mean": -0.9437836408615112, + "epoch": 0.87, + "grad_norm": 4.90625, + "learning_rate": 3.026569576448134e-06, + "log_odds": 2.9466848373413086, + "log_odds_ratio": -0.3276578485965729, + "loss": 0.3109, + "rejected_geometric_mean": -3.6214723587036133, + "step": 3499 + }, + { + "chosen_geometric_mean": -1.052422285079956, + "epoch": 0.87, + "grad_norm": 26.625, + "learning_rate": 3.02561790195207e-06, + "log_odds": 0.8264880180358887, + "log_odds_ratio": -0.4039285480976105, + "loss": 0.3025, + "rejected_geometric_mean": -1.6775774955749512, + "step": 3500 + }, + { + "chosen_geometric_mean": -0.9974290728569031, + "epoch": 0.87, + "grad_norm": 2.6875, + "learning_rate": 3.024666147759715e-06, + "log_odds": 8.914008140563965, + "log_odds_ratio": -0.007118755020201206, + "loss": 0.2805, + "rejected_geometric_mean": -9.395756721496582, + "step": 3501 + }, + { + "chosen_geometric_mean": -1.2439154386520386, + "epoch": 0.87, + "grad_norm": 6.46875, + "learning_rate": 3.0237143140153775e-06, + "log_odds": 4.7242937088012695, + "log_odds_ratio": -0.34780028462409973, + "loss": 0.306, + "rejected_geometric_mean": -5.756730556488037, + "step": 3502 + }, + { + "chosen_geometric_mean": -1.216092586517334, + "epoch": 0.87, + "grad_norm": 16.5, + "learning_rate": 3.0227624008633795e-06, + "log_odds": 16.857685089111328, + "log_odds_ratio": -0.012893255800008774, + "loss": 0.2936, + "rejected_geometric_mean": -17.696269989013672, + "step": 3503 + }, + { + "chosen_geometric_mean": -1.254374384880066, + "epoch": 0.87, + "grad_norm": 3.875, + "learning_rate": 3.0218104084480526e-06, + "log_odds": 9.891115188598633, + "log_odds_ratio": -0.3080497980117798, + "loss": 0.3592, + "rejected_geometric_mean": -10.880887985229492, + "step": 3504 + }, + { + "chosen_geometric_mean": -1.0921084880828857, + "epoch": 0.87, + "grad_norm": 49.0, + "learning_rate": 3.020858336913742e-06, + "log_odds": 1.3996896743774414, + "log_odds_ratio": -0.2860381305217743, + "loss": 0.3519, + "rejected_geometric_mean": -2.2134828567504883, + "step": 3505 + }, + { + "chosen_geometric_mean": -1.111769437789917, + "epoch": 0.87, + "grad_norm": 25.25, + "learning_rate": 3.019906186404806e-06, + "log_odds": 9.909646034240723, + "log_odds_ratio": -0.08169160783290863, + "loss": 0.3038, + "rejected_geometric_mean": -10.674676895141602, + "step": 3506 + }, + { + "chosen_geometric_mean": -1.0663807392120361, + "epoch": 0.87, + "grad_norm": 9.4375, + "learning_rate": 3.0189539570656107e-06, + "log_odds": 2.620523691177368, + "log_odds_ratio": -0.26072442531585693, + "loss": 0.2766, + "rejected_geometric_mean": -3.4270851612091064, + "step": 3507 + }, + { + "chosen_geometric_mean": -0.7798989415168762, + "epoch": 0.87, + "grad_norm": 34.5, + "learning_rate": 3.01800164904054e-06, + "log_odds": 1.7017074823379517, + "log_odds_ratio": -0.3170478343963623, + "loss": 0.3128, + "rejected_geometric_mean": -2.1642282009124756, + "step": 3508 + }, + { + "chosen_geometric_mean": -1.068159580230713, + "epoch": 0.87, + "grad_norm": 2.296875, + "learning_rate": 3.017049262473985e-06, + "log_odds": 2.271925926208496, + "log_odds_ratio": -0.35544729232788086, + "loss": 0.273, + "rejected_geometric_mean": -3.111248731613159, + "step": 3509 + }, + { + "chosen_geometric_mean": -1.05316162109375, + "epoch": 0.87, + "grad_norm": 5.90625, + "learning_rate": 3.01609679751035e-06, + "log_odds": 5.160826683044434, + "log_odds_ratio": -0.1464826762676239, + "loss": 0.2544, + "rejected_geometric_mean": -5.858952522277832, + "step": 3510 + }, + { + "chosen_geometric_mean": -1.3217246532440186, + "epoch": 0.87, + "grad_norm": 23.25, + "learning_rate": 3.0151442542940525e-06, + "log_odds": 7.315487861633301, + "log_odds_ratio": -0.1787613332271576, + "loss": 0.3259, + "rejected_geometric_mean": -8.372721672058105, + "step": 3511 + }, + { + "chosen_geometric_mean": -1.1790584325790405, + "epoch": 0.87, + "grad_norm": 4.0625, + "learning_rate": 3.0141916329695207e-06, + "log_odds": 4.395198345184326, + "log_odds_ratio": -0.21594955027103424, + "loss": 0.3416, + "rejected_geometric_mean": -5.309193134307861, + "step": 3512 + }, + { + "chosen_geometric_mean": -1.3190172910690308, + "epoch": 0.87, + "grad_norm": 7.90625, + "learning_rate": 3.0132389336811944e-06, + "log_odds": 3.462172269821167, + "log_odds_ratio": -0.29587191343307495, + "loss": 0.3139, + "rejected_geometric_mean": -4.524017810821533, + "step": 3513 + }, + { + "chosen_geometric_mean": -1.0056252479553223, + "epoch": 0.87, + "grad_norm": 3.859375, + "learning_rate": 3.012286156573526e-06, + "log_odds": 7.848082065582275, + "log_odds_ratio": -0.033074185252189636, + "loss": 0.2895, + "rejected_geometric_mean": -8.408490180969238, + "step": 3514 + }, + { + "chosen_geometric_mean": -0.9652009010314941, + "epoch": 0.87, + "grad_norm": 3.015625, + "learning_rate": 3.011333301790981e-06, + "log_odds": 0.7377424836158752, + "log_odds_ratio": -0.41070812940597534, + "loss": 0.2765, + "rejected_geometric_mean": -1.5025371313095093, + "step": 3515 + }, + { + "chosen_geometric_mean": -1.1992868185043335, + "epoch": 0.87, + "grad_norm": 3.46875, + "learning_rate": 3.010380369478031e-06, + "log_odds": 1.2182326316833496, + "log_odds_ratio": -0.5243927836418152, + "loss": 0.3318, + "rejected_geometric_mean": -2.3223471641540527, + "step": 3516 + }, + { + "chosen_geometric_mean": -0.9641881585121155, + "epoch": 0.87, + "grad_norm": 9.25, + "learning_rate": 3.009427359779167e-06, + "log_odds": 5.338792324066162, + "log_odds_ratio": -0.20647531747817993, + "loss": 0.3098, + "rejected_geometric_mean": -5.924450397491455, + "step": 3517 + }, + { + "chosen_geometric_mean": -1.1142146587371826, + "epoch": 0.87, + "grad_norm": 3.0, + "learning_rate": 3.008474272838887e-06, + "log_odds": 9.064552307128906, + "log_odds_ratio": -0.01731160096824169, + "loss": 0.2834, + "rejected_geometric_mean": -9.783802032470703, + "step": 3518 + }, + { + "chosen_geometric_mean": -1.1696139574050903, + "epoch": 0.87, + "grad_norm": 6.75, + "learning_rate": 3.007521108801702e-06, + "log_odds": 5.193565368652344, + "log_odds_ratio": -0.15370455384254456, + "loss": 0.2587, + "rejected_geometric_mean": -5.947775363922119, + "step": 3519 + }, + { + "chosen_geometric_mean": -1.1767585277557373, + "epoch": 0.87, + "grad_norm": 3.59375, + "learning_rate": 3.006567867812134e-06, + "log_odds": 0.6479279398918152, + "log_odds_ratio": -0.48700112104415894, + "loss": 0.299, + "rejected_geometric_mean": -1.6952394247055054, + "step": 3520 + }, + { + "chosen_geometric_mean": -1.0814762115478516, + "epoch": 0.87, + "grad_norm": 5.125, + "learning_rate": 3.005614550014718e-06, + "log_odds": 2.6927297115325928, + "log_odds_ratio": -0.2190767377614975, + "loss": 0.2755, + "rejected_geometric_mean": -3.4699182510375977, + "step": 3521 + }, + { + "chosen_geometric_mean": -1.157799482345581, + "epoch": 0.87, + "grad_norm": 21.25, + "learning_rate": 3.0046611555539994e-06, + "log_odds": 3.163156747817993, + "log_odds_ratio": -0.24292096495628357, + "loss": 0.268, + "rejected_geometric_mean": -4.05027437210083, + "step": 3522 + }, + { + "chosen_geometric_mean": -1.0309394598007202, + "epoch": 0.87, + "grad_norm": 2.984375, + "learning_rate": 3.003707684574536e-06, + "log_odds": 2.477362871170044, + "log_odds_ratio": -0.505242109298706, + "loss": 0.262, + "rejected_geometric_mean": -3.360466957092285, + "step": 3523 + }, + { + "chosen_geometric_mean": -1.300872564315796, + "epoch": 0.87, + "grad_norm": 12.5, + "learning_rate": 3.0027541372208963e-06, + "log_odds": 7.891373634338379, + "log_odds_ratio": -0.04101564735174179, + "loss": 0.2866, + "rejected_geometric_mean": -8.843082427978516, + "step": 3524 + }, + { + "chosen_geometric_mean": -1.2516225576400757, + "epoch": 0.87, + "grad_norm": 19.5, + "learning_rate": 3.0018005136376615e-06, + "log_odds": 3.7252180576324463, + "log_odds_ratio": -0.1630311906337738, + "loss": 0.3641, + "rejected_geometric_mean": -4.704100131988525, + "step": 3525 + }, + { + "chosen_geometric_mean": -1.2884352207183838, + "epoch": 0.87, + "grad_norm": 20.0, + "learning_rate": 3.0008468139694237e-06, + "log_odds": 0.6619731187820435, + "log_odds_ratio": -0.46260613203048706, + "loss": 0.3277, + "rejected_geometric_mean": -1.8309568166732788, + "step": 3526 + }, + { + "chosen_geometric_mean": -1.3096829652786255, + "epoch": 0.87, + "grad_norm": 10.8125, + "learning_rate": 2.9998930383607854e-06, + "log_odds": 2.790477752685547, + "log_odds_ratio": -0.13234934210777283, + "loss": 0.2945, + "rejected_geometric_mean": -3.815791130065918, + "step": 3527 + }, + { + "chosen_geometric_mean": -1.0412578582763672, + "epoch": 0.87, + "grad_norm": 9.75, + "learning_rate": 2.998939186956365e-06, + "log_odds": 4.062838077545166, + "log_odds_ratio": -0.27547600865364075, + "loss": 0.2634, + "rejected_geometric_mean": -4.834794998168945, + "step": 3528 + }, + { + "chosen_geometric_mean": -1.1042112112045288, + "epoch": 0.87, + "grad_norm": 4.5, + "learning_rate": 2.997985259900786e-06, + "log_odds": 8.024715423583984, + "log_odds_ratio": -0.26094505190849304, + "loss": 0.3281, + "rejected_geometric_mean": -8.870408058166504, + "step": 3529 + }, + { + "chosen_geometric_mean": -1.1741160154342651, + "epoch": 0.87, + "grad_norm": 9.8125, + "learning_rate": 2.9970312573386883e-06, + "log_odds": 2.996428966522217, + "log_odds_ratio": -0.3025199770927429, + "loss": 0.366, + "rejected_geometric_mean": -3.9580116271972656, + "step": 3530 + }, + { + "chosen_geometric_mean": -0.9685252904891968, + "epoch": 0.87, + "grad_norm": 6.34375, + "learning_rate": 2.9960771794147225e-06, + "log_odds": 2.5123672485351562, + "log_odds_ratio": -0.3428640067577362, + "loss": 0.329, + "rejected_geometric_mean": -3.1311159133911133, + "step": 3531 + }, + { + "chosen_geometric_mean": -1.274114727973938, + "epoch": 0.87, + "grad_norm": 19.75, + "learning_rate": 2.995123026273547e-06, + "log_odds": 4.185031414031982, + "log_odds_ratio": -0.14484545588493347, + "loss": 0.3602, + "rejected_geometric_mean": -5.0935959815979, + "step": 3532 + }, + { + "chosen_geometric_mean": -1.1406004428863525, + "epoch": 0.87, + "grad_norm": 2.1875, + "learning_rate": 2.9941687980598363e-06, + "log_odds": 3.2998509407043457, + "log_odds_ratio": -0.1344156265258789, + "loss": 0.2152, + "rejected_geometric_mean": -4.129859447479248, + "step": 3533 + }, + { + "chosen_geometric_mean": -1.1966969966888428, + "epoch": 0.87, + "grad_norm": 2.78125, + "learning_rate": 2.993214494918274e-06, + "log_odds": 6.709155082702637, + "log_odds_ratio": -0.20723912119865417, + "loss": 0.2793, + "rejected_geometric_mean": -7.645683765411377, + "step": 3534 + }, + { + "chosen_geometric_mean": -0.961780846118927, + "epoch": 0.88, + "grad_norm": 3.734375, + "learning_rate": 2.992260116993555e-06, + "log_odds": 7.249660015106201, + "log_odds_ratio": -0.367235004901886, + "loss": 0.3097, + "rejected_geometric_mean": -7.961479187011719, + "step": 3535 + }, + { + "chosen_geometric_mean": -0.9676802158355713, + "epoch": 0.88, + "grad_norm": 7.125, + "learning_rate": 2.991305664430386e-06, + "log_odds": 7.845703601837158, + "log_odds_ratio": -0.17400683462619781, + "loss": 0.293, + "rejected_geometric_mean": -8.391441345214844, + "step": 3536 + }, + { + "chosen_geometric_mean": -1.2442543506622314, + "epoch": 0.88, + "grad_norm": 2.828125, + "learning_rate": 2.9903511373734855e-06, + "log_odds": 3.508375644683838, + "log_odds_ratio": -0.10047200322151184, + "loss": 0.3141, + "rejected_geometric_mean": -4.441690444946289, + "step": 3537 + }, + { + "chosen_geometric_mean": -1.115775227546692, + "epoch": 0.88, + "grad_norm": 35.0, + "learning_rate": 2.9893965359675818e-06, + "log_odds": 1.8591487407684326, + "log_odds_ratio": -0.37864482402801514, + "loss": 0.3545, + "rejected_geometric_mean": -2.722679853439331, + "step": 3538 + }, + { + "chosen_geometric_mean": -0.929928719997406, + "epoch": 0.88, + "grad_norm": 7.5625, + "learning_rate": 2.988441860357416e-06, + "log_odds": 6.25858211517334, + "log_odds_ratio": -0.1480870395898819, + "loss": 0.2975, + "rejected_geometric_mean": -6.798967361450195, + "step": 3539 + }, + { + "chosen_geometric_mean": -0.9205535650253296, + "epoch": 0.88, + "grad_norm": 6.5625, + "learning_rate": 2.987487110687739e-06, + "log_odds": 10.021377563476562, + "log_odds_ratio": -0.055001046508550644, + "loss": 0.274, + "rejected_geometric_mean": -10.399582862854004, + "step": 3540 + }, + { + "chosen_geometric_mean": -1.041390061378479, + "epoch": 0.88, + "grad_norm": 9.5625, + "learning_rate": 2.986532287103315e-06, + "log_odds": 7.3854475021362305, + "log_odds_ratio": -0.339447557926178, + "loss": 0.3058, + "rejected_geometric_mean": -8.190317153930664, + "step": 3541 + }, + { + "chosen_geometric_mean": -0.9617218971252441, + "epoch": 0.88, + "grad_norm": 8.5, + "learning_rate": 2.985577389748918e-06, + "log_odds": 6.223937511444092, + "log_odds_ratio": -0.2601105868816376, + "loss": 0.2789, + "rejected_geometric_mean": -6.912830352783203, + "step": 3542 + }, + { + "chosen_geometric_mean": -1.1066429615020752, + "epoch": 0.88, + "grad_norm": 3.796875, + "learning_rate": 2.9846224187693325e-06, + "log_odds": 3.846735954284668, + "log_odds_ratio": -0.3081589341163635, + "loss": 0.2664, + "rejected_geometric_mean": -4.725232124328613, + "step": 3543 + }, + { + "chosen_geometric_mean": -1.1315220594406128, + "epoch": 0.88, + "grad_norm": 102.0, + "learning_rate": 2.983667374309356e-06, + "log_odds": 7.303510665893555, + "log_odds_ratio": -0.018354447558522224, + "loss": 0.3181, + "rejected_geometric_mean": -8.048135757446289, + "step": 3544 + }, + { + "chosen_geometric_mean": -1.0698916912078857, + "epoch": 0.88, + "grad_norm": 24.125, + "learning_rate": 2.982712256513795e-06, + "log_odds": 3.0182418823242188, + "log_odds_ratio": -0.2217256724834442, + "loss": 0.3359, + "rejected_geometric_mean": -3.7779481410980225, + "step": 3545 + }, + { + "chosen_geometric_mean": -1.1795963048934937, + "epoch": 0.88, + "grad_norm": 8.8125, + "learning_rate": 2.9817570655274686e-06, + "log_odds": 1.4217641353607178, + "log_odds_ratio": -0.6069038510322571, + "loss": 0.3241, + "rejected_geometric_mean": -2.561408281326294, + "step": 3546 + }, + { + "chosen_geometric_mean": -1.194956660270691, + "epoch": 0.88, + "grad_norm": 4.59375, + "learning_rate": 2.980801801495207e-06, + "log_odds": 4.011427879333496, + "log_odds_ratio": -0.05574173852801323, + "loss": 0.3009, + "rejected_geometric_mean": -4.825978755950928, + "step": 3547 + }, + { + "chosen_geometric_mean": -1.5071207284927368, + "epoch": 0.88, + "grad_norm": 48.75, + "learning_rate": 2.979846464561852e-06, + "log_odds": 8.513116836547852, + "log_odds_ratio": -0.11556264013051987, + "loss": 0.372, + "rejected_geometric_mean": -9.778919219970703, + "step": 3548 + }, + { + "chosen_geometric_mean": -1.1747750043869019, + "epoch": 0.88, + "grad_norm": 19.125, + "learning_rate": 2.9788910548722545e-06, + "log_odds": 2.1781535148620605, + "log_odds_ratio": -0.18894772231578827, + "loss": 0.3579, + "rejected_geometric_mean": -3.0825061798095703, + "step": 3549 + }, + { + "chosen_geometric_mean": -1.108156681060791, + "epoch": 0.88, + "grad_norm": 25.375, + "learning_rate": 2.9779355725712777e-06, + "log_odds": 5.660840034484863, + "log_odds_ratio": -0.20803463459014893, + "loss": 0.3983, + "rejected_geometric_mean": -6.461073398590088, + "step": 3550 + }, + { + "chosen_geometric_mean": -0.9029474258422852, + "epoch": 0.88, + "grad_norm": 11.125, + "learning_rate": 2.976980017803796e-06, + "log_odds": 6.127514362335205, + "log_odds_ratio": -0.41826117038726807, + "loss": 0.2748, + "rejected_geometric_mean": -6.853959083557129, + "step": 3551 + }, + { + "chosen_geometric_mean": -1.165932059288025, + "epoch": 0.88, + "grad_norm": 8.75, + "learning_rate": 2.976024390714694e-06, + "log_odds": 6.337955474853516, + "log_odds_ratio": -0.1680646538734436, + "loss": 0.3252, + "rejected_geometric_mean": -7.2017364501953125, + "step": 3552 + }, + { + "chosen_geometric_mean": -1.0406643152236938, + "epoch": 0.88, + "grad_norm": 9.1875, + "learning_rate": 2.975068691448868e-06, + "log_odds": 8.112735748291016, + "log_odds_ratio": -0.00399437453597784, + "loss": 0.3286, + "rejected_geometric_mean": -8.705604553222656, + "step": 3553 + }, + { + "chosen_geometric_mean": -2.076359748840332, + "epoch": 0.88, + "grad_norm": 34.0, + "learning_rate": 2.974112920151225e-06, + "log_odds": 1.1197593212127686, + "log_odds_ratio": -0.50128173828125, + "loss": 0.3739, + "rejected_geometric_mean": -3.061439275741577, + "step": 3554 + }, + { + "chosen_geometric_mean": -1.2033761739730835, + "epoch": 0.88, + "grad_norm": 3.15625, + "learning_rate": 2.973157076966682e-06, + "log_odds": 1.4128223657608032, + "log_odds_ratio": -0.343918114900589, + "loss": 0.299, + "rejected_geometric_mean": -2.4170470237731934, + "step": 3555 + }, + { + "chosen_geometric_mean": -1.0390156507492065, + "epoch": 0.88, + "grad_norm": 2.421875, + "learning_rate": 2.972201162040169e-06, + "log_odds": 3.0793604850769043, + "log_odds_ratio": -0.24783216416835785, + "loss": 0.2891, + "rejected_geometric_mean": -3.8284177780151367, + "step": 3556 + }, + { + "chosen_geometric_mean": -1.0615390539169312, + "epoch": 0.88, + "grad_norm": 35.25, + "learning_rate": 2.9712451755166254e-06, + "log_odds": 1.4776676893234253, + "log_odds_ratio": -0.36334142088890076, + "loss": 0.2969, + "rejected_geometric_mean": -2.324796676635742, + "step": 3557 + }, + { + "chosen_geometric_mean": -1.3303430080413818, + "epoch": 0.88, + "grad_norm": 10.75, + "learning_rate": 2.9702891175410016e-06, + "log_odds": 2.3236746788024902, + "log_odds_ratio": -0.24016426503658295, + "loss": 0.3679, + "rejected_geometric_mean": -3.4517455101013184, + "step": 3558 + }, + { + "chosen_geometric_mean": -1.0792313814163208, + "epoch": 0.88, + "grad_norm": 4.34375, + "learning_rate": 2.9693329882582577e-06, + "log_odds": 9.061164855957031, + "log_odds_ratio": -0.27627962827682495, + "loss": 0.3075, + "rejected_geometric_mean": -9.909624099731445, + "step": 3559 + }, + { + "chosen_geometric_mean": -1.1135598421096802, + "epoch": 0.88, + "grad_norm": 4.5, + "learning_rate": 2.968376787813368e-06, + "log_odds": 2.7621817588806152, + "log_odds_ratio": -0.18818500638008118, + "loss": 0.2811, + "rejected_geometric_mean": -3.563178062438965, + "step": 3560 + }, + { + "chosen_geometric_mean": -1.1011557579040527, + "epoch": 0.88, + "grad_norm": 4.375, + "learning_rate": 2.9674205163513146e-06, + "log_odds": 2.850813627243042, + "log_odds_ratio": -0.5197030305862427, + "loss": 0.289, + "rejected_geometric_mean": -3.8603339195251465, + "step": 3561 + }, + { + "chosen_geometric_mean": -1.200387716293335, + "epoch": 0.88, + "grad_norm": 9.8125, + "learning_rate": 2.9664641740170903e-06, + "log_odds": 3.4910635948181152, + "log_odds_ratio": -0.2886965572834015, + "loss": 0.2836, + "rejected_geometric_mean": -4.416977882385254, + "step": 3562 + }, + { + "chosen_geometric_mean": -1.2845721244812012, + "epoch": 0.88, + "grad_norm": 10.75, + "learning_rate": 2.9655077609557005e-06, + "log_odds": 6.267590045928955, + "log_odds_ratio": -0.13790303468704224, + "loss": 0.3546, + "rejected_geometric_mean": -7.274231910705566, + "step": 3563 + }, + { + "chosen_geometric_mean": -1.5781084299087524, + "epoch": 0.88, + "grad_norm": 11.375, + "learning_rate": 2.9645512773121606e-06, + "log_odds": 8.167109489440918, + "log_odds_ratio": -0.046857044100761414, + "loss": 0.3446, + "rejected_geometric_mean": -9.418007850646973, + "step": 3564 + }, + { + "chosen_geometric_mean": -0.8851096630096436, + "epoch": 0.88, + "grad_norm": 2.078125, + "learning_rate": 2.963594723231495e-06, + "log_odds": 4.156970977783203, + "log_odds_ratio": -0.3617929518222809, + "loss": 0.259, + "rejected_geometric_mean": -4.737861156463623, + "step": 3565 + }, + { + "chosen_geometric_mean": -1.236466646194458, + "epoch": 0.88, + "grad_norm": 3.6875, + "learning_rate": 2.9626380988587416e-06, + "log_odds": 6.2373785972595215, + "log_odds_ratio": -0.23987656831741333, + "loss": 0.2522, + "rejected_geometric_mean": -7.282506465911865, + "step": 3566 + }, + { + "chosen_geometric_mean": -0.9928362369537354, + "epoch": 0.88, + "grad_norm": 1.921875, + "learning_rate": 2.9616814043389473e-06, + "log_odds": 6.0334601402282715, + "log_odds_ratio": -0.043212078511714935, + "loss": 0.2566, + "rejected_geometric_mean": -6.582902908325195, + "step": 3567 + }, + { + "chosen_geometric_mean": -1.2414109706878662, + "epoch": 0.88, + "grad_norm": 11.9375, + "learning_rate": 2.9607246398171704e-06, + "log_odds": 6.503857135772705, + "log_odds_ratio": -0.06696895509958267, + "loss": 0.2838, + "rejected_geometric_mean": -7.398567199707031, + "step": 3568 + }, + { + "chosen_geometric_mean": -1.0819079875946045, + "epoch": 0.88, + "grad_norm": 4.65625, + "learning_rate": 2.959767805438479e-06, + "log_odds": 4.999995231628418, + "log_odds_ratio": -0.2599535584449768, + "loss": 0.2854, + "rejected_geometric_mean": -5.824865818023682, + "step": 3569 + }, + { + "chosen_geometric_mean": -1.0056782960891724, + "epoch": 0.88, + "grad_norm": 4.46875, + "learning_rate": 2.9588109013479514e-06, + "log_odds": 6.782943248748779, + "log_odds_ratio": -0.1519833505153656, + "loss": 0.277, + "rejected_geometric_mean": -7.454791069030762, + "step": 3570 + }, + { + "chosen_geometric_mean": -1.076033592224121, + "epoch": 0.88, + "grad_norm": 6.5, + "learning_rate": 2.9578539276906782e-06, + "log_odds": 2.817915439605713, + "log_odds_ratio": -0.2404189109802246, + "loss": 0.3715, + "rejected_geometric_mean": -3.616957187652588, + "step": 3571 + }, + { + "chosen_geometric_mean": -0.9679189920425415, + "epoch": 0.88, + "grad_norm": 10.1875, + "learning_rate": 2.956896884611759e-06, + "log_odds": 9.29409408569336, + "log_odds_ratio": -0.027959579601883888, + "loss": 0.2671, + "rejected_geometric_mean": -9.785608291625977, + "step": 3572 + }, + { + "chosen_geometric_mean": -1.3801480531692505, + "epoch": 0.88, + "grad_norm": 31.75, + "learning_rate": 2.9559397722563048e-06, + "log_odds": 4.224283695220947, + "log_odds_ratio": -0.20739170908927917, + "loss": 0.3265, + "rejected_geometric_mean": -5.350362777709961, + "step": 3573 + }, + { + "chosen_geometric_mean": -1.2906008958816528, + "epoch": 0.88, + "grad_norm": 15.625, + "learning_rate": 2.9549825907694374e-06, + "log_odds": 3.4182467460632324, + "log_odds_ratio": -0.052616119384765625, + "loss": 0.3785, + "rejected_geometric_mean": -4.403909683227539, + "step": 3574 + }, + { + "chosen_geometric_mean": -1.058668851852417, + "epoch": 0.89, + "grad_norm": 12.6875, + "learning_rate": 2.9540253402962875e-06, + "log_odds": 5.965364456176758, + "log_odds_ratio": -0.047014426440000534, + "loss": 0.2942, + "rejected_geometric_mean": -6.609034061431885, + "step": 3575 + }, + { + "chosen_geometric_mean": -0.954745888710022, + "epoch": 0.89, + "grad_norm": 12.625, + "learning_rate": 2.9530680209819972e-06, + "log_odds": 6.678484916687012, + "log_odds_ratio": -0.0032183146104216576, + "loss": 0.3009, + "rejected_geometric_mean": -7.1433820724487305, + "step": 3576 + }, + { + "chosen_geometric_mean": -1.1078815460205078, + "epoch": 0.89, + "grad_norm": 10.875, + "learning_rate": 2.95211063297172e-06, + "log_odds": 9.280951499938965, + "log_odds_ratio": -0.11749491840600967, + "loss": 0.346, + "rejected_geometric_mean": -10.021363258361816, + "step": 3577 + }, + { + "chosen_geometric_mean": -1.3297138214111328, + "epoch": 0.89, + "grad_norm": 2.34375, + "learning_rate": 2.951153176410619e-06, + "log_odds": 4.869410514831543, + "log_odds_ratio": -0.2767234444618225, + "loss": 0.2682, + "rejected_geometric_mean": -5.986400127410889, + "step": 3578 + }, + { + "chosen_geometric_mean": -0.8325453400611877, + "epoch": 0.89, + "grad_norm": 5.125, + "learning_rate": 2.9501956514438657e-06, + "log_odds": 5.92609167098999, + "log_odds_ratio": -0.024415215477347374, + "loss": 0.2991, + "rejected_geometric_mean": -6.203536033630371, + "step": 3579 + }, + { + "chosen_geometric_mean": -1.156097650527954, + "epoch": 0.89, + "grad_norm": 12.625, + "learning_rate": 2.949238058216646e-06, + "log_odds": 1.1820311546325684, + "log_odds_ratio": -0.27605298161506653, + "loss": 0.259, + "rejected_geometric_mean": -2.081146240234375, + "step": 3580 + }, + { + "chosen_geometric_mean": -0.86589115858078, + "epoch": 0.89, + "grad_norm": 4.875, + "learning_rate": 2.948280396874153e-06, + "log_odds": 7.205129623413086, + "log_odds_ratio": -0.28908681869506836, + "loss": 0.322, + "rejected_geometric_mean": -7.708989143371582, + "step": 3581 + }, + { + "chosen_geometric_mean": -1.1709768772125244, + "epoch": 0.89, + "grad_norm": 4.1875, + "learning_rate": 2.947322667561591e-06, + "log_odds": 9.813261032104492, + "log_odds_ratio": -0.0002489305043127388, + "loss": 0.2798, + "rejected_geometric_mean": -10.589245796203613, + "step": 3582 + }, + { + "chosen_geometric_mean": -0.8509131073951721, + "epoch": 0.89, + "grad_norm": 11.75, + "learning_rate": 2.946364870424176e-06, + "log_odds": 0.36725783348083496, + "log_odds_ratio": -0.53853440284729, + "loss": 0.2787, + "rejected_geometric_mean": -1.0768697261810303, + "step": 3583 + }, + { + "chosen_geometric_mean": -1.148200273513794, + "epoch": 0.89, + "grad_norm": 14.0, + "learning_rate": 2.9454070056071317e-06, + "log_odds": 8.910100936889648, + "log_odds_ratio": -0.14767271280288696, + "loss": 0.2986, + "rejected_geometric_mean": -9.650135040283203, + "step": 3584 + }, + { + "chosen_geometric_mean": -1.1258783340454102, + "epoch": 0.89, + "grad_norm": 12.6875, + "learning_rate": 2.9444490732556936e-06, + "log_odds": 8.102551460266113, + "log_odds_ratio": -0.11354184150695801, + "loss": 0.3221, + "rejected_geometric_mean": -8.880457878112793, + "step": 3585 + }, + { + "chosen_geometric_mean": -1.130178689956665, + "epoch": 0.89, + "grad_norm": 3.484375, + "learning_rate": 2.943491073515108e-06, + "log_odds": 7.414538383483887, + "log_odds_ratio": -0.10800974071025848, + "loss": 0.2973, + "rejected_geometric_mean": -8.198307991027832, + "step": 3586 + }, + { + "chosen_geometric_mean": -1.1202893257141113, + "epoch": 0.89, + "grad_norm": 6.9375, + "learning_rate": 2.9425330065306296e-06, + "log_odds": 0.7688801288604736, + "log_odds_ratio": -0.4249640107154846, + "loss": 0.3251, + "rejected_geometric_mean": -1.7343766689300537, + "step": 3587 + }, + { + "chosen_geometric_mean": -1.0188584327697754, + "epoch": 0.89, + "grad_norm": 7.90625, + "learning_rate": 2.9415748724475246e-06, + "log_odds": 5.787130355834961, + "log_odds_ratio": -0.00980491004884243, + "loss": 0.2849, + "rejected_geometric_mean": -6.340664863586426, + "step": 3588 + }, + { + "chosen_geometric_mean": -1.007287621498108, + "epoch": 0.89, + "grad_norm": 9.5625, + "learning_rate": 2.9406166714110696e-06, + "log_odds": 2.651747226715088, + "log_odds_ratio": -0.20481672883033752, + "loss": 0.2874, + "rejected_geometric_mean": -3.3405091762542725, + "step": 3589 + }, + { + "chosen_geometric_mean": -1.082128882408142, + "epoch": 0.89, + "grad_norm": 15.75, + "learning_rate": 2.9396584035665515e-06, + "log_odds": 4.606463432312012, + "log_odds_ratio": -0.25160518288612366, + "loss": 0.279, + "rejected_geometric_mean": -5.364573955535889, + "step": 3590 + }, + { + "chosen_geometric_mean": -1.3472251892089844, + "epoch": 0.89, + "grad_norm": 30.0, + "learning_rate": 2.9387000690592653e-06, + "log_odds": 3.7020320892333984, + "log_odds_ratio": -0.16554348170757294, + "loss": 0.3197, + "rejected_geometric_mean": -4.805130958557129, + "step": 3591 + }, + { + "chosen_geometric_mean": -1.1710309982299805, + "epoch": 0.89, + "grad_norm": 6.875, + "learning_rate": 2.9377416680345177e-06, + "log_odds": 4.802628517150879, + "log_odds_ratio": -0.3374514877796173, + "loss": 0.3123, + "rejected_geometric_mean": -5.779487609863281, + "step": 3592 + }, + { + "chosen_geometric_mean": -1.229285478591919, + "epoch": 0.89, + "grad_norm": 3.0625, + "learning_rate": 2.936783200637626e-06, + "log_odds": 2.629892349243164, + "log_odds_ratio": -0.30110669136047363, + "loss": 0.289, + "rejected_geometric_mean": -3.6015748977661133, + "step": 3593 + }, + { + "chosen_geometric_mean": -1.0969839096069336, + "epoch": 0.89, + "grad_norm": 48.0, + "learning_rate": 2.9358246670139173e-06, + "log_odds": 4.323049545288086, + "log_odds_ratio": -0.13123419880867004, + "loss": 0.3279, + "rejected_geometric_mean": -5.047811031341553, + "step": 3594 + }, + { + "chosen_geometric_mean": -1.1862516403198242, + "epoch": 0.89, + "grad_norm": 5.6875, + "learning_rate": 2.934866067308727e-06, + "log_odds": 3.3414664268493652, + "log_odds_ratio": -0.24355055391788483, + "loss": 0.2772, + "rejected_geometric_mean": -4.294249534606934, + "step": 3595 + }, + { + "chosen_geometric_mean": -1.3126685619354248, + "epoch": 0.89, + "grad_norm": 7.28125, + "learning_rate": 2.9339074016674023e-06, + "log_odds": 3.3372273445129395, + "log_odds_ratio": -0.2706201374530792, + "loss": 0.2847, + "rejected_geometric_mean": -4.448864459991455, + "step": 3596 + }, + { + "chosen_geometric_mean": -1.0507885217666626, + "epoch": 0.89, + "grad_norm": 3.875, + "learning_rate": 2.9329486702353003e-06, + "log_odds": 2.5527729988098145, + "log_odds_ratio": -0.23916707932949066, + "loss": 0.289, + "rejected_geometric_mean": -3.2947497367858887, + "step": 3597 + }, + { + "chosen_geometric_mean": -1.229587197303772, + "epoch": 0.89, + "grad_norm": 5.15625, + "learning_rate": 2.9319898731577875e-06, + "log_odds": 4.889179706573486, + "log_odds_ratio": -0.061973124742507935, + "loss": 0.3008, + "rejected_geometric_mean": -5.754184722900391, + "step": 3598 + }, + { + "chosen_geometric_mean": -1.0442382097244263, + "epoch": 0.89, + "grad_norm": 13.9375, + "learning_rate": 2.9310310105802396e-06, + "log_odds": 1.7402561902999878, + "log_odds_ratio": -0.26874932646751404, + "loss": 0.3302, + "rejected_geometric_mean": -2.5192859172821045, + "step": 3599 + }, + { + "chosen_geometric_mean": -0.9243084788322449, + "epoch": 0.89, + "grad_norm": 15.375, + "learning_rate": 2.930072082648045e-06, + "log_odds": 7.090768814086914, + "log_odds_ratio": -0.18284861743450165, + "loss": 0.3074, + "rejected_geometric_mean": -7.66801643371582, + "step": 3600 + }, + { + "chosen_geometric_mean": -1.2366752624511719, + "epoch": 0.89, + "grad_norm": 3.734375, + "learning_rate": 2.929113089506599e-06, + "log_odds": 4.366105079650879, + "log_odds_ratio": -0.11934462934732437, + "loss": 0.2899, + "rejected_geometric_mean": -5.258669853210449, + "step": 3601 + }, + { + "chosen_geometric_mean": -1.1955844163894653, + "epoch": 0.89, + "grad_norm": 5.25, + "learning_rate": 2.9281540313013087e-06, + "log_odds": 1.5967814922332764, + "log_odds_ratio": -0.19992925226688385, + "loss": 0.2908, + "rejected_geometric_mean": -2.5130631923675537, + "step": 3602 + }, + { + "chosen_geometric_mean": -1.0497599840164185, + "epoch": 0.89, + "grad_norm": 23.375, + "learning_rate": 2.927194908177589e-06, + "log_odds": 3.2228100299835205, + "log_odds_ratio": -0.1431180238723755, + "loss": 0.2945, + "rejected_geometric_mean": -3.903980016708374, + "step": 3603 + }, + { + "chosen_geometric_mean": -1.092527985572815, + "epoch": 0.89, + "grad_norm": 6.625, + "learning_rate": 2.9262357202808672e-06, + "log_odds": 9.984138488769531, + "log_odds_ratio": -0.07689765095710754, + "loss": 0.3002, + "rejected_geometric_mean": -10.672768592834473, + "step": 3604 + }, + { + "chosen_geometric_mean": -1.3792020082473755, + "epoch": 0.89, + "grad_norm": 3.765625, + "learning_rate": 2.925276467756579e-06, + "log_odds": 4.348380088806152, + "log_odds_ratio": -0.13871639966964722, + "loss": 0.2854, + "rejected_geometric_mean": -5.476783752441406, + "step": 3605 + }, + { + "chosen_geometric_mean": -1.1025433540344238, + "epoch": 0.89, + "grad_norm": 5.59375, + "learning_rate": 2.92431715075017e-06, + "log_odds": 4.319823741912842, + "log_odds_ratio": -0.14675013720989227, + "loss": 0.3004, + "rejected_geometric_mean": -5.0961384773254395, + "step": 3606 + }, + { + "chosen_geometric_mean": -1.1094167232513428, + "epoch": 0.89, + "grad_norm": 10.5625, + "learning_rate": 2.9233577694070957e-06, + "log_odds": 3.6117467880249023, + "log_odds_ratio": -0.08071821182966232, + "loss": 0.3018, + "rejected_geometric_mean": -4.348361492156982, + "step": 3607 + }, + { + "chosen_geometric_mean": -1.1006224155426025, + "epoch": 0.89, + "grad_norm": 16.0, + "learning_rate": 2.9223983238728214e-06, + "log_odds": 0.1644531786441803, + "log_odds_ratio": -0.6167615056037903, + "loss": 0.294, + "rejected_geometric_mean": -1.2137924432754517, + "step": 3608 + }, + { + "chosen_geometric_mean": -1.2559983730316162, + "epoch": 0.89, + "grad_norm": 6.40625, + "learning_rate": 2.9214388142928215e-06, + "log_odds": 4.060374736785889, + "log_odds_ratio": -0.2133609503507614, + "loss": 0.3022, + "rejected_geometric_mean": -5.031286716461182, + "step": 3609 + }, + { + "chosen_geometric_mean": -1.0477919578552246, + "epoch": 0.89, + "grad_norm": 9.25, + "learning_rate": 2.9204792408125815e-06, + "log_odds": 4.1277546882629395, + "log_odds_ratio": -0.3041861653327942, + "loss": 0.3097, + "rejected_geometric_mean": -4.9117512702941895, + "step": 3610 + }, + { + "chosen_geometric_mean": -1.1389316320419312, + "epoch": 0.89, + "grad_norm": 2.140625, + "learning_rate": 2.9195196035775954e-06, + "log_odds": 3.0711522102355957, + "log_odds_ratio": -0.17411969602108002, + "loss": 0.2881, + "rejected_geometric_mean": -3.8904287815093994, + "step": 3611 + }, + { + "chosen_geometric_mean": -1.0451686382293701, + "epoch": 0.89, + "grad_norm": 27.375, + "learning_rate": 2.9185599027333674e-06, + "log_odds": 8.04610538482666, + "log_odds_ratio": -0.17086809873580933, + "loss": 0.3397, + "rejected_geometric_mean": -8.72302532196045, + "step": 3612 + }, + { + "chosen_geometric_mean": -1.1226876974105835, + "epoch": 0.89, + "grad_norm": 18.875, + "learning_rate": 2.917600138425411e-06, + "log_odds": 9.474397659301758, + "log_odds_ratio": -0.11484155058860779, + "loss": 0.3147, + "rejected_geometric_mean": -10.221739768981934, + "step": 3613 + }, + { + "chosen_geometric_mean": -1.1894303560256958, + "epoch": 0.89, + "grad_norm": 26.125, + "learning_rate": 2.9166403107992493e-06, + "log_odds": 2.907139778137207, + "log_odds_ratio": -0.2830268144607544, + "loss": 0.2942, + "rejected_geometric_mean": -3.849675416946411, + "step": 3614 + }, + { + "chosen_geometric_mean": -1.0472511053085327, + "epoch": 0.9, + "grad_norm": 3.25, + "learning_rate": 2.915680420000416e-06, + "log_odds": 3.3721847534179688, + "log_odds_ratio": -0.21927863359451294, + "loss": 0.2496, + "rejected_geometric_mean": -4.032891273498535, + "step": 3615 + }, + { + "chosen_geometric_mean": -1.2341365814208984, + "epoch": 0.9, + "grad_norm": 6.46875, + "learning_rate": 2.914720466174452e-06, + "log_odds": 10.25130558013916, + "log_odds_ratio": -0.1860431283712387, + "loss": 0.3251, + "rejected_geometric_mean": -11.204023361206055, + "step": 3616 + }, + { + "chosen_geometric_mean": -1.0465115308761597, + "epoch": 0.9, + "grad_norm": 4.4375, + "learning_rate": 2.913760449466911e-06, + "log_odds": 1.6600042581558228, + "log_odds_ratio": -0.3450143337249756, + "loss": 0.2276, + "rejected_geometric_mean": -2.4939072132110596, + "step": 3617 + }, + { + "chosen_geometric_mean": -1.0059202909469604, + "epoch": 0.9, + "grad_norm": 16.5, + "learning_rate": 2.912800370023355e-06, + "log_odds": 8.85812759399414, + "log_odds_ratio": -0.14463196694850922, + "loss": 0.2829, + "rejected_geometric_mean": -9.48863410949707, + "step": 3618 + }, + { + "chosen_geometric_mean": -1.1603991985321045, + "epoch": 0.9, + "grad_norm": 2.3125, + "learning_rate": 2.9118402279893533e-06, + "log_odds": 2.285865306854248, + "log_odds_ratio": -0.2862663269042969, + "loss": 0.3082, + "rejected_geometric_mean": -3.1837353706359863, + "step": 3619 + }, + { + "chosen_geometric_mean": -1.0148530006408691, + "epoch": 0.9, + "grad_norm": 15.625, + "learning_rate": 2.9108800235104873e-06, + "log_odds": 8.254911422729492, + "log_odds_ratio": -0.13318787515163422, + "loss": 0.3273, + "rejected_geometric_mean": -8.87806224822998, + "step": 3620 + }, + { + "chosen_geometric_mean": -1.2583673000335693, + "epoch": 0.9, + "grad_norm": 4.5, + "learning_rate": 2.9099197567323468e-06, + "log_odds": 6.2848992347717285, + "log_odds_ratio": -0.16425438225269318, + "loss": 0.2889, + "rejected_geometric_mean": -7.299540042877197, + "step": 3621 + }, + { + "chosen_geometric_mean": -1.3530547618865967, + "epoch": 0.9, + "grad_norm": 18.375, + "learning_rate": 2.908959427800531e-06, + "log_odds": 5.653528690338135, + "log_odds_ratio": -0.08449968695640564, + "loss": 0.3224, + "rejected_geometric_mean": -6.726142883300781, + "step": 3622 + }, + { + "chosen_geometric_mean": -1.725106954574585, + "epoch": 0.9, + "grad_norm": 7.625, + "learning_rate": 2.907999036860651e-06, + "log_odds": 3.1232855319976807, + "log_odds_ratio": -0.17210689187049866, + "loss": 0.3113, + "rejected_geometric_mean": -4.696187973022461, + "step": 3623 + }, + { + "chosen_geometric_mean": -1.0379674434661865, + "epoch": 0.9, + "grad_norm": 4.1875, + "learning_rate": 2.907038584058322e-06, + "log_odds": 3.2043821811676025, + "log_odds_ratio": -0.042475245893001556, + "loss": 0.2504, + "rejected_geometric_mean": -3.805210828781128, + "step": 3624 + }, + { + "chosen_geometric_mean": -1.103610873222351, + "epoch": 0.9, + "grad_norm": 3.9375, + "learning_rate": 2.9060780695391733e-06, + "log_odds": 9.797150611877441, + "log_odds_ratio": -0.0026377118192613125, + "loss": 0.2812, + "rejected_geometric_mean": -10.49154281616211, + "step": 3625 + }, + { + "chosen_geometric_mean": -1.0914556980133057, + "epoch": 0.9, + "grad_norm": 24.375, + "learning_rate": 2.9051174934488425e-06, + "log_odds": 2.4487767219543457, + "log_odds_ratio": -0.4029727876186371, + "loss": 0.3097, + "rejected_geometric_mean": -3.3999195098876953, + "step": 3626 + }, + { + "chosen_geometric_mean": -0.9523980617523193, + "epoch": 0.9, + "grad_norm": 2.328125, + "learning_rate": 2.9041568559329746e-06, + "log_odds": 1.7007241249084473, + "log_odds_ratio": -0.3247542679309845, + "loss": 0.2924, + "rejected_geometric_mean": -2.364027500152588, + "step": 3627 + }, + { + "chosen_geometric_mean": -1.1019062995910645, + "epoch": 0.9, + "grad_norm": 32.0, + "learning_rate": 2.9031961571372256e-06, + "log_odds": 3.5938124656677246, + "log_odds_ratio": -0.16697368025779724, + "loss": 0.2616, + "rejected_geometric_mean": -4.38015079498291, + "step": 3628 + }, + { + "chosen_geometric_mean": -1.120223045349121, + "epoch": 0.9, + "grad_norm": 2.5, + "learning_rate": 2.9022353972072612e-06, + "log_odds": 8.97643756866455, + "log_odds_ratio": -0.011053279042243958, + "loss": 0.2968, + "rejected_geometric_mean": -9.701994895935059, + "step": 3629 + }, + { + "chosen_geometric_mean": -1.576138973236084, + "epoch": 0.9, + "grad_norm": 16.75, + "learning_rate": 2.9012745762887562e-06, + "log_odds": 4.636913299560547, + "log_odds_ratio": -0.22597551345825195, + "loss": 0.309, + "rejected_geometric_mean": -6.022481918334961, + "step": 3630 + }, + { + "chosen_geometric_mean": -1.116706132888794, + "epoch": 0.9, + "grad_norm": 6.5625, + "learning_rate": 2.900313694527393e-06, + "log_odds": 5.0874223709106445, + "log_odds_ratio": -0.19054055213928223, + "loss": 0.303, + "rejected_geometric_mean": -5.8879241943359375, + "step": 3631 + }, + { + "chosen_geometric_mean": -1.0190621614456177, + "epoch": 0.9, + "grad_norm": 3.59375, + "learning_rate": 2.899352752068864e-06, + "log_odds": 3.3524417877197266, + "log_odds_ratio": -0.13786499202251434, + "loss": 0.3271, + "rejected_geometric_mean": -4.003912448883057, + "step": 3632 + }, + { + "chosen_geometric_mean": -1.0258588790893555, + "epoch": 0.9, + "grad_norm": 6.15625, + "learning_rate": 2.898391749058872e-06, + "log_odds": 2.0829923152923584, + "log_odds_ratio": -0.259285569190979, + "loss": 0.3163, + "rejected_geometric_mean": -2.799955129623413, + "step": 3633 + }, + { + "chosen_geometric_mean": -1.2012529373168945, + "epoch": 0.9, + "grad_norm": 6.3125, + "learning_rate": 2.897430685643128e-06, + "log_odds": 2.9743895530700684, + "log_odds_ratio": -0.183159738779068, + "loss": 0.2785, + "rejected_geometric_mean": -3.885610818862915, + "step": 3634 + }, + { + "chosen_geometric_mean": -1.0875834226608276, + "epoch": 0.9, + "grad_norm": 31.25, + "learning_rate": 2.896469561967352e-06, + "log_odds": 4.934579849243164, + "log_odds_ratio": -0.1585477739572525, + "loss": 0.2897, + "rejected_geometric_mean": -5.667276382446289, + "step": 3635 + }, + { + "chosen_geometric_mean": -1.2757182121276855, + "epoch": 0.9, + "grad_norm": 2.234375, + "learning_rate": 2.895508378177274e-06, + "log_odds": 3.3923192024230957, + "log_odds_ratio": -0.09336790442466736, + "loss": 0.2708, + "rejected_geometric_mean": -4.370585918426514, + "step": 3636 + }, + { + "chosen_geometric_mean": -1.0808286666870117, + "epoch": 0.9, + "grad_norm": 3.03125, + "learning_rate": 2.894547134418632e-06, + "log_odds": 2.150116443634033, + "log_odds_ratio": -0.3381175696849823, + "loss": 0.358, + "rejected_geometric_mean": -2.9459104537963867, + "step": 3637 + }, + { + "chosen_geometric_mean": -0.997897744178772, + "epoch": 0.9, + "grad_norm": 11.5, + "learning_rate": 2.8935858308371732e-06, + "log_odds": 8.41503620147705, + "log_odds_ratio": -0.09734462201595306, + "loss": 0.2676, + "rejected_geometric_mean": -8.946331024169922, + "step": 3638 + }, + { + "chosen_geometric_mean": -1.1629581451416016, + "epoch": 0.9, + "grad_norm": 7.28125, + "learning_rate": 2.8926244675786556e-06, + "log_odds": 0.6556364297866821, + "log_odds_ratio": -0.5034608840942383, + "loss": 0.3307, + "rejected_geometric_mean": -1.6783158779144287, + "step": 3639 + }, + { + "chosen_geometric_mean": -1.303264856338501, + "epoch": 0.9, + "grad_norm": 3.203125, + "learning_rate": 2.891663044788844e-06, + "log_odds": 3.7451529502868652, + "log_odds_ratio": -0.20573024451732635, + "loss": 0.255, + "rejected_geometric_mean": -4.809648036956787, + "step": 3640 + }, + { + "chosen_geometric_mean": -0.9917969703674316, + "epoch": 0.9, + "grad_norm": 17.5, + "learning_rate": 2.890701562613513e-06, + "log_odds": 5.284491539001465, + "log_odds_ratio": -0.1394333690404892, + "loss": 0.3327, + "rejected_geometric_mean": -5.896379470825195, + "step": 3641 + }, + { + "chosen_geometric_mean": -0.8662372827529907, + "epoch": 0.9, + "grad_norm": 6.125, + "learning_rate": 2.8897400211984465e-06, + "log_odds": 4.426843166351318, + "log_odds_ratio": -0.09283366054296494, + "loss": 0.3131, + "rejected_geometric_mean": -4.7489800453186035, + "step": 3642 + }, + { + "chosen_geometric_mean": -1.3447083234786987, + "epoch": 0.9, + "grad_norm": 28.75, + "learning_rate": 2.8887784206894385e-06, + "log_odds": 5.618002891540527, + "log_odds_ratio": -0.04873012751340866, + "loss": 0.3198, + "rejected_geometric_mean": -6.645260810852051, + "step": 3643 + }, + { + "chosen_geometric_mean": -1.0520159006118774, + "epoch": 0.9, + "grad_norm": 5.5625, + "learning_rate": 2.887816761232289e-06, + "log_odds": 5.160074710845947, + "log_odds_ratio": -0.007702185306698084, + "loss": 0.2674, + "rejected_geometric_mean": -5.731365203857422, + "step": 3644 + }, + { + "chosen_geometric_mean": -0.8564276695251465, + "epoch": 0.9, + "grad_norm": 9.1875, + "learning_rate": 2.8868550429728093e-06, + "log_odds": 4.8754425048828125, + "log_odds_ratio": -0.1406162679195404, + "loss": 0.3131, + "rejected_geometric_mean": -5.317910671234131, + "step": 3645 + }, + { + "chosen_geometric_mean": -1.1155874729156494, + "epoch": 0.9, + "grad_norm": 7.84375, + "learning_rate": 2.8858932660568195e-06, + "log_odds": 1.645460844039917, + "log_odds_ratio": -0.24185842275619507, + "loss": 0.2698, + "rejected_geometric_mean": -2.481344699859619, + "step": 3646 + }, + { + "chosen_geometric_mean": -1.089874267578125, + "epoch": 0.9, + "grad_norm": 2.40625, + "learning_rate": 2.8849314306301468e-06, + "log_odds": 0.07764619588851929, + "log_odds_ratio": -0.655417263507843, + "loss": 0.345, + "rejected_geometric_mean": -1.1442166566848755, + "step": 3647 + }, + { + "chosen_geometric_mean": -1.2863554954528809, + "epoch": 0.9, + "grad_norm": 1.9765625, + "learning_rate": 2.88396953683863e-06, + "log_odds": 1.1507208347320557, + "log_odds_ratio": -0.4443502426147461, + "loss": 0.283, + "rejected_geometric_mean": -2.3052048683166504, + "step": 3648 + }, + { + "chosen_geometric_mean": -1.0969483852386475, + "epoch": 0.9, + "grad_norm": 11.5, + "learning_rate": 2.8830075848281146e-06, + "log_odds": 3.8473260402679443, + "log_odds_ratio": -0.2794662415981293, + "loss": 0.3276, + "rejected_geometric_mean": -4.574648857116699, + "step": 3649 + }, + { + "chosen_geometric_mean": -0.8413825035095215, + "epoch": 0.9, + "grad_norm": 6.96875, + "learning_rate": 2.882045574744456e-06, + "log_odds": 2.0765128135681152, + "log_odds_ratio": -0.28599974513053894, + "loss": 0.2628, + "rejected_geometric_mean": -2.5621390342712402, + "step": 3650 + }, + { + "chosen_geometric_mean": -1.4038169384002686, + "epoch": 0.9, + "grad_norm": 3.4375, + "learning_rate": 2.8810835067335173e-06, + "log_odds": 2.1856436729431152, + "log_odds_ratio": -0.39961034059524536, + "loss": 0.4531, + "rejected_geometric_mean": -3.4418230056762695, + "step": 3651 + }, + { + "chosen_geometric_mean": -1.465499758720398, + "epoch": 0.9, + "grad_norm": 4.25, + "learning_rate": 2.8801213809411716e-06, + "log_odds": 3.0441973209381104, + "log_odds_ratio": -0.3753454089164734, + "loss": 0.262, + "rejected_geometric_mean": -4.395793437957764, + "step": 3652 + }, + { + "chosen_geometric_mean": -1.2118616104125977, + "epoch": 0.9, + "grad_norm": 2.828125, + "learning_rate": 2.8791591975133e-06, + "log_odds": 0.2012147307395935, + "log_odds_ratio": -0.6002745628356934, + "loss": 0.3489, + "rejected_geometric_mean": -1.3566076755523682, + "step": 3653 + }, + { + "chosen_geometric_mean": -1.3267457485198975, + "epoch": 0.9, + "grad_norm": 18.75, + "learning_rate": 2.878196956595793e-06, + "log_odds": 1.7172167301177979, + "log_odds_ratio": -0.2327679991722107, + "loss": 0.2908, + "rejected_geometric_mean": -2.8332653045654297, + "step": 3654 + }, + { + "chosen_geometric_mean": -0.9518958330154419, + "epoch": 0.9, + "grad_norm": 3.71875, + "learning_rate": 2.877234658334548e-06, + "log_odds": 4.195876121520996, + "log_odds_ratio": -0.4108436107635498, + "loss": 0.266, + "rejected_geometric_mean": -4.992962837219238, + "step": 3655 + }, + { + "chosen_geometric_mean": -1.3208112716674805, + "epoch": 0.91, + "grad_norm": 6.40625, + "learning_rate": 2.876272302875475e-06, + "log_odds": 7.611434459686279, + "log_odds_ratio": -0.30476683378219604, + "loss": 0.3162, + "rejected_geometric_mean": -8.770566940307617, + "step": 3656 + }, + { + "chosen_geometric_mean": -1.2042642831802368, + "epoch": 0.91, + "grad_norm": 3.703125, + "learning_rate": 2.8753098903644888e-06, + "log_odds": 2.220540761947632, + "log_odds_ratio": -0.22037041187286377, + "loss": 0.3089, + "rejected_geometric_mean": -3.148132801055908, + "step": 3657 + }, + { + "chosen_geometric_mean": -1.1823666095733643, + "epoch": 0.91, + "grad_norm": 34.5, + "learning_rate": 2.8743474209475137e-06, + "log_odds": 4.799980640411377, + "log_odds_ratio": -0.10252566635608673, + "loss": 0.3148, + "rejected_geometric_mean": -5.658935070037842, + "step": 3658 + }, + { + "chosen_geometric_mean": -1.0173523426055908, + "epoch": 0.91, + "grad_norm": 20.0, + "learning_rate": 2.8733848947704844e-06, + "log_odds": 2.1949315071105957, + "log_odds_ratio": -0.2132735252380371, + "loss": 0.2749, + "rejected_geometric_mean": -2.897094964981079, + "step": 3659 + }, + { + "chosen_geometric_mean": -1.1384170055389404, + "epoch": 0.91, + "grad_norm": 7.78125, + "learning_rate": 2.872422311979342e-06, + "log_odds": 2.915109634399414, + "log_odds_ratio": -0.37949851155281067, + "loss": 0.2704, + "rejected_geometric_mean": -3.9110090732574463, + "step": 3660 + }, + { + "chosen_geometric_mean": -0.9888440370559692, + "epoch": 0.91, + "grad_norm": 4.65625, + "learning_rate": 2.871459672720037e-06, + "log_odds": 2.34295654296875, + "log_odds_ratio": -0.3805975317955017, + "loss": 0.2873, + "rejected_geometric_mean": -3.094036340713501, + "step": 3661 + }, + { + "chosen_geometric_mean": -1.2320555448532104, + "epoch": 0.91, + "grad_norm": 18.875, + "learning_rate": 2.87049697713853e-06, + "log_odds": 7.338069438934326, + "log_odds_ratio": -0.2288684844970703, + "loss": 0.3577, + "rejected_geometric_mean": -8.30047607421875, + "step": 3662 + }, + { + "chosen_geometric_mean": -0.9990484714508057, + "epoch": 0.91, + "grad_norm": 4.4375, + "learning_rate": 2.8695342253807877e-06, + "log_odds": 4.2847137451171875, + "log_odds_ratio": -0.1496451199054718, + "loss": 0.31, + "rejected_geometric_mean": -4.862712383270264, + "step": 3663 + }, + { + "chosen_geometric_mean": -1.1320127248764038, + "epoch": 0.91, + "grad_norm": 26.875, + "learning_rate": 2.8685714175927866e-06, + "log_odds": 5.6141486167907715, + "log_odds_ratio": -0.21614542603492737, + "loss": 0.3691, + "rejected_geometric_mean": -6.467960357666016, + "step": 3664 + }, + { + "chosen_geometric_mean": -1.1929576396942139, + "epoch": 0.91, + "grad_norm": 8.75, + "learning_rate": 2.8676085539205112e-06, + "log_odds": 12.344486236572266, + "log_odds_ratio": -0.06371468305587769, + "loss": 0.2971, + "rejected_geometric_mean": -13.153438568115234, + "step": 3665 + }, + { + "chosen_geometric_mean": -1.0306634902954102, + "epoch": 0.91, + "grad_norm": 35.0, + "learning_rate": 2.8666456345099553e-06, + "log_odds": 3.6478524208068848, + "log_odds_ratio": -0.3223358392715454, + "loss": 0.3002, + "rejected_geometric_mean": -4.4598708152771, + "step": 3666 + }, + { + "chosen_geometric_mean": -1.3564375638961792, + "epoch": 0.91, + "grad_norm": 36.25, + "learning_rate": 2.8656826595071197e-06, + "log_odds": 4.079204559326172, + "log_odds_ratio": -0.17449267208576202, + "loss": 0.3405, + "rejected_geometric_mean": -5.08425760269165, + "step": 3667 + }, + { + "chosen_geometric_mean": -1.1169614791870117, + "epoch": 0.91, + "grad_norm": 5.15625, + "learning_rate": 2.8647196290580152e-06, + "log_odds": 1.48591947555542, + "log_odds_ratio": -0.272882342338562, + "loss": 0.3015, + "rejected_geometric_mean": -2.3577210903167725, + "step": 3668 + }, + { + "chosen_geometric_mean": -0.9918637275695801, + "epoch": 0.91, + "grad_norm": 5.71875, + "learning_rate": 2.863756543308661e-06, + "log_odds": 10.421611785888672, + "log_odds_ratio": -0.1342255175113678, + "loss": 0.2818, + "rejected_geometric_mean": -11.038453102111816, + "step": 3669 + }, + { + "chosen_geometric_mean": -1.004533290863037, + "epoch": 0.91, + "grad_norm": 31.875, + "learning_rate": 2.862793402405083e-06, + "log_odds": 9.271758079528809, + "log_odds_ratio": -0.006579787936061621, + "loss": 0.3557, + "rejected_geometric_mean": -9.81460189819336, + "step": 3670 + }, + { + "chosen_geometric_mean": -1.1105965375900269, + "epoch": 0.91, + "grad_norm": 3.921875, + "learning_rate": 2.8618302064933158e-06, + "log_odds": 5.042062759399414, + "log_odds_ratio": -0.07444532215595245, + "loss": 0.2904, + "rejected_geometric_mean": -5.754444122314453, + "step": 3671 + }, + { + "chosen_geometric_mean": -0.9448834657669067, + "epoch": 0.91, + "grad_norm": 35.25, + "learning_rate": 2.860866955719405e-06, + "log_odds": 3.282604694366455, + "log_odds_ratio": -0.2043435424566269, + "loss": 0.3859, + "rejected_geometric_mean": -3.9121832847595215, + "step": 3672 + }, + { + "chosen_geometric_mean": -1.1802693605422974, + "epoch": 0.91, + "grad_norm": 11.0, + "learning_rate": 2.859903650229401e-06, + "log_odds": 2.580254316329956, + "log_odds_ratio": -0.2829783260822296, + "loss": 0.322, + "rejected_geometric_mean": -3.544851303100586, + "step": 3673 + }, + { + "chosen_geometric_mean": -1.339737892150879, + "epoch": 0.91, + "grad_norm": 6.625, + "learning_rate": 2.8589402901693643e-06, + "log_odds": 4.878398895263672, + "log_odds_ratio": -0.18444222211837769, + "loss": 0.3358, + "rejected_geometric_mean": -5.958804130554199, + "step": 3674 + }, + { + "chosen_geometric_mean": -0.9636068344116211, + "epoch": 0.91, + "grad_norm": 6.9375, + "learning_rate": 2.8579768756853637e-06, + "log_odds": 6.369839668273926, + "log_odds_ratio": -0.2200089991092682, + "loss": 0.3326, + "rejected_geometric_mean": -6.97486686706543, + "step": 3675 + }, + { + "chosen_geometric_mean": -1.125316858291626, + "epoch": 0.91, + "grad_norm": 2.8125, + "learning_rate": 2.857013406923477e-06, + "log_odds": 1.8729753494262695, + "log_odds_ratio": -0.28747332096099854, + "loss": 0.275, + "rejected_geometric_mean": -2.767747402191162, + "step": 3676 + }, + { + "chosen_geometric_mean": -1.0368622541427612, + "epoch": 0.91, + "grad_norm": 2.1875, + "learning_rate": 2.8560498840297867e-06, + "log_odds": 6.219731330871582, + "log_odds_ratio": -0.05247136950492859, + "loss": 0.2366, + "rejected_geometric_mean": -6.788363456726074, + "step": 3677 + }, + { + "chosen_geometric_mean": -1.0638740062713623, + "epoch": 0.91, + "grad_norm": 4.3125, + "learning_rate": 2.855086307150388e-06, + "log_odds": 4.289785385131836, + "log_odds_ratio": -0.15800565481185913, + "loss": 0.2863, + "rejected_geometric_mean": -4.997376918792725, + "step": 3678 + }, + { + "chosen_geometric_mean": -1.285579800605774, + "epoch": 0.91, + "grad_norm": 5.1875, + "learning_rate": 2.8541226764313825e-06, + "log_odds": 1.43826425075531, + "log_odds_ratio": -0.31635475158691406, + "loss": 0.2764, + "rejected_geometric_mean": -2.5339808464050293, + "step": 3679 + }, + { + "chosen_geometric_mean": -1.1953833103179932, + "epoch": 0.91, + "grad_norm": 5.1875, + "learning_rate": 2.8531589920188774e-06, + "log_odds": 2.60835862159729, + "log_odds_ratio": -0.43399494886398315, + "loss": 0.3094, + "rejected_geometric_mean": -3.6905596256256104, + "step": 3680 + }, + { + "chosen_geometric_mean": -0.933924674987793, + "epoch": 0.91, + "grad_norm": 39.75, + "learning_rate": 2.8521952540589924e-06, + "log_odds": 6.669414043426514, + "log_odds_ratio": -0.20474080741405487, + "loss": 0.3569, + "rejected_geometric_mean": -7.172999858856201, + "step": 3681 + }, + { + "chosen_geometric_mean": -1.0201733112335205, + "epoch": 0.91, + "grad_norm": 8.9375, + "learning_rate": 2.8512314626978534e-06, + "log_odds": 5.44590425491333, + "log_odds_ratio": -0.16643206775188446, + "loss": 0.2944, + "rejected_geometric_mean": -6.134675979614258, + "step": 3682 + }, + { + "chosen_geometric_mean": -0.953940212726593, + "epoch": 0.91, + "grad_norm": 5.46875, + "learning_rate": 2.850267618081593e-06, + "log_odds": 2.4332275390625, + "log_odds_ratio": -0.24191029369831085, + "loss": 0.2956, + "rejected_geometric_mean": -3.0501890182495117, + "step": 3683 + }, + { + "chosen_geometric_mean": -1.188156008720398, + "epoch": 0.91, + "grad_norm": 3.546875, + "learning_rate": 2.8493037203563543e-06, + "log_odds": 5.85842752456665, + "log_odds_ratio": -0.25656843185424805, + "loss": 0.3189, + "rejected_geometric_mean": -6.726633071899414, + "step": 3684 + }, + { + "chosen_geometric_mean": -1.1897437572479248, + "epoch": 0.91, + "grad_norm": 34.0, + "learning_rate": 2.848339769668287e-06, + "log_odds": 0.4044334888458252, + "log_odds_ratio": -0.6211340427398682, + "loss": 0.3159, + "rejected_geometric_mean": -1.5949242115020752, + "step": 3685 + }, + { + "chosen_geometric_mean": -1.1680065393447876, + "epoch": 0.91, + "grad_norm": 6.625, + "learning_rate": 2.8473757661635485e-06, + "log_odds": 1.1508512496948242, + "log_odds_ratio": -0.42340195178985596, + "loss": 0.2782, + "rejected_geometric_mean": -2.189870595932007, + "step": 3686 + }, + { + "chosen_geometric_mean": -1.0749549865722656, + "epoch": 0.91, + "grad_norm": 16.5, + "learning_rate": 2.8464117099883057e-06, + "log_odds": 4.563220500946045, + "log_odds_ratio": -0.128414586186409, + "loss": 0.3021, + "rejected_geometric_mean": -5.255962371826172, + "step": 3687 + }, + { + "chosen_geometric_mean": -1.0538965463638306, + "epoch": 0.91, + "grad_norm": 7.5625, + "learning_rate": 2.8454476012887333e-06, + "log_odds": 5.3323845863342285, + "log_odds_ratio": -0.14268358051776886, + "loss": 0.3077, + "rejected_geometric_mean": -5.975120544433594, + "step": 3688 + }, + { + "chosen_geometric_mean": -1.1837955713272095, + "epoch": 0.91, + "grad_norm": 10.125, + "learning_rate": 2.8444834402110117e-06, + "log_odds": 4.931540489196777, + "log_odds_ratio": -0.21324999630451202, + "loss": 0.3059, + "rejected_geometric_mean": -5.739711761474609, + "step": 3689 + }, + { + "chosen_geometric_mean": -1.2867668867111206, + "epoch": 0.91, + "grad_norm": 2.046875, + "learning_rate": 2.8435192269013316e-06, + "log_odds": 2.8776254653930664, + "log_odds_ratio": -0.22787189483642578, + "loss": 0.3287, + "rejected_geometric_mean": -3.929056167602539, + "step": 3690 + }, + { + "chosen_geometric_mean": -1.2485673427581787, + "epoch": 0.91, + "grad_norm": 9.5625, + "learning_rate": 2.8425549615058905e-06, + "log_odds": 8.773207664489746, + "log_odds_ratio": -0.012933960184454918, + "loss": 0.2958, + "rejected_geometric_mean": -9.68712329864502, + "step": 3691 + }, + { + "chosen_geometric_mean": -0.9880152344703674, + "epoch": 0.91, + "grad_norm": 3.8125, + "learning_rate": 2.8415906441708956e-06, + "log_odds": 2.9597973823547363, + "log_odds_ratio": -0.2850715219974518, + "loss": 0.3317, + "rejected_geometric_mean": -3.690474510192871, + "step": 3692 + }, + { + "chosen_geometric_mean": -1.0531907081604004, + "epoch": 0.91, + "grad_norm": 6.5, + "learning_rate": 2.8406262750425584e-06, + "log_odds": 1.9974267482757568, + "log_odds_ratio": -0.5077074766159058, + "loss": 0.3338, + "rejected_geometric_mean": -2.8813369274139404, + "step": 3693 + }, + { + "chosen_geometric_mean": -1.0891860723495483, + "epoch": 0.91, + "grad_norm": 3.390625, + "learning_rate": 2.839661854267102e-06, + "log_odds": 0.6521075963973999, + "log_odds_ratio": -0.45590174198150635, + "loss": 0.2621, + "rejected_geometric_mean": -1.5849605798721313, + "step": 3694 + }, + { + "chosen_geometric_mean": -0.8962662816047668, + "epoch": 0.91, + "grad_norm": 4.6875, + "learning_rate": 2.8386973819907564e-06, + "log_odds": 3.6299386024475098, + "log_odds_ratio": -0.17740938067436218, + "loss": 0.2425, + "rejected_geometric_mean": -4.144327163696289, + "step": 3695 + }, + { + "chosen_geometric_mean": -0.8303221464157104, + "epoch": 0.92, + "grad_norm": 2.765625, + "learning_rate": 2.8377328583597564e-06, + "log_odds": 3.531125545501709, + "log_odds_ratio": -0.4247346818447113, + "loss": 0.267, + "rejected_geometric_mean": -4.107560157775879, + "step": 3696 + }, + { + "chosen_geometric_mean": -1.1257740259170532, + "epoch": 0.92, + "grad_norm": 6.53125, + "learning_rate": 2.836768283520348e-06, + "log_odds": 1.1822147369384766, + "log_odds_ratio": -0.32800376415252686, + "loss": 0.2855, + "rejected_geometric_mean": -2.077579975128174, + "step": 3697 + }, + { + "chosen_geometric_mean": -0.8822706937789917, + "epoch": 0.92, + "grad_norm": 5.90625, + "learning_rate": 2.8358036576187847e-06, + "log_odds": 3.5736372470855713, + "log_odds_ratio": -0.04077383130788803, + "loss": 0.3002, + "rejected_geometric_mean": -3.9457714557647705, + "step": 3698 + }, + { + "chosen_geometric_mean": -1.0146822929382324, + "epoch": 0.92, + "grad_norm": 5.6875, + "learning_rate": 2.834838980801326e-06, + "log_odds": 6.3035101890563965, + "log_odds_ratio": -0.04045549035072327, + "loss": 0.2917, + "rejected_geometric_mean": -6.868818283081055, + "step": 3699 + }, + { + "chosen_geometric_mean": -1.058628797531128, + "epoch": 0.92, + "grad_norm": 6.96875, + "learning_rate": 2.8338742532142407e-06, + "log_odds": 7.987271308898926, + "log_odds_ratio": -0.011512791737914085, + "loss": 0.287, + "rejected_geometric_mean": -8.58471965789795, + "step": 3700 + }, + { + "chosen_geometric_mean": -0.9057974815368652, + "epoch": 0.92, + "grad_norm": 3.359375, + "learning_rate": 2.832909475003804e-06, + "log_odds": 5.779371738433838, + "log_odds_ratio": -0.16045011579990387, + "loss": 0.2267, + "rejected_geometric_mean": -6.2650370597839355, + "step": 3701 + }, + { + "chosen_geometric_mean": -0.9491508603096008, + "epoch": 0.92, + "grad_norm": 10.1875, + "learning_rate": 2.8319446463163e-06, + "log_odds": 2.824608564376831, + "log_odds_ratio": -0.16642020642757416, + "loss": 0.2872, + "rejected_geometric_mean": -3.420003890991211, + "step": 3702 + }, + { + "chosen_geometric_mean": -1.0835731029510498, + "epoch": 0.92, + "grad_norm": 3.9375, + "learning_rate": 2.830979767298019e-06, + "log_odds": 6.590752124786377, + "log_odds_ratio": -0.23829425871372223, + "loss": 0.2742, + "rejected_geometric_mean": -7.341844081878662, + "step": 3703 + }, + { + "chosen_geometric_mean": -1.2674963474273682, + "epoch": 0.92, + "grad_norm": 35.75, + "learning_rate": 2.830014838095261e-06, + "log_odds": 3.1359591484069824, + "log_odds_ratio": -0.3018784821033478, + "loss": 0.353, + "rejected_geometric_mean": -4.211467742919922, + "step": 3704 + }, + { + "chosen_geometric_mean": -1.0821630954742432, + "epoch": 0.92, + "grad_norm": 3.671875, + "learning_rate": 2.829049858854332e-06, + "log_odds": 3.923910617828369, + "log_odds_ratio": -0.23666656017303467, + "loss": 0.2545, + "rejected_geometric_mean": -4.723580360412598, + "step": 3705 + }, + { + "chosen_geometric_mean": -1.236986517906189, + "epoch": 0.92, + "grad_norm": 15.5625, + "learning_rate": 2.828084829721546e-06, + "log_odds": 3.6396312713623047, + "log_odds_ratio": -0.48134076595306396, + "loss": 0.3947, + "rejected_geometric_mean": -4.765030384063721, + "step": 3706 + }, + { + "chosen_geometric_mean": -1.063676118850708, + "epoch": 0.92, + "grad_norm": 3.890625, + "learning_rate": 2.8271197508432237e-06, + "log_odds": 0.8923836350440979, + "log_odds_ratio": -0.4500559866428375, + "loss": 0.2714, + "rejected_geometric_mean": -1.7903234958648682, + "step": 3707 + }, + { + "chosen_geometric_mean": -1.1820075511932373, + "epoch": 0.92, + "grad_norm": 63.5, + "learning_rate": 2.8261546223656955e-06, + "log_odds": 6.248066425323486, + "log_odds_ratio": -0.21388399600982666, + "loss": 0.337, + "rejected_geometric_mean": -7.159443378448486, + "step": 3708 + }, + { + "chosen_geometric_mean": -0.8808426856994629, + "epoch": 0.92, + "grad_norm": 13.4375, + "learning_rate": 2.8251894444352978e-06, + "log_odds": 6.979060173034668, + "log_odds_ratio": -0.26901257038116455, + "loss": 0.3042, + "rejected_geometric_mean": -7.498979091644287, + "step": 3709 + }, + { + "chosen_geometric_mean": -1.8633878231048584, + "epoch": 0.92, + "grad_norm": 28.75, + "learning_rate": 2.8242242171983735e-06, + "log_odds": 3.4325451850891113, + "log_odds_ratio": -0.19274954497814178, + "loss": 0.3366, + "rejected_geometric_mean": -5.151453971862793, + "step": 3710 + }, + { + "chosen_geometric_mean": -1.1613245010375977, + "epoch": 0.92, + "grad_norm": 3.671875, + "learning_rate": 2.823258940801277e-06, + "log_odds": 1.6211204528808594, + "log_odds_ratio": -0.3550766408443451, + "loss": 0.2492, + "rejected_geometric_mean": -2.5991344451904297, + "step": 3711 + }, + { + "chosen_geometric_mean": -1.0770149230957031, + "epoch": 0.92, + "grad_norm": 1.90625, + "learning_rate": 2.822293615390364e-06, + "log_odds": 7.725186347961426, + "log_odds_ratio": -0.24998794496059418, + "loss": 0.2632, + "rejected_geometric_mean": -8.526531219482422, + "step": 3712 + }, + { + "chosen_geometric_mean": -1.086388111114502, + "epoch": 0.92, + "grad_norm": 10.25, + "learning_rate": 2.8213282411120034e-06, + "log_odds": 12.619803428649902, + "log_odds_ratio": -0.0794314369559288, + "loss": 0.2982, + "rejected_geometric_mean": -13.324346542358398, + "step": 3713 + }, + { + "chosen_geometric_mean": -1.0223760604858398, + "epoch": 0.92, + "grad_norm": 49.0, + "learning_rate": 2.820362818112568e-06, + "log_odds": 5.9528727531433105, + "log_odds_ratio": -0.225683331489563, + "loss": 0.3849, + "rejected_geometric_mean": -6.69172477722168, + "step": 3714 + }, + { + "chosen_geometric_mean": -0.9986741542816162, + "epoch": 0.92, + "grad_norm": 6.75, + "learning_rate": 2.8193973465384393e-06, + "log_odds": 12.356301307678223, + "log_odds_ratio": -0.15544167160987854, + "loss": 0.3708, + "rejected_geometric_mean": -12.987115859985352, + "step": 3715 + }, + { + "chosen_geometric_mean": -1.0516762733459473, + "epoch": 0.92, + "grad_norm": 3.78125, + "learning_rate": 2.8184318265360067e-06, + "log_odds": 6.519767761230469, + "log_odds_ratio": -0.21220146119594574, + "loss": 0.2687, + "rejected_geometric_mean": -7.247069358825684, + "step": 3716 + }, + { + "chosen_geometric_mean": -1.0355663299560547, + "epoch": 0.92, + "grad_norm": 30.125, + "learning_rate": 2.8174662582516653e-06, + "log_odds": 2.199798583984375, + "log_odds_ratio": -0.1510097086429596, + "loss": 0.3564, + "rejected_geometric_mean": -2.8619375228881836, + "step": 3717 + }, + { + "chosen_geometric_mean": -1.2261239290237427, + "epoch": 0.92, + "grad_norm": 3.625, + "learning_rate": 2.8165006418318187e-06, + "log_odds": 2.720072031021118, + "log_odds_ratio": -0.29530829191207886, + "loss": 0.2698, + "rejected_geometric_mean": -3.747964859008789, + "step": 3718 + }, + { + "chosen_geometric_mean": -1.0775079727172852, + "epoch": 0.92, + "grad_norm": 3.28125, + "learning_rate": 2.8155349774228775e-06, + "log_odds": 4.487623691558838, + "log_odds_ratio": -0.19731466472148895, + "loss": 0.3034, + "rejected_geometric_mean": -5.250039100646973, + "step": 3719 + }, + { + "chosen_geometric_mean": -1.118442177772522, + "epoch": 0.92, + "grad_norm": 50.75, + "learning_rate": 2.81456926517126e-06, + "log_odds": 0.5536215901374817, + "log_odds_ratio": -0.5748940110206604, + "loss": 0.3074, + "rejected_geometric_mean": -1.5473461151123047, + "step": 3720 + }, + { + "chosen_geometric_mean": -0.9071106910705566, + "epoch": 0.92, + "grad_norm": 25.25, + "learning_rate": 2.81360350522339e-06, + "log_odds": 6.534871578216553, + "log_odds_ratio": -0.278116911649704, + "loss": 0.3264, + "rejected_geometric_mean": -7.16289758682251, + "step": 3721 + }, + { + "chosen_geometric_mean": -0.9607836008071899, + "epoch": 0.92, + "grad_norm": 3.234375, + "learning_rate": 2.812637697725702e-06, + "log_odds": 10.05549430847168, + "log_odds_ratio": -0.16348151862621307, + "loss": 0.2951, + "rejected_geometric_mean": -10.560150146484375, + "step": 3722 + }, + { + "chosen_geometric_mean": -1.0888478755950928, + "epoch": 0.92, + "grad_norm": 4.78125, + "learning_rate": 2.811671842824633e-06, + "log_odds": 5.586523532867432, + "log_odds_ratio": -0.03681142255663872, + "loss": 0.3018, + "rejected_geometric_mean": -6.27401876449585, + "step": 3723 + }, + { + "chosen_geometric_mean": -1.0429296493530273, + "epoch": 0.92, + "grad_norm": 5.75, + "learning_rate": 2.8107059406666327e-06, + "log_odds": 8.326025009155273, + "log_odds_ratio": -0.09956149756908417, + "loss": 0.3264, + "rejected_geometric_mean": -8.972859382629395, + "step": 3724 + }, + { + "chosen_geometric_mean": -1.1088781356811523, + "epoch": 0.92, + "grad_norm": 4.46875, + "learning_rate": 2.8097399913981536e-06, + "log_odds": 3.4833974838256836, + "log_odds_ratio": -0.2839950919151306, + "loss": 0.2557, + "rejected_geometric_mean": -4.3807549476623535, + "step": 3725 + }, + { + "chosen_geometric_mean": -1.08278489112854, + "epoch": 0.92, + "grad_norm": 3.90625, + "learning_rate": 2.808773995165656e-06, + "log_odds": 9.723785400390625, + "log_odds_ratio": -0.00399465998634696, + "loss": 0.3147, + "rejected_geometric_mean": -10.389471054077148, + "step": 3726 + }, + { + "chosen_geometric_mean": -1.1254899501800537, + "epoch": 0.92, + "grad_norm": 16.375, + "learning_rate": 2.8078079521156095e-06, + "log_odds": 3.06404709815979, + "log_odds_ratio": -0.3722943365573883, + "loss": 0.2821, + "rejected_geometric_mean": -3.97066068649292, + "step": 3727 + }, + { + "chosen_geometric_mean": -0.9930245280265808, + "epoch": 0.92, + "grad_norm": 2.203125, + "learning_rate": 2.8068418623944895e-06, + "log_odds": 7.45329475402832, + "log_odds_ratio": -0.07604016363620758, + "loss": 0.2747, + "rejected_geometric_mean": -8.001334190368652, + "step": 3728 + }, + { + "chosen_geometric_mean": -0.9360761046409607, + "epoch": 0.92, + "grad_norm": 1.9375, + "learning_rate": 2.805875726148776e-06, + "log_odds": 6.460543155670166, + "log_odds_ratio": -0.13292714953422546, + "loss": 0.2951, + "rejected_geometric_mean": -6.978054046630859, + "step": 3729 + }, + { + "chosen_geometric_mean": -1.0826550722122192, + "epoch": 0.92, + "grad_norm": 2.53125, + "learning_rate": 2.8049095435249614e-06, + "log_odds": 1.2678203582763672, + "log_odds_ratio": -0.42043957114219666, + "loss": 0.2787, + "rejected_geometric_mean": -2.150613784790039, + "step": 3730 + }, + { + "chosen_geometric_mean": -0.9553641080856323, + "epoch": 0.92, + "grad_norm": 2.765625, + "learning_rate": 2.8039433146695403e-06, + "log_odds": 1.0317518711090088, + "log_odds_ratio": -0.45616668462753296, + "loss": 0.2494, + "rejected_geometric_mean": -1.8448716402053833, + "step": 3731 + }, + { + "chosen_geometric_mean": -1.048156499862671, + "epoch": 0.92, + "grad_norm": 13.5, + "learning_rate": 2.802977039729018e-06, + "log_odds": 7.59145450592041, + "log_odds_ratio": -0.33854395151138306, + "loss": 0.2972, + "rejected_geometric_mean": -8.40329647064209, + "step": 3732 + }, + { + "chosen_geometric_mean": -1.1405344009399414, + "epoch": 0.92, + "grad_norm": 6.6875, + "learning_rate": 2.802010718849903e-06, + "log_odds": 3.2114319801330566, + "log_odds_ratio": -0.1894618272781372, + "loss": 0.2945, + "rejected_geometric_mean": -4.05693244934082, + "step": 3733 + }, + { + "chosen_geometric_mean": -0.9684394598007202, + "epoch": 0.92, + "grad_norm": 23.75, + "learning_rate": 2.801044352178714e-06, + "log_odds": 8.358068466186523, + "log_odds_ratio": -0.03754011541604996, + "loss": 0.3024, + "rejected_geometric_mean": -8.844108581542969, + "step": 3734 + }, + { + "chosen_geometric_mean": -1.2519066333770752, + "epoch": 0.92, + "grad_norm": 1.9765625, + "learning_rate": 2.800077939861975e-06, + "log_odds": 9.3968505859375, + "log_odds_ratio": -0.04444681853055954, + "loss": 0.3024, + "rejected_geometric_mean": -10.326408386230469, + "step": 3735 + }, + { + "chosen_geometric_mean": -0.8118482828140259, + "epoch": 0.92, + "grad_norm": 22.875, + "learning_rate": 2.7991114820462166e-06, + "log_odds": 6.2863874435424805, + "log_odds_ratio": -0.4012368321418762, + "loss": 0.2607, + "rejected_geometric_mean": -6.860367298126221, + "step": 3736 + }, + { + "chosen_geometric_mean": -1.5449604988098145, + "epoch": 0.93, + "grad_norm": 10.75, + "learning_rate": 2.7981449788779774e-06, + "log_odds": 0.9467222690582275, + "log_odds_ratio": -0.515382707118988, + "loss": 0.2875, + "rejected_geometric_mean": -2.372607469558716, + "step": 3737 + }, + { + "chosen_geometric_mean": -1.4151173830032349, + "epoch": 0.93, + "grad_norm": 28.5, + "learning_rate": 2.797178430503804e-06, + "log_odds": 5.634861469268799, + "log_odds_ratio": -0.1426817774772644, + "loss": 0.3763, + "rejected_geometric_mean": -6.812798500061035, + "step": 3738 + }, + { + "chosen_geometric_mean": -0.8812516927719116, + "epoch": 0.93, + "grad_norm": 23.25, + "learning_rate": 2.7962118370702466e-06, + "log_odds": 7.047050476074219, + "log_odds_ratio": -0.14310108125209808, + "loss": 0.2961, + "rejected_geometric_mean": -7.469103813171387, + "step": 3739 + }, + { + "chosen_geometric_mean": -1.066208839416504, + "epoch": 0.93, + "grad_norm": 2.375, + "learning_rate": 2.7952451987238644e-06, + "log_odds": 10.024767875671387, + "log_odds_ratio": -0.13417980074882507, + "loss": 0.3249, + "rejected_geometric_mean": -10.72506332397461, + "step": 3740 + }, + { + "chosen_geometric_mean": -1.4799859523773193, + "epoch": 0.93, + "grad_norm": 6.375, + "learning_rate": 2.7942785156112234e-06, + "log_odds": 3.560465097427368, + "log_odds_ratio": -0.10563500225543976, + "loss": 0.298, + "rejected_geometric_mean": -4.803292751312256, + "step": 3741 + }, + { + "chosen_geometric_mean": -1.1225844621658325, + "epoch": 0.93, + "grad_norm": 11.3125, + "learning_rate": 2.7933117878788946e-06, + "log_odds": 5.453664779663086, + "log_odds_ratio": -0.1390576809644699, + "loss": 0.2925, + "rejected_geometric_mean": -6.240164279937744, + "step": 3742 + }, + { + "chosen_geometric_mean": -1.1042486429214478, + "epoch": 0.93, + "grad_norm": 20.0, + "learning_rate": 2.792345015673459e-06, + "log_odds": 6.811576843261719, + "log_odds_ratio": -0.1326395720243454, + "loss": 0.373, + "rejected_geometric_mean": -7.588820934295654, + "step": 3743 + }, + { + "chosen_geometric_mean": -1.0143394470214844, + "epoch": 0.93, + "grad_norm": 20.625, + "learning_rate": 2.7913781991415014e-06, + "log_odds": 10.016436576843262, + "log_odds_ratio": -0.018297698348760605, + "loss": 0.2939, + "rejected_geometric_mean": -10.556938171386719, + "step": 3744 + }, + { + "chosen_geometric_mean": -1.0916500091552734, + "epoch": 0.93, + "grad_norm": 11.875, + "learning_rate": 2.7904113384296145e-06, + "log_odds": 10.642024040222168, + "log_odds_ratio": -0.025504130870103836, + "loss": 0.2879, + "rejected_geometric_mean": -11.326204299926758, + "step": 3745 + }, + { + "chosen_geometric_mean": -1.0445467233657837, + "epoch": 0.93, + "grad_norm": 20.0, + "learning_rate": 2.789444433684397e-06, + "log_odds": 4.469048023223877, + "log_odds_ratio": -0.15785394608974457, + "loss": 0.2788, + "rejected_geometric_mean": -5.165825843811035, + "step": 3746 + }, + { + "chosen_geometric_mean": -1.127489447593689, + "epoch": 0.93, + "grad_norm": 3.296875, + "learning_rate": 2.788477485052457e-06, + "log_odds": 4.973198890686035, + "log_odds_ratio": -0.2633192539215088, + "loss": 0.2351, + "rejected_geometric_mean": -5.895496368408203, + "step": 3747 + }, + { + "chosen_geometric_mean": -1.338178038597107, + "epoch": 0.93, + "grad_norm": 30.875, + "learning_rate": 2.7875104926804046e-06, + "log_odds": 4.148333549499512, + "log_odds_ratio": -0.36412397027015686, + "loss": 0.3364, + "rejected_geometric_mean": -5.332833766937256, + "step": 3748 + }, + { + "chosen_geometric_mean": -0.9169645309448242, + "epoch": 0.93, + "grad_norm": 7.5625, + "learning_rate": 2.7865434567148607e-06, + "log_odds": 9.506811141967773, + "log_odds_ratio": -0.0030003152787685394, + "loss": 0.2893, + "rejected_geometric_mean": -9.898083686828613, + "step": 3749 + }, + { + "chosen_geometric_mean": -1.1261911392211914, + "epoch": 0.93, + "grad_norm": 5.28125, + "learning_rate": 2.7855763773024503e-06, + "log_odds": 7.179251670837402, + "log_odds_ratio": -0.0870814323425293, + "loss": 0.2609, + "rejected_geometric_mean": -7.900706768035889, + "step": 3750 + }, + { + "chosen_geometric_mean": -1.0151357650756836, + "epoch": 0.93, + "grad_norm": 17.25, + "learning_rate": 2.7846092545898063e-06, + "log_odds": 7.751830577850342, + "log_odds_ratio": -0.17982476949691772, + "loss": 0.2963, + "rejected_geometric_mean": -8.399580001831055, + "step": 3751 + }, + { + "chosen_geometric_mean": -1.0714068412780762, + "epoch": 0.93, + "grad_norm": 2.09375, + "learning_rate": 2.783642088723568e-06, + "log_odds": 5.701560974121094, + "log_odds_ratio": -0.14687258005142212, + "loss": 0.2704, + "rejected_geometric_mean": -6.467859745025635, + "step": 3752 + }, + { + "chosen_geometric_mean": -1.0098693370819092, + "epoch": 0.93, + "grad_norm": 3.609375, + "learning_rate": 2.782674879850379e-06, + "log_odds": 6.0090556144714355, + "log_odds_ratio": -0.16978585720062256, + "loss": 0.2724, + "rejected_geometric_mean": -6.684491157531738, + "step": 3753 + }, + { + "chosen_geometric_mean": -1.0403475761413574, + "epoch": 0.93, + "grad_norm": 6.34375, + "learning_rate": 2.781707628116895e-06, + "log_odds": 4.9992570877075195, + "log_odds_ratio": -0.18693950772285461, + "loss": 0.3441, + "rejected_geometric_mean": -5.6584014892578125, + "step": 3754 + }, + { + "chosen_geometric_mean": -1.0374889373779297, + "epoch": 0.93, + "grad_norm": 35.5, + "learning_rate": 2.7807403336697714e-06, + "log_odds": 5.3836541175842285, + "log_odds_ratio": -0.18772707879543304, + "loss": 0.2582, + "rejected_geometric_mean": -6.095024585723877, + "step": 3755 + }, + { + "chosen_geometric_mean": -1.3800352811813354, + "epoch": 0.93, + "grad_norm": 29.125, + "learning_rate": 2.7797729966556737e-06, + "log_odds": 2.496762752532959, + "log_odds_ratio": -0.1588030457496643, + "loss": 0.3356, + "rejected_geometric_mean": -3.5734939575195312, + "step": 3756 + }, + { + "chosen_geometric_mean": -1.0054540634155273, + "epoch": 0.93, + "grad_norm": 12.375, + "learning_rate": 2.7788056172212746e-06, + "log_odds": 2.340315341949463, + "log_odds_ratio": -0.4518123269081116, + "loss": 0.3151, + "rejected_geometric_mean": -3.1556284427642822, + "step": 3757 + }, + { + "chosen_geometric_mean": -1.208530068397522, + "epoch": 0.93, + "grad_norm": 4.96875, + "learning_rate": 2.777838195513252e-06, + "log_odds": 10.213139533996582, + "log_odds_ratio": -9.438017150387168e-05, + "loss": 0.2666, + "rejected_geometric_mean": -11.050559997558594, + "step": 3758 + }, + { + "chosen_geometric_mean": -1.2500324249267578, + "epoch": 0.93, + "grad_norm": 18.625, + "learning_rate": 2.776870731678289e-06, + "log_odds": 5.2708282470703125, + "log_odds_ratio": -0.1682780236005783, + "loss": 0.3992, + "rejected_geometric_mean": -6.252756595611572, + "step": 3759 + }, + { + "chosen_geometric_mean": -1.0327622890472412, + "epoch": 0.93, + "grad_norm": 6.9375, + "learning_rate": 2.7759032258630776e-06, + "log_odds": 1.5518356561660767, + "log_odds_ratio": -0.25051698088645935, + "loss": 0.3115, + "rejected_geometric_mean": -2.297933340072632, + "step": 3760 + }, + { + "chosen_geometric_mean": -1.1129541397094727, + "epoch": 0.93, + "grad_norm": 14.0, + "learning_rate": 2.774935678214315e-06, + "log_odds": 1.942511796951294, + "log_odds_ratio": -0.33647608757019043, + "loss": 0.307, + "rejected_geometric_mean": -2.8189616203308105, + "step": 3761 + }, + { + "chosen_geometric_mean": -1.1497714519500732, + "epoch": 0.93, + "grad_norm": 2.078125, + "learning_rate": 2.7739680888787026e-06, + "log_odds": 5.7718939781188965, + "log_odds_ratio": -0.02772224321961403, + "loss": 0.2931, + "rejected_geometric_mean": -6.504604339599609, + "step": 3762 + }, + { + "chosen_geometric_mean": -0.9831418991088867, + "epoch": 0.93, + "grad_norm": 2.703125, + "learning_rate": 2.7730004580029525e-06, + "log_odds": 2.714167833328247, + "log_odds_ratio": -0.36631613969802856, + "loss": 0.2934, + "rejected_geometric_mean": -3.443242073059082, + "step": 3763 + }, + { + "chosen_geometric_mean": -0.9830029606819153, + "epoch": 0.93, + "grad_norm": 2.15625, + "learning_rate": 2.7720327857337797e-06, + "log_odds": 5.399632930755615, + "log_odds_ratio": -0.2422943264245987, + "loss": 0.3007, + "rejected_geometric_mean": -6.094851016998291, + "step": 3764 + }, + { + "chosen_geometric_mean": -1.141721487045288, + "epoch": 0.93, + "grad_norm": 5.46875, + "learning_rate": 2.771065072217907e-06, + "log_odds": 1.327859878540039, + "log_odds_ratio": -0.3144295811653137, + "loss": 0.2949, + "rejected_geometric_mean": -2.2599875926971436, + "step": 3765 + }, + { + "chosen_geometric_mean": -0.7941409349441528, + "epoch": 0.93, + "grad_norm": 4.125, + "learning_rate": 2.7700973176020624e-06, + "log_odds": 6.468905448913574, + "log_odds_ratio": -0.18430446088314056, + "loss": 0.3337, + "rejected_geometric_mean": -6.776556968688965, + "step": 3766 + }, + { + "chosen_geometric_mean": -1.0887370109558105, + "epoch": 0.93, + "grad_norm": 7.21875, + "learning_rate": 2.769129522032981e-06, + "log_odds": 3.9913125038146973, + "log_odds_ratio": -0.24524135887622833, + "loss": 0.2831, + "rejected_geometric_mean": -4.790214538574219, + "step": 3767 + }, + { + "chosen_geometric_mean": -1.2601473331451416, + "epoch": 0.93, + "grad_norm": 6.6875, + "learning_rate": 2.768161685657404e-06, + "log_odds": 7.779118537902832, + "log_odds_ratio": -0.3095862567424774, + "loss": 0.3106, + "rejected_geometric_mean": -8.805231094360352, + "step": 3768 + }, + { + "chosen_geometric_mean": -1.0557048320770264, + "epoch": 0.93, + "grad_norm": 14.3125, + "learning_rate": 2.767193808622079e-06, + "log_odds": 5.082301616668701, + "log_odds_ratio": -0.30688202381134033, + "loss": 0.2754, + "rejected_geometric_mean": -5.817838191986084, + "step": 3769 + }, + { + "chosen_geometric_mean": -1.0197343826293945, + "epoch": 0.93, + "grad_norm": 3.578125, + "learning_rate": 2.7662258910737582e-06, + "log_odds": 8.521162986755371, + "log_odds_ratio": -0.302562952041626, + "loss": 0.2777, + "rejected_geometric_mean": -9.326431274414062, + "step": 3770 + }, + { + "chosen_geometric_mean": -0.9689042568206787, + "epoch": 0.93, + "grad_norm": 22.875, + "learning_rate": 2.765257933159202e-06, + "log_odds": 9.347213745117188, + "log_odds_ratio": -0.008403997868299484, + "loss": 0.2996, + "rejected_geometric_mean": -9.811607360839844, + "step": 3771 + }, + { + "chosen_geometric_mean": -1.122739315032959, + "epoch": 0.93, + "grad_norm": 37.5, + "learning_rate": 2.7642899350251763e-06, + "log_odds": 6.533702373504639, + "log_odds_ratio": -0.1466435343027115, + "loss": 0.3552, + "rejected_geometric_mean": -7.3137311935424805, + "step": 3772 + }, + { + "chosen_geometric_mean": -1.2681009769439697, + "epoch": 0.93, + "grad_norm": 5.46875, + "learning_rate": 2.7633218968184516e-06, + "log_odds": 2.214402675628662, + "log_odds_ratio": -0.46884632110595703, + "loss": 0.3252, + "rejected_geometric_mean": -3.2795028686523438, + "step": 3773 + }, + { + "chosen_geometric_mean": -1.4944394826889038, + "epoch": 0.93, + "grad_norm": 18.625, + "learning_rate": 2.762353818685808e-06, + "log_odds": 3.364530324935913, + "log_odds_ratio": -0.39710062742233276, + "loss": 0.3492, + "rejected_geometric_mean": -4.727489948272705, + "step": 3774 + }, + { + "chosen_geometric_mean": -1.7468427419662476, + "epoch": 0.93, + "grad_norm": 35.0, + "learning_rate": 2.7613857007740265e-06, + "log_odds": 3.2800376415252686, + "log_odds_ratio": -0.9132426381111145, + "loss": 0.3848, + "rejected_geometric_mean": -4.890753746032715, + "step": 3775 + }, + { + "chosen_geometric_mean": -1.317112684249878, + "epoch": 0.93, + "grad_norm": 24.25, + "learning_rate": 2.7604175432298992e-06, + "log_odds": 1.7985327243804932, + "log_odds_ratio": -0.35710394382476807, + "loss": 0.3315, + "rejected_geometric_mean": -2.9410698413848877, + "step": 3776 + }, + { + "chosen_geometric_mean": -1.1081247329711914, + "epoch": 0.94, + "grad_norm": 2.015625, + "learning_rate": 2.7594493462002215e-06, + "log_odds": 10.809806823730469, + "log_odds_ratio": -0.17425468564033508, + "loss": 0.2568, + "rejected_geometric_mean": -11.550651550292969, + "step": 3777 + }, + { + "chosen_geometric_mean": -1.0815755128860474, + "epoch": 0.94, + "grad_norm": 4.40625, + "learning_rate": 2.7584811098317943e-06, + "log_odds": 3.562765598297119, + "log_odds_ratio": -0.2563626766204834, + "loss": 0.3308, + "rejected_geometric_mean": -4.3300371170043945, + "step": 3778 + }, + { + "chosen_geometric_mean": -1.2273362874984741, + "epoch": 0.94, + "grad_norm": 3.65625, + "learning_rate": 2.757512834271427e-06, + "log_odds": 10.002167701721191, + "log_odds_ratio": -0.00835700985044241, + "loss": 0.2647, + "rejected_geometric_mean": -10.822954177856445, + "step": 3779 + }, + { + "chosen_geometric_mean": -0.985965371131897, + "epoch": 0.94, + "grad_norm": 5.09375, + "learning_rate": 2.7565445196659322e-06, + "log_odds": 0.982637882232666, + "log_odds_ratio": -0.3968035876750946, + "loss": 0.3105, + "rejected_geometric_mean": -1.765824317932129, + "step": 3780 + }, + { + "chosen_geometric_mean": -1.044321060180664, + "epoch": 0.94, + "grad_norm": 7.625, + "learning_rate": 2.755576166162131e-06, + "log_odds": 3.6520893573760986, + "log_odds_ratio": -0.07185705006122589, + "loss": 0.3123, + "rejected_geometric_mean": -4.296154499053955, + "step": 3781 + }, + { + "chosen_geometric_mean": -1.1528857946395874, + "epoch": 0.94, + "grad_norm": 24.125, + "learning_rate": 2.754607773906848e-06, + "log_odds": 5.7523603439331055, + "log_odds_ratio": -0.019721312448382378, + "loss": 0.2888, + "rejected_geometric_mean": -6.530098915100098, + "step": 3782 + }, + { + "chosen_geometric_mean": -1.2871274948120117, + "epoch": 0.94, + "grad_norm": 4.3125, + "learning_rate": 2.7536393430469145e-06, + "log_odds": 1.71315598487854, + "log_odds_ratio": -0.2937767803668976, + "loss": 0.2845, + "rejected_geometric_mean": -2.8328661918640137, + "step": 3783 + }, + { + "chosen_geometric_mean": -1.5672351121902466, + "epoch": 0.94, + "grad_norm": 17.0, + "learning_rate": 2.752670873729168e-06, + "log_odds": 4.078551769256592, + "log_odds_ratio": -0.3483608663082123, + "loss": 0.3653, + "rejected_geometric_mean": -5.539322376251221, + "step": 3784 + }, + { + "chosen_geometric_mean": -1.0098092555999756, + "epoch": 0.94, + "grad_norm": 17.25, + "learning_rate": 2.7517023661004528e-06, + "log_odds": 6.482118606567383, + "log_odds_ratio": -0.14852620661258698, + "loss": 0.3104, + "rejected_geometric_mean": -7.083258628845215, + "step": 3785 + }, + { + "chosen_geometric_mean": -1.06209397315979, + "epoch": 0.94, + "grad_norm": 4.125, + "learning_rate": 2.7507338203076157e-06, + "log_odds": 4.513003349304199, + "log_odds_ratio": -0.025927916169166565, + "loss": 0.2731, + "rejected_geometric_mean": -5.132381916046143, + "step": 3786 + }, + { + "chosen_geometric_mean": -1.2442147731781006, + "epoch": 0.94, + "grad_norm": 17.0, + "learning_rate": 2.749765236497514e-06, + "log_odds": 3.078256368637085, + "log_odds_ratio": -0.23420450091362, + "loss": 0.2399, + "rejected_geometric_mean": -4.105194091796875, + "step": 3787 + }, + { + "chosen_geometric_mean": -0.9979361295700073, + "epoch": 0.94, + "grad_norm": 3.109375, + "learning_rate": 2.748796614817007e-06, + "log_odds": 2.824014902114868, + "log_odds_ratio": -0.16558310389518738, + "loss": 0.3139, + "rejected_geometric_mean": -3.4179253578186035, + "step": 3788 + }, + { + "chosen_geometric_mean": -1.2251185178756714, + "epoch": 0.94, + "grad_norm": 10.75, + "learning_rate": 2.7478279554129604e-06, + "log_odds": 4.867589473724365, + "log_odds_ratio": -0.12167651206254959, + "loss": 0.3251, + "rejected_geometric_mean": -5.782898902893066, + "step": 3789 + }, + { + "chosen_geometric_mean": -1.2175811529159546, + "epoch": 0.94, + "grad_norm": 12.8125, + "learning_rate": 2.746859258432248e-06, + "log_odds": 7.096817493438721, + "log_odds_ratio": -0.026084356009960175, + "loss": 0.3742, + "rejected_geometric_mean": -7.970293998718262, + "step": 3790 + }, + { + "chosen_geometric_mean": -1.111419677734375, + "epoch": 0.94, + "grad_norm": 2.90625, + "learning_rate": 2.745890524021746e-06, + "log_odds": 3.315882921218872, + "log_odds_ratio": -0.095124751329422, + "loss": 0.2699, + "rejected_geometric_mean": -4.078708171844482, + "step": 3791 + }, + { + "chosen_geometric_mean": -1.019594669342041, + "epoch": 0.94, + "grad_norm": 2.671875, + "learning_rate": 2.7449217523283374e-06, + "log_odds": 5.524651527404785, + "log_odds_ratio": -0.08606982231140137, + "loss": 0.3519, + "rejected_geometric_mean": -6.144763946533203, + "step": 3792 + }, + { + "chosen_geometric_mean": -0.9942110776901245, + "epoch": 0.94, + "grad_norm": 7.40625, + "learning_rate": 2.7439529434989126e-06, + "log_odds": 4.330294609069824, + "log_odds_ratio": -0.28983834385871887, + "loss": 0.2635, + "rejected_geometric_mean": -5.068055152893066, + "step": 3793 + }, + { + "chosen_geometric_mean": -1.3801552057266235, + "epoch": 0.94, + "grad_norm": 9.8125, + "learning_rate": 2.742984097680366e-06, + "log_odds": 5.1332902908325195, + "log_odds_ratio": -0.25391024351119995, + "loss": 0.2846, + "rejected_geometric_mean": -6.26661491394043, + "step": 3794 + }, + { + "chosen_geometric_mean": -1.1271065473556519, + "epoch": 0.94, + "grad_norm": 16.5, + "learning_rate": 2.7420152150195974e-06, + "log_odds": 2.004298686981201, + "log_odds_ratio": -0.15632350742816925, + "loss": 0.2766, + "rejected_geometric_mean": -2.821993589401245, + "step": 3795 + }, + { + "chosen_geometric_mean": -0.9075315594673157, + "epoch": 0.94, + "grad_norm": 2.421875, + "learning_rate": 2.741046295663513e-06, + "log_odds": 8.453808784484863, + "log_odds_ratio": -0.13184607028961182, + "loss": 0.2947, + "rejected_geometric_mean": -8.865294456481934, + "step": 3796 + }, + { + "chosen_geometric_mean": -1.1396992206573486, + "epoch": 0.94, + "grad_norm": 17.5, + "learning_rate": 2.740077339759024e-06, + "log_odds": 11.54345703125, + "log_odds_ratio": -2.9147387976991013e-05, + "loss": 0.3537, + "rejected_geometric_mean": -12.283788681030273, + "step": 3797 + }, + { + "chosen_geometric_mean": -1.2670085430145264, + "epoch": 0.94, + "grad_norm": 9.8125, + "learning_rate": 2.739108347453048e-06, + "log_odds": 5.301084995269775, + "log_odds_ratio": -0.1506117284297943, + "loss": 0.2914, + "rejected_geometric_mean": -6.298435688018799, + "step": 3798 + }, + { + "chosen_geometric_mean": -1.0735273361206055, + "epoch": 0.94, + "grad_norm": 7.21875, + "learning_rate": 2.7381393188925065e-06, + "log_odds": 6.32896614074707, + "log_odds_ratio": -0.19961538910865784, + "loss": 0.2782, + "rejected_geometric_mean": -7.079799652099609, + "step": 3799 + }, + { + "chosen_geometric_mean": -1.3311535120010376, + "epoch": 0.94, + "grad_norm": 5.5625, + "learning_rate": 2.737170254224328e-06, + "log_odds": 4.301529407501221, + "log_odds_ratio": -0.3536902070045471, + "loss": 0.4045, + "rejected_geometric_mean": -5.4930419921875, + "step": 3800 + }, + { + "chosen_geometric_mean": -0.8164103031158447, + "epoch": 0.94, + "grad_norm": 2.1875, + "learning_rate": 2.736201153595446e-06, + "log_odds": 0.4180750250816345, + "log_odds_ratio": -0.5853059887886047, + "loss": 0.3148, + "rejected_geometric_mean": -1.1343015432357788, + "step": 3801 + }, + { + "chosen_geometric_mean": -1.2111701965332031, + "epoch": 0.94, + "grad_norm": 3.640625, + "learning_rate": 2.7352320171527997e-06, + "log_odds": 1.81863272190094, + "log_odds_ratio": -0.23601888120174408, + "loss": 0.289, + "rejected_geometric_mean": -2.774237632751465, + "step": 3802 + }, + { + "chosen_geometric_mean": -1.2461811304092407, + "epoch": 0.94, + "grad_norm": 4.21875, + "learning_rate": 2.734262845043332e-06, + "log_odds": 8.667694091796875, + "log_odds_ratio": -0.29959288239479065, + "loss": 0.2872, + "rejected_geometric_mean": -9.710413932800293, + "step": 3803 + }, + { + "chosen_geometric_mean": -1.1830663681030273, + "epoch": 0.94, + "grad_norm": 2.609375, + "learning_rate": 2.7332936374139952e-06, + "log_odds": 4.22305965423584, + "log_odds_ratio": -0.05797901004552841, + "loss": 0.3058, + "rejected_geometric_mean": -5.062751293182373, + "step": 3804 + }, + { + "chosen_geometric_mean": -1.1039143800735474, + "epoch": 0.94, + "grad_norm": 2.734375, + "learning_rate": 2.7323243944117417e-06, + "log_odds": 9.835086822509766, + "log_odds_ratio": -0.16241759061813354, + "loss": 0.3203, + "rejected_geometric_mean": -10.621238708496094, + "step": 3805 + }, + { + "chosen_geometric_mean": -0.9839054346084595, + "epoch": 0.94, + "grad_norm": 2.90625, + "learning_rate": 2.731355116183534e-06, + "log_odds": 1.3904728889465332, + "log_odds_ratio": -0.4260876774787903, + "loss": 0.2805, + "rejected_geometric_mean": -2.215909004211426, + "step": 3806 + }, + { + "chosen_geometric_mean": -0.9598618745803833, + "epoch": 0.94, + "grad_norm": 6.0625, + "learning_rate": 2.7303858028763374e-06, + "log_odds": 8.427120208740234, + "log_odds_ratio": -0.10612939298152924, + "loss": 0.2628, + "rejected_geometric_mean": -8.935958862304688, + "step": 3807 + }, + { + "chosen_geometric_mean": -1.1275739669799805, + "epoch": 0.94, + "grad_norm": 7.4375, + "learning_rate": 2.7294164546371228e-06, + "log_odds": 5.411275863647461, + "log_odds_ratio": -0.3704536557197571, + "loss": 0.3079, + "rejected_geometric_mean": -6.352281093597412, + "step": 3808 + }, + { + "chosen_geometric_mean": -1.0707457065582275, + "epoch": 0.94, + "grad_norm": 12.8125, + "learning_rate": 2.728447071612866e-06, + "log_odds": 7.262277603149414, + "log_odds_ratio": -0.20573565363883972, + "loss": 0.3175, + "rejected_geometric_mean": -7.994953632354736, + "step": 3809 + }, + { + "chosen_geometric_mean": -1.0735832452774048, + "epoch": 0.94, + "grad_norm": 1.59375, + "learning_rate": 2.7274776539505517e-06, + "log_odds": 8.089516639709473, + "log_odds_ratio": -0.026020169258117676, + "loss": 0.2178, + "rejected_geometric_mean": -8.749159812927246, + "step": 3810 + }, + { + "chosen_geometric_mean": -1.1351323127746582, + "epoch": 0.94, + "grad_norm": 25.375, + "learning_rate": 2.7265082017971623e-06, + "log_odds": 5.872207164764404, + "log_odds_ratio": -0.5274198651313782, + "loss": 0.329, + "rejected_geometric_mean": -6.664196014404297, + "step": 3811 + }, + { + "chosen_geometric_mean": -1.0546365976333618, + "epoch": 0.94, + "grad_norm": 2.765625, + "learning_rate": 2.7255387152996937e-06, + "log_odds": 2.371328115463257, + "log_odds_ratio": -0.24489685893058777, + "loss": 0.2972, + "rejected_geometric_mean": -3.105264902114868, + "step": 3812 + }, + { + "chosen_geometric_mean": -0.825296938419342, + "epoch": 0.94, + "grad_norm": 3.515625, + "learning_rate": 2.7245691946051415e-06, + "log_odds": 7.122231483459473, + "log_odds_ratio": -0.21376115083694458, + "loss": 0.2555, + "rejected_geometric_mean": -7.503261566162109, + "step": 3813 + }, + { + "chosen_geometric_mean": -1.2358578443527222, + "epoch": 0.94, + "grad_norm": 14.0625, + "learning_rate": 2.723599639860509e-06, + "log_odds": 3.4160852432250977, + "log_odds_ratio": -0.18354201316833496, + "loss": 0.2798, + "rejected_geometric_mean": -4.423125743865967, + "step": 3814 + }, + { + "chosen_geometric_mean": -1.0109686851501465, + "epoch": 0.94, + "grad_norm": 22.125, + "learning_rate": 2.722630051212804e-06, + "log_odds": 1.5838605165481567, + "log_odds_ratio": -0.3567662835121155, + "loss": 0.3268, + "rejected_geometric_mean": -2.2632880210876465, + "step": 3815 + }, + { + "chosen_geometric_mean": -1.3300695419311523, + "epoch": 0.94, + "grad_norm": 40.5, + "learning_rate": 2.7216604288090394e-06, + "log_odds": 2.8416152000427246, + "log_odds_ratio": -0.3541399836540222, + "loss": 0.3068, + "rejected_geometric_mean": -3.9565656185150146, + "step": 3816 + }, + { + "chosen_geometric_mean": -0.929958701133728, + "epoch": 0.95, + "grad_norm": 19.0, + "learning_rate": 2.720690772796233e-06, + "log_odds": 3.12459135055542, + "log_odds_ratio": -0.30053597688674927, + "loss": 0.3033, + "rejected_geometric_mean": -3.794339656829834, + "step": 3817 + }, + { + "chosen_geometric_mean": -0.9936584830284119, + "epoch": 0.95, + "grad_norm": 10.75, + "learning_rate": 2.719721083321408e-06, + "log_odds": 13.110475540161133, + "log_odds_ratio": -0.01186553854495287, + "loss": 0.2613, + "rejected_geometric_mean": -13.633731842041016, + "step": 3818 + }, + { + "chosen_geometric_mean": -1.0523827075958252, + "epoch": 0.95, + "grad_norm": 2.109375, + "learning_rate": 2.7187513605315914e-06, + "log_odds": 2.8673744201660156, + "log_odds_ratio": -0.25718188285827637, + "loss": 0.2991, + "rejected_geometric_mean": -3.5880613327026367, + "step": 3819 + }, + { + "chosen_geometric_mean": -1.0843687057495117, + "epoch": 0.95, + "grad_norm": 2.9375, + "learning_rate": 2.7177816045738193e-06, + "log_odds": 4.8575873374938965, + "log_odds_ratio": -0.20815566182136536, + "loss": 0.3271, + "rejected_geometric_mean": -5.593789100646973, + "step": 3820 + }, + { + "chosen_geometric_mean": -1.0356271266937256, + "epoch": 0.95, + "grad_norm": 12.0, + "learning_rate": 2.716811815595128e-06, + "log_odds": 3.3485991954803467, + "log_odds_ratio": -0.22106468677520752, + "loss": 0.3165, + "rejected_geometric_mean": -4.0457024574279785, + "step": 3821 + }, + { + "chosen_geometric_mean": -0.9741559028625488, + "epoch": 0.95, + "grad_norm": 25.5, + "learning_rate": 2.71584199374256e-06, + "log_odds": 8.006326675415039, + "log_odds_ratio": -0.09536942094564438, + "loss": 0.2741, + "rejected_geometric_mean": -8.574064254760742, + "step": 3822 + }, + { + "chosen_geometric_mean": -1.213606357574463, + "epoch": 0.95, + "grad_norm": 10.4375, + "learning_rate": 2.714872139163166e-06, + "log_odds": 7.715271472930908, + "log_odds_ratio": -0.1167389303445816, + "loss": 0.2967, + "rejected_geometric_mean": -8.60482120513916, + "step": 3823 + }, + { + "chosen_geometric_mean": -1.3541550636291504, + "epoch": 0.95, + "grad_norm": 46.0, + "learning_rate": 2.7139022520039972e-06, + "log_odds": 2.874985933303833, + "log_odds_ratio": -0.3168845474720001, + "loss": 0.3282, + "rejected_geometric_mean": -4.068028926849365, + "step": 3824 + }, + { + "chosen_geometric_mean": -1.1059844493865967, + "epoch": 0.95, + "grad_norm": 4.40625, + "learning_rate": 2.7129323324121127e-06, + "log_odds": 9.442787170410156, + "log_odds_ratio": -0.17835761606693268, + "loss": 0.3283, + "rejected_geometric_mean": -10.242212295532227, + "step": 3825 + }, + { + "chosen_geometric_mean": -1.207403302192688, + "epoch": 0.95, + "grad_norm": 7.15625, + "learning_rate": 2.711962380534575e-06, + "log_odds": 6.324188232421875, + "log_odds_ratio": -0.24962970614433289, + "loss": 0.275, + "rejected_geometric_mean": -7.299869537353516, + "step": 3826 + }, + { + "chosen_geometric_mean": -1.005439281463623, + "epoch": 0.95, + "grad_norm": 51.0, + "learning_rate": 2.710992396518453e-06, + "log_odds": 6.262500762939453, + "log_odds_ratio": -0.23629601299762726, + "loss": 0.3126, + "rejected_geometric_mean": -6.908750057220459, + "step": 3827 + }, + { + "chosen_geometric_mean": -0.9611599445343018, + "epoch": 0.95, + "grad_norm": 2.015625, + "learning_rate": 2.7100223805108195e-06, + "log_odds": 5.597561359405518, + "log_odds_ratio": -0.1509944200515747, + "loss": 0.2458, + "rejected_geometric_mean": -6.183685779571533, + "step": 3828 + }, + { + "chosen_geometric_mean": -1.0923608541488647, + "epoch": 0.95, + "grad_norm": 2.9375, + "learning_rate": 2.709052332658751e-06, + "log_odds": 3.503726005554199, + "log_odds_ratio": -0.1945311427116394, + "loss": 0.2769, + "rejected_geometric_mean": -4.266390323638916, + "step": 3829 + }, + { + "chosen_geometric_mean": -0.961806058883667, + "epoch": 0.95, + "grad_norm": 2.375, + "learning_rate": 2.708082253109331e-06, + "log_odds": 5.460874557495117, + "log_odds_ratio": -0.09387417882680893, + "loss": 0.2428, + "rejected_geometric_mean": -5.991433143615723, + "step": 3830 + }, + { + "chosen_geometric_mean": -0.8513804078102112, + "epoch": 0.95, + "grad_norm": 2.078125, + "learning_rate": 2.707112142009647e-06, + "log_odds": 6.757330894470215, + "log_odds_ratio": -0.1725383698940277, + "loss": 0.2824, + "rejected_geometric_mean": -7.130962371826172, + "step": 3831 + }, + { + "chosen_geometric_mean": -1.0580593347549438, + "epoch": 0.95, + "grad_norm": 3.390625, + "learning_rate": 2.7061419995067906e-06, + "log_odds": 9.018266677856445, + "log_odds_ratio": -0.21492712199687958, + "loss": 0.2537, + "rejected_geometric_mean": -9.802047729492188, + "step": 3832 + }, + { + "chosen_geometric_mean": -1.1982967853546143, + "epoch": 0.95, + "grad_norm": 17.0, + "learning_rate": 2.7051718257478586e-06, + "log_odds": 0.23246964812278748, + "log_odds_ratio": -0.5871098041534424, + "loss": 0.2967, + "rejected_geometric_mean": -1.3728886842727661, + "step": 3833 + }, + { + "chosen_geometric_mean": -1.1099666357040405, + "epoch": 0.95, + "grad_norm": 2.296875, + "learning_rate": 2.7042016208799527e-06, + "log_odds": 9.24205493927002, + "log_odds_ratio": -0.25279054045677185, + "loss": 0.3008, + "rejected_geometric_mean": -10.105158805847168, + "step": 3834 + }, + { + "chosen_geometric_mean": -1.1194299459457397, + "epoch": 0.95, + "grad_norm": 8.8125, + "learning_rate": 2.7032313850501795e-06, + "log_odds": 9.010629653930664, + "log_odds_ratio": -0.04489576444029808, + "loss": 0.2672, + "rejected_geometric_mean": -9.748252868652344, + "step": 3835 + }, + { + "chosen_geometric_mean": -1.2753554582595825, + "epoch": 0.95, + "grad_norm": 3.078125, + "learning_rate": 2.7022611184056506e-06, + "log_odds": 7.51285457611084, + "log_odds_ratio": -0.08683468401432037, + "loss": 0.2754, + "rejected_geometric_mean": -8.473499298095703, + "step": 3836 + }, + { + "chosen_geometric_mean": -0.9242405891418457, + "epoch": 0.95, + "grad_norm": 19.0, + "learning_rate": 2.7012908210934806e-06, + "log_odds": 4.253635406494141, + "log_odds_ratio": -0.09663918614387512, + "loss": 0.2594, + "rejected_geometric_mean": -4.725961208343506, + "step": 3837 + }, + { + "chosen_geometric_mean": -0.904473602771759, + "epoch": 0.95, + "grad_norm": 3.84375, + "learning_rate": 2.7003204932607903e-06, + "log_odds": 5.460289001464844, + "log_odds_ratio": -0.28924834728240967, + "loss": 0.2919, + "rejected_geometric_mean": -6.083212852478027, + "step": 3838 + }, + { + "chosen_geometric_mean": -1.162104845046997, + "epoch": 0.95, + "grad_norm": 4.75, + "learning_rate": 2.6993501350547048e-06, + "log_odds": 4.090257167816162, + "log_odds_ratio": -0.3600360155105591, + "loss": 0.3259, + "rejected_geometric_mean": -5.057740211486816, + "step": 3839 + }, + { + "chosen_geometric_mean": -0.9416233897209167, + "epoch": 0.95, + "grad_norm": 3.171875, + "learning_rate": 2.6983797466223544e-06, + "log_odds": 3.8017640113830566, + "log_odds_ratio": -0.2572017312049866, + "loss": 0.2704, + "rejected_geometric_mean": -4.3370585441589355, + "step": 3840 + }, + { + "chosen_geometric_mean": -0.9081772565841675, + "epoch": 0.95, + "grad_norm": 3.109375, + "learning_rate": 2.697409328110871e-06, + "log_odds": 8.487682342529297, + "log_odds_ratio": -0.09963453561067581, + "loss": 0.2469, + "rejected_geometric_mean": -8.952483177185059, + "step": 3841 + }, + { + "chosen_geometric_mean": -1.3935757875442505, + "epoch": 0.95, + "grad_norm": 4.8125, + "learning_rate": 2.6964388796673957e-06, + "log_odds": 3.4757466316223145, + "log_odds_ratio": -0.29609426856040955, + "loss": 0.2489, + "rejected_geometric_mean": -4.66963005065918, + "step": 3842 + }, + { + "chosen_geometric_mean": -1.2341513633728027, + "epoch": 0.95, + "grad_norm": 43.25, + "learning_rate": 2.695468401439072e-06, + "log_odds": 6.593810558319092, + "log_odds_ratio": -0.04302907735109329, + "loss": 0.4001, + "rejected_geometric_mean": -7.459233283996582, + "step": 3843 + }, + { + "chosen_geometric_mean": -1.2496697902679443, + "epoch": 0.95, + "grad_norm": 22.0, + "learning_rate": 2.694497893573045e-06, + "log_odds": 0.9587297439575195, + "log_odds_ratio": -0.5659735202789307, + "loss": 0.3281, + "rejected_geometric_mean": -2.154554605484009, + "step": 3844 + }, + { + "chosen_geometric_mean": -0.9927520155906677, + "epoch": 0.95, + "grad_norm": 3.34375, + "learning_rate": 2.693527356216469e-06, + "log_odds": 4.349319934844971, + "log_odds_ratio": -0.49306124448776245, + "loss": 0.2848, + "rejected_geometric_mean": -5.174957275390625, + "step": 3845 + }, + { + "chosen_geometric_mean": -1.0843225717544556, + "epoch": 0.95, + "grad_norm": 6.9375, + "learning_rate": 2.6925567895165005e-06, + "log_odds": 9.72653865814209, + "log_odds_ratio": -0.0033705951645970345, + "loss": 0.2684, + "rejected_geometric_mean": -10.382379531860352, + "step": 3846 + }, + { + "chosen_geometric_mean": -0.9197510480880737, + "epoch": 0.95, + "grad_norm": 15.25, + "learning_rate": 2.691586193620301e-06, + "log_odds": 4.626982688903809, + "log_odds_ratio": -0.1474650353193283, + "loss": 0.3322, + "rejected_geometric_mean": -5.083447456359863, + "step": 3847 + }, + { + "chosen_geometric_mean": -1.3696478605270386, + "epoch": 0.95, + "grad_norm": 21.625, + "learning_rate": 2.690615568675035e-06, + "log_odds": 4.208552360534668, + "log_odds_ratio": -0.3146294057369232, + "loss": 0.3076, + "rejected_geometric_mean": -5.433183193206787, + "step": 3848 + }, + { + "chosen_geometric_mean": -1.0176115036010742, + "epoch": 0.95, + "grad_norm": 41.75, + "learning_rate": 2.689644914827874e-06, + "log_odds": 9.045587539672852, + "log_odds_ratio": -0.004295751452445984, + "loss": 0.2672, + "rejected_geometric_mean": -9.588794708251953, + "step": 3849 + }, + { + "chosen_geometric_mean": -1.276519775390625, + "epoch": 0.95, + "grad_norm": 3.8125, + "learning_rate": 2.6886742322259912e-06, + "log_odds": 7.287690162658691, + "log_odds_ratio": -0.19443507492542267, + "loss": 0.263, + "rejected_geometric_mean": -8.319576263427734, + "step": 3850 + }, + { + "chosen_geometric_mean": -1.1105457544326782, + "epoch": 0.95, + "grad_norm": 3.0625, + "learning_rate": 2.6877035210165663e-06, + "log_odds": 3.8835031986236572, + "log_odds_ratio": -0.07633459568023682, + "loss": 0.2862, + "rejected_geometric_mean": -4.6241559982299805, + "step": 3851 + }, + { + "chosen_geometric_mean": -0.9334038496017456, + "epoch": 0.95, + "grad_norm": 21.75, + "learning_rate": 2.686732781346782e-06, + "log_odds": 10.307029724121094, + "log_odds_ratio": -0.007928132079541683, + "loss": 0.2456, + "rejected_geometric_mean": -10.742156028747559, + "step": 3852 + }, + { + "chosen_geometric_mean": -0.9983002543449402, + "epoch": 0.95, + "grad_norm": 5.21875, + "learning_rate": 2.685762013363825e-06, + "log_odds": 4.564572334289551, + "log_odds_ratio": -0.22880204021930695, + "loss": 0.2808, + "rejected_geometric_mean": -5.18511962890625, + "step": 3853 + }, + { + "chosen_geometric_mean": -1.2935389280319214, + "epoch": 0.95, + "grad_norm": 7.875, + "learning_rate": 2.6847912172148888e-06, + "log_odds": 7.007040977478027, + "log_odds_ratio": -0.19183793663978577, + "loss": 0.2926, + "rejected_geometric_mean": -8.047286987304688, + "step": 3854 + }, + { + "chosen_geometric_mean": -0.9684931039810181, + "epoch": 0.95, + "grad_norm": 4.125, + "learning_rate": 2.683820393047167e-06, + "log_odds": 9.194038391113281, + "log_odds_ratio": -0.17482280731201172, + "loss": 0.2435, + "rejected_geometric_mean": -9.731231689453125, + "step": 3855 + }, + { + "chosen_geometric_mean": -1.1260838508605957, + "epoch": 0.95, + "grad_norm": 8.8125, + "learning_rate": 2.6828495410078626e-06, + "log_odds": 6.628457069396973, + "log_odds_ratio": -0.16968391835689545, + "loss": 0.2721, + "rejected_geometric_mean": -7.403858661651611, + "step": 3856 + }, + { + "chosen_geometric_mean": -1.0462841987609863, + "epoch": 0.95, + "grad_norm": 11.1875, + "learning_rate": 2.6818786612441777e-06, + "log_odds": 2.3222270011901855, + "log_odds_ratio": -0.34769919514656067, + "loss": 0.3646, + "rejected_geometric_mean": -3.136199951171875, + "step": 3857 + }, + { + "chosen_geometric_mean": -1.066956639289856, + "epoch": 0.96, + "grad_norm": 2.578125, + "learning_rate": 2.680907753903322e-06, + "log_odds": 3.452131986618042, + "log_odds_ratio": -0.34592151641845703, + "loss": 0.2212, + "rejected_geometric_mean": -4.3054633140563965, + "step": 3858 + }, + { + "chosen_geometric_mean": -1.1559313535690308, + "epoch": 0.96, + "grad_norm": 5.5, + "learning_rate": 2.679936819132509e-06, + "log_odds": 5.849926948547363, + "log_odds_ratio": -0.0978253036737442, + "loss": 0.2709, + "rejected_geometric_mean": -6.656075477600098, + "step": 3859 + }, + { + "chosen_geometric_mean": -0.8270468711853027, + "epoch": 0.96, + "grad_norm": 2.8125, + "learning_rate": 2.6789658570789547e-06, + "log_odds": 6.12255334854126, + "log_odds_ratio": -0.004043467342853546, + "loss": 0.2244, + "rejected_geometric_mean": -6.341940402984619, + "step": 3860 + }, + { + "chosen_geometric_mean": -1.0197882652282715, + "epoch": 0.96, + "grad_norm": 5.5, + "learning_rate": 2.6779948678898803e-06, + "log_odds": 5.227257251739502, + "log_odds_ratio": -0.10923778265714645, + "loss": 0.2574, + "rejected_geometric_mean": -5.872077941894531, + "step": 3861 + }, + { + "chosen_geometric_mean": -0.8991889953613281, + "epoch": 0.96, + "grad_norm": 11.9375, + "learning_rate": 2.677023851712512e-06, + "log_odds": 8.167943000793457, + "log_odds_ratio": -0.0034896344877779484, + "loss": 0.2878, + "rejected_geometric_mean": -8.539346694946289, + "step": 3862 + }, + { + "chosen_geometric_mean": -1.0969724655151367, + "epoch": 0.96, + "grad_norm": 20.625, + "learning_rate": 2.676052808694078e-06, + "log_odds": 10.788658142089844, + "log_odds_ratio": -0.08009103685617447, + "loss": 0.3445, + "rejected_geometric_mean": -11.516550064086914, + "step": 3863 + }, + { + "chosen_geometric_mean": -0.9305874109268188, + "epoch": 0.96, + "grad_norm": 1.8984375, + "learning_rate": 2.6750817389818125e-06, + "log_odds": 8.838518142700195, + "log_odds_ratio": -0.00041950575541704893, + "loss": 0.2071, + "rejected_geometric_mean": -9.252887725830078, + "step": 3864 + }, + { + "chosen_geometric_mean": -1.2621753215789795, + "epoch": 0.96, + "grad_norm": 16.125, + "learning_rate": 2.6741106427229526e-06, + "log_odds": 4.83649206161499, + "log_odds_ratio": -0.09334736317396164, + "loss": 0.3161, + "rejected_geometric_mean": -5.810937881469727, + "step": 3865 + }, + { + "chosen_geometric_mean": -1.1809558868408203, + "epoch": 0.96, + "grad_norm": 2.46875, + "learning_rate": 2.6731395200647397e-06, + "log_odds": 7.5341691970825195, + "log_odds_ratio": -0.036387503147125244, + "loss": 0.2793, + "rejected_geometric_mean": -8.340482711791992, + "step": 3866 + }, + { + "chosen_geometric_mean": -1.0133328437805176, + "epoch": 0.96, + "grad_norm": 16.875, + "learning_rate": 2.6721683711544198e-06, + "log_odds": 1.2948057651519775, + "log_odds_ratio": -0.3307524025440216, + "loss": 0.2953, + "rejected_geometric_mean": -2.0953519344329834, + "step": 3867 + }, + { + "chosen_geometric_mean": -1.0678282976150513, + "epoch": 0.96, + "grad_norm": 23.625, + "learning_rate": 2.671197196139241e-06, + "log_odds": 3.2255544662475586, + "log_odds_ratio": -0.1963023841381073, + "loss": 0.3484, + "rejected_geometric_mean": -3.931974411010742, + "step": 3868 + }, + { + "chosen_geometric_mean": -1.101461410522461, + "epoch": 0.96, + "grad_norm": 3.28125, + "learning_rate": 2.67022599516646e-06, + "log_odds": 3.180105686187744, + "log_odds_ratio": -0.23988065123558044, + "loss": 0.2802, + "rejected_geometric_mean": -3.9807796478271484, + "step": 3869 + }, + { + "chosen_geometric_mean": -1.0131280422210693, + "epoch": 0.96, + "grad_norm": 3.015625, + "learning_rate": 2.669254768383331e-06, + "log_odds": 6.535216808319092, + "log_odds_ratio": -0.038893140852451324, + "loss": 0.2629, + "rejected_geometric_mean": -7.117879867553711, + "step": 3870 + }, + { + "chosen_geometric_mean": -1.4287420511245728, + "epoch": 0.96, + "grad_norm": 34.25, + "learning_rate": 2.6682835159371157e-06, + "log_odds": 2.876556396484375, + "log_odds_ratio": -0.19729319214820862, + "loss": 0.363, + "rejected_geometric_mean": -4.114314556121826, + "step": 3871 + }, + { + "chosen_geometric_mean": -1.1582939624786377, + "epoch": 0.96, + "grad_norm": 9.9375, + "learning_rate": 2.6673122379750816e-06, + "log_odds": 3.5735416412353516, + "log_odds_ratio": -0.1830572634935379, + "loss": 0.2631, + "rejected_geometric_mean": -4.372012615203857, + "step": 3872 + }, + { + "chosen_geometric_mean": -1.0713056325912476, + "epoch": 0.96, + "grad_norm": 3.640625, + "learning_rate": 2.6663409346444945e-06, + "log_odds": 3.256155490875244, + "log_odds_ratio": -0.11794877052307129, + "loss": 0.2877, + "rejected_geometric_mean": -3.9439525604248047, + "step": 3873 + }, + { + "chosen_geometric_mean": -0.905980110168457, + "epoch": 0.96, + "grad_norm": 2.078125, + "learning_rate": 2.6653696060926294e-06, + "log_odds": 8.457446098327637, + "log_odds_ratio": -0.17220719158649445, + "loss": 0.2875, + "rejected_geometric_mean": -8.925546646118164, + "step": 3874 + }, + { + "chosen_geometric_mean": -1.140299677848816, + "epoch": 0.96, + "grad_norm": 5.125, + "learning_rate": 2.664398252466762e-06, + "log_odds": 1.7896219491958618, + "log_odds_ratio": -0.36903560161590576, + "loss": 0.3017, + "rejected_geometric_mean": -2.7609453201293945, + "step": 3875 + }, + { + "chosen_geometric_mean": -1.0108286142349243, + "epoch": 0.96, + "grad_norm": 5.25, + "learning_rate": 2.663426873914174e-06, + "log_odds": 3.2497894763946533, + "log_odds_ratio": -0.09184722602367401, + "loss": 0.2478, + "rejected_geometric_mean": -3.8424131870269775, + "step": 3876 + }, + { + "chosen_geometric_mean": -1.1824238300323486, + "epoch": 0.96, + "grad_norm": 7.90625, + "learning_rate": 2.6624554705821488e-06, + "log_odds": 1.2801728248596191, + "log_odds_ratio": -0.3405793011188507, + "loss": 0.3462, + "rejected_geometric_mean": -2.278298854827881, + "step": 3877 + }, + { + "chosen_geometric_mean": -1.1266831159591675, + "epoch": 0.96, + "grad_norm": 14.125, + "learning_rate": 2.661484042617975e-06, + "log_odds": 3.983914375305176, + "log_odds_ratio": -0.4384545683860779, + "loss": 0.3286, + "rejected_geometric_mean": -4.825585842132568, + "step": 3878 + }, + { + "chosen_geometric_mean": -1.411383867263794, + "epoch": 0.96, + "grad_norm": 11.75, + "learning_rate": 2.660512590168944e-06, + "log_odds": 3.091453790664673, + "log_odds_ratio": -0.3295869827270508, + "loss": 0.3615, + "rejected_geometric_mean": -4.363338470458984, + "step": 3879 + }, + { + "chosen_geometric_mean": -1.0488877296447754, + "epoch": 0.96, + "grad_norm": 5.0, + "learning_rate": 2.6595411133823507e-06, + "log_odds": 3.312878370285034, + "log_odds_ratio": -0.09155716001987457, + "loss": 0.2532, + "rejected_geometric_mean": -3.9379079341888428, + "step": 3880 + }, + { + "chosen_geometric_mean": -1.1118922233581543, + "epoch": 0.96, + "grad_norm": 3.109375, + "learning_rate": 2.6585696124054956e-06, + "log_odds": 4.1343560218811035, + "log_odds_ratio": -0.15862996876239777, + "loss": 0.2824, + "rejected_geometric_mean": -4.9272308349609375, + "step": 3881 + }, + { + "chosen_geometric_mean": -1.0205204486846924, + "epoch": 0.96, + "grad_norm": 5.34375, + "learning_rate": 2.657598087385681e-06, + "log_odds": 2.7077035903930664, + "log_odds_ratio": -0.20356085896492004, + "loss": 0.3093, + "rejected_geometric_mean": -3.3467977046966553, + "step": 3882 + }, + { + "chosen_geometric_mean": -1.1957292556762695, + "epoch": 0.96, + "grad_norm": 21.0, + "learning_rate": 2.6566265384702134e-06, + "log_odds": 2.0562610626220703, + "log_odds_ratio": -0.2240271419286728, + "loss": 0.3121, + "rejected_geometric_mean": -2.957305431365967, + "step": 3883 + }, + { + "chosen_geometric_mean": -1.208230972290039, + "epoch": 0.96, + "grad_norm": 6.96875, + "learning_rate": 2.6556549658064023e-06, + "log_odds": 1.9343277215957642, + "log_odds_ratio": -0.17489419877529144, + "loss": 0.2963, + "rejected_geometric_mean": -2.877544403076172, + "step": 3884 + }, + { + "chosen_geometric_mean": -1.0467636585235596, + "epoch": 0.96, + "grad_norm": 5.0, + "learning_rate": 2.654683369541563e-06, + "log_odds": 3.295869827270508, + "log_odds_ratio": -0.2105405330657959, + "loss": 0.2895, + "rejected_geometric_mean": -3.994194269180298, + "step": 3885 + }, + { + "chosen_geometric_mean": -1.2975965738296509, + "epoch": 0.96, + "grad_norm": 8.375, + "learning_rate": 2.6537117498230114e-06, + "log_odds": 3.4379496574401855, + "log_odds_ratio": -0.3217507600784302, + "loss": 0.3185, + "rejected_geometric_mean": -4.494587421417236, + "step": 3886 + }, + { + "chosen_geometric_mean": -1.3181824684143066, + "epoch": 0.96, + "grad_norm": 9.3125, + "learning_rate": 2.652740106798068e-06, + "log_odds": 2.610386848449707, + "log_odds_ratio": -0.12682269513607025, + "loss": 0.2253, + "rejected_geometric_mean": -3.631255865097046, + "step": 3887 + }, + { + "chosen_geometric_mean": -1.088507056236267, + "epoch": 0.96, + "grad_norm": 6.5625, + "learning_rate": 2.651768440614059e-06, + "log_odds": 3.3886709213256836, + "log_odds_ratio": -0.1197056993842125, + "loss": 0.3311, + "rejected_geometric_mean": -4.136783599853516, + "step": 3888 + }, + { + "chosen_geometric_mean": -1.3074508905410767, + "epoch": 0.96, + "grad_norm": 5.8125, + "learning_rate": 2.6507967514183117e-06, + "log_odds": 3.4591081142425537, + "log_odds_ratio": -0.08636686205863953, + "loss": 0.2757, + "rejected_geometric_mean": -4.480347633361816, + "step": 3889 + }, + { + "chosen_geometric_mean": -1.1100184917449951, + "epoch": 0.96, + "grad_norm": 3.53125, + "learning_rate": 2.6498250393581563e-06, + "log_odds": 2.8808422088623047, + "log_odds_ratio": -0.16029179096221924, + "loss": 0.2819, + "rejected_geometric_mean": -3.661818027496338, + "step": 3890 + }, + { + "chosen_geometric_mean": -1.2550013065338135, + "epoch": 0.96, + "grad_norm": 6.46875, + "learning_rate": 2.648853304580929e-06, + "log_odds": 2.1111083030700684, + "log_odds_ratio": -0.12323834747076035, + "loss": 0.3406, + "rejected_geometric_mean": -3.069636821746826, + "step": 3891 + }, + { + "chosen_geometric_mean": -0.9950846433639526, + "epoch": 0.96, + "grad_norm": 15.8125, + "learning_rate": 2.647881547233968e-06, + "log_odds": 4.123851776123047, + "log_odds_ratio": -0.1095976009964943, + "loss": 0.3291, + "rejected_geometric_mean": -4.688501834869385, + "step": 3892 + }, + { + "chosen_geometric_mean": -1.0322648286819458, + "epoch": 0.96, + "grad_norm": 2.84375, + "learning_rate": 2.6469097674646145e-06, + "log_odds": 0.5444008111953735, + "log_odds_ratio": -0.48214447498321533, + "loss": 0.3041, + "rejected_geometric_mean": -1.4306190013885498, + "step": 3893 + }, + { + "chosen_geometric_mean": -1.491391658782959, + "epoch": 0.96, + "grad_norm": 29.75, + "learning_rate": 2.6459379654202134e-06, + "log_odds": 0.7709269523620605, + "log_odds_ratio": -0.4274784326553345, + "loss": 0.3363, + "rejected_geometric_mean": -2.159567356109619, + "step": 3894 + }, + { + "chosen_geometric_mean": -0.9524791240692139, + "epoch": 0.96, + "grad_norm": 20.0, + "learning_rate": 2.6449661412481137e-06, + "log_odds": 7.310918807983398, + "log_odds_ratio": -0.09191194176673889, + "loss": 0.2823, + "rejected_geometric_mean": -7.8400349617004395, + "step": 3895 + }, + { + "chosen_geometric_mean": -1.1572151184082031, + "epoch": 0.96, + "grad_norm": 8.1875, + "learning_rate": 2.6439942950956675e-06, + "log_odds": 3.5288634300231934, + "log_odds_ratio": -0.040598731487989426, + "loss": 0.3223, + "rejected_geometric_mean": -4.275703430175781, + "step": 3896 + }, + { + "chosen_geometric_mean": -1.0589420795440674, + "epoch": 0.96, + "grad_norm": 11.8125, + "learning_rate": 2.64302242711023e-06, + "log_odds": 8.61948013305664, + "log_odds_ratio": -0.034443363547325134, + "loss": 0.3453, + "rejected_geometric_mean": -9.246077537536621, + "step": 3897 + }, + { + "chosen_geometric_mean": -1.2480723857879639, + "epoch": 0.97, + "grad_norm": 4.96875, + "learning_rate": 2.642050537439159e-06, + "log_odds": 5.4004974365234375, + "log_odds_ratio": -0.18192772567272186, + "loss": 0.3152, + "rejected_geometric_mean": -6.393481254577637, + "step": 3898 + }, + { + "chosen_geometric_mean": -1.1528542041778564, + "epoch": 0.97, + "grad_norm": 16.875, + "learning_rate": 2.641078626229817e-06, + "log_odds": 5.658370494842529, + "log_odds_ratio": -0.23766785860061646, + "loss": 0.3153, + "rejected_geometric_mean": -6.578524112701416, + "step": 3899 + }, + { + "chosen_geometric_mean": -1.118905782699585, + "epoch": 0.97, + "grad_norm": 25.125, + "learning_rate": 2.6401066936295686e-06, + "log_odds": 7.418420791625977, + "log_odds_ratio": -0.020083926618099213, + "loss": 0.3805, + "rejected_geometric_mean": -8.144485473632812, + "step": 3900 + }, + { + "chosen_geometric_mean": -1.2145652770996094, + "epoch": 0.97, + "grad_norm": 11.875, + "learning_rate": 2.639134739785782e-06, + "log_odds": 2.7794482707977295, + "log_odds_ratio": -0.30256223678588867, + "loss": 0.4431, + "rejected_geometric_mean": -3.7520246505737305, + "step": 3901 + }, + { + "chosen_geometric_mean": -1.4001128673553467, + "epoch": 0.97, + "grad_norm": 22.375, + "learning_rate": 2.63816276484583e-06, + "log_odds": 2.066450357437134, + "log_odds_ratio": -0.3685142695903778, + "loss": 0.2632, + "rejected_geometric_mean": -3.2542762756347656, + "step": 3902 + }, + { + "chosen_geometric_mean": -0.9864199161529541, + "epoch": 0.97, + "grad_norm": 4.46875, + "learning_rate": 2.6371907689570866e-06, + "log_odds": 2.4380111694335938, + "log_odds_ratio": -0.29622769355773926, + "loss": 0.2519, + "rejected_geometric_mean": -3.090878486633301, + "step": 3903 + }, + { + "chosen_geometric_mean": -1.2627077102661133, + "epoch": 0.97, + "grad_norm": 6.5625, + "learning_rate": 2.636218752266928e-06, + "log_odds": 1.4919377565383911, + "log_odds_ratio": -0.40123704075813293, + "loss": 0.2981, + "rejected_geometric_mean": -2.6140122413635254, + "step": 3904 + }, + { + "chosen_geometric_mean": -0.9582016468048096, + "epoch": 0.97, + "grad_norm": 4.28125, + "learning_rate": 2.6352467149227382e-06, + "log_odds": 2.266414165496826, + "log_odds_ratio": -0.16922689974308014, + "loss": 0.3212, + "rejected_geometric_mean": -2.852879524230957, + "step": 3905 + }, + { + "chosen_geometric_mean": -1.0168757438659668, + "epoch": 0.97, + "grad_norm": 11.25, + "learning_rate": 2.634274657071899e-06, + "log_odds": 2.042123317718506, + "log_odds_ratio": -0.2083290070295334, + "loss": 0.3306, + "rejected_geometric_mean": -2.7315311431884766, + "step": 3906 + }, + { + "chosen_geometric_mean": -0.9051659107208252, + "epoch": 0.97, + "grad_norm": 10.1875, + "learning_rate": 2.633302578861799e-06, + "log_odds": 2.855919599533081, + "log_odds_ratio": -0.199387788772583, + "loss": 0.3536, + "rejected_geometric_mean": -3.2978515625, + "step": 3907 + }, + { + "chosen_geometric_mean": -1.0534319877624512, + "epoch": 0.97, + "grad_norm": 8.75, + "learning_rate": 2.632330480439828e-06, + "log_odds": 3.086115598678589, + "log_odds_ratio": -0.09025166183710098, + "loss": 0.342, + "rejected_geometric_mean": -3.7487895488739014, + "step": 3908 + }, + { + "chosen_geometric_mean": -1.3344519138336182, + "epoch": 0.97, + "grad_norm": 15.5, + "learning_rate": 2.6313583619533804e-06, + "log_odds": 2.747081995010376, + "log_odds_ratio": -0.11627593636512756, + "loss": 0.3247, + "rejected_geometric_mean": -3.8126964569091797, + "step": 3909 + }, + { + "chosen_geometric_mean": -1.0507640838623047, + "epoch": 0.97, + "grad_norm": 14.4375, + "learning_rate": 2.630386223549852e-06, + "log_odds": 2.1231729984283447, + "log_odds_ratio": -0.13256534934043884, + "loss": 0.291, + "rejected_geometric_mean": -2.812527656555176, + "step": 3910 + }, + { + "chosen_geometric_mean": -0.9997478723526001, + "epoch": 0.97, + "grad_norm": 3.0, + "learning_rate": 2.6294140653766415e-06, + "log_odds": 1.1655744314193726, + "log_odds_ratio": -0.3148081600666046, + "loss": 0.2763, + "rejected_geometric_mean": -1.8941446542739868, + "step": 3911 + }, + { + "chosen_geometric_mean": -1.0493931770324707, + "epoch": 0.97, + "grad_norm": 2.1875, + "learning_rate": 2.628441887581153e-06, + "log_odds": 1.258209228515625, + "log_odds_ratio": -0.35467925667762756, + "loss": 0.2776, + "rejected_geometric_mean": -2.0730907917022705, + "step": 3912 + }, + { + "chosen_geometric_mean": -1.2926881313323975, + "epoch": 0.97, + "grad_norm": 8.6875, + "learning_rate": 2.627469690310791e-06, + "log_odds": 2.7945001125335693, + "log_odds_ratio": -0.2223738431930542, + "loss": 0.3331, + "rejected_geometric_mean": -3.8660781383514404, + "step": 3913 + }, + { + "chosen_geometric_mean": -1.13035249710083, + "epoch": 0.97, + "grad_norm": 6.78125, + "learning_rate": 2.626497473712964e-06, + "log_odds": 3.4171142578125, + "log_odds_ratio": -0.3028157651424408, + "loss": 0.3163, + "rejected_geometric_mean": -4.314797401428223, + "step": 3914 + }, + { + "chosen_geometric_mean": -1.1923340559005737, + "epoch": 0.97, + "grad_norm": 5.6875, + "learning_rate": 2.6255252379350838e-06, + "log_odds": 4.180509567260742, + "log_odds_ratio": -0.1752960979938507, + "loss": 0.2454, + "rejected_geometric_mean": -5.063470840454102, + "step": 3915 + }, + { + "chosen_geometric_mean": -0.9594669938087463, + "epoch": 0.97, + "grad_norm": 2.578125, + "learning_rate": 2.6245529831245643e-06, + "log_odds": 4.350307941436768, + "log_odds_ratio": -0.04904816299676895, + "loss": 0.2903, + "rejected_geometric_mean": -4.8517584800720215, + "step": 3916 + }, + { + "chosen_geometric_mean": -0.7801414728164673, + "epoch": 0.97, + "grad_norm": 3.59375, + "learning_rate": 2.6235807094288223e-06, + "log_odds": 2.037268877029419, + "log_odds_ratio": -0.2454068809747696, + "loss": 0.2666, + "rejected_geometric_mean": -2.397434949874878, + "step": 3917 + }, + { + "chosen_geometric_mean": -1.2117016315460205, + "epoch": 0.97, + "grad_norm": 2.828125, + "learning_rate": 2.622608416995279e-06, + "log_odds": 0.85453200340271, + "log_odds_ratio": -0.4395155608654022, + "loss": 0.2963, + "rejected_geometric_mean": -1.9169833660125732, + "step": 3918 + }, + { + "chosen_geometric_mean": -1.0826315879821777, + "epoch": 0.97, + "grad_norm": 37.75, + "learning_rate": 2.621636105971357e-06, + "log_odds": 1.6373645067214966, + "log_odds_ratio": -0.4569999575614929, + "loss": 0.3144, + "rejected_geometric_mean": -2.589409112930298, + "step": 3919 + }, + { + "chosen_geometric_mean": -0.9859212636947632, + "epoch": 0.97, + "grad_norm": 10.625, + "learning_rate": 2.6206637765044806e-06, + "log_odds": 4.929290771484375, + "log_odds_ratio": -0.010094218887388706, + "loss": 0.2849, + "rejected_geometric_mean": -5.431441307067871, + "step": 3920 + }, + { + "chosen_geometric_mean": -1.200622320175171, + "epoch": 0.97, + "grad_norm": 2.9375, + "learning_rate": 2.6196914287420795e-06, + "log_odds": 1.6112987995147705, + "log_odds_ratio": -0.3329187035560608, + "loss": 0.2787, + "rejected_geometric_mean": -2.5472354888916016, + "step": 3921 + }, + { + "chosen_geometric_mean": -1.4642163515090942, + "epoch": 0.97, + "grad_norm": 6.4375, + "learning_rate": 2.6187190628315855e-06, + "log_odds": 1.9855968952178955, + "log_odds_ratio": -0.30616825819015503, + "loss": 0.3177, + "rejected_geometric_mean": -3.266690731048584, + "step": 3922 + }, + { + "chosen_geometric_mean": -1.0519698858261108, + "epoch": 0.97, + "grad_norm": 2.953125, + "learning_rate": 2.617746678920431e-06, + "log_odds": 4.93337869644165, + "log_odds_ratio": -0.29239797592163086, + "loss": 0.2637, + "rejected_geometric_mean": -5.752996921539307, + "step": 3923 + }, + { + "chosen_geometric_mean": -1.2315467596054077, + "epoch": 0.97, + "grad_norm": 17.75, + "learning_rate": 2.616774277156054e-06, + "log_odds": 2.492825508117676, + "log_odds_ratio": -0.32252347469329834, + "loss": 0.3439, + "rejected_geometric_mean": -3.531259775161743, + "step": 3924 + }, + { + "chosen_geometric_mean": -1.0144586563110352, + "epoch": 0.97, + "grad_norm": 2.328125, + "learning_rate": 2.615801857685894e-06, + "log_odds": 4.8044538497924805, + "log_odds_ratio": -0.2694213092327118, + "loss": 0.278, + "rejected_geometric_mean": -5.536789894104004, + "step": 3925 + }, + { + "chosen_geometric_mean": -1.071277379989624, + "epoch": 0.97, + "grad_norm": 2.828125, + "learning_rate": 2.6148294206573915e-06, + "log_odds": 2.2654454708099365, + "log_odds_ratio": -0.15360066294670105, + "loss": 0.2715, + "rejected_geometric_mean": -2.9971346855163574, + "step": 3926 + }, + { + "chosen_geometric_mean": -1.0309406518936157, + "epoch": 0.97, + "grad_norm": 3.609375, + "learning_rate": 2.613856966217993e-06, + "log_odds": 5.259388446807861, + "log_odds_ratio": -0.04955130070447922, + "loss": 0.3317, + "rejected_geometric_mean": -5.866730690002441, + "step": 3927 + }, + { + "chosen_geometric_mean": -1.2537015676498413, + "epoch": 0.97, + "grad_norm": 6.875, + "learning_rate": 2.6128844945151454e-06, + "log_odds": 1.2864172458648682, + "log_odds_ratio": -0.3671777844429016, + "loss": 0.2531, + "rejected_geometric_mean": -2.40129017829895, + "step": 3928 + }, + { + "chosen_geometric_mean": -0.968879222869873, + "epoch": 0.97, + "grad_norm": 10.3125, + "learning_rate": 2.6119120056962994e-06, + "log_odds": 4.278454780578613, + "log_odds_ratio": -0.16035671532154083, + "loss": 0.2953, + "rejected_geometric_mean": -4.8519463539123535, + "step": 3929 + }, + { + "chosen_geometric_mean": -0.9584509134292603, + "epoch": 0.97, + "grad_norm": 16.25, + "learning_rate": 2.6109394999089063e-06, + "log_odds": 5.467742919921875, + "log_odds_ratio": -0.05999675765633583, + "loss": 0.3162, + "rejected_geometric_mean": -5.954652309417725, + "step": 3930 + }, + { + "chosen_geometric_mean": -1.12981116771698, + "epoch": 0.97, + "grad_norm": 17.125, + "learning_rate": 2.6099669773004222e-06, + "log_odds": 4.904739856719971, + "log_odds_ratio": -0.29890549182891846, + "loss": 0.3119, + "rejected_geometric_mean": -5.718466758728027, + "step": 3931 + }, + { + "chosen_geometric_mean": -0.8141540884971619, + "epoch": 0.97, + "grad_norm": 2.0, + "learning_rate": 2.6089944380183047e-06, + "log_odds": 3.2114808559417725, + "log_odds_ratio": -0.18626634776592255, + "loss": 0.2877, + "rejected_geometric_mean": -3.6168949604034424, + "step": 3932 + }, + { + "chosen_geometric_mean": -0.9983469247817993, + "epoch": 0.97, + "grad_norm": 6.15625, + "learning_rate": 2.608021882210014e-06, + "log_odds": 4.872379779815674, + "log_odds_ratio": -0.044060930609703064, + "loss": 0.245, + "rejected_geometric_mean": -5.411769866943359, + "step": 3933 + }, + { + "chosen_geometric_mean": -1.1502916812896729, + "epoch": 0.97, + "grad_norm": 15.4375, + "learning_rate": 2.607049310023013e-06, + "log_odds": 6.224206924438477, + "log_odds_ratio": -0.157277449965477, + "loss": 0.2973, + "rejected_geometric_mean": -7.053030014038086, + "step": 3934 + }, + { + "chosen_geometric_mean": -0.8682446479797363, + "epoch": 0.97, + "grad_norm": 7.9375, + "learning_rate": 2.606076721604767e-06, + "log_odds": 2.3566393852233887, + "log_odds_ratio": -0.3128226399421692, + "loss": 0.2908, + "rejected_geometric_mean": -2.918219566345215, + "step": 3935 + }, + { + "chosen_geometric_mean": -1.325609564781189, + "epoch": 0.97, + "grad_norm": 25.25, + "learning_rate": 2.605104117102744e-06, + "log_odds": 7.5942206382751465, + "log_odds_ratio": -0.022822504863142967, + "loss": 0.3195, + "rejected_geometric_mean": -8.596845626831055, + "step": 3936 + }, + { + "chosen_geometric_mean": -1.1735728979110718, + "epoch": 0.97, + "grad_norm": 27.625, + "learning_rate": 2.6041314966644136e-06, + "log_odds": 4.226895809173584, + "log_odds_ratio": -0.3586789667606354, + "loss": 0.3083, + "rejected_geometric_mean": -5.203597068786621, + "step": 3937 + }, + { + "chosen_geometric_mean": -0.9098762273788452, + "epoch": 0.97, + "grad_norm": 6.65625, + "learning_rate": 2.603158860437249e-06, + "log_odds": 8.94052505493164, + "log_odds_ratio": -0.13437367975711823, + "loss": 0.2914, + "rejected_geometric_mean": -9.40888786315918, + "step": 3938 + }, + { + "chosen_geometric_mean": -1.0795601606369019, + "epoch": 0.98, + "grad_norm": 2.515625, + "learning_rate": 2.602186208568725e-06, + "log_odds": 0.5925424098968506, + "log_odds_ratio": -0.4619418978691101, + "loss": 0.2621, + "rejected_geometric_mean": -1.5192314386367798, + "step": 3939 + }, + { + "chosen_geometric_mean": -1.1858829259872437, + "epoch": 0.98, + "grad_norm": 4.3125, + "learning_rate": 2.6012135412063187e-06, + "log_odds": 2.813387155532837, + "log_odds_ratio": -0.07334104180335999, + "loss": 0.2941, + "rejected_geometric_mean": -3.655179977416992, + "step": 3940 + }, + { + "chosen_geometric_mean": -1.1294467449188232, + "epoch": 0.98, + "grad_norm": 4.09375, + "learning_rate": 2.600240858497511e-06, + "log_odds": 2.476653814315796, + "log_odds_ratio": -0.29118114709854126, + "loss": 0.2808, + "rejected_geometric_mean": -3.3713972568511963, + "step": 3941 + }, + { + "chosen_geometric_mean": -1.068627119064331, + "epoch": 0.98, + "grad_norm": 3.78125, + "learning_rate": 2.5992681605897814e-06, + "log_odds": 5.418651580810547, + "log_odds_ratio": -0.0403924360871315, + "loss": 0.4487, + "rejected_geometric_mean": -6.078269958496094, + "step": 3942 + }, + { + "chosen_geometric_mean": -0.9126531481742859, + "epoch": 0.98, + "grad_norm": 18.375, + "learning_rate": 2.5982954476306167e-06, + "log_odds": 1.569448471069336, + "log_odds_ratio": -0.3785703480243683, + "loss": 0.2823, + "rejected_geometric_mean": -2.2179484367370605, + "step": 3943 + }, + { + "chosen_geometric_mean": -1.2479203939437866, + "epoch": 0.98, + "grad_norm": 12.5625, + "learning_rate": 2.597322719767502e-06, + "log_odds": 1.4881781339645386, + "log_odds_ratio": -0.3261973261833191, + "loss": 0.3249, + "rejected_geometric_mean": -2.5553534030914307, + "step": 3944 + }, + { + "chosen_geometric_mean": -0.9269955158233643, + "epoch": 0.98, + "grad_norm": 7.78125, + "learning_rate": 2.596349977147928e-06, + "log_odds": 5.123684883117676, + "log_odds_ratio": -0.20148852467536926, + "loss": 0.2727, + "rejected_geometric_mean": -5.705318450927734, + "step": 3945 + }, + { + "chosen_geometric_mean": -1.0866082906723022, + "epoch": 0.98, + "grad_norm": 10.25, + "learning_rate": 2.5953772199193838e-06, + "log_odds": 4.995522499084473, + "log_odds_ratio": -0.08768489211797714, + "loss": 0.2643, + "rejected_geometric_mean": -5.7046966552734375, + "step": 3946 + }, + { + "chosen_geometric_mean": -0.9695860743522644, + "epoch": 0.98, + "grad_norm": 3.984375, + "learning_rate": 2.594404448229364e-06, + "log_odds": 6.696706295013428, + "log_odds_ratio": -0.1171209067106247, + "loss": 0.285, + "rejected_geometric_mean": -7.241469860076904, + "step": 3947 + }, + { + "chosen_geometric_mean": -1.2345452308654785, + "epoch": 0.98, + "grad_norm": 30.875, + "learning_rate": 2.5934316622253636e-06, + "log_odds": 3.49550199508667, + "log_odds_ratio": -0.28267282247543335, + "loss": 0.3466, + "rejected_geometric_mean": -4.463549613952637, + "step": 3948 + }, + { + "chosen_geometric_mean": -1.9981719255447388, + "epoch": 0.98, + "grad_norm": 12.75, + "learning_rate": 2.5924588620548803e-06, + "log_odds": 2.9731264114379883, + "log_odds_ratio": -0.19104142487049103, + "loss": 0.3466, + "rejected_geometric_mean": -4.759727478027344, + "step": 3949 + }, + { + "chosen_geometric_mean": -0.9314879775047302, + "epoch": 0.98, + "grad_norm": 4.0, + "learning_rate": 2.591486047865414e-06, + "log_odds": 2.0440452098846436, + "log_odds_ratio": -0.2428620308637619, + "loss": 0.3068, + "rejected_geometric_mean": -2.6040525436401367, + "step": 3950 + }, + { + "chosen_geometric_mean": -0.9389438629150391, + "epoch": 0.98, + "grad_norm": 2.078125, + "learning_rate": 2.5905132198044676e-06, + "log_odds": 3.110260248184204, + "log_odds_ratio": -0.19206492602825165, + "loss": 0.2438, + "rejected_geometric_mean": -3.679436445236206, + "step": 3951 + }, + { + "chosen_geometric_mean": -1.8268036842346191, + "epoch": 0.98, + "grad_norm": 13.5625, + "learning_rate": 2.5895403780195445e-06, + "log_odds": 4.251121997833252, + "log_odds_ratio": -0.1823897510766983, + "loss": 0.3043, + "rejected_geometric_mean": -5.893017768859863, + "step": 3952 + }, + { + "chosen_geometric_mean": -0.8219106793403625, + "epoch": 0.98, + "grad_norm": 2.5625, + "learning_rate": 2.588567522658149e-06, + "log_odds": 10.415410995483398, + "log_odds_ratio": -0.019516799598932266, + "loss": 0.2872, + "rejected_geometric_mean": -10.658897399902344, + "step": 3953 + }, + { + "chosen_geometric_mean": -0.922511100769043, + "epoch": 0.98, + "grad_norm": 10.875, + "learning_rate": 2.5875946538677933e-06, + "log_odds": 5.7405548095703125, + "log_odds_ratio": -0.017682364210486412, + "loss": 0.3386, + "rejected_geometric_mean": -6.144407749176025, + "step": 3954 + }, + { + "chosen_geometric_mean": -1.1463699340820312, + "epoch": 0.98, + "grad_norm": 14.5625, + "learning_rate": 2.5866217717959846e-06, + "log_odds": 5.351178169250488, + "log_odds_ratio": -0.15455001592636108, + "loss": 0.292, + "rejected_geometric_mean": -6.1867289543151855, + "step": 3955 + }, + { + "chosen_geometric_mean": -0.9565988779067993, + "epoch": 0.98, + "grad_norm": 1.953125, + "learning_rate": 2.5856488765902356e-06, + "log_odds": 1.7804893255233765, + "log_odds_ratio": -0.43637895584106445, + "loss": 0.2456, + "rejected_geometric_mean": -2.4725804328918457, + "step": 3956 + }, + { + "chosen_geometric_mean": -1.088597297668457, + "epoch": 0.98, + "grad_norm": 14.875, + "learning_rate": 2.584675968398061e-06, + "log_odds": 4.331789493560791, + "log_odds_ratio": -0.11829076707363129, + "loss": 0.2469, + "rejected_geometric_mean": -5.016384124755859, + "step": 3957 + }, + { + "chosen_geometric_mean": -1.0183058977127075, + "epoch": 0.98, + "grad_norm": 26.125, + "learning_rate": 2.583703047366978e-06, + "log_odds": 1.0671236515045166, + "log_odds_ratio": -0.37414923310279846, + "loss": 0.2809, + "rejected_geometric_mean": -1.8653572797775269, + "step": 3958 + }, + { + "chosen_geometric_mean": -1.244070291519165, + "epoch": 0.98, + "grad_norm": 5.34375, + "learning_rate": 2.582730113644503e-06, + "log_odds": 6.445953845977783, + "log_odds_ratio": -0.0788697600364685, + "loss": 0.2458, + "rejected_geometric_mean": -7.358046531677246, + "step": 3959 + }, + { + "chosen_geometric_mean": -1.16378915309906, + "epoch": 0.98, + "grad_norm": 28.5, + "learning_rate": 2.581757167378157e-06, + "log_odds": 5.156305313110352, + "log_odds_ratio": -0.05433641001582146, + "loss": 0.376, + "rejected_geometric_mean": -5.953372955322266, + "step": 3960 + }, + { + "chosen_geometric_mean": -1.162768006324768, + "epoch": 0.98, + "grad_norm": 6.15625, + "learning_rate": 2.5807842087154623e-06, + "log_odds": 4.040588855743408, + "log_odds_ratio": -0.36345356702804565, + "loss": 0.2763, + "rejected_geometric_mean": -5.017184734344482, + "step": 3961 + }, + { + "chosen_geometric_mean": -1.1926321983337402, + "epoch": 0.98, + "grad_norm": 9.75, + "learning_rate": 2.579811237803942e-06, + "log_odds": 6.239399433135986, + "log_odds_ratio": -0.23590987920761108, + "loss": 0.2693, + "rejected_geometric_mean": -7.1585283279418945, + "step": 3962 + }, + { + "chosen_geometric_mean": -1.0866793394088745, + "epoch": 0.98, + "grad_norm": 3.703125, + "learning_rate": 2.5788382547911223e-06, + "log_odds": 4.871023178100586, + "log_odds_ratio": -0.13336355984210968, + "loss": 0.2735, + "rejected_geometric_mean": -5.576329231262207, + "step": 3963 + }, + { + "chosen_geometric_mean": -1.0900624990463257, + "epoch": 0.98, + "grad_norm": 30.625, + "learning_rate": 2.5778652598245313e-06, + "log_odds": 4.822210311889648, + "log_odds_ratio": -0.2612898647785187, + "loss": 0.2849, + "rejected_geometric_mean": -5.605416774749756, + "step": 3964 + }, + { + "chosen_geometric_mean": -0.9443902373313904, + "epoch": 0.98, + "grad_norm": 4.96875, + "learning_rate": 2.576892253051697e-06, + "log_odds": 6.778277397155762, + "log_odds_ratio": -0.0033261473290622234, + "loss": 0.3476, + "rejected_geometric_mean": -7.2073774337768555, + "step": 3965 + }, + { + "chosen_geometric_mean": -0.9529456496238708, + "epoch": 0.98, + "grad_norm": 2.328125, + "learning_rate": 2.575919234620152e-06, + "log_odds": 7.539320945739746, + "log_odds_ratio": -0.07649051398038864, + "loss": 0.2863, + "rejected_geometric_mean": -8.01223373413086, + "step": 3966 + }, + { + "chosen_geometric_mean": -1.0124456882476807, + "epoch": 0.98, + "grad_norm": 14.125, + "learning_rate": 2.5749462046774286e-06, + "log_odds": 8.1987943649292, + "log_odds_ratio": -0.09896797686815262, + "loss": 0.3077, + "rejected_geometric_mean": -8.799060821533203, + "step": 3967 + }, + { + "chosen_geometric_mean": -1.2362140417099, + "epoch": 0.98, + "grad_norm": 5.84375, + "learning_rate": 2.573973163371062e-06, + "log_odds": 2.937976598739624, + "log_odds_ratio": -0.2290499359369278, + "loss": 0.3452, + "rejected_geometric_mean": -3.9362306594848633, + "step": 3968 + }, + { + "chosen_geometric_mean": -0.9451737403869629, + "epoch": 0.98, + "grad_norm": 3.28125, + "learning_rate": 2.5730001108485868e-06, + "log_odds": 2.7953996658325195, + "log_odds_ratio": -0.30025848746299744, + "loss": 0.2761, + "rejected_geometric_mean": -3.446150302886963, + "step": 3969 + }, + { + "chosen_geometric_mean": -1.1162505149841309, + "epoch": 0.98, + "grad_norm": 33.25, + "learning_rate": 2.572027047257544e-06, + "log_odds": 1.15478515625, + "log_odds_ratio": -0.38851362466812134, + "loss": 0.2953, + "rejected_geometric_mean": -2.062566041946411, + "step": 3970 + }, + { + "chosen_geometric_mean": -1.118201732635498, + "epoch": 0.98, + "grad_norm": 7.6875, + "learning_rate": 2.5710539727454713e-06, + "log_odds": 1.7663556337356567, + "log_odds_ratio": -0.32525449991226196, + "loss": 0.3038, + "rejected_geometric_mean": -2.6645753383636475, + "step": 3971 + }, + { + "chosen_geometric_mean": -1.1025978326797485, + "epoch": 0.98, + "grad_norm": 5.34375, + "learning_rate": 2.5700808874599105e-06, + "log_odds": 7.893601894378662, + "log_odds_ratio": -0.027757970616221428, + "loss": 0.2177, + "rejected_geometric_mean": -8.60289478302002, + "step": 3972 + }, + { + "chosen_geometric_mean": -0.9252667427062988, + "epoch": 0.98, + "grad_norm": 2.34375, + "learning_rate": 2.5691077915484052e-06, + "log_odds": 2.9152750968933105, + "log_odds_ratio": -0.21579864621162415, + "loss": 0.2891, + "rejected_geometric_mean": -3.4737253189086914, + "step": 3973 + }, + { + "chosen_geometric_mean": -1.2329249382019043, + "epoch": 0.98, + "grad_norm": 2.03125, + "learning_rate": 2.5681346851585006e-06, + "log_odds": 4.111944675445557, + "log_odds_ratio": -0.15674729645252228, + "loss": 0.3249, + "rejected_geometric_mean": -5.061855316162109, + "step": 3974 + }, + { + "chosen_geometric_mean": -1.0351468324661255, + "epoch": 0.98, + "grad_norm": 34.75, + "learning_rate": 2.5671615684377404e-06, + "log_odds": 4.086952209472656, + "log_odds_ratio": -0.2038688063621521, + "loss": 0.3055, + "rejected_geometric_mean": -4.799210548400879, + "step": 3975 + }, + { + "chosen_geometric_mean": -1.1860637664794922, + "epoch": 0.98, + "grad_norm": 7.125, + "learning_rate": 2.5661884415336756e-06, + "log_odds": 3.2204697132110596, + "log_odds_ratio": -0.23596639931201935, + "loss": 0.3021, + "rejected_geometric_mean": -4.071680068969727, + "step": 3976 + }, + { + "chosen_geometric_mean": -1.0508800745010376, + "epoch": 0.98, + "grad_norm": 28.5, + "learning_rate": 2.565215304593854e-06, + "log_odds": 0.14534279704093933, + "log_odds_ratio": -0.6387857794761658, + "loss": 0.3647, + "rejected_geometric_mean": -1.1539733409881592, + "step": 3977 + }, + { + "chosen_geometric_mean": -0.8888953924179077, + "epoch": 0.98, + "grad_norm": 4.90625, + "learning_rate": 2.5642421577658273e-06, + "log_odds": 4.272026538848877, + "log_odds_ratio": -0.03385339304804802, + "loss": 0.2567, + "rejected_geometric_mean": -4.645512580871582, + "step": 3978 + }, + { + "chosen_geometric_mean": -1.1845110654830933, + "epoch": 0.99, + "grad_norm": 21.125, + "learning_rate": 2.5632690011971467e-06, + "log_odds": 10.11064624786377, + "log_odds_ratio": -0.2777961790561676, + "loss": 0.3019, + "rejected_geometric_mean": -11.070404052734375, + "step": 3979 + }, + { + "chosen_geometric_mean": -1.1052567958831787, + "epoch": 0.99, + "grad_norm": 4.03125, + "learning_rate": 2.5622958350353673e-06, + "log_odds": 9.469999313354492, + "log_odds_ratio": -0.16814792156219482, + "loss": 0.3702, + "rejected_geometric_mean": -10.230086326599121, + "step": 3980 + }, + { + "chosen_geometric_mean": -1.202836513519287, + "epoch": 0.99, + "grad_norm": 9.0625, + "learning_rate": 2.561322659428043e-06, + "log_odds": 6.227272033691406, + "log_odds_ratio": -0.1400427222251892, + "loss": 0.3172, + "rejected_geometric_mean": -7.147899627685547, + "step": 3981 + }, + { + "chosen_geometric_mean": -1.246565341949463, + "epoch": 0.99, + "grad_norm": 10.0625, + "learning_rate": 2.5603494745227332e-06, + "log_odds": 4.827009677886963, + "log_odds_ratio": -0.40229225158691406, + "loss": 0.334, + "rejected_geometric_mean": -5.907498359680176, + "step": 3982 + }, + { + "chosen_geometric_mean": -0.969992995262146, + "epoch": 0.99, + "grad_norm": 5.78125, + "learning_rate": 2.5593762804669925e-06, + "log_odds": 2.6162772178649902, + "log_odds_ratio": -0.2268391102552414, + "loss": 0.3086, + "rejected_geometric_mean": -3.226113796234131, + "step": 3983 + }, + { + "chosen_geometric_mean": -0.9809112548828125, + "epoch": 0.99, + "grad_norm": 4.34375, + "learning_rate": 2.558403077408384e-06, + "log_odds": 5.2825446128845215, + "log_odds_ratio": -0.17448465526103973, + "loss": 0.2871, + "rejected_geometric_mean": -5.913507461547852, + "step": 3984 + }, + { + "chosen_geometric_mean": -1.0512583255767822, + "epoch": 0.99, + "grad_norm": 9.625, + "learning_rate": 2.557429865494467e-06, + "log_odds": 3.1175408363342285, + "log_odds_ratio": -0.4774656295776367, + "loss": 0.2791, + "rejected_geometric_mean": -4.0233540534973145, + "step": 3985 + }, + { + "chosen_geometric_mean": -1.1715587377548218, + "epoch": 0.99, + "grad_norm": 8.875, + "learning_rate": 2.5564566448728025e-06, + "log_odds": 9.576807975769043, + "log_odds_ratio": -0.0006831845967099071, + "loss": 0.2554, + "rejected_geometric_mean": -10.37389850616455, + "step": 3986 + }, + { + "chosen_geometric_mean": -1.222447156906128, + "epoch": 0.99, + "grad_norm": 7.84375, + "learning_rate": 2.555483415690957e-06, + "log_odds": 3.540102958679199, + "log_odds_ratio": -0.2957429885864258, + "loss": 0.2891, + "rejected_geometric_mean": -4.558824062347412, + "step": 3987 + }, + { + "chosen_geometric_mean": -1.9078632593154907, + "epoch": 0.99, + "grad_norm": 16.125, + "learning_rate": 2.5545101780964924e-06, + "log_odds": 10.544819831848145, + "log_odds_ratio": -0.032204438000917435, + "loss": 0.3776, + "rejected_geometric_mean": -12.17015552520752, + "step": 3988 + }, + { + "chosen_geometric_mean": -1.2875556945800781, + "epoch": 0.99, + "grad_norm": 51.5, + "learning_rate": 2.5535369322369773e-06, + "log_odds": 0.9636186957359314, + "log_odds_ratio": -0.43377411365509033, + "loss": 0.3336, + "rejected_geometric_mean": -2.11234974861145, + "step": 3989 + }, + { + "chosen_geometric_mean": -1.1053366661071777, + "epoch": 0.99, + "grad_norm": 3.453125, + "learning_rate": 2.5525636782599782e-06, + "log_odds": 5.614840507507324, + "log_odds_ratio": -0.010322139598429203, + "loss": 0.2823, + "rejected_geometric_mean": -6.296747207641602, + "step": 3990 + }, + { + "chosen_geometric_mean": -1.1655546426773071, + "epoch": 0.99, + "grad_norm": 25.25, + "learning_rate": 2.551590416313063e-06, + "log_odds": 6.410330295562744, + "log_odds_ratio": -0.159310981631279, + "loss": 0.3015, + "rejected_geometric_mean": -7.267911911010742, + "step": 3991 + }, + { + "chosen_geometric_mean": -0.8926420211791992, + "epoch": 0.99, + "grad_norm": 2.859375, + "learning_rate": 2.5506171465438035e-06, + "log_odds": 2.5399327278137207, + "log_odds_ratio": -0.3820105195045471, + "loss": 0.2691, + "rejected_geometric_mean": -3.2180330753326416, + "step": 3992 + }, + { + "chosen_geometric_mean": -1.0636974573135376, + "epoch": 0.99, + "grad_norm": 28.875, + "learning_rate": 2.549643869099769e-06, + "log_odds": 4.376119613647461, + "log_odds_ratio": -0.05413684621453285, + "loss": 0.2971, + "rejected_geometric_mean": -4.965243339538574, + "step": 3993 + }, + { + "chosen_geometric_mean": -1.2255865335464478, + "epoch": 0.99, + "grad_norm": 12.1875, + "learning_rate": 2.548670584128532e-06, + "log_odds": 3.600231170654297, + "log_odds_ratio": -0.16971348226070404, + "loss": 0.2916, + "rejected_geometric_mean": -4.514383316040039, + "step": 3994 + }, + { + "chosen_geometric_mean": -0.9349708557128906, + "epoch": 0.99, + "grad_norm": 5.5625, + "learning_rate": 2.5476972917776667e-06, + "log_odds": 2.2006168365478516, + "log_odds_ratio": -0.1972222626209259, + "loss": 0.2979, + "rejected_geometric_mean": -2.798774242401123, + "step": 3995 + }, + { + "chosen_geometric_mean": -0.9235665202140808, + "epoch": 0.99, + "grad_norm": 7.4375, + "learning_rate": 2.5467239921947468e-06, + "log_odds": 7.149328708648682, + "log_odds_ratio": -0.06708978861570358, + "loss": 0.2828, + "rejected_geometric_mean": -7.586345672607422, + "step": 3996 + }, + { + "chosen_geometric_mean": -0.9175522923469543, + "epoch": 0.99, + "grad_norm": 4.75, + "learning_rate": 2.545750685527348e-06, + "log_odds": 3.1232755184173584, + "log_odds_ratio": -0.2323487550020218, + "loss": 0.2867, + "rejected_geometric_mean": -3.7178852558135986, + "step": 3997 + }, + { + "chosen_geometric_mean": -1.3585363626480103, + "epoch": 0.99, + "grad_norm": 5.6875, + "learning_rate": 2.544777371923047e-06, + "log_odds": 2.6036806106567383, + "log_odds_ratio": -0.18331795930862427, + "loss": 0.3724, + "rejected_geometric_mean": -3.736956834793091, + "step": 3998 + }, + { + "chosen_geometric_mean": -1.1423225402832031, + "epoch": 0.99, + "grad_norm": 5.96875, + "learning_rate": 2.543804051529421e-06, + "log_odds": 4.794223308563232, + "log_odds_ratio": -0.1920863389968872, + "loss": 0.3188, + "rejected_geometric_mean": -5.637918949127197, + "step": 3999 + }, + { + "chosen_geometric_mean": -0.9459147453308105, + "epoch": 0.99, + "grad_norm": 37.5, + "learning_rate": 2.5428307244940507e-06, + "log_odds": 3.4662647247314453, + "log_odds_ratio": -0.09712301194667816, + "loss": 0.3253, + "rejected_geometric_mean": -3.9213554859161377, + "step": 4000 + }, + { + "chosen_geometric_mean": -1.0951117277145386, + "epoch": 0.99, + "grad_norm": 13.9375, + "learning_rate": 2.541857390964514e-06, + "log_odds": 2.0125904083251953, + "log_odds_ratio": -0.5651896595954895, + "loss": 0.3244, + "rejected_geometric_mean": -2.962463855743408, + "step": 4001 + }, + { + "chosen_geometric_mean": -0.9486945867538452, + "epoch": 0.99, + "grad_norm": 10.1875, + "learning_rate": 2.5408840510883912e-06, + "log_odds": 7.2248382568359375, + "log_odds_ratio": -0.15490323305130005, + "loss": 0.295, + "rejected_geometric_mean": -7.796125888824463, + "step": 4002 + }, + { + "chosen_geometric_mean": -0.9126287698745728, + "epoch": 0.99, + "grad_norm": 2.15625, + "learning_rate": 2.5399107050132656e-06, + "log_odds": 2.919161796569824, + "log_odds_ratio": -0.35603779554367065, + "loss": 0.2778, + "rejected_geometric_mean": -3.5255320072174072, + "step": 4003 + }, + { + "chosen_geometric_mean": -1.329326868057251, + "epoch": 0.99, + "grad_norm": 27.125, + "learning_rate": 2.5389373528867193e-06, + "log_odds": 0.7087961435317993, + "log_odds_ratio": -0.48101291060447693, + "loss": 0.3611, + "rejected_geometric_mean": -1.9313924312591553, + "step": 4004 + }, + { + "chosen_geometric_mean": -1.196496605873108, + "epoch": 0.99, + "grad_norm": 22.875, + "learning_rate": 2.537963994856335e-06, + "log_odds": 1.4280303716659546, + "log_odds_ratio": -0.2931724488735199, + "loss": 0.3659, + "rejected_geometric_mean": -2.3961501121520996, + "step": 4005 + }, + { + "chosen_geometric_mean": -1.0232468843460083, + "epoch": 0.99, + "grad_norm": 5.0625, + "learning_rate": 2.5369906310696986e-06, + "log_odds": 3.5209059715270996, + "log_odds_ratio": -0.21581412851810455, + "loss": 0.3114, + "rejected_geometric_mean": -4.241138458251953, + "step": 4006 + }, + { + "chosen_geometric_mean": -1.0999656915664673, + "epoch": 0.99, + "grad_norm": 7.59375, + "learning_rate": 2.536017261674395e-06, + "log_odds": 5.932910442352295, + "log_odds_ratio": -0.08655770868062973, + "loss": 0.3402, + "rejected_geometric_mean": -6.630213737487793, + "step": 4007 + }, + { + "chosen_geometric_mean": -1.0932915210723877, + "epoch": 0.99, + "grad_norm": 5.46875, + "learning_rate": 2.535043886818008e-06, + "log_odds": 2.9656546115875244, + "log_odds_ratio": -0.23316530883312225, + "loss": 0.3348, + "rejected_geometric_mean": -3.7778563499450684, + "step": 4008 + }, + { + "chosen_geometric_mean": -1.1134041547775269, + "epoch": 0.99, + "grad_norm": 8.375, + "learning_rate": 2.534070506648128e-06, + "log_odds": 1.6145977973937988, + "log_odds_ratio": -0.23718786239624023, + "loss": 0.3033, + "rejected_geometric_mean": -2.3968679904937744, + "step": 4009 + }, + { + "chosen_geometric_mean": -0.9368594288825989, + "epoch": 0.99, + "grad_norm": 5.5625, + "learning_rate": 2.5330971213123413e-06, + "log_odds": 5.934356689453125, + "log_odds_ratio": -0.18391084671020508, + "loss": 0.2448, + "rejected_geometric_mean": -6.538605213165283, + "step": 4010 + }, + { + "chosen_geometric_mean": -1.1377160549163818, + "epoch": 0.99, + "grad_norm": 3.09375, + "learning_rate": 2.5321237309582363e-06, + "log_odds": 1.282386064529419, + "log_odds_ratio": -0.43247124552726746, + "loss": 0.2536, + "rejected_geometric_mean": -2.31115984916687, + "step": 4011 + }, + { + "chosen_geometric_mean": -0.8225868344306946, + "epoch": 0.99, + "grad_norm": 25.625, + "learning_rate": 2.5311503357334026e-06, + "log_odds": 6.0801310539245605, + "log_odds_ratio": -0.19429723918437958, + "loss": 0.2969, + "rejected_geometric_mean": -6.464775085449219, + "step": 4012 + }, + { + "chosen_geometric_mean": -0.975569486618042, + "epoch": 0.99, + "grad_norm": 7.875, + "learning_rate": 2.5301769357854306e-06, + "log_odds": 4.430357456207275, + "log_odds_ratio": -0.029603954404592514, + "loss": 0.3004, + "rejected_geometric_mean": -4.918966293334961, + "step": 4013 + }, + { + "chosen_geometric_mean": -0.9588828086853027, + "epoch": 0.99, + "grad_norm": 2.734375, + "learning_rate": 2.5292035312619107e-06, + "log_odds": 3.4045190811157227, + "log_odds_ratio": -0.15773767232894897, + "loss": 0.2563, + "rejected_geometric_mean": -3.9016101360321045, + "step": 4014 + }, + { + "chosen_geometric_mean": -0.9955271482467651, + "epoch": 0.99, + "grad_norm": 2.1875, + "learning_rate": 2.5282301223104346e-06, + "log_odds": 2.5997226238250732, + "log_odds_ratio": -0.3049350082874298, + "loss": 0.296, + "rejected_geometric_mean": -3.4103293418884277, + "step": 4015 + }, + { + "chosen_geometric_mean": -1.1973742246627808, + "epoch": 0.99, + "grad_norm": 8.25, + "learning_rate": 2.527256709078594e-06, + "log_odds": 2.3516933917999268, + "log_odds_ratio": -0.21348775923252106, + "loss": 0.247, + "rejected_geometric_mean": -3.284360885620117, + "step": 4016 + }, + { + "chosen_geometric_mean": -1.1144204139709473, + "epoch": 0.99, + "grad_norm": 17.25, + "learning_rate": 2.5262832917139818e-06, + "log_odds": 6.9604811668396, + "log_odds_ratio": -0.0015762516995891929, + "loss": 0.2715, + "rejected_geometric_mean": -7.665332317352295, + "step": 4017 + }, + { + "chosen_geometric_mean": -1.063342809677124, + "epoch": 0.99, + "grad_norm": 2.765625, + "learning_rate": 2.525309870364192e-06, + "log_odds": 4.164438724517822, + "log_odds_ratio": -0.22975420951843262, + "loss": 0.2654, + "rejected_geometric_mean": -4.924239158630371, + "step": 4018 + }, + { + "chosen_geometric_mean": -1.3392380475997925, + "epoch": 1.0, + "grad_norm": 32.25, + "learning_rate": 2.524336445176818e-06, + "log_odds": 9.527138710021973, + "log_odds_ratio": -0.009917100891470909, + "loss": 0.3589, + "rejected_geometric_mean": -10.519416809082031, + "step": 4019 + }, + { + "chosen_geometric_mean": -1.0270427465438843, + "epoch": 1.0, + "grad_norm": 4.5625, + "learning_rate": 2.523363016299455e-06, + "log_odds": 5.764737129211426, + "log_odds_ratio": -0.26380184292793274, + "loss": 0.2838, + "rejected_geometric_mean": -6.473631858825684, + "step": 4020 + }, + { + "chosen_geometric_mean": -1.4325783252716064, + "epoch": 1.0, + "grad_norm": 19.0, + "learning_rate": 2.5223895838796973e-06, + "log_odds": 10.565759658813477, + "log_odds_ratio": -0.26573294401168823, + "loss": 0.2901, + "rejected_geometric_mean": -11.781721115112305, + "step": 4021 + }, + { + "chosen_geometric_mean": -1.0798994302749634, + "epoch": 1.0, + "grad_norm": 7.75, + "learning_rate": 2.521416148065142e-06, + "log_odds": 4.663435935974121, + "log_odds_ratio": -0.2145928144454956, + "loss": 0.3097, + "rejected_geometric_mean": -5.414175987243652, + "step": 4022 + }, + { + "chosen_geometric_mean": -1.1037187576293945, + "epoch": 1.0, + "grad_norm": 6.90625, + "learning_rate": 2.520442709003385e-06, + "log_odds": 1.7384237051010132, + "log_odds_ratio": -0.3458636999130249, + "loss": 0.2807, + "rejected_geometric_mean": -2.6434662342071533, + "step": 4023 + }, + { + "chosen_geometric_mean": -1.2224135398864746, + "epoch": 1.0, + "grad_norm": 4.1875, + "learning_rate": 2.5194692668420218e-06, + "log_odds": 2.142280340194702, + "log_odds_ratio": -0.15016454458236694, + "loss": 0.3067, + "rejected_geometric_mean": -3.0777220726013184, + "step": 4024 + }, + { + "chosen_geometric_mean": -1.2049522399902344, + "epoch": 1.0, + "grad_norm": 5.40625, + "learning_rate": 2.5184958217286504e-06, + "log_odds": 1.8369202613830566, + "log_odds_ratio": -0.23161569237709045, + "loss": 0.2874, + "rejected_geometric_mean": -2.778620719909668, + "step": 4025 + }, + { + "chosen_geometric_mean": -1.1050007343292236, + "epoch": 1.0, + "grad_norm": 10.375, + "learning_rate": 2.517522373810869e-06, + "log_odds": 7.620689392089844, + "log_odds_ratio": -0.006338824518024921, + "loss": 0.319, + "rejected_geometric_mean": -8.324268341064453, + "step": 4026 + }, + { + "chosen_geometric_mean": -0.7909507751464844, + "epoch": 1.0, + "grad_norm": 2.09375, + "learning_rate": 2.5165489232362744e-06, + "log_odds": 4.82157039642334, + "log_odds_ratio": -0.2740570902824402, + "loss": 0.269, + "rejected_geometric_mean": -5.265232086181641, + "step": 4027 + }, + { + "chosen_geometric_mean": -1.1789497137069702, + "epoch": 1.0, + "grad_norm": 10.9375, + "learning_rate": 2.5155754701524666e-06, + "log_odds": 5.121183395385742, + "log_odds_ratio": -0.1390816867351532, + "loss": 0.3445, + "rejected_geometric_mean": -5.9853596687316895, + "step": 4028 + }, + { + "chosen_geometric_mean": -0.9794621467590332, + "epoch": 1.0, + "grad_norm": 11.0, + "learning_rate": 2.5146020147070433e-06, + "log_odds": 3.6249632835388184, + "log_odds_ratio": -0.19023269414901733, + "loss": 0.3257, + "rejected_geometric_mean": -4.2330427169799805, + "step": 4029 + }, + { + "chosen_geometric_mean": -1.0096217393875122, + "epoch": 1.0, + "grad_norm": 13.375, + "learning_rate": 2.5136285570476038e-06, + "log_odds": 2.808413028717041, + "log_odds_ratio": -0.2536742389202118, + "loss": 0.2943, + "rejected_geometric_mean": -3.4999611377716064, + "step": 4030 + }, + { + "chosen_geometric_mean": -1.001123309135437, + "epoch": 1.0, + "grad_norm": 30.625, + "learning_rate": 2.5126550973217486e-06, + "log_odds": 6.523198127746582, + "log_odds_ratio": -0.04113246500492096, + "loss": 0.2899, + "rejected_geometric_mean": -7.076251029968262, + "step": 4031 + }, + { + "chosen_geometric_mean": -1.0586516857147217, + "epoch": 1.0, + "grad_norm": 34.0, + "learning_rate": 2.511681635677076e-06, + "log_odds": 5.491892337799072, + "log_odds_ratio": -0.25358712673187256, + "loss": 0.3484, + "rejected_geometric_mean": -6.278044700622559, + "step": 4032 + }, + { + "chosen_geometric_mean": -1.3024694919586182, + "epoch": 1.0, + "grad_norm": 3.453125, + "learning_rate": 2.5107081722611876e-06, + "log_odds": 1.9515897035598755, + "log_odds_ratio": -0.26498210430145264, + "loss": 0.2867, + "rejected_geometric_mean": -3.0346004962921143, + "step": 4033 + }, + { + "chosen_geometric_mean": -1.0352184772491455, + "epoch": 1.0, + "grad_norm": 18.25, + "learning_rate": 2.509734707221683e-06, + "log_odds": 5.149886131286621, + "log_odds_ratio": -0.009649693965911865, + "loss": 0.3062, + "rejected_geometric_mean": -5.742605209350586, + "step": 4034 + }, + { + "chosen_geometric_mean": -0.7991612553596497, + "epoch": 1.0, + "grad_norm": 3.296875, + "learning_rate": 2.508761240706163e-06, + "log_odds": 7.939626693725586, + "log_odds_ratio": -0.04752393439412117, + "loss": 0.2294, + "rejected_geometric_mean": -8.182832717895508, + "step": 4035 + }, + { + "chosen_geometric_mean": -0.9002494812011719, + "epoch": 1.0, + "grad_norm": 7.25, + "learning_rate": 2.507787772862229e-06, + "log_odds": 6.105609893798828, + "log_odds_ratio": -0.016174864023923874, + "loss": 0.3093, + "rejected_geometric_mean": -6.480843544006348, + "step": 4036 + }, + { + "chosen_geometric_mean": -1.1380372047424316, + "epoch": 1.0, + "grad_norm": 6.21875, + "learning_rate": 2.506814303837482e-06, + "log_odds": 1.5919723510742188, + "log_odds_ratio": -0.24012693762779236, + "loss": 0.3401, + "rejected_geometric_mean": -2.4204916954040527, + "step": 4037 + }, + { + "chosen_geometric_mean": -1.1800326108932495, + "epoch": 1.0, + "grad_norm": 3.71875, + "learning_rate": 2.505840833779522e-06, + "log_odds": 2.3279550075531006, + "log_odds_ratio": -0.2823469638824463, + "loss": 0.3029, + "rejected_geometric_mean": -3.2791967391967773, + "step": 4038 + }, + { + "chosen_geometric_mean": -1.047244668006897, + "epoch": 1.0, + "grad_norm": 16.375, + "learning_rate": 2.5048673628359516e-06, + "log_odds": 2.2551960945129395, + "log_odds_ratio": -0.30336278676986694, + "loss": 0.278, + "rejected_geometric_mean": -3.0530829429626465, + "step": 4039 + } + ], + "logging_steps": 1, + "max_steps": 8078, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 4039, + "total_flos": 1.1293146268726985e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}