|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 43, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": -1.5687581300735474, |
|
"debug/policy_chosen_logps": -240.2513427734375, |
|
"debug/policy_rejected_logits": -1.6221139430999756, |
|
"debug/policy_rejected_logps": -264.4752197265625, |
|
"debug/reference_chosen_logps": -240.2513427734375, |
|
"debug/reference_rejected_logps": -264.4752197265625, |
|
"epoch": 0.023255813953488372, |
|
"grad_norm": 14.314275545525218, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5687581300735474, |
|
"logits/rejected": -1.6221139430999756, |
|
"logps/chosen": -240.2513427734375, |
|
"logps/rejected": -264.4752197265625, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4775172472000122, |
|
"debug/policy_chosen_logps": -235.343994140625, |
|
"debug/policy_rejected_logits": -1.3492165803909302, |
|
"debug/policy_rejected_logps": -283.1033935546875, |
|
"debug/reference_chosen_logps": -234.93467712402344, |
|
"debug/reference_rejected_logps": -283.2170104980469, |
|
"epoch": 0.046511627906976744, |
|
"grad_norm": 18.67922014806989, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4775172472000122, |
|
"logits/rejected": -1.3492165803909302, |
|
"logps/chosen": -235.343994140625, |
|
"logps/rejected": -283.1033935546875, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.004093170166015625, |
|
"rewards/margins": -0.005229205824434757, |
|
"rewards/rejected": 0.0011360361240804195, |
|
"step": 2 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6865235567092896, |
|
"debug/policy_chosen_logps": -230.66635131835938, |
|
"debug/policy_rejected_logits": -1.6258912086486816, |
|
"debug/policy_rejected_logps": -228.0758514404297, |
|
"debug/reference_chosen_logps": -225.64306640625, |
|
"debug/reference_rejected_logps": -223.4805908203125, |
|
"epoch": 0.06976744186046512, |
|
"grad_norm": 25.946369831783773, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6865235567092896, |
|
"logits/rejected": -1.6258912086486816, |
|
"logps/chosen": -230.66635131835938, |
|
"logps/rejected": -228.0758514404297, |
|
"loss": 0.5069, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.050232600420713425, |
|
"rewards/margins": -0.004280166234821081, |
|
"rewards/rejected": -0.04595243185758591, |
|
"step": 3 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6345511674880981, |
|
"debug/policy_chosen_logps": -230.35598754882812, |
|
"debug/policy_rejected_logits": -1.594412088394165, |
|
"debug/policy_rejected_logps": -235.36544799804688, |
|
"debug/reference_chosen_logps": -227.8475799560547, |
|
"debug/reference_rejected_logps": -230.77169799804688, |
|
"epoch": 0.09302325581395349, |
|
"grad_norm": 12.562991726069878, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6345511674880981, |
|
"logits/rejected": -1.594412088394165, |
|
"logps/chosen": -230.35598754882812, |
|
"logps/rejected": -235.36544799804688, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.025084247812628746, |
|
"rewards/margins": 0.02085309848189354, |
|
"rewards/rejected": -0.045937344431877136, |
|
"step": 4 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5187644958496094, |
|
"debug/policy_chosen_logps": -209.38815307617188, |
|
"debug/policy_rejected_logits": -1.5565170049667358, |
|
"debug/policy_rejected_logps": -261.0048522949219, |
|
"debug/reference_chosen_logps": -204.9683837890625, |
|
"debug/reference_rejected_logps": -256.2153015136719, |
|
"epoch": 0.11627906976744186, |
|
"grad_norm": 42.7709320228073, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5187644958496094, |
|
"logits/rejected": -1.5565170049667358, |
|
"logps/chosen": -209.38815307617188, |
|
"logps/rejected": -261.0048522949219, |
|
"loss": 0.5197, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04419763758778572, |
|
"rewards/margins": 0.0036978721618652344, |
|
"rewards/rejected": -0.04789550602436066, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.654346227645874, |
|
"debug/policy_chosen_logps": -208.22152709960938, |
|
"debug/policy_rejected_logits": -1.472536325454712, |
|
"debug/policy_rejected_logps": -277.9122314453125, |
|
"debug/reference_chosen_logps": -208.6928253173828, |
|
"debug/reference_rejected_logps": -277.05023193359375, |
|
"epoch": 0.13953488372093023, |
|
"grad_norm": 18.148530267479675, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.654346227645874, |
|
"logits/rejected": -1.472536325454712, |
|
"logps/chosen": -208.22152709960938, |
|
"logps/rejected": -277.9122314453125, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0047130584716796875, |
|
"rewards/margins": 0.013332920148968697, |
|
"rewards/rejected": -0.008619861677289009, |
|
"step": 6 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.619295597076416, |
|
"debug/policy_chosen_logps": -240.17440795898438, |
|
"debug/policy_rejected_logits": -1.5930582284927368, |
|
"debug/policy_rejected_logps": -303.7572326660156, |
|
"debug/reference_chosen_logps": -240.71119689941406, |
|
"debug/reference_rejected_logps": -304.2488708496094, |
|
"epoch": 0.16279069767441862, |
|
"grad_norm": 18.12592422181744, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.619295597076416, |
|
"logits/rejected": -1.5930582284927368, |
|
"logps/chosen": -240.17440795898438, |
|
"logps/rejected": -303.7572326660156, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.005367736332118511, |
|
"rewards/margins": 0.0004512788727879524, |
|
"rewards/rejected": 0.004916457924991846, |
|
"step": 7 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5426918268203735, |
|
"debug/policy_chosen_logps": -242.49334716796875, |
|
"debug/policy_rejected_logits": -1.515419363975525, |
|
"debug/policy_rejected_logps": -246.33676147460938, |
|
"debug/reference_chosen_logps": -245.80419921875, |
|
"debug/reference_rejected_logps": -248.84983825683594, |
|
"epoch": 0.18604651162790697, |
|
"grad_norm": 37.16270378133235, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5426918268203735, |
|
"logits/rejected": -1.515419363975525, |
|
"logps/chosen": -242.49334716796875, |
|
"logps/rejected": -246.33676147460938, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03310825303196907, |
|
"rewards/margins": 0.007977409288287163, |
|
"rewards/rejected": 0.02513084188103676, |
|
"step": 8 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5298963785171509, |
|
"debug/policy_chosen_logps": -225.92041015625, |
|
"debug/policy_rejected_logits": -1.4147241115570068, |
|
"debug/policy_rejected_logps": -270.5355224609375, |
|
"debug/reference_chosen_logps": -228.79443359375, |
|
"debug/reference_rejected_logps": -272.68603515625, |
|
"epoch": 0.20930232558139536, |
|
"grad_norm": 18.861065558487233, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5298963785171509, |
|
"logits/rejected": -1.4147241115570068, |
|
"logps/chosen": -225.92041015625, |
|
"logps/rejected": -270.5355224609375, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0287402905523777, |
|
"rewards/margins": 0.007235164754092693, |
|
"rewards/rejected": 0.021505124866962433, |
|
"step": 9 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5066547393798828, |
|
"debug/policy_chosen_logps": -216.030517578125, |
|
"debug/policy_rejected_logits": -1.4625401496887207, |
|
"debug/policy_rejected_logps": -217.58367919921875, |
|
"debug/reference_chosen_logps": -219.08502197265625, |
|
"debug/reference_rejected_logps": -219.8885955810547, |
|
"epoch": 0.23255813953488372, |
|
"grad_norm": 12.691840821738246, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5066547393798828, |
|
"logits/rejected": -1.4625401496887207, |
|
"logps/chosen": -216.030517578125, |
|
"logps/rejected": -217.58367919921875, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.030545100569725037, |
|
"rewards/margins": 0.007496070582419634, |
|
"rewards/rejected": 0.02304903045296669, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6109825372695923, |
|
"debug/policy_chosen_logps": -194.70681762695312, |
|
"debug/policy_rejected_logits": -1.5127055644989014, |
|
"debug/policy_rejected_logps": -261.20880126953125, |
|
"debug/reference_chosen_logps": -200.0032958984375, |
|
"debug/reference_rejected_logps": -264.6978454589844, |
|
"epoch": 0.2558139534883721, |
|
"grad_norm": 34.88211840288691, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6109825372695923, |
|
"logits/rejected": -1.5127055644989014, |
|
"logps/chosen": -194.70681762695312, |
|
"logps/rejected": -261.20880126953125, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.05296493321657181, |
|
"rewards/margins": 0.018074415624141693, |
|
"rewards/rejected": 0.034890517592430115, |
|
"step": 11 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.644713282585144, |
|
"debug/policy_chosen_logps": -242.20831298828125, |
|
"debug/policy_rejected_logits": -1.6428948640823364, |
|
"debug/policy_rejected_logps": -256.0648498535156, |
|
"debug/reference_chosen_logps": -245.20326232910156, |
|
"debug/reference_rejected_logps": -257.87481689453125, |
|
"epoch": 0.27906976744186046, |
|
"grad_norm": 24.62141741438646, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.644713282585144, |
|
"logits/rejected": -1.6428948640823364, |
|
"logps/chosen": -242.20831298828125, |
|
"logps/rejected": -256.0648498535156, |
|
"loss": 0.507, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.029949625954031944, |
|
"rewards/margins": 0.0118501465767622, |
|
"rewards/rejected": 0.018099479377269745, |
|
"step": 12 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6160894632339478, |
|
"debug/policy_chosen_logps": -227.63302612304688, |
|
"debug/policy_rejected_logits": -1.6384341716766357, |
|
"debug/policy_rejected_logps": -264.2388916015625, |
|
"debug/reference_chosen_logps": -230.62490844726562, |
|
"debug/reference_rejected_logps": -264.9801025390625, |
|
"epoch": 0.3023255813953488, |
|
"grad_norm": 10.776881537717472, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6160894632339478, |
|
"logits/rejected": -1.6384341716766357, |
|
"logps/chosen": -227.63302612304688, |
|
"logps/rejected": -264.2388916015625, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.029918955639004707, |
|
"rewards/margins": 0.022506674751639366, |
|
"rewards/rejected": 0.007412281818687916, |
|
"step": 13 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6279401779174805, |
|
"debug/policy_chosen_logps": -220.282958984375, |
|
"debug/policy_rejected_logits": -1.4893845319747925, |
|
"debug/policy_rejected_logps": -272.10931396484375, |
|
"debug/reference_chosen_logps": -222.31028747558594, |
|
"debug/reference_rejected_logps": -272.71044921875, |
|
"epoch": 0.32558139534883723, |
|
"grad_norm": 10.036756062227226, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6279401779174805, |
|
"logits/rejected": -1.4893845319747925, |
|
"logps/chosen": -220.282958984375, |
|
"logps/rejected": -272.10931396484375, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.020273476839065552, |
|
"rewards/margins": 0.014262351207435131, |
|
"rewards/rejected": 0.006011123303323984, |
|
"step": 14 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5787980556488037, |
|
"debug/policy_chosen_logps": -261.0044250488281, |
|
"debug/policy_rejected_logits": -1.3867720365524292, |
|
"debug/policy_rejected_logps": -301.5718994140625, |
|
"debug/reference_chosen_logps": -258.84735107421875, |
|
"debug/reference_rejected_logps": -297.58404541015625, |
|
"epoch": 0.3488372093023256, |
|
"grad_norm": 10.65558553278192, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5787980556488037, |
|
"logits/rejected": -1.3867720365524292, |
|
"logps/chosen": -261.0044250488281, |
|
"logps/rejected": -301.5718994140625, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.021570798009634018, |
|
"rewards/margins": 0.018307799473404884, |
|
"rewards/rejected": -0.03987859562039375, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5956577062606812, |
|
"debug/policy_chosen_logps": -222.5416259765625, |
|
"debug/policy_rejected_logits": -1.4434815645217896, |
|
"debug/policy_rejected_logps": -279.348388671875, |
|
"debug/reference_chosen_logps": -221.23260498046875, |
|
"debug/reference_rejected_logps": -273.97540283203125, |
|
"epoch": 0.37209302325581395, |
|
"grad_norm": 36.49853384207237, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5956577062606812, |
|
"logits/rejected": -1.4434815645217896, |
|
"logps/chosen": -222.5416259765625, |
|
"logps/rejected": -279.348388671875, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.013090074062347412, |
|
"rewards/margins": 0.040639691054821014, |
|
"rewards/rejected": -0.053729765117168427, |
|
"step": 16 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4420965909957886, |
|
"debug/policy_chosen_logps": -215.4423828125, |
|
"debug/policy_rejected_logits": -1.5232738256454468, |
|
"debug/policy_rejected_logps": -288.1341552734375, |
|
"debug/reference_chosen_logps": -213.68832397460938, |
|
"debug/reference_rejected_logps": -286.45086669921875, |
|
"epoch": 0.3953488372093023, |
|
"grad_norm": 24.203474268576745, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4420965909957886, |
|
"logits/rejected": -1.5232738256454468, |
|
"logps/chosen": -215.4423828125, |
|
"logps/rejected": -288.1341552734375, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.01754041761159897, |
|
"rewards/margins": -0.0007077232003211975, |
|
"rewards/rejected": -0.01683269441127777, |
|
"step": 17 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4279348850250244, |
|
"debug/policy_chosen_logps": -242.89749145507812, |
|
"debug/policy_rejected_logits": -1.3261935710906982, |
|
"debug/policy_rejected_logps": -230.28863525390625, |
|
"debug/reference_chosen_logps": -240.6783447265625, |
|
"debug/reference_rejected_logps": -226.51815795898438, |
|
"epoch": 0.4186046511627907, |
|
"grad_norm": 44.37824041962939, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4279348850250244, |
|
"logits/rejected": -1.3261935710906982, |
|
"logps/chosen": -242.89749145507812, |
|
"logps/rejected": -230.28863525390625, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.022191638126969337, |
|
"rewards/margins": 0.01551321055740118, |
|
"rewards/rejected": -0.03770485147833824, |
|
"step": 18 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.58405339717865, |
|
"debug/policy_chosen_logps": -236.5458984375, |
|
"debug/policy_rejected_logits": -1.5539088249206543, |
|
"debug/policy_rejected_logps": -233.43719482421875, |
|
"debug/reference_chosen_logps": -236.3321533203125, |
|
"debug/reference_rejected_logps": -231.97726440429688, |
|
"epoch": 0.4418604651162791, |
|
"grad_norm": 12.540633658003186, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.58405339717865, |
|
"logits/rejected": -1.5539088249206543, |
|
"logps/chosen": -236.5458984375, |
|
"logps/rejected": -233.43719482421875, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.002137584611773491, |
|
"rewards/margins": 0.012461718171834946, |
|
"rewards/rejected": -0.014599304646253586, |
|
"step": 19 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.466562032699585, |
|
"debug/policy_chosen_logps": -194.52447509765625, |
|
"debug/policy_rejected_logits": -1.4163392782211304, |
|
"debug/policy_rejected_logps": -237.53216552734375, |
|
"debug/reference_chosen_logps": -194.98049926757812, |
|
"debug/reference_rejected_logps": -237.75314331054688, |
|
"epoch": 0.46511627906976744, |
|
"grad_norm": 14.133924214011163, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.466562032699585, |
|
"logits/rejected": -1.4163392782211304, |
|
"logps/chosen": -194.52447509765625, |
|
"logps/rejected": -237.53216552734375, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004560394212603569, |
|
"rewards/margins": 0.00235048308968544, |
|
"rewards/rejected": 0.0022099113557487726, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5900827646255493, |
|
"debug/policy_chosen_logps": -212.4789276123047, |
|
"debug/policy_rejected_logits": -1.4683177471160889, |
|
"debug/policy_rejected_logps": -257.5311279296875, |
|
"debug/reference_chosen_logps": -211.9457550048828, |
|
"debug/reference_rejected_logps": -254.302490234375, |
|
"epoch": 0.4883720930232558, |
|
"grad_norm": 17.882111129846162, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5900827646255493, |
|
"logits/rejected": -1.4683177471160889, |
|
"logps/chosen": -212.4789276123047, |
|
"logps/rejected": -257.5311279296875, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.00533168762922287, |
|
"rewards/margins": 0.026954688131809235, |
|
"rewards/rejected": -0.032286375761032104, |
|
"step": 21 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4656310081481934, |
|
"debug/policy_chosen_logps": -263.0152282714844, |
|
"debug/policy_rejected_logits": -1.3645009994506836, |
|
"debug/policy_rejected_logps": -283.24566650390625, |
|
"debug/reference_chosen_logps": -262.25970458984375, |
|
"debug/reference_rejected_logps": -280.8420715332031, |
|
"epoch": 0.5116279069767442, |
|
"grad_norm": 17.255770475731207, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4656310081481934, |
|
"logits/rejected": -1.3645009994506836, |
|
"logps/chosen": -263.0152282714844, |
|
"logps/rejected": -283.24566650390625, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.007554950192570686, |
|
"rewards/margins": 0.01648113504052162, |
|
"rewards/rejected": -0.02403608150780201, |
|
"step": 22 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4521846771240234, |
|
"debug/policy_chosen_logps": -216.39169311523438, |
|
"debug/policy_rejected_logits": -1.3643300533294678, |
|
"debug/policy_rejected_logps": -281.2818603515625, |
|
"debug/reference_chosen_logps": -223.86587524414062, |
|
"debug/reference_rejected_logps": -278.6108703613281, |
|
"epoch": 0.5348837209302325, |
|
"grad_norm": 22.42065485842657, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4521846771240234, |
|
"logits/rejected": -1.3643300533294678, |
|
"logps/chosen": -216.39169311523438, |
|
"logps/rejected": -281.2818603515625, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.07474187761545181, |
|
"rewards/margins": 0.10145200788974762, |
|
"rewards/rejected": -0.02671012654900551, |
|
"step": 23 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.596596121788025, |
|
"debug/policy_chosen_logps": -240.6717987060547, |
|
"debug/policy_rejected_logits": -1.5593181848526, |
|
"debug/policy_rejected_logps": -339.8119812011719, |
|
"debug/reference_chosen_logps": -242.0146484375, |
|
"debug/reference_rejected_logps": -335.9112548828125, |
|
"epoch": 0.5581395348837209, |
|
"grad_norm": 15.69048628226096, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.596596121788025, |
|
"logits/rejected": -1.5593181848526, |
|
"logps/chosen": -240.6717987060547, |
|
"logps/rejected": -339.8119812011719, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.013428498059511185, |
|
"rewards/margins": 0.05243583396077156, |
|
"rewards/rejected": -0.039007339626550674, |
|
"step": 24 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5627775192260742, |
|
"debug/policy_chosen_logps": -243.03045654296875, |
|
"debug/policy_rejected_logits": -1.5146582126617432, |
|
"debug/policy_rejected_logps": -339.4427490234375, |
|
"debug/reference_chosen_logps": -244.926513671875, |
|
"debug/reference_rejected_logps": -337.1531982421875, |
|
"epoch": 0.5813953488372093, |
|
"grad_norm": 23.599376538578916, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5627775192260742, |
|
"logits/rejected": -1.5146582126617432, |
|
"logps/chosen": -243.03045654296875, |
|
"logps/rejected": -339.4427490234375, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.018960533663630486, |
|
"rewards/margins": 0.04185573384165764, |
|
"rewards/rejected": -0.02289520390331745, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4482909440994263, |
|
"debug/policy_chosen_logps": -243.66656494140625, |
|
"debug/policy_rejected_logits": -1.3864490985870361, |
|
"debug/policy_rejected_logps": -269.7567138671875, |
|
"debug/reference_chosen_logps": -243.5550079345703, |
|
"debug/reference_rejected_logps": -265.46270751953125, |
|
"epoch": 0.6046511627906976, |
|
"grad_norm": 15.856946618319062, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4482909440994263, |
|
"logits/rejected": -1.3864490985870361, |
|
"logps/chosen": -243.66656494140625, |
|
"logps/rejected": -269.7567138671875, |
|
"loss": 0.4718, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.001115493942052126, |
|
"rewards/margins": 0.04182462394237518, |
|
"rewards/rejected": -0.04294012114405632, |
|
"step": 26 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6767849922180176, |
|
"debug/policy_chosen_logps": -235.87283325195312, |
|
"debug/policy_rejected_logits": -1.4402155876159668, |
|
"debug/policy_rejected_logps": -255.25155639648438, |
|
"debug/reference_chosen_logps": -234.12271118164062, |
|
"debug/reference_rejected_logps": -252.92681884765625, |
|
"epoch": 0.627906976744186, |
|
"grad_norm": 26.8171661009806, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6767849922180176, |
|
"logits/rejected": -1.4402155876159668, |
|
"logps/chosen": -235.87283325195312, |
|
"logps/rejected": -255.25155639648438, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.017501164227724075, |
|
"rewards/margins": 0.005746154114603996, |
|
"rewards/rejected": -0.023247316479682922, |
|
"step": 27 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.474959135055542, |
|
"debug/policy_chosen_logps": -250.03805541992188, |
|
"debug/policy_rejected_logits": -1.33174729347229, |
|
"debug/policy_rejected_logps": -275.7778015136719, |
|
"debug/reference_chosen_logps": -246.68450927734375, |
|
"debug/reference_rejected_logps": -271.7251892089844, |
|
"epoch": 0.6511627906976745, |
|
"grad_norm": 23.009106257244806, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.474959135055542, |
|
"logits/rejected": -1.33174729347229, |
|
"logps/chosen": -250.03805541992188, |
|
"logps/rejected": -275.7778015136719, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.03353559225797653, |
|
"rewards/margins": 0.006990719586610794, |
|
"rewards/rejected": -0.040526311844587326, |
|
"step": 28 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5180094242095947, |
|
"debug/policy_chosen_logps": -239.39962768554688, |
|
"debug/policy_rejected_logits": -1.3486112356185913, |
|
"debug/policy_rejected_logps": -280.152587890625, |
|
"debug/reference_chosen_logps": -235.7870635986328, |
|
"debug/reference_rejected_logps": -271.69512939453125, |
|
"epoch": 0.6744186046511628, |
|
"grad_norm": 29.559799359310976, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5180094242095947, |
|
"logits/rejected": -1.3486112356185913, |
|
"logps/chosen": -239.39962768554688, |
|
"logps/rejected": -280.152587890625, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.03612573444843292, |
|
"rewards/margins": 0.04844905436038971, |
|
"rewards/rejected": -0.08457479625940323, |
|
"step": 29 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5323574542999268, |
|
"debug/policy_chosen_logps": -230.34732055664062, |
|
"debug/policy_rejected_logits": -1.4196269512176514, |
|
"debug/policy_rejected_logps": -299.549072265625, |
|
"debug/reference_chosen_logps": -227.33663940429688, |
|
"debug/reference_rejected_logps": -290.41229248046875, |
|
"epoch": 0.6976744186046512, |
|
"grad_norm": 24.104606390659608, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5323574542999268, |
|
"logits/rejected": -1.4196269512176514, |
|
"logps/chosen": -230.34732055664062, |
|
"logps/rejected": -299.549072265625, |
|
"loss": 0.4845, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03010694310069084, |
|
"rewards/margins": 0.0612606406211853, |
|
"rewards/rejected": -0.09136758744716644, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5471208095550537, |
|
"debug/policy_chosen_logps": -233.48435974121094, |
|
"debug/policy_rejected_logits": -1.507702350616455, |
|
"debug/policy_rejected_logps": -299.49298095703125, |
|
"debug/reference_chosen_logps": -230.37808227539062, |
|
"debug/reference_rejected_logps": -290.85491943359375, |
|
"epoch": 0.7209302325581395, |
|
"grad_norm": 11.501362477806966, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5471208095550537, |
|
"logits/rejected": -1.507702350616455, |
|
"logps/chosen": -233.48435974121094, |
|
"logps/rejected": -299.49298095703125, |
|
"loss": 0.4777, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.031062887981534004, |
|
"rewards/margins": 0.05531751364469528, |
|
"rewards/rejected": -0.08638040721416473, |
|
"step": 31 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.3573694229125977, |
|
"debug/policy_chosen_logps": -276.0484619140625, |
|
"debug/policy_rejected_logits": -1.4295967817306519, |
|
"debug/policy_rejected_logps": -251.19253540039062, |
|
"debug/reference_chosen_logps": -269.07147216796875, |
|
"debug/reference_rejected_logps": -245.41119384765625, |
|
"epoch": 0.7441860465116279, |
|
"grad_norm": 44.933121351184646, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.3573694229125977, |
|
"logits/rejected": -1.4295967817306519, |
|
"logps/chosen": -276.0484619140625, |
|
"logps/rejected": -251.19253540039062, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.06977000832557678, |
|
"rewards/margins": -0.011956671252846718, |
|
"rewards/rejected": -0.057813338935375214, |
|
"step": 32 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.532698392868042, |
|
"debug/policy_chosen_logps": -210.01528930664062, |
|
"debug/policy_rejected_logits": -1.5289320945739746, |
|
"debug/policy_rejected_logps": -266.588134765625, |
|
"debug/reference_chosen_logps": -212.3720245361328, |
|
"debug/reference_rejected_logps": -261.5830993652344, |
|
"epoch": 0.7674418604651163, |
|
"grad_norm": 16.92613442301389, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.532698392868042, |
|
"logits/rejected": -1.5289320945739746, |
|
"logps/chosen": -210.01528930664062, |
|
"logps/rejected": -266.588134765625, |
|
"loss": 0.4701, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.023567447438836098, |
|
"rewards/margins": 0.0736178606748581, |
|
"rewards/rejected": -0.050050411373376846, |
|
"step": 33 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6075690984725952, |
|
"debug/policy_chosen_logps": -222.28057861328125, |
|
"debug/policy_rejected_logits": -1.5301072597503662, |
|
"debug/policy_rejected_logps": -231.0042724609375, |
|
"debug/reference_chosen_logps": -221.35081481933594, |
|
"debug/reference_rejected_logps": -232.4139404296875, |
|
"epoch": 0.7906976744186046, |
|
"grad_norm": 30.286832542440518, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6075690984725952, |
|
"logits/rejected": -1.5301072597503662, |
|
"logps/chosen": -222.28057861328125, |
|
"logps/rejected": -231.0042724609375, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.009297618642449379, |
|
"rewards/margins": -0.023394297808408737, |
|
"rewards/rejected": 0.014096679165959358, |
|
"step": 34 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6365246772766113, |
|
"debug/policy_chosen_logps": -214.15689086914062, |
|
"debug/policy_rejected_logits": -1.3568267822265625, |
|
"debug/policy_rejected_logps": -274.84320068359375, |
|
"debug/reference_chosen_logps": -219.61041259765625, |
|
"debug/reference_rejected_logps": -271.06793212890625, |
|
"epoch": 0.813953488372093, |
|
"grad_norm": 10.413515346816709, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6365246772766113, |
|
"logits/rejected": -1.3568267822265625, |
|
"logps/chosen": -214.15689086914062, |
|
"logps/rejected": -274.84320068359375, |
|
"loss": 0.4745, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.05453508347272873, |
|
"rewards/margins": 0.09228822588920593, |
|
"rewards/rejected": -0.0377531424164772, |
|
"step": 35 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4882985353469849, |
|
"debug/policy_chosen_logps": -224.943115234375, |
|
"debug/policy_rejected_logits": -1.5247392654418945, |
|
"debug/policy_rejected_logps": -291.1658020019531, |
|
"debug/reference_chosen_logps": -230.88302612304688, |
|
"debug/reference_rejected_logps": -292.3128967285156, |
|
"epoch": 0.8372093023255814, |
|
"grad_norm": 21.658575078370163, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4882985353469849, |
|
"logits/rejected": -1.5247392654418945, |
|
"logps/chosen": -224.943115234375, |
|
"logps/rejected": -291.1658020019531, |
|
"loss": 0.4603, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.059399355202913284, |
|
"rewards/margins": 0.04792825132608414, |
|
"rewards/rejected": 0.011471100151538849, |
|
"step": 36 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5648789405822754, |
|
"debug/policy_chosen_logps": -232.97958374023438, |
|
"debug/policy_rejected_logits": -1.4898722171783447, |
|
"debug/policy_rejected_logps": -264.453369140625, |
|
"debug/reference_chosen_logps": -236.86109924316406, |
|
"debug/reference_rejected_logps": -266.53448486328125, |
|
"epoch": 0.8604651162790697, |
|
"grad_norm": 13.183542989418173, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5648789405822754, |
|
"logits/rejected": -1.4898722171783447, |
|
"logps/chosen": -232.97958374023438, |
|
"logps/rejected": -264.453369140625, |
|
"loss": 0.4871, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03881513699889183, |
|
"rewards/margins": 0.018004285171628, |
|
"rewards/rejected": 0.020810849964618683, |
|
"step": 37 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.3791043758392334, |
|
"debug/policy_chosen_logps": -228.21205139160156, |
|
"debug/policy_rejected_logits": -1.3903659582138062, |
|
"debug/policy_rejected_logps": -227.752197265625, |
|
"debug/reference_chosen_logps": -230.1770477294922, |
|
"debug/reference_rejected_logps": -230.64239501953125, |
|
"epoch": 0.8837209302325582, |
|
"grad_norm": 17.81188100770416, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.3791043758392334, |
|
"logits/rejected": -1.3903659582138062, |
|
"logps/chosen": -228.21205139160156, |
|
"logps/rejected": -227.752197265625, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.019649982452392578, |
|
"rewards/margins": -0.009251842275261879, |
|
"rewards/rejected": 0.028901822865009308, |
|
"step": 38 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.4482218027114868, |
|
"debug/policy_chosen_logps": -247.0265655517578, |
|
"debug/policy_rejected_logits": -1.3742115497589111, |
|
"debug/policy_rejected_logps": -305.1549072265625, |
|
"debug/reference_chosen_logps": -249.18740844726562, |
|
"debug/reference_rejected_logps": -301.026123046875, |
|
"epoch": 0.9069767441860465, |
|
"grad_norm": 18.246091231595155, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.4482218027114868, |
|
"logits/rejected": -1.3742115497589111, |
|
"logps/chosen": -247.0265655517578, |
|
"logps/rejected": -305.1549072265625, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.021608371287584305, |
|
"rewards/margins": 0.0628962367773056, |
|
"rewards/rejected": -0.0412878580391407, |
|
"step": 39 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5543980598449707, |
|
"debug/policy_chosen_logps": -257.4435729980469, |
|
"debug/policy_rejected_logits": -1.5073050260543823, |
|
"debug/policy_rejected_logps": -296.2988586425781, |
|
"debug/reference_chosen_logps": -256.39544677734375, |
|
"debug/reference_rejected_logps": -288.50433349609375, |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 14.177163201466067, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5543980598449707, |
|
"logits/rejected": -1.5073050260543823, |
|
"logps/chosen": -257.4435729980469, |
|
"logps/rejected": -296.2988586425781, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.010481302626430988, |
|
"rewards/margins": 0.0674639493227005, |
|
"rewards/rejected": -0.07794524729251862, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5510001182556152, |
|
"debug/policy_chosen_logps": -262.91461181640625, |
|
"debug/policy_rejected_logits": -1.4993880987167358, |
|
"debug/policy_rejected_logps": -288.2521667480469, |
|
"debug/reference_chosen_logps": -253.84947204589844, |
|
"debug/reference_rejected_logps": -285.00738525390625, |
|
"epoch": 0.9534883720930233, |
|
"grad_norm": 64.06356256852192, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5510001182556152, |
|
"logits/rejected": -1.4993880987167358, |
|
"logps/chosen": -262.91461181640625, |
|
"logps/rejected": -288.2521667480469, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.125, |
|
"rewards/chosen": -0.09065132588148117, |
|
"rewards/margins": -0.05820371210575104, |
|
"rewards/rejected": -0.032447606325149536, |
|
"step": 41 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6003302335739136, |
|
"debug/policy_chosen_logps": -224.11663818359375, |
|
"debug/policy_rejected_logits": -1.5286082029342651, |
|
"debug/policy_rejected_logps": -271.67974853515625, |
|
"debug/reference_chosen_logps": -221.91209411621094, |
|
"debug/reference_rejected_logps": -258.791015625, |
|
"epoch": 0.9767441860465116, |
|
"grad_norm": 61.90638727730458, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6003302335739136, |
|
"logits/rejected": -1.5286082029342651, |
|
"logps/chosen": -224.11663818359375, |
|
"logps/rejected": -271.67974853515625, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.022045554593205452, |
|
"rewards/margins": 0.10684183239936829, |
|
"rewards/rejected": -0.1288873851299286, |
|
"step": 42 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5732522010803223, |
|
"debug/policy_chosen_logps": -237.1086883544922, |
|
"debug/policy_rejected_logits": -1.405612826347351, |
|
"debug/policy_rejected_logps": -320.40643310546875, |
|
"debug/reference_chosen_logps": -234.65167236328125, |
|
"debug/reference_rejected_logps": -311.38165283203125, |
|
"epoch": 1.0, |
|
"grad_norm": 43.37698691621295, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5732522010803223, |
|
"logits/rejected": -1.405612826347351, |
|
"logps/chosen": -237.1086883544922, |
|
"logps/rejected": -320.40643310546875, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.024570178240537643, |
|
"rewards/margins": 0.065677709877491, |
|
"rewards/rejected": -0.09024789184331894, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 43, |
|
"total_flos": 0.0, |
|
"train_loss": 0.489490317743878, |
|
"train_runtime": 149.7969, |
|
"train_samples_per_second": 18.318, |
|
"train_steps_per_second": 0.287 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 43, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|