|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 51, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": -3.131476879119873, |
|
"debug/policy_chosen_logps": -223.49798583984375, |
|
"debug/policy_rejected_logits": -3.0218234062194824, |
|
"debug/policy_rejected_logps": -181.94036865234375, |
|
"debug/reference_chosen_logps": -223.49798583984375, |
|
"debug/reference_rejected_logps": -181.94036865234375, |
|
"epoch": 0.0196078431372549, |
|
"grad_norm": 9.59268936350444, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.131476879119873, |
|
"logits/rejected": -3.0218234062194824, |
|
"logps/chosen": -223.49798583984375, |
|
"logps/rejected": -181.94036865234375, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1443662643432617, |
|
"debug/policy_chosen_logps": -209.216552734375, |
|
"debug/policy_rejected_logits": -3.076768159866333, |
|
"debug/policy_rejected_logps": -170.2884521484375, |
|
"debug/reference_chosen_logps": -209.07872009277344, |
|
"debug/reference_rejected_logps": -169.68731689453125, |
|
"epoch": 0.0392156862745098, |
|
"grad_norm": 8.71018223332819, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1443662643432617, |
|
"logits/rejected": -3.076768159866333, |
|
"logps/chosen": -209.216552734375, |
|
"logps/rejected": -170.2884521484375, |
|
"loss": 0.4974, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.001378459855914116, |
|
"rewards/margins": 0.004632873460650444, |
|
"rewards/rejected": -0.00601133331656456, |
|
"step": 2 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.135432243347168, |
|
"debug/policy_chosen_logps": -203.98123168945312, |
|
"debug/policy_rejected_logits": -3.058173179626465, |
|
"debug/policy_rejected_logps": -171.8382568359375, |
|
"debug/reference_chosen_logps": -206.17086791992188, |
|
"debug/reference_rejected_logps": -172.7147216796875, |
|
"epoch": 0.058823529411764705, |
|
"grad_norm": 11.448652871780954, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.135432243347168, |
|
"logits/rejected": -3.058173179626465, |
|
"logps/chosen": -203.98123168945312, |
|
"logps/rejected": -171.8382568359375, |
|
"loss": 0.4898, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02189634181559086, |
|
"rewards/margins": 0.013131675310432911, |
|
"rewards/rejected": 0.008764667436480522, |
|
"step": 3 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.0565543174743652, |
|
"debug/policy_chosen_logps": -206.8490447998047, |
|
"debug/policy_rejected_logits": -2.9550375938415527, |
|
"debug/policy_rejected_logps": -174.601318359375, |
|
"debug/reference_chosen_logps": -206.09422302246094, |
|
"debug/reference_rejected_logps": -172.06332397460938, |
|
"epoch": 0.0784313725490196, |
|
"grad_norm": 8.681717474563778, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.0565543174743652, |
|
"logits/rejected": -2.9550375938415527, |
|
"logps/chosen": -206.8490447998047, |
|
"logps/rejected": -174.601318359375, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.007548102643340826, |
|
"rewards/margins": 0.017831895500421524, |
|
"rewards/rejected": -0.025379998609423637, |
|
"step": 4 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.0399742126464844, |
|
"debug/policy_chosen_logps": -206.2752227783203, |
|
"debug/policy_rejected_logits": -2.9232938289642334, |
|
"debug/policy_rejected_logps": -156.70419311523438, |
|
"debug/reference_chosen_logps": -205.56788635253906, |
|
"debug/reference_rejected_logps": -150.99920654296875, |
|
"epoch": 0.09803921568627451, |
|
"grad_norm": 8.125347499138313, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.0399742126464844, |
|
"logits/rejected": -2.9232938289642334, |
|
"logps/chosen": -206.2752227783203, |
|
"logps/rejected": -156.70419311523438, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00707334466278553, |
|
"rewards/margins": 0.04997648298740387, |
|
"rewards/rejected": -0.05704982578754425, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.2029356956481934, |
|
"debug/policy_chosen_logps": -206.69122314453125, |
|
"debug/policy_rejected_logits": -3.122507095336914, |
|
"debug/policy_rejected_logps": -199.9852752685547, |
|
"debug/reference_chosen_logps": -210.3363494873047, |
|
"debug/reference_rejected_logps": -200.11654663085938, |
|
"epoch": 0.11764705882352941, |
|
"grad_norm": 10.6811806285762, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.2029356956481934, |
|
"logits/rejected": -3.122507095336914, |
|
"logps/chosen": -206.69122314453125, |
|
"logps/rejected": -199.9852752685547, |
|
"loss": 0.4523, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03645110875368118, |
|
"rewards/margins": 0.03513820841908455, |
|
"rewards/rejected": 0.0013129040598869324, |
|
"step": 6 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.202118396759033, |
|
"debug/policy_chosen_logps": -198.13885498046875, |
|
"debug/policy_rejected_logits": -2.9787750244140625, |
|
"debug/policy_rejected_logps": -169.29666137695312, |
|
"debug/reference_chosen_logps": -199.79531860351562, |
|
"debug/reference_rejected_logps": -164.98037719726562, |
|
"epoch": 0.13725490196078433, |
|
"grad_norm": 7.511392268060962, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.202118396759033, |
|
"logits/rejected": -2.9787750244140625, |
|
"logps/chosen": -198.13885498046875, |
|
"logps/rejected": -169.29666137695312, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.01656484603881836, |
|
"rewards/margins": 0.05972753465175629, |
|
"rewards/rejected": -0.04316268861293793, |
|
"step": 7 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1585052013397217, |
|
"debug/policy_chosen_logps": -209.9014892578125, |
|
"debug/policy_rejected_logits": -3.147646427154541, |
|
"debug/policy_rejected_logps": -184.0531005859375, |
|
"debug/reference_chosen_logps": -213.74742126464844, |
|
"debug/reference_rejected_logps": -184.40182495117188, |
|
"epoch": 0.1568627450980392, |
|
"grad_norm": 9.384905399946895, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1585052013397217, |
|
"logits/rejected": -3.147646427154541, |
|
"logps/chosen": -209.9014892578125, |
|
"logps/rejected": -184.0531005859375, |
|
"loss": 0.4553, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03845922276377678, |
|
"rewards/margins": 0.03497195988893509, |
|
"rewards/rejected": 0.00348726287484169, |
|
"step": 8 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.219722032546997, |
|
"debug/policy_chosen_logps": -204.11474609375, |
|
"debug/policy_rejected_logits": -3.0252864360809326, |
|
"debug/policy_rejected_logps": -165.35894775390625, |
|
"debug/reference_chosen_logps": -207.07034301757812, |
|
"debug/reference_rejected_logps": -154.7605438232422, |
|
"epoch": 0.17647058823529413, |
|
"grad_norm": 6.128726988165846, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.219722032546997, |
|
"logits/rejected": -3.0252864360809326, |
|
"logps/chosen": -204.11474609375, |
|
"logps/rejected": -165.35894775390625, |
|
"loss": 0.4068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02955583482980728, |
|
"rewards/margins": 0.1355399787425995, |
|
"rewards/rejected": -0.1059841513633728, |
|
"step": 9 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3743577003479004, |
|
"debug/policy_chosen_logps": -182.16360473632812, |
|
"debug/policy_rejected_logits": -2.9649839401245117, |
|
"debug/policy_rejected_logps": -173.23670959472656, |
|
"debug/reference_chosen_logps": -190.21826171875, |
|
"debug/reference_rejected_logps": -162.2075958251953, |
|
"epoch": 0.19607843137254902, |
|
"grad_norm": 6.364236944929035, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3743577003479004, |
|
"logits/rejected": -2.9649839401245117, |
|
"logps/chosen": -182.16360473632812, |
|
"logps/rejected": -173.23670959472656, |
|
"loss": 0.3573, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08054651319980621, |
|
"rewards/margins": 0.19083772599697113, |
|
"rewards/rejected": -0.11029121279716492, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.173790454864502, |
|
"debug/policy_chosen_logps": -203.52264404296875, |
|
"debug/policy_rejected_logits": -2.949070692062378, |
|
"debug/policy_rejected_logps": -178.64117431640625, |
|
"debug/reference_chosen_logps": -212.055908203125, |
|
"debug/reference_rejected_logps": -169.71719360351562, |
|
"epoch": 0.21568627450980393, |
|
"grad_norm": 5.353556279413172, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.173790454864502, |
|
"logits/rejected": -2.949070692062378, |
|
"logps/chosen": -203.52264404296875, |
|
"logps/rejected": -178.64117431640625, |
|
"loss": 0.3634, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.08533257991075516, |
|
"rewards/margins": 0.1745723932981491, |
|
"rewards/rejected": -0.08923980593681335, |
|
"step": 11 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.277130126953125, |
|
"debug/policy_chosen_logps": -199.06358337402344, |
|
"debug/policy_rejected_logits": -3.132725954055786, |
|
"debug/policy_rejected_logps": -179.83506774902344, |
|
"debug/reference_chosen_logps": -208.1540985107422, |
|
"debug/reference_rejected_logps": -177.0548095703125, |
|
"epoch": 0.23529411764705882, |
|
"grad_norm": 5.672030188679155, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.277130126953125, |
|
"logits/rejected": -3.132725954055786, |
|
"logps/chosen": -199.06358337402344, |
|
"logps/rejected": -179.83506774902344, |
|
"loss": 0.3502, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.09090512990951538, |
|
"rewards/margins": 0.11870768666267395, |
|
"rewards/rejected": -0.027802541851997375, |
|
"step": 12 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1242167949676514, |
|
"debug/policy_chosen_logps": -191.25042724609375, |
|
"debug/policy_rejected_logits": -2.9863691329956055, |
|
"debug/policy_rejected_logps": -175.88369750976562, |
|
"debug/reference_chosen_logps": -196.898193359375, |
|
"debug/reference_rejected_logps": -166.8399658203125, |
|
"epoch": 0.2549019607843137, |
|
"grad_norm": 5.205320381513838, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1242167949676514, |
|
"logits/rejected": -2.9863691329956055, |
|
"logps/chosen": -191.25042724609375, |
|
"logps/rejected": -175.88369750976562, |
|
"loss": 0.37, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.056477658450603485, |
|
"rewards/margins": 0.14691495895385742, |
|
"rewards/rejected": -0.09043729305267334, |
|
"step": 13 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.144406318664551, |
|
"debug/policy_chosen_logps": -218.8365478515625, |
|
"debug/policy_rejected_logits": -2.8921687602996826, |
|
"debug/policy_rejected_logps": -180.90426635742188, |
|
"debug/reference_chosen_logps": -229.2056884765625, |
|
"debug/reference_rejected_logps": -170.25051879882812, |
|
"epoch": 0.27450980392156865, |
|
"grad_norm": 5.1865412600469725, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.144406318664551, |
|
"logits/rejected": -2.8921687602996826, |
|
"logps/chosen": -218.8365478515625, |
|
"logps/rejected": -180.90426635742188, |
|
"loss": 0.3456, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.10369150340557098, |
|
"rewards/margins": 0.21022894978523254, |
|
"rewards/rejected": -0.10653746128082275, |
|
"step": 14 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.2248146533966064, |
|
"debug/policy_chosen_logps": -201.5963134765625, |
|
"debug/policy_rejected_logits": -2.995159149169922, |
|
"debug/policy_rejected_logps": -184.41653442382812, |
|
"debug/reference_chosen_logps": -210.53994750976562, |
|
"debug/reference_rejected_logps": -177.02459716796875, |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 5.281953187102778, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.2248146533966064, |
|
"logits/rejected": -2.995159149169922, |
|
"logps/chosen": -201.5963134765625, |
|
"logps/rejected": -184.41653442382812, |
|
"loss": 0.3298, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08943626284599304, |
|
"rewards/margins": 0.163355752825737, |
|
"rewards/rejected": -0.07391948252916336, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.152334213256836, |
|
"debug/policy_chosen_logps": -203.98538208007812, |
|
"debug/policy_rejected_logits": -3.0059425830841064, |
|
"debug/policy_rejected_logps": -174.17031860351562, |
|
"debug/reference_chosen_logps": -212.34693908691406, |
|
"debug/reference_rejected_logps": -161.72183227539062, |
|
"epoch": 0.3137254901960784, |
|
"grad_norm": 4.990441768681764, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.152334213256836, |
|
"logits/rejected": -3.0059425830841064, |
|
"logps/chosen": -203.98538208007812, |
|
"logps/rejected": -174.17031860351562, |
|
"loss": 0.3315, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.08361560851335526, |
|
"rewards/margins": 0.2081003040075302, |
|
"rewards/rejected": -0.12448470294475555, |
|
"step": 16 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3237087726593018, |
|
"debug/policy_chosen_logps": -197.6831512451172, |
|
"debug/policy_rejected_logits": -3.1152756214141846, |
|
"debug/policy_rejected_logps": -185.88690185546875, |
|
"debug/reference_chosen_logps": -209.38027954101562, |
|
"debug/reference_rejected_logps": -171.8429412841797, |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 3.847170282499073, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3237087726593018, |
|
"logits/rejected": -3.1152756214141846, |
|
"logps/chosen": -197.6831512451172, |
|
"logps/rejected": -185.88690185546875, |
|
"loss": 0.3262, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.11697111278772354, |
|
"rewards/margins": 0.25741061568260193, |
|
"rewards/rejected": -0.14043951034545898, |
|
"step": 17 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.423741102218628, |
|
"debug/policy_chosen_logps": -197.50125122070312, |
|
"debug/policy_rejected_logits": -3.0541563034057617, |
|
"debug/policy_rejected_logps": -194.04254150390625, |
|
"debug/reference_chosen_logps": -214.143310546875, |
|
"debug/reference_rejected_logps": -176.38931274414062, |
|
"epoch": 0.35294117647058826, |
|
"grad_norm": 4.6640459530058145, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.423741102218628, |
|
"logits/rejected": -3.0541563034057617, |
|
"logps/chosen": -197.50125122070312, |
|
"logps/rejected": -194.04254150390625, |
|
"loss": 0.2745, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.16642045974731445, |
|
"rewards/margins": 0.3429526686668396, |
|
"rewards/rejected": -0.17653217911720276, |
|
"step": 18 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1585283279418945, |
|
"debug/policy_chosen_logps": -205.2724151611328, |
|
"debug/policy_rejected_logits": -2.9066219329833984, |
|
"debug/policy_rejected_logps": -190.3876953125, |
|
"debug/reference_chosen_logps": -227.2172088623047, |
|
"debug/reference_rejected_logps": -159.18490600585938, |
|
"epoch": 0.37254901960784315, |
|
"grad_norm": 4.373436851253389, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1585283279418945, |
|
"logits/rejected": -2.9066219329833984, |
|
"logps/chosen": -205.2724151611328, |
|
"logps/rejected": -190.3876953125, |
|
"loss": 0.2051, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2194480001926422, |
|
"rewards/margins": 0.5314759016036987, |
|
"rewards/rejected": -0.3120279312133789, |
|
"step": 19 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3925397396087646, |
|
"debug/policy_chosen_logps": -182.79222106933594, |
|
"debug/policy_rejected_logits": -2.9614052772521973, |
|
"debug/policy_rejected_logps": -192.5382843017578, |
|
"debug/reference_chosen_logps": -208.93617248535156, |
|
"debug/reference_rejected_logps": -170.37913513183594, |
|
"epoch": 0.39215686274509803, |
|
"grad_norm": 7.041418694876974, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3925397396087646, |
|
"logits/rejected": -2.9614052772521973, |
|
"logps/chosen": -182.79222106933594, |
|
"logps/rejected": -192.5382843017578, |
|
"loss": 0.2688, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2614395320415497, |
|
"rewards/margins": 0.48303085565567017, |
|
"rewards/rejected": -0.22159132361412048, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3081858158111572, |
|
"debug/policy_chosen_logps": -170.76687622070312, |
|
"debug/policy_rejected_logits": -2.9337339401245117, |
|
"debug/policy_rejected_logps": -190.08706665039062, |
|
"debug/reference_chosen_logps": -194.40255737304688, |
|
"debug/reference_rejected_logps": -161.28668212890625, |
|
"epoch": 0.4117647058823529, |
|
"grad_norm": 5.913800772738065, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3081858158111572, |
|
"logits/rejected": -2.9337339401245117, |
|
"logps/chosen": -170.76687622070312, |
|
"logps/rejected": -190.08706665039062, |
|
"loss": 0.2346, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2363569736480713, |
|
"rewards/margins": 0.5243606567382812, |
|
"rewards/rejected": -0.28800368309020996, |
|
"step": 21 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.262423276901245, |
|
"debug/policy_chosen_logps": -174.46621704101562, |
|
"debug/policy_rejected_logits": -2.9751062393188477, |
|
"debug/policy_rejected_logps": -194.79879760742188, |
|
"debug/reference_chosen_logps": -195.777587890625, |
|
"debug/reference_rejected_logps": -166.50228881835938, |
|
"epoch": 0.43137254901960786, |
|
"grad_norm": 3.6878540395392996, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.262423276901245, |
|
"logits/rejected": -2.9751062393188477, |
|
"logps/chosen": -174.46621704101562, |
|
"logps/rejected": -194.79879760742188, |
|
"loss": 0.1804, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.21311378479003906, |
|
"rewards/margins": 0.4960786700248718, |
|
"rewards/rejected": -0.28296488523483276, |
|
"step": 22 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.284288167953491, |
|
"debug/policy_chosen_logps": -201.29129028320312, |
|
"debug/policy_rejected_logits": -3.055243730545044, |
|
"debug/policy_rejected_logps": -189.35101318359375, |
|
"debug/reference_chosen_logps": -227.23031616210938, |
|
"debug/reference_rejected_logps": -170.58262634277344, |
|
"epoch": 0.45098039215686275, |
|
"grad_norm": 3.4160494981935057, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.284288167953491, |
|
"logits/rejected": -3.055243730545044, |
|
"logps/chosen": -201.29129028320312, |
|
"logps/rejected": -189.35101318359375, |
|
"loss": 0.2467, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2593901753425598, |
|
"rewards/margins": 0.44707390666007996, |
|
"rewards/rejected": -0.18768377602100372, |
|
"step": 23 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3582632541656494, |
|
"debug/policy_chosen_logps": -180.80130004882812, |
|
"debug/policy_rejected_logits": -2.9310052394866943, |
|
"debug/policy_rejected_logps": -198.58038330078125, |
|
"debug/reference_chosen_logps": -208.0858917236328, |
|
"debug/reference_rejected_logps": -156.55642700195312, |
|
"epoch": 0.47058823529411764, |
|
"grad_norm": 4.3584453143571515, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3582632541656494, |
|
"logits/rejected": -2.9310052394866943, |
|
"logps/chosen": -180.80130004882812, |
|
"logps/rejected": -198.58038330078125, |
|
"loss": 0.1894, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2728460729122162, |
|
"rewards/margins": 0.6930855512619019, |
|
"rewards/rejected": -0.42023950815200806, |
|
"step": 24 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.294647455215454, |
|
"debug/policy_chosen_logps": -200.66555786132812, |
|
"debug/policy_rejected_logits": -2.9623827934265137, |
|
"debug/policy_rejected_logps": -209.60763549804688, |
|
"debug/reference_chosen_logps": -226.37498474121094, |
|
"debug/reference_rejected_logps": -175.77857971191406, |
|
"epoch": 0.49019607843137253, |
|
"grad_norm": 5.190786042631128, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.294647455215454, |
|
"logits/rejected": -2.9623827934265137, |
|
"logps/chosen": -200.66555786132812, |
|
"logps/rejected": -209.60763549804688, |
|
"loss": 0.2067, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.257094144821167, |
|
"rewards/margins": 0.5953845977783203, |
|
"rewards/rejected": -0.3382904529571533, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.303532123565674, |
|
"debug/policy_chosen_logps": -172.21371459960938, |
|
"debug/policy_rejected_logits": -3.037935495376587, |
|
"debug/policy_rejected_logps": -192.28829956054688, |
|
"debug/reference_chosen_logps": -197.53707885742188, |
|
"debug/reference_rejected_logps": -173.1179656982422, |
|
"epoch": 0.5098039215686274, |
|
"grad_norm": 3.424028522129365, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.303532123565674, |
|
"logits/rejected": -3.037935495376587, |
|
"logps/chosen": -172.21371459960938, |
|
"logps/rejected": -192.28829956054688, |
|
"loss": 0.2072, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.25323352217674255, |
|
"rewards/margins": 0.44493675231933594, |
|
"rewards/rejected": -0.19170325994491577, |
|
"step": 26 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.2078473567962646, |
|
"debug/policy_chosen_logps": -185.5386199951172, |
|
"debug/policy_rejected_logits": -3.058324098587036, |
|
"debug/policy_rejected_logps": -181.49124145507812, |
|
"debug/reference_chosen_logps": -216.10214233398438, |
|
"debug/reference_rejected_logps": -166.07806396484375, |
|
"epoch": 0.5294117647058824, |
|
"grad_norm": 3.2542715272608427, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.2078473567962646, |
|
"logits/rejected": -3.058324098587036, |
|
"logps/chosen": -185.5386199951172, |
|
"logps/rejected": -181.49124145507812, |
|
"loss": 0.1898, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3056352138519287, |
|
"rewards/margins": 0.45976707339286804, |
|
"rewards/rejected": -0.15413185954093933, |
|
"step": 27 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.1834847927093506, |
|
"debug/policy_chosen_logps": -189.82064819335938, |
|
"debug/policy_rejected_logits": -3.0259480476379395, |
|
"debug/policy_rejected_logps": -192.5255126953125, |
|
"debug/reference_chosen_logps": -225.47145080566406, |
|
"debug/reference_rejected_logps": -174.45132446289062, |
|
"epoch": 0.5490196078431373, |
|
"grad_norm": 5.011972117551199, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.1834847927093506, |
|
"logits/rejected": -3.0259480476379395, |
|
"logps/chosen": -189.82064819335938, |
|
"logps/rejected": -192.5255126953125, |
|
"loss": 0.2206, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3565079867839813, |
|
"rewards/margins": 0.5372498631477356, |
|
"rewards/rejected": -0.18074187636375427, |
|
"step": 28 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.439246416091919, |
|
"debug/policy_chosen_logps": -161.62721252441406, |
|
"debug/policy_rejected_logits": -2.9180073738098145, |
|
"debug/policy_rejected_logps": -201.15945434570312, |
|
"debug/reference_chosen_logps": -197.59341430664062, |
|
"debug/reference_rejected_logps": -162.81692504882812, |
|
"epoch": 0.5686274509803921, |
|
"grad_norm": 2.4188165120482044, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.439246416091919, |
|
"logits/rejected": -2.9180073738098145, |
|
"logps/chosen": -161.62721252441406, |
|
"logps/rejected": -201.15945434570312, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.35966184735298157, |
|
"rewards/margins": 0.7430870532989502, |
|
"rewards/rejected": -0.38342520594596863, |
|
"step": 29 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3921971321105957, |
|
"debug/policy_chosen_logps": -167.86477661132812, |
|
"debug/policy_rejected_logits": -3.1083662509918213, |
|
"debug/policy_rejected_logps": -186.5076141357422, |
|
"debug/reference_chosen_logps": -202.75399780273438, |
|
"debug/reference_rejected_logps": -165.93167114257812, |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 5.111153930194959, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3921971321105957, |
|
"logits/rejected": -3.1083662509918213, |
|
"logps/chosen": -167.86477661132812, |
|
"logps/rejected": -186.5076141357422, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3488922417163849, |
|
"rewards/margins": 0.5546516180038452, |
|
"rewards/rejected": -0.20575937628746033, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.296841621398926, |
|
"debug/policy_chosen_logps": -186.3404541015625, |
|
"debug/policy_rejected_logits": -3.0427982807159424, |
|
"debug/policy_rejected_logps": -189.97247314453125, |
|
"debug/reference_chosen_logps": -225.02894592285156, |
|
"debug/reference_rejected_logps": -176.0540771484375, |
|
"epoch": 0.6078431372549019, |
|
"grad_norm": 5.753832158358882, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.296841621398926, |
|
"logits/rejected": -3.0427982807159424, |
|
"logps/chosen": -186.3404541015625, |
|
"logps/rejected": -189.97247314453125, |
|
"loss": 0.2514, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3868849575519562, |
|
"rewards/margins": 0.5260686874389648, |
|
"rewards/rejected": -0.13918372988700867, |
|
"step": 31 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.241013765335083, |
|
"debug/policy_chosen_logps": -183.28062438964844, |
|
"debug/policy_rejected_logits": -3.004754066467285, |
|
"debug/policy_rejected_logps": -206.055419921875, |
|
"debug/reference_chosen_logps": -215.31048583984375, |
|
"debug/reference_rejected_logps": -162.4297637939453, |
|
"epoch": 0.6274509803921569, |
|
"grad_norm": 2.9239290358904837, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.241013765335083, |
|
"logits/rejected": -3.004754066467285, |
|
"logps/chosen": -183.28062438964844, |
|
"logps/rejected": -206.055419921875, |
|
"loss": 0.1757, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.32029855251312256, |
|
"rewards/margins": 0.7565551996231079, |
|
"rewards/rejected": -0.43625661730766296, |
|
"step": 32 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3382985591888428, |
|
"debug/policy_chosen_logps": -180.47259521484375, |
|
"debug/policy_rejected_logits": -3.019533395767212, |
|
"debug/policy_rejected_logps": -206.90989685058594, |
|
"debug/reference_chosen_logps": -214.73683166503906, |
|
"debug/reference_rejected_logps": -155.3048095703125, |
|
"epoch": 0.6470588235294118, |
|
"grad_norm": 3.2393949291557003, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3382985591888428, |
|
"logits/rejected": -3.019533395767212, |
|
"logps/chosen": -180.47259521484375, |
|
"logps/rejected": -206.90989685058594, |
|
"loss": 0.2186, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3426423668861389, |
|
"rewards/margins": 0.8586931228637695, |
|
"rewards/rejected": -0.5160508155822754, |
|
"step": 33 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.25339412689209, |
|
"debug/policy_chosen_logps": -177.0988006591797, |
|
"debug/policy_rejected_logits": -2.986337900161743, |
|
"debug/policy_rejected_logps": -201.41714477539062, |
|
"debug/reference_chosen_logps": -204.36630249023438, |
|
"debug/reference_rejected_logps": -173.47714233398438, |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 4.241231231386345, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.25339412689209, |
|
"logits/rejected": -2.986337900161743, |
|
"logps/chosen": -177.0988006591797, |
|
"logps/rejected": -201.41714477539062, |
|
"loss": 0.1937, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2726749777793884, |
|
"rewards/margins": 0.5520750284194946, |
|
"rewards/rejected": -0.2794000804424286, |
|
"step": 34 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.251969337463379, |
|
"debug/policy_chosen_logps": -190.11709594726562, |
|
"debug/policy_rejected_logits": -3.0741655826568604, |
|
"debug/policy_rejected_logps": -194.2032470703125, |
|
"debug/reference_chosen_logps": -217.17962646484375, |
|
"debug/reference_rejected_logps": -160.67848205566406, |
|
"epoch": 0.6862745098039216, |
|
"grad_norm": 4.744228039712321, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.251969337463379, |
|
"logits/rejected": -3.0741655826568604, |
|
"logps/chosen": -190.11709594726562, |
|
"logps/rejected": -194.2032470703125, |
|
"loss": 0.1497, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2706252336502075, |
|
"rewards/margins": 0.6058727502822876, |
|
"rewards/rejected": -0.3352475166320801, |
|
"step": 35 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3483757972717285, |
|
"debug/policy_chosen_logps": -179.4599609375, |
|
"debug/policy_rejected_logits": -3.1370773315429688, |
|
"debug/policy_rejected_logps": -190.57760620117188, |
|
"debug/reference_chosen_logps": -211.63006591796875, |
|
"debug/reference_rejected_logps": -169.59912109375, |
|
"epoch": 0.7058823529411765, |
|
"grad_norm": 3.186431463787634, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3483757972717285, |
|
"logits/rejected": -3.1370773315429688, |
|
"logps/chosen": -179.4599609375, |
|
"logps/rejected": -190.57760620117188, |
|
"loss": 0.2442, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.32170116901397705, |
|
"rewards/margins": 0.531485915184021, |
|
"rewards/rejected": -0.20978482067584991, |
|
"step": 36 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.2231926918029785, |
|
"debug/policy_chosen_logps": -172.751220703125, |
|
"debug/policy_rejected_logits": -3.042171001434326, |
|
"debug/policy_rejected_logps": -192.6194610595703, |
|
"debug/reference_chosen_logps": -204.07984924316406, |
|
"debug/reference_rejected_logps": -162.96307373046875, |
|
"epoch": 0.7254901960784313, |
|
"grad_norm": 3.76917946103672, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.2231926918029785, |
|
"logits/rejected": -3.042171001434326, |
|
"logps/chosen": -172.751220703125, |
|
"logps/rejected": -192.6194610595703, |
|
"loss": 0.222, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31328636407852173, |
|
"rewards/margins": 0.609850287437439, |
|
"rewards/rejected": -0.29656392335891724, |
|
"step": 37 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.370556116104126, |
|
"debug/policy_chosen_logps": -179.37933349609375, |
|
"debug/policy_rejected_logits": -2.9038238525390625, |
|
"debug/policy_rejected_logps": -197.517333984375, |
|
"debug/reference_chosen_logps": -211.8248291015625, |
|
"debug/reference_rejected_logps": -159.6433563232422, |
|
"epoch": 0.7450980392156863, |
|
"grad_norm": 3.7765583713304034, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.370556116104126, |
|
"logits/rejected": -2.9038238525390625, |
|
"logps/chosen": -179.37933349609375, |
|
"logps/rejected": -197.517333984375, |
|
"loss": 0.214, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3244548439979553, |
|
"rewards/margins": 0.7031944990158081, |
|
"rewards/rejected": -0.3787396550178528, |
|
"step": 38 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.253465414047241, |
|
"debug/policy_chosen_logps": -188.2485809326172, |
|
"debug/policy_rejected_logits": -3.051933526992798, |
|
"debug/policy_rejected_logps": -186.41270446777344, |
|
"debug/reference_chosen_logps": -218.99688720703125, |
|
"debug/reference_rejected_logps": -167.86203002929688, |
|
"epoch": 0.7647058823529411, |
|
"grad_norm": 3.533915411617125, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.253465414047241, |
|
"logits/rejected": -3.051933526992798, |
|
"logps/chosen": -188.2485809326172, |
|
"logps/rejected": -186.41270446777344, |
|
"loss": 0.2102, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3074829876422882, |
|
"rewards/margins": 0.492989718914032, |
|
"rewards/rejected": -0.18550674617290497, |
|
"step": 39 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.397189140319824, |
|
"debug/policy_chosen_logps": -184.84124755859375, |
|
"debug/policy_rejected_logits": -2.951024055480957, |
|
"debug/policy_rejected_logps": -205.2071533203125, |
|
"debug/reference_chosen_logps": -216.28897094726562, |
|
"debug/reference_rejected_logps": -167.5351104736328, |
|
"epoch": 0.7843137254901961, |
|
"grad_norm": 4.343119675678065, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.397189140319824, |
|
"logits/rejected": -2.951024055480957, |
|
"logps/chosen": -184.84124755859375, |
|
"logps/rejected": -205.2071533203125, |
|
"loss": 0.1484, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3144773840904236, |
|
"rewards/margins": 0.6911977529525757, |
|
"rewards/rejected": -0.3767203688621521, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.4680333137512207, |
|
"debug/policy_chosen_logps": -174.45204162597656, |
|
"debug/policy_rejected_logits": -3.177816867828369, |
|
"debug/policy_rejected_logps": -200.5006103515625, |
|
"debug/reference_chosen_logps": -201.8401641845703, |
|
"debug/reference_rejected_logps": -168.33837890625, |
|
"epoch": 0.803921568627451, |
|
"grad_norm": 4.7398844072134, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.4680333137512207, |
|
"logits/rejected": -3.177816867828369, |
|
"logps/chosen": -174.45204162597656, |
|
"logps/rejected": -200.5006103515625, |
|
"loss": 0.1898, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.273881196975708, |
|
"rewards/margins": 0.595503568649292, |
|
"rewards/rejected": -0.3216223418712616, |
|
"step": 41 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3945956230163574, |
|
"debug/policy_chosen_logps": -172.79193115234375, |
|
"debug/policy_rejected_logits": -3.0443050861358643, |
|
"debug/policy_rejected_logps": -194.8880615234375, |
|
"debug/reference_chosen_logps": -198.82882690429688, |
|
"debug/reference_rejected_logps": -170.7233123779297, |
|
"epoch": 0.8235294117647058, |
|
"grad_norm": 5.166045026790051, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3945956230163574, |
|
"logits/rejected": -3.0443050861358643, |
|
"logps/chosen": -172.79193115234375, |
|
"logps/rejected": -194.8880615234375, |
|
"loss": 0.1759, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.26036909222602844, |
|
"rewards/margins": 0.5020167827606201, |
|
"rewards/rejected": -0.24164767563343048, |
|
"step": 42 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.366682291030884, |
|
"debug/policy_chosen_logps": -174.59375, |
|
"debug/policy_rejected_logits": -3.141162157058716, |
|
"debug/policy_rejected_logps": -200.96633911132812, |
|
"debug/reference_chosen_logps": -205.5618438720703, |
|
"debug/reference_rejected_logps": -192.6947021484375, |
|
"epoch": 0.8431372549019608, |
|
"grad_norm": 3.316025693224064, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.366682291030884, |
|
"logits/rejected": -3.141162157058716, |
|
"logps/chosen": -174.59375, |
|
"logps/rejected": -200.96633911132812, |
|
"loss": 0.1598, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3096809387207031, |
|
"rewards/margins": 0.3923972249031067, |
|
"rewards/rejected": -0.08271628618240356, |
|
"step": 43 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.4701032638549805, |
|
"debug/policy_chosen_logps": -172.69961547851562, |
|
"debug/policy_rejected_logits": -3.0816352367401123, |
|
"debug/policy_rejected_logps": -192.14527893066406, |
|
"debug/reference_chosen_logps": -203.4981231689453, |
|
"debug/reference_rejected_logps": -165.00967407226562, |
|
"epoch": 0.8627450980392157, |
|
"grad_norm": 5.732457245614043, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.4701032638549805, |
|
"logits/rejected": -3.0816352367401123, |
|
"logps/chosen": -172.69961547851562, |
|
"logps/rejected": -192.14527893066406, |
|
"loss": 0.2551, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.30798494815826416, |
|
"rewards/margins": 0.5793408155441284, |
|
"rewards/rejected": -0.27135586738586426, |
|
"step": 44 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3291234970092773, |
|
"debug/policy_chosen_logps": -170.3388671875, |
|
"debug/policy_rejected_logits": -2.937793493270874, |
|
"debug/policy_rejected_logps": -192.89581298828125, |
|
"debug/reference_chosen_logps": -202.19802856445312, |
|
"debug/reference_rejected_logps": -159.46893310546875, |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 5.771805579285295, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3291234970092773, |
|
"logits/rejected": -2.937793493270874, |
|
"logps/chosen": -170.3388671875, |
|
"logps/rejected": -192.89581298828125, |
|
"loss": 0.2151, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.31859153509140015, |
|
"rewards/margins": 0.6528602242469788, |
|
"rewards/rejected": -0.334268718957901, |
|
"step": 45 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.226428270339966, |
|
"debug/policy_chosen_logps": -187.73739624023438, |
|
"debug/policy_rejected_logits": -2.986654043197632, |
|
"debug/policy_rejected_logps": -192.33885192871094, |
|
"debug/reference_chosen_logps": -220.32342529296875, |
|
"debug/reference_rejected_logps": -167.2851104736328, |
|
"epoch": 0.9019607843137255, |
|
"grad_norm": 2.8823875670773695, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.226428270339966, |
|
"logits/rejected": -2.986654043197632, |
|
"logps/chosen": -187.73739624023438, |
|
"logps/rejected": -192.33885192871094, |
|
"loss": 0.1605, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.32586026191711426, |
|
"rewards/margins": 0.5763977766036987, |
|
"rewards/rejected": -0.2505374550819397, |
|
"step": 46 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.5862808227539062, |
|
"debug/policy_chosen_logps": -182.93630981445312, |
|
"debug/policy_rejected_logits": -3.0501527786254883, |
|
"debug/policy_rejected_logps": -197.54100036621094, |
|
"debug/reference_chosen_logps": -213.4707794189453, |
|
"debug/reference_rejected_logps": -172.90322875976562, |
|
"epoch": 0.9215686274509803, |
|
"grad_norm": 2.720609192674643, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.5862808227539062, |
|
"logits/rejected": -3.0501527786254883, |
|
"logps/chosen": -182.93630981445312, |
|
"logps/rejected": -197.54100036621094, |
|
"loss": 0.1686, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30534470081329346, |
|
"rewards/margins": 0.5517222881317139, |
|
"rewards/rejected": -0.2463776171207428, |
|
"step": 47 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3840320110321045, |
|
"debug/policy_chosen_logps": -175.84884643554688, |
|
"debug/policy_rejected_logits": -3.043017625808716, |
|
"debug/policy_rejected_logps": -202.57257080078125, |
|
"debug/reference_chosen_logps": -203.88320922851562, |
|
"debug/reference_rejected_logps": -172.34735107421875, |
|
"epoch": 0.9411764705882353, |
|
"grad_norm": 2.309188784503618, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3840320110321045, |
|
"logits/rejected": -3.043017625808716, |
|
"logps/chosen": -175.84884643554688, |
|
"logps/rejected": -202.57257080078125, |
|
"loss": 0.1666, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28034350275993347, |
|
"rewards/margins": 0.5825955271720886, |
|
"rewards/rejected": -0.30225205421447754, |
|
"step": 48 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.3747286796569824, |
|
"debug/policy_chosen_logps": -185.90341186523438, |
|
"debug/policy_rejected_logits": -3.0797741413116455, |
|
"debug/policy_rejected_logps": -202.92413330078125, |
|
"debug/reference_chosen_logps": -221.17340087890625, |
|
"debug/reference_rejected_logps": -154.19923400878906, |
|
"epoch": 0.9607843137254902, |
|
"grad_norm": 2.7307344297698157, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.3747286796569824, |
|
"logits/rejected": -3.0797741413116455, |
|
"logps/chosen": -185.90341186523438, |
|
"logps/rejected": -202.92413330078125, |
|
"loss": 0.1461, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3527000844478607, |
|
"rewards/margins": 0.8399491310119629, |
|
"rewards/rejected": -0.48724907636642456, |
|
"step": 49 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.4781532287597656, |
|
"debug/policy_chosen_logps": -177.86842346191406, |
|
"debug/policy_rejected_logits": -3.0594165325164795, |
|
"debug/policy_rejected_logps": -195.89004516601562, |
|
"debug/reference_chosen_logps": -203.58407592773438, |
|
"debug/reference_rejected_logps": -161.39697265625, |
|
"epoch": 0.9803921568627451, |
|
"grad_norm": 3.3901490209041247, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.4781532287597656, |
|
"logits/rejected": -3.0594165325164795, |
|
"logps/chosen": -177.86842346191406, |
|
"logps/rejected": -195.89004516601562, |
|
"loss": 0.1895, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2571565508842468, |
|
"rewards/margins": 0.6020870208740234, |
|
"rewards/rejected": -0.3449305295944214, |
|
"step": 50 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -3.4821436405181885, |
|
"debug/policy_chosen_logps": -177.99000549316406, |
|
"debug/policy_rejected_logits": -3.0359578132629395, |
|
"debug/policy_rejected_logps": -203.94029235839844, |
|
"debug/reference_chosen_logps": -212.01348876953125, |
|
"debug/reference_rejected_logps": -166.06936645507812, |
|
"epoch": 1.0, |
|
"grad_norm": 3.0238129167739807, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -3.4821436405181885, |
|
"logits/rejected": -3.0359578132629395, |
|
"logps/chosen": -177.99000549316406, |
|
"logps/rejected": -203.94029235839844, |
|
"loss": 0.1383, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.3402349352836609, |
|
"rewards/margins": 0.7189440727233887, |
|
"rewards/rejected": -0.37870916724205017, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 51, |
|
"total_flos": 0.0, |
|
"train_loss": 0.26999635322421206, |
|
"train_runtime": 166.8574, |
|
"train_samples_per_second": 19.31, |
|
"train_steps_per_second": 0.306 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 51, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|