|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9981298423724285, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"abs_diff": 0.043448589742183685, |
|
"all_logps_1": -124.6441650390625, |
|
"all_logps_1_values": -124.64417266845703, |
|
"all_logps_2": 459.15625, |
|
"all_logps_2_values": 459.15625, |
|
"epoch": 0.0021373230029388193, |
|
"grad_norm": 16.66867807446414, |
|
"learning_rate": 2.127659574468085e-08, |
|
"logits/chosen": -1.1381689310073853, |
|
"logits/rejected": -0.9913416504859924, |
|
"logps/chosen": -0.2839311361312866, |
|
"logps/rejected": -0.29555341601371765, |
|
"loss": 1.5077, |
|
"original_losses": 1.5989841222763062, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7098277807235718, |
|
"rewards/margins": 0.029055725783109665, |
|
"rewards/rejected": -0.7388835549354553, |
|
"step": 1, |
|
"weight": 0.9598712921142578 |
|
}, |
|
{ |
|
"abs_diff": 0.050563473254442215, |
|
"all_logps_1": -113.89578247070312, |
|
"all_logps_1_values": -113.89578247070312, |
|
"all_logps_2": 426.234375, |
|
"all_logps_2_values": 426.234375, |
|
"epoch": 0.010686615014694095, |
|
"grad_norm": 12.434660441186981, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -0.9904537796974182, |
|
"logits/rejected": -0.9189692735671997, |
|
"logps/chosen": -0.2694719731807709, |
|
"logps/rejected": -0.2684631943702698, |
|
"loss": 1.5251, |
|
"original_losses": 1.6255850791931152, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6736798286437988, |
|
"rewards/margins": -0.0025218012742698193, |
|
"rewards/rejected": -0.6711580753326416, |
|
"step": 5, |
|
"weight": 0.9548923373222351 |
|
}, |
|
{ |
|
"abs_diff": 0.06418919563293457, |
|
"all_logps_1": -118.16609191894531, |
|
"all_logps_1_values": -118.16609191894531, |
|
"all_logps_2": 443.21875, |
|
"all_logps_2_values": 443.21875, |
|
"epoch": 0.02137323002938819, |
|
"grad_norm": 11.724962863400911, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -0.9794756174087524, |
|
"logits/rejected": -0.9353710412979126, |
|
"logps/chosen": -0.2719997763633728, |
|
"logps/rejected": -0.2735568881034851, |
|
"loss": 1.5172, |
|
"original_losses": 1.620931625366211, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.6799993515014648, |
|
"rewards/margins": 0.0038928240537643433, |
|
"rewards/rejected": -0.6838923096656799, |
|
"step": 10, |
|
"weight": 0.9420804977416992 |
|
}, |
|
{ |
|
"abs_diff": 0.06552017480134964, |
|
"all_logps_1": -101.9596939086914, |
|
"all_logps_1_values": -101.95967864990234, |
|
"all_logps_2": 370.20001220703125, |
|
"all_logps_2_values": 370.20001220703125, |
|
"epoch": 0.03205984504408229, |
|
"grad_norm": 9.773542967175878, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.9607246518135071, |
|
"logits/rejected": -0.9163097143173218, |
|
"logps/chosen": -0.29539960622787476, |
|
"logps/rejected": -0.2832711338996887, |
|
"loss": 1.5128, |
|
"original_losses": 1.6492595672607422, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.7384990453720093, |
|
"rewards/margins": -0.030321191996335983, |
|
"rewards/rejected": -0.708177924156189, |
|
"step": 15, |
|
"weight": 0.9420396089553833 |
|
}, |
|
{ |
|
"abs_diff": 0.082237109541893, |
|
"all_logps_1": -95.52127075195312, |
|
"all_logps_1_values": -95.52125549316406, |
|
"all_logps_2": 368.6625061035156, |
|
"all_logps_2_values": 368.6625061035156, |
|
"epoch": 0.04274646005877638, |
|
"grad_norm": 14.386337719633973, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.9820459485054016, |
|
"logits/rejected": -0.9820452928543091, |
|
"logps/chosen": -0.26204216480255127, |
|
"logps/rejected": -0.26956799626350403, |
|
"loss": 1.5149, |
|
"original_losses": 1.6124236583709717, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.6551053524017334, |
|
"rewards/margins": 0.018814602866768837, |
|
"rewards/rejected": -0.6739200353622437, |
|
"step": 20, |
|
"weight": 0.9291993379592896 |
|
}, |
|
{ |
|
"abs_diff": 0.07468467205762863, |
|
"all_logps_1": -101.43566131591797, |
|
"all_logps_1_values": -101.43565368652344, |
|
"all_logps_2": 359.6499938964844, |
|
"all_logps_2_values": 359.6499938964844, |
|
"epoch": 0.053433075073470476, |
|
"grad_norm": 12.506683302853757, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -1.0295155048370361, |
|
"logits/rejected": -1.0065571069717407, |
|
"logps/chosen": -0.28278106451034546, |
|
"logps/rejected": -0.2869016230106354, |
|
"loss": 1.5005, |
|
"original_losses": 1.6180095672607422, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.706952691078186, |
|
"rewards/margins": 0.010301386937499046, |
|
"rewards/rejected": -0.7172540426254272, |
|
"step": 25, |
|
"weight": 0.9346221089363098 |
|
}, |
|
{ |
|
"abs_diff": 0.07145524024963379, |
|
"all_logps_1": -96.14094543457031, |
|
"all_logps_1_values": -96.14093780517578, |
|
"all_logps_2": 358.6937561035156, |
|
"all_logps_2_values": 358.6937561035156, |
|
"epoch": 0.06411969008816458, |
|
"grad_norm": 17.486598946846197, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -1.0747442245483398, |
|
"logits/rejected": -0.9867307543754578, |
|
"logps/chosen": -0.27444857358932495, |
|
"logps/rejected": -0.27685946226119995, |
|
"loss": 1.5207, |
|
"original_losses": 1.6215848922729492, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.6861215233802795, |
|
"rewards/margins": 0.006027159281075001, |
|
"rewards/rejected": -0.6921486258506775, |
|
"step": 30, |
|
"weight": 0.9376131296157837 |
|
}, |
|
{ |
|
"abs_diff": 0.08128118515014648, |
|
"all_logps_1": -110.31912994384766, |
|
"all_logps_1_values": -110.3191146850586, |
|
"all_logps_2": 396.7250061035156, |
|
"all_logps_2_values": 396.7250061035156, |
|
"epoch": 0.07480630510285867, |
|
"grad_norm": 10.190092324128308, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -1.0031483173370361, |
|
"logits/rejected": -0.9225772023200989, |
|
"logps/chosen": -0.2776695191860199, |
|
"logps/rejected": -0.3029964566230774, |
|
"loss": 1.5058, |
|
"original_losses": 1.5780258178710938, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.6941738128662109, |
|
"rewards/margins": 0.06331733614206314, |
|
"rewards/rejected": -0.7574911713600159, |
|
"step": 35, |
|
"weight": 0.9304083585739136 |
|
}, |
|
{ |
|
"abs_diff": 0.06388907134532928, |
|
"all_logps_1": -94.03665924072266, |
|
"all_logps_1_values": -94.03666687011719, |
|
"all_logps_2": 347.20001220703125, |
|
"all_logps_2_values": 347.20001220703125, |
|
"epoch": 0.08549292011755276, |
|
"grad_norm": 12.383837039803712, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -0.9180997014045715, |
|
"logits/rejected": -0.9071486592292786, |
|
"logps/chosen": -0.28308817744255066, |
|
"logps/rejected": -0.29446059465408325, |
|
"loss": 1.5141, |
|
"original_losses": 1.6014320850372314, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.7077205181121826, |
|
"rewards/margins": 0.028431018814444542, |
|
"rewards/rejected": -0.7361515760421753, |
|
"step": 40, |
|
"weight": 0.9425530433654785 |
|
}, |
|
{ |
|
"abs_diff": 0.09521429240703583, |
|
"all_logps_1": -106.0528793334961, |
|
"all_logps_1_values": -106.0528793334961, |
|
"all_logps_2": 362.95623779296875, |
|
"all_logps_2_values": 362.95623779296875, |
|
"epoch": 0.09617953513224686, |
|
"grad_norm": 9.970613374779385, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -0.9140686988830566, |
|
"logits/rejected": -0.8324721455574036, |
|
"logps/chosen": -0.33634239435195923, |
|
"logps/rejected": -0.34527257084846497, |
|
"loss": 1.4915, |
|
"original_losses": 1.614324927330017, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.8408559560775757, |
|
"rewards/margins": 0.02232544682919979, |
|
"rewards/rejected": -0.8631814122200012, |
|
"step": 45, |
|
"weight": 0.9211470484733582 |
|
}, |
|
{ |
|
"abs_diff": 0.12202360481023788, |
|
"all_logps_1": -105.84830474853516, |
|
"all_logps_1_values": -105.84830474853516, |
|
"all_logps_2": 377.7437438964844, |
|
"all_logps_2_values": 377.7437438964844, |
|
"epoch": 0.10686615014694095, |
|
"grad_norm": 10.765426712830973, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -0.8902776837348938, |
|
"logits/rejected": -0.8994420766830444, |
|
"logps/chosen": -0.31167787313461304, |
|
"logps/rejected": -0.3589983582496643, |
|
"loss": 1.466, |
|
"original_losses": 1.5521076917648315, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.7791945934295654, |
|
"rewards/margins": 0.11830125004053116, |
|
"rewards/rejected": -0.8974958658218384, |
|
"step": 50, |
|
"weight": 0.9070577621459961 |
|
}, |
|
{ |
|
"abs_diff": 0.11367271095514297, |
|
"all_logps_1": -112.1168441772461, |
|
"all_logps_1_values": -112.1168441772461, |
|
"all_logps_2": 420.46875, |
|
"all_logps_2_values": 420.46875, |
|
"epoch": 0.11755276516163506, |
|
"grad_norm": 10.584693183679102, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -0.8847481608390808, |
|
"logits/rejected": -0.8255330920219421, |
|
"logps/chosen": -0.28891468048095703, |
|
"logps/rejected": -0.3513794541358948, |
|
"loss": 1.465, |
|
"original_losses": 1.557521939277649, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7222867012023926, |
|
"rewards/margins": 0.15616199374198914, |
|
"rewards/rejected": -0.8784486651420593, |
|
"step": 55, |
|
"weight": 0.9259511828422546 |
|
}, |
|
{ |
|
"abs_diff": 0.08213352411985397, |
|
"all_logps_1": -120.3653564453125, |
|
"all_logps_1_values": -120.36537170410156, |
|
"all_logps_2": 451.7250061035156, |
|
"all_logps_2_values": 451.7250061035156, |
|
"epoch": 0.12823938017632916, |
|
"grad_norm": 20.487281254270606, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -0.9171462059020996, |
|
"logits/rejected": -0.8949100375175476, |
|
"logps/chosen": -0.2980085015296936, |
|
"logps/rejected": -0.32817280292510986, |
|
"loss": 1.4606, |
|
"original_losses": 1.5710750818252563, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7450211644172668, |
|
"rewards/margins": 0.07541082799434662, |
|
"rewards/rejected": -0.8204320073127747, |
|
"step": 60, |
|
"weight": 0.9325092434883118 |
|
}, |
|
{ |
|
"abs_diff": 0.08584319800138474, |
|
"all_logps_1": -115.28419494628906, |
|
"all_logps_1_values": -115.28419494628906, |
|
"all_logps_2": 410.28125, |
|
"all_logps_2_values": 410.28125, |
|
"epoch": 0.13892599519102325, |
|
"grad_norm": 13.268818877197086, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -0.9003847241401672, |
|
"logits/rejected": -0.9516555666923523, |
|
"logps/chosen": -0.31763237714767456, |
|
"logps/rejected": -0.3270418345928192, |
|
"loss": 1.4586, |
|
"original_losses": 1.614269495010376, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.794080913066864, |
|
"rewards/margins": 0.023523610085248947, |
|
"rewards/rejected": -0.817604660987854, |
|
"step": 65, |
|
"weight": 0.9301543235778809 |
|
}, |
|
{ |
|
"abs_diff": 0.23710966110229492, |
|
"all_logps_1": -129.6254119873047, |
|
"all_logps_1_values": -129.6254425048828, |
|
"all_logps_2": 391.6187438964844, |
|
"all_logps_2_values": 391.6187438964844, |
|
"epoch": 0.14961261020571734, |
|
"grad_norm": 19.008527618804656, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -0.9297588467597961, |
|
"logits/rejected": -0.8964225053787231, |
|
"logps/chosen": -0.4621095657348633, |
|
"logps/rejected": -0.565943717956543, |
|
"loss": 1.4309, |
|
"original_losses": 1.5991542339324951, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -1.1552739143371582, |
|
"rewards/margins": 0.25958532094955444, |
|
"rewards/rejected": -1.414859414100647, |
|
"step": 70, |
|
"weight": 0.8780097961425781 |
|
}, |
|
{ |
|
"abs_diff": 0.22396209836006165, |
|
"all_logps_1": -126.3341064453125, |
|
"all_logps_1_values": -126.33412170410156, |
|
"all_logps_2": 375.15625, |
|
"all_logps_2_values": 375.15625, |
|
"epoch": 0.16029922522041143, |
|
"grad_norm": 14.741661325228266, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -0.88294517993927, |
|
"logits/rejected": -0.8696261644363403, |
|
"logps/chosen": -0.6373583078384399, |
|
"logps/rejected": -0.7649468779563904, |
|
"loss": 1.371, |
|
"original_losses": 1.5059027671813965, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.593395709991455, |
|
"rewards/margins": 0.3189714848995209, |
|
"rewards/rejected": -1.9123672246932983, |
|
"step": 75, |
|
"weight": 0.874294102191925 |
|
}, |
|
{ |
|
"abs_diff": 0.4753897786140442, |
|
"all_logps_1": -154.002197265625, |
|
"all_logps_1_values": -154.002197265625, |
|
"all_logps_2": 385.40625, |
|
"all_logps_2_values": 385.40625, |
|
"epoch": 0.17098584023510552, |
|
"grad_norm": 10.653088582817368, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -0.9209216833114624, |
|
"logits/rejected": -0.905800461769104, |
|
"logps/chosen": -0.9318068623542786, |
|
"logps/rejected": -1.1782509088516235, |
|
"loss": 1.3728, |
|
"original_losses": 1.6557430028915405, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -2.329517126083374, |
|
"rewards/margins": 0.61611008644104, |
|
"rewards/rejected": -2.945627212524414, |
|
"step": 80, |
|
"weight": 0.8384539484977722 |
|
}, |
|
{ |
|
"abs_diff": 0.4482264518737793, |
|
"all_logps_1": -181.6018829345703, |
|
"all_logps_1_values": -181.6018829345703, |
|
"all_logps_2": 381.91876220703125, |
|
"all_logps_2_values": 381.91876220703125, |
|
"epoch": 0.18167245524979964, |
|
"grad_norm": 8.388730168314039, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -0.8116687536239624, |
|
"logits/rejected": -0.7630541324615479, |
|
"logps/chosen": -1.007387638092041, |
|
"logps/rejected": -1.0764662027359009, |
|
"loss": 1.3965, |
|
"original_losses": 1.8705193996429443, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -2.5184690952301025, |
|
"rewards/margins": 0.17269621789455414, |
|
"rewards/rejected": -2.6911654472351074, |
|
"step": 85, |
|
"weight": 0.8248960375785828 |
|
}, |
|
{ |
|
"abs_diff": 0.638414204120636, |
|
"all_logps_1": -197.71530151367188, |
|
"all_logps_1_values": -197.7152862548828, |
|
"all_logps_2": 368.6000061035156, |
|
"all_logps_2_values": 368.6000061035156, |
|
"epoch": 0.19235907026449373, |
|
"grad_norm": 12.62143276771947, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -0.7098425626754761, |
|
"logits/rejected": -0.6454850435256958, |
|
"logps/chosen": -1.299263596534729, |
|
"logps/rejected": -1.3454030752182007, |
|
"loss": 1.3792, |
|
"original_losses": 2.042982578277588, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.2481586933135986, |
|
"rewards/margins": 0.11534923315048218, |
|
"rewards/rejected": -3.3635077476501465, |
|
"step": 90, |
|
"weight": 0.788603663444519 |
|
}, |
|
{ |
|
"abs_diff": 0.3771124482154846, |
|
"all_logps_1": -198.22885131835938, |
|
"all_logps_1_values": -198.22885131835938, |
|
"all_logps_2": 307.64373779296875, |
|
"all_logps_2_values": 307.64373779296875, |
|
"epoch": 0.20304568527918782, |
|
"grad_norm": 9.223783777700444, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -0.7450689077377319, |
|
"logits/rejected": -0.7714122533798218, |
|
"logps/chosen": -1.5162893533706665, |
|
"logps/rejected": -1.538206696510315, |
|
"loss": 1.3537, |
|
"original_losses": 1.7573131322860718, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.7907233238220215, |
|
"rewards/margins": 0.054793525487184525, |
|
"rewards/rejected": -3.8455166816711426, |
|
"step": 95, |
|
"weight": 0.7987316846847534 |
|
}, |
|
{ |
|
"abs_diff": 0.531648576259613, |
|
"all_logps_1": -257.82080078125, |
|
"all_logps_1_values": -257.82080078125, |
|
"all_logps_2": 405.08123779296875, |
|
"all_logps_2_values": 405.08123779296875, |
|
"epoch": 0.2137323002938819, |
|
"grad_norm": 13.130824511623645, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -0.7543559074401855, |
|
"logits/rejected": -0.6947053074836731, |
|
"logps/chosen": -1.3733211755752563, |
|
"logps/rejected": -1.4744349718093872, |
|
"loss": 1.3472, |
|
"original_losses": 1.8884124755859375, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -3.433303117752075, |
|
"rewards/margins": 0.2527844309806824, |
|
"rewards/rejected": -3.6860873699188232, |
|
"step": 100, |
|
"weight": 0.8195359110832214 |
|
}, |
|
{ |
|
"abs_diff": 0.4814772605895996, |
|
"all_logps_1": -285.88824462890625, |
|
"all_logps_1_values": -285.88824462890625, |
|
"all_logps_2": 447.76251220703125, |
|
"all_logps_2_values": 447.76251220703125, |
|
"epoch": 0.224418915308576, |
|
"grad_norm": 15.741233324493118, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -0.5685318112373352, |
|
"logits/rejected": -0.5175650119781494, |
|
"logps/chosen": -1.1041462421417236, |
|
"logps/rejected": -1.3609198331832886, |
|
"loss": 1.347, |
|
"original_losses": 1.60434091091156, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.7603654861450195, |
|
"rewards/margins": 0.6419342756271362, |
|
"rewards/rejected": -3.4022998809814453, |
|
"step": 105, |
|
"weight": 0.8199658393859863 |
|
}, |
|
{ |
|
"abs_diff": 0.5063992738723755, |
|
"all_logps_1": -312.87860107421875, |
|
"all_logps_1_values": -312.8785705566406, |
|
"all_logps_2": 410.79998779296875, |
|
"all_logps_2_values": 410.79998779296875, |
|
"epoch": 0.2351055303232701, |
|
"grad_norm": 14.779833008390499, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -0.3194349706172943, |
|
"logits/rejected": -0.27131232619285583, |
|
"logps/chosen": -1.436680793762207, |
|
"logps/rejected": -1.3837544918060303, |
|
"loss": 1.3485, |
|
"original_losses": 2.0654890537261963, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.5917022228240967, |
|
"rewards/margins": -0.13231578469276428, |
|
"rewards/rejected": -3.459386110305786, |
|
"step": 110, |
|
"weight": 0.8112524151802063 |
|
}, |
|
{ |
|
"abs_diff": 0.79926997423172, |
|
"all_logps_1": -352.8046875, |
|
"all_logps_1_values": -352.8046875, |
|
"all_logps_2": 401.26873779296875, |
|
"all_logps_2_values": 401.26873779296875, |
|
"epoch": 0.2457921453379642, |
|
"grad_norm": 17.098670325278757, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -0.27068907022476196, |
|
"logits/rejected": -0.25977402925491333, |
|
"logps/chosen": -1.8351905345916748, |
|
"logps/rejected": -2.079685688018799, |
|
"loss": 1.2568, |
|
"original_losses": 1.9370386600494385, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -4.587975978851318, |
|
"rewards/margins": 0.6112388968467712, |
|
"rewards/rejected": -5.199214458465576, |
|
"step": 115, |
|
"weight": 0.7355886101722717 |
|
}, |
|
{ |
|
"abs_diff": 0.4315846860408783, |
|
"all_logps_1": -371.93505859375, |
|
"all_logps_1_values": -371.93505859375, |
|
"all_logps_2": 397.9624938964844, |
|
"all_logps_2_values": 397.9624938964844, |
|
"epoch": 0.2564787603526583, |
|
"grad_norm": 17.135021647585766, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -0.18766793608665466, |
|
"logits/rejected": -0.1377825289964676, |
|
"logps/chosen": -1.6060386896133423, |
|
"logps/rejected": -1.7283703088760376, |
|
"loss": 1.2524, |
|
"original_losses": 1.669327974319458, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -4.015096187591553, |
|
"rewards/margins": 0.30582934617996216, |
|
"rewards/rejected": -4.320925712585449, |
|
"step": 120, |
|
"weight": 0.7726086378097534 |
|
}, |
|
{ |
|
"abs_diff": 0.8556106686592102, |
|
"all_logps_1": -424.2312927246094, |
|
"all_logps_1_values": -424.2313537597656, |
|
"all_logps_2": 358.1312561035156, |
|
"all_logps_2_values": 358.1312561035156, |
|
"epoch": 0.2671653753673524, |
|
"grad_norm": 18.949047249790798, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -0.0937797874212265, |
|
"logits/rejected": -0.08780622482299805, |
|
"logps/chosen": -2.3565449714660645, |
|
"logps/rejected": -2.821481227874756, |
|
"loss": 1.2455, |
|
"original_losses": 1.5799314975738525, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -5.891362190246582, |
|
"rewards/margins": 1.1623404026031494, |
|
"rewards/rejected": -7.053703308105469, |
|
"step": 125, |
|
"weight": 0.6997275352478027 |
|
}, |
|
{ |
|
"abs_diff": 1.122897982597351, |
|
"all_logps_1": -483.11285400390625, |
|
"all_logps_1_values": -483.11279296875, |
|
"all_logps_2": 356.2250061035156, |
|
"all_logps_2_values": 356.2250061035156, |
|
"epoch": 0.2778519903820465, |
|
"grad_norm": 16.067627857167523, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": 0.18425658345222473, |
|
"logits/rejected": 0.12208795547485352, |
|
"logps/chosen": -2.2584593296051025, |
|
"logps/rejected": -2.747421979904175, |
|
"loss": 1.2378, |
|
"original_losses": 1.9530925750732422, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.646147727966309, |
|
"rewards/margins": 1.2224081754684448, |
|
"rewards/rejected": -6.868556022644043, |
|
"step": 130, |
|
"weight": 0.6967185139656067 |
|
}, |
|
{ |
|
"abs_diff": 0.5274697542190552, |
|
"all_logps_1": -584.13671875, |
|
"all_logps_1_values": -584.13671875, |
|
"all_logps_2": 443.01873779296875, |
|
"all_logps_2_values": 443.01873779296875, |
|
"epoch": 0.2885386053967406, |
|
"grad_norm": 29.366033343143968, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": 0.3088318705558777, |
|
"logits/rejected": 0.3932690918445587, |
|
"logps/chosen": -2.3267366886138916, |
|
"logps/rejected": -2.385960102081299, |
|
"loss": 1.1916, |
|
"original_losses": 1.8465898036956787, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -5.816841125488281, |
|
"rewards/margins": 0.1480589658021927, |
|
"rewards/rejected": -5.964900016784668, |
|
"step": 135, |
|
"weight": 0.7594529390335083 |
|
}, |
|
{ |
|
"abs_diff": 0.9901386499404907, |
|
"all_logps_1": -715.9130859375, |
|
"all_logps_1_values": -715.9131469726562, |
|
"all_logps_2": 402.9312438964844, |
|
"all_logps_2_values": 402.9312438964844, |
|
"epoch": 0.2992252204114347, |
|
"grad_norm": 27.69156284264097, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": 0.5694825649261475, |
|
"logits/rejected": 0.5738533139228821, |
|
"logps/chosen": -3.3967947959899902, |
|
"logps/rejected": -3.4784629344940186, |
|
"loss": 1.1521, |
|
"original_losses": 2.2902231216430664, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -8.491987228393555, |
|
"rewards/margins": 0.20417042076587677, |
|
"rewards/rejected": -8.696157455444336, |
|
"step": 140, |
|
"weight": 0.6874681115150452 |
|
}, |
|
{ |
|
"abs_diff": 0.9199058413505554, |
|
"all_logps_1": -995.3132934570312, |
|
"all_logps_1_values": -995.3132934570312, |
|
"all_logps_2": 409.5249938964844, |
|
"all_logps_2_values": 409.5249938964844, |
|
"epoch": 0.30991183542612877, |
|
"grad_norm": 28.11539806786062, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": 0.8747909665107727, |
|
"logits/rejected": 0.9098325967788696, |
|
"logps/chosen": -3.898921251296997, |
|
"logps/rejected": -3.9907355308532715, |
|
"loss": 1.1592, |
|
"original_losses": 2.074253797531128, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -9.74730110168457, |
|
"rewards/margins": 0.22953681647777557, |
|
"rewards/rejected": -9.976838111877441, |
|
"step": 145, |
|
"weight": 0.6336122751235962 |
|
}, |
|
{ |
|
"abs_diff": 1.7418813705444336, |
|
"all_logps_1": -1663.3861083984375, |
|
"all_logps_1_values": -1663.3861083984375, |
|
"all_logps_2": 383.75, |
|
"all_logps_2_values": 383.75, |
|
"epoch": 0.32059845044082286, |
|
"grad_norm": 43.30888911111554, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": 1.2933635711669922, |
|
"logits/rejected": 1.2684452533721924, |
|
"logps/chosen": -6.538305759429932, |
|
"logps/rejected": -7.486212253570557, |
|
"loss": 1.0994, |
|
"original_losses": 1.926180124282837, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -16.34576416015625, |
|
"rewards/margins": 2.3697667121887207, |
|
"rewards/rejected": -18.715530395507812, |
|
"step": 150, |
|
"weight": 0.5583394765853882 |
|
}, |
|
{ |
|
"abs_diff": 1.5373389720916748, |
|
"all_logps_1": -2462.133056640625, |
|
"all_logps_1_values": -2462.13330078125, |
|
"all_logps_2": 434.73748779296875, |
|
"all_logps_2_values": 434.73748779296875, |
|
"epoch": 0.33128506545551695, |
|
"grad_norm": 47.421884036345716, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": 1.8083369731903076, |
|
"logits/rejected": 1.890794038772583, |
|
"logps/chosen": -8.33267879486084, |
|
"logps/rejected": -9.018165588378906, |
|
"loss": 1.0741, |
|
"original_losses": 2.0032851696014404, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -20.83169937133789, |
|
"rewards/margins": 1.713716745376587, |
|
"rewards/rejected": -22.5454158782959, |
|
"step": 155, |
|
"weight": 0.5593416094779968 |
|
}, |
|
{ |
|
"abs_diff": 1.8985588550567627, |
|
"all_logps_1": -2538.660400390625, |
|
"all_logps_1_values": -2538.66064453125, |
|
"all_logps_2": 403.66876220703125, |
|
"all_logps_2_values": 403.66876220703125, |
|
"epoch": 0.34197168047021104, |
|
"grad_norm": 58.88642904599502, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": 2.1515212059020996, |
|
"logits/rejected": 2.141986846923828, |
|
"logps/chosen": -8.633856773376465, |
|
"logps/rejected": -9.374483108520508, |
|
"loss": 1.0769, |
|
"original_losses": 2.3099827766418457, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -21.584644317626953, |
|
"rewards/margins": 1.8515657186508179, |
|
"rewards/rejected": -23.436208724975586, |
|
"step": 160, |
|
"weight": 0.5209288001060486 |
|
}, |
|
{ |
|
"abs_diff": 2.2082934379577637, |
|
"all_logps_1": -3570.93603515625, |
|
"all_logps_1_values": -3570.936279296875, |
|
"all_logps_2": 442.4437561035156, |
|
"all_logps_2_values": 442.4437561035156, |
|
"epoch": 0.3526582954849052, |
|
"grad_norm": 32.977138977170775, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": 2.5748469829559326, |
|
"logits/rejected": 2.677804470062256, |
|
"logps/chosen": -9.694478988647461, |
|
"logps/rejected": -10.093037605285645, |
|
"loss": 1.0398, |
|
"original_losses": 3.134640693664551, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -24.236202239990234, |
|
"rewards/margins": 0.9963935017585754, |
|
"rewards/rejected": -25.232593536376953, |
|
"step": 165, |
|
"weight": 0.49304407835006714 |
|
}, |
|
{ |
|
"abs_diff": 2.007434129714966, |
|
"all_logps_1": -3220.789794921875, |
|
"all_logps_1_values": -3220.789794921875, |
|
"all_logps_2": 357.3062438964844, |
|
"all_logps_2_values": 357.3062438964844, |
|
"epoch": 0.36334491049959927, |
|
"grad_norm": 44.745926058943496, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": 2.934321641921997, |
|
"logits/rejected": 2.8931219577789307, |
|
"logps/chosen": -11.122208595275879, |
|
"logps/rejected": -11.998506546020508, |
|
"loss": 0.9596, |
|
"original_losses": 2.2297332286834717, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -27.80552101135254, |
|
"rewards/margins": 2.1907458305358887, |
|
"rewards/rejected": -29.996265411376953, |
|
"step": 170, |
|
"weight": 0.49520620703697205 |
|
}, |
|
{ |
|
"abs_diff": 2.5391037464141846, |
|
"all_logps_1": -3010.77099609375, |
|
"all_logps_1_values": -3010.77099609375, |
|
"all_logps_2": 336.26251220703125, |
|
"all_logps_2_values": 336.26251220703125, |
|
"epoch": 0.37403152551429336, |
|
"grad_norm": 50.44282847929724, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": 2.6517717838287354, |
|
"logits/rejected": 2.698502779006958, |
|
"logps/chosen": -11.271635055541992, |
|
"logps/rejected": -12.422686576843262, |
|
"loss": 0.953, |
|
"original_losses": 2.4483256340026855, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -28.1790828704834, |
|
"rewards/margins": 2.8776297569274902, |
|
"rewards/rejected": -31.056713104248047, |
|
"step": 175, |
|
"weight": 0.45478373765945435 |
|
}, |
|
{ |
|
"abs_diff": 2.311084270477295, |
|
"all_logps_1": -3630.26123046875, |
|
"all_logps_1_values": -3630.26123046875, |
|
"all_logps_2": 367.6937561035156, |
|
"all_logps_2_values": 367.6937561035156, |
|
"epoch": 0.38471814052898745, |
|
"grad_norm": 54.556403188950036, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": 2.640475273132324, |
|
"logits/rejected": 2.6134068965911865, |
|
"logps/chosen": -12.537522315979004, |
|
"logps/rejected": -13.568713188171387, |
|
"loss": 0.9044, |
|
"original_losses": 2.333768844604492, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -31.343807220458984, |
|
"rewards/margins": 2.577979564666748, |
|
"rewards/rejected": -33.921791076660156, |
|
"step": 180, |
|
"weight": 0.46685990691185 |
|
}, |
|
{ |
|
"abs_diff": 2.934654951095581, |
|
"all_logps_1": -5179.39404296875, |
|
"all_logps_1_values": -5179.39404296875, |
|
"all_logps_2": 370.9624938964844, |
|
"all_logps_2_values": 370.9624938964844, |
|
"epoch": 0.39540475554368154, |
|
"grad_norm": 57.260252425269734, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": 2.326862096786499, |
|
"logits/rejected": 2.4421494007110596, |
|
"logps/chosen": -15.849513053894043, |
|
"logps/rejected": -17.323734283447266, |
|
"loss": 0.9407, |
|
"original_losses": 2.5988547801971436, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -39.623779296875, |
|
"rewards/margins": 3.6855552196502686, |
|
"rewards/rejected": -43.30933380126953, |
|
"step": 185, |
|
"weight": 0.40877920389175415 |
|
}, |
|
{ |
|
"abs_diff": 2.9652016162872314, |
|
"all_logps_1": -5177.00244140625, |
|
"all_logps_1_values": -5177.00244140625, |
|
"all_logps_2": 374.4312438964844, |
|
"all_logps_2_values": 374.4312438964844, |
|
"epoch": 0.40609137055837563, |
|
"grad_norm": 83.2255328888069, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": 2.44303297996521, |
|
"logits/rejected": 2.4873244762420654, |
|
"logps/chosen": -15.580667495727539, |
|
"logps/rejected": -17.045442581176758, |
|
"loss": 0.9238, |
|
"original_losses": 2.6292238235473633, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -38.95166778564453, |
|
"rewards/margins": 3.661935329437256, |
|
"rewards/rejected": -42.61360168457031, |
|
"step": 190, |
|
"weight": 0.40674179792404175 |
|
}, |
|
{ |
|
"abs_diff": 2.7273154258728027, |
|
"all_logps_1": -4500.06005859375, |
|
"all_logps_1_values": -4500.06005859375, |
|
"all_logps_2": 380.1312561035156, |
|
"all_logps_2_values": 380.1312561035156, |
|
"epoch": 0.4167779855730697, |
|
"grad_norm": 84.18074257984793, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": 2.6910769939422607, |
|
"logits/rejected": 2.7326107025146484, |
|
"logps/chosen": -13.98046875, |
|
"logps/rejected": -15.500396728515625, |
|
"loss": 0.9048, |
|
"original_losses": 2.1395676136016846, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -34.95117950439453, |
|
"rewards/margins": 3.7998204231262207, |
|
"rewards/rejected": -38.75099182128906, |
|
"step": 195, |
|
"weight": 0.3992369771003723 |
|
}, |
|
{ |
|
"abs_diff": 2.3771374225616455, |
|
"all_logps_1": -4996.7001953125, |
|
"all_logps_1_values": -4996.7001953125, |
|
"all_logps_2": 438.8500061035156, |
|
"all_logps_2_values": 438.8500061035156, |
|
"epoch": 0.4274646005877638, |
|
"grad_norm": 51.852682835194706, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": 2.5128085613250732, |
|
"logits/rejected": 2.454047679901123, |
|
"logps/chosen": -13.007303237915039, |
|
"logps/rejected": -13.782841682434082, |
|
"loss": 0.9745, |
|
"original_losses": 2.816681385040283, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -32.51825714111328, |
|
"rewards/margins": 1.9388458728790283, |
|
"rewards/rejected": -34.45710372924805, |
|
"step": 200, |
|
"weight": 0.4336828589439392 |
|
}, |
|
{ |
|
"abs_diff": 2.789199113845825, |
|
"all_logps_1": -5606.87744140625, |
|
"all_logps_1_values": -5606.87744140625, |
|
"all_logps_2": 413.7875061035156, |
|
"all_logps_2_values": 413.7875061035156, |
|
"epoch": 0.4381512156024579, |
|
"grad_norm": 82.65919240097834, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": 2.4827866554260254, |
|
"logits/rejected": 2.610020399093628, |
|
"logps/chosen": -15.495327949523926, |
|
"logps/rejected": -16.71689224243164, |
|
"loss": 0.8079, |
|
"original_losses": 2.6531503200531006, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -38.738319396972656, |
|
"rewards/margins": 3.053907871246338, |
|
"rewards/rejected": -41.7922248840332, |
|
"step": 205, |
|
"weight": 0.40062981843948364 |
|
}, |
|
{ |
|
"abs_diff": 3.1174449920654297, |
|
"all_logps_1": -6078.1650390625, |
|
"all_logps_1_values": -6078.1650390625, |
|
"all_logps_2": 408.83123779296875, |
|
"all_logps_2_values": 408.83123779296875, |
|
"epoch": 0.448837830617152, |
|
"grad_norm": 66.91462129577006, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": 2.3251194953918457, |
|
"logits/rejected": 2.481720209121704, |
|
"logps/chosen": -15.918850898742676, |
|
"logps/rejected": -17.23949432373047, |
|
"loss": 0.8447, |
|
"original_losses": 2.9006853103637695, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -39.79712677001953, |
|
"rewards/margins": 3.3016059398651123, |
|
"rewards/rejected": -43.09873580932617, |
|
"step": 210, |
|
"weight": 0.37287402153015137 |
|
}, |
|
{ |
|
"abs_diff": 3.3388848304748535, |
|
"all_logps_1": -6523.8935546875, |
|
"all_logps_1_values": -6523.8935546875, |
|
"all_logps_2": 405.98748779296875, |
|
"all_logps_2_values": 405.98748779296875, |
|
"epoch": 0.45952444563184613, |
|
"grad_norm": 95.8421548369589, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": 2.866258382797241, |
|
"logits/rejected": 2.9341139793395996, |
|
"logps/chosen": -16.77628517150879, |
|
"logps/rejected": -18.90264320373535, |
|
"loss": 0.8426, |
|
"original_losses": 2.032466411590576, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -41.940711975097656, |
|
"rewards/margins": 5.31589412689209, |
|
"rewards/rejected": -47.25660705566406, |
|
"step": 215, |
|
"weight": 0.35115545988082886 |
|
}, |
|
{ |
|
"abs_diff": 2.8094236850738525, |
|
"all_logps_1": -4738.73046875, |
|
"all_logps_1_values": -4738.73046875, |
|
"all_logps_2": 363.98126220703125, |
|
"all_logps_2_values": 363.98126220703125, |
|
"epoch": 0.4702110606465402, |
|
"grad_norm": 112.65545034373879, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": 2.7162396907806396, |
|
"logits/rejected": 2.835710048675537, |
|
"logps/chosen": -15.200531005859375, |
|
"logps/rejected": -15.732034683227539, |
|
"loss": 0.7804, |
|
"original_losses": 3.5092949867248535, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -38.0013313293457, |
|
"rewards/margins": 1.32875394821167, |
|
"rewards/rejected": -39.33008575439453, |
|
"step": 220, |
|
"weight": 0.4126754403114319 |
|
}, |
|
{ |
|
"abs_diff": 3.209429979324341, |
|
"all_logps_1": -5642.91943359375, |
|
"all_logps_1_values": -5642.92041015625, |
|
"all_logps_2": 383.92498779296875, |
|
"all_logps_2_values": 383.92498779296875, |
|
"epoch": 0.4808976756612343, |
|
"grad_norm": 37.46832492030243, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": 2.5830130577087402, |
|
"logits/rejected": 2.689384937286377, |
|
"logps/chosen": -15.603918075561523, |
|
"logps/rejected": -16.610340118408203, |
|
"loss": 0.9122, |
|
"original_losses": 3.4112372398376465, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -39.00979995727539, |
|
"rewards/margins": 2.5160529613494873, |
|
"rewards/rejected": -41.525856018066406, |
|
"step": 225, |
|
"weight": 0.3604838252067566 |
|
}, |
|
{ |
|
"abs_diff": 3.7125911712646484, |
|
"all_logps_1": -5569.94384765625, |
|
"all_logps_1_values": -5569.94384765625, |
|
"all_logps_2": 361.3125, |
|
"all_logps_2_values": 361.3125, |
|
"epoch": 0.4915842906759284, |
|
"grad_norm": 66.55702343700871, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": 2.3465304374694824, |
|
"logits/rejected": 2.66461181640625, |
|
"logps/chosen": -17.106571197509766, |
|
"logps/rejected": -19.080835342407227, |
|
"loss": 0.8076, |
|
"original_losses": 2.705897808074951, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -42.76642608642578, |
|
"rewards/margins": 4.935657501220703, |
|
"rewards/rejected": -47.70208740234375, |
|
"step": 230, |
|
"weight": 0.3060615658760071 |
|
}, |
|
{ |
|
"abs_diff": 2.281184434890747, |
|
"all_logps_1": -3926.673828125, |
|
"all_logps_1_values": -3926.67333984375, |
|
"all_logps_2": 311.42498779296875, |
|
"all_logps_2_values": 311.42498779296875, |
|
"epoch": 0.5022709056906225, |
|
"grad_norm": 52.12193473626352, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": 2.5927655696868896, |
|
"logits/rejected": 2.721041679382324, |
|
"logps/chosen": -14.924860000610352, |
|
"logps/rejected": -15.577176094055176, |
|
"loss": 0.8527, |
|
"original_losses": 2.7761876583099365, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -37.3121452331543, |
|
"rewards/margins": 1.6307960748672485, |
|
"rewards/rejected": -38.94294357299805, |
|
"step": 235, |
|
"weight": 0.42170318961143494 |
|
}, |
|
{ |
|
"abs_diff": 2.7382442951202393, |
|
"all_logps_1": -5511.8671875, |
|
"all_logps_1_values": -5511.8671875, |
|
"all_logps_2": 424.04376220703125, |
|
"all_logps_2_values": 424.04376220703125, |
|
"epoch": 0.5129575207053166, |
|
"grad_norm": 59.31175783914156, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": 2.750415086746216, |
|
"logits/rejected": 2.8377902507781982, |
|
"logps/chosen": -15.228363037109375, |
|
"logps/rejected": -16.618165969848633, |
|
"loss": 0.8222, |
|
"original_losses": 2.4813647270202637, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -38.07091522216797, |
|
"rewards/margins": 3.474503993988037, |
|
"rewards/rejected": -41.545413970947266, |
|
"step": 240, |
|
"weight": 0.43811964988708496 |
|
}, |
|
{ |
|
"abs_diff": 3.254149913787842, |
|
"all_logps_1": -5742.85595703125, |
|
"all_logps_1_values": -5742.85546875, |
|
"all_logps_2": 412.4624938964844, |
|
"all_logps_2_values": 412.4624938964844, |
|
"epoch": 0.5236441357200107, |
|
"grad_norm": 54.9226284927014, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": 2.565314531326294, |
|
"logits/rejected": 2.691755533218384, |
|
"logps/chosen": -15.381324768066406, |
|
"logps/rejected": -17.23483657836914, |
|
"loss": 0.7997, |
|
"original_losses": 2.286261558532715, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -38.45330810546875, |
|
"rewards/margins": 4.633780479431152, |
|
"rewards/rejected": -43.08708953857422, |
|
"step": 245, |
|
"weight": 0.3790872097015381 |
|
}, |
|
{ |
|
"abs_diff": 3.364607334136963, |
|
"all_logps_1": -5477.4482421875, |
|
"all_logps_1_values": -5477.4482421875, |
|
"all_logps_2": 341.70623779296875, |
|
"all_logps_2_values": 341.70623779296875, |
|
"epoch": 0.5343307507347048, |
|
"grad_norm": 75.70050581279018, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": 2.6822657585144043, |
|
"logits/rejected": 2.7521121501922607, |
|
"logps/chosen": -16.76608657836914, |
|
"logps/rejected": -19.173168182373047, |
|
"loss": 0.8369, |
|
"original_losses": 1.7328109741210938, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -41.91521453857422, |
|
"rewards/margins": 6.017703056335449, |
|
"rewards/rejected": -47.932918548583984, |
|
"step": 250, |
|
"weight": 0.3458004593849182 |
|
}, |
|
{ |
|
"abs_diff": 3.0323586463928223, |
|
"all_logps_1": -6443.626953125, |
|
"all_logps_1_values": -6443.62646484375, |
|
"all_logps_2": 363.6000061035156, |
|
"all_logps_2_values": 363.6000061035156, |
|
"epoch": 0.5450173657493989, |
|
"grad_norm": 39.687795704366174, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": 2.954530954360962, |
|
"logits/rejected": 2.9405295848846436, |
|
"logps/chosen": -18.485279083251953, |
|
"logps/rejected": -19.909687042236328, |
|
"loss": 0.7436, |
|
"original_losses": 2.6259872913360596, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -46.21319580078125, |
|
"rewards/margins": 3.561020612716675, |
|
"rewards/rejected": -49.77421569824219, |
|
"step": 255, |
|
"weight": 0.37864193320274353 |
|
}, |
|
{ |
|
"abs_diff": 3.276740312576294, |
|
"all_logps_1": -8267.275390625, |
|
"all_logps_1_values": -8267.275390625, |
|
"all_logps_2": 393.79376220703125, |
|
"all_logps_2_values": 393.79376220703125, |
|
"epoch": 0.555703980764093, |
|
"grad_norm": 82.01295751328553, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": 3.072216749191284, |
|
"logits/rejected": 3.1636574268341064, |
|
"logps/chosen": -20.29796600341797, |
|
"logps/rejected": -22.58323860168457, |
|
"loss": 0.767, |
|
"original_losses": 1.7455909252166748, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -50.74491882324219, |
|
"rewards/margins": 5.713181972503662, |
|
"rewards/rejected": -56.458106994628906, |
|
"step": 260, |
|
"weight": 0.35711461305618286 |
|
}, |
|
{ |
|
"abs_diff": 3.182936429977417, |
|
"all_logps_1": -9216.587890625, |
|
"all_logps_1_values": -9216.5869140625, |
|
"all_logps_2": 407.7562561035156, |
|
"all_logps_2_values": 407.7562561035156, |
|
"epoch": 0.566390595778787, |
|
"grad_norm": 52.240919124363245, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": 3.0337119102478027, |
|
"logits/rejected": 3.0206868648529053, |
|
"logps/chosen": -23.038707733154297, |
|
"logps/rejected": -23.99751091003418, |
|
"loss": 0.807, |
|
"original_losses": 3.4282360076904297, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -57.596778869628906, |
|
"rewards/margins": 2.3969998359680176, |
|
"rewards/rejected": -59.9937744140625, |
|
"step": 265, |
|
"weight": 0.34545254707336426 |
|
}, |
|
{ |
|
"abs_diff": 3.006873607635498, |
|
"all_logps_1": -10153.31640625, |
|
"all_logps_1_values": -10153.3154296875, |
|
"all_logps_2": 477.38751220703125, |
|
"all_logps_2_values": 477.38751220703125, |
|
"epoch": 0.5770772107934812, |
|
"grad_norm": 59.52695646189972, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": 2.972503185272217, |
|
"logits/rejected": 2.9690961837768555, |
|
"logps/chosen": -22.136503219604492, |
|
"logps/rejected": -23.38858413696289, |
|
"loss": 0.8091, |
|
"original_losses": 2.8519082069396973, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -55.34125900268555, |
|
"rewards/margins": 3.1301934719085693, |
|
"rewards/rejected": -58.47145462036133, |
|
"step": 270, |
|
"weight": 0.36154988408088684 |
|
}, |
|
{ |
|
"abs_diff": 2.563995599746704, |
|
"all_logps_1": -7391.75, |
|
"all_logps_1_values": -7391.75, |
|
"all_logps_2": 375.40625, |
|
"all_logps_2_values": 375.40625, |
|
"epoch": 0.5877638258081752, |
|
"grad_norm": 59.32000543668621, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": 3.481792449951172, |
|
"logits/rejected": 3.5533995628356934, |
|
"logps/chosen": -21.077594757080078, |
|
"logps/rejected": -22.37049674987793, |
|
"loss": 0.7438, |
|
"original_losses": 2.241507053375244, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -52.6939811706543, |
|
"rewards/margins": 3.2322616577148438, |
|
"rewards/rejected": -55.926246643066406, |
|
"step": 275, |
|
"weight": 0.41661015152931213 |
|
}, |
|
{ |
|
"abs_diff": 3.315411329269409, |
|
"all_logps_1": -7719.34619140625, |
|
"all_logps_1_values": -7719.34521484375, |
|
"all_logps_2": 439.35626220703125, |
|
"all_logps_2_values": 439.35626220703125, |
|
"epoch": 0.5984504408228694, |
|
"grad_norm": 53.61685628912313, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": 2.6844732761383057, |
|
"logits/rejected": 2.87386417388916, |
|
"logps/chosen": -17.859844207763672, |
|
"logps/rejected": -19.173076629638672, |
|
"loss": 0.7914, |
|
"original_losses": 3.27254056930542, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -44.64960861206055, |
|
"rewards/margins": 3.283079147338867, |
|
"rewards/rejected": -47.93268966674805, |
|
"step": 280, |
|
"weight": 0.38690507411956787 |
|
}, |
|
{ |
|
"abs_diff": 2.917543649673462, |
|
"all_logps_1": -6426.8310546875, |
|
"all_logps_1_values": -6426.8310546875, |
|
"all_logps_2": 355.16876220703125, |
|
"all_logps_2_values": 355.16876220703125, |
|
"epoch": 0.6091370558375635, |
|
"grad_norm": 55.70128923603701, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": 3.0214133262634277, |
|
"logits/rejected": 3.1276047229766846, |
|
"logps/chosen": -20.0152530670166, |
|
"logps/rejected": -20.51242446899414, |
|
"loss": 0.8001, |
|
"original_losses": 3.778569459915161, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -50.03813552856445, |
|
"rewards/margins": 1.2429269552230835, |
|
"rewards/rejected": -51.28105926513672, |
|
"step": 285, |
|
"weight": 0.4056159555912018 |
|
}, |
|
{ |
|
"abs_diff": 3.5806915760040283, |
|
"all_logps_1": -6845.4326171875, |
|
"all_logps_1_values": -6845.4326171875, |
|
"all_logps_2": 341.95001220703125, |
|
"all_logps_2_values": 341.95001220703125, |
|
"epoch": 0.6198236708522575, |
|
"grad_norm": 67.43658438729601, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": 2.87160325050354, |
|
"logits/rejected": 2.953885555267334, |
|
"logps/chosen": -19.99938201904297, |
|
"logps/rejected": -22.214576721191406, |
|
"loss": 0.8043, |
|
"original_losses": 2.356289863586426, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -49.99845504760742, |
|
"rewards/margins": 5.537986755371094, |
|
"rewards/rejected": -55.53644561767578, |
|
"step": 290, |
|
"weight": 0.3736080527305603 |
|
}, |
|
{ |
|
"abs_diff": 3.2340712547302246, |
|
"all_logps_1": -7549.24755859375, |
|
"all_logps_1_values": -7549.24755859375, |
|
"all_logps_2": 351.07501220703125, |
|
"all_logps_2_values": 351.07501220703125, |
|
"epoch": 0.6305102858669517, |
|
"grad_norm": 48.7199759637811, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": 2.5181379318237305, |
|
"logits/rejected": 2.6238226890563965, |
|
"logps/chosen": -21.65777587890625, |
|
"logps/rejected": -23.368385314941406, |
|
"loss": 0.8187, |
|
"original_losses": 2.4767355918884277, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -54.144432067871094, |
|
"rewards/margins": 4.276528835296631, |
|
"rewards/rejected": -58.42096710205078, |
|
"step": 295, |
|
"weight": 0.35286107659339905 |
|
}, |
|
{ |
|
"abs_diff": 3.32385516166687, |
|
"all_logps_1": -8850.7470703125, |
|
"all_logps_1_values": -8850.748046875, |
|
"all_logps_2": 415.8999938964844, |
|
"all_logps_2_values": 415.8999938964844, |
|
"epoch": 0.6411969008816457, |
|
"grad_norm": 40.177069639353974, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": 2.7022032737731934, |
|
"logits/rejected": 2.7918949127197266, |
|
"logps/chosen": -22.0867862701416, |
|
"logps/rejected": -24.649303436279297, |
|
"loss": 0.7237, |
|
"original_losses": 1.4938082695007324, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -55.21696090698242, |
|
"rewards/margins": 6.4062957763671875, |
|
"rewards/rejected": -61.623252868652344, |
|
"step": 300, |
|
"weight": 0.38917768001556396 |
|
}, |
|
{ |
|
"abs_diff": 2.9255619049072266, |
|
"all_logps_1": -7401.28662109375, |
|
"all_logps_1_values": -7401.2861328125, |
|
"all_logps_2": 407.0562438964844, |
|
"all_logps_2_values": 407.0562438964844, |
|
"epoch": 0.6518835158963399, |
|
"grad_norm": 48.44087105424344, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": 2.6635046005249023, |
|
"logits/rejected": 2.777791976928711, |
|
"logps/chosen": -18.8937931060791, |
|
"logps/rejected": -20.48404312133789, |
|
"loss": 0.8144, |
|
"original_losses": 2.4159512519836426, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -47.23448181152344, |
|
"rewards/margins": 3.975621461868286, |
|
"rewards/rejected": -51.21010208129883, |
|
"step": 305, |
|
"weight": 0.4257276952266693 |
|
}, |
|
{ |
|
"abs_diff": 3.013671875, |
|
"all_logps_1": -7221.7607421875, |
|
"all_logps_1_values": -7221.76171875, |
|
"all_logps_2": 377.16876220703125, |
|
"all_logps_2_values": 377.16876220703125, |
|
"epoch": 0.6625701309110339, |
|
"grad_norm": 59.965292421288716, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": 2.7733490467071533, |
|
"logits/rejected": 2.600106954574585, |
|
"logps/chosen": -19.796558380126953, |
|
"logps/rejected": -20.72552490234375, |
|
"loss": 0.8117, |
|
"original_losses": 3.373765230178833, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -49.491390228271484, |
|
"rewards/margins": 2.3224196434020996, |
|
"rewards/rejected": -51.813812255859375, |
|
"step": 310, |
|
"weight": 0.3705739974975586 |
|
}, |
|
{ |
|
"abs_diff": 3.0523111820220947, |
|
"all_logps_1": -7815.6552734375, |
|
"all_logps_1_values": -7815.65478515625, |
|
"all_logps_2": 449.16876220703125, |
|
"all_logps_2_values": 449.16876220703125, |
|
"epoch": 0.673256745925728, |
|
"grad_norm": 52.38266043751792, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": 2.1578516960144043, |
|
"logits/rejected": 2.247980833053589, |
|
"logps/chosen": -17.26466941833496, |
|
"logps/rejected": -18.508235931396484, |
|
"loss": 0.7937, |
|
"original_losses": 2.871872901916504, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -43.16167449951172, |
|
"rewards/margins": 3.1089208126068115, |
|
"rewards/rejected": -46.270591735839844, |
|
"step": 315, |
|
"weight": 0.3665739893913269 |
|
}, |
|
{ |
|
"abs_diff": 3.318554639816284, |
|
"all_logps_1": -6473.89013671875, |
|
"all_logps_1_values": -6473.890625, |
|
"all_logps_2": 359.54376220703125, |
|
"all_logps_2_values": 359.54376220703125, |
|
"epoch": 0.6839433609404221, |
|
"grad_norm": 87.91813204561389, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": 2.3339014053344727, |
|
"logits/rejected": 2.4213125705718994, |
|
"logps/chosen": -17.509052276611328, |
|
"logps/rejected": -19.367351531982422, |
|
"loss": 0.7705, |
|
"original_losses": 2.49141263961792, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -43.77263259887695, |
|
"rewards/margins": 4.645747184753418, |
|
"rewards/rejected": -48.41838455200195, |
|
"step": 320, |
|
"weight": 0.37379634380340576 |
|
}, |
|
{ |
|
"abs_diff": 4.029627799987793, |
|
"all_logps_1": -8260.576171875, |
|
"all_logps_1_values": -8260.576171875, |
|
"all_logps_2": 420.7562561035156, |
|
"all_logps_2_values": 420.7562561035156, |
|
"epoch": 0.6946299759551162, |
|
"grad_norm": 56.877689091030156, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": 2.8901479244232178, |
|
"logits/rejected": 2.8577167987823486, |
|
"logps/chosen": -18.898571014404297, |
|
"logps/rejected": -21.253376007080078, |
|
"loss": 0.7293, |
|
"original_losses": 2.648833751678467, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -47.24642562866211, |
|
"rewards/margins": 5.887020111083984, |
|
"rewards/rejected": -53.133445739746094, |
|
"step": 325, |
|
"weight": 0.299586683511734 |
|
}, |
|
{ |
|
"abs_diff": 3.5890209674835205, |
|
"all_logps_1": -8075.91650390625, |
|
"all_logps_1_values": -8075.91552734375, |
|
"all_logps_2": 370.53125, |
|
"all_logps_2_values": 370.53125, |
|
"epoch": 0.7053165909698104, |
|
"grad_norm": 51.745088875170836, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": 2.560868978500366, |
|
"logits/rejected": 2.73579740524292, |
|
"logps/chosen": -20.837478637695312, |
|
"logps/rejected": -23.191274642944336, |
|
"loss": 0.8048, |
|
"original_losses": 2.0950331687927246, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -52.09369659423828, |
|
"rewards/margins": 5.884491443634033, |
|
"rewards/rejected": -57.978187561035156, |
|
"step": 330, |
|
"weight": 0.35120078921318054 |
|
}, |
|
{ |
|
"abs_diff": 3.3162055015563965, |
|
"all_logps_1": -8510.986328125, |
|
"all_logps_1_values": -8510.9873046875, |
|
"all_logps_2": 404.1875, |
|
"all_logps_2_values": 404.1875, |
|
"epoch": 0.7160032059845044, |
|
"grad_norm": 69.38405615517823, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": 2.6478374004364014, |
|
"logits/rejected": 2.565058946609497, |
|
"logps/chosen": -20.254060745239258, |
|
"logps/rejected": -22.03819465637207, |
|
"loss": 0.7845, |
|
"original_losses": 2.5941619873046875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -50.63515090942383, |
|
"rewards/margins": 4.460334300994873, |
|
"rewards/rejected": -55.095489501953125, |
|
"step": 335, |
|
"weight": 0.3666679263114929 |
|
}, |
|
{ |
|
"abs_diff": 3.677370548248291, |
|
"all_logps_1": -8691.5263671875, |
|
"all_logps_1_values": -8691.5263671875, |
|
"all_logps_2": 381.01873779296875, |
|
"all_logps_2_values": 381.01873779296875, |
|
"epoch": 0.7266898209991985, |
|
"grad_norm": 84.25483121877998, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": 2.709319829940796, |
|
"logits/rejected": 2.7781405448913574, |
|
"logps/chosen": -21.70474624633789, |
|
"logps/rejected": -23.93856430053711, |
|
"loss": 0.7836, |
|
"original_losses": 2.420710802078247, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -54.261871337890625, |
|
"rewards/margins": 5.5845465660095215, |
|
"rewards/rejected": -59.84641647338867, |
|
"step": 340, |
|
"weight": 0.35216349363327026 |
|
}, |
|
{ |
|
"abs_diff": 2.6053452491760254, |
|
"all_logps_1": -8825.68359375, |
|
"all_logps_1_values": -8825.68359375, |
|
"all_logps_2": 365.8812561035156, |
|
"all_logps_2_values": 365.8812561035156, |
|
"epoch": 0.7373764360138926, |
|
"grad_norm": 70.75060453919657, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": 2.845489501953125, |
|
"logits/rejected": 2.95839262008667, |
|
"logps/chosen": -24.01942253112793, |
|
"logps/rejected": -25.074626922607422, |
|
"loss": 0.7906, |
|
"original_losses": 2.7006657123565674, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -60.048553466796875, |
|
"rewards/margins": 2.63801908493042, |
|
"rewards/rejected": -62.68656539916992, |
|
"step": 345, |
|
"weight": 0.4165709912776947 |
|
}, |
|
{ |
|
"abs_diff": 3.481846570968628, |
|
"all_logps_1": -9110.2353515625, |
|
"all_logps_1_values": -9110.2353515625, |
|
"all_logps_2": 392.26251220703125, |
|
"all_logps_2_values": 392.26251220703125, |
|
"epoch": 0.7480630510285867, |
|
"grad_norm": 50.64757547547787, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": 2.579031467437744, |
|
"logits/rejected": 2.5901365280151367, |
|
"logps/chosen": -21.98320198059082, |
|
"logps/rejected": -24.78140640258789, |
|
"loss": 0.7388, |
|
"original_losses": 1.231533408164978, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -54.9580078125, |
|
"rewards/margins": 6.995513916015625, |
|
"rewards/rejected": -61.953514099121094, |
|
"step": 350, |
|
"weight": 0.3367912769317627 |
|
}, |
|
{ |
|
"abs_diff": 3.284003496170044, |
|
"all_logps_1": -9058.169921875, |
|
"all_logps_1_values": -9058.169921875, |
|
"all_logps_2": 396.1812438964844, |
|
"all_logps_2_values": 396.1812438964844, |
|
"epoch": 0.7587496660432808, |
|
"grad_norm": 74.67147548055407, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": 2.3805794715881348, |
|
"logits/rejected": 2.5762991905212402, |
|
"logps/chosen": -21.627700805664062, |
|
"logps/rejected": -23.67769432067871, |
|
"loss": 0.7775, |
|
"original_losses": 2.081150531768799, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -54.06926345825195, |
|
"rewards/margins": 5.124981880187988, |
|
"rewards/rejected": -59.194244384765625, |
|
"step": 355, |
|
"weight": 0.333683043718338 |
|
}, |
|
{ |
|
"abs_diff": 3.9802608489990234, |
|
"all_logps_1": -8140.62646484375, |
|
"all_logps_1_values": -8140.625, |
|
"all_logps_2": 368.1812438964844, |
|
"all_logps_2_values": 368.1812438964844, |
|
"epoch": 0.7694362810579749, |
|
"grad_norm": 58.567962370545146, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": 2.5346484184265137, |
|
"logits/rejected": 2.3816428184509277, |
|
"logps/chosen": -22.101619720458984, |
|
"logps/rejected": -24.49993896484375, |
|
"loss": 0.6993, |
|
"original_losses": 2.540489435195923, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -55.254051208496094, |
|
"rewards/margins": 5.995795249938965, |
|
"rewards/rejected": -61.249847412109375, |
|
"step": 360, |
|
"weight": 0.34169501066207886 |
|
}, |
|
{ |
|
"abs_diff": 3.0081470012664795, |
|
"all_logps_1": -7452.68115234375, |
|
"all_logps_1_values": -7452.68115234375, |
|
"all_logps_2": 344.38751220703125, |
|
"all_logps_2_values": 344.38751220703125, |
|
"epoch": 0.7801228960726689, |
|
"grad_norm": 83.23124267198439, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": 2.4369776248931885, |
|
"logits/rejected": 2.584667682647705, |
|
"logps/chosen": -21.06991195678711, |
|
"logps/rejected": -22.77521324157715, |
|
"loss": 0.7695, |
|
"original_losses": 2.1701793670654297, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -52.674774169921875, |
|
"rewards/margins": 4.263253211975098, |
|
"rewards/rejected": -56.93803024291992, |
|
"step": 365, |
|
"weight": 0.345781534910202 |
|
}, |
|
{ |
|
"abs_diff": 3.4343185424804688, |
|
"all_logps_1": -9116.8271484375, |
|
"all_logps_1_values": -9116.826171875, |
|
"all_logps_2": 410.375, |
|
"all_logps_2_values": 410.375, |
|
"epoch": 0.7908095110873631, |
|
"grad_norm": 70.00940117238335, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": 2.4547030925750732, |
|
"logits/rejected": 2.5984954833984375, |
|
"logps/chosen": -21.283931732177734, |
|
"logps/rejected": -23.039413452148438, |
|
"loss": 0.7116, |
|
"original_losses": 2.7036542892456055, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -53.20983123779297, |
|
"rewards/margins": 4.388695240020752, |
|
"rewards/rejected": -57.59852981567383, |
|
"step": 370, |
|
"weight": 0.3490845561027527 |
|
}, |
|
{ |
|
"abs_diff": 3.5221400260925293, |
|
"all_logps_1": -8307.474609375, |
|
"all_logps_1_values": -8307.4755859375, |
|
"all_logps_2": 382.3999938964844, |
|
"all_logps_2_values": 382.3999938964844, |
|
"epoch": 0.8014961261020572, |
|
"grad_norm": 46.47990793449235, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": 2.5157063007354736, |
|
"logits/rejected": 2.4793992042541504, |
|
"logps/chosen": -20.3429012298584, |
|
"logps/rejected": -21.467952728271484, |
|
"loss": 0.836, |
|
"original_losses": 3.6247520446777344, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -50.85725784301758, |
|
"rewards/margins": 2.8126296997070312, |
|
"rewards/rejected": -53.669883728027344, |
|
"step": 375, |
|
"weight": 0.3106473684310913 |
|
}, |
|
{ |
|
"abs_diff": 3.1884102821350098, |
|
"all_logps_1": -7604.51953125, |
|
"all_logps_1_values": -7604.5185546875, |
|
"all_logps_2": 386.5625, |
|
"all_logps_2_values": 386.5625, |
|
"epoch": 0.8121827411167513, |
|
"grad_norm": 53.33016210631404, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": 2.299121141433716, |
|
"logits/rejected": 2.4894156455993652, |
|
"logps/chosen": -18.67618179321289, |
|
"logps/rejected": -20.802087783813477, |
|
"loss": 0.7253, |
|
"original_losses": 1.8635917901992798, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -46.690452575683594, |
|
"rewards/margins": 5.314764499664307, |
|
"rewards/rejected": -52.005226135253906, |
|
"step": 380, |
|
"weight": 0.36687955260276794 |
|
}, |
|
{ |
|
"abs_diff": 3.728355884552002, |
|
"all_logps_1": -6403.2841796875, |
|
"all_logps_1_values": -6403.2841796875, |
|
"all_logps_2": 352.4937438964844, |
|
"all_logps_2_values": 352.4937438964844, |
|
"epoch": 0.8228693561314454, |
|
"grad_norm": 51.015747481657996, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": 2.5289080142974854, |
|
"logits/rejected": 2.568324565887451, |
|
"logps/chosen": -17.892498016357422, |
|
"logps/rejected": -20.8332462310791, |
|
"loss": 0.6978, |
|
"original_losses": 1.4315834045410156, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -44.73124694824219, |
|
"rewards/margins": 7.351869106292725, |
|
"rewards/rejected": -52.0831184387207, |
|
"step": 385, |
|
"weight": 0.3369835317134857 |
|
}, |
|
{ |
|
"abs_diff": 3.4457297325134277, |
|
"all_logps_1": -7086.70947265625, |
|
"all_logps_1_values": -7086.70849609375, |
|
"all_logps_2": 400.7562561035156, |
|
"all_logps_2_values": 400.7562561035156, |
|
"epoch": 0.8335559711461394, |
|
"grad_norm": 68.15400742768429, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": 2.300518751144409, |
|
"logits/rejected": 2.432492256164551, |
|
"logps/chosen": -17.285266876220703, |
|
"logps/rejected": -19.280744552612305, |
|
"loss": 0.6869, |
|
"original_losses": 2.3130502700805664, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -43.213172912597656, |
|
"rewards/margins": 4.9886932373046875, |
|
"rewards/rejected": -48.201866149902344, |
|
"step": 390, |
|
"weight": 0.337992399930954 |
|
}, |
|
{ |
|
"abs_diff": 2.9501354694366455, |
|
"all_logps_1": -7602.40478515625, |
|
"all_logps_1_values": -7602.40380859375, |
|
"all_logps_2": 396.3125, |
|
"all_logps_2_values": 396.3125, |
|
"epoch": 0.8442425861608336, |
|
"grad_norm": 72.89829906287879, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": 2.815917491912842, |
|
"logits/rejected": 3.0646049976348877, |
|
"logps/chosen": -17.960046768188477, |
|
"logps/rejected": -19.63981056213379, |
|
"loss": 0.7686, |
|
"original_losses": 2.1916909217834473, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -44.900108337402344, |
|
"rewards/margins": 4.1994123458862305, |
|
"rewards/rejected": -49.099525451660156, |
|
"step": 395, |
|
"weight": 0.36895015835762024 |
|
}, |
|
{ |
|
"abs_diff": 3.306037187576294, |
|
"all_logps_1": -6128.6064453125, |
|
"all_logps_1_values": -6128.6064453125, |
|
"all_logps_2": 348.07501220703125, |
|
"all_logps_2_values": 348.07501220703125, |
|
"epoch": 0.8549292011755276, |
|
"grad_norm": 49.93516351014214, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": 2.5622057914733887, |
|
"logits/rejected": 2.715359926223755, |
|
"logps/chosen": -18.067874908447266, |
|
"logps/rejected": -20.04085922241211, |
|
"loss": 0.7506, |
|
"original_losses": 2.2437596321105957, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -45.169681549072266, |
|
"rewards/margins": 4.932468891143799, |
|
"rewards/rejected": -50.10215377807617, |
|
"step": 400, |
|
"weight": 0.34681177139282227 |
|
}, |
|
{ |
|
"epoch": 0.8549292011755276, |
|
"eval_abs_diff": 3.175931930541992, |
|
"eval_all_logps_1": -7614.6904296875, |
|
"eval_all_logps_1_values": -7614.69091796875, |
|
"eval_all_logps_2": 414.86090087890625, |
|
"eval_all_logps_2_values": 414.86090087890625, |
|
"eval_logits/chosen": 1.7177369594573975, |
|
"eval_logits/rejected": 1.830857753753662, |
|
"eval_logps/chosen": -18.158353805541992, |
|
"eval_logps/rejected": -20.146547317504883, |
|
"eval_loss": 0.752778172492981, |
|
"eval_original_losses": 2.049124002456665, |
|
"eval_rewards/accuracies": 0.6975806355476379, |
|
"eval_rewards/chosen": -45.3958854675293, |
|
"eval_rewards/margins": 4.970486640930176, |
|
"eval_rewards/rejected": -50.36636734008789, |
|
"eval_runtime": 70.2236, |
|
"eval_samples_per_second": 27.925, |
|
"eval_steps_per_second": 0.883, |
|
"eval_weight": 0.37132638692855835, |
|
"step": 400 |
|
}, |
|
{ |
|
"abs_diff": 3.7374179363250732, |
|
"all_logps_1": -6704.875, |
|
"all_logps_1_values": -6704.875, |
|
"all_logps_2": 385.4375, |
|
"all_logps_2_values": 385.4375, |
|
"epoch": 0.8656158161902218, |
|
"grad_norm": 69.66297582257639, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": 2.536898612976074, |
|
"logits/rejected": 2.8442349433898926, |
|
"logps/chosen": -17.0179443359375, |
|
"logps/rejected": -19.512527465820312, |
|
"loss": 0.6907, |
|
"original_losses": 1.9623138904571533, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -42.544864654541016, |
|
"rewards/margins": 6.236458778381348, |
|
"rewards/rejected": -48.78131866455078, |
|
"step": 405, |
|
"weight": 0.320218563079834 |
|
}, |
|
{ |
|
"abs_diff": 3.427241802215576, |
|
"all_logps_1": -6360.1455078125, |
|
"all_logps_1_values": -6360.1455078125, |
|
"all_logps_2": 339.8062438964844, |
|
"all_logps_2_values": 339.8062438964844, |
|
"epoch": 0.8763024312049158, |
|
"grad_norm": 61.75715741585555, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": 2.682762861251831, |
|
"logits/rejected": 2.7268879413604736, |
|
"logps/chosen": -18.33367347717285, |
|
"logps/rejected": -20.431079864501953, |
|
"loss": 0.7588, |
|
"original_losses": 2.1594674587249756, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -45.83418273925781, |
|
"rewards/margins": 5.243517875671387, |
|
"rewards/rejected": -51.07769775390625, |
|
"step": 410, |
|
"weight": 0.32417041063308716 |
|
}, |
|
{ |
|
"abs_diff": 3.5729141235351562, |
|
"all_logps_1": -8468.05078125, |
|
"all_logps_1_values": -8468.05078125, |
|
"all_logps_2": 414.2124938964844, |
|
"all_logps_2_values": 414.2124938964844, |
|
"epoch": 0.88698904621961, |
|
"grad_norm": 40.72517812799497, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": 2.8361315727233887, |
|
"logits/rejected": 2.8616833686828613, |
|
"logps/chosen": -19.978229522705078, |
|
"logps/rejected": -22.11844825744629, |
|
"loss": 0.7242, |
|
"original_losses": 2.3812079429626465, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -49.94557189941406, |
|
"rewards/margins": 5.3505539894104, |
|
"rewards/rejected": -55.29612350463867, |
|
"step": 415, |
|
"weight": 0.3600180447101593 |
|
}, |
|
{ |
|
"abs_diff": 2.8872458934783936, |
|
"all_logps_1": -8678.85546875, |
|
"all_logps_1_values": -8678.85546875, |
|
"all_logps_2": 427.64373779296875, |
|
"all_logps_2_values": 427.64373779296875, |
|
"epoch": 0.897675661234304, |
|
"grad_norm": 40.225954100303696, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": 2.452263355255127, |
|
"logits/rejected": 2.515206813812256, |
|
"logps/chosen": -20.152559280395508, |
|
"logps/rejected": -21.298845291137695, |
|
"loss": 0.7959, |
|
"original_losses": 2.8674798011779785, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -50.38140106201172, |
|
"rewards/margins": 2.865709066390991, |
|
"rewards/rejected": -53.247108459472656, |
|
"step": 420, |
|
"weight": 0.39061683416366577 |
|
}, |
|
{ |
|
"abs_diff": 4.009498119354248, |
|
"all_logps_1": -7007.01708984375, |
|
"all_logps_1_values": -7007.01708984375, |
|
"all_logps_2": 359.6187438964844, |
|
"all_logps_2_values": 359.6187438964844, |
|
"epoch": 0.9083622762489981, |
|
"grad_norm": 54.351457754994804, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": 2.617743968963623, |
|
"logits/rejected": 2.7704989910125732, |
|
"logps/chosen": -19.43728256225586, |
|
"logps/rejected": -21.93575668334961, |
|
"loss": 0.7422, |
|
"original_losses": 2.389147996902466, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -48.59320831298828, |
|
"rewards/margins": 6.246188163757324, |
|
"rewards/rejected": -54.83939743041992, |
|
"step": 425, |
|
"weight": 0.31762319803237915 |
|
}, |
|
{ |
|
"abs_diff": 2.755589723587036, |
|
"all_logps_1": -8104.44921875, |
|
"all_logps_1_values": -8104.44921875, |
|
"all_logps_2": 428.79376220703125, |
|
"all_logps_2_values": 428.79376220703125, |
|
"epoch": 0.9190488912636923, |
|
"grad_norm": 45.34333002111428, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": 2.6327333450317383, |
|
"logits/rejected": 2.7319021224975586, |
|
"logps/chosen": -18.6940975189209, |
|
"logps/rejected": -20.32192039489746, |
|
"loss": 0.6933, |
|
"original_losses": 2.010368824005127, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -46.735252380371094, |
|
"rewards/margins": 4.069557189941406, |
|
"rewards/rejected": -50.80480194091797, |
|
"step": 430, |
|
"weight": 0.38695794343948364 |
|
}, |
|
{ |
|
"abs_diff": 3.834909439086914, |
|
"all_logps_1": -7406.4482421875, |
|
"all_logps_1_values": -7406.44775390625, |
|
"all_logps_2": 382.15625, |
|
"all_logps_2_values": 382.15625, |
|
"epoch": 0.9297355062783863, |
|
"grad_norm": 103.89987589364694, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": 2.688000440597534, |
|
"logits/rejected": 2.763110399246216, |
|
"logps/chosen": -19.011985778808594, |
|
"logps/rejected": -21.563823699951172, |
|
"loss": 0.7401, |
|
"original_losses": 2.1428942680358887, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -47.52996063232422, |
|
"rewards/margins": 6.379598617553711, |
|
"rewards/rejected": -53.90956497192383, |
|
"step": 435, |
|
"weight": 0.35418570041656494 |
|
}, |
|
{ |
|
"abs_diff": 3.49601411819458, |
|
"all_logps_1": -7640.515625, |
|
"all_logps_1_values": -7640.515625, |
|
"all_logps_2": 394.25, |
|
"all_logps_2_values": 394.25, |
|
"epoch": 0.9404221212930804, |
|
"grad_norm": 66.9604311531267, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": 2.6405506134033203, |
|
"logits/rejected": 2.7150299549102783, |
|
"logps/chosen": -18.938282012939453, |
|
"logps/rejected": -21.01675796508789, |
|
"loss": 0.7279, |
|
"original_losses": 2.3662502765655518, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -47.345703125, |
|
"rewards/margins": 5.196188449859619, |
|
"rewards/rejected": -52.541893005371094, |
|
"step": 440, |
|
"weight": 0.35034170746803284 |
|
}, |
|
{ |
|
"abs_diff": 3.1276192665100098, |
|
"all_logps_1": -9211.677734375, |
|
"all_logps_1_values": -9211.6787109375, |
|
"all_logps_2": 462.4624938964844, |
|
"all_logps_2_values": 462.4624938964844, |
|
"epoch": 0.9511087363077745, |
|
"grad_norm": 62.83164635980714, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": 2.502159357070923, |
|
"logits/rejected": 2.6519925594329834, |
|
"logps/chosen": -18.46548080444336, |
|
"logps/rejected": -20.194454193115234, |
|
"loss": 0.6978, |
|
"original_losses": 2.2807629108428955, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -46.16370391845703, |
|
"rewards/margins": 4.322434902191162, |
|
"rewards/rejected": -50.48613739013672, |
|
"step": 445, |
|
"weight": 0.35711297392845154 |
|
}, |
|
{ |
|
"abs_diff": 3.5259463787078857, |
|
"all_logps_1": -7040.4140625, |
|
"all_logps_1_values": -7040.4140625, |
|
"all_logps_2": 358.57501220703125, |
|
"all_logps_2_values": 358.57501220703125, |
|
"epoch": 0.9617953513224686, |
|
"grad_norm": 58.22216553617623, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": 2.841308832168579, |
|
"logits/rejected": 2.788696050643921, |
|
"logps/chosen": -19.402172088623047, |
|
"logps/rejected": -21.435121536254883, |
|
"loss": 0.6648, |
|
"original_losses": 2.3494513034820557, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -48.505435943603516, |
|
"rewards/margins": 5.082365989685059, |
|
"rewards/rejected": -53.587799072265625, |
|
"step": 450, |
|
"weight": 0.32446950674057007 |
|
}, |
|
{ |
|
"abs_diff": 2.527660369873047, |
|
"all_logps_1": -7083.1259765625, |
|
"all_logps_1_values": -7083.1259765625, |
|
"all_logps_2": 354.76873779296875, |
|
"all_logps_2_values": 354.76873779296875, |
|
"epoch": 0.9724819663371627, |
|
"grad_norm": 63.64964025419041, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": 2.5407052040100098, |
|
"logits/rejected": 2.6334285736083984, |
|
"logps/chosen": -19.97518539428711, |
|
"logps/rejected": -21.144289016723633, |
|
"loss": 0.773, |
|
"original_losses": 2.366673469543457, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -49.937965393066406, |
|
"rewards/margins": 2.922760486602783, |
|
"rewards/rejected": -52.86072540283203, |
|
"step": 455, |
|
"weight": 0.4015112519264221 |
|
}, |
|
{ |
|
"abs_diff": 3.792357921600342, |
|
"all_logps_1": -6872.0908203125, |
|
"all_logps_1_values": -6872.0908203125, |
|
"all_logps_2": 352.35626220703125, |
|
"all_logps_2_values": 352.35626220703125, |
|
"epoch": 0.9831685813518568, |
|
"grad_norm": 70.18502240580426, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": 2.659250020980835, |
|
"logits/rejected": 2.507812976837158, |
|
"logps/chosen": -20.08974266052246, |
|
"logps/rejected": -22.00864028930664, |
|
"loss": 0.7596, |
|
"original_losses": 3.0322279930114746, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -50.22435760498047, |
|
"rewards/margins": 4.797248840332031, |
|
"rewards/rejected": -55.0216064453125, |
|
"step": 460, |
|
"weight": 0.3797384202480316 |
|
}, |
|
{ |
|
"abs_diff": 3.1223583221435547, |
|
"all_logps_1": -7477.0185546875, |
|
"all_logps_1_values": -7477.0185546875, |
|
"all_logps_2": 386.9937438964844, |
|
"all_logps_2_values": 386.9937438964844, |
|
"epoch": 0.9938551963665508, |
|
"grad_norm": 70.11026953873642, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": 2.367159366607666, |
|
"logits/rejected": 2.6166296005249023, |
|
"logps/chosen": -18.468345642089844, |
|
"logps/rejected": -20.6806697845459, |
|
"loss": 0.6765, |
|
"original_losses": 1.560880422592163, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -46.17086410522461, |
|
"rewards/margins": 5.5308074951171875, |
|
"rewards/rejected": -51.7016716003418, |
|
"step": 465, |
|
"weight": 0.36222249269485474 |
|
}, |
|
{ |
|
"epoch": 0.9981298423724285, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 0.9884350126254227, |
|
"train_runtime": 7236.0008, |
|
"train_samples_per_second": 8.275, |
|
"train_steps_per_second": 0.065 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|