|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9807355516637478, |
|
"eval_steps": 18, |
|
"global_step": 35, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.028021015761821366, |
|
"grad_norm": 97.3614384660342, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": -7.731139183044434, |
|
"logits/rejected": -8.25313663482666, |
|
"logps/chosen": -1.6605920791625977, |
|
"logps/rejected": -1.683160424232483, |
|
"loss": 6.8307, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -16.60592269897461, |
|
"rewards/margins": 0.22568130493164062, |
|
"rewards/rejected": -16.83160400390625, |
|
"sft_loss": 0.04547927528619766, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05604203152364273, |
|
"grad_norm": 121.91139131956491, |
|
"learning_rate": 2.6e-07, |
|
"logits/chosen": -7.893815040588379, |
|
"logits/rejected": -8.019620895385742, |
|
"logps/chosen": -1.952430248260498, |
|
"logps/rejected": -1.8113142251968384, |
|
"loss": 7.3627, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -19.52429962158203, |
|
"rewards/margins": -1.4111591577529907, |
|
"rewards/rejected": -18.113142013549805, |
|
"sft_loss": 0.00891563668847084, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0840630472854641, |
|
"grad_norm": 115.99875317435591, |
|
"learning_rate": 4.4e-07, |
|
"logits/chosen": -9.035008430480957, |
|
"logits/rejected": -8.950678825378418, |
|
"logps/chosen": -1.4869132041931152, |
|
"logps/rejected": -1.50464928150177, |
|
"loss": 7.025, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -14.869132995605469, |
|
"rewards/margins": 0.17736005783081055, |
|
"rewards/rejected": -15.046493530273438, |
|
"sft_loss": 0.010355046950280666, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.11208406304728546, |
|
"grad_norm": 143.98542912632848, |
|
"learning_rate": 6.2e-07, |
|
"logits/chosen": -9.924211502075195, |
|
"logits/rejected": -9.45657730102539, |
|
"logps/chosen": -1.4292300939559937, |
|
"logps/rejected": -1.2033114433288574, |
|
"loss": 6.4674, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -14.292301177978516, |
|
"rewards/margins": -2.2591874599456787, |
|
"rewards/rejected": -12.033114433288574, |
|
"sft_loss": 0.006642716005444527, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.14010507880910683, |
|
"grad_norm": 105.0416363604681, |
|
"learning_rate": 7.981529564210822e-07, |
|
"logits/chosen": -8.538932800292969, |
|
"logits/rejected": -8.046061515808105, |
|
"logps/chosen": -1.5071882009506226, |
|
"logps/rejected": -1.675721526145935, |
|
"loss": 7.1364, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -15.071882247924805, |
|
"rewards/margins": 1.6853327751159668, |
|
"rewards/rejected": -16.757217407226562, |
|
"sft_loss": 0.01169000007212162, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1681260945709282, |
|
"grad_norm": 136.46572113440772, |
|
"learning_rate": 7.926307788508979e-07, |
|
"logits/chosen": -8.856929779052734, |
|
"logits/rejected": -9.344861030578613, |
|
"logps/chosen": -1.6353546380996704, |
|
"logps/rejected": -1.7302504777908325, |
|
"loss": 5.6445, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -16.353546142578125, |
|
"rewards/margins": 0.9489572644233704, |
|
"rewards/rejected": -17.30250358581543, |
|
"sft_loss": 0.03519538417458534, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.19614711033274956, |
|
"grad_norm": 125.56078536529542, |
|
"learning_rate": 7.834901323040175e-07, |
|
"logits/chosen": -7.622992515563965, |
|
"logits/rejected": -7.627020835876465, |
|
"logps/chosen": -2.047703981399536, |
|
"logps/rejected": -1.6984105110168457, |
|
"loss": 6.5965, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -20.477039337158203, |
|
"rewards/margins": -3.4929349422454834, |
|
"rewards/rejected": -16.98410415649414, |
|
"sft_loss": 0.006214356515556574, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.22416812609457093, |
|
"grad_norm": 135.07237608120852, |
|
"learning_rate": 7.70824812183283e-07, |
|
"logits/chosen": -8.93583869934082, |
|
"logits/rejected": -9.021809577941895, |
|
"logps/chosen": -1.3730394840240479, |
|
"logps/rejected": -1.4458472728729248, |
|
"loss": 6.3454, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -13.730398178100586, |
|
"rewards/margins": 0.7280769944190979, |
|
"rewards/rejected": -14.458473205566406, |
|
"sft_loss": 0.03004990890622139, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.2521891418563923, |
|
"grad_norm": 239.98627324152338, |
|
"learning_rate": 7.547647818120495e-07, |
|
"logits/chosen": -9.406291961669922, |
|
"logits/rejected": -9.9324951171875, |
|
"logps/chosen": -1.5991909503936768, |
|
"logps/rejected": -1.6271830797195435, |
|
"loss": 5.2256, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -15.991909980773926, |
|
"rewards/margins": 0.2799214720726013, |
|
"rewards/rejected": -16.271831512451172, |
|
"sft_loss": 0.03231532499194145, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.28021015761821366, |
|
"grad_norm": 234.53437032749468, |
|
"learning_rate": 7.354748388346194e-07, |
|
"logits/chosen": -7.813473701477051, |
|
"logits/rejected": -9.137899398803711, |
|
"logps/chosen": -1.8943036794662476, |
|
"logps/rejected": -1.9154584407806396, |
|
"loss": 4.8325, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -18.943037033081055, |
|
"rewards/margins": 0.2115485668182373, |
|
"rewards/rejected": -19.154584884643555, |
|
"sft_loss": 0.008024048060178757, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.30823117338003503, |
|
"grad_norm": 287.5499688149926, |
|
"learning_rate": 7.131529241694047e-07, |
|
"logits/chosen": -10.093656539916992, |
|
"logits/rejected": -10.823583602905273, |
|
"logps/chosen": -1.676328182220459, |
|
"logps/rejected": -2.193615198135376, |
|
"loss": 4.8686, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.763280868530273, |
|
"rewards/margins": 5.17287015914917, |
|
"rewards/rejected": -21.9361515045166, |
|
"sft_loss": 0.018158258870244026, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.3362521891418564, |
|
"grad_norm": 279.35544138821984, |
|
"learning_rate": 6.880280908672471e-07, |
|
"logits/chosen": -7.598231792449951, |
|
"logits/rejected": -8.863749504089355, |
|
"logps/chosen": -1.8558087348937988, |
|
"logps/rejected": -2.048665761947632, |
|
"loss": 4.1241, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -18.558086395263672, |
|
"rewards/margins": 1.9285707473754883, |
|
"rewards/rejected": -20.486658096313477, |
|
"sft_loss": 0.003531986614689231, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.36427320490367776, |
|
"grad_norm": 308.20678092603185, |
|
"learning_rate": 6.603581537171586e-07, |
|
"logits/chosen": -8.397397994995117, |
|
"logits/rejected": -10.13599681854248, |
|
"logps/chosen": -1.745999813079834, |
|
"logps/rejected": -1.8406281471252441, |
|
"loss": 3.8904, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -17.459999084472656, |
|
"rewards/margins": 0.9462810754776001, |
|
"rewards/rejected": -18.406280517578125, |
|
"sft_loss": 0.010710272938013077, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.3922942206654991, |
|
"grad_norm": 88.53513750352447, |
|
"learning_rate": 6.304270437177064e-07, |
|
"logits/chosen": -11.391769409179688, |
|
"logits/rejected": -13.656466484069824, |
|
"logps/chosen": -1.820195198059082, |
|
"logps/rejected": -2.2572662830352783, |
|
"loss": 2.6976, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -18.201950073242188, |
|
"rewards/margins": 4.370712757110596, |
|
"rewards/rejected": -22.572664260864258, |
|
"sft_loss": 0.006363618653267622, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.4203152364273205, |
|
"grad_norm": 82.22128437782617, |
|
"learning_rate": 5.985418945607484e-07, |
|
"logits/chosen": -12.035834312438965, |
|
"logits/rejected": -14.527205467224121, |
|
"logps/chosen": -2.3111374378204346, |
|
"logps/rejected": -3.1396684646606445, |
|
"loss": 1.2788, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -23.111371994018555, |
|
"rewards/margins": 8.285309791564941, |
|
"rewards/rejected": -31.396682739257812, |
|
"sft_loss": 0.007131902035325766, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.44833625218914186, |
|
"grad_norm": 120.40204277011782, |
|
"learning_rate": 5.650298910241353e-07, |
|
"logits/chosen": -12.784805297851562, |
|
"logits/rejected": -15.177325248718262, |
|
"logps/chosen": -2.0324227809906006, |
|
"logps/rejected": -3.119992971420288, |
|
"loss": 1.4116, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -20.324228286743164, |
|
"rewards/margins": 10.875699996948242, |
|
"rewards/rejected": -31.199928283691406, |
|
"sft_loss": 0.005817623808979988, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.4763572679509632, |
|
"grad_norm": 97.73770625062221, |
|
"learning_rate": 5.302349116131393e-07, |
|
"logits/chosen": -15.652244567871094, |
|
"logits/rejected": -17.80880355834961, |
|
"logps/chosen": -1.9218964576721191, |
|
"logps/rejected": -2.738666296005249, |
|
"loss": 1.6013, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -19.218965530395508, |
|
"rewards/margins": 8.167696952819824, |
|
"rewards/rejected": -27.386659622192383, |
|
"sft_loss": 0.004993550945073366, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.5043782837127846, |
|
"grad_norm": 189.96208267597956, |
|
"learning_rate": 4.945139999016476e-07, |
|
"logits/chosen": -15.17531681060791, |
|
"logits/rejected": -15.677058219909668, |
|
"logps/chosen": -1.9880082607269287, |
|
"logps/rejected": -3.052130699157715, |
|
"loss": 1.8319, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -19.880081176757812, |
|
"rewards/margins": 10.641225814819336, |
|
"rewards/rejected": -30.52130889892578, |
|
"sft_loss": 0.008743491023778915, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.532399299474606, |
|
"grad_norm": 114.52318633727846, |
|
"learning_rate": 4.5823370078193663e-07, |
|
"logits/chosen": -10.547327995300293, |
|
"logits/rejected": -14.194029808044434, |
|
"logps/chosen": -2.2875614166259766, |
|
"logps/rejected": -3.8111658096313477, |
|
"loss": 1.7772, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -22.875612258911133, |
|
"rewards/margins": 15.236043930053711, |
|
"rewards/rejected": -38.111656188964844, |
|
"sft_loss": 0.01470925658941269, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.5604203152364273, |
|
"grad_norm": 82.97272038448429, |
|
"learning_rate": 4.217662992180634e-07, |
|
"logits/chosen": -10.6708345413208, |
|
"logits/rejected": -15.503955841064453, |
|
"logps/chosen": -2.2511441707611084, |
|
"logps/rejected": -3.763054132461548, |
|
"loss": 1.6103, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -22.511442184448242, |
|
"rewards/margins": 15.119099617004395, |
|
"rewards/rejected": -37.63053894042969, |
|
"sft_loss": 0.012447498738765717, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5884413309982487, |
|
"grad_norm": 93.13289791788793, |
|
"learning_rate": 3.8548600009835237e-07, |
|
"logits/chosen": -11.672554016113281, |
|
"logits/rejected": -16.79704475402832, |
|
"logps/chosen": -2.7983806133270264, |
|
"logps/rejected": -4.703005790710449, |
|
"loss": 1.4708, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -27.983802795410156, |
|
"rewards/margins": 19.046255111694336, |
|
"rewards/rejected": -47.030059814453125, |
|
"sft_loss": 0.0047310409136116505, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.6164623467600701, |
|
"grad_norm": 97.54389148924957, |
|
"learning_rate": 3.4976508838686066e-07, |
|
"logits/chosen": -16.980070114135742, |
|
"logits/rejected": -18.32730484008789, |
|
"logps/chosen": -2.150972843170166, |
|
"logps/rejected": -3.424234390258789, |
|
"loss": 1.1655, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -21.509729385375977, |
|
"rewards/margins": 12.732614517211914, |
|
"rewards/rejected": -34.242340087890625, |
|
"sft_loss": 0.007569438312202692, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.6444833625218914, |
|
"grad_norm": 73.41731467567557, |
|
"learning_rate": 3.149701089758648e-07, |
|
"logits/chosen": -11.438061714172363, |
|
"logits/rejected": -14.675212860107422, |
|
"logps/chosen": -2.3481569290161133, |
|
"logps/rejected": -4.2203450202941895, |
|
"loss": 1.2242, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -23.481565475463867, |
|
"rewards/margins": 18.721879959106445, |
|
"rewards/rejected": -42.20344543457031, |
|
"sft_loss": 0.010319937951862812, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.6725043782837128, |
|
"grad_norm": 96.48356219878691, |
|
"learning_rate": 2.8145810543925163e-07, |
|
"logits/chosen": -11.793488502502441, |
|
"logits/rejected": -15.576847076416016, |
|
"logps/chosen": -2.3053696155548096, |
|
"logps/rejected": -4.343653202056885, |
|
"loss": 1.4752, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -23.053693771362305, |
|
"rewards/margins": 20.382837295532227, |
|
"rewards/rejected": -43.4365348815918, |
|
"sft_loss": 0.024834871292114258, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.7005253940455342, |
|
"grad_norm": 122.92321660024119, |
|
"learning_rate": 2.495729562822935e-07, |
|
"logits/chosen": -16.064531326293945, |
|
"logits/rejected": -17.884010314941406, |
|
"logps/chosen": -2.647796869277954, |
|
"logps/rejected": -3.9600863456726074, |
|
"loss": 1.4732, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -26.477968215942383, |
|
"rewards/margins": 13.12289810180664, |
|
"rewards/rejected": -39.60086441040039, |
|
"sft_loss": 0.021936513483524323, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.7285464098073555, |
|
"grad_norm": 80.26268266585235, |
|
"learning_rate": 2.196418462828415e-07, |
|
"logits/chosen": -12.343572616577148, |
|
"logits/rejected": -15.574173927307129, |
|
"logps/chosen": -2.2388336658477783, |
|
"logps/rejected": -4.106793403625488, |
|
"loss": 1.2593, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -22.388338088989258, |
|
"rewards/margins": 18.679595947265625, |
|
"rewards/rejected": -41.06793212890625, |
|
"sft_loss": 0.013025043532252312, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.7565674255691769, |
|
"grad_norm": 66.28989889413502, |
|
"learning_rate": 1.9197190913275294e-07, |
|
"logits/chosen": -12.330286026000977, |
|
"logits/rejected": -15.901168823242188, |
|
"logps/chosen": -2.5048580169677734, |
|
"logps/rejected": -4.5386857986450195, |
|
"loss": 1.1459, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -25.048580169677734, |
|
"rewards/margins": 20.338275909423828, |
|
"rewards/rejected": -45.38685607910156, |
|
"sft_loss": 0.006859698798507452, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.7845884413309983, |
|
"grad_norm": 81.14927404545568, |
|
"learning_rate": 1.6684707583059529e-07, |
|
"logits/chosen": -17.190406799316406, |
|
"logits/rejected": -19.05614471435547, |
|
"logps/chosen": -2.768648147583008, |
|
"logps/rejected": -4.382925033569336, |
|
"loss": 1.3002, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -27.68647575378418, |
|
"rewards/margins": 16.142770767211914, |
|
"rewards/rejected": -43.82925033569336, |
|
"sft_loss": 0.005794988479465246, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.8126094570928196, |
|
"grad_norm": 70.98888001094448, |
|
"learning_rate": 1.4452516116538054e-07, |
|
"logits/chosen": -10.294852256774902, |
|
"logits/rejected": -15.053112030029297, |
|
"logps/chosen": -2.106778383255005, |
|
"logps/rejected": -4.66357421875, |
|
"loss": 0.8862, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -21.067781448364258, |
|
"rewards/margins": 25.567956924438477, |
|
"rewards/rejected": -46.6357421875, |
|
"sft_loss": 0.008147615939378738, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.840630472854641, |
|
"grad_norm": 99.24795263935427, |
|
"learning_rate": 1.2523521818795044e-07, |
|
"logits/chosen": -9.799162864685059, |
|
"logits/rejected": -15.37686538696289, |
|
"logps/chosen": -2.270378828048706, |
|
"logps/rejected": -5.39860725402832, |
|
"loss": 1.3672, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -22.70379066467285, |
|
"rewards/margins": 31.282283782958984, |
|
"rewards/rejected": -53.98607635498047, |
|
"sft_loss": 0.005661052651703358, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8686514886164624, |
|
"grad_norm": 105.42160708641147, |
|
"learning_rate": 1.0917518781671699e-07, |
|
"logits/chosen": -10.193641662597656, |
|
"logits/rejected": -13.275125503540039, |
|
"logps/chosen": -2.434311866760254, |
|
"logps/rejected": -3.9474875926971436, |
|
"loss": 1.3672, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -24.343116760253906, |
|
"rewards/margins": 15.131752967834473, |
|
"rewards/rejected": -39.47487258911133, |
|
"sft_loss": 0.01922934129834175, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.8966725043782837, |
|
"grad_norm": 75.46584751909548, |
|
"learning_rate": 9.650986769598242e-08, |
|
"logits/chosen": -9.355328559875488, |
|
"logits/rejected": -16.710058212280273, |
|
"logps/chosen": -2.3611741065979004, |
|
"logps/rejected": -5.737414360046387, |
|
"loss": 1.6485, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -23.61174201965332, |
|
"rewards/margins": 33.76239776611328, |
|
"rewards/rejected": -57.3741455078125, |
|
"sft_loss": 0.010620678775012493, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.9246935201401051, |
|
"grad_norm": 84.00269490747272, |
|
"learning_rate": 8.736922114910199e-08, |
|
"logits/chosen": -10.082009315490723, |
|
"logits/rejected": -14.500289916992188, |
|
"logps/chosen": -2.227128267288208, |
|
"logps/rejected": -5.038478374481201, |
|
"loss": 1.3242, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -22.271284103393555, |
|
"rewards/margins": 28.113502502441406, |
|
"rewards/rejected": -50.384788513183594, |
|
"sft_loss": 0.009218152612447739, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.9527145359019265, |
|
"grad_norm": 75.42419711071338, |
|
"learning_rate": 8.184704357891779e-08, |
|
"logits/chosen": -10.325528144836426, |
|
"logits/rejected": -16.278217315673828, |
|
"logps/chosen": -2.2617452144622803, |
|
"logps/rejected": -4.84274435043335, |
|
"loss": 0.8932, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -22.617454528808594, |
|
"rewards/margins": 25.809988021850586, |
|
"rewards/rejected": -48.42743682861328, |
|
"sft_loss": 0.014543527737259865, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.9807355516637478, |
|
"grad_norm": 108.8748972501278, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": -12.176219940185547, |
|
"logits/rejected": -16.480701446533203, |
|
"logps/chosen": -2.5212955474853516, |
|
"logps/rejected": -4.665997505187988, |
|
"loss": 1.4444, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -25.21295166015625, |
|
"rewards/margins": 21.447019577026367, |
|
"rewards/rejected": -46.65997314453125, |
|
"sft_loss": 0.04498640075325966, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.9807355516637478, |
|
"eval_logits/chosen": -12.004097938537598, |
|
"eval_logits/rejected": -17.047502517700195, |
|
"eval_logps/chosen": -2.168222427368164, |
|
"eval_logps/rejected": -4.787535667419434, |
|
"eval_loss": 1.0162526369094849, |
|
"eval_rewards/accuracies": 0.9166666865348816, |
|
"eval_rewards/chosen": -21.682226181030273, |
|
"eval_rewards/margins": 26.193130493164062, |
|
"eval_rewards/rejected": -47.875362396240234, |
|
"eval_runtime": 9.3123, |
|
"eval_samples_per_second": 10.094, |
|
"eval_sft_loss": 0.01844729855656624, |
|
"eval_steps_per_second": 1.289, |
|
"step": 35 |
|
}, |
|
{ |
|
"before_init_mem_cpu": 3802071040, |
|
"before_init_mem_gpu": 22016, |
|
"epoch": 0.9807355516637478, |
|
"init_mem_cpu_alloc_delta": 364544, |
|
"init_mem_cpu_peaked_delta": 0, |
|
"init_mem_gpu_alloc_delta": 0, |
|
"init_mem_gpu_peaked_delta": 0, |
|
"step": 35, |
|
"total_flos": 39867492466688.0, |
|
"train_loss": 3.085822834287371, |
|
"train_mem_cpu_alloc_delta": 5213659136, |
|
"train_mem_cpu_peaked_delta": 22737326080, |
|
"train_mem_gpu_alloc_delta": 16267848704, |
|
"train_mem_gpu_peaked_delta": 36029468160, |
|
"train_runtime": 1628.7465, |
|
"train_samples_per_second": 2.805, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 35, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 18, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 39867492466688.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|