|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 1065, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_losses": 0.6931471824645996, |
|
"epoch": 0.0, |
|
"grad_norm": 2.1187342100202096, |
|
"learning_rate": 4.6728971962616815e-09, |
|
"logits/chosen": -2.8477635383605957, |
|
"logits/rejected": -2.8469698429107666, |
|
"logps/chosen": -522.6112670898438, |
|
"logps/rejected": -359.48583984375, |
|
"loss": 0.6931, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_losses": 0.6926888227462769, |
|
"epoch": 0.03, |
|
"grad_norm": 24.895678335901746, |
|
"learning_rate": 4.672897196261682e-08, |
|
"logits/chosen": -2.9204907417297363, |
|
"logits/rejected": -2.7960145473480225, |
|
"logps/chosen": -313.4462890625, |
|
"logps/rejected": -170.4320068359375, |
|
"loss": 0.6989, |
|
"positive_losses": 0.036266010254621506, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": 0.00024180197215173393, |
|
"rewards/margins": 0.0009183046640828252, |
|
"rewards/margins_max": 0.0020684306509792805, |
|
"rewards/margins_min": -0.0002318212646059692, |
|
"rewards/margins_std": 0.0016265236772596836, |
|
"rewards/rejected": -0.0006765025900676847, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_losses": 0.6935218572616577, |
|
"epoch": 0.06, |
|
"grad_norm": 26.261976434571714, |
|
"learning_rate": 9.345794392523364e-08, |
|
"logits/chosen": -2.764099597930908, |
|
"logits/rejected": -2.7112996578216553, |
|
"logps/chosen": -381.15252685546875, |
|
"logps/rejected": -244.30020141601562, |
|
"loss": 0.702, |
|
"positive_losses": 0.120574951171875, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.00042650121031329036, |
|
"rewards/margins": -0.0007463769870810211, |
|
"rewards/margins_max": 0.0008523863507434726, |
|
"rewards/margins_min": -0.0023451403249055147, |
|
"rewards/margins_std": 0.0022609930019825697, |
|
"rewards/rejected": 0.0003198757185600698, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_losses": 0.6929606795310974, |
|
"epoch": 0.08, |
|
"grad_norm": 26.568307039870714, |
|
"learning_rate": 1.4018691588785045e-07, |
|
"logits/chosen": -2.87646484375, |
|
"logits/rejected": -2.8245913982391357, |
|
"logps/chosen": -375.969970703125, |
|
"logps/rejected": -252.5666961669922, |
|
"loss": 0.6964, |
|
"positive_losses": 0.026204681023955345, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.0013370837550610304, |
|
"rewards/margins": 0.0003754205536097288, |
|
"rewards/margins_max": 0.001665195683017373, |
|
"rewards/margins_min": -0.00091435422655195, |
|
"rewards/margins_std": 0.0018240170320495963, |
|
"rewards/rejected": 0.0009616632014513016, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_losses": 0.6924411058425903, |
|
"epoch": 0.11, |
|
"grad_norm": 10.124655258526282, |
|
"learning_rate": 1.8691588785046729e-07, |
|
"logits/chosen": -2.7380053997039795, |
|
"logits/rejected": -2.7716264724731445, |
|
"logps/chosen": -306.62396240234375, |
|
"logps/rejected": -317.3713684082031, |
|
"loss": 0.6942, |
|
"positive_losses": 0.01987609826028347, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.003017005743458867, |
|
"rewards/margins": 0.0014187573688104749, |
|
"rewards/margins_max": 0.0038917693309485912, |
|
"rewards/margins_min": -0.0010542543604969978, |
|
"rewards/margins_std": 0.003497367026284337, |
|
"rewards/rejected": 0.001598248491063714, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_losses": 0.6920477151870728, |
|
"epoch": 0.14, |
|
"grad_norm": 2.3619576775282343, |
|
"learning_rate": 2.336448598130841e-07, |
|
"logits/chosen": -2.7991483211517334, |
|
"logits/rejected": -2.7094950675964355, |
|
"logps/chosen": -244.6391143798828, |
|
"logps/rejected": -173.8690643310547, |
|
"loss": 0.6924, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.005839168094098568, |
|
"rewards/margins": 0.0022048726677894592, |
|
"rewards/margins_max": 0.004601255524903536, |
|
"rewards/margins_min": -0.00019151013111695647, |
|
"rewards/margins_std": 0.0033889967016875744, |
|
"rewards/rejected": 0.0036342956591397524, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_losses": 0.6909143328666687, |
|
"epoch": 0.17, |
|
"grad_norm": 2.3567546921207154, |
|
"learning_rate": 2.803738317757009e-07, |
|
"logits/chosen": -2.7648746967315674, |
|
"logits/rejected": -2.6973958015441895, |
|
"logps/chosen": -266.65625, |
|
"logps/rejected": -235.16543579101562, |
|
"loss": 0.6914, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0077917128801345825, |
|
"rewards/margins": 0.004477448761463165, |
|
"rewards/margins_max": 0.006284839008003473, |
|
"rewards/margins_min": 0.0026700585149228573, |
|
"rewards/margins_std": 0.0025560357607901096, |
|
"rewards/rejected": 0.0033142641186714172, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_losses": 0.6901382207870483, |
|
"epoch": 0.2, |
|
"grad_norm": 1.8400876800751589, |
|
"learning_rate": 3.271028037383177e-07, |
|
"logits/chosen": -2.6974568367004395, |
|
"logits/rejected": -2.699871063232422, |
|
"logps/chosen": -332.7781677246094, |
|
"logps/rejected": -214.24301147460938, |
|
"loss": 0.6902, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.01162528432905674, |
|
"rewards/margins": 0.006035626865923405, |
|
"rewards/margins_max": 0.010061298497021198, |
|
"rewards/margins_min": 0.0020099543035030365, |
|
"rewards/margins_std": 0.005693159066140652, |
|
"rewards/rejected": 0.005589658860117197, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_losses": 0.688714861869812, |
|
"epoch": 0.23, |
|
"grad_norm": 4.087198387473911, |
|
"learning_rate": 3.7383177570093457e-07, |
|
"logits/chosen": -2.944587469100952, |
|
"logits/rejected": -2.859727382659912, |
|
"logps/chosen": -335.75775146484375, |
|
"logps/rejected": -286.72412109375, |
|
"loss": 0.6886, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.012916642241179943, |
|
"rewards/margins": 0.00890201423317194, |
|
"rewards/margins_max": 0.013774129562079906, |
|
"rewards/margins_min": 0.004029898438602686, |
|
"rewards/margins_std": 0.006890212185680866, |
|
"rewards/rejected": 0.004014627076685429, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_losses": 0.6876496076583862, |
|
"epoch": 0.25, |
|
"grad_norm": 2.4851076976994455, |
|
"learning_rate": 4.205607476635514e-07, |
|
"logits/chosen": -2.777012586593628, |
|
"logits/rejected": -2.767564296722412, |
|
"logps/chosen": -275.3215637207031, |
|
"logps/rejected": -202.66746520996094, |
|
"loss": 0.6869, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.01653674617409706, |
|
"rewards/margins": 0.011065873317420483, |
|
"rewards/margins_max": 0.019997073337435722, |
|
"rewards/margins_min": 0.002134673995897174, |
|
"rewards/margins_std": 0.012630623765289783, |
|
"rewards/rejected": 0.005470870994031429, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_losses": 0.6823551058769226, |
|
"epoch": 0.28, |
|
"grad_norm": 2.301867398682679, |
|
"learning_rate": 4.672897196261682e-07, |
|
"logits/chosen": -2.96421480178833, |
|
"logits/rejected": -2.8904025554656982, |
|
"logps/chosen": -455.4552307128906, |
|
"logps/rejected": -341.5201416015625, |
|
"loss": 0.6835, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.028032690286636353, |
|
"rewards/margins": 0.02175181731581688, |
|
"rewards/margins_max": 0.03047388233244419, |
|
"rewards/margins_min": 0.013029751367866993, |
|
"rewards/margins_std": 0.01233486458659172, |
|
"rewards/rejected": 0.00628087529912591, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_dpo_losses": 0.6916564702987671, |
|
"eval_logits/chosen": -2.854139804840088, |
|
"eval_logits/rejected": -2.808910608291626, |
|
"eval_logps/chosen": -284.305908203125, |
|
"eval_logps/rejected": -258.5689392089844, |
|
"eval_loss": 0.6964598298072815, |
|
"eval_positive_losses": 0.0435669906437397, |
|
"eval_rewards/accuracies": 0.5833333134651184, |
|
"eval_rewards/chosen": 0.009152961894869804, |
|
"eval_rewards/margins": 0.0030159971211105585, |
|
"eval_rewards/margins_max": 0.015517139807343483, |
|
"eval_rewards/margins_min": -0.007612254936248064, |
|
"eval_rewards/margins_std": 0.01032496802508831, |
|
"eval_rewards/rejected": 0.006136965472251177, |
|
"eval_runtime": 380.9273, |
|
"eval_samples_per_second": 5.25, |
|
"eval_steps_per_second": 0.165, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_losses": 0.6818417310714722, |
|
"epoch": 0.31, |
|
"grad_norm": 2.49767805990243, |
|
"learning_rate": 4.999879018839287e-07, |
|
"logits/chosen": -2.851139545440674, |
|
"logits/rejected": -2.726069688796997, |
|
"logps/chosen": -324.7695007324219, |
|
"logps/rejected": -245.7756805419922, |
|
"loss": 0.6789, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.029296237975358963, |
|
"rewards/margins": 0.022873710840940475, |
|
"rewards/margins_max": 0.03740672022104263, |
|
"rewards/margins_min": 0.008340701460838318, |
|
"rewards/margins_std": 0.020552778616547585, |
|
"rewards/rejected": 0.006422528065741062, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_losses": 0.6763116717338562, |
|
"epoch": 0.34, |
|
"grad_norm": 2.0754455271947125, |
|
"learning_rate": 4.997728568369408e-07, |
|
"logits/chosen": -2.9743714332580566, |
|
"logits/rejected": -2.9403293132781982, |
|
"logps/chosen": -367.7530212402344, |
|
"logps/rejected": -330.84222412109375, |
|
"loss": 0.6754, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.041877757757902145, |
|
"rewards/margins": 0.03419335186481476, |
|
"rewards/margins_max": 0.04843282699584961, |
|
"rewards/margins_min": 0.019953874871134758, |
|
"rewards/margins_std": 0.02013765648007393, |
|
"rewards/rejected": 0.0076844110153615475, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_losses": 0.6634218692779541, |
|
"epoch": 0.37, |
|
"grad_norm": 2.0479204842061978, |
|
"learning_rate": 4.992892309373227e-07, |
|
"logits/chosen": -2.8403239250183105, |
|
"logits/rejected": -2.7551536560058594, |
|
"logps/chosen": -384.84814453125, |
|
"logps/rejected": -258.2791442871094, |
|
"loss": 0.6672, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.061241261661052704, |
|
"rewards/margins": 0.06080120801925659, |
|
"rewards/margins_max": 0.08230480551719666, |
|
"rewards/margins_min": 0.039297617971897125, |
|
"rewards/margins_std": 0.03041067160665989, |
|
"rewards/rejected": 0.00044005707604810596, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_losses": 0.6647445559501648, |
|
"epoch": 0.39, |
|
"grad_norm": 1.7367015733096829, |
|
"learning_rate": 4.985375442281968e-07, |
|
"logits/chosen": -2.8128199577331543, |
|
"logits/rejected": -2.783970355987549, |
|
"logps/chosen": -345.33953857421875, |
|
"logps/rejected": -231.666259765625, |
|
"loss": 0.666, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.05871708318591118, |
|
"rewards/margins": 0.058249205350875854, |
|
"rewards/margins_max": 0.0843496099114418, |
|
"rewards/margins_min": 0.03214879333972931, |
|
"rewards/margins_std": 0.03691155090928078, |
|
"rewards/rejected": 0.0004678791738115251, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_losses": 0.6681698560714722, |
|
"epoch": 0.42, |
|
"grad_norm": 2.1268042590236584, |
|
"learning_rate": 4.975186049985817e-07, |
|
"logits/chosen": -2.8787732124328613, |
|
"logits/rejected": -2.80892276763916, |
|
"logps/chosen": -291.04962158203125, |
|
"logps/rejected": -247.11825561523438, |
|
"loss": 0.6614, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.05772637203335762, |
|
"rewards/margins": 0.051212601363658905, |
|
"rewards/margins_max": 0.08002050220966339, |
|
"rewards/margins_min": 0.022404693067073822, |
|
"rewards/margins_std": 0.04074053093791008, |
|
"rewards/rejected": 0.006513768341392279, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_losses": 0.6561421751976013, |
|
"epoch": 0.45, |
|
"grad_norm": 1.7754914073644856, |
|
"learning_rate": 4.962335089142375e-07, |
|
"logits/chosen": -2.888826608657837, |
|
"logits/rejected": -2.7711613178253174, |
|
"logps/chosen": -323.6219787597656, |
|
"logps/rejected": -242.70925903320312, |
|
"loss": 0.6537, |
|
"positive_losses": 0.020649338141083717, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.08097714930772781, |
|
"rewards/margins": 0.07674388587474823, |
|
"rewards/margins_max": 0.12844815850257874, |
|
"rewards/margins_min": 0.02503962442278862, |
|
"rewards/margins_std": 0.07312087714672089, |
|
"rewards/rejected": 0.004233261104673147, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_losses": 0.6377928853034973, |
|
"epoch": 0.48, |
|
"grad_norm": 1.8314743659672637, |
|
"learning_rate": 4.946836378394966e-07, |
|
"logits/chosen": -2.927858591079712, |
|
"logits/rejected": -2.771623134613037, |
|
"logps/chosen": -388.8592224121094, |
|
"logps/rejected": -233.54714965820312, |
|
"loss": 0.6505, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.11622031778097153, |
|
"rewards/margins": 0.11623603105545044, |
|
"rewards/margins_max": 0.1615394800901413, |
|
"rewards/margins_min": 0.07093258202075958, |
|
"rewards/margins_std": 0.0640687644481659, |
|
"rewards/rejected": -1.5713460015831515e-05, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_losses": 0.6507928371429443, |
|
"epoch": 0.51, |
|
"grad_norm": 2.090628503360293, |
|
"learning_rate": 4.92870658351344e-07, |
|
"logits/chosen": -2.827677011489868, |
|
"logits/rejected": -2.7344062328338623, |
|
"logps/chosen": -314.31219482421875, |
|
"logps/rejected": -378.66693115234375, |
|
"loss": 0.6443, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.09823790937662125, |
|
"rewards/margins": 0.08783474564552307, |
|
"rewards/margins_max": 0.11842919886112213, |
|
"rewards/margins_min": 0.05724028870463371, |
|
"rewards/margins_std": 0.043267086148262024, |
|
"rewards/rejected": 0.010403157211840153, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_losses": 0.6270108222961426, |
|
"epoch": 0.54, |
|
"grad_norm": 2.5880477915128974, |
|
"learning_rate": 4.90796519947347e-07, |
|
"logits/chosen": -2.7683603763580322, |
|
"logits/rejected": -2.600294589996338, |
|
"logps/chosen": -441.5703125, |
|
"logps/rejected": -221.9513702392578, |
|
"loss": 0.6377, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14541961252689362, |
|
"rewards/margins": 0.13966473937034607, |
|
"rewards/margins_max": 0.2020079791545868, |
|
"rewards/margins_min": 0.07732154428958893, |
|
"rewards/margins_std": 0.08816662430763245, |
|
"rewards/rejected": 0.005754842888563871, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_losses": 0.6278845071792603, |
|
"epoch": 0.56, |
|
"grad_norm": 6.409126045529177, |
|
"learning_rate": 4.88463452949359e-07, |
|
"logits/chosen": -2.9043941497802734, |
|
"logits/rejected": -2.8018863201141357, |
|
"logps/chosen": -317.3318786621094, |
|
"logps/rejected": -204.9638671875, |
|
"loss": 0.6367, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13463780283927917, |
|
"rewards/margins": 0.13884270191192627, |
|
"rewards/margins_max": 0.20423416793346405, |
|
"rewards/margins_min": 0.07345118373632431, |
|
"rewards/margins_std": 0.09247754514217377, |
|
"rewards/rejected": -0.004204885568469763, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_dpo_losses": 0.6862879395484924, |
|
"eval_logits/chosen": -2.8224480152130127, |
|
"eval_logits/rejected": -2.777904510498047, |
|
"eval_logps/chosen": -283.0695495605469, |
|
"eval_logps/rejected": -258.4836120605469, |
|
"eval_loss": 0.7632620930671692, |
|
"eval_positive_losses": 0.6990463733673096, |
|
"eval_rewards/accuracies": 0.5873016119003296, |
|
"eval_rewards/chosen": 0.02151678316295147, |
|
"eval_rewards/margins": 0.014526319690048695, |
|
"eval_rewards/margins_max": 0.07610397040843964, |
|
"eval_rewards/margins_min": -0.039124276489019394, |
|
"eval_rewards/margins_std": 0.051066700369119644, |
|
"eval_rewards/rejected": 0.006990462075918913, |
|
"eval_runtime": 353.0866, |
|
"eval_samples_per_second": 5.664, |
|
"eval_steps_per_second": 0.178, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_losses": 0.6355398893356323, |
|
"epoch": 0.59, |
|
"grad_norm": 2.0528134694636475, |
|
"learning_rate": 4.858739661052539e-07, |
|
"logits/chosen": -2.6923115253448486, |
|
"logits/rejected": -2.6051807403564453, |
|
"logps/chosen": -343.03765869140625, |
|
"logps/rejected": -267.228271484375, |
|
"loss": 0.6285, |
|
"positive_losses": 0.02857360802590847, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.1379883587360382, |
|
"rewards/margins": 0.12217812240123749, |
|
"rewards/margins_max": 0.2092159241437912, |
|
"rewards/margins_min": 0.035140346735715866, |
|
"rewards/margins_std": 0.12309001386165619, |
|
"rewards/rejected": 0.01581023633480072, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_losses": 0.6001917719841003, |
|
"epoch": 0.62, |
|
"grad_norm": 2.0452189261623355, |
|
"learning_rate": 4.830308438912687e-07, |
|
"logits/chosen": -2.9291024208068848, |
|
"logits/rejected": -2.7718849182128906, |
|
"logps/chosen": -389.2041931152344, |
|
"logps/rejected": -278.2353515625, |
|
"loss": 0.6208, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20065200328826904, |
|
"rewards/margins": 0.2000540941953659, |
|
"rewards/margins_max": 0.27747657895088196, |
|
"rewards/margins_min": 0.12263162434101105, |
|
"rewards/margins_std": 0.10949190706014633, |
|
"rewards/rejected": 0.0005978975677862763, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_losses": 0.6307692527770996, |
|
"epoch": 0.65, |
|
"grad_norm": 2.2006814811498345, |
|
"learning_rate": 4.799371435178545e-07, |
|
"logits/chosen": -2.8787121772766113, |
|
"logits/rejected": -2.7966089248657227, |
|
"logps/chosen": -366.04168701171875, |
|
"logps/rejected": -335.1198425292969, |
|
"loss": 0.611, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.14716704189777374, |
|
"rewards/margins": 0.13250373303890228, |
|
"rewards/margins_max": 0.20927949249744415, |
|
"rewards/margins_min": 0.05572795867919922, |
|
"rewards/margins_std": 0.10857733339071274, |
|
"rewards/rejected": 0.01466330885887146, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_losses": 0.6340683102607727, |
|
"epoch": 0.68, |
|
"grad_norm": 1.8268647373784295, |
|
"learning_rate": 4.765961916422574e-07, |
|
"logits/chosen": -2.884403944015503, |
|
"logits/rejected": -2.725059986114502, |
|
"logps/chosen": -327.7989196777344, |
|
"logps/rejected": -292.8590087890625, |
|
"loss": 0.6248, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.13348861038684845, |
|
"rewards/margins": 0.12396250665187836, |
|
"rewards/margins_max": 0.17216315865516663, |
|
"rewards/margins_min": 0.0757618397474289, |
|
"rewards/margins_std": 0.06816603988409042, |
|
"rewards/rejected": 0.009526104666292667, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_losses": 0.6013151407241821, |
|
"epoch": 0.7, |
|
"grad_norm": 1.9729455716823008, |
|
"learning_rate": 4.730115807913626e-07, |
|
"logits/chosen": -2.900844097137451, |
|
"logits/rejected": -2.716480016708374, |
|
"logps/chosen": -394.9379577636719, |
|
"logps/rejected": -255.08218383789062, |
|
"loss": 0.6072, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20289082825183868, |
|
"rewards/margins": 0.19841626286506653, |
|
"rewards/margins_max": 0.2661207318305969, |
|
"rewards/margins_min": 0.13071177899837494, |
|
"rewards/margins_std": 0.09574858844280243, |
|
"rewards/rejected": 0.004474560730159283, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_losses": 0.6106809973716736, |
|
"epoch": 0.73, |
|
"grad_norm": 5.819181150531001, |
|
"learning_rate": 4.691871654986485e-07, |
|
"logits/chosen": -2.8338541984558105, |
|
"logits/rejected": -2.799828052520752, |
|
"logps/chosen": -303.6441345214844, |
|
"logps/rejected": -224.24734497070312, |
|
"loss": 0.6119, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.18178164958953857, |
|
"rewards/margins": 0.18184307217597961, |
|
"rewards/margins_max": 0.31147363781929016, |
|
"rewards/margins_min": 0.05221250653266907, |
|
"rewards/margins_std": 0.18332532048225403, |
|
"rewards/rejected": -6.143822974991053e-05, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_losses": 0.5980932116508484, |
|
"epoch": 0.76, |
|
"grad_norm": 2.0922326810258784, |
|
"learning_rate": 4.6512705815940536e-07, |
|
"logits/chosen": -2.8425519466400146, |
|
"logits/rejected": -2.702369451522827, |
|
"logps/chosen": -409.5565490722656, |
|
"logps/rejected": -233.59228515625, |
|
"loss": 0.6087, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.19440071284770966, |
|
"rewards/margins": 0.20493817329406738, |
|
"rewards/margins_max": 0.2935718894004822, |
|
"rewards/margins_min": 0.11630449444055557, |
|
"rewards/margins_std": 0.12534697353839874, |
|
"rewards/rejected": -0.010537461377680302, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_losses": 0.628231406211853, |
|
"epoch": 0.79, |
|
"grad_norm": 3.7503530971865384, |
|
"learning_rate": 4.6083562460867544e-07, |
|
"logits/chosen": -2.710557460784912, |
|
"logits/rejected": -2.6718039512634277, |
|
"logps/chosen": -314.34271240234375, |
|
"logps/rejected": -264.9143981933594, |
|
"loss": 0.608, |
|
"positive_losses": 0.09001044929027557, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1473403126001358, |
|
"rewards/margins": 0.13957121968269348, |
|
"rewards/margins_max": 0.22123010456562042, |
|
"rewards/margins_min": 0.05791233107447624, |
|
"rewards/margins_std": 0.1154831051826477, |
|
"rewards/rejected": 0.0077690863981842995, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_losses": 0.5966510772705078, |
|
"epoch": 0.82, |
|
"grad_norm": 2.147411420458553, |
|
"learning_rate": 4.563174794266683e-07, |
|
"logits/chosen": -2.9081060886383057, |
|
"logits/rejected": -2.7455201148986816, |
|
"logps/chosen": -339.8738708496094, |
|
"logps/rejected": -270.620849609375, |
|
"loss": 0.6098, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.21971745789051056, |
|
"rewards/margins": 0.21189472079277039, |
|
"rewards/margins_max": 0.33527523279190063, |
|
"rewards/margins_min": 0.08851419389247894, |
|
"rewards/margins_std": 0.17448639869689941, |
|
"rewards/rejected": 0.00782275851815939, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_losses": 0.6003170013427734, |
|
"epoch": 0.85, |
|
"grad_norm": 6.556261857915724, |
|
"learning_rate": 4.515774809767012e-07, |
|
"logits/chosen": -2.829227924346924, |
|
"logits/rejected": -2.7758102416992188, |
|
"logps/chosen": -317.3044128417969, |
|
"logps/rejected": -312.4596862792969, |
|
"loss": 0.5913, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.20438948273658752, |
|
"rewards/margins": 0.20183344185352325, |
|
"rewards/margins_max": 0.29987072944641113, |
|
"rewards/margins_min": 0.10379616171121597, |
|
"rewards/margins_std": 0.13864566385746002, |
|
"rewards/rejected": 0.0025560318026691675, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_dpo_losses": 0.6810439825057983, |
|
"eval_logits/chosen": -2.7852697372436523, |
|
"eval_logits/rejected": -2.7412352561950684, |
|
"eval_logps/chosen": -283.9922180175781, |
|
"eval_logps/rejected": -260.6202087402344, |
|
"eval_loss": 0.919795036315918, |
|
"eval_positive_losses": 2.2040839195251465, |
|
"eval_rewards/accuracies": 0.5714285969734192, |
|
"eval_rewards/chosen": 0.012289770878851414, |
|
"eval_rewards/margins": 0.02666497975587845, |
|
"eval_rewards/margins_max": 0.13577593863010406, |
|
"eval_rewards/margins_min": -0.06833065301179886, |
|
"eval_rewards/margins_std": 0.08987120538949966, |
|
"eval_rewards/rejected": -0.01437520980834961, |
|
"eval_runtime": 376.3495, |
|
"eval_samples_per_second": 5.314, |
|
"eval_steps_per_second": 0.167, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_losses": 0.5683969259262085, |
|
"epoch": 0.87, |
|
"grad_norm": 1.838064317269046, |
|
"learning_rate": 4.4662072618099887e-07, |
|
"logits/chosen": -2.98117995262146, |
|
"logits/rejected": -2.7507948875427246, |
|
"logps/chosen": -399.4131164550781, |
|
"logps/rejected": -263.9547424316406, |
|
"loss": 0.593, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2483425885438919, |
|
"rewards/margins": 0.2780510485172272, |
|
"rewards/margins_max": 0.41263166069984436, |
|
"rewards/margins_min": 0.14347048103809357, |
|
"rewards/margins_std": 0.19032566249370575, |
|
"rewards/rejected": -0.029708484187722206, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_losses": 0.5936921834945679, |
|
"epoch": 0.9, |
|
"grad_norm": 1.773863081765084, |
|
"learning_rate": 4.414525450399712e-07, |
|
"logits/chosen": -2.838667631149292, |
|
"logits/rejected": -2.7466790676116943, |
|
"logps/chosen": -326.25115966796875, |
|
"logps/rejected": -239.88632202148438, |
|
"loss": 0.5855, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2000167816877365, |
|
"rewards/margins": 0.21792948246002197, |
|
"rewards/margins_max": 0.3168962597846985, |
|
"rewards/margins_min": 0.11896270513534546, |
|
"rewards/margins_std": 0.1399601548910141, |
|
"rewards/rejected": -0.01791267842054367, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_losses": 0.5716263651847839, |
|
"epoch": 0.93, |
|
"grad_norm": 1.998569334452282, |
|
"learning_rate": 4.360784949008615e-07, |
|
"logits/chosen": -2.938047409057617, |
|
"logits/rejected": -2.8133039474487305, |
|
"logps/chosen": -355.54949951171875, |
|
"logps/rejected": -251.0133056640625, |
|
"loss": 0.5905, |
|
"positive_losses": 0.12526169419288635, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.24137118458747864, |
|
"rewards/margins": 0.2771782875061035, |
|
"rewards/margins_max": 0.4031391143798828, |
|
"rewards/margins_min": 0.1512174755334854, |
|
"rewards/margins_std": 0.17813549935817719, |
|
"rewards/rejected": -0.03580709546804428, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_losses": 0.6020129919052124, |
|
"epoch": 0.96, |
|
"grad_norm": 1.9680439727569183, |
|
"learning_rate": 4.305043544819289e-07, |
|
"logits/chosen": -2.8385097980499268, |
|
"logits/rejected": -2.6945478916168213, |
|
"logps/chosen": -348.60601806640625, |
|
"logps/rejected": -204.32705688476562, |
|
"loss": 0.5827, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.19889523088932037, |
|
"rewards/margins": 0.19913819432258606, |
|
"rewards/margins_max": 0.2865811586380005, |
|
"rewards/margins_min": 0.11169523000717163, |
|
"rewards/margins_std": 0.123663030564785, |
|
"rewards/rejected": -0.00024295822368003428, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_losses": 0.5496091842651367, |
|
"epoch": 0.99, |
|
"grad_norm": 4.932583836163482, |
|
"learning_rate": 4.247361176585903e-07, |
|
"logits/chosen": -2.8020033836364746, |
|
"logits/rejected": -2.7020020484924316, |
|
"logps/chosen": -401.37969970703125, |
|
"logps/rejected": -303.43603515625, |
|
"loss": 0.5792, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.27023789286613464, |
|
"rewards/margins": 0.32071441411972046, |
|
"rewards/margins_max": 0.4361411929130554, |
|
"rewards/margins_min": 0.2052876502275467, |
|
"rewards/margins_std": 0.16323810815811157, |
|
"rewards/rejected": -0.05047653242945671, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_losses": 0.5841382741928101, |
|
"epoch": 1.01, |
|
"grad_norm": 1.907764958469603, |
|
"learning_rate": 4.187799870182038e-07, |
|
"logits/chosen": -2.7761244773864746, |
|
"logits/rejected": -2.671306610107422, |
|
"logps/chosen": -325.5156555175781, |
|
"logps/rejected": -205.7819061279297, |
|
"loss": 0.5713, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.20690234005451202, |
|
"rewards/margins": 0.2369709461927414, |
|
"rewards/margins_max": 0.33839744329452515, |
|
"rewards/margins_min": 0.13554444909095764, |
|
"rewards/margins_std": 0.14343872666358948, |
|
"rewards/rejected": -0.03006860613822937, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_losses": 0.571262776851654, |
|
"epoch": 1.04, |
|
"grad_norm": 7.1801497317882195, |
|
"learning_rate": 4.126423671904236e-07, |
|
"logits/chosen": -2.667179822921753, |
|
"logits/rejected": -2.659519672393799, |
|
"logps/chosen": -327.2443542480469, |
|
"logps/rejected": -267.2635192871094, |
|
"loss": 0.566, |
|
"positive_losses": 0.06003761291503906, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2441311627626419, |
|
"rewards/margins": 0.2789308428764343, |
|
"rewards/margins_max": 0.43240299820899963, |
|
"rewards/margins_min": 0.12545865774154663, |
|
"rewards/margins_std": 0.217042475938797, |
|
"rewards/rejected": -0.03479967638850212, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_losses": 0.5604450106620789, |
|
"epoch": 1.07, |
|
"grad_norm": 3.976395938884955, |
|
"learning_rate": 4.0632985796030007e-07, |
|
"logits/chosen": -2.775261640548706, |
|
"logits/rejected": -2.5520777702331543, |
|
"logps/chosen": -365.10052490234375, |
|
"logps/rejected": -188.36569213867188, |
|
"loss": 0.5666, |
|
"positive_losses": 0.1160304993391037, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2588001787662506, |
|
"rewards/margins": 0.29767391085624695, |
|
"rewards/margins_max": 0.4550997316837311, |
|
"rewards/margins_min": 0.1402481645345688, |
|
"rewards/margins_std": 0.22263364493846893, |
|
"rewards/rejected": -0.03887376934289932, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_losses": 0.5191501379013062, |
|
"epoch": 1.1, |
|
"grad_norm": 2.0256079149805704, |
|
"learning_rate": 3.9984924717152713e-07, |
|
"logits/chosen": -2.7934913635253906, |
|
"logits/rejected": -2.7538201808929443, |
|
"logps/chosen": -368.53741455078125, |
|
"logps/rejected": -343.0877380371094, |
|
"loss": 0.568, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2897542417049408, |
|
"rewards/margins": 0.4042133390903473, |
|
"rewards/margins_max": 0.5862180590629578, |
|
"rewards/margins_min": 0.22220861911773682, |
|
"rewards/margins_std": 0.2573935389518738, |
|
"rewards/rejected": -0.11445906013250351, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_losses": 0.5777878761291504, |
|
"epoch": 1.13, |
|
"grad_norm": 1.913623993136942, |
|
"learning_rate": 3.932075034274723e-07, |
|
"logits/chosen": -2.720954418182373, |
|
"logits/rejected": -2.731393337249756, |
|
"logps/chosen": -278.46453857421875, |
|
"logps/rejected": -233.5457000732422, |
|
"loss": 0.5502, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2027483880519867, |
|
"rewards/margins": 0.26584392786026, |
|
"rewards/margins_max": 0.36782872676849365, |
|
"rewards/margins_min": 0.16385909914970398, |
|
"rewards/margins_std": 0.14422830939292908, |
|
"rewards/rejected": -0.06309551745653152, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_dpo_losses": 0.6769555807113647, |
|
"eval_logits/chosen": -2.7545130252838135, |
|
"eval_logits/rejected": -2.7112648487091064, |
|
"eval_logps/chosen": -285.1260681152344, |
|
"eval_logps/rejected": -262.7898864746094, |
|
"eval_loss": 1.0825976133346558, |
|
"eval_positive_losses": 3.7846007347106934, |
|
"eval_rewards/accuracies": 0.5753968358039856, |
|
"eval_rewards/chosen": 0.0009513738332316279, |
|
"eval_rewards/margins": 0.037023574113845825, |
|
"eval_rewards/margins_max": 0.18605393171310425, |
|
"eval_rewards/margins_min": -0.09629133343696594, |
|
"eval_rewards/margins_std": 0.12429077178239822, |
|
"eval_rewards/rejected": -0.036072202026844025, |
|
"eval_runtime": 389.163, |
|
"eval_samples_per_second": 5.139, |
|
"eval_steps_per_second": 0.162, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_losses": 0.5712305307388306, |
|
"epoch": 1.15, |
|
"grad_norm": 2.3137171866935726, |
|
"learning_rate": 3.8641176859783383e-07, |
|
"logits/chosen": -2.801839590072632, |
|
"logits/rejected": -2.7598752975463867, |
|
"logps/chosen": -269.1628723144531, |
|
"logps/rejected": -211.1710662841797, |
|
"loss": 0.5608, |
|
"positive_losses": 0.19508972764015198, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.2552756071090698, |
|
"rewards/margins": 0.2850131392478943, |
|
"rewards/margins_max": 0.5092954635620117, |
|
"rewards/margins_min": 0.06073073670268059, |
|
"rewards/margins_std": 0.31718316674232483, |
|
"rewards/rejected": -0.029737496748566628, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_losses": 0.5262824296951294, |
|
"epoch": 1.18, |
|
"grad_norm": 2.2394985330776582, |
|
"learning_rate": 3.7946935013898606e-07, |
|
"logits/chosen": -2.8580517768859863, |
|
"logits/rejected": -2.7411797046661377, |
|
"logps/chosen": -366.37841796875, |
|
"logps/rejected": -270.374755859375, |
|
"loss": 0.5707, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.33375903964042664, |
|
"rewards/margins": 0.3817841410636902, |
|
"rewards/margins_max": 0.5622913241386414, |
|
"rewards/margins_min": 0.20127694308757782, |
|
"rewards/margins_std": 0.255275696516037, |
|
"rewards/rejected": -0.048025064170360565, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_losses": 0.543947160243988, |
|
"epoch": 1.21, |
|
"grad_norm": 5.549999635713583, |
|
"learning_rate": 3.7238771323626817e-07, |
|
"logits/chosen": -2.782466173171997, |
|
"logits/rejected": -2.675513744354248, |
|
"logps/chosen": -357.595458984375, |
|
"logps/rejected": -268.3279724121094, |
|
"loss": 0.5395, |
|
"positive_losses": 0.051211167126894, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.2949376702308655, |
|
"rewards/margins": 0.3435710072517395, |
|
"rewards/margins_max": 0.5000001192092896, |
|
"rewards/margins_min": 0.18714189529418945, |
|
"rewards/margins_std": 0.22122418880462646, |
|
"rewards/rejected": -0.04863337427377701, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_losses": 0.5732239484786987, |
|
"epoch": 1.24, |
|
"grad_norm": 1.8704082850098978, |
|
"learning_rate": 3.651744727766676e-07, |
|
"logits/chosen": -2.7718758583068848, |
|
"logits/rejected": -2.657120704650879, |
|
"logps/chosen": -292.4672546386719, |
|
"logps/rejected": -199.0224609375, |
|
"loss": 0.5501, |
|
"positive_losses": 0.02040863037109375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22786328196525574, |
|
"rewards/margins": 0.2696700394153595, |
|
"rewards/margins_max": 0.39789730310440063, |
|
"rewards/margins_min": 0.14144271612167358, |
|
"rewards/margins_std": 0.1813407838344574, |
|
"rewards/rejected": -0.04180673882365227, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_losses": 0.5517903566360474, |
|
"epoch": 1.27, |
|
"grad_norm": 1.988829111938975, |
|
"learning_rate": 3.5783738516052897e-07, |
|
"logits/chosen": -2.73368501663208, |
|
"logits/rejected": -2.664750576019287, |
|
"logps/chosen": -314.8210144042969, |
|
"logps/rejected": -276.9835205078125, |
|
"loss": 0.555, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.23353490233421326, |
|
"rewards/margins": 0.32702386379241943, |
|
"rewards/margins_max": 0.47540155053138733, |
|
"rewards/margins_min": 0.17864616215229034, |
|
"rewards/margins_std": 0.20983779430389404, |
|
"rewards/rejected": -0.09348895400762558, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_losses": 0.5432690382003784, |
|
"epoch": 1.3, |
|
"grad_norm": 6.009256851534543, |
|
"learning_rate": 3.5038433996109404e-07, |
|
"logits/chosen": -2.723193645477295, |
|
"logits/rejected": -2.7120513916015625, |
|
"logps/chosen": -370.0165710449219, |
|
"logps/rejected": -410.7750549316406, |
|
"loss": 0.5516, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.25261393189430237, |
|
"rewards/margins": 0.33443617820739746, |
|
"rewards/margins_max": 0.4311138689517975, |
|
"rewards/margins_min": 0.23775847256183624, |
|
"rewards/margins_std": 0.1367228925228119, |
|
"rewards/rejected": -0.0818222239613533, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_losses": 0.5220767259597778, |
|
"epoch": 1.32, |
|
"grad_norm": 9.755904928259515, |
|
"learning_rate": 3.428233514408398e-07, |
|
"logits/chosen": -2.768638849258423, |
|
"logits/rejected": -2.6755402088165283, |
|
"logps/chosen": -328.5848388671875, |
|
"logps/rejected": -230.9825439453125, |
|
"loss": 0.538, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28665998578071594, |
|
"rewards/margins": 0.38968348503112793, |
|
"rewards/margins_max": 0.48602980375289917, |
|
"rewards/margins_min": 0.2933371067047119, |
|
"rewards/margins_std": 0.13625434041023254, |
|
"rewards/rejected": -0.10302351415157318, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_losses": 0.4982558786869049, |
|
"epoch": 1.35, |
|
"grad_norm": 1.8535108656666373, |
|
"learning_rate": 3.3516254993373945e-07, |
|
"logits/chosen": -2.855764150619507, |
|
"logits/rejected": -2.696035861968994, |
|
"logps/chosen": -384.2411193847656, |
|
"logps/rejected": -291.2297668457031, |
|
"loss": 0.542, |
|
"positive_losses": 0.18444347381591797, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3161051273345947, |
|
"rewards/margins": 0.46426716446876526, |
|
"rewards/margins_max": 0.6109346747398376, |
|
"rewards/margins_min": 0.3175995349884033, |
|
"rewards/margins_std": 0.2074192762374878, |
|
"rewards/rejected": -0.14816200733184814, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_losses": 0.5826688408851624, |
|
"epoch": 1.38, |
|
"grad_norm": 2.1891083390571136, |
|
"learning_rate": 3.274101731027105e-07, |
|
"logits/chosen": -2.7111635208129883, |
|
"logits/rejected": -2.5885300636291504, |
|
"logps/chosen": -197.87130737304688, |
|
"logps/rejected": -222.28286743164062, |
|
"loss": 0.5455, |
|
"positive_losses": 0.21304932236671448, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.19410201907157898, |
|
"rewards/margins": 0.2511723041534424, |
|
"rewards/margins_max": 0.4196406304836273, |
|
"rewards/margins_min": 0.08270399272441864, |
|
"rewards/margins_std": 0.23825016617774963, |
|
"rewards/rejected": -0.057070292532444, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_losses": 0.512485682964325, |
|
"epoch": 1.41, |
|
"grad_norm": 7.948297320675137, |
|
"learning_rate": 3.1957455708165314e-07, |
|
"logits/chosen": -2.642686367034912, |
|
"logits/rejected": -2.5784401893615723, |
|
"logps/chosen": -350.71331787109375, |
|
"logps/rejected": -246.94711303710938, |
|
"loss": 0.5398, |
|
"positive_losses": 0.1300731599330902, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.33491355180740356, |
|
"rewards/margins": 0.42528876662254333, |
|
"rewards/margins_max": 0.566541850566864, |
|
"rewards/margins_min": 0.28403571248054504, |
|
"rewards/margins_std": 0.1997620314359665, |
|
"rewards/rejected": -0.09037523716688156, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_dpo_losses": 0.6733708381652832, |
|
"eval_logits/chosen": -2.736778974533081, |
|
"eval_logits/rejected": -2.6934895515441895, |
|
"eval_logps/chosen": -284.954833984375, |
|
"eval_logps/rejected": -263.591796875, |
|
"eval_loss": 1.1571382284164429, |
|
"eval_positive_losses": 4.656679630279541, |
|
"eval_rewards/accuracies": 0.5833333134651184, |
|
"eval_rewards/chosen": 0.0026637099217623472, |
|
"eval_rewards/margins": 0.04675525426864624, |
|
"eval_rewards/margins_max": 0.23377186059951782, |
|
"eval_rewards/margins_min": -0.11662713438272476, |
|
"eval_rewards/margins_std": 0.15486779808998108, |
|
"eval_rewards/rejected": -0.04409153386950493, |
|
"eval_runtime": 348.5206, |
|
"eval_samples_per_second": 5.739, |
|
"eval_steps_per_second": 0.181, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_losses": 0.558113694190979, |
|
"epoch": 1.44, |
|
"grad_norm": 1.9204678229765098, |
|
"learning_rate": 3.116641275116018e-07, |
|
"logits/chosen": -2.4792568683624268, |
|
"logits/rejected": -2.477318286895752, |
|
"logps/chosen": -267.8980407714844, |
|
"logps/rejected": -321.2754821777344, |
|
"loss": 0.5399, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.24782295525074005, |
|
"rewards/margins": 0.30803701281547546, |
|
"rewards/margins_max": 0.4859069287776947, |
|
"rewards/margins_min": 0.13016708195209503, |
|
"rewards/margins_std": 0.25154608488082886, |
|
"rewards/rejected": -0.06021404266357422, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_losses": 0.5207396149635315, |
|
"epoch": 1.46, |
|
"grad_norm": 1.8920245074404747, |
|
"learning_rate": 3.036873904806295e-07, |
|
"logits/chosen": -2.7549643516540527, |
|
"logits/rejected": -2.6787781715393066, |
|
"logps/chosen": -313.6898498535156, |
|
"logps/rejected": -246.35202026367188, |
|
"loss": 0.5479, |
|
"positive_losses": 0.4201123118400574, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2792617380619049, |
|
"rewards/margins": 0.4105965197086334, |
|
"rewards/margins_max": 0.6103520393371582, |
|
"rewards/margins_min": 0.21084094047546387, |
|
"rewards/margins_std": 0.28249698877334595, |
|
"rewards/rejected": -0.13133473694324493, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_losses": 0.5030455589294434, |
|
"epoch": 1.49, |
|
"grad_norm": 2.1161532532804963, |
|
"learning_rate": 2.956529233772492e-07, |
|
"logits/chosen": -2.721666097640991, |
|
"logits/rejected": -2.7209858894348145, |
|
"logps/chosen": -350.1697998046875, |
|
"logps/rejected": -284.701416015625, |
|
"loss": 0.5383, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2883579134941101, |
|
"rewards/margins": 0.4384874701499939, |
|
"rewards/margins_max": 0.5584603548049927, |
|
"rewards/margins_min": 0.31851455569267273, |
|
"rewards/margins_std": 0.16966724395751953, |
|
"rewards/rejected": -0.15012958645820618, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_losses": 0.4899216592311859, |
|
"epoch": 1.52, |
|
"grad_norm": 8.076099370517953, |
|
"learning_rate": 2.875693656671431e-07, |
|
"logits/chosen": -2.8387975692749023, |
|
"logits/rejected": -2.7006285190582275, |
|
"logps/chosen": -364.0811462402344, |
|
"logps/rejected": -250.73196411132812, |
|
"loss": 0.5309, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3547210097312927, |
|
"rewards/margins": 0.4873575270175934, |
|
"rewards/margins_max": 0.6359044313430786, |
|
"rewards/margins_min": 0.3388107419013977, |
|
"rewards/margins_std": 0.2100769281387329, |
|
"rewards/rejected": -0.13263657689094543, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_losses": 0.5343499183654785, |
|
"epoch": 1.55, |
|
"grad_norm": 2.5715523833788465, |
|
"learning_rate": 2.794454096031429e-07, |
|
"logits/chosen": -2.7994258403778076, |
|
"logits/rejected": -2.7424252033233643, |
|
"logps/chosen": -283.88751220703125, |
|
"logps/rejected": -277.2805480957031, |
|
"loss": 0.5163, |
|
"positive_losses": 0.1207679733633995, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2706112265586853, |
|
"rewards/margins": 0.3728974759578705, |
|
"rewards/margins_max": 0.5763125419616699, |
|
"rewards/margins_min": 0.16948243975639343, |
|
"rewards/margins_std": 0.2876723110675812, |
|
"rewards/rejected": -0.10228625684976578, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_losses": 0.5260319709777832, |
|
"epoch": 1.58, |
|
"grad_norm": 2.100863996516755, |
|
"learning_rate": 2.7128979087844593e-07, |
|
"logits/chosen": -2.7390799522399902, |
|
"logits/rejected": -2.6754238605499268, |
|
"logps/chosen": -291.78973388671875, |
|
"logps/rejected": -352.7436218261719, |
|
"loss": 0.5334, |
|
"positive_losses": 0.1672992706298828, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.24893280863761902, |
|
"rewards/margins": 0.388366162776947, |
|
"rewards/margins_max": 0.5333099961280823, |
|
"rewards/margins_min": 0.24342235922813416, |
|
"rewards/margins_std": 0.2049814909696579, |
|
"rewards/rejected": -0.139433354139328, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_losses": 0.49240389466285706, |
|
"epoch": 1.61, |
|
"grad_norm": 8.972280412241831, |
|
"learning_rate": 2.6311127923312153e-07, |
|
"logits/chosen": -2.757660388946533, |
|
"logits/rejected": -2.627717971801758, |
|
"logps/chosen": -397.9381103515625, |
|
"logps/rejected": -329.2608947753906, |
|
"loss": 0.5165, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3426300883293152, |
|
"rewards/margins": 0.47666874527931213, |
|
"rewards/margins_max": 0.6256591081619263, |
|
"rewards/margins_min": 0.32767823338508606, |
|
"rewards/margins_std": 0.21070432662963867, |
|
"rewards/rejected": -0.13403865694999695, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_losses": 0.5157249569892883, |
|
"epoch": 1.63, |
|
"grad_norm": 1.8838498119092633, |
|
"learning_rate": 2.5491866902400565e-07, |
|
"logits/chosen": -2.7878096103668213, |
|
"logits/rejected": -2.70076060295105, |
|
"logps/chosen": -270.0621337890625, |
|
"logps/rejected": -235.17105102539062, |
|
"loss": 0.5244, |
|
"positive_losses": 0.14246292412281036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28403371572494507, |
|
"rewards/margins": 0.4212573170661926, |
|
"rewards/margins_max": 0.6702337861061096, |
|
"rewards/margins_min": 0.17228081822395325, |
|
"rewards/margins_std": 0.35210588574409485, |
|
"rewards/rejected": -0.13722361624240875, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_losses": 0.524344801902771, |
|
"epoch": 1.66, |
|
"grad_norm": 7.342743240499319, |
|
"learning_rate": 2.4672076976812543e-07, |
|
"logits/chosen": -2.6465201377868652, |
|
"logits/rejected": -2.559436082839966, |
|
"logps/chosen": -339.4754943847656, |
|
"logps/rejected": -302.15557861328125, |
|
"loss": 0.5432, |
|
"positive_losses": 0.1482059508562088, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2978511452674866, |
|
"rewards/margins": 0.3972291350364685, |
|
"rewards/margins_max": 0.5903550982475281, |
|
"rewards/margins_min": 0.2041032314300537, |
|
"rewards/margins_std": 0.27312135696411133, |
|
"rewards/rejected": -0.09937803447246552, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_losses": 0.5494340658187866, |
|
"epoch": 1.69, |
|
"grad_norm": 8.369464468282295, |
|
"learning_rate": 2.385263966698222e-07, |
|
"logits/chosen": -2.7804884910583496, |
|
"logits/rejected": -2.7123289108276367, |
|
"logps/chosen": -253.48031616210938, |
|
"logps/rejected": -268.9811096191406, |
|
"loss": 0.5293, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2517229914665222, |
|
"rewards/margins": 0.3363966941833496, |
|
"rewards/margins_max": 0.48714661598205566, |
|
"rewards/margins_min": 0.18564683198928833, |
|
"rewards/margins_std": 0.21319253742694855, |
|
"rewards/rejected": -0.08467370271682739, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_dpo_losses": 0.6702548265457153, |
|
"eval_logits/chosen": -2.7200992107391357, |
|
"eval_logits/rejected": -2.6766622066497803, |
|
"eval_logps/chosen": -285.0616149902344, |
|
"eval_logps/rejected": -264.5409851074219, |
|
"eval_loss": 1.2245166301727295, |
|
"eval_positive_losses": 5.373989582061768, |
|
"eval_rewards/accuracies": 0.591269850730896, |
|
"eval_rewards/chosen": 0.0015958804870024323, |
|
"eval_rewards/margins": 0.05517909303307533, |
|
"eval_rewards/margins_max": 0.2654685378074646, |
|
"eval_rewards/margins_min": -0.128387451171875, |
|
"eval_rewards/margins_std": 0.1752447932958603, |
|
"eval_rewards/rejected": -0.053583212196826935, |
|
"eval_runtime": 377.5968, |
|
"eval_samples_per_second": 5.297, |
|
"eval_steps_per_second": 0.167, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_losses": 0.5357104539871216, |
|
"epoch": 1.72, |
|
"grad_norm": 8.256557748215014, |
|
"learning_rate": 2.3034436114175838e-07, |
|
"logits/chosen": -2.6488845348358154, |
|
"logits/rejected": -2.538515567779541, |
|
"logps/chosen": -336.7628479003906, |
|
"logps/rejected": -277.02264404296875, |
|
"loss": 0.5238, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.28726598620414734, |
|
"rewards/margins": 0.37427616119384766, |
|
"rewards/margins_max": 0.564933717250824, |
|
"rewards/margins_min": 0.18361851572990417, |
|
"rewards/margins_std": 0.2696306109428406, |
|
"rewards/rejected": -0.08701014518737793, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_losses": 0.5276176929473877, |
|
"epoch": 1.75, |
|
"grad_norm": 2.4842449610043764, |
|
"learning_rate": 2.2218346133000264e-07, |
|
"logits/chosen": -2.6663379669189453, |
|
"logits/rejected": -2.5575757026672363, |
|
"logps/chosen": -281.8042297363281, |
|
"logps/rejected": -206.41580200195312, |
|
"loss": 0.5162, |
|
"positive_losses": 0.15401744842529297, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2947912812232971, |
|
"rewards/margins": 0.38691216707229614, |
|
"rewards/margins_max": 0.5189955234527588, |
|
"rewards/margins_min": 0.2548287510871887, |
|
"rewards/margins_std": 0.18679411709308624, |
|
"rewards/rejected": -0.09212087094783783, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_losses": 0.5108110308647156, |
|
"epoch": 1.77, |
|
"grad_norm": 9.294890068434233, |
|
"learning_rate": 2.1405247265337917e-07, |
|
"logits/chosen": -2.6686511039733887, |
|
"logits/rejected": -2.5705299377441406, |
|
"logps/chosen": -360.513671875, |
|
"logps/rejected": -233.205322265625, |
|
"loss": 0.5195, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3424997925758362, |
|
"rewards/margins": 0.4331623613834381, |
|
"rewards/margins_max": 0.5366414785385132, |
|
"rewards/margins_min": 0.3296832740306854, |
|
"rewards/margins_std": 0.14634151756763458, |
|
"rewards/rejected": -0.09066257625818253, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_losses": 0.5333245396614075, |
|
"epoch": 1.8, |
|
"grad_norm": 1.9615557786341105, |
|
"learning_rate": 2.0596013836725657e-07, |
|
"logits/chosen": -2.793367862701416, |
|
"logits/rejected": -2.739849090576172, |
|
"logps/chosen": -275.52984619140625, |
|
"logps/rejected": -210.9861297607422, |
|
"loss": 0.542, |
|
"positive_losses": 0.4148605465888977, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.26945605874061584, |
|
"rewards/margins": 0.38035932183265686, |
|
"rewards/margins_max": 0.5337552428245544, |
|
"rewards/margins_min": 0.2269633710384369, |
|
"rewards/margins_std": 0.21693463623523712, |
|
"rewards/rejected": -0.11090326309204102, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_losses": 0.5089551210403442, |
|
"epoch": 1.83, |
|
"grad_norm": 11.059235344334038, |
|
"learning_rate": 1.9791516016192213e-07, |
|
"logits/chosen": -2.844893217086792, |
|
"logits/rejected": -2.7059712409973145, |
|
"logps/chosen": -298.34588623046875, |
|
"logps/rejected": -231.402099609375, |
|
"loss": 0.5102, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3605387806892395, |
|
"rewards/margins": 0.446432501077652, |
|
"rewards/margins_max": 0.6981122493743896, |
|
"rewards/margins_min": 0.1947527527809143, |
|
"rewards/margins_std": 0.35592886805534363, |
|
"rewards/rejected": -0.0858936682343483, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_losses": 0.5299183130264282, |
|
"epoch": 1.86, |
|
"grad_norm": 16.447544756456054, |
|
"learning_rate": 1.8992618880565036e-07, |
|
"logits/chosen": -2.5619029998779297, |
|
"logits/rejected": -2.5244908332824707, |
|
"logps/chosen": -294.64801025390625, |
|
"logps/rejected": -199.9069061279297, |
|
"loss": 0.54, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.293754518032074, |
|
"rewards/margins": 0.39233601093292236, |
|
"rewards/margins_max": 0.5898939371109009, |
|
"rewards/margins_min": 0.19477804005146027, |
|
"rewards/margins_std": 0.2793891131877899, |
|
"rewards/rejected": -0.0985814779996872, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_losses": 0.4770924150943756, |
|
"epoch": 1.89, |
|
"grad_norm": 14.950375677893724, |
|
"learning_rate": 1.8200181484252885e-07, |
|
"logits/chosen": -2.7151541709899902, |
|
"logits/rejected": -2.7282559871673584, |
|
"logps/chosen": -347.63665771484375, |
|
"logps/rejected": -335.4634094238281, |
|
"loss": 0.5076, |
|
"positive_losses": 0.011021423153579235, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3512026071548462, |
|
"rewards/margins": 0.532378077507019, |
|
"rewards/margins_max": 0.7549656629562378, |
|
"rewards/margins_min": 0.3097904622554779, |
|
"rewards/margins_std": 0.314786434173584, |
|
"rewards/rejected": -0.18117551505565643, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_losses": 0.48109301924705505, |
|
"epoch": 1.92, |
|
"grad_norm": 1.8077778304327392, |
|
"learning_rate": 1.7415055935504233e-07, |
|
"logits/chosen": -2.796618938446045, |
|
"logits/rejected": -2.6364896297454834, |
|
"logps/chosen": -357.8368835449219, |
|
"logps/rejected": -312.83050537109375, |
|
"loss": 0.5045, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3552249073982239, |
|
"rewards/margins": 0.5131222009658813, |
|
"rewards/margins_max": 0.6571098566055298, |
|
"rewards/margins_min": 0.3691345155239105, |
|
"rewards/margins_std": 0.2036292850971222, |
|
"rewards/rejected": -0.15789727866649628, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_losses": 0.5480272769927979, |
|
"epoch": 1.94, |
|
"grad_norm": 7.710262187848809, |
|
"learning_rate": 1.6638086480134952e-07, |
|
"logits/chosen": -2.6526730060577393, |
|
"logits/rejected": -2.5982117652893066, |
|
"logps/chosen": -211.858154296875, |
|
"logps/rejected": -139.07669067382812, |
|
"loss": 0.519, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.25107190012931824, |
|
"rewards/margins": 0.34679529070854187, |
|
"rewards/margins_max": 0.5567124485969543, |
|
"rewards/margins_min": 0.1368781328201294, |
|
"rewards/margins_std": 0.2968676686286926, |
|
"rewards/rejected": -0.09572339057922363, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_losses": 0.4729584753513336, |
|
"epoch": 1.97, |
|
"grad_norm": 13.004502323752208, |
|
"learning_rate": 1.5870108593710471e-07, |
|
"logits/chosen": -2.577094793319702, |
|
"logits/rejected": -2.474807024002075, |
|
"logps/chosen": -388.23529052734375, |
|
"logps/rejected": -229.945556640625, |
|
"loss": 0.5238, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3872971832752228, |
|
"rewards/margins": 0.5365989804267883, |
|
"rewards/margins_max": 0.6989376544952393, |
|
"rewards/margins_min": 0.3742601275444031, |
|
"rewards/margins_std": 0.22958168387413025, |
|
"rewards/rejected": -0.14930173754692078, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_dpo_losses": 0.6682811975479126, |
|
"eval_logits/chosen": -2.7153635025024414, |
|
"eval_logits/rejected": -2.6725597381591797, |
|
"eval_logps/chosen": -287.1236877441406, |
|
"eval_logps/rejected": -267.1868896484375, |
|
"eval_loss": 1.3783056735992432, |
|
"eval_positive_losses": 6.938729763031006, |
|
"eval_rewards/accuracies": 0.60317462682724, |
|
"eval_rewards/chosen": -0.019024791195988655, |
|
"eval_rewards/margins": 0.061017535626888275, |
|
"eval_rewards/margins_max": 0.28910568356513977, |
|
"eval_rewards/margins_min": -0.14251713454723358, |
|
"eval_rewards/margins_std": 0.1922098994255066, |
|
"eval_rewards/rejected": -0.08004232496023178, |
|
"eval_runtime": 374.9449, |
|
"eval_samples_per_second": 5.334, |
|
"eval_steps_per_second": 0.168, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_losses": 0.5287314057350159, |
|
"epoch": 2.0, |
|
"grad_norm": 2.5225442066988153, |
|
"learning_rate": 1.5111948083158528e-07, |
|
"logits/chosen": -2.645501136779785, |
|
"logits/rejected": -2.5416979789733887, |
|
"logps/chosen": -277.56146240234375, |
|
"logps/rejected": -194.89093017578125, |
|
"loss": 0.509, |
|
"positive_losses": 0.014240646734833717, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2871246635913849, |
|
"rewards/margins": 0.39854830503463745, |
|
"rewards/margins_max": 0.6037707328796387, |
|
"rewards/margins_min": 0.19332581758499146, |
|
"rewards/margins_std": 0.2902284264564514, |
|
"rewards/rejected": -0.11142361164093018, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_losses": 0.4969090521335602, |
|
"epoch": 2.03, |
|
"grad_norm": 6.7685343277404435, |
|
"learning_rate": 1.4364420198778658e-07, |
|
"logits/chosen": -2.711702823638916, |
|
"logits/rejected": -2.667511224746704, |
|
"logps/chosen": -318.25323486328125, |
|
"logps/rejected": -349.77325439453125, |
|
"loss": 0.4995, |
|
"positive_losses": 0.2103443145751953, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.29361721873283386, |
|
"rewards/margins": 0.48742538690567017, |
|
"rewards/margins_max": 0.7825571298599243, |
|
"rewards/margins_min": 0.19229364395141602, |
|
"rewards/margins_std": 0.417379230260849, |
|
"rewards/rejected": -0.1938081681728363, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_losses": 0.46872028708457947, |
|
"epoch": 2.06, |
|
"grad_norm": 1.9152175221021666, |
|
"learning_rate": 1.3628328757603242e-07, |
|
"logits/chosen": -2.715259552001953, |
|
"logits/rejected": -2.639958143234253, |
|
"logps/chosen": -387.1348571777344, |
|
"logps/rejected": -270.98504638671875, |
|
"loss": 0.5078, |
|
"positive_losses": 0.07048721611499786, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3561549186706543, |
|
"rewards/margins": 0.5383674502372742, |
|
"rewards/margins_max": 0.6862602829933167, |
|
"rewards/margins_min": 0.39047467708587646, |
|
"rewards/margins_std": 0.20915205776691437, |
|
"rewards/rejected": -0.18221257627010345, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_losses": 0.48394322395324707, |
|
"epoch": 2.08, |
|
"grad_norm": 1.9934270854366338, |
|
"learning_rate": 1.2904465279052723e-07, |
|
"logits/chosen": -2.743479013442993, |
|
"logits/rejected": -2.6603636741638184, |
|
"logps/chosen": -314.8957824707031, |
|
"logps/rejected": -239.50637817382812, |
|
"loss": 0.5064, |
|
"positive_losses": 0.24646854400634766, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.37057873606681824, |
|
"rewards/margins": 0.5130593776702881, |
|
"rewards/margins_max": 0.7771711945533752, |
|
"rewards/margins_min": 0.2489476501941681, |
|
"rewards/margins_std": 0.373510479927063, |
|
"rewards/rejected": -0.14248065650463104, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_losses": 0.5582190752029419, |
|
"epoch": 2.11, |
|
"grad_norm": 9.439233885890266, |
|
"learning_rate": 1.219360813381446e-07, |
|
"logits/chosen": -2.647359848022461, |
|
"logits/rejected": -2.597470760345459, |
|
"logps/chosen": -167.36611938476562, |
|
"logps/rejected": -147.13504028320312, |
|
"loss": 0.4945, |
|
"positive_losses": 0.07765503227710724, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.21209602057933807, |
|
"rewards/margins": 0.31703463196754456, |
|
"rewards/margins_max": 0.4738802909851074, |
|
"rewards/margins_min": 0.1601889729499817, |
|
"rewards/margins_std": 0.22181324660778046, |
|
"rewards/rejected": -0.10493861138820648, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_losses": 0.4905478060245514, |
|
"epoch": 2.14, |
|
"grad_norm": 11.006048100367254, |
|
"learning_rate": 1.149652170686039e-07, |
|
"logits/chosen": -2.7402448654174805, |
|
"logits/rejected": -2.6278908252716064, |
|
"logps/chosen": -300.5604553222656, |
|
"logps/rejected": -279.05877685546875, |
|
"loss": 0.5065, |
|
"positive_losses": 0.7673536539077759, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3250073492527008, |
|
"rewards/margins": 0.48759451508522034, |
|
"rewards/margins_max": 0.6997831463813782, |
|
"rewards/margins_min": 0.2754059433937073, |
|
"rewards/margins_std": 0.30007994174957275, |
|
"rewards/rejected": -0.1625872105360031, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_losses": 0.4878745675086975, |
|
"epoch": 2.17, |
|
"grad_norm": 2.041360663493708, |
|
"learning_rate": 1.0813955575503587e-07, |
|
"logits/chosen": -2.6781890392303467, |
|
"logits/rejected": -2.673537254333496, |
|
"logps/chosen": -303.26922607421875, |
|
"logps/rejected": -278.1116638183594, |
|
"loss": 0.4955, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.303671658039093, |
|
"rewards/margins": 0.5081648230552673, |
|
"rewards/margins_max": 0.7443748712539673, |
|
"rewards/margins_min": 0.27195480465888977, |
|
"rewards/margins_std": 0.3340514898300171, |
|
"rewards/rejected": -0.2044931948184967, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_losses": 0.5306052565574646, |
|
"epoch": 2.2, |
|
"grad_norm": 12.846225026696498, |
|
"learning_rate": 1.0146643703377486e-07, |
|
"logits/chosen": -2.753620147705078, |
|
"logits/rejected": -2.623445749282837, |
|
"logps/chosen": -295.22698974609375, |
|
"logps/rejected": -235.02810668945312, |
|
"loss": 0.5182, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.31814926862716675, |
|
"rewards/margins": 0.3883030414581299, |
|
"rewards/margins_max": 0.6206892728805542, |
|
"rewards/margins_min": 0.15591678023338318, |
|
"rewards/margins_std": 0.3286438286304474, |
|
"rewards/rejected": -0.07015376538038254, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_losses": 0.4863054156303406, |
|
"epoch": 2.23, |
|
"grad_norm": 3.0491196409319192, |
|
"learning_rate": 9.495303651204494e-08, |
|
"logits/chosen": -2.697542667388916, |
|
"logits/rejected": -2.644935131072998, |
|
"logps/chosen": -334.7068786621094, |
|
"logps/rejected": -294.2601318359375, |
|
"loss": 0.4865, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3127191960811615, |
|
"rewards/margins": 0.4877268671989441, |
|
"rewards/margins_max": 0.660971999168396, |
|
"rewards/margins_min": 0.3144817650318146, |
|
"rewards/margins_std": 0.24500557780265808, |
|
"rewards/rejected": -0.17500770092010498, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_losses": 0.4887468218803406, |
|
"epoch": 2.25, |
|
"grad_norm": 10.205242036662113, |
|
"learning_rate": 8.860635805202615e-08, |
|
"logits/chosen": -2.723806381225586, |
|
"logits/rejected": -2.6369194984436035, |
|
"logps/chosen": -329.61749267578125, |
|
"logps/rejected": -261.149169921875, |
|
"loss": 0.488, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.33780962228775024, |
|
"rewards/margins": 0.49595513939857483, |
|
"rewards/margins_max": 0.7063379287719727, |
|
"rewards/margins_min": 0.2855724096298218, |
|
"rewards/margins_std": 0.29752615094184875, |
|
"rewards/rejected": -0.15814556181430817, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_dpo_losses": 0.6669806838035583, |
|
"eval_logits/chosen": -2.707551956176758, |
|
"eval_logits/rejected": -2.664377450942993, |
|
"eval_logps/chosen": -288.50439453125, |
|
"eval_logps/rejected": -268.966552734375, |
|
"eval_loss": 1.4895577430725098, |
|
"eval_positive_losses": 8.096439361572266, |
|
"eval_rewards/accuracies": 0.6111111044883728, |
|
"eval_rewards/chosen": -0.03283155709505081, |
|
"eval_rewards/margins": 0.06500754505395889, |
|
"eval_rewards/margins_max": 0.30630454421043396, |
|
"eval_rewards/margins_min": -0.15107154846191406, |
|
"eval_rewards/margins_std": 0.20374149084091187, |
|
"eval_rewards/rejected": -0.09783907979726791, |
|
"eval_runtime": 342.0756, |
|
"eval_samples_per_second": 5.847, |
|
"eval_steps_per_second": 0.184, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_losses": 0.5151875019073486, |
|
"epoch": 2.28, |
|
"grad_norm": 8.219325520486382, |
|
"learning_rate": 8.24332262395994e-08, |
|
"logits/chosen": -2.7791354656219482, |
|
"logits/rejected": -2.720329999923706, |
|
"logps/chosen": -268.9266052246094, |
|
"logps/rejected": -261.6560363769531, |
|
"loss": 0.4995, |
|
"positive_losses": 0.07818031311035156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.25640755891799927, |
|
"rewards/margins": 0.4340863823890686, |
|
"rewards/margins_max": 0.7111722230911255, |
|
"rewards/margins_min": 0.15700046718120575, |
|
"rewards/margins_std": 0.39185863733291626, |
|
"rewards/rejected": -0.17767879366874695, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_losses": 0.5199899673461914, |
|
"epoch": 2.31, |
|
"grad_norm": 11.3286779229284, |
|
"learning_rate": 7.644027904586586e-08, |
|
"logits/chosen": -2.7511487007141113, |
|
"logits/rejected": -2.6695072650909424, |
|
"logps/chosen": -276.63726806640625, |
|
"logps/rejected": -221.2215118408203, |
|
"loss": 0.5014, |
|
"positive_losses": 0.24863624572753906, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2888849079608917, |
|
"rewards/margins": 0.42505329847335815, |
|
"rewards/margins_max": 0.6548727750778198, |
|
"rewards/margins_min": 0.1952337622642517, |
|
"rewards/margins_std": 0.3250138759613037, |
|
"rewards/rejected": -0.13616837561130524, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_losses": 0.4646497666835785, |
|
"epoch": 2.34, |
|
"grad_norm": 8.714610877659624, |
|
"learning_rate": 7.063396068933469e-08, |
|
"logits/chosen": -2.73659086227417, |
|
"logits/rejected": -2.634260654449463, |
|
"logps/chosen": -415.6689453125, |
|
"logps/rejected": -272.5089111328125, |
|
"loss": 0.4872, |
|
"positive_losses": 0.20364531874656677, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.38227415084838867, |
|
"rewards/margins": 0.5651294589042664, |
|
"rewards/margins_max": 0.7014433145523071, |
|
"rewards/margins_min": 0.4288156032562256, |
|
"rewards/margins_std": 0.19277691841125488, |
|
"rewards/rejected": -0.1828552931547165, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_losses": 0.49954962730407715, |
|
"epoch": 2.37, |
|
"grad_norm": 6.5716343841546685, |
|
"learning_rate": 6.502051470645148e-08, |
|
"logits/chosen": -2.883086919784546, |
|
"logits/rejected": -2.6989049911499023, |
|
"logps/chosen": -359.88653564453125, |
|
"logps/rejected": -300.28302001953125, |
|
"loss": 0.5152, |
|
"positive_losses": 0.010610580444335938, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.3157649636268616, |
|
"rewards/margins": 0.4664032459259033, |
|
"rewards/margins_max": 0.6633520126342773, |
|
"rewards/margins_min": 0.2694544792175293, |
|
"rewards/margins_std": 0.2785276472568512, |
|
"rewards/rejected": -0.15063826739788055, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_losses": 0.4566032290458679, |
|
"epoch": 2.39, |
|
"grad_norm": 8.455170840404687, |
|
"learning_rate": 5.960597723792194e-08, |
|
"logits/chosen": -2.742910623550415, |
|
"logits/rejected": -2.619158983230591, |
|
"logps/chosen": -344.3529052734375, |
|
"logps/rejected": -276.9927673339844, |
|
"loss": 0.4916, |
|
"positive_losses": 0.32693710923194885, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.40712490677833557, |
|
"rewards/margins": 0.5964423418045044, |
|
"rewards/margins_max": 0.88237464427948, |
|
"rewards/margins_min": 0.31051012873649597, |
|
"rewards/margins_std": 0.4043692648410797, |
|
"rewards/rejected": -0.1893174648284912, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_losses": 0.4746529459953308, |
|
"epoch": 2.42, |
|
"grad_norm": 11.331305453316807, |
|
"learning_rate": 5.4396170538046486e-08, |
|
"logits/chosen": -2.788321018218994, |
|
"logits/rejected": -2.7099854946136475, |
|
"logps/chosen": -321.00872802734375, |
|
"logps/rejected": -288.5102844238281, |
|
"loss": 0.4835, |
|
"positive_losses": 0.03827323764562607, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.34411686658859253, |
|
"rewards/margins": 0.5461706519126892, |
|
"rewards/margins_max": 0.8038791418075562, |
|
"rewards/margins_min": 0.28846222162246704, |
|
"rewards/margins_std": 0.36445480585098267, |
|
"rewards/rejected": -0.20205385982990265, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_losses": 0.515887439250946, |
|
"epoch": 2.45, |
|
"grad_norm": 2.8323654503779396, |
|
"learning_rate": 4.93966967140487e-08, |
|
"logits/chosen": -2.714071750640869, |
|
"logits/rejected": -2.624173641204834, |
|
"logps/chosen": -314.1236267089844, |
|
"logps/rejected": -330.68817138671875, |
|
"loss": 0.4782, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2643023431301117, |
|
"rewards/margins": 0.4125981330871582, |
|
"rewards/margins_max": 0.6127610802650452, |
|
"rewards/margins_min": 0.21243515610694885, |
|
"rewards/margins_std": 0.28307315707206726, |
|
"rewards/rejected": -0.14829573035240173, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_losses": 0.48759278655052185, |
|
"epoch": 2.48, |
|
"grad_norm": 2.371240249959621, |
|
"learning_rate": 4.4612931702126433e-08, |
|
"logits/chosen": -2.7775819301605225, |
|
"logits/rejected": -2.6954903602600098, |
|
"logps/chosen": -286.12994384765625, |
|
"logps/rejected": -263.48614501953125, |
|
"loss": 0.4882, |
|
"positive_losses": 0.3580247759819031, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.31085458397865295, |
|
"rewards/margins": 0.4919183850288391, |
|
"rewards/margins_max": 0.6966783404350281, |
|
"rewards/margins_min": 0.2871583104133606, |
|
"rewards/margins_std": 0.28957444429397583, |
|
"rewards/rejected": -0.181063711643219, |
|
"step": 880 |
|
}, |
|
{ |
|
"dpo_losses": 0.4207460284233093, |
|
"epoch": 2.51, |
|
"grad_norm": 15.410592912064324, |
|
"learning_rate": 4.005001948670605e-08, |
|
"logits/chosen": -2.787081003189087, |
|
"logits/rejected": -2.7058584690093994, |
|
"logps/chosen": -433.3082580566406, |
|
"logps/rejected": -349.69342041015625, |
|
"loss": 0.4854, |
|
"positive_losses": 0.38000088930130005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43636050820350647, |
|
"rewards/margins": 0.6726460456848145, |
|
"rewards/margins_max": 0.818884015083313, |
|
"rewards/margins_min": 0.5264080762863159, |
|
"rewards/margins_std": 0.20681175589561462, |
|
"rewards/rejected": -0.23628559708595276, |
|
"step": 890 |
|
}, |
|
{ |
|
"dpo_losses": 0.44164472818374634, |
|
"epoch": 2.54, |
|
"grad_norm": 9.649985247190592, |
|
"learning_rate": 3.571286656911376e-08, |
|
"logits/chosen": -2.757315158843994, |
|
"logits/rejected": -2.582063674926758, |
|
"logps/chosen": -362.41473388671875, |
|
"logps/rejected": -289.0292663574219, |
|
"loss": 0.5027, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3792465031147003, |
|
"rewards/margins": 0.6161192655563354, |
|
"rewards/margins_max": 0.8344296216964722, |
|
"rewards/margins_min": 0.3978089690208435, |
|
"rewards/margins_std": 0.30873745679855347, |
|
"rewards/rejected": -0.2368728220462799, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_dpo_losses": 0.6661122441291809, |
|
"eval_logits/chosen": -2.7059361934661865, |
|
"eval_logits/rejected": -2.662884473800659, |
|
"eval_logps/chosen": -289.38092041015625, |
|
"eval_logps/rejected": -270.0925598144531, |
|
"eval_loss": 1.5575090646743774, |
|
"eval_positive_losses": 8.782768249511719, |
|
"eval_rewards/accuracies": 0.6190476417541504, |
|
"eval_rewards/chosen": -0.04159707948565483, |
|
"eval_rewards/margins": 0.06750191748142242, |
|
"eval_rewards/margins_max": 0.31507408618927, |
|
"eval_rewards/margins_min": -0.1562977135181427, |
|
"eval_rewards/margins_std": 0.2099103033542633, |
|
"eval_rewards/rejected": -0.10909900069236755, |
|
"eval_runtime": 373.4993, |
|
"eval_samples_per_second": 5.355, |
|
"eval_steps_per_second": 0.169, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_losses": 0.45572906732559204, |
|
"epoch": 2.56, |
|
"grad_norm": 2.7671497689189013, |
|
"learning_rate": 3.160613669161255e-08, |
|
"logits/chosen": -2.8252711296081543, |
|
"logits/rejected": -2.692148208618164, |
|
"logps/chosen": -382.2273254394531, |
|
"logps/rejected": -260.42022705078125, |
|
"loss": 0.4747, |
|
"positive_losses": 0.24142150580883026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36954694986343384, |
|
"rewards/margins": 0.5896551012992859, |
|
"rewards/margins_max": 0.8211862444877625, |
|
"rewards/margins_min": 0.35812389850616455, |
|
"rewards/margins_std": 0.32743456959724426, |
|
"rewards/rejected": -0.22010818123817444, |
|
"step": 910 |
|
}, |
|
{ |
|
"dpo_losses": 0.4458266794681549, |
|
"epoch": 2.59, |
|
"grad_norm": 2.5946275076947987, |
|
"learning_rate": 2.7734245822478436e-08, |
|
"logits/chosen": -2.7092032432556152, |
|
"logits/rejected": -2.5538458824157715, |
|
"logps/chosen": -320.76727294921875, |
|
"logps/rejected": -214.5659942626953, |
|
"loss": 0.4918, |
|
"positive_losses": 0.05587196350097656, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.4010286331176758, |
|
"rewards/margins": 0.6215327382087708, |
|
"rewards/margins_max": 0.8581794500350952, |
|
"rewards/margins_min": 0.3848857879638672, |
|
"rewards/margins_std": 0.3346691429615021, |
|
"rewards/rejected": -0.2205040454864502, |
|
"step": 920 |
|
}, |
|
{ |
|
"dpo_losses": 0.48411306738853455, |
|
"epoch": 2.62, |
|
"grad_norm": 9.968121643352859, |
|
"learning_rate": 2.410135740750821e-08, |
|
"logits/chosen": -2.7608590126037598, |
|
"logits/rejected": -2.713438034057617, |
|
"logps/chosen": -317.3198547363281, |
|
"logps/rejected": -291.6824645996094, |
|
"loss": 0.4877, |
|
"positive_losses": 0.46479111909866333, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2928626835346222, |
|
"rewards/margins": 0.5131780505180359, |
|
"rewards/margins_max": 0.6953937411308289, |
|
"rewards/margins_min": 0.33096247911453247, |
|
"rewards/margins_std": 0.25769174098968506, |
|
"rewards/rejected": -0.22031545639038086, |
|
"step": 930 |
|
}, |
|
{ |
|
"dpo_losses": 0.5361374616622925, |
|
"epoch": 2.65, |
|
"grad_norm": 13.584990105876331, |
|
"learning_rate": 2.071137789306418e-08, |
|
"logits/chosen": -2.739955425262451, |
|
"logits/rejected": -2.62253999710083, |
|
"logps/chosen": -313.5608825683594, |
|
"logps/rejected": -221.56198120117188, |
|
"loss": 0.499, |
|
"positive_losses": 0.22314663231372833, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2565317451953888, |
|
"rewards/margins": 0.3702241778373718, |
|
"rewards/margins_max": 0.5549638271331787, |
|
"rewards/margins_min": 0.18548452854156494, |
|
"rewards/margins_std": 0.26126131415367126, |
|
"rewards/rejected": -0.11369242519140244, |
|
"step": 940 |
|
}, |
|
{ |
|
"dpo_losses": 0.4967280328273773, |
|
"epoch": 2.68, |
|
"grad_norm": 8.2589469715046, |
|
"learning_rate": 1.7567952525471107e-08, |
|
"logits/chosen": -2.7256572246551514, |
|
"logits/rejected": -2.615950584411621, |
|
"logps/chosen": -271.1017150878906, |
|
"logps/rejected": -220.56100463867188, |
|
"loss": 0.5302, |
|
"positive_losses": 1.0882877111434937, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.31043368577957153, |
|
"rewards/margins": 0.48788315057754517, |
|
"rewards/margins_max": 0.704791784286499, |
|
"rewards/margins_min": 0.2709745466709137, |
|
"rewards/margins_std": 0.30675509572029114, |
|
"rewards/rejected": -0.17744943499565125, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_losses": 0.4955645501613617, |
|
"epoch": 2.7, |
|
"grad_norm": 18.114348971792364, |
|
"learning_rate": 1.467446143128101e-08, |
|
"logits/chosen": -2.8821053504943848, |
|
"logits/rejected": -2.793936252593994, |
|
"logps/chosen": -297.2878112792969, |
|
"logps/rejected": -261.58392333984375, |
|
"loss": 0.5139, |
|
"positive_losses": 0.06438522040843964, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.29606151580810547, |
|
"rewards/margins": 0.4726499915122986, |
|
"rewards/margins_max": 0.6590073704719543, |
|
"rewards/margins_min": 0.2862926125526428, |
|
"rewards/margins_std": 0.26354914903640747, |
|
"rewards/rejected": -0.1765884906053543, |
|
"step": 960 |
|
}, |
|
{ |
|
"dpo_losses": 0.47625431418418884, |
|
"epoch": 2.73, |
|
"grad_norm": 2.2745052596169986, |
|
"learning_rate": 1.2034015982622243e-08, |
|
"logits/chosen": -2.7958290576934814, |
|
"logits/rejected": -2.691840887069702, |
|
"logps/chosen": -341.50140380859375, |
|
"logps/rejected": -322.9205627441406, |
|
"loss": 0.4943, |
|
"positive_losses": 0.23634567856788635, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.33635348081588745, |
|
"rewards/margins": 0.5307850241661072, |
|
"rewards/margins_max": 0.7724507451057434, |
|
"rewards/margins_min": 0.2891193926334381, |
|
"rewards/margins_std": 0.34176692366600037, |
|
"rewards/rejected": -0.1944316029548645, |
|
"step": 970 |
|
}, |
|
{ |
|
"dpo_losses": 0.514926552772522, |
|
"epoch": 2.76, |
|
"grad_norm": 9.098303215376248, |
|
"learning_rate": 9.649455451539418e-09, |
|
"logits/chosen": -2.6313929557800293, |
|
"logits/rejected": -2.6137583255767822, |
|
"logps/chosen": -211.71536254882812, |
|
"logps/rejected": -190.9324493408203, |
|
"loss": 0.4996, |
|
"positive_losses": 0.12087974697351456, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2592369318008423, |
|
"rewards/margins": 0.4249873757362366, |
|
"rewards/margins_max": 0.6153701543807983, |
|
"rewards/margins_min": 0.2346045970916748, |
|
"rewards/margins_std": 0.26924189925193787, |
|
"rewards/rejected": -0.1657504439353943, |
|
"step": 980 |
|
}, |
|
{ |
|
"dpo_losses": 0.471360445022583, |
|
"epoch": 2.79, |
|
"grad_norm": 7.946144983120647, |
|
"learning_rate": 7.523343956923194e-09, |
|
"logits/chosen": -2.797616481781006, |
|
"logits/rejected": -2.7332680225372314, |
|
"logps/chosen": -315.5093688964844, |
|
"logps/rejected": -304.4640808105469, |
|
"loss": 0.4919, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.36568814516067505, |
|
"rewards/margins": 0.5568908452987671, |
|
"rewards/margins_max": 0.824783980846405, |
|
"rewards/margins_min": 0.28899770975112915, |
|
"rewards/margins_std": 0.37885811924934387, |
|
"rewards/rejected": -0.19120268523693085, |
|
"step": 990 |
|
}, |
|
{ |
|
"dpo_losses": 0.513391375541687, |
|
"epoch": 2.82, |
|
"grad_norm": 7.57336855401101, |
|
"learning_rate": 5.6579677073121945e-09, |
|
"logits/chosen": -2.6762197017669678, |
|
"logits/rejected": -2.677922487258911, |
|
"logps/chosen": -255.44204711914062, |
|
"logps/rejected": -303.0498962402344, |
|
"loss": 0.4962, |
|
"positive_losses": 0.01267166156321764, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2648389935493469, |
|
"rewards/margins": 0.43319040536880493, |
|
"rewards/margins_max": 0.6835809946060181, |
|
"rewards/margins_min": 0.18279966711997986, |
|
"rewards/margins_std": 0.3541058897972107, |
|
"rewards/rejected": -0.168351411819458, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_dpo_losses": 0.6659845113754272, |
|
"eval_logits/chosen": -2.7036855220794678, |
|
"eval_logits/rejected": -2.6605641841888428, |
|
"eval_logps/chosen": -289.52734375, |
|
"eval_logps/rejected": -270.2825012207031, |
|
"eval_loss": 1.5707319974899292, |
|
"eval_positive_losses": 8.908056259155273, |
|
"eval_rewards/accuracies": 0.6150793433189392, |
|
"eval_rewards/chosen": -0.04306148737668991, |
|
"eval_rewards/margins": 0.06793692708015442, |
|
"eval_rewards/margins_max": 0.3166824281215668, |
|
"eval_rewards/margins_min": -0.15675365924835205, |
|
"eval_rewards/margins_std": 0.2111140936613083, |
|
"eval_rewards/rejected": -0.11099842935800552, |
|
"eval_runtime": 385.4819, |
|
"eval_samples_per_second": 5.188, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_losses": 0.4899842143058777, |
|
"epoch": 2.85, |
|
"grad_norm": 11.596233256209617, |
|
"learning_rate": 4.0553325425319585e-09, |
|
"logits/chosen": -2.7825233936309814, |
|
"logits/rejected": -2.710404872894287, |
|
"logps/chosen": -283.56024169921875, |
|
"logps/rejected": -277.7485656738281, |
|
"loss": 0.5045, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.32106488943099976, |
|
"rewards/margins": 0.503676176071167, |
|
"rewards/margins_max": 0.7390400171279907, |
|
"rewards/margins_min": 0.2683122456073761, |
|
"rewards/margins_std": 0.3328548073768616, |
|
"rewards/rejected": -0.18261122703552246, |
|
"step": 1010 |
|
}, |
|
{ |
|
"dpo_losses": 0.4729464650154114, |
|
"epoch": 2.87, |
|
"grad_norm": 2.4410647222056254, |
|
"learning_rate": 2.717161776814747e-09, |
|
"logits/chosen": -2.742475986480713, |
|
"logits/rejected": -2.64007568359375, |
|
"logps/chosen": -277.25311279296875, |
|
"logps/rejected": -253.4837188720703, |
|
"loss": 0.4843, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.3422829210758209, |
|
"rewards/margins": 0.5467172861099243, |
|
"rewards/margins_max": 0.7624825835227966, |
|
"rewards/margins_min": 0.3309520483016968, |
|
"rewards/margins_std": 0.30513814091682434, |
|
"rewards/rejected": -0.20443439483642578, |
|
"step": 1020 |
|
}, |
|
{ |
|
"dpo_losses": 0.48559775948524475, |
|
"epoch": 2.9, |
|
"grad_norm": 8.212943342949115, |
|
"learning_rate": 1.6448943457189613e-09, |
|
"logits/chosen": -2.765453815460205, |
|
"logits/rejected": -2.705345630645752, |
|
"logps/chosen": -331.9368896484375, |
|
"logps/rejected": -294.2420349121094, |
|
"loss": 0.4995, |
|
"positive_losses": 0.18167057633399963, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.33231136202812195, |
|
"rewards/margins": 0.5095351934432983, |
|
"rewards/margins_max": 0.6868753433227539, |
|
"rewards/margins_min": 0.332194983959198, |
|
"rewards/margins_std": 0.2507968842983246, |
|
"rewards/rejected": -0.17722377181053162, |
|
"step": 1030 |
|
}, |
|
{ |
|
"dpo_losses": 0.4621458053588867, |
|
"epoch": 2.93, |
|
"grad_norm": 8.550282024171548, |
|
"learning_rate": 8.396832588411229e-10, |
|
"logits/chosen": -2.6322901248931885, |
|
"logits/rejected": -2.517449378967285, |
|
"logps/chosen": -312.41729736328125, |
|
"logps/rejected": -251.61849975585938, |
|
"loss": 0.5147, |
|
"positive_losses": 0.4247921109199524, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.33802804350852966, |
|
"rewards/margins": 0.5674580335617065, |
|
"rewards/margins_max": 0.8604093790054321, |
|
"rewards/margins_min": 0.2745068669319153, |
|
"rewards/margins_std": 0.41429558396339417, |
|
"rewards/rejected": -0.22943000495433807, |
|
"step": 1040 |
|
}, |
|
{ |
|
"dpo_losses": 0.49349918961524963, |
|
"epoch": 2.96, |
|
"grad_norm": 2.0362116151192713, |
|
"learning_rate": 3.0239435998430374e-10, |
|
"logits/chosen": -2.7547783851623535, |
|
"logits/rejected": -2.6397135257720947, |
|
"logps/chosen": -298.6687927246094, |
|
"logps/rejected": -274.22381591796875, |
|
"loss": 0.5095, |
|
"positive_losses": 0.5305103063583374, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.32915833592414856, |
|
"rewards/margins": 0.4934717118740082, |
|
"rewards/margins_max": 0.7064648866653442, |
|
"rewards/margins_min": 0.28047865629196167, |
|
"rewards/margins_std": 0.3012176752090454, |
|
"rewards/rejected": -0.1643133908510208, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_losses": 0.5019891858100891, |
|
"epoch": 2.99, |
|
"grad_norm": 2.0527644568071164, |
|
"learning_rate": 3.360539611582669e-11, |
|
"logits/chosen": -2.6836140155792236, |
|
"logits/rejected": -2.597043514251709, |
|
"logps/chosen": -294.3243103027344, |
|
"logps/rejected": -225.4201202392578, |
|
"loss": 0.5343, |
|
"positive_losses": 0.3622688353061676, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.3153269588947296, |
|
"rewards/margins": 0.4751824736595154, |
|
"rewards/margins_max": 0.7748254537582397, |
|
"rewards/margins_min": 0.1755395084619522, |
|
"rewards/margins_std": 0.4237591624259949, |
|
"rewards/rejected": -0.15985555946826935, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1065, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5596254680078354, |
|
"train_runtime": 11165.5936, |
|
"train_samples_per_second": 1.526, |
|
"train_steps_per_second": 0.095 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1065, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|