|
{
  "best_metric": 0.42961153388023376,
  "best_model_checkpoint": "./mistral/19-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.4-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-732_batch_4_2024-04-19_ppid_9/checkpoint-270",
  "epoch": 7.317073170731708,
  "eval_steps": 30,
  "global_step": 450,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "grad_norm": 38.021175384521484,
      "learning_rate": 1.590909090909091e-06,
      "logits/chosen": -1.7808929681777954,
      "logits/rejected": -1.8136215209960938,
      "logps/chosen": -52.2663688659668,
      "logps/rejected": -88.76155853271484,
      "loss": 0.6927,
      "rewards/accuracies": 0.20000000298023224,
      "rewards/chosen": 0.0008833981119096279,
      "rewards/margins": 0.0009520913590677083,
      "rewards/rejected": -6.869318895041943e-05,
      "step": 10
    },
    {
      "epoch": 0.33,
      "grad_norm": 19.505126953125,
      "learning_rate": 3.863636363636364e-06,
      "logits/chosen": -1.865220308303833,
      "logits/rejected": -1.880090355873108,
      "logps/chosen": -34.3599967956543,
      "logps/rejected": -53.656646728515625,
      "loss": 0.6846,
      "rewards/accuracies": 0.22499999403953552,
      "rewards/chosen": 0.019346294924616814,
      "rewards/margins": 0.017682768404483795,
      "rewards/rejected": 0.0016635276842862368,
      "step": 20
    },
    {
      "epoch": 0.49,
      "grad_norm": 16.67194938659668,
      "learning_rate": 4.964788732394366e-06,
      "logits/chosen": -1.7692855596542358,
      "logits/rejected": -1.7932851314544678,
      "logps/chosen": -38.679237365722656,
      "logps/rejected": -79.23713684082031,
      "loss": 0.665,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 0.07158979028463364,
      "rewards/margins": 0.061105482280254364,
      "rewards/rejected": 0.010484304279088974,
      "step": 30
    },
    {
      "epoch": 0.49,
      "eval_logits/chosen": -1.7801028490066528,
      "eval_logits/rejected": -1.7981160879135132,
      "eval_logps/chosen": -54.71597671508789,
      "eval_logps/rejected": -95.75775909423828,
      "eval_loss": 0.6322354674339294,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 0.14569346606731415,
      "eval_rewards/margins": 0.13385750353336334,
      "eval_rewards/rejected": 0.011835969984531403,
      "eval_runtime": 8.5124,
      "eval_samples_per_second": 3.289,
      "eval_steps_per_second": 1.645,
      "step": 30
    },
    {
      "epoch": 0.65,
      "grad_norm": 22.69721031188965,
      "learning_rate": 4.894366197183099e-06,
      "logits/chosen": -1.8458446264266968,
      "logits/rejected": -1.8594070672988892,
      "logps/chosen": -39.180992126464844,
      "logps/rejected": -57.04741287231445,
      "loss": 0.6164,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 0.19042351841926575,
      "rewards/margins": 0.18086455762386322,
      "rewards/rejected": 0.009558964520692825,
      "step": 40
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.0,
      "learning_rate": 4.823943661971832e-06,
      "logits/chosen": -1.8411815166473389,
      "logits/rejected": -1.8659855127334595,
      "logps/chosen": -31.194744110107422,
      "logps/rejected": -58.17326736450195,
      "loss": 0.6138,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": 0.19823171198368073,
      "rewards/margins": 0.19669394195079803,
      "rewards/rejected": 0.0015377718955278397,
      "step": 50
    },
    {
      "epoch": 0.98,
      "grad_norm": 10.77029800415039,
      "learning_rate": 4.753521126760564e-06,
      "logits/chosen": -1.816936731338501,
      "logits/rejected": -1.8421777486801147,
      "logps/chosen": -50.517478942871094,
      "logps/rejected": -80.6562728881836,
      "loss": 0.5445,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 0.4567543864250183,
      "rewards/margins": 0.42180705070495605,
      "rewards/rejected": 0.03494739532470703,
      "step": 60
    },
    {
      "epoch": 0.98,
      "eval_logits/chosen": -1.784444808959961,
      "eval_logits/rejected": -1.8030898571014404,
      "eval_logps/chosen": -53.55432891845703,
      "eval_logps/rejected": -95.73511505126953,
      "eval_loss": 0.527186930179596,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 0.4941868484020233,
      "eval_rewards/margins": 0.47555941343307495,
      "eval_rewards/rejected": 0.018627464771270752,
      "eval_runtime": 8.5149,
      "eval_samples_per_second": 3.288,
      "eval_steps_per_second": 1.644,
      "step": 60
    },
    {
      "epoch": 1.14,
      "grad_norm": 8.583382606506348,
      "learning_rate": 4.683098591549296e-06,
      "logits/chosen": -1.7579460144042969,
      "logits/rejected": -1.776581048965454,
      "logps/chosen": -54.420066833496094,
      "logps/rejected": -79.0608901977539,
      "loss": 0.451,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": 0.8353117108345032,
      "rewards/margins": 0.8680375814437866,
      "rewards/rejected": -0.032725896686315536,
      "step": 70
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3610572814941406,
      "learning_rate": 4.612676056338028e-06,
      "logits/chosen": -1.8833833932876587,
      "logits/rejected": -1.9092212915420532,
      "logps/chosen": -25.5800724029541,
      "logps/rejected": -64.96099853515625,
      "loss": 0.5302,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": 0.5431563258171082,
      "rewards/margins": 0.6763826012611389,
      "rewards/rejected": -0.13322624564170837,
      "step": 80
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.739367723464966,
      "learning_rate": 4.542253521126761e-06,
      "logits/chosen": -1.8194096088409424,
      "logits/rejected": -1.8343127965927124,
      "logps/chosen": -41.80801773071289,
      "logps/rejected": -63.005592346191406,
      "loss": 0.4616,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": 0.830731213092804,
      "rewards/margins": 0.9403120875358582,
      "rewards/rejected": -0.109580859541893,
      "step": 90
    },
    {
      "epoch": 1.46,
      "eval_logits/chosen": -1.7902206182479858,
      "eval_logits/rejected": -1.8099161386489868,
      "eval_logps/chosen": -52.01105880737305,
      "eval_logps/rejected": -95.83198547363281,
      "eval_loss": 0.46216800808906555,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 0.957168459892273,
      "eval_rewards/margins": 0.9676021337509155,
      "eval_rewards/rejected": -0.010433738119900227,
      "eval_runtime": 8.5144,
      "eval_samples_per_second": 3.289,
      "eval_steps_per_second": 1.644,
      "step": 90
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.5382364988327026,
      "learning_rate": 4.471830985915494e-06,
      "logits/chosen": -1.845868468284607,
      "logits/rejected": -1.8715919256210327,
      "logps/chosen": -40.285335540771484,
      "logps/rejected": -73.16102600097656,
      "loss": 0.4611,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": 1.0631928443908691,
      "rewards/margins": 1.3067411184310913,
      "rewards/rejected": -0.24354824423789978,
      "step": 100
    },
    {
      "epoch": 1.79,
      "grad_norm": 12.538246154785156,
      "learning_rate": 4.401408450704226e-06,
      "logits/chosen": -1.8342373371124268,
      "logits/rejected": -1.862433671951294,
      "logps/chosen": -29.438589096069336,
      "logps/rejected": -62.31974411010742,
      "loss": 0.5266,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": 0.7012811899185181,
      "rewards/margins": 0.8140069246292114,
      "rewards/rejected": -0.11272567510604858,
      "step": 110
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3604652881622314,
      "learning_rate": 4.3309859154929575e-06,
      "logits/chosen": -1.8481504917144775,
      "logits/rejected": -1.8528194427490234,
      "logps/chosen": -35.794334411621094,
      "logps/rejected": -56.22735595703125,
      "loss": 0.5327,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 1.078013300895691,
      "rewards/margins": 0.9431617856025696,
      "rewards/rejected": 0.1348516196012497,
      "step": 120
    },
    {
      "epoch": 1.95,
      "eval_logits/chosen": -1.7962983846664429,
      "eval_logits/rejected": -1.8171442747116089,
      "eval_logps/chosen": -50.74287796020508,
      "eval_logps/rejected": -96.0649185180664,
      "eval_loss": 0.4424423575401306,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 1.337624192237854,
      "eval_rewards/margins": 1.4179356098175049,
      "eval_rewards/rejected": -0.08031141757965088,
      "eval_runtime": 8.5188,
      "eval_samples_per_second": 3.287,
      "eval_steps_per_second": 1.643,
      "step": 120
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.3829216957092285,
      "learning_rate": 4.26056338028169e-06,
      "logits/chosen": -1.8153988122940063,
      "logits/rejected": -1.853118658065796,
      "logps/chosen": -44.251441955566406,
      "logps/rejected": -85.38258361816406,
      "loss": 0.4011,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": 1.5001299381256104,
      "rewards/margins": 1.9703232049942017,
      "rewards/rejected": -0.4701933264732361,
      "step": 130
    },
    {
      "epoch": 2.28,
      "grad_norm": 0.6282079815864563,
      "learning_rate": 4.190140845070423e-06,
      "logits/chosen": -1.8527545928955078,
      "logits/rejected": -1.8757705688476562,
      "logps/chosen": -35.6223258972168,
      "logps/rejected": -80.99830627441406,
      "loss": 0.4423,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 1.484961748123169,
      "rewards/margins": 1.8964126110076904,
      "rewards/rejected": -0.41145071387290955,
      "step": 140
    },
    {
      "epoch": 2.44,
      "grad_norm": 0.0,
      "learning_rate": 4.119718309859156e-06,
      "logits/chosen": -1.9129869937896729,
      "logits/rejected": -1.927869439125061,
      "logps/chosen": -24.400936126708984,
      "logps/rejected": -50.861839294433594,
      "loss": 0.5459,
      "rewards/accuracies": 0.25,
      "rewards/chosen": 0.8408910036087036,
      "rewards/margins": 1.1376488208770752,
      "rewards/rejected": -0.2967577874660492,
      "step": 150
    },
    {
      "epoch": 2.44,
      "eval_logits/chosen": -1.8025389909744263,
      "eval_logits/rejected": -1.824258804321289,
      "eval_logps/chosen": -49.72334671020508,
      "eval_logps/rejected": -96.41252899169922,
      "eval_loss": 0.4335039258003235,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 1.6434847116470337,
      "eval_rewards/margins": 1.828076720237732,
      "eval_rewards/rejected": -0.1845920830965042,
      "eval_runtime": 8.5149,
      "eval_samples_per_second": 3.288,
      "eval_steps_per_second": 1.644,
      "step": 150
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.7459288239479065,
      "learning_rate": 4.0492957746478875e-06,
      "logits/chosen": -1.786231279373169,
      "logits/rejected": -1.8223682641983032,
      "logps/chosen": -40.68749237060547,
      "logps/rejected": -78.1619873046875,
      "loss": 0.4348,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 1.9040056467056274,
      "rewards/margins": 2.482433795928955,
      "rewards/rejected": -0.5784280896186829,
      "step": 160
    },
    {
      "epoch": 2.76,
      "grad_norm": 1.4669339656829834,
      "learning_rate": 3.97887323943662e-06,
      "logits/chosen": -1.8449195623397827,
      "logits/rejected": -1.871626615524292,
      "logps/chosen": -46.335182189941406,
      "logps/rejected": -86.10281372070312,
      "loss": 0.4232,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": 1.8989893198013306,
      "rewards/margins": 2.384274482727051,
      "rewards/rejected": -0.4852851927280426,
      "step": 170
    },
    {
      "epoch": 2.93,
      "grad_norm": 0.0301869735121727,
      "learning_rate": 3.908450704225352e-06,
      "logits/chosen": -1.8375946283340454,
      "logits/rejected": -1.8527040481567383,
      "logps/chosen": -39.01136016845703,
      "logps/rejected": -65.78593444824219,
      "loss": 0.4055,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 1.9841420650482178,
      "rewards/margins": 2.3045599460601807,
      "rewards/rejected": -0.32041770219802856,
      "step": 180
    },
    {
      "epoch": 2.93,
      "eval_logits/chosen": -1.8074016571044922,
      "eval_logits/rejected": -1.830051064491272,
      "eval_logps/chosen": -48.993629455566406,
      "eval_logps/rejected": -96.92731475830078,
      "eval_loss": 0.4326101839542389,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 1.8623981475830078,
      "eval_rewards/margins": 2.201425075531006,
      "eval_rewards/rejected": -0.3390272557735443,
      "eval_runtime": 8.5144,
      "eval_samples_per_second": 3.289,
      "eval_steps_per_second": 1.644,
      "step": 180
    },
    {
      "epoch": 3.09,
      "grad_norm": 2.224493980407715,
      "learning_rate": 3.838028169014085e-06,
      "logits/chosen": -1.8459789752960205,
      "logits/rejected": -1.8675992488861084,
      "logps/chosen": -32.56737518310547,
      "logps/rejected": -61.4633674621582,
      "loss": 0.4576,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 1.5155870914459229,
      "rewards/margins": 1.9375633001327515,
      "rewards/rejected": -0.42197614908218384,
      "step": 190
    },
    {
      "epoch": 3.25,
      "grad_norm": 16.606130599975586,
      "learning_rate": 3.767605633802817e-06,
      "logits/chosen": -1.8678531646728516,
      "logits/rejected": -1.8987194299697876,
      "logps/chosen": -36.32281494140625,
      "logps/rejected": -82.02101135253906,
      "loss": 0.4697,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 1.7492707967758179,
      "rewards/margins": 2.534116268157959,
      "rewards/rejected": -0.7848455309867859,
      "step": 200
    },
    {
      "epoch": 3.41,
      "grad_norm": 2.050732135772705,
      "learning_rate": 3.69718309859155e-06,
      "logits/chosen": -1.8438726663589478,
      "logits/rejected": -1.8704954385757446,
      "logps/chosen": -31.156131744384766,
      "logps/rejected": -71.87327575683594,
      "loss": 0.4694,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 1.7663137912750244,
      "rewards/margins": 2.468672513961792,
      "rewards/rejected": -0.702358603477478,
      "step": 210
    },
    {
      "epoch": 3.41,
      "eval_logits/chosen": -1.8135889768600464,
      "eval_logits/rejected": -1.836788535118103,
      "eval_logps/chosen": -48.54452133178711,
      "eval_logps/rejected": -97.27555084228516,
      "eval_loss": 0.43106651306152344,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 1.99713134765625,
      "eval_rewards/margins": 2.4406321048736572,
      "eval_rewards/rejected": -0.4435007870197296,
      "eval_runtime": 8.5161,
      "eval_samples_per_second": 3.288,
      "eval_steps_per_second": 1.644,
      "step": 210
    },
    {
      "epoch": 3.58,
      "grad_norm": 1.3447190523147583,
      "learning_rate": 3.626760563380282e-06,
      "logits/chosen": -1.9027411937713623,
      "logits/rejected": -1.9270541667938232,
      "logps/chosen": -31.44101333618164,
      "logps/rejected": -62.2841911315918,
      "loss": 0.4686,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 1.8702812194824219,
      "rewards/margins": 2.6481127738952637,
      "rewards/rejected": -0.7778315544128418,
      "step": 220
    },
    {
      "epoch": 3.74,
      "grad_norm": 0.8471798300743103,
      "learning_rate": 3.5563380281690144e-06,
      "logits/chosen": -1.7770761251449585,
      "logits/rejected": -1.8111178874969482,
      "logps/chosen": -48.4028434753418,
      "logps/rejected": -93.4535903930664,
      "loss": 0.3062,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 2.7809433937072754,
      "rewards/margins": 3.951282501220703,
      "rewards/rejected": -1.1703392267227173,
      "step": 230
    },
    {
      "epoch": 3.9,
      "grad_norm": 0.017692934721708298,
      "learning_rate": 3.4859154929577467e-06,
      "logits/chosen": -1.8823143243789673,
      "logits/rejected": -1.908074975013733,
      "logps/chosen": -24.35366439819336,
      "logps/rejected": -55.66550827026367,
      "loss": 0.5431,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": 1.2430522441864014,
      "rewards/margins": 1.8208128213882446,
      "rewards/rejected": -0.577760636806488,
      "step": 240
    },
    {
      "epoch": 3.9,
      "eval_logits/chosen": -1.8164188861846924,
      "eval_logits/rejected": -1.840105652809143,
      "eval_logps/chosen": -48.241355895996094,
      "eval_logps/rejected": -97.6273422241211,
      "eval_loss": 0.4247165322303772,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 2.0880820751190186,
      "eval_rewards/margins": 2.6371231079101562,
      "eval_rewards/rejected": -0.5490409731864929,
      "eval_runtime": 8.5143,
      "eval_samples_per_second": 3.289,
      "eval_steps_per_second": 1.644,
      "step": 240
    },
    {
      "epoch": 4.07,
      "grad_norm": 0.06990335881710052,
      "learning_rate": 3.415492957746479e-06,
      "logits/chosen": -1.855215311050415,
      "logits/rejected": -1.8647253513336182,
      "logps/chosen": -41.163124084472656,
      "logps/rejected": -63.0202751159668,
      "loss": 0.3954,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 2.336212635040283,
      "rewards/margins": 2.88411283493042,
      "rewards/rejected": -0.5479003190994263,
      "step": 250
    },
    {
      "epoch": 4.23,
      "grad_norm": 0.5815927386283875,
      "learning_rate": 3.3450704225352113e-06,
      "logits/chosen": -1.9256658554077148,
      "logits/rejected": -1.930415153503418,
      "logps/chosen": -24.930946350097656,
      "logps/rejected": -40.15483474731445,
      "loss": 0.5089,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": 1.2408175468444824,
      "rewards/margins": 1.7317492961883545,
      "rewards/rejected": -0.49093180894851685,
      "step": 260
    },
    {
      "epoch": 4.39,
      "grad_norm": 0.38188156485557556,
      "learning_rate": 3.274647887323944e-06,
      "logits/chosen": -1.8994340896606445,
      "logits/rejected": -1.9313939809799194,
      "logps/chosen": -27.106714248657227,
      "logps/rejected": -67.90898132324219,
      "loss": 0.4547,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 1.7479994297027588,
      "rewards/margins": 2.5788474082946777,
      "rewards/rejected": -0.830848217010498,
      "step": 270
    },
    {
      "epoch": 4.39,
      "eval_logits/chosen": -1.8190696239471436,
      "eval_logits/rejected": -1.8433464765548706,
      "eval_logps/chosen": -47.96824264526367,
      "eval_logps/rejected": -98.09535217285156,
      "eval_loss": 0.42961153388023376,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 2.170013666152954,
      "eval_rewards/margins": 2.8594539165496826,
      "eval_rewards/rejected": -0.6894403696060181,
      "eval_runtime": 8.5148,
      "eval_samples_per_second": 3.288,
      "eval_steps_per_second": 1.644,
      "step": 270
    },
    {
      "epoch": 4.55,
      "grad_norm": 0.3809558153152466,
      "learning_rate": 3.2042253521126764e-06,
      "logits/chosen": -1.861537218093872,
      "logits/rejected": -1.880858063697815,
      "logps/chosen": -33.568603515625,
      "logps/rejected": -57.01904296875,
      "loss": 0.4508,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 2.471442699432373,
      "rewards/margins": 3.4677319526672363,
      "rewards/rejected": -0.996289074420929,
      "step": 280
    },
    {
      "epoch": 4.72,
      "grad_norm": 0.03738081455230713,
      "learning_rate": 3.133802816901409e-06,
      "logits/chosen": -1.8528019189834595,
      "logits/rejected": -1.8946154117584229,
      "logps/chosen": -35.256317138671875,
      "logps/rejected": -99.2815933227539,
      "loss": 0.442,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 1.5687124729156494,
      "rewards/margins": 3.0280191898345947,
      "rewards/rejected": -1.4593069553375244,
      "step": 290
    },
    {
      "epoch": 4.88,
      "grad_norm": 0.007441267836838961,
      "learning_rate": 3.063380281690141e-06,
      "logits/chosen": -1.7671687602996826,
      "logits/rejected": -1.7900078296661377,
      "logps/chosen": -58.0703239440918,
      "logps/rejected": -107.0584487915039,
      "loss": 0.3606,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 3.007930278778076,
      "rewards/margins": 4.793817043304443,
      "rewards/rejected": -1.7858864068984985,
      "step": 300
    },
    {
      "epoch": 4.88,
      "eval_logits/chosen": -1.8213108777999878,
      "eval_logits/rejected": -1.8460028171539307,
      "eval_logps/chosen": -47.7896842956543,
      "eval_logps/rejected": -98.4369125366211,
      "eval_loss": 0.4289851188659668,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 2.223581314086914,
      "eval_rewards/margins": 3.015495777130127,
      "eval_rewards/rejected": -0.7919142842292786,
      "eval_runtime": 8.5154,
      "eval_samples_per_second": 3.288,
      "eval_steps_per_second": 1.644,
      "step": 300
    },
    {
      "epoch": 5.04,
      "grad_norm": 0.037768810987472534,
      "learning_rate": 2.9929577464788733e-06,
      "logits/chosen": -1.8851121664047241,
      "logits/rejected": -1.9122593402862549,
      "logps/chosen": -21.110910415649414,
      "logps/rejected": -48.944602966308594,
      "loss": 0.4731,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 1.653512954711914,
      "rewards/margins": 2.38820219039917,
      "rewards/rejected": -0.7346888780593872,
      "step": 310
    },
    {
      "epoch": 5.2,
      "grad_norm": 0.0,
      "learning_rate": 2.922535211267606e-06,
      "logits/chosen": -1.8363854885101318,
      "logits/rejected": -1.869361162185669,
      "logps/chosen": -36.65054702758789,
      "logps/rejected": -84.7163314819336,
      "loss": 0.3992,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 2.3141231536865234,
      "rewards/margins": 3.9065093994140625,
      "rewards/rejected": -1.5923866033554077,
      "step": 320
    },
    {
      "epoch": 5.37,
      "grad_norm": 0.08495225757360458,
      "learning_rate": 2.8521126760563383e-06,
      "logits/chosen": -1.854119062423706,
      "logits/rejected": -1.8729274272918701,
      "logps/chosen": -40.588775634765625,
      "logps/rejected": -68.68611145019531,
      "loss": 0.4021,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 2.146328926086426,
      "rewards/margins": 3.4630661010742188,
      "rewards/rejected": -1.3167372941970825,
      "step": 330
    },
    {
      "epoch": 5.37,
      "eval_logits/chosen": -1.8219343423843384,
      "eval_logits/rejected": -1.8470803499221802,
      "eval_logps/chosen": -47.683902740478516,
      "eval_logps/rejected": -98.87833404541016,
      "eval_loss": 0.4302048683166504,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 2.255316734313965,
      "eval_rewards/margins": 3.179651975631714,
      "eval_rewards/rejected": -0.9243355989456177,
      "eval_runtime": 8.5149,
      "eval_samples_per_second": 3.288,
      "eval_steps_per_second": 1.644,
      "step": 330
    },
    {
      "epoch": 5.53,
      "grad_norm": 0.00047089156578294933,
      "learning_rate": 2.781690140845071e-06,
      "logits/chosen": -1.9298601150512695,
      "logits/rejected": -1.9418340921401978,
      "logps/chosen": -28.818872451782227,
      "logps/rejected": -59.13349151611328,
      "loss": 0.5413,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": 1.8383833169937134,
      "rewards/margins": 2.883132219314575,
      "rewards/rejected": -1.0447486639022827,
      "step": 340
    },
    {
      "epoch": 5.69,
      "grad_norm": 2.729531764984131,
      "learning_rate": 2.7112676056338033e-06,
      "logits/chosen": -1.8659921884536743,
      "logits/rejected": -1.905010461807251,
      "logps/chosen": -27.63974380493164,
      "logps/rejected": -73.26609802246094,
      "loss": 0.4368,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 2.2738237380981445,
      "rewards/margins": 3.4326729774475098,
      "rewards/rejected": -1.1588493585586548,
      "step": 350
    },
    {
      "epoch": 5.85,
      "grad_norm": 0.01013993564993143,
      "learning_rate": 2.640845070422535e-06,
      "logits/chosen": -1.8495924472808838,
      "logits/rejected": -1.8761341571807861,
      "logps/chosen": -33.9546012878418,
      "logps/rejected": -70.13398742675781,
      "loss": 0.419,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": 2.0099234580993652,
      "rewards/margins": 3.498170852661133,
      "rewards/rejected": -1.488247275352478,
      "step": 360
    },
    {
      "epoch": 5.85,
      "eval_logits/chosen": -1.8214120864868164,
      "eval_logits/rejected": -1.8469568490982056,
      "eval_logps/chosen": -47.67514419555664,
      "eval_logps/rejected": -99.1513900756836,
      "eval_loss": 0.43358945846557617,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 2.257944107055664,
      "eval_rewards/margins": 3.264200210571289,
      "eval_rewards/rejected": -1.0062559843063354,
      "eval_runtime": 8.5148,
      "eval_samples_per_second": 3.288,
      "eval_steps_per_second": 1.644,
      "step": 360
    },
    {
      "epoch": 6.02,
      "grad_norm": 0.10058386623859406,
      "learning_rate": 2.570422535211268e-06,
      "logits/chosen": -1.8099803924560547,
      "logits/rejected": -1.8450464010238647,
      "logps/chosen": -40.64691925048828,
      "logps/rejected": -88.50006103515625,
      "loss": 0.4072,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 2.367983341217041,
      "rewards/margins": 3.652095079421997,
      "rewards/rejected": -1.2841118574142456,
      "step": 370
    },
    {
      "epoch": 6.18,
      "grad_norm": 0.0,
      "learning_rate": 2.5e-06,
      "logits/chosen": -1.8781477212905884,
      "logits/rejected": -1.9100782871246338,
      "logps/chosen": -33.296836853027344,
      "logps/rejected": -83.65415954589844,
      "loss": 0.4006,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 2.6570353507995605,
      "rewards/margins": 4.220401763916016,
      "rewards/rejected": -1.5633666515350342,
      "step": 380
    },
    {
      "epoch": 6.34,
      "grad_norm": 0.6534411907196045,
      "learning_rate": 2.4295774647887325e-06,
      "logits/chosen": -1.8289324045181274,
      "logits/rejected": -1.8389921188354492,
      "logps/chosen": -46.14112091064453,
      "logps/rejected": -61.8495979309082,
      "loss": 0.3984,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": 2.3760623931884766,
      "rewards/margins": 3.9815852642059326,
      "rewards/rejected": -1.605522871017456,
      "step": 390
    },
    {
      "epoch": 6.34,
      "eval_logits/chosen": -1.824257493019104,
      "eval_logits/rejected": -1.8499394655227661,
      "eval_logps/chosen": -47.629581451416016,
      "eval_logps/rejected": -99.36784362792969,
      "eval_loss": 0.42913129925727844,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 2.2716121673583984,
      "eval_rewards/margins": 3.3427982330322266,
      "eval_rewards/rejected": -1.0711863040924072,
      "eval_runtime": 8.5204,
      "eval_samples_per_second": 3.286,
      "eval_steps_per_second": 1.643,
      "step": 390
    },
    {
      "epoch": 6.5,
      "grad_norm": 0.10979699343442917,
      "learning_rate": 2.359154929577465e-06,
      "logits/chosen": -1.8595476150512695,
      "logits/rejected": -1.8786380290985107,
      "logps/chosen": -36.14017105102539,
      "logps/rejected": -69.46199798583984,
      "loss": 0.4672,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 2.0604913234710693,
      "rewards/margins": 3.718773365020752,
      "rewards/rejected": -1.658281683921814,
      "step": 400
    },
    {
      "epoch": 6.67,
      "grad_norm": 0.00019151422020513564,
      "learning_rate": 2.2887323943661975e-06,
      "logits/chosen": -1.8760582208633423,
      "logits/rejected": -1.903466820716858,
      "logps/chosen": -28.268178939819336,
      "logps/rejected": -71.25038146972656,
      "loss": 0.468,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 2.309755802154541,
      "rewards/margins": 3.500605344772339,
      "rewards/rejected": -1.190850019454956,
      "step": 410
    },
    {
      "epoch": 6.83,
      "grad_norm": 0.015070440247654915,
      "learning_rate": 2.21830985915493e-06,
      "logits/chosen": -1.877672553062439,
      "logits/rejected": -1.8992106914520264,
      "logps/chosen": -34.51100158691406,
      "logps/rejected": -69.979248046875,
      "loss": 0.435,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 2.112297534942627,
      "rewards/margins": 3.443708896636963,
      "rewards/rejected": -1.3314111232757568,
      "step": 420
    },
    {
      "epoch": 6.83,
      "eval_logits/chosen": -1.8236370086669922,
      "eval_logits/rejected": -1.8495099544525146,
      "eval_logps/chosen": -47.62683868408203,
      "eval_logps/rejected": -99.54405212402344,
      "eval_loss": 0.4284569323062897,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 2.272435426712036,
      "eval_rewards/margins": 3.396484613418579,
      "eval_rewards/rejected": -1.1240490674972534,
      "eval_runtime": 8.5146,
      "eval_samples_per_second": 3.288,
      "eval_steps_per_second": 1.644,
      "step": 420
    },
    {
      "epoch": 6.99,
      "grad_norm": 0.027868641540408134,
      "learning_rate": 2.147887323943662e-06,
      "logits/chosen": -1.8732540607452393,
      "logits/rejected": -1.9188286066055298,
      "logps/chosen": -28.66534996032715,
      "logps/rejected": -87.85456085205078,
      "loss": 0.4359,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 1.747658133506775,
      "rewards/margins": 3.2419276237487793,
      "rewards/rejected": -1.494269847869873,
      "step": 430
    },
    {
      "epoch": 7.15,
      "grad_norm": 1.6011111736297607,
      "learning_rate": 2.0774647887323944e-06,
      "logits/chosen": -1.8680269718170166,
      "logits/rejected": -1.883652925491333,
      "logps/chosen": -22.022457122802734,
      "logps/rejected": -51.710731506347656,
      "loss": 0.4878,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": 1.6784204244613647,
      "rewards/margins": 2.697260856628418,
      "rewards/rejected": -1.0188405513763428,
      "step": 440
    },
    {
      "epoch": 7.32,
      "grad_norm": 0.005799587815999985,
      "learning_rate": 2.007042253521127e-06,
      "logits/chosen": -1.9159187078475952,
      "logits/rejected": -1.942246675491333,
      "logps/chosen": -26.426944732666016,
      "logps/rejected": -63.260643005371094,
      "loss": 0.5148,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": 1.5094796419143677,
      "rewards/margins": 2.8935487270355225,
      "rewards/rejected": -1.3840690851211548,
      "step": 450
    },
    {
      "epoch": 7.32,
      "eval_logits/chosen": -1.8220092058181763,
      "eval_logits/rejected": -1.8481791019439697,
      "eval_logps/chosen": -47.637298583984375,
      "eval_logps/rejected": -99.83494567871094,
      "eval_loss": 0.43089213967323303,
      "eval_rewards/accuracies": 0.4285714328289032,
      "eval_rewards/chosen": 2.2692975997924805,
      "eval_rewards/margins": 3.480616331100464,
      "eval_rewards/rejected": -1.211318850517273,
      "eval_runtime": 8.5151,
      "eval_samples_per_second": 3.288,
      "eval_steps_per_second": 1.644,
      "step": 450
    }
  ],
  "logging_steps": 10,
  "max_steps": 732,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 12,
  "save_steps": 90,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|