|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 42, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"debug/policy_chosen_logits": -1.7926865816116333, |
|
"debug/policy_chosen_logps": -217.38279724121094, |
|
"debug/policy_rejected_logits": -1.8328309059143066, |
|
"debug/policy_rejected_logps": -225.83067321777344, |
|
"debug/reference_chosen_logps": -217.38279724121094, |
|
"debug/reference_rejected_logps": -225.83067321777344, |
|
"epoch": 0.023809523809523808, |
|
"grad_norm": 4.886963528314891, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7926865816116333, |
|
"logits/rejected": -1.8328309059143066, |
|
"logps/chosen": -217.38279724121094, |
|
"logps/rejected": -225.83067321777344, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8736802339553833, |
|
"debug/policy_chosen_logps": -237.7906036376953, |
|
"debug/policy_rejected_logits": -1.6878823041915894, |
|
"debug/policy_rejected_logps": -252.66542053222656, |
|
"debug/reference_chosen_logps": -237.90771484375, |
|
"debug/reference_rejected_logps": -252.53955078125, |
|
"epoch": 0.047619047619047616, |
|
"grad_norm": 4.425053566972872, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8736802339553833, |
|
"logits/rejected": -1.6878823041915894, |
|
"logps/chosen": -237.7906036376953, |
|
"logps/rejected": -252.66542053222656, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.001171283656731248, |
|
"rewards/margins": 0.0024298285134136677, |
|
"rewards/rejected": -0.001258544740267098, |
|
"step": 2 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6082555055618286, |
|
"debug/policy_chosen_logps": -239.86978149414062, |
|
"debug/policy_rejected_logits": -1.6321099996566772, |
|
"debug/policy_rejected_logps": -217.70059204101562, |
|
"debug/reference_chosen_logps": -240.5919189453125, |
|
"debug/reference_rejected_logps": -218.04476928710938, |
|
"epoch": 0.07142857142857142, |
|
"grad_norm": 4.882938644343874, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6082555055618286, |
|
"logits/rejected": -1.6321099996566772, |
|
"logps/chosen": -239.86978149414062, |
|
"logps/rejected": -217.70059204101562, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.007221374660730362, |
|
"rewards/margins": 0.0037795070093125105, |
|
"rewards/rejected": 0.0034418676514178514, |
|
"step": 3 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8915444612503052, |
|
"debug/policy_chosen_logps": -215.7807159423828, |
|
"debug/policy_rejected_logits": -1.7285552024841309, |
|
"debug/policy_rejected_logps": -219.4071044921875, |
|
"debug/reference_chosen_logps": -215.05958557128906, |
|
"debug/reference_rejected_logps": -219.14736938476562, |
|
"epoch": 0.09523809523809523, |
|
"grad_norm": 4.417539068018447, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8915444612503052, |
|
"logits/rejected": -1.7285552024841309, |
|
"logps/chosen": -215.7807159423828, |
|
"logps/rejected": -219.4071044921875, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.007211265154182911, |
|
"rewards/margins": -0.004613952711224556, |
|
"rewards/rejected": -0.0025973126757889986, |
|
"step": 4 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8312164545059204, |
|
"debug/policy_chosen_logps": -228.4812469482422, |
|
"debug/policy_rejected_logits": -1.8293319940567017, |
|
"debug/policy_rejected_logps": -219.43865966796875, |
|
"debug/reference_chosen_logps": -228.2222137451172, |
|
"debug/reference_rejected_logps": -218.95147705078125, |
|
"epoch": 0.11904761904761904, |
|
"grad_norm": 4.499121781225099, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8312164545059204, |
|
"logits/rejected": -1.8293319940567017, |
|
"logps/chosen": -228.4812469482422, |
|
"logps/rejected": -219.43865966796875, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0025903128553181887, |
|
"rewards/margins": 0.002281456021592021, |
|
"rewards/rejected": -0.00487176887691021, |
|
"step": 5 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7772536277770996, |
|
"debug/policy_chosen_logps": -251.26417541503906, |
|
"debug/policy_rejected_logits": -1.7712210416793823, |
|
"debug/policy_rejected_logps": -224.58258056640625, |
|
"debug/reference_chosen_logps": -251.5955047607422, |
|
"debug/reference_rejected_logps": -224.013427734375, |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 4.746380335819719, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7772536277770996, |
|
"logits/rejected": -1.7712210416793823, |
|
"logps/chosen": -251.26417541503906, |
|
"logps/rejected": -224.58258056640625, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0033132743556052446, |
|
"rewards/margins": 0.009004650637507439, |
|
"rewards/rejected": -0.005691375583410263, |
|
"step": 6 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8587639331817627, |
|
"debug/policy_chosen_logps": -215.83526611328125, |
|
"debug/policy_rejected_logits": -1.830519437789917, |
|
"debug/policy_rejected_logps": -228.4679412841797, |
|
"debug/reference_chosen_logps": -215.88973999023438, |
|
"debug/reference_rejected_logps": -228.85028076171875, |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 4.703836780587687, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8587639331817627, |
|
"logits/rejected": -1.830519437789917, |
|
"logps/chosen": -215.83526611328125, |
|
"logps/rejected": -228.4679412841797, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0005447575822472572, |
|
"rewards/margins": -0.0032785222865641117, |
|
"rewards/rejected": 0.0038232803344726562, |
|
"step": 7 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.713200569152832, |
|
"debug/policy_chosen_logps": -214.9654083251953, |
|
"debug/policy_rejected_logits": -1.6877814531326294, |
|
"debug/policy_rejected_logps": -237.10147094726562, |
|
"debug/reference_chosen_logps": -215.1925811767578, |
|
"debug/reference_rejected_logps": -237.26712036132812, |
|
"epoch": 0.19047619047619047, |
|
"grad_norm": 4.614150814199241, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.713200569152832, |
|
"logits/rejected": -1.6877814531326294, |
|
"logps/chosen": -214.9654083251953, |
|
"logps/rejected": -237.10147094726562, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0022717281244695187, |
|
"rewards/margins": 0.0006152723217383027, |
|
"rewards/rejected": 0.0016564556863158941, |
|
"step": 8 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.830588459968567, |
|
"debug/policy_chosen_logps": -264.9453430175781, |
|
"debug/policy_rejected_logits": -1.7403862476348877, |
|
"debug/policy_rejected_logps": -236.23504638671875, |
|
"debug/reference_chosen_logps": -265.33489990234375, |
|
"debug/reference_rejected_logps": -236.4605712890625, |
|
"epoch": 0.21428571428571427, |
|
"grad_norm": 4.507439835242966, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.830588459968567, |
|
"logits/rejected": -1.7403862476348877, |
|
"logps/chosen": -264.9453430175781, |
|
"logps/rejected": -236.23504638671875, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003895435482263565, |
|
"rewards/margins": 0.0016400336753576994, |
|
"rewards/rejected": 0.002255401574075222, |
|
"step": 9 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7635490894317627, |
|
"debug/policy_chosen_logps": -212.54888916015625, |
|
"debug/policy_rejected_logits": -1.7577309608459473, |
|
"debug/policy_rejected_logps": -231.78260803222656, |
|
"debug/reference_chosen_logps": -212.55520629882812, |
|
"debug/reference_rejected_logps": -231.5526885986328, |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 4.646994116106454, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7635490894317627, |
|
"logits/rejected": -1.7577309608459473, |
|
"logps/chosen": -212.54888916015625, |
|
"logps/rejected": -231.78260803222656, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 6.330502219498158e-05, |
|
"rewards/margins": 0.0023624992463737726, |
|
"rewards/rejected": -0.002299194224178791, |
|
"step": 10 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8275775909423828, |
|
"debug/policy_chosen_logps": -223.30955505371094, |
|
"debug/policy_rejected_logits": -1.7878596782684326, |
|
"debug/policy_rejected_logps": -228.84654235839844, |
|
"debug/reference_chosen_logps": -223.7688751220703, |
|
"debug/reference_rejected_logps": -228.8212890625, |
|
"epoch": 0.2619047619047619, |
|
"grad_norm": 4.501453979601088, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8275775909423828, |
|
"logits/rejected": -1.7878596782684326, |
|
"logps/chosen": -223.30955505371094, |
|
"logps/rejected": -228.84654235839844, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004593219608068466, |
|
"rewards/margins": 0.004845847375690937, |
|
"rewards/rejected": -0.0002526284661144018, |
|
"step": 11 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8328152894973755, |
|
"debug/policy_chosen_logps": -229.270751953125, |
|
"debug/policy_rejected_logits": -1.7323297262191772, |
|
"debug/policy_rejected_logps": -245.7848358154297, |
|
"debug/reference_chosen_logps": -231.10006713867188, |
|
"debug/reference_rejected_logps": -244.46746826171875, |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 4.605283271613626, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8328152894973755, |
|
"logits/rejected": -1.7323297262191772, |
|
"logps/chosen": -229.270751953125, |
|
"logps/rejected": -245.7848358154297, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.018293190747499466, |
|
"rewards/margins": 0.03146692365407944, |
|
"rewards/rejected": -0.013173731975257397, |
|
"step": 12 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7267065048217773, |
|
"debug/policy_chosen_logps": -218.44247436523438, |
|
"debug/policy_rejected_logits": -1.6951403617858887, |
|
"debug/policy_rejected_logps": -240.42892456054688, |
|
"debug/reference_chosen_logps": -217.90109252929688, |
|
"debug/reference_rejected_logps": -239.02613830566406, |
|
"epoch": 0.30952380952380953, |
|
"grad_norm": 4.941489045777509, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7267065048217773, |
|
"logits/rejected": -1.6951403617858887, |
|
"logps/chosen": -218.44247436523438, |
|
"logps/rejected": -240.42892456054688, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.005413799546658993, |
|
"rewards/margins": 0.008614081889390945, |
|
"rewards/rejected": -0.014027881436049938, |
|
"step": 13 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.795722484588623, |
|
"debug/policy_chosen_logps": -235.3023681640625, |
|
"debug/policy_rejected_logits": -1.7265783548355103, |
|
"debug/policy_rejected_logps": -227.51165771484375, |
|
"debug/reference_chosen_logps": -233.77352905273438, |
|
"debug/reference_rejected_logps": -225.99020385742188, |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 5.251200069837007, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.795722484588623, |
|
"logits/rejected": -1.7265783548355103, |
|
"logps/chosen": -235.3023681640625, |
|
"logps/rejected": -227.51165771484375, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.015288218855857849, |
|
"rewards/margins": -7.373839616775513e-05, |
|
"rewards/rejected": -0.015214480459690094, |
|
"step": 14 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7187248468399048, |
|
"debug/policy_chosen_logps": -219.02468872070312, |
|
"debug/policy_rejected_logits": -1.710004210472107, |
|
"debug/policy_rejected_logps": -239.28665161132812, |
|
"debug/reference_chosen_logps": -218.7939453125, |
|
"debug/reference_rejected_logps": -236.74258422851562, |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 5.232970784816434, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7187248468399048, |
|
"logits/rejected": -1.710004210472107, |
|
"logps/chosen": -219.02468872070312, |
|
"logps/rejected": -239.28665161132812, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.002307548187673092, |
|
"rewards/margins": 0.023132991045713425, |
|
"rewards/rejected": -0.025440538302063942, |
|
"step": 15 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7510279417037964, |
|
"debug/policy_chosen_logps": -213.79452514648438, |
|
"debug/policy_rejected_logits": -1.6364609003067017, |
|
"debug/policy_rejected_logps": -230.83267211914062, |
|
"debug/reference_chosen_logps": -216.0415802001953, |
|
"debug/reference_rejected_logps": -228.79180908203125, |
|
"epoch": 0.38095238095238093, |
|
"grad_norm": 4.507041423446427, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7510279417037964, |
|
"logits/rejected": -1.6364609003067017, |
|
"logps/chosen": -213.79452514648438, |
|
"logps/rejected": -230.83267211914062, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.022470567375421524, |
|
"rewards/margins": 0.04287933558225632, |
|
"rewards/rejected": -0.020408764481544495, |
|
"step": 16 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7066298723220825, |
|
"debug/policy_chosen_logps": -234.27340698242188, |
|
"debug/policy_rejected_logits": -1.7131836414337158, |
|
"debug/policy_rejected_logps": -234.25454711914062, |
|
"debug/reference_chosen_logps": -235.27389526367188, |
|
"debug/reference_rejected_logps": -234.8538818359375, |
|
"epoch": 0.40476190476190477, |
|
"grad_norm": 4.798628310057689, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7066298723220825, |
|
"logits/rejected": -1.7131836414337158, |
|
"logps/chosen": -234.27340698242188, |
|
"logps/rejected": -234.25454711914062, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.010004977695643902, |
|
"rewards/margins": 0.004011764191091061, |
|
"rewards/rejected": 0.005993213504552841, |
|
"step": 17 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7528653144836426, |
|
"debug/policy_chosen_logps": -251.7496337890625, |
|
"debug/policy_rejected_logits": -1.561893343925476, |
|
"debug/policy_rejected_logps": -227.67825317382812, |
|
"debug/reference_chosen_logps": -250.92552185058594, |
|
"debug/reference_rejected_logps": -227.5825958251953, |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 4.585902807542465, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7528653144836426, |
|
"logits/rejected": -1.561893343925476, |
|
"logps/chosen": -251.7496337890625, |
|
"logps/rejected": -227.67825317382812, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.008241119794547558, |
|
"rewards/margins": -0.007284717168658972, |
|
"rewards/rejected": -0.0009564019273966551, |
|
"step": 18 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7094969749450684, |
|
"debug/policy_chosen_logps": -218.6276397705078, |
|
"debug/policy_rejected_logits": -1.6478347778320312, |
|
"debug/policy_rejected_logps": -222.29495239257812, |
|
"debug/reference_chosen_logps": -218.80328369140625, |
|
"debug/reference_rejected_logps": -223.06387329101562, |
|
"epoch": 0.4523809523809524, |
|
"grad_norm": 4.499072587904488, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7094969749450684, |
|
"logits/rejected": -1.6478347778320312, |
|
"logps/chosen": -218.6276397705078, |
|
"logps/rejected": -222.29495239257812, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.001756363082677126, |
|
"rewards/margins": -0.0059328461065888405, |
|
"rewards/rejected": 0.007689208723604679, |
|
"step": 19 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7157065868377686, |
|
"debug/policy_chosen_logps": -233.549560546875, |
|
"debug/policy_rejected_logits": -1.6739444732666016, |
|
"debug/policy_rejected_logps": -228.99595642089844, |
|
"debug/reference_chosen_logps": -234.72760009765625, |
|
"debug/reference_rejected_logps": -227.83563232421875, |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 4.575115483844653, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7157065868377686, |
|
"logits/rejected": -1.6739444732666016, |
|
"logps/chosen": -233.549560546875, |
|
"logps/rejected": -228.99595642089844, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.011780375614762306, |
|
"rewards/margins": 0.023383673280477524, |
|
"rewards/rejected": -0.011603298597037792, |
|
"step": 20 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6785526275634766, |
|
"debug/policy_chosen_logps": -229.71510314941406, |
|
"debug/policy_rejected_logits": -1.5240322351455688, |
|
"debug/policy_rejected_logps": -219.5631103515625, |
|
"debug/reference_chosen_logps": -229.66180419921875, |
|
"debug/reference_rejected_logps": -219.04263305664062, |
|
"epoch": 0.5, |
|
"grad_norm": 4.692408631629601, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6785526275634766, |
|
"logits/rejected": -1.5240322351455688, |
|
"logps/chosen": -229.71510314941406, |
|
"logps/rejected": -219.5631103515625, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0005329509731382132, |
|
"rewards/margins": 0.004671857692301273, |
|
"rewards/rejected": -0.00520480889827013, |
|
"step": 21 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7138959169387817, |
|
"debug/policy_chosen_logps": -258.2476806640625, |
|
"debug/policy_rejected_logits": -1.6827083826065063, |
|
"debug/policy_rejected_logps": -217.47686767578125, |
|
"debug/reference_chosen_logps": -258.50408935546875, |
|
"debug/reference_rejected_logps": -218.35800170898438, |
|
"epoch": 0.5238095238095238, |
|
"grad_norm": 5.349249118587083, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7138959169387817, |
|
"logits/rejected": -1.6827083826065063, |
|
"logps/chosen": -258.2476806640625, |
|
"logps/rejected": -217.47686767578125, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.0025640674866735935, |
|
"rewards/margins": -0.00624719588086009, |
|
"rewards/rejected": 0.008811263367533684, |
|
"step": 22 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7912077903747559, |
|
"debug/policy_chosen_logps": -216.38299560546875, |
|
"debug/policy_rejected_logits": -1.8135955333709717, |
|
"debug/policy_rejected_logps": -212.30813598632812, |
|
"debug/reference_chosen_logps": -217.41909790039062, |
|
"debug/reference_rejected_logps": -213.4788055419922, |
|
"epoch": 0.5476190476190477, |
|
"grad_norm": 5.700609487442719, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7912077903747559, |
|
"logits/rejected": -1.8135955333709717, |
|
"logps/chosen": -216.38299560546875, |
|
"logps/rejected": -212.30813598632812, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.010360946878790855, |
|
"rewards/margins": -0.0013457299210131168, |
|
"rewards/rejected": 0.011706676334142685, |
|
"step": 23 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7215830087661743, |
|
"debug/policy_chosen_logps": -224.13088989257812, |
|
"debug/policy_rejected_logits": -1.7539652585983276, |
|
"debug/policy_rejected_logps": -252.66017150878906, |
|
"debug/reference_chosen_logps": -223.94528198242188, |
|
"debug/reference_rejected_logps": -251.1186065673828, |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 4.945978414250695, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7215830087661743, |
|
"logits/rejected": -1.7539652585983276, |
|
"logps/chosen": -224.13088989257812, |
|
"logps/rejected": -252.66017150878906, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0018562317127361894, |
|
"rewards/margins": 0.013559339568018913, |
|
"rewards/rejected": -0.015415573492646217, |
|
"step": 24 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7326081991195679, |
|
"debug/policy_chosen_logps": -226.6582794189453, |
|
"debug/policy_rejected_logits": -1.701865553855896, |
|
"debug/policy_rejected_logps": -225.5404052734375, |
|
"debug/reference_chosen_logps": -227.42691040039062, |
|
"debug/reference_rejected_logps": -224.72911071777344, |
|
"epoch": 0.5952380952380952, |
|
"grad_norm": 5.05266825293041, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7326081991195679, |
|
"logits/rejected": -1.701865553855896, |
|
"logps/chosen": -226.6582794189453, |
|
"logps/rejected": -225.5404052734375, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.007686404511332512, |
|
"rewards/margins": 0.01579955965280533, |
|
"rewards/rejected": -0.008113155141472816, |
|
"step": 25 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.797877550125122, |
|
"debug/policy_chosen_logps": -232.49713134765625, |
|
"debug/policy_rejected_logits": -1.7819045782089233, |
|
"debug/policy_rejected_logps": -239.3779296875, |
|
"debug/reference_chosen_logps": -231.29766845703125, |
|
"debug/reference_rejected_logps": -239.3263702392578, |
|
"epoch": 0.6190476190476191, |
|
"grad_norm": 5.6239784950743745, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.797877550125122, |
|
"logits/rejected": -1.7819045782089233, |
|
"logps/chosen": -232.49713134765625, |
|
"logps/rejected": -239.3779296875, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.011994647793471813, |
|
"rewards/margins": -0.011478938162326813, |
|
"rewards/rejected": -0.0005157091654837132, |
|
"step": 26 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6310824155807495, |
|
"debug/policy_chosen_logps": -223.48495483398438, |
|
"debug/policy_rejected_logits": -1.5401698350906372, |
|
"debug/policy_rejected_logps": -237.32968139648438, |
|
"debug/reference_chosen_logps": -225.53250122070312, |
|
"debug/reference_rejected_logps": -237.51010131835938, |
|
"epoch": 0.6428571428571429, |
|
"grad_norm": 5.775503509441982, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6310824155807495, |
|
"logits/rejected": -1.5401698350906372, |
|
"logps/chosen": -223.48495483398438, |
|
"logps/rejected": -237.32968139648438, |
|
"loss": 0.4974, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.020475387573242188, |
|
"rewards/margins": 0.018671227619051933, |
|
"rewards/rejected": 0.0018041613511741161, |
|
"step": 27 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8868080377578735, |
|
"debug/policy_chosen_logps": -228.5826416015625, |
|
"debug/policy_rejected_logits": -1.640351414680481, |
|
"debug/policy_rejected_logps": -232.02215576171875, |
|
"debug/reference_chosen_logps": -227.31100463867188, |
|
"debug/reference_rejected_logps": -231.01402282714844, |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 4.615984568922464, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8868080377578735, |
|
"logits/rejected": -1.640351414680481, |
|
"logps/chosen": -228.5826416015625, |
|
"logps/rejected": -232.02215576171875, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.012716369703412056, |
|
"rewards/margins": -0.0026350021362304688, |
|
"rewards/rejected": -0.010081367567181587, |
|
"step": 28 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6878303289413452, |
|
"debug/policy_chosen_logps": -207.95928955078125, |
|
"debug/policy_rejected_logits": -1.6597003936767578, |
|
"debug/policy_rejected_logps": -203.81204223632812, |
|
"debug/reference_chosen_logps": -209.3488006591797, |
|
"debug/reference_rejected_logps": -204.5792694091797, |
|
"epoch": 0.6904761904761905, |
|
"grad_norm": 5.141749172246753, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6878303289413452, |
|
"logits/rejected": -1.6597003936767578, |
|
"logps/chosen": -207.95928955078125, |
|
"logps/rejected": -203.81204223632812, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.013895034790039062, |
|
"rewards/margins": 0.006222839932888746, |
|
"rewards/rejected": 0.0076721953228116035, |
|
"step": 29 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.654941201210022, |
|
"debug/policy_chosen_logps": -214.6881866455078, |
|
"debug/policy_rejected_logits": -1.7197308540344238, |
|
"debug/policy_rejected_logps": -230.4603271484375, |
|
"debug/reference_chosen_logps": -213.96749877929688, |
|
"debug/reference_rejected_logps": -229.0462188720703, |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 4.802743085723465, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.654941201210022, |
|
"logits/rejected": -1.7197308540344238, |
|
"logps/chosen": -214.6881866455078, |
|
"logps/rejected": -230.4603271484375, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0072069549933075905, |
|
"rewards/margins": 0.006934070959687233, |
|
"rewards/rejected": -0.014141025952994823, |
|
"step": 30 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.5828349590301514, |
|
"debug/policy_chosen_logps": -237.3705596923828, |
|
"debug/policy_rejected_logits": -1.5794568061828613, |
|
"debug/policy_rejected_logps": -254.6802978515625, |
|
"debug/reference_chosen_logps": -235.51742553710938, |
|
"debug/reference_rejected_logps": -251.95379638671875, |
|
"epoch": 0.7380952380952381, |
|
"grad_norm": 5.710393511185945, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.5828349590301514, |
|
"logits/rejected": -1.5794568061828613, |
|
"logps/chosen": -237.3705596923828, |
|
"logps/rejected": -254.6802978515625, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0185314379632473, |
|
"rewards/margins": 0.00873336661607027, |
|
"rewards/rejected": -0.027264803647994995, |
|
"step": 31 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8172823190689087, |
|
"debug/policy_chosen_logps": -233.02899169921875, |
|
"debug/policy_rejected_logits": -1.8409276008605957, |
|
"debug/policy_rejected_logps": -226.48519897460938, |
|
"debug/reference_chosen_logps": -232.89686584472656, |
|
"debug/reference_rejected_logps": -226.06564331054688, |
|
"epoch": 0.7619047619047619, |
|
"grad_norm": 4.735509214480415, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8172823190689087, |
|
"logits/rejected": -1.8409276008605957, |
|
"logps/chosen": -233.02899169921875, |
|
"logps/rejected": -226.48519897460938, |
|
"loss": 0.5039, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0013212203048169613, |
|
"rewards/margins": 0.0028741275891661644, |
|
"rewards/rejected": -0.0041953460313379765, |
|
"step": 32 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7416476011276245, |
|
"debug/policy_chosen_logps": -210.79525756835938, |
|
"debug/policy_rejected_logits": -1.65560781955719, |
|
"debug/policy_rejected_logps": -239.12855529785156, |
|
"debug/reference_chosen_logps": -209.15957641601562, |
|
"debug/reference_rejected_logps": -239.18234252929688, |
|
"epoch": 0.7857142857142857, |
|
"grad_norm": 4.895068163607816, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7416476011276245, |
|
"logits/rejected": -1.65560781955719, |
|
"logps/chosen": -210.79525756835938, |
|
"logps/rejected": -239.12855529785156, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.016356829553842545, |
|
"rewards/margins": -0.016894912347197533, |
|
"rewards/rejected": 0.0005380823276937008, |
|
"step": 33 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7726426124572754, |
|
"debug/policy_chosen_logps": -225.79507446289062, |
|
"debug/policy_rejected_logits": -1.7965084314346313, |
|
"debug/policy_rejected_logps": -220.8616943359375, |
|
"debug/reference_chosen_logps": -225.74952697753906, |
|
"debug/reference_rejected_logps": -221.5814666748047, |
|
"epoch": 0.8095238095238095, |
|
"grad_norm": 5.792452647884142, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7726426124572754, |
|
"logits/rejected": -1.7965084314346313, |
|
"logps/chosen": -225.79507446289062, |
|
"logps/rejected": -220.8616943359375, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.00045545492321252823, |
|
"rewards/margins": -0.007653160020709038, |
|
"rewards/rejected": 0.007197704166173935, |
|
"step": 34 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.6315754652023315, |
|
"debug/policy_chosen_logps": -234.38040161132812, |
|
"debug/policy_rejected_logits": -1.613837718963623, |
|
"debug/policy_rejected_logps": -246.2030029296875, |
|
"debug/reference_chosen_logps": -236.5584716796875, |
|
"debug/reference_rejected_logps": -244.6172332763672, |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 5.023497019038571, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.6315754652023315, |
|
"logits/rejected": -1.613837718963623, |
|
"logps/chosen": -234.38040161132812, |
|
"logps/rejected": -246.2030029296875, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.021780777722597122, |
|
"rewards/margins": 0.0376385897397995, |
|
"rewards/rejected": -0.015857810154557228, |
|
"step": 35 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.733331561088562, |
|
"debug/policy_chosen_logps": -232.724609375, |
|
"debug/policy_rejected_logits": -1.7223798036575317, |
|
"debug/policy_rejected_logps": -222.6837158203125, |
|
"debug/reference_chosen_logps": -233.48643493652344, |
|
"debug/reference_rejected_logps": -222.51834106445312, |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 4.953158777901106, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.733331561088562, |
|
"logits/rejected": -1.7223798036575317, |
|
"logps/chosen": -232.724609375, |
|
"logps/rejected": -222.6837158203125, |
|
"loss": 0.4898, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.00761817954480648, |
|
"rewards/margins": 0.009272022172808647, |
|
"rewards/rejected": -0.0016538426280021667, |
|
"step": 36 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.859521508216858, |
|
"debug/policy_chosen_logps": -225.0269775390625, |
|
"debug/policy_rejected_logits": -1.8794121742248535, |
|
"debug/policy_rejected_logps": -238.612548828125, |
|
"debug/reference_chosen_logps": -226.9108428955078, |
|
"debug/reference_rejected_logps": -235.47463989257812, |
|
"epoch": 0.8809523809523809, |
|
"grad_norm": 4.654227939974065, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.859521508216858, |
|
"logits/rejected": -1.8794121742248535, |
|
"logps/chosen": -225.0269775390625, |
|
"logps/rejected": -238.612548828125, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.01883869245648384, |
|
"rewards/margins": 0.05021755024790764, |
|
"rewards/rejected": -0.031378861516714096, |
|
"step": 37 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8789831399917603, |
|
"debug/policy_chosen_logps": -208.9774169921875, |
|
"debug/policy_rejected_logits": -1.8391132354736328, |
|
"debug/policy_rejected_logps": -209.525390625, |
|
"debug/reference_chosen_logps": -207.11997985839844, |
|
"debug/reference_rejected_logps": -208.88546752929688, |
|
"epoch": 0.9047619047619048, |
|
"grad_norm": 5.750158702312609, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8789831399917603, |
|
"logits/rejected": -1.8391132354736328, |
|
"logps/chosen": -208.9774169921875, |
|
"logps/rejected": -209.525390625, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.018574314191937447, |
|
"rewards/margins": -0.012175025418400764, |
|
"rewards/rejected": -0.0063992878422141075, |
|
"step": 38 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8669378757476807, |
|
"debug/policy_chosen_logps": -256.5040283203125, |
|
"debug/policy_rejected_logits": -1.771195411682129, |
|
"debug/policy_rejected_logps": -240.91348266601562, |
|
"debug/reference_chosen_logps": -256.64874267578125, |
|
"debug/reference_rejected_logps": -240.50926208496094, |
|
"epoch": 0.9285714285714286, |
|
"grad_norm": 5.01330786712291, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8669378757476807, |
|
"logits/rejected": -1.771195411682129, |
|
"logps/chosen": -256.5040283203125, |
|
"logps/rejected": -240.91348266601562, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0014472389593720436, |
|
"rewards/margins": 0.005489482544362545, |
|
"rewards/rejected": -0.004042243584990501, |
|
"step": 39 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.8057416677474976, |
|
"debug/policy_chosen_logps": -199.05746459960938, |
|
"debug/policy_rejected_logits": -1.72264564037323, |
|
"debug/policy_rejected_logps": -230.5078125, |
|
"debug/reference_chosen_logps": -198.6903533935547, |
|
"debug/reference_rejected_logps": -230.05313110351562, |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 4.6267949338716425, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.8057416677474976, |
|
"logits/rejected": -1.72264564037323, |
|
"logps/chosen": -199.05746459960938, |
|
"logps/rejected": -230.5078125, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0036712647415697575, |
|
"rewards/margins": 0.0008756828028708696, |
|
"rewards/rejected": -0.004546947777271271, |
|
"step": 40 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.7298365831375122, |
|
"debug/policy_chosen_logps": -215.87039184570312, |
|
"debug/policy_rejected_logits": -1.7618889808654785, |
|
"debug/policy_rejected_logps": -218.1046600341797, |
|
"debug/reference_chosen_logps": -216.2220001220703, |
|
"debug/reference_rejected_logps": -218.1280059814453, |
|
"epoch": 0.9761904761904762, |
|
"grad_norm": 5.043714508829569, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.7298365831375122, |
|
"logits/rejected": -1.7618889808654785, |
|
"logps/chosen": -215.87039184570312, |
|
"logps/rejected": -218.1046600341797, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.003516044234856963, |
|
"rewards/margins": 0.003282546764239669, |
|
"rewards/rejected": 0.00023349723778665066, |
|
"step": 41 |
|
}, |
|
{ |
|
"debug/policy_chosen_logits": -1.9528135061264038, |
|
"debug/policy_chosen_logps": -210.9334259033203, |
|
"debug/policy_rejected_logits": -1.8600839376449585, |
|
"debug/policy_rejected_logps": -225.0123291015625, |
|
"debug/reference_chosen_logps": -212.72988891601562, |
|
"debug/reference_rejected_logps": -225.8553924560547, |
|
"epoch": 1.0, |
|
"grad_norm": 5.608640172933804, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -1.9528135061264038, |
|
"logits/rejected": -1.8600839376449585, |
|
"logps/chosen": -210.9334259033203, |
|
"logps/rejected": -225.0123291015625, |
|
"loss": 0.468, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.017964612692594528, |
|
"rewards/margins": 0.009534033946692944, |
|
"rewards/rejected": 0.00843057781457901, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 42, |
|
"total_flos": 0.0, |
|
"train_loss": 0.49727832632405417, |
|
"train_runtime": 148.4721, |
|
"train_samples_per_second": 17.822, |
|
"train_steps_per_second": 0.283 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 42, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|