{ "epoch": 0.9993060374739764, "eval_logits/chosen": -0.9284578561782837, "eval_logits/rejected": -0.923488974571228, "eval_logps/chosen": -483.4286193847656, "eval_logps/rejected": -527.1544189453125, "eval_loss": 0.6072572469711304, "eval_rewards/accuracies": 0.7875939607620239, "eval_rewards/chosen": -0.22044964134693146, "eval_rewards/margins": 0.19842886924743652, "eval_rewards/rejected": -0.4188785254955292, "eval_runtime": 379.0532, "eval_samples": 8491, "eval_samples_per_second": 22.401, "eval_steps_per_second": 0.351 }