{ "epoch": 1.9978142076502732, "eval_logits/chosen": -1.4140465259552002, "eval_logits/rejected": -1.3327398300170898, "eval_logps/chosen": -0.8071117401123047, "eval_logps/rejected": -1.9723843336105347, "eval_loss": 1.2685352563858032, "eval_rewards/accuracies": 0.8674699068069458, "eval_rewards/chosen": -8.071117401123047, "eval_rewards/margins": 11.652724266052246, "eval_rewards/rejected": -19.723840713500977, "eval_runtime": 33.7378, "eval_samples": 1318, "eval_samples_per_second": 39.066, "eval_steps_per_second": 2.46 }