{ "epoch": 3.0, "eval_logits/chosen": -2.259361505508423, "eval_logits/rejected": -2.14780330657959, "eval_logps/chosen": -263.0772705078125, "eval_logps/rejected": -221.30685424804688, "eval_loss": 0.5746620893478394, "eval_rewards/accuracies": 0.7059999704360962, "eval_rewards/chosen": -0.014065464027225971, "eval_rewards/margins": 0.4006173312664032, "eval_rewards/rejected": -0.41468286514282227, "eval_runtime": 237.4099, "eval_samples": 2000, "eval_samples_per_second": 8.424, "eval_steps_per_second": 0.527 }