{ "epoch": 1.0, "eval_logits/chosen": -2.159156560897827, "eval_logits/rejected": -1.9642761945724487, "eval_logps/chosen": -304.89056396484375, "eval_logps/rejected": -175.10516357421875, "eval_loss": 0.04293975234031677, "eval_rewards/accuracies": 0.9741379022598267, "eval_rewards/chosen": -0.5987315773963928, "eval_rewards/margins": 9.656463623046875, "eval_rewards/rejected": -10.255194664001465, "eval_runtime": 497.484, "eval_samples": 1843, "eval_samples_per_second": 3.705, "eval_steps_per_second": 0.117 }