{ "epoch": 3.0, "eval_logits/chosen": -1.0759214162826538, "eval_logits/rejected": -0.9100367426872253, "eval_logps/chosen": -2645.154052734375, "eval_logps/rejected": -3544.43115234375, "eval_loss": 6.142495155334473, "eval_rewards/accuracies": 0.6259999871253967, "eval_rewards/chosen": -23.702728271484375, "eval_rewards/margins": 9.166375160217285, "eval_rewards/margins_max": 58.90415954589844, "eval_rewards/margins_min": -33.25897216796875, "eval_rewards/margins_std": 29.858272552490234, "eval_rewards/rejected": -32.86910629272461, "eval_runtime": 737.8634, "eval_samples": 2000, "eval_samples_per_second": 2.711, "eval_steps_per_second": 0.169 }