{ "epoch": 3.997808219178082, "eval_logits/chosen": -2.860849618911743, "eval_logits/rejected": -2.3769683837890625, "eval_logps/chosen": -706.0082397460938, "eval_logps/rejected": -596.4307861328125, "eval_loss": 0.40534183382987976, "eval_rewards/accuracies": 0.7142857313156128, "eval_rewards/chosen": 2.9905052185058594, "eval_rewards/margins": 4.6127777099609375, "eval_rewards/rejected": -1.6222723722457886, "eval_runtime": 14.2958, "eval_samples_per_second": 7.695, "eval_steps_per_second": 0.979 }