{ "epoch": 1.0, "eval_logits/chosen": 0.951361358165741, "eval_logits/rejected": 1.4587281942367554, "eval_logps/chosen": -426.3906555175781, "eval_logps/rejected": -491.477783203125, "eval_loss": 0.5107128024101257, "eval_rewards/accuracies": 0.77182537317276, "eval_rewards/chosen": -1.464455246925354, "eval_rewards/margins": 0.8910558819770813, "eval_rewards/rejected": -2.355511426925659, "eval_runtime": 244.9143, "eval_samples": 35044, "eval_samples_per_second": 8.166, "eval_steps_per_second": 0.257, "train_loss": 0.5487770005670517, "train_runtime": 16595.3532, "train_samples": 179264, "train_samples_per_second": 3.684, "train_steps_per_second": 0.058 }