{ "epoch": 3.0, "eval_logits/chosen": -2.065433979034424, "eval_logits/rejected": -1.9405803680419922, "eval_logps/chosen": -266.1706848144531, "eval_logps/rejected": -228.30780029296875, "eval_loss": 0.5255534052848816, "eval_rewards/accuracies": 0.7419999837875366, "eval_rewards/chosen": -0.15385985374450684, "eval_rewards/margins": 0.7486297488212585, "eval_rewards/rejected": -0.9024895429611206, "eval_runtime": 601.6531, "eval_samples": 2000, "eval_samples_per_second": 3.324, "eval_steps_per_second": 0.208, "train_loss": 0.5642068754707158, "train_runtime": 89225.6094, "train_samples": 61966, "train_samples_per_second": 2.083, "train_steps_per_second": 0.033 }