{ "epoch": 0.9994767137624281, "eval_logits/chosen": -1.0034536123275757, "eval_logits/rejected": -0.6818602085113525, "eval_logps/chosen": -1738.2310791015625, "eval_logps/rejected": -2073.0146484375, "eval_loss": 1.3376935720443726, "eval_rewards/accuracies": 0.6388888955116272, "eval_rewards/chosen": -14.562629699707031, "eval_rewards/margins": 3.5654470920562744, "eval_rewards/rejected": -18.12807846069336, "eval_runtime": 177.2147, "eval_samples": 2000, "eval_samples_per_second": 11.286, "eval_steps_per_second": 0.356, "total_flos": 0.0, "train_loss": 2.1165736393154604, "train_runtime": 18133.1885, "train_samples": 61134, "train_samples_per_second": 3.371, "train_steps_per_second": 0.053 }