{ "epoch": 1.0, "eval_logits/chosen": -2.159156560897827, "eval_logits/rejected": -1.9642761945724487, "eval_logps/chosen": -304.89056396484375, "eval_logps/rejected": -175.10516357421875, "eval_loss": 0.04293975234031677, "eval_rewards/accuracies": 0.9741379022598267, "eval_rewards/chosen": -0.5987315773963928, "eval_rewards/margins": 9.656463623046875, "eval_rewards/rejected": -10.255194664001465, "eval_runtime": 497.484, "eval_samples": 1843, "eval_samples_per_second": 3.705, "eval_steps_per_second": 0.117, "train_loss": 0.06308119606848633, "train_runtime": 23531.0319, "train_samples": 55762, "train_samples_per_second": 2.37, "train_steps_per_second": 0.074 }