{ "epoch": 0.9990762978015888, "eval_logits/chosen": -1.0541683435440063, "eval_logits/rejected": -0.8055270910263062, "eval_logps/chosen": -522.3192749023438, "eval_logps/rejected": -728.829833984375, "eval_loss": 0.7119433283805847, "eval_rewards/accuracies": 0.7080000042915344, "eval_rewards/chosen": -4.293468475341797, "eval_rewards/margins": 1.8028156757354736, "eval_rewards/rejected": -6.096283435821533, "eval_runtime": 191.3844, "eval_samples": 1999, "eval_samples_per_second": 10.445, "eval_steps_per_second": 1.306, "total_flos": 0.0, "train_loss": 0.3691282767280789, "train_runtime": 28319.5283, "train_samples": 64953, "train_samples_per_second": 2.294, "train_steps_per_second": 0.018 }