{ "epoch": 1.0, "eval_logits/chosen": -1.1930192708969116, "eval_logits/rejected": -1.2072994709014893, "eval_logps/chosen": -79.19204711914062, "eval_logps/rejected": -98.96011352539062, "eval_loss": 0.6765820384025574, "eval_rewards/accuracies": 0.6319444179534912, "eval_rewards/chosen": -0.08277318626642227, "eval_rewards/margins": 0.03162948787212372, "eval_rewards/rejected": -0.11440268903970718, "eval_runtime": 48.68, "eval_samples": 567, "eval_samples_per_second": 11.647, "eval_steps_per_second": 0.37, "train_loss": 0.6676328546471066, "train_runtime": 2437.1919, "train_samples": 13823, "train_samples_per_second": 5.672, "train_steps_per_second": 0.089 }