{ "epoch": 3.997808219178082, "eval_logits/chosen": -2.860849618911743, "eval_logits/rejected": -2.3769683837890625, "eval_logps/chosen": -706.0082397460938, "eval_logps/rejected": -596.4307861328125, "eval_loss": 0.40534183382987976, "eval_rewards/accuracies": 0.7142857313156128, "eval_rewards/chosen": 2.9905052185058594, "eval_rewards/margins": 4.6127777099609375, "eval_rewards/rejected": -1.6222723722457886, "eval_runtime": 14.2958, "eval_samples_per_second": 7.695, "eval_steps_per_second": 0.979, "total_flos": 519662108934144.0, "train_loss": 0.17924007512469045, "train_runtime": 53882.8337, "train_samples_per_second": 3.251, "train_steps_per_second": 0.102 }