llama3.1-cpo-full-0912 / all_results.json
jbjeong91's picture
End of training
8ff9048 verified
raw
history blame contribute delete
821 Bytes
{
"epoch": 0.9985553308292401,
"eval_logits/chosen": -0.4112316370010376,
"eval_logits/rejected": -0.38529691100120544,
"eval_logps/chosen": -154.36468505859375,
"eval_logps/rejected": -161.3667755126953,
"eval_loss": 1.598533034324646,
"eval_nll_loss": 0.42096075415611267,
"eval_rewards/accuracies": 0.623913049697876,
"eval_rewards/chosen": -15.436468124389648,
"eval_rewards/margins": 0.7002089619636536,
"eval_rewards/rejected": -16.13667869567871,
"eval_runtime": 73.3622,
"eval_samples": 1826,
"eval_samples_per_second": 24.89,
"eval_steps_per_second": 1.568,
"total_flos": 0.0,
"train_loss": 1.77929983039697,
"train_runtime": 9807.604,
"train_samples": 55376,
"train_samples_per_second": 5.646,
"train_steps_per_second": 0.044
}