zephyr-7b-dpo-qlora / eval_results.json
guoqiang-x's picture
End of training
0fedd69 verified
raw
history blame contribute delete
574 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -1.1651915311813354,
"eval_logits/rejected": -1.0289729833602905,
"eval_logps/chosen": -526.7527465820312,
"eval_logps/rejected": -636.4378662109375,
"eval_loss": 0.47883233428001404,
"eval_rewards/accuracies": 0.7465000152587891,
"eval_rewards/chosen": -2.6215085983276367,
"eval_rewards/margins": 1.2971659898757935,
"eval_rewards/rejected": -3.9186742305755615,
"eval_runtime": 1597.3624,
"eval_samples": 2000,
"eval_samples_per_second": 1.252,
"eval_steps_per_second": 0.157
}