{
  "epoch": 3.0,
  "eval_logits/chosen": -1.0759214162826538,
  "eval_logits/rejected": -0.9100367426872253,
  "eval_logps/chosen": -2645.154052734375,
  "eval_logps/rejected": -3544.43115234375,
  "eval_loss": 6.142495155334473,
  "eval_rewards/accuracies": 0.6259999871253967,
  "eval_rewards/chosen": -23.702728271484375,
  "eval_rewards/margins": 9.166375160217285,
  "eval_rewards/margins_max": 58.90415954589844,
  "eval_rewards/margins_min": -33.25897216796875,
  "eval_rewards/margins_std": 29.858272552490234,
  "eval_rewards/rejected": -32.86910629272461,
  "eval_runtime": 737.8634,
  "eval_samples": 2000,
  "eval_samples_per_second": 2.711,
  "eval_steps_per_second": 0.169,
  "train_loss": 0.1413031851636692,
  "train_runtime": 20921.2576,
  "train_samples": 5678,
  "train_samples_per_second": 0.814,
  "train_steps_per_second": 0.051
}