NicholasCorrado's picture
End of training
30a9f05 verified
{
"epoch": 0.9993060374739764,
"eval_alpha_0_uf": 0.62870192527771,
"eval_alpha_1_rlced_conifer": 0.3712981045246124,
"eval_excess_loss": 0.053242921321547794,
"eval_logits/chosen": 0.24552154541015625,
"eval_logits/chosen_0_uf": 1.2438372373580933,
"eval_logits/chosen_1_rlced_conifer": -0.06254024803638458,
"eval_logits/rejected": 2.208714246749878,
"eval_logits/rejected_0_uf": 3.119086503982544,
"eval_logits/rejected_1_rlced_conifer": 2.0085506439208984,
"eval_logps/chosen": -612.7692260742188,
"eval_logps/chosen_0_uf": -476.1427001953125,
"eval_logps/chosen_1_rlced_conifer": -652.2681884765625,
"eval_logps/rejected": -988.8446655273438,
"eval_logps/rejected_0_uf": -531.029541015625,
"eval_logps/rejected_1_rlced_conifer": -1115.4857177734375,
"eval_loss": 0.25722306966781616,
"eval_rewards/accuracies": 0.8674812316894531,
"eval_rewards/accuracies_0_uf": 0.7396755218505859,
"eval_rewards/accuracies_1_rlced_conifer": 0.902998685836792,
"eval_rewards/chosen": -2.2029974460601807,
"eval_rewards/chosen_0_uf": -1.8688322305679321,
"eval_rewards/chosen_1_rlced_conifer": -2.286877155303955,
"eval_rewards/margins": 3.64809513092041,
"eval_rewards/margins_0_uf": 1.0253992080688477,
"eval_rewards/margins_1_rlced_conifer": 4.392623424530029,
"eval_rewards/rejected": -5.851092338562012,
"eval_rewards/rejected_0_uf": -2.8942313194274902,
"eval_rewards/rejected_1_rlced_conifer": -6.679501056671143,
"eval_runtime": 385.4796,
"eval_samples": 8491,
"eval_samples_per_second": 22.027,
"eval_steps_per_second": 0.345,
"eval_task_excess_loss_0_uf": 0.06643412373465511,
"eval_task_excess_loss_1_rlced_conifer": 0.06451128244947527,
"eval_task_loss_0_uf": 0.5240045189857483,
"eval_task_loss_1_rlced_conifer": 0.1961873173713684,
"total_flos": 0.0,
"train_loss": 0.20697549391123984,
"train_runtime": 21529.0067,
"train_samples": 184443,
"train_samples_per_second": 8.567,
"train_steps_per_second": 0.033
}