|
{ |
|
"epoch": 0.9993060374739764, |
|
"eval_alpha_0_uf": 0.62870192527771, |
|
"eval_alpha_1_rlced_conifer": 0.3712981045246124, |
|
"eval_excess_loss": 0.053242921321547794, |
|
"eval_logits/chosen": 0.24552154541015625, |
|
"eval_logits/chosen_0_uf": 1.2438372373580933, |
|
"eval_logits/chosen_1_rlced_conifer": -0.06254024803638458, |
|
"eval_logits/rejected": 2.208714246749878, |
|
"eval_logits/rejected_0_uf": 3.119086503982544, |
|
"eval_logits/rejected_1_rlced_conifer": 2.0085506439208984, |
|
"eval_logps/chosen": -612.7692260742188, |
|
"eval_logps/chosen_0_uf": -476.1427001953125, |
|
"eval_logps/chosen_1_rlced_conifer": -652.2681884765625, |
|
"eval_logps/rejected": -988.8446655273438, |
|
"eval_logps/rejected_0_uf": -531.029541015625, |
|
"eval_logps/rejected_1_rlced_conifer": -1115.4857177734375, |
|
"eval_loss": 0.25722306966781616, |
|
"eval_rewards/accuracies": 0.8674812316894531, |
|
"eval_rewards/accuracies_0_uf": 0.7396755218505859, |
|
"eval_rewards/accuracies_1_rlced_conifer": 0.902998685836792, |
|
"eval_rewards/chosen": -2.2029974460601807, |
|
"eval_rewards/chosen_0_uf": -1.8688322305679321, |
|
"eval_rewards/chosen_1_rlced_conifer": -2.286877155303955, |
|
"eval_rewards/margins": 3.64809513092041, |
|
"eval_rewards/margins_0_uf": 1.0253992080688477, |
|
"eval_rewards/margins_1_rlced_conifer": 4.392623424530029, |
|
"eval_rewards/rejected": -5.851092338562012, |
|
"eval_rewards/rejected_0_uf": -2.8942313194274902, |
|
"eval_rewards/rejected_1_rlced_conifer": -6.679501056671143, |
|
"eval_runtime": 385.4796, |
|
"eval_samples": 8491, |
|
"eval_samples_per_second": 22.027, |
|
"eval_steps_per_second": 0.345, |
|
"eval_task_excess_loss_0_uf": 0.06643412373465511, |
|
"eval_task_excess_loss_1_rlced_conifer": 0.06451128244947527, |
|
"eval_task_loss_0_uf": 0.5240045189857483, |
|
"eval_task_loss_1_rlced_conifer": 0.1961873173713684, |
|
"total_flos": 0.0, |
|
"train_loss": 0.20697549391123984, |
|
"train_runtime": 21529.0067, |
|
"train_samples": 184443, |
|
"train_samples_per_second": 8.567, |
|
"train_steps_per_second": 0.033 |
|
} |