{ "epoch": 0.9993060374739764, "eval_alpha_0_uf": 0.62870192527771, "eval_alpha_1_rlced_conifer": 0.3712981045246124, "eval_excess_loss": 0.053242921321547794, "eval_logits/chosen": 0.24552154541015625, "eval_logits/chosen_0_uf": 1.2438372373580933, "eval_logits/chosen_1_rlced_conifer": -0.06254024803638458, "eval_logits/rejected": 2.208714246749878, "eval_logits/rejected_0_uf": 3.119086503982544, "eval_logits/rejected_1_rlced_conifer": 2.0085506439208984, "eval_logps/chosen": -612.7692260742188, "eval_logps/chosen_0_uf": -476.1427001953125, "eval_logps/chosen_1_rlced_conifer": -652.2681884765625, "eval_logps/rejected": -988.8446655273438, "eval_logps/rejected_0_uf": -531.029541015625, "eval_logps/rejected_1_rlced_conifer": -1115.4857177734375, "eval_loss": 0.25722306966781616, "eval_rewards/accuracies": 0.8674812316894531, "eval_rewards/accuracies_0_uf": 0.7396755218505859, "eval_rewards/accuracies_1_rlced_conifer": 0.902998685836792, "eval_rewards/chosen": -2.2029974460601807, "eval_rewards/chosen_0_uf": -1.8688322305679321, "eval_rewards/chosen_1_rlced_conifer": -2.286877155303955, "eval_rewards/margins": 3.64809513092041, "eval_rewards/margins_0_uf": 1.0253992080688477, "eval_rewards/margins_1_rlced_conifer": 4.392623424530029, "eval_rewards/rejected": -5.851092338562012, "eval_rewards/rejected_0_uf": -2.8942313194274902, "eval_rewards/rejected_1_rlced_conifer": -6.679501056671143, "eval_runtime": 385.4796, "eval_samples": 8491, "eval_samples_per_second": 22.027, "eval_steps_per_second": 0.345, "eval_task_excess_loss_0_uf": 0.06643412373465511, "eval_task_excess_loss_1_rlced_conifer": 0.06451128244947527, "eval_task_loss_0_uf": 0.5240045189857483, "eval_task_loss_1_rlced_conifer": 0.1961873173713684, "total_flos": 0.0, "train_loss": 0.20697549391123984, "train_runtime": 21529.0067, "train_samples": 184443, "train_samples_per_second": 8.567, "train_steps_per_second": 0.033 }