|
{ |
|
"epoch": 0.9994756161510225, |
|
"eval_log_odds_chosen": 0.4455896019935608, |
|
"eval_log_odds_ratio": -0.6093403697013855, |
|
"eval_logits/chosen": -2.943448543548584, |
|
"eval_logits/rejected": -2.9263010025024414, |
|
"eval_logps/chosen": -0.7282419204711914, |
|
"eval_logps/rejected": -0.9978163242340088, |
|
"eval_loss": 0.4700748324394226, |
|
"eval_nll_loss": 0.4356931149959564, |
|
"eval_rewards/accuracies": 0.658730149269104, |
|
"eval_rewards/chosen": -0.03641209378838539, |
|
"eval_rewards/margins": 0.013478721491992474, |
|
"eval_rewards/rejected": -0.04989081248641014, |
|
"eval_runtime": 137.5022, |
|
"eval_samples": 1994, |
|
"eval_samples_per_second": 14.502, |
|
"eval_steps_per_second": 0.458, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5301580581685054, |
|
"train_runtime": 20737.8205, |
|
"train_samples": 61005, |
|
"train_samples_per_second": 2.942, |
|
"train_steps_per_second": 0.046 |
|
} |