|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9874476987447699, |
|
"eval_steps": 500, |
|
"global_step": 59, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -1.778551697731018, |
|
"logits/rejected": -1.5408724546432495, |
|
"logps/chosen": -277.6687927246094, |
|
"logps/pi_response": -231.2880859375, |
|
"logps/ref_response": -231.2880859375, |
|
"logps/rejected": -499.2101135253906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.930057285201027e-07, |
|
"logits/chosen": -1.5793159008026123, |
|
"logits/rejected": -1.3809763193130493, |
|
"logps/chosen": -353.9909362792969, |
|
"logps/pi_response": -215.0353240966797, |
|
"logps/ref_response": -209.04591369628906, |
|
"logps/rejected": -626.6854858398438, |
|
"loss": 0.634, |
|
"rewards/accuracies": 0.6215277910232544, |
|
"rewards/chosen": -0.19814111292362213, |
|
"rewards/margins": 0.29235708713531494, |
|
"rewards/rejected": -0.49049821496009827, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.187457503795526e-07, |
|
"logits/chosen": -0.5755993127822876, |
|
"logits/rejected": -0.19093285501003265, |
|
"logps/chosen": -442.72906494140625, |
|
"logps/pi_response": -229.82565307617188, |
|
"logps/ref_response": -177.60939025878906, |
|
"logps/rejected": -871.7946166992188, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2860958576202393, |
|
"rewards/margins": 1.7290973663330078, |
|
"rewards/rejected": -3.015193462371826, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8691164100062034e-07, |
|
"logits/chosen": -0.4923222064971924, |
|
"logits/rejected": 0.08401201665401459, |
|
"logps/chosen": -401.0098571777344, |
|
"logps/pi_response": -204.19842529296875, |
|
"logps/ref_response": -172.53121948242188, |
|
"logps/rejected": -765.5057373046875, |
|
"loss": 0.4652, |
|
"rewards/accuracies": 0.784375011920929, |
|
"rewards/chosen": -0.8636873364448547, |
|
"rewards/margins": 1.3079454898834229, |
|
"rewards/rejected": -2.171632766723633, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4248369943086995e-07, |
|
"logits/chosen": 0.057084761559963226, |
|
"logits/rejected": 0.7233976125717163, |
|
"logps/chosen": -394.339111328125, |
|
"logps/pi_response": -229.233642578125, |
|
"logps/ref_response": -169.03140258789062, |
|
"logps/rejected": -873.4837646484375, |
|
"loss": 0.4326, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.0300148725509644, |
|
"rewards/margins": 1.727870225906372, |
|
"rewards/rejected": -2.757884979248047, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.473909705816111e-08, |
|
"logits/chosen": 0.40286731719970703, |
|
"logits/rejected": 1.1685023307800293, |
|
"logps/chosen": -439.542724609375, |
|
"logps/pi_response": -257.29583740234375, |
|
"logps/ref_response": -178.077880859375, |
|
"logps/rejected": -824.6487426757812, |
|
"loss": 0.438, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.21076238155365, |
|
"rewards/margins": 1.5581530332565308, |
|
"rewards/rejected": -2.7689156532287598, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 59, |
|
"total_flos": 0.0, |
|
"train_loss": 0.500854310342821, |
|
"train_runtime": 3404.3015, |
|
"train_samples_per_second": 4.489, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|