|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9874476987447699, |
|
"eval_steps": 500, |
|
"global_step": 59, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -2.809058427810669, |
|
"logits/rejected": -2.8124935626983643, |
|
"logps/chosen": -318.11346435546875, |
|
"logps/rejected": -229.77012634277344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.930057285201027e-07, |
|
"logits/chosen": -2.7690165042877197, |
|
"logits/rejected": -2.7547106742858887, |
|
"logps/chosen": -277.6565246582031, |
|
"logps/rejected": -264.4493408203125, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.4791666567325592, |
|
"rewards/chosen": 0.003391754813492298, |
|
"rewards/margins": 0.002254640683531761, |
|
"rewards/rejected": 0.0011371138971298933, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.187457503795526e-07, |
|
"logits/chosen": -2.79248046875, |
|
"logits/rejected": -2.789917230606079, |
|
"logps/chosen": -264.5072326660156, |
|
"logps/rejected": -252.63845825195312, |
|
"loss": 0.6752, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.026929911226034164, |
|
"rewards/margins": 0.03922198340296745, |
|
"rewards/rejected": -0.012292074970901012, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8691164100062034e-07, |
|
"logits/chosen": -2.7963314056396484, |
|
"logits/rejected": -2.796976089477539, |
|
"logps/chosen": -298.3793640136719, |
|
"logps/rejected": -256.82110595703125, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.021613802760839462, |
|
"rewards/margins": 0.11929114162921906, |
|
"rewards/rejected": -0.14090493321418762, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4248369943086995e-07, |
|
"logits/chosen": -2.7643628120422363, |
|
"logits/rejected": -2.7510218620300293, |
|
"logps/chosen": -265.435302734375, |
|
"logps/rejected": -256.5634765625, |
|
"loss": 0.6363, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1126113086938858, |
|
"rewards/margins": 0.15844564139842987, |
|
"rewards/rejected": -0.2710569500923157, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.473909705816111e-08, |
|
"logits/chosen": -2.76975679397583, |
|
"logits/rejected": -2.7606043815612793, |
|
"logps/chosen": -280.33734130859375, |
|
"logps/rejected": -287.0810241699219, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.20978038012981415, |
|
"rewards/margins": 0.129803866147995, |
|
"rewards/rejected": -0.33958423137664795, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 59, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6501501697604939, |
|
"train_runtime": 1921.715, |
|
"train_samples_per_second": 7.953, |
|
"train_steps_per_second": 0.031 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|