|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 78, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.038770139725663, |
|
"learning_rate": 6.25e-07, |
|
"logits/chosen": -1.3596761226654053, |
|
"logits/rejected": -1.0023326873779297, |
|
"logps/chosen": -450.79583740234375, |
|
"logps/rejected": -781.127197265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.222015988708302, |
|
"learning_rate": 4.989935734988098e-06, |
|
"logits/chosen": -1.0887341499328613, |
|
"logits/rejected": -1.0588740110397339, |
|
"logps/chosen": -564.32080078125, |
|
"logps/rejected": -855.5671997070312, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.6944444179534912, |
|
"rewards/chosen": -0.007345028221607208, |
|
"rewards/margins": 0.07519946992397308, |
|
"rewards/rejected": -0.08254450559616089, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.800980531623392, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": -1.4986072778701782, |
|
"logits/rejected": -2.113752603530884, |
|
"logps/chosen": -606.9659423828125, |
|
"logps/rejected": -1018.4754638671875, |
|
"loss": 0.4124, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.3804299235343933, |
|
"rewards/margins": 1.465038537979126, |
|
"rewards/rejected": -1.845468521118164, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 4.950457492388352, |
|
"learning_rate": 3.8772424536302565e-06, |
|
"logits/chosen": -1.7479159832000732, |
|
"logits/rejected": -2.567039966583252, |
|
"logps/chosen": -614.0457153320312, |
|
"logps/rejected": -1108.7679443359375, |
|
"loss": 0.3199, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.10914883762598038, |
|
"rewards/margins": 2.6420645713806152, |
|
"rewards/rejected": -2.751213550567627, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 4.011791311794506, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": -1.7483808994293213, |
|
"logits/rejected": -2.6819939613342285, |
|
"logps/chosen": -644.5540161132812, |
|
"logps/rejected": -1273.226318359375, |
|
"loss": 0.2279, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.04285002499818802, |
|
"rewards/margins": 4.04502010345459, |
|
"rewards/rejected": -4.002170085906982, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.2682180093043582, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -1.5594079494476318, |
|
"logits/rejected": -2.4641501903533936, |
|
"logps/chosen": -627.8310546875, |
|
"logps/rejected": -1211.9755859375, |
|
"loss": 0.2181, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.09700597822666168, |
|
"rewards/margins": 3.8286356925964355, |
|
"rewards/rejected": -3.7316298484802246, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.445366662132522, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": -1.6432807445526123, |
|
"logits/rejected": -2.286984920501709, |
|
"logps/chosen": -544.6373291015625, |
|
"logps/rejected": -1150.681396484375, |
|
"loss": 0.1556, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.27296125888824463, |
|
"rewards/margins": 3.291428327560425, |
|
"rewards/rejected": -3.0184669494628906, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 4.67000331566183, |
|
"learning_rate": 1.59412823400657e-07, |
|
"logits/chosen": -1.5773608684539795, |
|
"logits/rejected": -2.4722862243652344, |
|
"logps/chosen": -636.8548583984375, |
|
"logps/rejected": -1234.2066650390625, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": 0.26559901237487793, |
|
"rewards/margins": 4.093817234039307, |
|
"rewards/rejected": -3.828218460083008, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 78, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2915988182410216, |
|
"train_runtime": 1026.489, |
|
"train_samples_per_second": 4.863, |
|
"train_steps_per_second": 0.076 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 78, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|