|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.971563981042654, |
|
"eval_steps": 128, |
|
"global_step": 104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018957345971563982, |
|
"grad_norm": 7707.786720275403, |
|
"learning_rate": 4.545454545454545e-08, |
|
"logits/chosen": 117.53560638427734, |
|
"logits/rejected": 126.8960952758789, |
|
"logps/chosen": -335.40118408203125, |
|
"logps/rejected": -439.16552734375, |
|
"loss": 1.0441, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 5401.612875785957, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": 135.22718811035156, |
|
"logits/rejected": 138.51141357421875, |
|
"logps/chosen": -397.9040222167969, |
|
"logps/rejected": -441.5177001953125, |
|
"loss": 4.4651, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": -0.09537385404109955, |
|
"rewards/margins": 0.01413203775882721, |
|
"rewards/rejected": -0.10950590670108795, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 3264.226616028638, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": 125.89383697509766, |
|
"logits/rejected": 129.39395141601562, |
|
"logps/chosen": -378.54852294921875, |
|
"logps/rejected": -427.40765380859375, |
|
"loss": 4.7546, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.30682721734046936, |
|
"rewards/margins": 0.06398675590753555, |
|
"rewards/rejected": -0.3708139657974243, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 4931.31560217951, |
|
"learning_rate": 4.5025027361734613e-07, |
|
"logits/chosen": 149.55715942382812, |
|
"logits/rejected": 142.95787048339844, |
|
"logps/chosen": -387.85272216796875, |
|
"logps/rejected": -423.25390625, |
|
"loss": 4.6409, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.23062637448310852, |
|
"rewards/margins": 0.39320963621139526, |
|
"rewards/rejected": -0.16258326172828674, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 3460.158543068679, |
|
"learning_rate": 3.893311157806091e-07, |
|
"logits/chosen": 132.449462890625, |
|
"logits/rejected": 120.46671295166016, |
|
"logps/chosen": -360.9330139160156, |
|
"logps/rejected": -373.2904968261719, |
|
"loss": 4.3411, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.33694085478782654, |
|
"rewards/margins": 0.4338204264640808, |
|
"rewards/rejected": -0.7707613110542297, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 3177.2465625206323, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": 147.91026306152344, |
|
"logits/rejected": 152.0844268798828, |
|
"logps/chosen": -425.0814514160156, |
|
"logps/rejected": -489.4386291503906, |
|
"loss": 3.941, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2968811094760895, |
|
"rewards/margins": 0.3587321639060974, |
|
"rewards/rejected": -0.6556132435798645, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1374407582938388, |
|
"grad_norm": 2157.319676771431, |
|
"learning_rate": 2.2891223348923882e-07, |
|
"logits/chosen": 137.85789489746094, |
|
"logits/rejected": 141.48818969726562, |
|
"logps/chosen": -409.8984375, |
|
"logps/rejected": -465.19024658203125, |
|
"loss": 2.3787, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5946376919746399, |
|
"rewards/margins": 0.846598744392395, |
|
"rewards/rejected": -1.4412362575531006, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.3270142180094786, |
|
"grad_norm": 2309.104547581061, |
|
"learning_rate": 1.4754491880085317e-07, |
|
"logits/chosen": 131.42678833007812, |
|
"logits/rejected": 133.21844482421875, |
|
"logps/chosen": -372.8443298339844, |
|
"logps/rejected": -443.36358642578125, |
|
"loss": 1.8286, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.28502368927001953, |
|
"rewards/margins": 1.2074631452560425, |
|
"rewards/rejected": -1.492486834526062, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.5165876777251186, |
|
"grad_norm": 1891.7921596452784, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": 119.9256820678711, |
|
"logits/rejected": 136.48963928222656, |
|
"logps/chosen": -362.90484619140625, |
|
"logps/rejected": -442.4962463378906, |
|
"loss": 1.3789, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.46269264817237854, |
|
"rewards/margins": 1.0002342462539673, |
|
"rewards/rejected": -1.4629267454147339, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.7061611374407581, |
|
"grad_norm": 1690.6090366045441, |
|
"learning_rate": 2.7440387297912122e-08, |
|
"logits/chosen": 121.07257080078125, |
|
"logits/rejected": 134.4269561767578, |
|
"logps/chosen": -393.3634033203125, |
|
"logps/rejected": -468.45050048828125, |
|
"loss": 1.3089, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.346617192029953, |
|
"rewards/margins": 1.1918184757232666, |
|
"rewards/rejected": -1.538435697555542, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 1998.5413209764397, |
|
"learning_rate": 2.27878296044029e-09, |
|
"logits/chosen": 128.20663452148438, |
|
"logits/rejected": 128.03355407714844, |
|
"logps/chosen": -385.4119873046875, |
|
"logps/rejected": -443.74346923828125, |
|
"loss": 1.0587, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.17184433341026306, |
|
"rewards/margins": 1.1771280765533447, |
|
"rewards/rejected": -1.3489725589752197, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.971563981042654, |
|
"step": 104, |
|
"total_flos": 0.0, |
|
"train_loss": 2.9736096469255595, |
|
"train_runtime": 2166.7366, |
|
"train_samples_per_second": 6.231, |
|
"train_steps_per_second": 0.048 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 104, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|