|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 96, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010416666666666666, |
|
"grad_norm": 23.252944189766428, |
|
"learning_rate": 8.639811904061041e-08, |
|
"logits/chosen": -2.590585231781006, |
|
"logits/rejected": -2.5664222240448, |
|
"logps/chosen": -80.29847717285156, |
|
"logps/rejected": -53.10200881958008, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 21.182382477045586, |
|
"learning_rate": 8.639811904061041e-07, |
|
"logits/chosen": -2.5559909343719482, |
|
"logits/rejected": -2.5379226207733154, |
|
"logps/chosen": -87.82003021240234, |
|
"logps/rejected": -80.9332046508789, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.2013888955116272, |
|
"rewards/chosen": 0.00335866492241621, |
|
"rewards/margins": -0.0003586374514270574, |
|
"rewards/rejected": 0.003717302344739437, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 17.233720974528225, |
|
"learning_rate": 7.635182612891153e-07, |
|
"logits/chosen": -2.5788445472717285, |
|
"logits/rejected": -2.528242588043213, |
|
"logps/chosen": -101.24139404296875, |
|
"logps/rejected": -88.24673461914062, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 0.07447633892297745, |
|
"rewards/margins": 0.016661062836647034, |
|
"rewards/rejected": 0.057815272361040115, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 25.6903065091126, |
|
"learning_rate": 6.630553321721264e-07, |
|
"logits/chosen": -2.4120750427246094, |
|
"logits/rejected": -2.4285130500793457, |
|
"logps/chosen": -67.22891235351562, |
|
"logps/rejected": -77.24456787109375, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": 0.007355662528425455, |
|
"rewards/margins": 0.08921505510807037, |
|
"rewards/rejected": -0.08185939490795135, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 19.22554951858603, |
|
"learning_rate": 5.625924030551376e-07, |
|
"logits/chosen": -2.4893252849578857, |
|
"logits/rejected": -2.4751393795013428, |
|
"logps/chosen": -74.36286163330078, |
|
"logps/rejected": -75.44730377197266, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.07824570685625076, |
|
"rewards/margins": 0.08888493478298187, |
|
"rewards/rejected": -0.16713064908981323, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 19.08251678108842, |
|
"learning_rate": 4.6212947393814867e-07, |
|
"logits/chosen": -2.4168150424957275, |
|
"logits/rejected": -2.4291889667510986, |
|
"logps/chosen": -52.590057373046875, |
|
"logps/rejected": -62.784461975097656, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.23125000298023224, |
|
"rewards/chosen": -0.059627026319503784, |
|
"rewards/margins": 0.07718921452760696, |
|
"rewards/rejected": -0.13681624829769135, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 21.66166836395161, |
|
"learning_rate": 3.6166654482115984e-07, |
|
"logits/chosen": -2.489243984222412, |
|
"logits/rejected": -2.4673056602478027, |
|
"logps/chosen": -82.04798889160156, |
|
"logps/rejected": -87.74610137939453, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.10516528785228729, |
|
"rewards/margins": 0.19169361889362335, |
|
"rewards/rejected": -0.29685890674591064, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7291666666666666, |
|
"grad_norm": 19.133621385765668, |
|
"learning_rate": 2.6120361570417096e-07, |
|
"logits/chosen": -2.452115535736084, |
|
"logits/rejected": -2.4336700439453125, |
|
"logps/chosen": -96.10133361816406, |
|
"logps/rejected": -90.39111328125, |
|
"loss": 0.6395, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 0.008597126230597496, |
|
"rewards/margins": 0.1830345243215561, |
|
"rewards/rejected": -0.17443740367889404, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 34.22517043366701, |
|
"learning_rate": 1.6074068658718216e-07, |
|
"logits/chosen": -2.4384665489196777, |
|
"logits/rejected": -2.3800644874572754, |
|
"logps/chosen": -83.1066665649414, |
|
"logps/rejected": -85.15121459960938, |
|
"loss": 0.6206, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.0896192193031311, |
|
"rewards/margins": 0.29328036308288574, |
|
"rewards/rejected": -0.20366115868091583, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 28.270052144663545, |
|
"learning_rate": 6.027775747019331e-08, |
|
"logits/chosen": -2.3832895755767822, |
|
"logits/rejected": -2.3764870166778564, |
|
"logps/chosen": -53.896759033203125, |
|
"logps/rejected": -69.18147277832031, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": 0.0029330668039619923, |
|
"rewards/margins": 0.20463672280311584, |
|
"rewards/rejected": -0.20170363783836365, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 96, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6540692721803983, |
|
"train_runtime": 977.0227, |
|
"train_samples_per_second": 6.257, |
|
"train_steps_per_second": 0.098 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 96, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|