|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 137, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.145985401459854, |
|
"grad_norm": 2.3561489582061768, |
|
"learning_rate": 4.970701059450872e-06, |
|
"log_odds_chosen": 2.3852906227111816, |
|
"log_odds_ratio": -0.13181139528751373, |
|
"logits/chosen": -0.7752584218978882, |
|
"logits/rejected": -0.7647705674171448, |
|
"logps/chosen": -0.44917869567871094, |
|
"logps/rejected": -1.702678918838501, |
|
"loss": 0.6002, |
|
"nll_loss": 0.5870461463928223, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.04491787031292915, |
|
"rewards/margins": 0.12535002827644348, |
|
"rewards/rejected": -0.17026789486408234, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.291970802919708, |
|
"grad_norm": 1.8824354410171509, |
|
"learning_rate": 4.4687157653336645e-06, |
|
"log_odds_chosen": 2.5571141242980957, |
|
"log_odds_ratio": -0.10861808061599731, |
|
"logits/chosen": -0.7587701082229614, |
|
"logits/rejected": -0.7438877820968628, |
|
"logps/chosen": -0.3850471079349518, |
|
"logps/rejected": -1.695639967918396, |
|
"loss": 0.4998, |
|
"nll_loss": 0.48895248770713806, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.038504708558321, |
|
"rewards/margins": 0.13105927407741547, |
|
"rewards/rejected": -0.16956397891044617, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.43795620437956206, |
|
"grad_norm": 1.936366081237793, |
|
"learning_rate": 3.46407781122034e-06, |
|
"log_odds_chosen": 2.8851490020751953, |
|
"log_odds_ratio": -0.09776415675878525, |
|
"logits/chosen": -0.8007665872573853, |
|
"logits/rejected": -0.7795315980911255, |
|
"logps/chosen": -0.3232823610305786, |
|
"logps/rejected": -1.8261045217514038, |
|
"loss": 0.454, |
|
"nll_loss": 0.4442494809627533, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.03232823684811592, |
|
"rewards/margins": 0.15028223395347595, |
|
"rewards/rejected": -0.18261046707630157, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.583941605839416, |
|
"grad_norm": 1.9113825559616089, |
|
"learning_rate": 2.2132914365039993e-06, |
|
"log_odds_chosen": 2.7028260231018066, |
|
"log_odds_ratio": -0.10017122328281403, |
|
"logits/chosen": -0.8422906994819641, |
|
"logits/rejected": -0.8227804899215698, |
|
"logps/chosen": -0.3254713714122772, |
|
"logps/rejected": -1.66717529296875, |
|
"loss": 0.4622, |
|
"nll_loss": 0.45214977860450745, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.03254713863134384, |
|
"rewards/margins": 0.13417038321495056, |
|
"rewards/rejected": -0.166717529296875, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7299270072992701, |
|
"grad_norm": 1.831419587135315, |
|
"learning_rate": 1.035707514179513e-06, |
|
"log_odds_chosen": 2.8770365715026855, |
|
"log_odds_ratio": -0.08625087887048721, |
|
"logits/chosen": -0.866462230682373, |
|
"logits/rejected": -0.8304919004440308, |
|
"logps/chosen": -0.33102989196777344, |
|
"logps/rejected": -1.7656580209732056, |
|
"loss": 0.4503, |
|
"nll_loss": 0.441666841506958, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03310299292206764, |
|
"rewards/margins": 0.14346280694007874, |
|
"rewards/rejected": -0.17656579613685608, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8759124087591241, |
|
"grad_norm": 1.8673110008239746, |
|
"learning_rate": 2.3198686149022016e-07, |
|
"log_odds_chosen": 3.082756519317627, |
|
"log_odds_ratio": -0.0681430920958519, |
|
"logits/chosen": -0.8562874794006348, |
|
"logits/rejected": -0.8275081515312195, |
|
"logps/chosen": -0.32306355237960815, |
|
"logps/rejected": -1.9355922937393188, |
|
"loss": 0.4358, |
|
"nll_loss": 0.42900004982948303, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.032306358218193054, |
|
"rewards/margins": 0.16125287115573883, |
|
"rewards/rejected": -0.1935592144727707, |
|
"step": 120 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 137, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|