{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 137, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.145985401459854, "grad_norm": 2.3561489582061768, "learning_rate": 4.970701059450872e-06, "log_odds_chosen": 2.3852906227111816, "log_odds_ratio": -0.13181139528751373, "logits/chosen": -0.7752584218978882, "logits/rejected": -0.7647705674171448, "logps/chosen": -0.44917869567871094, "logps/rejected": -1.702678918838501, "loss": 0.6002, "nll_loss": 0.5870461463928223, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.04491787031292915, "rewards/margins": 0.12535002827644348, "rewards/rejected": -0.17026789486408234, "step": 20 }, { "epoch": 0.291970802919708, "grad_norm": 1.8824354410171509, "learning_rate": 4.4687157653336645e-06, "log_odds_chosen": 2.5571141242980957, "log_odds_ratio": -0.10861808061599731, "logits/chosen": -0.7587701082229614, "logits/rejected": -0.7438877820968628, "logps/chosen": -0.3850471079349518, "logps/rejected": -1.695639967918396, "loss": 0.4998, "nll_loss": 0.48895248770713806, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.038504708558321, "rewards/margins": 0.13105927407741547, "rewards/rejected": -0.16956397891044617, "step": 40 }, { "epoch": 0.43795620437956206, "grad_norm": 1.936366081237793, "learning_rate": 3.46407781122034e-06, "log_odds_chosen": 2.8851490020751953, "log_odds_ratio": -0.09776415675878525, "logits/chosen": -0.8007665872573853, "logits/rejected": -0.7795315980911255, "logps/chosen": -0.3232823610305786, "logps/rejected": -1.8261045217514038, "loss": 0.454, "nll_loss": 0.4442494809627533, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.03232823684811592, "rewards/margins": 0.15028223395347595, "rewards/rejected": -0.18261046707630157, "step": 60 }, { "epoch": 0.583941605839416, "grad_norm": 1.9113825559616089, "learning_rate": 2.2132914365039993e-06, "log_odds_chosen": 2.7028260231018066, "log_odds_ratio": -0.10017122328281403, "logits/chosen": -0.8422906994819641, "logits/rejected": -0.8227804899215698, "logps/chosen": -0.3254713714122772, "logps/rejected": -1.66717529296875, "loss": 0.4622, "nll_loss": 0.45214977860450745, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.03254713863134384, "rewards/margins": 0.13417038321495056, "rewards/rejected": -0.166717529296875, "step": 80 }, { "epoch": 0.7299270072992701, "grad_norm": 1.831419587135315, "learning_rate": 1.035707514179513e-06, "log_odds_chosen": 2.8770365715026855, "log_odds_ratio": -0.08625087887048721, "logits/chosen": -0.866462230682373, "logits/rejected": -0.8304919004440308, "logps/chosen": -0.33102989196777344, "logps/rejected": -1.7656580209732056, "loss": 0.4503, "nll_loss": 0.441666841506958, "rewards/accuracies": 1.0, "rewards/chosen": -0.03310299292206764, "rewards/margins": 0.14346280694007874, "rewards/rejected": -0.17656579613685608, "step": 100 }, { "epoch": 0.8759124087591241, "grad_norm": 1.8673110008239746, "learning_rate": 2.3198686149022016e-07, "log_odds_chosen": 3.082756519317627, "log_odds_ratio": -0.0681430920958519, "logits/chosen": -0.8562874794006348, "logits/rejected": -0.8275081515312195, "logps/chosen": -0.32306355237960815, "logps/rejected": -1.9355922937393188, "loss": 0.4358, "nll_loss": 0.42900004982948303, "rewards/accuracies": 1.0, "rewards/chosen": -0.032306358218193054, "rewards/margins": 0.16125287115573883, "rewards/rejected": -0.1935592144727707, "step": 120 } ], "logging_steps": 20, "max_steps": 137, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }