{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 137,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.145985401459854,
      "grad_norm": 2.3561489582061768,
      "learning_rate": 4.970701059450872e-06,
      "log_odds_chosen": 2.3852906227111816,
      "log_odds_ratio": -0.13181139528751373,
      "logits/chosen": -0.7752584218978882,
      "logits/rejected": -0.7647705674171448,
      "logps/chosen": -0.44917869567871094,
      "logps/rejected": -1.702678918838501,
      "loss": 0.6002,
      "nll_loss": 0.5870461463928223,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.04491787031292915,
      "rewards/margins": 0.12535002827644348,
      "rewards/rejected": -0.17026789486408234,
      "step": 20
    },
    {
      "epoch": 0.291970802919708,
      "grad_norm": 1.8824354410171509,
      "learning_rate": 4.4687157653336645e-06,
      "log_odds_chosen": 2.5571141242980957,
      "log_odds_ratio": -0.10861808061599731,
      "logits/chosen": -0.7587701082229614,
      "logits/rejected": -0.7438877820968628,
      "logps/chosen": -0.3850471079349518,
      "logps/rejected": -1.695639967918396,
      "loss": 0.4998,
      "nll_loss": 0.48895248770713806,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.038504708558321,
      "rewards/margins": 0.13105927407741547,
      "rewards/rejected": -0.16956397891044617,
      "step": 40
    },
    {
      "epoch": 0.43795620437956206,
      "grad_norm": 1.936366081237793,
      "learning_rate": 3.46407781122034e-06,
      "log_odds_chosen": 2.8851490020751953,
      "log_odds_ratio": -0.09776415675878525,
      "logits/chosen": -0.8007665872573853,
      "logits/rejected": -0.7795315980911255,
      "logps/chosen": -0.3232823610305786,
      "logps/rejected": -1.8261045217514038,
      "loss": 0.454,
      "nll_loss": 0.4442494809627533,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.03232823684811592,
      "rewards/margins": 0.15028223395347595,
      "rewards/rejected": -0.18261046707630157,
      "step": 60
    },
    {
      "epoch": 0.583941605839416,
      "grad_norm": 1.9113825559616089,
      "learning_rate": 2.2132914365039993e-06,
      "log_odds_chosen": 2.7028260231018066,
      "log_odds_ratio": -0.10017122328281403,
      "logits/chosen": -0.8422906994819641,
      "logits/rejected": -0.8227804899215698,
      "logps/chosen": -0.3254713714122772,
      "logps/rejected": -1.66717529296875,
      "loss": 0.4622,
      "nll_loss": 0.45214977860450745,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.03254713863134384,
      "rewards/margins": 0.13417038321495056,
      "rewards/rejected": -0.166717529296875,
      "step": 80
    },
    {
      "epoch": 0.7299270072992701,
      "grad_norm": 1.831419587135315,
      "learning_rate": 1.035707514179513e-06,
      "log_odds_chosen": 2.8770365715026855,
      "log_odds_ratio": -0.08625087887048721,
      "logits/chosen": -0.866462230682373,
      "logits/rejected": -0.8304919004440308,
      "logps/chosen": -0.33102989196777344,
      "logps/rejected": -1.7656580209732056,
      "loss": 0.4503,
      "nll_loss": 0.441666841506958,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.03310299292206764,
      "rewards/margins": 0.14346280694007874,
      "rewards/rejected": -0.17656579613685608,
      "step": 100
    },
    {
      "epoch": 0.8759124087591241,
      "grad_norm": 1.8673110008239746,
      "learning_rate": 2.3198686149022016e-07,
      "log_odds_chosen": 3.082756519317627,
      "log_odds_ratio": -0.0681430920958519,
      "logits/chosen": -0.8562874794006348,
      "logits/rejected": -0.8275081515312195,
      "logps/chosen": -0.32306355237960815,
      "logps/rejected": -1.9355922937393188,
      "loss": 0.4358,
      "nll_loss": 0.42900004982948303,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -0.032306358218193054,
      "rewards/margins": 0.16125287115573883,
      "rewards/rejected": -0.1935592144727707,
      "step": 120
    }
  ],
  "logging_steps": 20,
  "max_steps": 137,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}