{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.1432945499081444,
  "eval_steps": 500,
  "global_step": 1750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0612369871402327,
      "grad_norm": 0.8098917283898412,
      "learning_rate": 9.996294850025658e-06,
      "loss": 1.0009,
      "step": 50
    },
    {
      "epoch": 0.1224739742804654,
      "grad_norm": 0.8351009158740302,
      "learning_rate": 9.985184891357165e-06,
      "loss": 0.9902,
      "step": 100
    },
    {
      "epoch": 0.1837109614206981,
      "grad_norm": 0.7857771081781793,
      "learning_rate": 9.96668658961975e-06,
      "loss": 0.972,
      "step": 150
    },
    {
      "epoch": 0.2449479485609308,
      "grad_norm": 0.7820326486942377,
      "learning_rate": 9.940827360406297e-06,
      "loss": 0.9727,
      "step": 200
    },
    {
      "epoch": 0.3061849357011635,
      "grad_norm": 0.7529490157497454,
      "learning_rate": 9.907645528645791e-06,
      "loss": 0.9652,
      "step": 250
    },
    {
      "epoch": 0.3674219228413962,
      "grad_norm": 0.8000880788190894,
      "learning_rate": 9.867190271803466e-06,
      "loss": 0.9661,
      "step": 300
    },
    {
      "epoch": 0.4286589099816289,
      "grad_norm": 0.7925477606605517,
      "learning_rate": 9.819521546996864e-06,
      "loss": 0.9599,
      "step": 350
    },
    {
      "epoch": 0.4898958971218616,
      "grad_norm": 0.7624902603510945,
      "learning_rate": 9.764710002135784e-06,
      "loss": 0.9547,
      "step": 400
    },
    {
      "epoch": 0.5511328842620943,
      "grad_norm": 0.7431039272381413,
      "learning_rate": 9.702836871217838e-06,
      "loss": 0.9466,
      "step": 450
    },
    {
      "epoch": 0.612369871402327,
      "grad_norm": 0.7574806145615771,
      "learning_rate": 9.633993853934803e-06,
      "loss": 0.9439,
      "step": 500
    },
    {
      "epoch": 0.6736068585425597,
      "grad_norm": 0.748375839743952,
      "learning_rate": 9.558282979768164e-06,
      "loss": 0.9429,
      "step": 550
    },
    {
      "epoch": 0.7348438456827924,
      "grad_norm": 0.7952459759871126,
      "learning_rate": 9.475816456775313e-06,
      "loss": 0.9366,
      "step": 600
    },
    {
      "epoch": 0.7960808328230251,
      "grad_norm": 0.7782924796375454,
      "learning_rate": 9.386716505290467e-06,
      "loss": 0.9356,
      "step": 650
    },
    {
      "epoch": 0.8573178199632578,
      "grad_norm": 0.7443095452962357,
      "learning_rate": 9.291115176786814e-06,
      "loss": 0.9304,
      "step": 700
    },
    {
      "epoch": 0.9185548071034905,
      "grad_norm": 0.7822224402996899,
      "learning_rate": 9.189154158168293e-06,
      "loss": 0.9339,
      "step": 750
    },
    {
      "epoch": 0.9797917942437232,
      "grad_norm": 0.7817236760492132,
      "learning_rate": 9.08098456178111e-06,
      "loss": 0.9295,
      "step": 800
    },
    {
      "epoch": 1.0410287813839558,
      "grad_norm": 0.7824573951031337,
      "learning_rate": 8.966766701456177e-06,
      "loss": 0.9029,
      "step": 850
    },
    {
      "epoch": 1.1022657685241886,
      "grad_norm": 0.7621342939491088,
      "learning_rate": 8.846669854914395e-06,
      "loss": 0.8871,
      "step": 900
    },
    {
      "epoch": 1.1635027556644213,
      "grad_norm": 0.7431557953761879,
      "learning_rate": 8.720872012886918e-06,
      "loss": 0.882,
      "step": 950
    },
    {
      "epoch": 1.224739742804654,
      "grad_norm": 0.3886245437993066,
      "learning_rate": 8.58955961532221e-06,
      "loss": 0.8864,
      "step": 1000
    },
    {
      "epoch": 1.2859767299448868,
      "grad_norm": 0.7536794211653792,
      "learning_rate": 8.452927275070858e-06,
      "loss": 0.8805,
      "step": 1050
    },
    {
      "epoch": 1.3472137170851193,
      "grad_norm": 0.7577244481174435,
      "learning_rate": 8.311177489457653e-06,
      "loss": 0.8855,
      "step": 1100
    },
    {
      "epoch": 1.408450704225352,
      "grad_norm": 0.777343889968524,
      "learning_rate": 8.164520340168404e-06,
      "loss": 0.8862,
      "step": 1150
    },
    {
      "epoch": 1.4696876913655847,
      "grad_norm": 0.736623306272446,
      "learning_rate": 8.013173181896283e-06,
      "loss": 0.8839,
      "step": 1200
    },
    {
      "epoch": 1.5309246785058175,
      "grad_norm": 0.7451843323741638,
      "learning_rate": 7.857360320209126e-06,
      "loss": 0.8738,
      "step": 1250
    },
    {
      "epoch": 1.5921616656460502,
      "grad_norm": 0.8023889052201314,
      "learning_rate": 7.700553608573257e-06,
      "loss": 0.8846,
      "step": 1300
    },
    {
      "epoch": 1.653398652786283,
      "grad_norm": 0.7595177276389167,
      "learning_rate": 7.536585976390474e-06,
      "loss": 0.8818,
      "step": 1350
    },
    {
      "epoch": 1.7146356399265157,
      "grad_norm": 0.7673121148739883,
      "learning_rate": 7.3688589716215555e-06,
      "loss": 0.8775,
      "step": 1400
    },
    {
      "epoch": 1.7758726270667484,
      "grad_norm": 0.7577794985624852,
      "learning_rate": 7.197621175749467e-06,
      "loss": 0.8738,
      "step": 1450
    },
    {
      "epoch": 1.8371096142069812,
      "grad_norm": 0.7927915981856463,
      "learning_rate": 7.023126373460202e-06,
      "loss": 0.8771,
      "step": 1500
    },
    {
      "epoch": 1.8983466013472137,
      "grad_norm": 0.7595080397715767,
      "learning_rate": 6.849210730229846e-06,
      "loss": 0.8775,
      "step": 1550
    },
    {
      "epoch": 1.9595835884874464,
      "grad_norm": 0.7976605652572579,
      "learning_rate": 6.669034296168855e-06,
      "loss": 0.8751,
      "step": 1600
    },
    {
      "epoch": 2.020820575627679,
      "grad_norm": 0.7958737424052013,
      "learning_rate": 6.486384253156014e-06,
      "loss": 0.8579,
      "step": 1650
    },
    {
      "epoch": 2.0820575627679117,
      "grad_norm": 0.8331069734198607,
      "learning_rate": 6.301531299512195e-06,
      "loss": 0.837,
      "step": 1700
    },
    {
      "epoch": 2.1432945499081444,
      "grad_norm": 0.7511629331953945,
      "learning_rate": 6.11474939840398e-06,
      "loss": 0.8327,
      "step": 1750
    }
  ],
  "logging_steps": 50,
  "max_steps": 4080,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 534961795891200.0,
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": null
}