{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.2222222222222223,
  "eval_steps": 50,
  "global_step": 21,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14814814814814814,
      "grad_norm": 9.025846481323242,
      "learning_rate": 1.06e-05,
      "loss": 2.2598,
      "step": 1
    },
    {
      "epoch": 0.14814814814814814,
      "eval_loss": 2.293095111846924,
      "eval_runtime": 0.9049,
      "eval_samples_per_second": 99.46,
      "eval_steps_per_second": 3.315,
      "step": 1
    },
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 9.928245544433594,
      "learning_rate": 2.12e-05,
      "loss": 2.3139,
      "step": 2
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 7.822314739227295,
      "learning_rate": 3.18e-05,
      "loss": 2.0956,
      "step": 3
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 12.399734497070312,
      "learning_rate": 4.24e-05,
      "loss": 1.8688,
      "step": 4
    },
    {
      "epoch": 0.7407407407407407,
      "grad_norm": 5.776300430297852,
      "learning_rate": 5.3e-05,
      "loss": 1.5517,
      "step": 5
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 4.726962566375732,
      "learning_rate": 6.36e-05,
      "loss": 1.2675,
      "step": 6
    },
    {
      "epoch": 1.074074074074074,
      "grad_norm": 10.351492881774902,
      "learning_rate": 7.42e-05,
      "loss": 1.8753,
      "step": 7
    },
    {
      "epoch": 1.2222222222222223,
      "grad_norm": 9.424615859985352,
      "learning_rate": 8.48e-05,
      "loss": 1.2173,
      "step": 8
    },
    {
      "epoch": 1.3703703703703702,
      "grad_norm": 2.993288993835449,
      "learning_rate": 9.54e-05,
      "loss": 1.1298,
      "step": 9
    },
    {
      "epoch": 1.5185185185185186,
      "grad_norm": 3.4643940925598145,
      "learning_rate": 0.000106,
      "loss": 0.9959,
      "step": 10
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 4.57342529296875,
      "learning_rate": 0.00010385312760156837,
      "loss": 1.1387,
      "step": 11
    },
    {
      "epoch": 1.8148148148148149,
      "grad_norm": 2.0306742191314697,
      "learning_rate": 9.75864372400526e-05,
      "loss": 1.0047,
      "step": 12
    },
    {
      "epoch": 1.9629629629629628,
      "grad_norm": 3.058318614959717,
      "learning_rate": 8.770761889910012e-05,
      "loss": 1.4257,
      "step": 13
    },
    {
      "epoch": 2.148148148148148,
      "grad_norm": 2.2051618099212646,
      "learning_rate": 7.501699568909999e-05,
      "loss": 0.8452,
      "step": 14
    },
    {
      "epoch": 2.2962962962962963,
      "grad_norm": 1.9626882076263428,
      "learning_rate": 6.054268642848412e-05,
      "loss": 0.7865,
      "step": 15
    },
    {
      "epoch": 2.4444444444444446,
      "grad_norm": 2.194836139678955,
      "learning_rate": 4.545731357151589e-05,
      "loss": 0.7938,
      "step": 16
    },
    {
      "epoch": 2.5925925925925926,
      "grad_norm": 1.5503541231155396,
      "learning_rate": 3.0983004310900024e-05,
      "loss": 0.7757,
      "step": 17
    },
    {
      "epoch": 2.7407407407407405,
      "grad_norm": 1.4564896821975708,
      "learning_rate": 1.8292381100899895e-05,
      "loss": 0.7369,
      "step": 18
    },
    {
      "epoch": 2.888888888888889,
      "grad_norm": 1.3275120258331299,
      "learning_rate": 8.41356275994739e-06,
      "loss": 0.7387,
      "step": 19
    },
    {
      "epoch": 3.074074074074074,
      "grad_norm": 2.068755865097046,
      "learning_rate": 2.146872398431645e-06,
      "loss": 1.0643,
      "step": 20
    },
    {
      "epoch": 3.2222222222222223,
      "grad_norm": 1.0983883142471313,
      "learning_rate": 0.0,
      "loss": 0.6089,
      "step": 21
    }
  ],
  "logging_steps": 1,
  "max_steps": 21,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.58447037684908e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}