{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.2081148219707423,
  "eval_steps": 500,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05520287054926856,
      "grad_norm": 1.2856197357177734,
      "learning_rate": 1.1037527593818985e-05,
      "loss": 0.7609,
      "step": 100
    },
    {
      "epoch": 0.11040574109853712,
      "grad_norm": 1.0077331066131592,
      "learning_rate": 2.207505518763797e-05,
      "loss": 0.5552,
      "step": 200
    },
    {
      "epoch": 0.1656086116478057,
      "grad_norm": 1.2774763107299805,
      "learning_rate": 3.311258278145696e-05,
      "loss": 0.5229,
      "step": 300
    },
    {
      "epoch": 0.22081148219707425,
      "grad_norm": 1.3611174821853638,
      "learning_rate": 4.415011037527594e-05,
      "loss": 0.5191,
      "step": 400
    },
    {
      "epoch": 0.2760143527463428,
      "grad_norm": 1.112617015838623,
      "learning_rate": 5.518763796909493e-05,
      "loss": 0.5068,
      "step": 500
    },
    {
      "epoch": 0.3312172232956114,
      "grad_norm": 1.1730616092681885,
      "learning_rate": 6.622516556291392e-05,
      "loss": 0.4895,
      "step": 600
    },
    {
      "epoch": 0.3864200938448799,
      "grad_norm": 1.1149307489395142,
      "learning_rate": 7.726269315673289e-05,
      "loss": 0.5044,
      "step": 700
    },
    {
      "epoch": 0.4416229643941485,
      "grad_norm": 1.1326206922531128,
      "learning_rate": 8.830022075055188e-05,
      "loss": 0.5081,
      "step": 800
    },
    {
      "epoch": 0.49682583494341703,
      "grad_norm": 1.1592661142349243,
      "learning_rate": 9.933774834437086e-05,
      "loss": 0.5024,
      "step": 900
    },
    {
      "epoch": 0.5520287054926856,
      "grad_norm": 1.4300315380096436,
      "learning_rate": 9.996717238759354e-05,
      "loss": 0.5078,
      "step": 1000
    },
    {
      "epoch": 0.6072315760419542,
      "grad_norm": 1.3841311931610107,
      "learning_rate": 9.986022415440564e-05,
      "loss": 0.5091,
      "step": 1100
    },
    {
      "epoch": 0.6624344465912227,
      "grad_norm": 1.3825188875198364,
      "learning_rate": 9.967918047007774e-05,
      "loss": 0.4915,
      "step": 1200
    },
    {
      "epoch": 0.7176373171404913,
      "grad_norm": 1.248660683631897,
      "learning_rate": 9.942431037699172e-05,
      "loss": 0.5049,
      "step": 1300
    },
    {
      "epoch": 0.7728401876897598,
      "grad_norm": 1.2805066108703613,
      "learning_rate": 9.909599262824883e-05,
      "loss": 0.4787,
      "step": 1400
    },
    {
      "epoch": 0.8280430582390285,
      "grad_norm": 1.5455862283706665,
      "learning_rate": 9.869471512481871e-05,
      "loss": 0.4844,
      "step": 1500
    },
    {
      "epoch": 0.883245928788297,
      "grad_norm": 1.647222876548767,
      "learning_rate": 9.822107419048758e-05,
      "loss": 0.4732,
      "step": 1600
    },
    {
      "epoch": 0.9384487993375655,
      "grad_norm": 1.128386378288269,
      "learning_rate": 9.76757736856833e-05,
      "loss": 0.4515,
      "step": 1700
    },
    {
      "epoch": 0.9936516698868341,
      "grad_norm": 1.1730600595474243,
      "learning_rate": 9.705962396149427e-05,
      "loss": 0.4496,
      "step": 1800
    },
    {
      "epoch": 1.0488545404361027,
      "grad_norm": 1.2323981523513794,
      "learning_rate": 9.637354065543631e-05,
      "loss": 0.3201,
      "step": 1900
    },
    {
      "epoch": 1.1040574109853711,
      "grad_norm": 1.3514504432678223,
      "learning_rate": 9.561854333075736e-05,
      "loss": 0.3029,
      "step": 2000
    },
    {
      "epoch": 1.1592602815346398,
      "grad_norm": 1.305432915687561,
      "learning_rate": 9.479575396130191e-05,
      "loss": 0.2974,
      "step": 2100
    },
    {
      "epoch": 1.2144631520839084,
      "grad_norm": 1.2165225744247437,
      "learning_rate": 9.390639526418681e-05,
      "loss": 0.2999,
      "step": 2200
    },
    {
      "epoch": 1.2696660226331768,
      "grad_norm": 1.5560479164123535,
      "learning_rate": 9.295178888276614e-05,
      "loss": 0.2957,
      "step": 2300
    },
    {
      "epoch": 1.3248688931824455,
      "grad_norm": 1.236303448677063,
      "learning_rate": 9.193335342258558e-05,
      "loss": 0.3,
      "step": 2400
    },
    {
      "epoch": 1.380071763731714,
      "grad_norm": 1.3867113590240479,
      "learning_rate": 9.08526023432446e-05,
      "loss": 0.3113,
      "step": 2500
    },
    {
      "epoch": 1.4352746342809826,
      "grad_norm": 1.3542137145996094,
      "learning_rate": 8.971114170929969e-05,
      "loss": 0.2965,
      "step": 2600
    },
    {
      "epoch": 1.4904775048302512,
      "grad_norm": 1.0694419145584106,
      "learning_rate": 8.851066780355073e-05,
      "loss": 0.2907,
      "step": 2700
    },
    {
      "epoch": 1.5456803753795199,
      "grad_norm": 1.2589095830917358,
      "learning_rate": 8.72529646062573e-05,
      "loss": 0.2881,
      "step": 2800
    },
    {
      "epoch": 1.6008832459287883,
      "grad_norm": 0.9907445311546326,
      "learning_rate": 8.593990114403092e-05,
      "loss": 0.2833,
      "step": 2900
    },
    {
      "epoch": 1.6560861164780567,
      "grad_norm": 1.1165964603424072,
      "learning_rate": 8.45734287123433e-05,
      "loss": 0.279,
      "step": 3000
    },
    {
      "epoch": 1.7112889870273253,
      "grad_norm": 1.4846047163009644,
      "learning_rate": 8.315557797577755e-05,
      "loss": 0.2788,
      "step": 3100
    },
    {
      "epoch": 1.766491857576594,
      "grad_norm": 1.2695362567901611,
      "learning_rate": 8.168845595033201e-05,
      "loss": 0.2764,
      "step": 3200
    },
    {
      "epoch": 1.8216947281258626,
      "grad_norm": 1.3499970436096191,
      "learning_rate": 8.017424287226106e-05,
      "loss": 0.2576,
      "step": 3300
    },
    {
      "epoch": 1.876897598675131,
      "grad_norm": 1.3490862846374512,
      "learning_rate": 7.861518895810596e-05,
      "loss": 0.2529,
      "step": 3400
    },
    {
      "epoch": 1.9321004692243997,
      "grad_norm": 1.3637194633483887,
      "learning_rate": 7.701361106073043e-05,
      "loss": 0.247,
      "step": 3500
    },
    {
      "epoch": 1.9873033397736681,
      "grad_norm": 1.1327725648880005,
      "learning_rate": 7.537188922633076e-05,
      "loss": 0.2376,
      "step": 3600
    },
    {
      "epoch": 2.0425062103229368,
      "grad_norm": 0.8765416741371155,
      "learning_rate": 7.369246315753623e-05,
      "loss": 0.1724,
      "step": 3700
    },
    {
      "epoch": 2.0977090808722054,
      "grad_norm": 0.8936216831207275,
      "learning_rate": 7.19778285878565e-05,
      "loss": 0.1515,
      "step": 3800
    },
    {
      "epoch": 2.152911951421474,
      "grad_norm": 0.7982431650161743,
      "learning_rate": 7.023053357286366e-05,
      "loss": 0.1542,
      "step": 3900
    },
    {
      "epoch": 2.2081148219707423,
      "grad_norm": 0.7692477107048035,
      "learning_rate": 6.845317470362014e-05,
      "loss": 0.154,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 9055,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1211958631160873e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}