{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.013247665099026296,
  "eval_steps": 500,
  "global_step": 800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001655958137378287,
      "grad_norm": 0.19597935676574707,
      "learning_rate": 0.00019966890756553004,
      "loss": 2.1972,
      "step": 100
    },
    {
      "epoch": 0.003311916274756574,
      "grad_norm": 0.25808241963386536,
      "learning_rate": 0.00019933771577352244,
      "loss": 1.9677,
      "step": 200
    },
    {
      "epoch": 0.0049678744121348616,
      "grad_norm": 0.23811133205890656,
      "learning_rate": 0.00019900652398151486,
      "loss": 1.9341,
      "step": 300
    },
    {
      "epoch": 0.006623832549513148,
      "grad_norm": 0.26714324951171875,
      "learning_rate": 0.00019867533218950728,
      "loss": 1.915,
      "step": 400
    },
    {
      "epoch": 0.008279790686891435,
      "grad_norm": 0.23645658791065216,
      "learning_rate": 0.0001983441403974997,
      "loss": 1.8916,
      "step": 500
    },
    {
      "epoch": 0.009935748824269723,
      "grad_norm": 0.2878512740135193,
      "learning_rate": 0.00019801294860549213,
      "loss": 1.9003,
      "step": 600
    },
    {
      "epoch": 0.01159170696164801,
      "grad_norm": 0.2687942087650299,
      "learning_rate": 0.00019768175681348456,
      "loss": 1.876,
      "step": 700
    },
    {
      "epoch": 0.013247665099026296,
      "grad_norm": 0.2722982168197632,
      "learning_rate": 0.00019735056502147698,
      "loss": 1.9004,
      "step": 800
    }
  ],
  "logging_steps": 100,
  "max_steps": 60388,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 800,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.53158703054848e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}