{
  "best_metric": 0.6890727877616882,
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
  "epoch": 0.17293558149589278,
  "eval_steps": 25,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.006917423259835711,
      "grad_norm": 790.2554321289062,
      "learning_rate": 5e-05,
      "loss": 134.0684,
      "step": 1
    },
    {
      "epoch": 0.006917423259835711,
      "eval_loss": 8.13693618774414,
      "eval_runtime": 4.67,
      "eval_samples_per_second": 10.707,
      "eval_steps_per_second": 2.784,
      "step": 1
    },
    {
      "epoch": 0.013834846519671422,
      "grad_norm": 746.5008544921875,
      "learning_rate": 0.0001,
      "loss": 132.2386,
      "step": 2
    },
    {
      "epoch": 0.020752269779507133,
      "grad_norm": 526.3362426757812,
      "learning_rate": 9.990365154573717e-05,
      "loss": 91.3663,
      "step": 3
    },
    {
      "epoch": 0.027669693039342844,
      "grad_norm": 530.220703125,
      "learning_rate": 9.961501876182148e-05,
      "loss": 39.3475,
      "step": 4
    },
    {
      "epoch": 0.03458711629917856,
      "grad_norm": 166.1348114013672,
      "learning_rate": 9.913533761814537e-05,
      "loss": 16.5734,
      "step": 5
    },
    {
      "epoch": 0.041504539559014265,
      "grad_norm": 172.0724334716797,
      "learning_rate": 9.846666218300807e-05,
      "loss": 14.6946,
      "step": 6
    },
    {
      "epoch": 0.04842196281884998,
      "grad_norm": 139.84767150878906,
      "learning_rate": 9.761185582727977e-05,
      "loss": 12.4791,
      "step": 7
    },
    {
      "epoch": 0.05533938607868569,
      "grad_norm": 54.628971099853516,
      "learning_rate": 9.657457896300791e-05,
      "loss": 11.4158,
      "step": 8
    },
    {
      "epoch": 0.0622568093385214,
      "grad_norm": 84.83878326416016,
      "learning_rate": 9.535927336897098e-05,
      "loss": 11.5588,
      "step": 9
    },
    {
      "epoch": 0.06917423259835712,
      "grad_norm": 54.20598220825195,
      "learning_rate": 9.397114317029975e-05,
      "loss": 11.2957,
      "step": 10
    },
    {
      "epoch": 0.07609165585819282,
      "grad_norm": 142.3564453125,
      "learning_rate": 9.241613255361455e-05,
      "loss": 12.3199,
      "step": 11
    },
    {
      "epoch": 0.08300907911802853,
      "grad_norm": 24.020206451416016,
      "learning_rate": 9.070090031310558e-05,
      "loss": 11.0949,
      "step": 12
    },
    {
      "epoch": 0.08992650237786425,
      "grad_norm": 24.632556915283203,
      "learning_rate": 8.883279133655399e-05,
      "loss": 10.9799,
      "step": 13
    },
    {
      "epoch": 0.09684392563769996,
      "grad_norm": 65.8178482055664,
      "learning_rate": 8.681980515339464e-05,
      "loss": 11.6228,
      "step": 14
    },
    {
      "epoch": 0.10376134889753567,
      "grad_norm": 38.240047454833984,
      "learning_rate": 8.467056167950311e-05,
      "loss": 11.3225,
      "step": 15
    },
    {
      "epoch": 0.11067877215737137,
      "grad_norm": 17.695785522460938,
      "learning_rate": 8.239426430539243e-05,
      "loss": 10.995,
      "step": 16
    },
    {
      "epoch": 0.11759619541720709,
      "grad_norm": 14.087479591369629,
      "learning_rate": 8.000066048588211e-05,
      "loss": 10.9745,
      "step": 17
    },
    {
      "epoch": 0.1245136186770428,
      "grad_norm": 60.94573974609375,
      "learning_rate": 7.75e-05,
      "loss": 11.3823,
      "step": 18
    },
    {
      "epoch": 0.13143104193687852,
      "grad_norm": 51.727134704589844,
      "learning_rate": 7.490299105985507e-05,
      "loss": 11.4921,
      "step": 19
    },
    {
      "epoch": 0.13834846519671423,
      "grad_norm": 45.008323669433594,
      "learning_rate": 7.222075445642904e-05,
      "loss": 11.578,
      "step": 20
    },
    {
      "epoch": 0.14526588845654995,
      "grad_norm": 31.297468185424805,
      "learning_rate": 6.946477593864228e-05,
      "loss": 11.1494,
      "step": 21
    },
    {
      "epoch": 0.15218331171638563,
      "grad_norm": 39.989479064941406,
      "learning_rate": 6.664685702961344e-05,
      "loss": 11.2147,
      "step": 22
    },
    {
      "epoch": 0.15910073497622135,
      "grad_norm": 27.28616714477539,
      "learning_rate": 6.377906449072578e-05,
      "loss": 11.2717,
      "step": 23
    },
    {
      "epoch": 0.16601815823605706,
      "grad_norm": 34.64227294921875,
      "learning_rate": 6.087367864990233e-05,
      "loss": 11.2599,
      "step": 24
    },
    {
      "epoch": 0.17293558149589278,
      "grad_norm": 14.136774063110352,
      "learning_rate": 5.794314081535644e-05,
      "loss": 11.0432,
      "step": 25
    },
    {
      "epoch": 0.17293558149589278,
      "eval_loss": 0.6890727877616882,
      "eval_runtime": 4.7098,
      "eval_samples_per_second": 10.616,
      "eval_steps_per_second": 2.76,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.829010669142016e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}