leixa committed
Commit 028c36c · 1 Parent(s): a0396b5

Training in progress, step 279, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:781893ab268a460d6c6281d7328238df1862c326fc57bc1a1cce25d4e4fca254
+ oid sha256:c0cf25f76e8662756856fd1a097be6ff3870d9f310686bf41f15fe00f6e14dc4
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f6927a83945068a1faaa14b39736f3798ea663c353aa04ca6e7da7e5afc917be
- size 325339796
+ oid sha256:742095389fc358571b77e198008ebd180a04e2384f21dd14cc648803058e1cfe
+ size 325340244
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5f5d3012cf842ae96026cfb7617d36b883980d38f99422e689133b1c4367194e
+ oid sha256:28e1effb06033458f08c521267ddbc73b4a5a3e148e528b2cfd2ce1d0d17a805
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dd4195f21f3ab721f41e157ad5801251bc3ef7a33dc24988ba85a5ba8d1d1f0e
+ oid sha256:de2e7670b3561000eee216684d0727bea9800d1c3f3b2422105732155595c43d
  size 1064
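
The four binary checkpoint files above are tracked with Git LFS, so only their pointer files (version, oid sha256, size) change in this commit. Below is a minimal sketch, in Python and with hypothetical local paths, of how one might confirm that a downloaded blob matches the oid and size recorded in its pointer:

import hashlib
from pathlib import Path


def parse_lfs_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file ("key value" lines) into a dict."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


def blob_matches_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the oid and size recorded in its pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size


# Hypothetical paths: the pointer as checked out without LFS smudging,
# and the real blob fetched separately (e.g. via `git lfs pull`).
print(blob_matches_pointer("adapter_model.safetensors.pointer",
                           "last-checkpoint/adapter_model.safetensors"))

Note that optimizer.pt is the only blob whose size changes here (325339796 to 325340244 bytes); the other files keep their byte counts and only their hashes change.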
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 2.0121703853955375,
+   "epoch": 2.26369168356998,
    "eval_steps": 31,
-   "global_step": 248,
+   "global_step": 279,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -653,6 +653,91 @@
        "eval_samples_per_second": 15.489,
        "eval_steps_per_second": 1.936,
        "step": 248
+     },
+     {
+       "epoch": 2.020283975659229,
+       "grad_norm": 3.395972967147827,
+       "learning_rate": 2.537882199482665e-05,
+       "loss": 1.1378,
+       "step": 249
+     },
+     {
+       "epoch": 2.0446247464503045,
+       "grad_norm": 4.442982196807861,
+       "learning_rate": 2.4248096254497288e-05,
+       "loss": 1.253,
+       "step": 252
+     },
+     {
+       "epoch": 2.0689655172413794,
+       "grad_norm": 5.2550764083862305,
+       "learning_rate": 2.3135019582658802e-05,
+       "loss": 1.0433,
+       "step": 255
+     },
+     {
+       "epoch": 2.0933062880324544,
+       "grad_norm": 4.615274429321289,
+       "learning_rate": 2.2040354826462668e-05,
+       "loss": 1.1078,
+       "step": 258
+     },
+     {
+       "epoch": 2.1176470588235294,
+       "grad_norm": 5.723622798919678,
+       "learning_rate": 2.0964852214453013e-05,
+       "loss": 0.9585,
+       "step": 261
+     },
+     {
+       "epoch": 2.1419878296146044,
+       "grad_norm": 4.3719587326049805,
+       "learning_rate": 1.9909248842397584e-05,
+       "loss": 0.9587,
+       "step": 264
+     },
+     {
+       "epoch": 2.1663286004056794,
+       "grad_norm": 6.265243053436279,
+       "learning_rate": 1.887426816811903e-05,
+       "loss": 0.9681,
+       "step": 267
+     },
+     {
+       "epoch": 2.1906693711967544,
+       "grad_norm": 5.796363830566406,
+       "learning_rate": 1.7860619515673033e-05,
+       "loss": 1.0059,
+       "step": 270
+     },
+     {
+       "epoch": 2.2150101419878294,
+       "grad_norm": 5.817225456237793,
+       "learning_rate": 1.6868997589213136e-05,
+       "loss": 1.0253,
+       "step": 273
+     },
+     {
+       "epoch": 2.239350912778905,
+       "grad_norm": 4.450856685638428,
+       "learning_rate": 1.5900081996875083e-05,
+       "loss": 0.7533,
+       "step": 276
+     },
+     {
+       "epoch": 2.26369168356998,
+       "grad_norm": 7.340899467468262,
+       "learning_rate": 1.4954536785007456e-05,
+       "loss": 0.9246,
+       "step": 279
+     },
+     {
+       "epoch": 2.26369168356998,
+       "eval_loss": 0.5402039885520935,
+       "eval_runtime": 13.4355,
+       "eval_samples_per_second": 15.481,
+       "eval_steps_per_second": 1.935,
+       "step": 279
      }
    ],
    "logging_steps": 3,
@@ -672,7 +757,7 @@
        "attributes": {}
      }
    },
-   "total_flos": 3.293808601155502e+17,
+   "total_flos": 3.706262291176489e+17,
    "train_batch_size": 8,
    "trial_name": null,
    "trial_params": null