Training in progress, step 20, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +46 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:959933e1af9857170a4b99347ec3762d0d5f8de070b5a1d13603b3caf975bf48
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c233f1c620c09bf19f826d1fa61facea592ff868c11502b37369601e02f8356
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4c40c52a45a881eb464287fa64d73edebeb642339a644660b5f6de8173aa66d
 size 168149074

 version https://git-lfs.github.com/spec/v1
+oid sha256:d552aa8a592378110f79c37c3fed8a2f487ef00c846e2d86597ff438816332e7
 size 168149074

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18c91c570b07019a1068294715f275674400f8131145c713b4a085e0034f5c76
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a3b96852ea2a689adf8f1cc86874c6e93a130324b13ae84494c715e8376b0b4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d42533d2ce676231a5f1b4cd529190e6e167a0f19701b5e4bdd8cf1b59bbcf1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d605401690d7669ff16aeaca6820cbd8d0d605afe748c51045ce90888810a22
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.06571741511500548,
   "eval_steps": 5,
-  "global_step": 15,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,49 @@
       "eval_samples_per_second": 9.406,
       "eval_steps_per_second": 4.751,
       "step": 15
     }
   ],
   "logging_steps": 1,
@@ -163,7 +206,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.109738548887552e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.08762322015334063,
   "eval_steps": 5,
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.406,
       "eval_steps_per_second": 4.751,
       "step": 15
+    },
+    {
+      "epoch": 0.0700985761226725,
+      "grad_norm": 0.1723138689994812,
+      "learning_rate": 0.0001891006524188368,
+      "loss": 0.0143,
+      "step": 16
+    },
+    {
+      "epoch": 0.07447973713033954,
+      "grad_norm": 0.11893691122531891,
+      "learning_rate": 0.00018526401643540922,
+      "loss": 0.0115,
+      "step": 17
+    },
+    {
+      "epoch": 0.07886089813800658,
+      "grad_norm": 0.23403066396713257,
+      "learning_rate": 0.00018090169943749476,
+      "loss": 0.0339,
+      "step": 18
+    },
+    {
+      "epoch": 0.0832420591456736,
+      "grad_norm": 0.4720830023288727,
+      "learning_rate": 0.0001760405965600031,
+      "loss": 0.0228,
+      "step": 19
+    },
+    {
+      "epoch": 0.08762322015334063,
+      "grad_norm": 0.11689777672290802,
+      "learning_rate": 0.00017071067811865476,
+      "loss": 0.0248,
+      "step": 20
+    },
+    {
+      "epoch": 0.08762322015334063,
+      "eval_loss": 0.017750361934304237,
+      "eval_runtime": 10.3124,
+      "eval_samples_per_second": 9.406,
+      "eval_steps_per_second": 4.752,
+      "step": 20
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.479651398516736e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null