Training in progress, step 1000, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8dbcd6e3a8591bfaed32d8abcf25a9918694b23aa2ff2e871d72f30f3cfd28de
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:99c1e8afc5d6e724647aa6282eea420a5674252560cfe3ca227d9079c5ab3503
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:262612171054ff07dc873ba588a92aaaca5297f604de6fbcc2b57a0efda9c97b
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:925357869e6ba85e6c835b49837b1223de96991a316b48a2f12c8541300e39be
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c065ee50c4698f410f7f4a607a3fd9ca80c14452c25d8cb68381d6bcdc56f10
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8062f178cdbb1c5d808c1d4d26e9f0f29c4b409a65e50547129df87250075971
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2752546105147261,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -42,6 +42,41 @@
       "learning_rate": 9.09422799766929e-05,
       "loss": 1.5134,
       "step": 500
     }
   ],
   "logging_steps": 100,
@@ -61,7 +96,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.650241834511237e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5505092210294522,
   "eval_steps": 500,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.09422799766929e-05,
       "loss": 1.5134,
       "step": 500
+    },
+    {
+      "epoch": 0.33030553261767137,
+      "grad_norm": 2.3742384910583496,
+      "learning_rate": 8.47037097610317e-05,
+      "loss": 1.4548,
+      "step": 600
+    },
+    {
+      "epoch": 0.3853564547206166,
+      "grad_norm": 2.96374773979187,
+      "learning_rate": 7.718624920225358e-05,
+      "loss": 1.4309,
+      "step": 700
+    },
+    {
+      "epoch": 0.4404073768235618,
+      "grad_norm": 2.140960693359375,
+      "learning_rate": 6.866692942410824e-05,
+      "loss": 1.4181,
+      "step": 800
+    },
+    {
+      "epoch": 0.495458298926507,
+      "grad_norm": 1.9198336601257324,
+      "learning_rate": 5.9459701755408125e-05,
+      "loss": 1.3413,
+      "step": 900
+    },
+    {
+      "epoch": 0.5505092210294522,
+      "grad_norm": 1.8933072090148926,
+      "learning_rate": 4.9903868093081854e-05,
+      "loss": 1.3367,
+      "step": 1000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 5.293567140023501e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null