Training in progress, step 60, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +46 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36219f4df01f6540de43e20af601c95b792f0e8b395a1faec9012b7942e4bc37
 size 1001465824

 version https://git-lfs.github.com/spec/v1
+oid sha256:184231790e9a47327b2753c0830bb738e8fe8d9415817371828e63a694b4ed1b
 size 1001465824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:585f9ab65b3cbf0614231a35f714d5862e5c46100460503306f45bca7fc5a217
 size 509176980

 version https://git-lfs.github.com/spec/v1
+oid sha256:14a76d922d59bc1b0020e776875374796ad6902a69089b31610eda3c5aa070d9
 size 509176980

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa8a3d63161f76c741ed27e5548e0954b55bada46f17ff65da09dc9004692f5a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d6040f13b5818505a6ab000dd3b2f9edff910b36a9f5a3a034da7779eb7d8ce
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78a060285bfc64e2914a2dfa3484550144c9851d788510de86ca8a657edce764
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e31ea59fbcacae53c010c1baf2222b21437849ba2abcafeea6c68dd11ef083b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7792207792207793,
   "eval_steps": 15,
-  "global_step": 45,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,49 @@
       "eval_samples_per_second": 4.436,
       "eval_steps_per_second": 0.595,
       "step": 45
     }
   ],
   "logging_steps": 3,
@@ -163,7 +206,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.318428010446848e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0432900432900434,
   "eval_steps": 15,
+  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.436,
       "eval_steps_per_second": 0.595,
       "step": 45
+    },
+    {
+      "epoch": 0.8311688311688312,
+      "grad_norm": 0.7336857318878174,
+      "learning_rate": 8.717872783521047e-05,
+      "loss": 2.4763,
+      "step": 48
+    },
+    {
+      "epoch": 0.8831168831168831,
+      "grad_norm": 0.6515845060348511,
+      "learning_rate": 8.518457335743926e-05,
+      "loss": 2.5329,
+      "step": 51
+    },
+    {
+      "epoch": 0.935064935064935,
+      "grad_norm": 0.7598447203636169,
+      "learning_rate": 8.307282131280804e-05,
+      "loss": 2.6017,
+      "step": 54
+    },
+    {
+      "epoch": 0.987012987012987,
+      "grad_norm": 0.6881215572357178,
+      "learning_rate": 8.085052982021847e-05,
+      "loss": 2.5885,
+      "step": 57
+    },
+    {
+      "epoch": 1.0432900432900434,
+      "grad_norm": 0.6885347962379456,
+      "learning_rate": 7.85251264550948e-05,
+      "loss": 2.3153,
+      "step": 60
+    },
+    {
+      "epoch": 1.0432900432900434,
+      "eval_loss": 0.6708703637123108,
+      "eval_runtime": 21.8845,
+      "eval_samples_per_second": 4.432,
+      "eval_steps_per_second": 0.594,
+      "step": 60
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 3.091237347262464e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null