Training in progress, step 58, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +95 -4

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e4b409903b918c6df7d1cf23cdf0ef713e7f8f395ecf3d8b59468d0342b1e21
 size 90365754

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b76075d4a3d012579db952baf9418d59b986f46b5b3c3860a05ab67d50904c4
 size 90365754

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1dc4928675a55c735fd8b40848204aeb479d0a3c4cb847cd8b779245fffeab2c
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee9fcbc8d3234618f314ef0b53e8c38e0e1f71f833659a191e403922e22a99c4
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bec9259f8861eb32b9f9ceb9f92b307a160be2011844e232460a52b5467663d
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:a92f1fa8b400f34899d5037d3290d20c7c71112f27062bed29bcc30210c60a86
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd1931e4016bf28b07346e79413c71e240f12f43909f7431de607b5c05407707
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:749d7b3c653370fdb2ddd4cda64192cb766369c2fa4f17aa0d667e8b51a6c233
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7643312101910829,
   "eval_steps": 15,
-  "global_step": 45,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -354,6 +354,97 @@
       "eval_samples_per_second": 23.588,
       "eval_steps_per_second": 5.897,
       "step": 45
     }
   ],
   "logging_steps": 1,
@@ -368,12 +459,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.741900625215488e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9851380042462845,
   "eval_steps": 15,
+  "global_step": 58,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.588,
       "eval_steps_per_second": 5.897,
       "step": 45
+    },
+    {
+      "epoch": 0.7813163481953291,
+      "grad_norm": NaN,
+      "learning_rate": 9.200000000000001e-05,
+      "loss": 0.0,
+      "step": 46
+    },
+    {
+      "epoch": 0.7983014861995754,
+      "grad_norm": NaN,
+      "learning_rate": 9.4e-05,
+      "loss": 0.0,
+      "step": 47
+    },
+    {
+      "epoch": 0.8152866242038217,
+      "grad_norm": NaN,
+      "learning_rate": 9.6e-05,
+      "loss": 0.0,
+      "step": 48
+    },
+    {
+      "epoch": 0.832271762208068,
+      "grad_norm": NaN,
+      "learning_rate": 9.8e-05,
+      "loss": 0.0,
+      "step": 49
+    },
+    {
+      "epoch": 0.8492569002123143,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001,
+      "loss": 0.0,
+      "step": 50
+    },
+    {
+      "epoch": 0.8662420382165605,
+      "grad_norm": NaN,
+      "learning_rate": 9.619397662556435e-05,
+      "loss": 0.0,
+      "step": 51
+    },
+    {
+      "epoch": 0.8832271762208068,
+      "grad_norm": NaN,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 0.0,
+      "step": 52
+    },
+    {
+      "epoch": 0.9002123142250531,
+      "grad_norm": NaN,
+      "learning_rate": 6.91341716182545e-05,
+      "loss": 0.0,
+      "step": 53
+    },
+    {
+      "epoch": 0.9171974522292994,
+      "grad_norm": NaN,
+      "learning_rate": 5e-05,
+      "loss": 0.0,
+      "step": 54
+    },
+    {
+      "epoch": 0.9341825902335457,
+      "grad_norm": NaN,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.0,
+      "step": 55
+    },
+    {
+      "epoch": 0.9511677282377919,
+      "grad_norm": NaN,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.0,
+      "step": 56
+    },
+    {
+      "epoch": 0.9681528662420382,
+      "grad_norm": NaN,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.0,
+      "step": 57
+    },
+    {
+      "epoch": 0.9851380042462845,
+      "grad_norm": NaN,
+      "learning_rate": 0.0,
+      "loss": 0.0,
+      "step": 58
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.245116361388851e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null