Training in progress, step 6, checkpoint

Files changed (11) hide show

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dae31359390ebd44baeca72772348828cbb370f33737791b739de72f9d91a9c2
-size 2280

 version https://git-lfs.github.com/spec/v1
+oid sha256:40553dd62707e6ac72ba8ab73d4daac9e8edb315217654d6c1ef881386bf9e25
+size 85723284

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e0d2c38231e94b9ec25168267a3390e377c7e96b8c366407e5710cbc8c44d5d
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:4015cf50661106d4c867455d8fcb77ddec867e70f0f882bba91a3ca1ad588408
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a314a5d18c765e4391952313db28bb66a33f4a6331471d415f71a3501c38a6c
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:49fc59e53e240b3838dadf2af67159484079789c12a330a205e27c230ccee1c9
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:941015a43600f0029776dc6d9d56f1625fb007b61acaea790e2657d8592b44d5
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:11722b6394ecb3833f200e9d6276276d305ce41580dceb70733998603948c9fc
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6047659e7e57219e01f2d0aec27643dc145b7deb70cbf9fd16ba27f1bbe17782
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd64b1aeac101aad3ba6d07f9650b71a9fb5556ba4d6b15ea569bef72eb6f8a8
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96235b77ebcd46e5ea0dfbbaa3dd31608205a209d4a05cd49d1fadc6d43ae041
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d5ac429c02100c8ea139dc1bcd52228cac30b904cf2277e02640b844afb2bf8
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f66d633de5f92f220598195851371ad9a060b558826c9694f93ce4324ed8cea5
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa0e4c1cc21bbb8ce749c794ec6e7606eb777967d6020afb33f90761e165cff8
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d90eb28075ce42b35f16f8a71090ab3f890d8eb5481fe805620601503e380ff
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a5c2dab0c365a1d31bee5cff9b44deecc376511a9331204b441812b880c65e5
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:783a581c253b79a52148bf899b22b99e854f8be7682acbec120e86e3921fa257
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9cad6b44df5f70ced5eafddb1c84bd1a3ffb87a9e3b79a3b70e4204f0ed6e8a
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:219c62af0a99d0a3db4c57e4189ef3b142499e30571ad5cc800b8b67c9ce1583
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:764cf599d0b83c5d302be0978d09e435a466a8511dc1f7e86d5ee3704818d46f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.010676156583629894,
   "eval_steps": 3,
-  "global_step": 3,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -44,6 +44,35 @@
       "eval_samples_per_second": 31.24,
       "eval_steps_per_second": 3.954,
       "step": 3
     }
   ],
   "logging_steps": 1,
@@ -63,7 +92,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.7805294805450752e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.021352313167259787,
   "eval_steps": 3,
+  "global_step": 6,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.24,
       "eval_steps_per_second": 3.954,
       "step": 3
+    },
+    {
+      "epoch": 0.014234875444839857,
+      "grad_norm": NaN,
+      "learning_rate": 0.0,
+      "loss": 8.7306,
+      "step": 4
+    },
+    {
+      "epoch": 0.017793594306049824,
+      "grad_norm": 14.29753589630127,
+      "learning_rate": 2e-05,
+      "loss": 9.3229,
+      "step": 5
+    },
+    {
+      "epoch": 0.021352313167259787,
+      "grad_norm": NaN,
+      "learning_rate": 2e-05,
+      "loss": 9.4419,
+      "step": 6
+    },
+    {
+      "epoch": 0.021352313167259787,
+      "eval_loss": 10.556809425354004,
+      "eval_runtime": 14.0309,
+      "eval_samples_per_second": 33.783,
+      "eval_steps_per_second": 4.276,
+      "step": 6
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.5610589610901504e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null