Training in progress, step 1500, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b533ba61614af35ac91cced1f614248e8154ed0dd46572e8c699448f56030c76
 size 1423793692

 version https://git-lfs.github.com/spec/v1
+oid sha256:fafbc4b695fd8bfdea32039508732f28aad09b914411f7b745968d96db5a527d
 size 1423793692

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee647db17a124f48c017f66c388e1fc846268a7b9f1858edd0dabf5b598223d3
 size 2847809392

 version https://git-lfs.github.com/spec/v1
+oid sha256:91ee2b5f2695e677f3f7d19bd6c7ee32347c3d70682d9581e9db11b0868ae4b3
 size 2847809392

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98a084e546fd6d851f7063fec6f082b51f0aceba0b14196f963fddc59794a486
 size 13990

 version https://git-lfs.github.com/spec/v1
+oid sha256:6f0307c22c9eaef5bdd0acdfa0f36120192eebe56339a218faa4b8fc8466c182
 size 13990

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00dfa93d58e416a9ddd98713a7f0497854ec0cf4ca81b8b7df525d8239e1008f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:492c71fdfddaa74e5ff08b634deba26aa598bdfa223039762d4d422cf8dc7688
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.02334967810800894,
   "eval_steps": 200,
-  "global_step": 1400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -105,6 +105,13 @@
       "learning_rate": 1.999998931473612e-05,
       "loss": 7.6066,
       "step": 1400
     }
   ],
   "logging_steps": 100,
@@ -124,7 +131,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 67825624179654.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.025017512258581006,
   "eval_steps": 200,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.999998931473612e-05,
       "loss": 7.6066,
       "step": 1400
+    },
+    {
+      "epoch": 0.025017512258581006,
+      "grad_norm": 14.43734073638916,
+      "learning_rate": 1.9999987727890814e-05,
+      "loss": 7.6378,
+      "step": 1500
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 72658998638064.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null