Training in progress, step 9800, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e10b04549b2c2e70165ffc1d8ae22026b88e479d86fb505c085edccdd341dc3
 size 1423793692

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9d3097201c581d2fc1e0ac63741fdb2046404d91572a71ec0a2d1df566229b9
 size 1423793692

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87c564637674bc4ef708b32c27681f7d62f778d13e969fff2a1586db5695deba
 size 2847809392

 version https://git-lfs.github.com/spec/v1
+oid sha256:95ea64d249ea2c1d4b60471ad44dd1911880e972b512c7a1a2c13af619a6f036
 size 2847809392

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:769069b70f19d1c4a5c8a816b7666b1d3c5714eb9ad7485b3c720d03b9793953
 size 13990

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b67ac0dc56e87afd828d12fa63e2974563d42a9d56163f968e03e8ed24c8b3d
 size 13990

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad03c43584b30749232c412b92b2efd3cecbc7fe0311c3b1cbddcf5c0550b7e3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4063efcf27ef2e21873156771b92a5297bf15faaf7ac9fcdd7d3704dc05c483
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.16177991260549052,
   "eval_steps": 200,
-  "global_step": 9700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -686,6 +686,13 @@
       "learning_rate": 1.9999483906005676e-05,
       "loss": 7.1899,
       "step": 9700
     }
   ],
   "logging_steps": 100,
@@ -705,7 +712,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 483151756878756.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.16344774675606258,
   "eval_steps": 200,
+  "global_step": 9800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9999483906005676e-05,
       "loss": 7.1899,
       "step": 9700
+    },
+    {
+      "epoch": 0.16344774675606258,
+      "grad_norm": 34.184627532958984,
+      "learning_rate": 1.9999473204590722e-05,
+      "loss": 7.0871,
+      "step": 9800
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 487830390535296.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null