Training in progress, step 49151, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40ecfe3a0c12439a9f90edc2fc9f1e2a668d7c1b656b38af5a2a2d0cfa333c8c
 size 355970836

 version https://git-lfs.github.com/spec/v1
+oid sha256:97f2edc2a659fa0ab865d96aba1342985ceee6f8b9e7e2e0bc2734e4fa8199de
 size 355970836

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e5e79d9e80c18e1c2060a8874e824cf002565f3eb04b11dd46e4e8a117cb0bb
 size 712036922

 version https://git-lfs.github.com/spec/v1
+oid sha256:f01610765b026e4073dc792fa519799ddfee84d97839ac7114420dc7fad095dc
 size 712036922

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bfa3cbc2a58dc585521846b280140b11ba732e023276e6bebe874d968d3681d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:906573e220485578a572456e29e3b97d5ff5468e5305bea1db395e9e4e84a615
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25b31a876da34080aeaad6646395178da9de75ac374aa229009a32b050d1ae89
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6efd166389749e81c893bfe8d8ade06bfd5d181d3c7a713ed9d971329711d93
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9989477733166757,
   "eval_steps": 500,
-  "global_step": 49100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -34377,6 +34377,41 @@
       "learning_rate": 6.384977135377723e-11,
       "loss": 26.9088,
       "step": 49100
     }
   ],
   "logging_steps": 10,
@@ -34391,7 +34426,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9999853769101411,
   "eval_steps": 500,
+  "global_step": 49151,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.384977135377723e-11,
       "loss": 26.9088,
       "step": 49100
+    },
+    {
+      "epoch": 0.999151225001669,
+      "grad_norm": 103247.6640625,
+      "learning_rate": 5.505419123797567e-11,
+      "loss": 25.8345,
+      "step": 49110
+    },
+    {
+      "epoch": 0.9993546766866621,
+      "grad_norm": 92363.125,
+      "learning_rate": 4.6910120859305904e-11,
+      "loss": 27.5682,
+      "step": 49120
+    },
+    {
+      "epoch": 0.9995581283716554,
+      "grad_norm": 43590.00390625,
+      "learning_rate": 3.941756361400956e-11,
+      "loss": 23.7671,
+      "step": 49130
+    },
+    {
+      "epoch": 0.9997615800566486,
+      "grad_norm": 84118.6953125,
+      "learning_rate": 3.25765226263236e-11,
+      "loss": 24.8344,
+      "step": 49140
+    },
+    {
+      "epoch": 0.9999650317416418,
+      "grad_norm": 88703.15625,
+      "learning_rate": 2.6387000748480383e-11,
+      "loss": 24.6881,
+      "step": 49150
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }