Training in progress, step 166, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7a4badba070813dd4dffb29191acb613764e44d167a24151aa79e92b18167bd
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f2c775e4545903835c7d87abda98c1a26cbf4701f00d7baddc13d768e2697ff
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ca5c092981a6a8c077240e302146e7134995fe3fd122344acd6dd77967b9925
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:25c8963ce2ba916ef8eab829306053aacb2d91470fc8a1676d098d2665cd90e9
 size 671466706

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6134ac16e34c98d433ebbee4c57656cf7fa4d7418acdba1cf90ca8f2fc421ff1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc0b191b4797e200d61c94545745999fce4bd5da0439819c3a09163bf991f35f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:442d35fdd8f035149dd189332292077851133dce57ad65477bd773e133f2c810
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b2ce6896037c31b7758572f9a2fb930fa6d093ae265425c5633672a67c2eba0
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.06985995918512344,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 2.229447282861124,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,118 @@
       "eval_samples_per_second": 3.984,
       "eval_steps_per_second": 3.984,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1248,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.4863203567049114e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.06985995918512344,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 2.4672549930329772,
   "eval_steps": 25,
+  "global_step": 166,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.984,
       "eval_steps_per_second": 3.984,
       "step": 150
+    },
+    {
+      "epoch": 2.244310264746865,
+      "grad_norm": 0.31291377544403076,
+      "learning_rate": 4.794673694696306e-06,
+      "loss": 0.0571,
+      "step": 151
+    },
+    {
+      "epoch": 2.2591732466326055,
+      "grad_norm": 0.3577059805393219,
+      "learning_rate": 4.692911722106433e-06,
+      "loss": 0.0726,
+      "step": 152
+    },
+    {
+      "epoch": 2.2740362285183466,
+      "grad_norm": 0.4630509614944458,
+      "learning_rate": 4.597991941755332e-06,
+      "loss": 0.0784,
+      "step": 153
+    },
+    {
+      "epoch": 2.2888992104040873,
+      "grad_norm": 0.4654407501220703,
+      "learning_rate": 4.50995187927262e-06,
+      "loss": 0.081,
+      "step": 154
+    },
+    {
+      "epoch": 2.3037621922898284,
+      "grad_norm": 0.532038152217865,
+      "learning_rate": 4.428826340457088e-06,
+      "loss": 0.0843,
+      "step": 155
+    },
+    {
+      "epoch": 2.318625174175569,
+      "grad_norm": 0.4133490324020386,
+      "learning_rate": 4.354647397516551e-06,
+      "loss": 0.0635,
+      "step": 156
+    },
+    {
+      "epoch": 2.3334881560613097,
+      "grad_norm": 0.49163010716438293,
+      "learning_rate": 4.287444376388429e-06,
+      "loss": 0.0773,
+      "step": 157
+    },
+    {
+      "epoch": 2.3483511379470507,
+      "grad_norm": 0.4378342628479004,
+      "learning_rate": 4.227243845146e-06,
+      "loss": 0.0575,
+      "step": 158
+    },
+    {
+      "epoch": 2.3632141198327914,
+      "grad_norm": 0.5189927220344543,
+      "learning_rate": 4.174069603494967e-06,
+      "loss": 0.0468,
+      "step": 159
+    },
+    {
+      "epoch": 2.3780771017185325,
+      "grad_norm": 0.3820410370826721,
+      "learning_rate": 4.127942673364479e-06,
+      "loss": 0.0488,
+      "step": 160
+    },
+    {
+      "epoch": 2.392940083604273,
+      "grad_norm": 0.3878793716430664,
+      "learning_rate": 4.088881290596307e-06,
+      "loss": 0.0475,
+      "step": 161
+    },
+    {
+      "epoch": 2.4078030654900138,
+      "grad_norm": 0.34497255086898804,
+      "learning_rate": 4.0569008977354756e-06,
+      "loss": 0.0365,
+      "step": 162
+    },
+    {
+      "epoch": 2.422666047375755,
+      "grad_norm": 0.32925593852996826,
+      "learning_rate": 4.032014137925207e-06,
+      "loss": 0.0354,
+      "step": 163
+    },
+    {
+      "epoch": 2.4375290292614955,
+      "grad_norm": 0.37827885150909424,
+      "learning_rate": 4.014230849908567e-06,
+      "loss": 0.0354,
+      "step": 164
+    },
+    {
+      "epoch": 2.4523920111472366,
+      "grad_norm": 0.31829050183296204,
+      "learning_rate": 4.003558064138821e-06,
+      "loss": 0.033,
+      "step": 165
+    },
+    {
+      "epoch": 2.4672549930329772,
+      "grad_norm": 0.4024122655391693,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.0324,
+      "step": 166
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.965632072220672e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null