Training in progress, step 110, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +74 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43c3946a5a89d36dfa1fc36228b54d7c970649172043020b6e528b995b6b00fe
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:816a4ec8d4f9ef73c31dbbf25e73cf8ae6af0fc27b317aa556ff26eaf6178d1e
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5301d5597f3da64893b3d9e2e609104f4243ffff9c9302a9bdd9c47057329e6
 size 90365754

 version https://git-lfs.github.com/spec/v1
+oid sha256:13306704d6539b57927415351d65adc41718ea580cb04f72f858b1d721b8d475
 size 90365754

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:819e8f8d7c618e04879ebe16dcdcf92dc0610755b13a10ec192932587c41e3d1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c5ce7adad1aada30f43bdfff0900a492d19806b846335afffebce67d207df97
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df88ad9d29a5b994fc668c3ab662b1d4e6baa321c3f5068caf8ff1c21c6e351d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:eda1b3688163acb4c0de1a0c7c611576b5a46451ca11dac78f3f571adee24be0
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.004337400663644075,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 1.8310502283105023,
   "eval_steps": 25,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -747,6 +747,76 @@
       "eval_samples_per_second": 14.334,
       "eval_steps_per_second": 2.007,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -770,12 +840,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.637346415365325e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.004337400663644075,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 2.018264840182648,
   "eval_steps": 25,
+  "global_step": 110,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.334,
       "eval_steps_per_second": 2.007,
       "step": 100
+    },
+    {
+      "epoch": 1.8493150684931505,
+      "grad_norm": 0.026043424382805824,
+      "learning_rate": 5.95594714845854e-06,
+      "loss": 0.0015,
+      "step": 101
+    },
+    {
+      "epoch": 1.8675799086757991,
+      "grad_norm": 0.05837749317288399,
+      "learning_rate": 4.712525830705338e-06,
+      "loss": 0.0033,
+      "step": 102
+    },
+    {
+      "epoch": 1.8858447488584473,
+      "grad_norm": 1.0614242553710938,
+      "learning_rate": 3.6124857091878845e-06,
+      "loss": 0.0117,
+      "step": 103
+    },
+    {
+      "epoch": 1.904109589041096,
+      "grad_norm": 0.020967742428183556,
+      "learning_rate": 2.656912390696708e-06,
+      "loss": 0.001,
+      "step": 104
+    },
+    {
+      "epoch": 1.9223744292237441,
+      "grad_norm": 0.08478910475969315,
+      "learning_rate": 1.8467489107293509e-06,
+      "loss": 0.0009,
+      "step": 105
+    },
+    {
+      "epoch": 1.9406392694063928,
+      "grad_norm": 0.028337517753243446,
+      "learning_rate": 1.1827948028283352e-06,
+      "loss": 0.0007,
+      "step": 106
+    },
+    {
+      "epoch": 1.958904109589041,
+      "grad_norm": 1.1033185720443726,
+      "learning_rate": 6.657053095380005e-07,
+      "loss": 0.0129,
+      "step": 107
+    },
+    {
+      "epoch": 1.9771689497716896,
+      "grad_norm": 0.06699586659669876,
+      "learning_rate": 2.959907357592661e-07,
+      "loss": 0.0026,
+      "step": 108
+    },
+    {
+      "epoch": 1.9954337899543377,
+      "grad_norm": 0.02767285704612732,
+      "learning_rate": 7.401594514025999e-08,
+      "loss": 0.0019,
+      "step": 109
+    },
+    {
+      "epoch": 2.018264840182648,
+      "grad_norm": 0.1497020125389099,
+      "learning_rate": 0.0,
+      "loss": 0.0052,
+      "step": 110
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.878974644453376e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null