Training in progress, step 263, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +95 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cce123c0683ef9cd6975eb4e7a421ac1277037ddb1f56ad868f4857ff4175af
 size 13587864

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7ce00d367a44025a1b2da50a0ae92cee71ebcbcead329d510ab87a96278e546
 size 13587864

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8682728647446457022a625059062c8515250852f037491dc6e4ba299841c1d0
 size 27273018

 version https://git-lfs.github.com/spec/v1
+oid sha256:95f45bad474ddce38decca2703f28ddd5b9a0a09fe2b16720a3653880f6a0601
 size 27273018

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:050e050c2c08370bd1f9ba1c8620beabe7ff029584c29e4cd6c089022f033e90
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:046b00f812867968a1818522ac11681f858ea22743cfa05206d826f8045b9556
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cf8a26ba280c3e7ef5570adf2f8b789f2d472d32e5416b32aa7525b9226e0be
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd2ead64f33343a280abdcb643df0d01950c3c0b6535ac38a3a98d6ac73c83cd
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3fbc12a030d5fdd31b311d40ef25f1be4dcd3dcc88032d5c293f225b052d01f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1aabf29ec0654883753d299afbd096dd4035a6709b53bf61f2dfd484c605863
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39522c45970a3ae5ecbf73e1051ca4989117a8b949dcd4b7f4ee9663321048b0
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:9cc7c048ba9c68e39fb9b9445846747d668b45aba649c5a4c4593f02246b45f0
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8f008ed445b652736016defc4807eff9b5ec48a500ab7e9db898ce35023867e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:31fa8d31bc0f0c1d5f52cc48b24cec7fdded44317d4f4282fb7b9258ac0bfb34
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6708096265792847,
   "best_model_checkpoint": "miner_id_24/checkpoint-250",
-  "epoch": 1.9065776930409915,
   "eval_steps": 25,
-  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1845,6 +1845,97 @@
       "eval_samples_per_second": 178.136,
       "eval_steps_per_second": 46.315,
       "step": 250
     }
   ],
   "logging_steps": 1,
@@ -1868,12 +1959,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.11070068867072e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.6708096265792847,
   "best_model_checkpoint": "miner_id_24/checkpoint-250",
+  "epoch": 2.0057197330791228,
   "eval_steps": 25,
+  "global_step": 263,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 178.136,
       "eval_steps_per_second": 46.315,
       "step": 250
+    },
+    {
+      "epoch": 1.9142040038131554,
+      "grad_norm": 0.4055742621421814,
+      "learning_rate": 3.146117115475456e-05,
+      "loss": 1.6014,
+      "step": 251
+    },
+    {
+      "epoch": 1.9218303145853195,
+      "grad_norm": 0.3974815607070923,
+      "learning_rate": 3.1228144217249694e-05,
+      "loss": 1.5712,
+      "step": 252
+    },
+    {
+      "epoch": 1.9294566253574832,
+      "grad_norm": 0.4176608920097351,
+      "learning_rate": 3.1015262829174156e-05,
+      "loss": 1.5899,
+      "step": 253
+    },
+    {
+      "epoch": 1.9370829361296473,
+      "grad_norm": 0.4495166540145874,
+      "learning_rate": 3.082255904968193e-05,
+      "loss": 1.636,
+      "step": 254
+    },
+    {
+      "epoch": 1.9447092469018112,
+      "grad_norm": 0.46961352229118347,
+      "learning_rate": 3.065006189925343e-05,
+      "loss": 1.6164,
+      "step": 255
+    },
+    {
+      "epoch": 1.9523355576739752,
+      "grad_norm": 0.536354124546051,
+      "learning_rate": 3.049779735532497e-05,
+      "loss": 1.7097,
+      "step": 256
+    },
+    {
+      "epoch": 1.9599618684461393,
+      "grad_norm": 0.41169190406799316,
+      "learning_rate": 3.036578834837682e-05,
+      "loss": 1.554,
+      "step": 257
+    },
+    {
+      "epoch": 1.967588179218303,
+      "grad_norm": 0.3979185223579407,
+      "learning_rate": 3.025405475847986e-05,
+      "loss": 1.5038,
+      "step": 258
+    },
+    {
+      "epoch": 1.9752144899904671,
+      "grad_norm": 0.3882528245449066,
+      "learning_rate": 3.0162613412301724e-05,
+      "loss": 1.5635,
+      "step": 259
+    },
+    {
+      "epoch": 1.982840800762631,
+      "grad_norm": 0.4156327247619629,
+      "learning_rate": 3.0091478080572808e-05,
+      "loss": 1.61,
+      "step": 260
+    },
+    {
+      "epoch": 1.990467111534795,
+      "grad_norm": 0.4123241603374481,
+      "learning_rate": 3.0040659476012428e-05,
+      "loss": 1.6104,
+      "step": 261
+    },
+    {
+      "epoch": 1.998093422306959,
+      "grad_norm": 0.45598259568214417,
+      "learning_rate": 3.0010165251715492e-05,
+      "loss": 1.6804,
+      "step": 262
+    },
+    {
+      "epoch": 2.0057197330791228,
+      "grad_norm": 1.43690025806427,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 3.2788,
+      "step": 263
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1684571244815974e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null