Nexspear committed
Commit 16b1684 · verified · 1 Parent(s): f1abeb5

Training in progress, step 361, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:097d4fdc64d9ee31a96db161db9847ba9166f14e21a958dcd74c46699dc78bed
+oid sha256:e67a979db55d6147b4bb468cc931d0dc8300c8fa1ab3c13b8c292d544c3bd1e3
 size 156926880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:599ab671ddb516283dd476bb8dab0a2d13c74dd97a085f6c7370357fb354d6db
+oid sha256:9d339f91f68c41787265c459e58cdd100e6584ccd25d3117f3c5009d953ffe0e
 size 79968964
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da88a4b66e5e83e157425af0894871d0d630078858be2e7385a95662ae93763b
+oid sha256:7f80ea97359604fbd8013d8795dac270f4675ca2d0b7dc0a23dab94f1eb1a2d7
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6e032ea5899f5eb25699f91880bee459f3b4e6bc3c26d1b136de1a7fd506249
+oid sha256:b857be7b95ff8324c4727de3c0f481a268cea8c6e2533b10d776846f18e23993
 size 1064
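
The four checkpoint binaries above are tracked with Git LFS, so the repository stores only a pointer per file (spec version, `oid sha256:` digest, byte size); this commit swaps each digest while the sizes stay unchanged. A minimal sketch for checking a downloaded file against its pointer, assuming the files have been pulled into a local last-checkpoint/ directory (the path and helper name are illustrative):

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so large checkpoints do not need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid for last-checkpoint/adapter_model.safetensors from the diff above.
expected = "e67a979db55d6147b4bb468cc931d0dc8300c8fa1ab3c13b8c292d544c3bd1e3"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")  # assumed local path
print("match" if actual == expected else "mismatch")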
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.32943063974380493,
   "best_model_checkpoint": "miner_id_24/checkpoint-350",
-  "epoch": 2.908713692946058,
+  "epoch": 3.0020746887966805,
   "eval_steps": 50,
-  "global_step": 350,
+  "global_step": 361,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2521,6 +2521,83 @@
       "eval_samples_per_second": 28.433,
       "eval_steps_per_second": 7.143,
       "step": 350
+    },
+    {
+      "epoch": 2.91701244813278,
+      "grad_norm": 1.8393205404281616,
+      "learning_rate": 2.0014077392525031e-07,
+      "loss": 0.0879,
+      "step": 351
+    },
+    {
+      "epoch": 2.9253112033195023,
+      "grad_norm": 1.0853371620178223,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 0.0175,
+      "step": 352
+    },
+    {
+      "epoch": 2.9336099585062243,
+      "grad_norm": 1.8038127422332764,
+      "learning_rate": 1.281208861894201e-07,
+      "loss": 0.0201,
+      "step": 353
+    },
+    {
+      "epoch": 2.9419087136929463,
+      "grad_norm": 0.9513048529624939,
+      "learning_rate": 9.810237743724803e-08,
+      "loss": 0.0115,
+      "step": 354
+    },
+    {
+      "epoch": 2.9502074688796682,
+      "grad_norm": 0.7663481831550598,
+      "learning_rate": 7.208147179291192e-08,
+      "loss": 0.0143,
+      "step": 355
+    },
+    {
+      "epoch": 2.95850622406639,
+      "grad_norm": 1.1975268125534058,
+      "learning_rate": 5.006025377138901e-08,
+      "loss": 0.0185,
+      "step": 356
+    },
+    {
+      "epoch": 2.966804979253112,
+      "grad_norm": 1.4600636959075928,
+      "learning_rate": 3.2040487475731854e-08,
+      "loss": 0.0263,
+      "step": 357
+    },
+    {
+      "epoch": 2.975103734439834,
+      "grad_norm": 1.718473196029663,
+      "learning_rate": 1.802361645573125e-08,
+      "loss": 0.0435,
+      "step": 358
+    },
+    {
+      "epoch": 2.983402489626556,
+      "grad_norm": 1.1524897813796997,
+      "learning_rate": 8.010763592264381e-09,
+      "loss": 0.024,
+      "step": 359
+    },
+    {
+      "epoch": 2.991701244813278,
+      "grad_norm": 5.361166954040527,
+      "learning_rate": 2.0027310073833518e-09,
+      "loss": 0.0739,
+      "step": 360
+    },
+    {
+      "epoch": 3.0020746887966805,
+      "grad_norm": 4.823180675506592,
+      "learning_rate": 0.0,
+      "loss": 0.1414,
+      "step": 361
     }
   ],
   "logging_steps": 1,
@@ -2544,12 +2621,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.390780807643136e+17,
+  "total_flos": 1.4344910615976346e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null