Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54d3f14fe31a372e578c1dc0d8908ebe3f29e4468ffab7e5594131baadafd264
 size 1521616

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a7203d388313339015871afa1b820cb8b5f8d90b33e3857f2381bd17a09ebc6
 size 1521616

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51e9fecea2b344162123f185894c05525213e9b658c0ae1978783e7281284843
 size 3108666

 version https://git-lfs.github.com/spec/v1
+oid sha256:bd70f2ec9e75f728a64dd9c57f81a2475ced66d310550e8c5ca57d4ed393cce9
 size 3108666

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a854af56ba3062948b617f1f7e6f113b29929a2846a87db828edf4040bc3d11
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7c2f8d10e617966c24bef996b7c7920b826b0f1d68c5bcc380ff1095c8f48c0
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aac0812835628c7364c2e3558405a808e19eb6eaf3280bee3bdca0af8d01152d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:6605aa12aa0bed9fba110e1a95163dffb972ffa4879bb7b965e128a888a4a8db
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:826240b1f632bf0e955149932f493c1e6a804523a356f884a9857390e7eb30d1
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:6ae6b0a50521cefc9e91faf10816cb47209ac61a908aa7eefe1a3e81a8eb18cf
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5f0c2ba2ab01525f6de3c48f0ca5c3d0376648622e0b985e457da430b8f6efc
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:015dee1ff54880712e5f389f1e433b1ad2b05eb5f60d3f395875db4eeee243a8
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 7.168290615081787,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.8620689655172413,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 453.024,
       "eval_steps_per_second": 58.08,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 116006898892800.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 7.077526569366455,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 1.7241379310344827,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 453.024,
       "eval_steps_per_second": 58.08,
       "step": 25
+    },
+    {
+      "epoch": 0.896551724137931,
+      "grad_norm": 1.4194085597991943,
+      "learning_rate": 5e-05,
+      "loss": 6.9967,
+      "step": 26
+    },
+    {
+      "epoch": 0.9310344827586207,
+      "grad_norm": 1.587035059928894,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 7.0725,
+      "step": 27
+    },
+    {
+      "epoch": 0.9655172413793104,
+      "grad_norm": 1.8046797513961792,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 7.0556,
+      "step": 28
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.3161629438400269,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 7.5348,
+      "step": 29
+    },
+    {
+      "epoch": 1.0344827586206897,
+      "grad_norm": 1.4184800386428833,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 7.8463,
+      "step": 30
+    },
+    {
+      "epoch": 1.0689655172413792,
+      "grad_norm": 1.1865646839141846,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 7.283,
+      "step": 31
+    },
+    {
+      "epoch": 1.103448275862069,
+      "grad_norm": 1.246429443359375,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 7.1212,
+      "step": 32
+    },
+    {
+      "epoch": 1.1379310344827587,
+      "grad_norm": 1.2378802299499512,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 6.95,
+      "step": 33
+    },
+    {
+      "epoch": 1.1724137931034484,
+      "grad_norm": 1.3604445457458496,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 6.9843,
+      "step": 34
+    },
+    {
+      "epoch": 1.206896551724138,
+      "grad_norm": 1.4330813884735107,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 6.9673,
+      "step": 35
+    },
+    {
+      "epoch": 1.2413793103448276,
+      "grad_norm": 1.8923879861831665,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 7.0346,
+      "step": 36
+    },
+    {
+      "epoch": 1.2758620689655173,
+      "grad_norm": 1.2486348152160645,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 7.693,
+      "step": 37
+    },
+    {
+      "epoch": 1.3103448275862069,
+      "grad_norm": 1.0940896272659302,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 7.1865,
+      "step": 38
+    },
+    {
+      "epoch": 1.3448275862068966,
+      "grad_norm": 1.177599310874939,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 7.1623,
+      "step": 39
+    },
+    {
+      "epoch": 1.3793103448275863,
+      "grad_norm": 1.242546796798706,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 6.99,
+      "step": 40
+    },
+    {
+      "epoch": 1.4137931034482758,
+      "grad_norm": 1.2213631868362427,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 6.8713,
+      "step": 41
+    },
+    {
+      "epoch": 1.4482758620689655,
+      "grad_norm": 1.5992289781570435,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 6.9354,
+      "step": 42
+    },
+    {
+      "epoch": 1.4827586206896552,
+      "grad_norm": 1.8412748575210571,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 7.1334,
+      "step": 43
+    },
+    {
+      "epoch": 1.5172413793103448,
+      "grad_norm": 1.2640374898910522,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 7.7586,
+      "step": 44
+    },
+    {
+      "epoch": 1.5517241379310345,
+      "grad_norm": 1.0526336431503296,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 7.2043,
+      "step": 45
+    },
+    {
+      "epoch": 1.5862068965517242,
+      "grad_norm": 1.1139310598373413,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 7.1491,
+      "step": 46
+    },
+    {
+      "epoch": 1.6206896551724137,
+      "grad_norm": 1.2013845443725586,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 6.9623,
+      "step": 47
+    },
+    {
+      "epoch": 1.6551724137931034,
+      "grad_norm": 1.2519060373306274,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 6.9016,
+      "step": 48
+    },
+    {
+      "epoch": 1.6896551724137931,
+      "grad_norm": 1.4968074560165405,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 6.8076,
+      "step": 49
+    },
+    {
+      "epoch": 1.7241379310344827,
+      "grad_norm": 1.561446189880371,
+      "learning_rate": 0.0,
+      "loss": 6.843,
+      "step": 50
+    },
+    {
+      "epoch": 1.7241379310344827,
+      "eval_loss": 7.077526569366455,
+      "eval_runtime": 0.429,
+      "eval_samples_per_second": 454.519,
+      "eval_steps_per_second": 58.272,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 232013797785600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null