Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +188 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8520090386099f2b5ca0fe4189cf75ebcca2e7500a36121e96eaea565df1465f
 size 70430032

 version https://git-lfs.github.com/spec/v1
+oid sha256:3073c47e3bbfce7ebabf34d77a0bbcfb4a7e6bd7ef33bc89d1badeb8d523f6d3
 size 70430032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f6775ab34f9b1b674f372e6121721c1e1f0ccc394a8a1ef02d1d9d5feb00813
 size 141053442

 version https://git-lfs.github.com/spec/v1
+oid sha256:a93e05334bda20a81ef513964ed4c891c2f9bc3a6e5f92a7ca2adb8143c2fa64
 size 141053442

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58c1b7815ef970b931bc812dfacaee4900dfaccd350370a4b8f06f9c99394132
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc10f2ceaad75e87b391f885f4793bbd6b334a5bf9a82b5b61d33fc3068afd88
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d91a37b13e9375ca7e5c90cccb6df7dc680e0f082bfbd4c73123bfadc048cd18
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:497ddda6be2c603033bd7b616a0961725984b0523f1968701ed3b2e19db147e1
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4d4cb6b6c3f41f5b0ef4de8e0754723303a38f94962cbc488228d79380df4ca
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:06400d00988a30bdd2a529c4cd1ddfcb070f515ebd8d8738d13a836a89a862ed
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3e78b44bc0c2be117e00fa1bbb864139da324f671001ac1a66d365302065de2
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:02aa8e1f0747d2ca69d7b06916ee2a6b6647107a98e2500294dbbacca01610a5
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.018944327357976746,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 33.656,
       "eval_steps_per_second": 8.75,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -212,7 +395,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.47650456322048e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.03788865471595349,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 33.656,
       "eval_steps_per_second": 8.75,
       "step": 25
+    },
+    {
+      "epoch": 0.019702100452295816,
+      "grad_norm": 1.749448537826538,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 2.2472,
+      "step": 26
+    },
+    {
+      "epoch": 0.020459873546614885,
+      "grad_norm": 2.111176013946533,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 2.1888,
+      "step": 27
+    },
+    {
+      "epoch": 0.021217646640933955,
+      "grad_norm": 2.5237910747528076,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 2.5017,
+      "step": 28
+    },
+    {
+      "epoch": 0.021975419735253025,
+      "grad_norm": 2.5511085987091064,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 2.3932,
+      "step": 29
+    },
+    {
+      "epoch": 0.022733192829572094,
+      "grad_norm": 3.7789735794067383,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 2.2392,
+      "step": 30
+    },
+    {
+      "epoch": 0.023490965923891164,
+      "grad_norm": 2.9801998138427734,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 2.3539,
+      "step": 31
+    },
+    {
+      "epoch": 0.024248739018210234,
+      "grad_norm": 3.6138417720794678,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 2.4737,
+      "step": 32
+    },
+    {
+      "epoch": 0.025006512112529303,
+      "grad_norm": 5.1532440185546875,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 0.966,
+      "step": 33
+    },
+    {
+      "epoch": 0.025764285206848373,
+      "grad_norm": 7.934006690979004,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 2.3545,
+      "step": 34
+    },
+    {
+      "epoch": 0.026522058301167443,
+      "grad_norm": 6.219781875610352,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 2.1804,
+      "step": 35
+    },
+    {
+      "epoch": 0.027279831395486513,
+      "grad_norm": 7.571138381958008,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 2.1309,
+      "step": 36
+    },
+    {
+      "epoch": 0.028037604489805582,
+      "grad_norm": 6.972225666046143,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 1.5274,
+      "step": 37
+    },
+    {
+      "epoch": 0.028795377584124655,
+      "grad_norm": 1.4634031057357788,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 2.2254,
+      "step": 38
+    },
+    {
+      "epoch": 0.029553150678443725,
+      "grad_norm": 1.465823769569397,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 2.1561,
+      "step": 39
+    },
+    {
+      "epoch": 0.030310923772762795,
+      "grad_norm": 1.9862278699874878,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 2.2085,
+      "step": 40
+    },
+    {
+      "epoch": 0.031068696867081864,
+      "grad_norm": 3.049453020095825,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 1.9606,
+      "step": 41
+    },
+    {
+      "epoch": 0.031826469961400934,
+      "grad_norm": 2.612853527069092,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 2.5356,
+      "step": 42
+    },
+    {
+      "epoch": 0.032584243055720004,
+      "grad_norm": 4.496172904968262,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 2.1076,
+      "step": 43
+    },
+    {
+      "epoch": 0.033342016150039074,
+      "grad_norm": 6.122722148895264,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 2.7963,
+      "step": 44
+    },
+    {
+      "epoch": 0.03409978924435814,
+      "grad_norm": 4.462775230407715,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 2.1375,
+      "step": 45
+    },
+    {
+      "epoch": 0.03485756233867721,
+      "grad_norm": 5.868408203125,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 2.4557,
+      "step": 46
+    },
+    {
+      "epoch": 0.03561533543299628,
+      "grad_norm": 8.171089172363281,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 3.0499,
+      "step": 47
+    },
+    {
+      "epoch": 0.03637310852731535,
+      "grad_norm": 6.449460029602051,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 2.7635,
+      "step": 48
+    },
+    {
+      "epoch": 0.03713088162163442,
+      "grad_norm": 8.228046417236328,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 2.2257,
+      "step": 49
+    },
+    {
+      "epoch": 0.03788865471595349,
+      "grad_norm": 10.642376899719238,
+      "learning_rate": 1e-05,
+      "loss": 2.1021,
+      "step": 50
+    },
+    {
+      "epoch": 0.03788865471595349,
+      "eval_loss": NaN,
+      "eval_runtime": 1.5062,
+      "eval_samples_per_second": 33.195,
+      "eval_steps_per_second": 8.631,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.95300912644096e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null