TracyTank committed on
Commit 8557200
1 parent: 9af8f1c

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3509006e05941335e4fedcccc62b6832c8759f0f916e4e9a846259e72704b21d
+oid sha256:385d8ae5b31ac1044bd05487dee0344d9bb0535cf0ce71bb97a06b8f8e2e9bc2
 size 201361312
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00b98fc13970f172c1ed29b9c10b8b2fab993bc701f5cbfc3cd423e7b49ae0e1
+oid sha256:5194b987daaeac7e7a7c5eb439efe6e2e158c3c86dcd9b0fa46a06505a58a80c
 size 402868986
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98ccd11b00bae3ef4bff0ec9d8a4986ff4b41a3d490090c8bc6df9ebdefb9df5
+oid sha256:93fdba768be06ecb8d82beabc762980b75e4fa86779fa23b425a671cbaeb6a53
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b78362f5c4037f0720a9a9b1662f4ed2a533a35224c64e3bc01c30ecdcb5c45c
+oid sha256:d77a9f61cd76d47af6f298eb031e99333cdc3de06b4afd8cf98603a3262646be
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa14929c99d7e7deee3d9367cf63b13218efd66c131ea7746a041755a61d83ed
+oid sha256:b46a016ca20d80bbf46c87a0b3176f96735503cb591437a6e800b470845d4e5b
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:372e8783649c8077b23efca291d26437d974a10ece7beca379e4196cdeb4020a
+oid sha256:3ac2bcc26c39f904b42cb54269d75b9fc51ac51e591642a9f442286b9a0964ff
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9972949028015137,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.970873786407767,
+  "best_metric": 0.9236457347869873,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 1.941747572815534,
   "eval_steps": 25,
-  "global_step": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 47.238,
       "eval_steps_per_second": 12.282,
       "step": 25
+    },
+    {
+      "epoch": 1.0097087378640777,
+      "grad_norm": 25.719419479370117,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 16.4112,
+      "step": 26
+    },
+    {
+      "epoch": 1.0485436893203883,
+      "grad_norm": 29.739490509033203,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 13.7014,
+      "step": 27
+    },
+    {
+      "epoch": 1.087378640776699,
+      "grad_norm": 34.622406005859375,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 14.9217,
+      "step": 28
+    },
+    {
+      "epoch": 1.1262135922330097,
+      "grad_norm": 30.102741241455078,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 17.4172,
+      "step": 29
+    },
+    {
+      "epoch": 1.1650485436893203,
+      "grad_norm": 11.93884563446045,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 14.4702,
+      "step": 30
+    },
+    {
+      "epoch": 1.203883495145631,
+      "grad_norm": 6.051325798034668,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 18.1343,
+      "step": 31
+    },
+    {
+      "epoch": 1.2427184466019416,
+      "grad_norm": 21.685884475708008,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 18.9043,
+      "step": 32
+    },
+    {
+      "epoch": 1.2815533980582523,
+      "grad_norm": 103.92913055419922,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 17.5716,
+      "step": 33
+    },
+    {
+      "epoch": 1.3203883495145632,
+      "grad_norm": 41.143226623535156,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 16.5813,
+      "step": 34
+    },
+    {
+      "epoch": 1.3592233009708738,
+      "grad_norm": 13.322393417358398,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 17.0534,
+      "step": 35
+    },
+    {
+      "epoch": 1.3980582524271845,
+      "grad_norm": 26.480480194091797,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 14.6343,
+      "step": 36
+    },
+    {
+      "epoch": 1.4368932038834952,
+      "grad_norm": 4.912123680114746,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 17.8308,
+      "step": 37
+    },
+    {
+      "epoch": 1.4757281553398058,
+      "grad_norm": 7.7494282722473145,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 18.1934,
+      "step": 38
+    },
+    {
+      "epoch": 1.5145631067961165,
+      "grad_norm": 28.394657135009766,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 15.4537,
+      "step": 39
+    },
+    {
+      "epoch": 1.5533980582524272,
+      "grad_norm": 29.47159767150879,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 12.7481,
+      "step": 40
+    },
+    {
+      "epoch": 1.5922330097087378,
+      "grad_norm": 23.724668502807617,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 16.9,
+      "step": 41
+    },
+    {
+      "epoch": 1.6310679611650487,
+      "grad_norm": 23.437341690063477,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 15.56,
+      "step": 42
+    },
+    {
+      "epoch": 1.6699029126213594,
+      "grad_norm": 17.16952133178711,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 16.7025,
+      "step": 43
+    },
+    {
+      "epoch": 1.70873786407767,
+      "grad_norm": 16.27042579650879,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 17.9029,
+      "step": 44
+    },
+    {
+      "epoch": 1.7475728155339807,
+      "grad_norm": 16.81924057006836,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 14.544,
+      "step": 45
+    },
+    {
+      "epoch": 1.7864077669902914,
+      "grad_norm": 17.303743362426758,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 11.5598,
+      "step": 46
+    },
+    {
+      "epoch": 1.825242718446602,
+      "grad_norm": 24.92502212524414,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 15.3898,
+      "step": 47
+    },
+    {
+      "epoch": 1.8640776699029127,
+      "grad_norm": 12.400360107421875,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 16.3068,
+      "step": 48
+    },
+    {
+      "epoch": 1.9029126213592233,
+      "grad_norm": 11.315818786621094,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 16.1665,
+      "step": 49
+    },
+    {
+      "epoch": 1.941747572815534,
+      "grad_norm": 8.587471961975098,
+      "learning_rate": 1e-05,
+      "loss": 17.7138,
+      "step": 50
+    },
+    {
+      "epoch": 1.941747572815534,
+      "eval_loss": 0.9236457347869873,
+      "eval_runtime": 1.0598,
+      "eval_samples_per_second": 47.178,
+      "eval_steps_per_second": 12.266,
+      "step": 50
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.483568811343872e+17,
+  "total_flos": 2.967137622687744e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null