Training in progress, step 95, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +144 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be992a161695a4dac5a9f77be39443ed60c63ddae05bc57d8053443e345f9506
 size 166182480

 version https://git-lfs.github.com/spec/v1
+oid sha256:b999115b6df7a381fab7604f66e4d51a0cf1f976b0e2b684bed9a8f41d6fa1f7
 size 166182480

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:807109ba001f38b278cc17333e68ff3a853562f2cc87fac19435533332b70839
 size 332574358

 version https://git-lfs.github.com/spec/v1
+oid sha256:aca4a222f47e6ab792c0375fce2bf7aaa549d4c173f9a3eb7c6bb3577433a851
 size 332574358

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74fbdb24bbb7f1a23abb124ce4728c07d6c90bdb4ac7d58bfd88dae2d675f02d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d39a51cba14ee5517dd7211234366310c9fe5cd1978037f831eeca410dd7cd3b
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54a1e7643f3a17e01a320f4f18782654ea34853ce31b95cf58724bea0701e829
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea833ddabd67b4da85b2f72ae4352700595ab583a3c7dfc08a4c3e0fc8d033ad
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e4797be13d57c6026b4d4edc4b60c1aef6fec4f42f525d49e6bc2393434b157
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:42e232d37078476b9d840b312d87e2e7f4b19b21ae46845588655d8277dbc435
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a49a618d6fe2cb02958c4d240c002e254d892a629ebad0b005364212d46c782
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b194c05c48554a14357fab2f061783a77faf28bba2a1ea54483270d9b95c9114
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5b53655d80c3ade692dacae57cafa4aff84c325b5cb8d0fba89d01b50d41566
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1fd9769bcb04586b7ead175b588f12a0bbbeba72d5f308b868ba8f36b77aa801
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.28790074586868286,
   "best_model_checkpoint": "miner_id_24/checkpoint-75",
-  "epoch": 0.4136504653567735,
   "eval_steps": 25,
-  "global_step": 75,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -564,6 +564,146 @@
       "eval_samples_per_second": 23.869,
       "eval_steps_per_second": 6.206,
       "step": 75
     }
   ],
   "logging_steps": 1,
@@ -587,12 +727,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.43721341763584e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.28790074586868286,
   "best_model_checkpoint": "miner_id_24/checkpoint-75",
+  "epoch": 0.5239572561185798,
   "eval_steps": 25,
+  "global_step": 95,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.869,
       "eval_steps_per_second": 6.206,
       "step": 75
+    },
+    {
+      "epoch": 0.4191658048948638,
+      "grad_norm": 8.318974494934082,
+      "learning_rate": 1.91437699862843e-05,
+      "loss": 0.6345,
+      "step": 76
+    },
+    {
+      "epoch": 0.4246811444329541,
+      "grad_norm": 8.486698150634766,
+      "learning_rate": 1.8236354814530112e-05,
+      "loss": 0.6599,
+      "step": 77
+    },
+    {
+      "epoch": 0.4301964839710445,
+      "grad_norm": 8.49923038482666,
+      "learning_rate": 1.7371804408538024e-05,
+      "loss": 0.6645,
+      "step": 78
+    },
+    {
+      "epoch": 0.4357118235091348,
+      "grad_norm": 8.403206825256348,
+      "learning_rate": 1.6551126795408016e-05,
+      "loss": 0.657,
+      "step": 79
+    },
+    {
+      "epoch": 0.4412271630472251,
+      "grad_norm": 8.299516677856445,
+      "learning_rate": 1.577527884852619e-05,
+      "loss": 0.6464,
+      "step": 80
+    },
+    {
+      "epoch": 0.4467425025853154,
+      "grad_norm": 1.9985052347183228,
+      "learning_rate": 1.5045165171893116e-05,
+      "loss": 0.1938,
+      "step": 81
+    },
+    {
+      "epoch": 0.4522578421234057,
+      "grad_norm": 0.7599959969520569,
+      "learning_rate": 1.4361637045396029e-05,
+      "loss": 0.0196,
+      "step": 82
+    },
+    {
+      "epoch": 0.45777318166149605,
+      "grad_norm": 0.8468267917633057,
+      "learning_rate": 1.3725491432254624e-05,
+      "loss": 0.0219,
+      "step": 83
+    },
+    {
+      "epoch": 0.46328852119958636,
+      "grad_norm": 0.8709926009178162,
+      "learning_rate": 1.313747004979751e-05,
+      "loss": 0.0224,
+      "step": 84
+    },
+    {
+      "epoch": 0.46880386073767666,
+      "grad_norm": 0.857819676399231,
+      "learning_rate": 1.2598258504653081e-05,
+      "loss": 0.0221,
+      "step": 85
+    },
+    {
+      "epoch": 0.47431920027576696,
+      "grad_norm": 0.807769775390625,
+      "learning_rate": 1.2108485493362765e-05,
+      "loss": 0.0207,
+      "step": 86
+    },
+    {
+      "epoch": 0.47983453981385726,
+      "grad_norm": 0.8083691000938416,
+      "learning_rate": 1.1668722069349041e-05,
+      "loss": 0.0209,
+      "step": 87
+    },
+    {
+      "epoch": 0.4853498793519476,
+      "grad_norm": 4.770956993103027,
+      "learning_rate": 1.1279480977092635e-05,
+      "loss": 0.3815,
+      "step": 88
+    },
+    {
+      "epoch": 0.4908652188900379,
+      "grad_norm": 8.195169448852539,
+      "learning_rate": 1.094121605429547e-05,
+      "loss": 0.6112,
+      "step": 89
+    },
+    {
+      "epoch": 0.4963805584281282,
+      "grad_norm": 8.203980445861816,
+      "learning_rate": 1.0654321702726141e-05,
+      "loss": 0.6149,
+      "step": 90
+    },
+    {
+      "epoch": 0.5018958979662186,
+      "grad_norm": 8.144838333129883,
+      "learning_rate": 1.0419132428365116e-05,
+      "loss": 0.609,
+      "step": 91
+    },
+    {
+      "epoch": 0.5074112375043088,
+      "grad_norm": 8.066883087158203,
+      "learning_rate": 1.0235922451385733e-05,
+      "loss": 0.5882,
+      "step": 92
+    },
+    {
+      "epoch": 0.5129265770423992,
+      "grad_norm": 6.3228607177734375,
+      "learning_rate": 1.0104905386425733e-05,
+      "loss": 0.4687,
+      "step": 93
+    },
+    {
+      "epoch": 0.5184419165804894,
+      "grad_norm": 0.907468318939209,
+      "learning_rate": 1.002623399352217e-05,
+      "loss": 0.0236,
+      "step": 94
+    },
+    {
+      "epoch": 0.5239572561185798,
+      "grad_norm": 1.0146715641021729,
+      "learning_rate": 1e-05,
+      "loss": 0.0264,
+      "step": 95
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.087136995672064e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null