Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:905784b1048f98c95ff6cc21673082d54460e712d2986f623fc9d47d38372233
 size 101752088

 version https://git-lfs.github.com/spec/v1
+oid sha256:4caef3788f3c565e373be5a1f9c0719b7a9d5cb25b70139c8780763372d826f5
 size 101752088

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:235383a798ef5ffa7609f41f8c071ca2557a64f3410600093a56a4b48ce101c1
 size 203713238

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba05cec15f38fa9d8808f3863bc871e28f216ce221888f2e188a1ad1f3a5157f
 size 203713238

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac2b9be23aca6b3c6b606296f233023852b331af6ec8b655164d21179af9d619
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b279027b949ce75b86e736686644cad9e408595f5e19a07d24feaa42f342c507
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8031909465789795,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.08006405124099279,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 5.854,
       "eval_steps_per_second": 0.82,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.0600410333708288e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7776356339454651,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.10675206832132372,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.854,
       "eval_steps_per_second": 0.82,
       "step": 150
+    },
+    {
+      "epoch": 0.08059781158259942,
+      "grad_norm": 0.22424638271331787,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.8929,
+      "step": 151
+    },
+    {
+      "epoch": 0.08113157192420603,
+      "grad_norm": 0.21926997601985931,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.7804,
+      "step": 152
+    },
+    {
+      "epoch": 0.08166533226581266,
+      "grad_norm": 0.1946469396352768,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 0.5875,
+      "step": 153
+    },
+    {
+      "epoch": 0.08219909260741927,
+      "grad_norm": 0.224387064576149,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 0.5804,
+      "step": 154
+    },
+    {
+      "epoch": 0.08273285294902588,
+      "grad_norm": 0.19957810640335083,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 0.6845,
+      "step": 155
+    },
+    {
+      "epoch": 0.08326661329063251,
+      "grad_norm": 0.19750872254371643,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 0.7725,
+      "step": 156
+    },
+    {
+      "epoch": 0.08380037363223912,
+      "grad_norm": 0.19317828118801117,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 0.7535,
+      "step": 157
+    },
+    {
+      "epoch": 0.08433413397384575,
+      "grad_norm": 0.18441618978977203,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 0.727,
+      "step": 158
+    },
+    {
+      "epoch": 0.08486789431545236,
+      "grad_norm": 0.17659269273281097,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 0.7734,
+      "step": 159
+    },
+    {
+      "epoch": 0.08540165465705898,
+      "grad_norm": 0.1908242404460907,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 0.6538,
+      "step": 160
+    },
+    {
+      "epoch": 0.0859354149986656,
+      "grad_norm": 0.16535666584968567,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.6792,
+      "step": 161
+    },
+    {
+      "epoch": 0.08646917534027222,
+      "grad_norm": 0.1628248691558838,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.6498,
+      "step": 162
+    },
+    {
+      "epoch": 0.08700293568187883,
+      "grad_norm": 0.15709134936332703,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 0.7424,
+      "step": 163
+    },
+    {
+      "epoch": 0.08753669602348546,
+      "grad_norm": 0.15620659291744232,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 0.622,
+      "step": 164
+    },
+    {
+      "epoch": 0.08807045636509207,
+      "grad_norm": 0.15363053977489471,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.6708,
+      "step": 165
+    },
+    {
+      "epoch": 0.0886042167066987,
+      "grad_norm": 0.17389169335365295,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 0.7038,
+      "step": 166
+    },
+    {
+      "epoch": 0.08913797704830531,
+      "grad_norm": 0.17458435893058777,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 0.5587,
+      "step": 167
+    },
+    {
+      "epoch": 0.08967173738991192,
+      "grad_norm": 0.18360799551010132,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.7867,
+      "step": 168
+    },
+    {
+      "epoch": 0.09020549773151855,
+      "grad_norm": 0.17545649409294128,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 0.7613,
+      "step": 169
+    },
+    {
+      "epoch": 0.09073925807312516,
+      "grad_norm": 0.20500288903713226,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 0.6963,
+      "step": 170
+    },
+    {
+      "epoch": 0.09127301841473179,
+      "grad_norm": 0.1982969343662262,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 0.7627,
+      "step": 171
+    },
+    {
+      "epoch": 0.0918067787563384,
+      "grad_norm": 0.20568282902240753,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 0.7856,
+      "step": 172
+    },
+    {
+      "epoch": 0.09234053909794503,
+      "grad_norm": 0.24100317060947418,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 0.6432,
+      "step": 173
+    },
+    {
+      "epoch": 0.09287429943955164,
+      "grad_norm": 0.23193538188934326,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 0.8268,
+      "step": 174
+    },
+    {
+      "epoch": 0.09340805978115826,
+      "grad_norm": 0.2548185884952545,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 0.7186,
+      "step": 175
+    },
+    {
+      "epoch": 0.09340805978115826,
+      "eval_loss": 0.7821811437606812,
+      "eval_runtime": 8.5383,
+      "eval_samples_per_second": 5.856,
+      "eval_steps_per_second": 0.82,
+      "step": 175
+    },
+    {
+      "epoch": 0.09394182012276488,
+      "grad_norm": 0.2970718741416931,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 0.6491,
+      "step": 176
+    },
+    {
+      "epoch": 0.0944755804643715,
+      "grad_norm": 0.3625304400920868,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 0.6183,
+      "step": 177
+    },
+    {
+      "epoch": 0.09500934080597812,
+      "grad_norm": 0.3171631395816803,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 0.8154,
+      "step": 178
+    },
+    {
+      "epoch": 0.09554310114758474,
+      "grad_norm": 0.26550373435020447,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 0.7494,
+      "step": 179
+    },
+    {
+      "epoch": 0.09607686148919135,
+      "grad_norm": 0.3588647246360779,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 0.5587,
+      "step": 180
+    },
+    {
+      "epoch": 0.09661062183079797,
+      "grad_norm": 0.32790133357048035,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 0.5329,
+      "step": 181
+    },
+    {
+      "epoch": 0.09714438217240459,
+      "grad_norm": 0.4465741515159607,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 0.3294,
+      "step": 182
+    },
+    {
+      "epoch": 0.0976781425140112,
+      "grad_norm": 0.33372461795806885,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 0.386,
+      "step": 183
+    },
+    {
+      "epoch": 0.09821190285561783,
+      "grad_norm": 0.38536709547042847,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 0.6134,
+      "step": 184
+    },
+    {
+      "epoch": 0.09874566319722444,
+      "grad_norm": 0.3927061855792999,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 0.3782,
+      "step": 185
+    },
+    {
+      "epoch": 0.09927942353883107,
+      "grad_norm": 0.41644033789634705,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 0.3936,
+      "step": 186
+    },
+    {
+      "epoch": 0.09981318388043768,
+      "grad_norm": 0.38533538579940796,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 0.478,
+      "step": 187
+    },
+    {
+      "epoch": 0.10034694422204431,
+      "grad_norm": 0.3394060730934143,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 0.5653,
+      "step": 188
+    },
+    {
+      "epoch": 0.10088070456365092,
+      "grad_norm": 0.47515639662742615,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 0.6107,
+      "step": 189
+    },
+    {
+      "epoch": 0.10141446490525755,
+      "grad_norm": 0.7346295118331909,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 0.3244,
+      "step": 190
+    },
+    {
+      "epoch": 0.10194822524686416,
+      "grad_norm": 0.41510871052742004,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 0.5225,
+      "step": 191
+    },
+    {
+      "epoch": 0.10248198558847077,
+      "grad_norm": 0.43027785420417786,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 0.6227,
+      "step": 192
+    },
+    {
+      "epoch": 0.1030157459300774,
+      "grad_norm": 0.4818989932537079,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 0.6171,
+      "step": 193
+    },
+    {
+      "epoch": 0.10354950627168401,
+      "grad_norm": 0.4520052373409271,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 0.5551,
+      "step": 194
+    },
+    {
+      "epoch": 0.10408326661329063,
+      "grad_norm": 0.4551260769367218,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 0.4864,
+      "step": 195
+    },
+    {
+      "epoch": 0.10461702695489725,
+      "grad_norm": 0.5285947918891907,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 0.7324,
+      "step": 196
+    },
+    {
+      "epoch": 0.10515078729650387,
+      "grad_norm": 0.6320045590400696,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 0.7488,
+      "step": 197
+    },
+    {
+      "epoch": 0.10568454763811048,
+      "grad_norm": 0.6292834877967834,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 0.9015,
+      "step": 198
+    },
+    {
+      "epoch": 0.10621830797971711,
+      "grad_norm": 0.8106058239936829,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 1.0793,
+      "step": 199
+    },
+    {
+      "epoch": 0.10675206832132372,
+      "grad_norm": 1.118652582168579,
+      "learning_rate": 0.0,
+      "loss": 1.5208,
+      "step": 200
+    },
+    {
+      "epoch": 0.10675206832132372,
+      "eval_loss": 0.7776356339454651,
+      "eval_runtime": 8.533,
+      "eval_samples_per_second": 5.86,
+      "eval_steps_per_second": 0.82,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.4133880444944384e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null