Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:215714e8437d1c15e8e1702f14a0fcc73a2df366dda8205685018bd671c3a63d
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1359abdfb3fe98806cdf998eaf6e65d146d41a84c99d19b2b980f045e404a3c
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df437e55b7901979152758c329bfb7327b141cc02264ea5bd0dcd0c39442eaa9
 size 118090

 version https://git-lfs.github.com/spec/v1
+oid sha256:66fc3ad0a41fe3d5558f3364622386981177a93cee55d3b96fd263bb059903e2
 size 118090

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50b31a12abb0fe787dcc02726d9a38040feab13cc1f429dd559b86503b46862c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:af32999e9f2aefe713c3a9758402866078ae7081ccfdbf7357bdeccab2cc807e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.3402681350708,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.006003782382901228,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 347.135,
       "eval_steps_per_second": 48.599,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 16838118899712.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.339714050292969,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.008005043177201637,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 347.135,
       "eval_steps_per_second": 48.599,
       "step": 150
+    },
+    {
+      "epoch": 0.006043807598787236,
+      "grad_norm": 0.03880951926112175,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 10.3405,
+      "step": 151
+    },
+    {
+      "epoch": 0.006083832814673244,
+      "grad_norm": 0.0317707322537899,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 10.3416,
+      "step": 152
+    },
+    {
+      "epoch": 0.006123858030559252,
+      "grad_norm": 0.033779390156269073,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 10.3395,
+      "step": 153
+    },
+    {
+      "epoch": 0.00616388324644526,
+      "grad_norm": 0.027501322329044342,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 10.3412,
+      "step": 154
+    },
+    {
+      "epoch": 0.006203908462331269,
+      "grad_norm": 0.022303588688373566,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 10.3411,
+      "step": 155
+    },
+    {
+      "epoch": 0.006243933678217277,
+      "grad_norm": 0.026241958141326904,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 10.3406,
+      "step": 156
+    },
+    {
+      "epoch": 0.006283958894103285,
+      "grad_norm": 0.02380531281232834,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 10.3418,
+      "step": 157
+    },
+    {
+      "epoch": 0.006323984109989293,
+      "grad_norm": 0.024870751425623894,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 10.341,
+      "step": 158
+    },
+    {
+      "epoch": 0.006364009325875301,
+      "grad_norm": 0.025397121906280518,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 10.3411,
+      "step": 159
+    },
+    {
+      "epoch": 0.00640403454176131,
+      "grad_norm": 0.02394135482609272,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 10.3409,
+      "step": 160
+    },
+    {
+      "epoch": 0.0064440597576473175,
+      "grad_norm": 0.02467886172235012,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 10.3416,
+      "step": 161
+    },
+    {
+      "epoch": 0.006484084973533326,
+      "grad_norm": 0.02491372637450695,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 10.3397,
+      "step": 162
+    },
+    {
+      "epoch": 0.006524110189419334,
+      "grad_norm": 0.026415346190333366,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 10.3395,
+      "step": 163
+    },
+    {
+      "epoch": 0.006564135405305343,
+      "grad_norm": 0.027454102411866188,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 10.3417,
+      "step": 164
+    },
+    {
+      "epoch": 0.0066041606211913505,
+      "grad_norm": 0.02785174734890461,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 10.3392,
+      "step": 165
+    },
+    {
+      "epoch": 0.006644185837077358,
+      "grad_norm": 0.03313513472676277,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 10.3402,
+      "step": 166
+    },
+    {
+      "epoch": 0.006684211052963367,
+      "grad_norm": 0.020563622936606407,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 10.3419,
+      "step": 167
+    },
+    {
+      "epoch": 0.006724236268849375,
+      "grad_norm": 0.030172914266586304,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 10.3421,
+      "step": 168
+    },
+    {
+      "epoch": 0.0067642614847353835,
+      "grad_norm": 0.025563696399331093,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 10.3392,
+      "step": 169
+    },
+    {
+      "epoch": 0.006804286700621391,
+      "grad_norm": 0.01891487091779709,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 10.3414,
+      "step": 170
+    },
+    {
+      "epoch": 0.0068443119165074,
+      "grad_norm": 0.028518687933683395,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 10.3406,
+      "step": 171
+    },
+    {
+      "epoch": 0.006884337132393408,
+      "grad_norm": 0.020258715376257896,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 10.3395,
+      "step": 172
+    },
+    {
+      "epoch": 0.006924362348279416,
+      "grad_norm": 0.02436778135597706,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 10.3414,
+      "step": 173
+    },
+    {
+      "epoch": 0.006964387564165424,
+      "grad_norm": 0.018401464447379112,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 10.3402,
+      "step": 174
+    },
+    {
+      "epoch": 0.007004412780051432,
+      "grad_norm": 0.027630239725112915,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 10.341,
+      "step": 175
+    },
+    {
+      "epoch": 0.007004412780051432,
+      "eval_loss": 10.339688301086426,
+      "eval_runtime": 0.15,
+      "eval_samples_per_second": 333.266,
+      "eval_steps_per_second": 46.657,
+      "step": 175
+    },
+    {
+      "epoch": 0.007044437995937441,
+      "grad_norm": 0.02613791450858116,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 10.3422,
+      "step": 176
+    },
+    {
+      "epoch": 0.007084463211823449,
+      "grad_norm": 0.028299255296587944,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 10.3424,
+      "step": 177
+    },
+    {
+      "epoch": 0.007124488427709457,
+      "grad_norm": 0.027093924582004547,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 10.3432,
+      "step": 178
+    },
+    {
+      "epoch": 0.007164513643595465,
+      "grad_norm": 0.026260150596499443,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 10.3408,
+      "step": 179
+    },
+    {
+      "epoch": 0.007204538859481474,
+      "grad_norm": 0.024819303303956985,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 10.341,
+      "step": 180
+    },
+    {
+      "epoch": 0.0072445640753674816,
+      "grad_norm": 0.02742963470518589,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 10.3404,
+      "step": 181
+    },
+    {
+      "epoch": 0.007284589291253489,
+      "grad_norm": 0.024038875475525856,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 10.3424,
+      "step": 182
+    },
+    {
+      "epoch": 0.007324614507139498,
+      "grad_norm": 0.03462977334856987,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 10.3423,
+      "step": 183
+    },
+    {
+      "epoch": 0.007364639723025506,
+      "grad_norm": 0.0273030623793602,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 10.3419,
+      "step": 184
+    },
+    {
+      "epoch": 0.0074046649389115145,
+      "grad_norm": 0.021748429164290428,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 10.3398,
+      "step": 185
+    },
+    {
+      "epoch": 0.007444690154797522,
+      "grad_norm": 0.022143211215734482,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 10.3427,
+      "step": 186
+    },
+    {
+      "epoch": 0.007484715370683531,
+      "grad_norm": 0.028321022167801857,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 10.3435,
+      "step": 187
+    },
+    {
+      "epoch": 0.007524740586569539,
+      "grad_norm": 0.02573862113058567,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 10.3411,
+      "step": 188
+    },
+    {
+      "epoch": 0.007564765802455547,
+      "grad_norm": 0.027865024283528328,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 10.3441,
+      "step": 189
+    },
+    {
+      "epoch": 0.007604791018341555,
+      "grad_norm": 0.024411842226982117,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 10.3414,
+      "step": 190
+    },
+    {
+      "epoch": 0.007644816234227563,
+      "grad_norm": 0.026974577456712723,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 10.3414,
+      "step": 191
+    },
+    {
+      "epoch": 0.007684841450113572,
+      "grad_norm": 0.02850463055074215,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 10.343,
+      "step": 192
+    },
+    {
+      "epoch": 0.00772486666599958,
+      "grad_norm": 0.034683018922805786,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 10.3408,
+      "step": 193
+    },
+    {
+      "epoch": 0.007764891881885588,
+      "grad_norm": 0.03067103587090969,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 10.3422,
+      "step": 194
+    },
+    {
+      "epoch": 0.007804917097771596,
+      "grad_norm": 0.039638079702854156,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 10.3411,
+      "step": 195
+    },
+    {
+      "epoch": 0.007844942313657604,
+      "grad_norm": 0.03353327140212059,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 10.3424,
+      "step": 196
+    },
+    {
+      "epoch": 0.007884967529543612,
+      "grad_norm": 0.040784239768981934,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 10.3434,
+      "step": 197
+    },
+    {
+      "epoch": 0.007924992745429621,
+      "grad_norm": 0.03665102273225784,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 10.343,
+      "step": 198
+    },
+    {
+      "epoch": 0.007965017961315629,
+      "grad_norm": 0.04679227992892265,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 10.3428,
+      "step": 199
+    },
+    {
+      "epoch": 0.008005043177201637,
+      "grad_norm": 0.052072253078222275,
+      "learning_rate": 0.0,
+      "loss": 10.3442,
+      "step": 200
+    },
+    {
+      "epoch": 0.008005043177201637,
+      "eval_loss": 10.339714050292969,
+      "eval_runtime": 0.1466,
+      "eval_samples_per_second": 340.956,
+      "eval_steps_per_second": 47.734,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 22423039524864.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null