Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3af41c010d5f4dca50fc921764204ded45f143a170eaade034bdabc05e2a815e
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:38916f69139d262490b0bb4dcf3d3c69b40e09752f8403100bf5dc7388d4b186
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ec4e22200d29197b10f14e20591d8c6726b993678ffd233b5a1916a3a6db822
 size 118090

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbd99cc4e5385b61da43c3966dab7f67670c7eaa19c6d710090c53ae55c6493a
 size 118090

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:510d90f7b0173aebe8226f1e639b098f44560972df26fc78fab90dcf71f62011
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6beb0e19e82eaf34146a032e84a382b3a713fdb5f6af6ac0adf523ddbfed1f45
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.34074592590332,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.006003782382901228,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 467.082,
       "eval_steps_per_second": 65.391,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 16838118899712.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.340373992919922,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.008005043177201637,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 467.082,
       "eval_steps_per_second": 65.391,
       "step": 150
+    },
+    {
+      "epoch": 0.006043807598787236,
+      "grad_norm": 0.041127968579530716,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 10.3402,
+      "step": 151
+    },
+    {
+      "epoch": 0.006083832814673244,
+      "grad_norm": 0.03272487223148346,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 10.3421,
+      "step": 152
+    },
+    {
+      "epoch": 0.006123858030559252,
+      "grad_norm": 0.034761957824230194,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 10.34,
+      "step": 153
+    },
+    {
+      "epoch": 0.00616388324644526,
+      "grad_norm": 0.02753298729658127,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 10.3418,
+      "step": 154
+    },
+    {
+      "epoch": 0.006203908462331269,
+      "grad_norm": 0.021701285615563393,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 10.3416,
+      "step": 155
+    },
+    {
+      "epoch": 0.006243933678217277,
+      "grad_norm": 0.02291003055870533,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 10.3414,
+      "step": 156
+    },
+    {
+      "epoch": 0.006283958894103285,
+      "grad_norm": 0.020840736106038094,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 10.3418,
+      "step": 157
+    },
+    {
+      "epoch": 0.006323984109989293,
+      "grad_norm": 0.02168063446879387,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 10.3416,
+      "step": 158
+    },
+    {
+      "epoch": 0.006364009325875301,
+      "grad_norm": 0.023091401904821396,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 10.3418,
+      "step": 159
+    },
+    {
+      "epoch": 0.00640403454176131,
+      "grad_norm": 0.019541895017027855,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 10.3415,
+      "step": 160
+    },
+    {
+      "epoch": 0.0064440597576473175,
+      "grad_norm": 0.021600117906928062,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 10.3418,
+      "step": 161
+    },
+    {
+      "epoch": 0.006484084973533326,
+      "grad_norm": 0.019653281196951866,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 10.3404,
+      "step": 162
+    },
+    {
+      "epoch": 0.006524110189419334,
+      "grad_norm": 0.023136168718338013,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 10.3397,
+      "step": 163
+    },
+    {
+      "epoch": 0.006564135405305343,
+      "grad_norm": 0.021499428898096085,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 10.3421,
+      "step": 164
+    },
+    {
+      "epoch": 0.0066041606211913505,
+      "grad_norm": 0.024124084040522575,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 10.3405,
+      "step": 165
+    },
+    {
+      "epoch": 0.006644185837077358,
+      "grad_norm": 0.02903626300394535,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 10.3405,
+      "step": 166
+    },
+    {
+      "epoch": 0.006684211052963367,
+      "grad_norm": 0.01823560893535614,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 10.3424,
+      "step": 167
+    },
+    {
+      "epoch": 0.006724236268849375,
+      "grad_norm": 0.02546961046755314,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 10.3428,
+      "step": 168
+    },
+    {
+      "epoch": 0.0067642614847353835,
+      "grad_norm": 0.021871374920010567,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 10.3398,
+      "step": 169
+    },
+    {
+      "epoch": 0.006804286700621391,
+      "grad_norm": 0.017352674156427383,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 10.3417,
+      "step": 170
+    },
+    {
+      "epoch": 0.0068443119165074,
+      "grad_norm": 0.026552794501185417,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 10.3411,
+      "step": 171
+    },
+    {
+      "epoch": 0.006884337132393408,
+      "grad_norm": 0.020366569980978966,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 10.3393,
+      "step": 172
+    },
+    {
+      "epoch": 0.006924362348279416,
+      "grad_norm": 0.020691609010100365,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 10.3417,
+      "step": 173
+    },
+    {
+      "epoch": 0.006964387564165424,
+      "grad_norm": 0.016214264556765556,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 10.3407,
+      "step": 174
+    },
+    {
+      "epoch": 0.007004412780051432,
+      "grad_norm": 0.024319324642419815,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 10.3417,
+      "step": 175
+    },
+    {
+      "epoch": 0.007004412780051432,
+      "eval_loss": 10.340204238891602,
+      "eval_runtime": 0.1134,
+      "eval_samples_per_second": 440.76,
+      "eval_steps_per_second": 61.706,
+      "step": 175
+    },
+    {
+      "epoch": 0.007044437995937441,
+      "grad_norm": 0.023183995857834816,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 10.3424,
+      "step": 176
+    },
+    {
+      "epoch": 0.007084463211823449,
+      "grad_norm": 0.025537928566336632,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 10.3429,
+      "step": 177
+    },
+    {
+      "epoch": 0.007124488427709457,
+      "grad_norm": 0.025992251932621002,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 10.343,
+      "step": 178
+    },
+    {
+      "epoch": 0.007164513643595465,
+      "grad_norm": 0.023507488891482353,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 10.3418,
+      "step": 179
+    },
+    {
+      "epoch": 0.007204538859481474,
+      "grad_norm": 0.021433303132653236,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 10.3419,
+      "step": 180
+    },
+    {
+      "epoch": 0.0072445640753674816,
+      "grad_norm": 0.024079183116555214,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 10.3406,
+      "step": 181
+    },
+    {
+      "epoch": 0.007284589291253489,
+      "grad_norm": 0.01925087533891201,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 10.3433,
+      "step": 182
+    },
+    {
+      "epoch": 0.007324614507139498,
+      "grad_norm": 0.02907833270728588,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 10.3431,
+      "step": 183
+    },
+    {
+      "epoch": 0.007364639723025506,
+      "grad_norm": 0.024236485362052917,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 10.3432,
+      "step": 184
+    },
+    {
+      "epoch": 0.0074046649389115145,
+      "grad_norm": 0.01904798485338688,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 10.3402,
+      "step": 185
+    },
+    {
+      "epoch": 0.007444690154797522,
+      "grad_norm": 0.020322727039456367,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 10.3431,
+      "step": 186
+    },
+    {
+      "epoch": 0.007484715370683531,
+      "grad_norm": 0.026508722454309464,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 10.344,
+      "step": 187
+    },
+    {
+      "epoch": 0.007524740586569539,
+      "grad_norm": 0.022202813997864723,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 10.3422,
+      "step": 188
+    },
+    {
+      "epoch": 0.007564765802455547,
+      "grad_norm": 0.025712674483656883,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 10.344,
+      "step": 189
+    },
+    {
+      "epoch": 0.007604791018341555,
+      "grad_norm": 0.022380555048584938,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 10.342,
+      "step": 190
+    },
+    {
+      "epoch": 0.007644816234227563,
+      "grad_norm": 0.027425525709986687,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 10.3421,
+      "step": 191
+    },
+    {
+      "epoch": 0.007684841450113572,
+      "grad_norm": 0.024072714149951935,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 10.3432,
+      "step": 192
+    },
+    {
+      "epoch": 0.00772486666599958,
+      "grad_norm": 0.031706303358078,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 10.3416,
+      "step": 193
+    },
+    {
+      "epoch": 0.007764891881885588,
+      "grad_norm": 0.030905872583389282,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 10.3426,
+      "step": 194
+    },
+    {
+      "epoch": 0.007804917097771596,
+      "grad_norm": 0.037017177790403366,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 10.3419,
+      "step": 195
+    },
+    {
+      "epoch": 0.007844942313657604,
+      "grad_norm": 0.03284130245447159,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 10.3429,
+      "step": 196
+    },
+    {
+      "epoch": 0.007884967529543612,
+      "grad_norm": 0.03693537414073944,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 10.3446,
+      "step": 197
+    },
+    {
+      "epoch": 0.007924992745429621,
+      "grad_norm": 0.038120418787002563,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 10.3429,
+      "step": 198
+    },
+    {
+      "epoch": 0.007965017961315629,
+      "grad_norm": 0.044258493930101395,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 10.3437,
+      "step": 199
+    },
+    {
+      "epoch": 0.008005043177201637,
+      "grad_norm": 0.04641018435359001,
+      "learning_rate": 0.0,
+      "loss": 10.346,
+      "step": 200
+    },
+    {
+      "epoch": 0.008005043177201637,
+      "eval_loss": 10.340373992919922,
+      "eval_runtime": 0.1071,
+      "eval_samples_per_second": 466.854,
+      "eval_steps_per_second": 65.36,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 22423039524864.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null