Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cab56aaa5810fb910c7e9e236025f9c97797db338006927c1f59849c5cf914b4
 size 50503544

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad8cc488a4232e4bb07af21caf9a5a9828012025c962a32082570728e12ff8cf
 size 50503544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4aabab1826befe4f2cbbb925ab2f17b61e7a27b7b34174912f7335b0b59c3342
 size 101184122

 version https://git-lfs.github.com/spec/v1
+oid sha256:dee48ebb34e2b388c1c2e22a54461c621e0d1c6acbf2c0c55f122693393c56b5
 size 101184122

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e15669b74a9459ea8f38adb6ce534bdd9e5864aca0db86a3caf5a0987ebfa396
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:eecbe6db51533f74a58d571c69110b223d4abb6bcb19d1ad0d04af620a680663
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.4293454885482788,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.21344717182497333,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 31.468,
       "eval_steps_per_second": 4.406,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.292050004063027e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.4162873327732086,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.2845962290999644,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.468,
       "eval_steps_per_second": 4.406,
       "step": 150
+    },
+    {
+      "epoch": 0.21487015297047315,
+      "grad_norm": 0.6226702332496643,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.4393,
+      "step": 151
+    },
+    {
+      "epoch": 0.21629313411597295,
+      "grad_norm": 0.6401143670082092,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.633,
+      "step": 152
+    },
+    {
+      "epoch": 0.21771611526147278,
+      "grad_norm": 0.49035051465034485,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 0.4039,
+      "step": 153
+    },
+    {
+      "epoch": 0.2191390964069726,
+      "grad_norm": 0.5134478807449341,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 0.3892,
+      "step": 154
+    },
+    {
+      "epoch": 0.22056207755247242,
+      "grad_norm": 0.4497911334037781,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 0.3839,
+      "step": 155
+    },
+    {
+      "epoch": 0.22198505869797225,
+      "grad_norm": 0.572187602519989,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 0.3897,
+      "step": 156
+    },
+    {
+      "epoch": 0.22340803984347207,
+      "grad_norm": 0.5317263007164001,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 0.4003,
+      "step": 157
+    },
+    {
+      "epoch": 0.2248310209889719,
+      "grad_norm": 0.4672944247722626,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 0.3659,
+      "step": 158
+    },
+    {
+      "epoch": 0.22625400213447172,
+      "grad_norm": 0.42732876539230347,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 0.3495,
+      "step": 159
+    },
+    {
+      "epoch": 0.22767698327997155,
+      "grad_norm": 0.48700952529907227,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 0.4266,
+      "step": 160
+    },
+    {
+      "epoch": 0.22909996442547137,
+      "grad_norm": 0.4742146134376526,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.3789,
+      "step": 161
+    },
+    {
+      "epoch": 0.2305229455709712,
+      "grad_norm": 0.46112704277038574,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.3756,
+      "step": 162
+    },
+    {
+      "epoch": 0.23194592671647102,
+      "grad_norm": 0.45304158329963684,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 0.4077,
+      "step": 163
+    },
+    {
+      "epoch": 0.23336890786197084,
+      "grad_norm": 0.47427666187286377,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 0.3493,
+      "step": 164
+    },
+    {
+      "epoch": 0.23479188900747064,
+      "grad_norm": 0.4082260727882385,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.3632,
+      "step": 165
+    },
+    {
+      "epoch": 0.23621487015297046,
+      "grad_norm": 0.517911970615387,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 0.4325,
+      "step": 166
+    },
+    {
+      "epoch": 0.2376378512984703,
+      "grad_norm": 0.4603246748447418,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 0.3829,
+      "step": 167
+    },
+    {
+      "epoch": 0.2390608324439701,
+      "grad_norm": 0.5936574935913086,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.4366,
+      "step": 168
+    },
+    {
+      "epoch": 0.24048381358946994,
+      "grad_norm": 0.47505584359169006,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 0.3416,
+      "step": 169
+    },
+    {
+      "epoch": 0.24190679473496976,
+      "grad_norm": 0.4642045199871063,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 0.3977,
+      "step": 170
+    },
+    {
+      "epoch": 0.24332977588046958,
+      "grad_norm": 0.4804611802101135,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 0.4259,
+      "step": 171
+    },
+    {
+      "epoch": 0.2447527570259694,
+      "grad_norm": 0.4980257749557495,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 0.3874,
+      "step": 172
+    },
+    {
+      "epoch": 0.24617573817146923,
+      "grad_norm": 0.42624157667160034,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 0.3618,
+      "step": 173
+    },
+    {
+      "epoch": 0.24759871931696906,
+      "grad_norm": 0.4734053611755371,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 0.3711,
+      "step": 174
+    },
+    {
+      "epoch": 0.24902170046246888,
+      "grad_norm": 0.42860615253448486,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 0.3982,
+      "step": 175
+    },
+    {
+      "epoch": 0.24902170046246888,
+      "eval_loss": 0.41627341508865356,
+      "eval_runtime": 1.5905,
+      "eval_samples_per_second": 31.436,
+      "eval_steps_per_second": 4.401,
+      "step": 175
+    },
+    {
+      "epoch": 0.2504446816079687,
+      "grad_norm": 0.5777260661125183,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 0.4148,
+      "step": 176
+    },
+    {
+      "epoch": 0.2518676627534685,
+      "grad_norm": 0.4316408932209015,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 0.4256,
+      "step": 177
+    },
+    {
+      "epoch": 0.2532906438989683,
+      "grad_norm": 0.402228981256485,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 0.3952,
+      "step": 178
+    },
+    {
+      "epoch": 0.25471362504446815,
+      "grad_norm": 0.49347391724586487,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 0.4133,
+      "step": 179
+    },
+    {
+      "epoch": 0.256136606189968,
+      "grad_norm": 0.5483438372612,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 0.398,
+      "step": 180
+    },
+    {
+      "epoch": 0.2575595873354678,
+      "grad_norm": 0.3619908094406128,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 0.347,
+      "step": 181
+    },
+    {
+      "epoch": 0.2589825684809676,
+      "grad_norm": 0.521479070186615,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 0.419,
+      "step": 182
+    },
+    {
+      "epoch": 0.26040554962646745,
+      "grad_norm": 0.3947046995162964,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 0.3599,
+      "step": 183
+    },
+    {
+      "epoch": 0.26182853077196727,
+      "grad_norm": 0.41507527232170105,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 0.3696,
+      "step": 184
+    },
+    {
+      "epoch": 0.2632515119174671,
+      "grad_norm": 0.4486597180366516,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 0.3821,
+      "step": 185
+    },
+    {
+      "epoch": 0.2646744930629669,
+      "grad_norm": 0.463120698928833,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 0.4136,
+      "step": 186
+    },
+    {
+      "epoch": 0.26609747420846674,
+      "grad_norm": 0.49710506200790405,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 0.43,
+      "step": 187
+    },
+    {
+      "epoch": 0.26752045535396657,
+      "grad_norm": 0.4926173985004425,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 0.406,
+      "step": 188
+    },
+    {
+      "epoch": 0.2689434364994664,
+      "grad_norm": 0.5663373470306396,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 0.4088,
+      "step": 189
+    },
+    {
+      "epoch": 0.2703664176449662,
+      "grad_norm": 0.536439836025238,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 0.46,
+      "step": 190
+    },
+    {
+      "epoch": 0.27178939879046604,
+      "grad_norm": 0.44043686985969543,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 0.3958,
+      "step": 191
+    },
+    {
+      "epoch": 0.27321237993596587,
+      "grad_norm": 0.5519529581069946,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 0.4312,
+      "step": 192
+    },
+    {
+      "epoch": 0.2746353610814657,
+      "grad_norm": 0.5283083915710449,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 0.4127,
+      "step": 193
+    },
+    {
+      "epoch": 0.2760583422269655,
+      "grad_norm": 0.4783058166503906,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 0.4041,
+      "step": 194
+    },
+    {
+      "epoch": 0.27748132337246534,
+      "grad_norm": 0.45658767223358154,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 0.4002,
+      "step": 195
+    },
+    {
+      "epoch": 0.27890430451796516,
+      "grad_norm": 0.5881150364875793,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 0.4642,
+      "step": 196
+    },
+    {
+      "epoch": 0.280327285663465,
+      "grad_norm": 0.5470696687698364,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 0.4564,
+      "step": 197
+    },
+    {
+      "epoch": 0.28175026680896476,
+      "grad_norm": 0.7771428823471069,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 0.4493,
+      "step": 198
+    },
+    {
+      "epoch": 0.2831732479544646,
+      "grad_norm": 0.7079588174819946,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 0.5309,
+      "step": 199
+    },
+    {
+      "epoch": 0.2845962290999644,
+      "grad_norm": 0.9976850748062134,
+      "learning_rate": 0.0,
+      "loss": 0.6441,
+      "step": 200
+    },
+    {
+      "epoch": 0.2845962290999644,
+      "eval_loss": 0.4162873327732086,
+      "eval_runtime": 1.592,
+      "eval_samples_per_second": 31.406,
+      "eval_steps_per_second": 4.397,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.385754324349747e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null