Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +371 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0faee85fcaf3d1e8aaf3e288b390b35a98e13e54183f6fd76d04292c4592c41c
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed8ea5d23a25ac30d05af667e4c06daa55b71429149c868c11e684de99310d15
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:966aa2673f30de905e913b25ac167e74ddc3dccd3d3561d1172263a0cd503ade
 size 90365754

 version https://git-lfs.github.com/spec/v1
+oid sha256:d034d8c3ab3bde45ac7c5eb4cb23e80ab369b4e2c7d07bbf0ddadc9b6b49f3e3
 size 90365754

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8faa987a2ae2e9fa291778dff6631c49a41e56785ff2e1567591f4de2fda5d9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:17dde9c5ddf7ce10b87ffc2cd7de40f14d6706b143c7ea26df78e352e4d3b714
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7496414184570312,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.45489006823351025,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 30.517,
       "eval_steps_per_second": 4.272,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1127,7 +1493,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.100038768938189e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7496414184570312,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.6065200909780136,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 30.517,
       "eval_steps_per_second": 4.272,
       "step": 150
+    },
+    {
+      "epoch": 0.4579226686884003,
+      "grad_norm": 1.176184058189392,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.3398,
+      "step": 151
+    },
+    {
+      "epoch": 0.4609552691432904,
+      "grad_norm": 1.151708960533142,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.3601,
+      "step": 152
+    },
+    {
+      "epoch": 0.46398786959818045,
+      "grad_norm": 0.974331259727478,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 0.2929,
+      "step": 153
+    },
+    {
+      "epoch": 0.4670204700530705,
+      "grad_norm": 0.9359554052352905,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 0.3445,
+      "step": 154
+    },
+    {
+      "epoch": 0.4700530705079606,
+      "grad_norm": 2.2175326347351074,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 1.4245,
+      "step": 155
+    },
+    {
+      "epoch": 0.47308567096285065,
+      "grad_norm": 1.6879606246948242,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 1.6504,
+      "step": 156
+    },
+    {
+      "epoch": 0.4761182714177407,
+      "grad_norm": 1.5661247968673706,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 1.2227,
+      "step": 157
+    },
+    {
+      "epoch": 0.4791508718726308,
+      "grad_norm": 1.6378949880599976,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 1.0772,
+      "step": 158
+    },
+    {
+      "epoch": 0.48218347232752085,
+      "grad_norm": 1.839331865310669,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 0.6356,
+      "step": 159
+    },
+    {
+      "epoch": 0.4852160727824109,
+      "grad_norm": 1.9953904151916504,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 0.6139,
+      "step": 160
+    },
+    {
+      "epoch": 0.488248673237301,
+      "grad_norm": 4.8791184425354,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.7722,
+      "step": 161
+    },
+    {
+      "epoch": 0.49128127369219105,
+      "grad_norm": 2.2169203758239746,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.757,
+      "step": 162
+    },
+    {
+      "epoch": 0.4943138741470811,
+      "grad_norm": 1.8224825859069824,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 0.8179,
+      "step": 163
+    },
+    {
+      "epoch": 0.4973464746019712,
+      "grad_norm": 0.8757375478744507,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 0.5202,
+      "step": 164
+    },
+    {
+      "epoch": 0.5003790750568613,
+      "grad_norm": 0.8012860417366028,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.1801,
+      "step": 165
+    },
+    {
+      "epoch": 0.5034116755117514,
+      "grad_norm": 0.6913158297538757,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 0.2741,
+      "step": 166
+    },
+    {
+      "epoch": 0.5064442759666414,
+      "grad_norm": 0.6833317279815674,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 0.2653,
+      "step": 167
+    },
+    {
+      "epoch": 0.5094768764215315,
+      "grad_norm": 0.9797353744506836,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.6669,
+      "step": 168
+    },
+    {
+      "epoch": 0.5125094768764216,
+      "grad_norm": 2.5909066200256348,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 0.2745,
+      "step": 169
+    },
+    {
+      "epoch": 0.5155420773313116,
+      "grad_norm": 2.055574655532837,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 0.336,
+      "step": 170
+    },
+    {
+      "epoch": 0.5185746777862017,
+      "grad_norm": 1.7324495315551758,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 0.2687,
+      "step": 171
+    },
+    {
+      "epoch": 0.5216072782410918,
+      "grad_norm": 0.9579265117645264,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 0.8671,
+      "step": 172
+    },
+    {
+      "epoch": 0.5246398786959818,
+      "grad_norm": 0.8632516860961914,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 0.5554,
+      "step": 173
+    },
+    {
+      "epoch": 0.5276724791508719,
+      "grad_norm": 0.8002261519432068,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 0.5221,
+      "step": 174
+    },
+    {
+      "epoch": 0.530705079605762,
+      "grad_norm": 0.9454600811004639,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 0.7456,
+      "step": 175
+    },
+    {
+      "epoch": 0.530705079605762,
+      "eval_loss": 0.7249814867973328,
+      "eval_runtime": 1.6332,
+      "eval_samples_per_second": 30.615,
+      "eval_steps_per_second": 4.286,
+      "step": 175
+    },
+    {
+      "epoch": 0.533737680060652,
+      "grad_norm": 0.9941129088401794,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 1.0431,
+      "step": 176
+    },
+    {
+      "epoch": 0.5367702805155421,
+      "grad_norm": 1.2236559391021729,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 1.7665,
+      "step": 177
+    },
+    {
+      "epoch": 0.5398028809704322,
+      "grad_norm": 1.2112020254135132,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 1.407,
+      "step": 178
+    },
+    {
+      "epoch": 0.5428354814253222,
+      "grad_norm": 0.9938328266143799,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 1.0979,
+      "step": 179
+    },
+    {
+      "epoch": 0.5458680818802123,
+      "grad_norm": 1.114398717880249,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 1.1426,
+      "step": 180
+    },
+    {
+      "epoch": 0.5489006823351024,
+      "grad_norm": 1.4169811010360718,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 1.6449,
+      "step": 181
+    },
+    {
+      "epoch": 0.5519332827899924,
+      "grad_norm": 1.2509397268295288,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 1.3782,
+      "step": 182
+    },
+    {
+      "epoch": 0.5549658832448825,
+      "grad_norm": 1.321242094039917,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 1.46,
+      "step": 183
+    },
+    {
+      "epoch": 0.5579984836997726,
+      "grad_norm": 1.6065739393234253,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 2.0705,
+      "step": 184
+    },
+    {
+      "epoch": 0.5610310841546626,
+      "grad_norm": 1.637670874595642,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 1.8571,
+      "step": 185
+    },
+    {
+      "epoch": 0.5640636846095527,
+      "grad_norm": 1.617815613746643,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 2.0011,
+      "step": 186
+    },
+    {
+      "epoch": 0.5670962850644428,
+      "grad_norm": 1.380850911140442,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 1.7508,
+      "step": 187
+    },
+    {
+      "epoch": 0.5701288855193328,
+      "grad_norm": 1.7648086547851562,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 1.2211,
+      "step": 188
+    },
+    {
+      "epoch": 0.5731614859742229,
+      "grad_norm": 1.3612362146377563,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 1.0974,
+      "step": 189
+    },
+    {
+      "epoch": 0.576194086429113,
+      "grad_norm": 1.6548246145248413,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 0.7674,
+      "step": 190
+    },
+    {
+      "epoch": 0.579226686884003,
+      "grad_norm": 1.6227363348007202,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 0.4966,
+      "step": 191
+    },
+    {
+      "epoch": 0.5822592873388931,
+      "grad_norm": 2.1612672805786133,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 0.5805,
+      "step": 192
+    },
+    {
+      "epoch": 0.5852918877937832,
+      "grad_norm": 1.482163667678833,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 0.5068,
+      "step": 193
+    },
+    {
+      "epoch": 0.5883244882486732,
+      "grad_norm": 1.0745387077331543,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 0.2758,
+      "step": 194
+    },
+    {
+      "epoch": 0.5913570887035633,
+      "grad_norm": 1.2835489511489868,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 0.2952,
+      "step": 195
+    },
+    {
+      "epoch": 0.5943896891584534,
+      "grad_norm": 1.2066874504089355,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 0.2233,
+      "step": 196
+    },
+    {
+      "epoch": 0.5974222896133434,
+      "grad_norm": 0.9106021523475647,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 0.2002,
+      "step": 197
+    },
+    {
+      "epoch": 0.6004548900682335,
+      "grad_norm": 1.0497798919677734,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 0.1902,
+      "step": 198
+    },
+    {
+      "epoch": 0.6034874905231236,
+      "grad_norm": 0.9898223876953125,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 0.157,
+      "step": 199
+    },
+    {
+      "epoch": 0.6065200909780136,
+      "grad_norm": 0.5523911118507385,
+      "learning_rate": 0.0,
+      "loss": 0.0983,
+      "step": 200
+    },
+    {
+      "epoch": 0.6065200909780136,
+      "eval_loss": 0.7618037462234497,
+      "eval_runtime": 1.6354,
+      "eval_samples_per_second": 30.573,
+      "eval_steps_per_second": 4.28,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.128243999100109e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null