Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dec6bf8f49f369eefb83dcacbbf00fa4059f824fb49bc13f502dc0e6528472b2
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:77835b66136bbbc7690bcb128e3e4eb97602fe56b56cdb5c1b580887dd1d8d33
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c78fb449d24ddefaef121db0e9850c71250fab43dfa01a7ae6c7d94de032e384
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ee4b659edf90ea182776fe8a5315a932ce64ddad99ad7b15f7ee5a2c6f8018a
 size 335922386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd24905c365684771848f29aa7309f0a0567630b6dd98b4ba064d55c47751b7b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:dceab492eb55d5b625a66b337338737db8eda4331a3228103f2ce6c58d9452cf
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5096783638000488,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.07796257796257797,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 5.155,
       "eval_steps_per_second": 0.722,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.5105929355237786e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.4919222295284271,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.10395010395010396,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.155,
       "eval_steps_per_second": 0.722,
       "step": 150
+    },
+    {
+      "epoch": 0.07848232848232849,
+      "grad_norm": 0.22046735882759094,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.3254,
+      "step": 151
+    },
+    {
+      "epoch": 0.079002079002079,
+      "grad_norm": 0.2166861742734909,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.4457,
+      "step": 152
+    },
+    {
+      "epoch": 0.07952182952182953,
+      "grad_norm": 0.2074378877878189,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 0.5091,
+      "step": 153
+    },
+    {
+      "epoch": 0.08004158004158005,
+      "grad_norm": 0.221967875957489,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 0.4409,
+      "step": 154
+    },
+    {
+      "epoch": 0.08056133056133057,
+      "grad_norm": 0.20840375125408173,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 0.4149,
+      "step": 155
+    },
+    {
+      "epoch": 0.08108108108108109,
+      "grad_norm": 0.23525193333625793,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 0.4934,
+      "step": 156
+    },
+    {
+      "epoch": 0.0816008316008316,
+      "grad_norm": 0.25270870327949524,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 0.5205,
+      "step": 157
+    },
+    {
+      "epoch": 0.08212058212058213,
+      "grad_norm": 0.22861601412296295,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 0.4039,
+      "step": 158
+    },
+    {
+      "epoch": 0.08264033264033264,
+      "grad_norm": 0.2319110780954361,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 0.4785,
+      "step": 159
+    },
+    {
+      "epoch": 0.08316008316008316,
+      "grad_norm": 0.22803109884262085,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 0.486,
+      "step": 160
+    },
+    {
+      "epoch": 0.08367983367983368,
+      "grad_norm": 0.24335117638111115,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.4956,
+      "step": 161
+    },
+    {
+      "epoch": 0.0841995841995842,
+      "grad_norm": 0.23986086249351501,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.566,
+      "step": 162
+    },
+    {
+      "epoch": 0.08471933471933472,
+      "grad_norm": 0.24391090869903564,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 0.5142,
+      "step": 163
+    },
+    {
+      "epoch": 0.08523908523908524,
+      "grad_norm": 0.29568424820899963,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 0.6023,
+      "step": 164
+    },
+    {
+      "epoch": 0.08575883575883576,
+      "grad_norm": 0.2564331591129303,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.5528,
+      "step": 165
+    },
+    {
+      "epoch": 0.08627858627858628,
+      "grad_norm": 0.2405325472354889,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 0.4999,
+      "step": 166
+    },
+    {
+      "epoch": 0.0867983367983368,
+      "grad_norm": 0.2567991018295288,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 0.5486,
+      "step": 167
+    },
+    {
+      "epoch": 0.08731808731808732,
+      "grad_norm": 0.2838067412376404,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.5459,
+      "step": 168
+    },
+    {
+      "epoch": 0.08783783783783784,
+      "grad_norm": 0.2707315683364868,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 0.5376,
+      "step": 169
+    },
+    {
+      "epoch": 0.08835758835758836,
+      "grad_norm": 0.26636192202568054,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 0.535,
+      "step": 170
+    },
+    {
+      "epoch": 0.08887733887733888,
+      "grad_norm": 0.26214924454689026,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 0.5322,
+      "step": 171
+    },
+    {
+      "epoch": 0.0893970893970894,
+      "grad_norm": 0.2562224566936493,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 0.5278,
+      "step": 172
+    },
+    {
+      "epoch": 0.08991683991683992,
+      "grad_norm": 0.30574294924736023,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 0.5209,
+      "step": 173
+    },
+    {
+      "epoch": 0.09043659043659044,
+      "grad_norm": 0.3442355692386627,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 0.5682,
+      "step": 174
+    },
+    {
+      "epoch": 0.09095634095634096,
+      "grad_norm": 0.2936060428619385,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 0.5359,
+      "step": 175
+    },
+    {
+      "epoch": 0.09095634095634096,
+      "eval_loss": 0.4958566725254059,
+      "eval_runtime": 10.7748,
+      "eval_samples_per_second": 4.64,
+      "eval_steps_per_second": 0.65,
+      "step": 175
+    },
+    {
+      "epoch": 0.09147609147609148,
+      "grad_norm": 0.34981048107147217,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 0.543,
+      "step": 176
+    },
+    {
+      "epoch": 0.091995841995842,
+      "grad_norm": 0.38105377554893494,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 0.573,
+      "step": 177
+    },
+    {
+      "epoch": 0.09251559251559252,
+      "grad_norm": 0.3455210328102112,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 0.4597,
+      "step": 178
+    },
+    {
+      "epoch": 0.09303534303534304,
+      "grad_norm": 0.388644814491272,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 0.5382,
+      "step": 179
+    },
+    {
+      "epoch": 0.09355509355509356,
+      "grad_norm": 0.3758420944213867,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 0.5452,
+      "step": 180
+    },
+    {
+      "epoch": 0.09407484407484408,
+      "grad_norm": 0.4773421585559845,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 0.5899,
+      "step": 181
+    },
+    {
+      "epoch": 0.0945945945945946,
+      "grad_norm": 0.3917699158191681,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 0.5736,
+      "step": 182
+    },
+    {
+      "epoch": 0.09511434511434512,
+      "grad_norm": 0.5945054292678833,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 0.629,
+      "step": 183
+    },
+    {
+      "epoch": 0.09563409563409564,
+      "grad_norm": 0.49390462040901184,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 0.5509,
+      "step": 184
+    },
+    {
+      "epoch": 0.09615384615384616,
+      "grad_norm": 0.4783254861831665,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 0.5791,
+      "step": 185
+    },
+    {
+      "epoch": 0.09667359667359668,
+      "grad_norm": 0.5506903529167175,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 0.52,
+      "step": 186
+    },
+    {
+      "epoch": 0.0971933471933472,
+      "grad_norm": 0.5402777194976807,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 0.6098,
+      "step": 187
+    },
+    {
+      "epoch": 0.09771309771309772,
+      "grad_norm": 0.6247908473014832,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 0.5823,
+      "step": 188
+    },
+    {
+      "epoch": 0.09823284823284824,
+      "grad_norm": 0.6075006723403931,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 0.6047,
+      "step": 189
+    },
+    {
+      "epoch": 0.09875259875259876,
+      "grad_norm": 0.6293641328811646,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 0.648,
+      "step": 190
+    },
+    {
+      "epoch": 0.09927234927234928,
+      "grad_norm": 0.656559944152832,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 0.5545,
+      "step": 191
+    },
+    {
+      "epoch": 0.0997920997920998,
+      "grad_norm": 0.6681039333343506,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 0.6438,
+      "step": 192
+    },
+    {
+      "epoch": 0.10031185031185032,
+      "grad_norm": 0.9185191988945007,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 0.625,
+      "step": 193
+    },
+    {
+      "epoch": 0.10083160083160084,
+      "grad_norm": 0.7245170474052429,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 0.6581,
+      "step": 194
+    },
+    {
+      "epoch": 0.10135135135135136,
+      "grad_norm": 0.8444747924804688,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 0.7302,
+      "step": 195
+    },
+    {
+      "epoch": 0.10187110187110188,
+      "grad_norm": 0.8954965472221375,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 0.6679,
+      "step": 196
+    },
+    {
+      "epoch": 0.1023908523908524,
+      "grad_norm": 0.9438778758049011,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 0.6426,
+      "step": 197
+    },
+    {
+      "epoch": 0.10291060291060292,
+      "grad_norm": 0.9339571595191956,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 0.7167,
+      "step": 198
+    },
+    {
+      "epoch": 0.10343035343035344,
+      "grad_norm": 1.101068377494812,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 0.7554,
+      "step": 199
+    },
+    {
+      "epoch": 0.10395010395010396,
+      "grad_norm": 1.1291167736053467,
+      "learning_rate": 0.0,
+      "loss": 0.687,
+      "step": 200
+    },
+    {
+      "epoch": 0.10395010395010396,
+      "eval_loss": 0.4919222295284271,
+      "eval_runtime": 10.7529,
+      "eval_samples_per_second": 4.65,
+      "eval_steps_per_second": 0.651,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.350084764827648e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null