Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ddc2e048d9b2620d847609d58fed0b564cc2c2a6e69253b1bc55ae4a1e2646f
 size 26008

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0793d0d0324684a9c067c2a0c373d631f58d548e36526c619ce74cf38c69087
 size 26008

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1e9494721fda79c0e6dec64f77c4174ebe3d699ff2c7a81cf87b9bd38feb8cb
 size 68874

 version https://git-lfs.github.com/spec/v1
+oid sha256:bdf2dba77021b967d9b07e451eed6edcf9c6b9f88fbaa608666fd8bf84f22efa
 size 68874

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:489f4c718389e76241df37853a3d90a85c228ef3fca2c34c8c80ee5aff9312b1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:02fad5a2035e91aa39e84350bb3b91887956200ae82bb8e22764ba6bab57471c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.920156478881836,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.04637501932292472,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 188.058,
       "eval_steps_per_second": 26.328,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 238893465600.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.919875144958496,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.061833359097232955,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 188.058,
       "eval_steps_per_second": 26.328,
       "step": 150
+    },
+    {
+      "epoch": 0.04668418611841088,
+      "grad_norm": 0.018268408253788948,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 11.9227,
+      "step": 151
+    },
+    {
+      "epoch": 0.04699335291389705,
+      "grad_norm": 0.017765438184142113,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 11.9186,
+      "step": 152
+    },
+    {
+      "epoch": 0.04730251970938321,
+      "grad_norm": 0.016532015055418015,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 11.919,
+      "step": 153
+    },
+    {
+      "epoch": 0.047611686504869376,
+      "grad_norm": 0.021047212183475494,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 11.9202,
+      "step": 154
+    },
+    {
+      "epoch": 0.04792085330035554,
+      "grad_norm": 0.019600670784711838,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 11.9195,
+      "step": 155
+    },
+    {
+      "epoch": 0.04823002009584171,
+      "grad_norm": 0.024230562150478363,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 11.9213,
+      "step": 156
+    },
+    {
+      "epoch": 0.04853918689132787,
+      "grad_norm": 0.014714769087731838,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 11.9205,
+      "step": 157
+    },
+    {
+      "epoch": 0.04884835368681403,
+      "grad_norm": 0.022888755425810814,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 11.9208,
+      "step": 158
+    },
+    {
+      "epoch": 0.0491575204823002,
+      "grad_norm": 0.016698090359568596,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 11.9209,
+      "step": 159
+    },
+    {
+      "epoch": 0.049466687277786366,
+      "grad_norm": 0.017420414835214615,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 11.9208,
+      "step": 160
+    },
+    {
+      "epoch": 0.04977585407327253,
+      "grad_norm": 0.0220603346824646,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 11.9193,
+      "step": 161
+    },
+    {
+      "epoch": 0.0500850208687587,
+      "grad_norm": 0.015847139060497284,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 11.9195,
+      "step": 162
+    },
+    {
+      "epoch": 0.05039418766424486,
+      "grad_norm": 0.0156802237033844,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 11.9208,
+      "step": 163
+    },
+    {
+      "epoch": 0.05070335445973102,
+      "grad_norm": 0.011310800909996033,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 11.9199,
+      "step": 164
+    },
+    {
+      "epoch": 0.05101252125521719,
+      "grad_norm": 0.0168291088193655,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 11.9212,
+      "step": 165
+    },
+    {
+      "epoch": 0.051321688050703355,
+      "grad_norm": 0.018020465970039368,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 11.9209,
+      "step": 166
+    },
+    {
+      "epoch": 0.05163085484618952,
+      "grad_norm": 0.016029614955186844,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 11.9191,
+      "step": 167
+    },
+    {
+      "epoch": 0.05194002164167568,
+      "grad_norm": 0.02043992653489113,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 11.9198,
+      "step": 168
+    },
+    {
+      "epoch": 0.05224918843716185,
+      "grad_norm": 0.021433966234326363,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 11.9197,
+      "step": 169
+    },
+    {
+      "epoch": 0.05255835523264801,
+      "grad_norm": 0.021875297650694847,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 11.92,
+      "step": 170
+    },
+    {
+      "epoch": 0.052867522028134176,
+      "grad_norm": 0.014994239434599876,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 11.9216,
+      "step": 171
+    },
+    {
+      "epoch": 0.053176688823620345,
+      "grad_norm": 0.02252444624900818,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 11.9202,
+      "step": 172
+    },
+    {
+      "epoch": 0.05348585561910651,
+      "grad_norm": 0.012826811522245407,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 11.9207,
+      "step": 173
+    },
+    {
+      "epoch": 0.05379502241459267,
+      "grad_norm": 0.016058798879384995,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 11.9194,
+      "step": 174
+    },
+    {
+      "epoch": 0.05410418921007884,
+      "grad_norm": 0.016530612483620644,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 11.9192,
+      "step": 175
+    },
+    {
+      "epoch": 0.05410418921007884,
+      "eval_loss": 11.919890403747559,
+      "eval_runtime": 0.2662,
+      "eval_samples_per_second": 187.831,
+      "eval_steps_per_second": 26.296,
+      "step": 175
+    },
+    {
+      "epoch": 0.054413356005565,
+      "grad_norm": 0.020826896652579308,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 11.9189,
+      "step": 176
+    },
+    {
+      "epoch": 0.054722522801051166,
+      "grad_norm": 0.026398515328764915,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 11.919,
+      "step": 177
+    },
+    {
+      "epoch": 0.055031689596537335,
+      "grad_norm": 0.013567643240094185,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 11.9208,
+      "step": 178
+    },
+    {
+      "epoch": 0.0553408563920235,
+      "grad_norm": 0.02200714312493801,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 11.9213,
+      "step": 179
+    },
+    {
+      "epoch": 0.05565002318750966,
+      "grad_norm": 0.018054096028208733,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 11.9207,
+      "step": 180
+    },
+    {
+      "epoch": 0.05595918998299582,
+      "grad_norm": 0.02852029912173748,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 11.9187,
+      "step": 181
+    },
+    {
+      "epoch": 0.05626835677848199,
+      "grad_norm": 0.021837683394551277,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 11.9186,
+      "step": 182
+    },
+    {
+      "epoch": 0.056577523573968155,
+      "grad_norm": 0.020435063168406487,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 11.92,
+      "step": 183
+    },
+    {
+      "epoch": 0.05688669036945432,
+      "grad_norm": 0.01715935580432415,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 11.9194,
+      "step": 184
+    },
+    {
+      "epoch": 0.05719585716494049,
+      "grad_norm": 0.022182030603289604,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 11.9206,
+      "step": 185
+    },
+    {
+      "epoch": 0.05750502396042665,
+      "grad_norm": 0.021311897784471512,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 11.919,
+      "step": 186
+    },
+    {
+      "epoch": 0.05781419075591281,
+      "grad_norm": 0.02074601501226425,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 11.9186,
+      "step": 187
+    },
+    {
+      "epoch": 0.05812335755139898,
+      "grad_norm": 0.01680840365588665,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 11.9207,
+      "step": 188
+    },
+    {
+      "epoch": 0.058432524346885145,
+      "grad_norm": 0.016003690659999847,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 11.9168,
+      "step": 189
+    },
+    {
+      "epoch": 0.05874169114237131,
+      "grad_norm": 0.01928616128861904,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 11.919,
+      "step": 190
+    },
+    {
+      "epoch": 0.05905085793785748,
+      "grad_norm": 0.02857852727174759,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 11.92,
+      "step": 191
+    },
+    {
+      "epoch": 0.05936002473334364,
+      "grad_norm": 0.020610762760043144,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 11.9201,
+      "step": 192
+    },
+    {
+      "epoch": 0.0596691915288298,
+      "grad_norm": 0.028517993167042732,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 11.9188,
+      "step": 193
+    },
+    {
+      "epoch": 0.059978358324315965,
+      "grad_norm": 0.023285964503884315,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 11.9216,
+      "step": 194
+    },
+    {
+      "epoch": 0.060287525119802135,
+      "grad_norm": 0.017108747735619545,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 11.9205,
+      "step": 195
+    },
+    {
+      "epoch": 0.0605966919152883,
+      "grad_norm": 0.018555685877799988,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 11.9207,
+      "step": 196
+    },
+    {
+      "epoch": 0.06090585871077446,
+      "grad_norm": 0.024458199739456177,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 11.9181,
+      "step": 197
+    },
+    {
+      "epoch": 0.06121502550626063,
+      "grad_norm": 0.03316584974527359,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 11.9203,
+      "step": 198
+    },
+    {
+      "epoch": 0.06152419230174679,
+      "grad_norm": 0.023227546364068985,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 11.9185,
+      "step": 199
+    },
+    {
+      "epoch": 0.061833359097232955,
+      "grad_norm": 0.03278621658682823,
+      "learning_rate": 0.0,
+      "loss": 11.9194,
+      "step": 200
+    },
+    {
+      "epoch": 0.061833359097232955,
+      "eval_loss": 11.919875144958496,
+      "eval_runtime": 0.266,
+      "eval_samples_per_second": 187.983,
+      "eval_steps_per_second": 26.318,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 318524620800.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null