Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba98ba0d3a2eaf38ae5dc7687c149aad344d456d2ff949393d204ca73eb4a2d9
 size 767856

 version https://git-lfs.github.com/spec/v1
+oid sha256:59cc5c104f9bd3d4e80afd5b157c2effec5e2ff935d7890ae2a43b6461325ae1
 size 767856

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9767070661350c2a7f4ae9ef5ab927c7d993346861abb16d4d89be55db6cbf6c
 size 1601338

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c2f7f7d1b7ab990fcc75c0ace31b4b290048cd72685345d84857893e929e945
 size 1601338

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed7ffce74426652d5a5834ed90eeac3adfd16f12f3977e4bc417cf070ea818c2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ba799120ab2b64a9a62a8cd6a7de4a93dff11edf8b59aabf8017db9f9a50b8d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 7.293600082397461,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.040346984062941293,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 46.541,
       "eval_steps_per_second": 6.516,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 130241338343424.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 7.1964921951293945,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.05379597875058839,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 46.541,
       "eval_steps_per_second": 6.516,
       "step": 150
+    },
+    {
+      "epoch": 0.040615963956694236,
+      "grad_norm": 1.378065586090088,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 6.2633,
+      "step": 151
+    },
+    {
+      "epoch": 0.04088494385044718,
+      "grad_norm": 1.318018913269043,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 6.6505,
+      "step": 152
+    },
+    {
+      "epoch": 0.04115392374420012,
+      "grad_norm": 1.5204188823699951,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 7.352,
+      "step": 153
+    },
+    {
+      "epoch": 0.04142290363795306,
+      "grad_norm": 1.6174471378326416,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 7.3426,
+      "step": 154
+    },
+    {
+      "epoch": 0.041691883531706005,
+      "grad_norm": 0.9823394417762756,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 7.2325,
+      "step": 155
+    },
+    {
+      "epoch": 0.04196086342545895,
+      "grad_norm": 0.9155641794204712,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 7.0975,
+      "step": 156
+    },
+    {
+      "epoch": 0.04222984331921189,
+      "grad_norm": 0.9602285623550415,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 7.4331,
+      "step": 157
+    },
+    {
+      "epoch": 0.04249882321296483,
+      "grad_norm": 0.4766939878463745,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 6.4512,
+      "step": 158
+    },
+    {
+      "epoch": 0.042767803106717774,
+      "grad_norm": 0.6891216039657593,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 6.2009,
+      "step": 159
+    },
+    {
+      "epoch": 0.043036783000470716,
+      "grad_norm": 0.7973335385322571,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 6.0294,
+      "step": 160
+    },
+    {
+      "epoch": 0.04330576289422366,
+      "grad_norm": 0.7474548816680908,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 6.385,
+      "step": 161
+    },
+    {
+      "epoch": 0.0435747427879766,
+      "grad_norm": 0.5858322381973267,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 6.2609,
+      "step": 162
+    },
+    {
+      "epoch": 0.04384372268172954,
+      "grad_norm": 0.7844979763031006,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 6.3406,
+      "step": 163
+    },
+    {
+      "epoch": 0.044112702575482485,
+      "grad_norm": 1.182106614112854,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 6.4119,
+      "step": 164
+    },
+    {
+      "epoch": 0.04438168246923543,
+      "grad_norm": 0.9624720215797424,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 6.6251,
+      "step": 165
+    },
+    {
+      "epoch": 0.04465066236298837,
+      "grad_norm": 0.8138177990913391,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 7.0401,
+      "step": 166
+    },
+    {
+      "epoch": 0.04491964225674131,
+      "grad_norm": 0.5879483222961426,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 6.9187,
+      "step": 167
+    },
+    {
+      "epoch": 0.045188622150494254,
+      "grad_norm": 1.2505601644515991,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 7.2914,
+      "step": 168
+    },
+    {
+      "epoch": 0.045457602044247196,
+      "grad_norm": 1.0231761932373047,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 6.8207,
+      "step": 169
+    },
+    {
+      "epoch": 0.04572658193800013,
+      "grad_norm": 0.8401983976364136,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 7.1883,
+      "step": 170
+    },
+    {
+      "epoch": 0.04599556183175307,
+      "grad_norm": 0.5049845576286316,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 6.8629,
+      "step": 171
+    },
+    {
+      "epoch": 0.046264541725506016,
+      "grad_norm": 0.7277731895446777,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 7.2852,
+      "step": 172
+    },
+    {
+      "epoch": 0.04653352161925896,
+      "grad_norm": 0.977936863899231,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 6.9577,
+      "step": 173
+    },
+    {
+      "epoch": 0.0468025015130119,
+      "grad_norm": 0.7111912369728088,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 7.0556,
+      "step": 174
+    },
+    {
+      "epoch": 0.04707148140676484,
+      "grad_norm": 0.7338117361068726,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 7.2456,
+      "step": 175
+    },
+    {
+      "epoch": 0.04707148140676484,
+      "eval_loss": 7.184196949005127,
+      "eval_runtime": 1.074,
+      "eval_samples_per_second": 46.556,
+      "eval_steps_per_second": 6.518,
+      "step": 175
+    },
+    {
+      "epoch": 0.047340461300517785,
+      "grad_norm": 0.5232920050621033,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 6.4342,
+      "step": 176
+    },
+    {
+      "epoch": 0.04760944119427073,
+      "grad_norm": 0.45352447032928467,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 6.9281,
+      "step": 177
+    },
+    {
+      "epoch": 0.04787842108802367,
+      "grad_norm": 0.611677885055542,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 7.319,
+      "step": 178
+    },
+    {
+      "epoch": 0.04814740098177661,
+      "grad_norm": 0.9648262858390808,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 6.9275,
+      "step": 179
+    },
+    {
+      "epoch": 0.048416380875529554,
+      "grad_norm": 0.7132251262664795,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 6.5056,
+      "step": 180
+    },
+    {
+      "epoch": 0.048685360769282496,
+      "grad_norm": 0.5821110606193542,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 6.84,
+      "step": 181
+    },
+    {
+      "epoch": 0.04895434066303544,
+      "grad_norm": 0.7894052267074585,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 6.9713,
+      "step": 182
+    },
+    {
+      "epoch": 0.04922332055678838,
+      "grad_norm": 0.7041998505592346,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 6.4816,
+      "step": 183
+    },
+    {
+      "epoch": 0.04949230045054132,
+      "grad_norm": 0.5108593702316284,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 7.2662,
+      "step": 184
+    },
+    {
+      "epoch": 0.049761280344294265,
+      "grad_norm": 0.6600824594497681,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 7.2239,
+      "step": 185
+    },
+    {
+      "epoch": 0.05003026023804721,
+      "grad_norm": 0.679880678653717,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 7.0421,
+      "step": 186
+    },
+    {
+      "epoch": 0.05029924013180015,
+      "grad_norm": 0.5693497657775879,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 7.0777,
+      "step": 187
+    },
+    {
+      "epoch": 0.05056822002555309,
+      "grad_norm": 0.6687301397323608,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 6.6986,
+      "step": 188
+    },
+    {
+      "epoch": 0.050837199919306034,
+      "grad_norm": 0.7745783925056458,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 7.0626,
+      "step": 189
+    },
+    {
+      "epoch": 0.051106179813058976,
+      "grad_norm": 0.5398703813552856,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 7.5387,
+      "step": 190
+    },
+    {
+      "epoch": 0.05137515970681192,
+      "grad_norm": 0.5536245703697205,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 6.8114,
+      "step": 191
+    },
+    {
+      "epoch": 0.05164413960056486,
+      "grad_norm": 0.8733762502670288,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 7.109,
+      "step": 192
+    },
+    {
+      "epoch": 0.0519131194943178,
+      "grad_norm": 0.9818241000175476,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 6.679,
+      "step": 193
+    },
+    {
+      "epoch": 0.052182099388070745,
+      "grad_norm": 0.9805580377578735,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 6.5897,
+      "step": 194
+    },
+    {
+      "epoch": 0.05245107928182369,
+      "grad_norm": 1.4828003644943237,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 7.2332,
+      "step": 195
+    },
+    {
+      "epoch": 0.05272005917557662,
+      "grad_norm": 1.0649274587631226,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 6.9496,
+      "step": 196
+    },
+    {
+      "epoch": 0.052989039069329565,
+      "grad_norm": 1.1552672386169434,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 6.3958,
+      "step": 197
+    },
+    {
+      "epoch": 0.05325801896308251,
+      "grad_norm": 1.6000089645385742,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 7.0364,
+      "step": 198
+    },
+    {
+      "epoch": 0.05352699885683545,
+      "grad_norm": 0.86728435754776,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 7.7624,
+      "step": 199
+    },
+    {
+      "epoch": 0.05379597875058839,
+      "grad_norm": 1.0310410261154175,
+      "learning_rate": 0.0,
+      "loss": 7.2514,
+      "step": 200
+    },
+    {
+      "epoch": 0.05379597875058839,
+      "eval_loss": 7.1964921951293945,
+      "eval_runtime": 1.1003,
+      "eval_samples_per_second": 45.441,
+      "eval_steps_per_second": 6.362,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 174087813267456.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null