Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5f615566631c3398cedd76dca15307bfaf7dabea5340e5c18820943368274c4
 size 402688040

 version https://git-lfs.github.com/spec/v1
+oid sha256:a57248c08a3a2206eab0c59cd38d7ef90b377fda9a775e0538a4aa6deaf4d76c
 size 402688040

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6aaa1d0a7fc83e69915da62a8fc0b308b4ebe3a9d4dc792195021a0202261174
 size 204773716

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0431d899b3cbc25a66d83fa489883559e763ca52223420e02c99d96f72cf864
 size 204773716

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a247c149b98a20301ce2303ce630042580645d34c1fd5677567097286d79216
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:134f300f23c190bf2d764b08553ca00edead1d1d23c82854d3d4e3f4da1ef83f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:970068ebf9c0dc6a40c93653c563bb0b2ba5296a6c46496b504a3f1343bf3a62
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7df65c8f4b426598b0abc4173b1983dcf7411aee63ea4061d980eae7a1af2363
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.351506471633911,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.31931878658861096,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 11.46,
       "eval_steps_per_second": 2.865,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.230876016902144e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.349104642868042,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.4257583821181479,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.46,
       "eval_steps_per_second": 2.865,
       "step": 150
+    },
+    {
+      "epoch": 0.3214475784992017,
+      "grad_norm": 0.44929131865501404,
+      "learning_rate": 1.7085562498478883e-05,
+      "loss": 8.8144,
+      "step": 151
+    },
+    {
+      "epoch": 0.3235763704097924,
+      "grad_norm": 0.46485283970832825,
+      "learning_rate": 1.6431976865013128e-05,
+      "loss": 9.0647,
+      "step": 152
+    },
+    {
+      "epoch": 0.32570516232038316,
+      "grad_norm": 0.482954204082489,
+      "learning_rate": 1.5788935343973164e-05,
+      "loss": 9.0857,
+      "step": 153
+    },
+    {
+      "epoch": 0.32783395423097395,
+      "grad_norm": 0.4682692289352417,
+      "learning_rate": 1.5156613736490108e-05,
+      "loss": 9.4186,
+      "step": 154
+    },
+    {
+      "epoch": 0.3299627461415647,
+      "grad_norm": 0.4939897358417511,
+      "learning_rate": 1.4535184912977763e-05,
+      "loss": 9.3078,
+      "step": 155
+    },
+    {
+      "epoch": 0.3320915380521554,
+      "grad_norm": 0.5013800263404846,
+      "learning_rate": 1.3924818765871553e-05,
+      "loss": 8.985,
+      "step": 156
+    },
+    {
+      "epoch": 0.33422032996274614,
+      "grad_norm": 0.521223247051239,
+      "learning_rate": 1.3325682163181601e-05,
+      "loss": 8.9674,
+      "step": 157
+    },
+    {
+      "epoch": 0.3363491218733369,
+      "grad_norm": 0.5110321640968323,
+      "learning_rate": 1.2737938902872767e-05,
+      "loss": 9.5186,
+      "step": 158
+    },
+    {
+      "epoch": 0.3384779137839276,
+      "grad_norm": 0.5578789114952087,
+      "learning_rate": 1.2161749668083823e-05,
+      "loss": 9.0838,
+      "step": 159
+    },
+    {
+      "epoch": 0.34060670569451834,
+      "grad_norm": 0.5438776612281799,
+      "learning_rate": 1.159727198319836e-05,
+      "loss": 9.4288,
+      "step": 160
+    },
+    {
+      "epoch": 0.3427354976051091,
+      "grad_norm": 0.5206118822097778,
+      "learning_rate": 1.1044660170779142e-05,
+      "loss": 9.4825,
+      "step": 161
+    },
+    {
+      "epoch": 0.34486428951569986,
+      "grad_norm": 0.5261669754981995,
+      "learning_rate": 1.0504065309377897e-05,
+      "loss": 9.1035,
+      "step": 162
+    },
+    {
+      "epoch": 0.3469930814262906,
+      "grad_norm": 0.5535460710525513,
+      "learning_rate": 9.97563519223192e-06,
+      "loss": 9.0846,
+      "step": 163
+    },
+    {
+      "epoch": 0.3491218733368813,
+      "grad_norm": 0.5460895895957947,
+      "learning_rate": 9.459514286858898e-06,
+      "loss": 9.4422,
+      "step": 164
+    },
+    {
+      "epoch": 0.35125066524747206,
+      "grad_norm": 0.5579862594604492,
+      "learning_rate": 8.95584369556093e-06,
+      "loss": 9.1487,
+      "step": 165
+    },
+    {
+      "epoch": 0.3533794571580628,
+      "grad_norm": 0.606539785861969,
+      "learning_rate": 8.464761116848546e-06,
+      "loss": 9.0932,
+      "step": 166
+    },
+    {
+      "epoch": 0.3555082490686535,
+      "grad_norm": 0.6063513159751892,
+      "learning_rate": 7.986400807795349e-06,
+      "loss": 9.5204,
+      "step": 167
+    },
+    {
+      "epoch": 0.35763704097924426,
+      "grad_norm": 0.5974627137184143,
+      "learning_rate": 7.520893547333436e-06,
+      "loss": 9.2515,
+      "step": 168
+    },
+    {
+      "epoch": 0.359765832889835,
+      "grad_norm": 0.5933561325073242,
+      "learning_rate": 7.068366600499744e-06,
+      "loss": 9.62,
+      "step": 169
+    },
+    {
+      "epoch": 0.3618946248004258,
+      "grad_norm": 0.6004334092140198,
+      "learning_rate": 6.6289436836431076e-06,
+      "loss": 9.3208,
+      "step": 170
+    },
+    {
+      "epoch": 0.3640234167110165,
+      "grad_norm": 0.5610815286636353,
+      "learning_rate": 6.20274493060135e-06,
+      "loss": 8.734,
+      "step": 171
+    },
+    {
+      "epoch": 0.36615220862160724,
+      "grad_norm": 0.670384407043457,
+      "learning_rate": 5.789886859858009e-06,
+      "loss": 9.2437,
+      "step": 172
+    },
+    {
+      "epoch": 0.368281000532198,
+      "grad_norm": 0.637337863445282,
+      "learning_rate": 5.3904823426872605e-06,
+      "loss": 9.0407,
+      "step": 173
+    },
+    {
+      "epoch": 0.3704097924427887,
+      "grad_norm": 0.6556326746940613,
+      "learning_rate": 5.004640572296062e-06,
+      "loss": 9.6361,
+      "step": 174
+    },
+    {
+      "epoch": 0.37253858435337944,
+      "grad_norm": 0.6882710456848145,
+      "learning_rate": 4.632467033971838e-06,
+      "loss": 9.332,
+      "step": 175
+    },
+    {
+      "epoch": 0.3746673762639702,
+      "grad_norm": 0.6392144560813904,
+      "learning_rate": 4.274063476243839e-06,
+      "loss": 9.5139,
+      "step": 176
+    },
+    {
+      "epoch": 0.37679616817456096,
+      "grad_norm": 0.6317657232284546,
+      "learning_rate": 3.929527883066117e-06,
+      "loss": 9.3777,
+      "step": 177
+    },
+    {
+      "epoch": 0.3789249600851517,
+      "grad_norm": 0.6690914034843445,
+      "learning_rate": 3.5989544470296595e-06,
+      "loss": 9.4876,
+      "step": 178
+    },
+    {
+      "epoch": 0.38105375199574243,
+      "grad_norm": 0.6618176698684692,
+      "learning_rate": 3.282433543611136e-06,
+      "loss": 9.4371,
+      "step": 179
+    },
+    {
+      "epoch": 0.38318254390633316,
+      "grad_norm": 0.718135416507721,
+      "learning_rate": 2.980051706465095e-06,
+      "loss": 9.2814,
+      "step": 180
+    },
+    {
+      "epoch": 0.3853113358169239,
+      "grad_norm": 0.6930022835731506,
+      "learning_rate": 2.691891603766556e-06,
+      "loss": 9.0157,
+      "step": 181
+    },
+    {
+      "epoch": 0.3874401277275146,
+      "grad_norm": 0.6553264856338501,
+      "learning_rate": 2.4180320156103298e-06,
+      "loss": 9.0813,
+      "step": 182
+    },
+    {
+      "epoch": 0.38956891963810536,
+      "grad_norm": 0.6928482055664062,
+      "learning_rate": 2.158547812473352e-06,
+      "loss": 9.4288,
+      "step": 183
+    },
+    {
+      "epoch": 0.3916977115486961,
+      "grad_norm": 0.7257071137428284,
+      "learning_rate": 1.9135099347458293e-06,
+      "loss": 9.1517,
+      "step": 184
+    },
+    {
+      "epoch": 0.3938265034592869,
+      "grad_norm": 0.7439829111099243,
+      "learning_rate": 1.6829853733368294e-06,
+      "loss": 9.3247,
+      "step": 185
+    },
+    {
+      "epoch": 0.3959552953698776,
+      "grad_norm": 0.7517066597938538,
+      "learning_rate": 1.4670371513596842e-06,
+      "loss": 8.967,
+      "step": 186
+    },
+    {
+      "epoch": 0.39808408728046835,
+      "grad_norm": 0.7198683023452759,
+      "learning_rate": 1.2657243069020402e-06,
+      "loss": 9.2388,
+      "step": 187
+    },
+    {
+      "epoch": 0.4002128791910591,
+      "grad_norm": 0.7926408648490906,
+      "learning_rate": 1.0791018768854855e-06,
+      "loss": 9.7638,
+      "step": 188
+    },
+    {
+      "epoch": 0.4023416711016498,
+      "grad_norm": 0.7303494811058044,
+      "learning_rate": 9.072208820189698e-07,
+      "loss": 9.6027,
+      "step": 189
+    },
+    {
+      "epoch": 0.40447046301224054,
+      "grad_norm": 0.7899345755577087,
+      "learning_rate": 7.501283128502722e-07,
+      "loss": 9.5104,
+      "step": 190
+    },
+    {
+      "epoch": 0.4065992549228313,
+      "grad_norm": 0.8408921957015991,
+      "learning_rate": 6.07867116919233e-07,
+      "loss": 9.8036,
+      "step": 191
+    },
+    {
+      "epoch": 0.408728046833422,
+      "grad_norm": 0.7526921629905701,
+      "learning_rate": 4.804761870163643e-07,
+      "loss": 9.3014,
+      "step": 192
+    },
+    {
+      "epoch": 0.4108568387440128,
+      "grad_norm": 0.7957383990287781,
+      "learning_rate": 3.6799035054990215e-07,
+      "loss": 9.3406,
+      "step": 193
+    },
+    {
+      "epoch": 0.41298563065460353,
+      "grad_norm": 0.7857971787452698,
+      "learning_rate": 2.704403600243721e-07,
+      "loss": 9.3666,
+      "step": 194
+    },
+    {
+      "epoch": 0.41511442256519426,
+      "grad_norm": 0.787301242351532,
+      "learning_rate": 1.878528846331584e-07,
+      "loss": 9.4993,
+      "step": 195
+    },
+    {
+      "epoch": 0.417243214475785,
+      "grad_norm": 1.0130836963653564,
+      "learning_rate": 1.202505029674006e-07,
+      "loss": 9.2734,
+      "step": 196
+    },
+    {
+      "epoch": 0.41937200638637573,
+      "grad_norm": 0.8680523633956909,
+      "learning_rate": 6.765169684323947e-08,
+      "loss": 9.6988,
+      "step": 197
+    },
+    {
+      "epoch": 0.42150079829696646,
+      "grad_norm": 0.9896531701087952,
+      "learning_rate": 3.007084624906731e-08,
+      "loss": 9.9704,
+      "step": 198
+    },
+    {
+      "epoch": 0.4236295902075572,
+      "grad_norm": 0.9238101840019226,
+      "learning_rate": 7.518225414204771e-09,
+      "loss": 9.6142,
+      "step": 199
+    },
+    {
+      "epoch": 0.4257583821181479,
+      "grad_norm": 0.9510082006454468,
+      "learning_rate": 0.0,
+      "loss": 9.7021,
+      "step": 200
+    },
+    {
+      "epoch": 0.4257583821181479,
+      "eval_loss": 2.349104642868042,
+      "eval_runtime": 76.2599,
+      "eval_samples_per_second": 10.386,
+      "eval_steps_per_second": 2.596,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.6443000225792e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null