Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:630301a955cd6e47cf725853e562f28cb2ddfae7cebaf6e700d49e8dab27e4be
 size 26008

 version https://git-lfs.github.com/spec/v1
+oid sha256:040ec18b4a708ceecd2e1e5aa7c5493ec5c3acf92a706e098b251640fbeabac4
 size 26008

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:711b73a9833c76aacbc470df40138bde4b966a8ccfe2767078c495894bca03d6
 size 68874

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c0630d66ca72c8abbe1322c138c9a48507aa32130007ed2e1bbc14e0c1962fe
 size 68874

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:470086bbf4b067c782d472928fd87effca18f38bf529e0e3198b085d5b271275
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e36c60f6ffedf3170dca0582a775cf56fd6cd5d577cce9b4e3892765966ec61
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.91579818725586,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.06213913989073868,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 204.63,
       "eval_steps_per_second": 28.648,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 238893465600.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.915698051452637,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.0828521865209849,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 204.63,
       "eval_steps_per_second": 28.648,
       "step": 150
+    },
+    {
+      "epoch": 0.0625534008233436,
+      "grad_norm": 0.011331605724990368,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 11.9162,
+      "step": 151
+    },
+    {
+      "epoch": 0.06296766175594853,
+      "grad_norm": 0.01841769553720951,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 11.9174,
+      "step": 152
+    },
+    {
+      "epoch": 0.06338192268855346,
+      "grad_norm": 0.011241200380027294,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 11.9168,
+      "step": 153
+    },
+    {
+      "epoch": 0.06379618362115838,
+      "grad_norm": 0.01512005552649498,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 11.9139,
+      "step": 154
+    },
+    {
+      "epoch": 0.0642104445537633,
+      "grad_norm": 0.020751705393195152,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 11.9149,
+      "step": 155
+    },
+    {
+      "epoch": 0.06462470548636823,
+      "grad_norm": 0.01683048903942108,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 11.9163,
+      "step": 156
+    },
+    {
+      "epoch": 0.06503896641897315,
+      "grad_norm": 0.01599193550646305,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 11.9185,
+      "step": 157
+    },
+    {
+      "epoch": 0.06545322735157807,
+      "grad_norm": 0.010102267377078533,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 11.9162,
+      "step": 158
+    },
+    {
+      "epoch": 0.065867488284183,
+      "grad_norm": 0.015134149231016636,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 11.9156,
+      "step": 159
+    },
+    {
+      "epoch": 0.06628174921678792,
+      "grad_norm": 0.018699511885643005,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 11.9171,
+      "step": 160
+    },
+    {
+      "epoch": 0.06669601014939285,
+      "grad_norm": 0.016042305156588554,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 11.9159,
+      "step": 161
+    },
+    {
+      "epoch": 0.06711027108199777,
+      "grad_norm": 0.01940661109983921,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 11.9143,
+      "step": 162
+    },
+    {
+      "epoch": 0.06752453201460269,
+      "grad_norm": 0.018022019416093826,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 11.9143,
+      "step": 163
+    },
+    {
+      "epoch": 0.06793879294720762,
+      "grad_norm": 0.010029465891420841,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 11.9146,
+      "step": 164
+    },
+    {
+      "epoch": 0.06835305387981255,
+      "grad_norm": 0.01938021183013916,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 11.9165,
+      "step": 165
+    },
+    {
+      "epoch": 0.06876731481241748,
+      "grad_norm": 0.012679509818553925,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 11.9142,
+      "step": 166
+    },
+    {
+      "epoch": 0.0691815757450224,
+      "grad_norm": 0.014771286398172379,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 11.9161,
+      "step": 167
+    },
+    {
+      "epoch": 0.06959583667762732,
+      "grad_norm": 0.014276309870183468,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 11.9143,
+      "step": 168
+    },
+    {
+      "epoch": 0.07001009761023225,
+      "grad_norm": 0.01856043189764023,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 11.9153,
+      "step": 169
+    },
+    {
+      "epoch": 0.07042435854283717,
+      "grad_norm": 0.011656641028821468,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 11.9139,
+      "step": 170
+    },
+    {
+      "epoch": 0.0708386194754421,
+      "grad_norm": 0.010782111436128616,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 11.9123,
+      "step": 171
+    },
+    {
+      "epoch": 0.07125288040804702,
+      "grad_norm": 0.012261268682777882,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 11.9135,
+      "step": 172
+    },
+    {
+      "epoch": 0.07166714134065194,
+      "grad_norm": 0.017579607665538788,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 11.9138,
+      "step": 173
+    },
+    {
+      "epoch": 0.07208140227325686,
+      "grad_norm": 0.010836518369615078,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 11.9121,
+      "step": 174
+    },
+    {
+      "epoch": 0.07249566320586179,
+      "grad_norm": 0.01106127630919218,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 11.9147,
+      "step": 175
+    },
+    {
+      "epoch": 0.07249566320586179,
+      "eval_loss": 11.915741920471191,
+      "eval_runtime": 0.2454,
+      "eval_samples_per_second": 203.783,
+      "eval_steps_per_second": 28.53,
+      "step": 175
+    },
+    {
+      "epoch": 0.07290992413846671,
+      "grad_norm": 0.015575578436255455,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 11.9162,
+      "step": 176
+    },
+    {
+      "epoch": 0.07332418507107163,
+      "grad_norm": 0.018252311274409294,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 11.9174,
+      "step": 177
+    },
+    {
+      "epoch": 0.07373844600367657,
+      "grad_norm": 0.020452966913580894,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 11.9149,
+      "step": 178
+    },
+    {
+      "epoch": 0.0741527069362815,
+      "grad_norm": 0.019538022577762604,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 11.9159,
+      "step": 179
+    },
+    {
+      "epoch": 0.07456696786888642,
+      "grad_norm": 0.016847524791955948,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 11.9151,
+      "step": 180
+    },
+    {
+      "epoch": 0.07498122880149134,
+      "grad_norm": 0.01843886263668537,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 11.9139,
+      "step": 181
+    },
+    {
+      "epoch": 0.07539548973409627,
+      "grad_norm": 0.019982196390628815,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 11.9164,
+      "step": 182
+    },
+    {
+      "epoch": 0.07580975066670119,
+      "grad_norm": 0.021740185096859932,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 11.9154,
+      "step": 183
+    },
+    {
+      "epoch": 0.07622401159930611,
+      "grad_norm": 0.016634231433272362,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 11.9136,
+      "step": 184
+    },
+    {
+      "epoch": 0.07663827253191104,
+      "grad_norm": 0.020411711186170578,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 11.9147,
+      "step": 185
+    },
+    {
+      "epoch": 0.07705253346451596,
+      "grad_norm": 0.01920330338180065,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 11.9133,
+      "step": 186
+    },
+    {
+      "epoch": 0.07746679439712088,
+      "grad_norm": 0.011669190600514412,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 11.9145,
+      "step": 187
+    },
+    {
+      "epoch": 0.0778810553297258,
+      "grad_norm": 0.01726020686328411,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 11.916,
+      "step": 188
+    },
+    {
+      "epoch": 0.07829531626233073,
+      "grad_norm": 0.018781933933496475,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 11.9125,
+      "step": 189
+    },
+    {
+      "epoch": 0.07870957719493565,
+      "grad_norm": 0.016961760818958282,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 11.916,
+      "step": 190
+    },
+    {
+      "epoch": 0.07912383812754059,
+      "grad_norm": 0.017457900568842888,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 11.9143,
+      "step": 191
+    },
+    {
+      "epoch": 0.07953809906014551,
+      "grad_norm": 0.014584069140255451,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 11.9143,
+      "step": 192
+    },
+    {
+      "epoch": 0.07995235999275044,
+      "grad_norm": 0.017070403322577477,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 11.9154,
+      "step": 193
+    },
+    {
+      "epoch": 0.08036662092535536,
+      "grad_norm": 0.017877068370580673,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 11.9087,
+      "step": 194
+    },
+    {
+      "epoch": 0.08078088185796028,
+      "grad_norm": 0.01924355886876583,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 11.9126,
+      "step": 195
+    },
+    {
+      "epoch": 0.08119514279056521,
+      "grad_norm": 0.03216341882944107,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 11.9105,
+      "step": 196
+    },
+    {
+      "epoch": 0.08160940372317013,
+      "grad_norm": 0.02557109296321869,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 11.9097,
+      "step": 197
+    },
+    {
+      "epoch": 0.08202366465577506,
+      "grad_norm": 0.02249646559357643,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 11.9116,
+      "step": 198
+    },
+    {
+      "epoch": 0.08243792558837998,
+      "grad_norm": 0.031640082597732544,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 11.9138,
+      "step": 199
+    },
+    {
+      "epoch": 0.0828521865209849,
+      "grad_norm": 0.02583852969110012,
+      "learning_rate": 0.0,
+      "loss": 11.9111,
+      "step": 200
+    },
+    {
+      "epoch": 0.0828521865209849,
+      "eval_loss": 11.915698051452637,
+      "eval_runtime": 0.2451,
+      "eval_samples_per_second": 204.03,
+      "eval_steps_per_second": 28.564,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 318524620800.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null