Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a9f2ddfed6ede52f73a950089950c6bb990fc8b26c06bd60f64244866e76ec3
 size 26008

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0bf94ccefd17b7bc85ad5d61ebeb387b5f5a6596e41fa5e791fbf60980d90e3
 size 26008

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f60b982f6c06da3d41cd56c3d1bec4214ba3f1777ee9a10a9e7308e10981f3a
 size 68874

 version https://git-lfs.github.com/spec/v1
+oid sha256:e377817c7e01039b31bc74e529c4730e2d3c5853500843649e8894b684b93e2d
 size 68874

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:489f4c718389e76241df37853a3d90a85c228ef3fca2c34c8c80ee5aff9312b1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:02fad5a2035e91aa39e84350bb3b91887956200ae82bb8e22764ba6bab57471c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.920450210571289,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.04637501932292472,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 160.265,
       "eval_steps_per_second": 22.437,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 238893465600.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.920330047607422,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.061833359097232955,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 160.265,
       "eval_steps_per_second": 22.437,
       "step": 150
+    },
+    {
+      "epoch": 0.04668418611841088,
+      "grad_norm": 0.018326055258512497,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 11.9223,
+      "step": 151
+    },
+    {
+      "epoch": 0.04699335291389705,
+      "grad_norm": 0.015073740854859352,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 11.9185,
+      "step": 152
+    },
+    {
+      "epoch": 0.04730251970938321,
+      "grad_norm": 0.014074956998229027,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 11.9191,
+      "step": 153
+    },
+    {
+      "epoch": 0.047611686504869376,
+      "grad_norm": 0.017671145498752594,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 11.9207,
+      "step": 154
+    },
+    {
+      "epoch": 0.04792085330035554,
+      "grad_norm": 0.016020517796278,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 11.9197,
+      "step": 155
+    },
+    {
+      "epoch": 0.04823002009584171,
+      "grad_norm": 0.019396420568227768,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 11.9215,
+      "step": 156
+    },
+    {
+      "epoch": 0.04853918689132787,
+      "grad_norm": 0.010420717298984528,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 11.9209,
+      "step": 157
+    },
+    {
+      "epoch": 0.04884835368681403,
+      "grad_norm": 0.01945878006517887,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 11.9211,
+      "step": 158
+    },
+    {
+      "epoch": 0.0491575204823002,
+      "grad_norm": 0.015013448894023895,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 11.9209,
+      "step": 159
+    },
+    {
+      "epoch": 0.049466687277786366,
+      "grad_norm": 0.01401391439139843,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 11.9211,
+      "step": 160
+    },
+    {
+      "epoch": 0.04977585407327253,
+      "grad_norm": 0.017041126266121864,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 11.9195,
+      "step": 161
+    },
+    {
+      "epoch": 0.0500850208687587,
+      "grad_norm": 0.01230812631547451,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 11.92,
+      "step": 162
+    },
+    {
+      "epoch": 0.05039418766424486,
+      "grad_norm": 0.013004007749259472,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 11.9208,
+      "step": 163
+    },
+    {
+      "epoch": 0.05070335445973102,
+      "grad_norm": 0.01057471428066492,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 11.9199,
+      "step": 164
+    },
+    {
+      "epoch": 0.05101252125521719,
+      "grad_norm": 0.013515602797269821,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 11.9216,
+      "step": 165
+    },
+    {
+      "epoch": 0.051321688050703355,
+      "grad_norm": 0.01369836088269949,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 11.921,
+      "step": 166
+    },
+    {
+      "epoch": 0.05163085484618952,
+      "grad_norm": 0.012219869531691074,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 11.9192,
+      "step": 167
+    },
+    {
+      "epoch": 0.05194002164167568,
+      "grad_norm": 0.018385522067546844,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 11.9201,
+      "step": 168
+    },
+    {
+      "epoch": 0.05224918843716185,
+      "grad_norm": 0.018301496282219887,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 11.9198,
+      "step": 169
+    },
+    {
+      "epoch": 0.05255835523264801,
+      "grad_norm": 0.01974300853908062,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 11.9206,
+      "step": 170
+    },
+    {
+      "epoch": 0.052867522028134176,
+      "grad_norm": 0.010678272694349289,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 11.9222,
+      "step": 171
+    },
+    {
+      "epoch": 0.053176688823620345,
+      "grad_norm": 0.019584400579333305,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 11.9203,
+      "step": 172
+    },
+    {
+      "epoch": 0.05348585561910651,
+      "grad_norm": 0.012021908536553383,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 11.9207,
+      "step": 173
+    },
+    {
+      "epoch": 0.05379502241459267,
+      "grad_norm": 0.016563573852181435,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 11.9191,
+      "step": 174
+    },
+    {
+      "epoch": 0.05410418921007884,
+      "grad_norm": 0.013715994544327259,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 11.9195,
+      "step": 175
+    },
+    {
+      "epoch": 0.05410418921007884,
+      "eval_loss": 11.920323371887207,
+      "eval_runtime": 0.3127,
+      "eval_samples_per_second": 159.882,
+      "eval_steps_per_second": 22.383,
+      "step": 175
+    },
+    {
+      "epoch": 0.054413356005565,
+      "grad_norm": 0.016443433240056038,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 11.9194,
+      "step": 176
+    },
+    {
+      "epoch": 0.054722522801051166,
+      "grad_norm": 0.020550759509205818,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 11.9196,
+      "step": 177
+    },
+    {
+      "epoch": 0.055031689596537335,
+      "grad_norm": 0.008727199397981167,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 11.921,
+      "step": 178
+    },
+    {
+      "epoch": 0.0553408563920235,
+      "grad_norm": 0.016882145777344704,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 11.9219,
+      "step": 179
+    },
+    {
+      "epoch": 0.05565002318750966,
+      "grad_norm": 0.014393302612006664,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 11.9213,
+      "step": 180
+    },
+    {
+      "epoch": 0.05595918998299582,
+      "grad_norm": 0.024645017459988594,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 11.9191,
+      "step": 181
+    },
+    {
+      "epoch": 0.05626835677848199,
+      "grad_norm": 0.015890292823314667,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 11.9192,
+      "step": 182
+    },
+    {
+      "epoch": 0.056577523573968155,
+      "grad_norm": 0.015051585622131824,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 11.921,
+      "step": 183
+    },
+    {
+      "epoch": 0.05688669036945432,
+      "grad_norm": 0.0134736318141222,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 11.9198,
+      "step": 184
+    },
+    {
+      "epoch": 0.05719585716494049,
+      "grad_norm": 0.018503418192267418,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 11.921,
+      "step": 185
+    },
+    {
+      "epoch": 0.05750502396042665,
+      "grad_norm": 0.013675318099558353,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 11.9198,
+      "step": 186
+    },
+    {
+      "epoch": 0.05781419075591281,
+      "grad_norm": 0.016024842858314514,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 11.9193,
+      "step": 187
+    },
+    {
+      "epoch": 0.05812335755139898,
+      "grad_norm": 0.01202288456261158,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 11.9214,
+      "step": 188
+    },
+    {
+      "epoch": 0.058432524346885145,
+      "grad_norm": 0.014596652239561081,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 11.9172,
+      "step": 189
+    },
+    {
+      "epoch": 0.05874169114237131,
+      "grad_norm": 0.013716932386159897,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 11.9202,
+      "step": 190
+    },
+    {
+      "epoch": 0.05905085793785748,
+      "grad_norm": 0.02387954853475094,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 11.9207,
+      "step": 191
+    },
+    {
+      "epoch": 0.05936002473334364,
+      "grad_norm": 0.014165020547807217,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 11.9204,
+      "step": 192
+    },
+    {
+      "epoch": 0.0596691915288298,
+      "grad_norm": 0.025135591626167297,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 11.9194,
+      "step": 193
+    },
+    {
+      "epoch": 0.059978358324315965,
+      "grad_norm": 0.02197529375553131,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 11.922,
+      "step": 194
+    },
+    {
+      "epoch": 0.060287525119802135,
+      "grad_norm": 0.015857767313718796,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 11.9212,
+      "step": 195
+    },
+    {
+      "epoch": 0.0605966919152883,
+      "grad_norm": 0.012184608727693558,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 11.9211,
+      "step": 196
+    },
+    {
+      "epoch": 0.06090585871077446,
+      "grad_norm": 0.020152973011136055,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 11.919,
+      "step": 197
+    },
+    {
+      "epoch": 0.06121502550626063,
+      "grad_norm": 0.02711832895874977,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 11.921,
+      "step": 198
+    },
+    {
+      "epoch": 0.06152419230174679,
+      "grad_norm": 0.018907776102423668,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 11.9194,
+      "step": 199
+    },
+    {
+      "epoch": 0.061833359097232955,
+      "grad_norm": 0.029230380430817604,
+      "learning_rate": 0.0,
+      "loss": 11.9202,
+      "step": 200
+    },
+    {
+      "epoch": 0.061833359097232955,
+      "eval_loss": 11.920330047607422,
+      "eval_runtime": 0.312,
+      "eval_samples_per_second": 160.246,
+      "eval_steps_per_second": 22.434,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 318524620800.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null