Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eddf16328bf46cba59331ad2cf5abba1de677bca8c9198a8be4d6451a9075cf0
 size 26008

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9015b7f5ea424f27864723c07c3da499223bde5a5a6bce8132c0ab18216747e
 size 26008

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbb5127b520293dc1b2c3f64d59f8018e461c9d4646ab5a30546fe839c956445
 size 68874

 version https://git-lfs.github.com/spec/v1
+oid sha256:27c1be7c791e259a1855bbff94ac17becf5296c365f0cb335ed3ca26242aace8
 size 68874

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e44bada34a22fd64d9118761f45270eaf59986831cefd3be3a801bd9679e8c3a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9fd32c10fcf742197ef7a19e71aa610821a050aa3ca6ebb9873d7cdad46c78d8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.914360046386719,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.1364877161055505,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 126.748,
       "eval_steps_per_second": 126.748,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 238893465600.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.913970947265625,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.18198362147406733,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 126.748,
       "eval_steps_per_second": 126.748,
       "step": 150
+    },
+    {
+      "epoch": 0.13739763421292084,
+      "grad_norm": 0.12348955869674683,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 11.9017,
+      "step": 151
+    },
+    {
+      "epoch": 0.13830755232029118,
+      "grad_norm": 0.11510408669710159,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 11.9001,
+      "step": 152
+    },
+    {
+      "epoch": 0.1392174704276615,
+      "grad_norm": 0.08950791507959366,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 11.9053,
+      "step": 153
+    },
+    {
+      "epoch": 0.14012738853503184,
+      "grad_norm": 0.08536588400602341,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 11.9046,
+      "step": 154
+    },
+    {
+      "epoch": 0.1410373066424022,
+      "grad_norm": 0.09876493364572525,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 11.905,
+      "step": 155
+    },
+    {
+      "epoch": 0.14194722474977253,
+      "grad_norm": 0.11245915293693542,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 11.9043,
+      "step": 156
+    },
+    {
+      "epoch": 0.14285714285714285,
+      "grad_norm": 0.052722182124853134,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 11.9073,
+      "step": 157
+    },
+    {
+      "epoch": 0.1437670609645132,
+      "grad_norm": 0.07163731008768082,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 11.9085,
+      "step": 158
+    },
+    {
+      "epoch": 0.14467697907188354,
+      "grad_norm": 0.07469494640827179,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 11.9059,
+      "step": 159
+    },
+    {
+      "epoch": 0.14558689717925385,
+      "grad_norm": 0.06093104928731918,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 11.9057,
+      "step": 160
+    },
+    {
+      "epoch": 0.1464968152866242,
+      "grad_norm": 0.04979480803012848,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 11.9111,
+      "step": 161
+    },
+    {
+      "epoch": 0.14740673339399454,
+      "grad_norm": 0.04737919196486473,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 11.9108,
+      "step": 162
+    },
+    {
+      "epoch": 0.1483166515013649,
+      "grad_norm": 0.07306618988513947,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 11.9118,
+      "step": 163
+    },
+    {
+      "epoch": 0.1492265696087352,
+      "grad_norm": 0.07405707985162735,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 11.908,
+      "step": 164
+    },
+    {
+      "epoch": 0.15013648771610555,
+      "grad_norm": 0.06463942676782608,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 11.9078,
+      "step": 165
+    },
+    {
+      "epoch": 0.1510464058234759,
+      "grad_norm": 0.05370442196726799,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 11.9129,
+      "step": 166
+    },
+    {
+      "epoch": 0.15195632393084624,
+      "grad_norm": 0.07165276259183884,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 11.913,
+      "step": 167
+    },
+    {
+      "epoch": 0.15286624203821655,
+      "grad_norm": 0.049962639808654785,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 11.9099,
+      "step": 168
+    },
+    {
+      "epoch": 0.1537761601455869,
+      "grad_norm": 0.04808368161320686,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 11.9139,
+      "step": 169
+    },
+    {
+      "epoch": 0.15468607825295724,
+      "grad_norm": 0.05554522946476936,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 11.9162,
+      "step": 170
+    },
+    {
+      "epoch": 0.15559599636032756,
+      "grad_norm": 0.07787572592496872,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 11.9158,
+      "step": 171
+    },
+    {
+      "epoch": 0.1565059144676979,
+      "grad_norm": 0.055409956723451614,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 11.9144,
+      "step": 172
+    },
+    {
+      "epoch": 0.15741583257506825,
+      "grad_norm": 0.09151246398687363,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 11.9083,
+      "step": 173
+    },
+    {
+      "epoch": 0.1583257506824386,
+      "grad_norm": 0.07995638996362686,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 11.9108,
+      "step": 174
+    },
+    {
+      "epoch": 0.1592356687898089,
+      "grad_norm": 0.04699774831533432,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 11.9139,
+      "step": 175
+    },
+    {
+      "epoch": 0.1592356687898089,
+      "eval_loss": 11.914013862609863,
+      "eval_runtime": 0.3934,
+      "eval_samples_per_second": 127.105,
+      "eval_steps_per_second": 127.105,
+      "step": 175
+    },
+    {
+      "epoch": 0.16014558689717925,
+      "grad_norm": 0.0662156343460083,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 11.916,
+      "step": 176
+    },
+    {
+      "epoch": 0.1610555050045496,
+      "grad_norm": 0.05447092279791832,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 11.9107,
+      "step": 177
+    },
+    {
+      "epoch": 0.16196542311191992,
+      "grad_norm": 0.07941412925720215,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 11.9055,
+      "step": 178
+    },
+    {
+      "epoch": 0.16287534121929026,
+      "grad_norm": 0.05880994349718094,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 11.9072,
+      "step": 179
+    },
+    {
+      "epoch": 0.1637852593266606,
+      "grad_norm": 0.056635115295648575,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 11.9153,
+      "step": 180
+    },
+    {
+      "epoch": 0.16469517743403095,
+      "grad_norm": 0.0853646844625473,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 11.9109,
+      "step": 181
+    },
+    {
+      "epoch": 0.16560509554140126,
+      "grad_norm": 0.09290549904108047,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 11.9111,
+      "step": 182
+    },
+    {
+      "epoch": 0.1665150136487716,
+      "grad_norm": 0.05124865844845772,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 11.9119,
+      "step": 183
+    },
+    {
+      "epoch": 0.16742493175614195,
+      "grad_norm": 0.11257482320070267,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 11.9144,
+      "step": 184
+    },
+    {
+      "epoch": 0.16833484986351227,
+      "grad_norm": 0.061337102204561234,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 11.906,
+      "step": 185
+    },
+    {
+      "epoch": 0.16924476797088261,
+      "grad_norm": 0.055634934455156326,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 11.9124,
+      "step": 186
+    },
+    {
+      "epoch": 0.17015468607825296,
+      "grad_norm": 0.08984959125518799,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 11.9058,
+      "step": 187
+    },
+    {
+      "epoch": 0.1710646041856233,
+      "grad_norm": 0.09038890153169632,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 11.916,
+      "step": 188
+    },
+    {
+      "epoch": 0.17197452229299362,
+      "grad_norm": 0.09575130045413971,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 11.9111,
+      "step": 189
+    },
+    {
+      "epoch": 0.17288444040036396,
+      "grad_norm": 0.08616434037685394,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 11.9045,
+      "step": 190
+    },
+    {
+      "epoch": 0.1737943585077343,
+      "grad_norm": 0.08434485644102097,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 11.917,
+      "step": 191
+    },
+    {
+      "epoch": 0.17470427661510465,
+      "grad_norm": 0.06836060434579849,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 11.9066,
+      "step": 192
+    },
+    {
+      "epoch": 0.17561419472247497,
+      "grad_norm": 0.05706486850976944,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 11.9108,
+      "step": 193
+    },
+    {
+      "epoch": 0.17652411282984531,
+      "grad_norm": 0.08672205358743668,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 11.9167,
+      "step": 194
+    },
+    {
+      "epoch": 0.17743403093721566,
+      "grad_norm": 0.08131056278944016,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 11.9167,
+      "step": 195
+    },
+    {
+      "epoch": 0.17834394904458598,
+      "grad_norm": 0.07311462610960007,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 11.9137,
+      "step": 196
+    },
+    {
+      "epoch": 0.17925386715195632,
+      "grad_norm": 0.07971935719251633,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 11.913,
+      "step": 197
+    },
+    {
+      "epoch": 0.18016378525932666,
+      "grad_norm": 0.0999036505818367,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 11.9194,
+      "step": 198
+    },
+    {
+      "epoch": 0.181073703366697,
+      "grad_norm": 0.09455912560224533,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 11.9173,
+      "step": 199
+    },
+    {
+      "epoch": 0.18198362147406733,
+      "grad_norm": 0.12160496413707733,
+      "learning_rate": 0.0,
+      "loss": 11.9234,
+      "step": 200
+    },
+    {
+      "epoch": 0.18198362147406733,
+      "eval_loss": 11.913970947265625,
+      "eval_runtime": 0.3964,
+      "eval_samples_per_second": 126.139,
+      "eval_steps_per_second": 126.139,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 318524620800.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null