Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92ec4d3e1b50a4ff30e2bf1f05006f56ea3efd447c41d64d5623d87102449157
 size 100697728

 version https://git-lfs.github.com/spec/v1
+oid sha256:d904635aafd7262fb48d82774a33f0c63581e73eacca262a5d8b05bb7237a3da
 size 100697728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:888b5e1a6607855d891c7bab38874bf60ffa824cef391e980c6ca0c206a9f735
 size 201541754

 version https://git-lfs.github.com/spec/v1
+oid sha256:396af0d89ed29d0664236084b68385ae8e41c5ba51925f56fd8e5625fb45dd04
 size 201541754

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f71b19c65eeaf96f89e07535f2c5affc99f8d23dd1cf5a7b0662cabc65ffe57d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b55ae87a977c230ca7cfec962e3d2a84a1ca4d96a78bc38cae6c48f9a8e480b8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.3069193363189697,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.26155187445510025,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 9.055,
       "eval_steps_per_second": 1.268,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.174333336584192e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.21677221357822418,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.34873583260680036,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.055,
       "eval_steps_per_second": 1.268,
       "step": 150
+    },
+    {
+      "epoch": 0.2632955536181343,
+      "grad_norm": 2.6976211071014404,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.9924,
+      "step": 151
+    },
+    {
+      "epoch": 0.26503923278116825,
+      "grad_norm": 2.4865119457244873,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.9836,
+      "step": 152
+    },
+    {
+      "epoch": 0.2667829119442023,
+      "grad_norm": 2.388842821121216,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 0.9307,
+      "step": 153
+    },
+    {
+      "epoch": 0.26852659110723626,
+      "grad_norm": 2.6463711261749268,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 0.9441,
+      "step": 154
+    },
+    {
+      "epoch": 0.2702702702702703,
+      "grad_norm": 3.185688018798828,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 1.5189,
+      "step": 155
+    },
+    {
+      "epoch": 0.27201394943330426,
+      "grad_norm": 2.5308520793914795,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 1.0876,
+      "step": 156
+    },
+    {
+      "epoch": 0.2737576285963383,
+      "grad_norm": 2.8542747497558594,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 1.1529,
+      "step": 157
+    },
+    {
+      "epoch": 0.27550130775937226,
+      "grad_norm": 2.9631950855255127,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 1.3245,
+      "step": 158
+    },
+    {
+      "epoch": 0.2772449869224063,
+      "grad_norm": 2.4584150314331055,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 1.1519,
+      "step": 159
+    },
+    {
+      "epoch": 0.27898866608544026,
+      "grad_norm": 1.8335541486740112,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 1.0986,
+      "step": 160
+    },
+    {
+      "epoch": 0.2807323452484743,
+      "grad_norm": 2.91326904296875,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 1.215,
+      "step": 161
+    },
+    {
+      "epoch": 0.28247602441150826,
+      "grad_norm": 2.9668750762939453,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 1.1686,
+      "step": 162
+    },
+    {
+      "epoch": 0.2842197035745423,
+      "grad_norm": 5.294821262359619,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 2.1123,
+      "step": 163
+    },
+    {
+      "epoch": 0.28596338273757627,
+      "grad_norm": 2.0044138431549072,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 1.032,
+      "step": 164
+    },
+    {
+      "epoch": 0.2877070619006103,
+      "grad_norm": 2.5393261909484863,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 1.515,
+      "step": 165
+    },
+    {
+      "epoch": 0.28945074106364427,
+      "grad_norm": 2.9860572814941406,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 1.3507,
+      "step": 166
+    },
+    {
+      "epoch": 0.2911944202266783,
+      "grad_norm": 2.4634785652160645,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 1.2136,
+      "step": 167
+    },
+    {
+      "epoch": 0.29293809938971227,
+      "grad_norm": 3.3339245319366455,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 1.321,
+      "step": 168
+    },
+    {
+      "epoch": 0.2946817785527463,
+      "grad_norm": 2.1197123527526855,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 0.9253,
+      "step": 169
+    },
+    {
+      "epoch": 0.29642545771578027,
+      "grad_norm": 3.588160753250122,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 1.6052,
+      "step": 170
+    },
+    {
+      "epoch": 0.2981691368788143,
+      "grad_norm": 2.9059865474700928,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 1.4387,
+      "step": 171
+    },
+    {
+      "epoch": 0.2999128160418483,
+      "grad_norm": 2.4765589237213135,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 1.382,
+      "step": 172
+    },
+    {
+      "epoch": 0.3016564952048823,
+      "grad_norm": 2.4395883083343506,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 1.0238,
+      "step": 173
+    },
+    {
+      "epoch": 0.30340017436791633,
+      "grad_norm": 2.7470932006835938,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 1.0419,
+      "step": 174
+    },
+    {
+      "epoch": 0.3051438535309503,
+      "grad_norm": 1.5619500875473022,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 0.8899,
+      "step": 175
+    },
+    {
+      "epoch": 0.3051438535309503,
+      "eval_loss": 0.22784070670604706,
+      "eval_runtime": 5.5215,
+      "eval_samples_per_second": 9.055,
+      "eval_steps_per_second": 1.268,
+      "step": 175
+    },
+    {
+      "epoch": 0.30688753269398433,
+      "grad_norm": 2.5386416912078857,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 0.8694,
+      "step": 176
+    },
+    {
+      "epoch": 0.3086312118570183,
+      "grad_norm": 3.259190320968628,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 1.0628,
+      "step": 177
+    },
+    {
+      "epoch": 0.31037489102005233,
+      "grad_norm": 1.8797160387039185,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 0.9535,
+      "step": 178
+    },
+    {
+      "epoch": 0.3121185701830863,
+      "grad_norm": 2.192770004272461,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 1.4844,
+      "step": 179
+    },
+    {
+      "epoch": 0.31386224934612034,
+      "grad_norm": 2.7970409393310547,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 1.439,
+      "step": 180
+    },
+    {
+      "epoch": 0.3156059285091543,
+      "grad_norm": 2.338233709335327,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 0.9264,
+      "step": 181
+    },
+    {
+      "epoch": 0.31734960767218834,
+      "grad_norm": 2.306898832321167,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 1.3452,
+      "step": 182
+    },
+    {
+      "epoch": 0.3190932868352223,
+      "grad_norm": 1.7065132856369019,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 0.6402,
+      "step": 183
+    },
+    {
+      "epoch": 0.32083696599825634,
+      "grad_norm": 2.7839744091033936,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 1.1321,
+      "step": 184
+    },
+    {
+      "epoch": 0.3225806451612903,
+      "grad_norm": 3.4941210746765137,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 1.2375,
+      "step": 185
+    },
+    {
+      "epoch": 0.32432432432432434,
+      "grad_norm": 2.152266502380371,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 0.9531,
+      "step": 186
+    },
+    {
+      "epoch": 0.3260680034873583,
+      "grad_norm": 3.6408121585845947,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 1.459,
+      "step": 187
+    },
+    {
+      "epoch": 0.32781168265039234,
+      "grad_norm": 1.6680314540863037,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 0.9177,
+      "step": 188
+    },
+    {
+      "epoch": 0.3295553618134263,
+      "grad_norm": 2.857945203781128,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 1.4882,
+      "step": 189
+    },
+    {
+      "epoch": 0.33129904097646035,
+      "grad_norm": 2.823565721511841,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 1.6463,
+      "step": 190
+    },
+    {
+      "epoch": 0.3330427201394943,
+      "grad_norm": 5.935178756713867,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 1.3552,
+      "step": 191
+    },
+    {
+      "epoch": 0.33478639930252835,
+      "grad_norm": 2.990522623062134,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 1.763,
+      "step": 192
+    },
+    {
+      "epoch": 0.3365300784655623,
+      "grad_norm": 2.7563226222991943,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 1.8906,
+      "step": 193
+    },
+    {
+      "epoch": 0.33827375762859635,
+      "grad_norm": 2.6643290519714355,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 1.5991,
+      "step": 194
+    },
+    {
+      "epoch": 0.3400174367916303,
+      "grad_norm": 2.767608880996704,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 1.4336,
+      "step": 195
+    },
+    {
+      "epoch": 0.34176111595466435,
+      "grad_norm": 2.827756643295288,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 1.2425,
+      "step": 196
+    },
+    {
+      "epoch": 0.3435047951176983,
+      "grad_norm": 2.2509312629699707,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 1.4668,
+      "step": 197
+    },
+    {
+      "epoch": 0.34524847428073235,
+      "grad_norm": 3.5071616172790527,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 1.2136,
+      "step": 198
+    },
+    {
+      "epoch": 0.3469921534437663,
+      "grad_norm": 3.1070590019226074,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 1.1762,
+      "step": 199
+    },
+    {
+      "epoch": 0.34873583260680036,
+      "grad_norm": 5.442651748657227,
+      "learning_rate": 0.0,
+      "loss": 1.4769,
+      "step": 200
+    },
+    {
+      "epoch": 0.34873583260680036,
+      "eval_loss": 0.21677221357822418,
+      "eval_runtime": 5.5193,
+      "eval_samples_per_second": 9.059,
+      "eval_steps_per_second": 1.268,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.565777782112256e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null