Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fba420663cbebe5f3b03536b6f1ca6b651910d6f8aa9697d6431ef6835356e0
 size 83115256

 version https://git-lfs.github.com/spec/v1
+oid sha256:58a23582d3d04b0eb768b00e4cfa4a8bfe168b252a0d83ed28616028d04c0555
 size 83115256

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4dc95f0ba3f579c3c5924db505fdeab67a88cf7ca53f7d1a32ec390060fef81
 size 166439638

 version https://git-lfs.github.com/spec/v1
+oid sha256:929eadbf524b55077dde3b9855e6c718c7a7ba5d8bd50eac75a6a4ba4a2f4be2
 size 166439638

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6aa0c7e54e102d2bceaa82bc3b95c1605bc73dd395e8c7288f451278953b27f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4eb18e9732ca8f07a3deaa1f0a61ffbabadea61a48a75a1ce53aa4b5474a969
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.08700338006019592,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.022581009371118888,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 8.486,
       "eval_steps_per_second": 1.188,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.20164561289216e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.07521108537912369,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.030108012494825184,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.486,
       "eval_steps_per_second": 1.188,
       "step": 150
+    },
+    {
+      "epoch": 0.022731549433593016,
+      "grad_norm": 0.15644125640392303,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.0615,
+      "step": 151
+    },
+    {
+      "epoch": 0.02288208949606714,
+      "grad_norm": 0.13763482868671417,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.0345,
+      "step": 152
+    },
+    {
+      "epoch": 0.023032629558541268,
+      "grad_norm": 0.1581212878227234,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 0.0534,
+      "step": 153
+    },
+    {
+      "epoch": 0.023183169621015392,
+      "grad_norm": 0.14264962077140808,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 0.0723,
+      "step": 154
+    },
+    {
+      "epoch": 0.02333370968348952,
+      "grad_norm": 0.15869100391864777,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 0.0649,
+      "step": 155
+    },
+    {
+      "epoch": 0.023484249745963644,
+      "grad_norm": 0.13853639364242554,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 0.0582,
+      "step": 156
+    },
+    {
+      "epoch": 0.02363478980843777,
+      "grad_norm": 0.16589723527431488,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 0.0942,
+      "step": 157
+    },
+    {
+      "epoch": 0.023785329870911896,
+      "grad_norm": 0.12482775747776031,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 0.04,
+      "step": 158
+    },
+    {
+      "epoch": 0.023935869933386023,
+      "grad_norm": 0.12084036320447922,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 0.0433,
+      "step": 159
+    },
+    {
+      "epoch": 0.024086409995860147,
+      "grad_norm": 0.19188204407691956,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 0.0683,
+      "step": 160
+    },
+    {
+      "epoch": 0.024236950058334275,
+      "grad_norm": 0.1759098321199417,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.0658,
+      "step": 161
+    },
+    {
+      "epoch": 0.0243874901208084,
+      "grad_norm": 0.11549676209688187,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.0428,
+      "step": 162
+    },
+    {
+      "epoch": 0.024538030183282527,
+      "grad_norm": 0.1507856547832489,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 0.0735,
+      "step": 163
+    },
+    {
+      "epoch": 0.02468857024575665,
+      "grad_norm": 0.14003053307533264,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 0.0741,
+      "step": 164
+    },
+    {
+      "epoch": 0.02483911030823078,
+      "grad_norm": 0.12259425222873688,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.0429,
+      "step": 165
+    },
+    {
+      "epoch": 0.024989650370704903,
+      "grad_norm": 0.11335761100053787,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 0.0405,
+      "step": 166
+    },
+    {
+      "epoch": 0.02514019043317903,
+      "grad_norm": 0.30729901790618896,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 0.0947,
+      "step": 167
+    },
+    {
+      "epoch": 0.025290730495653155,
+      "grad_norm": 0.21062178909778595,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.0986,
+      "step": 168
+    },
+    {
+      "epoch": 0.025441270558127282,
+      "grad_norm": 0.2791978418827057,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 0.1273,
+      "step": 169
+    },
+    {
+      "epoch": 0.025591810620601407,
+      "grad_norm": 0.23478782176971436,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 0.1027,
+      "step": 170
+    },
+    {
+      "epoch": 0.025742350683075534,
+      "grad_norm": 0.20816297829151154,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 0.074,
+      "step": 171
+    },
+    {
+      "epoch": 0.02589289074554966,
+      "grad_norm": 0.2092941403388977,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 0.1411,
+      "step": 172
+    },
+    {
+      "epoch": 0.026043430808023786,
+      "grad_norm": 0.2681998908519745,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 0.1129,
+      "step": 173
+    },
+    {
+      "epoch": 0.02619397087049791,
+      "grad_norm": 0.23058678209781647,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 0.1003,
+      "step": 174
+    },
+    {
+      "epoch": 0.026344510932972038,
+      "grad_norm": 0.2815040349960327,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 0.1296,
+      "step": 175
+    },
+    {
+      "epoch": 0.026344510932972038,
+      "eval_loss": 0.07823067903518677,
+      "eval_runtime": 5.8874,
+      "eval_samples_per_second": 8.493,
+      "eval_steps_per_second": 1.189,
+      "step": 175
+    },
+    {
+      "epoch": 0.026495050995446162,
+      "grad_norm": 0.1958063393831253,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 0.0977,
+      "step": 176
+    },
+    {
+      "epoch": 0.02664559105792029,
+      "grad_norm": 0.25271862745285034,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 0.1516,
+      "step": 177
+    },
+    {
+      "epoch": 0.026796131120394414,
+      "grad_norm": 0.2665039598941803,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 0.1355,
+      "step": 178
+    },
+    {
+      "epoch": 0.02694667118286854,
+      "grad_norm": 0.1967024803161621,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 0.1041,
+      "step": 179
+    },
+    {
+      "epoch": 0.027097211245342666,
+      "grad_norm": 0.22323350608348846,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 0.1508,
+      "step": 180
+    },
+    {
+      "epoch": 0.027247751307816793,
+      "grad_norm": 0.26765817403793335,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 0.1542,
+      "step": 181
+    },
+    {
+      "epoch": 0.027398291370290918,
+      "grad_norm": 0.22527235746383667,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 0.1327,
+      "step": 182
+    },
+    {
+      "epoch": 0.027548831432765045,
+      "grad_norm": 0.19589683413505554,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 0.1088,
+      "step": 183
+    },
+    {
+      "epoch": 0.02769937149523917,
+      "grad_norm": 0.7182509303092957,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 0.1013,
+      "step": 184
+    },
+    {
+      "epoch": 0.027849911557713297,
+      "grad_norm": 0.2169928103685379,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 0.1138,
+      "step": 185
+    },
+    {
+      "epoch": 0.02800045162018742,
+      "grad_norm": 0.6080103516578674,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 0.1358,
+      "step": 186
+    },
+    {
+      "epoch": 0.02815099168266155,
+      "grad_norm": 0.2407759577035904,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 0.1595,
+      "step": 187
+    },
+    {
+      "epoch": 0.028301531745135673,
+      "grad_norm": 0.3847437798976898,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 0.1593,
+      "step": 188
+    },
+    {
+      "epoch": 0.0284520718076098,
+      "grad_norm": 0.27828317880630493,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 0.1293,
+      "step": 189
+    },
+    {
+      "epoch": 0.028602611870083925,
+      "grad_norm": 0.2508259117603302,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 0.1092,
+      "step": 190
+    },
+    {
+      "epoch": 0.028753151932558053,
+      "grad_norm": 0.23639361560344696,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 0.1025,
+      "step": 191
+    },
+    {
+      "epoch": 0.028903691995032177,
+      "grad_norm": 0.2561388909816742,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 0.1349,
+      "step": 192
+    },
+    {
+      "epoch": 0.029054232057506305,
+      "grad_norm": 0.27320489287376404,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 0.172,
+      "step": 193
+    },
+    {
+      "epoch": 0.02920477211998043,
+      "grad_norm": 0.336984783411026,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 0.1359,
+      "step": 194
+    },
+    {
+      "epoch": 0.029355312182454556,
+      "grad_norm": 0.4405703544616699,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 0.1344,
+      "step": 195
+    },
+    {
+      "epoch": 0.02950585224492868,
+      "grad_norm": 0.7872334718704224,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 0.1815,
+      "step": 196
+    },
+    {
+      "epoch": 0.029656392307402808,
+      "grad_norm": 0.2895253300666809,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 0.1195,
+      "step": 197
+    },
+    {
+      "epoch": 0.029806932369876932,
+      "grad_norm": 0.6127339601516724,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 0.1486,
+      "step": 198
+    },
+    {
+      "epoch": 0.02995747243235106,
+      "grad_norm": 0.3820211589336395,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 0.1565,
+      "step": 199
+    },
+    {
+      "epoch": 0.030108012494825184,
+      "grad_norm": 0.5019745230674744,
+      "learning_rate": 0.0,
+      "loss": 0.178,
+      "step": 200
+    },
+    {
+      "epoch": 0.030108012494825184,
+      "eval_loss": 0.07521108537912369,
+      "eval_runtime": 5.8896,
+      "eval_samples_per_second": 8.49,
+      "eval_steps_per_second": 1.189,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.60219415052288e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null