Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7857a41452566b94de332c9ebb4786c584cfb3ae379aaf891512435182a5bc99
 size 402688040

 version https://git-lfs.github.com/spec/v1
+oid sha256:99f7c8e9a51a63a584c9ffca15a650a3e9bf9c7f287bd029e46ae2c34fa3fd15
 size 402688040

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3573d80f631f61b5348a8ac0a8ed64bbe4762884a549fce5cb3a09f29242c08
 size 805522170

 version https://git-lfs.github.com/spec/v1
+oid sha256:84aac0690d09f86f349d64621c122e39d9afaebb427ef858fc1e8149acafafd1
 size 805522170

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:705dcffba374c61be211d8151d1cf15e1f94fa410e0096f9beff34a0cd7f007b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:803de0aaf47f343f0e8ba74b22296a593665fa296eaad0f2fcd5d129f206c682
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2374709844589233,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.04572473708276177,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 18.764,
       "eval_steps_per_second": 9.384,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1312784155344896e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.224632740020752,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.060966316110349035,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 18.764,
       "eval_steps_per_second": 9.384,
       "step": 150
+    },
+    {
+      "epoch": 0.04602956866331352,
+      "grad_norm": 0.9370346665382385,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 4.6741,
+      "step": 151
+    },
+    {
+      "epoch": 0.046334400243865266,
+      "grad_norm": 0.9067674279212952,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 4.6304,
+      "step": 152
+    },
+    {
+      "epoch": 0.04663923182441701,
+      "grad_norm": 1.0935213565826416,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 5.5383,
+      "step": 153
+    },
+    {
+      "epoch": 0.046944063404968754,
+      "grad_norm": 1.1216908693313599,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 6.1104,
+      "step": 154
+    },
+    {
+      "epoch": 0.0472488949855205,
+      "grad_norm": 1.2409911155700684,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 5.6602,
+      "step": 155
+    },
+    {
+      "epoch": 0.04755372656607224,
+      "grad_norm": 0.9131847023963928,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 6.2138,
+      "step": 156
+    },
+    {
+      "epoch": 0.04785855814662399,
+      "grad_norm": 1.2213008403778076,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 6.5254,
+      "step": 157
+    },
+    {
+      "epoch": 0.04816338972717574,
+      "grad_norm": 0.9096271991729736,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 5.9392,
+      "step": 158
+    },
+    {
+      "epoch": 0.04846822130772748,
+      "grad_norm": 0.866348147392273,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 4.8568,
+      "step": 159
+    },
+    {
+      "epoch": 0.048773052888279225,
+      "grad_norm": 0.8141571283340454,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 5.4341,
+      "step": 160
+    },
+    {
+      "epoch": 0.04907788446883097,
+      "grad_norm": 0.8729768395423889,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 5.0825,
+      "step": 161
+    },
+    {
+      "epoch": 0.04938271604938271,
+      "grad_norm": 0.880744993686676,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 5.1073,
+      "step": 162
+    },
+    {
+      "epoch": 0.049687547629934464,
+      "grad_norm": 0.8431662321090698,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 6.6863,
+      "step": 163
+    },
+    {
+      "epoch": 0.04999237921048621,
+      "grad_norm": 0.9774351119995117,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 4.8752,
+      "step": 164
+    },
+    {
+      "epoch": 0.05029721079103795,
+      "grad_norm": 0.8619078993797302,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 5.8747,
+      "step": 165
+    },
+    {
+      "epoch": 0.050602042371589696,
+      "grad_norm": 0.7690417766571045,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 5.261,
+      "step": 166
+    },
+    {
+      "epoch": 0.05090687395214144,
+      "grad_norm": 0.8820409178733826,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 5.8991,
+      "step": 167
+    },
+    {
+      "epoch": 0.051211705532693184,
+      "grad_norm": 1.0724667310714722,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 5.0322,
+      "step": 168
+    },
+    {
+      "epoch": 0.051516537113244935,
+      "grad_norm": 0.9853336215019226,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 5.3792,
+      "step": 169
+    },
+    {
+      "epoch": 0.05182136869379668,
+      "grad_norm": 0.913477897644043,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 6.1459,
+      "step": 170
+    },
+    {
+      "epoch": 0.05212620027434842,
+      "grad_norm": 0.9908555746078491,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 4.3843,
+      "step": 171
+    },
+    {
+      "epoch": 0.05243103185490017,
+      "grad_norm": 0.8242432475090027,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 5.4643,
+      "step": 172
+    },
+    {
+      "epoch": 0.05273586343545191,
+      "grad_norm": 0.8281580209732056,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 5.3037,
+      "step": 173
+    },
+    {
+      "epoch": 0.053040695016003656,
+      "grad_norm": 0.7910247445106506,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 4.6242,
+      "step": 174
+    },
+    {
+      "epoch": 0.053345526596555407,
+      "grad_norm": 0.8933544158935547,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 5.4644,
+      "step": 175
+    },
+    {
+      "epoch": 0.05365035817710715,
+      "grad_norm": 0.9154403805732727,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 5.3111,
+      "step": 176
+    },
+    {
+      "epoch": 0.053955189757658895,
+      "grad_norm": 0.833807110786438,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 3.8325,
+      "step": 177
+    },
+    {
+      "epoch": 0.05426002133821064,
+      "grad_norm": 0.8899303674697876,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 5.2321,
+      "step": 178
+    },
+    {
+      "epoch": 0.05456485291876238,
+      "grad_norm": 0.8820135593414307,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 4.4328,
+      "step": 179
+    },
+    {
+      "epoch": 0.05486968449931413,
+      "grad_norm": 1.0561301708221436,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 6.0862,
+      "step": 180
+    },
+    {
+      "epoch": 0.05517451607986587,
+      "grad_norm": 0.9490888714790344,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 4.5827,
+      "step": 181
+    },
+    {
+      "epoch": 0.05547934766041762,
+      "grad_norm": 0.9599933624267578,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 5.1208,
+      "step": 182
+    },
+    {
+      "epoch": 0.055784179240969366,
+      "grad_norm": 1.0407111644744873,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 4.6982,
+      "step": 183
+    },
+    {
+      "epoch": 0.05608901082152111,
+      "grad_norm": 0.9729002118110657,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 4.3347,
+      "step": 184
+    },
+    {
+      "epoch": 0.056393842402072854,
+      "grad_norm": 1.0244466066360474,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 4.8428,
+      "step": 185
+    },
+    {
+      "epoch": 0.0566986739826246,
+      "grad_norm": 0.9946762323379517,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 4.1583,
+      "step": 186
+    },
+    {
+      "epoch": 0.05700350556317634,
+      "grad_norm": 1.0568474531173706,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 4.2906,
+      "step": 187
+    },
+    {
+      "epoch": 0.05730833714372809,
+      "grad_norm": 0.9759791493415833,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 4.1194,
+      "step": 188
+    },
+    {
+      "epoch": 0.05761316872427984,
+      "grad_norm": 0.9902230501174927,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 4.2028,
+      "step": 189
+    },
+    {
+      "epoch": 0.05791800030483158,
+      "grad_norm": 1.1896251440048218,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 3.8738,
+      "step": 190
+    },
+    {
+      "epoch": 0.058222831885383325,
+      "grad_norm": 1.0268330574035645,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 3.3157,
+      "step": 191
+    },
+    {
+      "epoch": 0.05852766346593507,
+      "grad_norm": 1.0296571254730225,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 3.7593,
+      "step": 192
+    },
+    {
+      "epoch": 0.05883249504648681,
+      "grad_norm": 1.1712251901626587,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 3.9181,
+      "step": 193
+    },
+    {
+      "epoch": 0.059137326627038564,
+      "grad_norm": 1.2218458652496338,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 3.2565,
+      "step": 194
+    },
+    {
+      "epoch": 0.05944215820759031,
+      "grad_norm": 1.21880042552948,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 3.3098,
+      "step": 195
+    },
+    {
+      "epoch": 0.05974698978814205,
+      "grad_norm": 1.210938572883606,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 3.1741,
+      "step": 196
+    },
+    {
+      "epoch": 0.060051821368693796,
+      "grad_norm": 1.3736565113067627,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 3.1215,
+      "step": 197
+    },
+    {
+      "epoch": 0.06035665294924554,
+      "grad_norm": 1.5788041353225708,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 3.0409,
+      "step": 198
+    },
+    {
+      "epoch": 0.060661484529797284,
+      "grad_norm": 2.2263410091400146,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 3.291,
+      "step": 199
+    },
+    {
+      "epoch": 0.060966316110349035,
+      "grad_norm": 4.716347694396973,
+      "learning_rate": 0.0,
+      "loss": 3.6232,
+      "step": 200
+    },
+    {
+      "epoch": 0.060966316110349035,
+      "eval_loss": 1.224632740020752,
+      "eval_runtime": 294.8119,
+      "eval_samples_per_second": 18.741,
+      "eval_steps_per_second": 9.372,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5071184206954496e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null