Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29aac5ed6c96fa321d25e64b04bc3b75a6f99a17b1c7010d95f54edf371a2cf7
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:92003b032d43d254dca04b99f995f5aa01a440343247b2d74daefdc3e380ef53
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:344f1b718a5aead0d799f0403b8f2f2efd6038ce5df0cdde371f72a52f31d2bd
 size 1279641042

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9071b3ee404fa4e8f13c393e1916d8db7cf569871c45446a0e5deb516b69a3e
 size 1279641042

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7dffca4d4c399cbbae6f08637961606f3c1dea636b1c9ec553952c4be4d88006
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6377e85ccc57cc71303058c1970d59d734649d2f81d016942a12c420596f7976
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.46980416774749756,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.016394338488441993,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 12.905,
       "eval_steps_per_second": 6.453,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.0093599183640986e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.46923279762268066,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.02185911798458932,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.905,
       "eval_steps_per_second": 6.453,
       "step": 150
+    },
+    {
+      "epoch": 0.01650363407836494,
+      "grad_norm": 0.05671292915940285,
+      "learning_rate": 1.7197048550474643e-05,
+      "loss": 0.3498,
+      "step": 151
+    },
+    {
+      "epoch": 0.016612929668287885,
+      "grad_norm": 0.07922675460577011,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 0.5638,
+      "step": 152
+    },
+    {
+      "epoch": 0.01672222525821083,
+      "grad_norm": 0.06751131266355515,
+      "learning_rate": 1.5900081996875083e-05,
+      "loss": 0.2989,
+      "step": 153
+    },
+    {
+      "epoch": 0.016831520848133778,
+      "grad_norm": 0.08673208206892014,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 0.3434,
+      "step": 154
+    },
+    {
+      "epoch": 0.016940816438056724,
+      "grad_norm": 0.10014075040817261,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.3313,
+      "step": 155
+    },
+    {
+      "epoch": 0.01705011202797967,
+      "grad_norm": 0.0925699844956398,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 0.3351,
+      "step": 156
+    },
+    {
+      "epoch": 0.017159407617902616,
+      "grad_norm": 0.10073317587375641,
+      "learning_rate": 1.3432314919041478e-05,
+      "loss": 0.4239,
+      "step": 157
+    },
+    {
+      "epoch": 0.017268703207825566,
+      "grad_norm": 0.09681018441915512,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 0.3196,
+      "step": 158
+    },
+    {
+      "epoch": 0.017377998797748512,
+      "grad_norm": 0.10117655992507935,
+      "learning_rate": 1.22645209888614e-05,
+      "loss": 0.3483,
+      "step": 159
+    },
+    {
+      "epoch": 0.017487294387671458,
+      "grad_norm": 0.10316099971532822,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 0.3765,
+      "step": 160
+    },
+    {
+      "epoch": 0.017596589977594404,
+      "grad_norm": 0.10998133569955826,
+      "learning_rate": 1.1142701927151456e-05,
+      "loss": 0.3845,
+      "step": 161
+    },
+    {
+      "epoch": 0.01770588556751735,
+      "grad_norm": 0.10539877414703369,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 0.4918,
+      "step": 162
+    },
+    {
+      "epoch": 0.017815181157440296,
+      "grad_norm": 0.1355060189962387,
+      "learning_rate": 1.006822449763537e-05,
+      "loss": 0.472,
+      "step": 163
+    },
+    {
+      "epoch": 0.017924476747363242,
+      "grad_norm": 0.12122693657875061,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.3635,
+      "step": 164
+    },
+    {
+      "epoch": 0.018033772337286192,
+      "grad_norm": 0.11582615971565247,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 0.5667,
+      "step": 165
+    },
+    {
+      "epoch": 0.018143067927209138,
+      "grad_norm": 0.12401402741670609,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 0.3543,
+      "step": 166
+    },
+    {
+      "epoch": 0.018252363517132084,
+      "grad_norm": 0.12098807841539383,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 0.4069,
+      "step": 167
+    },
+    {
+      "epoch": 0.01836165910705503,
+      "grad_norm": 0.12497370690107346,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 0.4353,
+      "step": 168
+    },
+    {
+      "epoch": 0.018470954696977977,
+      "grad_norm": 0.11947774142026901,
+      "learning_rate": 7.1416349648943894e-06,
+      "loss": 0.4151,
+      "step": 169
+    },
+    {
+      "epoch": 0.018580250286900923,
+      "grad_norm": 0.12466960400342941,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.3535,
+      "step": 170
+    },
+    {
+      "epoch": 0.01868954587682387,
+      "grad_norm": 0.14808349311351776,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 0.531,
+      "step": 171
+    },
+    {
+      "epoch": 0.01879884146674682,
+      "grad_norm": 0.13079893589019775,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 0.3183,
+      "step": 172
+    },
+    {
+      "epoch": 0.018908137056669765,
+      "grad_norm": 0.153338223695755,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.4308,
+      "step": 173
+    },
+    {
+      "epoch": 0.01901743264659271,
+      "grad_norm": 0.1578577756881714,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 0.5877,
+      "step": 174
+    },
+    {
+      "epoch": 0.019126728236515657,
+      "grad_norm": 0.1331440508365631,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 0.3637,
+      "step": 175
+    },
+    {
+      "epoch": 0.019236023826438603,
+      "grad_norm": 0.1452208012342453,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.4592,
+      "step": 176
+    },
+    {
+      "epoch": 0.01934531941636155,
+      "grad_norm": 0.1602085381746292,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 0.5317,
+      "step": 177
+    },
+    {
+      "epoch": 0.019454615006284495,
+      "grad_norm": 0.15811043977737427,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 0.5555,
+      "step": 178
+    },
+    {
+      "epoch": 0.01956391059620744,
+      "grad_norm": 0.1520129144191742,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 0.4347,
+      "step": 179
+    },
+    {
+      "epoch": 0.01967320618613039,
+      "grad_norm": 0.17405475676059723,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.4352,
+      "step": 180
+    },
+    {
+      "epoch": 0.019782501776053337,
+      "grad_norm": 0.20877398550510406,
+      "learning_rate": 2.724071220034158e-06,
+      "loss": 0.5259,
+      "step": 181
+    },
+    {
+      "epoch": 0.019891797365976283,
+      "grad_norm": 0.1546728014945984,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.4277,
+      "step": 182
+    },
+    {
+      "epoch": 0.02000109295589923,
+      "grad_norm": 0.207755446434021,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 0.5322,
+      "step": 183
+    },
+    {
+      "epoch": 0.020110388545822176,
+      "grad_norm": 0.17791689932346344,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 0.3993,
+      "step": 184
+    },
+    {
+      "epoch": 0.02021968413574512,
+      "grad_norm": 0.1971198171377182,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.4922,
+      "step": 185
+    },
+    {
+      "epoch": 0.020328979725668068,
+      "grad_norm": 0.18837548792362213,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 0.669,
+      "step": 186
+    },
+    {
+      "epoch": 0.020438275315591017,
+      "grad_norm": 0.21775005757808685,
+      "learning_rate": 1.2814967607382432e-06,
+      "loss": 0.5298,
+      "step": 187
+    },
+    {
+      "epoch": 0.020547570905513964,
+      "grad_norm": 0.23758465051651,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.574,
+      "step": 188
+    },
+    {
+      "epoch": 0.02065686649543691,
+      "grad_norm": 0.23550760746002197,
+      "learning_rate": 9.186408276168013e-07,
+      "loss": 0.6356,
+      "step": 189
+    },
+    {
+      "epoch": 0.020766162085359856,
+      "grad_norm": 0.2456381767988205,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.5453,
+      "step": 190
+    },
+    {
+      "epoch": 0.020875457675282802,
+      "grad_norm": 0.2014353722333908,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.4603,
+      "step": 191
+    },
+    {
+      "epoch": 0.020984753265205748,
+      "grad_norm": 0.2850938141345978,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.6694,
+      "step": 192
+    },
+    {
+      "epoch": 0.021094048855128694,
+      "grad_norm": 0.2577400803565979,
+      "learning_rate": 3.7269241793390085e-07,
+      "loss": 0.6761,
+      "step": 193
+    },
+    {
+      "epoch": 0.021203344445051644,
+      "grad_norm": 0.2811027467250824,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.621,
+      "step": 194
+    },
+    {
+      "epoch": 0.02131264003497459,
+      "grad_norm": 0.2799067199230194,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 0.6045,
+      "step": 195
+    },
+    {
+      "epoch": 0.021421935624897536,
+      "grad_norm": 0.40083521604537964,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 0.8314,
+      "step": 196
+    },
+    {
+      "epoch": 0.021531231214820482,
+      "grad_norm": 0.48128101229667664,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.5519,
+      "step": 197
+    },
+    {
+      "epoch": 0.02164052680474343,
+      "grad_norm": 0.3779641389846802,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 0.6474,
+      "step": 198
+    },
+    {
+      "epoch": 0.021749822394666374,
+      "grad_norm": 0.38349881768226624,
+      "learning_rate": 7.615242180436522e-09,
+      "loss": 0.6104,
+      "step": 199
+    },
+    {
+      "epoch": 0.02185911798458932,
+      "grad_norm": 0.6852728724479675,
+      "learning_rate": 0.0,
+      "loss": 0.8279,
+      "step": 200
+    },
+    {
+      "epoch": 0.02185911798458932,
+      "eval_loss": 0.46923279762268066,
+      "eval_runtime": 1195.4856,
+      "eval_samples_per_second": 12.89,
+      "eval_steps_per_second": 6.445,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.6780376395415552e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null