Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b49cd1df38e3b867d0f6179efc8ff5b8e2259954570bcb5ede36b73f8fc00810
 size 1163996488

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6a283f0b1540c93e42d6a03f44ea1ef452e05e0dfbc8160686a69eb6c19ff7d
 size 1163996488

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6dc122f7461c14714e3bb795e67c455d2396e3a3d5cc54b08219a3f5705f8a3
 size 1279641042

 version https://git-lfs.github.com/spec/v1
+oid sha256:90a959cfdc58518fea19a8c4e47c9dff95de0be269f46ff88e9eea3409c8cb48
 size 1279641042

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c25d7bd1393453292cb28bb1da31e93070548d594957a6c536f4670e1797a153
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf18fce92eb86ddf9279369334bd1b9efeb4ba7f8318b427ce4eb10b8333ca37
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.3055230975151062,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.15535991714137753,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 12.259,
       "eval_steps_per_second": 6.13,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.995744869351424e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.2649017870426178,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.20714655618850336,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.259,
       "eval_steps_per_second": 6.13,
       "step": 150
+    },
+    {
+      "epoch": 0.15639564992232005,
+      "grad_norm": 1.0121864080429077,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.5259,
+      "step": 151
+    },
+    {
+      "epoch": 0.15743138270326257,
+      "grad_norm": 1.1729909181594849,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.6326,
+      "step": 152
+    },
+    {
+      "epoch": 0.15846711548420508,
+      "grad_norm": 0.995249330997467,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.5665,
+      "step": 153
+    },
+    {
+      "epoch": 0.1595028482651476,
+      "grad_norm": 0.9343785643577576,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.5047,
+      "step": 154
+    },
+    {
+      "epoch": 0.16053858104609012,
+      "grad_norm": 1.207404375076294,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.5292,
+      "step": 155
+    },
+    {
+      "epoch": 0.16157431382703263,
+      "grad_norm": 1.1004372835159302,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.4597,
+      "step": 156
+    },
+    {
+      "epoch": 0.16261004660797515,
+      "grad_norm": 0.9158421754837036,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.4519,
+      "step": 157
+    },
+    {
+      "epoch": 0.16364577938891767,
+      "grad_norm": 0.6641129851341248,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.3259,
+      "step": 158
+    },
+    {
+      "epoch": 0.16468151216986018,
+      "grad_norm": 0.798582136631012,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.3704,
+      "step": 159
+    },
+    {
+      "epoch": 0.1657172449508027,
+      "grad_norm": 0.7701166868209839,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.336,
+      "step": 160
+    },
+    {
+      "epoch": 0.16675297773174522,
+      "grad_norm": 0.9152107238769531,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.3435,
+      "step": 161
+    },
+    {
+      "epoch": 0.16778871051268773,
+      "grad_norm": 0.9507977366447449,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.4295,
+      "step": 162
+    },
+    {
+      "epoch": 0.16882444329363025,
+      "grad_norm": 0.6636240482330322,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.2605,
+      "step": 163
+    },
+    {
+      "epoch": 0.16986017607457277,
+      "grad_norm": 0.6697791814804077,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.2748,
+      "step": 164
+    },
+    {
+      "epoch": 0.17089590885551528,
+      "grad_norm": 0.763862669467926,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.2329,
+      "step": 165
+    },
+    {
+      "epoch": 0.1719316416364578,
+      "grad_norm": 0.5785434246063232,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.219,
+      "step": 166
+    },
+    {
+      "epoch": 0.17296737441740032,
+      "grad_norm": 0.7660300135612488,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.278,
+      "step": 167
+    },
+    {
+      "epoch": 0.17400310719834283,
+      "grad_norm": 0.701310932636261,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.2762,
+      "step": 168
+    },
+    {
+      "epoch": 0.17503883997928535,
+      "grad_norm": 0.7874704003334045,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.3061,
+      "step": 169
+    },
+    {
+      "epoch": 0.17607457276022787,
+      "grad_norm": 0.586209237575531,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.2317,
+      "step": 170
+    },
+    {
+      "epoch": 0.17711030554117038,
+      "grad_norm": 0.8026313781738281,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.1928,
+      "step": 171
+    },
+    {
+      "epoch": 0.1781460383221129,
+      "grad_norm": 0.6386998295783997,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.2347,
+      "step": 172
+    },
+    {
+      "epoch": 0.17918177110305541,
+      "grad_norm": 0.8125709295272827,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.2274,
+      "step": 173
+    },
+    {
+      "epoch": 0.18021750388399793,
+      "grad_norm": 0.8692264556884766,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.4774,
+      "step": 174
+    },
+    {
+      "epoch": 0.18125323666494045,
+      "grad_norm": 0.7680791616439819,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.2766,
+      "step": 175
+    },
+    {
+      "epoch": 0.18228896944588296,
+      "grad_norm": 0.8680493831634521,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.2881,
+      "step": 176
+    },
+    {
+      "epoch": 0.18332470222682548,
+      "grad_norm": 0.5676426291465759,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.2081,
+      "step": 177
+    },
+    {
+      "epoch": 0.184360435007768,
+      "grad_norm": 0.5735141038894653,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.222,
+      "step": 178
+    },
+    {
+      "epoch": 0.18539616778871051,
+      "grad_norm": 0.5580146908760071,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.1749,
+      "step": 179
+    },
+    {
+      "epoch": 0.18643190056965303,
+      "grad_norm": 0.5910290479660034,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.2142,
+      "step": 180
+    },
+    {
+      "epoch": 0.18746763335059555,
+      "grad_norm": 0.8039419651031494,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.2259,
+      "step": 181
+    },
+    {
+      "epoch": 0.18850336613153806,
+      "grad_norm": 0.6810411214828491,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.1636,
+      "step": 182
+    },
+    {
+      "epoch": 0.18953909891248058,
+      "grad_norm": 0.595245361328125,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.1396,
+      "step": 183
+    },
+    {
+      "epoch": 0.1905748316934231,
+      "grad_norm": 0.9223597645759583,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.287,
+      "step": 184
+    },
+    {
+      "epoch": 0.1916105644743656,
+      "grad_norm": 0.7162910103797913,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.1353,
+      "step": 185
+    },
+    {
+      "epoch": 0.19264629725530813,
+      "grad_norm": 0.9694966077804565,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.2145,
+      "step": 186
+    },
+    {
+      "epoch": 0.19368203003625065,
+      "grad_norm": 0.644223153591156,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.1416,
+      "step": 187
+    },
+    {
+      "epoch": 0.19471776281719316,
+      "grad_norm": 1.156400442123413,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.265,
+      "step": 188
+    },
+    {
+      "epoch": 0.19575349559813568,
+      "grad_norm": 0.7973266839981079,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.2066,
+      "step": 189
+    },
+    {
+      "epoch": 0.1967892283790782,
+      "grad_norm": 0.8829982876777649,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.2602,
+      "step": 190
+    },
+    {
+      "epoch": 0.1978249611600207,
+      "grad_norm": 0.8796784281730652,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.3425,
+      "step": 191
+    },
+    {
+      "epoch": 0.19886069394096323,
+      "grad_norm": 0.6855463981628418,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.127,
+      "step": 192
+    },
+    {
+      "epoch": 0.19989642672190575,
+      "grad_norm": 0.5383428335189819,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.1866,
+      "step": 193
+    },
+    {
+      "epoch": 0.20093215950284826,
+      "grad_norm": 0.45419228076934814,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.0759,
+      "step": 194
+    },
+    {
+      "epoch": 0.20196789228379078,
+      "grad_norm": 0.7053388357162476,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.1603,
+      "step": 195
+    },
+    {
+      "epoch": 0.2030036250647333,
+      "grad_norm": 0.5925009846687317,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.1047,
+      "step": 196
+    },
+    {
+      "epoch": 0.2040393578456758,
+      "grad_norm": 1.1599555015563965,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.4283,
+      "step": 197
+    },
+    {
+      "epoch": 0.20507509062661833,
+      "grad_norm": 1.152112603187561,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.3806,
+      "step": 198
+    },
+    {
+      "epoch": 0.20611082340756084,
+      "grad_norm": 1.0169992446899414,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.1831,
+      "step": 199
+    },
+    {
+      "epoch": 0.20714655618850336,
+      "grad_norm": 1.3161717653274536,
+      "learning_rate": 0.0,
+      "loss": 0.6848,
+      "step": 200
+    },
+    {
+      "epoch": 0.20714655618850336,
+      "eval_loss": 0.2649017870426178,
+      "eval_runtime": 133.2348,
+      "eval_samples_per_second": 12.204,
+      "eval_steps_per_second": 6.102,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.660993159135232e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null