Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37db7fb44fe7409c8eeefe06c23732eb002927f018a93092853cd15f1bf50182
 size 406863720

 version https://git-lfs.github.com/spec/v1
+oid sha256:002b780f62a4342ac1ad0cf92ce4daf67769652b04ad87317ad062c8ddf4a42f
 size 406863720

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f462112c36676934b0a91a72464dc3eda52da25ca78933c1b50295d8d32a3fc
 size 207013892

 version https://git-lfs.github.com/spec/v1
+oid sha256:45f4f3bf8fb0a6b2b0655dbaca18040e9a24aeed4945e5182f00ee55fd587d37
 size 207013892

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f747703cecfbb297e364bf7c251aa9acd87eb073506e79e94981550e55e2e690
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:dca678f2ecd7ddaaab26580d4447f0dc12e813fa00845e5281af84463ccfe67a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc4a786186a574bdc543ff4b4563aab7c5e0b442c74c85899bb42a25553c5d0c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca62d85cf5423834480d3c20680f93b185c8e6574a8a14021d285e0b05c7449f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.7413969039916992,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.7255139056831923,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 18.962,
       "eval_steps_per_second": 4.74,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1904735004065792e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.7320468425750732,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.9673518742442564,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 18.962,
       "eval_steps_per_second": 4.74,
       "step": 150
+    },
+    {
+      "epoch": 0.7303506650544136,
+      "grad_norm": 0.46916308999061584,
+      "learning_rate": 9.319397726443026e-06,
+      "loss": 2.028,
+      "step": 151
+    },
+    {
+      "epoch": 0.7351874244256348,
+      "grad_norm": 0.43807369470596313,
+      "learning_rate": 8.962896471825342e-06,
+      "loss": 1.7917,
+      "step": 152
+    },
+    {
+      "epoch": 0.7400241837968561,
+      "grad_norm": 0.530551016330719,
+      "learning_rate": 8.61214655125809e-06,
+      "loss": 1.8804,
+      "step": 153
+    },
+    {
+      "epoch": 0.7448609431680774,
+      "grad_norm": 0.5489689707756042,
+      "learning_rate": 8.267243856267331e-06,
+      "loss": 1.8999,
+      "step": 154
+    },
+    {
+      "epoch": 0.7496977025392987,
+      "grad_norm": 0.47269207239151,
+      "learning_rate": 7.928282679806052e-06,
+      "loss": 1.6923,
+      "step": 155
+    },
+    {
+      "epoch": 0.75453446191052,
+      "grad_norm": 0.5164920687675476,
+      "learning_rate": 7.595355690475393e-06,
+      "loss": 1.7004,
+      "step": 156
+    },
+    {
+      "epoch": 0.7593712212817413,
+      "grad_norm": 0.4851786494255066,
+      "learning_rate": 7.268553907189964e-06,
+      "loss": 1.6288,
+      "step": 157
+    },
+    {
+      "epoch": 0.7642079806529625,
+      "grad_norm": 0.5359386801719666,
+      "learning_rate": 6.947966674294236e-06,
+      "loss": 1.6777,
+      "step": 158
+    },
+    {
+      "epoch": 0.7690447400241838,
+      "grad_norm": 0.5173937678337097,
+      "learning_rate": 6.6336816371366305e-06,
+      "loss": 1.6535,
+      "step": 159
+    },
+    {
+      "epoch": 0.7738814993954051,
+      "grad_norm": 0.4466940462589264,
+      "learning_rate": 6.325784718108196e-06,
+      "loss": 1.8034,
+      "step": 160
+    },
+    {
+      "epoch": 0.7787182587666264,
+      "grad_norm": 0.4748440086841583,
+      "learning_rate": 6.0243600931522595e-06,
+      "loss": 1.7022,
+      "step": 161
+    },
+    {
+      "epoch": 0.7835550181378477,
+      "grad_norm": 0.49979591369628906,
+      "learning_rate": 5.72949016875158e-06,
+      "loss": 1.6983,
+      "step": 162
+    },
+    {
+      "epoch": 0.788391777509069,
+      "grad_norm": 0.47810453176498413,
+      "learning_rate": 5.44125555939923e-06,
+      "loss": 1.7931,
+      "step": 163
+    },
+    {
+      "epoch": 0.7932285368802902,
+      "grad_norm": 0.444297194480896,
+      "learning_rate": 5.159735065559399e-06,
+      "loss": 1.785,
+      "step": 164
+    },
+    {
+      "epoch": 0.7980652962515115,
+      "grad_norm": 0.4740820825099945,
+      "learning_rate": 4.885005652124144e-06,
+      "loss": 1.7403,
+      "step": 165
+    },
+    {
+      "epoch": 0.8029020556227328,
+      "grad_norm": 0.4951646625995636,
+      "learning_rate": 4.617142427371934e-06,
+      "loss": 1.7834,
+      "step": 166
+    },
+    {
+      "epoch": 0.8077388149939541,
+      "grad_norm": 0.49473491311073303,
+      "learning_rate": 4.3562186224338265e-06,
+      "loss": 1.7891,
+      "step": 167
+    },
+    {
+      "epoch": 0.8125755743651754,
+      "grad_norm": 0.5567054152488708,
+      "learning_rate": 4.102305571272783e-06,
+      "loss": 1.742,
+      "step": 168
+    },
+    {
+      "epoch": 0.8174123337363967,
+      "grad_norm": 0.531502366065979,
+      "learning_rate": 3.855472691181678e-06,
+      "loss": 1.7225,
+      "step": 169
+    },
+    {
+      "epoch": 0.8222490931076178,
+      "grad_norm": 0.4598694443702698,
+      "learning_rate": 3.615787463805331e-06,
+      "loss": 1.6636,
+      "step": 170
+    },
+    {
+      "epoch": 0.8270858524788391,
+      "grad_norm": 0.46727102994918823,
+      "learning_rate": 3.383315416691646e-06,
+      "loss": 1.6728,
+      "step": 171
+    },
+    {
+      "epoch": 0.8319226118500604,
+      "grad_norm": 0.4816998541355133,
+      "learning_rate": 3.158120105377096e-06,
+      "loss": 1.7808,
+      "step": 172
+    },
+    {
+      "epoch": 0.8367593712212817,
+      "grad_norm": 0.4520528316497803,
+      "learning_rate": 2.940263096011233e-06,
+      "loss": 1.573,
+      "step": 173
+    },
+    {
+      "epoch": 0.841596130592503,
+      "grad_norm": 0.4842596650123596,
+      "learning_rate": 2.729803948525125e-06,
+      "loss": 1.838,
+      "step": 174
+    },
+    {
+      "epoch": 0.8464328899637243,
+      "grad_norm": 0.5535080432891846,
+      "learning_rate": 2.526800200348275e-06,
+      "loss": 1.7461,
+      "step": 175
+    },
+    {
+      "epoch": 0.8512696493349455,
+      "grad_norm": 0.5143581032752991,
+      "learning_rate": 2.3313073506784575e-06,
+      "loss": 1.6,
+      "step": 176
+    },
+    {
+      "epoch": 0.8561064087061668,
+      "grad_norm": 0.5159276127815247,
+      "learning_rate": 2.143378845308791e-06,
+      "loss": 1.6323,
+      "step": 177
+    },
+    {
+      "epoch": 0.8609431680773881,
+      "grad_norm": 0.4979780316352844,
+      "learning_rate": 1.9630660620161777e-06,
+      "loss": 1.779,
+      "step": 178
+    },
+    {
+      "epoch": 0.8657799274486094,
+      "grad_norm": 0.5675414800643921,
+      "learning_rate": 1.790418296515165e-06,
+      "loss": 1.7153,
+      "step": 179
+    },
+    {
+      "epoch": 0.8706166868198307,
+      "grad_norm": 0.4874797761440277,
+      "learning_rate": 1.625482748980961e-06,
+      "loss": 1.7539,
+      "step": 180
+    },
+    {
+      "epoch": 0.875453446191052,
+      "grad_norm": 0.5073657631874084,
+      "learning_rate": 1.4683045111453942e-06,
+      "loss": 1.6124,
+      "step": 181
+    },
+    {
+      "epoch": 0.8802902055622733,
+      "grad_norm": 0.4889651834964752,
+      "learning_rate": 1.3189265539692707e-06,
+      "loss": 1.6506,
+      "step": 182
+    },
+    {
+      "epoch": 0.8851269649334945,
+      "grad_norm": 0.6806184649467468,
+      "learning_rate": 1.1773897158945557e-06,
+      "loss": 1.6727,
+      "step": 183
+    },
+    {
+      "epoch": 0.8899637243047158,
+      "grad_norm": 0.5414454936981201,
+      "learning_rate": 1.0437326916795432e-06,
+      "loss": 1.7342,
+      "step": 184
+    },
+    {
+      "epoch": 0.8948004836759371,
+      "grad_norm": 0.42585650086402893,
+      "learning_rate": 9.179920218200888e-07,
+      "loss": 1.6879,
+      "step": 185
+    },
+    {
+      "epoch": 0.8996372430471584,
+      "grad_norm": 0.45129069685935974,
+      "learning_rate": 8.002020825598277e-07,
+      "loss": 1.6605,
+      "step": 186
+    },
+    {
+      "epoch": 0.9044740024183797,
+      "grad_norm": 0.6295913457870483,
+      "learning_rate": 6.90395076492022e-07,
+      "loss": 1.7574,
+      "step": 187
+    },
+    {
+      "epoch": 0.909310761789601,
+      "grad_norm": 0.4469881057739258,
+      "learning_rate": 5.886010237557194e-07,
+      "loss": 1.5844,
+      "step": 188
+    },
+    {
+      "epoch": 0.9141475211608222,
+      "grad_norm": 0.5988472700119019,
+      "learning_rate": 4.94847753828529e-07,
+      "loss": 1.9071,
+      "step": 189
+    },
+    {
+      "epoch": 0.9189842805320435,
+      "grad_norm": 0.4462520182132721,
+      "learning_rate": 4.091608979183303e-07,
+      "loss": 1.7049,
+      "step": 190
+    },
+    {
+      "epoch": 0.9238210399032648,
+      "grad_norm": 0.5181970596313477,
+      "learning_rate": 3.315638819559452e-07,
+      "loss": 1.8397,
+      "step": 191
+    },
+    {
+      "epoch": 0.9286577992744861,
+      "grad_norm": 0.5435971617698669,
+      "learning_rate": 2.6207792019074414e-07,
+      "loss": 1.6886,
+      "step": 192
+    },
+    {
+      "epoch": 0.9334945586457074,
+      "grad_norm": 0.5636361241340637,
+      "learning_rate": 2.0072200939085573e-07,
+      "loss": 1.6915,
+      "step": 193
+    },
+    {
+      "epoch": 0.9383313180169287,
+      "grad_norm": 0.5563094615936279,
+      "learning_rate": 1.475129236496575e-07,
+      "loss": 1.7518,
+      "step": 194
+    },
+    {
+      "epoch": 0.9431680773881499,
+      "grad_norm": 0.5630325078964233,
+      "learning_rate": 1.0246520979990459e-07,
+      "loss": 1.8014,
+      "step": 195
+    },
+    {
+      "epoch": 0.9480048367593712,
+      "grad_norm": 0.6778345108032227,
+      "learning_rate": 6.559118343676396e-08,
+      "loss": 1.6761,
+      "step": 196
+    },
+    {
+      "epoch": 0.9528415961305925,
+      "grad_norm": 0.8598151803016663,
+      "learning_rate": 3.690092555085789e-08,
+      "loss": 1.7755,
+      "step": 197
+    },
+    {
+      "epoch": 0.9576783555018138,
+      "grad_norm": 0.8544036149978638,
+      "learning_rate": 1.640227977221853e-08,
+      "loss": 1.7744,
+      "step": 198
+    },
+    {
+      "epoch": 0.9625151148730351,
+      "grad_norm": 0.988280177116394,
+      "learning_rate": 4.1008502259298755e-09,
+      "loss": 1.751,
+      "step": 199
+    },
+    {
+      "epoch": 0.9673518742442564,
+      "grad_norm": 1.538628101348877,
+      "learning_rate": 0.0,
+      "loss": 1.4039,
+      "step": 200
+    },
+    {
+      "epoch": 0.9673518742442564,
+      "eval_loss": 1.7320468425750732,
+      "eval_runtime": 18.3522,
+      "eval_samples_per_second": 18.962,
+      "eval_steps_per_second": 4.741,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.582807765745664e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null