Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8674c4857d86cff24cba072c2b541b830bc871f68c3dfa393082fecd1816b215
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e2ef02ce27c8aad4dfa5798e2a99f1f0911ceb6149328fbf5c9f4c21434b4df
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43e9cb9ba608f95f54ae254d2f2110070fd2b10f727b39af981b94ccab5dc3d4
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:f032622ff8918ccf32030b132b9e73354f023c481c9ec089372bf85f5f84a12b
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f561c8f873ea0b5dfb5639d1c4834359f395f862f04c10d1f68400083b0bed5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:016b0a04bcf8b8804250e0f1db4d0cba5dfab4d4f62a8110c21ffa6c4bc4b65a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.3925303518772125,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.09468202619536058,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 11.092,
       "eval_steps_per_second": 5.546,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.3933441700318413e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.38890671730041504,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.1262427015938141,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.092,
       "eval_steps_per_second": 5.546,
       "step": 150
+    },
+    {
+      "epoch": 0.09531323970332965,
+      "grad_norm": 0.9829754829406738,
+      "learning_rate": 1.7197048550474643e-05,
+      "loss": 0.9275,
+      "step": 151
+    },
+    {
+      "epoch": 0.09594445321129873,
+      "grad_norm": 1.0895384550094604,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 1.1512,
+      "step": 152
+    },
+    {
+      "epoch": 0.09657566671926779,
+      "grad_norm": 1.1453516483306885,
+      "learning_rate": 1.5900081996875083e-05,
+      "loss": 1.2615,
+      "step": 153
+    },
+    {
+      "epoch": 0.09720688022723686,
+      "grad_norm": 1.2454159259796143,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 1.4501,
+      "step": 154
+    },
+    {
+      "epoch": 0.09783809373520594,
+      "grad_norm": 1.273112177848816,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 1.3519,
+      "step": 155
+    },
+    {
+      "epoch": 0.098469307243175,
+      "grad_norm": 1.3017650842666626,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 1.4724,
+      "step": 156
+    },
+    {
+      "epoch": 0.09910052075114407,
+      "grad_norm": 1.2280182838439941,
+      "learning_rate": 1.3432314919041478e-05,
+      "loss": 1.2936,
+      "step": 157
+    },
+    {
+      "epoch": 0.09973173425911315,
+      "grad_norm": 1.365878701210022,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 1.4915,
+      "step": 158
+    },
+    {
+      "epoch": 0.10036294776708221,
+      "grad_norm": 1.3801015615463257,
+      "learning_rate": 1.22645209888614e-05,
+      "loss": 1.4224,
+      "step": 159
+    },
+    {
+      "epoch": 0.10099416127505129,
+      "grad_norm": 1.328890085220337,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 1.3808,
+      "step": 160
+    },
+    {
+      "epoch": 0.10162537478302036,
+      "grad_norm": 1.4287265539169312,
+      "learning_rate": 1.1142701927151456e-05,
+      "loss": 1.6477,
+      "step": 161
+    },
+    {
+      "epoch": 0.10225658829098942,
+      "grad_norm": 1.3774094581604004,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 1.5419,
+      "step": 162
+    },
+    {
+      "epoch": 0.1028878017989585,
+      "grad_norm": 1.4486933946609497,
+      "learning_rate": 1.006822449763537e-05,
+      "loss": 1.5195,
+      "step": 163
+    },
+    {
+      "epoch": 0.10351901530692757,
+      "grad_norm": 1.4375733137130737,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.7109,
+      "step": 164
+    },
+    {
+      "epoch": 0.10415022881489663,
+      "grad_norm": 1.4317312240600586,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 1.4966,
+      "step": 165
+    },
+    {
+      "epoch": 0.10478144232286571,
+      "grad_norm": 1.5089269876480103,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 1.5649,
+      "step": 166
+    },
+    {
+      "epoch": 0.10541265583083478,
+      "grad_norm": 1.5436687469482422,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 1.6303,
+      "step": 167
+    },
+    {
+      "epoch": 0.10604386933880385,
+      "grad_norm": 1.5016186237335205,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 1.6437,
+      "step": 168
+    },
+    {
+      "epoch": 0.10667508284677292,
+      "grad_norm": 1.5110390186309814,
+      "learning_rate": 7.1416349648943894e-06,
+      "loss": 1.5768,
+      "step": 169
+    },
+    {
+      "epoch": 0.107306296354742,
+      "grad_norm": 1.5523266792297363,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.7035,
+      "step": 170
+    },
+    {
+      "epoch": 0.10793750986271106,
+      "grad_norm": 1.5230318307876587,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 1.5579,
+      "step": 171
+    },
+    {
+      "epoch": 0.10856872337068013,
+      "grad_norm": 1.508470892906189,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 1.5116,
+      "step": 172
+    },
+    {
+      "epoch": 0.1091999368786492,
+      "grad_norm": 1.504398226737976,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 1.4858,
+      "step": 173
+    },
+    {
+      "epoch": 0.10983115038661827,
+      "grad_norm": 1.4883153438568115,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 1.7057,
+      "step": 174
+    },
+    {
+      "epoch": 0.11046236389458734,
+      "grad_norm": 1.4853483438491821,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 1.5148,
+      "step": 175
+    },
+    {
+      "epoch": 0.11109357740255642,
+      "grad_norm": 1.553637981414795,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 1.6167,
+      "step": 176
+    },
+    {
+      "epoch": 0.11172479091052548,
+      "grad_norm": 1.578129529953003,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 1.5491,
+      "step": 177
+    },
+    {
+      "epoch": 0.11235600441849455,
+      "grad_norm": 1.5624874830245972,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 1.6006,
+      "step": 178
+    },
+    {
+      "epoch": 0.11298721792646363,
+      "grad_norm": 1.5715187788009644,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 1.6976,
+      "step": 179
+    },
+    {
+      "epoch": 0.11361843143443269,
+      "grad_norm": 1.6272014379501343,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 1.6757,
+      "step": 180
+    },
+    {
+      "epoch": 0.11424964494240177,
+      "grad_norm": 1.546974539756775,
+      "learning_rate": 2.724071220034158e-06,
+      "loss": 1.5974,
+      "step": 181
+    },
+    {
+      "epoch": 0.11488085845037084,
+      "grad_norm": 1.6230578422546387,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.6203,
+      "step": 182
+    },
+    {
+      "epoch": 0.11551207195833992,
+      "grad_norm": 1.5810596942901611,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 1.5336,
+      "step": 183
+    },
+    {
+      "epoch": 0.11614328546630898,
+      "grad_norm": 1.7239481210708618,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 1.676,
+      "step": 184
+    },
+    {
+      "epoch": 0.11677449897427805,
+      "grad_norm": 1.6532574892044067,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.8418,
+      "step": 185
+    },
+    {
+      "epoch": 0.11740571248224713,
+      "grad_norm": 1.6952757835388184,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 1.5633,
+      "step": 186
+    },
+    {
+      "epoch": 0.11803692599021619,
+      "grad_norm": 1.7153465747833252,
+      "learning_rate": 1.2814967607382432e-06,
+      "loss": 1.6121,
+      "step": 187
+    },
+    {
+      "epoch": 0.11866813949818526,
+      "grad_norm": 1.8238195180892944,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.8494,
+      "step": 188
+    },
+    {
+      "epoch": 0.11929935300615434,
+      "grad_norm": 1.7845555543899536,
+      "learning_rate": 9.186408276168013e-07,
+      "loss": 1.6834,
+      "step": 189
+    },
+    {
+      "epoch": 0.1199305665141234,
+      "grad_norm": 1.8193535804748535,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 1.624,
+      "step": 190
+    },
+    {
+      "epoch": 0.12056178002209247,
+      "grad_norm": 1.7187602519989014,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 1.6485,
+      "step": 191
+    },
+    {
+      "epoch": 0.12119299353006155,
+      "grad_norm": 1.9166899919509888,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 1.8563,
+      "step": 192
+    },
+    {
+      "epoch": 0.12182420703803061,
+      "grad_norm": 1.856611728668213,
+      "learning_rate": 3.7269241793390085e-07,
+      "loss": 1.7069,
+      "step": 193
+    },
+    {
+      "epoch": 0.12245542054599969,
+      "grad_norm": 1.9729714393615723,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.6582,
+      "step": 194
+    },
+    {
+      "epoch": 0.12308663405396876,
+      "grad_norm": 1.8488212823867798,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 1.5962,
+      "step": 195
+    },
+    {
+      "epoch": 0.12371784756193782,
+      "grad_norm": 1.8829165697097778,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 1.6283,
+      "step": 196
+    },
+    {
+      "epoch": 0.1243490610699069,
+      "grad_norm": 2.0195155143737793,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 1.5913,
+      "step": 197
+    },
+    {
+      "epoch": 0.12498027457787597,
+      "grad_norm": 1.976454257965088,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 1.4226,
+      "step": 198
+    },
+    {
+      "epoch": 0.12561148808584505,
+      "grad_norm": 2.081446647644043,
+      "learning_rate": 7.615242180436522e-09,
+      "loss": 1.3406,
+      "step": 199
+    },
+    {
+      "epoch": 0.1262427015938141,
+      "grad_norm": 2.4766805171966553,
+      "learning_rate": 0.0,
+      "loss": 1.5761,
+      "step": 200
+    },
+    {
+      "epoch": 0.1262427015938141,
+      "eval_loss": 0.38890671730041504,
+      "eval_runtime": 240.7623,
+      "eval_samples_per_second": 11.081,
+      "eval_steps_per_second": 5.541,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.194703055064924e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null