Training in progress, step 200, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1bd6861dfa7a9274e35db9e22ce8f38c5d23b17fc0722202f544838de089c41
 size 1006723888

 version https://git-lfs.github.com/spec/v1
+oid sha256:54173519d58de2beb15332dd1be794d5aeebd25ee65f8c14a88b9251ff789112
 size 1006723888

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a6758dbad5f1cc62b17452a2873d66e17470a3c0a84336e30268d5f9e1b2a1c
 size 511971028

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f8c27b5ac41b291baea82df4c9b5fb0a41f04a69418e43fa1f37c46fd58cef9
 size 511971028

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df0a398107cc46e45baaaf079c37320a4504759a4cbb349ef9352e2f25b03da3
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf2475423e66ca940efb5cc9f71fdf4aed6aacc7a2f1eaf9982e9ceaffc95d12
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42a19f0f149a94ec473aaf8b8279b314cdd8fdcc5b751c18bdae1be9b68ff289
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c44010f78c69f1244b63af4f0cd4bfb4df115765a78efb827730fb8bfcfb71f
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a4067d755cb39c67dda7e58a385cbbc53df92063e20028e18d4660848d13b87
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:96e8f809143bf9edaa314ab657a4c4718d6d56c835a4dcb53b05213314239eda
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:645f64d06d50896146bad4299bb49ca6991edfcc1c9c714371a2893474238e19
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:06d806200c0be42ec6e959b819c37fb297cc4b35538f0ad784bef82d91b7db49
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14ee8cce6fe8f0a641f142c16c054ad113ebf414938c00ba5b29867c4fde5a47
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:9eeae16b2b296c8f442659dd4d3ec07a538c57c0a29d76dbc93046a2ab399ac8
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfad9d39c2b42eb35d6e6f54b8777fea1ec8ac48ee62be0e82d70fa233ad72bb
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b8734871cbc1136f44d7b4115060c04a30cd16289899d67d209efea2fb70033
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26a826f795b46bf62a90ed1ce559fa9f06ec837877a6162fa678e9751d4245c0
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba279d673ef228a1260e113246c095b2878a96232ced2d6a91573cd13ae8d3d8
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fd611129d61846756bec14d4569986c754a91295b6a181f503f0e6ba3d47dfa
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:1302b7f2974f1ba833885aa860a173f611b1622bf61e515c6f6fb83d32e3f9bc
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b0835001899346d545dd7ab354c70da2bff994a80b8a6b07d311a886ba1cc68
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f520c95bc9eafa23f7fa75fff2cbc49519024ba677b8729ada29693bec3da0ff
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.654043972492218,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.1516314779270633,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 63.307,
       "eval_steps_per_second": 1.984,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,7 +1461,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.2708267350415114e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.654043972492218,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 1.5355086372360844,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 63.307,
       "eval_steps_per_second": 1.984,
       "step": 150
+    },
+    {
+      "epoch": 1.1593090211132437,
+      "grad_norm": 2.45980167388916,
+      "learning_rate": 1.770685568024175e-05,
+      "loss": 2.4155,
+      "step": 151
+    },
+    {
+      "epoch": 1.1669865642994242,
+      "grad_norm": 1.3351049423217773,
+      "learning_rate": 1.702950329646815e-05,
+      "loss": 1.6516,
+      "step": 152
+    },
+    {
+      "epoch": 1.1746641074856046,
+      "grad_norm": 1.439468502998352,
+      "learning_rate": 1.636307844739037e-05,
+      "loss": 1.4729,
+      "step": 153
+    },
+    {
+      "epoch": 1.182341650671785,
+      "grad_norm": 1.5308640003204346,
+      "learning_rate": 1.570776332690793e-05,
+      "loss": 1.9692,
+      "step": 154
+    },
+    {
+      "epoch": 1.1900191938579654,
+      "grad_norm": 1.4437023401260376,
+      "learning_rate": 1.50637370916315e-05,
+      "loss": 2.5024,
+      "step": 155
+    },
+    {
+      "epoch": 1.1976967370441458,
+      "grad_norm": 1.6083168983459473,
+      "learning_rate": 1.4431175811903247e-05,
+      "loss": 2.1522,
+      "step": 156
+    },
+    {
+      "epoch": 1.2053742802303262,
+      "grad_norm": 2.654719114303589,
+      "learning_rate": 1.3810252423660933e-05,
+      "loss": 2.3272,
+      "step": 157
+    },
+    {
+      "epoch": 1.2130518234165066,
+      "grad_norm": 1.3192472457885742,
+      "learning_rate": 1.320113668115905e-05,
+      "loss": 1.7521,
+      "step": 158
+    },
+    {
+      "epoch": 1.220729366602687,
+      "grad_norm": 1.4346808195114136,
+      "learning_rate": 1.2603995110559598e-05,
+      "loss": 1.5204,
+      "step": 159
+    },
+    {
+      "epoch": 1.2284069097888675,
+      "grad_norm": 1.4627543687820435,
+      "learning_rate": 1.2018990964405573e-05,
+      "loss": 1.6693,
+      "step": 160
+    },
+    {
+      "epoch": 1.236084452975048,
+      "grad_norm": 1.3848214149475098,
+      "learning_rate": 1.1446284176989293e-05,
+      "loss": 2.3089,
+      "step": 161
+    },
+    {
+      "epoch": 1.2437619961612283,
+      "grad_norm": 2.183250904083252,
+      "learning_rate": 1.0886031320628002e-05,
+      "loss": 2.3767,
+      "step": 162
+    },
+    {
+      "epoch": 1.251439539347409,
+      "grad_norm": 1.6018089056015015,
+      "learning_rate": 1.0338385562858536e-05,
+      "loss": 2.4607,
+      "step": 163
+    },
+    {
+      "epoch": 1.2591170825335891,
+      "grad_norm": 2.037428379058838,
+      "learning_rate": 9.803496624562858e-06,
+      "loss": 2.0091,
+      "step": 164
+    },
+    {
+      "epoch": 1.2667946257197698,
+      "grad_norm": 1.3462529182434082,
+      "learning_rate": 9.281510739035874e-06,
+      "loss": 1.4434,
+      "step": 165
+    },
+    {
+      "epoch": 1.2744721689059502,
+      "grad_norm": 1.64255690574646,
+      "learning_rate": 8.772570612006676e-06,
+      "loss": 1.64,
+      "step": 166
+    },
+    {
+      "epoch": 1.2821497120921306,
+      "grad_norm": 1.2866475582122803,
+      "learning_rate": 8.27681538262427e-06,
+      "loss": 2.3162,
+      "step": 167
+    },
+    {
+      "epoch": 1.289827255278311,
+      "grad_norm": 1.9135746955871582,
+      "learning_rate": 7.794380585418288e-06,
+      "loss": 2.3422,
+      "step": 168
+    },
+    {
+      "epoch": 1.2975047984644914,
+      "grad_norm": 1.3082396984100342,
+      "learning_rate": 7.325398113245189e-06,
+      "loss": 2.3385,
+      "step": 169
+    },
+    {
+      "epoch": 1.3051823416506718,
+      "grad_norm": 2.714218854904175,
+      "learning_rate": 6.869996181230129e-06,
+      "loss": 2.1251,
+      "step": 170
+    },
+    {
+      "epoch": 1.3128598848368522,
+      "grad_norm": 1.3126639127731323,
+      "learning_rate": 6.4282992917141276e-06,
+      "loss": 1.5057,
+      "step": 171
+    },
+    {
+      "epoch": 1.3205374280230326,
+      "grad_norm": 1.4953969717025757,
+      "learning_rate": 6.000428200216482e-06,
+      "loss": 1.4831,
+      "step": 172
+    },
+    {
+      "epoch": 1.328214971209213,
+      "grad_norm": 1.2520190477371216,
+      "learning_rate": 5.586499882421343e-06,
+      "loss": 2.194,
+      "step": 173
+    },
+    {
+      "epoch": 1.3358925143953935,
+      "grad_norm": 2.088040590286255,
+      "learning_rate": 5.186627502197737e-06,
+      "loss": 2.3981,
+      "step": 174
+    },
+    {
+      "epoch": 1.3435700575815739,
+      "grad_norm": 1.2573398351669312,
+      "learning_rate": 4.800920380661722e-06,
+      "loss": 2.2797,
+      "step": 175
+    },
+    {
+      "epoch": 1.3512476007677543,
+      "grad_norm": 3.4339168071746826,
+      "learning_rate": 4.42948396628907e-06,
+      "loss": 2.4717,
+      "step": 176
+    },
+    {
+      "epoch": 1.3589251439539347,
+      "grad_norm": 1.2305986881256104,
+      "learning_rate": 4.072419806086703e-06,
+      "loss": 1.544,
+      "step": 177
+    },
+    {
+      "epoch": 1.3666026871401151,
+      "grad_norm": 1.383890151977539,
+      "learning_rate": 3.7298255178307377e-06,
+      "loss": 1.506,
+      "step": 178
+    },
+    {
+      "epoch": 1.3742802303262955,
+      "grad_norm": 1.4508851766586304,
+      "learning_rate": 3.4017947633788134e-06,
+      "loss": 2.0022,
+      "step": 179
+    },
+    {
+      "epoch": 1.381957773512476,
+      "grad_norm": 1.5980404615402222,
+      "learning_rate": 3.088417223063826e-06,
+      "loss": 2.3528,
+      "step": 180
+    },
+    {
+      "epoch": 1.3896353166986564,
+      "grad_norm": 1.817231297492981,
+      "learning_rate": 2.7897785711762488e-06,
+      "loss": 2.3382,
+      "step": 181
+    },
+    {
+      "epoch": 1.3973128598848368,
+      "grad_norm": 2.2860965728759766,
+      "learning_rate": 2.5059604525416144e-06,
+      "loss": 2.2293,
+      "step": 182
+    },
+    {
+      "epoch": 1.4049904030710172,
+      "grad_norm": 1.3367491960525513,
+      "learning_rate": 2.237040460199656e-06,
+      "loss": 1.6935,
+      "step": 183
+    },
+    {
+      "epoch": 1.4126679462571978,
+      "grad_norm": 1.4708188772201538,
+      "learning_rate": 1.983092114191132e-06,
+      "loss": 1.545,
+      "step": 184
+    },
+    {
+      "epoch": 1.420345489443378,
+      "grad_norm": 1.3639698028564453,
+      "learning_rate": 1.7441848414581687e-06,
+      "loss": 1.7446,
+      "step": 185
+    },
+    {
+      "epoch": 1.4280230326295587,
+      "grad_norm": 1.3364663124084473,
+      "learning_rate": 1.5203839568636728e-06,
+      "loss": 2.3282,
+      "step": 186
+    },
+    {
+      "epoch": 1.4357005758157388,
+      "grad_norm": 1.7868165969848633,
+      "learning_rate": 1.3117506453348417e-06,
+      "loss": 2.2418,
+      "step": 187
+    },
+    {
+      "epoch": 1.4433781190019195,
+      "grad_norm": 1.588409185409546,
+      "learning_rate": 1.1183419451358668e-06,
+      "loss": 2.4858,
+      "step": 188
+    },
+    {
+      "epoch": 1.4510556621880997,
+      "grad_norm": 1.8464293479919434,
+      "learning_rate": 9.402107322742051e-07,
+      "loss": 1.9824,
+      "step": 189
+    },
+    {
+      "epoch": 1.4587332053742803,
+      "grad_norm": 1.3277325630187988,
+      "learning_rate": 7.774057060448276e-07,
+      "loss": 1.5312,
+      "step": 190
+    },
+    {
+      "epoch": 1.4664107485604607,
+      "grad_norm": 1.6554378271102905,
+      "learning_rate": 6.29971375716296e-07,
+      "loss": 1.5629,
+      "step": 191
+    },
+    {
+      "epoch": 1.4740882917466411,
+      "grad_norm": 1.274581789970398,
+      "learning_rate": 4.979480483624139e-07,
+      "loss": 2.4214,
+      "step": 192
+    },
+    {
+      "epoch": 1.4817658349328215,
+      "grad_norm": 1.98646080493927,
+      "learning_rate": 3.813718178426259e-07,
+      "loss": 2.2505,
+      "step": 193
+    },
+    {
+      "epoch": 1.489443378119002,
+      "grad_norm": 1.4712586402893066,
+      "learning_rate": 2.802745549343493e-07,
+      "loss": 2.4768,
+      "step": 194
+    },
+    {
+      "epoch": 1.4971209213051824,
+      "grad_norm": 3.4789063930511475,
+      "learning_rate": 1.9468389861981874e-07,
+      "loss": 2.1313,
+      "step": 195
+    },
+    {
+      "epoch": 1.5047984644913628,
+      "grad_norm": 1.3971290588378906,
+      "learning_rate": 1.2462324852985152e-07,
+      "loss": 1.6584,
+      "step": 196
+    },
+    {
+      "epoch": 1.5124760076775432,
+      "grad_norm": 1.6234114170074463,
+      "learning_rate": 7.011175854663e-08,
+      "loss": 1.5564,
+      "step": 197
+    },
+    {
+      "epoch": 1.5201535508637236,
+      "grad_norm": 1.249504804611206,
+      "learning_rate": 3.116433156721521e-08,
+      "loss": 2.0635,
+      "step": 198
+    },
+    {
+      "epoch": 1.527831094049904,
+      "grad_norm": 1.8600101470947266,
+      "learning_rate": 7.791615429266763e-09,
+      "loss": 2.3208,
+      "step": 199
+    },
+    {
+      "epoch": 1.5355086372360844,
+      "grad_norm": 1.1695365905761719,
+      "learning_rate": 0.0,
+      "loss": 2.2136,
+      "step": 200
+    },
+    {
+      "epoch": 1.5355086372360844,
+      "eval_loss": 0.6552443504333496,
+      "eval_runtime": 28.8218,
+      "eval_samples_per_second": 60.891,
+      "eval_steps_per_second": 1.908,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.362169118124671e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null