Training in progress, step 187, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +326 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c3aa1b7c7e2131be00257b3a8ad8d8a5cc00d488de310e3aa2ab739b792e83
 size 50503544

 version https://git-lfs.github.com/spec/v1
+oid sha256:c109d0352421c1cb05b2ff3fc87f8bd43bf633d39b152a46ee64665256439a6d
 size 50503544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db6b4c4d50e731ccf97aba3d5220dc59dc330f0eba15762debc58c947b14c121
 size 101184122

 version https://git-lfs.github.com/spec/v1
+oid sha256:f79a32be0dd3059fb93b2da55666cfc3e419bf5b75845923fd42b704177228fc
 size 101184122

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4b688646a1843cb3001738b5b25a88991e005c9c88cb1e32423e5d4a76cb0fc
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:c72d6bf0f78b2357465d590b9ffc3c74333bf1690eaca071e21b5b7bee96dbb3
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90a3f1a7b3324e65a1d3becf5bb547ac5dc1f1e4bf3ec0e53f905de1e26d2dee
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:89d6b36d890e4feb9f504b11799c7b132518b64e8b4e20ff987bf434d0dbe85d
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9344f562fad06c4f4d31fd318ba7c558b48f7df5b7e58f8a207127dca92aacd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a5837b01af5a87d0f5eb6d6423726a5527c672ed560086af1a89b1f12d78ea5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7535070140280561,
   "eval_steps": 47,
-  "global_step": 141,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1026,6 +1026,328 @@
       "eval_samples_per_second": 19.75,
       "eval_steps_per_second": 4.938,
       "step": 141
     }
   ],
   "logging_steps": 1,
@@ -1040,12 +1362,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1611279748549837e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9993319973279893,
   "eval_steps": 47,
+  "global_step": 187,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 19.75,
       "eval_steps_per_second": 4.938,
       "step": 141
+    },
+    {
+      "epoch": 0.7588510354041417,
+      "grad_norm": 2.703543186187744,
+      "learning_rate": 3.3741908964485414e-05,
+      "loss": 1.3041,
+      "step": 142
+    },
+    {
+      "epoch": 0.7641950567802271,
+      "grad_norm": 2.4582364559173584,
+      "learning_rate": 3.234472017105313e-05,
+      "loss": 1.5893,
+      "step": 143
+    },
+    {
+      "epoch": 0.7695390781563126,
+      "grad_norm": 2.735029935836792,
+      "learning_rate": 3.0971473116511394e-05,
+      "loss": 1.1325,
+      "step": 144
+    },
+    {
+      "epoch": 0.7748830995323981,
+      "grad_norm": 2.368544578552246,
+      "learning_rate": 2.9622653763263874e-05,
+      "loss": 1.3839,
+      "step": 145
+    },
+    {
+      "epoch": 0.7802271209084837,
+      "grad_norm": 2.601339340209961,
+      "learning_rate": 2.8298739429279707e-05,
+      "loss": 2.6563,
+      "step": 146
+    },
+    {
+      "epoch": 0.7855711422845691,
+      "grad_norm": 3.593987464904785,
+      "learning_rate": 2.7000198619180794e-05,
+      "loss": 3.1875,
+      "step": 147
+    },
+    {
+      "epoch": 0.7909151636606546,
+      "grad_norm": 5.363809108734131,
+      "learning_rate": 2.5727490858448288e-05,
+      "loss": 3.8335,
+      "step": 148
+    },
+    {
+      "epoch": 0.7962591850367402,
+      "grad_norm": 6.153328895568848,
+      "learning_rate": 2.4481066530806395e-05,
+      "loss": 3.3706,
+      "step": 149
+    },
+    {
+      "epoch": 0.8016032064128257,
+      "grad_norm": 7.4249091148376465,
+      "learning_rate": 2.3261366718841305e-05,
+      "loss": 3.1387,
+      "step": 150
+    },
+    {
+      "epoch": 0.8069472277889111,
+      "grad_norm": 1.7294468879699707,
+      "learning_rate": 2.206882304791176e-05,
+      "loss": 3.2262,
+      "step": 151
+    },
+    {
+      "epoch": 0.8122912491649966,
+      "grad_norm": 2.171717643737793,
+      "learning_rate": 2.0903857533405958e-05,
+      "loss": 3.2502,
+      "step": 152
+    },
+    {
+      "epoch": 0.8176352705410822,
+      "grad_norm": 1.9437127113342285,
+      "learning_rate": 1.9766882431399646e-05,
+      "loss": 3.0417,
+      "step": 153
+    },
+    {
+      "epoch": 0.8229792919171677,
+      "grad_norm": 2.4127237796783447,
+      "learning_rate": 1.8658300092767544e-05,
+      "loss": 1.985,
+      "step": 154
+    },
+    {
+      "epoch": 0.8283233132932531,
+      "grad_norm": 1.9797170162200928,
+      "learning_rate": 1.7578502820800045e-05,
+      "loss": 2.8997,
+      "step": 155
+    },
+    {
+      "epoch": 0.8336673346693386,
+      "grad_norm": 1.9933407306671143,
+      "learning_rate": 1.652787273237565e-05,
+      "loss": 3.0551,
+      "step": 156
+    },
+    {
+      "epoch": 0.8390113560454242,
+      "grad_norm": 2.6005380153656006,
+      "learning_rate": 1.5506781622737942e-05,
+      "loss": 3.1719,
+      "step": 157
+    },
+    {
+      "epoch": 0.8443553774215097,
+      "grad_norm": 2.0730040073394775,
+      "learning_rate": 1.4515590833925507e-05,
+      "loss": 1.8285,
+      "step": 158
+    },
+    {
+      "epoch": 0.8496993987975952,
+      "grad_norm": 1.7980684041976929,
+      "learning_rate": 1.3554651126900564e-05,
+      "loss": 2.2764,
+      "step": 159
+    },
+    {
+      "epoch": 0.8550434201736807,
+      "grad_norm": 1.8802449703216553,
+      "learning_rate": 1.2624302557422473e-05,
+      "loss": 3.1112,
+      "step": 160
+    },
+    {
+      "epoch": 0.8603874415497662,
+      "grad_norm": 2.0457687377929688,
+      "learning_rate": 1.172487435570926e-05,
+      "loss": 3.0587,
+      "step": 161
+    },
+    {
+      "epoch": 0.8657314629258517,
+      "grad_norm": 1.9472899436950684,
+      "learning_rate": 1.0856684809930151e-05,
+      "loss": 2.3786,
+      "step": 162
+    },
+    {
+      "epoch": 0.8710754843019372,
+      "grad_norm": 1.9495573043823242,
+      "learning_rate": 1.0020041153570347e-05,
+      "loss": 2.9965,
+      "step": 163
+    },
+    {
+      "epoch": 0.8764195056780227,
+      "grad_norm": 2.6365323066711426,
+      "learning_rate": 9.215239456707635e-06,
+      "loss": 3.3418,
+      "step": 164
+    },
+    {
+      "epoch": 0.8817635270541082,
+      "grad_norm": 2.1325201988220215,
+      "learning_rate": 8.442564521239782e-06,
+      "loss": 2.9538,
+      "step": 165
+    },
+    {
+      "epoch": 0.8871075484301937,
+      "grad_norm": 2.537140369415283,
+      "learning_rate": 7.70228978009907e-06,
+      "loss": 1.329,
+      "step": 166
+    },
+    {
+      "epoch": 0.8924515698062793,
+      "grad_norm": 2.8866047859191895,
+      "learning_rate": 6.994677200490507e-06,
+      "loss": 1.2163,
+      "step": 167
+    },
+    {
+      "epoch": 0.8977955911823647,
+      "grad_norm": 2.537121295928955,
+      "learning_rate": 6.319977191187232e-06,
+      "loss": 1.4877,
+      "step": 168
+    },
+    {
+      "epoch": 0.9031396125584502,
+      "grad_norm": 2.798138380050659,
+      "learning_rate": 5.678428513916212e-06,
+      "loss": 1.2401,
+      "step": 169
+    },
+    {
+      "epoch": 0.9084836339345357,
+      "grad_norm": 1.8588306903839111,
+      "learning_rate": 5.07025819886574e-06,
+      "loss": 2.3853,
+      "step": 170
+    },
+    {
+      "epoch": 0.9138276553106213,
+      "grad_norm": 3.1269595623016357,
+      "learning_rate": 4.495681464344259e-06,
+      "loss": 3.4073,
+      "step": 171
+    },
+    {
+      "epoch": 0.9191716766867067,
+      "grad_norm": 3.1233201026916504,
+      "learning_rate": 3.954901640619368e-06,
+      "loss": 3.1646,
+      "step": 172
+    },
+    {
+      "epoch": 0.9245156980627922,
+      "grad_norm": 4.395522117614746,
+      "learning_rate": 3.4481100979635306e-06,
+      "loss": 3.6045,
+      "step": 173
+    },
+    {
+      "epoch": 0.9298597194388778,
+      "grad_norm": 5.26246452331543,
+      "learning_rate": 2.9754861789324073e-06,
+      "loss": 3.1748,
+      "step": 174
+    },
+    {
+      "epoch": 0.9352037408149633,
+      "grad_norm": 7.157742977142334,
+      "learning_rate": 2.537197134899294e-06,
+      "loss": 3.3458,
+      "step": 175
+    },
+    {
+      "epoch": 0.9405477621910487,
+      "grad_norm": 1.6674422025680542,
+      "learning_rate": 2.1333980668685414e-06,
+      "loss": 3.3945,
+      "step": 176
+    },
+    {
+      "epoch": 0.9458917835671342,
+      "grad_norm": 1.6258163452148438,
+      "learning_rate": 1.7642318705886286e-06,
+      "loss": 2.398,
+      "step": 177
+    },
+    {
+      "epoch": 0.9512358049432198,
+      "grad_norm": 1.771727442741394,
+      "learning_rate": 1.4298291859845214e-06,
+      "loss": 3.235,
+      "step": 178
+    },
+    {
+      "epoch": 0.9565798263193053,
+      "grad_norm": 1.9831771850585938,
+      "learning_rate": 1.1303083509269452e-06,
+      "loss": 2.1936,
+      "step": 179
+    },
+    {
+      "epoch": 0.9619238476953907,
+      "grad_norm": 2.070289373397827,
+      "learning_rate": 8.657753593552143e-07,
+      "loss": 2.3432,
+      "step": 180
+    },
+    {
+      "epoch": 0.9672678690714763,
+      "grad_norm": 1.823572039604187,
+      "learning_rate": 6.363238237683033e-07,
+      "loss": 2.6982,
+      "step": 181
+    },
+    {
+      "epoch": 0.9726118904475618,
+      "grad_norm": 1.9612380266189575,
+      "learning_rate": 4.4203494209733576e-07,
+      "loss": 2.9414,
+      "step": 182
+    },
+    {
+      "epoch": 0.9779559118236473,
+      "grad_norm": 2.1800384521484375,
+      "learning_rate": 2.8297746897146816e-07,
+      "loss": 1.5728,
+      "step": 183
+    },
+    {
+      "epoch": 0.9832999331997327,
+      "grad_norm": 2.164794921875,
+      "learning_rate": 1.5920769138706438e-07,
+      "loss": 1.5844,
+      "step": 184
+    },
+    {
+      "epoch": 0.9886439545758183,
+      "grad_norm": 2.399670362472534,
+      "learning_rate": 7.07694087889621e-08,
+      "loss": 2.907,
+      "step": 185
+    },
+    {
+      "epoch": 0.9939879759519038,
+      "grad_norm": 3.2542049884796143,
+      "learning_rate": 1.7693917570837936e-08,
+      "loss": 3.2396,
+      "step": 186
+    },
+    {
+      "epoch": 0.9993319973279893,
+      "grad_norm": 6.379725933074951,
+      "learning_rate": 0.0,
+      "loss": 3.212,
+      "step": 187
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5399356829637018e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null