leixa committed
Commit 565a550 · verified · 1 Parent(s): 0f1f022

Training in progress, step 400, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce9aac62294993f36d80147fc9d17da17586d22e952c325c6af63259870c0543
+oid sha256:926cb90faeac72bc01bd16c9cfe6f27ba2999c8d2418ede55b2ab0f6bb924251
 size 17425352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a16b5b17c9db309d8fc8f984b1e1857d49df7dca79ddeb5b811408bb7bd47c8
+oid sha256:173e3b56481ac1faad65817a5c87d6290db52c6186ff8feed699c25e320e1b5e
 size 10252116
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0790c1a4e674a9ced8e4ba06bf81e8ca6c7f6cc25e0bc865ffc27c524e8bbe92
+oid sha256:21b0ec3ee9533871c215e821b6c863f82727baf80e99d0e9076878ab947252c7
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43da0688aca60835f4e18fa7e0f3cc099504828f82fd5dd994118be26b760a0f
+oid sha256:705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990
 size 1064
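
The four binaries above are stored as Git LFS pointer files: each pointer records only the spec version, the sha256 oid of the object, and its byte size, and this commit swaps the oids while the sizes stay the same. Below is a minimal sketch of checking pulled files against the new pointers; it assumes the repository has been cloned with the LFS objects fetched (e.g. via git lfs pull), and the paths and expected values are copied from the diffs above.

import hashlib
from pathlib import Path

# Expected sha256 oid and byte size, copied from the updated LFS pointers above.
EXPECTED = {
    "last-checkpoint/adapter_model.safetensors":
        ("926cb90faeac72bc01bd16c9cfe6f27ba2999c8d2418ede55b2ab0f6bb924251", 17425352),
    "last-checkpoint/optimizer.pt":
        ("173e3b56481ac1faad65817a5c87d6290db52c6186ff8feed699c25e320e1b5e", 10252116),
    "last-checkpoint/rng_state.pth":
        ("21b0ec3ee9533871c215e821b6c863f82727baf80e99d0e9076878ab947252c7", 14244),
    "last-checkpoint/scheduler.pt":
        ("705cabf5cbc3a6ab0feb67c77b9b453d59efcc939ce90d310af96e621810f990", 1064),
}

for rel_path, (oid, size) in EXPECTED.items():
    data = Path(rel_path).read_bytes()
    ok = hashlib.sha256(data).hexdigest() == oid and len(data) == size
    print(f"{rel_path}: {'OK' if ok else 'mismatch (LFS object not pulled?)'}")
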
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.13496794511303564,
+  "epoch": 0.1799572601507142,
   "eval_steps": 100,
-  "global_step": 300,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -459,6 +459,154 @@
       "eval_samples_per_second": 33.549,
       "eval_steps_per_second": 16.775,
       "step": 300
+    },
+    {
+      "epoch": 0.1372174108649196,
+      "grad_norm": 0.1710512787103653,
+      "learning_rate": 1.3939877632809278e-05,
+      "loss": 1.7437,
+      "step": 305
+    },
+    {
+      "epoch": 0.13946687661680351,
+      "grad_norm": 0.1375483125448227,
+      "learning_rate": 1.257446259144494e-05,
+      "loss": 1.7683,
+      "step": 310
+    },
+    {
+      "epoch": 0.14171634236868744,
+      "grad_norm": 0.15804438292980194,
+      "learning_rate": 1.1269751908617277e-05,
+      "loss": 1.6869,
+      "step": 315
+    },
+    {
+      "epoch": 0.14396580812057136,
+      "grad_norm": 0.17680715024471283,
+      "learning_rate": 1.0027861829824952e-05,
+      "loss": 1.6359,
+      "step": 320
+    },
+    {
+      "epoch": 0.14621527387245528,
+      "grad_norm": 0.13866226375102997,
+      "learning_rate": 8.850806705317183e-06,
+      "loss": 1.7821,
+      "step": 325
+    },
+    {
+      "epoch": 0.14846473962433923,
+      "grad_norm": 0.1694657802581787,
+      "learning_rate": 7.740495722810271e-06,
+      "loss": 1.502,
+      "step": 330
+    },
+    {
+      "epoch": 0.15071420537622315,
+      "grad_norm": 0.16602936387062073,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.6365,
+      "step": 335
+    },
+    {
+      "epoch": 0.15296367112810708,
+      "grad_norm": 0.21175408363342285,
+      "learning_rate": 5.727198717339511e-06,
+      "loss": 1.9111,
+      "step": 340
+    },
+    {
+      "epoch": 0.155213136879991,
+      "grad_norm": 0.18618881702423096,
+      "learning_rate": 4.827478269480895e-06,
+      "loss": 1.7057,
+      "step": 345
+    },
+    {
+      "epoch": 0.15746260263187492,
+      "grad_norm": 0.19256582856178284,
+      "learning_rate": 4.001027817058789e-06,
+      "loss": 1.7551,
+      "step": 350
+    },
+    {
+      "epoch": 0.15971206838375887,
+      "grad_norm": 0.1473885327577591,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 1.6551,
+      "step": 355
+    },
+    {
+      "epoch": 0.1619615341356428,
+      "grad_norm": 0.14483609795570374,
+      "learning_rate": 2.573177902642726e-06,
+      "loss": 1.5671,
+      "step": 360
+    },
+    {
+      "epoch": 0.16421099988752672,
+      "grad_norm": 0.17720504105091095,
+      "learning_rate": 1.974094418431388e-06,
+      "loss": 1.5816,
+      "step": 365
+    },
+    {
+      "epoch": 0.16646046563941064,
+      "grad_norm": 0.1897529512643814,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 1.5579,
+      "step": 370
+    },
+    {
+      "epoch": 0.16870993139129456,
+      "grad_norm": 0.12367403507232666,
+      "learning_rate": 1.0104673978866164e-06,
+      "loss": 1.4958,
+      "step": 375
+    },
+    {
+      "epoch": 0.17095939714317848,
+      "grad_norm": 0.11187849193811417,
+      "learning_rate": 6.474868681043578e-07,
+      "loss": 1.6368,
+      "step": 380
+    },
+    {
+      "epoch": 0.17320886289506243,
+      "grad_norm": 0.12326706200838089,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 1.7971,
+      "step": 385
+    },
+    {
+      "epoch": 0.17545832864694635,
+      "grad_norm": 0.11397775262594223,
+      "learning_rate": 1.6213459328950352e-07,
+      "loss": 1.6835,
+      "step": 390
+    },
+    {
+      "epoch": 0.17770779439883028,
+      "grad_norm": 0.12962651252746582,
+      "learning_rate": 4.055009142152067e-08,
+      "loss": 1.7831,
+      "step": 395
+    },
+    {
+      "epoch": 0.1799572601507142,
+      "grad_norm": 0.19526903331279755,
+      "learning_rate": 0.0,
+      "loss": 1.6751,
+      "step": 400
+    },
+    {
+      "epoch": 0.1799572601507142,
+      "eval_loss": 1.7608678340911865,
+      "eval_runtime": 27.7885,
+      "eval_samples_per_second": 33.683,
+      "eval_steps_per_second": 16.841,
+      "step": 400
     }
   ],
   "logging_steps": 5,
@@ -473,12 +621,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 5742197715763200.0,
+  "total_flos": 7619680791429120.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null