Training in progress, step 9060, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba139240646f433f9f0106d855aea9e350f07f031693b316743dd388f006a45a
 size 13982248

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0bac75f5d24a596f406e46fc168b931d808d6267d31438980869f07fbf3a128
 size 13982248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd6c424fd81f222bd0b9ad0388c7d2dd3efa61cf565f0946a945da841c18c43f
 size 7062522

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0b8471d164ccb5b5a89e3785c2f86274dfc686377b29ef93b82e37a46816e91
 size 7062522

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ac19e6550ed28e30f1e0773f61b5a93cb672dde550a8914d4067b84caffee97
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8161b0139ec703aca722250be53f42af04367d09585f2a6f1c4cab858b114415
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bb2ad4299490e0d014e4083d00933053d6f0c483383ad6b71adbd0a8a72b6c2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:34103ee724ff7cce18f877e6016120f3e8e183a8a04d54ad5db73b9ab0f1e98e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7232,
   "eval_steps": 500,
-  "global_step": 9040,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3171,6 +3171,13 @@
       "learning_rate": 1.8039430793163753e-05,
       "loss": 3.5014,
       "step": 9040
     }
   ],
   "logging_steps": 20,
@@ -3178,7 +3185,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 20,
-  "total_flos": 2.1373183266914304e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7248,
   "eval_steps": 500,
+  "global_step": 9060,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.8039430793163753e-05,
       "loss": 3.5014,
       "step": 9040
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 21.284992218017578,
+      "learning_rate": 1.7991841394296962e-05,
+      "loss": 3.4575,
+      "step": 9060
     }
   ],
   "logging_steps": 20,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 20,
+  "total_flos": 2.1428297200041984e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null