Training in progress, step 84, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +109 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:737dda2c8ab498387a00bd24aa7c0d08bdd5ee19bdc598dbbcb34c17168f94d3
 size 251748704

 version https://git-lfs.github.com/spec/v1
+oid sha256:3247a83bd662ee3c8232596960d6a25cabf898caea369595613695a5b3f03049
 size 251748704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:531d89fe208e10e3de0e6d204dd96b4f7291ec071cf239cced9223c4d69c2a0e
 size 128584660

 version https://git-lfs.github.com/spec/v1
+oid sha256:4674064354528629f1da62041a637cafbd5dc49580292c043645f8ed27e4bc00
 size 128584660

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:677802ae5808f426b0001ec16a702e7204d5e40420283881d554068480bd6b45
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3ad1813b61f9aeda21ece749ebd4219af60a75b7d9e01e14a21fd7086048054
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:737dd3d5554130425622a4ea7fdad100dc74e66c486de8fcb46aacc7b23a116f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:8322210a6e62381f5f4da36848e81a3ad544422a34aefaad93081c8d9440a766
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4615d363378ede0700dfbbedd8fb5726a1863063125349e3ae5b76ec8993ec64
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3704589451a34ca164c39706dc117ac35229ebf91525b9d9578c0f82da124a3b
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b0267a2844c6018a92aaf166d6587b94ab07fad00da9906fa031e3efd843861
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c57a68fa5ad6d303c41397e7083ef7108d14275b0ba81d8c4dd7d139d6a8df47
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3101e5b327a48ff01ba2c03545970ed09eef14c8b179cc52c21d8ec3e72950f1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:200db5d515f7fe0d1c4e0bac456fe6b9042173a9f2b890836ad8c5bd6dfb4d2e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.14788732394366197,
   "eval_steps": 42,
-  "global_step": 42,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -121,6 +121,112 @@
       "eval_samples_per_second": 57.023,
       "eval_steps_per_second": 1.788,
       "step": 42
     }
   ],
   "logging_steps": 3,
@@ -140,7 +246,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.522125211400929e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.29577464788732394,
   "eval_steps": 42,
+  "global_step": 84,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 57.023,
       "eval_steps_per_second": 1.788,
       "step": 42
+    },
+    {
+      "epoch": 0.15845070422535212,
+      "grad_norm": 2.373122215270996,
+      "learning_rate": 4.937319780454559e-05,
+      "loss": 0.6457,
+      "step": 45
+    },
+    {
+      "epoch": 0.16901408450704225,
+      "grad_norm": 2.525294303894043,
+      "learning_rate": 4.926169550509787e-05,
+      "loss": 0.7483,
+      "step": 48
+    },
+    {
+      "epoch": 0.1795774647887324,
+      "grad_norm": 1.940788745880127,
+      "learning_rate": 4.914121772213898e-05,
+      "loss": 0.6246,
+      "step": 51
+    },
+    {
+      "epoch": 0.19014084507042253,
+      "grad_norm": 4.233572483062744,
+      "learning_rate": 4.9011809025775486e-05,
+      "loss": 0.7226,
+      "step": 54
+    },
+    {
+      "epoch": 0.2007042253521127,
+      "grad_norm": 1.9542790651321411,
+      "learning_rate": 4.887351729005726e-05,
+      "loss": 0.6427,
+      "step": 57
+    },
+    {
+      "epoch": 0.2112676056338028,
+      "grad_norm": 1.8455359935760498,
+      "learning_rate": 4.8726393675266716e-05,
+      "loss": 0.5821,
+      "step": 60
+    },
+    {
+      "epoch": 0.22183098591549297,
+      "grad_norm": 1.9550554752349854,
+      "learning_rate": 4.8570492608992325e-05,
+      "loss": 0.6654,
+      "step": 63
+    },
+    {
+      "epoch": 0.2323943661971831,
+      "grad_norm": 2.3653812408447266,
+      "learning_rate": 4.8405871765993433e-05,
+      "loss": 0.6337,
+      "step": 66
+    },
+    {
+      "epoch": 0.24295774647887325,
+      "grad_norm": 2.590679883956909,
+      "learning_rate": 4.82325920468638e-05,
+      "loss": 0.6181,
+      "step": 69
+    },
+    {
+      "epoch": 0.2535211267605634,
+      "grad_norm": 1.894047737121582,
+      "learning_rate": 4.805071755550177e-05,
+      "loss": 0.5873,
+      "step": 72
+    },
+    {
+      "epoch": 0.2640845070422535,
+      "grad_norm": 1.9947123527526855,
+      "learning_rate": 4.7860315575395316e-05,
+      "loss": 0.5982,
+      "step": 75
+    },
+    {
+      "epoch": 0.2746478873239437,
+      "grad_norm": 1.8202632665634155,
+      "learning_rate": 4.766145654473095e-05,
+      "loss": 0.6069,
+      "step": 78
+    },
+    {
+      "epoch": 0.2852112676056338,
+      "grad_norm": 1.9413880109786987,
+      "learning_rate": 4.745421403033548e-05,
+      "loss": 0.5623,
+      "step": 81
+    },
+    {
+      "epoch": 0.29577464788732394,
+      "grad_norm": 2.8411378860473633,
+      "learning_rate": 4.72386647004603e-05,
+      "loss": 0.6329,
+      "step": 84
+    },
+    {
+      "epoch": 0.29577464788732394,
+      "eval_loss": 0.1504964828491211,
+      "eval_runtime": 33.5299,
+      "eval_samples_per_second": 57.054,
+      "eval_steps_per_second": 1.789,
+      "step": 84
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 7.044250422801859e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null