Training in progress, step 210, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +109 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91f844815a99969d53bc2b5482574dd9b19e68159754e281af8560e82a8e6d24
 size 251748704

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8015ad1b90998e7f0857f169e74f56e8528dbd895ed12de6fa03cd77eb70f20
 size 251748704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9618c84c860f05c85c4d1d721835f8ba7b9f55d9737a97624f4e3d41b076837f
 size 128584660

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ac80cf462e241c01804a992e9e36b0c38023b8e84916b5a04dc633422f14c15
 size 128584660

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de714458b8c60cd70dc5898863dcb9f93dd49a3f8feee11facdafd7c4100efa2
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:de85a0566d1069a2510f9dc83b85a8c653bcf06b351c866368e9deceb9ba5aaf
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4f64720d8d0d269c8858be2aa4bd6e8b33e03737c0a24854963da8ae0ad210a
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7eaa7f1231f032e2bdbad374ea7773e0a19b4d1d783ccbfff83e0e437c08c030
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:284ec0e14baeb59b8e0a8e1045d504ab9e19f0c0329738d0ae614878482b08a6
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:507ab39648779b4356484d9346f34241f836cee0bf54b545311958a62a524c9b
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e44c8ce92df8c0fd5b8022970525acf2e72a262c19b8760135eca567e033e880
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e044fd25289daa09cd566ab153818d141356f8d72b3bcf17809c3084c7e37835
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79783dc253db8561cbce9475c59c8adb4831f213eda27032651f04af31d6a595
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d485fc873443ef7ad597c6f4a82e93694f8fe8522b8ffecf4d60075246020043
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5915492957746479,
   "eval_steps": 42,
-  "global_step": 168,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -439,6 +439,112 @@
       "eval_samples_per_second": 57.04,
       "eval_steps_per_second": 1.789,
       "step": 168
     }
   ],
   "logging_steps": 3,
@@ -458,7 +564,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4088500845603717e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7394366197183099,
   "eval_steps": 42,
+  "global_step": 210,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 57.04,
       "eval_steps_per_second": 1.789,
       "step": 168
+    },
+    {
+      "epoch": 0.602112676056338,
+      "grad_norm": 1.719423770904541,
+      "learning_rate": 3.782248193514766e-05,
+      "loss": 0.5673,
+      "step": 171
+    },
+    {
+      "epoch": 0.6126760563380281,
+      "grad_norm": 1.777215600013733,
+      "learning_rate": 3.740734531410626e-05,
+      "loss": 0.5777,
+      "step": 174
+    },
+    {
+      "epoch": 0.6232394366197183,
+      "grad_norm": 1.7212867736816406,
+      "learning_rate": 3.698761866252635e-05,
+      "loss": 0.5956,
+      "step": 177
+    },
+    {
+      "epoch": 0.6338028169014085,
+      "grad_norm": 1.849625825881958,
+      "learning_rate": 3.656345725602089e-05,
+      "loss": 0.5805,
+      "step": 180
+    },
+    {
+      "epoch": 0.6443661971830986,
+      "grad_norm": 1.5129181146621704,
+      "learning_rate": 3.6135018010816477e-05,
+      "loss": 0.5271,
+      "step": 183
+    },
+    {
+      "epoch": 0.6549295774647887,
+      "grad_norm": 1.7070224285125732,
+      "learning_rate": 3.570245942570315e-05,
+      "loss": 0.5715,
+      "step": 186
+    },
+    {
+      "epoch": 0.6654929577464789,
+      "grad_norm": 1.416464924812317,
+      "learning_rate": 3.526594152339845e-05,
+      "loss": 0.528,
+      "step": 189
+    },
+    {
+      "epoch": 0.676056338028169,
+      "grad_norm": 1.569150686264038,
+      "learning_rate": 3.4825625791348096e-05,
+      "loss": 0.5646,
+      "step": 192
+    },
+    {
+      "epoch": 0.6866197183098591,
+      "grad_norm": 1.3740211725234985,
+      "learning_rate": 3.438167512198436e-05,
+      "loss": 0.5574,
+      "step": 195
+    },
+    {
+      "epoch": 0.6971830985915493,
+      "grad_norm": 1.7749651670455933,
+      "learning_rate": 3.393425375246503e-05,
+      "loss": 0.5988,
+      "step": 198
+    },
+    {
+      "epoch": 0.7077464788732394,
+      "grad_norm": 1.5180374383926392,
+      "learning_rate": 3.348352720391469e-05,
+      "loss": 0.5696,
+      "step": 201
+    },
+    {
+      "epoch": 0.7183098591549296,
+      "grad_norm": 1.4863420724868774,
+      "learning_rate": 3.3029662220191144e-05,
+      "loss": 0.5903,
+      "step": 204
+    },
+    {
+      "epoch": 0.7288732394366197,
+      "grad_norm": 1.5769625902175903,
+      "learning_rate": 3.2572826706199305e-05,
+      "loss": 0.5445,
+      "step": 207
+    },
+    {
+      "epoch": 0.7394366197183099,
+      "grad_norm": 1.7347217798233032,
+      "learning_rate": 3.211318966577581e-05,
+      "loss": 0.5684,
+      "step": 210
+    },
+    {
+      "epoch": 0.7394366197183099,
+      "eval_loss": 0.13805150985717773,
+      "eval_runtime": 33.5478,
+      "eval_samples_per_second": 57.023,
+      "eval_steps_per_second": 1.788,
+      "step": 210
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 1.7610626057004646e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null