Training in progress, step 252, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +109 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8015ad1b90998e7f0857f169e74f56e8528dbd895ed12de6fa03cd77eb70f20
 size 251748704

 version https://git-lfs.github.com/spec/v1
+oid sha256:8639e4cf35bac89412a8a4dd47f897e36f7e86dc97159e12851d524408c3bfbe
 size 251748704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ac80cf462e241c01804a992e9e36b0c38023b8e84916b5a04dc633422f14c15
 size 128584660

 version https://git-lfs.github.com/spec/v1
+oid sha256:15d11b9a991a305ea4e24a740d991378e7bdb087c772f6f65f19d91bf8d6564a
 size 128584660

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de85a0566d1069a2510f9dc83b85a8c653bcf06b351c866368e9deceb9ba5aaf
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:7acdccf5cc66b61c42791dfd01a91f8a854ce9c52a1630a5ecf79a1b80ac8d20
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7eaa7f1231f032e2bdbad374ea7773e0a19b4d1d783ccbfff83e0e437c08c030
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e76c92f25f2a396a5973022fb02f2ad8e4729eb895d4a8ab011b5483c5a4ffa8
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:507ab39648779b4356484d9346f34241f836cee0bf54b545311958a62a524c9b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:493669684e2e122ba889b5c50803cdf31c2d3ce9d4b8a85969dd9cd4a550e603
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e044fd25289daa09cd566ab153818d141356f8d72b3bcf17809c3084c7e37835
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:013fade482032a43fbecef38a00eb8b3ce00bcd639e2273d38a5451174dfe05c
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d485fc873443ef7ad597c6f4a82e93694f8fe8522b8ffecf4d60075246020043
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fde5cce913d9c5501edc422466fae8378d9b13dc57f22397ec7fc38f7801fc24
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7394366197183099,
   "eval_steps": 42,
-  "global_step": 210,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -545,6 +545,112 @@
       "eval_samples_per_second": 57.023,
       "eval_steps_per_second": 1.788,
       "step": 210
     }
   ],
   "logging_steps": 3,
@@ -564,7 +670,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.7610626057004646e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8873239436619719,
   "eval_steps": 42,
+  "global_step": 252,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 57.023,
       "eval_steps_per_second": 1.788,
       "step": 210
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 2.053368330001831,
+      "learning_rate": 3.165092113916688e-05,
+      "loss": 0.573,
+      "step": 213
+    },
+    {
+      "epoch": 0.7605633802816901,
+      "grad_norm": 1.5371932983398438,
+      "learning_rate": 3.118619214012286e-05,
+      "loss": 0.5915,
+      "step": 216
+    },
+    {
+      "epoch": 0.7711267605633803,
+      "grad_norm": 1.4873214960098267,
+      "learning_rate": 3.071917459263264e-05,
+      "loss": 0.5577,
+      "step": 219
+    },
+    {
+      "epoch": 0.7816901408450704,
+      "grad_norm": 1.5104900598526,
+      "learning_rate": 3.0250041267321232e-05,
+      "loss": 0.5432,
+      "step": 222
+    },
+    {
+      "epoch": 0.7922535211267606,
+      "grad_norm": 1.6719636917114258,
+      "learning_rate": 2.9778965717534313e-05,
+      "loss": 0.5495,
+      "step": 225
+    },
+    {
+      "epoch": 0.8028169014084507,
+      "grad_norm": 1.4213000535964966,
+      "learning_rate": 2.9306122215132976e-05,
+      "loss": 0.5193,
+      "step": 228
+    },
+    {
+      "epoch": 0.8133802816901409,
+      "grad_norm": 1.4301376342773438,
+      "learning_rate": 2.8831685686022897e-05,
+      "loss": 0.5207,
+      "step": 231
+    },
+    {
+      "epoch": 0.823943661971831,
+      "grad_norm": 1.6136512756347656,
+      "learning_rate": 2.8355831645441388e-05,
+      "loss": 0.5428,
+      "step": 234
+    },
+    {
+      "epoch": 0.8345070422535211,
+      "grad_norm": 1.4768859148025513,
+      "learning_rate": 2.787873613302649e-05,
+      "loss": 0.5475,
+      "step": 237
+    },
+    {
+      "epoch": 0.8450704225352113,
+      "grad_norm": 1.5305073261260986,
+      "learning_rate": 2.7400575647692046e-05,
+      "loss": 0.5587,
+      "step": 240
+    },
+    {
+      "epoch": 0.8556338028169014,
+      "grad_norm": 1.3817962408065796,
+      "learning_rate": 2.692152708233292e-05,
+      "loss": 0.5434,
+      "step": 243
+    },
+    {
+      "epoch": 0.8661971830985915,
+      "grad_norm": 1.2823965549468994,
+      "learning_rate": 2.6441767658384366e-05,
+      "loss": 0.5301,
+      "step": 246
+    },
+    {
+      "epoch": 0.8767605633802817,
+      "grad_norm": 1.4110352993011475,
+      "learning_rate": 2.596147486025996e-05,
+      "loss": 0.5135,
+      "step": 249
+    },
+    {
+      "epoch": 0.8873239436619719,
+      "grad_norm": 1.5290179252624512,
+      "learning_rate": 2.5480826369692178e-05,
+      "loss": 0.5662,
+      "step": 252
+    },
+    {
+      "epoch": 0.8873239436619719,
+      "eval_loss": 0.13341283798217773,
+      "eval_runtime": 33.5418,
+      "eval_samples_per_second": 57.033,
+      "eval_steps_per_second": 1.789,
+      "step": 252
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 2.1132751268405576e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null