Training in progress, step 25, checkpoint

Browse files

Files changed (9) hide show

last-checkpoint/adapter_config.json +3 -3
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +2 -2
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +11 -194
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
     "k_proj",
-    "down_proj",
     "up_proj",
     "v_proj",
-    "o_proj",
     "gate_proj"
   ],
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
+    "o_proj",
     "up_proj",
     "v_proj",
+    "down_proj",
+    "q_proj",
     "gate_proj"
   ],
   "task_type": "CAUSAL_LM",

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c6c3b49517918f71b5a8df86ce64802524c101812f6310fcee02fef5d317af6
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:ecacf0120c3313aa6d110d54cdb0747ee47b71b2cd60beeadeed0c4feac79962
 size 671466706

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4096bbde6d582953ae6d3e0451be1d831604d98af6d8a1de69bbcb44ff1e950
-size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:06d1f61bfc6ffc8d8e1c8ff53a0044cf9902a1ddf2c637f9612610bf4762747d
+size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1cdc5d4fb006b4b93de906620f37a527e4627449685bf46a5ac4411228b2cb2f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f616f705ba7cb291af7d9f513f11d6e0e42ec727e886627085ef561e88f6bad
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbe389e1cf00ae954d39bffec47561432676c1e4ab5f84230142dbc16ac036f4
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d30fd2f50252b0b0137d88cde7a2c3bc7bdfb6dc03455e0923b6a91b2be12da1
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa6ef691995f1233121abf8096e6f45d06d1dd2fab2496a7e8a454c331df8991
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0fd2dda9948c4cbe11886349ce34a931605871e324bf11c12c42b99ab05f62fb
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.029967036260113874,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -18,9 +18,9 @@
     {
       "epoch": 0.0005993407252022775,
       "eval_loss": NaN,
-      "eval_runtime": 302.1838,
-      "eval_samples_per_second": 37.199,
-      "eval_steps_per_second": 4.653,
       "step": 1
     },
     {
@@ -194,193 +194,10 @@
     {
       "epoch": 0.014983518130056937,
       "eval_loss": NaN,
-      "eval_runtime": 302.4917,
-      "eval_samples_per_second": 37.161,
-      "eval_steps_per_second": 4.648,
       "step": 25
-    },
-    {
-      "epoch": 0.015582858855259216,
-      "grad_norm": NaN,
-      "learning_rate": 5e-05,
-      "loss": 0.0,
-      "step": 26
-    },
-    {
-      "epoch": 0.016182199580461493,
-      "grad_norm": NaN,
-      "learning_rate": 4.6729843538492847e-05,
-      "loss": 0.0,
-      "step": 27
-    },
-    {
-      "epoch": 0.01678154030566377,
-      "grad_norm": NaN,
-      "learning_rate": 4.347369038899744e-05,
-      "loss": 0.0,
-      "step": 28
-    },
-    {
-      "epoch": 0.017380881030866047,
-      "grad_norm": NaN,
-      "learning_rate": 4.0245483899193595e-05,
-      "loss": 0.0,
-      "step": 29
-    },
-    {
-      "epoch": 0.017980221756068324,
-      "grad_norm": NaN,
-      "learning_rate": 3.705904774487396e-05,
-      "loss": 0.0,
-      "step": 30
-    },
-    {
-      "epoch": 0.0185795624812706,
-      "grad_norm": NaN,
-      "learning_rate": 3.392802673484193e-05,
-      "loss": 0.0,
-      "step": 31
-    },
-    {
-      "epoch": 0.01917890320647288,
-      "grad_norm": NaN,
-      "learning_rate": 3.086582838174551e-05,
-      "loss": 0.0,
-      "step": 32
-    },
-    {
-      "epoch": 0.019778243931675158,
-      "grad_norm": NaN,
-      "learning_rate": 2.7885565489049946e-05,
-      "loss": 0.0,
-      "step": 33
-    },
-    {
-      "epoch": 0.020377584656877435,
-      "grad_norm": NaN,
-      "learning_rate": 2.500000000000001e-05,
-      "loss": 0.0,
-      "step": 34
-    },
-    {
-      "epoch": 0.020976925382079712,
-      "grad_norm": NaN,
-      "learning_rate": 2.2221488349019903e-05,
-      "loss": 0.0,
-      "step": 35
-    },
-    {
-      "epoch": 0.02157626610728199,
-      "grad_norm": NaN,
-      "learning_rate": 1.9561928549563968e-05,
-      "loss": 0.0,
-      "step": 36
-    },
-    {
-      "epoch": 0.022175606832484266,
-      "grad_norm": NaN,
-      "learning_rate": 1.703270924499656e-05,
-      "loss": 0.0,
-      "step": 37
-    },
-    {
-      "epoch": 0.022774947557686546,
-      "grad_norm": NaN,
-      "learning_rate": 1.4644660940672627e-05,
-      "loss": 0.0,
-      "step": 38
-    },
-    {
-      "epoch": 0.023374288282888823,
-      "grad_norm": NaN,
-      "learning_rate": 1.2408009626051137e-05,
-      "loss": 0.0,
-      "step": 39
-    },
-    {
-      "epoch": 0.0239736290080911,
-      "grad_norm": NaN,
-      "learning_rate": 1.0332332985438248e-05,
-      "loss": 0.0,
-      "step": 40
-    },
-    {
-      "epoch": 0.024572969733293377,
-      "grad_norm": NaN,
-      "learning_rate": 8.426519384872733e-06,
-      "loss": 0.0,
-      "step": 41
-    },
-    {
-      "epoch": 0.025172310458495654,
-      "grad_norm": NaN,
-      "learning_rate": 6.698729810778065e-06,
-      "loss": 0.0,
-      "step": 42
-    },
-    {
-      "epoch": 0.02577165118369793,
-      "grad_norm": NaN,
-      "learning_rate": 5.156362923365588e-06,
-      "loss": 0.0,
-      "step": 43
-    },
-    {
-      "epoch": 0.02637099190890021,
-      "grad_norm": NaN,
-      "learning_rate": 3.8060233744356633e-06,
-      "loss": 0.0,
-      "step": 44
-    },
-    {
-      "epoch": 0.02697033263410249,
-      "grad_norm": NaN,
-      "learning_rate": 2.653493525244721e-06,
-      "loss": 0.0,
-      "step": 45
-    },
-    {
-      "epoch": 0.027569673359304766,
-      "grad_norm": NaN,
-      "learning_rate": 1.70370868554659e-06,
-      "loss": 0.0,
-      "step": 46
-    },
-    {
-      "epoch": 0.028169014084507043,
-      "grad_norm": NaN,
-      "learning_rate": 9.607359798384785e-07,
-      "loss": 0.0,
-      "step": 47
-    },
-    {
-      "epoch": 0.02876835480970932,
-      "grad_norm": NaN,
-      "learning_rate": 4.277569313094809e-07,
-      "loss": 0.0,
-      "step": 48
-    },
-    {
-      "epoch": 0.029367695534911597,
-      "grad_norm": NaN,
-      "learning_rate": 1.0705383806982606e-07,
-      "loss": 0.0,
-      "step": 49
-    },
-    {
-      "epoch": 0.029967036260113874,
-      "grad_norm": NaN,
-      "learning_rate": 0.0,
-      "loss": 0.0,
-      "step": 50
-    },
-    {
-      "epoch": 0.029967036260113874,
-      "eval_loss": NaN,
-      "eval_runtime": 301.9125,
-      "eval_samples_per_second": 37.233,
-      "eval_steps_per_second": 4.657,
-      "step": 50
     }
   ],
   "logging_steps": 1,
@@ -395,7 +212,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -404,12 +221,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.658027780734976e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.014983518130056937,
   "eval_steps": 25,
+  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
     {
       "epoch": 0.0005993407252022775,
       "eval_loss": NaN,
+      "eval_runtime": 619.6678,
+      "eval_samples_per_second": 18.14,
+      "eval_steps_per_second": 2.269,
       "step": 1
     },
     {
     {
       "epoch": 0.014983518130056937,
       "eval_loss": NaN,
+      "eval_runtime": 620.162,
+      "eval_samples_per_second": 18.126,
+      "eval_steps_per_second": 2.267,
       "step": 25
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.829013890367488e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f3518b5c56f452274c8d035d0612a31cf62121706984847bd1089aad20b8410
 size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c0f2acb7d5fd05e7352048c13298f1bc785726fd9d87185bd40cad477c92f76
 size 6776