Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2637669bcc380b32d8bf04a0f7a9ea82381a2cccca6382a967e6b7ae11bfa66e
 size 432223744

 version https://git-lfs.github.com/spec/v1
+oid sha256:49de1588b27d5375c1465e90ccc25640db36e999391a3c61d5e002112a1b7d16
 size 432223744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:005b9787fe114af93b615c537df6c767794502ab4563f0a2965d5c2606f62b82
 size 864785974

 version https://git-lfs.github.com/spec/v1
+oid sha256:e78fd505e62291583c71a0b8a9ff2316547d328efa67b17507e3f58612718e17
 size 864785974

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33aab6001c5fbfa9dd31365fbe1de313ed661d328437f142df866cc0c247c640
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2a3545180fe8665d3b1efe975ccb776db717312a908ea084274a79c64a6d14a
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90153d1b05ae91a8b0ab5c6e14d953c84cd18ab5a3267b6d939534e4bc6e3689
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a554ffbebe6d60d65a3f2ce8fe1f053a9d3bd1d51783c095bf0e1c1321cebdf1
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1ee6099c5c030a70e2ffcf778cb3a53de3ce0485927b9ca6f750ed52ddb4ae5
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:358ea0be3ae16bb1e78f80915fb55b2b60141a082990322dc496e2ae578e1a4b
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b1d21ef87415263bc930d7066c249347c5e6694ff6fc220fef6a39c42b0efde
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb41e6b7d16e5b8a1f86f01874910c500cbbebf88581bbdc0a53b03330c0f7b9
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.733593702316284,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.0048100626510660304,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 14.713,
       "eval_steps_per_second": 3.825,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.315690861756416e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 3.511137008666992,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.009620125302132061,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.713,
       "eval_steps_per_second": 3.825,
       "step": 25
+    },
+    {
+      "epoch": 0.005002465157108672,
+      "grad_norm": 4.074855804443359,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 2.7415,
+      "step": 26
+    },
+    {
+      "epoch": 0.005194867663151312,
+      "grad_norm": 5.183741092681885,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 3.1624,
+      "step": 27
+    },
+    {
+      "epoch": 0.0053872701691939535,
+      "grad_norm": 5.362056255340576,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 3.3544,
+      "step": 28
+    },
+    {
+      "epoch": 0.005579672675236595,
+      "grad_norm": 4.090317726135254,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 3.569,
+      "step": 29
+    },
+    {
+      "epoch": 0.005772075181279236,
+      "grad_norm": 3.477642297744751,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 3.5337,
+      "step": 30
+    },
+    {
+      "epoch": 0.0059644776873218775,
+      "grad_norm": 3.0103671550750732,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 3.6925,
+      "step": 31
+    },
+    {
+      "epoch": 0.006156880193364519,
+      "grad_norm": 2.481553792953491,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 3.7062,
+      "step": 32
+    },
+    {
+      "epoch": 0.00634928269940716,
+      "grad_norm": 2.371243476867676,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 3.7112,
+      "step": 33
+    },
+    {
+      "epoch": 0.006541685205449801,
+      "grad_norm": 2.5417237281799316,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 4.0359,
+      "step": 34
+    },
+    {
+      "epoch": 0.006734087711492442,
+      "grad_norm": 2.821629762649536,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 4.0407,
+      "step": 35
+    },
+    {
+      "epoch": 0.006926490217535083,
+      "grad_norm": 3.3124289512634277,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 4.1073,
+      "step": 36
+    },
+    {
+      "epoch": 0.007118892723577725,
+      "grad_norm": 5.673077583312988,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 4.4741,
+      "step": 37
+    },
+    {
+      "epoch": 0.007311295229620366,
+      "grad_norm": 1.9838043451309204,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 2.7683,
+      "step": 38
+    },
+    {
+      "epoch": 0.007503697735663007,
+      "grad_norm": 2.109670877456665,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 2.7531,
+      "step": 39
+    },
+    {
+      "epoch": 0.0076961002417056485,
+      "grad_norm": 2.0147898197174072,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 2.9084,
+      "step": 40
+    },
+    {
+      "epoch": 0.007888502747748289,
+      "grad_norm": 2.1483073234558105,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 3.1465,
+      "step": 41
+    },
+    {
+      "epoch": 0.00808090525379093,
+      "grad_norm": 1.920487403869629,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 3.1783,
+      "step": 42
+    },
+    {
+      "epoch": 0.008273307759833572,
+      "grad_norm": 2.0920512676239014,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 3.47,
+      "step": 43
+    },
+    {
+      "epoch": 0.008465710265876213,
+      "grad_norm": 2.128572940826416,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 3.6677,
+      "step": 44
+    },
+    {
+      "epoch": 0.008658112771918854,
+      "grad_norm": 2.0647780895233154,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 3.5179,
+      "step": 45
+    },
+    {
+      "epoch": 0.008850515277961496,
+      "grad_norm": 2.5102503299713135,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 3.6965,
+      "step": 46
+    },
+    {
+      "epoch": 0.009042917784004137,
+      "grad_norm": 2.516993761062622,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 3.8702,
+      "step": 47
+    },
+    {
+      "epoch": 0.009235320290046778,
+      "grad_norm": 3.2718374729156494,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 4.1012,
+      "step": 48
+    },
+    {
+      "epoch": 0.00942772279608942,
+      "grad_norm": 3.6114237308502197,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 4.1246,
+      "step": 49
+    },
+    {
+      "epoch": 0.009620125302132061,
+      "grad_norm": 6.421860218048096,
+      "learning_rate": 1e-05,
+      "loss": 4.5367,
+      "step": 50
+    },
+    {
+      "epoch": 0.009620125302132061,
+      "eval_loss": 3.511137008666992,
+      "eval_runtime": 3.4049,
+      "eval_samples_per_second": 14.685,
+      "eval_steps_per_second": 3.818,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.631381723512832e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null