eeeebbb2 committed
Commit
413ec73
1 Parent(s): 446f13c

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ff36cb5b13ab72618011d489e51482a332f05a5b45061c4b099deeb720ab1c62
+ oid sha256:34d07eb5380e96d5e791a3ecd753179c7acb95944feec557d098cf6b6d543f63
 size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:e8763481db7be9f326264a81beedbff3e9bcdb07301124b357b633913f222735
+ oid sha256:99143540cf3ccfc9e2208eb18f6ff593f26140a559e60951363b707a1a477405
 size 640009682
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:2582681deddb4536c67af0b6e0a9d2fa4c7e6d140569ce9907495f5c249a47ea
+ oid sha256:eef94ecc4f6620d80a5df52c9e2c8bb2dde70d4bc37bcb690ab095c59d937d5d
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:88e9fdfe5e814903c75696aac5773ec597bca67dd3b79be38ccdfe8aecf08070
+ oid sha256:4d0b679507c870d9fb220882cbb4c9c656af0671c31434c7d90079993b797f53
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:9e54566635f21f926f14ad429291289b3d44e248f4a6883045d7883e90d57ddd
+ oid sha256:e7d6ebdbab45feeede7eb7e01c400981df9bbd491ecd5477f14330cdbe7cf123
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ab6c89d6719982a35b4026234cc3804240728bedad333420d2706a553627ae65
+ oid sha256:a635cb715f9a83ae2e7e30ede64cf9b1b72d05ad6e78c7ca219107623d333b9f
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
+ oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
- "best_metric": 1.1073193550109863,
- "best_model_checkpoint": "miner_id_24/checkpoint-25",
- "epoch": 0.012519953676171399,
+ "best_metric": 1.0611395835876465,
+ "best_model_checkpoint": "miner_id_24/checkpoint-50",
+ "epoch": 0.025039907352342797,
 "eval_steps": 25,
- "global_step": 25,
+ "global_step": 50,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -198,6 +198,189 @@
 "eval_samples_per_second": 12.522,
 "eval_steps_per_second": 3.256,
 "step": 25
+ },
+ {
+ "epoch": 0.013020751823218255,
+ "grad_norm": 6.128429889678955,
+ "learning_rate": 5.500000000000001e-05,
+ "loss": 20.2164,
+ "step": 26
+ },
+ {
+ "epoch": 0.01352154997026511,
+ "grad_norm": 7.410825729370117,
+ "learning_rate": 5.205685918464356e-05,
+ "loss": 20.2266,
+ "step": 27
+ },
+ {
+ "epoch": 0.014022348117311966,
+ "grad_norm": 4.50378942489624,
+ "learning_rate": 4.912632135009769e-05,
+ "loss": 17.0934,
+ "step": 28
+ },
+ {
+ "epoch": 0.014523146264358822,
+ "grad_norm": 4.2456464767456055,
+ "learning_rate": 4.6220935509274235e-05,
+ "loss": 17.7946,
+ "step": 29
+ },
+ {
+ "epoch": 0.015023944411405678,
+ "grad_norm": 3.301119327545166,
+ "learning_rate": 4.3353142970386564e-05,
+ "loss": 19.8457,
+ "step": 30
+ },
+ {
+ "epoch": 0.015524742558452534,
+ "grad_norm": 3.3931143283843994,
+ "learning_rate": 4.053522406135775e-05,
+ "loss": 17.4589,
+ "step": 31
+ },
+ {
+ "epoch": 0.01602554070549939,
+ "grad_norm": 3.908799409866333,
+ "learning_rate": 3.777924554357096e-05,
+ "loss": 17.9799,
+ "step": 32
+ },
+ {
+ "epoch": 0.016526338852546247,
+ "grad_norm": 4.308777809143066,
+ "learning_rate": 3.509700894014496e-05,
+ "loss": 18.2389,
+ "step": 33
+ },
+ {
+ "epoch": 0.0170271369995931,
+ "grad_norm": 5.028868198394775,
+ "learning_rate": 3.250000000000001e-05,
+ "loss": 18.8644,
+ "step": 34
+ },
+ {
+ "epoch": 0.01752793514663996,
+ "grad_norm": 5.518270492553711,
+ "learning_rate": 2.9999339514117912e-05,
+ "loss": 17.9908,
+ "step": 35
+ },
+ {
+ "epoch": 0.018028733293686813,
+ "grad_norm": 4.996335506439209,
+ "learning_rate": 2.760573569460757e-05,
+ "loss": 15.7478,
+ "step": 36
+ },
+ {
+ "epoch": 0.01852953144073367,
+ "grad_norm": 6.510356426239014,
+ "learning_rate": 2.53294383204969e-05,
+ "loss": 16.2257,
+ "step": 37
+ },
+ {
+ "epoch": 0.019030329587780524,
+ "grad_norm": 5.005591869354248,
+ "learning_rate": 2.3180194846605367e-05,
+ "loss": 21.489,
+ "step": 38
+ },
+ {
+ "epoch": 0.019531127734827382,
+ "grad_norm": 9.856449127197266,
+ "learning_rate": 2.1167208663446025e-05,
+ "loss": 21.2883,
+ "step": 39
+ },
+ {
+ "epoch": 0.020031925881874236,
+ "grad_norm": 3.623931646347046,
+ "learning_rate": 1.9299099686894423e-05,
+ "loss": 17.6629,
+ "step": 40
+ },
+ {
+ "epoch": 0.020532724028921093,
+ "grad_norm": 3.4106252193450928,
+ "learning_rate": 1.758386744638546e-05,
+ "loss": 15.4309,
+ "step": 41
+ },
+ {
+ "epoch": 0.021033522175967947,
+ "grad_norm": 4.296844005584717,
+ "learning_rate": 1.602885682970026e-05,
+ "loss": 17.6033,
+ "step": 42
+ },
+ {
+ "epoch": 0.021534320323014805,
+ "grad_norm": 4.559662342071533,
+ "learning_rate": 1.464072663102903e-05,
+ "loss": 16.8699,
+ "step": 43
+ },
+ {
+ "epoch": 0.02203511847006166,
+ "grad_norm": 4.427708625793457,
+ "learning_rate": 1.3425421036992098e-05,
+ "loss": 17.4621,
+ "step": 44
+ },
+ {
+ "epoch": 0.022535916617108517,
+ "grad_norm": 3.9432215690612793,
+ "learning_rate": 1.2388144172720251e-05,
+ "loss": 17.8905,
+ "step": 45
+ },
+ {
+ "epoch": 0.023036714764155374,
+ "grad_norm": 3.188026189804077,
+ "learning_rate": 1.1533337816991932e-05,
+ "loss": 17.3133,
+ "step": 46
+ },
+ {
+ "epoch": 0.02353751291120223,
+ "grad_norm": 3.24480938911438,
+ "learning_rate": 1.0864662381854632e-05,
+ "loss": 16.5617,
+ "step": 47
+ },
+ {
+ "epoch": 0.024038311058249086,
+ "grad_norm": 3.795017719268799,
+ "learning_rate": 1.0384981238178534e-05,
+ "loss": 16.9529,
+ "step": 48
+ },
+ {
+ "epoch": 0.02453910920529594,
+ "grad_norm": 3.4241626262664795,
+ "learning_rate": 1.0096348454262845e-05,
+ "loss": 14.9861,
+ "step": 49
+ },
+ {
+ "epoch": 0.025039907352342797,
+ "grad_norm": 5.961770057678223,
+ "learning_rate": 1e-05,
+ "loss": 17.1319,
+ "step": 50
+ },
+ {
+ "epoch": 0.025039907352342797,
+ "eval_loss": 1.0611395835876465,
+ "eval_runtime": 3.8308,
+ "eval_samples_per_second": 13.052,
+ "eval_steps_per_second": 3.394,
+ "step": 50
 }
 ],
 "logging_steps": 1,
@@ -221,12 +404,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
 },
 "attributes": {}
 }
 },
- "total_flos": 2.9582754301188506e+17,
+ "total_flos": 5.9050464574754e+17,
 "train_batch_size": 2,
 "trial_name": null,
 "trial_params": null
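For reference, a minimal sketch (assuming a local clone of this repository with the last-checkpoint/ directory present; the path is an assumption, not part of this commit) of reading trainer_state.json to inspect the fields updated here:

import json
from pathlib import Path

# Hypothetical local path; adjust to wherever the repository is checked out.
state_path = Path("last-checkpoint") / "trainer_state.json"

with state_path.open() as f:
    state = json.load(f)

# Fields changed by this commit (per the diff above: step 50, best eval loss ~1.061).
print("global_step:", state["global_step"])
print("best_metric:", state["best_metric"])
print("best_model_checkpoint:", state["best_model_checkpoint"])
print("should_training_stop:", state["stateful_callbacks"]["EarlyStoppingCallback"]["args"] if "stateful_callbacks" in state else "see trainer_state.json")

# Last few log_history entries (steps 26-50 were appended in this commit).
for entry in state["log_history"][-3:]:
    print(entry.get("step"), entry.get("loss", entry.get("eval_loss")))

The key names (global_step, best_metric, best_model_checkpoint, log_history) are taken directly from the diff; the stateful_callbacks lookup is an assumption about the surrounding JSON structure and may need adjusting to the actual file.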