Training in progress, step 50, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +187 -4

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e22e51d643ca8d5785f7636575ee2ec2bd641fba9c2b1a031735d9f095d47f34
 size 395561332

 version https://git-lfs.github.com/spec/v1
+oid sha256:130297698c956b4f7fda0709e82e7e4434c4d9ec1647ec2153c4134610be3fb5
 size 395561332

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86518eff8d2e6d7850888c057f264ffdccacb58972cd3cc34ea991e145c0c843
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:105ba386ee3a088d812badd92d28ff5c82411293cdecdbf2ae7ecee2500350ff
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88a45edbc56b9a0cdffa656f785d1ae35c7da6062bcf2f201460ce16e0abd32b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff0b7924e6c0ec80013d291ef745a1d1d8bcaa3192c982bb7f9fd4b651f17d67
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.08035355564483729,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 10.853,
       "eval_steps_per_second": 5.437,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -212,7 +395,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -226,7 +409,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.021396447854592e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.16070711128967458,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.853,
       "eval_steps_per_second": 5.437,
       "step": 25
+    },
+    {
+      "epoch": 0.08356769787063077,
+      "grad_norm": NaN,
+      "learning_rate": 0.00017155989607441213,
+      "loss": 0.0,
+      "step": 26
+    },
+    {
+      "epoch": 0.08678184009642427,
+      "grad_norm": NaN,
+      "learning_rate": 0.00016810158587867973,
+      "loss": 0.0,
+      "step": 27
+    },
+    {
+      "epoch": 0.08999598232221775,
+      "grad_norm": NaN,
+      "learning_rate": 0.00016448422127361706,
+      "loss": 0.0,
+      "step": 28
+    },
+    {
+      "epoch": 0.09321012454801125,
+      "grad_norm": NaN,
+      "learning_rate": 0.00016071625078187114,
+      "loss": 0.0,
+      "step": 29
+    },
+    {
+      "epoch": 0.09642426677380474,
+      "grad_norm": NaN,
+      "learning_rate": 0.00015680647467311557,
+      "loss": 0.0,
+      "step": 30
+    },
+    {
+      "epoch": 0.09963840899959824,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001527640244106133,
+      "loss": 0.0,
+      "step": 31
+    },
+    {
+      "epoch": 0.10285255122539172,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001485983413242606,
+      "loss": 0.0,
+      "step": 32
+    },
+    {
+      "epoch": 0.10606669345118522,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014431915455992414,
+      "loss": 0.0,
+      "step": 33
+    },
+    {
+      "epoch": 0.1092808356769787,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013993645835656953,
+      "loss": 0.0,
+      "step": 34
+    },
+    {
+      "epoch": 0.1124949779027722,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013546048870425356,
+      "loss": 0.0,
+      "step": 35
+    },
+    {
+      "epoch": 0.11570912012856568,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013090169943749476,
+      "loss": 0.0,
+      "step": 36
+    },
+    {
+      "epoch": 0.11892326235435918,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001262707378198587,
+      "loss": 0.0,
+      "step": 37
+    },
+    {
+      "epoch": 0.12213740458015267,
+      "grad_norm": NaN,
+      "learning_rate": 0.00012157841967678063,
+      "loss": 0.0,
+      "step": 38
+    },
+    {
+      "epoch": 0.12535154680594615,
+      "grad_norm": NaN,
+      "learning_rate": 0.00011683570413470383,
+      "loss": 0.0,
+      "step": 39
+    },
+    {
+      "epoch": 0.12856568903173965,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001120536680255323,
+      "loss": 0.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.13177983125753315,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010724348001617625,
+      "loss": 0.0,
+      "step": 41
+    },
+    {
+      "epoch": 0.13499397348332665,
+      "grad_norm": NaN,
+      "learning_rate": 0.00010241637452361323,
+      "loss": 0.0,
+      "step": 42
+    },
+    {
+      "epoch": 0.13820811570912012,
+      "grad_norm": NaN,
+      "learning_rate": 9.75836254763868e-05,
+      "loss": 0.0,
+      "step": 43
+    },
+    {
+      "epoch": 0.14142225793491361,
+      "grad_norm": NaN,
+      "learning_rate": 9.275651998382377e-05,
+      "loss": 0.0,
+      "step": 44
+    },
+    {
+      "epoch": 0.1446364001607071,
+      "grad_norm": NaN,
+      "learning_rate": 8.79463319744677e-05,
+      "loss": 0.0,
+      "step": 45
+    },
+    {
+      "epoch": 0.1478505423865006,
+      "grad_norm": NaN,
+      "learning_rate": 8.316429586529615e-05,
+      "loss": 0.0,
+      "step": 46
+    },
+    {
+      "epoch": 0.15106468461229408,
+      "grad_norm": NaN,
+      "learning_rate": 7.84215803232194e-05,
+      "loss": 0.0,
+      "step": 47
+    },
+    {
+      "epoch": 0.15427882683808758,
+      "grad_norm": NaN,
+      "learning_rate": 7.372926218014131e-05,
+      "loss": 0.0,
+      "step": 48
+    },
+    {
+      "epoch": 0.15749296906388108,
+      "grad_norm": NaN,
+      "learning_rate": 6.909830056250527e-05,
+      "loss": 0.0,
+      "step": 49
+    },
+    {
+      "epoch": 0.16070711128967458,
+      "grad_norm": NaN,
+      "learning_rate": 6.453951129574644e-05,
+      "loss": 0.0,
+      "step": 50
+    },
+    {
+      "epoch": 0.16070711128967458,
+      "eval_loss": NaN,
+      "eval_runtime": 48.3643,
+      "eval_samples_per_second": 10.855,
+      "eval_steps_per_second": 5.438,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 6.042792895709184e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null