Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f00a359f23f841c35bc7f2c05b50f88fa4ffbafde6529bb8f0f7e934f0d0015
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:cbd3af65a1a21661d230da092c1951623375451bc9baa538f5596df6bfd31a35
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9da25129dd0c067e7e717a6476a567ca1ccd55d4f78cc432d2665932ac0b9445
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c6ee579cd2527284739dd4c51f9da3ee7749d73c40497777b11422e9214323e
 size 335922386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04abeca279b226a6a380f6f6ebf9f5fc6a39afdf329aa2fca2946deb1fed163a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:171c613a15ea21d0246cef4d9e299db55469335aca7c6fa2134b058abf79ccb5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9812448024749756,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 1.3986013986013985,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 12.971,
       "eval_steps_per_second": 3.402,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.026908475477197e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.9307088255882263,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 2.797202797202797,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.971,
       "eval_steps_per_second": 3.402,
       "step": 50
+    },
+    {
+      "epoch": 1.4265734265734267,
+      "grad_norm": 19.437496185302734,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 7.9052,
+      "step": 51
+    },
+    {
+      "epoch": 1.4545454545454546,
+      "grad_norm": 26.167152404785156,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 9.7995,
+      "step": 52
+    },
+    {
+      "epoch": 1.4825174825174825,
+      "grad_norm": 13.2216157913208,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 7.3562,
+      "step": 53
+    },
+    {
+      "epoch": 1.5104895104895104,
+      "grad_norm": 15.14995002746582,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 7.6083,
+      "step": 54
+    },
+    {
+      "epoch": 1.5384615384615383,
+      "grad_norm": 14.735421180725098,
+      "learning_rate": 2.5e-06,
+      "loss": 7.493,
+      "step": 55
+    },
+    {
+      "epoch": 1.5664335664335665,
+      "grad_norm": 16.057226181030273,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 7.7143,
+      "step": 56
+    },
+    {
+      "epoch": 1.5944055944055944,
+      "grad_norm": 17.881492614746094,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 7.9213,
+      "step": 57
+    },
+    {
+      "epoch": 1.6223776223776225,
+      "grad_norm": 18.69293785095215,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 8.0768,
+      "step": 58
+    },
+    {
+      "epoch": 1.6503496503496504,
+      "grad_norm": 20.132923126220703,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 8.4112,
+      "step": 59
+    },
+    {
+      "epoch": 1.6783216783216783,
+      "grad_norm": 22.801490783691406,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 8.7676,
+      "step": 60
+    },
+    {
+      "epoch": 1.7062937062937062,
+      "grad_norm": 12.906396865844727,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 7.2744,
+      "step": 61
+    },
+    {
+      "epoch": 1.7342657342657342,
+      "grad_norm": 14.026016235351562,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 7.0761,
+      "step": 62
+    },
+    {
+      "epoch": 1.762237762237762,
+      "grad_norm": 13.737709999084473,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 7.4639,
+      "step": 63
+    },
+    {
+      "epoch": 1.7902097902097902,
+      "grad_norm": 15.159075736999512,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 7.1799,
+      "step": 64
+    },
+    {
+      "epoch": 1.8181818181818183,
+      "grad_norm": 16.306058883666992,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 7.3868,
+      "step": 65
+    },
+    {
+      "epoch": 1.8461538461538463,
+      "grad_norm": 16.63066291809082,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 7.9422,
+      "step": 66
+    },
+    {
+      "epoch": 1.8741258741258742,
+      "grad_norm": 19.77796173095703,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 8.3306,
+      "step": 67
+    },
+    {
+      "epoch": 1.902097902097902,
+      "grad_norm": 23.449113845825195,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 8.1032,
+      "step": 68
+    },
+    {
+      "epoch": 1.93006993006993,
+      "grad_norm": 13.089604377746582,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 6.8976,
+      "step": 69
+    },
+    {
+      "epoch": 1.958041958041958,
+      "grad_norm": 15.323660850524902,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 7.1423,
+      "step": 70
+    },
+    {
+      "epoch": 1.986013986013986,
+      "grad_norm": 17.456003189086914,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 8.0554,
+      "step": 71
+    },
+    {
+      "epoch": 2.013986013986014,
+      "grad_norm": 17.351194381713867,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 7.5417,
+      "step": 72
+    },
+    {
+      "epoch": 2.041958041958042,
+      "grad_norm": 12.989124298095703,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 6.9213,
+      "step": 73
+    },
+    {
+      "epoch": 2.06993006993007,
+      "grad_norm": 13.786518096923828,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 6.993,
+      "step": 74
+    },
+    {
+      "epoch": 2.097902097902098,
+      "grad_norm": 14.11694622039795,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 7.277,
+      "step": 75
+    },
+    {
+      "epoch": 2.125874125874126,
+      "grad_norm": 15.72767162322998,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 7.5282,
+      "step": 76
+    },
+    {
+      "epoch": 2.1538461538461537,
+      "grad_norm": 17.46666717529297,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 7.2148,
+      "step": 77
+    },
+    {
+      "epoch": 2.1818181818181817,
+      "grad_norm": 17.60526466369629,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 7.6238,
+      "step": 78
+    },
+    {
+      "epoch": 2.20979020979021,
+      "grad_norm": 18.603511810302734,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 8.0623,
+      "step": 79
+    },
+    {
+      "epoch": 2.237762237762238,
+      "grad_norm": 18.113256454467773,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 7.6123,
+      "step": 80
+    },
+    {
+      "epoch": 2.265734265734266,
+      "grad_norm": 12.54310417175293,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 7.2639,
+      "step": 81
+    },
+    {
+      "epoch": 2.2937062937062938,
+      "grad_norm": 14.775126457214355,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 6.9113,
+      "step": 82
+    },
+    {
+      "epoch": 2.3216783216783217,
+      "grad_norm": 13.778559684753418,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 7.214,
+      "step": 83
+    },
+    {
+      "epoch": 2.3496503496503496,
+      "grad_norm": 15.231853485107422,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 7.3562,
+      "step": 84
+    },
+    {
+      "epoch": 2.3776223776223775,
+      "grad_norm": 16.52434539794922,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 7.6841,
+      "step": 85
+    },
+    {
+      "epoch": 2.4055944055944054,
+      "grad_norm": 16.546457290649414,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 7.2925,
+      "step": 86
+    },
+    {
+      "epoch": 2.4335664335664333,
+      "grad_norm": 20.887779235839844,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 8.0049,
+      "step": 87
+    },
+    {
+      "epoch": 2.4615384615384617,
+      "grad_norm": 24.01591682434082,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 8.0908,
+      "step": 88
+    },
+    {
+      "epoch": 2.4895104895104896,
+      "grad_norm": 13.272303581237793,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 6.8586,
+      "step": 89
+    },
+    {
+      "epoch": 2.5174825174825175,
+      "grad_norm": 13.375730514526367,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 6.9535,
+      "step": 90
+    },
+    {
+      "epoch": 2.5454545454545454,
+      "grad_norm": 15.659381866455078,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 7.2084,
+      "step": 91
+    },
+    {
+      "epoch": 2.5734265734265733,
+      "grad_norm": 15.646546363830566,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 6.8886,
+      "step": 92
+    },
+    {
+      "epoch": 2.6013986013986012,
+      "grad_norm": 17.0341796875,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 7.3088,
+      "step": 93
+    },
+    {
+      "epoch": 2.629370629370629,
+      "grad_norm": 17.291650772094727,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 7.3454,
+      "step": 94
+    },
+    {
+      "epoch": 2.6573426573426575,
+      "grad_norm": 18.05946922302246,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 7.7567,
+      "step": 95
+    },
+    {
+      "epoch": 2.6853146853146854,
+      "grad_norm": 16.397621154785156,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 7.8379,
+      "step": 96
+    },
+    {
+      "epoch": 2.7132867132867133,
+      "grad_norm": 14.185662269592285,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 6.869,
+      "step": 97
+    },
+    {
+      "epoch": 2.7412587412587412,
+      "grad_norm": 13.866539001464844,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 6.8378,
+      "step": 98
+    },
+    {
+      "epoch": 2.769230769230769,
+      "grad_norm": 15.03514575958252,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 7.0574,
+      "step": 99
+    },
+    {
+      "epoch": 2.797202797202797,
+      "grad_norm": 15.289778709411621,
+      "learning_rate": 0.0,
+      "loss": 7.0866,
+      "step": 100
+    },
+    {
+      "epoch": 2.797202797202797,
+      "eval_loss": 0.9307088255882263,
+      "eval_runtime": 4.6988,
+      "eval_samples_per_second": 12.982,
+      "eval_steps_per_second": 3.405,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.4053816950954394e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null