Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:588d2be79aab2a633885e96f80d57c67ed3d11dda9968902f426b9d4890b35d6
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:398a63a914601e65fd72d955e3ee1636962e1c25746ced4da85c0451cbcc754c
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c8a9ae1fb4a736e73433d8d2bd00c4019abb28addf3fc70a19612c8a178ea2f
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:27995e8c851b6ed1b65e6635000fe406eba2a896b7919c5074b4510897740b41
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5a6f644270a3c224c532247f2b74d6dc78ff4bb191eb8d7e5e4549800f90be6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c7760b223f3bf0b5bc9dd2b260337eae03f343febddf5bffebf9d6c3538aff8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d90116c540b4ff0066495fbccc9c914a568905fb44c6564f227952cc4231b00
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:03ad66011cfc1fc727a51190602a41adc332b48eeef62a5ee87c2ca9f9b90b2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.2451000064611435,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.003562776115148924,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 8.062,
       "eval_steps_per_second": 2.017,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.904498393166643e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.23508746922016144,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.007125552230297848,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.062,
       "eval_steps_per_second": 2.017,
       "step": 50
+    },
+    {
+      "epoch": 0.0036340316374519027,
+      "grad_norm": 0.9585462808609009,
+      "learning_rate": 2.3816778784387097e-05,
+      "loss": 0.4262,
+      "step": 51
+    },
+    {
+      "epoch": 0.003705287159754881,
+      "grad_norm": 1.3350521326065063,
+      "learning_rate": 2.3263454721781537e-05,
+      "loss": 0.3508,
+      "step": 52
+    },
+    {
+      "epoch": 0.0037765426820578593,
+      "grad_norm": 0.9149147868156433,
+      "learning_rate": 2.2693489161088592e-05,
+      "loss": 0.2918,
+      "step": 53
+    },
+    {
+      "epoch": 0.003847798204360838,
+      "grad_norm": 0.9929086565971375,
+      "learning_rate": 2.210802993709498e-05,
+      "loss": 0.2762,
+      "step": 54
+    },
+    {
+      "epoch": 0.003919053726663816,
+      "grad_norm": 1.152302861213684,
+      "learning_rate": 2.1508256086763372e-05,
+      "loss": 0.4493,
+      "step": 55
+    },
+    {
+      "epoch": 0.003990309248966795,
+      "grad_norm": 0.7821376919746399,
+      "learning_rate": 2.0895375474808857e-05,
+      "loss": 0.3416,
+      "step": 56
+    },
+    {
+      "epoch": 0.004061564771269773,
+      "grad_norm": 0.9475064277648926,
+      "learning_rate": 2.0270622361220143e-05,
+      "loss": 0.2441,
+      "step": 57
+    },
+    {
+      "epoch": 0.004132820293572752,
+      "grad_norm": 1.1275452375411987,
+      "learning_rate": 1.963525491562421e-05,
+      "loss": 0.2919,
+      "step": 58
+    },
+    {
+      "epoch": 0.0042040758158757305,
+      "grad_norm": 1.3180280923843384,
+      "learning_rate": 1.8990552683500128e-05,
+      "loss": 0.3509,
+      "step": 59
+    },
+    {
+      "epoch": 0.004275331338178709,
+      "grad_norm": 0.9340567588806152,
+      "learning_rate": 1.8337814009344716e-05,
+      "loss": 0.3231,
+      "step": 60
+    },
+    {
+      "epoch": 0.004346586860481687,
+      "grad_norm": 0.8491362929344177,
+      "learning_rate": 1.767835342197955e-05,
+      "loss": 0.241,
+      "step": 61
+    },
+    {
+      "epoch": 0.004417842382784666,
+      "grad_norm": 1.1892591714859009,
+      "learning_rate": 1.7013498987264832e-05,
+      "loss": 0.2353,
+      "step": 62
+    },
+    {
+      "epoch": 0.004489097905087645,
+      "grad_norm": 0.8448580503463745,
+      "learning_rate": 1.6344589633551502e-05,
+      "loss": 0.317,
+      "step": 63
+    },
+    {
+      "epoch": 0.0045603534273906225,
+      "grad_norm": 1.1618448495864868,
+      "learning_rate": 1.5672972455257726e-05,
+      "loss": 0.3225,
+      "step": 64
+    },
+    {
+      "epoch": 0.004631608949693601,
+      "grad_norm": 1.12700355052948,
+      "learning_rate": 1.5e-05,
+      "loss": 0.3619,
+      "step": 65
+    },
+    {
+      "epoch": 0.00470286447199658,
+      "grad_norm": 1.1308168172836304,
+      "learning_rate": 1.4327027544742281e-05,
+      "loss": 0.3482,
+      "step": 66
+    },
+    {
+      "epoch": 0.004774119994299558,
+      "grad_norm": 0.7516178488731384,
+      "learning_rate": 1.36554103664485e-05,
+      "loss": 0.1622,
+      "step": 67
+    },
+    {
+      "epoch": 0.004845375516602537,
+      "grad_norm": 1.1769253015518188,
+      "learning_rate": 1.2986501012735174e-05,
+      "loss": 0.2047,
+      "step": 68
+    },
+    {
+      "epoch": 0.004916631038905515,
+      "grad_norm": 0.9646146297454834,
+      "learning_rate": 1.2321646578020452e-05,
+      "loss": 0.2437,
+      "step": 69
+    },
+    {
+      "epoch": 0.004987886561208493,
+      "grad_norm": 1.2298526763916016,
+      "learning_rate": 1.1662185990655285e-05,
+      "loss": 0.4731,
+      "step": 70
+    },
+    {
+      "epoch": 0.005059142083511472,
+      "grad_norm": 0.7508978247642517,
+      "learning_rate": 1.1009447316499875e-05,
+      "loss": 0.1173,
+      "step": 71
+    },
+    {
+      "epoch": 0.005130397605814451,
+      "grad_norm": 1.2414405345916748,
+      "learning_rate": 1.036474508437579e-05,
+      "loss": 0.293,
+      "step": 72
+    },
+    {
+      "epoch": 0.0052016531281174295,
+      "grad_norm": 0.951433539390564,
+      "learning_rate": 9.729377638779859e-06,
+      "loss": 0.1768,
+      "step": 73
+    },
+    {
+      "epoch": 0.005272908650420407,
+      "grad_norm": 1.0032628774642944,
+      "learning_rate": 9.104624525191147e-06,
+      "loss": 0.2115,
+      "step": 74
+    },
+    {
+      "epoch": 0.005344164172723386,
+      "grad_norm": 1.182035207748413,
+      "learning_rate": 8.491743913236629e-06,
+      "loss": 0.2316,
+      "step": 75
+    },
+    {
+      "epoch": 0.005415419695026365,
+      "grad_norm": 1.3811928033828735,
+      "learning_rate": 7.89197006290502e-06,
+      "loss": 0.2767,
+      "step": 76
+    },
+    {
+      "epoch": 0.005486675217329343,
+      "grad_norm": 1.2715567350387573,
+      "learning_rate": 7.30651083891141e-06,
+      "loss": 0.1368,
+      "step": 77
+    },
+    {
+      "epoch": 0.0055579307396323215,
+      "grad_norm": 1.1821553707122803,
+      "learning_rate": 6.736545278218464e-06,
+      "loss": 0.164,
+      "step": 78
+    },
+    {
+      "epoch": 0.0056291862619353,
+      "grad_norm": 1.0368329286575317,
+      "learning_rate": 6.1832212156129045e-06,
+      "loss": 0.2001,
+      "step": 79
+    },
+    {
+      "epoch": 0.005700441784238278,
+      "grad_norm": 0.8645971417427063,
+      "learning_rate": 5.647652972118998e-06,
+      "loss": 0.1707,
+      "step": 80
+    },
+    {
+      "epoch": 0.005771697306541257,
+      "grad_norm": 0.9727141857147217,
+      "learning_rate": 5.130919110904311e-06,
+      "loss": 0.2369,
+      "step": 81
+    },
+    {
+      "epoch": 0.005842952828844236,
+      "grad_norm": 0.7467105388641357,
+      "learning_rate": 4.6340602651970304e-06,
+      "loss": 0.1482,
+      "step": 82
+    },
+    {
+      "epoch": 0.0059142083511472135,
+      "grad_norm": 1.139006495475769,
+      "learning_rate": 4.158077042589129e-06,
+      "loss": 0.1787,
+      "step": 83
+    },
+    {
+      "epoch": 0.005985463873450192,
+      "grad_norm": 1.2143248319625854,
+      "learning_rate": 3.7039280099458373e-06,
+      "loss": 0.2871,
+      "step": 84
+    },
+    {
+      "epoch": 0.006056719395753171,
+      "grad_norm": 2.230907678604126,
+      "learning_rate": 3.272527762979553e-06,
+      "loss": 0.2485,
+      "step": 85
+    },
+    {
+      "epoch": 0.00612797491805615,
+      "grad_norm": 1.4834065437316895,
+      "learning_rate": 2.86474508437579e-06,
+      "loss": 0.2082,
+      "step": 86
+    },
+    {
+      "epoch": 0.006199230440359128,
+      "grad_norm": 1.2078521251678467,
+      "learning_rate": 2.4814011941804603e-06,
+      "loss": 0.205,
+      "step": 87
+    },
+    {
+      "epoch": 0.006270485962662106,
+      "grad_norm": 1.078839659690857,
+      "learning_rate": 2.1232680959720085e-06,
+      "loss": 0.1255,
+      "step": 88
+    },
+    {
+      "epoch": 0.006341741484965085,
+      "grad_norm": 1.7553359270095825,
+      "learning_rate": 1.79106702214893e-06,
+      "loss": 0.271,
+      "step": 89
+    },
+    {
+      "epoch": 0.006412997007268063,
+      "grad_norm": 1.4102157354354858,
+      "learning_rate": 1.4854669814637145e-06,
+      "loss": 0.192,
+      "step": 90
+    },
+    {
+      "epoch": 0.006484252529571042,
+      "grad_norm": 0.9362900853157043,
+      "learning_rate": 1.2070834117282414e-06,
+      "loss": 0.1759,
+      "step": 91
+    },
+    {
+      "epoch": 0.0065555080518740205,
+      "grad_norm": 1.3166675567626953,
+      "learning_rate": 9.56476940403942e-07,
+      "loss": 0.2363,
+      "step": 92
+    },
+    {
+      "epoch": 0.006626763574176998,
+      "grad_norm": 1.0089662075042725,
+      "learning_rate": 7.341522555726971e-07,
+      "loss": 0.2477,
+      "step": 93
+    },
+    {
+      "epoch": 0.006698019096479977,
+      "grad_norm": 1.362122654914856,
+      "learning_rate": 5.405570895622014e-07,
+      "loss": 0.2324,
+      "step": 94
+    },
+    {
+      "epoch": 0.006769274618782956,
+      "grad_norm": 0.8471099734306335,
+      "learning_rate": 3.760813172726457e-07,
+      "loss": 0.1179,
+      "step": 95
+    },
+    {
+      "epoch": 0.006840530141085934,
+      "grad_norm": 1.2971007823944092,
+      "learning_rate": 2.41056171020555e-07,
+      "loss": 0.1556,
+      "step": 96
+    },
+    {
+      "epoch": 0.0069117856633889125,
+      "grad_norm": 1.1084494590759277,
+      "learning_rate": 1.357535734809795e-07,
+      "loss": 0.2148,
+      "step": 97
+    },
+    {
+      "epoch": 0.006983041185691891,
+      "grad_norm": 1.6969143152236938,
+      "learning_rate": 6.038559007141397e-08,
+      "loss": 0.2654,
+      "step": 98
+    },
+    {
+      "epoch": 0.00705429670799487,
+      "grad_norm": 1.321176290512085,
+      "learning_rate": 1.510400188028116e-08,
+      "loss": 0.2057,
+      "step": 99
+    },
+    {
+      "epoch": 0.007125552230297848,
+      "grad_norm": 1.1489577293395996,
+      "learning_rate": 0.0,
+      "loss": 0.1503,
+      "step": 100
+    },
+    {
+      "epoch": 0.007125552230297848,
+      "eval_loss": 0.23508746922016144,
+      "eval_runtime": 733.1631,
+      "eval_samples_per_second": 8.06,
+      "eval_steps_per_second": 2.016,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.808996786333286e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null