Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +371 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14425cb6f2aed42a774aaf4bae36025555af0fc081a4802b83ef440044be9994
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:bd828ba53b8c6623ff8e36bd9441bb1d75a54a0e8342e5fd432bb89b65e94dc9
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bb585da1ebfb31af76699ce96ae9de5fff1f7fb518fe2b87f101abd47eb01b2
 size 118090

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e332a7ee8d83e8bcd0c09d44b4a2c12c420ffad631917b2fa01a1b869e94483
 size 118090

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:251bd011cb18f64cfef2f2d3c817f9ddbdd20f24196ade20b75388f7d65f6f4a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e65dacef02e8f16b4107578c296de6f7809ff6786b3688b2253b1e317ee5a998
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ad2841b888ce0ae948634757c3fcacf0119c249e0fec8f3ca61ea266369ef92
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5d2a6c6aafc669cea03b9634666f204de949a3d45ce2f48a07e7e3eaf18c715
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.347302436828613,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.0020012607943004092,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,372 @@
       "eval_samples_per_second": 344.282,
       "eval_steps_per_second": 48.2,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -409,7 +775,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5612706299904.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.342674255371094,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0040025215886008185,
   "eval_steps": 25,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 344.282,
       "eval_steps_per_second": 48.2,
       "step": 50
+    },
+    {
+      "epoch": 0.0020412860101864175,
+      "grad_norm": 0.059315428137779236,
+      "learning_rate": 0.0002668315918143169,
+      "loss": 10.3472,
+      "step": 51
+    },
+    {
+      "epoch": 0.0020813112260724257,
+      "grad_norm": 0.054883792996406555,
+      "learning_rate": 0.00026526016662852886,
+      "loss": 10.3476,
+      "step": 52
+    },
+    {
+      "epoch": 0.002121336441958434,
+      "grad_norm": 0.05184870585799217,
+      "learning_rate": 0.00026365723046405023,
+      "loss": 10.346,
+      "step": 53
+    },
+    {
+      "epoch": 0.002161361657844442,
+      "grad_norm": 0.04580259323120117,
+      "learning_rate": 0.0002620232215476231,
+      "loss": 10.3466,
+      "step": 54
+    },
+    {
+      "epoch": 0.00220138687373045,
+      "grad_norm": 0.03658067807555199,
+      "learning_rate": 0.0002603585866009697,
+      "loss": 10.3447,
+      "step": 55
+    },
+    {
+      "epoch": 0.0022414120896164583,
+      "grad_norm": 0.03653530403971672,
+      "learning_rate": 0.00025866378071866334,
+      "loss": 10.3457,
+      "step": 56
+    },
+    {
+      "epoch": 0.0022814373055024665,
+      "grad_norm": 0.03813878819346428,
+      "learning_rate": 0.00025693926724370956,
+      "loss": 10.3469,
+      "step": 57
+    },
+    {
+      "epoch": 0.0023214625213884748,
+      "grad_norm": 0.026853034272789955,
+      "learning_rate": 0.00025518551764087326,
+      "loss": 10.3462,
+      "step": 58
+    },
+    {
+      "epoch": 0.002361487737274483,
+      "grad_norm": 0.03866073861718178,
+      "learning_rate": 0.00025340301136778483,
+      "loss": 10.3471,
+      "step": 59
+    },
+    {
+      "epoch": 0.0024015129531604913,
+      "grad_norm": 0.03002983331680298,
+      "learning_rate": 0.00025159223574386114,
+      "loss": 10.3458,
+      "step": 60
+    },
+    {
+      "epoch": 0.0024415381690464995,
+      "grad_norm": 0.04265514016151428,
+      "learning_rate": 0.0002497536858170772,
+      "loss": 10.3481,
+      "step": 61
+    },
+    {
+      "epoch": 0.0024815633849325073,
+      "grad_norm": 0.035741109400987625,
+      "learning_rate": 0.00024788786422862526,
+      "loss": 10.3474,
+      "step": 62
+    },
+    {
+      "epoch": 0.0025215886008185156,
+      "grad_norm": 0.030464909970760345,
+      "learning_rate": 0.00024599528107549745,
+      "loss": 10.3462,
+      "step": 63
+    },
+    {
+      "epoch": 0.002561613816704524,
+      "grad_norm": 0.03574507683515549,
+      "learning_rate": 0.00024407645377103054,
+      "loss": 10.3464,
+      "step": 64
+    },
+    {
+      "epoch": 0.002601639032590532,
+      "grad_norm": 0.03380665183067322,
+      "learning_rate": 0.00024213190690345018,
+      "loss": 10.3463,
+      "step": 65
+    },
+    {
+      "epoch": 0.0026416642484765403,
+      "grad_norm": 0.03315335139632225,
+      "learning_rate": 0.00024016217209245374,
+      "loss": 10.3463,
+      "step": 66
+    },
+    {
+      "epoch": 0.0026816894643625485,
+      "grad_norm": 0.037679944187402725,
+      "learning_rate": 0.00023816778784387094,
+      "loss": 10.3445,
+      "step": 67
+    },
+    {
+      "epoch": 0.0027217146802485568,
+      "grad_norm": 0.029793309047818184,
+      "learning_rate": 0.0002361492994024415,
+      "loss": 10.3457,
+      "step": 68
+    },
+    {
+      "epoch": 0.0027617398961345646,
+      "grad_norm": 0.029628276824951172,
+      "learning_rate": 0.0002341072586027509,
+      "loss": 10.3461,
+      "step": 69
+    },
+    {
+      "epoch": 0.002801765112020573,
+      "grad_norm": 0.02894148789346218,
+      "learning_rate": 0.00023204222371836405,
+      "loss": 10.3458,
+      "step": 70
+    },
+    {
+      "epoch": 0.002841790327906581,
+      "grad_norm": 0.025133224204182625,
+      "learning_rate": 0.00022995475930919905,
+      "loss": 10.3462,
+      "step": 71
+    },
+    {
+      "epoch": 0.0028818155437925893,
+      "grad_norm": 0.029297346249222755,
+      "learning_rate": 0.00022784543606718227,
+      "loss": 10.3457,
+      "step": 72
+    },
+    {
+      "epoch": 0.0029218407596785976,
+      "grad_norm": 0.02636672556400299,
+      "learning_rate": 0.00022571483066022657,
+      "loss": 10.3437,
+      "step": 73
+    },
+    {
+      "epoch": 0.002961865975564606,
+      "grad_norm": 0.027142280712723732,
+      "learning_rate": 0.0002235635255745762,
+      "loss": 10.347,
+      "step": 74
+    },
+    {
+      "epoch": 0.003001891191450614,
+      "grad_norm": 0.032178737223148346,
+      "learning_rate": 0.00022139210895556104,
+      "loss": 10.3449,
+      "step": 75
+    },
+    {
+      "epoch": 0.003001891191450614,
+      "eval_loss": 10.344524383544922,
+      "eval_runtime": 0.1491,
+      "eval_samples_per_second": 335.263,
+      "eval_steps_per_second": 46.937,
+      "step": 75
+    },
+    {
+      "epoch": 0.003041916407336622,
+      "grad_norm": 0.03191864490509033,
+      "learning_rate": 0.00021920117444680317,
+      "loss": 10.3465,
+      "step": 76
+    },
+    {
+      "epoch": 0.00308194162322263,
+      "grad_norm": 0.030026139691472054,
+      "learning_rate": 0.00021699132102792097,
+      "loss": 10.3456,
+      "step": 77
+    },
+    {
+      "epoch": 0.0031219668391086384,
+      "grad_norm": 0.030527856200933456,
+      "learning_rate": 0.0002147631528507739,
+      "loss": 10.3456,
+      "step": 78
+    },
+    {
+      "epoch": 0.0031619920549946466,
+      "grad_norm": 0.02526240237057209,
+      "learning_rate": 0.00021251727907429355,
+      "loss": 10.346,
+      "step": 79
+    },
+    {
+      "epoch": 0.003202017270880655,
+      "grad_norm": 0.028264405205845833,
+      "learning_rate": 0.0002102543136979454,
+      "loss": 10.3447,
+      "step": 80
+    },
+    {
+      "epoch": 0.003242042486766663,
+      "grad_norm": 0.03205219283699989,
+      "learning_rate": 0.0002079748753938678,
+      "loss": 10.3443,
+      "step": 81
+    },
+    {
+      "epoch": 0.0032820677026526713,
+      "grad_norm": 0.0387556366622448,
+      "learning_rate": 0.0002056795873377331,
+      "loss": 10.3443,
+      "step": 82
+    },
+    {
+      "epoch": 0.003322092918538679,
+      "grad_norm": 0.02674328349530697,
+      "learning_rate": 0.00020336907703837748,
+      "loss": 10.3467,
+      "step": 83
+    },
+    {
+      "epoch": 0.0033621181344246874,
+      "grad_norm": 0.029292669147253036,
+      "learning_rate": 0.00020104397616624645,
+      "loss": 10.346,
+      "step": 84
+    },
+    {
+      "epoch": 0.0034021433503106956,
+      "grad_norm": 0.02718389220535755,
+      "learning_rate": 0.00019870492038070252,
+      "loss": 10.3449,
+      "step": 85
+    },
+    {
+      "epoch": 0.003442168566196704,
+      "grad_norm": 0.023716706782579422,
+      "learning_rate": 0.0001963525491562421,
+      "loss": 10.3446,
+      "step": 86
+    },
+    {
+      "epoch": 0.003482193782082712,
+      "grad_norm": 0.030224012210965157,
+      "learning_rate": 0.0001939875056076697,
+      "loss": 10.3442,
+      "step": 87
+    },
+    {
+      "epoch": 0.0035222189979687204,
+      "grad_norm": 0.027252135798335075,
+      "learning_rate": 0.00019161043631427666,
+      "loss": 10.3446,
+      "step": 88
+    },
+    {
+      "epoch": 0.0035622442138547286,
+      "grad_norm": 0.031860899180173874,
+      "learning_rate": 0.00018922199114307294,
+      "loss": 10.3461,
+      "step": 89
+    },
+    {
+      "epoch": 0.003602269429740737,
+      "grad_norm": 0.024651000276207924,
+      "learning_rate": 0.00018682282307111987,
+      "loss": 10.3456,
+      "step": 90
+    },
+    {
+      "epoch": 0.0036422946456267447,
+      "grad_norm": 0.021395694464445114,
+      "learning_rate": 0.00018441358800701273,
+      "loss": 10.3475,
+      "step": 91
+    },
+    {
+      "epoch": 0.003682319861512753,
+      "grad_norm": 0.02844615839421749,
+      "learning_rate": 0.00018199494461156203,
+      "loss": 10.3456,
+      "step": 92
+    },
+    {
+      "epoch": 0.003722345077398761,
+      "grad_norm": 0.029111430048942566,
+      "learning_rate": 0.000179567554117722,
+      "loss": 10.3441,
+      "step": 93
+    },
+    {
+      "epoch": 0.0037623702932847694,
+      "grad_norm": 0.04474746063351631,
+      "learning_rate": 0.00017713208014981648,
+      "loss": 10.343,
+      "step": 94
+    },
+    {
+      "epoch": 0.0038023955091707777,
+      "grad_norm": 0.041326723992824554,
+      "learning_rate": 0.00017468918854211007,
+      "loss": 10.3458,
+      "step": 95
+    },
+    {
+      "epoch": 0.003842420725056786,
+      "grad_norm": 0.037037789821624756,
+      "learning_rate": 0.00017223954715677627,
+      "loss": 10.3435,
+      "step": 96
+    },
+    {
+      "epoch": 0.003882445940942794,
+      "grad_norm": 0.049130525439977646,
+      "learning_rate": 0.00016978382570131034,
+      "loss": 10.3454,
+      "step": 97
+    },
+    {
+      "epoch": 0.003922471156828802,
+      "grad_norm": 0.0425594188272953,
+      "learning_rate": 0.00016732269554543794,
+      "loss": 10.3448,
+      "step": 98
+    },
+    {
+      "epoch": 0.003962496372714811,
+      "grad_norm": 0.04153624549508095,
+      "learning_rate": 0.00016485682953756942,
+      "loss": 10.3475,
+      "step": 99
+    },
+    {
+      "epoch": 0.0040025215886008185,
+      "grad_norm": 0.04395274072885513,
+      "learning_rate": 0.00016238690182084986,
+      "loss": 10.3474,
+      "step": 100
+    },
+    {
+      "epoch": 0.0040025215886008185,
+      "eval_loss": 10.342674255371094,
+      "eval_runtime": 0.1446,
+      "eval_samples_per_second": 345.872,
+      "eval_steps_per_second": 48.422,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 11225412599808.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null