Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +371 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26d37785ddc8ffc9db9097edeb1ec736ab43261dc5908d8c67fba54a7a3f3f77
 size 767856

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe7d367849eecb8dda941dfe4cc125baa244258d2522b6e8063b56df13f50a07
 size 767856

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aeeba2bdb6761c6aeb443fea9dbf9e6360c4682cdd9a6c30658ab1d2bd1e8273
 size 1601338

 version https://git-lfs.github.com/spec/v1
+oid sha256:70f8d29ab82dcf882a4c0f7f61e641a0d66fd875a85f9ec4fbc2838618655778
 size 1601338

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af28b0653babcb4ba6d11c0c01e24ea618be8dd5ecdada95d9ec39ca75c1d088
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8cfa92e786dc8058a98db4c8d65fae09ff5a09363959f173afe0900a72ea440
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ad2841b888ce0ae948634757c3fcacf0119c249e0fec8f3ca61ea266369ef92
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5d2a6c6aafc669cea03b9634666f204de949a3d45ce2f48a07e7e3eaf18c715
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 7.4686126708984375,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.013448994687647098,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,372 @@
       "eval_samples_per_second": 45.352,
       "eval_steps_per_second": 6.349,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -409,7 +775,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 43846474924032.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 7.306600570678711,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.026897989375294196,
   "eval_steps": 25,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 45.352,
       "eval_steps_per_second": 6.349,
       "step": 50
+    },
+    {
+      "epoch": 0.01371797458140004,
+      "grad_norm": 2.073488235473633,
+      "learning_rate": 0.0002668315918143169,
+      "loss": 6.6067,
+      "step": 51
+    },
+    {
+      "epoch": 0.013986954475152982,
+      "grad_norm": 1.6959115266799927,
+      "learning_rate": 0.00026526016662852886,
+      "loss": 7.4615,
+      "step": 52
+    },
+    {
+      "epoch": 0.014255934368905925,
+      "grad_norm": 1.1312484741210938,
+      "learning_rate": 0.00026365723046405023,
+      "loss": 7.562,
+      "step": 53
+    },
+    {
+      "epoch": 0.014524914262658867,
+      "grad_norm": 0.8906031847000122,
+      "learning_rate": 0.0002620232215476231,
+      "loss": 7.8548,
+      "step": 54
+    },
+    {
+      "epoch": 0.014793894156411809,
+      "grad_norm": 1.1955941915512085,
+      "learning_rate": 0.0002603585866009697,
+      "loss": 7.5388,
+      "step": 55
+    },
+    {
+      "epoch": 0.01506287405016475,
+      "grad_norm": 1.6056780815124512,
+      "learning_rate": 0.00025866378071866334,
+      "loss": 6.7458,
+      "step": 56
+    },
+    {
+      "epoch": 0.015331853943917692,
+      "grad_norm": 0.9653465151786804,
+      "learning_rate": 0.00025693926724370956,
+      "loss": 7.2338,
+      "step": 57
+    },
+    {
+      "epoch": 0.015600833837670634,
+      "grad_norm": 1.055378794670105,
+      "learning_rate": 0.00025518551764087326,
+      "loss": 7.2497,
+      "step": 58
+    },
+    {
+      "epoch": 0.015869813731423574,
+      "grad_norm": 0.982029378414154,
+      "learning_rate": 0.00025340301136778483,
+      "loss": 6.3055,
+      "step": 59
+    },
+    {
+      "epoch": 0.016138793625176517,
+      "grad_norm": 1.4051647186279297,
+      "learning_rate": 0.00025159223574386114,
+      "loss": 6.7392,
+      "step": 60
+    },
+    {
+      "epoch": 0.01640777351892946,
+      "grad_norm": 1.224535346031189,
+      "learning_rate": 0.0002497536858170772,
+      "loss": 6.2815,
+      "step": 61
+    },
+    {
+      "epoch": 0.0166767534126824,
+      "grad_norm": 0.9965676069259644,
+      "learning_rate": 0.00024788786422862526,
+      "loss": 6.5987,
+      "step": 62
+    },
+    {
+      "epoch": 0.016945733306435343,
+      "grad_norm": 0.7063784599304199,
+      "learning_rate": 0.00024599528107549745,
+      "loss": 6.5176,
+      "step": 63
+    },
+    {
+      "epoch": 0.017214713200188286,
+      "grad_norm": 1.3905584812164307,
+      "learning_rate": 0.00024407645377103054,
+      "loss": 7.3527,
+      "step": 64
+    },
+    {
+      "epoch": 0.017483693093941228,
+      "grad_norm": 0.6794236898422241,
+      "learning_rate": 0.00024213190690345018,
+      "loss": 7.1635,
+      "step": 65
+    },
+    {
+      "epoch": 0.01775267298769417,
+      "grad_norm": 0.85134357213974,
+      "learning_rate": 0.00024016217209245374,
+      "loss": 6.9162,
+      "step": 66
+    },
+    {
+      "epoch": 0.018021652881447112,
+      "grad_norm": 1.1831365823745728,
+      "learning_rate": 0.00023816778784387094,
+      "loss": 7.4826,
+      "step": 67
+    },
+    {
+      "epoch": 0.018290632775200055,
+      "grad_norm": 0.7540268301963806,
+      "learning_rate": 0.0002361492994024415,
+      "loss": 7.5541,
+      "step": 68
+    },
+    {
+      "epoch": 0.018559612668952997,
+      "grad_norm": 0.8305040597915649,
+      "learning_rate": 0.0002341072586027509,
+      "loss": 7.0236,
+      "step": 69
+    },
+    {
+      "epoch": 0.01882859256270594,
+      "grad_norm": 0.8232185244560242,
+      "learning_rate": 0.00023204222371836405,
+      "loss": 7.256,
+      "step": 70
+    },
+    {
+      "epoch": 0.01909757245645888,
+      "grad_norm": 0.8017435669898987,
+      "learning_rate": 0.00022995475930919905,
+      "loss": 7.2468,
+      "step": 71
+    },
+    {
+      "epoch": 0.01936655235021182,
+      "grad_norm": 0.7299101948738098,
+      "learning_rate": 0.00022784543606718227,
+      "loss": 6.8815,
+      "step": 72
+    },
+    {
+      "epoch": 0.019635532243964762,
+      "grad_norm": 0.9603165984153748,
+      "learning_rate": 0.00022571483066022657,
+      "loss": 7.1806,
+      "step": 73
+    },
+    {
+      "epoch": 0.019904512137717705,
+      "grad_norm": 0.7412318587303162,
+      "learning_rate": 0.0002235635255745762,
+      "loss": 6.8687,
+      "step": 74
+    },
+    {
+      "epoch": 0.020173492031470647,
+      "grad_norm": 1.5883235931396484,
+      "learning_rate": 0.00022139210895556104,
+      "loss": 6.6477,
+      "step": 75
+    },
+    {
+      "epoch": 0.020173492031470647,
+      "eval_loss": 7.266612529754639,
+      "eval_runtime": 1.1026,
+      "eval_samples_per_second": 45.347,
+      "eval_steps_per_second": 6.349,
+      "step": 75
+    },
+    {
+      "epoch": 0.02044247192522359,
+      "grad_norm": 0.874472975730896,
+      "learning_rate": 0.00021920117444680317,
+      "loss": 6.5988,
+      "step": 76
+    },
+    {
+      "epoch": 0.02071145181897653,
+      "grad_norm": 1.0724687576293945,
+      "learning_rate": 0.00021699132102792097,
+      "loss": 7.0543,
+      "step": 77
+    },
+    {
+      "epoch": 0.020980431712729473,
+      "grad_norm": 0.7699567079544067,
+      "learning_rate": 0.0002147631528507739,
+      "loss": 7.1625,
+      "step": 78
+    },
+    {
+      "epoch": 0.021249411606482416,
+      "grad_norm": 0.622986912727356,
+      "learning_rate": 0.00021251727907429355,
+      "loss": 7.4872,
+      "step": 79
+    },
+    {
+      "epoch": 0.021518391500235358,
+      "grad_norm": 0.9322075843811035,
+      "learning_rate": 0.0002102543136979454,
+      "loss": 7.0894,
+      "step": 80
+    },
+    {
+      "epoch": 0.0217873713939883,
+      "grad_norm": 0.6225010752677917,
+      "learning_rate": 0.0002079748753938678,
+      "loss": 6.9691,
+      "step": 81
+    },
+    {
+      "epoch": 0.022056351287741242,
+      "grad_norm": 0.9002844095230103,
+      "learning_rate": 0.0002056795873377331,
+      "loss": 6.5822,
+      "step": 82
+    },
+    {
+      "epoch": 0.022325331181494185,
+      "grad_norm": 0.6348511576652527,
+      "learning_rate": 0.00020336907703837748,
+      "loss": 6.8699,
+      "step": 83
+    },
+    {
+      "epoch": 0.022594311075247127,
+      "grad_norm": 0.5231301188468933,
+      "learning_rate": 0.00020104397616624645,
+      "loss": 6.8725,
+      "step": 84
+    },
+    {
+      "epoch": 0.022863290969000066,
+      "grad_norm": 0.68709397315979,
+      "learning_rate": 0.00019870492038070252,
+      "loss": 6.7312,
+      "step": 85
+    },
+    {
+      "epoch": 0.023132270862753008,
+      "grad_norm": 0.47030457854270935,
+      "learning_rate": 0.0001963525491562421,
+      "loss": 6.7572,
+      "step": 86
+    },
+    {
+      "epoch": 0.02340125075650595,
+      "grad_norm": 0.7477748394012451,
+      "learning_rate": 0.0001939875056076697,
+      "loss": 7.014,
+      "step": 87
+    },
+    {
+      "epoch": 0.023670230650258892,
+      "grad_norm": 0.9135372638702393,
+      "learning_rate": 0.00019161043631427666,
+      "loss": 6.8656,
+      "step": 88
+    },
+    {
+      "epoch": 0.023939210544011835,
+      "grad_norm": 0.7729247212409973,
+      "learning_rate": 0.00018922199114307294,
+      "loss": 6.8811,
+      "step": 89
+    },
+    {
+      "epoch": 0.024208190437764777,
+      "grad_norm": 0.8843768835067749,
+      "learning_rate": 0.00018682282307111987,
+      "loss": 6.8623,
+      "step": 90
+    },
+    {
+      "epoch": 0.02447717033151772,
+      "grad_norm": 0.5804451107978821,
+      "learning_rate": 0.00018441358800701273,
+      "loss": 6.8921,
+      "step": 91
+    },
+    {
+      "epoch": 0.02474615022527066,
+      "grad_norm": 0.9439037442207336,
+      "learning_rate": 0.00018199494461156203,
+      "loss": 7.4667,
+      "step": 92
+    },
+    {
+      "epoch": 0.025015130119023603,
+      "grad_norm": 0.6652296185493469,
+      "learning_rate": 0.000179567554117722,
+      "loss": 7.1213,
+      "step": 93
+    },
+    {
+      "epoch": 0.025284110012776546,
+      "grad_norm": 0.9313641786575317,
+      "learning_rate": 0.00017713208014981648,
+      "loss": 6.9055,
+      "step": 94
+    },
+    {
+      "epoch": 0.025553089906529488,
+      "grad_norm": 0.5718141198158264,
+      "learning_rate": 0.00017468918854211007,
+      "loss": 7.4156,
+      "step": 95
+    },
+    {
+      "epoch": 0.02582206980028243,
+      "grad_norm": 0.90057772397995,
+      "learning_rate": 0.00017223954715677627,
+      "loss": 7.0547,
+      "step": 96
+    },
+    {
+      "epoch": 0.026091049694035372,
+      "grad_norm": 0.8088359236717224,
+      "learning_rate": 0.00016978382570131034,
+      "loss": 6.8638,
+      "step": 97
+    },
+    {
+      "epoch": 0.02636002958778831,
+      "grad_norm": 0.7129319310188293,
+      "learning_rate": 0.00016732269554543794,
+      "loss": 7.0757,
+      "step": 98
+    },
+    {
+      "epoch": 0.026629009481541253,
+      "grad_norm": 0.7212324142456055,
+      "learning_rate": 0.00016485682953756942,
+      "loss": 7.3563,
+      "step": 99
+    },
+    {
+      "epoch": 0.026897989375294196,
+      "grad_norm": 1.3920371532440186,
+      "learning_rate": 0.00016238690182084986,
+      "loss": 7.3419,
+      "step": 100
+    },
+    {
+      "epoch": 0.026897989375294196,
+      "eval_loss": 7.306600570678711,
+      "eval_runtime": 1.1005,
+      "eval_samples_per_second": 45.435,
+      "eval_steps_per_second": 6.361,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 87404486197248.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null