Training in progress, step 174, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +417 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:898721c74f3f6db9754d0e51763590d601ef6f6aa1ea960702dec11ce90f7aa8
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:abcde8ac3d04c98a861549359d1224345d51b37788f3b8a385d055e42467481e
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3bbc704f2d554dba80c269377a8d05a954a71224277b51a710ffc1c8c2bcf47
 size 194840426

 version https://git-lfs.github.com/spec/v1
+oid sha256:e12dbbc77c83e3643e10bdd747b43283d299a4a0443c375284bff3cb8b034d78
 size 194840426

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b16142a47bc7a326beb92cd8c6e770378643df7060bf0910e73d114e53bbb34
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:6885c98f9944031b1d47a617f1a2d46af56909da93ca8c4ac4a873f90d3142fe
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d7200ae41ef4ca4e25b400cb2c31ee104706220a889be74188adae29a4f900f
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:3901384dd9ab7f4272cbe89ec0e7d7be7b55f7e04d725cfcd750d27555d4c8c0
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab7b6fce0ffd68999dfaed3e79782a798f64a449077f4f179771fda8a7a023e3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1cfc363eda5dfe78796b361134c848de53d3bd2047f481ddb99265e158e573b4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5032537960954447,
   "eval_steps": 58,
-  "global_step": 116,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -843,6 +843,420 @@
       "eval_samples_per_second": 8.853,
       "eval_steps_per_second": 2.23,
       "step": 116
     }
   ],
   "logging_steps": 1,
@@ -862,7 +1276,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.5936142383290778e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.754880694143167,
   "eval_steps": 58,
+  "global_step": 174,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.853,
       "eval_steps_per_second": 2.23,
       "step": 116
+    },
+    {
+      "epoch": 0.5075921908893709,
+      "grad_norm": 1.3170753717422485,
+      "learning_rate": 0.00013907311284892736,
+      "loss": 2.9572,
+      "step": 117
+    },
+    {
+      "epoch": 0.5119305856832972,
+      "grad_norm": 1.5243107080459595,
+      "learning_rate": 0.00013746065934159123,
+      "loss": 3.3082,
+      "step": 118
+    },
+    {
+      "epoch": 0.5162689804772235,
+      "grad_norm": 1.5845880508422852,
+      "learning_rate": 0.00013583679495453,
+      "loss": 3.4819,
+      "step": 119
+    },
+    {
+      "epoch": 0.5206073752711496,
+      "grad_norm": 1.66307532787323,
+      "learning_rate": 0.00013420201433256689,
+      "loss": 3.1131,
+      "step": 120
+    },
+    {
+      "epoch": 0.5249457700650759,
+      "grad_norm": 1.6470588445663452,
+      "learning_rate": 0.00013255681544571568,
+      "loss": 3.2847,
+      "step": 121
+    },
+    {
+      "epoch": 0.5292841648590022,
+      "grad_norm": 2.1118075847625732,
+      "learning_rate": 0.00013090169943749476,
+      "loss": 3.4669,
+      "step": 122
+    },
+    {
+      "epoch": 0.5336225596529284,
+      "grad_norm": 2.056396722793579,
+      "learning_rate": 0.00012923717047227368,
+      "loss": 3.1136,
+      "step": 123
+    },
+    {
+      "epoch": 0.5379609544468547,
+      "grad_norm": 2.2389657497406006,
+      "learning_rate": 0.0001275637355816999,
+      "loss": 2.9323,
+      "step": 124
+    },
+    {
+      "epoch": 0.5422993492407809,
+      "grad_norm": 2.863621711730957,
+      "learning_rate": 0.00012588190451025207,
+      "loss": 2.9585,
+      "step": 125
+    },
+    {
+      "epoch": 0.5466377440347071,
+      "grad_norm": 0.8712321519851685,
+      "learning_rate": 0.00012419218955996676,
+      "loss": 3.1439,
+      "step": 126
+    },
+    {
+      "epoch": 0.5509761388286334,
+      "grad_norm": 1.0713740587234497,
+      "learning_rate": 0.0001224951054343865,
+      "loss": 3.2213,
+      "step": 127
+    },
+    {
+      "epoch": 0.5553145336225597,
+      "grad_norm": 1.104315996170044,
+      "learning_rate": 0.00012079116908177593,
+      "loss": 3.4522,
+      "step": 128
+    },
+    {
+      "epoch": 0.559652928416486,
+      "grad_norm": 1.0883917808532715,
+      "learning_rate": 0.00011908089953765449,
+      "loss": 3.3503,
+      "step": 129
+    },
+    {
+      "epoch": 0.5639913232104121,
+      "grad_norm": 1.0000834465026855,
+      "learning_rate": 0.00011736481776669306,
+      "loss": 3.4036,
+      "step": 130
+    },
+    {
+      "epoch": 0.5683297180043384,
+      "grad_norm": 0.8869354128837585,
+      "learning_rate": 0.0001156434465040231,
+      "loss": 3.2749,
+      "step": 131
+    },
+    {
+      "epoch": 0.5726681127982647,
+      "grad_norm": 0.8651937246322632,
+      "learning_rate": 0.00011391731009600654,
+      "loss": 3.3679,
+      "step": 132
+    },
+    {
+      "epoch": 0.5770065075921909,
+      "grad_norm": 0.9174556136131287,
+      "learning_rate": 0.00011218693434051475,
+      "loss": 3.311,
+      "step": 133
+    },
+    {
+      "epoch": 0.5813449023861171,
+      "grad_norm": 0.930533230304718,
+      "learning_rate": 0.00011045284632676536,
+      "loss": 3.3761,
+      "step": 134
+    },
+    {
+      "epoch": 0.5856832971800434,
+      "grad_norm": 0.9851680994033813,
+      "learning_rate": 0.00010871557427476583,
+      "loss": 3.2752,
+      "step": 135
+    },
+    {
+      "epoch": 0.5900216919739696,
+      "grad_norm": 0.9633740782737732,
+      "learning_rate": 0.00010697564737441252,
+      "loss": 3.2373,
+      "step": 136
+    },
+    {
+      "epoch": 0.5943600867678959,
+      "grad_norm": 1.132585048675537,
+      "learning_rate": 0.0001052335956242944,
+      "loss": 3.2323,
+      "step": 137
+    },
+    {
+      "epoch": 0.5986984815618221,
+      "grad_norm": 1.1232091188430786,
+      "learning_rate": 0.00010348994967025012,
+      "loss": 3.2874,
+      "step": 138
+    },
+    {
+      "epoch": 0.6030368763557483,
+      "grad_norm": 1.2559125423431396,
+      "learning_rate": 0.00010174524064372837,
+      "loss": 3.2367,
+      "step": 139
+    },
+    {
+      "epoch": 0.6073752711496746,
+      "grad_norm": 1.2623041868209839,
+      "learning_rate": 0.0001,
+      "loss": 3.2243,
+      "step": 140
+    },
+    {
+      "epoch": 0.6117136659436009,
+      "grad_norm": 1.3554457426071167,
+      "learning_rate": 9.825475935627165e-05,
+      "loss": 3.4802,
+      "step": 141
+    },
+    {
+      "epoch": 0.6160520607375272,
+      "grad_norm": 1.4170132875442505,
+      "learning_rate": 9.651005032974994e-05,
+      "loss": 3.354,
+      "step": 142
+    },
+    {
+      "epoch": 0.6203904555314533,
+      "grad_norm": 1.4309097528457642,
+      "learning_rate": 9.476640437570562e-05,
+      "loss": 3.1352,
+      "step": 143
+    },
+    {
+      "epoch": 0.6247288503253796,
+      "grad_norm": 1.5829153060913086,
+      "learning_rate": 9.302435262558747e-05,
+      "loss": 3.2455,
+      "step": 144
+    },
+    {
+      "epoch": 0.6290672451193059,
+      "grad_norm": 1.8210502862930298,
+      "learning_rate": 9.128442572523417e-05,
+      "loss": 3.2991,
+      "step": 145
+    },
+    {
+      "epoch": 0.6334056399132321,
+      "grad_norm": 1.842761516571045,
+      "learning_rate": 8.954715367323468e-05,
+      "loss": 3.2255,
+      "step": 146
+    },
+    {
+      "epoch": 0.6377440347071583,
+      "grad_norm": 1.9258646965026855,
+      "learning_rate": 8.781306565948528e-05,
+      "loss": 3.1397,
+      "step": 147
+    },
+    {
+      "epoch": 0.6420824295010846,
+      "grad_norm": 2.1189215183258057,
+      "learning_rate": 8.608268990399349e-05,
+      "loss": 3.0414,
+      "step": 148
+    },
+    {
+      "epoch": 0.6464208242950108,
+      "grad_norm": 2.4063761234283447,
+      "learning_rate": 8.435655349597689e-05,
+      "loss": 2.8524,
+      "step": 149
+    },
+    {
+      "epoch": 0.6507592190889371,
+      "grad_norm": 3.6420836448669434,
+      "learning_rate": 8.263518223330697e-05,
+      "loss": 3.0156,
+      "step": 150
+    },
+    {
+      "epoch": 0.6550976138828634,
+      "grad_norm": 0.7080674171447754,
+      "learning_rate": 8.091910046234552e-05,
+      "loss": 3.1636,
+      "step": 151
+    },
+    {
+      "epoch": 0.6594360086767896,
+      "grad_norm": 0.798520565032959,
+      "learning_rate": 7.920883091822408e-05,
+      "loss": 3.212,
+      "step": 152
+    },
+    {
+      "epoch": 0.6637744034707158,
+      "grad_norm": 0.8640486001968384,
+      "learning_rate": 7.750489456561352e-05,
+      "loss": 3.1644,
+      "step": 153
+    },
+    {
+      "epoch": 0.6681127982646421,
+      "grad_norm": 0.870906412601471,
+      "learning_rate": 7.580781044003324e-05,
+      "loss": 3.1876,
+      "step": 154
+    },
+    {
+      "epoch": 0.6724511930585684,
+      "grad_norm": 0.8581348061561584,
+      "learning_rate": 7.411809548974792e-05,
+      "loss": 3.2739,
+      "step": 155
+    },
+    {
+      "epoch": 0.6767895878524945,
+      "grad_norm": 0.8691614270210266,
+      "learning_rate": 7.243626441830009e-05,
+      "loss": 3.2444,
+      "step": 156
+    },
+    {
+      "epoch": 0.6811279826464208,
+      "grad_norm": 0.9455673098564148,
+      "learning_rate": 7.076282952772633e-05,
+      "loss": 3.3004,
+      "step": 157
+    },
+    {
+      "epoch": 0.6854663774403471,
+      "grad_norm": 0.8873337507247925,
+      "learning_rate": 6.909830056250527e-05,
+      "loss": 3.1778,
+      "step": 158
+    },
+    {
+      "epoch": 0.6898047722342733,
+      "grad_norm": 0.910775363445282,
+      "learning_rate": 6.744318455428436e-05,
+      "loss": 3.1346,
+      "step": 159
+    },
+    {
+      "epoch": 0.6941431670281996,
+      "grad_norm": 0.9872409105300903,
+      "learning_rate": 6.579798566743314e-05,
+      "loss": 3.1665,
+      "step": 160
+    },
+    {
+      "epoch": 0.6984815618221258,
+      "grad_norm": 1.0516481399536133,
+      "learning_rate": 6.416320504546997e-05,
+      "loss": 3.3064,
+      "step": 161
+    },
+    {
+      "epoch": 0.702819956616052,
+      "grad_norm": 1.0263571739196777,
+      "learning_rate": 6.25393406584088e-05,
+      "loss": 3.3698,
+      "step": 162
+    },
+    {
+      "epoch": 0.7071583514099783,
+      "grad_norm": 1.1050878763198853,
+      "learning_rate": 6.092688715107264e-05,
+      "loss": 3.2436,
+      "step": 163
+    },
+    {
+      "epoch": 0.7114967462039046,
+      "grad_norm": 1.1121841669082642,
+      "learning_rate": 5.9326335692419995e-05,
+      "loss": 2.9433,
+      "step": 164
+    },
+    {
+      "epoch": 0.7158351409978309,
+      "grad_norm": 1.2424358129501343,
+      "learning_rate": 5.773817382593008e-05,
+      "loss": 3.3616,
+      "step": 165
+    },
+    {
+      "epoch": 0.720173535791757,
+      "grad_norm": 1.1899327039718628,
+      "learning_rate": 5.616288532109225e-05,
+      "loss": 3.0392,
+      "step": 166
+    },
+    {
+      "epoch": 0.7245119305856833,
+      "grad_norm": 1.3395730257034302,
+      "learning_rate": 5.4600950026045326e-05,
+      "loss": 3.0905,
+      "step": 167
+    },
+    {
+      "epoch": 0.7288503253796096,
+      "grad_norm": 1.4268842935562134,
+      "learning_rate": 5.305284372141095e-05,
+      "loss": 3.1247,
+      "step": 168
+    },
+    {
+      "epoch": 0.7331887201735358,
+      "grad_norm": 1.5514875650405884,
+      "learning_rate": 5.15190379753663e-05,
+      "loss": 3.3772,
+      "step": 169
+    },
+    {
+      "epoch": 0.737527114967462,
+      "grad_norm": 1.8371058702468872,
+      "learning_rate": 5.000000000000002e-05,
+      "loss": 2.9262,
+      "step": 170
+    },
+    {
+      "epoch": 0.7418655097613883,
+      "grad_norm": 1.7641676664352417,
+      "learning_rate": 4.8496192508994576e-05,
+      "loss": 3.0113,
+      "step": 171
+    },
+    {
+      "epoch": 0.7462039045553145,
+      "grad_norm": 1.8325039148330688,
+      "learning_rate": 4.700807357667952e-05,
+      "loss": 3.0551,
+      "step": 172
+    },
+    {
+      "epoch": 0.7505422993492408,
+      "grad_norm": 1.9740185737609863,
+      "learning_rate": 4.5536096498497295e-05,
+      "loss": 3.1479,
+      "step": 173
+    },
+    {
+      "epoch": 0.754880694143167,
+      "grad_norm": 2.327420234680176,
+      "learning_rate": 4.4080709652925336e-05,
+      "loss": 3.2149,
+      "step": 174
+    },
+    {
+      "epoch": 0.754880694143167,
+      "eval_loss": 3.163884162902832,
+      "eval_runtime": 43.9914,
+      "eval_samples_per_second": 8.843,
+      "eval_steps_per_second": 2.228,
+      "step": 174
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.8904213574936166e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null