Training in progress, step 5846, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50358592
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25745c4e48331c53fd5a30e76601bed2b2c294b2fa84b0b99dc053205d796695
|
3 |
size 50358592
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100824826
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2be6675b9b9b80a7d2257d3cf7288a27aea082042953111315f2974cd98cf39a
|
3 |
size 100824826
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9f870579aad726ad5dd33eb2c2ef3c173af7d0105d1964cc901d9f9fddc786e
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01d224b5db2d008aab6901376593307ed413c8767c47ba9d506ac5ec4e271040
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -40167,6 +40167,776 @@
|
|
40167 |
"learning_rate": 9.039886378601204e-08,
|
40168 |
"loss": 22.0672,
|
40169 |
"step": 5736
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40170 |
}
|
40171 |
],
|
40172 |
"logging_steps": 1,
|
@@ -40181,12 +40951,12 @@
|
|
40181 |
"should_evaluate": false,
|
40182 |
"should_log": false,
|
40183 |
"should_save": true,
|
40184 |
-
"should_training_stop":
|
40185 |
},
|
40186 |
"attributes": {}
|
40187 |
}
|
40188 |
},
|
40189 |
-
"total_flos": 1.
|
40190 |
"train_batch_size": 4,
|
40191 |
"trial_name": null,
|
40192 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5846,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
40167 |
"learning_rate": 9.039886378601204e-08,
|
40168 |
"loss": 22.0672,
|
40169 |
"step": 5736
|
40170 |
+
},
|
40171 |
+
{
|
40172 |
+
"epoch": 1.962709544988026,
|
40173 |
+
"grad_norm": 6.91142463684082,
|
40174 |
+
"learning_rate": 8.87632033329977e-08,
|
40175 |
+
"loss": 19.9954,
|
40176 |
+
"step": 5737
|
40177 |
+
},
|
40178 |
+
{
|
40179 |
+
"epoch": 1.963051659254191,
|
40180 |
+
"grad_norm": 6.9987006187438965,
|
40181 |
+
"learning_rate": 8.71424628112738e-08,
|
40182 |
+
"loss": 19.3593,
|
40183 |
+
"step": 5738
|
40184 |
+
},
|
40185 |
+
{
|
40186 |
+
"epoch": 1.9633937735203557,
|
40187 |
+
"grad_norm": 7.478214740753174,
|
40188 |
+
"learning_rate": 8.553664270531947e-08,
|
40189 |
+
"loss": 20.6897,
|
40190 |
+
"step": 5739
|
40191 |
+
},
|
40192 |
+
{
|
40193 |
+
"epoch": 1.9637358877865208,
|
40194 |
+
"grad_norm": 9.3041410446167,
|
40195 |
+
"learning_rate": 8.394574349517293e-08,
|
40196 |
+
"loss": 19.2391,
|
40197 |
+
"step": 5740
|
40198 |
+
},
|
40199 |
+
{
|
40200 |
+
"epoch": 1.9640780020526856,
|
40201 |
+
"grad_norm": 9.374120712280273,
|
40202 |
+
"learning_rate": 8.236976565639265e-08,
|
40203 |
+
"loss": 19.4552,
|
40204 |
+
"step": 5741
|
40205 |
+
},
|
40206 |
+
{
|
40207 |
+
"epoch": 1.9644201163188506,
|
40208 |
+
"grad_norm": 9.557108879089355,
|
40209 |
+
"learning_rate": 8.080870966008514e-08,
|
40210 |
+
"loss": 19.067,
|
40211 |
+
"step": 5742
|
40212 |
+
},
|
40213 |
+
{
|
40214 |
+
"epoch": 1.9647622305850154,
|
40215 |
+
"grad_norm": 8.268561363220215,
|
40216 |
+
"learning_rate": 7.926257597289376e-08,
|
40217 |
+
"loss": 20.9947,
|
40218 |
+
"step": 5743
|
40219 |
+
},
|
40220 |
+
{
|
40221 |
+
"epoch": 1.9651043448511802,
|
40222 |
+
"grad_norm": 8.512595176696777,
|
40223 |
+
"learning_rate": 7.773136505700995e-08,
|
40224 |
+
"loss": 20.4771,
|
40225 |
+
"step": 5744
|
40226 |
+
},
|
40227 |
+
{
|
40228 |
+
"epoch": 1.965446459117345,
|
40229 |
+
"grad_norm": 8.379534721374512,
|
40230 |
+
"learning_rate": 7.621507737015088e-08,
|
40231 |
+
"loss": 21.1643,
|
40232 |
+
"step": 5745
|
40233 |
+
},
|
40234 |
+
{
|
40235 |
+
"epoch": 1.96578857338351,
|
40236 |
+
"grad_norm": 9.614562034606934,
|
40237 |
+
"learning_rate": 7.471371336558174e-08,
|
40238 |
+
"loss": 20.1646,
|
40239 |
+
"step": 5746
|
40240 |
+
},
|
40241 |
+
{
|
40242 |
+
"epoch": 1.9661306876496751,
|
40243 |
+
"grad_norm": 9.188095092773438,
|
40244 |
+
"learning_rate": 7.322727349211023e-08,
|
40245 |
+
"loss": 21.6648,
|
40246 |
+
"step": 5747
|
40247 |
+
},
|
40248 |
+
{
|
40249 |
+
"epoch": 1.96647280191584,
|
40250 |
+
"grad_norm": 10.135308265686035,
|
40251 |
+
"learning_rate": 7.175575819406421e-08,
|
40252 |
+
"loss": 19.3838,
|
40253 |
+
"step": 5748
|
40254 |
+
},
|
40255 |
+
{
|
40256 |
+
"epoch": 1.9668149161820048,
|
40257 |
+
"grad_norm": 9.67789363861084,
|
40258 |
+
"learning_rate": 7.029916791133074e-08,
|
40259 |
+
"loss": 18.6511,
|
40260 |
+
"step": 5749
|
40261 |
+
},
|
40262 |
+
{
|
40263 |
+
"epoch": 1.9671570304481696,
|
40264 |
+
"grad_norm": 10.633426666259766,
|
40265 |
+
"learning_rate": 6.885750307933369e-08,
|
40266 |
+
"loss": 20.5941,
|
40267 |
+
"step": 5750
|
40268 |
+
},
|
40269 |
+
{
|
40270 |
+
"epoch": 1.9674991447143346,
|
40271 |
+
"grad_norm": 10.650802612304688,
|
40272 |
+
"learning_rate": 6.743076412901173e-08,
|
40273 |
+
"loss": 20.8643,
|
40274 |
+
"step": 5751
|
40275 |
+
},
|
40276 |
+
{
|
40277 |
+
"epoch": 1.9678412589804994,
|
40278 |
+
"grad_norm": 10.619598388671875,
|
40279 |
+
"learning_rate": 6.601895148687365e-08,
|
40280 |
+
"loss": 20.3627,
|
40281 |
+
"step": 5752
|
40282 |
+
},
|
40283 |
+
{
|
40284 |
+
"epoch": 1.9681833732466645,
|
40285 |
+
"grad_norm": 12.187576293945312,
|
40286 |
+
"learning_rate": 6.462206557494854e-08,
|
40287 |
+
"loss": 20.6054,
|
40288 |
+
"step": 5753
|
40289 |
+
},
|
40290 |
+
{
|
40291 |
+
"epoch": 1.9685254875128293,
|
40292 |
+
"grad_norm": 12.619000434875488,
|
40293 |
+
"learning_rate": 6.324010681080239e-08,
|
40294 |
+
"loss": 21.6063,
|
40295 |
+
"step": 5754
|
40296 |
+
},
|
40297 |
+
{
|
40298 |
+
"epoch": 1.9688676017789941,
|
40299 |
+
"grad_norm": 15.741616249084473,
|
40300 |
+
"learning_rate": 6.187307560754363e-08,
|
40301 |
+
"loss": 22.4582,
|
40302 |
+
"step": 5755
|
40303 |
+
},
|
40304 |
+
{
|
40305 |
+
"epoch": 1.969209716045159,
|
40306 |
+
"grad_norm": 14.282339096069336,
|
40307 |
+
"learning_rate": 6.052097237382315e-08,
|
40308 |
+
"loss": 23.1004,
|
40309 |
+
"step": 5756
|
40310 |
+
},
|
40311 |
+
{
|
40312 |
+
"epoch": 1.969551830311324,
|
40313 |
+
"grad_norm": 15.24374771118164,
|
40314 |
+
"learning_rate": 5.918379751381764e-08,
|
40315 |
+
"loss": 24.0451,
|
40316 |
+
"step": 5757
|
40317 |
+
},
|
40318 |
+
{
|
40319 |
+
"epoch": 1.969893944577489,
|
40320 |
+
"grad_norm": 14.704069137573242,
|
40321 |
+
"learning_rate": 5.78615514272518e-08,
|
40322 |
+
"loss": 22.778,
|
40323 |
+
"step": 5758
|
40324 |
+
},
|
40325 |
+
{
|
40326 |
+
"epoch": 1.9702360588436538,
|
40327 |
+
"grad_norm": 16.28843116760254,
|
40328 |
+
"learning_rate": 5.655423450938724e-08,
|
40329 |
+
"loss": 19.8016,
|
40330 |
+
"step": 5759
|
40331 |
+
},
|
40332 |
+
{
|
40333 |
+
"epoch": 1.9705781731098186,
|
40334 |
+
"grad_norm": 16.319915771484375,
|
40335 |
+
"learning_rate": 5.5261847151011346e-08,
|
40336 |
+
"loss": 20.8609,
|
40337 |
+
"step": 5760
|
40338 |
+
},
|
40339 |
+
{
|
40340 |
+
"epoch": 1.9709202873759835,
|
40341 |
+
"grad_norm": 16.698453903198242,
|
40342 |
+
"learning_rate": 5.398438973845954e-08,
|
40343 |
+
"loss": 21.4025,
|
40344 |
+
"step": 5761
|
40345 |
+
},
|
40346 |
+
{
|
40347 |
+
"epoch": 1.9712624016421485,
|
40348 |
+
"grad_norm": 17.40521240234375,
|
40349 |
+
"learning_rate": 5.272186265360413e-08,
|
40350 |
+
"loss": 24.3388,
|
40351 |
+
"step": 5762
|
40352 |
+
},
|
40353 |
+
{
|
40354 |
+
"epoch": 1.9716045159083135,
|
40355 |
+
"grad_norm": 19.207595825195312,
|
40356 |
+
"learning_rate": 5.147426627384877e-08,
|
40357 |
+
"loss": 20.9791,
|
40358 |
+
"step": 5763
|
40359 |
+
},
|
40360 |
+
{
|
40361 |
+
"epoch": 1.9719466301744784,
|
40362 |
+
"grad_norm": 21.78555679321289,
|
40363 |
+
"learning_rate": 5.0241600972139594e-08,
|
40364 |
+
"loss": 21.4271,
|
40365 |
+
"step": 5764
|
40366 |
+
},
|
40367 |
+
{
|
40368 |
+
"epoch": 1.9722887444406432,
|
40369 |
+
"grad_norm": 21.837677001953125,
|
40370 |
+
"learning_rate": 4.9023867116948506e-08,
|
40371 |
+
"loss": 22.5372,
|
40372 |
+
"step": 5765
|
40373 |
+
},
|
40374 |
+
{
|
40375 |
+
"epoch": 1.972630858706808,
|
40376 |
+
"grad_norm": 24.989015579223633,
|
40377 |
+
"learning_rate": 4.7821065072301e-08,
|
40378 |
+
"loss": 20.4801,
|
40379 |
+
"step": 5766
|
40380 |
+
},
|
40381 |
+
{
|
40382 |
+
"epoch": 1.972972972972973,
|
40383 |
+
"grad_norm": 26.47430992126465,
|
40384 |
+
"learning_rate": 4.6633195197742784e-08,
|
40385 |
+
"loss": 19.5036,
|
40386 |
+
"step": 5767
|
40387 |
+
},
|
40388 |
+
{
|
40389 |
+
"epoch": 1.9733150872391378,
|
40390 |
+
"grad_norm": 28.229026794433594,
|
40391 |
+
"learning_rate": 4.5460257848373156e-08,
|
40392 |
+
"loss": 24.5793,
|
40393 |
+
"step": 5768
|
40394 |
+
},
|
40395 |
+
{
|
40396 |
+
"epoch": 1.9736572015053029,
|
40397 |
+
"grad_norm": 29.432619094848633,
|
40398 |
+
"learning_rate": 4.430225337480609e-08,
|
40399 |
+
"loss": 20.9656,
|
40400 |
+
"step": 5769
|
40401 |
+
},
|
40402 |
+
{
|
40403 |
+
"epoch": 1.9739993157714677,
|
40404 |
+
"grad_norm": 29.067018508911133,
|
40405 |
+
"learning_rate": 4.315918212320358e-08,
|
40406 |
+
"loss": 23.4043,
|
40407 |
+
"step": 5770
|
40408 |
+
},
|
40409 |
+
{
|
40410 |
+
"epoch": 1.9743414300376325,
|
40411 |
+
"grad_norm": 33.93965530395508,
|
40412 |
+
"learning_rate": 4.2031044435270063e-08,
|
40413 |
+
"loss": 21.3011,
|
40414 |
+
"step": 5771
|
40415 |
+
},
|
40416 |
+
{
|
40417 |
+
"epoch": 1.9746835443037973,
|
40418 |
+
"grad_norm": 37.01821517944336,
|
40419 |
+
"learning_rate": 4.0917840648241334e-08,
|
40420 |
+
"loss": 25.6067,
|
40421 |
+
"step": 5772
|
40422 |
+
},
|
40423 |
+
{
|
40424 |
+
"epoch": 1.9750256585699624,
|
40425 |
+
"grad_norm": 42.83258056640625,
|
40426 |
+
"learning_rate": 3.9819571094878993e-08,
|
40427 |
+
"loss": 25.7397,
|
40428 |
+
"step": 5773
|
40429 |
+
},
|
40430 |
+
{
|
40431 |
+
"epoch": 1.9753677728361274,
|
40432 |
+
"grad_norm": 8.533278465270996,
|
40433 |
+
"learning_rate": 3.873623610348709e-08,
|
40434 |
+
"loss": 19.5421,
|
40435 |
+
"step": 5774
|
40436 |
+
},
|
40437 |
+
{
|
40438 |
+
"epoch": 1.9757098871022922,
|
40439 |
+
"grad_norm": 6.471829414367676,
|
40440 |
+
"learning_rate": 3.766783599791213e-08,
|
40441 |
+
"loss": 18.0767,
|
40442 |
+
"step": 5775
|
40443 |
+
},
|
40444 |
+
{
|
40445 |
+
"epoch": 1.976052001368457,
|
40446 |
+
"grad_norm": 6.663585186004639,
|
40447 |
+
"learning_rate": 3.661437109752641e-08,
|
40448 |
+
"loss": 17.9218,
|
40449 |
+
"step": 5776
|
40450 |
+
},
|
40451 |
+
{
|
40452 |
+
"epoch": 1.9763941156346219,
|
40453 |
+
"grad_norm": 6.855971813201904,
|
40454 |
+
"learning_rate": 3.5575841717239157e-08,
|
40455 |
+
"loss": 18.6801,
|
40456 |
+
"step": 5777
|
40457 |
+
},
|
40458 |
+
{
|
40459 |
+
"epoch": 1.976736229900787,
|
40460 |
+
"grad_norm": 6.7459330558776855,
|
40461 |
+
"learning_rate": 3.455224816750757e-08,
|
40462 |
+
"loss": 17.6948,
|
40463 |
+
"step": 5778
|
40464 |
+
},
|
40465 |
+
{
|
40466 |
+
"epoch": 1.9770783441669517,
|
40467 |
+
"grad_norm": 6.8546552658081055,
|
40468 |
+
"learning_rate": 3.354359075430358e-08,
|
40469 |
+
"loss": 18.4751,
|
40470 |
+
"step": 5779
|
40471 |
+
},
|
40472 |
+
{
|
40473 |
+
"epoch": 1.9774204584331168,
|
40474 |
+
"grad_norm": 6.872806549072266,
|
40475 |
+
"learning_rate": 3.254986977914709e-08,
|
40476 |
+
"loss": 19.4655,
|
40477 |
+
"step": 5780
|
40478 |
+
},
|
40479 |
+
{
|
40480 |
+
"epoch": 1.9777625726992816,
|
40481 |
+
"grad_norm": 18.566171646118164,
|
40482 |
+
"learning_rate": 3.1571085539089384e-08,
|
40483 |
+
"loss": 19.6944,
|
40484 |
+
"step": 5781
|
40485 |
+
},
|
40486 |
+
{
|
40487 |
+
"epoch": 1.9781046869654464,
|
40488 |
+
"grad_norm": 6.615222454071045,
|
40489 |
+
"learning_rate": 3.0607238326724186e-08,
|
40490 |
+
"loss": 18.9052,
|
40491 |
+
"step": 5782
|
40492 |
+
},
|
40493 |
+
{
|
40494 |
+
"epoch": 1.9784468012316112,
|
40495 |
+
"grad_norm": 7.05173921585083,
|
40496 |
+
"learning_rate": 2.9658328430165472e-08,
|
40497 |
+
"loss": 19.9629,
|
40498 |
+
"step": 5783
|
40499 |
+
},
|
40500 |
+
{
|
40501 |
+
"epoch": 1.9787889154977762,
|
40502 |
+
"grad_norm": 6.960274696350098,
|
40503 |
+
"learning_rate": 2.8724356133075226e-08,
|
40504 |
+
"loss": 20.1454,
|
40505 |
+
"step": 5784
|
40506 |
+
},
|
40507 |
+
{
|
40508 |
+
"epoch": 1.9791310297639413,
|
40509 |
+
"grad_norm": 7.180263996124268,
|
40510 |
+
"learning_rate": 2.780532171464123e-08,
|
40511 |
+
"loss": 20.9793,
|
40512 |
+
"step": 5785
|
40513 |
+
},
|
40514 |
+
{
|
40515 |
+
"epoch": 1.979473144030106,
|
40516 |
+
"grad_norm": 7.336266040802002,
|
40517 |
+
"learning_rate": 2.6901225449593726e-08,
|
40518 |
+
"loss": 19.6709,
|
40519 |
+
"step": 5786
|
40520 |
+
},
|
40521 |
+
{
|
40522 |
+
"epoch": 1.979815258296271,
|
40523 |
+
"grad_norm": 8.403460502624512,
|
40524 |
+
"learning_rate": 2.6012067608194303e-08,
|
40525 |
+
"loss": 19.4493,
|
40526 |
+
"step": 5787
|
40527 |
+
},
|
40528 |
+
{
|
40529 |
+
"epoch": 1.9801573725624357,
|
40530 |
+
"grad_norm": 11.085399627685547,
|
40531 |
+
"learning_rate": 2.513784845623035e-08,
|
40532 |
+
"loss": 21.0099,
|
40533 |
+
"step": 5788
|
40534 |
+
},
|
40535 |
+
{
|
40536 |
+
"epoch": 1.9804994868286008,
|
40537 |
+
"grad_norm": 7.737322807312012,
|
40538 |
+
"learning_rate": 2.427856825504282e-08,
|
40539 |
+
"loss": 19.7549,
|
40540 |
+
"step": 5789
|
40541 |
+
},
|
40542 |
+
{
|
40543 |
+
"epoch": 1.9808416010947658,
|
40544 |
+
"grad_norm": 7.702394008636475,
|
40545 |
+
"learning_rate": 2.3434227261487362e-08,
|
40546 |
+
"loss": 20.8117,
|
40547 |
+
"step": 5790
|
40548 |
+
},
|
40549 |
+
{
|
40550 |
+
"epoch": 1.9811837153609306,
|
40551 |
+
"grad_norm": 7.653695106506348,
|
40552 |
+
"learning_rate": 2.2604825727962075e-08,
|
40553 |
+
"loss": 18.3907,
|
40554 |
+
"step": 5791
|
40555 |
+
},
|
40556 |
+
{
|
40557 |
+
"epoch": 1.9815258296270954,
|
40558 |
+
"grad_norm": 7.7871575355529785,
|
40559 |
+
"learning_rate": 2.179036390240752e-08,
|
40560 |
+
"loss": 19.3076,
|
40561 |
+
"step": 5792
|
40562 |
+
},
|
40563 |
+
{
|
40564 |
+
"epoch": 1.9818679438932603,
|
40565 |
+
"grad_norm": 8.504340171813965,
|
40566 |
+
"learning_rate": 2.0990842028284496e-08,
|
40567 |
+
"loss": 21.0861,
|
40568 |
+
"step": 5793
|
40569 |
+
},
|
40570 |
+
{
|
40571 |
+
"epoch": 1.9822100581594253,
|
40572 |
+
"grad_norm": 8.407926559448242,
|
40573 |
+
"learning_rate": 2.020626034459072e-08,
|
40574 |
+
"loss": 19.538,
|
40575 |
+
"step": 5794
|
40576 |
+
},
|
40577 |
+
{
|
40578 |
+
"epoch": 1.9825521724255901,
|
40579 |
+
"grad_norm": 8.925575256347656,
|
40580 |
+
"learning_rate": 1.943661908586636e-08,
|
40581 |
+
"loss": 20.1966,
|
40582 |
+
"step": 5795
|
40583 |
+
},
|
40584 |
+
{
|
40585 |
+
"epoch": 1.9828942866917552,
|
40586 |
+
"grad_norm": 8.412221908569336,
|
40587 |
+
"learning_rate": 1.8681918482177375e-08,
|
40588 |
+
"loss": 19.9603,
|
40589 |
+
"step": 5796
|
40590 |
+
},
|
40591 |
+
{
|
40592 |
+
"epoch": 1.98323640095792,
|
40593 |
+
"grad_norm": 9.20413589477539,
|
40594 |
+
"learning_rate": 1.7942158759126637e-08,
|
40595 |
+
"loss": 19.8316,
|
40596 |
+
"step": 5797
|
40597 |
+
},
|
40598 |
+
{
|
40599 |
+
"epoch": 1.9835785152240848,
|
40600 |
+
"grad_norm": 9.628589630126953,
|
40601 |
+
"learning_rate": 1.721734013784837e-08,
|
40602 |
+
"loss": 19.4429,
|
40603 |
+
"step": 5798
|
40604 |
+
},
|
40605 |
+
{
|
40606 |
+
"epoch": 1.9839206294902496,
|
40607 |
+
"grad_norm": 9.633048057556152,
|
40608 |
+
"learning_rate": 1.6507462835013697e-08,
|
40609 |
+
"loss": 19.0393,
|
40610 |
+
"step": 5799
|
40611 |
+
},
|
40612 |
+
{
|
40613 |
+
"epoch": 1.9842627437564146,
|
40614 |
+
"grad_norm": 12.57917308807373,
|
40615 |
+
"learning_rate": 1.581252706281955e-08,
|
40616 |
+
"loss": 21.2715,
|
40617 |
+
"step": 5800
|
40618 |
+
},
|
40619 |
+
{
|
40620 |
+
"epoch": 1.9846048580225797,
|
40621 |
+
"grad_norm": 10.881157875061035,
|
40622 |
+
"learning_rate": 1.5132533029016405e-08,
|
40623 |
+
"loss": 20.7091,
|
40624 |
+
"step": 5801
|
40625 |
+
},
|
40626 |
+
{
|
40627 |
+
"epoch": 1.9849469722887445,
|
40628 |
+
"grad_norm": 11.508024215698242,
|
40629 |
+
"learning_rate": 1.4467480936858347e-08,
|
40630 |
+
"loss": 20.5245,
|
40631 |
+
"step": 5802
|
40632 |
+
},
|
40633 |
+
{
|
40634 |
+
"epoch": 1.9852890865549093,
|
40635 |
+
"grad_norm": 12.641556739807129,
|
40636 |
+
"learning_rate": 1.3817370985164113e-08,
|
40637 |
+
"loss": 20.9665,
|
40638 |
+
"step": 5803
|
40639 |
+
},
|
40640 |
+
{
|
40641 |
+
"epoch": 1.9856312008210741,
|
40642 |
+
"grad_norm": 54.07820510864258,
|
40643 |
+
"learning_rate": 1.3182203368256041e-08,
|
40644 |
+
"loss": 21.8294,
|
40645 |
+
"step": 5804
|
40646 |
+
},
|
40647 |
+
{
|
40648 |
+
"epoch": 1.9859733150872392,
|
40649 |
+
"grad_norm": 13.773715019226074,
|
40650 |
+
"learning_rate": 1.2561978276015574e-08,
|
40651 |
+
"loss": 20.4164,
|
40652 |
+
"step": 5805
|
40653 |
+
},
|
40654 |
+
{
|
40655 |
+
"epoch": 1.986315429353404,
|
40656 |
+
"grad_norm": 13.61760425567627,
|
40657 |
+
"learning_rate": 1.1956695893844405e-08,
|
40658 |
+
"loss": 24.0563,
|
40659 |
+
"step": 5806
|
40660 |
+
},
|
40661 |
+
{
|
40662 |
+
"epoch": 1.986657543619569,
|
40663 |
+
"grad_norm": 14.020088195800781,
|
40664 |
+
"learning_rate": 1.1366356402670032e-08,
|
40665 |
+
"loss": 20.9675,
|
40666 |
+
"step": 5807
|
40667 |
+
},
|
40668 |
+
{
|
40669 |
+
"epoch": 1.9869996578857338,
|
40670 |
+
"grad_norm": 14.913092613220215,
|
40671 |
+
"learning_rate": 1.0790959978973503e-08,
|
40672 |
+
"loss": 21.9978,
|
40673 |
+
"step": 5808
|
40674 |
+
},
|
40675 |
+
{
|
40676 |
+
"epoch": 1.9873417721518987,
|
40677 |
+
"grad_norm": 15.699370384216309,
|
40678 |
+
"learning_rate": 1.0230506794750572e-08,
|
40679 |
+
"loss": 19.9998,
|
40680 |
+
"step": 5809
|
40681 |
+
},
|
40682 |
+
{
|
40683 |
+
"epoch": 1.9876838864180635,
|
40684 |
+
"grad_norm": 16.686582565307617,
|
40685 |
+
"learning_rate": 9.684997017544995e-09,
|
40686 |
+
"loss": 18.9121,
|
40687 |
+
"step": 5810
|
40688 |
+
},
|
40689 |
+
{
|
40690 |
+
"epoch": 1.9880260006842285,
|
40691 |
+
"grad_norm": 17.049999237060547,
|
40692 |
+
"learning_rate": 9.154430810415227e-09,
|
40693 |
+
"loss": 21.2301,
|
40694 |
+
"step": 5811
|
40695 |
+
},
|
40696 |
+
{
|
40697 |
+
"epoch": 1.9883681149503936,
|
40698 |
+
"grad_norm": 19.32858657836914,
|
40699 |
+
"learning_rate": 8.63880833197328e-09,
|
40700 |
+
"loss": 22.4003,
|
40701 |
+
"step": 5812
|
40702 |
+
},
|
40703 |
+
{
|
40704 |
+
"epoch": 1.9887102292165584,
|
40705 |
+
"grad_norm": 19.711584091186523,
|
40706 |
+
"learning_rate": 8.138129736340317e-09,
|
40707 |
+
"loss": 22.6676,
|
40708 |
+
"step": 5813
|
40709 |
+
},
|
40710 |
+
{
|
40711 |
+
"epoch": 1.9890523434827232,
|
40712 |
+
"grad_norm": 22.30181884765625,
|
40713 |
+
"learning_rate": 7.652395173202154e-09,
|
40714 |
+
"loss": 19.9137,
|
40715 |
+
"step": 5814
|
40716 |
+
},
|
40717 |
+
{
|
40718 |
+
"epoch": 1.989394457748888,
|
40719 |
+
"grad_norm": 33.89694595336914,
|
40720 |
+
"learning_rate": 7.181604787742657e-09,
|
40721 |
+
"loss": 22.0087,
|
40722 |
+
"step": 5815
|
40723 |
+
},
|
40724 |
+
{
|
40725 |
+
"epoch": 1.989736572015053,
|
40726 |
+
"grad_norm": 27.179582595825195,
|
40727 |
+
"learning_rate": 6.7257587207048e-09,
|
40728 |
+
"loss": 25.1015,
|
40729 |
+
"step": 5816
|
40730 |
+
},
|
40731 |
+
{
|
40732 |
+
"epoch": 1.990078686281218,
|
40733 |
+
"grad_norm": 22.285991668701172,
|
40734 |
+
"learning_rate": 6.284857108346254e-09,
|
40735 |
+
"loss": 18.6589,
|
40736 |
+
"step": 5817
|
40737 |
+
},
|
40738 |
+
{
|
40739 |
+
"epoch": 1.990420800547383,
|
40740 |
+
"grad_norm": 24.79540252685547,
|
40741 |
+
"learning_rate": 5.858900082472696e-09,
|
40742 |
+
"loss": 20.7354,
|
40743 |
+
"step": 5818
|
40744 |
+
},
|
40745 |
+
{
|
40746 |
+
"epoch": 1.9907629148135477,
|
40747 |
+
"grad_norm": 27.761415481567383,
|
40748 |
+
"learning_rate": 5.447887770415605e-09,
|
40749 |
+
"loss": 25.7395,
|
40750 |
+
"step": 5819
|
40751 |
+
},
|
40752 |
+
{
|
40753 |
+
"epoch": 1.9911050290797125,
|
40754 |
+
"grad_norm": 26.287521362304688,
|
40755 |
+
"learning_rate": 5.051820295032261e-09,
|
40756 |
+
"loss": 22.7141,
|
40757 |
+
"step": 5820
|
40758 |
+
},
|
40759 |
+
{
|
40760 |
+
"epoch": 1.9914471433458774,
|
40761 |
+
"grad_norm": 30.279993057250977,
|
40762 |
+
"learning_rate": 4.670697774722399e-09,
|
40763 |
+
"loss": 22.8432,
|
40764 |
+
"step": 5821
|
40765 |
+
},
|
40766 |
+
{
|
40767 |
+
"epoch": 1.9917892576120424,
|
40768 |
+
"grad_norm": 34.489742279052734,
|
40769 |
+
"learning_rate": 4.3045203234115535e-09,
|
40770 |
+
"loss": 21.1695,
|
40771 |
+
"step": 5822
|
40772 |
+
},
|
40773 |
+
{
|
40774 |
+
"epoch": 1.9921313718782074,
|
40775 |
+
"grad_norm": 42.58619689941406,
|
40776 |
+
"learning_rate": 3.953288050567716e-09,
|
40777 |
+
"loss": 27.3762,
|
40778 |
+
"step": 5823
|
40779 |
+
},
|
40780 |
+
{
|
40781 |
+
"epoch": 1.9924734861443723,
|
40782 |
+
"grad_norm": 6.662477970123291,
|
40783 |
+
"learning_rate": 3.617001061179126e-09,
|
40784 |
+
"loss": 18.4796,
|
40785 |
+
"step": 5824
|
40786 |
+
},
|
40787 |
+
{
|
40788 |
+
"epoch": 1.992815600410537,
|
40789 |
+
"grad_norm": 9.788829803466797,
|
40790 |
+
"learning_rate": 3.2956594557764785e-09,
|
40791 |
+
"loss": 17.4526,
|
40792 |
+
"step": 5825
|
40793 |
+
},
|
40794 |
+
{
|
40795 |
+
"epoch": 1.9931577146767019,
|
40796 |
+
"grad_norm": 6.742092609405518,
|
40797 |
+
"learning_rate": 2.9892633304107186e-09,
|
40798 |
+
"loss": 19.3224,
|
40799 |
+
"step": 5826
|
40800 |
+
},
|
40801 |
+
{
|
40802 |
+
"epoch": 1.993499828942867,
|
40803 |
+
"grad_norm": 12.77696418762207,
|
40804 |
+
"learning_rate": 2.697812776680797e-09,
|
40805 |
+
"loss": 18.8464,
|
40806 |
+
"step": 5827
|
40807 |
+
},
|
40808 |
+
{
|
40809 |
+
"epoch": 1.993841943209032,
|
40810 |
+
"grad_norm": 7.268036365509033,
|
40811 |
+
"learning_rate": 2.4213078817059143e-09,
|
40812 |
+
"loss": 19.2486,
|
40813 |
+
"step": 5828
|
40814 |
+
},
|
40815 |
+
{
|
40816 |
+
"epoch": 1.9941840574751968,
|
40817 |
+
"grad_norm": 6.920243740081787,
|
40818 |
+
"learning_rate": 2.1597487281366235e-09,
|
40819 |
+
"loss": 20.0134,
|
40820 |
+
"step": 5829
|
40821 |
+
},
|
40822 |
+
{
|
40823 |
+
"epoch": 1.9945261717413616,
|
40824 |
+
"grad_norm": 7.157983779907227,
|
40825 |
+
"learning_rate": 1.9131353941714836e-09,
|
40826 |
+
"loss": 20.9693,
|
40827 |
+
"step": 5830
|
40828 |
+
},
|
40829 |
+
{
|
40830 |
+
"epoch": 1.9948682860075264,
|
40831 |
+
"grad_norm": 7.626955986022949,
|
40832 |
+
"learning_rate": 1.681467953518201e-09,
|
40833 |
+
"loss": 19.7387,
|
40834 |
+
"step": 5831
|
40835 |
+
},
|
40836 |
+
{
|
40837 |
+
"epoch": 1.9952104002736915,
|
40838 |
+
"grad_norm": 7.880003929138184,
|
40839 |
+
"learning_rate": 1.4647464754380391e-09,
|
40840 |
+
"loss": 20.6881,
|
40841 |
+
"step": 5832
|
40842 |
+
},
|
40843 |
+
{
|
40844 |
+
"epoch": 1.9955525145398563,
|
40845 |
+
"grad_norm": 8.679123878479004,
|
40846 |
+
"learning_rate": 1.2629710247180627e-09,
|
40847 |
+
"loss": 19.434,
|
40848 |
+
"step": 5833
|
40849 |
+
},
|
40850 |
+
{
|
40851 |
+
"epoch": 1.9958946288060213,
|
40852 |
+
"grad_norm": 10.085281372070312,
|
40853 |
+
"learning_rate": 1.076141661660035e-09,
|
40854 |
+
"loss": 20.0026,
|
40855 |
+
"step": 5834
|
40856 |
+
},
|
40857 |
+
{
|
40858 |
+
"epoch": 1.9962367430721861,
|
40859 |
+
"grad_norm": 10.148959159851074,
|
40860 |
+
"learning_rate": 9.04258442130379e-10,
|
40861 |
+
"loss": 19.6373,
|
40862 |
+
"step": 5835
|
40863 |
+
},
|
40864 |
+
{
|
40865 |
+
"epoch": 1.996578857338351,
|
40866 |
+
"grad_norm": 11.545489311218262,
|
40867 |
+
"learning_rate": 7.473214174935628e-10,
|
40868 |
+
"loss": 20.4251,
|
40869 |
+
"step": 5836
|
40870 |
+
},
|
40871 |
+
{
|
40872 |
+
"epoch": 1.9969209716045158,
|
40873 |
+
"grad_norm": 14.113458633422852,
|
40874 |
+
"learning_rate": 6.053306346787135e-10,
|
40875 |
+
"loss": 23.9484,
|
40876 |
+
"step": 5837
|
40877 |
+
},
|
40878 |
+
{
|
40879 |
+
"epoch": 1.9972630858706808,
|
40880 |
+
"grad_norm": 13.124404907226562,
|
40881 |
+
"learning_rate": 4.782861361185554e-10,
|
40882 |
+
"loss": 21.1148,
|
40883 |
+
"step": 5838
|
40884 |
+
},
|
40885 |
+
{
|
40886 |
+
"epoch": 1.9976052001368458,
|
40887 |
+
"grad_norm": 15.107348442077637,
|
40888 |
+
"learning_rate": 3.661879597938178e-10,
|
40889 |
+
"loss": 20.0414,
|
40890 |
+
"step": 5839
|
40891 |
+
},
|
40892 |
+
{
|
40893 |
+
"epoch": 1.9979473144030107,
|
40894 |
+
"grad_norm": 14.699893951416016,
|
40895 |
+
"learning_rate": 2.690361392221341e-10,
|
40896 |
+
"loss": 18.2349,
|
40897 |
+
"step": 5840
|
40898 |
+
},
|
40899 |
+
{
|
40900 |
+
"epoch": 1.9982894286691755,
|
40901 |
+
"grad_norm": 19.990739822387695,
|
40902 |
+
"learning_rate": 1.868307034302852e-10,
|
40903 |
+
"loss": 22.159,
|
40904 |
+
"step": 5841
|
40905 |
+
},
|
40906 |
+
{
|
40907 |
+
"epoch": 1.9986315429353403,
|
40908 |
+
"grad_norm": 22.302776336669922,
|
40909 |
+
"learning_rate": 1.1957167700415996e-10,
|
40910 |
+
"loss": 24.7348,
|
40911 |
+
"step": 5842
|
40912 |
+
},
|
40913 |
+
{
|
40914 |
+
"epoch": 1.9989736572015053,
|
40915 |
+
"grad_norm": 23.003162384033203,
|
40916 |
+
"learning_rate": 6.725908004434622e-11,
|
40917 |
+
"loss": 24.44,
|
40918 |
+
"step": 5843
|
40919 |
+
},
|
40920 |
+
{
|
40921 |
+
"epoch": 1.9993157714676704,
|
40922 |
+
"grad_norm": 25.137348175048828,
|
40923 |
+
"learning_rate": 2.9892928188335335e-11,
|
40924 |
+
"loss": 23.8174,
|
40925 |
+
"step": 5844
|
40926 |
+
},
|
40927 |
+
{
|
40928 |
+
"epoch": 1.9996578857338352,
|
40929 |
+
"grad_norm": 31.00333023071289,
|
40930 |
+
"learning_rate": 7.473232604970903e-12,
|
40931 |
+
"loss": 22.5995,
|
40932 |
+
"step": 5845
|
40933 |
+
},
|
40934 |
+
{
|
40935 |
+
"epoch": 2.0,
|
40936 |
+
"grad_norm": 44.4307975769043,
|
40937 |
+
"learning_rate": 0.0,
|
40938 |
+
"loss": 32.1773,
|
40939 |
+
"step": 5846
|
40940 |
}
|
40941 |
],
|
40942 |
"logging_steps": 1,
|
|
|
40951 |
"should_evaluate": false,
|
40952 |
"should_log": false,
|
40953 |
"should_save": true,
|
40954 |
+
"should_training_stop": true
|
40955 |
},
|
40956 |
"attributes": {}
|
40957 |
}
|
40958 |
},
|
40959 |
+
"total_flos": 1.8484734148686643e+17,
|
40960 |
"train_batch_size": 4,
|
40961 |
"trial_name": null,
|
40962 |
"trial_params": null
|