penglingwei
commited on
Commit
•
b5e8292
1
Parent(s):
bfd34bb
Training in progress, step 49151, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 355970836
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97f2edc2a659fa0ab865d96aba1342985ceee6f8b9e7e2e0bc2734e4fa8199de
|
3 |
size 355970836
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 712036922
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f01610765b026e4073dc792fa519799ddfee84d97839ac7114420dc7fad095dc
|
3 |
size 712036922
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:906573e220485578a572456e29e3b97d5ff5468e5305bea1db395e9e4e84a615
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6efd166389749e81c893bfe8d8ade06bfd5d181d3c7a713ed9d971329711d93
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -34377,6 +34377,41 @@
|
|
34377 |
"learning_rate": 6.384977135377723e-11,
|
34378 |
"loss": 26.9088,
|
34379 |
"step": 49100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34380 |
}
|
34381 |
],
|
34382 |
"logging_steps": 10,
|
@@ -34391,7 +34426,7 @@
|
|
34391 |
"should_evaluate": false,
|
34392 |
"should_log": false,
|
34393 |
"should_save": true,
|
34394 |
-
"should_training_stop":
|
34395 |
},
|
34396 |
"attributes": {}
|
34397 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9999853769101411,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 49151,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
34377 |
"learning_rate": 6.384977135377723e-11,
|
34378 |
"loss": 26.9088,
|
34379 |
"step": 49100
|
34380 |
+
},
|
34381 |
+
{
|
34382 |
+
"epoch": 0.999151225001669,
|
34383 |
+
"grad_norm": 103247.6640625,
|
34384 |
+
"learning_rate": 5.505419123797567e-11,
|
34385 |
+
"loss": 25.8345,
|
34386 |
+
"step": 49110
|
34387 |
+
},
|
34388 |
+
{
|
34389 |
+
"epoch": 0.9993546766866621,
|
34390 |
+
"grad_norm": 92363.125,
|
34391 |
+
"learning_rate": 4.6910120859305904e-11,
|
34392 |
+
"loss": 27.5682,
|
34393 |
+
"step": 49120
|
34394 |
+
},
|
34395 |
+
{
|
34396 |
+
"epoch": 0.9995581283716554,
|
34397 |
+
"grad_norm": 43590.00390625,
|
34398 |
+
"learning_rate": 3.941756361400956e-11,
|
34399 |
+
"loss": 23.7671,
|
34400 |
+
"step": 49130
|
34401 |
+
},
|
34402 |
+
{
|
34403 |
+
"epoch": 0.9997615800566486,
|
34404 |
+
"grad_norm": 84118.6953125,
|
34405 |
+
"learning_rate": 3.25765226263236e-11,
|
34406 |
+
"loss": 24.8344,
|
34407 |
+
"step": 49140
|
34408 |
+
},
|
34409 |
+
{
|
34410 |
+
"epoch": 0.9999650317416418,
|
34411 |
+
"grad_norm": 88703.15625,
|
34412 |
+
"learning_rate": 2.6387000748480383e-11,
|
34413 |
+
"loss": 24.6881,
|
34414 |
+
"step": 49150
|
34415 |
}
|
34416 |
],
|
34417 |
"logging_steps": 10,
|
|
|
34426 |
"should_evaluate": false,
|
34427 |
"should_log": false,
|
34428 |
"should_save": true,
|
34429 |
+
"should_training_stop": true
|
34430 |
},
|
34431 |
"attributes": {}
|
34432 |
}
|