penglingwei
commited on
Commit
•
09e135a
1
Parent(s):
18b2222
Training in progress, step 32400, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 355967616
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d064f2332ced8d589411b314d74aa64dbff4eaac037d31068824e3f0f523bb4
|
3 |
size 355967616
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 712029114
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ab97c4aa3f15e2600cfe333e231ff19c0e23e67658b81c4a2261ae5c1af8966
|
3 |
size 712029114
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cae11df3d2278a8ff4a04dff28369bcaea0cb0b7c145bd92f25a776466f1de16
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7303cf337b0f3230e3aa10fc27672d49524c56a76ad0cc51d0583606599733d7
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -22617,6 +22617,76 @@
|
|
22617 |
"learning_rate": 4.355681129189493e-09,
|
22618 |
"loss": 0.3696,
|
22619 |
"step": 32300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22620 |
}
|
22621 |
],
|
22622 |
"logging_steps": 10,
|
@@ -22636,7 +22706,7 @@
|
|
22636 |
"attributes": {}
|
22637 |
}
|
22638 |
},
|
22639 |
-
"total_flos": 2.
|
22640 |
"train_batch_size": 8,
|
22641 |
"trial_name": null,
|
22642 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9976444506027435,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 32400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
22617 |
"learning_rate": 4.355681129189493e-09,
|
22618 |
"loss": 0.3696,
|
22619 |
"step": 32300
|
22620 |
+
},
|
22621 |
+
{
|
22622 |
+
"epoch": 0.9948732160177359,
|
22623 |
+
"grad_norm": 17.661426544189453,
|
22624 |
+
"learning_rate": 3.909265612511703e-09,
|
22625 |
+
"loss": 0.4423,
|
22626 |
+
"step": 32310
|
22627 |
+
},
|
22628 |
+
{
|
22629 |
+
"epoch": 0.9951811309716256,
|
22630 |
+
"grad_norm": 25.57279396057129,
|
22631 |
+
"learning_rate": 3.486978218514292e-09,
|
22632 |
+
"loss": 0.3579,
|
22633 |
+
"step": 32320
|
22634 |
+
},
|
22635 |
+
{
|
22636 |
+
"epoch": 0.9954890459255153,
|
22637 |
+
"grad_norm": 14.529350280761719,
|
22638 |
+
"learning_rate": 3.0888193548239687e-09,
|
22639 |
+
"loss": 0.3905,
|
22640 |
+
"step": 32330
|
22641 |
+
},
|
22642 |
+
{
|
22643 |
+
"epoch": 0.9957969608794051,
|
22644 |
+
"grad_norm": 13.980672836303711,
|
22645 |
+
"learning_rate": 2.714789405772189e-09,
|
22646 |
+
"loss": 0.2954,
|
22647 |
+
"step": 32340
|
22648 |
+
},
|
22649 |
+
{
|
22650 |
+
"epoch": 0.9961048758332949,
|
22651 |
+
"grad_norm": 15.562602043151855,
|
22652 |
+
"learning_rate": 2.364888732403481e-09,
|
22653 |
+
"loss": 0.2862,
|
22654 |
+
"step": 32350
|
22655 |
+
},
|
22656 |
+
{
|
22657 |
+
"epoch": 0.9964127907871846,
|
22658 |
+
"grad_norm": 13.155916213989258,
|
22659 |
+
"learning_rate": 2.039117672464341e-09,
|
22660 |
+
"loss": 0.4238,
|
22661 |
+
"step": 32360
|
22662 |
+
},
|
22663 |
+
{
|
22664 |
+
"epoch": 0.9967207057410743,
|
22665 |
+
"grad_norm": 10.378691673278809,
|
22666 |
+
"learning_rate": 1.7374765404143401e-09,
|
22667 |
+
"loss": 0.4403,
|
22668 |
+
"step": 32370
|
22669 |
+
},
|
22670 |
+
{
|
22671 |
+
"epoch": 0.997028620694964,
|
22672 |
+
"grad_norm": 11.533722877502441,
|
22673 |
+
"learning_rate": 1.4599656274261186e-09,
|
22674 |
+
"loss": 0.4543,
|
22675 |
+
"step": 32380
|
22676 |
+
},
|
22677 |
+
{
|
22678 |
+
"epoch": 0.9973365356488538,
|
22679 |
+
"grad_norm": 15.00934886932373,
|
22680 |
+
"learning_rate": 1.2065852013659618e-09,
|
22681 |
+
"loss": 0.5758,
|
22682 |
+
"step": 32390
|
22683 |
+
},
|
22684 |
+
{
|
22685 |
+
"epoch": 0.9976444506027435,
|
22686 |
+
"grad_norm": 19.549671173095703,
|
22687 |
+
"learning_rate": 9.773355068215528e-10,
|
22688 |
+
"loss": 0.6151,
|
22689 |
+
"step": 32400
|
22690 |
}
|
22691 |
],
|
22692 |
"logging_steps": 10,
|
|
|
22706 |
"attributes": {}
|
22707 |
}
|
22708 |
},
|
22709 |
+
"total_flos": 2.1672606302208e+18,
|
22710 |
"train_batch_size": 8,
|
22711 |
"trial_name": null,
|
22712 |
"trial_params": null
|