Femboyuwu2000
commited on
Commit
•
9b06720
1
Parent(s):
5ea9e13
Training in progress, step 14200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13982248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c223a46d77a0541dcf420db07f84a2df91cb6efadc0cbaf5a4f31fc8b2d4bda2
|
3 |
size 13982248
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7062522
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e22222c7e481af1bfbb43f8dbffa1dee9047bc0752a761bb5f0ff95b2a33adbb
|
3 |
size 7062522
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:766c0d79e698c319904b5a6e85f8ea9f97dc14779ac45e924ad2fddbd9283585
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c6bde81151072c66a172cffde1bbb8230ce11a6a939cceab6c676f6ad133502
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4970,6 +4970,13 @@
|
|
4970 |
"learning_rate": 1.0305368692688174e-05,
|
4971 |
"loss": 3.4774,
|
4972 |
"step": 14180
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4973 |
}
|
4974 |
],
|
4975 |
"logging_steps": 20,
|
@@ -4977,7 +4984,7 @@
|
|
4977 |
"num_input_tokens_seen": 0,
|
4978 |
"num_train_epochs": 1,
|
4979 |
"save_steps": 20,
|
4980 |
-
"total_flos": 3.
|
4981 |
"train_batch_size": 8,
|
4982 |
"trial_name": null,
|
4983 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.20095382307572562,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 14200,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4970 |
"learning_rate": 1.0305368692688174e-05,
|
4971 |
"loss": 3.4774,
|
4972 |
"step": 14180
|
4973 |
+
},
|
4974 |
+
{
|
4975 |
+
"epoch": 0.2,
|
4976 |
+
"grad_norm": 23.29683494567871,
|
4977 |
+
"learning_rate": 1.0239940674851941e-05,
|
4978 |
+
"loss": 3.5437,
|
4979 |
+
"step": 14200
|
4980 |
}
|
4981 |
],
|
4982 |
"logging_steps": 20,
|
|
|
4984 |
"num_input_tokens_seen": 0,
|
4985 |
"num_train_epochs": 1,
|
4986 |
"save_steps": 20,
|
4987 |
+
"total_flos": 3.084582491140915e+16,
|
4988 |
"train_batch_size": 8,
|
4989 |
"trial_name": null,
|
4990 |
"trial_params": null
|