Femboyuwu2000
committed on
Commit
•
a2aea50
1
Parent(s):
45fa759
Training in progress, step 720, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13982248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c45456e15b715e832578812b0fb91f6d12ecbc982f37418d06778355c669ff0
|
3 |
size 13982248
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7062522
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c738e308e224171798b402d3a6ac45b253baab98b34c3df38c3ff42425b504b
|
3 |
size 7062522
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eeda755d8af1c86d4b713bb10ccad9341acf16a71464d9a966bcdbb17ea7bbc4
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b25c4cabb03cb09a4b2b9dff8deb04b1e269253acadef5d021452c833fa5d68c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -252,6 +252,13 @@
|
|
252 |
"learning_rate": 2.9998033254984483e-05,
|
253 |
"loss": 3.7841,
|
254 |
"step": 700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
}
|
256 |
],
|
257 |
"logging_steps": 20,
|
@@ -259,7 +266,7 @@
|
|
259 |
"num_input_tokens_seen": 0,
|
260 |
"num_train_epochs": 2,
|
261 |
"save_steps": 20,
|
262 |
-
"total_flos":
|
263 |
"train_batch_size": 8,
|
264 |
"trial_name": null,
|
265 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.0576,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 720,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
252 |
"learning_rate": 2.9998033254984483e-05,
|
253 |
"loss": 3.7841,
|
254 |
"step": 700
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 0.06,
|
258 |
+
"grad_norm": 24.310373306274414,
|
259 |
+
"learning_rate": 2.999716791440959e-05,
|
260 |
+
"loss": 3.679,
|
261 |
+
"step": 720
|
262 |
}
|
263 |
],
|
264 |
"logging_steps": 20,
|
|
|
266 |
"num_input_tokens_seen": 0,
|
267 |
"num_train_epochs": 2,
|
268 |
"save_steps": 20,
|
269 |
+
"total_flos": 1713780872970240.0,
|
270 |
"train_batch_size": 8,
|
271 |
"trial_name": null,
|
272 |
"trial_params": null
|