Training in progress, step 100000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +2419 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37f683478bb8b21e00c0c74eb9d446bcf7b5cb5d8713b232385c7f1a4c9244d9
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ece555ebd5e5c6fe697b5c0892a0069538c02673bcdea4dd5ba7a2fbb86221c
|
3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 19603
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1563cbc1ed661f4d7cd48c7ebfe5d97447f92fa708e98efa17ebd50f337cd1f3
|
3 |
size 19603
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f28e7dd5089fc108050723c00a6956391616d298b19b067cb1fe799c89cf3b3c
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc741bd201acfbf14b76c991f1da2695d0be2064cfb57a0af565660dec5446de
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 18.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -118390,11 +118390,2427 @@
|
|
118390 |
"eval_samples_per_second": 63.888,
|
118391 |
"eval_steps_per_second": 0.677,
|
118392 |
"step": 98000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118393 |
}
|
118394 |
],
|
118395 |
"max_steps": 100000,
|
118396 |
"num_train_epochs": 20,
|
118397 |
-
"total_flos": 1.
|
118398 |
"trial_name": null,
|
118399 |
"trial_params": null
|
118400 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 18.46807269899893,
|
5 |
+
"global_step": 100000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
118390 |
"eval_samples_per_second": 63.888,
|
118391 |
"eval_steps_per_second": 0.677,
|
118392 |
"step": 98000
|
118393 |
+
},
|
118394 |
+
{
|
118395 |
+
"epoch": 18.08,
|
118396 |
+
"learning_rate": 1.0218966728802669e-05,
|
118397 |
+
"loss": 0.4018,
|
118398 |
+
"step": 98005
|
118399 |
+
},
|
118400 |
+
{
|
118401 |
+
"epoch": 18.08,
|
118402 |
+
"learning_rate": 1.021789082613816e-05,
|
118403 |
+
"loss": 0.4046,
|
118404 |
+
"step": 98010
|
118405 |
+
},
|
118406 |
+
{
|
118407 |
+
"epoch": 18.08,
|
118408 |
+
"learning_rate": 1.0216817570553575e-05,
|
118409 |
+
"loss": 0.4138,
|
118410 |
+
"step": 98015
|
118411 |
+
},
|
118412 |
+
{
|
118413 |
+
"epoch": 18.08,
|
118414 |
+
"learning_rate": 1.0215746962075409e-05,
|
118415 |
+
"loss": 0.4123,
|
118416 |
+
"step": 98020
|
118417 |
+
},
|
118418 |
+
{
|
118419 |
+
"epoch": 18.08,
|
118420 |
+
"learning_rate": 1.0214679000730085e-05,
|
118421 |
+
"loss": 0.4035,
|
118422 |
+
"step": 98025
|
118423 |
+
},
|
118424 |
+
{
|
118425 |
+
"epoch": 18.09,
|
118426 |
+
"learning_rate": 1.0213613686543943e-05,
|
118427 |
+
"loss": 0.4105,
|
118428 |
+
"step": 98030
|
118429 |
+
},
|
118430 |
+
{
|
118431 |
+
"epoch": 18.09,
|
118432 |
+
"learning_rate": 1.0212551019543276e-05,
|
118433 |
+
"loss": 0.4044,
|
118434 |
+
"step": 98035
|
118435 |
+
},
|
118436 |
+
{
|
118437 |
+
"epoch": 18.09,
|
118438 |
+
"learning_rate": 1.0211490999754295e-05,
|
118439 |
+
"loss": 0.411,
|
118440 |
+
"step": 98040
|
118441 |
+
},
|
118442 |
+
{
|
118443 |
+
"epoch": 18.09,
|
118444 |
+
"learning_rate": 1.0210433627203157e-05,
|
118445 |
+
"loss": 0.4045,
|
118446 |
+
"step": 98045
|
118447 |
+
},
|
118448 |
+
{
|
118449 |
+
"epoch": 18.09,
|
118450 |
+
"learning_rate": 1.0209378901915956e-05,
|
118451 |
+
"loss": 0.4116,
|
118452 |
+
"step": 98050
|
118453 |
+
},
|
118454 |
+
{
|
118455 |
+
"epoch": 18.09,
|
118456 |
+
"learning_rate": 1.020832682391872e-05,
|
118457 |
+
"loss": 0.4044,
|
118458 |
+
"step": 98055
|
118459 |
+
},
|
118460 |
+
{
|
118461 |
+
"epoch": 18.09,
|
118462 |
+
"learning_rate": 1.0207277393237397e-05,
|
118463 |
+
"loss": 0.4102,
|
118464 |
+
"step": 98060
|
118465 |
+
},
|
118466 |
+
{
|
118467 |
+
"epoch": 18.09,
|
118468 |
+
"learning_rate": 1.0206230609897896e-05,
|
118469 |
+
"loss": 0.4097,
|
118470 |
+
"step": 98065
|
118471 |
+
},
|
118472 |
+
{
|
118473 |
+
"epoch": 18.09,
|
118474 |
+
"learning_rate": 1.020518647392603e-05,
|
118475 |
+
"loss": 0.4076,
|
118476 |
+
"step": 98070
|
118477 |
+
},
|
118478 |
+
{
|
118479 |
+
"epoch": 18.09,
|
118480 |
+
"learning_rate": 1.0204144985347559e-05,
|
118481 |
+
"loss": 0.4071,
|
118482 |
+
"step": 98075
|
118483 |
+
},
|
118484 |
+
{
|
118485 |
+
"epoch": 18.09,
|
118486 |
+
"learning_rate": 1.0203106144188201e-05,
|
118487 |
+
"loss": 0.4087,
|
118488 |
+
"step": 98080
|
118489 |
+
},
|
118490 |
+
{
|
118491 |
+
"epoch": 18.1,
|
118492 |
+
"learning_rate": 1.020206995047358e-05,
|
118493 |
+
"loss": 0.4125,
|
118494 |
+
"step": 98085
|
118495 |
+
},
|
118496 |
+
{
|
118497 |
+
"epoch": 18.1,
|
118498 |
+
"learning_rate": 1.020103640422925e-05,
|
118499 |
+
"loss": 0.4144,
|
118500 |
+
"step": 98090
|
118501 |
+
},
|
118502 |
+
{
|
118503 |
+
"epoch": 18.1,
|
118504 |
+
"learning_rate": 1.0200005505480736e-05,
|
118505 |
+
"loss": 0.4106,
|
118506 |
+
"step": 98095
|
118507 |
+
},
|
118508 |
+
{
|
118509 |
+
"epoch": 18.1,
|
118510 |
+
"learning_rate": 1.0198977254253452e-05,
|
118511 |
+
"loss": 0.4036,
|
118512 |
+
"step": 98100
|
118513 |
+
},
|
118514 |
+
{
|
118515 |
+
"epoch": 18.1,
|
118516 |
+
"learning_rate": 1.0197951650572778e-05,
|
118517 |
+
"loss": 0.4086,
|
118518 |
+
"step": 98105
|
118519 |
+
},
|
118520 |
+
{
|
118521 |
+
"epoch": 18.1,
|
118522 |
+
"learning_rate": 1.0196928694464023e-05,
|
118523 |
+
"loss": 0.4063,
|
118524 |
+
"step": 98110
|
118525 |
+
},
|
118526 |
+
{
|
118527 |
+
"epoch": 18.1,
|
118528 |
+
"learning_rate": 1.0195908385952423e-05,
|
118529 |
+
"loss": 0.4091,
|
118530 |
+
"step": 98115
|
118531 |
+
},
|
118532 |
+
{
|
118533 |
+
"epoch": 18.1,
|
118534 |
+
"learning_rate": 1.0194890725063153e-05,
|
118535 |
+
"loss": 0.41,
|
118536 |
+
"step": 98120
|
118537 |
+
},
|
118538 |
+
{
|
118539 |
+
"epoch": 18.1,
|
118540 |
+
"learning_rate": 1.0193875711821333e-05,
|
118541 |
+
"loss": 0.409,
|
118542 |
+
"step": 98125
|
118543 |
+
},
|
118544 |
+
{
|
118545 |
+
"epoch": 18.1,
|
118546 |
+
"learning_rate": 1.0192863346251992e-05,
|
118547 |
+
"loss": 0.4071,
|
118548 |
+
"step": 98130
|
118549 |
+
},
|
118550 |
+
{
|
118551 |
+
"epoch": 18.11,
|
118552 |
+
"learning_rate": 1.019185362838011e-05,
|
118553 |
+
"loss": 0.4115,
|
118554 |
+
"step": 98135
|
118555 |
+
},
|
118556 |
+
{
|
118557 |
+
"epoch": 18.11,
|
118558 |
+
"learning_rate": 1.019084655823062e-05,
|
118559 |
+
"loss": 0.4109,
|
118560 |
+
"step": 98140
|
118561 |
+
},
|
118562 |
+
{
|
118563 |
+
"epoch": 18.11,
|
118564 |
+
"learning_rate": 1.0189842135828343e-05,
|
118565 |
+
"loss": 0.4073,
|
118566 |
+
"step": 98145
|
118567 |
+
},
|
118568 |
+
{
|
118569 |
+
"epoch": 18.11,
|
118570 |
+
"learning_rate": 1.0188840361198094e-05,
|
118571 |
+
"loss": 0.4062,
|
118572 |
+
"step": 98150
|
118573 |
+
},
|
118574 |
+
{
|
118575 |
+
"epoch": 18.11,
|
118576 |
+
"learning_rate": 1.0187841234364555e-05,
|
118577 |
+
"loss": 0.4083,
|
118578 |
+
"step": 98155
|
118579 |
+
},
|
118580 |
+
{
|
118581 |
+
"epoch": 18.11,
|
118582 |
+
"learning_rate": 1.0186844755352406e-05,
|
118583 |
+
"loss": 0.4064,
|
118584 |
+
"step": 98160
|
118585 |
+
},
|
118586 |
+
{
|
118587 |
+
"epoch": 18.11,
|
118588 |
+
"learning_rate": 1.018585092418622e-05,
|
118589 |
+
"loss": 0.4069,
|
118590 |
+
"step": 98165
|
118591 |
+
},
|
118592 |
+
{
|
118593 |
+
"epoch": 18.11,
|
118594 |
+
"learning_rate": 1.0184859740890526e-05,
|
118595 |
+
"loss": 0.4071,
|
118596 |
+
"step": 98170
|
118597 |
+
},
|
118598 |
+
{
|
118599 |
+
"epoch": 18.11,
|
118600 |
+
"learning_rate": 1.018387120548979e-05,
|
118601 |
+
"loss": 0.4037,
|
118602 |
+
"step": 98175
|
118603 |
+
},
|
118604 |
+
{
|
118605 |
+
"epoch": 18.11,
|
118606 |
+
"learning_rate": 1.018288531800838e-05,
|
118607 |
+
"loss": 0.4053,
|
118608 |
+
"step": 98180
|
118609 |
+
},
|
118610 |
+
{
|
118611 |
+
"epoch": 18.12,
|
118612 |
+
"learning_rate": 1.0181902078470635e-05,
|
118613 |
+
"loss": 0.4081,
|
118614 |
+
"step": 98185
|
118615 |
+
},
|
118616 |
+
{
|
118617 |
+
"epoch": 18.12,
|
118618 |
+
"learning_rate": 1.0180921486900806e-05,
|
118619 |
+
"loss": 0.4062,
|
118620 |
+
"step": 98190
|
118621 |
+
},
|
118622 |
+
{
|
118623 |
+
"epoch": 18.12,
|
118624 |
+
"learning_rate": 1.0179943543323107e-05,
|
118625 |
+
"loss": 0.409,
|
118626 |
+
"step": 98195
|
118627 |
+
},
|
118628 |
+
{
|
118629 |
+
"epoch": 18.12,
|
118630 |
+
"learning_rate": 1.0178968247761648e-05,
|
118631 |
+
"loss": 0.4101,
|
118632 |
+
"step": 98200
|
118633 |
+
},
|
118634 |
+
{
|
118635 |
+
"epoch": 18.12,
|
118636 |
+
"learning_rate": 1.01779956002405e-05,
|
118637 |
+
"loss": 0.4067,
|
118638 |
+
"step": 98205
|
118639 |
+
},
|
118640 |
+
{
|
118641 |
+
"epoch": 18.12,
|
118642 |
+
"learning_rate": 1.0177025600783679e-05,
|
118643 |
+
"loss": 0.4126,
|
118644 |
+
"step": 98210
|
118645 |
+
},
|
118646 |
+
{
|
118647 |
+
"epoch": 18.12,
|
118648 |
+
"learning_rate": 1.0176058249415088e-05,
|
118649 |
+
"loss": 0.4064,
|
118650 |
+
"step": 98215
|
118651 |
+
},
|
118652 |
+
{
|
118653 |
+
"epoch": 18.12,
|
118654 |
+
"learning_rate": 1.017509354615861e-05,
|
118655 |
+
"loss": 0.408,
|
118656 |
+
"step": 98220
|
118657 |
+
},
|
118658 |
+
{
|
118659 |
+
"epoch": 18.12,
|
118660 |
+
"learning_rate": 1.0174131491038067e-05,
|
118661 |
+
"loss": 0.4072,
|
118662 |
+
"step": 98225
|
118663 |
+
},
|
118664 |
+
{
|
118665 |
+
"epoch": 18.12,
|
118666 |
+
"learning_rate": 1.017317208407716e-05,
|
118667 |
+
"loss": 0.4067,
|
118668 |
+
"step": 98230
|
118669 |
+
},
|
118670 |
+
{
|
118671 |
+
"epoch": 18.12,
|
118672 |
+
"learning_rate": 1.0172215325299595e-05,
|
118673 |
+
"loss": 0.4086,
|
118674 |
+
"step": 98235
|
118675 |
+
},
|
118676 |
+
{
|
118677 |
+
"epoch": 18.13,
|
118678 |
+
"learning_rate": 1.0171261214728953e-05,
|
118679 |
+
"loss": 0.4045,
|
118680 |
+
"step": 98240
|
118681 |
+
},
|
118682 |
+
{
|
118683 |
+
"epoch": 18.13,
|
118684 |
+
"learning_rate": 1.0170309752388794e-05,
|
118685 |
+
"loss": 0.4113,
|
118686 |
+
"step": 98245
|
118687 |
+
},
|
118688 |
+
{
|
118689 |
+
"epoch": 18.13,
|
118690 |
+
"learning_rate": 1.0169360938302582e-05,
|
118691 |
+
"loss": 0.4164,
|
118692 |
+
"step": 98250
|
118693 |
+
},
|
118694 |
+
{
|
118695 |
+
"epoch": 18.13,
|
118696 |
+
"learning_rate": 1.0168414772493744e-05,
|
118697 |
+
"loss": 0.4084,
|
118698 |
+
"step": 98255
|
118699 |
+
},
|
118700 |
+
{
|
118701 |
+
"epoch": 18.13,
|
118702 |
+
"learning_rate": 1.0167471254985602e-05,
|
118703 |
+
"loss": 0.4106,
|
118704 |
+
"step": 98260
|
118705 |
+
},
|
118706 |
+
{
|
118707 |
+
"epoch": 18.13,
|
118708 |
+
"learning_rate": 1.0166530385801455e-05,
|
118709 |
+
"loss": 0.4108,
|
118710 |
+
"step": 98265
|
118711 |
+
},
|
118712 |
+
{
|
118713 |
+
"epoch": 18.13,
|
118714 |
+
"learning_rate": 1.0165592164964525e-05,
|
118715 |
+
"loss": 0.4145,
|
118716 |
+
"step": 98270
|
118717 |
+
},
|
118718 |
+
{
|
118719 |
+
"epoch": 18.13,
|
118720 |
+
"learning_rate": 1.0164656592497931e-05,
|
118721 |
+
"loss": 0.4124,
|
118722 |
+
"step": 98275
|
118723 |
+
},
|
118724 |
+
{
|
118725 |
+
"epoch": 18.13,
|
118726 |
+
"learning_rate": 1.016372366842479e-05,
|
118727 |
+
"loss": 0.4069,
|
118728 |
+
"step": 98280
|
118729 |
+
},
|
118730 |
+
{
|
118731 |
+
"epoch": 18.13,
|
118732 |
+
"learning_rate": 1.0162793392768091e-05,
|
118733 |
+
"loss": 0.4078,
|
118734 |
+
"step": 98285
|
118735 |
+
},
|
118736 |
+
{
|
118737 |
+
"epoch": 18.14,
|
118738 |
+
"learning_rate": 1.0161865765550818e-05,
|
118739 |
+
"loss": 0.4144,
|
118740 |
+
"step": 98290
|
118741 |
+
},
|
118742 |
+
{
|
118743 |
+
"epoch": 18.14,
|
118744 |
+
"learning_rate": 1.016094078679584e-05,
|
118745 |
+
"loss": 0.4066,
|
118746 |
+
"step": 98295
|
118747 |
+
},
|
118748 |
+
{
|
118749 |
+
"epoch": 18.14,
|
118750 |
+
"learning_rate": 1.0160018456525988e-05,
|
118751 |
+
"loss": 0.4154,
|
118752 |
+
"step": 98300
|
118753 |
+
},
|
118754 |
+
{
|
118755 |
+
"epoch": 18.14,
|
118756 |
+
"learning_rate": 1.015909877476401e-05,
|
118757 |
+
"loss": 0.4052,
|
118758 |
+
"step": 98305
|
118759 |
+
},
|
118760 |
+
{
|
118761 |
+
"epoch": 18.14,
|
118762 |
+
"learning_rate": 1.0158181741532607e-05,
|
118763 |
+
"loss": 0.4074,
|
118764 |
+
"step": 98310
|
118765 |
+
},
|
118766 |
+
{
|
118767 |
+
"epoch": 18.14,
|
118768 |
+
"learning_rate": 1.0157267356854405e-05,
|
118769 |
+
"loss": 0.4119,
|
118770 |
+
"step": 98315
|
118771 |
+
},
|
118772 |
+
{
|
118773 |
+
"epoch": 18.14,
|
118774 |
+
"learning_rate": 1.0156355620751963e-05,
|
118775 |
+
"loss": 0.4069,
|
118776 |
+
"step": 98320
|
118777 |
+
},
|
118778 |
+
{
|
118779 |
+
"epoch": 18.14,
|
118780 |
+
"learning_rate": 1.0155446533247778e-05,
|
118781 |
+
"loss": 0.4046,
|
118782 |
+
"step": 98325
|
118783 |
+
},
|
118784 |
+
{
|
118785 |
+
"epoch": 18.14,
|
118786 |
+
"learning_rate": 1.0154540094364277e-05,
|
118787 |
+
"loss": 0.4049,
|
118788 |
+
"step": 98330
|
118789 |
+
},
|
118790 |
+
{
|
118791 |
+
"epoch": 18.14,
|
118792 |
+
"learning_rate": 1.0153636304123833e-05,
|
118793 |
+
"loss": 0.4092,
|
118794 |
+
"step": 98335
|
118795 |
+
},
|
118796 |
+
{
|
118797 |
+
"epoch": 18.15,
|
118798 |
+
"learning_rate": 1.0152735162548734e-05,
|
118799 |
+
"loss": 0.405,
|
118800 |
+
"step": 98340
|
118801 |
+
},
|
118802 |
+
{
|
118803 |
+
"epoch": 18.15,
|
118804 |
+
"learning_rate": 1.0151836669661236e-05,
|
118805 |
+
"loss": 0.4064,
|
118806 |
+
"step": 98345
|
118807 |
+
},
|
118808 |
+
{
|
118809 |
+
"epoch": 18.15,
|
118810 |
+
"learning_rate": 1.0150940825483491e-05,
|
118811 |
+
"loss": 0.4118,
|
118812 |
+
"step": 98350
|
118813 |
+
},
|
118814 |
+
{
|
118815 |
+
"epoch": 18.15,
|
118816 |
+
"learning_rate": 1.0150047630037602e-05,
|
118817 |
+
"loss": 0.4049,
|
118818 |
+
"step": 98355
|
118819 |
+
},
|
118820 |
+
{
|
118821 |
+
"epoch": 18.15,
|
118822 |
+
"learning_rate": 1.0149157083345625e-05,
|
118823 |
+
"loss": 0.4039,
|
118824 |
+
"step": 98360
|
118825 |
+
},
|
118826 |
+
{
|
118827 |
+
"epoch": 18.15,
|
118828 |
+
"learning_rate": 1.0148269185429508e-05,
|
118829 |
+
"loss": 0.4085,
|
118830 |
+
"step": 98365
|
118831 |
+
},
|
118832 |
+
{
|
118833 |
+
"epoch": 18.15,
|
118834 |
+
"learning_rate": 1.0147383936311186e-05,
|
118835 |
+
"loss": 0.4053,
|
118836 |
+
"step": 98370
|
118837 |
+
},
|
118838 |
+
{
|
118839 |
+
"epoch": 18.15,
|
118840 |
+
"learning_rate": 1.014650133601248e-05,
|
118841 |
+
"loss": 0.4029,
|
118842 |
+
"step": 98375
|
118843 |
+
},
|
118844 |
+
{
|
118845 |
+
"epoch": 18.15,
|
118846 |
+
"learning_rate": 1.0145621384555178e-05,
|
118847 |
+
"loss": 0.4047,
|
118848 |
+
"step": 98380
|
118849 |
+
},
|
118850 |
+
{
|
118851 |
+
"epoch": 18.15,
|
118852 |
+
"learning_rate": 1.0144744081960993e-05,
|
118853 |
+
"loss": 0.41,
|
118854 |
+
"step": 98385
|
118855 |
+
},
|
118856 |
+
{
|
118857 |
+
"epoch": 18.16,
|
118858 |
+
"learning_rate": 1.0143869428251562e-05,
|
118859 |
+
"loss": 0.4048,
|
118860 |
+
"step": 98390
|
118861 |
+
},
|
118862 |
+
{
|
118863 |
+
"epoch": 18.16,
|
118864 |
+
"learning_rate": 1.0142997423448477e-05,
|
118865 |
+
"loss": 0.4069,
|
118866 |
+
"step": 98395
|
118867 |
+
},
|
118868 |
+
{
|
118869 |
+
"epoch": 18.16,
|
118870 |
+
"learning_rate": 1.0142128067573252e-05,
|
118871 |
+
"loss": 0.4037,
|
118872 |
+
"step": 98400
|
118873 |
+
},
|
118874 |
+
{
|
118875 |
+
"epoch": 18.16,
|
118876 |
+
"learning_rate": 1.0141261360647338e-05,
|
118877 |
+
"loss": 0.4071,
|
118878 |
+
"step": 98405
|
118879 |
+
},
|
118880 |
+
{
|
118881 |
+
"epoch": 18.16,
|
118882 |
+
"learning_rate": 1.014039730269211e-05,
|
118883 |
+
"loss": 0.4031,
|
118884 |
+
"step": 98410
|
118885 |
+
},
|
118886 |
+
{
|
118887 |
+
"epoch": 18.16,
|
118888 |
+
"learning_rate": 1.0139535893728894e-05,
|
118889 |
+
"loss": 0.4099,
|
118890 |
+
"step": 98415
|
118891 |
+
},
|
118892 |
+
{
|
118893 |
+
"epoch": 18.16,
|
118894 |
+
"learning_rate": 1.0138677133778948e-05,
|
118895 |
+
"loss": 0.4134,
|
118896 |
+
"step": 98420
|
118897 |
+
},
|
118898 |
+
{
|
118899 |
+
"epoch": 18.16,
|
118900 |
+
"learning_rate": 1.0137821022863454e-05,
|
118901 |
+
"loss": 0.4097,
|
118902 |
+
"step": 98425
|
118903 |
+
},
|
118904 |
+
{
|
118905 |
+
"epoch": 18.16,
|
118906 |
+
"learning_rate": 1.0136967561003539e-05,
|
118907 |
+
"loss": 0.4054,
|
118908 |
+
"step": 98430
|
118909 |
+
},
|
118910 |
+
{
|
118911 |
+
"epoch": 18.16,
|
118912 |
+
"learning_rate": 1.0136116748220269e-05,
|
118913 |
+
"loss": 0.4041,
|
118914 |
+
"step": 98435
|
118915 |
+
},
|
118916 |
+
{
|
118917 |
+
"epoch": 18.16,
|
118918 |
+
"learning_rate": 1.0135268584534622e-05,
|
118919 |
+
"loss": 0.4148,
|
118920 |
+
"step": 98440
|
118921 |
+
},
|
118922 |
+
{
|
118923 |
+
"epoch": 18.17,
|
118924 |
+
"learning_rate": 1.0134423069967546e-05,
|
118925 |
+
"loss": 0.4012,
|
118926 |
+
"step": 98445
|
118927 |
+
},
|
118928 |
+
{
|
118929 |
+
"epoch": 18.17,
|
118930 |
+
"learning_rate": 1.0133580204539879e-05,
|
118931 |
+
"loss": 0.4096,
|
118932 |
+
"step": 98450
|
118933 |
+
},
|
118934 |
+
{
|
118935 |
+
"epoch": 18.17,
|
118936 |
+
"learning_rate": 1.0132739988272434e-05,
|
118937 |
+
"loss": 0.4123,
|
118938 |
+
"step": 98455
|
118939 |
+
},
|
118940 |
+
{
|
118941 |
+
"epoch": 18.17,
|
118942 |
+
"learning_rate": 1.0131902421185933e-05,
|
118943 |
+
"loss": 0.4108,
|
118944 |
+
"step": 98460
|
118945 |
+
},
|
118946 |
+
{
|
118947 |
+
"epoch": 18.17,
|
118948 |
+
"learning_rate": 1.0131067503301058e-05,
|
118949 |
+
"loss": 0.4102,
|
118950 |
+
"step": 98465
|
118951 |
+
},
|
118952 |
+
{
|
118953 |
+
"epoch": 18.17,
|
118954 |
+
"learning_rate": 1.0130235234638395e-05,
|
118955 |
+
"loss": 0.4018,
|
118956 |
+
"step": 98470
|
118957 |
+
},
|
118958 |
+
{
|
118959 |
+
"epoch": 18.17,
|
118960 |
+
"learning_rate": 1.0129405615218487e-05,
|
118961 |
+
"loss": 0.4055,
|
118962 |
+
"step": 98475
|
118963 |
+
},
|
118964 |
+
{
|
118965 |
+
"epoch": 18.17,
|
118966 |
+
"learning_rate": 1.0128578645061798e-05,
|
118967 |
+
"loss": 0.4092,
|
118968 |
+
"step": 98480
|
118969 |
+
},
|
118970 |
+
{
|
118971 |
+
"epoch": 18.17,
|
118972 |
+
"learning_rate": 1.0127754324188729e-05,
|
118973 |
+
"loss": 0.4109,
|
118974 |
+
"step": 98485
|
118975 |
+
},
|
118976 |
+
{
|
118977 |
+
"epoch": 18.17,
|
118978 |
+
"learning_rate": 1.0126932652619638e-05,
|
118979 |
+
"loss": 0.4084,
|
118980 |
+
"step": 98490
|
118981 |
+
},
|
118982 |
+
{
|
118983 |
+
"epoch": 18.18,
|
118984 |
+
"learning_rate": 1.0126113630374782e-05,
|
118985 |
+
"loss": 0.4056,
|
118986 |
+
"step": 98495
|
118987 |
+
},
|
118988 |
+
{
|
118989 |
+
"epoch": 18.18,
|
118990 |
+
"learning_rate": 1.0125297257474363e-05,
|
118991 |
+
"loss": 0.4108,
|
118992 |
+
"step": 98500
|
118993 |
+
},
|
118994 |
+
{
|
118995 |
+
"epoch": 18.18,
|
118996 |
+
"learning_rate": 1.0124483533938553e-05,
|
118997 |
+
"loss": 0.4039,
|
118998 |
+
"step": 98505
|
118999 |
+
},
|
119000 |
+
{
|
119001 |
+
"epoch": 18.18,
|
119002 |
+
"learning_rate": 1.0123672459787401e-05,
|
119003 |
+
"loss": 0.4064,
|
119004 |
+
"step": 98510
|
119005 |
+
},
|
119006 |
+
{
|
119007 |
+
"epoch": 18.18,
|
119008 |
+
"learning_rate": 1.0122864035040937e-05,
|
119009 |
+
"loss": 0.4131,
|
119010 |
+
"step": 98515
|
119011 |
+
},
|
119012 |
+
{
|
119013 |
+
"epoch": 18.18,
|
119014 |
+
"learning_rate": 1.01220582597191e-05,
|
119015 |
+
"loss": 0.4057,
|
119016 |
+
"step": 98520
|
119017 |
+
},
|
119018 |
+
{
|
119019 |
+
"epoch": 18.18,
|
119020 |
+
"learning_rate": 1.0121255133841766e-05,
|
119021 |
+
"loss": 0.4069,
|
119022 |
+
"step": 98525
|
119023 |
+
},
|
119024 |
+
{
|
119025 |
+
"epoch": 18.18,
|
119026 |
+
"learning_rate": 1.0120454657428767e-05,
|
119027 |
+
"loss": 0.4016,
|
119028 |
+
"step": 98530
|
119029 |
+
},
|
119030 |
+
{
|
119031 |
+
"epoch": 18.18,
|
119032 |
+
"learning_rate": 1.0119656830499845e-05,
|
119033 |
+
"loss": 0.4096,
|
119034 |
+
"step": 98535
|
119035 |
+
},
|
119036 |
+
{
|
119037 |
+
"epoch": 18.18,
|
119038 |
+
"learning_rate": 1.011886165307468e-05,
|
119039 |
+
"loss": 0.4103,
|
119040 |
+
"step": 98540
|
119041 |
+
},
|
119042 |
+
{
|
119043 |
+
"epoch": 18.19,
|
119044 |
+
"learning_rate": 1.0118069125172907e-05,
|
119045 |
+
"loss": 0.4084,
|
119046 |
+
"step": 98545
|
119047 |
+
},
|
119048 |
+
{
|
119049 |
+
"epoch": 18.19,
|
119050 |
+
"learning_rate": 1.0117279246814072e-05,
|
119051 |
+
"loss": 0.4096,
|
119052 |
+
"step": 98550
|
119053 |
+
},
|
119054 |
+
{
|
119055 |
+
"epoch": 18.19,
|
119056 |
+
"learning_rate": 1.0116492018017667e-05,
|
119057 |
+
"loss": 0.404,
|
119058 |
+
"step": 98555
|
119059 |
+
},
|
119060 |
+
{
|
119061 |
+
"epoch": 18.19,
|
119062 |
+
"learning_rate": 1.0115707438803099e-05,
|
119063 |
+
"loss": 0.4102,
|
119064 |
+
"step": 98560
|
119065 |
+
},
|
119066 |
+
{
|
119067 |
+
"epoch": 18.19,
|
119068 |
+
"learning_rate": 1.0114925509189759e-05,
|
119069 |
+
"loss": 0.4064,
|
119070 |
+
"step": 98565
|
119071 |
+
},
|
119072 |
+
{
|
119073 |
+
"epoch": 18.19,
|
119074 |
+
"learning_rate": 1.0114146229196912e-05,
|
119075 |
+
"loss": 0.4036,
|
119076 |
+
"step": 98570
|
119077 |
+
},
|
119078 |
+
{
|
119079 |
+
"epoch": 18.19,
|
119080 |
+
"learning_rate": 1.0113369598843809e-05,
|
119081 |
+
"loss": 0.4089,
|
119082 |
+
"step": 98575
|
119083 |
+
},
|
119084 |
+
{
|
119085 |
+
"epoch": 18.19,
|
119086 |
+
"learning_rate": 1.0112595618149581e-05,
|
119087 |
+
"loss": 0.4079,
|
119088 |
+
"step": 98580
|
119089 |
+
},
|
119090 |
+
{
|
119091 |
+
"epoch": 18.19,
|
119092 |
+
"learning_rate": 1.0111824287133362e-05,
|
119093 |
+
"loss": 0.4104,
|
119094 |
+
"step": 98585
|
119095 |
+
},
|
119096 |
+
{
|
119097 |
+
"epoch": 18.19,
|
119098 |
+
"learning_rate": 1.011105560581416e-05,
|
119099 |
+
"loss": 0.4072,
|
119100 |
+
"step": 98590
|
119101 |
+
},
|
119102 |
+
{
|
119103 |
+
"epoch": 18.19,
|
119104 |
+
"learning_rate": 1.0110289574210955e-05,
|
119105 |
+
"loss": 0.4052,
|
119106 |
+
"step": 98595
|
119107 |
+
},
|
119108 |
+
{
|
119109 |
+
"epoch": 18.2,
|
119110 |
+
"learning_rate": 1.0109526192342637e-05,
|
119111 |
+
"loss": 0.4058,
|
119112 |
+
"step": 98600
|
119113 |
+
},
|
119114 |
+
{
|
119115 |
+
"epoch": 18.2,
|
119116 |
+
"learning_rate": 1.0108765460228043e-05,
|
119117 |
+
"loss": 0.4105,
|
119118 |
+
"step": 98605
|
119119 |
+
},
|
119120 |
+
{
|
119121 |
+
"epoch": 18.2,
|
119122 |
+
"learning_rate": 1.0108007377885944e-05,
|
119123 |
+
"loss": 0.4057,
|
119124 |
+
"step": 98610
|
119125 |
+
},
|
119126 |
+
{
|
119127 |
+
"epoch": 18.2,
|
119128 |
+
"learning_rate": 1.0107251945335054e-05,
|
119129 |
+
"loss": 0.4056,
|
119130 |
+
"step": 98615
|
119131 |
+
},
|
119132 |
+
{
|
119133 |
+
"epoch": 18.2,
|
119134 |
+
"learning_rate": 1.0106499162593993e-05,
|
119135 |
+
"loss": 0.4074,
|
119136 |
+
"step": 98620
|
119137 |
+
},
|
119138 |
+
{
|
119139 |
+
"epoch": 18.2,
|
119140 |
+
"learning_rate": 1.0105749029681366e-05,
|
119141 |
+
"loss": 0.4061,
|
119142 |
+
"step": 98625
|
119143 |
+
},
|
119144 |
+
{
|
119145 |
+
"epoch": 18.2,
|
119146 |
+
"learning_rate": 1.0105001546615648e-05,
|
119147 |
+
"loss": 0.4065,
|
119148 |
+
"step": 98630
|
119149 |
+
},
|
119150 |
+
{
|
119151 |
+
"epoch": 18.2,
|
119152 |
+
"learning_rate": 1.0104256713415303e-05,
|
119153 |
+
"loss": 0.4138,
|
119154 |
+
"step": 98635
|
119155 |
+
},
|
119156 |
+
{
|
119157 |
+
"epoch": 18.2,
|
119158 |
+
"learning_rate": 1.0103514530098711e-05,
|
119159 |
+
"loss": 0.4169,
|
119160 |
+
"step": 98640
|
119161 |
+
},
|
119162 |
+
{
|
119163 |
+
"epoch": 18.2,
|
119164 |
+
"learning_rate": 1.0102774996684168e-05,
|
119165 |
+
"loss": 0.4108,
|
119166 |
+
"step": 98645
|
119167 |
+
},
|
119168 |
+
{
|
119169 |
+
"epoch": 18.21,
|
119170 |
+
"learning_rate": 1.0102038113189948e-05,
|
119171 |
+
"loss": 0.4073,
|
119172 |
+
"step": 98650
|
119173 |
+
},
|
119174 |
+
{
|
119175 |
+
"epoch": 18.21,
|
119176 |
+
"learning_rate": 1.01013038796342e-05,
|
119177 |
+
"loss": 0.4045,
|
119178 |
+
"step": 98655
|
119179 |
+
},
|
119180 |
+
{
|
119181 |
+
"epoch": 18.21,
|
119182 |
+
"learning_rate": 1.0100572296035055e-05,
|
119183 |
+
"loss": 0.4051,
|
119184 |
+
"step": 98660
|
119185 |
+
},
|
119186 |
+
{
|
119187 |
+
"epoch": 18.21,
|
119188 |
+
"learning_rate": 1.0099843362410572e-05,
|
119189 |
+
"loss": 0.4035,
|
119190 |
+
"step": 98665
|
119191 |
+
},
|
119192 |
+
{
|
119193 |
+
"epoch": 18.21,
|
119194 |
+
"learning_rate": 1.0099117078778724e-05,
|
119195 |
+
"loss": 0.4086,
|
119196 |
+
"step": 98670
|
119197 |
+
},
|
119198 |
+
{
|
119199 |
+
"epoch": 18.21,
|
119200 |
+
"learning_rate": 1.0098393445157448e-05,
|
119201 |
+
"loss": 0.4056,
|
119202 |
+
"step": 98675
|
119203 |
+
},
|
119204 |
+
{
|
119205 |
+
"epoch": 18.21,
|
119206 |
+
"learning_rate": 1.0097672461564575e-05,
|
119207 |
+
"loss": 0.4068,
|
119208 |
+
"step": 98680
|
119209 |
+
},
|
119210 |
+
{
|
119211 |
+
"epoch": 18.21,
|
119212 |
+
"learning_rate": 1.0096954128017914e-05,
|
119213 |
+
"loss": 0.4016,
|
119214 |
+
"step": 98685
|
119215 |
+
},
|
119216 |
+
{
|
119217 |
+
"epoch": 18.21,
|
119218 |
+
"learning_rate": 1.0096238444535187e-05,
|
119219 |
+
"loss": 0.4096,
|
119220 |
+
"step": 98690
|
119221 |
+
},
|
119222 |
+
{
|
119223 |
+
"epoch": 18.21,
|
119224 |
+
"learning_rate": 1.0095525411134045e-05,
|
119225 |
+
"loss": 0.4083,
|
119226 |
+
"step": 98695
|
119227 |
+
},
|
119228 |
+
{
|
119229 |
+
"epoch": 18.22,
|
119230 |
+
"learning_rate": 1.0094815027832091e-05,
|
119231 |
+
"loss": 0.4043,
|
119232 |
+
"step": 98700
|
119233 |
+
},
|
119234 |
+
{
|
119235 |
+
"epoch": 18.22,
|
119236 |
+
"learning_rate": 1.0094107294646838e-05,
|
119237 |
+
"loss": 0.4121,
|
119238 |
+
"step": 98705
|
119239 |
+
},
|
119240 |
+
{
|
119241 |
+
"epoch": 18.22,
|
119242 |
+
"learning_rate": 1.0093402211595765e-05,
|
119243 |
+
"loss": 0.4081,
|
119244 |
+
"step": 98710
|
119245 |
+
},
|
119246 |
+
{
|
119247 |
+
"epoch": 18.22,
|
119248 |
+
"learning_rate": 1.0092699778696253e-05,
|
119249 |
+
"loss": 0.401,
|
119250 |
+
"step": 98715
|
119251 |
+
},
|
119252 |
+
{
|
119253 |
+
"epoch": 18.22,
|
119254 |
+
"learning_rate": 1.0091999995965654e-05,
|
119255 |
+
"loss": 0.4035,
|
119256 |
+
"step": 98720
|
119257 |
+
},
|
119258 |
+
{
|
119259 |
+
"epoch": 18.22,
|
119260 |
+
"learning_rate": 1.0091302863421215e-05,
|
119261 |
+
"loss": 0.4106,
|
119262 |
+
"step": 98725
|
119263 |
+
},
|
119264 |
+
{
|
119265 |
+
"epoch": 18.22,
|
119266 |
+
"learning_rate": 1.0090608381080157e-05,
|
119267 |
+
"loss": 0.4083,
|
119268 |
+
"step": 98730
|
119269 |
+
},
|
119270 |
+
{
|
119271 |
+
"epoch": 18.22,
|
119272 |
+
"learning_rate": 1.0089916548959594e-05,
|
119273 |
+
"loss": 0.408,
|
119274 |
+
"step": 98735
|
119275 |
+
},
|
119276 |
+
{
|
119277 |
+
"epoch": 18.22,
|
119278 |
+
"learning_rate": 1.0089227367076605e-05,
|
119279 |
+
"loss": 0.4098,
|
119280 |
+
"step": 98740
|
119281 |
+
},
|
119282 |
+
{
|
119283 |
+
"epoch": 18.22,
|
119284 |
+
"learning_rate": 1.0088540835448211e-05,
|
119285 |
+
"loss": 0.4056,
|
119286 |
+
"step": 98745
|
119287 |
+
},
|
119288 |
+
{
|
119289 |
+
"epoch": 18.23,
|
119290 |
+
"learning_rate": 1.0087856954091324e-05,
|
119291 |
+
"loss": 0.4047,
|
119292 |
+
"step": 98750
|
119293 |
+
},
|
119294 |
+
{
|
119295 |
+
"epoch": 18.23,
|
119296 |
+
"learning_rate": 1.0087175723022828e-05,
|
119297 |
+
"loss": 0.4058,
|
119298 |
+
"step": 98755
|
119299 |
+
},
|
119300 |
+
{
|
119301 |
+
"epoch": 18.23,
|
119302 |
+
"learning_rate": 1.0086497142259544e-05,
|
119303 |
+
"loss": 0.4101,
|
119304 |
+
"step": 98760
|
119305 |
+
},
|
119306 |
+
{
|
119307 |
+
"epoch": 18.23,
|
119308 |
+
"learning_rate": 1.00858212118182e-05,
|
119309 |
+
"loss": 0.4065,
|
119310 |
+
"step": 98765
|
119311 |
+
},
|
119312 |
+
{
|
119313 |
+
"epoch": 18.23,
|
119314 |
+
"learning_rate": 1.0085147931715484e-05,
|
119315 |
+
"loss": 0.4072,
|
119316 |
+
"step": 98770
|
119317 |
+
},
|
119318 |
+
{
|
119319 |
+
"epoch": 18.23,
|
119320 |
+
"learning_rate": 1.0084477301967998e-05,
|
119321 |
+
"loss": 0.4091,
|
119322 |
+
"step": 98775
|
119323 |
+
},
|
119324 |
+
{
|
119325 |
+
"epoch": 18.23,
|
119326 |
+
"learning_rate": 1.0083809322592294e-05,
|
119327 |
+
"loss": 0.4107,
|
119328 |
+
"step": 98780
|
119329 |
+
},
|
119330 |
+
{
|
119331 |
+
"epoch": 18.23,
|
119332 |
+
"learning_rate": 1.0083143993604866e-05,
|
119333 |
+
"loss": 0.4122,
|
119334 |
+
"step": 98785
|
119335 |
+
},
|
119336 |
+
{
|
119337 |
+
"epoch": 18.23,
|
119338 |
+
"learning_rate": 1.0082481315022114e-05,
|
119339 |
+
"loss": 0.4057,
|
119340 |
+
"step": 98790
|
119341 |
+
},
|
119342 |
+
{
|
119343 |
+
"epoch": 18.23,
|
119344 |
+
"learning_rate": 1.0081821286860389e-05,
|
119345 |
+
"loss": 0.4054,
|
119346 |
+
"step": 98795
|
119347 |
+
},
|
119348 |
+
{
|
119349 |
+
"epoch": 18.23,
|
119350 |
+
"learning_rate": 1.0081163909135992e-05,
|
119351 |
+
"loss": 0.4039,
|
119352 |
+
"step": 98800
|
119353 |
+
},
|
119354 |
+
{
|
119355 |
+
"epoch": 18.24,
|
119356 |
+
"learning_rate": 1.0080509181865132e-05,
|
119357 |
+
"loss": 0.4037,
|
119358 |
+
"step": 98805
|
119359 |
+
},
|
119360 |
+
{
|
119361 |
+
"epoch": 18.24,
|
119362 |
+
"learning_rate": 1.0079857105063955e-05,
|
119363 |
+
"loss": 0.4078,
|
119364 |
+
"step": 98810
|
119365 |
+
},
|
119366 |
+
{
|
119367 |
+
"epoch": 18.24,
|
119368 |
+
"learning_rate": 1.0079207678748576e-05,
|
119369 |
+
"loss": 0.4079,
|
119370 |
+
"step": 98815
|
119371 |
+
},
|
119372 |
+
{
|
119373 |
+
"epoch": 18.24,
|
119374 |
+
"learning_rate": 1.0078560902934987e-05,
|
119375 |
+
"loss": 0.4112,
|
119376 |
+
"step": 98820
|
119377 |
+
},
|
119378 |
+
{
|
119379 |
+
"epoch": 18.24,
|
119380 |
+
"learning_rate": 1.0077916777639168e-05,
|
119381 |
+
"loss": 0.4096,
|
119382 |
+
"step": 98825
|
119383 |
+
},
|
119384 |
+
{
|
119385 |
+
"epoch": 18.24,
|
119386 |
+
"learning_rate": 1.0077275302877016e-05,
|
119387 |
+
"loss": 0.4044,
|
119388 |
+
"step": 98830
|
119389 |
+
},
|
119390 |
+
{
|
119391 |
+
"epoch": 18.24,
|
119392 |
+
"learning_rate": 1.0076636478664332e-05,
|
119393 |
+
"loss": 0.4104,
|
119394 |
+
"step": 98835
|
119395 |
+
},
|
119396 |
+
{
|
119397 |
+
"epoch": 18.24,
|
119398 |
+
"learning_rate": 1.007600030501691e-05,
|
119399 |
+
"loss": 0.4124,
|
119400 |
+
"step": 98840
|
119401 |
+
},
|
119402 |
+
{
|
119403 |
+
"epoch": 18.24,
|
119404 |
+
"learning_rate": 1.0075366781950428e-05,
|
119405 |
+
"loss": 0.4088,
|
119406 |
+
"step": 98845
|
119407 |
+
},
|
119408 |
+
{
|
119409 |
+
"epoch": 18.24,
|
119410 |
+
"learning_rate": 1.0074735909480522e-05,
|
119411 |
+
"loss": 0.4078,
|
119412 |
+
"step": 98850
|
119413 |
+
},
|
119414 |
+
{
|
119415 |
+
"epoch": 18.25,
|
119416 |
+
"learning_rate": 1.0074107687622768e-05,
|
119417 |
+
"loss": 0.4062,
|
119418 |
+
"step": 98855
|
119419 |
+
},
|
119420 |
+
{
|
119421 |
+
"epoch": 18.25,
|
119422 |
+
"learning_rate": 1.0073482116392646e-05,
|
119423 |
+
"loss": 0.4033,
|
119424 |
+
"step": 98860
|
119425 |
+
},
|
119426 |
+
{
|
119427 |
+
"epoch": 18.25,
|
119428 |
+
"learning_rate": 1.0072859195805609e-05,
|
119429 |
+
"loss": 0.4058,
|
119430 |
+
"step": 98865
|
119431 |
+
},
|
119432 |
+
{
|
119433 |
+
"epoch": 18.25,
|
119434 |
+
"learning_rate": 1.007223892587702e-05,
|
119435 |
+
"loss": 0.4087,
|
119436 |
+
"step": 98870
|
119437 |
+
},
|
119438 |
+
{
|
119439 |
+
"epoch": 18.25,
|
119440 |
+
"learning_rate": 1.0071621306622179e-05,
|
119441 |
+
"loss": 0.4071,
|
119442 |
+
"step": 98875
|
119443 |
+
},
|
119444 |
+
{
|
119445 |
+
"epoch": 18.25,
|
119446 |
+
"learning_rate": 1.0071006338056337e-05,
|
119447 |
+
"loss": 0.4073,
|
119448 |
+
"step": 98880
|
119449 |
+
},
|
119450 |
+
{
|
119451 |
+
"epoch": 18.25,
|
119452 |
+
"learning_rate": 1.0070394020194652e-05,
|
119453 |
+
"loss": 0.4113,
|
119454 |
+
"step": 98885
|
119455 |
+
},
|
119456 |
+
{
|
119457 |
+
"epoch": 18.25,
|
119458 |
+
"learning_rate": 1.0069784353052248e-05,
|
119459 |
+
"loss": 0.4056,
|
119460 |
+
"step": 98890
|
119461 |
+
},
|
119462 |
+
{
|
119463 |
+
"epoch": 18.25,
|
119464 |
+
"learning_rate": 1.0069177336644172e-05,
|
119465 |
+
"loss": 0.4058,
|
119466 |
+
"step": 98895
|
119467 |
+
},
|
119468 |
+
{
|
119469 |
+
"epoch": 18.25,
|
119470 |
+
"learning_rate": 1.006857297098538e-05,
|
119471 |
+
"loss": 0.4078,
|
119472 |
+
"step": 98900
|
119473 |
+
},
|
119474 |
+
{
|
119475 |
+
"epoch": 18.26,
|
119476 |
+
"learning_rate": 1.0067971256090789e-05,
|
119477 |
+
"loss": 0.4122,
|
119478 |
+
"step": 98905
|
119479 |
+
},
|
119480 |
+
{
|
119481 |
+
"epoch": 18.26,
|
119482 |
+
"learning_rate": 1.0067372191975258e-05,
|
119483 |
+
"loss": 0.4115,
|
119484 |
+
"step": 98910
|
119485 |
+
},
|
119486 |
+
{
|
119487 |
+
"epoch": 18.26,
|
119488 |
+
"learning_rate": 1.0066775778653566e-05,
|
119489 |
+
"loss": 0.4116,
|
119490 |
+
"step": 98915
|
119491 |
+
},
|
119492 |
+
{
|
119493 |
+
"epoch": 18.26,
|
119494 |
+
"learning_rate": 1.0066182016140424e-05,
|
119495 |
+
"loss": 0.4109,
|
119496 |
+
"step": 98920
|
119497 |
+
},
|
119498 |
+
{
|
119499 |
+
"epoch": 18.26,
|
119500 |
+
"learning_rate": 1.006559090445048e-05,
|
119501 |
+
"loss": 0.4067,
|
119502 |
+
"step": 98925
|
119503 |
+
},
|
119504 |
+
{
|
119505 |
+
"epoch": 18.26,
|
119506 |
+
"learning_rate": 1.0065002443598315e-05,
|
119507 |
+
"loss": 0.4049,
|
119508 |
+
"step": 98930
|
119509 |
+
},
|
119510 |
+
{
|
119511 |
+
"epoch": 18.26,
|
119512 |
+
"learning_rate": 1.006441663359847e-05,
|
119513 |
+
"loss": 0.4073,
|
119514 |
+
"step": 98935
|
119515 |
+
},
|
119516 |
+
{
|
119517 |
+
"epoch": 18.26,
|
119518 |
+
"learning_rate": 1.006383347446538e-05,
|
119519 |
+
"loss": 0.4077,
|
119520 |
+
"step": 98940
|
119521 |
+
},
|
119522 |
+
{
|
119523 |
+
"epoch": 18.26,
|
119524 |
+
"learning_rate": 1.0063252966213445e-05,
|
119525 |
+
"loss": 0.4094,
|
119526 |
+
"step": 98945
|
119527 |
+
},
|
119528 |
+
{
|
119529 |
+
"epoch": 18.26,
|
119530 |
+
"learning_rate": 1.006267510885697e-05,
|
119531 |
+
"loss": 0.4119,
|
119532 |
+
"step": 98950
|
119533 |
+
},
|
119534 |
+
{
|
119535 |
+
"epoch": 18.26,
|
119536 |
+
"learning_rate": 1.0062099902410232e-05,
|
119537 |
+
"loss": 0.4034,
|
119538 |
+
"step": 98955
|
119539 |
+
},
|
119540 |
+
{
|
119541 |
+
"epoch": 18.27,
|
119542 |
+
"learning_rate": 1.0061527346887421e-05,
|
119543 |
+
"loss": 0.4068,
|
119544 |
+
"step": 98960
|
119545 |
+
},
|
119546 |
+
{
|
119547 |
+
"epoch": 18.27,
|
119548 |
+
"learning_rate": 1.0060957442302668e-05,
|
119549 |
+
"loss": 0.4095,
|
119550 |
+
"step": 98965
|
119551 |
+
},
|
119552 |
+
{
|
119553 |
+
"epoch": 18.27,
|
119554 |
+
"learning_rate": 1.006039018867002e-05,
|
119555 |
+
"loss": 0.4047,
|
119556 |
+
"step": 98970
|
119557 |
+
},
|
119558 |
+
{
|
119559 |
+
"epoch": 18.27,
|
119560 |
+
"learning_rate": 1.0059825586003476e-05,
|
119561 |
+
"loss": 0.4051,
|
119562 |
+
"step": 98975
|
119563 |
+
},
|
119564 |
+
{
|
119565 |
+
"epoch": 18.27,
|
119566 |
+
"learning_rate": 1.0059263634316976e-05,
|
119567 |
+
"loss": 0.4065,
|
119568 |
+
"step": 98980
|
119569 |
+
},
|
119570 |
+
{
|
119571 |
+
"epoch": 18.27,
|
119572 |
+
"learning_rate": 1.0058704333624379e-05,
|
119573 |
+
"loss": 0.4063,
|
119574 |
+
"step": 98985
|
119575 |
+
},
|
119576 |
+
{
|
119577 |
+
"epoch": 18.27,
|
119578 |
+
"learning_rate": 1.005814768393949e-05,
|
119579 |
+
"loss": 0.4114,
|
119580 |
+
"step": 98990
|
119581 |
+
},
|
119582 |
+
{
|
119583 |
+
"epoch": 18.27,
|
119584 |
+
"learning_rate": 1.0057593685276049e-05,
|
119585 |
+
"loss": 0.4101,
|
119586 |
+
"step": 98995
|
119587 |
+
},
|
119588 |
+
{
|
119589 |
+
"epoch": 18.27,
|
119590 |
+
"learning_rate": 1.0057042337647708e-05,
|
119591 |
+
"loss": 0.4138,
|
119592 |
+
"step": 99000
|
119593 |
+
},
|
119594 |
+
{
|
119595 |
+
"epoch": 18.27,
|
119596 |
+
"eval_loss": 0.38938432931900024,
|
119597 |
+
"eval_runtime": 98.526,
|
119598 |
+
"eval_samples_per_second": 50.748,
|
119599 |
+
"eval_steps_per_second": 0.538,
|
119600 |
+
"step": 99000
|
119601 |
+
},
|
119602 |
+
{
|
119603 |
+
"epoch": 18.27,
|
119604 |
+
"learning_rate": 1.0056493641068084e-05,
|
119605 |
+
"loss": 0.4099,
|
119606 |
+
"step": 99005
|
119607 |
+
},
|
119608 |
+
{
|
119609 |
+
"epoch": 18.28,
|
119610 |
+
"learning_rate": 1.0055947595550713e-05,
|
119611 |
+
"loss": 0.4116,
|
119612 |
+
"step": 99010
|
119613 |
+
},
|
119614 |
+
{
|
119615 |
+
"epoch": 18.28,
|
119616 |
+
"learning_rate": 1.0055404201109082e-05,
|
119617 |
+
"loss": 0.4075,
|
119618 |
+
"step": 99015
|
119619 |
+
},
|
119620 |
+
{
|
119621 |
+
"epoch": 18.28,
|
119622 |
+
"learning_rate": 1.0054863457756567e-05,
|
119623 |
+
"loss": 0.4075,
|
119624 |
+
"step": 99020
|
119625 |
+
},
|
119626 |
+
{
|
119627 |
+
"epoch": 18.28,
|
119628 |
+
"learning_rate": 1.0054325365506526e-05,
|
119629 |
+
"loss": 0.4103,
|
119630 |
+
"step": 99025
|
119631 |
+
},
|
119632 |
+
{
|
119633 |
+
"epoch": 18.28,
|
119634 |
+
"learning_rate": 1.0053789924372251e-05,
|
119635 |
+
"loss": 0.4088,
|
119636 |
+
"step": 99030
|
119637 |
+
},
|
119638 |
+
{
|
119639 |
+
"epoch": 18.28,
|
119640 |
+
"learning_rate": 1.0053257134366935e-05,
|
119641 |
+
"loss": 0.4081,
|
119642 |
+
"step": 99035
|
119643 |
+
},
|
119644 |
+
{
|
119645 |
+
"epoch": 18.28,
|
119646 |
+
"learning_rate": 1.0052726995503727e-05,
|
119647 |
+
"loss": 0.4065,
|
119648 |
+
"step": 99040
|
119649 |
+
},
|
119650 |
+
{
|
119651 |
+
"epoch": 18.28,
|
119652 |
+
"learning_rate": 1.0052199507795707e-05,
|
119653 |
+
"loss": 0.4121,
|
119654 |
+
"step": 99045
|
119655 |
+
},
|
119656 |
+
{
|
119657 |
+
"epoch": 18.28,
|
119658 |
+
"learning_rate": 1.0051674671255899e-05,
|
119659 |
+
"loss": 0.409,
|
119660 |
+
"step": 99050
|
119661 |
+
},
|
119662 |
+
{
|
119663 |
+
"epoch": 18.28,
|
119664 |
+
"learning_rate": 1.0051152485897237e-05,
|
119665 |
+
"loss": 0.407,
|
119666 |
+
"step": 99055
|
119667 |
+
},
|
119668 |
+
{
|
119669 |
+
"epoch": 18.29,
|
119670 |
+
"learning_rate": 1.0050632951732626e-05,
|
119671 |
+
"loss": 0.4081,
|
119672 |
+
"step": 99060
|
119673 |
+
},
|
119674 |
+
{
|
119675 |
+
"epoch": 18.29,
|
119676 |
+
"learning_rate": 1.0050116068774868e-05,
|
119677 |
+
"loss": 0.4073,
|
119678 |
+
"step": 99065
|
119679 |
+
},
|
119680 |
+
{
|
119681 |
+
"epoch": 18.29,
|
119682 |
+
"learning_rate": 1.0049601837036726e-05,
|
119683 |
+
"loss": 0.4074,
|
119684 |
+
"step": 99070
|
119685 |
+
},
|
119686 |
+
{
|
119687 |
+
"epoch": 18.29,
|
119688 |
+
"learning_rate": 1.0049090256530885e-05,
|
119689 |
+
"loss": 0.4143,
|
119690 |
+
"step": 99075
|
119691 |
+
},
|
119692 |
+
{
|
119693 |
+
"epoch": 18.29,
|
119694 |
+
"learning_rate": 1.004858132726997e-05,
|
119695 |
+
"loss": 0.409,
|
119696 |
+
"step": 99080
|
119697 |
+
},
|
119698 |
+
{
|
119699 |
+
"epoch": 18.29,
|
119700 |
+
"learning_rate": 1.0048075049266527e-05,
|
119701 |
+
"loss": 0.4047,
|
119702 |
+
"step": 99085
|
119703 |
+
},
|
119704 |
+
{
|
119705 |
+
"epoch": 18.29,
|
119706 |
+
"learning_rate": 1.0047571422533064e-05,
|
119707 |
+
"loss": 0.4048,
|
119708 |
+
"step": 99090
|
119709 |
+
},
|
119710 |
+
{
|
119711 |
+
"epoch": 18.29,
|
119712 |
+
"learning_rate": 1.0047070447082003e-05,
|
119713 |
+
"loss": 0.4061,
|
119714 |
+
"step": 99095
|
119715 |
+
},
|
119716 |
+
{
|
119717 |
+
"epoch": 18.29,
|
119718 |
+
"learning_rate": 1.0046572122925699e-05,
|
119719 |
+
"loss": 0.4091,
|
119720 |
+
"step": 99100
|
119721 |
+
},
|
119722 |
+
{
|
119723 |
+
"epoch": 18.29,
|
119724 |
+
"learning_rate": 1.0046076450076455e-05,
|
119725 |
+
"loss": 0.4034,
|
119726 |
+
"step": 99105
|
119727 |
+
},
|
119728 |
+
{
|
119729 |
+
"epoch": 18.3,
|
119730 |
+
"learning_rate": 1.0045583428546493e-05,
|
119731 |
+
"loss": 0.4098,
|
119732 |
+
"step": 99110
|
119733 |
+
},
|
119734 |
+
{
|
119735 |
+
"epoch": 18.3,
|
119736 |
+
"learning_rate": 1.0045093058347987e-05,
|
119737 |
+
"loss": 0.4053,
|
119738 |
+
"step": 99115
|
119739 |
+
},
|
119740 |
+
{
|
119741 |
+
"epoch": 18.3,
|
119742 |
+
"learning_rate": 1.0044605339493029e-05,
|
119743 |
+
"loss": 0.4017,
|
119744 |
+
"step": 99120
|
119745 |
+
},
|
119746 |
+
{
|
119747 |
+
"epoch": 18.3,
|
119748 |
+
"learning_rate": 1.0044120271993656e-05,
|
119749 |
+
"loss": 0.4101,
|
119750 |
+
"step": 99125
|
119751 |
+
},
|
119752 |
+
{
|
119753 |
+
"epoch": 18.3,
|
119754 |
+
"learning_rate": 1.0043637855861832e-05,
|
119755 |
+
"loss": 0.4097,
|
119756 |
+
"step": 99130
|
119757 |
+
},
|
119758 |
+
{
|
119759 |
+
"epoch": 18.3,
|
119760 |
+
"learning_rate": 1.0043158091109465e-05,
|
119761 |
+
"loss": 0.4057,
|
119762 |
+
"step": 99135
|
119763 |
+
},
|
119764 |
+
{
|
119765 |
+
"epoch": 18.3,
|
119766 |
+
"learning_rate": 1.0042680977748395e-05,
|
119767 |
+
"loss": 0.4076,
|
119768 |
+
"step": 99140
|
119769 |
+
},
|
119770 |
+
{
|
119771 |
+
"epoch": 18.3,
|
119772 |
+
"learning_rate": 1.0042206515790391e-05,
|
119773 |
+
"loss": 0.4091,
|
119774 |
+
"step": 99145
|
119775 |
+
},
|
119776 |
+
{
|
119777 |
+
"epoch": 18.3,
|
119778 |
+
"learning_rate": 1.0041734705247159e-05,
|
119779 |
+
"loss": 0.4097,
|
119780 |
+
"step": 99150
|
119781 |
+
},
|
119782 |
+
{
|
119783 |
+
"epoch": 18.3,
|
119784 |
+
"learning_rate": 1.004126554613035e-05,
|
119785 |
+
"loss": 0.4043,
|
119786 |
+
"step": 99155
|
119787 |
+
},
|
119788 |
+
{
|
119789 |
+
"epoch": 18.3,
|
119790 |
+
"learning_rate": 1.0040799038451528e-05,
|
119791 |
+
"loss": 0.4049,
|
119792 |
+
"step": 99160
|
119793 |
+
},
|
119794 |
+
{
|
119795 |
+
"epoch": 18.31,
|
119796 |
+
"learning_rate": 1.0040335182222196e-05,
|
119797 |
+
"loss": 0.4086,
|
119798 |
+
"step": 99165
|
119799 |
+
},
|
119800 |
+
{
|
119801 |
+
"epoch": 18.31,
|
119802 |
+
"learning_rate": 1.0039873977453827e-05,
|
119803 |
+
"loss": 0.4102,
|
119804 |
+
"step": 99170
|
119805 |
+
},
|
119806 |
+
{
|
119807 |
+
"epoch": 18.31,
|
119808 |
+
"learning_rate": 1.003941542415777e-05,
|
119809 |
+
"loss": 0.4139,
|
119810 |
+
"step": 99175
|
119811 |
+
},
|
119812 |
+
{
|
119813 |
+
"epoch": 18.31,
|
119814 |
+
"learning_rate": 1.003895952234536e-05,
|
119815 |
+
"loss": 0.4056,
|
119816 |
+
"step": 99180
|
119817 |
+
},
|
119818 |
+
{
|
119819 |
+
"epoch": 18.31,
|
119820 |
+
"learning_rate": 1.0038506272027845e-05,
|
119821 |
+
"loss": 0.4038,
|
119822 |
+
"step": 99185
|
119823 |
+
},
|
119824 |
+
{
|
119825 |
+
"epoch": 18.31,
|
119826 |
+
"learning_rate": 1.0038055673216404e-05,
|
119827 |
+
"loss": 0.4081,
|
119828 |
+
"step": 99190
|
119829 |
+
},
|
119830 |
+
{
|
119831 |
+
"epoch": 18.31,
|
119832 |
+
"learning_rate": 1.0037607725922138e-05,
|
119833 |
+
"loss": 0.4074,
|
119834 |
+
"step": 99195
|
119835 |
+
},
|
119836 |
+
{
|
119837 |
+
"epoch": 18.31,
|
119838 |
+
"learning_rate": 1.0037162430156133e-05,
|
119839 |
+
"loss": 0.4054,
|
119840 |
+
"step": 99200
|
119841 |
+
},
|
119842 |
+
{
|
119843 |
+
"epoch": 18.31,
|
119844 |
+
"learning_rate": 1.0036719785929348e-05,
|
119845 |
+
"loss": 0.406,
|
119846 |
+
"step": 99205
|
119847 |
+
},
|
119848 |
+
{
|
119849 |
+
"epoch": 18.31,
|
119850 |
+
"learning_rate": 1.0036279793252725e-05,
|
119851 |
+
"loss": 0.4024,
|
119852 |
+
"step": 99210
|
119853 |
+
},
|
119854 |
+
{
|
119855 |
+
"epoch": 18.32,
|
119856 |
+
"learning_rate": 1.0035842452137103e-05,
|
119857 |
+
"loss": 0.4115,
|
119858 |
+
"step": 99215
|
119859 |
+
},
|
119860 |
+
{
|
119861 |
+
"epoch": 18.32,
|
119862 |
+
"learning_rate": 1.0035407762593293e-05,
|
119863 |
+
"loss": 0.4085,
|
119864 |
+
"step": 99220
|
119865 |
+
},
|
119866 |
+
{
|
119867 |
+
"epoch": 18.32,
|
119868 |
+
"learning_rate": 1.0034975724632e-05,
|
119869 |
+
"loss": 0.4079,
|
119870 |
+
"step": 99225
|
119871 |
+
},
|
119872 |
+
{
|
119873 |
+
"epoch": 18.32,
|
119874 |
+
"learning_rate": 1.0034546338263884e-05,
|
119875 |
+
"loss": 0.4126,
|
119876 |
+
"step": 99230
|
119877 |
+
},
|
119878 |
+
{
|
119879 |
+
"epoch": 18.32,
|
119880 |
+
"learning_rate": 1.0034119603499567e-05,
|
119881 |
+
"loss": 0.4075,
|
119882 |
+
"step": 99235
|
119883 |
+
},
|
119884 |
+
{
|
119885 |
+
"epoch": 18.32,
|
119886 |
+
"learning_rate": 1.003369552034955e-05,
|
119887 |
+
"loss": 0.4106,
|
119888 |
+
"step": 99240
|
119889 |
+
},
|
119890 |
+
{
|
119891 |
+
"epoch": 18.32,
|
119892 |
+
"learning_rate": 1.0033274088824313e-05,
|
119893 |
+
"loss": 0.4061,
|
119894 |
+
"step": 99245
|
119895 |
+
},
|
119896 |
+
{
|
119897 |
+
"epoch": 18.32,
|
119898 |
+
"learning_rate": 1.0032855308934238e-05,
|
119899 |
+
"loss": 0.4057,
|
119900 |
+
"step": 99250
|
119901 |
+
},
|
119902 |
+
{
|
119903 |
+
"epoch": 18.32,
|
119904 |
+
"learning_rate": 1.0032439180689676e-05,
|
119905 |
+
"loss": 0.4143,
|
119906 |
+
"step": 99255
|
119907 |
+
},
|
119908 |
+
{
|
119909 |
+
"epoch": 18.32,
|
119910 |
+
"learning_rate": 1.0032025704100874e-05,
|
119911 |
+
"loss": 0.4075,
|
119912 |
+
"step": 99260
|
119913 |
+
},
|
119914 |
+
{
|
119915 |
+
"epoch": 18.33,
|
119916 |
+
"learning_rate": 1.0031614879178065e-05,
|
119917 |
+
"loss": 0.4065,
|
119918 |
+
"step": 99265
|
119919 |
+
},
|
119920 |
+
{
|
119921 |
+
"epoch": 18.33,
|
119922 |
+
"learning_rate": 1.0031206705931356e-05,
|
119923 |
+
"loss": 0.4065,
|
119924 |
+
"step": 99270
|
119925 |
+
},
|
119926 |
+
{
|
119927 |
+
"epoch": 18.33,
|
119928 |
+
"learning_rate": 1.003080118437083e-05,
|
119929 |
+
"loss": 0.4082,
|
119930 |
+
"step": 99275
|
119931 |
+
},
|
119932 |
+
{
|
119933 |
+
"epoch": 18.33,
|
119934 |
+
"learning_rate": 1.0030398314506504e-05,
|
119935 |
+
"loss": 0.4102,
|
119936 |
+
"step": 99280
|
119937 |
+
},
|
119938 |
+
{
|
119939 |
+
"epoch": 18.33,
|
119940 |
+
"learning_rate": 1.0029998096348292e-05,
|
119941 |
+
"loss": 0.4099,
|
119942 |
+
"step": 99285
|
119943 |
+
},
|
119944 |
+
{
|
119945 |
+
"epoch": 18.33,
|
119946 |
+
"learning_rate": 1.0029600529906092e-05,
|
119947 |
+
"loss": 0.4104,
|
119948 |
+
"step": 99290
|
119949 |
+
},
|
119950 |
+
{
|
119951 |
+
"epoch": 18.33,
|
119952 |
+
"learning_rate": 1.00292056151897e-05,
|
119953 |
+
"loss": 0.4153,
|
119954 |
+
"step": 99295
|
119955 |
+
},
|
119956 |
+
{
|
119957 |
+
"epoch": 18.33,
|
119958 |
+
"learning_rate": 1.002881335220888e-05,
|
119959 |
+
"loss": 0.4129,
|
119960 |
+
"step": 99300
|
119961 |
+
},
|
119962 |
+
{
|
119963 |
+
"epoch": 18.33,
|
119964 |
+
"learning_rate": 1.0028423740973285e-05,
|
119965 |
+
"loss": 0.4081,
|
119966 |
+
"step": 99305
|
119967 |
+
},
|
119968 |
+
{
|
119969 |
+
"epoch": 18.33,
|
119970 |
+
"learning_rate": 1.0028036781492538e-05,
|
119971 |
+
"loss": 0.4113,
|
119972 |
+
"step": 99310
|
119973 |
+
},
|
119974 |
+
{
|
119975 |
+
"epoch": 18.33,
|
119976 |
+
"learning_rate": 1.0027652473776184e-05,
|
119977 |
+
"loss": 0.4107,
|
119978 |
+
"step": 99315
|
119979 |
+
},
|
119980 |
+
{
|
119981 |
+
"epoch": 18.34,
|
119982 |
+
"learning_rate": 1.0027270817833725e-05,
|
119983 |
+
"loss": 0.4021,
|
119984 |
+
"step": 99320
|
119985 |
+
},
|
119986 |
+
{
|
119987 |
+
"epoch": 18.34,
|
119988 |
+
"learning_rate": 1.0026891813674553e-05,
|
119989 |
+
"loss": 0.4101,
|
119990 |
+
"step": 99325
|
119991 |
+
},
|
119992 |
+
{
|
119993 |
+
"epoch": 18.34,
|
119994 |
+
"learning_rate": 1.0026515461308025e-05,
|
119995 |
+
"loss": 0.4071,
|
119996 |
+
"step": 99330
|
119997 |
+
},
|
119998 |
+
{
|
119999 |
+
"epoch": 18.34,
|
120000 |
+
"learning_rate": 1.0026141760743439e-05,
|
120001 |
+
"loss": 0.4101,
|
120002 |
+
"step": 99335
|
120003 |
+
},
|
120004 |
+
{
|
120005 |
+
"epoch": 18.34,
|
120006 |
+
"learning_rate": 1.0025770711990008e-05,
|
120007 |
+
"loss": 0.4055,
|
120008 |
+
"step": 99340
|
120009 |
+
},
|
120010 |
+
{
|
120011 |
+
"epoch": 18.34,
|
120012 |
+
"learning_rate": 1.0025402315056886e-05,
|
120013 |
+
"loss": 0.4078,
|
120014 |
+
"step": 99345
|
120015 |
+
},
|
120016 |
+
{
|
120017 |
+
"epoch": 18.34,
|
120018 |
+
"learning_rate": 1.0025036569953166e-05,
|
120019 |
+
"loss": 0.4069,
|
120020 |
+
"step": 99350
|
120021 |
+
},
|
120022 |
+
{
|
120023 |
+
"epoch": 18.34,
|
120024 |
+
"learning_rate": 1.0024673476687874e-05,
|
120025 |
+
"loss": 0.412,
|
120026 |
+
"step": 99355
|
120027 |
+
},
|
120028 |
+
{
|
120029 |
+
"epoch": 18.34,
|
120030 |
+
"learning_rate": 1.002431303526996e-05,
|
120031 |
+
"loss": 0.4085,
|
120032 |
+
"step": 99360
|
120033 |
+
},
|
120034 |
+
{
|
120035 |
+
"epoch": 18.34,
|
120036 |
+
"learning_rate": 1.0023955245708325e-05,
|
120037 |
+
"loss": 0.4087,
|
120038 |
+
"step": 99365
|
120039 |
+
},
|
120040 |
+
{
|
120041 |
+
"epoch": 18.35,
|
120042 |
+
"learning_rate": 1.0023600108011792e-05,
|
120043 |
+
"loss": 0.4044,
|
120044 |
+
"step": 99370
|
120045 |
+
},
|
120046 |
+
{
|
120047 |
+
"epoch": 18.35,
|
120048 |
+
"learning_rate": 1.0023247622189132e-05,
|
120049 |
+
"loss": 0.4055,
|
120050 |
+
"step": 99375
|
120051 |
+
},
|
120052 |
+
{
|
120053 |
+
"epoch": 18.35,
|
120054 |
+
"learning_rate": 1.0022897788249034e-05,
|
120055 |
+
"loss": 0.4082,
|
120056 |
+
"step": 99380
|
120057 |
+
},
|
120058 |
+
{
|
120059 |
+
"epoch": 18.35,
|
120060 |
+
"learning_rate": 1.0022550606200138e-05,
|
120061 |
+
"loss": 0.4089,
|
120062 |
+
"step": 99385
|
120063 |
+
},
|
120064 |
+
{
|
120065 |
+
"epoch": 18.35,
|
120066 |
+
"learning_rate": 1.0022206076051002e-05,
|
120067 |
+
"loss": 0.4116,
|
120068 |
+
"step": 99390
|
120069 |
+
},
|
120070 |
+
{
|
120071 |
+
"epoch": 18.35,
|
120072 |
+
"learning_rate": 1.0021864197810123e-05,
|
120073 |
+
"loss": 0.4054,
|
120074 |
+
"step": 99395
|
120075 |
+
},
|
120076 |
+
{
|
120077 |
+
"epoch": 18.35,
|
120078 |
+
"learning_rate": 1.0021524971485951e-05,
|
120079 |
+
"loss": 0.4038,
|
120080 |
+
"step": 99400
|
120081 |
+
},
|
120082 |
+
{
|
120083 |
+
"epoch": 18.35,
|
120084 |
+
"learning_rate": 1.0021188397086854e-05,
|
120085 |
+
"loss": 0.4054,
|
120086 |
+
"step": 99405
|
120087 |
+
},
|
120088 |
+
{
|
120089 |
+
"epoch": 18.35,
|
120090 |
+
"learning_rate": 1.0020854474621124e-05,
|
120091 |
+
"loss": 0.4094,
|
120092 |
+
"step": 99410
|
120093 |
+
},
|
120094 |
+
{
|
120095 |
+
"epoch": 18.35,
|
120096 |
+
"learning_rate": 1.0020523204097011e-05,
|
120097 |
+
"loss": 0.415,
|
120098 |
+
"step": 99415
|
120099 |
+
},
|
120100 |
+
{
|
120101 |
+
"epoch": 18.36,
|
120102 |
+
"learning_rate": 1.0020194585522688e-05,
|
120103 |
+
"loss": 0.4151,
|
120104 |
+
"step": 99420
|
120105 |
+
},
|
120106 |
+
{
|
120107 |
+
"epoch": 18.36,
|
120108 |
+
"learning_rate": 1.001986861890626e-05,
|
120109 |
+
"loss": 0.4111,
|
120110 |
+
"step": 99425
|
120111 |
+
},
|
120112 |
+
{
|
120113 |
+
"epoch": 18.36,
|
120114 |
+
"learning_rate": 1.001954530425577e-05,
|
120115 |
+
"loss": 0.4041,
|
120116 |
+
"step": 99430
|
120117 |
+
},
|
120118 |
+
{
|
120119 |
+
"epoch": 18.36,
|
120120 |
+
"learning_rate": 1.0019224641579193e-05,
|
120121 |
+
"loss": 0.4105,
|
120122 |
+
"step": 99435
|
120123 |
+
},
|
120124 |
+
{
|
120125 |
+
"epoch": 18.36,
|
120126 |
+
"learning_rate": 1.001890663088445e-05,
|
120127 |
+
"loss": 0.4002,
|
120128 |
+
"step": 99440
|
120129 |
+
},
|
120130 |
+
{
|
120131 |
+
"epoch": 18.36,
|
120132 |
+
"learning_rate": 1.0018591272179373e-05,
|
120133 |
+
"loss": 0.4093,
|
120134 |
+
"step": 99445
|
120135 |
+
},
|
120136 |
+
{
|
120137 |
+
"epoch": 18.36,
|
120138 |
+
"learning_rate": 1.0018278565471755e-05,
|
120139 |
+
"loss": 0.4067,
|
120140 |
+
"step": 99450
|
120141 |
+
},
|
120142 |
+
{
|
120143 |
+
"epoch": 18.36,
|
120144 |
+
"learning_rate": 1.001796851076932e-05,
|
120145 |
+
"loss": 0.4063,
|
120146 |
+
"step": 99455
|
120147 |
+
},
|
120148 |
+
{
|
120149 |
+
"epoch": 18.36,
|
120150 |
+
"learning_rate": 1.0017661108079689e-05,
|
120151 |
+
"loss": 0.411,
|
120152 |
+
"step": 99460
|
120153 |
+
},
|
120154 |
+
{
|
120155 |
+
"epoch": 18.36,
|
120156 |
+
"learning_rate": 1.0017356357410481e-05,
|
120157 |
+
"loss": 0.4074,
|
120158 |
+
"step": 99465
|
120159 |
+
},
|
120160 |
+
{
|
120161 |
+
"epoch": 18.37,
|
120162 |
+
"learning_rate": 1.0017054258769189e-05,
|
120163 |
+
"loss": 0.4023,
|
120164 |
+
"step": 99470
|
120165 |
+
},
|
120166 |
+
{
|
120167 |
+
"epoch": 18.37,
|
120168 |
+
"learning_rate": 1.0016754812163285e-05,
|
120169 |
+
"loss": 0.4112,
|
120170 |
+
"step": 99475
|
120171 |
+
},
|
120172 |
+
{
|
120173 |
+
"epoch": 18.37,
|
120174 |
+
"learning_rate": 1.0016458017600145e-05,
|
120175 |
+
"loss": 0.4056,
|
120176 |
+
"step": 99480
|
120177 |
+
},
|
120178 |
+
{
|
120179 |
+
"epoch": 18.37,
|
120180 |
+
"learning_rate": 1.0016163875087098e-05,
|
120181 |
+
"loss": 0.4081,
|
120182 |
+
"step": 99485
|
120183 |
+
},
|
120184 |
+
{
|
120185 |
+
"epoch": 18.37,
|
120186 |
+
"learning_rate": 1.0015872384631397e-05,
|
120187 |
+
"loss": 0.4075,
|
120188 |
+
"step": 99490
|
120189 |
+
},
|
120190 |
+
{
|
120191 |
+
"epoch": 18.37,
|
120192 |
+
"learning_rate": 1.0015583546240243e-05,
|
120193 |
+
"loss": 0.4126,
|
120194 |
+
"step": 99495
|
120195 |
+
},
|
120196 |
+
{
|
120197 |
+
"epoch": 18.37,
|
120198 |
+
"learning_rate": 1.0015297359920758e-05,
|
120199 |
+
"loss": 0.4031,
|
120200 |
+
"step": 99500
|
120201 |
+
},
|
120202 |
+
{
|
120203 |
+
"epoch": 18.37,
|
120204 |
+
"learning_rate": 1.0015013825679996e-05,
|
120205 |
+
"loss": 0.4064,
|
120206 |
+
"step": 99505
|
120207 |
+
},
|
120208 |
+
{
|
120209 |
+
"epoch": 18.37,
|
120210 |
+
"learning_rate": 1.0014732943524964e-05,
|
120211 |
+
"loss": 0.4032,
|
120212 |
+
"step": 99510
|
120213 |
+
},
|
120214 |
+
{
|
120215 |
+
"epoch": 18.37,
|
120216 |
+
"learning_rate": 1.0014454713462595e-05,
|
120217 |
+
"loss": 0.4062,
|
120218 |
+
"step": 99515
|
120219 |
+
},
|
120220 |
+
{
|
120221 |
+
"epoch": 18.37,
|
120222 |
+
"learning_rate": 1.0014179135499741e-05,
|
120223 |
+
"loss": 0.4057,
|
120224 |
+
"step": 99520
|
120225 |
+
},
|
120226 |
+
{
|
120227 |
+
"epoch": 18.38,
|
120228 |
+
"learning_rate": 1.0013906209643215e-05,
|
120229 |
+
"loss": 0.4094,
|
120230 |
+
"step": 99525
|
120231 |
+
},
|
120232 |
+
{
|
120233 |
+
"epoch": 18.38,
|
120234 |
+
"learning_rate": 1.0013635935899738e-05,
|
120235 |
+
"loss": 0.4036,
|
120236 |
+
"step": 99530
|
120237 |
+
},
|
120238 |
+
{
|
120239 |
+
"epoch": 18.38,
|
120240 |
+
"learning_rate": 1.0013368314275982e-05,
|
120241 |
+
"loss": 0.4058,
|
120242 |
+
"step": 99535
|
120243 |
+
},
|
120244 |
+
{
|
120245 |
+
"epoch": 18.38,
|
120246 |
+
"learning_rate": 1.0013103344778556e-05,
|
120247 |
+
"loss": 0.403,
|
120248 |
+
"step": 99540
|
120249 |
+
},
|
120250 |
+
{
|
120251 |
+
"epoch": 18.38,
|
120252 |
+
"learning_rate": 1.0012841027414003e-05,
|
120253 |
+
"loss": 0.406,
|
120254 |
+
"step": 99545
|
120255 |
+
},
|
120256 |
+
{
|
120257 |
+
"epoch": 18.38,
|
120258 |
+
"learning_rate": 1.0012581362188779e-05,
|
120259 |
+
"loss": 0.4118,
|
120260 |
+
"step": 99550
|
120261 |
+
},
|
120262 |
+
{
|
120263 |
+
"epoch": 18.38,
|
120264 |
+
"learning_rate": 1.0012324349109304e-05,
|
120265 |
+
"loss": 0.4054,
|
120266 |
+
"step": 99555
|
120267 |
+
},
|
120268 |
+
{
|
120269 |
+
"epoch": 18.38,
|
120270 |
+
"learning_rate": 1.0012069988181906e-05,
|
120271 |
+
"loss": 0.4096,
|
120272 |
+
"step": 99560
|
120273 |
+
},
|
120274 |
+
{
|
120275 |
+
"epoch": 18.38,
|
120276 |
+
"learning_rate": 1.0011818279412885e-05,
|
120277 |
+
"loss": 0.4102,
|
120278 |
+
"step": 99565
|
120279 |
+
},
|
120280 |
+
{
|
120281 |
+
"epoch": 18.38,
|
120282 |
+
"learning_rate": 1.0011569222808423e-05,
|
120283 |
+
"loss": 0.407,
|
120284 |
+
"step": 99570
|
120285 |
+
},
|
120286 |
+
{
|
120287 |
+
"epoch": 18.39,
|
120288 |
+
"learning_rate": 1.001132281837469e-05,
|
120289 |
+
"loss": 0.4094,
|
120290 |
+
"step": 99575
|
120291 |
+
},
|
120292 |
+
{
|
120293 |
+
"epoch": 18.39,
|
120294 |
+
"learning_rate": 1.0011079066117751e-05,
|
120295 |
+
"loss": 0.4093,
|
120296 |
+
"step": 99580
|
120297 |
+
},
|
120298 |
+
{
|
120299 |
+
"epoch": 18.39,
|
120300 |
+
"learning_rate": 1.001083796604363e-05,
|
120301 |
+
"loss": 0.4031,
|
120302 |
+
"step": 99585
|
120303 |
+
},
|
120304 |
+
{
|
120305 |
+
"epoch": 18.39,
|
120306 |
+
"learning_rate": 1.0010599518158261e-05,
|
120307 |
+
"loss": 0.4067,
|
120308 |
+
"step": 99590
|
120309 |
+
},
|
120310 |
+
{
|
120311 |
+
"epoch": 18.39,
|
120312 |
+
"learning_rate": 1.001036372246754e-05,
|
120313 |
+
"loss": 0.4115,
|
120314 |
+
"step": 99595
|
120315 |
+
},
|
120316 |
+
{
|
120317 |
+
"epoch": 18.39,
|
120318 |
+
"learning_rate": 1.0010130578977279e-05,
|
120319 |
+
"loss": 0.4164,
|
120320 |
+
"step": 99600
|
120321 |
+
},
|
120322 |
+
{
|
120323 |
+
"epoch": 18.39,
|
120324 |
+
"learning_rate": 1.000990008769324e-05,
|
120325 |
+
"loss": 0.4044,
|
120326 |
+
"step": 99605
|
120327 |
+
},
|
120328 |
+
{
|
120329 |
+
"epoch": 18.39,
|
120330 |
+
"learning_rate": 1.0009672248621096e-05,
|
120331 |
+
"loss": 0.4044,
|
120332 |
+
"step": 99610
|
120333 |
+
},
|
120334 |
+
{
|
120335 |
+
"epoch": 18.39,
|
120336 |
+
"learning_rate": 1.0009447061766477e-05,
|
120337 |
+
"loss": 0.4103,
|
120338 |
+
"step": 99615
|
120339 |
+
},
|
120340 |
+
{
|
120341 |
+
"epoch": 18.39,
|
120342 |
+
"learning_rate": 1.0009224527134947e-05,
|
120343 |
+
"loss": 0.4079,
|
120344 |
+
"step": 99620
|
120345 |
+
},
|
120346 |
+
{
|
120347 |
+
"epoch": 18.4,
|
120348 |
+
"learning_rate": 1.0009004644731984e-05,
|
120349 |
+
"loss": 0.4089,
|
120350 |
+
"step": 99625
|
120351 |
+
},
|
120352 |
+
{
|
120353 |
+
"epoch": 18.4,
|
120354 |
+
"learning_rate": 1.0008787414563016e-05,
|
120355 |
+
"loss": 0.4108,
|
120356 |
+
"step": 99630
|
120357 |
+
},
|
120358 |
+
{
|
120359 |
+
"epoch": 18.4,
|
120360 |
+
"learning_rate": 1.0008572836633405e-05,
|
120361 |
+
"loss": 0.4099,
|
120362 |
+
"step": 99635
|
120363 |
+
},
|
120364 |
+
{
|
120365 |
+
"epoch": 18.4,
|
120366 |
+
"learning_rate": 1.0008360910948447e-05,
|
120367 |
+
"loss": 0.4044,
|
120368 |
+
"step": 99640
|
120369 |
+
},
|
120370 |
+
{
|
120371 |
+
"epoch": 18.4,
|
120372 |
+
"learning_rate": 1.000815163751337e-05,
|
120373 |
+
"loss": 0.4017,
|
120374 |
+
"step": 99645
|
120375 |
+
},
|
120376 |
+
{
|
120377 |
+
"epoch": 18.4,
|
120378 |
+
"learning_rate": 1.0007945016333332e-05,
|
120379 |
+
"loss": 0.408,
|
120380 |
+
"step": 99650
|
120381 |
+
},
|
120382 |
+
{
|
120383 |
+
"epoch": 18.4,
|
120384 |
+
"learning_rate": 1.000774104741343e-05,
|
120385 |
+
"loss": 0.4067,
|
120386 |
+
"step": 99655
|
120387 |
+
},
|
120388 |
+
{
|
120389 |
+
"epoch": 18.4,
|
120390 |
+
"learning_rate": 1.000753973075872e-05,
|
120391 |
+
"loss": 0.4058,
|
120392 |
+
"step": 99660
|
120393 |
+
},
|
120394 |
+
{
|
120395 |
+
"epoch": 18.4,
|
120396 |
+
"learning_rate": 1.0007341066374134e-05,
|
120397 |
+
"loss": 0.406,
|
120398 |
+
"step": 99665
|
120399 |
+
},
|
120400 |
+
{
|
120401 |
+
"epoch": 18.4,
|
120402 |
+
"learning_rate": 1.0007145054264601e-05,
|
120403 |
+
"loss": 0.4101,
|
120404 |
+
"step": 99670
|
120405 |
+
},
|
120406 |
+
{
|
120407 |
+
"epoch": 18.4,
|
120408 |
+
"learning_rate": 1.0006951694434954e-05,
|
120409 |
+
"loss": 0.4063,
|
120410 |
+
"step": 99675
|
120411 |
+
},
|
120412 |
+
{
|
120413 |
+
"epoch": 18.41,
|
120414 |
+
"learning_rate": 1.0006760986889943e-05,
|
120415 |
+
"loss": 0.41,
|
120416 |
+
"step": 99680
|
120417 |
+
},
|
120418 |
+
{
|
120419 |
+
"epoch": 18.41,
|
120420 |
+
"learning_rate": 1.0006572931634295e-05,
|
120421 |
+
"loss": 0.4072,
|
120422 |
+
"step": 99685
|
120423 |
+
},
|
120424 |
+
{
|
120425 |
+
"epoch": 18.41,
|
120426 |
+
"learning_rate": 1.0006387528672654e-05,
|
120427 |
+
"loss": 0.4118,
|
120428 |
+
"step": 99690
|
120429 |
+
},
|
120430 |
+
{
|
120431 |
+
"epoch": 18.41,
|
120432 |
+
"learning_rate": 1.0006204778009574e-05,
|
120433 |
+
"loss": 0.4164,
|
120434 |
+
"step": 99695
|
120435 |
+
},
|
120436 |
+
{
|
120437 |
+
"epoch": 18.41,
|
120438 |
+
"learning_rate": 1.0006024679649572e-05,
|
120439 |
+
"loss": 0.4075,
|
120440 |
+
"step": 99700
|
120441 |
+
},
|
120442 |
+
{
|
120443 |
+
"epoch": 18.41,
|
120444 |
+
"learning_rate": 1.000584723359711e-05,
|
120445 |
+
"loss": 0.4063,
|
120446 |
+
"step": 99705
|
120447 |
+
},
|
120448 |
+
{
|
120449 |
+
"epoch": 18.41,
|
120450 |
+
"learning_rate": 1.000567243985653e-05,
|
120451 |
+
"loss": 0.4074,
|
120452 |
+
"step": 99710
|
120453 |
+
},
|
120454 |
+
{
|
120455 |
+
"epoch": 18.41,
|
120456 |
+
"learning_rate": 1.0005500298432178e-05,
|
120457 |
+
"loss": 0.4118,
|
120458 |
+
"step": 99715
|
120459 |
+
},
|
120460 |
+
{
|
120461 |
+
"epoch": 18.41,
|
120462 |
+
"learning_rate": 1.0005330809328291e-05,
|
120463 |
+
"loss": 0.4088,
|
120464 |
+
"step": 99720
|
120465 |
+
},
|
120466 |
+
{
|
120467 |
+
"epoch": 18.41,
|
120468 |
+
"learning_rate": 1.0005163972549048e-05,
|
120469 |
+
"loss": 0.409,
|
120470 |
+
"step": 99725
|
120471 |
+
},
|
120472 |
+
{
|
120473 |
+
"epoch": 18.42,
|
120474 |
+
"learning_rate": 1.000499978809855e-05,
|
120475 |
+
"loss": 0.4069,
|
120476 |
+
"step": 99730
|
120477 |
+
},
|
120478 |
+
{
|
120479 |
+
"epoch": 18.42,
|
120480 |
+
"learning_rate": 1.0004838255980884e-05,
|
120481 |
+
"loss": 0.4042,
|
120482 |
+
"step": 99735
|
120483 |
+
},
|
120484 |
+
{
|
120485 |
+
"epoch": 18.42,
|
120486 |
+
"learning_rate": 1.0004679376200009e-05,
|
120487 |
+
"loss": 0.4142,
|
120488 |
+
"step": 99740
|
120489 |
+
},
|
120490 |
+
{
|
120491 |
+
"epoch": 18.42,
|
120492 |
+
"learning_rate": 1.0004523148759851e-05,
|
120493 |
+
"loss": 0.4096,
|
120494 |
+
"step": 99745
|
120495 |
+
},
|
120496 |
+
{
|
120497 |
+
"epoch": 18.42,
|
120498 |
+
"learning_rate": 1.0004369573664269e-05,
|
120499 |
+
"loss": 0.409,
|
120500 |
+
"step": 99750
|
120501 |
+
},
|
120502 |
+
{
|
120503 |
+
"epoch": 18.42,
|
120504 |
+
"learning_rate": 1.0004218650917053e-05,
|
120505 |
+
"loss": 0.4107,
|
120506 |
+
"step": 99755
|
120507 |
+
},
|
120508 |
+
{
|
120509 |
+
"epoch": 18.42,
|
120510 |
+
"learning_rate": 1.0004070380521919e-05,
|
120511 |
+
"loss": 0.4091,
|
120512 |
+
"step": 99760
|
120513 |
+
},
|
120514 |
+
{
|
120515 |
+
"epoch": 18.42,
|
120516 |
+
"learning_rate": 1.0003924762482518e-05,
|
120517 |
+
"loss": 0.4059,
|
120518 |
+
"step": 99765
|
120519 |
+
},
|
120520 |
+
{
|
120521 |
+
"epoch": 18.42,
|
120522 |
+
"learning_rate": 1.0003781796802478e-05,
|
120523 |
+
"loss": 0.4076,
|
120524 |
+
"step": 99770
|
120525 |
+
},
|
120526 |
+
{
|
120527 |
+
"epoch": 18.42,
|
120528 |
+
"learning_rate": 1.0003641483485282e-05,
|
120529 |
+
"loss": 0.4045,
|
120530 |
+
"step": 99775
|
120531 |
+
},
|
120532 |
+
{
|
120533 |
+
"epoch": 18.43,
|
120534 |
+
"learning_rate": 1.000350382253443e-05,
|
120535 |
+
"loss": 0.4117,
|
120536 |
+
"step": 99780
|
120537 |
+
},
|
120538 |
+
{
|
120539 |
+
"epoch": 18.43,
|
120540 |
+
"learning_rate": 1.0003368813953286e-05,
|
120541 |
+
"loss": 0.4119,
|
120542 |
+
"step": 99785
|
120543 |
+
},
|
120544 |
+
{
|
120545 |
+
"epoch": 18.43,
|
120546 |
+
"learning_rate": 1.0003236457745215e-05,
|
120547 |
+
"loss": 0.4067,
|
120548 |
+
"step": 99790
|
120549 |
+
},
|
120550 |
+
{
|
120551 |
+
"epoch": 18.43,
|
120552 |
+
"learning_rate": 1.0003106753913454e-05,
|
120553 |
+
"loss": 0.41,
|
120554 |
+
"step": 99795
|
120555 |
+
},
|
120556 |
+
{
|
120557 |
+
"epoch": 18.43,
|
120558 |
+
"learning_rate": 1.0002979702461211e-05,
|
120559 |
+
"loss": 0.4118,
|
120560 |
+
"step": 99800
|
120561 |
+
},
|
120562 |
+
{
|
120563 |
+
"epoch": 18.43,
|
120564 |
+
"learning_rate": 1.0002855303391625e-05,
|
120565 |
+
"loss": 0.4082,
|
120566 |
+
"step": 99805
|
120567 |
+
},
|
120568 |
+
{
|
120569 |
+
"epoch": 18.43,
|
120570 |
+
"learning_rate": 1.0002733556707777e-05,
|
120571 |
+
"loss": 0.4099,
|
120572 |
+
"step": 99810
|
120573 |
+
},
|
120574 |
+
{
|
120575 |
+
"epoch": 18.43,
|
120576 |
+
"learning_rate": 1.0002614462412648e-05,
|
120577 |
+
"loss": 0.4129,
|
120578 |
+
"step": 99815
|
120579 |
+
},
|
120580 |
+
{
|
120581 |
+
"epoch": 18.43,
|
120582 |
+
"learning_rate": 1.000249802050919e-05,
|
120583 |
+
"loss": 0.4092,
|
120584 |
+
"step": 99820
|
120585 |
+
},
|
120586 |
+
{
|
120587 |
+
"epoch": 18.43,
|
120588 |
+
"learning_rate": 1.0002384231000275e-05,
|
120589 |
+
"loss": 0.4107,
|
120590 |
+
"step": 99825
|
120591 |
+
},
|
120592 |
+
{
|
120593 |
+
"epoch": 18.44,
|
120594 |
+
"learning_rate": 1.0002273093888698e-05,
|
120595 |
+
"loss": 0.4083,
|
120596 |
+
"step": 99830
|
120597 |
+
},
|
120598 |
+
{
|
120599 |
+
"epoch": 18.44,
|
120600 |
+
"learning_rate": 1.0002164609177228e-05,
|
120601 |
+
"loss": 0.4097,
|
120602 |
+
"step": 99835
|
120603 |
+
},
|
120604 |
+
{
|
120605 |
+
"epoch": 18.44,
|
120606 |
+
"learning_rate": 1.0002058776868513e-05,
|
120607 |
+
"loss": 0.4085,
|
120608 |
+
"step": 99840
|
120609 |
+
},
|
120610 |
+
{
|
120611 |
+
"epoch": 18.44,
|
120612 |
+
"learning_rate": 1.0001955596965181e-05,
|
120613 |
+
"loss": 0.4072,
|
120614 |
+
"step": 99845
|
120615 |
+
},
|
120616 |
+
{
|
120617 |
+
"epoch": 18.44,
|
120618 |
+
"learning_rate": 1.0001855069469772e-05,
|
120619 |
+
"loss": 0.4096,
|
120620 |
+
"step": 99850
|
120621 |
+
},
|
120622 |
+
{
|
120623 |
+
"epoch": 18.44,
|
120624 |
+
"learning_rate": 1.000175719438478e-05,
|
120625 |
+
"loss": 0.4097,
|
120626 |
+
"step": 99855
|
120627 |
+
},
|
120628 |
+
{
|
120629 |
+
"epoch": 18.44,
|
120630 |
+
"learning_rate": 1.0001661971712595e-05,
|
120631 |
+
"loss": 0.4089,
|
120632 |
+
"step": 99860
|
120633 |
+
},
|
120634 |
+
{
|
120635 |
+
"epoch": 18.44,
|
120636 |
+
"learning_rate": 1.0001569401455578e-05,
|
120637 |
+
"loss": 0.4112,
|
120638 |
+
"step": 99865
|
120639 |
+
},
|
120640 |
+
{
|
120641 |
+
"epoch": 18.44,
|
120642 |
+
"learning_rate": 1.0001479483616021e-05,
|
120643 |
+
"loss": 0.4012,
|
120644 |
+
"step": 99870
|
120645 |
+
},
|
120646 |
+
{
|
120647 |
+
"epoch": 18.44,
|
120648 |
+
"learning_rate": 1.0001392218196143e-05,
|
120649 |
+
"loss": 0.4062,
|
120650 |
+
"step": 99875
|
120651 |
+
},
|
120652 |
+
{
|
120653 |
+
"epoch": 18.44,
|
120654 |
+
"learning_rate": 1.0001307605198083e-05,
|
120655 |
+
"loss": 0.4054,
|
120656 |
+
"step": 99880
|
120657 |
+
},
|
120658 |
+
{
|
120659 |
+
"epoch": 18.45,
|
120660 |
+
"learning_rate": 1.0001225644623938e-05,
|
120661 |
+
"loss": 0.4085,
|
120662 |
+
"step": 99885
|
120663 |
+
},
|
120664 |
+
{
|
120665 |
+
"epoch": 18.45,
|
120666 |
+
"learning_rate": 1.0001146336475726e-05,
|
120667 |
+
"loss": 0.4098,
|
120668 |
+
"step": 99890
|
120669 |
+
},
|
120670 |
+
{
|
120671 |
+
"epoch": 18.45,
|
120672 |
+
"learning_rate": 1.0001069680755418e-05,
|
120673 |
+
"loss": 0.4079,
|
120674 |
+
"step": 99895
|
120675 |
+
},
|
120676 |
+
{
|
120677 |
+
"epoch": 18.45,
|
120678 |
+
"learning_rate": 1.0000995677464887e-05,
|
120679 |
+
"loss": 0.4102,
|
120680 |
+
"step": 99900
|
120681 |
+
},
|
120682 |
+
{
|
120683 |
+
"epoch": 18.45,
|
120684 |
+
"learning_rate": 1.0000924326605957e-05,
|
120685 |
+
"loss": 0.4079,
|
120686 |
+
"step": 99905
|
120687 |
+
},
|
120688 |
+
{
|
120689 |
+
"epoch": 18.45,
|
120690 |
+
"learning_rate": 1.000085562818041e-05,
|
120691 |
+
"loss": 0.41,
|
120692 |
+
"step": 99910
|
120693 |
+
},
|
120694 |
+
{
|
120695 |
+
"epoch": 18.45,
|
120696 |
+
"learning_rate": 1.0000789582189913e-05,
|
120697 |
+
"loss": 0.405,
|
120698 |
+
"step": 99915
|
120699 |
+
},
|
120700 |
+
{
|
120701 |
+
"epoch": 18.45,
|
120702 |
+
"learning_rate": 1.0000726188636128e-05,
|
120703 |
+
"loss": 0.4091,
|
120704 |
+
"step": 99920
|
120705 |
+
},
|
120706 |
+
{
|
120707 |
+
"epoch": 18.45,
|
120708 |
+
"learning_rate": 1.0000665447520593e-05,
|
120709 |
+
"loss": 0.4043,
|
120710 |
+
"step": 99925
|
120711 |
+
},
|
120712 |
+
{
|
120713 |
+
"epoch": 18.45,
|
120714 |
+
"learning_rate": 1.0000607358844825e-05,
|
120715 |
+
"loss": 0.4068,
|
120716 |
+
"step": 99930
|
120717 |
+
},
|
120718 |
+
{
|
120719 |
+
"epoch": 18.46,
|
120720 |
+
"learning_rate": 1.0000551922610243e-05,
|
120721 |
+
"loss": 0.4049,
|
120722 |
+
"step": 99935
|
120723 |
+
},
|
120724 |
+
{
|
120725 |
+
"epoch": 18.46,
|
120726 |
+
"learning_rate": 1.000049913881822e-05,
|
120727 |
+
"loss": 0.4048,
|
120728 |
+
"step": 99940
|
120729 |
+
},
|
120730 |
+
{
|
120731 |
+
"epoch": 18.46,
|
120732 |
+
"learning_rate": 1.0000449007470055e-05,
|
120733 |
+
"loss": 0.412,
|
120734 |
+
"step": 99945
|
120735 |
+
},
|
120736 |
+
{
|
120737 |
+
"epoch": 18.46,
|
120738 |
+
"learning_rate": 1.0000401528566993e-05,
|
120739 |
+
"loss": 0.4076,
|
120740 |
+
"step": 99950
|
120741 |
+
},
|
120742 |
+
{
|
120743 |
+
"epoch": 18.46,
|
120744 |
+
"learning_rate": 1.00003567021102e-05,
|
120745 |
+
"loss": 0.4085,
|
120746 |
+
"step": 99955
|
120747 |
+
},
|
120748 |
+
{
|
120749 |
+
"epoch": 18.46,
|
120750 |
+
"learning_rate": 1.0000314528100778e-05,
|
120751 |
+
"loss": 0.405,
|
120752 |
+
"step": 99960
|
120753 |
+
},
|
120754 |
+
{
|
120755 |
+
"epoch": 18.46,
|
120756 |
+
"learning_rate": 1.0000275006539773e-05,
|
120757 |
+
"loss": 0.4089,
|
120758 |
+
"step": 99965
|
120759 |
+
},
|
120760 |
+
{
|
120761 |
+
"epoch": 18.46,
|
120762 |
+
"learning_rate": 1.0000238137428167e-05,
|
120763 |
+
"loss": 0.4059,
|
120764 |
+
"step": 99970
|
120765 |
+
},
|
120766 |
+
{
|
120767 |
+
"epoch": 18.46,
|
120768 |
+
"learning_rate": 1.0000203920766867e-05,
|
120769 |
+
"loss": 0.4062,
|
120770 |
+
"step": 99975
|
120771 |
+
},
|
120772 |
+
{
|
120773 |
+
"epoch": 18.46,
|
120774 |
+
"learning_rate": 1.0000172356556704e-05,
|
120775 |
+
"loss": 0.4075,
|
120776 |
+
"step": 99980
|
120777 |
+
},
|
120778 |
+
{
|
120779 |
+
"epoch": 18.47,
|
120780 |
+
"learning_rate": 1.000014344479847e-05,
|
120781 |
+
"loss": 0.4041,
|
120782 |
+
"step": 99985
|
120783 |
+
},
|
120784 |
+
{
|
120785 |
+
"epoch": 18.47,
|
120786 |
+
"learning_rate": 1.0000117185492867e-05,
|
120787 |
+
"loss": 0.4062,
|
120788 |
+
"step": 99990
|
120789 |
+
},
|
120790 |
+
{
|
120791 |
+
"epoch": 18.47,
|
120792 |
+
"learning_rate": 1.0000093578640555e-05,
|
120793 |
+
"loss": 0.4035,
|
120794 |
+
"step": 99995
|
120795 |
+
},
|
120796 |
+
{
|
120797 |
+
"epoch": 18.47,
|
120798 |
+
"learning_rate": 1.0000072624242102e-05,
|
120799 |
+
"loss": 0.408,
|
120800 |
+
"step": 100000
|
120801 |
+
},
|
120802 |
+
{
|
120803 |
+
"epoch": 18.47,
|
120804 |
+
"eval_loss": 0.38928771018981934,
|
120805 |
+
"eval_runtime": 99.578,
|
120806 |
+
"eval_samples_per_second": 50.212,
|
120807 |
+
"eval_steps_per_second": 0.532,
|
120808 |
+
"step": 100000
|
120809 |
}
|
120810 |
],
|
120811 |
"max_steps": 100000,
|
120812 |
"num_train_epochs": 20,
|
120813 |
+
"total_flos": 1.0515635686869263e+22,
|
120814 |
"trial_name": null,
|
120815 |
"trial_params": null
|
120816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ece555ebd5e5c6fe697b5c0892a0069538c02673bcdea4dd5ba7a2fbb86221c
|
3 |
size 449471589
|