Training in progress, step 18500, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:68dce92e38b5669ed6af11f3c9b53539b4b596623a39b20a0447a548beedcdd3
 size 6832600
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4be4e12d392e4c555e25b8c9bb85f734284e812f224aa91cc5771977c0329d01
 size 3612474
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:17134db01c21afe75f26941e4b935cb7f5490e9c25fded9cd19de49137ddf1a9
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5d04c4fae5a8a22ac933ab7ace077380dd0ddd468b3feecba88e276032e86917
 size 1064
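Each of the four files above is a Git LFS pointer, so the commit only swaps the recorded sha256 object id (the byte size of each blob is unchanged); the checkpoint binaries themselves live in LFS storage. A minimal sketch, not part of this commit, of checking a downloaded blob against such a pointer — the local paths are illustrative:

import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer file ("key value" per line) into a dict."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the oid/size recorded in its pointer."""
    fields = parse_lfs_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:<hash>" -> "<hash>"
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return (len(data) == expected_size
            and hashlib.sha256(data).hexdigest() == expected_oid)

# Hypothetical paths; the adapter pointer in this commit records
# oid sha256:68dce92e... and size 6832600.
print(verify_blob("adapter_model.safetensors.pointer",
                  "last-checkpoint/adapter_model.safetensors"))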
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.4872267579668159,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 18500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -126007,6 +126007,3506 @@
       "learning_rate": 2.631024493020806e-05,
       "loss": 1.8519,
       "step": 18000
-    }
+    },
+    {
+      "epoch": 0.47408480379246776,
+      "grad_norm": 3.261470317840576,
+      "learning_rate": 2.6308928101132473e-05,
+      "loss": 1.7502,
+      "step": 18001
+    },
+    {
+      "epoch": 0.47411114037397944,
+      "grad_norm": 3.209693431854248,
+      "learning_rate": 2.6307611272056888e-05,
+      "loss": 0.7119,
+      "step": 18002
+    },
+    {
+      "epoch": 0.4741374769554912,
+      "grad_norm": 1.4962162971496582,
+      "learning_rate": 2.63062944429813e-05,
+      "loss": 1.766,
+      "step": 18003
+    },
+    {
+      "epoch": 0.4741638135370029,
+      "grad_norm": 1.7053147554397583,
+      "learning_rate": 2.630497761390572e-05,
+      "loss": 1.629,
+      "step": 18004
+    },
+    {
+      "epoch": 0.4741901501185146,
+      "grad_norm": 3.461153268814087,
+      "learning_rate": 2.630366078483013e-05,
+      "loss": 1.2987,
+      "step": 18005
+    },
+    {
+      "epoch": 0.47421648670002636,
+      "grad_norm": 5.332024574279785,
+      "learning_rate": 2.6302343955754544e-05,
+      "loss": 1.4677,
+      "step": 18006
+    },
+    {
+      "epoch": 0.47424282328153805,
+      "grad_norm": 3.26020884513855,
+      "learning_rate": 2.6301027126678956e-05,
+      "loss": 2.0604,
+      "step": 18007
+    },
+    {
+      "epoch": 0.4742691598630498,
+      "grad_norm": 1.7316187620162964,
+      "learning_rate": 2.6299710297603375e-05,
+      "loss": 1.8331,
+      "step": 18008
+    },
+    {
+      "epoch": 0.4742954964445615,
+      "grad_norm": 3.970562219619751,
+      "learning_rate": 2.6298393468527787e-05,
+      "loss": 1.4967,
+      "step": 18009
+    },
+    {
+      "epoch": 0.4743218330260732,
+      "grad_norm": 1.3757078647613525,
+      "learning_rate": 2.62970766394522e-05,
+      "loss": 2.077,
+      "step": 18010
+    },
+    {
+      "epoch": 0.4743481696075849,
+      "grad_norm": 2.3905718326568604,
+      "learning_rate": 2.6295759810376615e-05,
+      "loss": 1.4518,
+      "step": 18011
+    },
+    {
+      "epoch": 0.47437450618909666,
+      "grad_norm": 4.833202362060547,
+      "learning_rate": 2.6294442981301027e-05,
+      "loss": 2.1774,
+      "step": 18012
+    },
+    {
+      "epoch": 0.4744008427706084,
+      "grad_norm": 2.995392322540283,
+      "learning_rate": 2.6293126152225446e-05,
+      "loss": 1.8886,
+      "step": 18013
+    },
+    {
+      "epoch": 0.4744271793521201,
+      "grad_norm": 1.6298213005065918,
+      "learning_rate": 2.629180932314986e-05,
+      "loss": 1.6117,
+      "step": 18014
+    },
+    {
+      "epoch": 0.47445351593363183,
+      "grad_norm": 2.83526611328125,
+      "learning_rate": 2.629049249407427e-05,
+      "loss": 0.5354,
+      "step": 18015
+    },
+    {
+      "epoch": 0.4744798525151435,
+      "grad_norm": 2.0443403720855713,
+      "learning_rate": 2.6289175664998683e-05,
+      "loss": 1.8038,
+      "step": 18016
+    },
+    {
+      "epoch": 0.47450618909665526,
+      "grad_norm": 2.0520477294921875,
+      "learning_rate": 2.6287858835923095e-05,
+      "loss": 0.98,
+      "step": 18017
+    },
+    {
+      "epoch": 0.47453252567816695,
+      "grad_norm": 1.6275315284729004,
+      "learning_rate": 2.6286542006847514e-05,
+      "loss": 1.947,
+      "step": 18018
+    },
+    {
+      "epoch": 0.4745588622596787,
+      "grad_norm": 1.6136394739151,
+      "learning_rate": 2.6285225177771926e-05,
+      "loss": 1.8218,
+      "step": 18019
+    },
+    {
+      "epoch": 0.47458519884119044,
+      "grad_norm": 3.588017225265503,
+      "learning_rate": 2.628390834869634e-05,
+      "loss": 1.897,
+      "step": 18020
+    },
+    {
+      "epoch": 0.4746115354227021,
+      "grad_norm": 1.9499776363372803,
+      "learning_rate": 2.6282591519620754e-05,
+      "loss": 2.0482,
+      "step": 18021
+    },
+    {
+      "epoch": 0.47463787200421387,
+      "grad_norm": 1.7800002098083496,
+      "learning_rate": 2.628127469054517e-05,
+      "loss": 1.8747,
+      "step": 18022
+    },
+    {
+      "epoch": 0.47466420858572556,
+      "grad_norm": 4.833249568939209,
+      "learning_rate": 2.6279957861469585e-05,
+      "loss": 1.8213,
+      "step": 18023
+    },
+    {
+      "epoch": 0.4746905451672373,
+      "grad_norm": 2.453986883163452,
+      "learning_rate": 2.6278641032393997e-05,
+      "loss": 1.0012,
+      "step": 18024
+    },
+    {
+      "epoch": 0.474716881748749,
+      "grad_norm": 2.8022005558013916,
+      "learning_rate": 2.627732420331841e-05,
+      "loss": 0.8829,
+      "step": 18025
+    },
+    {
+      "epoch": 0.47474321833026073,
+      "grad_norm": 1.5881845951080322,
+      "learning_rate": 2.6276007374242822e-05,
+      "loss": 1.8127,
+      "step": 18026
+    },
+    {
+      "epoch": 0.4747695549117725,
+      "grad_norm": 2.413527011871338,
+      "learning_rate": 2.627469054516724e-05,
+      "loss": 2.3022,
+      "step": 18027
+    },
+    {
+      "epoch": 0.47479589149328416,
+      "grad_norm": 1.7777903079986572,
+      "learning_rate": 2.6273373716091653e-05,
+      "loss": 0.6391,
+      "step": 18028
+    },
+    {
+      "epoch": 0.4748222280747959,
+      "grad_norm": 2.0796475410461426,
+      "learning_rate": 2.6272056887016065e-05,
+      "loss": 1.2444,
+      "step": 18029
+    },
+    {
+      "epoch": 0.4748485646563076,
+      "grad_norm": 1.6656315326690674,
+      "learning_rate": 2.627074005794048e-05,
+      "loss": 1.5637,
+      "step": 18030
+    },
+    {
+      "epoch": 0.47487490123781934,
+      "grad_norm": 1.4762217998504639,
+      "learning_rate": 2.6269423228864893e-05,
+      "loss": 1.8589,
+      "step": 18031
+    },
+    {
+      "epoch": 0.474901237819331,
+      "grad_norm": 1.888584852218628,
+      "learning_rate": 2.6268106399789312e-05,
+      "loss": 2.2992,
+      "step": 18032
+    },
+    {
+      "epoch": 0.47492757440084277,
+      "grad_norm": 3.5453085899353027,
+      "learning_rate": 2.6266789570713724e-05,
+      "loss": 1.7032,
+      "step": 18033
+    },
+    {
+      "epoch": 0.4749539109823545,
+      "grad_norm": 2.0390994548797607,
+      "learning_rate": 2.6265472741638136e-05,
+      "loss": 1.3441,
+      "step": 18034
+    },
+    {
+      "epoch": 0.4749802475638662,
+      "grad_norm": 1.6482908725738525,
+      "learning_rate": 2.626415591256255e-05,
+      "loss": 0.3402,
+      "step": 18035
+    },
+    {
+      "epoch": 0.47500658414537794,
+      "grad_norm": 3.6160387992858887,
+      "learning_rate": 2.626283908348696e-05,
+      "loss": 0.8216,
+      "step": 18036
+    },
+    {
+      "epoch": 0.47503292072688963,
+      "grad_norm": 1.7986429929733276,
+      "learning_rate": 2.626152225441138e-05,
+      "loss": 1.7053,
+      "step": 18037
+    },
+    {
+      "epoch": 0.4750592573084014,
+      "grad_norm": 1.9232220649719238,
+      "learning_rate": 2.6260205425335792e-05,
+      "loss": 2.0929,
+      "step": 18038
+    },
+    {
+      "epoch": 0.4750855938899131,
+      "grad_norm": 2.988654851913452,
+      "learning_rate": 2.6258888596260208e-05,
+      "loss": 1.2714,
+      "step": 18039
+    },
+    {
+      "epoch": 0.4751119304714248,
+      "grad_norm": 1.9279531240463257,
+      "learning_rate": 2.625757176718462e-05,
+      "loss": 1.3988,
+      "step": 18040
+    },
+    {
+      "epoch": 0.47513826705293655,
+      "grad_norm": 3.3459930419921875,
+      "learning_rate": 2.625625493810904e-05,
+      "loss": 1.2646,
+      "step": 18041
+    },
+    {
+      "epoch": 0.47516460363444823,
+      "grad_norm": 4.728421688079834,
+      "learning_rate": 2.625493810903345e-05,
+      "loss": 0.8438,
+      "step": 18042
+    },
+    {
+      "epoch": 0.47519094021596,
+      "grad_norm": 2.0103824138641357,
+      "learning_rate": 2.6253621279957863e-05,
+      "loss": 1.8723,
+      "step": 18043
+    },
+    {
+      "epoch": 0.47521727679747167,
+      "grad_norm": 2.583697557449341,
+      "learning_rate": 2.6252304450882275e-05,
+      "loss": 2.0797,
+      "step": 18044
+    },
+    {
+      "epoch": 0.4752436133789834,
+      "grad_norm": 6.7954559326171875,
+      "learning_rate": 2.6250987621806688e-05,
+      "loss": 2.6605,
+      "step": 18045
+    },
+    {
+      "epoch": 0.47526994996049515,
+      "grad_norm": 3.5881361961364746,
+      "learning_rate": 2.6249670792731107e-05,
+      "loss": 2.1259,
+      "step": 18046
+    },
+    {
+      "epoch": 0.47529628654200684,
+      "grad_norm": 2.464256525039673,
+      "learning_rate": 2.624835396365552e-05,
+      "loss": 0.9051,
+      "step": 18047
+    },
+    {
+      "epoch": 0.4753226231235186,
+      "grad_norm": 7.696624755859375,
+      "learning_rate": 2.624703713457993e-05,
+      "loss": 2.0595,
+      "step": 18048
+    },
+    {
+      "epoch": 0.47534895970503027,
+      "grad_norm": 5.125574588775635,
+      "learning_rate": 2.6245720305504347e-05,
+      "loss": 0.432,
+      "step": 18049
+    },
+    {
+      "epoch": 0.475375296286542,
+      "grad_norm": 2.2241811752319336,
+      "learning_rate": 2.624440347642876e-05,
+      "loss": 1.2333,
+      "step": 18050
+    },
+    {
+      "epoch": 0.4754016328680537,
+      "grad_norm": 2.0132031440734863,
+      "learning_rate": 2.6243086647353178e-05,
+      "loss": 1.7055,
+      "step": 18051
+    },
+    {
+      "epoch": 0.47542796944956545,
+      "grad_norm": 1.9278985261917114,
+      "learning_rate": 2.624176981827759e-05,
+      "loss": 1.8635,
+      "step": 18052
+    },
+    {
+      "epoch": 0.4754543060310772,
+      "grad_norm": 2.030756711959839,
+      "learning_rate": 2.6240452989202002e-05,
+      "loss": 0.4427,
+      "step": 18053
+    },
+    {
+      "epoch": 0.4754806426125889,
+      "grad_norm": 1.6091283559799194,
+      "learning_rate": 2.6239136160126415e-05,
+      "loss": 1.5705,
+      "step": 18054
+    },
+    {
+      "epoch": 0.4755069791941006,
+      "grad_norm": 2.5038156509399414,
+      "learning_rate": 2.6237819331050834e-05,
+      "loss": 0.2297,
+      "step": 18055
+    },
+    {
+      "epoch": 0.4755333157756123,
+      "grad_norm": 2.3296658992767334,
+      "learning_rate": 2.6236502501975246e-05,
+      "loss": 1.9659,
+      "step": 18056
+    },
+    {
+      "epoch": 0.47555965235712405,
+      "grad_norm": 2.38259220123291,
+      "learning_rate": 2.6235185672899658e-05,
+      "loss": 1.9652,
+      "step": 18057
+    },
+    {
+      "epoch": 0.47558598893863574,
+      "grad_norm": 1.7944610118865967,
+      "learning_rate": 2.6233868843824074e-05,
+      "loss": 0.7851,
+      "step": 18058
+    },
+    {
+      "epoch": 0.4756123255201475,
+      "grad_norm": 1.4154953956604004,
+      "learning_rate": 2.6232552014748486e-05,
+      "loss": 1.5766,
+      "step": 18059
+    },
+    {
+      "epoch": 0.4756386621016592,
+      "grad_norm": 1.9560688734054565,
+      "learning_rate": 2.6231235185672905e-05,
+      "loss": 1.9532,
+      "step": 18060
+    },
+    {
+      "epoch": 0.4756649986831709,
+      "grad_norm": 1.9043681621551514,
+      "learning_rate": 2.6229918356597317e-05,
+      "loss": 1.5029,
+      "step": 18061
+    },
+    {
+      "epoch": 0.47569133526468266,
+      "grad_norm": 2.039125680923462,
+      "learning_rate": 2.622860152752173e-05,
+      "loss": 1.3208,
+      "step": 18062
+    },
+    {
+      "epoch": 0.47571767184619435,
+      "grad_norm": 1.4919660091400146,
+      "learning_rate": 2.622728469844614e-05,
+      "loss": 1.4258,
+      "step": 18063
+    },
+    {
+      "epoch": 0.4757440084277061,
+      "grad_norm": 2.184338331222534,
+      "learning_rate": 2.6225967869370554e-05,
+      "loss": 1.5314,
+      "step": 18064
+    },
+    {
+      "epoch": 0.4757703450092178,
+      "grad_norm": 3.4698526859283447,
+      "learning_rate": 2.6224651040294973e-05,
+      "loss": 1.8335,
+      "step": 18065
+    },
+    {
+      "epoch": 0.4757966815907295,
+      "grad_norm": 2.0690839290618896,
+      "learning_rate": 2.6223334211219385e-05,
+      "loss": 1.3201,
+      "step": 18066
+    },
+    {
+      "epoch": 0.47582301817224126,
+      "grad_norm": 1.9289405345916748,
+      "learning_rate": 2.62220173821438e-05,
+      "loss": 1.8343,
+      "step": 18067
+    },
+    {
+      "epoch": 0.47584935475375295,
+      "grad_norm": 1.8096168041229248,
+      "learning_rate": 2.6220700553068213e-05,
+      "loss": 0.6406,
+      "step": 18068
+    },
+    {
+      "epoch": 0.4758756913352647,
+      "grad_norm": 1.51333749294281,
+      "learning_rate": 2.6219383723992625e-05,
+      "loss": 2.2432,
+      "step": 18069
+    },
+    {
+      "epoch": 0.4759020279167764,
+      "grad_norm": 1.675201416015625,
+      "learning_rate": 2.6218066894917044e-05,
+      "loss": 1.9982,
+      "step": 18070
+    },
+    {
+      "epoch": 0.4759283644982881,
+      "grad_norm": 3.723294734954834,
+      "learning_rate": 2.6216750065841456e-05,
+      "loss": 1.6848,
+      "step": 18071
+    },
+    {
+      "epoch": 0.47595470107979987,
+      "grad_norm": 1.902472734451294,
+      "learning_rate": 2.6215433236765868e-05,
+      "loss": 1.6854,
+      "step": 18072
+    },
+    {
+      "epoch": 0.47598103766131156,
+      "grad_norm": 2.0708296298980713,
+      "learning_rate": 2.621411640769028e-05,
+      "loss": 1.6196,
+      "step": 18073
+    },
+    {
+      "epoch": 0.4760073742428233,
+      "grad_norm": 2.0596871376037598,
+      "learning_rate": 2.62127995786147e-05,
+      "loss": 0.9181,
+      "step": 18074
+    },
+    {
+      "epoch": 0.476033710824335,
+      "grad_norm": 1.582932949066162,
+      "learning_rate": 2.621148274953911e-05,
+      "loss": 1.9876,
+      "step": 18075
+    },
+    {
+      "epoch": 0.47606004740584673,
+      "grad_norm": 2.512171983718872,
+      "learning_rate": 2.6210165920463524e-05,
+      "loss": 2.3788,
+      "step": 18076
+    },
+    {
+      "epoch": 0.4760863839873584,
+      "grad_norm": 2.1394431591033936,
+      "learning_rate": 2.620884909138794e-05,
+      "loss": 0.6411,
+      "step": 18077
+    },
+    {
+      "epoch": 0.47611272056887016,
+      "grad_norm": 1.6871570348739624,
+      "learning_rate": 2.620753226231235e-05,
+      "loss": 1.7547,
+      "step": 18078
+    },
+    {
+      "epoch": 0.4761390571503819,
+      "grad_norm": 3.2810633182525635,
+      "learning_rate": 2.620621543323677e-05,
+      "loss": 1.3224,
+      "step": 18079
+    },
+    {
+      "epoch": 0.4761653937318936,
+      "grad_norm": 2.3216912746429443,
+      "learning_rate": 2.6204898604161183e-05,
+      "loss": 0.2762,
+      "step": 18080
+    },
+    {
+      "epoch": 0.47619173031340534,
+      "grad_norm": 2.0798046588897705,
+      "learning_rate": 2.6203581775085595e-05,
+      "loss": 1.8598,
+      "step": 18081
+    },
+    {
+      "epoch": 0.476218066894917,
+      "grad_norm": 2.1319127082824707,
+      "learning_rate": 2.6202264946010007e-05,
+      "loss": 1.3377,
+      "step": 18082
+    },
+    {
+      "epoch": 0.47624440347642877,
+      "grad_norm": 2.078587055206299,
+      "learning_rate": 2.620094811693442e-05,
+      "loss": 1.2751,
+      "step": 18083
+    },
+    {
+      "epoch": 0.47627074005794046,
+      "grad_norm": 2.120863914489746,
+      "learning_rate": 2.619963128785884e-05,
+      "loss": 0.5739,
+      "step": 18084
+    },
+    {
+      "epoch": 0.4762970766394522,
+      "grad_norm": 1.5160518884658813,
+      "learning_rate": 2.619831445878325e-05,
+      "loss": 1.5942,
+      "step": 18085
+    },
+    {
+      "epoch": 0.47632341322096394,
+      "grad_norm": 2.545600175857544,
+      "learning_rate": 2.6196997629707666e-05,
+      "loss": 1.5817,
+      "step": 18086
+    },
+    {
+      "epoch": 0.47634974980247563,
+      "grad_norm": 2.5309865474700928,
+      "learning_rate": 2.619568080063208e-05,
+      "loss": 1.8868,
+      "step": 18087
+    },
+    {
+      "epoch": 0.4763760863839874,
+      "grad_norm": 3.169647216796875,
+      "learning_rate": 2.6194363971556497e-05,
+      "loss": 1.0168,
+      "step": 18088
+    },
+    {
+      "epoch": 0.47640242296549906,
+      "grad_norm": 1.8885867595672607,
+      "learning_rate": 2.619304714248091e-05,
+      "loss": 1.8344,
+      "step": 18089
+    },
+    {
+      "epoch": 0.4764287595470108,
+      "grad_norm": 1.8588078022003174,
+      "learning_rate": 2.6191730313405322e-05,
+      "loss": 1.9987,
+      "step": 18090
+    },
+    {
+      "epoch": 0.4764550961285225,
+      "grad_norm": 2.69921875,
+      "learning_rate": 2.6190413484329734e-05,
+      "loss": 1.5042,
+      "step": 18091
+    },
+    {
+      "epoch": 0.47648143271003424,
+      "grad_norm": 1.9179502725601196,
+      "learning_rate": 2.6189096655254146e-05,
+      "loss": 2.1202,
+      "step": 18092
+    },
+    {
+      "epoch": 0.476507769291546,
+      "grad_norm": 1.971161127090454,
+      "learning_rate": 2.6187779826178565e-05,
+      "loss": 0.6615,
+      "step": 18093
+    },
+    {
+      "epoch": 0.47653410587305767,
+      "grad_norm": 4.208078384399414,
+      "learning_rate": 2.6186462997102977e-05,
+      "loss": 1.2328,
+      "step": 18094
+    },
+    {
+      "epoch": 0.4765604424545694,
+      "grad_norm": 2.099757432937622,
+      "learning_rate": 2.618514616802739e-05,
+      "loss": 2.2761,
+      "step": 18095
+    },
+    {
+      "epoch": 0.4765867790360811,
+      "grad_norm": 1.5804299116134644,
+      "learning_rate": 2.6183829338951805e-05,
+      "loss": 1.5915,
+      "step": 18096
+    },
+    {
+      "epoch": 0.47661311561759284,
+      "grad_norm": 2.426229953765869,
+      "learning_rate": 2.6182512509876217e-05,
+      "loss": 0.1391,
+      "step": 18097
+    },
+    {
+      "epoch": 0.47663945219910453,
+      "grad_norm": 3.338651418685913,
+      "learning_rate": 2.6181195680800636e-05,
+      "loss": 0.873,
+      "step": 18098
+    },
+    {
+      "epoch": 0.4766657887806163,
+      "grad_norm": 3.4374072551727295,
+      "learning_rate": 2.617987885172505e-05,
+      "loss": 1.6158,
+      "step": 18099
+    },
+    {
+      "epoch": 0.476692125362128,
+      "grad_norm": 1.865473985671997,
+      "learning_rate": 2.617856202264946e-05,
+      "loss": 2.1476,
+      "step": 18100
+    },
+    {
+      "epoch": 0.4767184619436397,
+      "grad_norm": 5.584752559661865,
+      "learning_rate": 2.6177245193573873e-05,
+      "loss": 0.7848,
+      "step": 18101
+    },
+    {
+      "epoch": 0.47674479852515145,
+      "grad_norm": 2.1740102767944336,
+      "learning_rate": 2.6175928364498285e-05,
+      "loss": 1.6618,
+      "step": 18102
+    },
+    {
+      "epoch": 0.47677113510666314,
+      "grad_norm": 1.813247561454773,
+      "learning_rate": 2.6174611535422704e-05,
+      "loss": 2.3947,
+      "step": 18103
+    },
+    {
+      "epoch": 0.4767974716881749,
+      "grad_norm": 2.389674663543701,
+      "learning_rate": 2.6173294706347116e-05,
+      "loss": 0.7996,
+      "step": 18104
+    },
+    {
+      "epoch": 0.4768238082696866,
+      "grad_norm": 2.842863082885742,
+      "learning_rate": 2.6171977877271532e-05,
+      "loss": 1.9356,
+      "step": 18105
+    },
+    {
+      "epoch": 0.4768501448511983,
+      "grad_norm": 2.1585025787353516,
+      "learning_rate": 2.6170661048195944e-05,
+      "loss": 2.4486,
+      "step": 18106
+    },
+    {
+      "epoch": 0.47687648143271005,
+      "grad_norm": 2.1451637744903564,
+      "learning_rate": 2.6169344219120363e-05,
+      "loss": 2.1151,
+      "step": 18107
+    },
+    {
+      "epoch": 0.47690281801422174,
+      "grad_norm": 5.163798809051514,
+      "learning_rate": 2.6168027390044775e-05,
+      "loss": 0.9405,
+      "step": 18108
+    },
+    {
+      "epoch": 0.4769291545957335,
+      "grad_norm": 1.7317043542861938,
+      "learning_rate": 2.6166710560969188e-05,
+      "loss": 2.1485,
+      "step": 18109
+    },
+    {
+      "epoch": 0.47695549117724517,
+      "grad_norm": 1.8907835483551025,
+      "learning_rate": 2.61653937318936e-05,
+      "loss": 1.6107,
+      "step": 18110
+    },
+    {
+      "epoch": 0.4769818277587569,
+      "grad_norm": 3.3649308681488037,
+      "learning_rate": 2.6164076902818012e-05,
+      "loss": 1.0804,
+      "step": 18111
+    },
+    {
+      "epoch": 0.47700816434026866,
+      "grad_norm": 4.079046726226807,
+      "learning_rate": 2.616276007374243e-05,
+      "loss": 1.4524,
+      "step": 18112
+    },
+    {
+      "epoch": 0.47703450092178035,
+      "grad_norm": 1.972220540046692,
+      "learning_rate": 2.6161443244666843e-05,
+      "loss": 2.1815,
+      "step": 18113
+    },
+    {
+      "epoch": 0.4770608375032921,
+      "grad_norm": 1.8180415630340576,
+      "learning_rate": 2.616012641559126e-05,
+      "loss": 2.4495,
+      "step": 18114
+    },
+    {
+      "epoch": 0.4770871740848038,
+      "grad_norm": 1.9662320613861084,
+      "learning_rate": 2.615880958651567e-05,
+      "loss": 1.647,
+      "step": 18115
+    },
+    {
+      "epoch": 0.4771135106663155,
+      "grad_norm": 2.095525026321411,
+      "learning_rate": 2.6157492757440083e-05,
+      "loss": 1.5167,
+      "step": 18116
+    },
+    {
+      "epoch": 0.4771398472478272,
+      "grad_norm": 2.066540479660034,
+      "learning_rate": 2.6156175928364502e-05,
+      "loss": 1.2558,
+      "step": 18117
+    },
+    {
+      "epoch": 0.47716618382933895,
+      "grad_norm": 3.7202439308166504,
+      "learning_rate": 2.6154859099288915e-05,
+      "loss": 1.4575,
+      "step": 18118
+    },
+    {
+      "epoch": 0.4771925204108507,
+      "grad_norm": 1.8115872144699097,
+      "learning_rate": 2.6153542270213327e-05,
+      "loss": 2.5709,
+      "step": 18119
+    },
+    {
+      "epoch": 0.4772188569923624,
+      "grad_norm": 2.043302536010742,
+      "learning_rate": 2.615222544113774e-05,
+      "loss": 1.1423,
+      "step": 18120
+    },
+    {
+      "epoch": 0.4772451935738741,
+      "grad_norm": 2.0807416439056396,
+      "learning_rate": 2.6150908612062158e-05,
+      "loss": 1.6837,
+      "step": 18121
+    },
+    {
+      "epoch": 0.4772715301553858,
+      "grad_norm": 3.1593334674835205,
+      "learning_rate": 2.614959178298657e-05,
+      "loss": 1.8308,
+      "step": 18122
+    },
+    {
+      "epoch": 0.47729786673689756,
+      "grad_norm": 3.4992871284484863,
+      "learning_rate": 2.6148274953910982e-05,
+      "loss": 2.1369,
+      "step": 18123
+    },
+    {
+      "epoch": 0.47732420331840925,
+      "grad_norm": 1.5538591146469116,
+      "learning_rate": 2.6146958124835398e-05,
+      "loss": 2.5318,
+      "step": 18124
+    },
+    {
+      "epoch": 0.477350539899921,
+      "grad_norm": 1.703216552734375,
+      "learning_rate": 2.614564129575981e-05,
+      "loss": 2.0921,
+      "step": 18125
+    },
+    {
+      "epoch": 0.47737687648143273,
+      "grad_norm": 2.0972647666931152,
+      "learning_rate": 2.614432446668423e-05,
+      "loss": 2.5599,
+      "step": 18126
+    },
+    {
+      "epoch": 0.4774032130629444,
+      "grad_norm": 4.844968795776367,
+      "learning_rate": 2.614300763760864e-05,
+      "loss": 1.8993,
+      "step": 18127
+    },
+    {
+      "epoch": 0.47742954964445616,
+      "grad_norm": 1.9064234495162964,
+      "learning_rate": 2.6141690808533054e-05,
+      "loss": 1.6282,
+      "step": 18128
+    },
+    {
+      "epoch": 0.47745588622596785,
+      "grad_norm": 3.336031198501587,
+      "learning_rate": 2.6140373979457466e-05,
+      "loss": 1.6488,
+      "step": 18129
+    },
+    {
+      "epoch": 0.4774822228074796,
+      "grad_norm": 1.554504632949829,
+      "learning_rate": 2.6139057150381878e-05,
+      "loss": 0.3676,
+      "step": 18130
+    },
+    {
+      "epoch": 0.4775085593889913,
+      "grad_norm": 1.4876564741134644,
+      "learning_rate": 2.6137740321306297e-05,
+      "loss": 1.8622,
+      "step": 18131
+    },
+    {
+      "epoch": 0.477534895970503,
+      "grad_norm": 3.5113332271575928,
+      "learning_rate": 2.613642349223071e-05,
+      "loss": 1.1884,
+      "step": 18132
+    },
+    {
+      "epoch": 0.47756123255201477,
+      "grad_norm": 2.460200786590576,
+      "learning_rate": 2.6135106663155125e-05,
+      "loss": 0.6754,
+      "step": 18133
+    },
+    {
+      "epoch": 0.47758756913352646,
+      "grad_norm": 2.1989099979400635,
+      "learning_rate": 2.6133789834079537e-05,
+      "loss": 2.3714,
+      "step": 18134
+    },
+    {
+      "epoch": 0.4776139057150382,
+      "grad_norm": 2.667492389678955,
+      "learning_rate": 2.613247300500395e-05,
+      "loss": 0.6263,
+      "step": 18135
+    },
+    {
+      "epoch": 0.4776402422965499,
+      "grad_norm": 2.299617052078247,
+      "learning_rate": 2.6131156175928368e-05,
+      "loss": 2.8219,
+      "step": 18136
+    },
+    {
+      "epoch": 0.47766657887806163,
+      "grad_norm": 1.612807035446167,
+      "learning_rate": 2.612983934685278e-05,
+      "loss": 0.2344,
+      "step": 18137
+    },
+    {
+      "epoch": 0.4776929154595734,
+      "grad_norm": 1.7575221061706543,
+      "learning_rate": 2.6128522517777193e-05,
+      "loss": 1.6634,
+      "step": 18138
+    },
+    {
+      "epoch": 0.47771925204108506,
+      "grad_norm": 4.1696457862854,
+      "learning_rate": 2.6127205688701605e-05,
+      "loss": 0.5157,
+      "step": 18139
+    },
+    {
+      "epoch": 0.4777455886225968,
+      "grad_norm": 1.7531259059906006,
+      "learning_rate": 2.6125888859626024e-05,
+      "loss": 2.1103,
+      "step": 18140
+    },
+    {
+      "epoch": 0.4777719252041085,
+      "grad_norm": 1.9951260089874268,
+      "learning_rate": 2.6124572030550436e-05,
+      "loss": 1.7099,
+      "step": 18141
+    },
+    {
+      "epoch": 0.47779826178562024,
+      "grad_norm": 2.5088744163513184,
+      "learning_rate": 2.612325520147485e-05,
+      "loss": 1.277,
+      "step": 18142
+    },
+    {
+      "epoch": 0.4778245983671319,
+      "grad_norm": 1.8694902658462524,
+      "learning_rate": 2.6121938372399264e-05,
+      "loss": 1.7016,
+      "step": 18143
+    },
+    {
+      "epoch": 0.47785093494864367,
+      "grad_norm": 2.1027448177337646,
+      "learning_rate": 2.6120621543323676e-05,
+      "loss": 2.3466,
+      "step": 18144
+    },
+    {
+      "epoch": 0.4778772715301554,
+      "grad_norm": 5.901076793670654,
+      "learning_rate": 2.6119304714248095e-05,
+      "loss": 1.5969,
+      "step": 18145
+    },
+    {
+      "epoch": 0.4779036081116671,
+      "grad_norm": 1.9883469343185425,
+      "learning_rate": 2.6117987885172507e-05,
+      "loss": 1.7854,
+      "step": 18146
+    },
+    {
+      "epoch": 0.47792994469317884,
+      "grad_norm": 2.4258086681365967,
+      "learning_rate": 2.611667105609692e-05,
+      "loss": 2.0893,
+      "step": 18147
+    },
+    {
+      "epoch": 0.47795628127469053,
+      "grad_norm": 1.5321277379989624,
+      "learning_rate": 2.611535422702133e-05,
+      "loss": 2.0222,
+      "step": 18148
+    },
+    {
+      "epoch": 0.4779826178562023,
+      "grad_norm": 2.2107131481170654,
+      "learning_rate": 2.6114037397945744e-05,
+      "loss": 0.276,
+      "step": 18149
+    },
+    {
+      "epoch": 0.47800895443771396,
+      "grad_norm": 2.2959635257720947,
+      "learning_rate": 2.6112720568870163e-05,
+      "loss": 2.1733,
+      "step": 18150
+    },
+    {
+      "epoch": 0.4780352910192257,
+      "grad_norm": 2.7273948192596436,
+      "learning_rate": 2.6111403739794575e-05,
+      "loss": 2.3528,
+      "step": 18151
+    },
+    {
+      "epoch": 0.47806162760073745,
+      "grad_norm": 1.675957441329956,
+      "learning_rate": 2.611008691071899e-05,
+      "loss": 1.7183,
+      "step": 18152
+    },
+    {
+      "epoch": 0.47808796418224914,
+      "grad_norm": 2.2190258502960205,
+      "learning_rate": 2.6108770081643403e-05,
+      "loss": 1.826,
+      "step": 18153
+    },
+    {
+      "epoch": 0.4781143007637609,
+      "grad_norm": 3.512843370437622,
+      "learning_rate": 2.6107453252567822e-05,
+      "loss": 2.0663,
+      "step": 18154
+    },
+    {
+      "epoch": 0.47814063734527257,
+      "grad_norm": 1.5189660787582397,
+      "learning_rate": 2.6106136423492234e-05,
+      "loss": 2.2426,
+      "step": 18155
+    },
+    {
+      "epoch": 0.4781669739267843,
+      "grad_norm": 5.404087543487549,
+      "learning_rate": 2.6104819594416646e-05,
+      "loss": 2.1597,
+      "step": 18156
+    },
+    {
+      "epoch": 0.478193310508296,
+      "grad_norm": 2.591707229614258,
+      "learning_rate": 2.610350276534106e-05,
+      "loss": 2.4166,
+      "step": 18157
+    },
+    {
+      "epoch": 0.47821964708980774,
+      "grad_norm": 4.782519817352295,
+      "learning_rate": 2.610218593626547e-05,
+      "loss": 1.4625,
+      "step": 18158
+    },
+    {
+      "epoch": 0.4782459836713195,
+      "grad_norm": 2.67582106590271,
+      "learning_rate": 2.610086910718989e-05,
+      "loss": 1.7045,
+      "step": 18159
+    },
+    {
+      "epoch": 0.4782723202528312,
+      "grad_norm": 1.6655147075653076,
+      "learning_rate": 2.6099552278114302e-05,
+      "loss": 1.421,
+      "step": 18160
+    },
+    {
+      "epoch": 0.4782986568343429,
+      "grad_norm": 3.701991081237793,
+      "learning_rate": 2.6098235449038717e-05,
+      "loss": 1.9044,
+      "step": 18161
+    },
+    {
+      "epoch": 0.4783249934158546,
+      "grad_norm": 1.59872567653656,
+      "learning_rate": 2.609691861996313e-05,
+      "loss": 2.3976,
+      "step": 18162
+    },
+    {
+      "epoch": 0.47835132999736635,
+      "grad_norm": 1.5172570943832397,
+      "learning_rate": 2.6095601790887542e-05,
+      "loss": 1.5677,
+      "step": 18163
+    },
+    {
+      "epoch": 0.47837766657887804,
+      "grad_norm": 2.137535333633423,
+      "learning_rate": 2.609428496181196e-05,
+      "loss": 1.8379,
+      "step": 18164
+    },
+    {
+      "epoch": 0.4784040031603898,
+      "grad_norm": 5.482143402099609,
+      "learning_rate": 2.6092968132736373e-05,
+      "loss": 1.0263,
+      "step": 18165
+    },
+    {
+      "epoch": 0.4784303397419015,
+      "grad_norm": 1.9915279150009155,
+      "learning_rate": 2.6091651303660785e-05,
+      "loss": 1.9052,
+      "step": 18166
+    },
+    {
+      "epoch": 0.4784566763234132,
+      "grad_norm": 2.3375186920166016,
+      "learning_rate": 2.6090334474585197e-05,
+      "loss": 0.7127,
+      "step": 18167
+    },
+    {
+      "epoch": 0.47848301290492495,
+      "grad_norm": 1.8854601383209229,
+      "learning_rate": 2.608901764550961e-05,
+      "loss": 2.0186,
+      "step": 18168
+    },
+    {
+      "epoch": 0.47850934948643664,
+      "grad_norm": 1.6205345392227173,
+      "learning_rate": 2.608770081643403e-05,
+      "loss": 2.5091,
+      "step": 18169
+    },
+    {
+      "epoch": 0.4785356860679484,
+      "grad_norm": 1.597391128540039,
+      "learning_rate": 2.608638398735844e-05,
+      "loss": 1.7359,
+      "step": 18170
+    },
+    {
+      "epoch": 0.4785620226494601,
+      "grad_norm": 3.3700568675994873,
+      "learning_rate": 2.6085067158282856e-05,
+      "loss": 1.5659,
+      "step": 18171
+    },
+    {
+      "epoch": 0.4785883592309718,
+      "grad_norm": 1.645609974861145,
+      "learning_rate": 2.608375032920727e-05,
+      "loss": 1.4701,
+      "step": 18172
+    },
+    {
+      "epoch": 0.47861469581248356,
+      "grad_norm": 2.141824245452881,
+      "learning_rate": 2.6082433500131688e-05,
+      "loss": 2.4564,
+      "step": 18173
+    },
+    {
+      "epoch": 0.47864103239399525,
+      "grad_norm": 1.818982481956482,
+      "learning_rate": 2.60811166710561e-05,
+      "loss": 1.02,
+      "step": 18174
+    },
+    {
+      "epoch": 0.478667368975507,
+      "grad_norm": 1.7090140581130981,
+      "learning_rate": 2.6079799841980512e-05,
+      "loss": 1.7205,
+      "step": 18175
+    },
+    {
+      "epoch": 0.4786937055570187,
+      "grad_norm": 2.0209977626800537,
+      "learning_rate": 2.6078483012904924e-05,
+      "loss": 1.3472,
+      "step": 18176
+    },
+    {
+      "epoch": 0.4787200421385304,
+      "grad_norm": 1.909293532371521,
+      "learning_rate": 2.6077166183829337e-05,
+      "loss": 1.2932,
+      "step": 18177
+    },
+    {
+      "epoch": 0.47874637872004216,
+      "grad_norm": 2.426377773284912,
+      "learning_rate": 2.6075849354753756e-05,
+      "loss": 1.3127,
+      "step": 18178
+    },
+    {
+      "epoch": 0.47877271530155385,
+      "grad_norm": 5.718318939208984,
+      "learning_rate": 2.6074532525678168e-05,
+      "loss": 2.0245,
+      "step": 18179
+    },
+    {
+      "epoch": 0.4787990518830656,
+      "grad_norm": 3.2824246883392334,
+      "learning_rate": 2.6073215696602583e-05,
+      "loss": 1.6523,
+      "step": 18180
+    },
+    {
+      "epoch": 0.4788253884645773,
+      "grad_norm": 2.88808274269104,
+      "learning_rate": 2.6071898867526996e-05,
+      "loss": 1.4564,
+      "step": 18181
+    },
+    {
+      "epoch": 0.478851725046089,
+      "grad_norm": 1.67848539352417,
+      "learning_rate": 2.6070582038451408e-05,
+      "loss": 1.81,
+      "step": 18182
+    },
+    {
+      "epoch": 0.4788780616276007,
+      "grad_norm": 6.433162689208984,
+      "learning_rate": 2.6069265209375827e-05,
+      "loss": 1.011,
+      "step": 18183
+    },
+    {
+      "epoch": 0.47890439820911246,
+      "grad_norm": 2.4767961502075195,
+      "learning_rate": 2.606794838030024e-05,
+      "loss": 1.3045,
+      "step": 18184
+    },
+    {
+      "epoch": 0.4789307347906242,
+      "grad_norm": 5.748504638671875,
+      "learning_rate": 2.606663155122465e-05,
+      "loss": 1.2878,
+      "step": 18185
+    },
+    {
+      "epoch": 0.4789570713721359,
+      "grad_norm": 1.5263091325759888,
+      "learning_rate": 2.6065314722149063e-05,
+      "loss": 1.87,
+      "step": 18186
+    },
+    {
+      "epoch": 0.47898340795364763,
+      "grad_norm": 1.9353070259094238,
+      "learning_rate": 2.6063997893073482e-05,
+      "loss": 2.8552,
+      "step": 18187
+    },
+    {
+      "epoch": 0.4790097445351593,
+      "grad_norm": 1.4300810098648071,
+      "learning_rate": 2.6062681063997895e-05,
+      "loss": 2.3514,
+      "step": 18188
+    },
+    {
+      "epoch": 0.47903608111667106,
+      "grad_norm": 1.5277667045593262,
+      "learning_rate": 2.606136423492231e-05,
+      "loss": 1.7833,
+      "step": 18189
+    },
+    {
+      "epoch": 0.47906241769818275,
+      "grad_norm": 1.8952159881591797,
+      "learning_rate": 2.6060047405846722e-05,
+      "loss": 1.7228,
+      "step": 18190
+    },
+    {
+      "epoch": 0.4790887542796945,
+      "grad_norm": 1.6774052381515503,
+      "learning_rate": 2.6058730576771135e-05,
+      "loss": 1.7671,
+      "step": 18191
+    },
+    {
+      "epoch": 0.47911509086120624,
+      "grad_norm": 2.0082924365997314,
+      "learning_rate": 2.6057413747695554e-05,
+      "loss": 1.6773,
+      "step": 18192
+    },
+    {
+      "epoch": 0.4791414274427179,
+      "grad_norm": 4.2807297706604,
+      "learning_rate": 2.6056096918619966e-05,
+      "loss": 0.799,
+      "step": 18193
+    },
   ],
   "logging_steps": 1,
@@ -126026,7 +129526,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
+
{
|
127363 |
+
"epoch": 0.47916776402422967,
|
127364 |
+
"grad_norm": 2.550044059753418,
|
127365 |
+
"learning_rate": 2.6054780089544378e-05,
|
127366 |
+
"loss": 2.0308,
|
127367 |
+
"step": 18194
|
127368 |
+
},
|
127369 |
+
{
|
127370 |
+
"epoch": 0.47919410060574136,
|
127371 |
+
"grad_norm": 1.816925287246704,
|
127372 |
+
"learning_rate": 2.605346326046879e-05,
|
127373 |
+
"loss": 1.781,
|
127374 |
+
"step": 18195
|
127375 |
+
},
|
127376 |
+
{
|
127377 |
+
"epoch": 0.4792204371872531,
|
127378 |
+
"grad_norm": 1.9360864162445068,
|
127379 |
+
"learning_rate": 2.6052146431393202e-05,
|
127380 |
+
"loss": 2.1195,
|
127381 |
+
"step": 18196
|
127382 |
+
},
|
127383 |
+
{
|
127384 |
+
"epoch": 0.4792467737687648,
|
127385 |
+
"grad_norm": 2.2189080715179443,
|
127386 |
+
"learning_rate": 2.605082960231762e-05,
|
127387 |
+
"loss": 1.9188,
|
127388 |
+
"step": 18197
|
127389 |
+
},
|
127390 |
+
{
|
127391 |
+
"epoch": 0.47927311035027653,
|
127392 |
+
"grad_norm": 1.4427847862243652,
|
127393 |
+
"learning_rate": 2.6049512773242034e-05,
|
127394 |
+
"loss": 1.5472,
|
127395 |
+
"step": 18198
|
127396 |
+
},
|
127397 |
+
{
|
127398 |
+
"epoch": 0.4792994469317883,
|
127399 |
+
"grad_norm": 2.8791027069091797,
|
127400 |
+
"learning_rate": 2.604819594416645e-05,
|
127401 |
+
"loss": 1.3314,
|
127402 |
+
"step": 18199
|
127403 |
+
},
|
127404 |
+
{
|
127405 |
+
"epoch": 0.47932578351329996,
|
127406 |
+
"grad_norm": 5.540414333343506,
|
127407 |
+
"learning_rate": 2.604687911509086e-05,
|
127408 |
+
"loss": 2.2421,
|
127409 |
+
"step": 18200
|
127410 |
+
},
|
127411 |
+
{
|
127412 |
+
"epoch": 0.4793521200948117,
|
127413 |
+
"grad_norm": 5.057022571563721,
|
127414 |
+
"learning_rate": 2.6045562286015274e-05,
|
127415 |
+
"loss": 0.7574,
|
127416 |
+
"step": 18201
|
127417 |
+
},
|
127418 |
+
{
|
127419 |
+
"epoch": 0.4793784566763234,
|
127420 |
+
"grad_norm": 2.3301544189453125,
|
127421 |
+
"learning_rate": 2.6044245456939693e-05,
|
127422 |
+
"loss": 1.0735,
|
127423 |
+
"step": 18202
|
127424 |
+
},
|
127425 |
+
{
|
127426 |
+
"epoch": 0.47940479325783514,
|
127427 |
+
"grad_norm": 1.5644989013671875,
|
127428 |
+
"learning_rate": 2.6042928627864105e-05,
|
127429 |
+
"loss": 1.4331,
|
127430 |
+
"step": 18203
|
127431 |
+
},
|
127432 |
+
{
|
127433 |
+
"epoch": 0.4794311298393468,
|
127434 |
+
"grad_norm": 3.384436845779419,
|
127435 |
+
"learning_rate": 2.6041611798788517e-05,
|
127436 |
+
"loss": 0.5878,
|
127437 |
+
"step": 18204
|
127438 |
+
},
|
127439 |
+
{
|
127440 |
+
"epoch": 0.47945746642085857,
|
127441 |
+
"grad_norm": 4.111256122589111,
|
127442 |
+
"learning_rate": 2.604029496971293e-05,
|
127443 |
+
"loss": 2.0897,
|
127444 |
+
"step": 18205
|
127445 |
+
},
|
127446 |
+
{
|
127447 |
+
"epoch": 0.4794838030023703,
|
127448 |
+
"grad_norm": 2.0982885360717773,
|
127449 |
+
"learning_rate": 2.6038978140637348e-05,
|
127450 |
+
"loss": 1.9615,
|
127451 |
+
"step": 18206
|
127452 |
+
},
|
127453 |
+
{
|
127454 |
+
"epoch": 0.479510139583882,
|
127455 |
+
"grad_norm": 3.7931666374206543,
|
127456 |
+
"learning_rate": 2.603766131156176e-05,
|
127457 |
+
"loss": 0.831,
|
127458 |
+
"step": 18207
|
127459 |
+
},
|
127460 |
+
{
|
127461 |
+
"epoch": 0.47953647616539374,
|
127462 |
+
"grad_norm": 4.855052947998047,
|
127463 |
+
"learning_rate": 2.6036344482486176e-05,
|
127464 |
+
"loss": 0.9936,
|
127465 |
+
"step": 18208
|
127466 |
+
},
|
127467 |
+
{
|
127468 |
+
"epoch": 0.47956281274690543,
|
127469 |
+
"grad_norm": 1.6595323085784912,
|
127470 |
+
"learning_rate": 2.6035027653410588e-05,
|
127471 |
+
"loss": 1.7072,
|
127472 |
+
"step": 18209
|
127473 |
+
},
|
127474 |
+
{
|
127475 |
+
"epoch": 0.4795891493284172,
|
127476 |
+
"grad_norm": 1.57021963596344,
|
127477 |
+
"learning_rate": 2.6033710824335e-05,
|
127478 |
+
"loss": 1.811,
|
127479 |
+
"step": 18210
|
127480 |
+
},
|
127481 |
+
{
|
127482 |
+
"epoch": 0.4796154859099289,
|
127483 |
+
"grad_norm": 1.3585107326507568,
|
127484 |
+
"learning_rate": 2.603239399525942e-05,
|
127485 |
+
"loss": 1.5773,
|
127486 |
+
"step": 18211
|
127487 |
+
},
|
127488 |
+
{
|
127489 |
+
"epoch": 0.4796418224914406,
|
127490 |
+
"grad_norm": 1.8488690853118896,
|
127491 |
+
"learning_rate": 2.603107716618383e-05,
|
127492 |
+
"loss": 1.7188,
|
127493 |
+
"step": 18212
|
127494 |
+
},
|
127495 |
+
{
|
127496 |
+
"epoch": 0.47966815907295235,
|
127497 |
+
"grad_norm": 3.2584662437438965,
|
127498 |
+
"learning_rate": 2.6029760337108244e-05,
|
127499 |
+
"loss": 1.1955,
|
127500 |
+
"step": 18213
|
127501 |
+
},
|
127502 |
+
{
|
127503 |
+
"epoch": 0.47969449565446404,
|
127504 |
+
"grad_norm": 1.6268631219863892,
|
127505 |
+
"learning_rate": 2.6028443508032656e-05,
|
127506 |
+
"loss": 1.4492,
|
127507 |
+
"step": 18214
|
127508 |
+
},
|
127509 |
+
{
|
127510 |
+
"epoch": 0.4797208322359758,
|
127511 |
+
"grad_norm": 1.6208415031433105,
|
127512 |
+
"learning_rate": 2.602712667895707e-05,
|
127513 |
+
"loss": 1.8537,
|
127514 |
+
"step": 18215
|
127515 |
+
},
|
127516 |
+
{
|
127517 |
+
"epoch": 0.47974716881748747,
|
127518 |
+
"grad_norm": 1.8901538848876953,
|
127519 |
+
"learning_rate": 2.6025809849881487e-05,
|
127520 |
+
"loss": 2.321,
|
127521 |
+
"step": 18216
|
127522 |
+
},
|
127523 |
+
{
|
127524 |
+
"epoch": 0.4797735053989992,
|
127525 |
+
"grad_norm": 1.631158471107483,
|
127526 |
+
"learning_rate": 2.6024493020805903e-05,
|
127527 |
+
"loss": 0.445,
|
127528 |
+
"step": 18217
|
127529 |
+
},
|
127530 |
+
{
|
127531 |
+
"epoch": 0.47979984198051095,
|
127532 |
+
"grad_norm": 2.484971523284912,
|
127533 |
+
"learning_rate": 2.6023176191730315e-05,
|
127534 |
+
"loss": 2.1254,
|
127535 |
+
"step": 18218
|
127536 |
+
},
|
127537 |
+
{
|
127538 |
+
"epoch": 0.47982617856202264,
|
127539 |
+
"grad_norm": 1.7783170938491821,
|
127540 |
+
"learning_rate": 2.6021859362654727e-05,
|
127541 |
+
"loss": 1.9419,
|
127542 |
+
"step": 18219
|
127543 |
+
},
|
127544 |
+
{
|
127545 |
+
"epoch": 0.4798525151435344,
|
127546 |
+
"grad_norm": 2.196690082550049,
|
127547 |
+
"learning_rate": 2.6020542533579146e-05,
|
127548 |
+
"loss": 2.1618,
|
127549 |
+
"step": 18220
|
127550 |
+
},
|
127551 |
+
{
|
127552 |
+
"epoch": 0.4798788517250461,
|
127553 |
+
"grad_norm": 1.8459336757659912,
|
127554 |
+
"learning_rate": 2.601922570450356e-05,
|
127555 |
+
"loss": 1.5345,
|
127556 |
+
"step": 18221
|
127557 |
+
},
|
127558 |
+
{
|
127559 |
+
"epoch": 0.4799051883065578,
|
127560 |
+
"grad_norm": 2.015507936477661,
|
127561 |
+
"learning_rate": 2.601790887542797e-05,
|
127562 |
+
"loss": 1.4419,
|
127563 |
+
"step": 18222
|
127564 |
+
},
|
127565 |
+
{
|
127566 |
+
"epoch": 0.4799315248880695,
|
127567 |
+
"grad_norm": 1.6010900735855103,
|
127568 |
+
"learning_rate": 2.6016592046352383e-05,
|
127569 |
+
"loss": 1.935,
|
127570 |
+
"step": 18223
|
127571 |
+
},
|
127572 |
+
{
|
127573 |
+
"epoch": 0.47995786146958125,
|
127574 |
+
"grad_norm": 1.7097499370574951,
|
127575 |
+
"learning_rate": 2.6015275217276795e-05,
|
127576 |
+
"loss": 1.0837,
|
127577 |
+
"step": 18224
|
127578 |
+
},
|
127579 |
+
{
|
127580 |
+
"epoch": 0.479984198051093,
|
127581 |
+
"grad_norm": 3.9856553077697754,
|
127582 |
+
"learning_rate": 2.6013958388201214e-05,
|
127583 |
+
"loss": 2.4888,
|
127584 |
+
"step": 18225
|
127585 |
+
},
|
127586 |
+
{
|
127587 |
+
"epoch": 0.4800105346326047,
|
127588 |
+
"grad_norm": 3.3391363620758057,
|
127589 |
+
"learning_rate": 2.6012641559125626e-05,
|
127590 |
+
"loss": 1.6933,
|
127591 |
+
"step": 18226
|
127592 |
+
},
|
127593 |
+
{
|
127594 |
+
"epoch": 0.4800368712141164,
|
127595 |
+
"grad_norm": 2.165415048599243,
|
127596 |
+
"learning_rate": 2.6011324730050042e-05,
|
127597 |
+
"loss": 2.046,
|
127598 |
+
"step": 18227
|
127599 |
+
},
|
127600 |
+
{
|
127601 |
+
"epoch": 0.4800632077956281,
|
127602 |
+
"grad_norm": 3.911407232284546,
|
127603 |
+
"learning_rate": 2.6010007900974454e-05,
|
127604 |
+
"loss": 0.6511,
|
127605 |
+
"step": 18228
|
127606 |
+
},
|
127607 |
+
{
|
127608 |
+
"epoch": 0.48008954437713985,
|
127609 |
+
"grad_norm": 1.8012775182724,
|
127610 |
+
"learning_rate": 2.6008691071898866e-05,
|
127611 |
+
"loss": 1.3854,
|
127612 |
+
"step": 18229
|
127613 |
+
},
|
127614 |
+
{
|
127615 |
+
"epoch": 0.48011588095865154,
|
127616 |
+
"grad_norm": 2.567828416824341,
|
127617 |
+
"learning_rate": 2.6007374242823285e-05,
|
127618 |
+
"loss": 0.7743,
|
127619 |
+
"step": 18230
|
127620 |
+
},
|
127621 |
+
{
|
127622 |
+
"epoch": 0.4801422175401633,
|
127623 |
+
"grad_norm": 2.0181589126586914,
|
127624 |
+
"learning_rate": 2.6006057413747697e-05,
|
127625 |
+
"loss": 2.3429,
|
127626 |
+
"step": 18231
|
127627 |
+
},
|
127628 |
+
{
|
127629 |
+
"epoch": 0.48016855412167503,
|
127630 |
+
"grad_norm": 1.8341394662857056,
|
127631 |
+
"learning_rate": 2.600474058467211e-05,
|
127632 |
+
"loss": 1.6094,
|
127633 |
+
"step": 18232
|
127634 |
+
},
|
127635 |
+
{
|
127636 |
+
"epoch": 0.4801948907031867,
|
127637 |
+
"grad_norm": 3.8895981311798096,
|
127638 |
+
"learning_rate": 2.6003423755596522e-05,
|
127639 |
+
"loss": 1.0459,
|
127640 |
+
"step": 18233
|
127641 |
+
},
|
127642 |
+
{
|
127643 |
+
"epoch": 0.48022122728469846,
|
127644 |
+
"grad_norm": 2.5247411727905273,
|
127645 |
+
"learning_rate": 2.600210692652094e-05,
|
127646 |
+
"loss": 1.2766,
|
127647 |
+
"step": 18234
|
127648 |
+
},
|
127649 |
+
{
|
127650 |
+
"epoch": 0.48024756386621015,
|
127651 |
+
"grad_norm": 1.8207776546478271,
|
127652 |
+
"learning_rate": 2.6000790097445353e-05,
|
127653 |
+
"loss": 2.0494,
|
127654 |
+
"step": 18235
|
127655 |
+
},
|
127656 |
+
{
|
127657 |
+
"epoch": 0.4802739004477219,
|
127658 |
+
"grad_norm": 2.1421167850494385,
|
127659 |
+
"learning_rate": 2.599947326836977e-05,
|
127660 |
+
"loss": 2.0554,
|
127661 |
+
"step": 18236
|
127662 |
+
},
|
127663 |
+
{
|
127664 |
+
"epoch": 0.4803002370292336,
|
127665 |
+
"grad_norm": 1.9610754251480103,
|
127666 |
+
"learning_rate": 2.599815643929418e-05,
|
127667 |
+
"loss": 1.6792,
|
127668 |
+
"step": 18237
|
127669 |
+
},
|
127670 |
+
{
|
127671 |
+
"epoch": 0.4803265736107453,
|
127672 |
+
"grad_norm": 2.152301073074341,
|
127673 |
+
"learning_rate": 2.5996839610218593e-05,
|
127674 |
+
"loss": 1.2206,
|
127675 |
+
"step": 18238
|
127676 |
+
},
|
127677 |
+
{
|
127678 |
+
"epoch": 0.48035291019225707,
|
127679 |
+
"grad_norm": 1.690805435180664,
|
127680 |
+
"learning_rate": 2.5995522781143012e-05,
|
127681 |
+
"loss": 2.1232,
|
127682 |
+
"step": 18239
|
127683 |
+
},
|
127684 |
+
{
|
127685 |
+
"epoch": 0.48037924677376875,
|
127686 |
+
"grad_norm": 1.7222952842712402,
|
127687 |
+
"learning_rate": 2.5994205952067424e-05,
|
127688 |
+
"loss": 0.591,
|
127689 |
+
"step": 18240
|
127690 |
+
},
|
127691 |
+
{
|
127692 |
+
"epoch": 0.4804055833552805,
|
127693 |
+
"grad_norm": 1.9552398920059204,
|
127694 |
+
"learning_rate": 2.5992889122991837e-05,
|
127695 |
+
"loss": 1.0758,
|
127696 |
+
"step": 18241
|
127697 |
+
},
|
127698 |
+
{
|
127699 |
+
"epoch": 0.4804319199367922,
|
127700 |
+
"grad_norm": 2.508448839187622,
|
127701 |
+
"learning_rate": 2.599157229391625e-05,
|
127702 |
+
"loss": 1.3292,
|
127703 |
+
"step": 18242
|
127704 |
+
},
|
127705 |
+
{
|
127706 |
+
"epoch": 0.48045825651830393,
|
127707 |
+
"grad_norm": 2.046084403991699,
|
127708 |
+
"learning_rate": 2.5990255464840664e-05,
|
127709 |
+
"loss": 1.4425,
|
127710 |
+
"step": 18243
|
127711 |
+
},
|
127712 |
+
{
|
127713 |
+
"epoch": 0.48048459309981567,
|
127714 |
+
"grad_norm": 1.7621982097625732,
|
127715 |
+
"learning_rate": 2.598893863576508e-05,
|
127716 |
+
"loss": 1.9327,
|
127717 |
+
"step": 18244
|
127718 |
+
},
|
127719 |
+
{
|
127720 |
+
"epoch": 0.48051092968132736,
|
127721 |
+
"grad_norm": 3.080350399017334,
|
127722 |
+
"learning_rate": 2.5987621806689496e-05,
|
127723 |
+
"loss": 0.7348,
|
127724 |
+
"step": 18245
|
127725 |
+
},
|
127726 |
+
{
|
127727 |
+
"epoch": 0.4805372662628391,
|
127728 |
+
"grad_norm": 2.543154001235962,
|
127729 |
+
"learning_rate": 2.5986304977613908e-05,
|
127730 |
+
"loss": 1.0225,
|
127731 |
+
"step": 18246
|
127732 |
+
},
|
127733 |
+
{
|
127734 |
+
"epoch": 0.4805636028443508,
|
127735 |
+
"grad_norm": 2.1923797130584717,
|
127736 |
+
"learning_rate": 2.598498814853832e-05,
|
127737 |
+
"loss": 2.7072,
|
127738 |
+
"step": 18247
|
127739 |
+
},
|
127740 |
+
{
|
127741 |
+
"epoch": 0.48058993942586253,
|
127742 |
+
"grad_norm": 1.423647403717041,
|
127743 |
+
"learning_rate": 2.5983671319462732e-05,
|
127744 |
+
"loss": 1.8857,
|
127745 |
+
"step": 18248
|
127746 |
+
},
|
127747 |
+
{
|
127748 |
+
"epoch": 0.4806162760073742,
|
127749 |
+
"grad_norm": 2.073836326599121,
|
127750 |
+
"learning_rate": 2.598235449038715e-05,
|
127751 |
+
"loss": 1.231,
|
127752 |
+
"step": 18249
|
127753 |
+
},
|
127754 |
+
{
|
127755 |
+
"epoch": 0.48064261258888596,
|
127756 |
+
"grad_norm": 2.9497969150543213,
|
127757 |
+
"learning_rate": 2.5981037661311563e-05,
|
127758 |
+
"loss": 1.6494,
|
127759 |
+
"step": 18250
|
127760 |
+
},
|
127761 |
+
{
|
127762 |
+
"epoch": 0.4806689491703977,
|
127763 |
+
"grad_norm": 1.5961450338363647,
|
127764 |
+
"learning_rate": 2.5979720832235976e-05,
|
127765 |
+
"loss": 2.1089,
|
127766 |
+
"step": 18251
|
127767 |
+
},
|
127768 |
+
{
|
127769 |
+
"epoch": 0.4806952857519094,
|
127770 |
+
"grad_norm": 1.5951591730117798,
|
127771 |
+
"learning_rate": 2.5978404003160388e-05,
|
127772 |
+
"loss": 1.7174,
|
127773 |
+
"step": 18252
|
127774 |
+
},
|
127775 |
+
{
|
127776 |
+
"epoch": 0.48072162233342114,
|
127777 |
+
"grad_norm": 2.378539800643921,
|
127778 |
+
"learning_rate": 2.5977087174084807e-05,
|
127779 |
+
"loss": 1.7267,
|
127780 |
+
"step": 18253
|
127781 |
+
},
|
127782 |
+
{
|
127783 |
+
"epoch": 0.4807479589149328,
|
127784 |
+
"grad_norm": 2.2286441326141357,
|
127785 |
+
"learning_rate": 2.597577034500922e-05,
|
127786 |
+
"loss": 1.2339,
|
127787 |
+
"step": 18254
|
127788 |
+
},
|
127789 |
+
{
|
127790 |
+
"epoch": 0.48077429549644457,
|
127791 |
+
"grad_norm": 2.03003191947937,
|
127792 |
+
"learning_rate": 2.5974453515933635e-05,
|
127793 |
+
"loss": 2.0078,
|
127794 |
+
"step": 18255
|
127795 |
+
},
|
127796 |
+
{
|
127797 |
+
"epoch": 0.48080063207795626,
|
127798 |
+
"grad_norm": 1.9827549457550049,
|
127799 |
+
"learning_rate": 2.5973136686858047e-05,
|
127800 |
+
"loss": 1.9373,
|
127801 |
+
"step": 18256
|
127802 |
+
},
|
127803 |
+
{
|
127804 |
+
"epoch": 0.480826968659468,
|
127805 |
+
"grad_norm": 3.3949034214019775,
|
127806 |
+
"learning_rate": 2.597181985778246e-05,
|
127807 |
+
"loss": 2.0879,
|
127808 |
+
"step": 18257
|
127809 |
+
},
|
127810 |
+
{
|
127811 |
+
"epoch": 0.48085330524097974,
|
127812 |
+
"grad_norm": 1.7559983730316162,
|
127813 |
+
"learning_rate": 2.5970503028706878e-05,
|
127814 |
+
"loss": 1.0735,
|
127815 |
+
"step": 18258
|
127816 |
+
},
|
127817 |
+
{
|
127818 |
+
"epoch": 0.48087964182249143,
|
127819 |
+
"grad_norm": 2.3310892581939697,
|
127820 |
+
"learning_rate": 2.596918619963129e-05,
|
127821 |
+
"loss": 0.3768,
|
127822 |
+
"step": 18259
|
127823 |
+
},
|
127824 |
+
{
|
127825 |
+
"epoch": 0.4809059784040032,
|
127826 |
+
"grad_norm": 1.615135908126831,
|
127827 |
+
"learning_rate": 2.5967869370555702e-05,
|
127828 |
+
"loss": 2.1364,
|
127829 |
+
"step": 18260
|
127830 |
+
},
|
127831 |
+
{
|
127832 |
+
"epoch": 0.48093231498551486,
|
127833 |
+
"grad_norm": 3.7796401977539062,
|
127834 |
+
"learning_rate": 2.5966552541480115e-05,
|
127835 |
+
"loss": 1.1534,
|
127836 |
+
"step": 18261
|
127837 |
+
},
|
127838 |
+
{
|
127839 |
+
"epoch": 0.4809586515670266,
|
127840 |
+
"grad_norm": 2.0392417907714844,
|
127841 |
+
"learning_rate": 2.596523571240453e-05,
|
127842 |
+
"loss": 1.9045,
|
127843 |
+
"step": 18262
|
127844 |
+
},
|
127845 |
+
{
|
127846 |
+
"epoch": 0.4809849881485383,
|
127847 |
+
"grad_norm": 6.02974271774292,
|
127848 |
+
"learning_rate": 2.5963918883328946e-05,
|
127849 |
+
"loss": 0.8431,
|
127850 |
+
"step": 18263
|
127851 |
+
},
|
127852 |
+
{
|
127853 |
+
"epoch": 0.48101132473005004,
|
127854 |
+
"grad_norm": 1.9574013948440552,
|
127855 |
+
"learning_rate": 2.596260205425336e-05,
|
127856 |
+
"loss": 2.3259,
|
127857 |
+
"step": 18264
|
127858 |
+
},
|
127859 |
+
{
|
127860 |
+
"epoch": 0.4810376613115618,
|
127861 |
+
"grad_norm": 4.27133321762085,
|
127862 |
+
"learning_rate": 2.5961285225177774e-05,
|
127863 |
+
"loss": 0.8533,
|
127864 |
+
"step": 18265
|
127865 |
+
},
|
127866 |
+
{
|
127867 |
+
"epoch": 0.48106399789307347,
|
127868 |
+
"grad_norm": 2.9611685276031494,
|
127869 |
+
"learning_rate": 2.5959968396102186e-05,
|
127870 |
+
"loss": 1.4665,
|
127871 |
+
"step": 18266
|
127872 |
+
},
|
127873 |
+
{
|
127874 |
+
"epoch": 0.4810903344745852,
|
127875 |
+
"grad_norm": 2.1282074451446533,
|
127876 |
+
"learning_rate": 2.5958651567026605e-05,
|
127877 |
+
"loss": 0.51,
|
127878 |
+
"step": 18267
|
127879 |
+
},
|
127880 |
+
{
|
127881 |
+
"epoch": 0.4811166710560969,
|
127882 |
+
"grad_norm": 3.044217109680176,
|
127883 |
+
"learning_rate": 2.5957334737951017e-05,
|
127884 |
+
"loss": 2.7244,
|
127885 |
+
"step": 18268
|
127886 |
+
},
|
127887 |
+
{
|
127888 |
+
"epoch": 0.48114300763760864,
|
127889 |
+
"grad_norm": 1.874723196029663,
|
127890 |
+
"learning_rate": 2.595601790887543e-05,
|
127891 |
+
"loss": 1.4811,
|
127892 |
+
"step": 18269
|
127893 |
+
},
|
127894 |
+
{
|
127895 |
+
"epoch": 0.48116934421912033,
|
127896 |
+
"grad_norm": 1.7602438926696777,
|
127897 |
+
"learning_rate": 2.595470107979984e-05,
|
127898 |
+
"loss": 1.9281,
|
127899 |
+
"step": 18270
|
127900 |
+
},
|
127901 |
+
{
|
127902 |
+
"epoch": 0.4811956808006321,
|
127903 |
+
"grad_norm": 2.15421462059021,
|
127904 |
+
"learning_rate": 2.5953384250724254e-05,
|
127905 |
+
"loss": 2.1792,
|
127906 |
+
"step": 18271
|
127907 |
+
},
|
127908 |
+
{
|
127909 |
+
"epoch": 0.4812220173821438,
|
127910 |
+
"grad_norm": 3.2429089546203613,
|
127911 |
+
"learning_rate": 2.5952067421648673e-05,
|
127912 |
+
"loss": 2.1389,
|
127913 |
+
"step": 18272
|
127914 |
+
},
|
127915 |
+
{
|
127916 |
+
"epoch": 0.4812483539636555,
|
127917 |
+
"grad_norm": 1.878605604171753,
|
127918 |
+
"learning_rate": 2.5950750592573085e-05,
|
127919 |
+
"loss": 1.7448,
|
127920 |
+
"step": 18273
|
127921 |
+
},
|
127922 |
+
{
|
127923 |
+
"epoch": 0.48127469054516725,
|
127924 |
+
"grad_norm": 2.79583477973938,
|
127925 |
+
"learning_rate": 2.59494337634975e-05,
|
127926 |
+
"loss": 1.6102,
|
127927 |
+
"step": 18274
|
127928 |
+
},
|
127929 |
+
{
|
127930 |
+
"epoch": 0.48130102712667894,
|
127931 |
+
"grad_norm": 2.2260453701019287,
|
127932 |
+
"learning_rate": 2.5948116934421913e-05,
|
127933 |
+
"loss": 1.8317,
|
127934 |
+
"step": 18275
|
127935 |
+
},
|
127936 |
+
{
|
127937 |
+
"epoch": 0.4813273637081907,
|
127938 |
+
"grad_norm": 2.351231813430786,
|
127939 |
+
"learning_rate": 2.5946800105346325e-05,
|
127940 |
+
"loss": 1.7722,
|
127941 |
+
"step": 18276
|
127942 |
+
},
|
127943 |
+
{
|
127944 |
+
"epoch": 0.4813537002897024,
|
127945 |
+
"grad_norm": 2.5532896518707275,
|
127946 |
+
"learning_rate": 2.5945483276270744e-05,
|
127947 |
+
"loss": 2.4244,
|
127948 |
+
"step": 18277
|
127949 |
+
},
|
127950 |
+
{
|
127951 |
+
"epoch": 0.4813800368712141,
|
127952 |
+
"grad_norm": 4.415773391723633,
|
127953 |
+
"learning_rate": 2.5944166447195156e-05,
|
127954 |
+
"loss": 1.5416,
|
127955 |
+
"step": 18278
|
127956 |
+
},
|
127957 |
+
{
|
127958 |
+
"epoch": 0.48140637345272586,
|
127959 |
+
"grad_norm": 2.400348663330078,
|
127960 |
+
"learning_rate": 2.5942849618119568e-05,
|
127961 |
+
"loss": 1.9609,
|
127962 |
+
"step": 18279
|
127963 |
+
},
|
127964 |
+
{
|
127965 |
+
"epoch": 0.48143271003423754,
|
127966 |
+
"grad_norm": 2.8336586952209473,
|
127967 |
+
"learning_rate": 2.594153278904398e-05,
|
127968 |
+
"loss": 0.6991,
|
127969 |
+
"step": 18280
|
127970 |
+
},
|
127971 |
+
{
|
127972 |
+
"epoch": 0.4814590466157493,
|
127973 |
+
"grad_norm": 1.6775230169296265,
|
127974 |
+
"learning_rate": 2.5940215959968396e-05,
|
127975 |
+
"loss": 1.4768,
|
127976 |
+
"step": 18281
|
127977 |
+
},
|
127978 |
+
{
|
127979 |
+
"epoch": 0.481485383197261,
|
127980 |
+
"grad_norm": 1.9266937971115112,
|
127981 |
+
"learning_rate": 2.593889913089281e-05,
|
127982 |
+
"loss": 1.7842,
|
127983 |
+
"step": 18282
|
127984 |
+
},
|
127985 |
+
{
|
127986 |
+
"epoch": 0.4815117197787727,
|
127987 |
+
"grad_norm": 2.667290210723877,
|
127988 |
+
"learning_rate": 2.5937582301817227e-05,
|
127989 |
+
"loss": 1.1571,
|
127990 |
+
"step": 18283
|
127991 |
+
},
|
127992 |
+
{
|
127993 |
+
"epoch": 0.48153805636028446,
|
127994 |
+
"grad_norm": 1.645289421081543,
|
127995 |
+
"learning_rate": 2.593626547274164e-05,
|
127996 |
+
"loss": 1.5977,
|
127997 |
+
"step": 18284
|
127998 |
+
},
|
127999 |
+
{
|
128000 |
+
"epoch": 0.48156439294179615,
|
128001 |
+
"grad_norm": 3.0950942039489746,
|
128002 |
+
"learning_rate": 2.593494864366605e-05,
|
128003 |
+
"loss": 2.1776,
|
128004 |
+
"step": 18285
|
128005 |
+
},
|
128006 |
+
{
|
128007 |
+
"epoch": 0.4815907295233079,
|
128008 |
+
"grad_norm": 2.822568893432617,
|
128009 |
+
"learning_rate": 2.593363181459047e-05,
|
128010 |
+
"loss": 1.7076,
|
128011 |
+
"step": 18286
|
128012 |
+
},
|
128013 |
+
{
|
128014 |
+
"epoch": 0.4816170661048196,
|
128015 |
+
"grad_norm": 1.8029379844665527,
|
128016 |
+
"learning_rate": 2.5932314985514883e-05,
|
128017 |
+
"loss": 1.6493,
|
128018 |
+
"step": 18287
|
128019 |
+
},
|
128020 |
+
{
|
128021 |
+
"epoch": 0.4816434026863313,
|
128022 |
+
"grad_norm": 1.8903818130493164,
|
128023 |
+
"learning_rate": 2.5930998156439295e-05,
|
128024 |
+
"loss": 1.4257,
|
128025 |
+
"step": 18288
|
128026 |
+
},
|
128027 |
+
{
|
128028 |
+
"epoch": 0.481669739267843,
|
128029 |
+
"grad_norm": 1.7043659687042236,
|
128030 |
+
"learning_rate": 2.5929681327363707e-05,
|
128031 |
+
"loss": 1.8823,
|
128032 |
+
"step": 18289
|
128033 |
+
},
|
128034 |
+
{
|
128035 |
+
"epoch": 0.48169607584935475,
|
128036 |
+
"grad_norm": 2.1602940559387207,
|
128037 |
+
"learning_rate": 2.5928364498288123e-05,
|
128038 |
+
"loss": 1.569,
|
128039 |
+
"step": 18290
|
128040 |
+
},
|
128041 |
+
{
|
128042 |
+
"epoch": 0.4817224124308665,
|
128043 |
+
"grad_norm": 1.5170824527740479,
|
128044 |
+
"learning_rate": 2.592704766921254e-05,
|
128045 |
+
"loss": 2.5523,
|
128046 |
+
"step": 18291
|
128047 |
+
},
|
128048 |
+
{
|
128049 |
+
"epoch": 0.4817487490123782,
|
128050 |
+
"grad_norm": 1.7479232549667358,
|
128051 |
+
"learning_rate": 2.5925730840136954e-05,
|
128052 |
+
"loss": 2.0913,
|
128053 |
+
"step": 18292
|
128054 |
+
},
|
128055 |
+
{
|
128056 |
+
"epoch": 0.48177508559388993,
|
128057 |
+
"grad_norm": 2.471151351928711,
|
128058 |
+
"learning_rate": 2.5924414011061366e-05,
|
128059 |
+
"loss": 1.2299,
|
128060 |
+
"step": 18293
|
128061 |
+
},
|
128062 |
+
{
|
128063 |
+
"epoch": 0.4818014221754016,
|
128064 |
+
"grad_norm": 2.0613322257995605,
|
128065 |
+
"learning_rate": 2.592309718198578e-05,
|
128066 |
+
"loss": 0.9024,
|
128067 |
+
"step": 18294
|
128068 |
+
},
|
128069 |
+
{
|
128070 |
+
"epoch": 0.48182775875691336,
|
128071 |
+
"grad_norm": 2.1863367557525635,
|
128072 |
+
"learning_rate": 2.592178035291019e-05,
|
128073 |
+
"loss": 1.3717,
|
128074 |
+
"step": 18295
|
128075 |
+
},
|
128076 |
+
{
|
128077 |
+
"epoch": 0.48185409533842505,
|
128078 |
+
"grad_norm": 1.8875648975372314,
|
128079 |
+
"learning_rate": 2.592046352383461e-05,
|
128080 |
+
"loss": 1.5818,
|
128081 |
+
"step": 18296
|
128082 |
+
},
|
128083 |
+
{
|
128084 |
+
"epoch": 0.4818804319199368,
|
128085 |
+
"grad_norm": 3.3052406311035156,
|
128086 |
+
"learning_rate": 2.5919146694759022e-05,
|
128087 |
+
"loss": 1.8997,
|
128088 |
+
"step": 18297
|
128089 |
+
},
|
128090 |
+
{
|
128091 |
+
"epoch": 0.48190676850144853,
|
128092 |
+
"grad_norm": 2.7625679969787598,
|
128093 |
+
"learning_rate": 2.5917829865683434e-05,
|
128094 |
+
"loss": 1.493,
|
128095 |
+
"step": 18298
|
128096 |
+
},
|
128097 |
+
{
|
128098 |
+
"epoch": 0.4819331050829602,
|
128099 |
+
"grad_norm": 2.534208297729492,
|
128100 |
+
"learning_rate": 2.5916513036607846e-05,
|
128101 |
+
"loss": 1.599,
|
128102 |
+
"step": 18299
|
128103 |
+
},
|
128104 |
+
{
|
128105 |
+
"epoch": 0.48195944166447197,
|
128106 |
+
"grad_norm": 4.247109413146973,
|
128107 |
+
"learning_rate": 2.5915196207532265e-05,
|
128108 |
+
"loss": 1.4251,
|
128109 |
+
"step": 18300
|
128110 |
+
},
|
128111 |
+
{
|
128112 |
+
"epoch": 0.48198577824598365,
|
128113 |
+
"grad_norm": 2.1316025257110596,
|
128114 |
+
"learning_rate": 2.5913879378456678e-05,
|
128115 |
+
"loss": 2.5669,
|
128116 |
+
"step": 18301
|
128117 |
+
},
|
128118 |
+
{
|
128119 |
+
"epoch": 0.4820121148274954,
|
128120 |
+
"grad_norm": 3.536114454269409,
|
128121 |
+
"learning_rate": 2.5912562549381093e-05,
|
128122 |
+
"loss": 0.5308,
|
128123 |
+
"step": 18302
|
128124 |
+
},
|
128125 |
+
{
|
128126 |
+
"epoch": 0.4820384514090071,
|
128127 |
+
"grad_norm": 2.2415454387664795,
|
128128 |
+
"learning_rate": 2.5911245720305505e-05,
|
128129 |
+
"loss": 1.7294,
|
128130 |
+
"step": 18303
|
128131 |
+
},
|
128132 |
+
{
|
128133 |
+
"epoch": 0.48206478799051883,
|
128134 |
+
"grad_norm": 1.5645477771759033,
|
128135 |
+
"learning_rate": 2.5909928891229918e-05,
|
128136 |
+
"loss": 1.7197,
|
128137 |
+
"step": 18304
|
128138 |
+
},
|
128139 |
+
{
|
128140 |
+
"epoch": 0.48209112457203057,
|
128141 |
+
"grad_norm": 1.7059056758880615,
|
128142 |
+
"learning_rate": 2.5908612062154337e-05,
|
128143 |
+
"loss": 0.9997,
|
128144 |
+
"step": 18305
|
128145 |
+
},
|
128146 |
+
{
|
128147 |
+
"epoch": 0.48211746115354226,
|
128148 |
+
"grad_norm": 3.156730890274048,
|
128149 |
+
"learning_rate": 2.590729523307875e-05,
|
128150 |
+
"loss": 0.336,
|
128151 |
+
"step": 18306
|
128152 |
+
},
|
128153 |
+
{
|
128154 |
+
"epoch": 0.482143797735054,
|
128155 |
+
"grad_norm": 1.3372992277145386,
|
128156 |
+
"learning_rate": 2.590597840400316e-05,
|
128157 |
+
"loss": 1.411,
|
128158 |
+
"step": 18307
|
128159 |
+
},
|
128160 |
+
{
|
128161 |
+
"epoch": 0.4821701343165657,
|
128162 |
+
"grad_norm": 2.8801941871643066,
|
128163 |
+
"learning_rate": 2.5904661574927573e-05,
|
128164 |
+
"loss": 1.6704,
|
128165 |
+
"step": 18308
|
128166 |
+
},
|
128167 |
+
{
|
128168 |
+
"epoch": 0.48219647089807743,
|
128169 |
+
"grad_norm": 1.8397315740585327,
|
128170 |
+
"learning_rate": 2.590334474585199e-05,
|
128171 |
+
"loss": 2.1636,
|
128172 |
+
"step": 18309
|
128173 |
+
},
|
128174 |
+
{
|
128175 |
+
"epoch": 0.4822228074795891,
|
128176 |
+
"grad_norm": 2.7086033821105957,
|
128177 |
+
"learning_rate": 2.5902027916776404e-05,
|
128178 |
+
"loss": 0.6801,
|
128179 |
+
"step": 18310
|
128180 |
+
},
|
128181 |
+
{
|
128182 |
+
"epoch": 0.48224914406110087,
|
128183 |
+
"grad_norm": 4.560272693634033,
|
128184 |
+
"learning_rate": 2.590071108770082e-05,
|
128185 |
+
"loss": 0.6993,
|
128186 |
+
"step": 18311
|
128187 |
+
},
|
128188 |
+
{
|
128189 |
+
"epoch": 0.4822754806426126,
|
128190 |
+
"grad_norm": 1.794438362121582,
|
128191 |
+
"learning_rate": 2.5899394258625232e-05,
|
128192 |
+
"loss": 2.3285,
|
128193 |
+
"step": 18312
|
128194 |
+
},
|
128195 |
+
{
|
128196 |
+
"epoch": 0.4823018172241243,
|
128197 |
+
"grad_norm": 1.6634902954101562,
|
128198 |
+
"learning_rate": 2.5898077429549644e-05,
|
128199 |
+
"loss": 1.7613,
|
128200 |
+
"step": 18313
|
128201 |
+
},
|
128202 |
+
{
|
128203 |
+
"epoch": 0.48232815380563604,
|
128204 |
+
"grad_norm": 2.5338780879974365,
|
128205 |
+
"learning_rate": 2.5896760600474057e-05,
|
128206 |
+
"loss": 0.7794,
|
128207 |
+
"step": 18314
|
128208 |
+
},
|
128209 |
+
{
|
128210 |
+
"epoch": 0.4823544903871477,
|
128211 |
+
"grad_norm": 1.5597007274627686,
|
128212 |
+
"learning_rate": 2.5895443771398476e-05,
|
128213 |
+
"loss": 1.9812,
|
128214 |
+
"step": 18315
|
128215 |
+
},
|
128216 |
+
{
|
128217 |
+
"epoch": 0.48238082696865947,
|
128218 |
+
"grad_norm": 1.5586351156234741,
|
128219 |
+
"learning_rate": 2.5894126942322888e-05,
|
128220 |
+
"loss": 2.5375,
|
128221 |
+
"step": 18316
|
128222 |
+
},
|
128223 |
+
{
|
128224 |
+
"epoch": 0.4824071635501712,
|
128225 |
+
"grad_norm": 2.1696550846099854,
|
128226 |
+
"learning_rate": 2.58928101132473e-05,
|
128227 |
+
"loss": 1.6872,
|
128228 |
+
"step": 18317
|
128229 |
+
},
|
128230 |
+
{
|
128231 |
+
"epoch": 0.4824335001316829,
|
128232 |
+
"grad_norm": 2.1278188228607178,
|
128233 |
+
"learning_rate": 2.5891493284171716e-05,
|
128234 |
+
"loss": 1.9642,
|
128235 |
+
"step": 18318
|
128236 |
+
},
|
128237 |
+
{
|
128238 |
+
"epoch": 0.48245983671319465,
|
128239 |
+
"grad_norm": 1.742224097251892,
|
128240 |
+
"learning_rate": 2.589017645509613e-05,
|
128241 |
+
"loss": 0.7188,
|
128242 |
+
"step": 18319
|
128243 |
+
},
|
128244 |
+
{
|
128245 |
+
"epoch": 0.48248617329470633,
|
128246 |
+
"grad_norm": 4.575339317321777,
|
128247 |
+
"learning_rate": 2.5888859626020547e-05,
|
128248 |
+
"loss": 1.0558,
|
128249 |
+
"step": 18320
|
128250 |
+
},
|
128251 |
+
{
|
128252 |
+
"epoch": 0.4825125098762181,
|
128253 |
+
"grad_norm": 5.727512359619141,
|
128254 |
+
"learning_rate": 2.588754279694496e-05,
|
128255 |
+
"loss": 1.3297,
|
128256 |
+
"step": 18321
|
128257 |
+
},
|
128258 |
+
{
|
128259 |
+
"epoch": 0.48253884645772976,
|
128260 |
+
"grad_norm": 1.7224305868148804,
|
128261 |
+
"learning_rate": 2.588622596786937e-05,
|
128262 |
+
"loss": 1.606,
|
128263 |
+
"step": 18322
|
128264 |
+
},
|
128265 |
+
{
|
128266 |
+
"epoch": 0.4825651830392415,
|
128267 |
+
"grad_norm": 1.7859320640563965,
|
128268 |
+
"learning_rate": 2.5884909138793783e-05,
|
128269 |
+
"loss": 1.5833,
|
128270 |
+
"step": 18323
|
128271 |
+
},
|
128272 |
+
{
|
128273 |
+
"epoch": 0.48259151962075325,
|
128274 |
+
"grad_norm": 1.3457127809524536,
|
128275 |
+
"learning_rate": 2.5883592309718202e-05,
|
128276 |
+
"loss": 1.9783,
|
128277 |
+
"step": 18324
|
128278 |
+
},
|
128279 |
+
{
|
128280 |
+
"epoch": 0.48261785620226494,
|
128281 |
+
"grad_norm": 1.807705044746399,
|
128282 |
+
"learning_rate": 2.5882275480642615e-05,
|
128283 |
+
"loss": 1.6852,
|
128284 |
+
"step": 18325
|
128285 |
+
},
|
128286 |
+
{
|
128287 |
+
"epoch": 0.4826441927837767,
|
128288 |
+
"grad_norm": 3.553330421447754,
|
128289 |
+
"learning_rate": 2.5880958651567027e-05,
|
128290 |
+
"loss": 1.1183,
|
128291 |
+
"step": 18326
|
128292 |
+
},
|
128293 |
+
{
|
128294 |
+
"epoch": 0.48267052936528837,
|
128295 |
+
"grad_norm": 3.3810200691223145,
|
128296 |
+
"learning_rate": 2.587964182249144e-05,
|
128297 |
+
"loss": 2.8145,
|
128298 |
+
"step": 18327
|
128299 |
+
},
|
128300 |
+
{
|
128301 |
+
"epoch": 0.4826968659468001,
|
128302 |
+
"grad_norm": 2.4965574741363525,
|
128303 |
+
"learning_rate": 2.5878324993415855e-05,
|
128304 |
+
"loss": 1.7972,
|
128305 |
+
"step": 18328
|
128306 |
+
},
|
128307 |
+
{
|
128308 |
+
"epoch": 0.4827232025283118,
|
128309 |
+
"grad_norm": 2.483815908432007,
|
128310 |
+
"learning_rate": 2.587700816434027e-05,
|
128311 |
+
"loss": 0.8307,
|
128312 |
+
"step": 18329
|
128313 |
+
},
|
128314 |
+
{
|
128315 |
+
"epoch": 0.48274953910982354,
|
128316 |
+
"grad_norm": 2.7984135150909424,
|
128317 |
+
"learning_rate": 2.5875691335264686e-05,
|
128318 |
+
"loss": 2.2607,
|
128319 |
+
"step": 18330
|
128320 |
+
},
|
128321 |
+
{
|
128322 |
+
"epoch": 0.4827758756913353,
|
128323 |
+
"grad_norm": 1.6728575229644775,
|
128324 |
+
"learning_rate": 2.5874374506189098e-05,
|
128325 |
+
"loss": 1.8287,
|
128326 |
+
"step": 18331
|
128327 |
+
},
|
128328 |
+
{
|
128329 |
+
"epoch": 0.482802212272847,
|
128330 |
+
"grad_norm": 2.076826810836792,
|
128331 |
+
"learning_rate": 2.587305767711351e-05,
|
128332 |
+
"loss": 1.2793,
|
128333 |
+
"step": 18332
|
128334 |
+
},
|
128335 |
+
{
|
128336 |
+
"epoch": 0.4828285488543587,
|
128337 |
+
"grad_norm": 2.7556533813476562,
|
128338 |
+
"learning_rate": 2.587174084803793e-05,
|
128339 |
+
"loss": 2.1633,
|
128340 |
+
"step": 18333
|
128341 |
+
},
|
128342 |
+
{
|
128343 |
+
"epoch": 0.4828548854358704,
|
128344 |
+
"grad_norm": 2.11775803565979,
|
128345 |
+
"learning_rate": 2.587042401896234e-05,
|
128346 |
+
"loss": 1.8455,
|
128347 |
+
"step": 18334
|
128348 |
+
},
|
128349 |
+
{
|
128350 |
+
"epoch": 0.48288122201738215,
|
128351 |
+
"grad_norm": 3.899864435195923,
|
128352 |
+
"learning_rate": 2.5869107189886754e-05,
|
128353 |
+
"loss": 1.2825,
|
128354 |
+
"step": 18335
|
128355 |
+
},
|
128356 |
+
{
|
128357 |
+
"epoch": 0.48290755859889384,
|
128358 |
+
"grad_norm": 2.0632147789001465,
|
128359 |
+
"learning_rate": 2.5867790360811166e-05,
|
128360 |
+
"loss": 2.3838,
|
128361 |
+
"step": 18336
|
128362 |
+
},
|
128363 |
+
{
|
128364 |
+
"epoch": 0.4829338951804056,
|
128365 |
+
"grad_norm": 1.4783732891082764,
|
128366 |
+
"learning_rate": 2.586647353173558e-05,
|
128367 |
+
"loss": 0.6448,
|
128368 |
+
"step": 18337
|
128369 |
+
},
|
128370 |
+
{
|
128371 |
+
"epoch": 0.4829602317619173,
|
128372 |
+
"grad_norm": 3.414386510848999,
|
128373 |
+
"learning_rate": 2.5865156702659997e-05,
|
128374 |
+
"loss": 1.6303,
|
128375 |
+
"step": 18338
|
128376 |
+
},
|
128377 |
+
{
|
128378 |
+
"epoch": 0.482986568343429,
|
128379 |
+
"grad_norm": 2.6229095458984375,
|
128380 |
+
"learning_rate": 2.5863839873584413e-05,
|
128381 |
+
"loss": 1.1264,
|
128382 |
+
"step": 18339
|
128383 |
+
},
|
128384 |
+
{
|
128385 |
+
"epoch": 0.48301290492494076,
|
128386 |
+
"grad_norm": 1.6566879749298096,
|
128387 |
+
"learning_rate": 2.5862523044508825e-05,
|
128388 |
+
"loss": 1.7613,
|
128389 |
+
"step": 18340
|
128390 |
+
},
|
128391 |
+
{
|
128392 |
+
"epoch": 0.48303924150645244,
|
128393 |
+
"grad_norm": 2.410510778427124,
|
128394 |
+
"learning_rate": 2.5861206215433237e-05,
|
128395 |
+
"loss": 0.9519,
|
128396 |
+
"step": 18341
|
128397 |
+
},
|
128398 |
+
{
|
128399 |
+
"epoch": 0.4830655780879642,
|
128400 |
+
"grad_norm": 1.8161232471466064,
|
128401 |
+
"learning_rate": 2.585988938635765e-05,
|
128402 |
+
"loss": 1.752,
|
128403 |
+
"step": 18342
|
128404 |
+
},
|
128405 |
+
{
|
128406 |
+
"epoch": 0.4830919146694759,
|
128407 |
+
"grad_norm": 3.2136988639831543,
|
128408 |
+
"learning_rate": 2.5858572557282068e-05,
|
128409 |
+
"loss": 1.9701,
|
128410 |
+
"step": 18343
|
128411 |
+
},
|
128412 |
+
{
|
128413 |
+
"epoch": 0.4831182512509876,
|
128414 |
+
"grad_norm": 1.500807523727417,
|
128415 |
+
"learning_rate": 2.585725572820648e-05,
|
128416 |
+
"loss": 1.8475,
|
128417 |
+
"step": 18344
|
128418 |
+
},
|
128419 |
+
{
|
128420 |
+
"epoch": 0.48314458783249936,
|
128421 |
+
"grad_norm": 2.430117607116699,
|
128422 |
+
"learning_rate": 2.5855938899130893e-05,
|
128423 |
+
"loss": 2.1502,
|
128424 |
+
"step": 18345
|
128425 |
+
},
|
128426 |
+
{
|
128427 |
+
"epoch": 0.48317092441401105,
|
128428 |
+
"grad_norm": 1.8485294580459595,
|
128429 |
+
"learning_rate": 2.5854622070055305e-05,
|
128430 |
+
"loss": 2.0114,
|
128431 |
+
"step": 18346
|
128432 |
+
},
|
128433 |
+
{
|
128434 |
+
"epoch": 0.4831972609955228,
|
128435 |
+
"grad_norm": 3.348314046859741,
|
128436 |
+
"learning_rate": 2.585330524097972e-05,
|
128437 |
+
"loss": 1.9875,
|
128438 |
+
"step": 18347
|
128439 |
+
},
|
128440 |
+
{
|
128441 |
+
"epoch": 0.4832235975770345,
|
128442 |
+
"grad_norm": 1.8929311037063599,
|
128443 |
+
"learning_rate": 2.585198841190414e-05,
|
128444 |
+
"loss": 1.7272,
|
128445 |
+
"step": 18348
|
128446 |
+
},
|
128447 |
+
{
|
128448 |
+
"epoch": 0.4832499341585462,
|
128449 |
+
"grad_norm": 1.764998197555542,
|
128450 |
+
"learning_rate": 2.585067158282855e-05,
|
128451 |
+
"loss": 1.7428,
|
128452 |
+
"step": 18349
|
128453 |
+
},
|
128454 |
+
{
|
128455 |
+
"epoch": 0.48327627074005797,
|
128456 |
+
"grad_norm": 4.091507911682129,
|
128457 |
+
"learning_rate": 2.5849354753752964e-05,
|
128458 |
+
"loss": 2.518,
|
128459 |
+
"step": 18350
|
128460 |
+
},
|
128461 |
+
{
|
128462 |
+
"epoch": 0.48330260732156965,
|
128463 |
+
"grad_norm": 3.2935714721679688,
|
128464 |
+
"learning_rate": 2.5848037924677376e-05,
|
128465 |
+
"loss": 1.1943,
|
128466 |
+
"step": 18351
|
128467 |
+
},
|
128468 |
+
{
|
128469 |
+
"epoch": 0.4833289439030814,
|
128470 |
+
"grad_norm": 2.3311710357666016,
|
128471 |
+
"learning_rate": 2.5846721095601795e-05,
|
128472 |
+
"loss": 0.9638,
|
128473 |
+
"step": 18352
|
128474 |
+
},
|
128475 |
+
{
|
128476 |
+
"epoch": 0.4833552804845931,
|
128477 |
+
"grad_norm": 1.7031148672103882,
|
128478 |
+
"learning_rate": 2.5845404266526207e-05,
|
128479 |
+
"loss": 2.1818,
|
128480 |
+
"step": 18353
|
128481 |
+
},
|
128482 |
+
{
|
128483 |
+
"epoch": 0.48338161706610483,
|
128484 |
+
"grad_norm": 3.7465553283691406,
|
128485 |
+
"learning_rate": 2.584408743745062e-05,
|
128486 |
+
"loss": 1.2991,
|
128487 |
+
"step": 18354
|
128488 |
+
},
|
128489 |
+
{
|
128490 |
+
"epoch": 0.4834079536476165,
|
128491 |
+
"grad_norm": 3.8707756996154785,
|
128492 |
+
"learning_rate": 2.5842770608375032e-05,
|
128493 |
+
"loss": 2.0623,
|
128494 |
+
"step": 18355
|
128495 |
+
},
|
128496 |
+
{
|
128497 |
+
"epoch": 0.48343429022912826,
|
128498 |
+
"grad_norm": 1.9105716943740845,
|
128499 |
+
"learning_rate": 2.5841453779299447e-05,
|
128500 |
+
"loss": 1.3937,
|
128501 |
+
"step": 18356
|
128502 |
+
},
|
128503 |
+
{
|
128504 |
+
"epoch": 0.48346062681064,
|
128505 |
+
"grad_norm": 2.868964672088623,
|
128506 |
+
"learning_rate": 2.5840136950223863e-05,
|
128507 |
+
"loss": 1.844,
|
128508 |
+
"step": 18357
|
128509 |
+
},
|
128510 |
+
{
|
128511 |
+
"epoch": 0.4834869633921517,
|
128512 |
+
"grad_norm": 2.0943338871002197,
|
128513 |
+
"learning_rate": 2.583882012114828e-05,
|
128514 |
+
"loss": 1.582,
|
128515 |
+
"step": 18358
|
128516 |
+
},
|
128517 |
+
{
|
128518 |
+
"epoch": 0.48351329997366344,
|
128519 |
+
"grad_norm": 3.172908067703247,
|
128520 |
+
"learning_rate": 2.583750329207269e-05,
|
128521 |
+
"loss": 1.6974,
|
128522 |
+
"step": 18359
|
128523 |
+
},
|
128524 |
+
{
|
128525 |
+
"epoch": 0.4835396365551751,
|
128526 |
+
"grad_norm": 2.3387062549591064,
|
128527 |
+
"learning_rate": 2.5836186462997103e-05,
|
128528 |
+
"loss": 1.1011,
|
128529 |
+
"step": 18360
|
128530 |
+
},
|
128531 |
+
{
|
128532 |
+
"epoch": 0.48356597313668687,
|
128533 |
+
"grad_norm": 3.5115859508514404,
|
128534 |
+
"learning_rate": 2.5834869633921515e-05,
|
128535 |
+
"loss": 1.3581,
|
128536 |
+
"step": 18361
|
128537 |
+
},
|
128538 |
+
{
|
128539 |
+
"epoch": 0.48359230971819855,
|
128540 |
+
"grad_norm": 2.8621480464935303,
|
128541 |
+
"learning_rate": 2.5833552804845934e-05,
|
128542 |
+
"loss": 1.1026,
|
128543 |
+
"step": 18362
|
128544 |
+
},
|
128545 |
+
{
|
128546 |
+
"epoch": 0.4836186462997103,
|
128547 |
+
"grad_norm": 2.5172183513641357,
|
128548 |
+
"learning_rate": 2.5832235975770346e-05,
|
128549 |
+
"loss": 1.8828,
|
128550 |
+
"step": 18363
|
128551 |
+
},
|
128552 |
+
{
|
128553 |
+
"epoch": 0.48364498288122204,
|
128554 |
+
"grad_norm": 2.953972816467285,
|
128555 |
+
"learning_rate": 2.583091914669476e-05,
|
128556 |
+
"loss": 0.8589,
|
128557 |
+
"step": 18364
|
128558 |
+
},
|
128559 |
+
{
|
128560 |
+
"epoch": 0.48367131946273373,
|
128561 |
+
"grad_norm": 1.8448625802993774,
|
128562 |
+
"learning_rate": 2.5829602317619174e-05,
|
128563 |
+
"loss": 1.9318,
|
128564 |
+
"step": 18365
|
128565 |
+
},
|
128566 |
+
{
|
128567 |
+
"epoch": 0.48369765604424547,
|
128568 |
+
"grad_norm": 1.8998172283172607,
|
128569 |
+
"learning_rate": 2.582828548854359e-05,
|
128570 |
+
"loss": 1.8291,
|
128571 |
+
"step": 18366
|
128572 |
+
},
|
128573 |
+
{
|
128574 |
+
"epoch": 0.48372399262575716,
|
128575 |
+
"grad_norm": 1.906227469444275,
|
128576 |
+
"learning_rate": 2.5826968659468005e-05,
|
128577 |
+
"loss": 1.3863,
|
128578 |
+
"step": 18367
|
128579 |
+
},
|
128580 |
+
{
|
128581 |
+
"epoch": 0.4837503292072689,
|
128582 |
+
"grad_norm": 1.814586877822876,
|
128583 |
+
"learning_rate": 2.5825651830392418e-05,
|
128584 |
+
"loss": 1.9128,
|
128585 |
+
"step": 18368
|
128586 |
+
},
|
128587 |
+
{
|
128588 |
+
"epoch": 0.4837766657887806,
|
128589 |
+
"grad_norm": 2.9641284942626953,
|
128590 |
+
"learning_rate": 2.582433500131683e-05,
|
128591 |
+
"loss": 1.5397,
|
128592 |
+
"step": 18369
|
128593 |
+
},
|
128594 |
+
{
|
128595 |
+
"epoch": 0.48380300237029233,
|
128596 |
+
"grad_norm": 4.049939155578613,
|
128597 |
+
"learning_rate": 2.5823018172241242e-05,
|
128598 |
+
"loss": 1.5262,
|
128599 |
+
"step": 18370
|
128600 |
+
},
|
128601 |
+
{
|
128602 |
+
"epoch": 0.4838293389518041,
|
128603 |
+
"grad_norm": 1.870164155960083,
|
128604 |
+
"learning_rate": 2.582170134316566e-05,
|
128605 |
+
"loss": 0.9732,
|
128606 |
+
"step": 18371
|
128607 |
+
},
|
128608 |
+
{
|
128609 |
+
"epoch": 0.48385567553331577,
|
128610 |
+
"grad_norm": 3.756333351135254,
|
128611 |
+
"learning_rate": 2.5820384514090073e-05,
|
128612 |
+
"loss": 1.1043,
|
128613 |
+
"step": 18372
|
128614 |
+
},
|
128615 |
+
{
|
128616 |
+
"epoch": 0.4838820121148275,
|
128617 |
+
"grad_norm": 2.0509378910064697,
|
128618 |
+
"learning_rate": 2.5819067685014485e-05,
|
128619 |
+
"loss": 1.9517,
|
128620 |
+
"step": 18373
|
128621 |
+
},
|
128622 |
+
{
|
128623 |
+
"epoch": 0.4839083486963392,
|
128624 |
+
"grad_norm": 5.011322975158691,
|
128625 |
+
"learning_rate": 2.5817750855938898e-05,
|
128626 |
+
"loss": 1.9828,
|
128627 |
+
"step": 18374
|
128628 |
+
},
|
128629 |
+
{
|
128630 |
+
"epoch": 0.48393468527785094,
|
128631 |
+
"grad_norm": 2.274775743484497,
|
128632 |
+
"learning_rate": 2.5816434026863313e-05,
|
128633 |
+
"loss": 2.181,
|
128634 |
+
"step": 18375
|
128635 |
+
},
|
128636 |
+
{
|
128637 |
+
"epoch": 0.48396102185936263,
|
128638 |
+
"grad_norm": 3.4376394748687744,
|
128639 |
+
"learning_rate": 2.581511719778773e-05,
|
128640 |
+
"loss": 2.1538,
|
128641 |
+
"step": 18376
|
128642 |
+
},
|
128643 |
+
{
|
128644 |
+
"epoch": 0.48398735844087437,
|
128645 |
+
"grad_norm": 1.577967643737793,
|
128646 |
+
"learning_rate": 2.5813800368712144e-05,
|
128647 |
+
"loss": 2.4193,
|
128648 |
+
"step": 18377
|
128649 |
+
},
|
128650 |
+
{
|
128651 |
+
"epoch": 0.4840136950223861,
|
128652 |
+
"grad_norm": 2.75225830078125,
|
128653 |
+
"learning_rate": 2.5812483539636557e-05,
|
128654 |
+
"loss": 1.4493,
|
128655 |
+
"step": 18378
|
128656 |
+
},
|
128657 |
+
{
|
128658 |
+
"epoch": 0.4840400316038978,
|
128659 |
+
"grad_norm": 1.5984172821044922,
|
128660 |
+
"learning_rate": 2.581116671056097e-05,
|
128661 |
+
"loss": 1.8152,
|
128662 |
+
"step": 18379
|
128663 |
+
},
|
128664 |
+
{
|
128665 |
+
"epoch": 0.48406636818540955,
|
128666 |
+
"grad_norm": 2.0202548503875732,
|
128667 |
+
"learning_rate": 2.580984988148538e-05,
|
128668 |
+
"loss": 1.726,
|
128669 |
+
"step": 18380
|
128670 |
+
},
|
128671 |
+
{
|
128672 |
+
"epoch": 0.48409270476692123,
|
128673 |
+
"grad_norm": 1.554030418395996,
|
128674 |
+
"learning_rate": 2.58085330524098e-05,
|
128675 |
+
"loss": 1.9233,
|
128676 |
+
"step": 18381
|
128677 |
+
},
|
128678 |
+
{
|
128679 |
+
"epoch": 0.484119041348433,
|
128680 |
+
"grad_norm": 1.8082032203674316,
|
128681 |
+
"learning_rate": 2.5807216223334212e-05,
|
128682 |
+
"loss": 1.015,
|
128683 |
+
"step": 18382
|
128684 |
+
},
|
128685 |
+
{
|
128686 |
+
"epoch": 0.4841453779299447,
|
128687 |
+
"grad_norm": 1.779860258102417,
|
128688 |
+
"learning_rate": 2.5805899394258624e-05,
|
128689 |
+
"loss": 0.5784,
|
128690 |
+
"step": 18383
|
128691 |
+
},
|
128692 |
+
{
|
128693 |
+
"epoch": 0.4841717145114564,
|
128694 |
+
"grad_norm": 2.2123851776123047,
|
128695 |
+
"learning_rate": 2.580458256518304e-05,
|
128696 |
+
"loss": 1.8172,
|
128697 |
+
"step": 18384
|
128698 |
+
},
|
128699 |
+
{
|
128700 |
+
"epoch": 0.48419805109296815,
|
128701 |
+
"grad_norm": 1.8945305347442627,
|
128702 |
+
"learning_rate": 2.5803265736107456e-05,
|
128703 |
+
"loss": 1.8525,
|
128704 |
+
"step": 18385
|
128705 |
+
},
|
128706 |
+
{
|
128707 |
+
"epoch": 0.48422438767447984,
|
128708 |
+
"grad_norm": 2.622844934463501,
|
128709 |
+
"learning_rate": 2.580194890703187e-05,
|
128710 |
+
"loss": 0.7779,
|
128711 |
+
"step": 18386
|
128712 |
+
},
|
128713 |
+
{
|
128714 |
+
"epoch": 0.4842507242559916,
|
128715 |
+
"grad_norm": 2.0889954566955566,
|
128716 |
+
"learning_rate": 2.5800632077956283e-05,
|
128717 |
+
"loss": 1.5266,
|
128718 |
+
"step": 18387
|
128719 |
+
},
|
128720 |
+
{
|
128721 |
+
"epoch": 0.48427706083750327,
|
128722 |
+
"grad_norm": 2.3298048973083496,
|
128723 |
+
"learning_rate": 2.5799315248880696e-05,
|
128724 |
+
"loss": 2.2032,
|
128725 |
+
"step": 18388
|
128726 |
+
},
|
128727 |
+
{
|
128728 |
+
"epoch": 0.484303397419015,
|
128729 |
+
"grad_norm": 1.5841577053070068,
|
128730 |
+
"learning_rate": 2.5797998419805108e-05,
|
128731 |
+
"loss": 1.8769,
|
128732 |
+
"step": 18389
|
128733 |
+
},
|
128734 |
+
{
|
128735 |
+
"epoch": 0.48432973400052676,
|
128736 |
+
"grad_norm": 3.13581919670105,
|
128737 |
+
"learning_rate": 2.5796681590729527e-05,
|
128738 |
+
"loss": 1.656,
|
128739 |
+
"step": 18390
|
128740 |
+
},
|
128741 |
+
{
|
128742 |
+
"epoch": 0.48435607058203844,
|
128743 |
+
"grad_norm": 2.65350341796875,
|
128744 |
+
"learning_rate": 2.579536476165394e-05,
|
128745 |
+
"loss": 0.3908,
|
128746 |
+
"step": 18391
|
128747 |
+
},
|
128748 |
+
{
|
128749 |
+
"epoch": 0.4843824071635502,
|
128750 |
+
"grad_norm": 3.9427220821380615,
|
128751 |
+
"learning_rate": 2.579404793257835e-05,
|
128752 |
+
"loss": 0.5817,
|
128753 |
+
"step": 18392
|
128754 |
+
},
|
128755 |
+
{
|
128756 |
+
"epoch": 0.4844087437450619,
|
128757 |
+
"grad_norm": 2.438901424407959,
|
128758 |
+
"learning_rate": 2.5792731103502767e-05,
|
128759 |
+
"loss": 2.3633,
|
128760 |
+
"step": 18393
|
128761 |
+
},
|
128762 |
+
{
|
128763 |
+
"epoch": 0.4844350803265736,
|
128764 |
+
"grad_norm": 2.4363667964935303,
|
128765 |
+
"learning_rate": 2.579141427442718e-05,
|
128766 |
+
"loss": 2.085,
|
128767 |
+
"step": 18394
|
128768 |
+
},
|
128769 |
+
{
|
128770 |
+
"epoch": 0.4844614169080853,
|
128771 |
+
"grad_norm": 1.9361634254455566,
|
128772 |
+
"learning_rate": 2.5790097445351598e-05,
|
128773 |
+
"loss": 2.1063,
|
128774 |
+
"step": 18395
|
128775 |
+
},
|
128776 |
+
{
|
128777 |
+
"epoch": 0.48448775348959705,
|
128778 |
+
"grad_norm": 2.282162666320801,
|
128779 |
+
"learning_rate": 2.578878061627601e-05,
|
128780 |
+
"loss": 0.898,
|
128781 |
+
"step": 18396
|
128782 |
+
},
|
128783 |
+
{
|
128784 |
+
"epoch": 0.4845140900711088,
|
128785 |
+
"grad_norm": 1.8887823820114136,
|
128786 |
+
"learning_rate": 2.5787463787200422e-05,
|
128787 |
+
"loss": 1.9128,
|
128788 |
+
"step": 18397
|
128789 |
+
},
|
128790 |
+
{
|
128791 |
+
"epoch": 0.4845404266526205,
|
128792 |
+
"grad_norm": 2.7833309173583984,
|
128793 |
+
"learning_rate": 2.5786146958124835e-05,
|
128794 |
+
"loss": 1.4563,
|
128795 |
+
"step": 18398
|
128796 |
+
},
|
128797 |
+
{
|
128798 |
+
"epoch": 0.4845667632341322,
|
128799 |
+
"grad_norm": 1.7120144367218018,
|
128800 |
+
"learning_rate": 2.5784830129049254e-05,
|
128801 |
+
"loss": 1.7172,
|
128802 |
+
"step": 18399
|
128803 |
+
},
|
128804 |
+
{
|
128805 |
+
"epoch": 0.4845930998156439,
|
128806 |
+
"grad_norm": 2.7394728660583496,
|
128807 |
+
"learning_rate": 2.5783513299973666e-05,
|
128808 |
+
"loss": 1.1691,
|
128809 |
+
"step": 18400
|
128810 |
+
},
|
128811 |
+
{
|
128812 |
+
"epoch": 0.48461943639715566,
|
128813 |
+
"grad_norm": 2.6390151977539062,
|
128814 |
+
"learning_rate": 2.5782196470898078e-05,
|
128815 |
+
"loss": 0.4177,
|
128816 |
+
"step": 18401
|
128817 |
+
},
|
128818 |
+
{
|
128819 |
+
"epoch": 0.48464577297866734,
|
128820 |
+
"grad_norm": 1.8348826169967651,
|
128821 |
+
"learning_rate": 2.578087964182249e-05,
|
128822 |
+
"loss": 3.2067,
|
128823 |
+
"step": 18402
|
128824 |
+
},
|
128825 |
+
{
|
128826 |
+
"epoch": 0.4846721095601791,
|
128827 |
+
"grad_norm": 2.5606539249420166,
|
128828 |
+
"learning_rate": 2.5779562812746906e-05,
|
128829 |
+
"loss": 2.3002,
|
128830 |
+
"step": 18403
|
128831 |
+
},
|
128832 |
+
{
|
128833 |
+
"epoch": 0.48469844614169083,
|
128834 |
+
"grad_norm": 2.3585174083709717,
|
128835 |
+
"learning_rate": 2.577824598367132e-05,
|
128836 |
+
"loss": 1.7013,
|
128837 |
+
"step": 18404
|
128838 |
+
},
|
128839 |
+
{
|
128840 |
+
"epoch": 0.4847247827232025,
|
128841 |
+
"grad_norm": 1.8755167722702026,
|
128842 |
+
"learning_rate": 2.5776929154595737e-05,
|
128843 |
+
"loss": 1.9228,
|
128844 |
+
"step": 18405
|
128845 |
+
},
|
128846 |
+
{
|
128847 |
+
"epoch": 0.48475111930471426,
|
128848 |
+
"grad_norm": 1.3895341157913208,
|
128849 |
+
"learning_rate": 2.577561232552015e-05,
|
128850 |
+
"loss": 1.8269,
|
128851 |
+
"step": 18406
|
128852 |
+
},
|
128853 |
+
{
|
128854 |
+
"epoch": 0.48477745588622595,
|
128855 |
+
"grad_norm": 1.6985033750534058,
|
128856 |
+
"learning_rate": 2.577429549644456e-05,
|
128857 |
+
"loss": 1.8735,
|
128858 |
+
"step": 18407
|
128859 |
+
},
|
128860 |
+
{
|
128861 |
+
"epoch": 0.4848037924677377,
|
128862 |
+
"grad_norm": 3.8967151641845703,
|
128863 |
+
"learning_rate": 2.5772978667368974e-05,
|
128864 |
+
"loss": 0.7406,
|
128865 |
+
"step": 18408
|
128866 |
+
},
|
128867 |
+
{
|
128868 |
+
"epoch": 0.4848301290492494,
|
128869 |
+
"grad_norm": 4.177510738372803,
|
128870 |
+
"learning_rate": 2.5771661838293393e-05,
|
128871 |
+
"loss": 1.4695,
|
128872 |
+
"step": 18409
|
128873 |
+
},
|
128874 |
+
{
|
128875 |
+
"epoch": 0.4848564656307611,
|
128876 |
+
"grad_norm": 1.816548228263855,
|
128877 |
+
"learning_rate": 2.5770345009217805e-05,
|
128878 |
+
"loss": 1.8405,
|
128879 |
+
"step": 18410
|
128880 |
+
},
|
128881 |
+
{
|
128882 |
+
"epoch": 0.48488280221227287,
|
128883 |
+
"grad_norm": 2.2118592262268066,
|
128884 |
+
"learning_rate": 2.5769028180142217e-05,
|
128885 |
+
"loss": 1.6375,
|
128886 |
+
"step": 18411
128887 | + },
128888 | + {
128889 | +   "epoch": 0.48490913879378456,
128890 | +   "grad_norm": 1.968703269958496,
128891 | +   "learning_rate": 2.5767711351066633e-05,
128892 | +   "loss": 1.5512,
128893 | +   "step": 18412
128894 | + },
128895 | + {
128896 | +   "epoch": 0.4849354753752963,
128897 | +   "grad_norm": 2.9055683612823486,
128898 | +   "learning_rate": 2.5766394521991045e-05,
128899 | +   "loss": 1.5991,
128900 | +   "step": 18413
128901 | + },
128902 | + {
128903 | +   "epoch": 0.484961811956808,
128904 | +   "grad_norm": 2.03057861328125,
128905 | +   "learning_rate": 2.5765077692915464e-05,
128906 | +   "loss": 0.9919,
128907 | +   "step": 18414
128908 | + },
128909 | + {
128910 | +   "epoch": 0.48498814853831973,
128911 | +   "grad_norm": 1.6863213777542114,
128912 | +   "learning_rate": 2.5763760863839876e-05,
128913 | +   "loss": 1.9363,
128914 | +   "step": 18415
128915 | + },
128916 | + {
128917 | +   "epoch": 0.4850144851198315,
128918 | +   "grad_norm": 2.704174041748047,
128919 | +   "learning_rate": 2.5762444034764288e-05,
128920 | +   "loss": 1.5539,
128921 | +   "step": 18416
128922 | + },
128923 | + {
128924 | +   "epoch": 0.48504082170134316,
128925 | +   "grad_norm": 6.385892391204834,
128926 | +   "learning_rate": 2.57611272056887e-05,
128927 | +   "loss": 1.2474,
128928 | +   "step": 18417
128929 | + },
128930 | + {
128931 | +   "epoch": 0.4850671582828549,
128932 | +   "grad_norm": 1.6319364309310913,
128933 | +   "learning_rate": 2.575981037661312e-05,
128934 | +   "loss": 2.1538,
128935 | +   "step": 18418
128936 | + },
128937 | + {
128938 | +   "epoch": 0.4850934948643666,
128939 | +   "grad_norm": 2.219048261642456,
128940 | +   "learning_rate": 2.5758493547537532e-05,
128941 | +   "loss": 1.7055,
128942 | +   "step": 18419
128943 | + },
128944 | + {
128945 | +   "epoch": 0.48511983144587834,
128946 | +   "grad_norm": 1.4454690217971802,
128947 | +   "learning_rate": 2.5757176718461944e-05,
128948 | +   "loss": 2.0454,
128949 | +   "step": 18420
128950 | + },
128951 | + {
128952 | +   "epoch": 0.48514616802739,
128953 | +   "grad_norm": 1.5924925804138184,
128954 | +   "learning_rate": 2.575585988938636e-05,
128955 | +   "loss": 1.7619,
128956 | +   "step": 18421
128957 | + },
128958 | + {
128959 | +   "epoch": 0.48517250460890177,
128960 | +   "grad_norm": 1.780044436454773,
128961 | +   "learning_rate": 2.5754543060310772e-05,
128962 | +   "loss": 1.9857,
128963 | +   "step": 18422
128964 | + },
128965 | + {
128966 | +   "epoch": 0.4851988411904135,
128967 | +   "grad_norm": 2.2101314067840576,
128968 | +   "learning_rate": 2.575322623123519e-05,
128969 | +   "loss": 1.9862,
128970 | +   "step": 18423
128971 | + },
128972 | + {
128973 | +   "epoch": 0.4852251777719252,
128974 | +   "grad_norm": 2.3404316902160645,
128975 | +   "learning_rate": 2.5751909402159603e-05,
128976 | +   "loss": 1.8558,
128977 | +   "step": 18424
128978 | + },
128979 | + {
128980 | +   "epoch": 0.48525151435343694,
128981 | +   "grad_norm": 2.050753355026245,
128982 | +   "learning_rate": 2.5750592573084015e-05,
128983 | +   "loss": 1.8381,
128984 | +   "step": 18425
128985 | + },
128986 | + {
128987 | +   "epoch": 0.48527785093494863,
128988 | +   "grad_norm": 3.0578525066375732,
128989 | +   "learning_rate": 2.5749275744008427e-05,
128990 | +   "loss": 0.5958,
128991 | +   "step": 18426
128992 | + },
128993 | + {
128994 | +   "epoch": 0.4853041875164604,
128995 | +   "grad_norm": 1.8695067167282104,
128996 | +   "learning_rate": 2.574795891493284e-05,
128997 | +   "loss": 1.622,
128998 | +   "step": 18427
128999 | + },
129000 | + {
129001 | +   "epoch": 0.48533052409797206,
129002 | +   "grad_norm": 2.1240391731262207,
129003 | +   "learning_rate": 2.574664208585726e-05,
129004 | +   "loss": 1.2508,
129005 | +   "step": 18428
129006 | + },
129007 | + {
129008 | +   "epoch": 0.4853568606794838,
129009 | +   "grad_norm": 2.0256903171539307,
129010 | +   "learning_rate": 2.574532525678167e-05,
129011 | +   "loss": 1.4946,
129012 | +   "step": 18429
129013 | + },
129014 | + {
129015 | +   "epoch": 0.48538319726099555,
129016 | +   "grad_norm": 2.7355430126190186,
129017 | +   "learning_rate": 2.5744008427706083e-05,
129018 | +   "loss": 2.0422,
129019 | +   "step": 18430
129020 | + },
129021 | + {
129022 | +   "epoch": 0.48540953384250723,
129023 | +   "grad_norm": 1.7283477783203125,
129024 | +   "learning_rate": 2.57426915986305e-05,
129025 | +   "loss": 1.4768,
129026 | +   "step": 18431
129027 | + },
129028 | + {
129029 | +   "epoch": 0.485435870424019,
129030 | +   "grad_norm": 3.126765727996826,
129031 | +   "learning_rate": 2.5741374769554914e-05,
129032 | +   "loss": 1.2515,
129033 | +   "step": 18432
129034 | + },
129035 | + {
129036 | +   "epoch": 0.48546220700553067,
129037 | +   "grad_norm": 3.3315203189849854,
129038 | +   "learning_rate": 2.574005794047933e-05,
129039 | +   "loss": 2.1508,
129040 | +   "step": 18433
129041 | + },
129042 | + {
129043 | +   "epoch": 0.4854885435870424,
129044 | +   "grad_norm": 1.8240033388137817,
129045 | +   "learning_rate": 2.5738741111403742e-05,
129046 | +   "loss": 1.3215,
129047 | +   "step": 18434
129048 | + },
129049 | + {
129050 | +   "epoch": 0.4855148801685541,
129051 | +   "grad_norm": 1.2926433086395264,
129052 | +   "learning_rate": 2.5737424282328154e-05,
129053 | +   "loss": 1.5452,
129054 | +   "step": 18435
129055 | + },
129056 | + {
129057 | +   "epoch": 0.48554121675006584,
129058 | +   "grad_norm": 3.809260845184326,
129059 | +   "learning_rate": 2.5736107453252566e-05,
129060 | +   "loss": 1.4537,
129061 | +   "step": 18436
129062 | + },
129063 | + {
129064 | +   "epoch": 0.4855675533315776,
129065 | +   "grad_norm": 2.7715842723846436,
129066 | +   "learning_rate": 2.5734790624176985e-05,
129067 | +   "loss": 0.8447,
129068 | +   "step": 18437
129069 | + },
129070 | + {
129071 | +   "epoch": 0.48559388991308927,
129072 | +   "grad_norm": 5.419256687164307,
129073 | +   "learning_rate": 2.5733473795101398e-05,
129074 | +   "loss": 2.0753,
129075 | +   "step": 18438
129076 | + },
129077 | + {
129078 | +   "epoch": 0.485620226494601,
129079 | +   "grad_norm": 1.581094741821289,
129080 | +   "learning_rate": 2.573215696602581e-05,
129081 | +   "loss": 1.4794,
129082 | +   "step": 18439
129083 | + },
129084 | + {
129085 | +   "epoch": 0.4856465630761127,
129086 | +   "grad_norm": 2.4569525718688965,
129087 | +   "learning_rate": 2.5730840136950225e-05,
129088 | +   "loss": 1.0419,
129089 | +   "step": 18440
129090 | + },
129091 | + {
129092 | +   "epoch": 0.48567289965762445,
129093 | +   "grad_norm": 2.8132851123809814,
129094 | +   "learning_rate": 2.5729523307874638e-05,
129095 | +   "loss": 1.2611,
129096 | +   "step": 18441
129097 | + },
129098 | + {
129099 | +   "epoch": 0.48569923623913613,
129100 | +   "grad_norm": 3.084397315979004,
129101 | +   "learning_rate": 2.5728206478799057e-05,
129102 | +   "loss": 2.0227,
129103 | +   "step": 18442
129104 | + },
129105 | + {
129106 | +   "epoch": 0.4857255728206479,
129107 | +   "grad_norm": 3.458346128463745,
129108 | +   "learning_rate": 2.572688964972347e-05,
129109 | +   "loss": 2.8425,
129110 | +   "step": 18443
129111 | + },
129112 | + {
129113 | +   "epoch": 0.4857519094021596,
129114 | +   "grad_norm": 2.111849069595337,
129115 | +   "learning_rate": 2.572557282064788e-05,
129116 | +   "loss": 1.9156,
129117 | +   "step": 18444
129118 | + },
129119 | + {
129120 | +   "epoch": 0.4857782459836713,
129121 | +   "grad_norm": 1.8623846769332886,
129122 | +   "learning_rate": 2.5724255991572293e-05,
129123 | +   "loss": 2.0972,
129124 | +   "step": 18445
129125 | + },
129126 | + {
129127 | +   "epoch": 0.48580458256518305,
129128 | +   "grad_norm": 2.8974735736846924,
129129 | +   "learning_rate": 2.5722939162496705e-05,
129130 | +   "loss": 1.4019,
129131 | +   "step": 18446
129132 | + },
129133 | + {
129134 | +   "epoch": 0.48583091914669474,
129135 | +   "grad_norm": 1.8519647121429443,
129136 | +   "learning_rate": 2.5721622333421124e-05,
129137 | +   "loss": 2.0956,
129138 | +   "step": 18447
129139 | + },
129140 | + {
129141 | +   "epoch": 0.4858572557282065,
129142 | +   "grad_norm": 3.604668617248535,
129143 | +   "learning_rate": 2.5720305504345537e-05,
129144 | +   "loss": 0.9024,
129145 | +   "step": 18448
129146 | + },
129147 | + {
129148 | +   "epoch": 0.4858835923097182,
129149 | +   "grad_norm": 2.3852856159210205,
129150 | +   "learning_rate": 2.571898867526995e-05,
129151 | +   "loss": 1.6787,
129152 | +   "step": 18449
129153 | + },
129154 | + {
129155 | +   "epoch": 0.4859099288912299,
129156 | +   "grad_norm": 1.5685434341430664,
129157 | +   "learning_rate": 2.5717671846194364e-05,
129158 | +   "loss": 1.896,
129159 | +   "step": 18450
129160 | + },
129161 | + {
129162 | +   "epoch": 0.48593626547274166,
129163 | +   "grad_norm": 3.2516274452209473,
129164 | +   "learning_rate": 2.571635501711878e-05,
129165 | +   "loss": 1.9251,
129166 | +   "step": 18451
129167 | + },
129168 | + {
129169 | +   "epoch": 0.48596260205425335,
129170 | +   "grad_norm": 1.8900718688964844,
129171 | +   "learning_rate": 2.5715038188043196e-05,
129172 | +   "loss": 1.4476,
129173 | +   "step": 18452
129174 | + },
129175 | + {
129176 | +   "epoch": 0.4859889386357651,
129177 | +   "grad_norm": 3.0719430446624756,
129178 | +   "learning_rate": 2.5713721358967608e-05,
129179 | +   "loss": 1.9875,
129180 | +   "step": 18453
129181 | + },
129182 | + {
129183 | +   "epoch": 0.4860152752172768,
129184 | +   "grad_norm": 2.0585649013519287,
129185 | +   "learning_rate": 2.571240452989202e-05,
129186 | +   "loss": 0.7618,
129187 | +   "step": 18454
129188 | + },
129189 | + {
129190 | +   "epoch": 0.4860416117987885,
129191 | +   "grad_norm": 1.8559067249298096,
129192 | +   "learning_rate": 2.5711087700816432e-05,
129193 | +   "loss": 1.5609,
129194 | +   "step": 18455
129195 | + },
129196 | + {
129197 | +   "epoch": 0.48606794838030026,
129198 | +   "grad_norm": 2.169935941696167,
129199 | +   "learning_rate": 2.570977087174085e-05,
129200 | +   "loss": 2.1225,
129201 | +   "step": 18456
129202 | + },
129203 | + {
129204 | +   "epoch": 0.48609428496181195,
129205 | +   "grad_norm": 2.295045852661133,
129206 | +   "learning_rate": 2.5708454042665263e-05,
129207 | +   "loss": 1.6724,
129208 | +   "step": 18457
129209 | + },
129210 | + {
129211 | +   "epoch": 0.4861206215433237,
129212 | +   "grad_norm": 1.7074811458587646,
129213 | +   "learning_rate": 2.5707137213589676e-05,
129214 | +   "loss": 1.6424,
129215 | +   "step": 18458
129216 | + },
129217 | + {
129218 | +   "epoch": 0.4861469581248354,
129219 | +   "grad_norm": 4.542028903961182,
129220 | +   "learning_rate": 2.570582038451409e-05,
129221 | +   "loss": 1.4893,
129222 | +   "step": 18459
129223 | + },
129224 | + {
129225 | +   "epoch": 0.4861732947063471,
129226 | +   "grad_norm": 1.9196478128433228,
129227 | +   "learning_rate": 2.5704503555438503e-05,
129228 | +   "loss": 1.4574,
129229 | +   "step": 18460
129230 | + },
129231 | + {
129232 | +   "epoch": 0.4861996312878588,
129233 | +   "grad_norm": 2.9205195903778076,
129234 | +   "learning_rate": 2.5703186726362922e-05,
129235 | +   "loss": 1.4391,
129236 | +   "step": 18461
129237 | + },
129238 | + {
129239 | +   "epoch": 0.48622596786937056,
129240 | +   "grad_norm": 1.8574875593185425,
129241 | +   "learning_rate": 2.5701869897287335e-05,
129242 | +   "loss": 2.1417,
129243 | +   "step": 18462
129244 | + },
129245 | + {
129246 | +   "epoch": 0.4862523044508823,
129247 | +   "grad_norm": 2.81107234954834,
129248 | +   "learning_rate": 2.5700553068211747e-05,
129249 | +   "loss": 2.5147,
129250 | +   "step": 18463
129251 | + },
129252 | + {
129253 | +   "epoch": 0.486278641032394,
129254 | +   "grad_norm": 2.6533210277557373,
129255 | +   "learning_rate": 2.569923623913616e-05,
129256 | +   "loss": 1.7436,
129257 | +   "step": 18464
129258 | + },
129259 | + {
129260 | +   "epoch": 0.48630497761390573,
129261 | +   "grad_norm": 4.165168762207031,
129262 | +   "learning_rate": 2.5697919410060578e-05,
129263 | +   "loss": 2.0054,
129264 | +   "step": 18465
129265 | + },
129266 | + {
129267 | +   "epoch": 0.4863313141954174,
129268 | +   "grad_norm": 1.9256856441497803,
129269 | +   "learning_rate": 2.569660258098499e-05,
129270 | +   "loss": 1.9637,
129271 | +   "step": 18466
129272 | + },
129273 | + {
129274 | +   "epoch": 0.48635765077692916,
129275 | +   "grad_norm": 1.7320513725280762,
129276 | +   "learning_rate": 2.5695285751909402e-05,
129277 | +   "loss": 1.8098,
129278 | +   "step": 18467
129279 | + },
129280 | + {
129281 | +   "epoch": 0.48638398735844085,
129282 | +   "grad_norm": 2.1129705905914307,
129283 | +   "learning_rate": 2.5693968922833818e-05,
129284 | +   "loss": 1.1088,
129285 | +   "step": 18468
129286 | + },
129287 | + {
129288 | +   "epoch": 0.4864103239399526,
129289 | +   "grad_norm": 5.155686378479004,
129290 | +   "learning_rate": 2.569265209375823e-05,
129291 | +   "loss": 1.3488,
129292 | +   "step": 18469
129293 | + },
129294 | + {
129295 | +   "epoch": 0.48643666052146435,
129296 | +   "grad_norm": 3.6876680850982666,
129297 | +   "learning_rate": 2.569133526468265e-05,
129298 | +   "loss": 1.6031,
129299 | +   "step": 18470
129300 | + },
129301 | + {
129302 | +   "epoch": 0.486462997102976,
129303 | +   "grad_norm": 1.8359041213989258,
129304 | +   "learning_rate": 2.569001843560706e-05,
129305 | +   "loss": 1.4716,
129306 | +   "step": 18471
129307 | + },
129308 | + {
129309 | +   "epoch": 0.48648933368448777,
129310 | +   "grad_norm": 2.9589924812316895,
129311 | +   "learning_rate": 2.5688701606531474e-05,
129312 | +   "loss": 0.6421,
129313 | +   "step": 18472
129314 | + },
129315 | + {
129316 | +   "epoch": 0.48651567026599946,
129317 | +   "grad_norm": 1.6885493993759155,
129318 | +   "learning_rate": 2.5687384777455886e-05,
129319 | +   "loss": 2.1624,
129320 | +   "step": 18473
129321 | + },
129322 | + {
129323 | +   "epoch": 0.4865420068475112,
129324 | +   "grad_norm": 3.8228204250335693,
129325 | +   "learning_rate": 2.5686067948380298e-05,
129326 | +   "loss": 1.9418,
129327 | +   "step": 18474
129328 | + },
129329 | + {
129330 | +   "epoch": 0.4865683434290229,
129331 | +   "grad_norm": 3.8486132621765137,
129332 | +   "learning_rate": 2.5684751119304717e-05,
129333 | +   "loss": 1.9678,
129334 | +   "step": 18475
129335 | + },
129336 | + {
129337 | +   "epoch": 0.48659468001053463,
129338 | +   "grad_norm": 2.923513412475586,
129339 | +   "learning_rate": 2.568343429022913e-05,
129340 | +   "loss": 1.6008,
129341 | +   "step": 18476
129342 | + },
129343 | + {
129344 | +   "epoch": 0.4866210165920464,
129345 | +   "grad_norm": 2.0346717834472656,
129346 | +   "learning_rate": 2.568211746115354e-05,
129347 | +   "loss": 1.6052,
129348 | +   "step": 18477
129349 | + },
129350 | + {
129351 | +   "epoch": 0.48664735317355806,
129352 | +   "grad_norm": 4.474893569946289,
129353 | +   "learning_rate": 2.5680800632077957e-05,
129354 | +   "loss": 1.6256,
129355 | +   "step": 18478
129356 | + },
129357 | + {
129358 | +   "epoch": 0.4866736897550698,
129359 | +   "grad_norm": 4.485257625579834,
129360 | +   "learning_rate": 2.567948380300237e-05,
129361 | +   "loss": 1.468,
129362 | +   "step": 18479
129363 | + },
129364 | + {
129365 | +   "epoch": 0.4867000263365815,
129366 | +   "grad_norm": 2.5774567127227783,
129367 | +   "learning_rate": 2.5678166973926788e-05,
129368 | +   "loss": 1.6469,
129369 | +   "step": 18480
129370 | + },
129371 | + {
129372 | +   "epoch": 0.48672636291809324,
129373 | +   "grad_norm": 2.047030448913574,
129374 | +   "learning_rate": 2.56768501448512e-05,
129375 | +   "loss": 2.1457,
129376 | +   "step": 18481
129377 | + },
129378 | + {
129379 | +   "epoch": 0.4867526994996049,
129380 | +   "grad_norm": 1.8974688053131104,
129381 | +   "learning_rate": 2.5675533315775613e-05,
129382 | +   "loss": 2.2784,
129383 | +   "step": 18482
129384 | + },
129385 | + {
129386 | +   "epoch": 0.48677903608111667,
129387 | +   "grad_norm": 4.346259593963623,
129388 | +   "learning_rate": 2.5674216486700025e-05,
129389 | +   "loss": 1.9088,
129390 | +   "step": 18483
129391 | + },
129392 | + {
129393 | +   "epoch": 0.4868053726626284,
129394 | +   "grad_norm": 1.8092743158340454,
129395 | +   "learning_rate": 2.5672899657624444e-05,
129396 | +   "loss": 1.8927,
129397 | +   "step": 18484
129398 | + },
129399 | + {
129400 | +   "epoch": 0.4868317092441401,
129401 | +   "grad_norm": 3.205528736114502,
129402 | +   "learning_rate": 2.5671582828548856e-05,
129403 | +   "loss": 1.3568,
129404 | +   "step": 18485
129405 | + },
129406 | + {
129407 | +   "epoch": 0.48685804582565184,
129408 | +   "grad_norm": 1.6814615726470947,
129409 | +   "learning_rate": 2.567026599947327e-05,
129410 | +   "loss": 2.2728,
129411 | +   "step": 18486
129412 | + },
129413 | + {
129414 | +   "epoch": 0.48688438240716353,
129415 | +   "grad_norm": 5.064326763153076,
129416 | +   "learning_rate": 2.5668949170397684e-05,
129417 | +   "loss": 1.3509,
129418 | +   "step": 18487
129419 | + },
129420 | + {
129421 | +   "epoch": 0.4869107189886753,
129422 | +   "grad_norm": 3.4415476322174072,
129423 | +   "learning_rate": 2.5667632341322096e-05,
129424 | +   "loss": 2.8161,
129425 | +   "step": 18488
129426 | + },
129427 | + {
129428 | +   "epoch": 0.486937055570187,
129429 | +   "grad_norm": 2.522833824157715,
129430 | +   "learning_rate": 2.5666315512246515e-05,
129431 | +   "loss": 1.0294,
129432 | +   "step": 18489
129433 | + },
129434 | + {
129435 | +   "epoch": 0.4869633921516987,
129436 | +   "grad_norm": 2.1585633754730225,
129437 | +   "learning_rate": 2.5664998683170927e-05,
129438 | +   "loss": 1.3564,
129439 | +   "step": 18490
129440 | + },
129441 | + {
129442 | +   "epoch": 0.48698972873321045,
129443 | +   "grad_norm": 2.394378900527954,
129444 | +   "learning_rate": 2.566368185409534e-05,
129445 | +   "loss": 2.4195,
129446 | +   "step": 18491
129447 | + },
129448 | + {
129449 | +   "epoch": 0.48701606531472214,
129450 | +   "grad_norm": 2.5911078453063965,
129451 | +   "learning_rate": 2.5662365025019752e-05,
129452 | +   "loss": 1.2326,
129453 | +   "step": 18492
129454 | + },
129455 | + {
129456 | +   "epoch": 0.4870424018962339,
129457 | +   "grad_norm": 1.7802774906158447,
129458 | +   "learning_rate": 2.5661048195944164e-05,
129459 | +   "loss": 0.6364,
129460 | +   "step": 18493
129461 | + },
129462 | + {
129463 | +   "epoch": 0.48706873847774557,
129464 | +   "grad_norm": 2.638225555419922,
129465 | +   "learning_rate": 2.5659731366868583e-05,
129466 | +   "loss": 1.918,
129467 | +   "step": 18494
129468 | + },
129469 | + {
129470 | +   "epoch": 0.4870950750592573,
129471 | +   "grad_norm": 1.846629023551941,
129472 | +   "learning_rate": 2.5658414537792995e-05,
129473 | +   "loss": 1.646,
129474 | +   "step": 18495
129475 | + },
129476 | + {
129477 | +   "epoch": 0.48712141164076905,
129478 | +   "grad_norm": 2.197716236114502,
129479 | +   "learning_rate": 2.565709770871741e-05,
129480 | +   "loss": 1.7315,
129481 | +   "step": 18496
129482 | + },
129483 | + {
129484 | +   "epoch": 0.48714774822228074,
129485 | +   "grad_norm": 1.91517174243927,
129486 | +   "learning_rate": 2.5655780879641823e-05,
129487 | +   "loss": 1.6118,
129488 | +   "step": 18497
129489 | + },
129490 | + {
129491 | +   "epoch": 0.4871740848037925,
129492 | +   "grad_norm": 4.605914115905762,
129493 | +   "learning_rate": 2.5654464050566242e-05,
129494 | +   "loss": 1.6682,
129495 | +   "step": 18498
129496 | + },
129497 | + {
129498 | +   "epoch": 0.48720042138530417,
129499 | +   "grad_norm": 2.0153045654296875,
129500 | +   "learning_rate": 2.5653147221490654e-05,
129501 | +   "loss": 1.3794,
129502 | +   "step": 18499
129503 | + },
129504 | + {
129505 | +   "epoch": 0.4872267579668159,
129506 | +   "grad_norm": 1.849891185760498,
129507 | +   "learning_rate": 2.5651830392415066e-05,
129508 | +   "loss": 1.4523,
129509 | +   "step": 18500
129510 |   }
129511 |   ],
129512 |   "logging_steps": 1,

129526 |   "attributes": {}
129527 |   }
129528 |   },
129529 | + "total_flos": 3.6567889688508826e+17,
129530 |   "train_batch_size": 1,
129531 |   "trial_name": null,
129532 |   "trial_params": null
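The file changed above is the standard `trainer_state.json` that the Hugging Face `Trainer` writes into each checkpoint: since `"logging_steps": 1`, the `log_history` list gains one record per optimizer step, each carrying `epoch`, `grad_norm`, `learning_rate`, `loss`, and `step`. A minimal sketch of how the logged loss in this checkpoint could be inspected after downloading it; the local path is illustrative, not part of the commit:

import json

# Illustrative path: wherever last-checkpoint/ was fetched to.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Keep only records that logged a training loss.
losses = [(rec["step"], rec["loss"]) for rec in state["log_history"] if "loss" in rec]
print(f"global_step={state['global_step']}, logged records={len(losses)}")

# Per-step loss is noisy (train_batch_size is 1), so average a trailing window.
window = 500
tail = [loss for _, loss in losses[-window:]]
print(f"mean loss over last {len(tail)} logged steps: {sum(tail) / len(tail):.4f}")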